ODFPY 1.2.0
 
Loading...
Searching...
No Matches
opendocument.py
Go to the documentation of this file.
1# -*- coding: utf-8 -*-
2# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
3#
4# This library is free software; you can redistribute it and/or
5# modify it under the terms of the GNU Lesser General Public
6# License as published by the Free Software Foundation; either
7# version 2.1 of the License, or (at your option) any later version.
8#
9# This library is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12# Lesser General Public License for more details.
13#
14# You should have received a copy of the GNU Lesser General Public
15# License along with this library; if not, write to the Free Software
16# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17#
18# Contributor(s):
19#
20# Copyright (C) 2014 Georges Khaznadar <georgesk@debian.org>
21# migration to Python3, JavaDOC comments and automatic
22# build of documentation
23#
24
25__doc__="""Use OpenDocument to generate your documents."""
26
27import zipfile, time, uuid, sys, mimetypes, copy, os.path
28
29# to allow Python3 to access modules in the same path
30sys.path.append(os.path.dirname(__file__))
31
32# using BytesIO provides a cleaner interface than StringIO
33# with both Python2 and Python3: the programmer must care to
34# convert strings or unicode to bytes, which is valid for Python 2 and 3.
35from io import StringIO, BytesIO
36
37from odf.namespaces import *
38import odf.manifest as manifest
39import odf.meta as meta
40from odf.office import *
41import odf.element as element
42from odf.attrconverters import make_NCName
43from xml.sax.xmlreader import InputSource
44from odf.odfmanifest import manifestlist
45import codecs
46
if sys.version_info[0] >= 3:
    # Python 3 has no separate unicode type: alias it to str so the rest
    # of the module can keep calling unicode()
    unicode = str
49
50__version__= TOOLSVERSION
51
52
# XML declaration written at the start of every generated XML part
_XMLPROLOGUE = u"<?xml version='1.0' encoding='UTF-8'?>\n"

# Value assigned to ZipInfo.external_attr for every member written by this
# module. 2175008768 == 0o100644 << 16 (Unix mode in the high 16 bits:
# regular file, rw-r--r--).
UNIXPERMS = 2175008768

# Tags stored as the first field of the records kept in the Pictures dict
IS_FILENAME = 0
IS_IMAGE = 1

# We need at least Python 2.2.
# Compare (major, minor) as a tuple: testing the two numbers independently
# would reject releases such as 3.0 and 3.1, whose minor number is below 2.
assert sys.version_info[:2] >= (2, 2)
71
72#sys.setrecursionlimit(100)
73#The recursion limit is set conservative so mistakes like
74# s=content() s.addElement(s) won't eat up too much processor time.
75
76
# Maps every OpenDocument MIME type handled here to its file-name suffix.
# All keys share the same 'application/vnd.oasis.opendocument.' stem, so only
# the distinguishing tail of each MIME type is spelled out below.
odmimetypes = {
    u'application/vnd.oasis.opendocument.' + kind: suffix
    for kind, suffix in (
        (u'text', u'.odt'),
        (u'text-template', u'.ott'),
        (u'graphics', u'.odg'),
        (u'graphics-template', u'.otg'),
        (u'presentation', u'.odp'),
        (u'presentation-template', u'.otp'),
        (u'spreadsheet', u'.ods'),
        (u'spreadsheet-template', u'.ots'),
        (u'chart', u'.odc'),
        (u'chart-template', u'.otc'),
        (u'image', u'.odi'),
        (u'image-template', u'.oti'),
        (u'formula', u'.odf'),
        (u'formula-template', u'.otf'),
        (u'text-master', u'.odm'),
        (u'text-web', u'.oth'),
    )
}
97
98
100
102
107
def __init__(self, filename, mediatype, content=None):
    """
    Records a filename, a media type and an optional bytes content.
    @param filename unicode string, stored as self.filename
    @param mediatype unicode string, stored as self.mediatype
    @param content bytes, or None when there is no content;
    stored as self.content
    """
    assert isinstance(filename, type(u""))
    assert isinstance(mediatype, type(u""))
    # identity test rather than "== None", which would dispatch to __eq__
    assert content is None or isinstance(content, type(b""))

    self.mediatype = mediatype
    self.filename = filename
    self.content = content
116
117
122
124 thumbnail = None
125
126
130
131 def __init__(self, mimetype, add_generator=True):
132 assert(type(mimetype)==type(u""))
133 assert(isinstance(add_generator,True.__class__))
134
135 self.mimetype = mimetype
136 self.childobjects = []
137 self._extra_extra = []
138 self.folder = u"" # Always empty for toplevel documents
139 self.topnode = Document(mimetype=self.mimetype)
140 self.topnode.ownerDocument = self
141
142 self.clear_caches()
143
144 self.Pictures = {}
145 self.meta = Meta()
146 self.topnode.addElement(self.meta)
147 if add_generator:
148 self.meta.addElement(meta.Generator(text=TOOLSVERSION))
150 self.topnode.addElement(self.scripts)
152 self.topnode.addElement(self.fontfacedecls)
154 self.topnode.addElement(self.settings)
155 self.styles = Styles()
156 self.topnode.addElement(self.styles)
158 self.topnode.addElement(self.automaticstyles)
160 self.topnode.addElement(self.masterstyles)
161 self.body = Body()
162 self.topnode.addElement(self.body)
163
def rebuild_caches(self, node=None):
    """
    Registers a whole subtree in the caches: calls build_caches() on the
    node, then recurses into each child that is an element node.
    @param node root of the subtree; self.topnode is used when omitted
    """
    start = self.topnode if node is None else node
    self.build_caches(start)
    for child in start.childNodes:
        if child.nodeType != element.Node.ELEMENT_NODE:
            continue  # only element nodes are cached
        self.rebuild_caches(child)
170
171
173
179
182
def build_caches(self, elt):
    """
    Adds one element to the per-qname cache; a style:style element is
    additionally passed to __register_stylename(). A text:style-name
    attribute that refers to a renamed style is rewritten to the new name.
    @param elt the element to register
    """
    # A plain isinstance(elt, element.Element) was not enough for the
    # original author: under Python 3 the type of elt showed up as
    # odf.element.Element in one test, hence the check against both names.
    import odf.element
    assert isinstance(elt, (element.Element, odf.element.Element))

    self.element_dictelement_dict.setdefault(elt.qname, []).append(elt)
    if elt.qname == (STYLENS, u'style'):
        self.__register_stylename(elt)  # Add to style dictionary

    styleref = elt.getAttrNS(TEXTNS, u'style-name')
    if styleref is None or styleref not in self._styles_ooo_fix:
        return
    elt.setAttrNS(TEXTNS, u'style-name', self._styles_ooo_fix[styleref])
199
200
203
def remove_from_caches(self, elt):
    """
    Removes an element and, recursively, its element children from the
    per-qname cache; a style:style element is also dropped from the
    style dictionary.
    @param elt the element that has been removed from the document
    """
    # See remark in build_caches about the following assertion
    import odf.element
    assert isinstance(elt, (element.Element, odf.element.Element))

    self.element_dictelement_dict[elt.qname].remove(elt)
    for e in elt.childNodes:
        if e.nodeType == element.Node.ELEMENT_NODE:
            self.remove_from_caches(e)

    if elt.qname == (STYLENS, u'style'):
        # __register_stylename() does not record a style that has no name,
        # so the key may be absent: pop() with a default instead of "del"
        # avoids a KeyError in that case.
        self._styles_dict_styles_dict.pop(elt.getAttrNS(STYLENS, u'name'), None)
216
217
224
225 def __register_stylename(self, elt):
226 assert(isinstance(elt, element.Element))
227
228 name = elt.getAttrNS(STYLENS, u'name')
229 if name is None:
230 return
231 if elt.parentNode.qname in ((OFFICENS,u'styles'), (OFFICENS,u'automatic-styles')):
233 newname = u'M'+name # Rename style
234 self._styles_ooo_fix[name] = newname
235 # From here on all references to the old name will refer to the new one
236 name = newname
237 elt.setAttrNS(STYLENS, u'name', name)
238 self._styles_dict_styles_dict[name] = elt
239
240
248
def toXml(self, filename=u''):
    """
    Serializes the XML prologue followed by self.body, and either returns
    the text or writes it UTF-8 encoded to a file.
    @param filename unicode string: path of the file to write; when it is
    empty the XML is returned instead
    @return the XML as a unicode string when filename is empty, else None
    """
    assert isinstance(filename, type(u""))

    xml = StringIO()
    xml.write(_XMLPROLOGUE)
    self.body.toXml(0, xml)
    if not filename:
        return xml.getvalue()
    # the context manager closes the file even when the write fails
    with codecs.open(filename, 'w', encoding='utf-8') as f:
        f.write(xml.getvalue())
    return None
266
267
270
def xml(self):
    """
    Serializes the whole document tree (self.topnode) after refreshing
    the generator element.
    @return the XML prologue plus the tree, as UTF-8 encoded bytes
    """
    self.__replaceGenerator()
    buf = StringIO()
    # the prologue is written the same way under Python 2 and Python 3
    buf.write(_XMLPROLOGUE)
    self.topnode.toXml(0, buf)
    return buf.getvalue().encode("utf-8")
280
281
282
285
def contentxml(self):
    """
    Generates the content.xml part: scripts and font face declarations
    (each only when non-empty), the automatic styles referenced from
    styles, automatic styles or body, then the body.
    @return the part as UTF-8 encoded bytes
    """
    out = StringIO()
    out.write(_XMLPROLOGUE)
    root = DocumentContent()
    root.write_open_tag(0, out)
    for section in (self.scripts, self.fontfacedecls):
        if section.hasChildNodes():
            section.toXml(1, out)
    auto = AutomaticStyles()
    used = self._used_auto_styles([self.styles, self.automaticstyles, self.body])
    if used:
        auto.write_open_tag(1, out)
        for style in used:
            style.toXml(2, out)
        auto.write_close_tag(1, out)
    else:
        # nothing referenced: serialize the empty element as a whole
        auto.toXml(1, out)
    self.body.toXml(1, out)
    root.write_close_tag(0, out)
    return out.getvalue().encode("utf-8")
307
308
314
def __manifestxml(self):
    """
    Serializes self.manifest, preceded by the XML prologue.
    @return the manifest as a unicode string
    """
    buf = StringIO()
    buf.write(_XMLPROLOGUE)
    self.manifest.toXml(0, buf)
    text = buf.getvalue()
    assert type(text) is type(u"")
    return text
322
323
326
def metaxml(self):
    """
    Generates the meta.xml part: refreshes the generator element, then
    serializes self.meta wrapped in a DocumentMeta element.
    @return the part as a unicode string
    """
    self.__replaceGenerator()
    wrapper = DocumentMeta()
    wrapper.addElement(self.meta)
    buf = StringIO()
    buf.write(_XMLPROLOGUE)
    wrapper.toXml(0, buf)
    text = buf.getvalue()
    assert type(text) is type(u"")
    return text
337
338
341
def settingsxml(self):
    """
    Generates the settings.xml part: self.settings wrapped in a
    DocumentSettings element, preceded by the XML prologue.
    @return the part as a unicode string
    """
    wrapper = DocumentSettings()
    wrapper.addElement(self.settings)
    buf = StringIO()
    # the prologue is written the same way under Python 2 and Python 3
    buf.write(_XMLPROLOGUE)
    wrapper.toXml(0, buf)
    text = buf.getvalue()
    assert type(text) is type(u"")
    return text
354
355
362
def _parseoneelement(self, top, stylenamelist):
    """
    Walks the element children of top recursively and collects the values
    of their style-referencing attributes.
    @param top the node whose descendants are inspected
    @param stylenamelist list of names found so far; new names are
    appended to it
    @return the list of style names, each one appearing once
    """
    style_attributes = (
        (CHARTNS, u'style-name'),
        (DRAWNS, u'style-name'),
        (DRAWNS, u'text-style-name'),
        (PRESENTATIONNS, u'style-name'),
        (STYLENS, u'data-style-name'),
        (STYLENS, u'list-style-name'),
        (STYLENS, u'page-layout-name'),
        (STYLENS, u'style-name'),
        (TABLENS, u'default-cell-style-name'),
        (TABLENS, u'style-name'),
        (TEXTNS, u'style-name'),
    )
    for child in top.childNodes:
        if child.nodeType != element.Node.ELEMENT_NODE:
            continue
        for namespace, localname in style_attributes:
            stylename = child.getAttrNS(namespace, localname)
            if stylename and stylename not in stylenamelist:
                # due to the polymorphism of getAttrNS(),
                # a unicode type is enforced for the stored names
                stylenamelist.append(unicode(stylename))
        stylenamelist = self._parseoneelement(child, stylenamelist)
    return stylenamelist
386
387
396
def _used_auto_styles(self, segments):
    """
    Finds the automatic styles that are referenced from the given parts
    of the document.
    @param segments list of nodes scanned with _parseoneelement()
    @return list of the element children of self.automaticstyles whose
    style:name is among the collected names
    """
    names = []
    for segment in segments:
        names = self._parseoneelement(segment, names)

    used = [style for style in self.automaticstyles.childNodes
            if isinstance(style, element.Element)
            and style.getAttrNS(STYLENS, u'name') in names]

    # check the type of the returned data
    assert all(isinstance(style, element.Element) for style in used)

    return used
412
413
416
def stylesxml(self):
    """
    Generates the styles.xml part: font face declarations (when
    non-empty), the styles, the automatic styles referenced from the
    master styles, and the master styles (when non-empty).
    @return the part as a unicode string
    """
    out = StringIO()
    out.write(_XMLPROLOGUE)
    root = DocumentStyles()
    root.write_open_tag(0, out)
    if self.fontfacedecls.hasChildNodes():
        self.fontfacedecls.toXml(1, out)
    self.styles.toXml(1, out)

    # the automatic-styles element is always emitted, even when empty
    auto = AutomaticStyles()
    auto.write_open_tag(1, out)
    for style in self._used_auto_styles([self.masterstyles]):
        style.toXml(2, out)
    auto.write_close_tag(1, out)

    if self.masterstyles.hasChildNodes():
        self.masterstyles.toXml(1, out)
    root.write_close_tag(0, out)

    text = out.getvalue()
    assert type(text) is type(u"")
    return text
438
439
449
def addPicture(self, filename, mediatype=None, content=None):
    """
    Registers a picture in self.Pictures.
    Without content, filename is the path of the file to store: the
    picture is registered under a generated "Pictures/<hex uuid><ext>"
    name and the media type is guessed from the filename when not given.
    With content, filename itself is used as the name in the archive.
    @param filename unicode string: a path, or the archive name when
    content is given
    @param mediatype unicode string, or None to have it guessed
    @param content bytes of the picture, or None
    @return the name under which the picture is registered
    """
    if content is not None:
        manifestfn = filename
        self.Pictures[manifestfn] = (IS_IMAGE, content, mediatype)
        # the type checks only concern this branch
        assert isinstance(filename, type(u""))
        assert isinstance(content, type(b""))
        return manifestfn

    if mediatype is None:
        mediatype, encoding = mimetypes.guess_type(filename)
    if mediatype is None:
        mediatype = u''
        # fall back to whatever follows the last dot of the filename
        try:
            ext = filename[filename.rindex(u'.'):]
        except ValueError:
            ext = u''
    else:
        # guess_extension() returns None for a type it does not know;
        # without the fallback the name would end in the text "None"
        ext = mimetypes.guess_extension(mediatype) or u''
    manifestfn = u"Pictures/%s%s" % (uuid.uuid4().hex.upper(), ext)
    self.Pictures[manifestfn] = (IS_FILENAME, filename, mediatype)
    return manifestfn
472
473
482
def addPictureFromFile(self, filename, mediatype=None):
    """
    Registers a picture that is read from a file when the document is
    saved, under a generated "Pictures/<hex uuid><ext>" name.
    @param filename unicode string: path of the picture file
    @param mediatype unicode string, or None to have it guessed from
    the filename
    @return the name under which the picture is registered
    """
    if mediatype is None:
        mediatype, encoding = mimetypes.guess_type(filename)
    if mediatype is None:
        mediatype = u''
        # fall back to whatever follows the last dot of the filename
        try:
            ext = filename[filename.rindex(u'.'):]
        except ValueError:
            ext = u''
    else:
        # guess_extension() returns None for a type it does not know;
        # without the fallback the name would end in the text "None"
        ext = mimetypes.guess_extension(mediatype) or u''
    manifestfn = u"Pictures/%s%s" % (uuid.uuid4().hex.upper(), ext)
    self.Pictures[manifestfn] = (IS_FILENAME, filename, mediatype)

    assert isinstance(filename, type(u""))
    assert isinstance(mediatype, type(u""))

    return manifestfn
499
500
509
def addPictureFromString(self, content, mediatype):
    """
    Registers a picture given as bytes, under a generated
    "Pictures/<hex uuid><ext>" name.
    @param content bytes of the picture
    @param mediatype unicode string, used to choose the extension
    @return the name under which the picture is registered
    """
    assert isinstance(content, type(b""))
    assert isinstance(mediatype, type(u""))

    # guess_extension() returns None for a type it does not know;
    # without the fallback the name would end in the text "None"
    ext = mimetypes.guess_extension(mediatype) or u''
    manifestfn = u"Pictures/%s%s" % (uuid.uuid4().hex.upper(), ext)
    self.Pictures[manifestfn] = (IS_IMAGE, content, mediatype)
    return manifestfn
518
519
523
def addThumbnail(self, filecontent=None):
    """
    Sets the thumbnail of the document.
    @param filecontent bytes of the thumbnail image; when None, the
    result of thumbnail.thumbnail() from the thumbnail module is used
    """
    # None is the documented default and must pass the check, otherwise
    # the branch below that loads the built-in thumbnail is unreachable
    assert filecontent is None or isinstance(filecontent, type(b""))

    if filecontent is None:
        import thumbnail
        self.thumbnail = thumbnail.thumbnail()
    else:
        self.thumbnail = filecontent
532
533
539
def addObject(self, document, objectname=None):
    """
    Adds an object (subdocument) to self.childobjects and sets its folder.
    @param document an OpenDocument instance
    @param objectname unicode string used as the folder of the
    subdocument; when None the folder is "<own folder>/Object <n>",
    n being the number of child objects after the addition
    @return the folder of the subdocument prefixed with a dot
    """
    assert isinstance(document, OpenDocument)
    assert objectname is None or type(objectname) is type(u"")

    self.childobjects.append(document)
    if objectname is not None:
        document.folder = objectname
    else:
        position = len(self.childobjects)
        document.folder = u"%s/Object %d" % (self.folder, position)
    return u".%s" % document.folder
550
551
557
def _savePictures(self, anObject, folder):
    """
    Writes the pictures of a document, then of its subobjects, to the
    open archive self._z, adding a manifest entry for each one.
    @param anObject the OpenDocument whose pictures are saved
    @param folder unicode string prepended to every archive name
    """
    assert isinstance(anObject, OpenDocument)
    assert isinstance(folder, type(u""))

    for arcname, picturerec in anObject.Pictures.items():
        what_it_is, fileobj, mediatype = picturerec
        fullpath = u"%s%s" % (folder, arcname)
        self.manifest.addElement(manifest.FileEntry(fullpath=fullpath, mediatype=mediatype))
        if what_it_is == IS_FILENAME:
            self._z.write(fileobj, fullpath, zipfile.ZIP_STORED)
        else:
            # store in-memory pictures under the same folder-qualified name
            # as the manifest entry and the file-based branch above
            zi = zipfile.ZipInfo(str(fullpath), self._now)
            zi.compress_type = zipfile.ZIP_STORED
            zi.external_attr = UNIXPERMS
            self._z.writestr(zi, fileobj)
    # According to section 17.7.3 in ODF 1.1, the pictures folder should
    # not have a manifest entry, so none is added for "Pictures/" itself.

    # Look in subobjects
    for subobjectnum, subobject in enumerate(anObject.childobjects, 1):
        self._savePictures(subobject, u'%sObject %d/' % (folder, subobjectnum))
582
583
590
def __replaceGenerator(self):
    """
    Removes every meta:generator child of self.meta and appends a fresh
    one whose text is TOOLSVERSION.
    """
    stale = [node for node in self.meta.childNodes
             if hasattr(node, 'qname') and node.qname == (METANS, u'generator')]
    for node in stale:
        self.meta.removeChild(node)
    self.meta.addElement(meta.Generator(text=TOOLSVERSION))
596
597
604
def save(self, outputfile, addsuffix=False):
    """
    Saves the document as a ZIP archive.
    @param outputfile unicode string: the file name, or u'-' to write
    to the standard output
    @param addsuffix when True, the suffix matching self.mimetype in
    odmimetypes (u'.xxx' if unknown) is appended to the file name
    """
    if outputfile == u'-':
        # ZipFile writes bytes: under Python 3 the binary stream behind
        # sys.stdout is its "buffer" attribute
        target = getattr(sys.stdout, "buffer", sys.stdout)
    else:
        if addsuffix:
            outputfile = outputfile + odmimetypes.get(self.mimetype, u'.xxx')
        target = outputfile
    outputfp = zipfile.ZipFile(target, "w")
    try:
        self.__zipwrite(outputfp)
    finally:
        # always finish the archive and release the file
        outputfp.close()
615
616
620
def write(self, outputfp):
    """
    User API to write the document, in ZIP format, to an open file object.
    @param outputfp file object opened for binary writing; it is left
    open for the caller
    """
    zipoutputfp = zipfile.ZipFile(outputfp, "w")
    try:
        self.__zipwrite(zipoutputfp)
    finally:
        # close() writes the central directory of the archive; without it
        # the data is only completed when the object is garbage collected
        zipoutputfp.close()
624
625
631
def __zipwrite(self, outputfp):
    """
    Writes the whole document to an open ZipFile: the mimetype member,
    the XML parts, the pictures, the thumbnail, the extra files and
    finally the manifest.
    @param outputfp a zipfile.ZipFile opened for writing
    """
    assert isinstance(outputfp, zipfile.ZipFile)

    # state shared with _saveXmlObjects() and _savePictures(); it only
    # exists while the archive is being written
    self._z = outputfp
    self._now = time.localtime()[:6]
    self.manifest = manifest.Manifest()

    def member(arcname, compression):
        # ZipInfo carrying the common timestamp and permissions
        info = zipfile.ZipInfo(arcname, self._now)
        info.compress_type = compression
        info.external_attr = UNIXPERMS
        return info

    # Write mimetype, uncompressed
    self._z.writestr(member('mimetype', zipfile.ZIP_STORED),
                     self.mimetype.encode("utf-8"))

    self._saveXmlObjects(self, u"")

    # Write pictures
    self._savePictures(self, u"")

    # Write the thumbnail
    if self.thumbnail is not None:
        for fullpath in (u"Thumbnails/", u"Thumbnails/thumbnail.png"):
            self.manifest.addElement(manifest.FileEntry(fullpath=fullpath, mediatype=u''))
        self._z.writestr(member(u"Thumbnails/thumbnail.png", zipfile.ZIP_DEFLATED),
                         self.thumbnail)

    # Write any extra files
    for op in self._extra_extra:
        if op.filename == u"META-INF/documentsignatures.xml":
            continue  # Don't save signatures
        self.manifest.addElement(manifest.FileEntry(fullpath=op.filename, mediatype=op.mediatype))
        # Python 3 takes the unicode name as is; otherwise it is encoded
        if sys.version_info[0] == 3:
            arcname = op.filename
        else:
            arcname = op.filename.encode('utf-8')
        info = member(arcname, zipfile.ZIP_DEFLATED)
        if op.content is not None:
            self._z.writestr(info, op.content)

    # Write manifest
    self._z.writestr(member(u"META-INF/manifest.xml", zipfile.ZIP_DEFLATED),
                     self.__manifestxml())
    del self._z
    del self._now
    del self.manifest
679
680
681
687
def _saveXmlObjects(self, anObject, folder):
    """
    Writes the XML parts of a document, then of its subobjects, to the
    open archive self._z, adding a manifest entry for each part.
    @param anObject the OpenDocument whose parts are saved
    @param folder unicode string prepended to the archive names
    """
    assert isinstance(anObject, OpenDocument)
    assert type(folder) is type(u"")

    is_toplevel = (self == anObject)

    def store(arcname, payload):
        # manifest entry first, then the deflated member itself
        self.manifest.addElement(manifest.FileEntry(fullpath=arcname, mediatype=u"text/xml"))
        info = zipfile.ZipInfo(arcname, self._now)
        info.compress_type = zipfile.ZIP_DEFLATED
        info.external_attr = UNIXPERMS
        self._z.writestr(info, payload)

    # entry for the document itself: u"/" for the top-level document,
    # its folder for a subobject
    own_path = u"/" if is_toplevel else folder
    self.manifest.addElement(manifest.FileEntry(fullpath=own_path, mediatype=anObject.mimetype))

    # Write styles
    store(u"%sstyles.xml" % folder, anObject.stylesxml().encode("utf-8"))

    # Write content
    store(u"%scontent.xml" % folder, anObject.contentxml())

    # Write settings
    if anObject.settings.hasChildNodes():
        store(u"%ssettings.xml" % folder, anObject.settingsxml().encode("utf-8"))

    # Write meta, for the top-level document only
    if is_toplevel:
        store(u"meta.xml", anObject.metaxml().encode("utf-8"))

    # Write subobjects
    subobjectnum = 0
    for subobject in anObject.childobjects:
        subobjectnum += 1
        self._saveXmlObjects(subobject, u'%sObject %d/' % (folder, subobjectnum))
731
732# Document's DOM methods
733
738
def createElement(self, elt):
    """
    Inconvenient interface to create an element, but follows XML-DOM.
    @param elt a callable that builds the element; it is called with
    check_grammar=False
    @return whatever the callable returns
    """
    # elt is called below, so it has to be a factory: the previous check
    # required an Element instance, which contradicts that call.
    # NOTE(review): getElementsByType() expects a function from text.py,
    # draw.py etc. for the same kind of argument.
    assert callable(elt)

    return elt(check_grammar=False)
747
748
752
def createTextNode(self, data):
    """
    Method to create a text node.
    @param data unicode string: the text of the node
    @return a new element.Text built from data
    """
    assert type(data) is type(u"")

    node = element.Text(data)
    return node
757
758
762
def createCDATASection(self, data):
    """
    Method to create a CDATA section.
    @param data unicode string: the content of the section
    @return a new element.CDATASection built from data
    """
    assert isinstance(data, type(u""))

    # the parameter is named "data"; the former reference to an undefined
    # name "cdata" raised NameError on every call
    return element.CDATASection(data)
767
768
771
def getMediaType(self):
    """
    Returns the media type.
    @return self.mimetype, a unicode string
    """
    mediatype = self.mimetype
    assert type(mediatype) is type(u"")

    return mediatype
776
777
781
def getStyleByName(self, name):
    """
    Finds a style object based on the name.
    @param name unicode string; it is converted with make_NCName()
    before the lookup
    @return the style element, or None when no style has that name
    """
    assert isinstance(name, type(u""))

    ncname = make_NCName(name)
    if self._styles_dict_styles_dict == {}:
        self.rebuild_caches()
    result = self._styles_dict_styles_dict.get(ncname, None)

    # an unknown name yields None, which must not trip the type check
    assert result is None or isinstance(result, element.Element)
    return result
792
793
798
def getElementsByType(self, elt):
    """
    Gets elements based on the type, which is a function from text.py,
    draw.py etc.
    @param elt a function; it is called with check_grammar=False to
    obtain a sample element whose qname selects the cache entry
    @return the list of cached elements with that qname (possibly empty)
    """
    import types
    assert isinstance(elt, types.FunctionType)

    # "elt" rather than "element" so the module name is not shadowed
    sample = elt(check_grammar=False)
    assert isinstance(sample, element.Element)

    if self.element_dictelement_dict == {}:
        self.rebuild_caches()

    found = self.element_dictelement_dict.get(sample.qname, [])
    assert all(isinstance(e, element.Element) for e in found)

    return found
821
822# Convenience functions
823
826
828 doc = OpenDocument(u'application/vnd.oasis.opendocument.chart')
829 doc.chart = Chart()
830 doc.body.addElement(doc.chart)
831 return doc
832
833
836
838 doc = OpenDocument(u'application/vnd.oasis.opendocument.graphics')
839 doc.drawing = Drawing()
840 doc.body.addElement(doc.drawing)
841 return doc
842
843
846
848 doc = OpenDocument(u'application/vnd.oasis.opendocument.image')
849 doc.image = Image()
850 doc.body.addElement(doc.image)
851 return doc
852
853
856
858 doc = OpenDocument(u'application/vnd.oasis.opendocument.presentation')
859 doc.presentation = Presentation()
860 doc.body.addElement(doc.presentation)
861 return doc
862
863
866
868 doc = OpenDocument(u'application/vnd.oasis.opendocument.spreadsheet')
869 doc.spreadsheet = Spreadsheet()
870 doc.body.addElement(doc.spreadsheet)
871 return doc
872
873
876
878 doc = OpenDocument(u'application/vnd.oasis.opendocument.text')
879 doc.text = Text()
880 doc.body.addElement(doc.text)
881 return doc
882
883
886
888 doc = OpenDocument(u'application/vnd.oasis.opendocument.text-master')
889 doc.text = Text()
890 doc.body.addElement(doc.text)
891 return doc
892
893
901
def __loadxmlparts(z, manifest, doc, objectpath):
    """
    Parses the settings, meta, content and styles parts found under
    objectpath into a document.
    @param z the zipfile.ZipFile to read from
    @param manifest dict; only the parts whose name is a key are read
    @param doc the OpenDocument that receives the parsed content
    @param objectpath unicode string prepended to the part names
    """
    assert isinstance(z, zipfile.ZipFile)
    assert type(manifest) is dict
    assert isinstance(doc, OpenDocument)
    assert type(objectpath) is type(u"")

    from odf.load import LoadParser
    from defusedxml.sax import make_parser
    from xml.sax import handler
    from xml.sax._exceptions import SAXParseException

    part_names = (u'settings.xml', u'meta.xml', u'content.xml', u'styles.xml')
    for name in part_names:
        xmlfile = objectpath + name
        if xmlfile not in manifest:
            continue

        try:
            xmlpart = z.read(xmlfile).decode("utf-8")
            doc._parsing = xmlfile

            parser = make_parser()
            parser.setFeature(handler.feature_namespaces, 1)
            parser.setFeature(handler.feature_external_ges, 0)
            parser.setContentHandler(LoadParser(doc))
            parser.setErrorHandler(handler.ErrorHandler())

            source = InputSource()

            # declare the namespace prefixes that may be missing
            xmlpart = __fixXmlPart(xmlpart)

            source.setByteStream(BytesIO(xmlpart.encode("utf-8")))
            parser.parse(source)
            del doc._parsing
        except KeyError:
            pass
        except SAXParseException:
            print (u"====== SAX FAILED TO PARSE ==========\n", xmlpart)
944
945
953
def __fixXmlPart(xmlpart):
    """
    Declares the namespace prefixes that are missing from an XML part.
    For each requested prefix not declared in xmlpart, a declaration
    xmlns:<prefix>="urn:oasis:names:tc:opendocument:xmlns:<prefix>:1.0"
    is inserted in front of the first existing " xmlns:" declaration.
    @param xmlpart unicode string with the XML
    @return the XML with the added declarations; unchanged when it
    contains no " xmlns:" declaration at all
    """
    result = xmlpart
    requestedPrefixes = (u'meta', u'config', u'dc', u'style',
                         u'svg', u'fo', u'draw', u'table', u'form')
    for prefix in requestedPrefixes:
        if u' xmlns:{prefix}'.format(prefix=prefix) in xmlpart:
            continue  # already declared
        try:
            pos = result.index(u" xmlns:")
        except ValueError:
            # no declaration to anchor the insertion to
            continue
        toInsert = u' xmlns:{prefix}="urn:oasis:names:tc:opendocument:xmlns:{prefix}:1.0"'.format(prefix=prefix)
        result = result[:pos] + toInsert + result[pos:]
    return result
973
974
975
982
def __detectmimetype(zipfd, odffile):
    """
    Determines the media type of a document: first from the "mimetype"
    member, then from the "/" entry of the manifest, and otherwise
    assumes a text document.
    @param zipfd the zipfile.ZipFile of the document
    @param odffile not used by this function
    @return the media type as a unicode string
    """
    assert isinstance(zipfd, zipfile.ZipFile)

    try:
        return zipfd.read('mimetype').decode("utf-8")
    except Exception:
        # Best effort: fall through to the next mechanism.
        # "except Exception" lets KeyboardInterrupt and SystemExit through.
        pass

    manifestpart = zipfd.read('META-INF/manifest.xml')
    manifest = manifestlist(manifestpart)
    for mentry, mvalue in manifest.items():
        if mentry == "/":
            assert isinstance(mvalue['media-type'], type(u""))
            return mvalue['media-type']

    # Fall-through to last mechanism
    return u'application/vnd.oasis.opendocument.text'
1001
1002
1007
def load(odffile):
    """
    Loads an ODF file into an OpenDocument.
    @param odffile whatever zipfile.ZipFile accepts: a file name or a
    file object
    @return the OpenDocument; its first body child is also published
    under an attribute named after the document type (text, graphics,
    presentation, spreadsheet, chart, image or formula)
    """
    z = zipfile.ZipFile(odffile)
    try:
        mimetype = __detectmimetype(z, odffile)
        doc = OpenDocument(mimetype, add_generator=False)

        # Look in the manifest file to see which of the four files there are
        manifestpart = z.read('META-INF/manifest.xml')
        manifest = manifestlist(manifestpart)
        __loadxmlparts(z, manifest, doc, u'')
        for mentry, mvalue in manifest.items():
            if mentry[:9] == u"Pictures/" and len(mentry) > 9:
                doc.addPicture(mvalue['full-path'], mvalue['media-type'], z.read(mentry))
            elif mentry == u"Thumbnails/thumbnail.png":
                doc.addThumbnail(z.read(mentry))
            elif mentry in (u'settings.xml', u'meta.xml', u'content.xml', u'styles.xml'):
                pass
            # Load subobjects into structure
            elif mentry[:7] == u"Object " and len(mentry) < 11 and mentry[-1] == u"/":
                subdoc = OpenDocument(mvalue['media-type'], add_generator=False)
                doc.addObject(subdoc, u"/" + mentry[:-1])
                __loadxmlparts(z, manifest, subdoc, mentry)
            elif mentry[:7] == u"Object ":
                pass  # Don't load subobjects as opaque objects
            else:
                # NOTE(review): the constructor and __zipwrite in this listing
                # spell this attribute "_extra_extra"; confirm the real name.
                if mvalue['full-path'][-1] == u'/':
                    doc._extra.append(OpaqueObject(mvalue['full-path'], mvalue['media-type'], None))
                else:
                    doc._extra.append(OpaqueObject(mvalue['full-path'], mvalue['media-type'], z.read(mentry)))
                # Add the SUN junk here to the struct somewhere
                # It is cached data, so it can be out-of-date
    finally:
        # release the archive even when parsing fails
        z.close()

    b = doc.getElementsByType(Body)
    odf_stem = u'application/vnd.oasis.opendocument.'
    # A prefix match, so that e.g. "text-template" is handled like "text"
    for kind in ('text', 'graphics', 'presentation', 'spreadsheet',
                 'chart', 'image', 'formula'):
        if mimetype.startswith(odf_stem + kind):
            setattr(doc, kind, b[0].firstChild)
            if kind == 'graphics':
                # OpenDocumentDrawing() publishes the same node as
                # "drawing": offer both names for loaded documents
                doc.drawing = doc.graphics
            break

    return doc
1056
1057# vim: set expandtab sw=4 :
Creates an arbitrary element and is intended to be subclassed, not used on its own.
Definition element.py:361
just a record to bear a filename, a mediatype and a bytes content
__init__(self, filename, mediatype, content=None)
the constructor
createTextNode(self, data)
Method to create a text node.
getStyleByName(self, name)
Finds a style object based on the name.
metaxml(self)
Generates the meta.xml file.
clear_caches(self)
Clears internal caches.
stylesxml(self)
Generates the styles.xml file.
contentxml(self)
Generates the content.xml file.
addThumbnail(self, filecontent=None)
Add a fixed thumbnail. The thumbnail in the library is big, so this is pretty useless.
_savePictures(self, anObject, folder)
saves pictures contained in an object
createElement(self, elt)
Inconvenient interface to create an element, but follows XML-DOM.
createCDATASection(self, data)
Method to create a CDATA section.
settingsxml(self)
Generates the settings.xml file.
toXml(self, filename=u'')
converts the document to a valid Xml format.
save(self, outputfile, addsuffix=False)
Save the document under the filename.
getElementsByType(self, elt)
Gets elements based on the type, which is a function from text.py, draw.py etc.
_used_auto_styles(self, segments)
Loop through the masterstyles elements, and find the automatic styles that are used.
addPictureFromString(self, content, mediatype)
Add a picture from contents given as a Byte string.
_saveXmlObjects(self, anObject, folder)
save xml objects of an opendocument to some folder
_parseoneelement(self, top, stylenamelist)
Finds references to style objects in master-styles and add the style name to the style list if not al...
addObject(self, document, objectname=None)
Adds an object (subdocument).
remove_from_caches(self, elt)
Updates internal caches when an element has been removed.
__init__(self, mimetype, add_generator=True)
the constructor
build_caches(self, elt)
Builds internal caches; called from element.py.
rebuild_caches(self, node=None)
write(self, outputfp)
User API to write the ODF file to an open file descriptor. Writes the ZIP format.
addPicture(self, filename, mediatype=None, content=None)
Add a picture It uses the same convention as OOo, in that it saves the picture in the zipfile in the ...
addPictureFromFile(self, filename, mediatype=None)
Add a picture It uses the same convention as OOo, in that it saves the picture in the zipfile in the ...
getMediaType(self)
Returns the media type.
AutomaticStyles(**args)
Definition office.py:32
Document(version="1.2", **args)
Definition office.py:50
Meta(**args)
Definition office.py:83
Scripts(**args)
Definition office.py:92
Body(**args)
Definition office.py:38
FontFaceDecls(**args)
Definition office.py:71
Styles(**args)
Definition office.py:101
Spreadsheet(**args)
Definition office.py:98
DocumentContent(version="1.2", **args)
Definition office.py:53
Image(**args)
Definition office.py:77
Drawing(**args)
Definition office.py:65
DocumentStyles(version="1.2", **args)
Definition office.py:62
Text(**args)
Definition office.py:104
DocumentMeta(version="1.2", **args)
Definition office.py:56
MasterStyles(**args)
Definition office.py:80
Settings(**args)
Definition office.py:95
DocumentSettings(version="1.2", **args)
Definition office.py:59
Presentation(**args)
Definition office.py:86
Chart(**args)
Definition office.py:44
OpenDocumentDrawing()
Creates a drawing document.
OpenDocumentText()
Creates a text document.
OpenDocumentPresentation()
Creates a presentation document.
OpenDocumentImage()
Creates an image document.
OpenDocumentSpreadsheet()
Creates a spreadsheet document.
OpenDocumentTextMaster()
Creates a text master document.
OpenDocumentChart()
Creates a chart document.