ODFPY 1.2.0
 
Loading...
Searching...
No Matches
odf2xhtml.py
Go to the documentation of this file.
1#!/usr/bin/python
2# -*- coding: utf-8 -*-
3# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
4#
5# This library is free software; you can redistribute it and/or
6# modify it under the terms of the GNU Lesser General Public
7# License as published by the Free Software Foundation; either
8# version 2.1 of the License, or (at your option) any later version.
9#
10# This library is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13# Lesser General Public License for more details.
14#
15# You should have received a copy of the GNU Lesser General Public
16# License along with this library; if not, write to the Free Software
17# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18#
19# Contributor(s):
20#
21#import pdb
22#pdb.set_trace()
23
24import sys, os.path
25sys.path.append(os.path.dirname(__file__))
26from xml.sax import handler
27from xml.sax.saxutils import escape, quoteattr
28from xml.dom import Node
29
30from opendocument import load
31
32from odf.namespaces import ANIMNS, CHARTNS, CONFIGNS, DCNS, DR3DNS, DRAWNS, FONS, \
33 FORMNS, MATHNS, METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, SCRIPTNS, \
34 SMILNS, STYLENS, SVGNS, TABLENS, TEXTNS, XLINKNS
35
36# Handling of styles
37#
38# First there are font face declarations. These set up a font style that will be
39# referenced from a text-property. The declaration describes the font making
40# it possible for the application to find a similar font should the system not
41# have that particular one. The StyleToCSS stores these attributes to be used
42# for the CSS2 font declaration.
43#
44# Then there are default-styles. These set defaults for various style types:
45# "text", "paragraph", "section", "ruby", "table", "table-column", "table-row",
46# "table-cell", "graphic", "presentation", "drawing-page", "chart".
47# Since CSS2 can't refer to another style, ODF2XHTML add these to all
48# styles unless overridden.
49#
50# The real styles are declared in the <style:style> element. They have a
51# family referring to the default-styles, and may have a parent style.
52#
53# Styles have scope. The same name can be used for both paragraph and
54# character etc. styles Since CSS2 has no scope we use a prefix. (Not elegant)
55# In ODF a style can have a parent, these parents can be chained.
56
57
62
63 def __init__(self):
64 # Font declarations
65 self.fontdict = {}
66
67 # Fill-images from presentations for backgrounds
68 self.fillimages = {}
69
71 (DRAWNS,u'fill-image-name'): self.c_drawfillimage,
72 (FONS,u"background-color"): self.c_fo,
73 (FONS,u"border"): self.c_fo,
74 (FONS,u"border-bottom"): self.c_fo,
75 (FONS,u"border-left"): self.c_fo,
76 (FONS,u"border-right"): self.c_fo,
77 (FONS,u"border-top"): self.c_fo,
78 (FONS,u"color"): self.c_fo,
79 (FONS,u"font-family"): self.c_fo,
80 (FONS,u"font-size"): self.c_fo,
81 (FONS,u"font-style"): self.c_fo,
82 (FONS,u"font-variant"): self.c_fo,
83 (FONS,u"font-weight"): self.c_fo,
84 (FONS,u"line-height"): self.c_fo,
85 (FONS,u"margin"): self.c_fo,
86 (FONS,u"margin-bottom"): self.c_fo,
87 (FONS,u"margin-left"): self.c_fo,
88 (FONS,u"margin-right"): self.c_fo,
89 (FONS,u"margin-top"): self.c_fo,
90 (FONS,u"min-height"): self.c_fo,
91 (FONS,u"padding"): self.c_fo,
92 (FONS,u"padding-bottom"): self.c_fo,
93 (FONS,u"padding-left"): self.c_fo,
94 (FONS,u"padding-right"): self.c_fo,
95 (FONS,u"padding-top"): self.c_fo,
96 (FONS,u"page-width"): self.c_page_width,
97 (FONS,u"page-height"): self.c_page_height,
98 (FONS,u"text-align"): self.c_text_align,
99 (FONS,u"text-indent") :self.c_fo,
100 (TABLENS,u'border-model') :self.c_border_model,
101 (STYLENS,u'column-width') : self.c_width,
102 (STYLENS,u"font-name"): self.c_fn,
103 (STYLENS,u'horizontal-pos'): self.c_hp,
104 (STYLENS,u'text-position'): self.c_text_position,
105 (STYLENS,u'text-line-through-style'): self.c_text_line_through_style,
106 (STYLENS,u'text-underline-style'): self.c_text_underline_style,
107 (STYLENS,u'width') : self.c_width,
108 # FIXME Should do style:vertical-pos here
109 }
110
111
118 def save_font(self, name, family, generic):
119 htmlgeneric = "sans-serif"
120 if generic == "roman": htmlgeneric = "serif"
121 elif generic == "swiss": htmlgeneric = "sans-serif"
122 elif generic == "modern": htmlgeneric = "monospace"
123 elif generic == "decorative": htmlgeneric = "sans-serif"
124 elif generic == "script": htmlgeneric = "monospace"
125 elif generic == "system": htmlgeneric = "serif"
126 self.fontdict[name] = (family, htmlgeneric)
127
128
132 def c_drawfillimage(self, ruleset, sdict, rule, val):
133 sdict['background-image'] = "url('%s')" % self.fillimages[val]
134
135
136 def c_fo(self, ruleset, sdict, rule, val):
137 selector = rule[1]
138 sdict[selector] = val
139
140
141 def c_border_model(self, ruleset, sdict, rule, val):
142 if val == 'collapsing':
143 sdict['border-collapse'] ='collapse'
144 else:
145 sdict['border-collapse'] ='separate'
146
147
148 def c_width(self, ruleset, sdict, rule, val):
149 sdict['width'] = val
150
151
152 def c_text_align(self, ruleset, sdict, rule, align):
153 if align == "start": align = "left"
154 if align == "end": align = "right"
155 sdict['text-align'] = align
156
157
161 def c_fn(self, ruleset, sdict, rule, fontstyle):
162 generic = ruleset.get((STYLENS,'font-family-generic') )
163 if generic is not None:
164 self.save_font(fontstyle, fontstyle, generic)
165 family, htmlgeneric = self.fontdict.get(fontstyle, (fontstyle, 'serif'))
166 sdict['font-family'] = '%s, %s' % (family, htmlgeneric)
167
168
186 def c_text_position(self, ruleset, sdict, rule, tp):
187 textpos = tp.split(' ')
188 if len(textpos) == 2 and textpos[0] != "0%":
189 # Bug in OpenOffice. If vertical-align is 0% - ignore the text size.
190 sdict['font-size'] = textpos[1]
191 if textpos[0] == "super":
192 sdict['vertical-align'] = "33%"
193 elif textpos[0] == "sub":
194 sdict['vertical-align'] = "-33%"
195 else:
196 sdict['vertical-align'] = textpos[0]
197
198 def c_hp(self, ruleset, sdict, rule, hpos):
199 #FIXME: Frames wrap-style defaults to 'parallel', graphics to 'none'.
200 # It is properly set in the parent-styles, but the program doesn't
201 # collect the information.
202 wrap = ruleset.get((STYLENS,'wrap'),'parallel')
203 # Can have: from-left, left, center, right, from-inside, inside, outside
204 if hpos == "center":
205 sdict['margin-left'] = "auto"
206 sdict['margin-right'] = "auto"
207# else:
208# # force it to be *something* then delete it
209# sdict['margin-left'] = sdict['margin-right'] = ''
210# del sdict['margin-left'], sdict['margin-right']
211
212 if hpos in ("right","outside"):
213 if wrap in ( "left", "parallel","dynamic"):
214 sdict['float'] = "right"
215 elif wrap == "run-through":
216 sdict['position'] = "absolute" # Simulate run-through
217 sdict['top'] = "0"
218 sdict['right'] = "0";
219 else: # No wrapping
220 sdict['margin-left'] = "auto"
221 sdict['margin-right'] = "0cm"
222 elif hpos in ("left", "inside"):
223 if wrap in ( "right", "parallel","dynamic"):
224 sdict['float'] = "left"
225 elif wrap == "run-through":
226 sdict['position'] = "absolute" # Simulate run-through
227 sdict['top'] = "0"
228 sdict['left'] = "0"
229 else: # No wrapping
230 sdict['margin-left'] = "0cm"
231 sdict['margin-right'] = "auto"
232 elif hpos in ("from-left", "from-inside"):
233 if wrap in ( "right", "parallel"):
234 sdict['float'] = "left"
235 else:
236 sdict['position'] = "relative" # No wrapping
237 if (SVGNS,'x') in ruleset:
238 sdict['left'] = ruleset[(SVGNS,'x')]
239
240
243 def c_page_width(self, ruleset, sdict, rule, val):
244 sdict['width'] = val
245
246
249 def c_text_underline_style(self, ruleset, sdict, rule, val):
250 if val and val != "none":
251 sdict['text-decoration'] = "underline"
252
253
256 def c_text_line_through_style(self, ruleset, sdict, rule, val):
257 if val and val != "none":
258 sdict['text-decoration'] = "line-through"
259
260
261 def c_page_height(self, ruleset, sdict, rule, val):
262 sdict['height'] = val
263
264
267 def convert_styles(self, ruleset):
268 sdict = {}
269 procedures=[]
270 for rule,val in ruleset.items():
271 if rule[0] == '':
272 sdict[rule[1]] = val
273 continue
274 method = self.ruleconversions.get(rule, None )
275 if method:
276 procedures.append([method, ruleset, sdict, rule, val])
277 # this ensures that the procedures for horizontal position
278 # are run last! It is important since Python3 makes the order
279 # of dictionaries unpredictable
280 for p in filter(lambda x: x[0] != self.c_hp, procedures):
281 method, ruleset, sdict, rule, val = p
282 method(ruleset, sdict, rule, val)
283 for p in filter(lambda x: x[0] == self.c_hp, procedures):
284 method, ruleset, sdict, rule, val = p
285 method(ruleset, sdict, rule, val)
286
287 return sdict
288
289
291 def __init__(self):
292 self.stackstack = []
293
294 def push(self, tag, attrs):
295 self.stackstack.append( (tag, attrs) )
296
297 def pop(self):
298 item = self.stackstack.pop()
299 return item
300
301 def stackparent(self):
302 item = self.stackstack[-1]
303 return item[1]
304
305
306 def rfindattr(self, attr):
307 for tag, attrs in self.stackstack:
308 if attr in attrs:
309 return attrs[attr]
310 return None
311 def count_tags(self, tag):
312 c = 0
313 for ttag, tattrs in self.stackstack:
314 if ttag == tag: c = c + 1
315 return c
316
317special_styles = {
318 'S-Emphasis':'em',
319 'S-Citation':'cite',
320 'S-Strong_20_Emphasis':'strong',
321 'S-Variable':'var',
322 'S-Definition':'dfn',
323 'S-Teletype':'tt',
324 'P-Heading_20_1':'h1',
325 'P-Heading_20_2':'h2',
326 'P-Heading_20_3':'h3',
327 'P-Heading_20_4':'h4',
328 'P-Heading_20_5':'h5',
329 'P-Heading_20_6':'h6',
330# 'P-Caption':'caption',
331 'P-Addressee':'address',
332# 'P-List_20_Heading':'dt',
333# 'P-List_20_Contents':'dd',
334 'P-Preformatted_20_Text':'pre',
335# 'P-Table_20_Heading':'th',
336# 'P-Table_20_Contents':'td',
337# 'P-Text_20_body':'p'
338}
339
340#-----------------------------------------------------------------------------
341#
342# ODFCONTENTHANDLER
343#
344#-----------------------------------------------------------------------------
345
346class ODF2XHTML(handler.ContentHandler):
347
348 def __init__(self, generate_css=True, embedable=False):
349 # Tags
351 self.elements = {
352 (DCNS, 'title'): (self.s_processcont, self.e_dc_title),
353 (DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage),
354 (DCNS, 'creator'): (self.s_processcont, self.e_dc_creator),
355 (DCNS, 'description'): (self.s_processcont, self.e_dc_metatag),
356 (DCNS, 'date'): (self.s_processcont, self.e_dc_metatag),
357 (DRAWNS, 'custom-shape'): (self.s_custom_shape, self.e_custom_shape),
358 (DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame),
359 (DRAWNS, 'image'): (self.s_draw_image, None),
360 (DRAWNS, 'fill-image'): (self.s_draw_fill_image, None),
361 (DRAWNS, "layer-set"):(self.s_ignorexml, None),
362 (DRAWNS, 'object'): (self.s_draw_object, None),
363 (DRAWNS, 'object-ole'): (self.s_draw_object_ole, None),
364 (DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page),
365 (DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox),
366 (METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag),
367 (METANS, 'generator'):(self.s_processcont, self.e_dc_metatag),
368 (METANS, 'initial-creator'): (self.s_processcont, self.e_dc_metatag),
369 (METANS, 'keyword'): (self.s_processcont, self.e_dc_metatag),
370 (NUMBERNS, "boolean-style"):(self.s_ignorexml, None),
371 (NUMBERNS, "currency-style"):(self.s_ignorexml, None),
372 (NUMBERNS, "date-style"):(self.s_ignorexml, None),
373 (NUMBERNS, "number-style"):(self.s_ignorexml, None),
374 (NUMBERNS, "text-style"):(self.s_ignorexml, None),
375 (OFFICENS, "annotation"):(self.s_ignorexml, None),
376 (OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None),
377 (OFFICENS, "document"):(self.s_office_document_content, self.e_office_document_content),
378 (OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content),
379 (OFFICENS, "forms"):(self.s_ignorexml, None),
380 (OFFICENS, "master-styles"):(self.s_office_master_styles, None),
381 (OFFICENS, "meta"):(self.s_ignorecont, None),
382 (OFFICENS, "presentation"):(self.s_office_presentation, self.e_office_presentation),
383 (OFFICENS, "spreadsheet"):(self.s_office_spreadsheet, self.e_office_spreadsheet),
384 (OFFICENS, "styles"):(self.s_office_styles, None),
385 (OFFICENS, "text"):(self.s_office_text, self.e_office_text),
386 (OFFICENS, "scripts"):(self.s_ignorexml, None),
387 (OFFICENS, "settings"):(self.s_ignorexml, None),
388 (PRESENTATIONNS, "notes"):(self.s_ignorexml, None),
389# (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout),
390 (STYLENS, "default-page-layout"):(self.s_ignorexml, None),
391 (STYLENS, "default-style"):(self.s_style_default_style, self.e_style_default_style),
392 (STYLENS, "drawing-page-properties"):(self.s_style_handle_properties, None),
393 (STYLENS, "font-face"):(self.s_style_font_face, None),
394# (STYLENS, "footer"):(self.s_style_footer, self.e_style_footer),
395# (STYLENS, "footer-style"):(self.s_style_footer_style, None),
396 (STYLENS, "graphic-properties"):(self.s_style_handle_properties, None),
397 (STYLENS, "handout-master"):(self.s_ignorexml, None),
398# (STYLENS, "header"):(self.s_style_header, self.e_style_header),
399# (STYLENS, "header-footer-properties"):(self.s_style_handle_properties, None),
400# (STYLENS, "header-style"):(self.s_style_header_style, None),
401 (STYLENS, "master-page"):(self.s_style_master_page, None),
402 (STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None),
403 (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
404# (STYLENS, "page-layout"):(self.s_ignorexml, None),
405 (STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None),
406 (STYLENS, "style"):(self.s_style_style, self.e_style_style),
407 (STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None),
408 (STYLENS, "table-column-properties"):(self.s_style_handle_properties, None),
409 (STYLENS, "table-properties"):(self.s_style_handle_properties, None),
410 (STYLENS, "text-properties"):(self.s_style_handle_properties, None),
411 (SVGNS, 'desc'): (self.s_ignorexml, None),
412 (TABLENS, 'covered-table-cell'): (self.s_ignorexml, None),
413 (TABLENS, 'table-cell'): (self.s_table_table_cell, self.e_table_table_cell),
414 (TABLENS, 'table-column'): (self.s_table_table_column, None),
415 (TABLENS, 'table-row'): (self.s_table_table_row, self.e_table_table_row),
416 (TABLENS, 'table'): (self.s_table_table, self.e_table_table),
417 (TEXTNS, 'a'): (self.s_text_a, self.e_text_a),
418 (TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source),
419 (TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None),
420 (TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source),
421 (TEXTNS, 'bookmark'): (self.s_text_bookmark, None),
422 (TEXTNS, 'bookmark-start'): (self.s_text_bookmark, None),
423 (TEXTNS, 'bookmark-ref'): (self.s_text_bookmark_ref, self.e_text_a),
424 (TEXTNS, 'bookmark-ref-start'): (self.s_text_bookmark_ref, None),
425 (TEXTNS, 'h'): (self.s_text_h, self.e_text_h),
426 (TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source),
427 (TEXTNS, 'line-break'):(self.s_text_line_break, None),
428 (TEXTNS, "linenumbering-configuration"):(self.s_ignorexml, None),
429 (TEXTNS, "list"):(self.s_text_list, self.e_text_list),
430 (TEXTNS, "list-item"):(self.s_text_list_item, self.e_text_list_item),
431 (TEXTNS, "list-level-style-bullet"):(self.s_text_list_level_style_bullet, self.e_text_list_level_style_bullet),
432 (TEXTNS, "list-level-style-number"):(self.s_text_list_level_style_number, self.e_text_list_level_style_number),
433 (TEXTNS, "list-style"):(None, None),
434 (TEXTNS, "note"):(self.s_text_note, None),
435 (TEXTNS, "note-body"):(self.s_text_note_body, self.e_text_note_body),
436 (TEXTNS, "note-citation"):(None, self.e_text_note_citation),
437 (TEXTNS, "notes-configuration"):(self.s_ignorexml, None),
438 (TEXTNS, "object-index-source"):(self.s_text_x_source, self.e_text_x_source),
439 (TEXTNS, 'p'): (self.s_text_p, self.e_text_p),
440 (TEXTNS, 's'): (self.s_text_s, None),
441 (TEXTNS, 'span'): (self.s_text_span, self.e_text_span),
442 (TEXTNS, 'tab'): (self.s_text_tab, None),
443 (TEXTNS, "table-index-source"):(self.s_text_x_source, self.e_text_x_source),
444 (TEXTNS, "table-of-content-source"):(self.s_text_x_source, self.e_text_x_source),
445 (TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source),
446 }
447 if embedable:
448 self.set_embedable()
449 self._resetobject()
450
451
452 def set_plain(self):
454
455
456 def set_embedable(self):
457 self.elements[(OFFICENS, u"text")] = (None,None)
458 self.elements[(OFFICENS, u"spreadsheet")] = (None,None)
459 self.elements[(OFFICENS, u"presentation")] = (None,None)
460 self.elements[(OFFICENS, u"document-content")] = (None,None)
461
462
463
466 def add_style_file(self, stylefilename, media=None):
467 self.use_internal_css = False
468 self.stylefilename = stylefilename
469 if media:
470 self.metatags.append('<link rel="stylesheet" type="text/css" href="%s" media="%s"/>\n' % (stylefilename,media))
471 else:
472 self.metatags.append('<link rel="stylesheet" type="text/css" href="%s"/>\n' % (stylefilename))
473
475 # Footnotes and endnotes
476 self.notedict = {}
478 self.notebody = ''
479
480 def _resetobject(self):
481 self.lines = []
483 self.xmlfile = ''
484 self.title = ''
485 self.language = ''
486 self.creator = ''
487 self.data = []
489 self.htmlstack = []
490 self.pstack = []
493 self.listtypes = {}
494 self.headinglevels = [0, 0,0,0,0,0, 0,0,0,0,0] # level 0 to 10
495 self.use_internal_css = True
496 self.cs = StyleToCSS()
498
499 # Style declarations
501 self.styledict = {}
503
504 self._resetfootnotes()
505
506 # Tags from meta.xml
507 self.metatags = []
508
509
510 def writeout(self, s):
511 if s != '':
512 self._wfunc_wfunc(s)
513
514 def writedata(self):
515 d = ''.join(self.data)
516 if d != '':
517 self.writeout(escape(d))
518
519
520 def opentag(self, tag, attrs={}, block=False):
521 self.htmlstack.append((tag,attrs,block))
522 a = []
523 for key,val in attrs.items():
524 a.append('''%s=%s''' % (key, quoteattr(val)))
525 if len(a) == 0:
526 self.writeout("<%s>" % tag)
527 else:
528 self.writeout("<%s %s>" % (tag, " ".join(a)))
529 if block == True:
530 self.writeout("\n")
531
532
533 def closetag(self, tag, block=True):
534 self.htmlstack.pop()
535 self.writeout("</%s>" % tag)
536 if block == True:
537 self.writeout("\n")
538
539 def emptytag(self, tag, attrs={}):
540 a = []
541 for key,val in attrs.items():
542 a.append('''%s=%s''' % (key, quoteattr(val)))
543 self.writeout("<%s %s/>\n" % (tag, " ".join(a)))
544
545#--------------------------------------------------
546# Interface to parser
547#--------------------------------------------------
548 def characters(self, data):
550 self.data.append(data)
551
552 def startElementNS(self, tag, qname, attrs):
555 method = self.elements.get(tag, (None, None) )[0]
556 if method:
557 self.handle_starttag(tag, method, attrs)
558 else:
559 self.unknown_starttag(tag,attrs)
560 self.tagstack.push( tag, attrs )
561
562 def endElementNS(self, tag, qname):
563 stag, attrs = self.tagstack.pop()
565 method = self.elements.get(tag, (None, None) )[1]
566 if method:
567 self.handle_endtag(tag, attrs, method)
568 else:
569 self.unknown_endtag(tag, attrs)
571
572#--------------------------------------------------
573 def handle_starttag(self, tag, method, attrs):
574 method(tag,attrs)
575
576 def handle_endtag(self, tag, attrs, method):
577 method(tag, attrs)
578
579 def unknown_starttag(self, tag, attrs):
580 pass
581
582 def unknown_endtag(self, tag, attrs):
583 pass
584
585
588 def s_ignorexml(self, tag, attrs):
589 self.processelemprocesselem = False
590
591
592 def s_ignorecont(self, tag, attrs):
593 self.processcontprocesscont = False
594
595
596 def s_processcont(self, tag, attrs):
597 self.processcontprocesscont = True
598
599
600 def classname(self, attrs):
601 c = attrs.get((TEXTNS,'style-name'),'')
602 c = c.replace(".","_")
603 return c
604
605
606 def get_anchor(self, name):
607 if name not in self.anchorsanchors:
608 self.anchorsanchors[name] = "anchor%03d" % (len(self.anchorsanchors) + 1)
609 return self.anchorsanchors.get(name)
610
611
612#--------------------------------------------------
613
614 def purgedata(self):
615 self.data = []
616
617#-----------------------------------------------------------------------------
618#
619# Handle meta data
620#
621#-----------------------------------------------------------------------------
622
624 def e_dc_title(self, tag, attrs):
625 self.title = ''.join(self.data)
626 #self.metatags.append('<title>%s</title>\n' % escape(self.title))
627 self.data = []
628
629
631 def e_dc_metatag(self, tag, attrs):
632 self.metatags.append('<meta name="%s" content=%s/>\n' % (tag[1], quoteattr(''.join(self.data))))
633 self.data = []
634
635
637 def e_dc_contentlanguage(self, tag, attrs):
638 self.language = ''.join(self.data)
639 self.metatags.append('<meta http-equiv="content-language" content="%s"/>\n' % escape(self.language))
640 self.data = []
641
642
644 def e_dc_creator(self, tag, attrs):
645 self.creator = ''.join(self.data)
646 self.metatags.append('<meta http-equiv="creator" content="%s"/>\n' % escape(self.creator))
647 self.data = []
648
649
651 def s_custom_shape(self, tag, attrs):
652 anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound')
653 htmltag = 'div'
654 name = "G-" + attrs.get( (DRAWNS,'style-name'), "")
655 if name == 'G-':
656 name = "PR-" + attrs.get( (PRESENTATIONNS,'style-name'), "")
657 name = name.replace(".","_")
658 if anchor_type == "paragraph":
659 style = 'position:absolute;'
660 elif anchor_type == 'char':
661 style = "position:absolute;"
662 elif anchor_type == 'as-char':
663 htmltag = 'div'
664 style = ''
665 else:
666 style = "position: absolute;"
667 if (SVGNS,"width")in attrs:
668 style = style + "width:" + attrs[(SVGNS,"width")] + ";"
669 if (SVGNS,"height") in attrs:
670 style = style + "height:" + attrs[(SVGNS,"height")] + ";"
671 if (SVGNS,"x") in attrs:
672 style = style + "left:" + attrs[(SVGNS,"x")] + ";"
673 if (SVGNS,"y") in attrs:
674 style = style + "top:" + attrs[(SVGNS,"y")] + ";"
676 self.opentag(htmltag, {'class': name, 'style': style})
677 else:
678 self.opentag(htmltag)
679
680
682 def e_custom_shape(self, tag, attrs):
683 self.closetag('div')
684
685
687 def s_draw_frame(self, tag, attrs):
688 anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound')
689 htmltag = 'div'
690 name = "G-" + attrs.get( (DRAWNS,'style-name'), "")
691 if name == 'G-':
692 name = "PR-" + attrs.get( (PRESENTATIONNS,'style-name'), "")
693 name = name.replace(".","_")
694 if anchor_type == "paragraph":
695 style = 'position:relative;'
696 elif anchor_type == 'char':
697 style = "position:relative;"
698 elif anchor_type == 'as-char':
699 htmltag = 'div'
700 style = ''
701 else:
702 style = "position:absolute;"
703 if (SVGNS,"width") in attrs:
704 style = style + "width:" + attrs[(SVGNS,"width")] + ";"
705 if (SVGNS,"height") in attrs:
706 style = style + "height:" + attrs[(SVGNS,"height")] + ";"
707 if (SVGNS,"x") in attrs:
708 style = style + "left:" + attrs[(SVGNS,"x")] + ";"
709 if (SVGNS,"y") in attrs:
710 style = style + "top:" + attrs[(SVGNS,"y")] + ";"
712 self.opentag(htmltag, {'class': name, 'style': style})
713 else:
714 self.opentag(htmltag)
715
716
718 def e_draw_frame(self, tag, attrs):
719 self.closetag('div')
720
721 def s_draw_fill_image(self, tag, attrs):
722 name = attrs.get( (DRAWNS,'name'), "NoName")
723 imghref = attrs[(XLINKNS,"href")]
724 imghref = self.rewritelink(imghref)
725 self.cs.fillimages[name] = imghref
726
727
730 def rewritelink(self, imghref):
731 return imghref
732
733
735 def s_draw_image(self, tag, attrs):
736 parent = self.tagstack.stackparent()
737 anchor_type = parent.get((TEXTNS,'anchor-type'))
738 imghref = attrs[(XLINKNS,"href")]
739 imghref = self.rewritelink(imghref)
740 htmlattrs = {'alt':"", 'src':imghref }
742 if anchor_type != "char":
743 htmlattrs['style'] = "display: block;"
744 self.emptytag('img', htmlattrs)
745
746
748 def s_draw_object(self, tag, attrs):
749 objhref = attrs[(XLINKNS,"href")]
750 # Remove leading "./": from "./Object 1" to "Object 1"
751 # objhref = objhref [2:]
752
753 # Not using os.path.join since it fails to find the file on Windows.
754 # objcontentpath = '/'.join([objhref, 'content.xml'])
755
759 for c in self.document.childobjects:
760 if c.folder == objhref:
761 self._walknode(c.topnode)
762
763
765 def s_draw_object_ole(self, tag, attrs):
766 class_id = attrs[(DRAWNS,"class-id")]
767 if class_id and class_id.lower() == "00020803-0000-0000-c000-000000000046":
768 tagattrs = { 'name':'object_ole_graph', 'class':'ole-graph' }
769 self.opentag('a', tagattrs)
770 self.closetag('a', tagattrs)
771
772
776 def s_draw_page(self, tag, attrs):
777 name = attrs.get( (DRAWNS,'name'), "NoName")
778 stylename = attrs.get( (DRAWNS,'style-name'), "")
779 stylename = stylename.replace(".","_")
780 masterpage = attrs.get( (DRAWNS,'master-page-name'),"")
781 masterpage = masterpage.replace(".","_")
783 self.opentag('fieldset', {'class':"DP-%s MP-%s" % (stylename, masterpage) })
784 else:
785 self.opentag('fieldset')
786 self.opentag('legend')
787 self.writeout(escape(name))
788 self.closetag('legend')
789
790 def e_draw_page(self, tag, attrs):
791 self.closetag('fieldset')
792
793 def s_draw_textbox(self, tag, attrs):
794 style = ''
795 if (FONS,"min-height") in attrs:
796 style = style + "min-height:" + attrs[(FONS,"min-height")] + ";"
797 self.opentag('div')
798# self.opentag('div', {'style': style})
799
800
802 def e_draw_textbox(self, tag, attrs):
803 self.closetag('div')
804
805 def html_body(self, tag, attrs):
806 self.writedata()
808 self.opentag('style', {'type':"text/css"}, True)
809 self.writeout('/*<![CDATA[*/\n')
811 self.writeout('/*]]>*/\n')
812 self.closetag('style')
813 self.purgedata()
814 self.closetag('head')
815 self.opentag('body', block=True)
816
817 default_styles = """
818img { width: 100%; height: 100%; }
819* { padding: 0; margin: 0; background-color:white; }
820body { margin: 0 1em; }
821ol, ul { padding-left: 2em; }
822"""
823
825 for name in self.stylestackstylestack:
826 styles = self.styledict.get(name)
827 # Preload with the family's default style
828 if '__style-family'in styles and styles['__style-family'] in self.styledict:
829 familystyle = self.styledict[styles['__style-family']].copy()
830 del styles['__style-family']
831 for style, val in styles.items():
832 familystyle[style] = val
833 styles = familystyle
834 # Resolve the remaining parent styles
835 while '__parent-style-name' in styles and styles['__parent-style-name'] in self.styledict:
836 parentstyle = self.styledict[styles['__parent-style-name']].copy()
837 del styles['__parent-style-name']
838 for style, val in styles.items():
839 parentstyle[style] = val
840 styles = parentstyle
841 self.styledict[name] = styles
842 # Write the styles to HTML
843 self.writeout(self.default_styles)
844 for name in self.stylestackstylestack:
845 styles = self.styledict.get(name)
846 css2 = self.cs.convert_styles(styles)
847 self.writeout("%s {\n" % name)
848 for style, val in css2.items():
849 self.writeout("\t%s: %s;\n" % (style, val) )
850 self.writeout("}\n")
851
853 if self.currentnotecurrentnote == 0:
854 return
856 self.opentag('ol', {'style':'border-top: 1px solid black'}, True)
857 else:
858 self.opentag('ol')
859 for key in range(1,self.currentnotecurrentnote+1):
860 note = self.notedict[key]
861# for key,note in self.notedict.items():
862 self.opentag('li', { 'id':"footnote-%d" % key })
863# self.opentag('sup')
864# self.writeout(escape(note['citation']))
865# self.closetag('sup', False)
866 self.writeout(note['body'])
867 self.closetag('li')
868 self.closetag('ol')
869
870 def s_office_automatic_styles(self, tag, attrs):
871 if self.xmlfile == 'styles.xml':
872 self.autoprefix = "A"
873 else:
874 self.autoprefix = ""
875
876
877 def s_office_document_content(self, tag, attrs):
878 self.writeout('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ')
879 self.writeout('"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
880 self.opentag('html', {'xmlns':"http://www.w3.org/1999/xhtml"}, True)
881 self.opentag('head', block=True)
882 self.emptytag('meta', { 'http-equiv':"Content-Type", 'content':"text/html;charset=UTF-8"})
883 for metaline in self.metatags:
884 self.writeout(metaline)
885 self.writeout('<title>%s</title>\n' % escape(self.title))
886
887
888 def e_office_document_content(self, tag, attrs):
889 self.closetag('html')
890
891 def s_office_master_styles(self, tag, attrs):
892 """ """
893
894
898 def s_office_presentation(self, tag, attrs):
899 self.styledict['p'] = {(FONS,u'font-size'): u"24pt" }
900 self.styledict['presentation'] = {(FONS,u'font-size'): u"24pt" }
901 self.html_body(tag, attrs)
902
903 def e_office_presentation(self, tag, attrs):
904 self.generate_footnotes()
905 self.closetag('body')
906
907 def s_office_spreadsheet(self, tag, attrs):
908 self.html_body(tag, attrs)
909
910 def e_office_spreadsheet(self, tag, attrs):
911 self.generate_footnotes()
912 self.closetag('body')
913
914 def s_office_styles(self, tag, attrs):
915 self.autoprefix = ""
916
917
918 def s_office_text(self, tag, attrs):
919 self.styledict['frame'] = { (STYLENS,'wrap'): u'parallel'}
920 self.html_body(tag, attrs)
921
922 def e_office_text(self, tag, attrs):
923 self.generate_footnotes()
924 self.closetag('body')
925
926
929 def s_style_handle_properties(self, tag, attrs):
930 for key,attr in attrs.items():
931 self.styledict[self.currentstylecurrentstyle][key] = attr
932
933
934 familymap = {'frame':'frame', 'paragraph':'p', 'presentation':'presentation',
935 'text':'span','section':'div',
936 'table':'table','table-cell':'td','table-column':'col',
937 'table-row':'tr','graphic':'graphic' }
938
939
941 def s_style_default_style(self, tag, attrs):
942 family = attrs[(STYLENS,'family')]
943 htmlfamily = self.familymap.get(family,'unknown')
944 self.currentstylecurrentstyle = htmlfamily
945# self.stylestack.append(self.currentstyle)
946 self.styledict[self.currentstylecurrentstyle] = {}
947
948 def e_style_default_style(self, tag, attrs):
949 self.currentstylecurrentstyle = None
950
951
957 def s_style_font_face(self, tag, attrs):
958 name = attrs[(STYLENS,"name")]
959 family = attrs[(SVGNS,"font-family")]
960 generic = attrs.get( (STYLENS,'font-family-generic'),"" )
961 self.cs.save_font(name, family, generic)
962
963 def s_style_footer(self, tag, attrs):
964 self.opentag('div', { 'id':"footer" })
965 self.purgedata()
966
967 def e_style_footer(self, tag, attrs):
968 self.writedata()
969 self.closetag('div')
970 self.purgedata()
971
972 def s_style_footer_style(self, tag, attrs):
973 self.currentstylecurrentstyle = "@print #footer"
975 self.styledict[self.currentstylecurrentstyle] = {}
976
977 def s_style_header(self, tag, attrs):
978 self.opentag('div', { 'id':"header" })
979 self.purgedata()
980
981 def e_style_header(self, tag, attrs):
982 self.writedata()
983 self.closetag('div')
984 self.purgedata()
985
986 def s_style_header_style(self, tag, attrs):
987 self.currentstylecurrentstyle = "@print #header"
989 self.styledict[self.currentstylecurrentstyle] = {}
990
991
993 def s_style_default_page_layout(self, tag, attrs):
994 self.currentstylecurrentstyle = "@page"
996 self.styledict[self.currentstylecurrentstyle] = {}
997
998
1002 def s_style_page_layout(self, tag, attrs):
1003 name = attrs[(STYLENS,'name')]
1004 name = name.replace(".","_")
1005 self.currentstylecurrentstyle = ".PL-" + name
1007 self.styledict[self.currentstylecurrentstyle] = {}
1008
1009
1011 def e_style_page_layout(self, tag, attrs):
1012 self.currentstylecurrentstyle = None
1013
1014
1016 def s_style_master_page(self, tag, attrs):
1017 name = attrs[(STYLENS,'name')]
1018 name = name.replace(".","_")
1019
1020 self.currentstylecurrentstyle = ".MP-" + name
1022 self.styledict[self.currentstylecurrentstyle] = {('','position'):'relative'}
1023 # Then load the pagelayout style if we find it
1024 pagelayout = attrs.get( (STYLENS,'page-layout-name'), None)
1025 if pagelayout:
1026 pagelayout = ".PL-" + pagelayout
1027 if pagelayout in self.styledict:
1028 styles = self.styledict[pagelayout]
1029 for style, val in styles.items():
1030 self.styledict[self.currentstylecurrentstyle][style] = val
1031 else:
1032 self.styledict[self.currentstylecurrentstyle]['__parent-style-name'] = pagelayout
1033 self.s_ignorexml(tag, attrs)
1034
1035 # Short prefixes for class selectors
1036
1039 _familyshort = {'drawing-page':'DP', 'paragraph':'P', 'presentation':'PR',
1040 'text':'S', 'section':'D',
1041 'table':'T', 'table-cell':'TD', 'table-column':'TC',
1042 'table-row':'TR', 'graphic':'G' }
1043
1044
1050 def s_style_style(self, tag, attrs):
1051 name = attrs[(STYLENS,'name')]
1052 name = name.replace(".","_")
1053 family = attrs[(STYLENS,'family')]
1054 htmlfamily = self.familymap.get(family,'unknown')
1055 sfamily = self._familyshort.get(family,'X')
1056 name = "%s%s-%s" % (self.autoprefix, sfamily, name)
1057 parent = attrs.get( (STYLENS,'parent-style-name') )
1058 self.currentstylecurrentstyle = special_styles.get(name,"."+name)
1060 if self.currentstylecurrentstyle not in self.styledict:
1061 self.styledict[self.currentstylecurrentstyle] = {}
1062
1063 self.styledict[self.currentstylecurrentstyle]['__style-family'] = htmlfamily
1064
1065 # Then load the parent style if we find it
1066 if parent:
1067 parent = "%s-%s" % (sfamily, parent)
1068 parent = special_styles.get(parent, "."+parent)
1069 if parent in self.styledict:
1070 styles = self.styledict[parent]
1071 for style, val in styles.items():
1072 self.styledict[self.currentstylecurrentstyle][style] = val
1073 else:
1074 self.styledict[self.currentstylecurrentstyle]['__parent-style-name'] = parent
1075
1076
1078 def e_style_style(self, tag, attrs):
1079 self.currentstylecurrentstyle = None
1080
1081
1083 def s_table_table(self, tag, attrs):
1084 c = attrs.get( (TABLENS,'style-name'), None)
1086 c = c.replace(".","_")
1087 self.opentag('table',{ 'class': "T-%s" % c })
1088 else:
1089 self.opentag('table')
1090 self.purgedata()
1091
1092
1094 def e_table_table(self, tag, attrs):
1095 self.writedata()
1096 self.closetag('table')
1097 self.purgedata()
1098
1099
1100 def s_table_table_cell(self, tag, attrs):
1101 #FIXME: number-columns-repeated § 8.1.3
1102 #repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1))
1103 htmlattrs = {}
1104 rowspan = attrs.get( (TABLENS,'number-rows-spanned') )
1105 if rowspan:
1106 htmlattrs['rowspan'] = rowspan
1107 colspan = attrs.get( (TABLENS,'number-columns-spanned') )
1108 if colspan:
1109 htmlattrs['colspan'] = colspan
1110
1111 c = attrs.get( (TABLENS,'style-name') )
1112 if c:
1113 htmlattrs['class'] = 'TD-%s' % c.replace(".","_")
1114 self.opentag('td', htmlattrs)
1115 self.purgedata()
1116
1117
1118 def e_table_table_cell(self, tag, attrs):
1119 self.writedata()
1120 self.closetag('td')
1121 self.purgedata()
1122
1123
1124 def s_table_table_column(self, tag, attrs):
1125 c = attrs.get( (TABLENS,'style-name'), None)
1126 repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1))
1127 htmlattrs = {}
1128 if c:
1129 htmlattrs['class'] = "TC-%s" % c.replace(".","_")
1130 for x in range(repeated):
1131 self.emptytag('col', htmlattrs)
1132 self.purgedata()
1133
1134
1135 def s_table_table_row(self, tag, attrs):
1136 #FIXME: table:number-rows-repeated
1137 c = attrs.get( (TABLENS,'style-name'), None)
1138 htmlattrs = {}
1139 if c:
1140 htmlattrs['class'] = "TR-%s" % c.replace(".","_")
1141 self.opentag('tr', htmlattrs)
1142 self.purgedata()
1143
1144
1145 def e_table_table_row(self, tag, attrs):
1146 self.writedata()
1147 self.closetag('tr')
1148 self.purgedata()
1149
1150
1151 def s_text_a(self, tag, attrs):
1152 self.writedata()
1153 href = attrs[(XLINKNS,"href")].split("|")[0]
1154 if href[0] == "#":
1155 href = "#" + self.get_anchor(href[1:])
1156 self.opentag('a', {'href':href})
1157 self.purgedata()
1158
1159
1160 def e_text_a(self, tag, attrs):
1161 self.writedata()
1162 self.closetag('a', False)
1163 self.purgedata()
1164
1165
1166 def s_text_bookmark(self, tag, attrs):
1167 name = attrs[(TEXTNS,'name')]
1168 html_id = self.get_anchor(name)
1169 self.writedata()
1170 self.opentag('span', {'id':html_id})
1171 self.closetag('span', False)
1172 self.purgedata()
1173
1174
1175 def s_text_bookmark_ref(self, tag, attrs):
1176 name = attrs[(TEXTNS,'ref-name')]
1177 html_id = "#" + self.get_anchor(name)
1178 self.writedata()
1179 self.opentag('a', {'href':html_id})
1180 self.purgedata()
1181
1182
1183 def s_text_h(self, tag, attrs):
1184 level = int(attrs[(TEXTNS,'outline-level')])
1185 if level > 6: level = 6 # Heading levels go only to 6 in XHTML
1186 if level < 1: level = 1
1187 self.headinglevels[level] = self.headinglevels[level] + 1
1188 name = self.classname(attrs)
1189 for x in range(level + 1,10):
1190 self.headinglevels[x] = 0
1191 special = special_styles.get("P-"+name)
1192 if special or not self.generate_cssgenerate_cssgenerate_css:
1193 self.opentag('h%s' % level)
1194 else:
1195 self.opentag('h%s' % level, {'class':"P-%s" % name })
1196 self.purgedata()
1197
1198
1202 def e_text_h(self, tag, attrs):
1203 self.writedata()
1204 level = int(attrs[(TEXTNS,'outline-level')])
1205 if level > 6: level = 6 # Heading levels go only to 6 in XHTML
1206 if level < 1: level = 1
1207 lev = self.headinglevels[1:level+1]
1208 outline = '.'.join(map(str,lev) )
1209 heading = ''.join(self.data)
1210 if self.title == '': self.title = heading
1211 anchor = self.get_anchor("%s.%s" % ( outline, heading))
1212 self.opentag('a', {'id': anchor} )
1213 self.closetag('a', False)
1214 self.closetag('h%s' % level)
1215 self.purgedata()
1216
1217
1218 def s_text_line_break(self, tag, attrs):
1219 self.writedata()
1220 self.emptytag('br')
1221 self.purgedata()
1222
1223
1227 def s_text_list(self, tag, attrs):
1228 name = attrs.get( (TEXTNS,'style-name') )
1229 level = self.tagstack.count_tags(tag) + 1
1230 if name:
1231 name = name.replace(".","_")
1232 else:
1233 # FIXME: If a list is contained in a table cell or text box,
1234 # the list level must return to 1, even though the table or
1235 # textbox itself may be nested within another list.
1236 name = self.tagstack.rfindattr( (TEXTNS,'style-name') )
1237 list_class = "%s_%d" % (name, level)
1239 self.opentag('%s' % self.listtypes.get(list_class,'ul'), {'class': list_class })
1240 else:
1241 self.opentag('%s' % self.listtypes.get(list_class,'ul'))
1242 self.purgedata()
1243
1244
1245 def e_text_list(self, tag, attrs):
1246 self.writedata()
1247 name = attrs.get( (TEXTNS,'style-name') )
1248 level = self.tagstack.count_tags(tag) + 1
1249 if name:
1250 name = name.replace(".","_")
1251 else:
1252 # FIXME: If a list is contained in a table cell or text box,
1253 # the list level must return to 1, even though the table or
1254 # textbox itself may be nested within another list.
1255 name = self.tagstack.rfindattr( (TEXTNS,'style-name') )
1256 list_class = "%s_%d" % (name, level)
1257 self.closetag(self.listtypes.get(list_class,'ul'))
1258 self.purgedata()
1259
1260
1261 def s_text_list_item(self, tag, attrs):
1262 self.opentag('li')
1263 self.purgedata()
1264
1265
1266 def e_text_list_item(self, tag, attrs):
1267 self.writedata()
1268 self.closetag('li')
1269 self.purgedata()
1270
1271
1275 def s_text_list_level_style_bullet(self, tag, attrs):
1276 name = self.tagstack.rfindattr( (STYLENS,'name') )
1277 level = attrs[(TEXTNS,'level')]
1279 list_class = "%s_%s" % (name, level)
1280 self.listtypes[list_class] = 'ul'
1281 self.currentstylecurrentstyle = ".%s_%s" % ( name.replace(".","_"), level)
1283 self.styledict[self.currentstylecurrentstyle] = {}
1284
1285 level = int(level)
1286 listtype = ("square", "disc", "circle")[level % 3]
1287 self.styledict[self.currentstylecurrentstyle][('','list-style-type')] = listtype
1288
1289 def e_text_list_level_style_bullet(self, tag, attrs):
1291 del self.prevstyle
1292
1293 def s_text_list_level_style_number(self, tag, attrs):
1294 name = self.tagstack.stackparent()[(STYLENS,'name')]
1295 level = attrs[(TEXTNS,'level')]
1296 num_format = attrs.get( (STYLENS,'name'),"1")
1297 list_class = "%s_%s" % (name, level)
1299 self.currentstylecurrentstyle = ".%s_%s" % ( name.replace(".","_"), level)
1300 self.listtypes[list_class] = 'ol'
1302 self.styledict[self.currentstylecurrentstyle] = {}
1303 if num_format == "1": listtype = "decimal"
1304 elif num_format == "I": listtype = "upper-roman"
1305 elif num_format == "i": listtype = "lower-roman"
1306 elif num_format == "A": listtype = "upper-alpha"
1307 elif num_format == "a": listtype = "lower-alpha"
1308 else: listtype = "decimal"
1309 self.styledict[self.currentstylecurrentstyle][('','list-style-type')] = listtype
1310
1311 def e_text_list_level_style_number(self, tag, attrs):
1313 del self.prevstyle
1314
1315 def s_text_note(self, tag, attrs):
1316 self.writedata()
1317 self.purgedata()
1319 self.notedict[self.currentnotecurrentnote] = {}
1320 self.notebody = []
1321
1322 def e_text_note(self, tag, attrs):
1323 pass
1324
1325 def collectnote(self,s):
1326 if s != '':
1327 self.notebody.append(s)
1328
1329 def s_text_note_body(self, tag, attrs):
1331 self._wfunc_wfunc = self.collectnote
1332
1333 def e_text_note_body(self, tag, attrs):
1335 self.notedict[self.currentnotecurrentnote]['body'] = ''.join(self.notebody)
1336 self.notebody = ''
1337 del self._orgwfunc
1338
1339 def e_text_note_citation(self, tag, attrs):
1340 mark = ''.join(self.data)
1341 self.notedict[self.currentnotecurrentnote]['citation'] = mark
1342 self.opentag('a',{ 'href': "#footnote-%s" % self.currentnotecurrentnote })
1343 self.opentag('sup')
1344# self.writeout( escape(mark) )
1345 # Since HTML only knows about endnotes, there is too much risk that the
1346 # marker is reused in the source. Therefore we force numeric markers
1347 if sys.version_info[0]==3:
1349 else:
1350 self.writeout(unicode(self.currentnotecurrentnote))
1351 self.closetag('sup')
1352 self.closetag('a')
1353
1354
1356 def s_text_p(self, tag, attrs):
1357 htmlattrs = {}
1358 specialtag = "p"
1359 c = attrs.get( (TEXTNS,'style-name'), None)
1360 if c:
1361 c = c.replace(".","_")
1362 specialtag = special_styles.get("P-"+c)
1363 if specialtag is None:
1364 specialtag = 'p'
1366 htmlattrs['class'] = "P-%s" % c
1367 self.opentag(specialtag, htmlattrs)
1368 self.purgedata()
1369
1370
1372 def e_text_p(self, tag, attrs):
1373 specialtag = "p"
1374 c = attrs.get( (TEXTNS,'style-name'), None)
1375 if c:
1376 c = c.replace(".","_")
1377 specialtag = special_styles.get("P-"+c)
1378 if specialtag is None:
1379 specialtag = 'p'
1380 self.writedata()
1381 self.closetag(specialtag)
1382 self.purgedata()
1383
1384
1387 def s_text_s(self, tag, attrs):
1388 c = attrs.get( (TEXTNS,'c'),"1")
1389 for x in range(int(c)):
1390 self.writeout('&#160;')
1391
1392
1395 def s_text_span(self, tag, attrs):
1396 self.writedata()
1397 c = attrs.get( (TEXTNS,'style-name'), None)
1398 htmlattrs = {}
1399 if c:
1400 c = c.replace(".","_")
1401 special = special_styles.get("S-"+c)
1402 if special is None and self.generate_cssgenerate_cssgenerate_css:
1403 htmlattrs['class'] = "S-%s" % c
1404 self.opentag('span', htmlattrs)
1405 self.purgedata()
1406
1407
1408 def e_text_span(self, tag, attrs):
1409 self.writedata()
1410 self.closetag('span', False)
1411 self.purgedata()
1412
1413
1415 def s_text_tab(self, tag, attrs):
1416 self.writedata()
1417 self.writeout(' ')
1418 self.purgedata()
1419
1420
1422 def s_text_x_source(self, tag, attrs):
1423 self.writedata()
1424 self.purgedata()
1425 self.s_ignorexml(tag, attrs)
1426
1427
1429 def e_text_x_source(self, tag, attrs):
1430 self.writedata()
1431 self.purgedata()
1432
1433
1434#-----------------------------------------------------------------------------
1435#
1436# Reading the file
1437#
1438#-----------------------------------------------------------------------------
1439
1440
1445
1446 def load(self, odffile):
1447 assert(type(odffile)==type(u"") or 'rb' in repr(odffile) or 'BufferedReader' in repr(odffile) or 'BytesIO' in repr(odffile))
1448
1449 self.lines = []
1450 self._wfunc_wfunc = self._wlines
1451 self.document = load(odffile)
1452 self._walknode(self.document.topnode)
1453
1454 def _walknode(self, node):
1455 if node.nodeType == Node.ELEMENT_NODE:
1456 self.startElementNS(node.qname, node.tagName, node.attributes)
1457 for c in node.childNodes:
1458 self._walknode(c)
1459 self.endElementNS(node.qname, node.tagName)
1460 if node.nodeType == Node.TEXT_NODE or node.nodeType == Node.CDATA_SECTION_NODE:
1461 if sys.version_info[0]==3:
1462 self.characters(str(node))
1463 else:
1464 self.characters(unicode(node))
1465
1466
1467
1472
1473 def odf2xhtml(self, odffile):
1474 assert(type(odffile)==type(u"") or 'rb' in repr(odffile) or 'BufferedReader' in repr(odffile) or 'BytesIO' in repr(odffile))
1475
1476
1477 self.load(odffile)
1478
1479 result=self.xhtml()
1480 assert(type(result)==type(u""))
1481 return result
1482
1483 def _wlines(self,s):
1484 if s != '': self.lines.append(s)
1485
1486
1488 def xhtml(self):
1489 return ''.join(self.lines)
1490
1491 def _writecss(self, s):
1492 if s != '': self._csslines.append(s)
1493
1494 def _writenothing(self, s):
1495 pass
1496
1497
1498 def css(self):
1499 self._csslines = []
1500 self._wfunc_wfunc = self._writecss
1501 self.generate_stylesheet()
1502 res = ''.join(self._csslines)
1503 self._wfunc_wfunc = self._wlines
1504 del self._csslines
1505 return res
1506
1507
1511 def save(self, outputfile, addsuffix=False):
1512 if outputfile == '-':
1513 outputfp = sys.stdout
1514 else:
1515 if addsuffix:
1516 outputfile = outputfile + ".html"
1517 outputfp = file(outputfile, "w")
1518 outputfp.write(self.xhtml().encode('us-ascii','xmlcharrefreplace'))
1519 outputfp.close()
1520
1521
1522
1524
1525 def __init__(self, lines, generate_css=True, embedable=False):
1526 self._resetobject()
1527 self.lineslines = lines
1528
1529 # Tags
1531 self.elements = {
1532# (DCNS, 'title'): (self.s_processcont, self.e_dc_title),
1533# (DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage),
1534# (DCNS, 'creator'): (self.s_processcont, self.e_dc_metatag),
1535# (DCNS, 'description'): (self.s_processcont, self.e_dc_metatag),
1536# (DCNS, 'date'): (self.s_processcont, self.e_dc_metatag),
1537 (DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame),
1538 (DRAWNS, 'image'): (self.s_draw_image, None),
1539 (DRAWNS, 'fill-image'): (self.s_draw_fill_image, None),
1540 (DRAWNS, "layer-set"):(self.s_ignorexml, None),
1541 (DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page),
1542 (DRAWNS, 'object'): (self.s_draw_object, None),
1543 (DRAWNS, 'object-ole'): (self.s_draw_object_ole, None),
1544 (DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox),
1545# (METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag),
1546# (METANS, 'generator'):(self.s_processcont, self.e_dc_metatag),
1547# (METANS, 'initial-creator'): (self.s_processcont, self.e_dc_metatag),
1548# (METANS, 'keyword'): (self.s_processcont, self.e_dc_metatag),
1549 (NUMBERNS, "boolean-style"):(self.s_ignorexml, None),
1550 (NUMBERNS, "currency-style"):(self.s_ignorexml, None),
1551 (NUMBERNS, "date-style"):(self.s_ignorexml, None),
1552 (NUMBERNS, "number-style"):(self.s_ignorexml, None),
1553 (NUMBERNS, "text-style"):(self.s_ignorexml, None),
1554# (OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None),
1555# (OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content),
1556 (OFFICENS, "forms"):(self.s_ignorexml, None),
1557# (OFFICENS, "master-styles"):(self.s_office_master_styles, None),
1558 (OFFICENS, "meta"):(self.s_ignorecont, None),
1559# (OFFICENS, "presentation"):(self.s_office_presentation, self.e_office_presentation),
1560# (OFFICENS, "spreadsheet"):(self.s_office_spreadsheet, self.e_office_spreadsheet),
1561# (OFFICENS, "styles"):(self.s_office_styles, None),
1562# (OFFICENS, "text"):(self.s_office_text, self.e_office_text),
1563 (OFFICENS, "scripts"):(self.s_ignorexml, None),
1564 (PRESENTATIONNS, "notes"):(self.s_ignorexml, None),
1565
1587 (SVGNS, 'desc'): (self.s_ignorexml, None),
1588 (TABLENS, 'covered-table-cell'): (self.s_ignorexml, None),
1589 (TABLENS, 'table-cell'): (self.s_table_table_cell, self.e_table_table_cell),
1590 (TABLENS, 'table-column'): (self.s_table_table_column, None),
1591 (TABLENS, 'table-row'): (self.s_table_table_row, self.e_table_table_row),
1592 (TABLENS, 'table'): (self.s_table_table, self.e_table_table),
1593 (TEXTNS, 'a'): (self.s_text_a, self.e_text_a),
1594 (TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source),
1595 (TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None),
1596 (TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source),
1597 (TEXTNS, 'h'): (self.s_text_h, self.e_text_h),
1598 (TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source),
1599 (TEXTNS, 'line-break'):(self.s_text_line_break, None),
1600 (TEXTNS, "linenumbering-configuration"):(self.s_ignorexml, None),
1601 (TEXTNS, "list"):(self.s_text_list, self.e_text_list),
1602 (TEXTNS, "list-item"):(self.s_text_list_item, self.e_text_list_item),
1603 (TEXTNS, "list-level-style-bullet"):(self.s_text_list_level_style_bullet, self.e_text_list_level_style_bullet),
1604 (TEXTNS, "list-level-style-number"):(self.s_text_list_level_style_number, self.e_text_list_level_style_number),
1605 (TEXTNS, "list-style"):(None, None),
1606 (TEXTNS, "note"):(self.s_text_note, None),
1607 (TEXTNS, "note-body"):(self.s_text_note_body, self.e_text_note_body),
1608 (TEXTNS, "note-citation"):(None, self.e_text_note_citation),
1609 (TEXTNS, "notes-configuration"):(self.s_ignorexml, None),
1610 (TEXTNS, "object-index-source"):(self.s_text_x_source, self.e_text_x_source),
1611 (TEXTNS, 'p'): (self.s_text_p, self.e_text_p),
1612 (TEXTNS, 's'): (self.s_text_s, None),
1613 (TEXTNS, 'span'): (self.s_text_span, self.e_text_span),
1614 (TEXTNS, 'tab'): (self.s_text_tab, None),
1615 (TEXTNS, "table-index-source"):(self.s_text_x_source, self.e_text_x_source),
1616 (TEXTNS, "table-of-content-source"):(self.s_text_x_source, self.e_text_x_source),
1617 (TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source),
1618 (TEXTNS, "page-number"):(None, None),
1619 }
1620
The ODF2XHTML parses an ODF file and produces XHTML.
Definition odf2xhtml.py:346
e_dc_metatag(self, tag, attrs)
Any other meta data is added as a <meta> element.
Definition odf2xhtml.py:631
e_draw_page(self, tag, attrs)
Definition odf2xhtml.py:790
s_style_header(self, tag, attrs)
Definition odf2xhtml.py:977
rewritelink(self, imghref)
Intended to be overloaded if you don't store your pictures in a Pictures subfolder.
Definition odf2xhtml.py:730
s_text_x_source(self, tag, attrs)
Various indexes and tables of contents.
e_text_h(self, tag, attrs)
Headings end Side If there is no title in the metadata, then it is taken from the first heading of an...
s_text_line_break(self, tag, attrs)
Force a line break ( )
s_style_header_style(self, tag, attrs)
Definition odf2xhtml.py:986
opentag(self, tag, attrs={}, block=False)
Create an open HTML tag.
Definition odf2xhtml.py:520
s_text_list_item(self, tag, attrs)
Start list item.
s_text_bookmark(self, tag, attrs)
Bookmark definition.
s_table_table(self, tag, attrs)
Start a table.
endElementNS(self, tag, qname)
Definition odf2xhtml.py:562
e_text_list(self, tag, attrs)
End a list.
s_text_note(self, tag, attrs)
s_text_note_body(self, tag, attrs)
s_draw_frame(self, tag, attrs)
A <draw:frame> is made into a.
Definition odf2xhtml.py:687
s_draw_textbox(self, tag, attrs)
Definition odf2xhtml.py:793
startElementNS(self, tag, qname, attrs)
Definition odf2xhtml.py:552
s_style_master_page(self, tag, attrs)
Collect the formatting for the page layout style.
s_draw_page(self, tag, attrs)
A <draw:page> is a slide in a presentation.
Definition odf2xhtml.py:776
s_office_text(self, tag, attrs)
OpenDocument text.
Definition odf2xhtml.py:918
s_office_spreadsheet(self, tag, attrs)
Definition odf2xhtml.py:907
s_office_document_content(self, tag, attrs)
First tag in the content.xml file.
Definition odf2xhtml.py:877
s_office_presentation(self, tag, attrs)
For some odd reason, OpenOffice Impress doesn't define a default-style for the 'paragraph'.
Definition odf2xhtml.py:898
e_text_a(self, tag, attrs)
End an anchor or bookmark reference.
e_style_header(self, tag, attrs)
Definition odf2xhtml.py:981
e_style_page_layout(self, tag, attrs)
End this style.
s_office_styles(self, tag, attrs)
Definition odf2xhtml.py:914
xhtml(self)
Returns the xhtml.
e_office_text(self, tag, attrs)
Definition odf2xhtml.py:922
closetag(self, tag, block=True)
Close an open HTML tag.
Definition odf2xhtml.py:533
emptytag(self, tag, attrs={})
Definition odf2xhtml.py:539
e_text_list_item(self, tag, attrs)
End list item.
s_style_handle_properties(self, tag, attrs)
Copy all attributes to a struct.
Definition odf2xhtml.py:929
handle_endtag(self, tag, attrs, method)
Definition odf2xhtml.py:576
e_style_default_style(self, tag, attrs)
Definition odf2xhtml.py:948
e_table_table(self, tag, attrs)
End a table.
classname(self, attrs)
Generate a class name from a style name.
Definition odf2xhtml.py:600
unknown_endtag(self, tag, attrs)
Definition odf2xhtml.py:582
s_text_s(self, tag, attrs)
Generate a number of spaces.
save(self, outputfile, addsuffix=False)
Save the HTML under the filename.
s_processcont(self, tag, attrs)
Start processing the text nodes.
Definition odf2xhtml.py:596
e_office_spreadsheet(self, tag, attrs)
Definition odf2xhtml.py:910
e_text_note_body(self, tag, attrs)
e_draw_textbox(self, tag, attrs)
End the <draw:text-box>
Definition odf2xhtml.py:802
add_style_file(self, stylefilename, media=None)
Add a link to an external style file.
Definition odf2xhtml.py:466
s_style_footer(self, tag, attrs)
Definition odf2xhtml.py:963
s_text_list_level_style_bullet(self, tag, attrs)
CSS doesn't have the ability to set the glyph to a particular character, so we just go through the av...
__init__(self, generate_css=True, embedable=False)
Definition odf2xhtml.py:348
s_style_font_face(self, tag, attrs)
It is possible that the HTML browser doesn't know how to show a particular font.
Definition odf2xhtml.py:957
s_ignorecont(self, tag, attrs)
Stop processing the text nodes.
Definition odf2xhtml.py:592
set_plain(self)
Tell the parser to not generate CSS.
Definition odf2xhtml.py:452
e_text_p(self, tag, attrs)
End Paragraph.
e_dc_creator(self, tag, attrs)
Set the content creator.
Definition odf2xhtml.py:644
e_text_note_citation(self, tag, attrs)
s_table_table_row(self, tag, attrs)
Start a table row.
e_text_span(self, tag, attrs)
End the <text:span>
s_style_default_style(self, tag, attrs)
A default style is like a style on an HTML tag.
Definition odf2xhtml.py:941
get_anchor(self, name)
Create a unique anchor id for a href name.
Definition odf2xhtml.py:606
s_text_tab(self, tag, attrs)
Move to the next tabstop.
e_style_footer(self, tag, attrs)
Definition odf2xhtml.py:967
s_draw_object_ole(self, tag, attrs)
A <draw:object-ole> is embedded OLE object in the document (e.g.
Definition odf2xhtml.py:765
s_draw_image(self, tag, attrs)
A <draw:image> becomes an element.
Definition odf2xhtml.py:735
s_style_footer_style(self, tag, attrs)
Definition odf2xhtml.py:972
s_text_list(self, tag, attrs)
Start a list (.
s_office_automatic_styles(self, tag, attrs)
Definition odf2xhtml.py:870
e_custom_shape(self, tag, attrs)
End the <draw:frame>
Definition odf2xhtml.py:682
handle_starttag(self, tag, method, attrs)
Definition odf2xhtml.py:573
e_table_table_row(self, tag, attrs)
End a table row.
s_text_h(self, tag, attrs)
Headings start.
unknown_starttag(self, tag, attrs)
Definition odf2xhtml.py:579
e_text_note(self, tag, attrs)
e_text_x_source(self, tag, attrs)
Various indexes and tables of contents.
s_draw_fill_image(self, tag, attrs)
Definition odf2xhtml.py:721
s_style_page_layout(self, tag, attrs)
Collect the formatting for the page layout style.
e_draw_frame(self, tag, attrs)
End the <draw:frame>
Definition odf2xhtml.py:718
s_custom_shape(self, tag, attrs)
A <draw:custom-shape> is made into a.
Definition odf2xhtml.py:651
e_dc_contentlanguage(self, tag, attrs)
Set the content language.
Definition odf2xhtml.py:637
e_office_presentation(self, tag, attrs)
Definition odf2xhtml.py:903
s_table_table_cell(self, tag, attrs)
Start a table cell.
s_text_bookmark_ref(self, tag, attrs)
Bookmark reference.
e_dc_title(self, tag, attrs)
Get the title from the meta data and create a HTML <title>
Definition odf2xhtml.py:624
css(self)
Returns the CSS content.
s_office_master_styles(self, tag, attrs)
Definition odf2xhtml.py:891
s_table_table_column(self, tag, attrs)
Start a table column.
load(self, odffile)
Loads a document into the parser and parses it.
s_text_a(self, tag, attrs)
Anchors start.
e_text_list_level_style_bullet(self, tag, attrs)
set_embedable(self)
Tells the converter to only output the parts inside the <body>
Definition odf2xhtml.py:456
e_table_table_cell(self, tag, attrs)
End a table cell.
s_text_list_level_style_number(self, tag, attrs)
html_body(self, tag, attrs)
Definition odf2xhtml.py:805
e_style_style(self, tag, attrs)
End this style.
s_draw_object(self, tag, attrs)
A <draw:object> is embedded object in the document (e.g.
Definition odf2xhtml.py:748
s_text_p(self, tag, attrs)
Paragraph.
e_text_list_level_style_number(self, tag, attrs)
e_office_document_content(self, tag, attrs)
Last tag.
Definition odf2xhtml.py:888
s_ignorexml(self, tag, attrs)
Ignore this xml element and all children of it It will automatically stop ignoring.
Definition odf2xhtml.py:588
s_style_style(self, tag, attrs)
Collect the formatting for the style.
s_text_span(self, tag, attrs)
The <text:span> element matches the element in HTML.
s_style_default_page_layout(self, tag, attrs)
Collect the formatting for the default page layout style.
Definition odf2xhtml.py:993
The ODF2XHTML parses an ODF file and produces XHTML.
__init__(self, lines, generate_css=True, embedable=False)
The purpose of the StyleToCSS class is to contain the rules to convert ODF styles to CSS2.
Definition odf2xhtml.py:61
c_text_underline_style(self, ruleset, sdict, rule, val)
Set underline decoration HTML doesn't really have a page-width.
Definition odf2xhtml.py:249
c_fn(self, ruleset, sdict, rule, fontstyle)
Generate the CSS font family A generic font can be found in two ways.
Definition odf2xhtml.py:161
c_text_line_through_style(self, ruleset, sdict, rule, val)
Set underline decoration HTML doesn't really have a page-width.
Definition odf2xhtml.py:256
convert_styles(self, ruleset)
Rule is a tuple of (namespace, name).
Definition odf2xhtml.py:267
c_page_height(self, ruleset, sdict, rule, val)
Set height of box.
Definition odf2xhtml.py:261
c_drawfillimage(self, ruleset, sdict, rule, val)
Fill a figure with an image.
Definition odf2xhtml.py:132
c_text_position(self, ruleset, sdict, rule, tp)
Text position.
Definition odf2xhtml.py:186
c_hp(self, ruleset, sdict, rule, hpos)
Definition odf2xhtml.py:198
c_text_align(self, ruleset, sdict, rule, align)
Text align.
Definition odf2xhtml.py:152
c_fo(self, ruleset, sdict, rule, val)
XSL formatting attributes.
Definition odf2xhtml.py:136
c_page_width(self, ruleset, sdict, rule, val)
Set width of box HTML doesn't really have a page-width.
Definition odf2xhtml.py:243
c_border_model(self, ruleset, sdict, rule, val)
Convert to CSS2 border model.
Definition odf2xhtml.py:141
c_width(self, ruleset, sdict, rule, val)
Set width of box.
Definition odf2xhtml.py:148
save_font(self, name, family, generic)
It is possible that the HTML browser doesn't know how to show a particular font.
Definition odf2xhtml.py:118
push(self, tag, attrs)
Definition odf2xhtml.py:294
rfindattr(self, attr)
Find a tag with the given attribute.
Definition odf2xhtml.py:306