23import sys, zipfile, xml.dom.minidom
35] + [ nsdict[item[0]]+
":"+item[1]
for item
in empty_elements]
37INLINE_TAGS = [ nsdict[item[0]]+
":"+item[1]
for item
in inline_elements]
56 elif value ==
"normal":
62 elif value ==
"normal":
69 if value
and value !=
"none":
73 if value
and value !=
"none":
77 if value
is None or value ==
'':
79 posisize = value.split(
' ')
81 if textpos.find(
'%') == -1:
85 elif textpos ==
"super":
89 itextpos = int(textpos[:textpos.find(
'%')])
99 return "[italic=%s, bold=i%s, fixed=%s]" % (str(self.
italic),
129 return "[bq=%s, h=%d, code=%s]" % (str(self.
blockquote),
167 for tag
in IGNORED_TAGS:
170 for tag
in INLINE_TAGS:
182 for fontFace
in fontDecl.getElementsByTagName(
"style:font-face"):
183 if fontFace.getAttribute(
"style:font-pitch") ==
"fixed":
184 self.
fixedFonts.append(fontFace.getAttribute(
"style:name"))
194 parentProp = self.
textStyles.get(parent,
None)
196 textProp = parentProp
198 textPropEl = style.getElementsByTagName(
"style:text-properties")
199 if not textPropEl:
return textProps
201 textPropEl = textPropEl[0]
203 textProps.setItalic(textPropEl.getAttribute(
"fo:font-style"))
204 textProps.setBold(textPropEl.getAttribute(
"fo:font-weight"))
205 textProps.setUnderlined(textPropEl.getAttribute(
"style:text-underline-style"))
206 textProps.setStrikethrough(textPropEl.getAttribute(
"style:text-line-through-style"))
207 textProps.setPosition(textPropEl.getAttribute(
"style:text-position"))
209 if textPropEl.getAttribute(
"style:font-name")
in self.
fixedFonts:
210 textProps.setFixed(
True)
219 name = style.getAttribute(
"style:name")
221 if name.startswith(
"Heading_20_"):
225 paraProps.setHeading(level)
230 paraProps.setTitle(
True)
232 paraPropEl = style.getElementsByTagName(
"style:paragraph-properties")
234 paraPropEl = paraPropEl[0]
235 leftMargin = paraPropEl.getAttribute(
"fo:margin-left")
238 leftMargin = float(leftMargin[:-2])
239 if leftMargin > 0.01:
240 paraProps.setIndented(
True)
246 paraProps.setCode(
True)
255 for style
in styleElements:
257 name = style.getAttribute(
"style:name")
259 if name ==
"Standard":
continue
261 family = style.getAttribute(
"style:family")
262 parent = style.getAttribute(
"style:parent-style-name")
267 elif family ==
"paragraph":
274 for style
in listStyleElements:
275 name = style.getAttribute(
"style:name")
278 if style.hasChildNodes():
279 subitems = [el
for el
in style.childNodes
280 if el.nodeType == xml.dom.Node.ELEMENT_NODE
281 and el.tagName ==
"text:list-level-style-number"]
282 if len(subitems) > 0:
283 prop.setOrdered(
True)
289 def load(self, filepath):
291 zip = zipfile.ZipFile(filepath)
293 styles_doc = xml.dom.minidom.parseString(zip.read(
"styles.xml"))
294 fontfacedecls = styles_doc.getElementsByTagName(
"office:font-face-decls")
297 self.
processStyles(styles_doc.getElementsByTagName(
"style:style"))
300 self.
content = xml.dom.minidom.parseString(zip.read(
"content.xml"))
301 fontfacedecls = self.
content.getElementsByTagName(
"office:font-face-decls")
312 lines = text.split(
"\n")
314 numLines = len(lines)
315 for i
in range(numLines):
317 if (lines[i].strip()
or i == numLines-1
or i == 0
or
318 not ( lines[i-1].startswith(
" ")
319 and lines[i+1].startswith(
" ") ) ):
320 buffer.append(
"\n" + lines[i])
322 return ''.join(buffer)
332 link = node.getAttribute(
"xlink:href")
333 if link
and link[:2] ==
'./':
335 if link
and link[:9] ==
'Pictures/':
337 return "[[Image(%s)]]\n" % link
341 link = node.getAttribute(
"xlink:href")
342 if link.strip() == text.strip():
343 return "[%s] " % link.strip()
345 return "[%s %s] " % (link.strip(), text.strip())
352 cite = (node.getElementsByTagName(
"text:note-citation")[0]
353 .childNodes[0].nodeValue)
354 body = (node.getElementsByTagName(
"text:note-body")[0]
361 num = int(node.getAttribute(
"text:c"))
375 styleName = node.getAttribute(
"text:style-name")
379 return "`" + text +
"`"
389 if style.strikethrough:
391 if style.superscript:
397 return "%s%s%s" % (
''.join(mark), text,
''.join(revmark))
405 styleName = listElement.getAttribute(
"text:style-name")
409 for item
in listElement.childNodes:
410 buffer.append(
" "*indent)
414 number =
" " + number +
". "
415 buffer.append(
" 1. ")
418 subitems = [el
for el
in item.childNodes
419 if el.tagName
in [
"text:p",
"text:h",
"text:list"]]
420 for subitem
in subitems:
421 if subitem.tagName ==
"text:list":
430 return ''.join(buffer)
439 for item
in tableElement.childNodes:
441 if item.tagName ==
"table:table-header-rows":
443 if item.tagName ==
"table:table-row":
444 buffer.append(
"\n||")
445 for cell
in item.childNodes:
449 return ''.join(buffer)
456 body = self.
content.getElementsByTagName(
"office:body")[0]
457 text = body.childNodes[0]
461 paragraphs = [el
for el
in text.childNodes
462 if el.tagName
in [
"draw:page",
"text:p",
"text:h",
"text:section",
463 "text:list",
"table:table"]]
465 for paragraph
in paragraphs:
466 if paragraph.tagName ==
"text:list":
468 elif paragraph.tagName ==
"text:section":
470 elif paragraph.tagName ==
"table:table":
479 buffer.append(
"----")
481 buffer.append(
"%s: %s" % (cite, body))
492 for node
in element.childNodes:
494 if node.nodeType == xml.dom.Node.TEXT_NODE:
495 buffer.append(node.nodeValue)
497 elif node.nodeType == xml.dom.Node.ELEMENT_NODE:
500 if tag
in (
"draw:text-box",
"draw:frame"):
503 elif tag
in (
"text:p",
"text:h"):
507 elif tag ==
"text:list":
512 buffer.append(method(node))
514 buffer.append(
" {" + tag +
"} ")
516 return ''.join(buffer)
522 style_name = paragraph.getAttribute(
"text:style-name")
526 if paraProps
and not paraProps.code:
536 return "= " + text +
" =\n"
538 outlinelevel = paragraph.getAttribute(
"text:outline-level")
541 level = int(outlinelevel)
545 return "=" * level +
" " + text +
" " +
"=" * level +
"\n"
548 return "{{{\n" + text +
"\n}}}\n"
550 if paraProps.indented:
551 return self.
wrapParagraph(text, indent = indent, blockquote =
True)
566 return ''.join(buffer) + text
568 for token
in text.split():
570 if counter > LIMIT - indent:
571 buffer.append(
"\n" +
" "*indent)
576 buffer.append(token +
" ")
577 counter += len(token)
579 return ''.join(buffer)
Holds properties for a list style.
processFontDeclarations(self, fontDecl)
Extracts necessary font information from a font-declaration element.
load(self, filepath)
Loads an ODT file.
tableToString(self, tableElement)
MoinMoin uses || to delimit table cells.
textToString(self, element)
compressCodeBlocks(self, text)
Removes extra blank lines from code blocks.
toString(self)
Converts the document to a string.
paragraphToString(self, paragraph, indent=0)
listToString(self, listElement, indent=0)
processListStyles(self, listStyleElements)
processStyles(self, styleElements)
Runs through "style" elements extracting necessary information.
extractParagraphProperties(self, style, parent=None)
Extracts paragraph properties from a style element.
inline_markup(self, node)
extractTextProperties(self, style, parent=None)
Extracts text properties from a style element.
wrapParagraph(self, text, indent=0, blockquote=False)
text_line_break(self, node)
Holds properties of a paragraph style.
Holds properties for a text style.
setUnderlined(self, value)
setStrikethrough(self, value)