#! /usr/bin/env python

"""Perform massive transformations on a document tree created from the LaTeX
of the Python documentation, and dump the ESIS data for the transformed tree.
"""

__version__ = '$Revision$'


import errno
import esistools
import os
import re
import string
import sys
import xml.dom
import xml.dom.minidom

# Short aliases for the DOM node-type constants used throughout this module.
ELEMENT = xml.dom.Node.ELEMENT_NODE
ENTITY_REFERENCE = xml.dom.Node.ENTITY_REFERENCE_NODE
TEXT = xml.dom.Node.TEXT_NODE


class ConversionError(Exception):
    """Exception type declared by this module for conversion failures."""
    pass


# ewrite() writes plain text to stderr; bwrite() writes it in bold when the
# terminal control sequences can be obtained, and is plain ewrite otherwise.
ewrite = sys.stderr.write
try:
    # We can only do this trick on Unix (if tput is on $PATH)!
    # os.name is the portable family test; sys.platform is never "posix"
    # (it holds values such as "linux2" or "darwin"), so testing it here
    # would make the bold branch unreachable.
    if os.name != "posix" or not sys.stderr.isatty():
        raise ImportError
    import commands
except ImportError:
    bwrite = ewrite
else:
    def bwrite(s, BOLDON=commands.getoutput("tput bold"),
               BOLDOFF=commands.getoutput("tput sgr0")):
        """Write s to stderr wrapped in the terminal's bold on/off codes.

        The control sequences are fetched once, when the function is
        defined, and kept as default argument values.
        """
        ewrite("%s%s%s" % (BOLDON, s, BOLDOFF))


PARA_ELEMENT = "para"

DEBUG_PARA_FIXER = 0

if DEBUG_PARA_FIXER:
    def para_msg(s):
        """Write a debugging message about paragraph fixing to stderr."""
        ewrite("*** %s\n" % s)
else:
    def para_msg(s):
        """Discard the message; paragraph-fixer debugging is disabled."""
        pass


def get_first_element(doc, gi):
    """Return the first direct child of doc whose nodeName is gi.

    Returns None when no direct child has that name.
    """
    for n in doc.childNodes:
        if n.nodeName == gi:
            return n
    return None


def extract_first_element(doc, gi):
    """Remove and return the first direct child of doc named gi.

    Returns None, leaving doc untouched, when there is no such child.
    """
    node = get_first_element(doc, gi)
    if node is not None:
        doc.removeChild(node)
    return node


def get_documentElement(node):
    """Return the last element-type direct child of node, or None.

    Every child is examined and later elements replace earlier ones, so
    when several element children exist the final one is returned.
    """
    result = None
    for child in node.childNodes:
        if child.nodeType == ELEMENT:
            result = child
    return result


def set_tagName(elem, gi):
    """Rename elem in place by setting both its nodeName and tagName to gi."""
    elem.nodeName = elem.tagName = gi


def find_all_elements(doc, gi):
    """Return a list of the nodes named gi at or below doc.

    doc itself comes first if its nodeName matches; then, for each element
    child in order, the child (if it matches) followed by whatever its
    getElementsByTagName(gi) call returns.
    """
    nodes = []
    if doc.nodeName == gi:
        nodes.append(doc)
    for child in doc.childNodes:
        if child.nodeType == ELEMENT:
            if child.tagName == gi:
                nodes.append(child)
            nodes.extend(child.getElementsByTagName(gi))
    return nodes


def find_all_child_elements(doc, gi):
    """Return a list of the direct children of doc whose nodeName is gi."""
    return [child for child in doc.childNodes if child.nodeName == gi]


def find_all_elements_from_set(doc, gi_set):
    """Return a list of doc and its element descendants named in gi_set.

    Nodes are collected depth-first, a parent before its children.
    """
    return __find_all_elements_from_set(doc, gi_set, [])


def __find_all_elements_from_set(doc, gi_set, nodes):
    """Recursive worker for find_all_elements_from_set().

    Appends matching nodes to the nodes list and returns that same list.
    """
    if doc.nodeName in gi_set:
        nodes.append(doc)
    for child in doc.childNodes:
        if child.nodeType == ELEMENT:
            __find_all_elements_from_set(child, gi_set, nodes)
    return nodes


def simplify(doc, fragment):
    """Restructure the top level of fragment around a single root element.

    - A "documentclass" child is removed and its "classname" attribute
      becomes the new name of the "document" child.
    - A "title" child and every "input" child are removed from the top
      level and re-inserted, each followed by a newline text node, at the
      start of the element returned by get_documentElement().
    - Leading text nodes of fragment are dropped.

    doc is used only to create the new text nodes.
    """
    # Try to rationalize the document a bit, since these things are simply
    # not valid SGML/XML documents as they stand, and need a little work.
    documentclass = "document"
    inputs = []
    node = extract_first_element(fragment, "documentclass")
    if node is not None:
        documentclass = node.getAttribute("classname")
    node = extract_first_element(fragment, "title")
    if node is not None:
        inputs.append(node)
    # update the name of the root element
    node = get_first_element(fragment, "document")
    if node is not None:
        set_tagName(node, documentclass)
    while 1:
        node = extract_first_element(fragment, "input")
        if node is None:
            break
        inputs.append(node)
    if inputs:
        docelem = get_documentElement(fragment)
        # Each node is inserted at the front, so walk the list backwards to
        # keep the collected order in the result.
        inputs.reverse()
        for node in inputs:
            text = doc.createTextNode("\n")
            docelem.insertBefore(text, docelem.firstChild)
            docelem.insertBefore(node, text)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    while fragment.firstChild and fragment.firstChild.nodeType == TEXT:
        fragment.removeChild(fragment.firstChild)


def cleanup_root_text(doc):
    """Remove the text nodes that are direct children of doc.

    A text node is kept only when the node immediately before it is named
    "COMMENT".
    """
    discards = []
    skip = 0
    for n in doc.childNodes:
        prevskip = skip
        skip = 0
        if n.nodeType == TEXT and not prevskip:
            discards.append(n)
        elif n.nodeName == "COMMENT":
            skip = 1
    # Removal is deferred so the child list is not changed while iterating.
    for node in discards:
        doc.removeChild(node)


# Element names that find_and_fix_descriptors() hands to
# rewrite_descriptor().
DESCRIPTOR_ELEMENTS = (
    "cfuncdesc", "cvardesc", "ctypedesc",
    "classdesc", "memberdesc", "memberdescni", "methoddesc", "methoddescni",
    "excdesc", "funcdesc", "funcdescni", "opcodedesc",
    "datadesc", "datadescni",
    )


def fixup_descriptors(doc, fragment):
    """Rewrite the descriptor elements found in each "section" of fragment."""
    sections = find_all_elements(fragment, "section")
    for section in sections:
        find_and_fix_descriptors(doc, section)


def find_and_fix_descriptors(doc, container):
    """Rewrite descriptor children of container, recursing into subsections.

    Direct element children named in DESCRIPTOR_ELEMENTS are rewritten;
    children named "subsection" are searched the same way.
    """
    children = container.childNodes
    for child in children:
        if child.nodeType == ELEMENT:
            tagName = child.tagName
            if tagName in DESCRIPTOR_ELEMENTS:
                rewrite_descriptor(doc, child)
            elif tagName == "subsection":
                find_and_fix_descriptors(doc, child)


def rewrite_descriptor(doc, descriptor):
    """Rebuild descriptor as signature element(s) plus one description.

    The numbered comment below lists the individual steps.  doc is used to
    create the new nodes; descriptor is modified in place.

    Raises RuntimeError if a "var" attribute is present on a descriptor
    other than "opcodedesc".
    """
    #
    # Do these things:
    #   1. Add an "index='no'" attribute to the element if the tagName
    #      ends in 'ni', removing the 'ni' from the name.
    #   2. Create a "signature" element from the name attribute
    #   2a.Create an "args" element if it appears to be available.
    #   3. Create additional "signature" elements from *line{,ni}
    #      elements, if found.
    #   4. If a "versionadded" element is found, move it to an attribute
    #      on the descriptor.
    #   5. Move remaining child nodes to a "description" element.
    #   6. Put it back together.
    #
    # 1.
    descname = descriptor.tagName
    index = 1
    if descname[-2:] == "ni":
        descname = descname[:-2]
        descriptor.setAttribute("index", "no")
        set_tagName(descriptor, descname)
        index = 0
    desctype = descname[:-4]  # remove 'desc'
    linename = desctype + "line"
    if not index:
        linename = linename + "ni"
    # 2.
    signature = doc.createElement("signature")
    name = doc.createElement("name")
    signature.appendChild(doc.createTextNode("\n "))
    signature.appendChild(name)
    name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
    descriptor.removeAttribute("name")
    # 2a.
    if descriptor.hasAttribute("var"):
        if descname != "opcodedesc":
            raise RuntimeError(
                "got 'var' attribute on descriptor other than opcodedesc")
        variable = descriptor.getAttribute("var")
        if variable:
            args = doc.createElement("args")
            args.appendChild(doc.createTextNode(variable))
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(args)
        descriptor.removeAttribute("var")
    newchildren = [signature]
    children = descriptor.childNodes
    pos = skip_leading_nodes(children)
    if pos < len(children):
        child = children[pos]
        if child.nodeName == "args":
            # move the "args" element into the signature, or remove it if
            # empty:
            child.parentNode.removeChild(child)
            if len(child.childNodes):
                signature.appendChild(doc.createTextNode("\n "))
                signature.appendChild(child)
    signature.appendChild(doc.createTextNode("\n "))
    # 3, 4.
    pos = skip_leading_nodes(children, pos)
    while pos < len(children) \
          and children[pos].nodeName in (linename, "versionadded"):
        if children[pos].tagName == linename:
            # this is really a supplemental signature, create a
            # "signature" element from it
            # Keep an unmodified copy so it can be shown if the conversion
            # fails part-way through.
            oldchild = children[pos].cloneNode(1)
            try:
                sig = methodline_to_signature(doc, children[pos])
            except KeyError:
                print(oldchild.toxml())
                raise
            newchildren.append(sig)
        else:
            # "versionadded" element: record its version as the "added"
            # attribute of the descriptor
            descriptor.setAttribute(
                "added", children[pos].getAttribute("version"))
        pos = skip_leading_nodes(children, pos + 1)
    # 5.
    description = doc.createElement("description")
    description.appendChild(doc.createTextNode("\n"))
    newchildren.append(description)
    move_children(descriptor, description, pos)
    last = description.childNodes[-1]
    if last.nodeType == TEXT:
        last.data = last.data.rstrip() + "\n "
    # 6.
    # should have nothing but whitespace and signature lines left in the
    # descriptor; discard them
    while descriptor.childNodes:
        descriptor.removeChild(descriptor.childNodes[0])
    for node in newchildren:
        descriptor.appendChild(doc.createTextNode("\n "))
        descriptor.appendChild(node)
    descriptor.appendChild(doc.createTextNode("\n"))


def methodline_to_signature(doc, methodline):
    """Build and return a "signature" element from a *line element.

    The "name" attribute of methodline is removed and becomes the text of
    a "name" child; if methodline has child nodes they are moved into an
    "args" child.  methodline itself is left in its parent.
    """
    signature = doc.createElement("signature")
    signature.appendChild(doc.createTextNode("\n "))
    name = doc.createElement("name")
    name.appendChild(doc.createTextNode(methodline.getAttribute("name")))
    methodline.removeAttribute("name")
    signature.appendChild(name)
    if len(methodline.childNodes):
        args = doc.createElement("args")
        signature.appendChild(doc.createTextNode("\n "))
        signature.appendChild(args)
        move_children(methodline, args)
    signature.appendChild(doc.createTextNode("\n "))
    return signature


def move_children(origin, dest, start=0):
    """Move the children of origin from index start onward to the end of dest.

    Children before index start stay in origin.
    """
    children = origin.childNodes
    # Each removal shortens the child list, so the node at index start is
    # always the next one to move.
    while start < len(children):
        node = children[start]
        origin.removeChild(node)
        dest.appendChild(node)


def handle_appendix(doc, fragment):
    """Move everything after the appendix marker into a "back-matter" element.

    The first "appendix" element found below a child of the document
    element is removed.  All later children of the document element are
    then moved into a new "back-matter" element appended to it, with
    leading whitespace-only text nodes dropped.
    """
    # must be called after simplify() if document is multi-rooted to begin
    # with
    docelem = get_documentElement(fragment)
    appendices = 0
    nodes = []
    for node in docelem.childNodes:
        if appendices:
            nodes.append(node)
        elif node.nodeType == ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                appendices = 1
                parent = appnodes[0].parentNode
                parent.removeChild(appnodes[0])
                parent.normalize()
    if nodes:
        # Explicit loops rather than map(): the calls are made only for
        # their side effects and must actually run.
        for node in nodes:
            docelem.removeChild(node)
        docelem.appendChild(doc.createTextNode("\n\n\n"))
        back = doc.createElement("back-matter")
        docelem.appendChild(back)
        back.appendChild(doc.createTextNode("\n"))
        while nodes and nodes[0].nodeType == TEXT \
              and not nodes[0].data.strip():
            del nodes[0]
        for node in nodes:
            back.appendChild(node)
        docelem.appendChild(doc.createTextNode("\n"))


def handle_labels(doc, fragment):
    for label in find_all_elements(fragment, "label"):
        id = label.getAttribute("id")
        if not id:
            continue
        parent = label.parentNode
        parentTagName = parent.tagName
        if parentTagName == "title":
            parent.parentNode.setAttribute("id", id)
        else:
            parent.setAttribute("id", id)
        # now, remove