diff options
author | Fred Drake <fdrake@acm.org> | 1998-11-23 17:02:03 (GMT) |
---|---|---|
committer | Fred Drake <fdrake@acm.org> | 1998-11-23 17:02:03 (GMT) |
commit | 0320473a29fef5d35a5da1056e5c30e72cf25ac0 (patch) | |
tree | 2dcffb074c9d95057b4b70be3fa1efb7bcff515c | |
parent | 30a68c7a2bb12b8f10d239206033492e4a9e27a6 (diff) | |
download | cpython-0320473a29fef5d35a5da1056e5c30e72cf25ac0.zip cpython-0320473a29fef5d35a5da1056e5c30e72cf25ac0.tar.gz cpython-0320473a29fef5d35a5da1056e5c30e72cf25ac0.tar.bz2 |
Script that reworks the DOM tree of document fragments built from the
LaTeX-based ESIS streams so that it is a little better structured, and that
generally performs clean-up.
Preliminary.
-rwxr-xr-x | Doc/tools/sgmlconv/docfixer.py | 193 |
1 files changed, 193 insertions, 0 deletions
#! /usr/bin/env python

"""Clean up the DOM tree built from a LaTeX-derived ESIS stream.

Promote the IDs from <label/> elements to the enclosing section / chapter /
whatever, then remove the <label/> elements.  This allows *ML style internal
linking rather than the bogus LaTeX model.

Note that <label/>s in <title> elements are promoted two steps, since the
<title> elements are artificially created from the section parameter, and the
label really refers to the sectioning construct.

Besides label promotion, the script also:

  * moves the non-<args> content of entries that carry <args> or
    <constructor-args> into a new <description> element,
  * turns <COMMENT> elements into real comment nodes,
  * renames the <document> root after the document class and moves the
    <title> and <input> elements found at the top level into it,
  * drops stray text nodes at the top level of the document.

Usage: docfixer.py [infile [outfile]]
"""
__version__ = '$Revision$'


import errno
import string
import sys
import xml.dom.core
import xml.dom.esis_builder


# Workaround to deal with invalid documents (multiple root elements).  This
# does not indicate a bug in the DOM implementation.
#
def get_documentElement(self):
    """Return the last element child of the document, or None.

    Every child of the underlying node is examined; when several element
    children exist (an invalid document), the last one wins.
    """
    docelem = None
    for n in self._node.children:
        if n.type == xml.dom.core.ELEMENT:
            docelem = xml.dom.core.Element(n, self, self)
    return docelem

xml.dom.core.Document.get_documentElement = get_documentElement


# Replace get_childNodes for the Document class; without this, children
# accessed from the Document object via .childNodes (no matter how many
# levels of access are used) will be given an ownerDocument of None.
#
def get_childNodes(self):
    """Return the document's children as a NodeList owned by the document."""
    return xml.dom.core.NodeList(self._node.children, self, self)

xml.dom.core.Document.get_childNodes = get_childNodes


def get_first_element(doc, gi):
    """Return the first direct child element of `doc` named `gi`, or None."""
    for n in doc.childNodes:
        if n.nodeType == xml.dom.core.ELEMENT and n.tagName == gi:
            return n
    return None


def extract_first_element(doc, gi):
    """Detach and return the first direct child element of `doc` named `gi`.

    Returns None (and leaves `doc` untouched) when there is no such child.
    """
    node = get_first_element(doc, gi)
    if node is not None:
        doc.removeChild(node)
    return node


def simplify(doc):
    """Restructure the top level of `doc` around a single root element.

    The <documentclass> element is removed and its "classname" attribute
    becomes the name of the <document> element; the top-level <title> and
    <input> elements are moved to the front of the document element, each
    followed by a newline.  Leading text nodes of the document are dropped.
    """
    # Try to rationalize the document a bit, since these things are simply
    # not valid SGML/XML documents as they stand, and need a little work.
    documentclass = "document"
    inputs = []
    node = extract_first_element(doc, "documentclass")
    if node is not None:
        documentclass = node.getAttribute("classname")
    node = extract_first_element(doc, "title")
    if node is not None:
        inputs.append(node)
    # update the name of the root element
    node = get_first_element(doc, "document")
    if node is not None:
        node._node.name = documentclass
    while 1:
        node = extract_first_element(doc, "input")
        if node is None:
            break
        inputs.append(node)
    if inputs:
        docelem = doc.documentElement
        # Each node is inserted at the front, so walk the list backwards to
        # end up with the original order.
        inputs.reverse()
        for node in inputs:
            text = doc.createTextNode("\n")
            docelem.insertBefore(text, docelem.firstChild)
            docelem.insertBefore(node, text)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    # Guard against a document with no children at all, where firstChild is
    # None and has no nodeType.
    while (doc.firstChild is not None
           and doc.firstChild.nodeType == xml.dom.core.TEXT):
        doc.removeChild(doc.firstChild)


def cleanup_root_text(doc):
    """Remove text nodes that are direct children of `doc`.

    A text node that immediately follows a comment node is kept
    (presumably the line break after the comment -- confirm).
    """
    discards = []
    skip = 0
    for n in doc.childNodes:
        prevskip = skip
        skip = 0
        if n.nodeType == xml.dom.core.TEXT and not prevskip:
            discards.append(n)
        elif n.nodeType == xml.dom.core.COMMENT:
            skip = 1
    # Remove only after the scan so the child list is not modified while it
    # is being iterated.
    for node in discards:
        doc.removeChild(node)


def rewrite_desc_entries(doc, argname_gi):
    """Wrap the descriptive content next to each `argname_gi` element.

    For every element named `argname_gi`, all of its siblings that are not
    themselves `argname_gi` elements are moved into a new <description>
    element appended to the parent.  An `argname_gi` element without
    children is removed.
    """
    argnodes = doc.getElementsByTagName(argname_gi)
    for node in argnodes:
        parent = node.parentNode
        nodes = []
        for n in parent.childNodes:
            if n.nodeType != xml.dom.core.ELEMENT or n.tagName != argname_gi:
                nodes.append(n)
        desc = doc.createElement("description")
        for n in nodes:
            parent.removeChild(n)
            desc.appendChild(n)
        # Text nodes are created with createTextNode(), the same factory
        # simplify() uses; the document has no createText() method.
        if node.childNodes:
            # keep the <args>...</args>, newline & indent
            parent.insertBefore(doc.createTextNode("\n "), node)
        else:
            # no arguments, remove the <args/> node
            parent.removeChild(node)
        parent.appendChild(doc.createTextNode("\n "))
        parent.appendChild(desc)
        parent.appendChild(doc.createTextNode("\n"))


def handle_args(doc):
    """Apply rewrite_desc_entries() for <args> and <constructor-args>."""
    rewrite_desc_entries(doc, "args")
    rewrite_desc_entries(doc, "constructor-args")


def handle_comments(doc, node=None):
    """Replace every <COMMENT> element below `node` with a comment node.

    `node` defaults to the document itself.  The comment text is the data
    of the element's first child; an empty <COMMENT/> yields an empty
    comment.  Other elements are processed recursively.
    """
    if node is None:
        node = doc
    for n in node.childNodes:
        if n.nodeType == xml.dom.core.ELEMENT:
            if n.tagName == "COMMENT":
                children = n.childNodes
                if children:
                    data = children[0].data
                else:
                    # An empty <COMMENT/> has no child to take the text from.
                    data = ""
                comment = doc.createComment(data)
                node.replaceChild(comment, n)
            else:
                handle_comments(doc, n)


def handle_labels(doc):
    """Promote <label id="..."/> IDs to the enclosing element.

    The "id" attribute is copied to the label's parent, or to the
    grandparent when the parent is a <title>, and the <label/> is removed.
    Labels without an id are left alone.
    """
    labels = doc.getElementsByTagName("label")
    for label in labels:
        label_id = label.getAttribute("id")
        if not label_id:
            continue
        parent = label.parentNode
        if parent.tagName == "title":
            parent.parentNode.setAttribute("id", label_id)
        else:
            parent.setAttribute("id", label_id)
        # now, remove <label id="..."/> from parent:
        parent.removeChild(label)


def convert(ifp, ofp):
    """Read an ESIS stream from `ifp`, fix up the tree, write XML to `ofp`.

    `ifp` needs a read() method and `ofp` a write() method.  An EPIPE
    error while writing is ignored; any other IOError propagates.
    """
    p = xml.dom.esis_builder.EsisBuilder()
    p.feed(ifp.read())
    doc = p.document
    handle_args(doc)
    handle_comments(doc)
    simplify(doc)
    handle_labels(doc)
    cleanup_root_text(doc)
    try:
        ofp.write(doc.toxml())
        ofp.write("\n")
    except IOError:
        # Ignore EPIPE; it just means that whoever we're writing to stopped
        # reading.  The rest of the output would be ignored.  All other errors
        # should still be reported.
        #
        # The error number is taken from the exception arguments via
        # sys.exc_info() so that this handler parses under any Python
        # version.
        err = sys.exc_info()[1].args[0]
        if err != errno.EPIPE:
            raise


def usage():
    """Write a one-line usage message to standard error."""
    sys.stderr.write("usage: %s [infile [outfile]]\n" % sys.argv[0])


def main():
    """Command-line entry point.

    With no arguments, convert stdin to stdout; with one, read the named
    file and write to stdout; with two, read the first file and write the
    second.  Anything else prints a usage message and exits with status 2.
    """
    if len(sys.argv) == 1:
        ifp = sys.stdin
        ofp = sys.stdout
    elif len(sys.argv) == 2:
        ifp = open(sys.argv[1])
        ofp = sys.stdout
    elif len(sys.argv) == 3:
        ifp = open(sys.argv[1])
        ofp = open(sys.argv[2], "w")
    else:
        usage()
        sys.exit(2)
    convert(ifp, ofp)


if __name__ == "__main__":
    main()