summaryrefslogtreecommitdiffstats
path: root/Doc/tools
diff options
context:
space:
mode:
authorFred Drake <fdrake@acm.org>1998-11-23 17:02:03 (GMT)
committerFred Drake <fdrake@acm.org>1998-11-23 17:02:03 (GMT)
commit0320473a29fef5d35a5da1056e5c30e72cf25ac0 (patch)
tree2dcffb074c9d95057b4b70be3fa1efb7bcff515c /Doc/tools
parent30a68c7a2bb12b8f10d239206033492e4a9e27a6 (diff)
downloadcpython-0320473a29fef5d35a5da1056e5c30e72cf25ac0.zip
cpython-0320473a29fef5d35a5da1056e5c30e72cf25ac0.tar.gz
cpython-0320473a29fef5d35a5da1056e5c30e72cf25ac0.tar.bz2
Script to squirrel around with the DOM tree of document fragments from the
LaTeX-based ESIS streams to be a little better structured, and generally perform clean-up. Preliminary.
Diffstat (limited to 'Doc/tools')
-rwxr-xr-xDoc/tools/sgmlconv/docfixer.py193
1 files changed, 193 insertions, 0 deletions
diff --git a/Doc/tools/sgmlconv/docfixer.py b/Doc/tools/sgmlconv/docfixer.py
new file mode 100755
index 0000000..00d4727
--- /dev/null
+++ b/Doc/tools/sgmlconv/docfixer.py
@@ -0,0 +1,193 @@
+#! /usr/bin/env python
+
+"""Promote the IDs from <label/> elements to the enclosing section / chapter /
+whatever, then remove the <label/> elements. This allows *ML style internal
+linking rather than the bogus LaTeX model.
+
+Note that <label/>s in <title> elements are promoted two steps, since the
+<title> elements are artificially created from the section parameter, and the
+label really refers to the sectioning construct.
+"""
+__version__ = '$Revision$'
+
+
+import errno
+import string
+import sys
+import xml.dom.core
+import xml.dom.esis_builder
+
+
+# Workaround to deal with invalid documents (multiple root elements). This
+# does not indicate a bug in the DOM implementation.
+#
+def get_documentElement(self):
+ docelem = None
+ for n in self._node.children:
+ if n.type == xml.dom.core.ELEMENT:
+ docelem = xml.dom.core.Element(n, self, self)
+ return docelem
+
+xml.dom.core.Document.get_documentElement = get_documentElement
+
+
+# Replace get_childNodes for the Document class; without this, children
+# accessed from the Document object via .childNodes (no matter how many
+# levels of access are used) will be given an ownerDocument of None.
+#
+def get_childNodes(self):
+ return xml.dom.core.NodeList(self._node.children, self, self)
+
+xml.dom.core.Document.get_childNodes = get_childNodes
+
+
+def get_first_element(doc, gi):
+ for n in doc.childNodes:
+ if n.nodeType == xml.dom.core.ELEMENT and n.tagName == gi:
+ return n
+
+def extract_first_element(doc, gi):
+ node = get_first_element(doc, gi)
+ if node is not None:
+ doc.removeChild(node)
+ return node
+
+
+def simplify(doc):
+ # Try to rationalize the document a bit, since these things are simply
+ # not valid SGML/XML documents as they stand, and need a little work.
+ documentclass = "document"
+ inputs = []
+ node = extract_first_element(doc, "documentclass")
+ if node is not None:
+ documentclass = node.getAttribute("classname")
+ node = extract_first_element(doc, "title")
+ if node is not None:
+ inputs.append(node)
+ # update the name of the root element
+ node = get_first_element(doc, "document")
+ if node is not None:
+ node._node.name = documentclass
+ while 1:
+ node = extract_first_element(doc, "input")
+ if node is None:
+ break
+ inputs.append(node)
+ if inputs:
+ docelem = doc.documentElement
+ inputs.reverse()
+ for node in inputs:
+ text = doc.createTextNode("\n")
+ docelem.insertBefore(text, docelem.firstChild)
+ docelem.insertBefore(node, text)
+ docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
+ while doc.firstChild.nodeType == xml.dom.core.TEXT:
+ doc.removeChild(doc.firstChild)
+
+
+def cleanup_root_text(doc):
+ discards = []
+ skip = 0
+ for n in doc.childNodes:
+ prevskip = skip
+ skip = 0
+ if n.nodeType == xml.dom.core.TEXT and not prevskip:
+ discards.append(n)
+ elif n.nodeType == xml.dom.core.COMMENT:
+ skip = 1
+ for node in discards:
+ doc.removeChild(node)
+
+
+def rewrite_desc_entries(doc, argname_gi):
+ argnodes = doc.getElementsByTagName(argname_gi)
+ for node in argnodes:
+ parent = node.parentNode
+ nodes = []
+ for n in parent.childNodes:
+ if n.nodeType != xml.dom.core.ELEMENT or n.tagName != argname_gi:
+ nodes.append(n)
+ desc = doc.createElement("description")
+ for n in nodes:
+ parent.removeChild(n)
+ desc.appendChild(n)
+ if node.childNodes:
+ # keep the <args>...</args>, newline & indent
+ parent.insertBefore(doc.createText("\n "), node)
+ else:
+ # no arguments, remove the <args/> node
+ parent.removeChild(node)
+ parent.appendChild(doc.createText("\n "))
+ parent.appendChild(desc)
+ parent.appendChild(doc.createText("\n"))
+
+def handle_args(doc):
+ rewrite_desc_entries(doc, "args")
+ rewrite_desc_entries(doc, "constructor-args")
+
+
+def handle_comments(doc, node=None):
+ if node is None:
+ node = doc
+ for n in node.childNodes:
+ if n.nodeType == xml.dom.core.ELEMENT:
+ if n.tagName == "COMMENT":
+ comment = doc.createComment(n.childNodes[0].data)
+ node.replaceChild(comment, n)
+ else:
+ handle_comments(doc, n)
+
+
+def handle_labels(doc):
+ labels = doc.getElementsByTagName("label")
+ for label in labels:
+ id = label.getAttribute("id")
+ if not id:
+ continue
+ parent = label.parentNode
+ if parent.tagName == "title":
+ parent.parentNode.setAttribute("id", id)
+ else:
+ parent.setAttribute("id", id)
+ # now, remove <label id="..."/> from parent:
+ parent.removeChild(label)
+
+
+def convert(ifp, ofp):
+ p = xml.dom.esis_builder.EsisBuilder()
+ p.feed(ifp.read())
+ doc = p.document
+ handle_args(doc)
+ handle_comments(doc)
+ simplify(doc)
+ handle_labels(doc)
+ cleanup_root_text(doc)
+ try:
+ ofp.write(doc.toxml())
+ ofp.write("\n")
+ except IOError, (err, msg):
+ # Ignore EPIPE; it just means that whoever we're writing to stopped
+ # reading. The rest of the output would be ignored. All other errors
+ # should still be reported,
+ if err != errno.EPIPE:
+ raise
+
+
+def main():
+ if len(sys.argv) == 1:
+ ifp = sys.stdin
+ ofp = sys.stdout
+ elif len(sys.argv) == 2:
+ ifp = open(sys.argv[1])
+ ofp = sys.stdout
+ elif len(sys.argv) == 3:
+ ifp = open(sys.argv[1])
+ ofp = open(sys.argv[2], "w")
+ else:
+ usage()
+ sys.exit(2)
+ convert(ifp, ofp)
+
+
+if __name__ == "__main__":
+ main()