Lots of adjustments to deal with the document content now being stored in a fragment rather than the main document object.
author: Fred Drake <fdrake@acm.org> 1999-05-10 19:36:52 (GMT)
committer: Fred Drake <fdrake@acm.org> 1999-05-10 19:36:52 (GMT)
commit: e779d4f03bc475582ee585d9ada170462508b74b (patch)
tree: ff2df8f61e4a0f5d18c01aeeed44d645bd117a99 /Doc/tools
parent: 54fb7fb9d0ff1c96849572ff809cc9323e87bfa4 (diff)
download: cpython-e779d4f03bc475582ee585d9ada170462508b74b.zip
cpython-e779d4f03bc475582ee585d9ada170462508b74b.tar.gz
cpython-e779d4f03bc475582ee585d9ada170462508b74b.tar.bz2
1 files changed, 94 insertions, 91 deletions
diff --git a/Doc/tools/sgmlconv/docfixer.py b/Doc/tools/sgmlconv/docfixer.py
index 11c487d..f700134 100755
--- a/Doc/tools/sgmlconv/docfixer.py
+++ b/Doc/tools/sgmlconv/docfixer.py
@@ -12,7 +12,10 @@ import re
 import string
 import sys
 import xml.dom.core
-import xml.dom.esis_builder
+
+from xml.dom.core import \
+     ELEMENT, \
+     TEXT
 
 
 class ConversionError(Exception):
@@ -32,11 +35,11 @@ else:
 # Workaround to deal with invalid documents (multiple root elements).  This
 # does not indicate a bug in the DOM implementation.
 #
-def get_documentElement(self):
+def get_documentElement(doc):
     docelem = None
-    for n in self._node.children:
-        if n.type == xml.dom.core.ELEMENT:
-            docelem = xml.dom.core.Element(n, self, self)
+    for n in doc.childNodes:
+        if n.nodeType == ELEMENT:
+            docelem = n
     return docelem
 
 xml.dom.core.Document.get_documentElement = get_documentElement
@@ -46,15 +49,15 @@ xml.dom.core.Document.get_documentElement = get_documentElement
 # accessed from the Document object via .childNodes (no matter how many
 # levels of access are used) will be given an ownerDocument of None.
 #
-def get_childNodes(self):
-    return xml.dom.core.NodeList(self._node.children, self, self)
+def get_childNodes(doc):
+    return xml.dom.core.NodeList(doc._node.children, doc._node)
 
 xml.dom.core.Document.get_childNodes = get_childNodes
 
 
 def get_first_element(doc, gi):
     for n in doc.childNodes:
-        if n.nodeType == xml.dom.core.ELEMENT and n.tagName == gi:
+        if n.nodeType == ELEMENT and n.tagName == gi:
             return n
 
 def extract_first_element(doc, gi):
@@ -66,10 +69,10 @@ def extract_first_element(doc, gi):
 
 def find_all_elements(doc, gi):
     nodes = []
-    if doc.nodeType == xml.dom.core.ELEMENT and doc.tagName == gi:
+    if doc.nodeType == ELEMENT and doc.tagName == gi:
         nodes.append(doc)
     for child in doc.childNodes:
-        if child.nodeType == xml.dom.core.ELEMENT:
+        if child.nodeType == ELEMENT:
             if child.tagName == gi:
                 nodes.append(child)
             for node in child.getElementsByTagName(gi):
@@ -77,36 +80,36 @@ def find_all_elements(doc, gi):
     return nodes        
 
 
-def simplify(doc):
+def simplify(doc, fragment):
     # Try to rationalize the document a bit, since these things are simply
     # not valid SGML/XML documents as they stand, and need a little work.
     documentclass = "document"
     inputs = []
-    node = extract_first_element(doc, "documentclass")
+    node = extract_first_element(fragment, "documentclass")
     if node is not None:
         documentclass = node.getAttribute("classname")
-    node = extract_first_element(doc, "title")
+    node = extract_first_element(fragment, "title")
     if node is not None:
         inputs.append(node)
     # update the name of the root element
-    node = get_first_element(doc, "document")
+    node = get_first_element(fragment, "document")
     if node is not None:
         node._node.name = documentclass
     while 1:
-        node = extract_first_element(doc, "input")
+        node = extract_first_element(fragment, "input")
         if node is None:
             break
         inputs.append(node)
     if inputs:
-        docelem = doc.documentElement
+        docelem = get_documentElement(fragment)
         inputs.reverse()
         for node in inputs:
             text = doc.createTextNode("\n")
             docelem.insertBefore(text, docelem.firstChild)
             docelem.insertBefore(node, text)
         docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
-    while doc.firstChild.nodeType == xml.dom.core.TEXT:
-        doc.removeChild(doc.firstChild)
+    while fragment.firstChild.nodeType == TEXT:
+        fragment.removeChild(fragment.firstChild)
 
 
 def cleanup_root_text(doc):
@@ -115,9 +118,9 @@ def cleanup_root_text(doc):
     for n in doc.childNodes:
         prevskip = skip
         skip = 0
-        if n.nodeType == xml.dom.core.TEXT and not prevskip:
+        if n.nodeType == TEXT and not prevskip:
             discards.append(n)
-        elif n.nodeType == xml.dom.core.ELEMENT and n.tagName == "COMMENT":
+        elif n.nodeType == ELEMENT and n.tagName == "COMMENT":
             skip = 1
     for node in discards:
         doc.removeChild(node)
@@ -130,8 +133,8 @@ DESCRIPTOR_ELEMENTS = (
     "datadesc", "datadescni",
     )
 
-def fixup_descriptors(doc):
-    sections = find_all_elements(doc, "section")
+def fixup_descriptors(doc, fragment):
+    sections = find_all_elements(fragment, "section")
     for section in sections:
         find_and_fix_descriptors(doc, section)
 
@@ -139,7 +142,7 @@ def fixup_descriptors(doc):
 def find_and_fix_descriptors(doc, container):
     children = container.childNodes
     for child in children:
-        if child.nodeType == xml.dom.core.ELEMENT:
+        if child.nodeType == ELEMENT:
             tagName = child.tagName
             if tagName in DESCRIPTOR_ELEMENTS:
                 rewrite_descriptor(doc, child)
@@ -191,7 +194,7 @@ def rewrite_descriptor(doc, descriptor):
     pos = skip_leading_nodes(children, 0)
     if pos < len(children):
         child = children[pos]
-        if child.nodeType == xml.dom.core.ELEMENT and child.tagName == "args":
+        if child.nodeType == ELEMENT and child.tagName == "args":
             # create an <args> in <signature>:
             args = doc.createElement("args")
             argchildren = []
@@ -205,7 +208,7 @@ def rewrite_descriptor(doc, descriptor):
     # 3, 4.
     pos = skip_leading_nodes(children, pos + 1)
     while pos < len(children) \
-          and children[pos].nodeType == xml.dom.core.ELEMENT \
+          and children[pos].nodeType == ELEMENT \
           and children[pos].tagName in (linename, "versionadded"):
         if children[pos].tagName == linename:
             # this is really a supplemental signature, create <signature>
@@ -222,7 +225,7 @@ def rewrite_descriptor(doc, descriptor):
     newchildren.append(description)
     move_children(descriptor, description, pos)
     last = description.childNodes[-1]
-    if last.nodeType == xml.dom.core.TEXT:
+    if last.nodeType == TEXT:
         last.data = string.rstrip(last.data) + "\n  "
     # 6.
     # should have nothing but whitespace and signature lines in <descriptor>;
@@ -259,16 +262,16 @@ def move_children(origin, dest, start=0):
         dest.appendChild(node)
 
 
-def handle_appendix(doc):
+def handle_appendix(doc, fragment):
     # must be called after simplfy() if document is multi-rooted to begin with
-    docelem = doc.documentElement
+    docelem = get_documentElement(fragment)
     toplevel = docelem.tagName == "manual" and "chapter" or "section"
     appendices = 0
     nodes = []
     for node in docelem.childNodes:
         if appendices:
             nodes.append(node)
-        elif node.nodeType == xml.dom.core.ELEMENT:
+        elif node.nodeType == ELEMENT:
             appnodes = node.getElementsByTagName("appendix")
             if appnodes:
                 appendices = 1
@@ -281,7 +284,7 @@ def handle_appendix(doc):
         back = doc.createElement("back-matter")
         docelem.appendChild(back)
         back.appendChild(doc.createTextNode("\n"))
-        while nodes and nodes[0].nodeType == xml.dom.core.TEXT \
+        while nodes and nodes[0].nodeType == TEXT \
               and not string.strip(nodes[0].data):
             del nodes[0]
         map(back.appendChild, nodes)
@@ -307,28 +310,28 @@ def fixup_trailing_whitespace(doc, wsmap):
     while queue:
         node = queue[0]
         del queue[0]
-        if node.nodeType == xml.dom.core.ELEMENT \
+        if node.nodeType == ELEMENT \
            and wsmap.has_key(node.tagName):
             ws = wsmap[node.tagName]
             children = node.childNodes
             children.reverse()
-            if children[0].nodeType == xml.dom.core.TEXT:
+            if children[0].nodeType == TEXT:
                 data = string.rstrip(children[0].data) + ws
                 children[0].data = data
             children.reverse()
             # hack to get the title in place:
             if node.tagName == "title" \
-               and node.parentNode.firstChild.nodeType == xml.dom.core.ELEMENT:
+               and node.parentNode.firstChild.nodeType == ELEMENT:
                 node.parentNode.insertBefore(doc.createText("\n  "),
                                              node.parentNode.firstChild)
         for child in node.childNodes:
-            if child.nodeType == xml.dom.core.ELEMENT:
+            if child.nodeType == ELEMENT:
                 queue.append(child)
 
 
 def normalize(doc):
     for node in doc.childNodes:
-        if node.nodeType == xml.dom.core.ELEMENT:
+        if node.nodeType == ELEMENT:
             node.normalize()
 
 
@@ -339,7 +342,7 @@ def cleanup_trailing_parens(doc, element_names):
     rewrite_element = d.has_key
     queue = []
     for node in doc.childNodes:
-        if node.nodeType == xml.dom.core.ELEMENT:
+        if node.nodeType == ELEMENT:
             queue.append(node)
     while queue:
         node = queue[0]
@@ -347,13 +350,13 @@ def cleanup_trailing_parens(doc, element_names):
         if rewrite_element(node.tagName):
             children = node.childNodes
             if len(children) == 1 \
-               and children[0].nodeType == xml.dom.core.TEXT:
+               and children[0].nodeType == TEXT:
                 data = children[0].data
                 if data[-2:] == "()":
                     children[0].data = data[:-2]
         else:
             for child in node.childNodes:
-                if child.nodeType == xml.dom.core.ELEMENT:
+                if child.nodeType == ELEMENT:
                     queue.append(child)
 
 
@@ -366,13 +369,13 @@ def contents_match(left, right):
         nodeType = l.nodeType
         if nodeType != r.nodeType:
             return 0
-        if nodeType == xml.dom.core.ELEMENT:
+        if nodeType == ELEMENT:
             if l.tagName != r.tagName:
                 return 0
             # should check attributes, but that's not a problem here
             if not contents_match(l, r):
                 return 0
-        elif nodeType == xml.dom.core.TEXT:
+        elif nodeType == TEXT:
             if l.data != r.data:
                 return 0
         else:
@@ -388,7 +391,7 @@ def create_module_info(doc, section):
         return
     node._node.name = "synopsis"
     lastchild = node.childNodes[-1]
-    if lastchild.nodeType == xml.dom.core.TEXT \
+    if lastchild.nodeType == TEXT \
        and lastchild.data[-1:] == ".":
         lastchild.data = lastchild.data[:-1]
     modauthor = extract_first_element(section, "moduleauthor")
@@ -423,7 +426,7 @@ def create_module_info(doc, section):
         if title:
             children = title.childNodes
             if len(children) >= 2 \
-               and children[0].nodeType == xml.dom.core.ELEMENT \
+               and children[0].nodeType == ELEMENT \
                and children[0].tagName == "module" \
                and children[0].childNodes[0].data == name:
                 # this is it; morph the <title> into <short-synopsis>
@@ -431,7 +434,7 @@ def create_module_info(doc, section):
                 if first_data.data[:4] == " ---":
                     first_data.data = string.lstrip(first_data.data[4:])
                 title._node.name = "short-synopsis"
-                if children[-1].nodeType == xml.dom.core.TEXT \
+                if children[-1].nodeType == TEXT \
                    and children[-1].data[-1:] == ".":
                     children[-1].data = children[-1].data[:-1]
                 section.removeChild(title)
@@ -470,10 +473,10 @@ def create_module_info(doc, section):
         children = section.childNodes
         for i in range(len(children)):
             node = children[i]
-            if node.nodeType == xml.dom.core.ELEMENT \
+            if node.nodeType == ELEMENT \
                and node.tagName == "moduleinfo":
                 nextnode = children[i+1]
-                if nextnode.nodeType == xml.dom.core.TEXT:
+                if nextnode.nodeType == TEXT:
                     data = nextnode.data
                     if len(string.lstrip(data)) < (len(data) - 4):
                         nextnode.data = "\n\n\n" + string.lstrip(data)
@@ -487,7 +490,7 @@ def cleanup_synopses(doc):
 def remap_element_names(root, name_map):
     queue = []
     for child in root.childNodes:
-        if child.nodeType == xml.dom.core.ELEMENT:
+        if child.nodeType == ELEMENT:
             queue.append(child)
     while queue:
         node = queue.pop()
@@ -498,13 +501,13 @@ def remap_element_names(root, name_map):
             for attr, value in attrs.items():
                 node.setAttribute(attr, value)
         for child in node.childNodes:
-            if child.nodeType == xml.dom.core.ELEMENT:
+            if child.nodeType == ELEMENT:
                 queue.append(child)
 
 
-def fixup_table_structures(doc):
+def fixup_table_structures(doc, fragment):
     # must be done after remap_element_names(), or the tables won't be found
-    for table in find_all_elements(doc, "table"):
+    for table in find_all_elements(fragment, "table"):
         fixup_table(doc, table)
 
 
@@ -522,7 +525,7 @@ def fixup_table(doc, table):
     last_was_hline = 0
     children = table.childNodes
     for child in children:
-        if child.nodeType == xml.dom.core.ELEMENT:
+        if child.nodeType == ELEMENT:
             tagName = child.tagName
             if tagName == "hline" and prev_row is not None:
                 prev_row.setAttribute("rowsep", "1")
@@ -535,12 +538,12 @@ def fixup_table(doc, table):
     while children:
         child = children[0]
         nodeType = child.nodeType
-        if nodeType == xml.dom.core.TEXT:
+        if nodeType == TEXT:
             if string.strip(child.data):
                 raise ConversionError("unexpected free data in table")
             table.removeChild(child)
             continue
-        if nodeType == xml.dom.core.ELEMENT:
+        if nodeType == ELEMENT:
             if child.tagName != "hline":
                 raise ConversionError(
                     "unexpected <%s> in table" % child.tagName)
@@ -572,7 +575,7 @@ def fixup_row(doc, row):
 def move_elements_by_name(doc, source, dest, name, sep=None):
     nodes = []
     for child in source.childNodes:
-        if child.nodeType == xml.dom.core.ELEMENT and child.tagName == name:
+        if child.nodeType == ELEMENT and child.tagName == name:
             nodes.append(child)
     for node in nodes:
         source.removeChild(node)
@@ -606,13 +609,13 @@ PARA_LEVEL_PRECEEDERS = (
     )
 
 
-def fixup_paras(doc):
-    for child in doc.childNodes:
-        if child.nodeType == xml.dom.core.ELEMENT \
+def fixup_paras(doc, fragment):
+    for child in fragment.childNodes:
+        if child.nodeType == ELEMENT \
            and child.tagName in RECURSE_INTO_PARA_CONTAINERS:
             #
             fixup_paras_helper(doc, child)
-    descriptions = find_all_elements(doc, "description")
+    descriptions = find_all_elements(fragment, "description")
     for description in descriptions:
         fixup_paras_helper(doc, description)
 
@@ -628,7 +631,7 @@ def fixup_paras_helper(doc, container, depth=0):
         #
         # Either paragraph material or something to recurse into:
         #
-        if (children[start].nodeType == xml.dom.core.ELEMENT) \
+        if (children[start].nodeType == ELEMENT) \
            and (children[start].tagName in RECURSE_INTO_PARA_CONTAINERS):
             fixup_paras_helper(doc, children[start])
             start = skip_leading_nodes(children, start + 1)
@@ -653,11 +656,11 @@ def build_para(doc, parent, start, i):
         after = j + 1
         child = children[j]
         nodeType = child.nodeType
-        if nodeType == xml.dom.core.ELEMENT:
+        if nodeType == ELEMENT:
             if child.tagName in BREAK_ELEMENTS:
                 after = j
                 break
-        elif nodeType == xml.dom.core.TEXT:
+        elif nodeType == TEXT:
             pos = string.find(child.data, "\n\n")
             if pos == 0:
                 after = j
@@ -670,7 +673,7 @@ def build_para(doc, parent, start, i):
     if (start + 1) > after:
         raise ConversionError(
             "build_para() could not identify content to turn into a paragraph")
-    if children[after - 1].nodeType == xml.dom.core.TEXT:
+    if children[after - 1].nodeType == TEXT:
         # we may need to split off trailing white space:
         child = children[after - 1]
         data = child.data
@@ -707,7 +710,7 @@ def skip_leading_nodes(children, start):
         # skip over leading comments and whitespace:
         child = children[start]
         nodeType = child.nodeType
-        if nodeType == xml.dom.core.TEXT:
+        if nodeType == TEXT:
             data = child.data
             shortened = string.lstrip(data)
             if shortened:
@@ -717,7 +720,7 @@ def skip_leading_nodes(children, start):
                     return start + 1
                 return start
             # all whitespace, just skip
-        elif nodeType == xml.dom.core.ELEMENT:
+        elif nodeType == ELEMENT:
             tagName = child.tagName
             if tagName in RECURSE_INTO_PARA_CONTAINERS:
                 return start
@@ -727,15 +730,15 @@ def skip_leading_nodes(children, start):
     return start
 
 
-def fixup_rfc_references(doc):
-    for rfcnode in find_all_elements(doc, "rfc"):
+def fixup_rfc_references(doc, fragment):
+    for rfcnode in find_all_elements(fragment, "rfc"):
         rfcnode.appendChild(doc.createTextNode(
             "RFC " + rfcnode.getAttribute("num")))
 
 
-def fixup_signatures(doc):
-    for child in doc.childNodes:
-        if child.nodeType == xml.dom.core.ELEMENT:
+def fixup_signatures(doc, fragment):
+    for child in fragment.childNodes:
+        if child.nodeType == ELEMENT:
             args = child.getElementsByTagName("args")
             for arg in args:
                 fixup_args(doc, arg)
@@ -748,7 +751,7 @@ def fixup_signatures(doc):
 
 def fixup_args(doc, arglist):
     for child in arglist.childNodes:
-        if child.nodeType == xml.dom.core.ELEMENT \
+        if child.nodeType == ELEMENT \
            and child.tagName == "optional":
             # found it; fix and return
             arglist.insertBefore(doc.createTextNode("["), child)
@@ -762,8 +765,8 @@ def fixup_args(doc, arglist):
             return fixup_args(doc, arglist)
 
 
-def fixup_sectionauthors(doc):
-    for sectauth in find_all_elements(doc, "sectionauthor"):
+def fixup_sectionauthors(doc, fragment):
+    for sectauth in find_all_elements(fragment, "sectionauthor"):
         section = sectauth.parentNode
         section.removeChild(sectauth)
         sectauth._node.name = "author"
@@ -772,7 +775,7 @@ def fixup_sectionauthors(doc):
         sectauth.removeAttribute("name")
         after = section.childNodes[2]
         title = section.childNodes[1]
-        if title.nodeType == xml.dom.core.ELEMENT and title.tagName != "title":
+        if title.nodeType == ELEMENT and title.tagName != "title":
             after = section.childNodes[0]
         section.insertBefore(doc.createTextNode("\n  "), after)
         section.insertBefore(sectauth, after)
@@ -781,10 +784,9 @@ def fixup_sectionauthors(doc):
 def fixup_verbatims(doc):
     for verbatim in find_all_elements(doc, "verbatim"):
         child = verbatim.childNodes[0]
-        if child.nodeType == xml.dom.core.TEXT \
+        if child.nodeType == TEXT \
            and string.lstrip(child.data)[:3] == ">>>":
-            verbatim._node.name = "interpreter-session"
-            #verbatim.setAttribute("interactive", "interactive")
+            verbatim._node.name = "interactive-session"
 
 
 _token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
@@ -792,7 +794,7 @@ _token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
 def write_esis(doc, ofp, knownempty):
     for node in doc.childNodes:
         nodeType = node.nodeType
-        if nodeType == xml.dom.core.ELEMENT:
+        if nodeType == ELEMENT:
             gi = node.tagName
             if knownempty(gi):
                 if node.hasChildNodes():
@@ -808,7 +810,7 @@ def write_esis(doc, ofp, knownempty):
             ofp.write("(%s\n" % gi)
             write_esis(node, ofp, knownempty)
             ofp.write(")%s\n" % gi)
-        elif nodeType == xml.dom.core.TEXT:
+        elif nodeType == TEXT:
             ofp.write("-%s\n" % esistools.encode(node.data))
         else:
             raise RuntimeError, "unsupported node type: %s" % nodeType
@@ -818,10 +820,11 @@ def convert(ifp, ofp):
     p = esistools.ExtendedEsisBuilder()
     p.feed(ifp.read())
     doc = p.document
-    normalize(doc)
-    simplify(doc)
-    handle_labels(doc)
-    handle_appendix(doc)
+    fragment = p.fragment
+    normalize(fragment)
+    simplify(doc, fragment)
+    handle_labels(fragment)
+    handle_appendix(doc, fragment)
     fixup_trailing_whitespace(doc, {
         "abstract": "\n",
         "title": "",
@@ -835,12 +838,12 @@ def convert(ifp, ofp):
     cleanup_root_text(doc)
     cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
     cleanup_synopses(doc)
-    fixup_descriptors(doc)
-    fixup_verbatims(doc)
-    normalize(doc)
-    fixup_paras(doc)
-    fixup_sectionauthors(doc)
-    remap_element_names(doc, {
+    fixup_descriptors(doc, fragment)
+    fixup_verbatims(fragment)
+    normalize(fragment)
+    fixup_paras(doc, fragment)
+    fixup_sectionauthors(doc, fragment)
+    remap_element_names(fragment, {
         "tableii": ("table", {"cols": "2"}),
         "tableiii": ("table", {"cols": "3"}),
         "tableiv": ("table", {"cols": "4"}),
@@ -849,9 +852,9 @@ def convert(ifp, ofp):
         "lineiv": ("row", {}),
         "refmodule": ("module", {"link": "link"}),
         })
-    fixup_table_structures(doc)
-    fixup_rfc_references(doc)
-    fixup_signatures(doc)
+    fixup_table_structures(doc, fragment)
+    fixup_rfc_references(doc, fragment)
+    fixup_signatures(doc, fragment)
     #
     d = {}
     for gi in p.get_empties():
@@ -861,7 +864,7 @@ def convert(ifp, ofp):
     knownempty = d.has_key
     #
     try:
-        write_esis(doc, ofp, knownempty)
+        write_esis(fragment, ofp, knownempty)
     except IOError, (err, msg):
         # Ignore EPIPE; it just means that whoever we're writing to stopped
         # reading.  The rest of the output would be ignored.  All other errors
author	Fred Drake <fdrake@acm.org>	1999-05-10 19:36:52 (GMT)
committer	Fred Drake <fdrake@acm.org>	1999-05-10 19:36:52 (GMT)
commit	e779d4f03bc475582ee585d9ada170462508b74b (patch)
tree	ff2df8f61e4a0f5d18c01aeeed44d645bd117a99 /Doc/tools
parent	54fb7fb9d0ff1c96849572ff809cc9323e87bfa4 (diff)
download	cpython-e779d4f03bc475582ee585d9ada170462508b74b.zip cpython-e779d4f03bc475582ee585d9ada170462508b74b.tar.gz cpython-e779d4f03bc475582ee585d9ada170462508b74b.tar.bz2