Moved into tools/sgmlconv/.

author: Fred Drake <fdrake@acm.org> 1998-11-23 16:59:39 (GMT)
committer: Fred Drake <fdrake@acm.org> 1998-11-23 16:59:39 (GMT)
commit: 30a68c7a2bb12b8f10d239206033492e4a9e27a6 (patch)
tree: aa026df8f95a569033de3dad5e601b4c21802bbe /Doc
parent: 63de8f6d55214da3f7337a80a4ed8de0762fcd56 (diff)
download: cpython-30a68c7a2bb12b8f10d239206033492e4a9e27a6.zip
cpython-30a68c7a2bb12b8f10d239206033492e4a9e27a6.tar.gz
cpython-30a68c7a2bb12b8f10d239206033492e4a9e27a6.tar.bz2
2 files changed, 468 insertions, 0 deletions
diff --git a/Doc/tools/sgmlconv/esis2sgml.py b/Doc/tools/sgmlconv/esis2sgml.py
new file mode 100755
index 0000000..5f8c6e8
--- /dev/null
+++ b/Doc/tools/sgmlconv/esis2sgml.py
@@ -0,0 +1,131 @@
+#! /usr/bin/env python
+
+"""Convert ESIS events to SGML or XML markup.
+
+This is limited, but seems sufficient for the ESIS generated by the
+latex2esis.py script when run over the Python documentation.
+"""
+__version__ = '$Revision$'
+
+import errno
+import re
+import string
+
+
+# Matches a maximal run of characters containing no backslash, i.e. the
+# literal text up to the next escape sequence.
+_data_rx = re.compile(r"[^\\][^\\]*")
+
+def decode(s):
+    """Return the ESIS-encoded string `s` with its escapes expanded.
+
+    Two escape sequences are recognized: a doubled backslash stands for a
+    single backslash, and a backslash followed by "n" stands for a newline.
+    All other text is copied through unchanged.
+
+    Raises ValueError for any other escape sequence, including a lone
+    backslash at the very end of the input.
+    """
+    r = ''
+    while s:
+        m = _data_rx.match(s)
+        if m:
+            # a run of ordinary characters; copy it verbatim
+            r = r + m.group()
+            s = s[m.end():]
+        elif s[:2] == "\\\\":
+            # No match above means s starts with a backslash.  Two-character
+            # slices are compared instead of indexing s[1], so a trailing
+            # lone backslash reaches the ValueError below rather than
+            # failing with an IndexError.
+            r = r + "\\"
+            s = s[2:]
+        elif s[:2] == "\\n":
+            r = r + "\n"
+            s = s[2:]
+        else:
+            raise ValueError("can't handle " + repr(s))
+    return r
+
+
+def format_attrs(attrs):
+    """Return the attributes in the dictionary `attrs` as start-tag text.
+
+    Each attribute is rendered as ' name="value"' (with a leading space)
+    and the attributes appear sorted by name; an empty dictionary yields
+    the empty string.  Values are inserted as they are, with no quoting of
+    special characters.
+    """
+    names = list(attrs.keys())
+    names.sort()
+    text = ''
+    for name in names:
+        text = text + ' %s="%s"' % (name, attrs[name])
+    return text
+
+
+def do_convert(ifp, ofp, knownempties, xml=0):
+    """Read ESIS events from `ifp` and write the matching markup to `ofp`.
+
+    Each input line is one event, identified by its first character:
+
+      -  character data (decoded and written out)
+      (  start of an element; the pending attributes are emitted with it
+      )  end of an element
+      A  an attribute ("name type value") for the next element start
+      e  the next element start is flagged as empty
+
+    Lines starting with any other character are ignored.
+
+    `knownempties` is a list of names of empty elements; the names of
+    elements flagged with "e" are appended to it.  If `xml` is true, a
+    flagged element is written as <name/> and an end event that directly
+    follows that start tag writes nothing.  Otherwise SGML is written: no
+    end tag is written for names in `knownempties`, and the short form </>
+    is used when the name being closed equals the most recently opened
+    one and neither an end event nor data containing a newline has
+    intervened.
+    """
+    pending = {}        # attributes collected for the next start tag
+    opened = None       # name from the latest start tag, if still "fresh"
+    nextempty = 0       # true once an "e" event has been seen
+    wasempty = 0        # true if the latest start tag was flagged empty
+    while 1:
+        line = ifp.readline()
+        if not line:
+            break
+
+        event = line[0]
+        data = line[1:]
+        if data[-1:] == "\n":
+            data = data[:-1]
+        if event == "-":
+            text = decode(data)
+            ofp.write(text)
+            if "\n" in text:
+                opened = None
+            nextempty = 0
+            wasempty = 0
+        elif event == "(":
+            if xml and nextempty:
+                template = "<%s%s/>"
+            else:
+                template = "<%s%s>"
+            ofp.write(template % (data, format_attrs(pending)))
+            if nextempty and data not in knownempties:
+                # remember this name for the rest of the run
+                knownempties.append(data)
+            pending = {}
+            opened = data
+            wasempty = nextempty
+            nextempty = 0
+        elif event == ")":
+            if xml:
+                if not wasempty:
+                    ofp.write("</%s>" % data)
+            elif data in knownempties:
+                pass
+            elif opened == data:
+                ofp.write("</>")
+            else:
+                ofp.write("</%s>" % data)
+            opened = None
+            wasempty = 0
+        elif event == "A":
+            # the declared attribute type is parsed but not used
+            name, dtype, value = string.split(data, " ", 2)
+            pending[name] = decode(value)
+        elif event == "e":
+            nextempty = 1
+
+
+def sgml_convert(ifp, ofp, knownempties=()):
+    """Convert the ESIS events read from `ifp` to SGML written to `ofp`.
+
+    `knownempties` is a sequence of names of elements known to be empty.
+    It is copied into a new list, so the caller's sequence is not modified.
+    """
+    empties = list(knownempties)
+    return do_convert(ifp, ofp, empties, xml=0)
+
+
+def xml_convert(ifp, ofp, knownempties=()):
+    """Convert the ESIS events read from `ifp` to XML written to `ofp`.
+
+    `knownempties` is a sequence of names of elements known to be empty.
+    It is copied into a new list, so the caller's sequence is not modified.
+    """
+    empties = list(knownempties)
+    return do_convert(ifp, ofp, empties, xml=1)
+
+
+def main():
+    """Command-line entry point: convert ESIS input to SGML or XML.
+
+    With -x or --xml as the first argument XML is generated, otherwise
+    SGML.  Up to two further arguments name the input and the output file;
+    standard input and standard output are used for those not given.  Any
+    other number of arguments prints a usage message on standard error and
+    exits with status 2.
+
+    An IOError with errno EPIPE raised during the conversion is ignored;
+    any other IOError is re-raised.
+    """
+    import sys
+    #
+    convert = sgml_convert
+    if sys.argv[1:] and sys.argv[1] in ("-x", "--xml"):
+        convert = xml_convert
+        del sys.argv[1]
+    if len(sys.argv) == 1:
+        ifp = sys.stdin
+        ofp = sys.stdout
+    elif len(sys.argv) == 2:
+        ifp = open(sys.argv[1])
+        ofp = sys.stdout
+    elif len(sys.argv) == 3:
+        ifp = open(sys.argv[1])
+        ofp = open(sys.argv[2], "w")
+    else:
+        # This script defines no usage() function, so the message is
+        # written here directly instead of failing with a NameError.
+        sys.stderr.write("usage: %s [-x | --xml] [infile [outfile]]\n"
+                         % sys.argv[0])
+        sys.exit(2)
+    # knownempties is ignored in the XML version
+    try:
+        convert(ifp, ofp)
+    except IOError, (err, msg):
+        if err != errno.EPIPE:
+            raise
+
+
+# Run the converter only when this file is executed as a script, not when
+# it is imported as a module.
+if __name__ == "__main__":
+    main()
diff --git a/Doc/tools/sgmlconv/latex2esis.py b/Doc/tools/sgmlconv/latex2esis.py
new file mode 100755
index 0000000..afa2d86
--- /dev/null
+++ b/Doc/tools/sgmlconv/latex2esis.py
@@ -0,0 +1,337 @@
+#! /usr/bin/env python
+
+"""Generate ESIS events based on a LaTeX source document and configuration
+data.
+
+
+"""
+__version__ = '$Revision$'
+
+import errno
+import re
+import string
+import StringIO
+import sys
+
+
+class Error(Exception):
+    """Base class for the exceptions defined in this module."""
+
+class LaTeXFormatError(Error):
+    """Raised when the LaTeX input cannot be interpreted by the converter."""
+
+
+# Patterns used by subconvert() to recognize LaTeX constructs at the start
+# of the remaining input.
+#
+# \begin{name} and \end{name}; group 1 is the environment name.
+_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
+_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
+# A backslash and a macro name made of letters with an optional trailing "*"
+# (group 1), followed by either "{" or optional whitespace and at most one
+# more newline (group 2).
+_begin_macro_rx = re.compile("[\\\\]([a-zA-Z]+[*]?)({|\\s*\n?)")
+# One or more "%", optional blanks and tabs, then the rest of the line
+# (group 1) up to and including the newline.
+_comment_rx = re.compile("%+[ \t]*(.*)\n")
+# A run of characters other than "]", "%", backslash, "{" and "}".
+_text_rx = re.compile(r"[^]%\\{}]+")
+# Optional whitespace and a complete [...] argument; group 1 is the text
+# between the brackets, which may not contain "]".
+_optional_rx = re.compile(r"\s*[[]([^]]*)[]]")
+# Blanks/newlines and a complete {...} argument; group 1 is the text between
+# the braces, which may not contain "}".
+_parameter_rx = re.compile("[ \n]*{([^}]*)}")
+# A value consisting only of a letter followed by letters, digits, "." or
+# "-" through the end of the string.
+_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
+# Blanks/newlines followed by the opening "{" of a group.
+_start_group_rx = re.compile("[ \n]*{")
+# Blanks/newlines followed by the opening "[" of an optional argument.
+_start_optional_rx = re.compile("[ \n]*[[]")
+
+
+# Maps each of the 256 8-bit characters to its encoded form: every character
+# maps to itself, except newline and backslash, which map to the
+# two-character sequences backslash-n and backslash-backslash.
+_charmap = {}
+for c in map(chr, range(256)):
+    _charmap[c] = c
+_charmap["\n"] = r"\n"
+_charmap["\\"] = r"\\"
+# don't leave the loop variable behind in the module namespace
+del c
+
+def encode(s):
+    """Return `s` with every character replaced by its _charmap entry.
+
+    The per-character replacements are looked up in _charmap and joined
+    back into a single string.
+    """
+    pieces = []
+    for ch in s:
+        pieces.append(_charmap.get(ch))
+    return string.join(pieces, '')
+
+
+# Characters that subconvert() emits as literal character data when they
+# directly follow a backslash in the LaTeX source.
+ESCAPED_CHARS = "$%#^ {}"
+
+
+def subconvert(line, ofp, table, discards, autoclosing, knownempty,
+               endchar=None):
+    """Convert the LaTeX text in `line` to ESIS events written to `ofp`.
+
+    Arguments:
+
+    line -- the LaTeX source text still to be converted
+    ofp -- file object the ESIS events are written to
+    table -- maps macro and environment names to tuples of the form
+             (parameters, first_is_optional, empty); names not found in
+             the table are treated as ([], 0, 0)
+    discards -- names of macros whose start and end events are not written
+    autoclosing -- names of elements that are closed implicitly when the
+                   same macro is seen again while one is still open
+    knownempty -- function called with a macro name; a true result marks
+                  the element as empty
+    endchar -- if given, conversion stops at the first occurrence of this
+               character found while no group or element is open
+
+    Returns the text following `endchar` when it is found, and None when
+    all of `line` has been consumed first.
+
+    Raises LaTeXFormatError if the markup cannot be interpreted; this
+    includes unbalanced closing braces, an \end{...} with nothing open, a
+    verbatim environment that is never closed and an optional argument
+    that is never closed.
+    """
+    # names of the open elements; "" stands for a bare {...} group
+    stack = []
+    while line:
+        if line[0] == endchar and not stack:
+            return line[1:]
+        m = _comment_rx.match(line)
+        if m:
+            text = m.group(1)
+            if text:
+                ofp.write("(COMMENT\n")
+                ofp.write("- %s \n" % encode(text))
+                ofp.write(")COMMENT\n")
+                ofp.write("-\\n\n")
+            else:
+                ofp.write("-\\n\n")
+            line = line[m.end():]
+            continue
+        m = _begin_env_rx.match(line)
+        if m:
+            # re-write to use the macro handler
+            line = r"\%s %s" % (m.group(1), line[m.end():])
+            continue
+        m = _end_env_rx.match(line)
+        if m:
+            # end of environment
+            envname = m.group(1)
+            if envname == "document":
+                # special magic
+                for n in stack[1:]:
+                    if n not in autoclosing:
+                        raise LaTeXFormatError("open element on stack: " + `n`)
+                # should be more careful, but this is easier to code:
+                stack = []
+                ofp.write(")document\n")
+            elif stack and envname == stack[-1]:
+                ofp.write(")%s\n" % envname)
+                del stack[-1]
+            else:
+                # also reached when nothing at all is open, which used to
+                # fail with an IndexError on stack[-1]
+                raise LaTeXFormatError("environment close doesn't match")
+            line = line[m.end():]
+            continue
+        m = _begin_macro_rx.match(line)
+        if m:
+            # start of macro
+            macroname = m.group(1)
+            if macroname == "verbatim":
+                # really magic case!
+                pos = string.find(line, "\\end{verbatim}")
+                if pos < 0:
+                    # string.find() returns -1 when there is no match;
+                    # slicing with that would silently mangle the text
+                    raise LaTeXFormatError(
+                        "no \\end{verbatim} for verbatim environment")
+                text = line[m.end(1):pos]
+                ofp.write("(verbatim\n")
+                ofp.write("-%s\n" % encode(text))
+                ofp.write(")verbatim\n")
+                line = line[pos + len("\\end{verbatim}"):]
+                continue
+            numbered = 1
+            if macroname[-1] == "*":
+                # starred form: drop the "*" and record it as an attribute
+                macroname = macroname[:-1]
+                numbered = 0
+            if macroname in autoclosing and macroname in stack:
+                # close everything up to and including the open element
+                # of the same name
+                while stack[-1] != macroname:
+                    if stack[-1] and stack[-1] not in discards:
+                        ofp.write(")%s\n-\\n\n" % stack[-1])
+                    del stack[-1]
+                if macroname not in discards:
+                    ofp.write("-\\n\n)%s\n-\\n\n" % macroname)
+                del stack[-1]
+            # events for a discarded macro go to a throw-away buffer
+            real_ofp = ofp
+            if macroname in discards:
+                ofp = StringIO.StringIO()
+            #
+            conversion = table.get(macroname, ([], 0, 0))
+            params, optional, empty = conversion
+            empty = empty or knownempty(macroname)
+            if empty:
+                ofp.write("e\n")
+            if not numbered:
+                ofp.write("Anumbered TOKEN no\n")
+            # rip off the macroname
+            if params:
+                if optional and len(params) == 1:
+                    line = line[m.end():]
+                else:
+                    line = line[m.end(1):]
+            elif empty:
+                line = line[m.end(1):]
+            else:
+                line = line[m.end():]
+            #
+            # Very ugly special case to deal with \item[].  The catch is that
+            # this needs to occur outside the for loop that handles attribute
+            # parsing so we can 'continue' the outer loop.
+            #
+            if optional and params and type(params[0]) is type(()):
+                # the attribute name isn't used in this special case
+                stack.append(macroname)
+                ofp.write("(%s\n" % macroname)
+                m = _start_optional_rx.match(line)
+                if m:
+                    line = line[m.end():]
+                    line = subconvert(line, ofp, table, discards,
+                                      autoclosing, knownempty, endchar="]")
+                    if line is None:
+                        # the nested call ran out of input before finding
+                        # the closing bracket
+                        raise LaTeXFormatError(
+                            "no closing ] for optional argument of "
+                            + `macroname`)
+                line = "}" + line
+                # restore the real output file, as the normal path below
+                # does, so a discarded macro cannot swallow later output
+                ofp = real_ofp
+                continue
+            # handle attribute mappings here:
+            for attrname in params:
+                if optional:
+                    optional = 0
+                    if type(attrname) is type(""):
+                        m = _optional_rx.match(line)
+                        if m:
+                            line = line[m.end():]
+                            ofp.write("A%s TOKEN %s\n"
+                                      % (attrname, encode(m.group(1))))
+                elif type(attrname) is type(()):
+                    # This is a sub-element; but don't place the
+                    # element we found on the stack (\section-like)
+                    stack.append(macroname)
+                    ofp.write("(%s\n" % macroname)
+                    macroname = attrname[0]
+                    m = _start_group_rx.match(line)
+                    if m:
+                        line = line[m.end():]
+                elif type(attrname) is type([]):
+                    # A normal subelement.
+                    attrname = attrname[0]
+                    stack.append(macroname)
+                    stack.append(attrname)
+                    ofp.write("(%s\n" % macroname)
+                    macroname = attrname
+                else:
+                    m = _parameter_rx.match(line)
+                    if not m:
+                        raise LaTeXFormatError(
+                            "could not extract parameter %s for %s: %s"
+                            % (attrname, macroname, `line[:100]`))
+                    value = m.group(1)
+                    if _token_rx.match(value):
+                        dtype = "TOKEN"
+                    else:
+                        dtype = "CDATA"
+                    ofp.write("A%s %s %s\n"
+                              % (attrname, dtype, encode(value)))
+                    line = line[m.end():]
+            stack.append(macroname)
+            ofp.write("(%s\n" % macroname)
+            if empty:
+                # fake a closing brace so the element is closed right away
+                line = "}" + line
+            ofp = real_ofp
+            continue
+        if line[0] == "}":
+            # end of macro
+            if not stack:
+                raise LaTeXFormatError("unmatched }: " + `line[:100]`)
+            macroname = stack[-1]
+            conversion = table.get(macroname)
+            if macroname \
+               and macroname not in discards \
+               and type(conversion) is not type(""):
+                # otherwise, it was just a bare group
+                ofp.write(")%s\n" % stack[-1])
+            del stack[-1]
+            line = line[1:]
+            continue
+        if line[0] == "{":
+            stack.append("")
+            line = line[1:]
+            continue
+        # the length check keeps a lone backslash at the very end of the
+        # input from raising an IndexError; it is reported as unidentified
+        # markup below instead
+        if line[0] == "\\" and len(line) > 1 and line[1] in ESCAPED_CHARS:
+            ofp.write("-%s\n" % encode(line[1]))
+            line = line[2:]
+            continue
+        if line[:2] == r"\\":
+            ofp.write("(BREAK\n)BREAK\n")
+            line = line[2:]
+            continue
+        m = _text_rx.match(line)
+        if m:
+            text = encode(m.group())
+            ofp.write("-%s\n" % text)
+            line = line[m.end():]
+            continue
+        # special case because of \item[]
+        if line[0] == "]":
+            ofp.write("-]\n")
+            line = line[1:]
+            continue
+        # avoid infinite loops
+        extra = ""
+        if len(line) > 100:
+            extra = "..."
+        raise LaTeXFormatError("could not identify markup: %s%s"
+                               % (`line[:100]`, extra))
+
+
+def convert(ifp, ofp, table={}, discards=(), autoclosing=(), knownempties=()):
+    """Convert the LaTeX document read from `ifp` to ESIS events on `ofp`.
+
+    The whole input is read at once and handed to subconvert() together
+    with `table`, `discards` and `autoclosing`.  `knownempties` is a
+    sequence of names of empty elements; it is turned into a dictionary
+    whose has_key method serves as the membership test.
+
+    An IOError with errno EPIPE is ignored; any other IOError is re-raised.
+    """
+    empties = {}
+    for name in knownempties:
+        empties[name] = name
+    is_empty = empties.has_key
+    try:
+        subconvert(ifp.read(), ofp, table, discards, autoclosing, is_empty)
+    except IOError, (err, msg):
+        if err != errno.EPIPE:
+            raise
+
+
+def main():
+    """Command-line entry point: convert a LaTeX file to ESIS events.
+
+    The first argument names the input file; an optional second argument
+    names the output file, with standard output used when it is omitted.
+    Any other number of arguments prints a usage message on standard error
+    and exits with status 2.
+
+    The conversion table, the discarded macros, the auto-closing elements
+    and the known-empty elements are all given here.
+    """
+    if len(sys.argv) == 2:
+        ifp = open(sys.argv[1])
+        ofp = sys.stdout
+    elif len(sys.argv) == 3:
+        ifp = open(sys.argv[1])
+        ofp = open(sys.argv[2], "w")
+    else:
+        # This script defines no usage() function, so the message is
+        # written here directly instead of failing with a NameError.
+        sys.stderr.write("usage: %s infile [outfile]\n" % sys.argv[0])
+        sys.exit(2)
+    convert(ifp, ofp, {
+        # entries are name
+        #          -> ([list of attribute names], first_is_optional, empty)
+        "cfuncdesc": (["type", "name", ("args",)], 0, 0),
+        "chapter": ([("title",)], 0, 0),
+        # NOTE(review): subconvert() strips a trailing "*" from the macro
+        # name before looking it up, so this key is never consulted there.
+        "chapter*": ([("title",)], 0, 0),
+        "classdesc": (["name", ("constructor-args",)], 0, 0),
+        "ctypedesc": (["name"], 0, 0),
+        "cvardesc":  (["type", "name"], 0, 0),
+        "datadesc":  (["name"], 0, 0),
+        "declaremodule": (["id", "type", "name"], 1, 1),
+        "deprecated": (["release"], 0, 1),
+        "documentclass": (["classname"], 0, 1),
+        "excdesc": (["name"], 0, 0),
+        "funcdesc": (["name", ("args",)], 0, 0),
+        "funcdescni": (["name", ("args",)], 0, 0),
+        "indexii": (["ie1", "ie2"], 0, 1),
+        "indexiii": (["ie1", "ie2", "ie3"], 0, 1),
+        "indexiv": (["ie1", "ie2", "ie3", "ie4"], 0, 1),
+        "input": (["source"], 0, 1),
+        "item": ([("leader",)], 1, 0),
+        "label": (["id"], 0, 1),
+        "manpage": (["name", "section"], 0, 1),
+        "memberdesc": (["class", "name"], 1, 0),
+        "methoddesc": (["class", "name", ("args",)], 1, 0),
+        "methoddescni": (["class", "name", ("args",)], 1, 0),
+        "opcodedesc": (["name", "var"], 0, 0),
+        "par": ([], 0, 1),
+        "paragraph": ([("title",)], 0, 0),
+        "rfc": (["number"], 0, 1),
+        "section": ([("title",)], 0, 0),
+        "seemodule": (["ref", "name"], 1, 0),
+        "subparagraph": ([("title",)], 0, 0),
+        "subsection": ([("title",)], 0, 0),
+        "subsubsection": ([("title",)], 0, 0),
+        "tableii": (["colspec", "style", "head1", "head2"], 0, 0),
+        "tableiii": (["colspec", "style", "head1", "head2", "head3"], 0, 0),
+        "tableiv": (["colspec", "style", "head1", "head2", "head3", "head4"],
+                    0, 0),
+        "versionadded": (["version"], 0, 1),
+        "versionchanged": (["version"], 0, 1),
+        #
+        "ABC": ([], 0, 1),
+        "ASCII": ([], 0, 1),
+        "C": ([], 0, 1),
+        "Cpp": ([], 0, 1),
+        "EOF": ([], 0, 1),
+        "e": ([], 0, 1),
+        "ldots": ([], 0, 1),
+        "NULL": ([], 0, 1),
+        "POSIX": ([], 0, 1),
+        "UNIX": ([], 0, 1),
+        #
+        # Things that will actually be going away!
+        #
+        "fi": ([], 0, 1),
+        "ifhtml": ([], 0, 1),
+        "makeindex": ([], 0, 1),
+        "makemodindex": ([], 0, 1),
+        "maketitle": ([], 0, 1),
+        "noindent": ([], 0, 1),
+        "tableofcontents": ([], 0, 1),
+        },
+            discards=["fi", "ifhtml", "makeindex", "makemodindex", "maketitle",
+                      "noindent", "tableofcontents"],
+            autoclosing=["chapter", "section", "subsection", "subsubsection",
+                         "paragraph", "subparagraph", ],
+            knownempties=["appendix",
+                          "maketitle", "makeindex", "makemodindex",
+                          "localmoduletable"])
+
+
+# Run the converter only when this file is executed as a script, not when
+# it is imported as a module.
+if __name__ == "__main__":
+    main()
author	Fred Drake <fdrake@acm.org>	1998-11-23 16:59:39 (GMT)
committer	Fred Drake <fdrake@acm.org>	1998-11-23 16:59:39 (GMT)
commit	30a68c7a2bb12b8f10d239206033492e4a9e27a6 (patch)
tree	aa026df8f95a569033de3dad5e601b4c21802bbe /Doc
parent	63de8f6d55214da3f7337a80a4ed8de0762fcd56 (diff)
download	cpython-30a68c7a2bb12b8f10d239206033492e4a9e27a6.zip cpython-30a68c7a2bb12b8f10d239206033492e4a9e27a6.tar.gz cpython-30a68c7a2bb12b8f10d239206033492e4a9e27a6.tar.bz2