diff options
author | Fred Drake <fdrake@acm.org> | 1998-11-23 16:59:39 (GMT) |
---|---|---|
committer | Fred Drake <fdrake@acm.org> | 1998-11-23 16:59:39 (GMT) |
commit | 30a68c7a2bb12b8f10d239206033492e4a9e27a6 (patch) | |
tree | aa026df8f95a569033de3dad5e601b4c21802bbe /Doc/tools/sgmlconv | |
parent | 63de8f6d55214da3f7337a80a4ed8de0762fcd56 (diff) | |
download | cpython-30a68c7a2bb12b8f10d239206033492e4a9e27a6.zip cpython-30a68c7a2bb12b8f10d239206033492e4a9e27a6.tar.gz cpython-30a68c7a2bb12b8f10d239206033492e4a9e27a6.tar.bz2 |
Moved into tools/sgmlconv/.
Diffstat (limited to 'Doc/tools/sgmlconv')
-rwxr-xr-x | Doc/tools/sgmlconv/esis2sgml.py | 131 | ||||
-rwxr-xr-x | Doc/tools/sgmlconv/latex2esis.py | 337 |
2 files changed, 468 insertions, 0 deletions
diff --git a/Doc/tools/sgmlconv/esis2sgml.py b/Doc/tools/sgmlconv/esis2sgml.py new file mode 100755 index 0000000..5f8c6e8 --- /dev/null +++ b/Doc/tools/sgmlconv/esis2sgml.py @@ -0,0 +1,131 @@ +#! /usr/bin/env python + +"""Convert ESIS events to SGML or XML markup. + +This is limited, but seems sufficient for the ESIS generated by the +latex2esis.py script when run over the Python documentation. +""" +__version__ = '$Revision$' + +import errno +import re +import string + + +_data_rx = re.compile(r"[^\\][^\\]*") + +def decode(s): + r = '' + while s: + m = _data_rx.match(s) + if m: + r = r + m.group() + s = s[len(m.group()):] + elif s[1] == "\\": + r = r + "\\" + s = s[2:] + elif s[1] == "n": + r = r + "\n" + s = s[2:] + else: + raise ValueError, "can't handle " + `s` + return r + + +def format_attrs(attrs): + attrs = attrs.items() + attrs.sort() + s = '' + for name, value in attrs: + s = '%s %s="%s"' % (s, name, value) + return s + + +def do_convert(ifp, ofp, knownempties, xml=0): + attrs = {} + lastopened = None + knownempty = 0 + lastempty = 0 + while 1: + line = ifp.readline() + if not line: + break + + type = line[0] + data = line[1:] + if data and data[-1] == "\n": + data = data[:-1] + if type == "-": + data = decode(data) + ofp.write(data) + if "\n" in data: + lastopened = None + knownempty = 0 + lastempty = 0 + elif type == "(": + if knownempty and xml: + ofp.write("<%s%s/>" % (data, format_attrs(attrs))) + else: + ofp.write("<%s%s>" % (data, format_attrs(attrs))) + if knownempty and data not in knownempties: + # accumulate knowledge! + knownempties.append(data) + attrs = {} + lastopened = data + lastempty = knownempty + knownempty = 0 + elif type == ")": + if xml: + if not lastempty: + ofp.write("</%s>" % data) + elif data not in knownempties: + if lastopened == data: + ofp.write("</>") + else: + ofp.write("</%s>" % data) + lastopened = None + lastempty = 0 + elif type == "A": + name, type, value = string.split(data, " ", 2) + attrs[name] = decode(value) + elif type == "e": + knownempty = 1 + + +def sgml_convert(ifp, ofp, knownempties=()): + return do_convert(ifp, ofp, list(knownempties), xml=0) + + +def xml_convert(ifp, ofp, knownempties=()): + return do_convert(ifp, ofp, list(knownempties), xml=1) + + +def main(): + import sys + # + convert = sgml_convert + if sys.argv[1:] and sys.argv[1] in ("-x", "--xml"): + convert = xml_convert + del sys.argv[1] + if len(sys.argv) == 1: + ifp = sys.stdin + ofp = sys.stdout + elif len(sys.argv) == 2: + ifp = open(sys.argv[1]) + ofp = sys.stdout + elif len(sys.argv) == 3: + ifp = open(sys.argv[1]) + ofp = open(sys.argv[2], "w") + else: + usage() + sys.exit(2) + # knownempties is ignored in the XML version + try: + convert(ifp, ofp) + except IOError, (err, msg): + if err != errno.EPIPE: + raise + + +if __name__ == "__main__": + main() diff --git a/Doc/tools/sgmlconv/latex2esis.py b/Doc/tools/sgmlconv/latex2esis.py new file mode 100755 index 0000000..afa2d86 --- /dev/null +++ b/Doc/tools/sgmlconv/latex2esis.py @@ -0,0 +1,337 @@ +#! /usr/bin/env python + +"""Generate ESIS events based on a LaTeX source document and configuration +data. + + +""" +__version__ = '$Revision$' + +import errno +import re +import string +import StringIO +import sys + + +class Error(Exception): + pass + +class LaTeXFormatError(Error): + pass + + +_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}") +_end_env_rx = re.compile(r"[\\]end{([^}]*)}") +_begin_macro_rx = re.compile("[\\\\]([a-zA-Z]+[*]?)({|\\s*\n?)") +_comment_rx = re.compile("%+[ \t]*(.*)\n") +_text_rx = re.compile(r"[^]%\\{}]+") +_optional_rx = re.compile(r"\s*[[]([^]]*)[]]") +_parameter_rx = re.compile("[ \n]*{([^}]*)}") +_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$") +_start_group_rx = re.compile("[ \n]*{") +_start_optional_rx = re.compile("[ \n]*[[]") + + +_charmap = {} +for c in map(chr, range(256)): + _charmap[c] = c +_charmap["\n"] = r"\n" +_charmap["\\"] = r"\\" +del c + +def encode(s): + return string.join(map(_charmap.get, s), '') + + +ESCAPED_CHARS = "$%#^ {}" + + +def subconvert(line, ofp, table, discards, autoclosing, knownempty, + endchar=None): + stack = [] + while line: + if line[0] == endchar and not stack: + return line[1:] + m = _comment_rx.match(line) + if m: + text = m.group(1) + if text: + ofp.write("(COMMENT\n") + ofp.write("- %s \n" % encode(text)) + ofp.write(")COMMENT\n") + ofp.write("-\\n\n") + else: + ofp.write("-\\n\n") + line = line[m.end():] + continue + m = _begin_env_rx.match(line) + if m: + # re-write to use the macro handler + line = r"\%s %s" % (m.group(1), line[m.end():]) + continue + m =_end_env_rx.match(line) + if m: + # end of environment + envname = m.group(1) + if envname == "document": + # special magic + for n in stack[1:]: + if n not in autoclosing: + raise LaTeXFormatError("open element on stack: " + `n`) + # should be more careful, but this is easier to code: + stack = [] + ofp.write(")document\n") + elif envname == stack[-1]: + ofp.write(")%s\n" % envname) + del stack[-1] + else: + raise LaTeXFormatError("environment close doesn't match") + line = line[m.end():] + continue + m = _begin_macro_rx.match(line) + if m: + # start of macro + macroname = m.group(1) + if macroname == "verbatim": + # really magic case! + pos = string.find(line, "\\end{verbatim}") + text = line[m.end(1):pos] + ofp.write("(verbatim\n") + ofp.write("-%s\n" % encode(text)) + ofp.write(")verbatim\n") + line = line[pos + len("\\end{verbatim}"):] + continue + numbered = 1 + if macroname[-1] == "*": + macroname = macroname[:-1] + numbered = 0 + if macroname in autoclosing and macroname in stack: + while stack[-1] != macroname: + if stack[-1] and stack[-1] not in discards: + ofp.write(")%s\n-\\n\n" % stack[-1]) + del stack[-1] + if macroname not in discards: + ofp.write("-\\n\n)%s\n-\\n\n" % macroname) + del stack[-1] + real_ofp = ofp + if macroname in discards: + ofp = StringIO.StringIO() + # + conversion = table.get(macroname, ([], 0, 0)) + params, optional, empty = conversion + empty = empty or knownempty(macroname) + if empty: + ofp.write("e\n") + if not numbered: + ofp.write("Anumbered TOKEN no\n") + # rip off the macroname + if params: + if optional and len(params) == 1: + line = line = line[m.end():] + else: + line = line[m.end(1):] + elif empty: + line = line[m.end(1):] + else: + line = line[m.end():] + # + # Very ugly special case to deal with \item[]. The catch is that + # this needs to occur outside the for loop that handles attribute + # parsing so we can 'continue' the outer loop. + # + if optional and type(params[0]) is type(()): + # the attribute name isn't used in this special case + stack.append(macroname) + ofp.write("(%s\n" % macroname) + m = _start_optional_rx.match(line) + if m: + line = line[m.end():] + line = subconvert(line, ofp, table, discards, + autoclosing, knownempty, endchar="]") + line = "}" + line + continue + # handle attribute mappings here: + for attrname in params: + if optional: + optional = 0 + if type(attrname) is type(""): + m = _optional_rx.match(line) + if m: + line = line[m.end():] + ofp.write("A%s TOKEN %s\n" + % (attrname, encode(m.group(1)))) + elif type(attrname) is type(()): + # This is a sub-element; but don't place the + # element we found on the stack (\section-like) + stack.append(macroname) + ofp.write("(%s\n" % macroname) + macroname = attrname[0] + m = _start_group_rx.match(line) + if m: + line = line[m.end():] + elif type(attrname) is type([]): + # A normal subelement. + attrname = attrname[0] + stack.append(macroname) + stack.append(attrname) + ofp.write("(%s\n" % macroname) + macroname = attrname + else: + m = _parameter_rx.match(line) + if not m: + raise LaTeXFormatError( + "could not extract parameter %s for %s: %s" + % (attrname, macroname, `line[:100]`)) + value = m.group(1) + if _token_rx.match(value): + dtype = "TOKEN" + else: + dtype = "CDATA" + ofp.write("A%s %s %s\n" + % (attrname, dtype, encode(value))) + line = line[m.end():] + stack.append(macroname) + ofp.write("(%s\n" % macroname) + if empty: + line = "}" + line + ofp = real_ofp + continue + if line[0] == "}": + # end of macro + macroname = stack[-1] + conversion = table.get(macroname) + if macroname \ + and macroname not in discards \ + and type(conversion) is not type(""): + # otherwise, it was just a bare group + ofp.write(")%s\n" % stack[-1]) + del stack[-1] + line = line[1:] + continue + if line[0] == "{": + stack.append("") + line = line[1:] + continue + if line[0] == "\\" and line[1] in ESCAPED_CHARS: + ofp.write("-%s\n" % encode(line[1])) + line = line[2:] + continue + if line[:2] == r"\\": + ofp.write("(BREAK\n)BREAK\n") + line = line[2:] + continue + m = _text_rx.match(line) + if m: + text = encode(m.group()) + ofp.write("-%s\n" % text) + line = line[m.end():] + continue + # special case because of \item[] + if line[0] == "]": + ofp.write("-]\n") + line = line[1:] + continue + # avoid infinite loops + extra = "" + if len(line) > 100: + extra = "..." + raise LaTeXFormatError("could not identify markup: %s%s" + % (`line[:100]`, extra)) + + +def convert(ifp, ofp, table={}, discards=(), autoclosing=(), knownempties=()): + d = {} + for gi in knownempties: + d[gi] = gi + try: + subconvert(ifp.read(), ofp, table, discards, autoclosing, d.has_key) + except IOError, (err, msg): + if err != errno.EPIPE: + raise + + +def main(): + if len(sys.argv) == 2: + ifp = open(sys.argv[1]) + ofp = sys.stdout + elif len(sys.argv) == 3: + ifp = open(sys.argv[1]) + ofp = open(sys.argv[2], "w") + else: + usage() + sys.exit(2) + convert(ifp, ofp, { + # entries are name + # -> ([list of attribute names], first_is_optional, empty) + "cfuncdesc": (["type", "name", ("args",)], 0, 0), + "chapter": ([("title",)], 0, 0), + "chapter*": ([("title",)], 0, 0), + "classdesc": (["name", ("constructor-args",)], 0, 0), + "ctypedesc": (["name"], 0, 0), + "cvardesc": (["type", "name"], 0, 0), + "datadesc": (["name"], 0, 0), + "declaremodule": (["id", "type", "name"], 1, 1), + "deprecated": (["release"], 0, 1), + "documentclass": (["classname"], 0, 1), + "excdesc": (["name"], 0, 0), + "funcdesc": (["name", ("args",)], 0, 0), + "funcdescni": (["name", ("args",)], 0, 0), + "indexii": (["ie1", "ie2"], 0, 1), + "indexiii": (["ie1", "ie2", "ie3"], 0, 1), + "indexiv": (["ie1", "ie2", "ie3", "ie4"], 0, 1), + "input": (["source"], 0, 1), + "item": ([("leader",)], 1, 0), + "label": (["id"], 0, 1), + "manpage": (["name", "section"], 0, 1), + "memberdesc": (["class", "name"], 1, 0), + "methoddesc": (["class", "name", ("args",)], 1, 0), + "methoddescni": (["class", "name", ("args",)], 1, 0), + "opcodedesc": (["name", "var"], 0, 0), + "par": ([], 0, 1), + "paragraph": ([("title",)], 0, 0), + "rfc": (["number"], 0, 1), + "section": ([("title",)], 0, 0), + "seemodule": (["ref", "name"], 1, 0), + "subparagraph": ([("title",)], 0, 0), + "subsection": ([("title",)], 0, 0), + "subsubsection": ([("title",)], 0, 0), + "tableii": (["colspec", "style", "head1", "head2"], 0, 0), + "tableiii": (["colspec", "style", "head1", "head2", "head3"], 0, 0), + "tableiv": (["colspec", "style", "head1", "head2", "head3", "head4"], + 0, 0), + "versionadded": (["version"], 0, 1), + "versionchanged": (["version"], 0, 1), + # + "ABC": ([], 0, 1), + "ASCII": ([], 0, 1), + "C": ([], 0, 1), + "Cpp": ([], 0, 1), + "EOF": ([], 0, 1), + "e": ([], 0, 1), + "ldots": ([], 0, 1), + "NULL": ([], 0, 1), + "POSIX": ([], 0, 1), + "UNIX": ([], 0, 1), + # + # Things that will actually be going away! + # + "fi": ([], 0, 1), + "ifhtml": ([], 0, 1), + "makeindex": ([], 0, 1), + "makemodindex": ([], 0, 1), + "maketitle": ([], 0, 1), + "noindent": ([], 0, 1), + "tableofcontents": ([], 0, 1), + }, + discards=["fi", "ifhtml", "makeindex", "makemodindex", "maketitle", + "noindent", "tableofcontents"], + autoclosing=["chapter", "section", "subsection", "subsubsection", + "paragraph", "subparagraph", ], + knownempties=["appendix", + "maketitle", "makeindex", "makemodindex", + "localmoduletable"]) + + +if __name__ == "__main__": + main() |