diff options
author | Fred Drake <fdrake@acm.org> | 1999-07-29 22:22:13 (GMT) |
---|---|---|
committer | Fred Drake <fdrake@acm.org> | 1999-07-29 22:22:13 (GMT) |
commit | 96e4a06fa6de789770f154fa651adcf057c57fcf (patch) | |
tree | 3eb1367e721c687c3c5e5d18faf2805d154d2857 /Doc/tools/sgmlconv | |
parent | 2394c98c05d5d6ca61f253be0ce6b918b3fa0a4a (diff) | |
download | cpython-96e4a06fa6de789770f154fa651adcf057c57fcf.zip cpython-96e4a06fa6de789770f154fa651adcf057c57fcf.tar.gz cpython-96e4a06fa6de789770f154fa651adcf057c57fcf.tar.bz2 |
Massive changes.
Separate the Conversion class into a base and a subclass; the subclass
is pretty minimal but the separation is useful for....
NewConversion: New class that implements a somewhat different
approach to the conversion. This uses a table of
instances (rather than tuples) that have more
information than the tuples used for the older
conversion procedure. This allows a lot more control
over the conversion, and it seems to be pretty
stable.
TableEntry,
Parameter: New classes that are used to build the conversion
specification used by NewConversion.
TableParser: xmllib.XMLParser subclass that builds a conversion
specification from an XML document.
load_table(): Convenience function that loads a table from a file.
main(): Added flags --new and --old; these select which conversion is
used. The default is --new.
Several fixes have been made in the old conversion as well; these were
done before writing & switching to the new conversion, and should be
archived.
The next checkin of this file will discard the old conversion; it is
kept in this checkin to allow it to be retrieved if needed, and to
avoid losing the bugfixes that have been made to it in the interim.
Diffstat (limited to 'Doc/tools/sgmlconv')
-rwxr-xr-x | Doc/tools/sgmlconv/latex2esis.py | 516 |
1 files changed, 478 insertions, 38 deletions
diff --git a/Doc/tools/sgmlconv/latex2esis.py b/Doc/tools/sgmlconv/latex2esis.py index b6e9822..051c374 100755 --- a/Doc/tools/sgmlconv/latex2esis.py +++ b/Doc/tools/sgmlconv/latex2esis.py @@ -16,26 +16,41 @@ to load an alternate table from an external file. """ __version__ = '$Revision$' +import copy import errno +import getopt +import os import re import string import StringIO import sys +import UserList from esistools import encode from types import ListType, StringType, TupleType +try: + from xml.parsers.xmllib import XMLParser +except ImportError: + from xmllib import XMLParser + DEBUG = 0 -class Error(Exception): +class LaTeXFormatError(Exception): pass -class LaTeXFormatError(Error): - pass +class LaTeXStackError(LaTeXFormatError): + def __init__(self, found, stack): + msg = "environment close for %s doesn't match;\n stack = %s" \ + % (found, stack) + self.found = found + self.stack = stack[:] + LaTeXFormatError.__init__(self, msg) + _begin_env_rx = re.compile(r"[\\]begin{([^}]*)}") _end_env_rx = re.compile(r"[\\]end{([^}]*)}") _begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) 
?({|\s*\n?)") @@ -58,22 +73,49 @@ def dbgmsg(msg): sys.stderr.write(msg + "\n") def pushing(name, point, depth): - dbgmsg("%s<%s> at %s" % (" "*depth, name, point)) + dbgmsg("pushing <%s> at %s" % (name, point)) def popping(name, point, depth): - dbgmsg("%s</%s> at %s" % (" "*depth, name, point)) + dbgmsg("popping </%s> at %s" % (name, point)) + + +class _Stack(UserList.UserList): + StringType = type('') + + def append(self, entry): + if type(entry) is not self.StringType: + raise LaTeXFormatError("cannot push non-string on stack: " + + `entry`) + sys.stderr.write("%s<%s>\n" % (" "*len(self.data), entry)) + self.data.append(entry) + def pop(self, index=-1): + entry = self.data[index] + del self.data[index] + sys.stderr.write("%s</%s>\n" % (" "*len(self.data), entry)) + + def __delitem__(self, index): + entry = self.data[index] + del self.data[index] + sys.stderr.write("%s</%s>\n" % (" "*len(self.data), entry)) + + +def new_stack(): + if DEBUG: + return _Stack() + return [] -class Conversion: - def __init__(self, ifp, ofp, table=None, discards=(), autoclosing=()): + +class BaseConversion: + def __init__(self, ifp, ofp, table={}, discards=(), autoclosing=()): self.ofp_stack = [ofp] self.pop_output() self.table = table self.discards = discards self.autoclosing = autoclosing self.line = string.join(map(string.rstrip, ifp.readlines()), "\n") - self.err_write = sys.stderr.write self.preamble = 1 + self.stack = new_stack() def push_output(self, ofp): self.ofp_stack.append(self.ofp) @@ -84,16 +126,20 @@ class Conversion: self.ofp = self.ofp_stack.pop() self.write = self.ofp.write + def err_write(self, msg): + if DEBUG: + sys.stderr.write(str(msg) + "\n") + + def convert(self): + self.subconvert() + + +class Conversion(BaseConversion): def subconvert(self, endchar=None, depth=0): - stack = [] + stack = self.stack line = self.line - if DEBUG and endchar: - self.err_write( - "subconvert(%s)\n line = %s\n" % (`endchar`, `line[:20]`)) while line: if line[0] == endchar and not 
stack: - if DEBUG: - self.err_write("subconvert() --> %s\n" % `line[1:21]`) self.line = line return line m = _comment_rx.match(line) @@ -117,19 +163,16 @@ class Conversion: # special magic for n in stack[1:]: if n not in self.autoclosing: + self.err_write(stack) raise LaTeXFormatError( "open element on stack: " + `n`) - # should be more careful, but this is easier to code: - stack = [] self.write(")document\n") elif stack and envname == stack[-1]: self.write(")%s\n" % envname) del stack[-1] popping(envname, "a", len(stack) + depth) else: - self.err_write("stack: %s\n" % `stack`) - raise LaTeXFormatError( - "environment close for %s doesn't match" % envname) + raise LaTeXStackError(envname, stack) line = line[m.end():] continue m = _begin_macro_rx.match(line) @@ -171,7 +214,7 @@ class Conversion: self.write("Anumbered TOKEN no\n") # rip off the macroname if params: - line = line[m.end(1):] + line = line[m.end(1):] elif empty: line = line[m.end(1):] else: @@ -184,7 +227,6 @@ class Conversion: # if optional and type(params[0]) is TupleType: # the attribute name isn't used in this special case - pushing(macroname, "a", depth + len(stack)) stack.append(macroname) self.write("(%s\n" % macroname) m = _start_optional_rx.match(line) @@ -210,7 +252,6 @@ class Conversion: # of the attribute element, and the macro will # have to be closed some other way (such as # auto-closing). - pushing(macroname, "b", len(stack) + depth) stack.append(macroname) self.write("(%s\n" % macroname) macroname = attrname[0] @@ -262,8 +303,6 @@ class Conversion: self.pop_output() continue if line[0] == endchar and not stack: - if DEBUG: - self.err_write("subconvert() --> %s\n" % `line[1:21]`) self.line = line[1:] return self.line if line[0] == "}": @@ -318,9 +357,6 @@ class Conversion: + string.join(stack, ", ")) # otherwise we just ran out of input here... 
- def convert(self): - self.subconvert() - def start_macro(self, name): conversion = self.table.get(name, ([], 0, 0, 0, 0)) params, optional, empty, environ, nocontent = conversion @@ -331,7 +367,275 @@ class Conversion: return params, optional, empty, environ -def convert(ifp, ofp, table={}, discards=(), autoclosing=()): +class NewConversion(BaseConversion): + def __init__(self, ifp, ofp, table={}): + BaseConversion.__init__(self, ifp, ofp, table) + self.discards = [] + + def subconvert(self, endchar=None, depth=0): + # + # Parses content, including sub-structures, until the character + # 'endchar' is found (with no open structures), or until the end + # of the input data is endchar is None. + # + stack = new_stack() + line = self.line + while line: + if line[0] == endchar and not stack: + self.line = line + return line + m = _comment_rx.match(line) + if m: + text = m.group(1) + if text: + self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n" + % encode(text)) + line = line[m.end():] + continue + m = _begin_env_rx.match(line) + if m: + name = m.group(1) + entry = self.get_env_entry(name) + # re-write to use the macro handler + line = r"\%s %s" % (name, line[m.end():]) + continue + m = _end_env_rx.match(line) + if m: + # end of environment + envname = m.group(1) + entry = self.get_entry(envname) + while stack and envname != stack[-1] \ + and stack[-1] in entry.endcloses: + self.write(")%s\n" % stack.pop()) + if stack and envname == stack[-1]: + self.write(")%s\n" % entry.outputname) + del stack[-1] + else: + raise LaTeXStackError(envname, stack) + line = line[m.end():] + continue + m = _begin_macro_rx.match(line) + if m: + # start of macro + macroname = m.group(1) + entry = self.get_entry(macroname) + if entry.verbatim: + # magic case! 
+ pos = string.find(line, "\\end{%s}" % macroname) + text = line[m.end(1):pos] + stack.append(entry.name) + self.write("(%s\n" % entry.outputname) + self.write("-%s\n" % encode(text)) + self.write(")%s\n" % entry.outputname) + stack.pop() + line = line[pos + len("\\end{%s}" % macroname):] + continue + while stack and stack[-1] in entry.closes: + top = stack.pop() + topentry = self.get_entry(top) + if topentry.outputname: + self.write(")%s\n-\\n\n" % topentry.outputname) + # + if entry.outputname: + if entry.empty: + self.write("e\n") + self.push_output(self.ofp) + else: + self.push_output(StringIO.StringIO()) + # + params, optional, empty, environ = self.start_macro(macroname) + # rip off the macroname + if params: + line = line[m.end(1):] + elif empty: + line = line[m.end(1):] + else: + line = line[m.end():] + opened = 0 + implied_content = 0 + + # handle attribute mappings here: + for pentry in params: + if pentry.type == "attribute": + if pentry.optional: + m = _optional_rx.match(line) + if m: + line = line[m.end():] + self.dump_attr(pentry, m.group(1)) + elif pentry.text: + # value supplied by conversion spec: + self.dump_attr(pentry, pentry.text) + else: + m = _parameter_rx.match(line) + if not m: + raise LaTeXFormatError( + "could not extract parameter %s for %s: %s" + % (pentry.name, macroname, `line[:100]`)) + self.dump_attr(pentry, m.group(1)) +## if entry.name == "label": +## sys.stderr.write("[%s]" % m.group(1)) + line = line[m.end():] + elif pentry.type == "child": + if pentry.optional: + m = _optional_rx.match(line) + if m: + line = line[m.end():] + if entry.outputname and not opened: + opened = 1 + self.write("(%s\n" % entry.outputname) + stack.append(macroname) + stack.append(pentry.name) + self.write("(%s\n" % pentry.name) + self.write("-%s\n" % encode(m.group(1))) + self.write(")%s\n" % pentry.name) + stack.pop() + else: + if entry.outputname and not opened: + opened = 1 + self.write("(%s\n" % entry.outputname) + stack.append(entry.name) + 
self.write("(%s\n" % pentry.name) + stack.append(pentry.name) + self.line = skip_white(line)[1:] + line = self.subconvert( + "}", len(stack) + depth + 1)[1:] + self.write(")%s\n" % stack.pop()) + elif pentry.type == "content": + if pentry.implied: + implied_content = 1 + else: + if entry.outputname and not opened: + opened = 1 + self.write("(%s\n" % entry.outputname) + stack.append(entry.name) + line = skip_white(line) + if line[0] != "{": + raise LaTeXFormatError( + "missing content for " + macroname) + self.line = line[1:] + line = self.subconvert("}", len(stack) + depth + 1) + if line and line[0] == "}": + line = line[1:] + elif pentry.type == "text": + if pentry.text: + if entry.outputname and not opened: + opened = 1 + stack.append(entry.name) + self.write("(%s\n" % entry.outputname) + self.write("-%s\n" % encode(pentry.text)) + if entry.outputname: + if not opened: + self.write("(%s\n" % entry.outputname) + stack.append(entry.name) + if not implied_content: + self.write(")%s\n" % entry.outputname) + stack.pop() + self.pop_output() + continue + if line[0] == endchar and not stack: + self.line = line[1:] + return self.line + if line[0] == "}": + # end of macro or group + macroname = stack[-1] + if macroname: + conversion = self.table.get(macroname) + if conversion.outputname: + # otherwise, it was just a bare group + self.write(")%s\n" % conversion.outputname) + del stack[-1] + line = line[1:] + continue + if line[0] == "{": + stack.append("") + line = line[1:] + continue + if line[0] == "\\" and line[1] in ESCAPED_CHARS: + self.write("-%s\n" % encode(line[1])) + line = line[2:] + continue + if line[:2] == r"\\": + self.write("(BREAK\n)BREAK\n") + line = line[2:] + continue + m = _text_rx.match(line) + if m: + text = encode(m.group()) + self.write("-%s\n" % text) + line = line[m.end():] + continue + # special case because of \item[] + # XXX can we axe this??? 
+ if line[0] == "]": + self.write("-]\n") + line = line[1:] + continue + # avoid infinite loops + extra = "" + if len(line) > 100: + extra = "..." + raise LaTeXFormatError("could not identify markup: %s%s" + % (`line[:100]`, extra)) + while stack: + entry = self.get_entry(stack[-1]) + if entry.closes: + self.write(")%s\n-%s\n" % (entry.outputname, encode("\n"))) + del stack[-1] + else: + break + if stack: + raise LaTeXFormatError("elements remain on stack: " + + string.join(stack, ", ")) + # otherwise we just ran out of input here... + + def start_macro(self, name): + conversion = self.get_entry(name) + parameters = conversion.parameters + optional = parameters and parameters[0].optional +## empty = not len(parameters) +## if empty: +## self.write("e\n") +## elif conversion.empty: +## empty = 1 + return parameters, optional, conversion.empty, conversion.environment + + def get_entry(self, name): + entry = self.table.get(name) + if entry is None: + self.err_write("get_entry(%s) failing; building default entry!" 
+ % `name`) + # not defined; build a default entry: + entry = TableEntry(name) + entry.has_content = 1 + entry.parameters.append(Parameter("content")) + self.table[name] = entry + return entry + + def get_env_entry(self, name): + entry = self.table.get(name) + if entry is None: + # not defined; build a default entry: + entry = TableEntry(name, 1) + entry.has_content = 1 + entry.parameters.append(Parameter("content")) + entry.parameters[-1].implied = 1 + self.table[name] = entry + elif not entry.environment: + raise LaTeXFormatError( + name + " is defined as a macro; expected environment") + return entry + + def dump_attr(self, pentry, value): + if not (pentry.name and value): + return + if _token_rx.match(value): + dtype = "TOKEN" + else: + dtype = "CDATA" + self.write("A%s %s %s\n" % (pentry.name, dtype, encode(value))) + + +def old_convert(ifp, ofp, table={}, discards=(), autoclosing=()): c = Conversion(ifp, ofp, table, discards, autoclosing) try: c.convert() @@ -340,32 +644,162 @@ def convert(ifp, ofp, table={}, discards=(), autoclosing=()): raise +def new_convert(ifp, ofp, table={}, discards=(), autoclosing=()): + c = NewConversion(ifp, ofp, table) + try: + c.convert() + except IOError, (err, msg): + if err != errno.EPIPE: + raise + + def skip_white(line): - while line and line[0] in " %\n\t": + while line and line[0] in " %\n\t\r": line = string.lstrip(line[1:]) return line + +class TableEntry: + def __init__(self, name, environment=0): + self.name = name + self.outputname = name + self.environment = environment + self.empty = not environment + self.has_content = 0 + self.verbatim = 0 + self.auto_close = 0 + self.parameters = [] + self.closes = [] + self.endcloses = [] + +class Parameter: + def __init__(self, type, name=None, optional=0): + self.type = type + self.name = name + self.optional = optional + self.text = '' + self.implied = 0 + + +class TableParser(XMLParser): + def __init__(self): + self.__table = {} + self.__current = None + self.__buffer = '' + 
XMLParser.__init__(self) + + def get_table(self): + for entry in self.__table.values(): + if entry.environment and not entry.has_content: + p = Parameter("content") + p.implied = 1 + entry.parameters.append(p) + entry.has_content = 1 + return self.__table + + def start_environment(self, attrs): + name = attrs["name"] + self.__current = TableEntry(name, environment=1) + self.__current.verbatim = attrs.get("verbatim") == "yes" + if attrs.has_key("outputname"): + self.__current.outputname = attrs.get("outputname") + self.__current.endcloses = string.split(attrs.get("endcloses", "")) + def end_environment(self): + self.end_macro() + + def start_macro(self, attrs): + name = attrs["name"] + self.__current = TableEntry(name) + self.__current.closes = string.split(attrs.get("closes", "")) + if attrs.has_key("outputname"): + self.__current.outputname = attrs.get("outputname") + def end_macro(self): +## if self.__current.parameters and not self.__current.outputname: +## raise ValueError, "markup with parameters must have an output name" + self.__table[self.__current.name] = self.__current + self.__current = None + + def start_attribute(self, attrs): + name = attrs.get("name") + optional = attrs.get("optional") == "yes" + if name: + p = Parameter("attribute", name, optional=optional) + else: + p = Parameter("attribute", optional=optional) + self.__current.parameters.append(p) + self.__buffer = '' + def end_attribute(self): + self.__current.parameters[-1].text = self.__buffer + + def start_child(self, attrs): + name = attrs["name"] + p = Parameter("child", name, attrs.get("optional") == "yes") + self.__current.parameters.append(p) + self.__current.empty = 0 + + def start_content(self, attrs): + p = Parameter("content") + p.implied = attrs.get("implied") == "yes" + if self.__current.environment: + p.implied = 1 + self.__current.parameters.append(p) + self.__current.has_content = 1 + self.__current.empty = 0 + + def start_text(self, attrs): + self.__buffer = '' + def 
end_text(self): + p = Parameter("text") + p.text = self.__buffer + self.__current.parameters.append(p) + + def handle_data(self, data): + self.__buffer = self.__buffer + data + + +def load_table(fp): + parser = TableParser() + parser.feed(fp.read()) + parser.close() + return parser.get_table() + + def main(): - if len(sys.argv) == 2: - ifp = open(sys.argv[1]) + global DEBUG + # + convert = new_convert + newstyle = 1 + opts, args = getopt.getopt(sys.argv[1:], "Dn", ["debug", "new"]) + for opt, arg in opts: + if opt in ("-n", "--new"): + convert = new_convert + newstyle = 1 + elif opt in ("-o", "--old"): + convert = old_convert + newstyle = 0 + elif opt in ("-D", "--debug"): + DEBUG = DEBUG + 1 + if len(args) == 0: + ifp = sys.stdin + ofp = sys.stdout + elif len(args) == 1: + ifp = open(args) ofp = sys.stdout - elif len(sys.argv) == 3: - ifp = open(sys.argv[1]) - ofp = open(sys.argv[2], "w") + elif len(args) == 2: + ifp = open(args[0]) + ofp = open(args[1], "w") else: usage() sys.exit(2) - convert(ifp, ofp, { + table = { # entries have the form: # name: ([attribute names], is1stOptional, isEmpty, isEnv, nocontent) # attribute names can be: # "string" -- normal attribute # ("string",) -- sub-element with content of macro; like for \section # ["string"] -- sub-element - "appendix": ([], 0, 1, 0, 0), "bifuncindex": (["name"], 0, 1, 0, 0), - "catcode": ([], 0, 1, 0, 0), "cfuncdesc": (["type", "name", ("args",)], 0, 0, 1, 0), "chapter": ([("title",)], 0, 0, 0, 0), "chapter*": ([("title",)], 0, 0, 0, 0), @@ -405,6 +839,7 @@ def main(): "maketitle": ([], 0, 1, 0, 0), "manpage": (["name", "section"], 0, 1, 0, 0), "memberdesc": (["class", "name"], 1, 0, 1, 0), + "memberdescni": (["class", "name"], 1, 0, 1, 0), "methoddesc": (["class", "name", ("args",)], 1, 0, 1, 0), "methoddescni": (["class", "name", ("args",)], 1, 0, 1, 0), "methodline": (["class", "name"], 1, 0, 0, 0), @@ -452,6 +887,8 @@ def main(): # # Things that will actually be going away! 
# + "appendix": ([], 0, 1, 0, 0), + "catcode": ([], 0, 1, 0, 0), "fi": ([], 0, 1, 0, 0), "ifhtml": ([], 0, 1, 0, 0), "makeindex": ([], 0, 1, 0, 0), @@ -460,7 +897,10 @@ def main(): "noindent": ([], 0, 1, 0, 0), "protect": ([], 0, 1, 0, 0), "tableofcontents": ([], 0, 1, 0, 0), - }, + } + if newstyle: + table = load_table(open(os.path.join(sys.path[0], 'conversion.xml'))) + convert(ifp, ofp, table, discards=["fi", "ifhtml", "makeindex", "makemodindex", "maketitle", "noindent", "tableofcontents"], autoclosing=["chapter", "section", "subsection", "subsubsection", |