From 96e4a06fa6de789770f154fa651adcf057c57fcf Mon Sep 17 00:00:00 2001 From: Fred Drake Date: Thu, 29 Jul 1999 22:22:13 +0000 Subject: Massive changes. Separate the Conversion class into a base and a subclass; the subclass is pretty minimal but the separation is useful for.... NewConversion: New class that implements a somewhat different approach to the conversion. This uses a table of instances (rather than tuples) that have more information than the tuples used for the older conversion procedure. This allows a lot more control over the conversion, and it seems to be pretty stable. TableEntry, Parameter: New classes that are used to build the conversion specification used by NewConversion. TableParser: xmllib.XMLParser subclass that builds a conversion specification from an XML document. load_table(): Convenience function that loads a table from a file. main(): Added flags --new and --old; these select which conversion is used. The default is --new. Several fixes have been made in the old conversion as well; these were done before writing & switching to the new conversion, and should be archived. The next checkin of this file will discard the old conversion; it is kept in this checkin to allow it to be retrieved if needed, and to avoid losing the bugfixes that have been made to it in the interim. --- Doc/tools/sgmlconv/latex2esis.py | 516 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 478 insertions(+), 38 deletions(-) diff --git a/Doc/tools/sgmlconv/latex2esis.py b/Doc/tools/sgmlconv/latex2esis.py index b6e9822..051c374 100755 --- a/Doc/tools/sgmlconv/latex2esis.py +++ b/Doc/tools/sgmlconv/latex2esis.py @@ -16,26 +16,41 @@ to load an alternate table from an external file. 
""" __version__ = '$Revision$' +import copy import errno +import getopt +import os import re import string import StringIO import sys +import UserList from esistools import encode from types import ListType, StringType, TupleType +try: + from xml.parsers.xmllib import XMLParser +except ImportError: + from xmllib import XMLParser + DEBUG = 0 -class Error(Exception): +class LaTeXFormatError(Exception): pass -class LaTeXFormatError(Error): - pass +class LaTeXStackError(LaTeXFormatError): + def __init__(self, found, stack): + msg = "environment close for %s doesn't match;\n stack = %s" \ + % (found, stack) + self.found = found + self.stack = stack[:] + LaTeXFormatError.__init__(self, msg) + _begin_env_rx = re.compile(r"[\\]begin{([^}]*)}") _end_env_rx = re.compile(r"[\\]end{([^}]*)}") _begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) ?({|\s*\n?)") @@ -58,22 +73,49 @@ def dbgmsg(msg): sys.stderr.write(msg + "\n") def pushing(name, point, depth): - dbgmsg("%s<%s> at %s" % (" "*depth, name, point)) + dbgmsg("pushing <%s> at %s" % (name, point)) def popping(name, point, depth): - dbgmsg("%s at %s" % (" "*depth, name, point)) + dbgmsg("popping at %s" % (name, point)) + + +class _Stack(UserList.UserList): + StringType = type('') + + def append(self, entry): + if type(entry) is not self.StringType: + raise LaTeXFormatError("cannot push non-string on stack: " + + `entry`) + sys.stderr.write("%s<%s>\n" % (" "*len(self.data), entry)) + self.data.append(entry) + def pop(self, index=-1): + entry = self.data[index] + del self.data[index] + sys.stderr.write("%s\n" % (" "*len(self.data), entry)) + + def __delitem__(self, index): + entry = self.data[index] + del self.data[index] + sys.stderr.write("%s\n" % (" "*len(self.data), entry)) + + +def new_stack(): + if DEBUG: + return _Stack() + return [] -class Conversion: - def __init__(self, ifp, ofp, table=None, discards=(), autoclosing=()): + +class BaseConversion: + def __init__(self, ifp, ofp, table={}, discards=(), autoclosing=()): 
self.ofp_stack = [ofp] self.pop_output() self.table = table self.discards = discards self.autoclosing = autoclosing self.line = string.join(map(string.rstrip, ifp.readlines()), "\n") - self.err_write = sys.stderr.write self.preamble = 1 + self.stack = new_stack() def push_output(self, ofp): self.ofp_stack.append(self.ofp) @@ -84,16 +126,20 @@ class Conversion: self.ofp = self.ofp_stack.pop() self.write = self.ofp.write + def err_write(self, msg): + if DEBUG: + sys.stderr.write(str(msg) + "\n") + + def convert(self): + self.subconvert() + + +class Conversion(BaseConversion): def subconvert(self, endchar=None, depth=0): - stack = [] + stack = self.stack line = self.line - if DEBUG and endchar: - self.err_write( - "subconvert(%s)\n line = %s\n" % (`endchar`, `line[:20]`)) while line: if line[0] == endchar and not stack: - if DEBUG: - self.err_write("subconvert() --> %s\n" % `line[1:21]`) self.line = line return line m = _comment_rx.match(line) @@ -117,19 +163,16 @@ class Conversion: # special magic for n in stack[1:]: if n not in self.autoclosing: + self.err_write(stack) raise LaTeXFormatError( "open element on stack: " + `n`) - # should be more careful, but this is easier to code: - stack = [] self.write(")document\n") elif stack and envname == stack[-1]: self.write(")%s\n" % envname) del stack[-1] popping(envname, "a", len(stack) + depth) else: - self.err_write("stack: %s\n" % `stack`) - raise LaTeXFormatError( - "environment close for %s doesn't match" % envname) + raise LaTeXStackError(envname, stack) line = line[m.end():] continue m = _begin_macro_rx.match(line) @@ -171,7 +214,7 @@ class Conversion: self.write("Anumbered TOKEN no\n") # rip off the macroname if params: - line = line[m.end(1):] + line = line[m.end(1):] elif empty: line = line[m.end(1):] else: @@ -184,7 +227,6 @@ class Conversion: # if optional and type(params[0]) is TupleType: # the attribute name isn't used in this special case - pushing(macroname, "a", depth + len(stack)) stack.append(macroname) 
self.write("(%s\n" % macroname) m = _start_optional_rx.match(line) @@ -210,7 +252,6 @@ class Conversion: # of the attribute element, and the macro will # have to be closed some other way (such as # auto-closing). - pushing(macroname, "b", len(stack) + depth) stack.append(macroname) self.write("(%s\n" % macroname) macroname = attrname[0] @@ -262,8 +303,6 @@ class Conversion: self.pop_output() continue if line[0] == endchar and not stack: - if DEBUG: - self.err_write("subconvert() --> %s\n" % `line[1:21]`) self.line = line[1:] return self.line if line[0] == "}": @@ -318,9 +357,6 @@ class Conversion: + string.join(stack, ", ")) # otherwise we just ran out of input here... - def convert(self): - self.subconvert() - def start_macro(self, name): conversion = self.table.get(name, ([], 0, 0, 0, 0)) params, optional, empty, environ, nocontent = conversion @@ -331,7 +367,275 @@ class Conversion: return params, optional, empty, environ -def convert(ifp, ofp, table={}, discards=(), autoclosing=()): +class NewConversion(BaseConversion): + def __init__(self, ifp, ofp, table={}): + BaseConversion.__init__(self, ifp, ofp, table) + self.discards = [] + + def subconvert(self, endchar=None, depth=0): + # + # Parses content, including sub-structures, until the character + # 'endchar' is found (with no open structures), or until the end + # of the input data is endchar is None. 
+ # + stack = new_stack() + line = self.line + while line: + if line[0] == endchar and not stack: + self.line = line + return line + m = _comment_rx.match(line) + if m: + text = m.group(1) + if text: + self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n" + % encode(text)) + line = line[m.end():] + continue + m = _begin_env_rx.match(line) + if m: + name = m.group(1) + entry = self.get_env_entry(name) + # re-write to use the macro handler + line = r"\%s %s" % (name, line[m.end():]) + continue + m = _end_env_rx.match(line) + if m: + # end of environment + envname = m.group(1) + entry = self.get_entry(envname) + while stack and envname != stack[-1] \ + and stack[-1] in entry.endcloses: + self.write(")%s\n" % stack.pop()) + if stack and envname == stack[-1]: + self.write(")%s\n" % entry.outputname) + del stack[-1] + else: + raise LaTeXStackError(envname, stack) + line = line[m.end():] + continue + m = _begin_macro_rx.match(line) + if m: + # start of macro + macroname = m.group(1) + entry = self.get_entry(macroname) + if entry.verbatim: + # magic case! 
+ pos = string.find(line, "\\end{%s}" % macroname) + text = line[m.end(1):pos] + stack.append(entry.name) + self.write("(%s\n" % entry.outputname) + self.write("-%s\n" % encode(text)) + self.write(")%s\n" % entry.outputname) + stack.pop() + line = line[pos + len("\\end{%s}" % macroname):] + continue + while stack and stack[-1] in entry.closes: + top = stack.pop() + topentry = self.get_entry(top) + if topentry.outputname: + self.write(")%s\n-\\n\n" % topentry.outputname) + # + if entry.outputname: + if entry.empty: + self.write("e\n") + self.push_output(self.ofp) + else: + self.push_output(StringIO.StringIO()) + # + params, optional, empty, environ = self.start_macro(macroname) + # rip off the macroname + if params: + line = line[m.end(1):] + elif empty: + line = line[m.end(1):] + else: + line = line[m.end():] + opened = 0 + implied_content = 0 + + # handle attribute mappings here: + for pentry in params: + if pentry.type == "attribute": + if pentry.optional: + m = _optional_rx.match(line) + if m: + line = line[m.end():] + self.dump_attr(pentry, m.group(1)) + elif pentry.text: + # value supplied by conversion spec: + self.dump_attr(pentry, pentry.text) + else: + m = _parameter_rx.match(line) + if not m: + raise LaTeXFormatError( + "could not extract parameter %s for %s: %s" + % (pentry.name, macroname, `line[:100]`)) + self.dump_attr(pentry, m.group(1)) +## if entry.name == "label": +## sys.stderr.write("[%s]" % m.group(1)) + line = line[m.end():] + elif pentry.type == "child": + if pentry.optional: + m = _optional_rx.match(line) + if m: + line = line[m.end():] + if entry.outputname and not opened: + opened = 1 + self.write("(%s\n" % entry.outputname) + stack.append(macroname) + stack.append(pentry.name) + self.write("(%s\n" % pentry.name) + self.write("-%s\n" % encode(m.group(1))) + self.write(")%s\n" % pentry.name) + stack.pop() + else: + if entry.outputname and not opened: + opened = 1 + self.write("(%s\n" % entry.outputname) + stack.append(entry.name) + 
self.write("(%s\n" % pentry.name) + stack.append(pentry.name) + self.line = skip_white(line)[1:] + line = self.subconvert( + "}", len(stack) + depth + 1)[1:] + self.write(")%s\n" % stack.pop()) + elif pentry.type == "content": + if pentry.implied: + implied_content = 1 + else: + if entry.outputname and not opened: + opened = 1 + self.write("(%s\n" % entry.outputname) + stack.append(entry.name) + line = skip_white(line) + if line[0] != "{": + raise LaTeXFormatError( + "missing content for " + macroname) + self.line = line[1:] + line = self.subconvert("}", len(stack) + depth + 1) + if line and line[0] == "}": + line = line[1:] + elif pentry.type == "text": + if pentry.text: + if entry.outputname and not opened: + opened = 1 + stack.append(entry.name) + self.write("(%s\n" % entry.outputname) + self.write("-%s\n" % encode(pentry.text)) + if entry.outputname: + if not opened: + self.write("(%s\n" % entry.outputname) + stack.append(entry.name) + if not implied_content: + self.write(")%s\n" % entry.outputname) + stack.pop() + self.pop_output() + continue + if line[0] == endchar and not stack: + self.line = line[1:] + return self.line + if line[0] == "}": + # end of macro or group + macroname = stack[-1] + if macroname: + conversion = self.table.get(macroname) + if conversion.outputname: + # otherwise, it was just a bare group + self.write(")%s\n" % conversion.outputname) + del stack[-1] + line = line[1:] + continue + if line[0] == "{": + stack.append("") + line = line[1:] + continue + if line[0] == "\\" and line[1] in ESCAPED_CHARS: + self.write("-%s\n" % encode(line[1])) + line = line[2:] + continue + if line[:2] == r"\\": + self.write("(BREAK\n)BREAK\n") + line = line[2:] + continue + m = _text_rx.match(line) + if m: + text = encode(m.group()) + self.write("-%s\n" % text) + line = line[m.end():] + continue + # special case because of \item[] + # XXX can we axe this??? 
+ if line[0] == "]": + self.write("-]\n") + line = line[1:] + continue + # avoid infinite loops + extra = "" + if len(line) > 100: + extra = "..." + raise LaTeXFormatError("could not identify markup: %s%s" + % (`line[:100]`, extra)) + while stack: + entry = self.get_entry(stack[-1]) + if entry.closes: + self.write(")%s\n-%s\n" % (entry.outputname, encode("\n"))) + del stack[-1] + else: + break + if stack: + raise LaTeXFormatError("elements remain on stack: " + + string.join(stack, ", ")) + # otherwise we just ran out of input here... + + def start_macro(self, name): + conversion = self.get_entry(name) + parameters = conversion.parameters + optional = parameters and parameters[0].optional +## empty = not len(parameters) +## if empty: +## self.write("e\n") +## elif conversion.empty: +## empty = 1 + return parameters, optional, conversion.empty, conversion.environment + + def get_entry(self, name): + entry = self.table.get(name) + if entry is None: + self.err_write("get_entry(%s) failing; building default entry!" 
+ % `name`) + # not defined; build a default entry: + entry = TableEntry(name) + entry.has_content = 1 + entry.parameters.append(Parameter("content")) + self.table[name] = entry + return entry + + def get_env_entry(self, name): + entry = self.table.get(name) + if entry is None: + # not defined; build a default entry: + entry = TableEntry(name, 1) + entry.has_content = 1 + entry.parameters.append(Parameter("content")) + entry.parameters[-1].implied = 1 + self.table[name] = entry + elif not entry.environment: + raise LaTeXFormatError( + name + " is defined as a macro; expected environment") + return entry + + def dump_attr(self, pentry, value): + if not (pentry.name and value): + return + if _token_rx.match(value): + dtype = "TOKEN" + else: + dtype = "CDATA" + self.write("A%s %s %s\n" % (pentry.name, dtype, encode(value))) + + +def old_convert(ifp, ofp, table={}, discards=(), autoclosing=()): c = Conversion(ifp, ofp, table, discards, autoclosing) try: c.convert() @@ -340,32 +644,162 @@ def convert(ifp, ofp, table={}, discards=(), autoclosing=()): raise +def new_convert(ifp, ofp, table={}, discards=(), autoclosing=()): + c = NewConversion(ifp, ofp, table) + try: + c.convert() + except IOError, (err, msg): + if err != errno.EPIPE: + raise + + def skip_white(line): - while line and line[0] in " %\n\t": + while line and line[0] in " %\n\t\r": line = string.lstrip(line[1:]) return line + +class TableEntry: + def __init__(self, name, environment=0): + self.name = name + self.outputname = name + self.environment = environment + self.empty = not environment + self.has_content = 0 + self.verbatim = 0 + self.auto_close = 0 + self.parameters = [] + self.closes = [] + self.endcloses = [] + +class Parameter: + def __init__(self, type, name=None, optional=0): + self.type = type + self.name = name + self.optional = optional + self.text = '' + self.implied = 0 + + +class TableParser(XMLParser): + def __init__(self): + self.__table = {} + self.__current = None + self.__buffer = '' + 
XMLParser.__init__(self) + + def get_table(self): + for entry in self.__table.values(): + if entry.environment and not entry.has_content: + p = Parameter("content") + p.implied = 1 + entry.parameters.append(p) + entry.has_content = 1 + return self.__table + + def start_environment(self, attrs): + name = attrs["name"] + self.__current = TableEntry(name, environment=1) + self.__current.verbatim = attrs.get("verbatim") == "yes" + if attrs.has_key("outputname"): + self.__current.outputname = attrs.get("outputname") + self.__current.endcloses = string.split(attrs.get("endcloses", "")) + def end_environment(self): + self.end_macro() + + def start_macro(self, attrs): + name = attrs["name"] + self.__current = TableEntry(name) + self.__current.closes = string.split(attrs.get("closes", "")) + if attrs.has_key("outputname"): + self.__current.outputname = attrs.get("outputname") + def end_macro(self): +## if self.__current.parameters and not self.__current.outputname: +## raise ValueError, "markup with parameters must have an output name" + self.__table[self.__current.name] = self.__current + self.__current = None + + def start_attribute(self, attrs): + name = attrs.get("name") + optional = attrs.get("optional") == "yes" + if name: + p = Parameter("attribute", name, optional=optional) + else: + p = Parameter("attribute", optional=optional) + self.__current.parameters.append(p) + self.__buffer = '' + def end_attribute(self): + self.__current.parameters[-1].text = self.__buffer + + def start_child(self, attrs): + name = attrs["name"] + p = Parameter("child", name, attrs.get("optional") == "yes") + self.__current.parameters.append(p) + self.__current.empty = 0 + + def start_content(self, attrs): + p = Parameter("content") + p.implied = attrs.get("implied") == "yes" + if self.__current.environment: + p.implied = 1 + self.__current.parameters.append(p) + self.__current.has_content = 1 + self.__current.empty = 0 + + def start_text(self, attrs): + self.__buffer = '' + def 
end_text(self): + p = Parameter("text") + p.text = self.__buffer + self.__current.parameters.append(p) + + def handle_data(self, data): + self.__buffer = self.__buffer + data + + +def load_table(fp): + parser = TableParser() + parser.feed(fp.read()) + parser.close() + return parser.get_table() + + def main(): - if len(sys.argv) == 2: - ifp = open(sys.argv[1]) + global DEBUG + # + convert = new_convert + newstyle = 1 + opts, args = getopt.getopt(sys.argv[1:], "Dn", ["debug", "new"]) + for opt, arg in opts: + if opt in ("-n", "--new"): + convert = new_convert + newstyle = 1 + elif opt in ("-o", "--old"): + convert = old_convert + newstyle = 0 + elif opt in ("-D", "--debug"): + DEBUG = DEBUG + 1 + if len(args) == 0: + ifp = sys.stdin + ofp = sys.stdout + elif len(args) == 1: + ifp = open(args) ofp = sys.stdout - elif len(sys.argv) == 3: - ifp = open(sys.argv[1]) - ofp = open(sys.argv[2], "w") + elif len(args) == 2: + ifp = open(args[0]) + ofp = open(args[1], "w") else: usage() sys.exit(2) - convert(ifp, ofp, { + table = { # entries have the form: # name: ([attribute names], is1stOptional, isEmpty, isEnv, nocontent) # attribute names can be: # "string" -- normal attribute # ("string",) -- sub-element with content of macro; like for \section # ["string"] -- sub-element - "appendix": ([], 0, 1, 0, 0), "bifuncindex": (["name"], 0, 1, 0, 0), - "catcode": ([], 0, 1, 0, 0), "cfuncdesc": (["type", "name", ("args",)], 0, 0, 1, 0), "chapter": ([("title",)], 0, 0, 0, 0), "chapter*": ([("title",)], 0, 0, 0, 0), @@ -405,6 +839,7 @@ def main(): "maketitle": ([], 0, 1, 0, 0), "manpage": (["name", "section"], 0, 1, 0, 0), "memberdesc": (["class", "name"], 1, 0, 1, 0), + "memberdescni": (["class", "name"], 1, 0, 1, 0), "methoddesc": (["class", "name", ("args",)], 1, 0, 1, 0), "methoddescni": (["class", "name", ("args",)], 1, 0, 1, 0), "methodline": (["class", "name"], 1, 0, 0, 0), @@ -452,6 +887,8 @@ def main(): # # Things that will actually be going away! 
# + "appendix": ([], 0, 1, 0, 0), + "catcode": ([], 0, 1, 0, 0), "fi": ([], 0, 1, 0, 0), "ifhtml": ([], 0, 1, 0, 0), "makeindex": ([], 0, 1, 0, 0), @@ -460,7 +897,10 @@ def main(): "noindent": ([], 0, 1, 0, 0), "protect": ([], 0, 1, 0, 0), "tableofcontents": ([], 0, 1, 0, 0), - }, + } + if newstyle: + table = load_table(open(os.path.join(sys.path[0], 'conversion.xml'))) + convert(ifp, ofp, table, discards=["fi", "ifhtml", "makeindex", "makemodindex", "maketitle", "noindent", "tableofcontents"], autoclosing=["chapter", "section", "subsection", "subsubsection", -- cgit v0.12