diff options
author | Fred Drake <fdrake@acm.org> | 1999-05-07 19:59:02 (GMT) |
---|---|---|
committer | Fred Drake <fdrake@acm.org> | 1999-05-07 19:59:02 (GMT) |
commit | 96c00b0b5e25b0de2d72bf69d8a362e1ebddd5b2 (patch) | |
tree | 5d23ce7078b8d95ed73f986756bc3c1d5486ca36 /Doc/tools/sgmlconv | |
parent | b0bc7f2d6cd1872613ece64da6ca6e7f8a73f331 (diff) | |
download | cpython-96c00b0b5e25b0de2d72bf69d8a362e1ebddd5b2.zip cpython-96c00b0b5e25b0de2d72bf69d8a362e1ebddd5b2.tar.gz cpython-96c00b0b5e25b0de2d72bf69d8a362e1ebddd5b2.tar.bz2 |
Define & use a Conversion object. It's still really ugly, but at
least there's a token object in here now! ;-)
Diffstat (limited to 'Doc/tools/sgmlconv')
-rwxr-xr-x | Doc/tools/sgmlconv/latex2esis.py | 482 |
1 files changed, 255 insertions, 227 deletions
diff --git a/Doc/tools/sgmlconv/latex2esis.py b/Doc/tools/sgmlconv/latex2esis.py index 43ed2c5..1ea928d 100755 --- a/Doc/tools/sgmlconv/latex2esis.py +++ b/Doc/tools/sgmlconv/latex2esis.py @@ -27,7 +27,7 @@ class LaTeXFormatError(Error): _begin_env_rx = re.compile(r"[\\]begin{([^}]*)}") _end_env_rx = re.compile(r"[\\]end{([^}]*)}") _begin_macro_rx = re.compile("[\\\\]([a-zA-Z]+[*]?)({|\\s*\n?)") -_comment_rx = re.compile("%+ ?(.*)\n *") +_comment_rx = re.compile("%+ ?(.*)\n[ \t]*") _text_rx = re.compile(r"[^]%\\{}]+") _optional_rx = re.compile(r"\s*[[]([^]]*)[]]") # _parameter_rx is this complicated to allow {...} inside a parameter; @@ -50,248 +50,276 @@ def popping(name, point, depth): sys.stderr.write("%s</%s> at %s\n" % (" "*depth, name, point)) -def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0): - if DEBUG and endchar: - sys.stderr.write("subconvert(%s, ..., endchar=%s)\n" - % (`line[:20]`, `endchar`)) - stack = [] - while line: - if line[0] == endchar and not stack: - if DEBUG: - sys.stderr.write("subconvert() --> %s\n" % `line[1:21]`) - return line[1:] - m = _comment_rx.match(line) - if m: - text = m.group(1) - if text: - ofp.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n" % encode(text)) - line = line[m.end():] - continue - m = _begin_env_rx.match(line) - if m: - # re-write to use the macro handler - line = r"\%s %s" % (m.group(1), line[m.end():]) - continue - m = _end_env_rx.match(line) - if m: - # end of environment - envname = m.group(1) - if envname == "document": - # special magic - for n in stack[1:]: - if n not in autoclosing: - raise LaTeXFormatError("open element on stack: " + `n`) - # should be more careful, but this is easier to code: - stack = [] - ofp.write(")document\n") - elif envname == stack[-1]: - ofp.write(")%s\n" % envname) - del stack[-1] - popping(envname, "a", len(stack) + depth) - else: - sys.stderr.write("stack: %s\n" % `stack`) - raise LaTeXFormatError( - "environment close for %s doesn't match" % envname) - line = line[m.end():] - continue - m = _begin_macro_rx.match(line) - if m: - # start of macro - macroname = m.group(1) - if macroname == "verbatim": - # really magic case! - pos = string.find(line, "\\end{verbatim}") - text = line[m.end(1):pos] - ofp.write("(verbatim\n") - ofp.write("-%s\n" % encode(text)) - ofp.write(")verbatim\n") - line = line[pos + len("\\end{verbatim}"):] +class Conversion: + def __init__(self, ifp, ofp, table=None, discards=(), autoclosing=()): + self.ofp_stack = [ofp] + self.pop_output() + self.table = table + self.discards = discards + self.autoclosing = autoclosing + self.line = string.join(map(string.rstrip, ifp.readlines()), "\n") + self.err_write = sys.stderr.write + self.preamble = 1 + + def push_output(self, ofp): + self.ofp_stack.append(self.ofp) + self.ofp = ofp + self.write = ofp.write + + def pop_output(self): + self.ofp = self.ofp_stack.pop() + self.write = self.ofp.write + + def subconvert(self, endchar=None, depth=0): + if DEBUG and endchar: + self.err_write( + "subconvert(%s)\n line = %s\n" % (`endchar`, `line[:20]`)) + stack = [] + line = self.line + while line: + if line[0] == endchar and not stack: + if DEBUG: + self.err_write("subconvert() --> %s\n" % `line[1:21]`) + self.line = line + return line + m = _comment_rx.match(line) + if m: + text = m.group(1) + if text: + self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n" + % encode(text)) + line = line[m.end():] continue - numbered = 1 - if macroname[-1] == "*": - macroname = macroname[:-1] - numbered = 0 - if macroname in autoclosing and macroname in stack: - while stack[-1] != macroname: - if stack[-1] and stack[-1] not in discards: - ofp.write(")%s\n-\\n\n" % stack[-1]) - popping(stack[-1], "b", len(stack) + depth - 1) + m = _begin_env_rx.match(line) + if m: + # re-write to use the macro handler + line = r"\%s %s" % (m.group(1), line[m.end():]) + continue + m = _end_env_rx.match(line) + if m: + # end of environment + envname = m.group(1) + if envname == "document": + # special magic + for n in stack[1:]: + if n not in self.autoclosing: + raise LaTeXFormatError( + "open element on stack: " + `n`) + # should be more careful, but this is easier to code: + stack = [] + self.write(")document\n") + elif envname == stack[-1]: + self.write(")%s\n" % envname) del stack[-1] - if macroname not in discards: - ofp.write("-\\n\n)%s\n-\\n\n" % macroname) - popping(macroname, "c", len(stack) + depth - 1) - del stack[-1] - real_ofp = ofp - if macroname in discards: - ofp = StringIO.StringIO() - # - conversion = table.get(macroname, ([], 0, 0, 0, 0)) - params, optional, empty, environ, nocontent = conversion - if empty: - ofp.write("e\n") - elif nocontent: - empty = 1 - if not numbered: - ofp.write("Anumbered TOKEN no\n") - opened = 0 - # rip off the macroname - if params: - if optional and len(params) == 1: - line = line = line[m.end():] + popping(envname, "a", len(stack) + depth) else: - line = line[m.end(1):] - elif empty: - line = line[m.end(1):] - else: + self.err_write("stack: %s\n" % `stack`) + raise LaTeXFormatError( + "environment close for %s doesn't match" % envname) line = line[m.end():] - # - # Very ugly special case to deal with \item[]. The catch is that - # this needs to occur outside the for loop that handles attribute - # parsing so we can 'continue' the outer loop. - # - if optional and type(params[0]) is type(()): - # the attribute name isn't used in this special case - pushing(macroname, "a", depth + len(stack)) - stack.append(macroname) - ofp.write("(%s\n" % macroname) - m = _start_optional_rx.match(line) - if m: - line = line[m.end():] - line = subconvert(line, ofp, table, discards, - autoclosing, endchar="]", - depth=depth + len(stack)) - line = "}" + line continue - # handle attribute mappings here: - for attrname in params: - if optional: - optional = 0 - if type(attrname) is type(""): - m = _optional_rx.match(line) - if m: - line = line[m.end():] - ofp.write("A%s TOKEN %s\n" - % (attrname, encode(m.group(1)))) - elif type(attrname) is type(()): - # This is a sub-element; but don't place the - # element we found on the stack (\section-like) - pushing(macroname, "b", len(stack) + depth) + m = _begin_macro_rx.match(line) + if m: + # start of macro + macroname = m.group(1) + if macroname == "verbatim": + # really magic case! + pos = string.find(line, "\\end{verbatim}") + text = line[m.end(1):pos] + self.write("(verbatim\n") + self.write("-%s\n" % encode(text)) + self.write(")verbatim\n") + line = line[pos + len("\\end{verbatim}"):] + continue + numbered = 1 + opened = 0 + if macroname[-1] == "*": + macroname = macroname[:-1] + numbered = 0 + if macroname in self.autoclosing and macroname in stack: + while stack[-1] != macroname: + top = stack.pop() + if top and top not in self.discards: + self.write(")%s\n-\\n\n" % top) + popping(top, "b", len(stack) + depth) + if macroname not in self.discards: + self.write("-\\n\n)%s\n-\\n\n" % macroname) + popping(macroname, "c", len(stack) + depth - 1) + del stack[-1] + # + if macroname in self.discards: + self.push_output(StringIO.StringIO()) + else: + self.push_output(self.ofp) + # + params, optional, empty, environ = self.start_macro(macroname) + if not numbered: + self.write("Anumbered TOKEN no\n") + # rip off the macroname + if params: + if optional and len(params) == 1: + line = line[m.end():] + else: + line = line[m.end(1):] + elif empty: + line = line[m.end(1):] + else: + line = line[m.end():] + # + # Very ugly special case to deal with \item[]. The catch + # is that this needs to occur outside the for loop that + # handles attribute parsing so we can 'continue' the outer + # loop. + # + if optional and type(params[0]) is type(()): + # the attribute name isn't used in this special case + pushing(macroname, "a", depth + len(stack)) stack.append(macroname) - ofp.write("(%s\n" % macroname) - macroname = attrname[0] - m = _start_group_rx.match(line) + self.write("(%s\n" % macroname) + m = _start_optional_rx.match(line) if m: + self.line = line[m.end():] + line = self.subconvert("]", depth + len(stack)) + line = "}" + line + continue + # handle attribute mappings here: + for attrname in params: + if optional: + optional = 0 + if type(attrname) is type(""): + m = _optional_rx.match(line) + if m: + line = line[m.end():] + self.write("A%s TOKEN %s\n" + % (attrname, encode(m.group(1)))) + elif type(attrname) is type(()): + # This is a sub-element; but don't place the + # element we found on the stack (\section-like) + pushing(macroname, "b", len(stack) + depth) + stack.append(macroname) + self.write("(%s\n" % macroname) + macroname = attrname[0] + m = _start_group_rx.match(line) + if m: + line = line[m.end():] + elif type(attrname) is type([]): + # A normal subelement. + attrname = attrname[0] + if not opened: + opened = 1 + self.write("(%s\n" % macroname) + pushing(macroname, "c", len(stack) + depth) + self.write("(%s\n" % attrname) + pushing(attrname, "sub-elem", len(stack) + depth + 1) + self.line = skip_white(line)[1:] + line = subconvert("}", depth + len(stack) + 2) + popping(attrname, "sub-elem", len(stack) + depth + 1) + self.write(")%s\n" % attrname) + else: + m = _parameter_rx.match(line) + if not m: + raise LaTeXFormatError( + "could not extract parameter %s for %s: %s" + % (attrname, macroname, `line[:100]`)) + value = m.group(1) + if _token_rx.match(value): + dtype = "TOKEN" + else: + dtype = "CDATA" + self.write("A%s %s %s\n" + % (attrname, dtype, encode(value))) line = line[m.end():] - elif type(attrname) is type([]): - # A normal subelement. - attrname = attrname[0] - if not opened: - opened = 1 - ofp.write("(%s\n" % macroname) - pushing(macroname, "c", len(stack) + depth) - ofp.write("(%s\n" % attrname) - pushing(attrname, "sub-elem", len(stack) + depth + 1) - line = subconvert(skip_white(line)[1:], ofp, table, - discards, autoclosing, endchar="}", - depth=depth + len(stack) + 2) - popping(attrname, "sub-elem", len(stack) + depth + 1) - ofp.write(")%s\n" % attrname) - else: - m = _parameter_rx.match(line) + if params and type(params[-1]) is type('') \ + and (not empty) and not environ: + # attempt to strip off next '{' + m = _start_group_rx.match(line) if not m: raise LaTeXFormatError( - "could not extract parameter %s for %s: %s" - % (attrname, macroname, `line[:100]`)) - value = m.group(1) - if _token_rx.match(value): - dtype = "TOKEN" - else: - dtype = "CDATA" - ofp.write("A%s %s %s\n" - % (attrname, dtype, encode(value))) + "non-empty element '%s' has no content: %s" + % (macroname, line[:12])) line = line[m.end():] - if params and type(params[-1]) is type('') \ - and (not empty) and not environ: - # attempt to strip off next '{' - m = _start_group_rx.match(line) - if not m: - raise LaTeXFormatError( - "non-empty element '%s' has no content: %s" - % (macroname, line[:12])) + if not opened: + self.write("(%s\n" % macroname) + pushing(macroname, "d", len(stack) + depth) + if empty: + line = "}" + line + stack.append(macroname) + self.pop_output() + continue + if line[0] == endchar and not stack: + if DEBUG: + self.err_write("subconvert() --> %s\n" % `line[1:21]`) + self.line = line[1:] + return self.line + if line[0] == "}": + # end of macro or group + macroname = stack[-1] + conversion = self.table.get(macroname) + if macroname \ + and macroname not in self.discards \ + and type(conversion) is not type(""): + # otherwise, it was just a bare group + self.write(")%s\n" % stack[-1]) + popping(macroname, "d", len(stack) + depth - 1) + del stack[-1] + line = line[1:] + continue + if line[0] == "{": + pushing("", "e", len(stack) + depth) + stack.append("") + line = line[1:] + continue + if line[0] == "\\" and line[1] in ESCAPED_CHARS: + self.write("-%s\n" % encode(line[1])) + line = line[2:] + continue + if line[:2] == r"\\": + self.write("(BREAK\n)BREAK\n") + line = line[2:] + continue + m = _text_rx.match(line) + if m: + text = encode(m.group()) + self.write("-%s\n" % text) line = line[m.end():] - if not opened: - ofp.write("(%s\n" % macroname) - pushing(macroname, "d", len(stack) + depth) - if empty: - line = "}" + line - stack.append(macroname) - ofp = real_ofp - continue - if line[0] == endchar and not stack: - if DEBUG: - sys.stderr.write("subconvert() --> %s\n" % `line[1:21]`) - return line[1:] - if line[0] == "}": - # end of macro - macroname = stack[-1] - conversion = table.get(macroname) - if macroname \ - and macroname not in discards \ - and type(conversion) is not type(""): - # otherwise, it was just a bare group - ofp.write(")%s\n" % stack[-1]) - popping(macroname, "d", len(stack) + depth - 1) - del stack[-1] - line = line[1:] - continue - if line[0] == "{": - pushing("", "e", len(stack) + depth) - stack.append("") - line = line[1:] - continue - if line[0] == "\\" and line[1] in ESCAPED_CHARS: - ofp.write("-%s\n" % encode(line[1])) - line = line[2:] - continue - if line[:2] == r"\\": - ofp.write("(BREAK\n)BREAK\n") - line = line[2:] - continue - m = _text_rx.match(line) - if m: - text = encode(m.group()) - ofp.write("-%s\n" % text) - line = line[m.end():] - continue - # special case because of \item[] - if line[0] == "]": - ofp.write("-]\n") - line = line[1:] - continue - # avoid infinite loops - extra = "" - if len(line) > 100: - extra = "..." - raise LaTeXFormatError("could not identify markup: %s%s" - % (`line[:100]`, extra)) - while stack and stack[-1] in autoclosing: - ofp.write("-\\n\n") - ofp.write(")%s\n" % stack[-1]) - popping(stack[-1], "e", len(stack) + depth - 1) - del stack[-1] - if stack: - raise LaTeXFormatError("elements remain on stack: " - + string.join(stack)) - # otherwise we just ran out of input here... + continue + # special case because of \item[] + if line[0] == "]": + self.write("-]\n") + line = line[1:] + continue + # avoid infinite loops + extra = "" + if len(line) > 100: + extra = "..." + raise LaTeXFormatError("could not identify markup: %s%s" + % (`line[:100]`, extra)) + while stack and stack[-1] in self.autoclosing: + self.write("-\\n\n") + self.write(")%s\n" % stack[-1]) + popping(stack.pop(), "e", len(stack) + depth - 1) + if stack: + raise LaTeXFormatError("elements remain on stack: " + + string.join(stack, ", ")) + # otherwise we just ran out of input here... + + def convert(self): + self.subconvert() + + def start_macro(self, name): + conversion = self.table.get(name, ([], 0, 0, 0, 0)) + params, optional, empty, environ, nocontent = conversion + if empty: + self.write("e\n") + elif nocontent: + empty = 1 + return params, optional, empty, environ def convert(ifp, ofp, table={}, discards=(), autoclosing=()): - lines = string.split(ifp.read(), "\n") - for i in range(len(lines)): - lines[i] = string.rstrip(lines[i]) - data = string.join(lines, "\n") + c = Conversion(ifp, ofp, table, discards, autoclosing) try: - subconvert(data, ofp, table, discards, autoclosing) + c.convert() except IOError, (err, msg): if err != errno.EPIPE: raise |