summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFred Drake <fdrake@acm.org>1999-07-29 22:22:13 (GMT)
committerFred Drake <fdrake@acm.org>1999-07-29 22:22:13 (GMT)
commit96e4a06fa6de789770f154fa651adcf057c57fcf (patch)
tree3eb1367e721c687c3c5e5d18faf2805d154d2857
parent2394c98c05d5d6ca61f253be0ce6b918b3fa0a4a (diff)
downloadcpython-96e4a06fa6de789770f154fa651adcf057c57fcf.zip
cpython-96e4a06fa6de789770f154fa651adcf057c57fcf.tar.gz
cpython-96e4a06fa6de789770f154fa651adcf057c57fcf.tar.bz2
Massive changes.
Separate the Conversion class into a base and a subclass; the subclass is pretty minimal but the separation is useful for.... NewConversion: New class that implements a somewhat different approach to the conversion. This uses a table of instances (rather than tuples) that have more information than the tuples used for the older conversion procedure. This allows a lot more control over the conversion, and it seems to be pretty stable. TableEntry, Parameter: New classes that are used to build the conversion specification used by NewConversion. TableParser: xmllib.XMLParser subclass that builds a conversion specification from an XML document. load_table(): Convenience function that loads a table from a file. main(): Added flags --new and --old; these select which conversion is used. The default is --new. Several fixes have been made in the old conversion as well; these were done before writing & switching to the new conversion, and should be archived. The next checkin of this file will discard the old conversion; is is kept in this checkin to allow it to be retrieved if needed, and to avoid lossing the bugfixes that have been made to it in the interim.
-rwxr-xr-xDoc/tools/sgmlconv/latex2esis.py516
1 files changed, 478 insertions, 38 deletions
diff --git a/Doc/tools/sgmlconv/latex2esis.py b/Doc/tools/sgmlconv/latex2esis.py
index b6e9822..051c374 100755
--- a/Doc/tools/sgmlconv/latex2esis.py
+++ b/Doc/tools/sgmlconv/latex2esis.py
@@ -16,26 +16,41 @@ to load an alternate table from an external file.
"""
__version__ = '$Revision$'
+import copy
import errno
+import getopt
+import os
import re
import string
import StringIO
import sys
+import UserList
from esistools import encode
from types import ListType, StringType, TupleType
+try:
+ from xml.parsers.xmllib import XMLParser
+except ImportError:
+ from xmllib import XMLParser
+
DEBUG = 0
-class Error(Exception):
+class LaTeXFormatError(Exception):
pass
-class LaTeXFormatError(Error):
- pass
+class LaTeXStackError(LaTeXFormatError):
+ def __init__(self, found, stack):
+ msg = "environment close for %s doesn't match;\n stack = %s" \
+ % (found, stack)
+ self.found = found
+ self.stack = stack[:]
+ LaTeXFormatError.__init__(self, msg)
+
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) ?({|\s*\n?)")
@@ -58,22 +73,49 @@ def dbgmsg(msg):
sys.stderr.write(msg + "\n")
def pushing(name, point, depth):
- dbgmsg("%s<%s> at %s" % (" "*depth, name, point))
+ dbgmsg("pushing <%s> at %s" % (name, point))
def popping(name, point, depth):
- dbgmsg("%s</%s> at %s" % (" "*depth, name, point))
+ dbgmsg("popping </%s> at %s" % (name, point))
+
+
+class _Stack(UserList.UserList):
+ StringType = type('')
+
+ def append(self, entry):
+ if type(entry) is not self.StringType:
+ raise LaTeXFormatError("cannot push non-string on stack: "
+ + `entry`)
+ sys.stderr.write("%s<%s>\n" % (" "*len(self.data), entry))
+ self.data.append(entry)
+ def pop(self, index=-1):
+ entry = self.data[index]
+ del self.data[index]
+ sys.stderr.write("%s</%s>\n" % (" "*len(self.data), entry))
+
+ def __delitem__(self, index):
+ entry = self.data[index]
+ del self.data[index]
+ sys.stderr.write("%s</%s>\n" % (" "*len(self.data), entry))
+
+
+def new_stack():
+ if DEBUG:
+ return _Stack()
+ return []
-class Conversion:
- def __init__(self, ifp, ofp, table=None, discards=(), autoclosing=()):
+
+class BaseConversion:
+ def __init__(self, ifp, ofp, table={}, discards=(), autoclosing=()):
self.ofp_stack = [ofp]
self.pop_output()
self.table = table
self.discards = discards
self.autoclosing = autoclosing
self.line = string.join(map(string.rstrip, ifp.readlines()), "\n")
- self.err_write = sys.stderr.write
self.preamble = 1
+ self.stack = new_stack()
def push_output(self, ofp):
self.ofp_stack.append(self.ofp)
@@ -84,16 +126,20 @@ class Conversion:
self.ofp = self.ofp_stack.pop()
self.write = self.ofp.write
+ def err_write(self, msg):
+ if DEBUG:
+ sys.stderr.write(str(msg) + "\n")
+
+ def convert(self):
+ self.subconvert()
+
+
+class Conversion(BaseConversion):
def subconvert(self, endchar=None, depth=0):
- stack = []
+ stack = self.stack
line = self.line
- if DEBUG and endchar:
- self.err_write(
- "subconvert(%s)\n line = %s\n" % (`endchar`, `line[:20]`))
while line:
if line[0] == endchar and not stack:
- if DEBUG:
- self.err_write("subconvert() --> %s\n" % `line[1:21]`)
self.line = line
return line
m = _comment_rx.match(line)
@@ -117,19 +163,16 @@ class Conversion:
# special magic
for n in stack[1:]:
if n not in self.autoclosing:
+ self.err_write(stack)
raise LaTeXFormatError(
"open element on stack: " + `n`)
- # should be more careful, but this is easier to code:
- stack = []
self.write(")document\n")
elif stack and envname == stack[-1]:
self.write(")%s\n" % envname)
del stack[-1]
popping(envname, "a", len(stack) + depth)
else:
- self.err_write("stack: %s\n" % `stack`)
- raise LaTeXFormatError(
- "environment close for %s doesn't match" % envname)
+ raise LaTeXStackError(envname, stack)
line = line[m.end():]
continue
m = _begin_macro_rx.match(line)
@@ -171,7 +214,7 @@ class Conversion:
self.write("Anumbered TOKEN no\n")
# rip off the macroname
if params:
- line = line[m.end(1):]
+ line = line[m.end(1):]
elif empty:
line = line[m.end(1):]
else:
@@ -184,7 +227,6 @@ class Conversion:
#
if optional and type(params[0]) is TupleType:
# the attribute name isn't used in this special case
- pushing(macroname, "a", depth + len(stack))
stack.append(macroname)
self.write("(%s\n" % macroname)
m = _start_optional_rx.match(line)
@@ -210,7 +252,6 @@ class Conversion:
# of the attribute element, and the macro will
# have to be closed some other way (such as
# auto-closing).
- pushing(macroname, "b", len(stack) + depth)
stack.append(macroname)
self.write("(%s\n" % macroname)
macroname = attrname[0]
@@ -262,8 +303,6 @@ class Conversion:
self.pop_output()
continue
if line[0] == endchar and not stack:
- if DEBUG:
- self.err_write("subconvert() --> %s\n" % `line[1:21]`)
self.line = line[1:]
return self.line
if line[0] == "}":
@@ -318,9 +357,6 @@ class Conversion:
+ string.join(stack, ", "))
# otherwise we just ran out of input here...
- def convert(self):
- self.subconvert()
-
def start_macro(self, name):
conversion = self.table.get(name, ([], 0, 0, 0, 0))
params, optional, empty, environ, nocontent = conversion
@@ -331,7 +367,275 @@ class Conversion:
return params, optional, empty, environ
-def convert(ifp, ofp, table={}, discards=(), autoclosing=()):
+class NewConversion(BaseConversion):
+ def __init__(self, ifp, ofp, table={}):
+ BaseConversion.__init__(self, ifp, ofp, table)
+ self.discards = []
+
+ def subconvert(self, endchar=None, depth=0):
+ #
+ # Parses content, including sub-structures, until the character
+ # 'endchar' is found (with no open structures), or until the end
+ # of the input data is endchar is None.
+ #
+ stack = new_stack()
+ line = self.line
+ while line:
+ if line[0] == endchar and not stack:
+ self.line = line
+ return line
+ m = _comment_rx.match(line)
+ if m:
+ text = m.group(1)
+ if text:
+ self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
+ % encode(text))
+ line = line[m.end():]
+ continue
+ m = _begin_env_rx.match(line)
+ if m:
+ name = m.group(1)
+ entry = self.get_env_entry(name)
+ # re-write to use the macro handler
+ line = r"\%s %s" % (name, line[m.end():])
+ continue
+ m = _end_env_rx.match(line)
+ if m:
+ # end of environment
+ envname = m.group(1)
+ entry = self.get_entry(envname)
+ while stack and envname != stack[-1] \
+ and stack[-1] in entry.endcloses:
+ self.write(")%s\n" % stack.pop())
+ if stack and envname == stack[-1]:
+ self.write(")%s\n" % entry.outputname)
+ del stack[-1]
+ else:
+ raise LaTeXStackError(envname, stack)
+ line = line[m.end():]
+ continue
+ m = _begin_macro_rx.match(line)
+ if m:
+ # start of macro
+ macroname = m.group(1)
+ entry = self.get_entry(macroname)
+ if entry.verbatim:
+ # magic case!
+ pos = string.find(line, "\\end{%s}" % macroname)
+ text = line[m.end(1):pos]
+ stack.append(entry.name)
+ self.write("(%s\n" % entry.outputname)
+ self.write("-%s\n" % encode(text))
+ self.write(")%s\n" % entry.outputname)
+ stack.pop()
+ line = line[pos + len("\\end{%s}" % macroname):]
+ continue
+ while stack and stack[-1] in entry.closes:
+ top = stack.pop()
+ topentry = self.get_entry(top)
+ if topentry.outputname:
+ self.write(")%s\n-\\n\n" % topentry.outputname)
+ #
+ if entry.outputname:
+ if entry.empty:
+ self.write("e\n")
+ self.push_output(self.ofp)
+ else:
+ self.push_output(StringIO.StringIO())
+ #
+ params, optional, empty, environ = self.start_macro(macroname)
+ # rip off the macroname
+ if params:
+ line = line[m.end(1):]
+ elif empty:
+ line = line[m.end(1):]
+ else:
+ line = line[m.end():]
+ opened = 0
+ implied_content = 0
+
+ # handle attribute mappings here:
+ for pentry in params:
+ if pentry.type == "attribute":
+ if pentry.optional:
+ m = _optional_rx.match(line)
+ if m:
+ line = line[m.end():]
+ self.dump_attr(pentry, m.group(1))
+ elif pentry.text:
+ # value supplied by conversion spec:
+ self.dump_attr(pentry, pentry.text)
+ else:
+ m = _parameter_rx.match(line)
+ if not m:
+ raise LaTeXFormatError(
+ "could not extract parameter %s for %s: %s"
+ % (pentry.name, macroname, `line[:100]`))
+ self.dump_attr(pentry, m.group(1))
+## if entry.name == "label":
+## sys.stderr.write("[%s]" % m.group(1))
+ line = line[m.end():]
+ elif pentry.type == "child":
+ if pentry.optional:
+ m = _optional_rx.match(line)
+ if m:
+ line = line[m.end():]
+ if entry.outputname and not opened:
+ opened = 1
+ self.write("(%s\n" % entry.outputname)
+ stack.append(macroname)
+ stack.append(pentry.name)
+ self.write("(%s\n" % pentry.name)
+ self.write("-%s\n" % encode(m.group(1)))
+ self.write(")%s\n" % pentry.name)
+ stack.pop()
+ else:
+ if entry.outputname and not opened:
+ opened = 1
+ self.write("(%s\n" % entry.outputname)
+ stack.append(entry.name)
+ self.write("(%s\n" % pentry.name)
+ stack.append(pentry.name)
+ self.line = skip_white(line)[1:]
+ line = self.subconvert(
+ "}", len(stack) + depth + 1)[1:]
+ self.write(")%s\n" % stack.pop())
+ elif pentry.type == "content":
+ if pentry.implied:
+ implied_content = 1
+ else:
+ if entry.outputname and not opened:
+ opened = 1
+ self.write("(%s\n" % entry.outputname)
+ stack.append(entry.name)
+ line = skip_white(line)
+ if line[0] != "{":
+ raise LaTeXFormatError(
+ "missing content for " + macroname)
+ self.line = line[1:]
+ line = self.subconvert("}", len(stack) + depth + 1)
+ if line and line[0] == "}":
+ line = line[1:]
+ elif pentry.type == "text":
+ if pentry.text:
+ if entry.outputname and not opened:
+ opened = 1
+ stack.append(entry.name)
+ self.write("(%s\n" % entry.outputname)
+ self.write("-%s\n" % encode(pentry.text))
+ if entry.outputname:
+ if not opened:
+ self.write("(%s\n" % entry.outputname)
+ stack.append(entry.name)
+ if not implied_content:
+ self.write(")%s\n" % entry.outputname)
+ stack.pop()
+ self.pop_output()
+ continue
+ if line[0] == endchar and not stack:
+ self.line = line[1:]
+ return self.line
+ if line[0] == "}":
+ # end of macro or group
+ macroname = stack[-1]
+ if macroname:
+ conversion = self.table.get(macroname)
+ if conversion.outputname:
+ # otherwise, it was just a bare group
+ self.write(")%s\n" % conversion.outputname)
+ del stack[-1]
+ line = line[1:]
+ continue
+ if line[0] == "{":
+ stack.append("")
+ line = line[1:]
+ continue
+ if line[0] == "\\" and line[1] in ESCAPED_CHARS:
+ self.write("-%s\n" % encode(line[1]))
+ line = line[2:]
+ continue
+ if line[:2] == r"\\":
+ self.write("(BREAK\n)BREAK\n")
+ line = line[2:]
+ continue
+ m = _text_rx.match(line)
+ if m:
+ text = encode(m.group())
+ self.write("-%s\n" % text)
+ line = line[m.end():]
+ continue
+ # special case because of \item[]
+ # XXX can we axe this???
+ if line[0] == "]":
+ self.write("-]\n")
+ line = line[1:]
+ continue
+ # avoid infinite loops
+ extra = ""
+ if len(line) > 100:
+ extra = "..."
+ raise LaTeXFormatError("could not identify markup: %s%s"
+ % (`line[:100]`, extra))
+ while stack:
+ entry = self.get_entry(stack[-1])
+ if entry.closes:
+ self.write(")%s\n-%s\n" % (entry.outputname, encode("\n")))
+ del stack[-1]
+ else:
+ break
+ if stack:
+ raise LaTeXFormatError("elements remain on stack: "
+ + string.join(stack, ", "))
+ # otherwise we just ran out of input here...
+
+ def start_macro(self, name):
+ conversion = self.get_entry(name)
+ parameters = conversion.parameters
+ optional = parameters and parameters[0].optional
+## empty = not len(parameters)
+## if empty:
+## self.write("e\n")
+## elif conversion.empty:
+## empty = 1
+ return parameters, optional, conversion.empty, conversion.environment
+
+ def get_entry(self, name):
+ entry = self.table.get(name)
+ if entry is None:
+ self.err_write("get_entry(%s) failing; building default entry!"
+ % `name`)
+ # not defined; build a default entry:
+ entry = TableEntry(name)
+ entry.has_content = 1
+ entry.parameters.append(Parameter("content"))
+ self.table[name] = entry
+ return entry
+
+ def get_env_entry(self, name):
+ entry = self.table.get(name)
+ if entry is None:
+ # not defined; build a default entry:
+ entry = TableEntry(name, 1)
+ entry.has_content = 1
+ entry.parameters.append(Parameter("content"))
+ entry.parameters[-1].implied = 1
+ self.table[name] = entry
+ elif not entry.environment:
+ raise LaTeXFormatError(
+ name + " is defined as a macro; expected environment")
+ return entry
+
+ def dump_attr(self, pentry, value):
+ if not (pentry.name and value):
+ return
+ if _token_rx.match(value):
+ dtype = "TOKEN"
+ else:
+ dtype = "CDATA"
+ self.write("A%s %s %s\n" % (pentry.name, dtype, encode(value)))
+
+
+def old_convert(ifp, ofp, table={}, discards=(), autoclosing=()):
c = Conversion(ifp, ofp, table, discards, autoclosing)
try:
c.convert()
@@ -340,32 +644,162 @@ def convert(ifp, ofp, table={}, discards=(), autoclosing=()):
raise
+def new_convert(ifp, ofp, table={}, discards=(), autoclosing=()):
+ c = NewConversion(ifp, ofp, table)
+ try:
+ c.convert()
+ except IOError, (err, msg):
+ if err != errno.EPIPE:
+ raise
+
+
def skip_white(line):
- while line and line[0] in " %\n\t":
+ while line and line[0] in " %\n\t\r":
line = string.lstrip(line[1:])
return line
+
+class TableEntry:
+ def __init__(self, name, environment=0):
+ self.name = name
+ self.outputname = name
+ self.environment = environment
+ self.empty = not environment
+ self.has_content = 0
+ self.verbatim = 0
+ self.auto_close = 0
+ self.parameters = []
+ self.closes = []
+ self.endcloses = []
+
+class Parameter:
+ def __init__(self, type, name=None, optional=0):
+ self.type = type
+ self.name = name
+ self.optional = optional
+ self.text = ''
+ self.implied = 0
+
+
+class TableParser(XMLParser):
+ def __init__(self):
+ self.__table = {}
+ self.__current = None
+ self.__buffer = ''
+ XMLParser.__init__(self)
+
+ def get_table(self):
+ for entry in self.__table.values():
+ if entry.environment and not entry.has_content:
+ p = Parameter("content")
+ p.implied = 1
+ entry.parameters.append(p)
+ entry.has_content = 1
+ return self.__table
+
+ def start_environment(self, attrs):
+ name = attrs["name"]
+ self.__current = TableEntry(name, environment=1)
+ self.__current.verbatim = attrs.get("verbatim") == "yes"
+ if attrs.has_key("outputname"):
+ self.__current.outputname = attrs.get("outputname")
+ self.__current.endcloses = string.split(attrs.get("endcloses", ""))
+ def end_environment(self):
+ self.end_macro()
+
+ def start_macro(self, attrs):
+ name = attrs["name"]
+ self.__current = TableEntry(name)
+ self.__current.closes = string.split(attrs.get("closes", ""))
+ if attrs.has_key("outputname"):
+ self.__current.outputname = attrs.get("outputname")
+ def end_macro(self):
+## if self.__current.parameters and not self.__current.outputname:
+## raise ValueError, "markup with parameters must have an output name"
+ self.__table[self.__current.name] = self.__current
+ self.__current = None
+
+ def start_attribute(self, attrs):
+ name = attrs.get("name")
+ optional = attrs.get("optional") == "yes"
+ if name:
+ p = Parameter("attribute", name, optional=optional)
+ else:
+ p = Parameter("attribute", optional=optional)
+ self.__current.parameters.append(p)
+ self.__buffer = ''
+ def end_attribute(self):
+ self.__current.parameters[-1].text = self.__buffer
+
+ def start_child(self, attrs):
+ name = attrs["name"]
+ p = Parameter("child", name, attrs.get("optional") == "yes")
+ self.__current.parameters.append(p)
+ self.__current.empty = 0
+
+ def start_content(self, attrs):
+ p = Parameter("content")
+ p.implied = attrs.get("implied") == "yes"
+ if self.__current.environment:
+ p.implied = 1
+ self.__current.parameters.append(p)
+ self.__current.has_content = 1
+ self.__current.empty = 0
+
+ def start_text(self, attrs):
+ self.__buffer = ''
+ def end_text(self):
+ p = Parameter("text")
+ p.text = self.__buffer
+ self.__current.parameters.append(p)
+
+ def handle_data(self, data):
+ self.__buffer = self.__buffer + data
+
+
+def load_table(fp):
+ parser = TableParser()
+ parser.feed(fp.read())
+ parser.close()
+ return parser.get_table()
+
+
def main():
- if len(sys.argv) == 2:
- ifp = open(sys.argv[1])
+ global DEBUG
+ #
+ convert = new_convert
+ newstyle = 1
+ opts, args = getopt.getopt(sys.argv[1:], "Dn", ["debug", "new"])
+ for opt, arg in opts:
+ if opt in ("-n", "--new"):
+ convert = new_convert
+ newstyle = 1
+ elif opt in ("-o", "--old"):
+ convert = old_convert
+ newstyle = 0
+ elif opt in ("-D", "--debug"):
+ DEBUG = DEBUG + 1
+ if len(args) == 0:
+ ifp = sys.stdin
+ ofp = sys.stdout
+ elif len(args) == 1:
+ ifp = open(args)
ofp = sys.stdout
- elif len(sys.argv) == 3:
- ifp = open(sys.argv[1])
- ofp = open(sys.argv[2], "w")
+ elif len(args) == 2:
+ ifp = open(args[0])
+ ofp = open(args[1], "w")
else:
usage()
sys.exit(2)
- convert(ifp, ofp, {
+ table = {
# entries have the form:
# name: ([attribute names], is1stOptional, isEmpty, isEnv, nocontent)
# attribute names can be:
# "string" -- normal attribute
# ("string",) -- sub-element with content of macro; like for \section
# ["string"] -- sub-element
- "appendix": ([], 0, 1, 0, 0),
"bifuncindex": (["name"], 0, 1, 0, 0),
- "catcode": ([], 0, 1, 0, 0),
"cfuncdesc": (["type", "name", ("args",)], 0, 0, 1, 0),
"chapter": ([("title",)], 0, 0, 0, 0),
"chapter*": ([("title",)], 0, 0, 0, 0),
@@ -405,6 +839,7 @@ def main():
"maketitle": ([], 0, 1, 0, 0),
"manpage": (["name", "section"], 0, 1, 0, 0),
"memberdesc": (["class", "name"], 1, 0, 1, 0),
+ "memberdescni": (["class", "name"], 1, 0, 1, 0),
"methoddesc": (["class", "name", ("args",)], 1, 0, 1, 0),
"methoddescni": (["class", "name", ("args",)], 1, 0, 1, 0),
"methodline": (["class", "name"], 1, 0, 0, 0),
@@ -452,6 +887,8 @@ def main():
#
# Things that will actually be going away!
#
+ "appendix": ([], 0, 1, 0, 0),
+ "catcode": ([], 0, 1, 0, 0),
"fi": ([], 0, 1, 0, 0),
"ifhtml": ([], 0, 1, 0, 0),
"makeindex": ([], 0, 1, 0, 0),
@@ -460,7 +897,10 @@ def main():
"noindent": ([], 0, 1, 0, 0),
"protect": ([], 0, 1, 0, 0),
"tableofcontents": ([], 0, 1, 0, 0),
- },
+ }
+ if newstyle:
+ table = load_table(open(os.path.join(sys.path[0], 'conversion.xml')))
+ convert(ifp, ofp, table,
discards=["fi", "ifhtml", "makeindex", "makemodindex", "maketitle",
"noindent", "tableofcontents"],
autoclosing=["chapter", "section", "subsection", "subsubsection",