summary refs log tree commit diff stats
path: root/Doc/tools/sgmlconv
diff options
context:
space:
mode:
author Fred Drake <fdrake@acm.org> 1999-05-07 19:59:02 (GMT)
committer Fred Drake <fdrake@acm.org> 1999-05-07 19:59:02 (GMT)
commit 96c00b0b5e25b0de2d72bf69d8a362e1ebddd5b2 (patch)
tree 5d23ce7078b8d95ed73f986756bc3c1d5486ca36 /Doc/tools/sgmlconv
parent b0bc7f2d6cd1872613ece64da6ca6e7f8a73f331 (diff)
download cpython-96c00b0b5e25b0de2d72bf69d8a362e1ebddd5b2.zip
cpython-96c00b0b5e25b0de2d72bf69d8a362e1ebddd5b2.tar.gz
cpython-96c00b0b5e25b0de2d72bf69d8a362e1ebddd5b2.tar.bz2
Define & use a Conversion object. It's still really ugly, but at
least there's a token object in here now! ;-)
Diffstat (limited to 'Doc/tools/sgmlconv')
-rwxr-xr-x Doc/tools/sgmlconv/latex2esis.py 482
1 files changed, 255 insertions, 227 deletions
diff --git a/Doc/tools/sgmlconv/latex2esis.py b/Doc/tools/sgmlconv/latex2esis.py
index 43ed2c5..1ea928d 100755
--- a/Doc/tools/sgmlconv/latex2esis.py
+++ b/Doc/tools/sgmlconv/latex2esis.py
@@ -27,7 +27,7 @@ class LaTeXFormatError(Error):
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
_begin_macro_rx = re.compile("[\\\\]([a-zA-Z]+[*]?)({|\\s*\n?)")
-_comment_rx = re.compile("%+ ?(.*)\n *")
+_comment_rx = re.compile("%+ ?(.*)\n[ \t]*")
_text_rx = re.compile(r"[^]%\\{}]+")
_optional_rx = re.compile(r"\s*[[]([^]]*)[]]")
# _parameter_rx is this complicated to allow {...} inside a parameter;
@@ -50,248 +50,276 @@ def popping(name, point, depth):
sys.stderr.write("%s</%s> at %s\n" % (" "*depth, name, point))
-def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0):
- if DEBUG and endchar:
- sys.stderr.write("subconvert(%s, ..., endchar=%s)\n"
- % (`line[:20]`, `endchar`))
- stack = []
- while line:
- if line[0] == endchar and not stack:
- if DEBUG:
- sys.stderr.write("subconvert() --> %s\n" % `line[1:21]`)
- return line[1:]
- m = _comment_rx.match(line)
- if m:
- text = m.group(1)
- if text:
- ofp.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n" % encode(text))
- line = line[m.end():]
- continue
- m = _begin_env_rx.match(line)
- if m:
- # re-write to use the macro handler
- line = r"\%s %s" % (m.group(1), line[m.end():])
- continue
- m = _end_env_rx.match(line)
- if m:
- # end of environment
- envname = m.group(1)
- if envname == "document":
- # special magic
- for n in stack[1:]:
- if n not in autoclosing:
- raise LaTeXFormatError("open element on stack: " + `n`)
- # should be more careful, but this is easier to code:
- stack = []
- ofp.write(")document\n")
- elif envname == stack[-1]:
- ofp.write(")%s\n" % envname)
- del stack[-1]
- popping(envname, "a", len(stack) + depth)
- else:
- sys.stderr.write("stack: %s\n" % `stack`)
- raise LaTeXFormatError(
- "environment close for %s doesn't match" % envname)
- line = line[m.end():]
- continue
- m = _begin_macro_rx.match(line)
- if m:
- # start of macro
- macroname = m.group(1)
- if macroname == "verbatim":
- # really magic case!
- pos = string.find(line, "\\end{verbatim}")
- text = line[m.end(1):pos]
- ofp.write("(verbatim\n")
- ofp.write("-%s\n" % encode(text))
- ofp.write(")verbatim\n")
- line = line[pos + len("\\end{verbatim}"):]
+class Conversion:
+ def __init__(self, ifp, ofp, table=None, discards=(), autoclosing=()):
+ self.ofp_stack = [ofp]
+ self.pop_output()
+ self.table = table
+ self.discards = discards
+ self.autoclosing = autoclosing
+ self.line = string.join(map(string.rstrip, ifp.readlines()), "\n")
+ self.err_write = sys.stderr.write
+ self.preamble = 1
+
+ def push_output(self, ofp):
+ self.ofp_stack.append(self.ofp)
+ self.ofp = ofp
+ self.write = ofp.write
+
+ def pop_output(self):
+ self.ofp = self.ofp_stack.pop()
+ self.write = self.ofp.write
+
+ def subconvert(self, endchar=None, depth=0):
+ if DEBUG and endchar:
+ self.err_write(
+ "subconvert(%s)\n line = %s\n" % (`endchar`, `line[:20]`))
+ stack = []
+ line = self.line
+ while line:
+ if line[0] == endchar and not stack:
+ if DEBUG:
+ self.err_write("subconvert() --> %s\n" % `line[1:21]`)
+ self.line = line
+ return line
+ m = _comment_rx.match(line)
+ if m:
+ text = m.group(1)
+ if text:
+ self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
+ % encode(text))
+ line = line[m.end():]
continue
- numbered = 1
- if macroname[-1] == "*":
- macroname = macroname[:-1]
- numbered = 0
- if macroname in autoclosing and macroname in stack:
- while stack[-1] != macroname:
- if stack[-1] and stack[-1] not in discards:
- ofp.write(")%s\n-\\n\n" % stack[-1])
- popping(stack[-1], "b", len(stack) + depth - 1)
+ m = _begin_env_rx.match(line)
+ if m:
+ # re-write to use the macro handler
+ line = r"\%s %s" % (m.group(1), line[m.end():])
+ continue
+ m = _end_env_rx.match(line)
+ if m:
+ # end of environment
+ envname = m.group(1)
+ if envname == "document":
+ # special magic
+ for n in stack[1:]:
+ if n not in self.autoclosing:
+ raise LaTeXFormatError(
+ "open element on stack: " + `n`)
+ # should be more careful, but this is easier to code:
+ stack = []
+ self.write(")document\n")
+ elif envname == stack[-1]:
+ self.write(")%s\n" % envname)
del stack[-1]
- if macroname not in discards:
- ofp.write("-\\n\n)%s\n-\\n\n" % macroname)
- popping(macroname, "c", len(stack) + depth - 1)
- del stack[-1]
- real_ofp = ofp
- if macroname in discards:
- ofp = StringIO.StringIO()
- #
- conversion = table.get(macroname, ([], 0, 0, 0, 0))
- params, optional, empty, environ, nocontent = conversion
- if empty:
- ofp.write("e\n")
- elif nocontent:
- empty = 1
- if not numbered:
- ofp.write("Anumbered TOKEN no\n")
- opened = 0
- # rip off the macroname
- if params:
- if optional and len(params) == 1:
- line = line = line[m.end():]
+ popping(envname, "a", len(stack) + depth)
else:
- line = line[m.end(1):]
- elif empty:
- line = line[m.end(1):]
- else:
+ self.err_write("stack: %s\n" % `stack`)
+ raise LaTeXFormatError(
+ "environment close for %s doesn't match" % envname)
line = line[m.end():]
- #
- # Very ugly special case to deal with \item[]. The catch is that
- # this needs to occur outside the for loop that handles attribute
- # parsing so we can 'continue' the outer loop.
- #
- if optional and type(params[0]) is type(()):
- # the attribute name isn't used in this special case
- pushing(macroname, "a", depth + len(stack))
- stack.append(macroname)
- ofp.write("(%s\n" % macroname)
- m = _start_optional_rx.match(line)
- if m:
- line = line[m.end():]
- line = subconvert(line, ofp, table, discards,
- autoclosing, endchar="]",
- depth=depth + len(stack))
- line = "}" + line
continue
- # handle attribute mappings here:
- for attrname in params:
- if optional:
- optional = 0
- if type(attrname) is type(""):
- m = _optional_rx.match(line)
- if m:
- line = line[m.end():]
- ofp.write("A%s TOKEN %s\n"
- % (attrname, encode(m.group(1))))
- elif type(attrname) is type(()):
- # This is a sub-element; but don't place the
- # element we found on the stack (\section-like)
- pushing(macroname, "b", len(stack) + depth)
+ m = _begin_macro_rx.match(line)
+ if m:
+ # start of macro
+ macroname = m.group(1)
+ if macroname == "verbatim":
+ # really magic case!
+ pos = string.find(line, "\\end{verbatim}")
+ text = line[m.end(1):pos]
+ self.write("(verbatim\n")
+ self.write("-%s\n" % encode(text))
+ self.write(")verbatim\n")
+ line = line[pos + len("\\end{verbatim}"):]
+ continue
+ numbered = 1
+ opened = 0
+ if macroname[-1] == "*":
+ macroname = macroname[:-1]
+ numbered = 0
+ if macroname in self.autoclosing and macroname in stack:
+ while stack[-1] != macroname:
+ top = stack.pop()
+ if top and top not in self.discards:
+ self.write(")%s\n-\\n\n" % top)
+ popping(top, "b", len(stack) + depth)
+ if macroname not in self.discards:
+ self.write("-\\n\n)%s\n-\\n\n" % macroname)
+ popping(macroname, "c", len(stack) + depth - 1)
+ del stack[-1]
+ #
+ if macroname in self.discards:
+ self.push_output(StringIO.StringIO())
+ else:
+ self.push_output(self.ofp)
+ #
+ params, optional, empty, environ = self.start_macro(macroname)
+ if not numbered:
+ self.write("Anumbered TOKEN no\n")
+ # rip off the macroname
+ if params:
+ if optional and len(params) == 1:
+ line = line[m.end():]
+ else:
+ line = line[m.end(1):]
+ elif empty:
+ line = line[m.end(1):]
+ else:
+ line = line[m.end():]
+ #
+ # Very ugly special case to deal with \item[]. The catch
+ # is that this needs to occur outside the for loop that
+ # handles attribute parsing so we can 'continue' the outer
+ # loop.
+ #
+ if optional and type(params[0]) is type(()):
+ # the attribute name isn't used in this special case
+ pushing(macroname, "a", depth + len(stack))
stack.append(macroname)
- ofp.write("(%s\n" % macroname)
- macroname = attrname[0]
- m = _start_group_rx.match(line)
+ self.write("(%s\n" % macroname)
+ m = _start_optional_rx.match(line)
if m:
+ self.line = line[m.end():]
+ line = self.subconvert("]", depth + len(stack))
+ line = "}" + line
+ continue
+ # handle attribute mappings here:
+ for attrname in params:
+ if optional:
+ optional = 0
+ if type(attrname) is type(""):
+ m = _optional_rx.match(line)
+ if m:
+ line = line[m.end():]
+ self.write("A%s TOKEN %s\n"
+ % (attrname, encode(m.group(1))))
+ elif type(attrname) is type(()):
+ # This is a sub-element; but don't place the
+ # element we found on the stack (\section-like)
+ pushing(macroname, "b", len(stack) + depth)
+ stack.append(macroname)
+ self.write("(%s\n" % macroname)
+ macroname = attrname[0]
+ m = _start_group_rx.match(line)
+ if m:
+ line = line[m.end():]
+ elif type(attrname) is type([]):
+ # A normal subelement.
+ attrname = attrname[0]
+ if not opened:
+ opened = 1
+ self.write("(%s\n" % macroname)
+ pushing(macroname, "c", len(stack) + depth)
+ self.write("(%s\n" % attrname)
+ pushing(attrname, "sub-elem", len(stack) + depth + 1)
+ self.line = skip_white(line)[1:]
+ line = subconvert("}", depth + len(stack) + 2)
+ popping(attrname, "sub-elem", len(stack) + depth + 1)
+ self.write(")%s\n" % attrname)
+ else:
+ m = _parameter_rx.match(line)
+ if not m:
+ raise LaTeXFormatError(
+ "could not extract parameter %s for %s: %s"
+ % (attrname, macroname, `line[:100]`))
+ value = m.group(1)
+ if _token_rx.match(value):
+ dtype = "TOKEN"
+ else:
+ dtype = "CDATA"
+ self.write("A%s %s %s\n"
+ % (attrname, dtype, encode(value)))
line = line[m.end():]
- elif type(attrname) is type([]):
- # A normal subelement.
- attrname = attrname[0]
- if not opened:
- opened = 1
- ofp.write("(%s\n" % macroname)
- pushing(macroname, "c", len(stack) + depth)
- ofp.write("(%s\n" % attrname)
- pushing(attrname, "sub-elem", len(stack) + depth + 1)
- line = subconvert(skip_white(line)[1:], ofp, table,
- discards, autoclosing, endchar="}",
- depth=depth + len(stack) + 2)
- popping(attrname, "sub-elem", len(stack) + depth + 1)
- ofp.write(")%s\n" % attrname)
- else:
- m = _parameter_rx.match(line)
+ if params and type(params[-1]) is type('') \
+ and (not empty) and not environ:
+ # attempt to strip off next '{'
+ m = _start_group_rx.match(line)
if not m:
raise LaTeXFormatError(
- "could not extract parameter %s for %s: %s"
- % (attrname, macroname, `line[:100]`))
- value = m.group(1)
- if _token_rx.match(value):
- dtype = "TOKEN"
- else:
- dtype = "CDATA"
- ofp.write("A%s %s %s\n"
- % (attrname, dtype, encode(value)))
+ "non-empty element '%s' has no content: %s"
+ % (macroname, line[:12]))
line = line[m.end():]
- if params and type(params[-1]) is type('') \
- and (not empty) and not environ:
- # attempt to strip off next '{'
- m = _start_group_rx.match(line)
- if not m:
- raise LaTeXFormatError(
- "non-empty element '%s' has no content: %s"
- % (macroname, line[:12]))
+ if not opened:
+ self.write("(%s\n" % macroname)
+ pushing(macroname, "d", len(stack) + depth)
+ if empty:
+ line = "}" + line
+ stack.append(macroname)
+ self.pop_output()
+ continue
+ if line[0] == endchar and not stack:
+ if DEBUG:
+ self.err_write("subconvert() --> %s\n" % `line[1:21]`)
+ self.line = line[1:]
+ return self.line
+ if line[0] == "}":
+ # end of macro or group
+ macroname = stack[-1]
+ conversion = self.table.get(macroname)
+ if macroname \
+ and macroname not in self.discards \
+ and type(conversion) is not type(""):
+ # otherwise, it was just a bare group
+ self.write(")%s\n" % stack[-1])
+ popping(macroname, "d", len(stack) + depth - 1)
+ del stack[-1]
+ line = line[1:]
+ continue
+ if line[0] == "{":
+ pushing("", "e", len(stack) + depth)
+ stack.append("")
+ line = line[1:]
+ continue
+ if line[0] == "\\" and line[1] in ESCAPED_CHARS:
+ self.write("-%s\n" % encode(line[1]))
+ line = line[2:]
+ continue
+ if line[:2] == r"\\":
+ self.write("(BREAK\n)BREAK\n")
+ line = line[2:]
+ continue
+ m = _text_rx.match(line)
+ if m:
+ text = encode(m.group())
+ self.write("-%s\n" % text)
line = line[m.end():]
- if not opened:
- ofp.write("(%s\n" % macroname)
- pushing(macroname, "d", len(stack) + depth)
- if empty:
- line = "}" + line
- stack.append(macroname)
- ofp = real_ofp
- continue
- if line[0] == endchar and not stack:
- if DEBUG:
- sys.stderr.write("subconvert() --> %s\n" % `line[1:21]`)
- return line[1:]
- if line[0] == "}":
- # end of macro
- macroname = stack[-1]
- conversion = table.get(macroname)
- if macroname \
- and macroname not in discards \
- and type(conversion) is not type(""):
- # otherwise, it was just a bare group
- ofp.write(")%s\n" % stack[-1])
- popping(macroname, "d", len(stack) + depth - 1)
- del stack[-1]
- line = line[1:]
- continue
- if line[0] == "{":
- pushing("", "e", len(stack) + depth)
- stack.append("")
- line = line[1:]
- continue
- if line[0] == "\\" and line[1] in ESCAPED_CHARS:
- ofp.write("-%s\n" % encode(line[1]))
- line = line[2:]
- continue
- if line[:2] == r"\\":
- ofp.write("(BREAK\n)BREAK\n")
- line = line[2:]
- continue
- m = _text_rx.match(line)
- if m:
- text = encode(m.group())
- ofp.write("-%s\n" % text)
- line = line[m.end():]
- continue
- # special case because of \item[]
- if line[0] == "]":
- ofp.write("-]\n")
- line = line[1:]
- continue
- # avoid infinite loops
- extra = ""
- if len(line) > 100:
- extra = "..."
- raise LaTeXFormatError("could not identify markup: %s%s"
- % (`line[:100]`, extra))
- while stack and stack[-1] in autoclosing:
- ofp.write("-\\n\n")
- ofp.write(")%s\n" % stack[-1])
- popping(stack[-1], "e", len(stack) + depth - 1)
- del stack[-1]
- if stack:
- raise LaTeXFormatError("elements remain on stack: "
- + string.join(stack))
- # otherwise we just ran out of input here...
+ continue
+ # special case because of \item[]
+ if line[0] == "]":
+ self.write("-]\n")
+ line = line[1:]
+ continue
+ # avoid infinite loops
+ extra = ""
+ if len(line) > 100:
+ extra = "..."
+ raise LaTeXFormatError("could not identify markup: %s%s"
+ % (`line[:100]`, extra))
+ while stack and stack[-1] in self.autoclosing:
+ self.write("-\\n\n")
+ self.write(")%s\n" % stack[-1])
+ popping(stack.pop(), "e", len(stack) + depth - 1)
+ if stack:
+ raise LaTeXFormatError("elements remain on stack: "
+ + string.join(stack, ", "))
+ # otherwise we just ran out of input here...
+
+ def convert(self):
+ self.subconvert()
+
+ def start_macro(self, name):
+ conversion = self.table.get(name, ([], 0, 0, 0, 0))
+ params, optional, empty, environ, nocontent = conversion
+ if empty:
+ self.write("e\n")
+ elif nocontent:
+ empty = 1
+ return params, optional, empty, environ
def convert(ifp, ofp, table={}, discards=(), autoclosing=()):
- lines = string.split(ifp.read(), "\n")
- for i in range(len(lines)):
- lines[i] = string.rstrip(lines[i])
- data = string.join(lines, "\n")
+ c = Conversion(ifp, ofp, table, discards, autoclosing)
try:
- subconvert(data, ofp, table, discards, autoclosing)
+ c.convert()
except IOError, (err, msg):
if err != errno.EPIPE:
raise