summaryrefslogtreecommitdiffstats
path: root/Doc
diff options
context:
space:
mode:
Diffstat (limited to 'Doc')
-rwxr-xr-xDoc/tools/sgmlconv/esis2sgml.py131
-rwxr-xr-xDoc/tools/sgmlconv/latex2esis.py337
2 files changed, 468 insertions, 0 deletions
diff --git a/Doc/tools/sgmlconv/esis2sgml.py b/Doc/tools/sgmlconv/esis2sgml.py
new file mode 100755
index 0000000..5f8c6e8
--- /dev/null
+++ b/Doc/tools/sgmlconv/esis2sgml.py
@@ -0,0 +1,131 @@
+#! /usr/bin/env python
+
+"""Convert ESIS events to SGML or XML markup.
+
+This is limited, but seems sufficient for the ESIS generated by the
+latex2esis.py script when run over the Python documentation.
+"""
+__version__ = '$Revision$'
+
+import errno
+import re
+import string
+
+
# Matches a maximal run of characters that contains no backslash.
_data_rx = re.compile(r"[^\\]+")


def decode(s):
    """Return *s* with its ESIS escape sequences expanded.

    Two escapes are understood: a backslash followed by ``n`` becomes a
    newline, and a doubled backslash becomes a single backslash.  All
    other text is copied through unchanged.

    Raises ValueError for any other backslash sequence, including a lone
    backslash at the very end of the string.
    """
    parts = []
    pos = 0
    end = len(s)
    while pos < end:
        m = _data_rx.match(s, pos)
        if m:
            parts.append(m.group())
            pos = m.end()
            continue
        # s[pos] is a backslash here; slicing (rather than indexing) keeps
        # a truncated escape from raising IndexError.
        escape = s[pos:pos + 2]
        if escape == "\\\\":
            parts.append("\\")
        elif escape == "\\n":
            parts.append("\n")
        else:
            raise ValueError("can't handle " + repr(s[pos:]))
        pos = pos + 2
    return "".join(parts)
+
+
def format_attrs(attrs):
    """Return the attributes in the mapping *attrs* as markup text.

    Each item becomes `` name="value"`` (note the leading space); the
    items are emitted in sorted order so the output is reproducible.  An
    empty mapping yields the empty string.

    NOTE: values are inserted verbatim; nothing is escaped, so a value
    containing a double quote produces malformed markup.
    """
    # sorted() works whether items() returns a list or a view, and a
    # single join avoids rebuilding the whole string for every attribute.
    return "".join([' %s="%s"' % (name, value)
                    for name, value in sorted(attrs.items())])
+
+
def do_convert(ifp, ofp, knownempties, xml=0):
    """Read ESIS events from *ifp* and write the matching markup to *ofp*.

    ifp -- object with a readline() method yielding one event per line.
    ofp -- object with a write() method receiving the markup.
    knownempties -- list of element names known to be empty; names seen
        with an "e" event are appended to it as they are discovered.
    xml -- when true, write XML (``<gi/>`` for empty elements, every
        other element closed by name); otherwise write SGML, where the
        end tag of a known-empty element is omitted and ``</>`` is used
        to close the element opened most recently.

    The first character of each line selects the event: "-" character
    data, "(" element start, ")" element end, "A" attribute for the next
    element start, "e" marks the next element as empty.  Lines with any
    other first character are ignored.

    Raises ValueError for an "A" event that does not have the form
    "name type value", and whatever decode() raises for bad escapes.
    """
    attrs = {}
    lastopened = None
    knownempty = 0
    lastempty = 0
    while 1:
        line = ifp.readline()
        if not line:
            break

        event = line[0]
        data = line[1:]
        if data[-1:] == "\n":
            data = data[:-1]

        if event == "-":
            data = decode(data)
            ofp.write(data)
            if "\n" in data:
                # A line break ends the window in which "</>" may be used.
                lastopened = None
            knownempty = 0
            lastempty = 0
        elif event == "(":
            if knownempty and xml:
                ofp.write("<%s%s/>" % (data, format_attrs(attrs)))
            else:
                ofp.write("<%s%s>" % (data, format_attrs(attrs)))
            if knownempty and data not in knownempties:
                # accumulate knowledge!
                knownempties.append(data)
            attrs = {}
            lastopened = data
            lastempty = knownempty
            knownempty = 0
        elif event == ")":
            if xml:
                # The empty-element tag already closed the element.
                if not lastempty:
                    ofp.write("</%s>" % data)
            elif data not in knownempties:
                if lastopened == data:
                    ofp.write("</>")
                else:
                    ofp.write("</%s>" % data)
            lastopened = None
            lastempty = 0
        elif event == "A":
            fields = data.split(" ", 2)
            if len(fields) != 3:
                raise ValueError("malformed attribute event: " + repr(line))
            # The declared type (middle field) is not needed for output.
            name, _dtype, value = fields
            attrs[name] = decode(value)
        elif event == "e":
            knownempty = 1
+
+
def sgml_convert(ifp, ofp, knownempties=()):
    """Convert the ESIS events read from *ifp* to SGML written to *ofp*.

    *knownempties* names the elements known to be empty.  It is copied
    into a new list first, so the sequence passed in is left untouched
    even though the conversion extends its own list.
    """
    empties = list(knownempties)
    return do_convert(ifp, ofp, empties, xml=0)
+
+
def xml_convert(ifp, ofp, knownempties=()):
    """Convert the ESIS events read from *ifp* to XML written to *ofp*.

    *knownempties* is copied into a new list before being handed to the
    conversion, so the sequence passed in is left untouched.
    """
    empties = list(knownempties)
    return do_convert(ifp, ofp, empties, xml=1)
+
+
def usage():
    """Write a one-line summary of the command line to standard error."""
    import sys
    sys.stderr.write("usage: esis2sgml.py [-x | --xml] [infile [outfile]]\n")


def main():
    """Command-line entry point.

    A leading "-x" or "--xml" argument selects XML output instead of
    SGML (and is removed from sys.argv).  Input comes from the first
    remaining argument or standard input; output goes to the second
    remaining argument or standard output.  With more arguments than
    that, the usage message is printed and the process exits with
    status 2.

    An IOError with errno EPIPE raised during conversion is swallowed;
    every other error propagates.
    """
    import sys
    #
    use_xml = len(sys.argv) > 1 and sys.argv[1] in ("-x", "--xml")
    if use_xml:
        del sys.argv[1]
    if len(sys.argv) > 3:
        usage()
        sys.exit(2)
    if use_xml:
        convert = xml_convert
    else:
        convert = sgml_convert

    ifp = sys.stdin
    ofp = sys.stdout
    opened = []                 # only files opened here get closed here
    try:
        if len(sys.argv) >= 2:
            ifp = open(sys.argv[1])
            opened.append(ifp)
        if len(sys.argv) == 3:
            ofp = open(sys.argv[2], "w")
            opened.append(ofp)
        # knownempties is ignored in the XML version
        try:
            convert(ifp, ofp)
        except IOError as e:
            if e.errno != errno.EPIPE:
                raise
    finally:
        for fp in opened:
            fp.close()


if __name__ == "__main__":
    main()
diff --git a/Doc/tools/sgmlconv/latex2esis.py b/Doc/tools/sgmlconv/latex2esis.py
new file mode 100755
index 0000000..afa2d86
--- /dev/null
+++ b/Doc/tools/sgmlconv/latex2esis.py
@@ -0,0 +1,337 @@
+#! /usr/bin/env python
+
+"""Generate ESIS events based on a LaTeX source document and configuration
+data.
+
+
+"""
+__version__ = '$Revision$'
+
+import errno
+import re
+import string
+import StringIO
+import sys
+
+
class Error(Exception):
    """Base class for the exceptions defined in this module."""
+
class LaTeXFormatError(Error):
    """Raised when the LaTeX input cannot be converted."""
+
+
# Patterns used to recognize the pieces of LaTeX markup.  All of them are
# applied with match(), i.e. anchored at the start of the remaining text.
# Literal square brackets are written as \[ and \] rather than as
# one-character sets, which newer versions of the re module warn about.
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
# Group 1 is the macro name (with an optional trailing "*"); group 2 is
# either the opening brace or the whitespace that follows the name.
_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?)({|\s*\n?)")
# The newline is optional so a comment on an unterminated last line is
# still recognized.
_comment_rx = re.compile(r"%+[ \t]*(.*)\n?")
_text_rx = re.compile(r"[^]%\\{}]+")
_optional_rx = re.compile(r"\s*\[([^]]*)\]")
_parameter_rx = re.compile(r"[ \n]*{([^}]*)}")
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
_start_group_rx = re.compile(r"[ \n]*{")
_start_optional_rx = re.compile(r"[ \n]*\[")
+
+
# Maps each of the 256 single-byte characters to its ESIS form: newline
# and backslash are written as escapes, everything else maps to itself.
_charmap = dict((chr(i), chr(i)) for i in range(256))
_charmap["\n"] = r"\n"
_charmap["\\"] = r"\\"


def encode(s):
    """Return *s* escaped for use as the data of an ESIS event.

    Newlines become a backslash followed by ``n`` and backslashes are
    doubled.  Characters that have no entry in the table are passed
    through unchanged.
    """
    return "".join([_charmap.get(ch, ch) for ch in s])
+
+
# Characters that stand for themselves when preceded by a backslash.
ESCAPED_CHARS = "$%#^ {}"


class _DiscardWriter:
    """File-like sink whose write() throws the data away."""

    def write(self, data):
        pass


def subconvert(line, ofp, table, discards, autoclosing, knownempty,
               endchar=None):
    """Translate LaTeX source text into ESIS events written to *ofp*.

    line -- the LaTeX text still to be converted (it may span many lines).
    ofp -- object with a write() method that receives the ESIS events.
    table -- maps a macro name to a ``(params, optional, empty)`` triple.
        Each entry of *params* is either a string (the name of an
        attribute taken from a ``{...}`` parameter, or from ``[...]`` when
        it is the first entry and *optional* is true), a tuple (the macro
        element is opened and the tuple's first item names a child
        element that is opened for the following group) or a list
        (handled like a tuple, with both names pushed on the stack).
        *empty* marks the element as empty.  Unknown macros are treated
        as ``([], 0, 0)``.
    discards -- names of macros whose own events are not written.
    autoclosing -- names of macros whose open element is closed
        implicitly when the same macro is seen again.
    knownempty -- callable taking a macro name and returning true if the
        element is known to be empty.
    endchar -- if given, conversion stops at this character once no
        element opened by this call is still open.

    Returns the text following *endchar*, or None when the input was used
    up first.

    Raises LaTeXFormatError for markup that cannot be handled, including
    unbalanced closing braces, mismatched environment ends, and
    unterminated verbatim environments or optional arguments.
    """
    stack = []
    while line:
        if line[0] == endchar and not stack:
            return line[1:]

        m = _comment_rx.match(line)
        if m:
            text = m.group(1)
            if text:
                ofp.write("(COMMENT\n")
                ofp.write("- %s \n" % encode(text))
                ofp.write(")COMMENT\n")
            # The line break that ended the comment is always emitted.
            ofp.write("-\\n\n")
            line = line[m.end():]
            continue

        m = _begin_env_rx.match(line)
        if m:
            # re-write to use the macro handler
            line = r"\%s %s" % (m.group(1), line[m.end():])
            continue

        m = _end_env_rx.match(line)
        if m:
            # end of environment
            envname = m.group(1)
            if envname == "document":
                # special magic
                for name in stack[1:]:
                    if name not in autoclosing:
                        raise LaTeXFormatError(
                            "open element on stack: " + repr(name))
                # should be more careful, but this is easier to code:
                stack = []
                ofp.write(")document\n")
            elif stack and envname == stack[-1]:
                ofp.write(")%s\n" % envname)
                del stack[-1]
            else:
                # Also reached when nothing at all is open.
                raise LaTeXFormatError("environment close doesn't match")
            line = line[m.end():]
            continue

        m = _begin_macro_rx.match(line)
        if m:
            # start of macro
            macroname = m.group(1)
            if macroname == "verbatim":
                # really magic case!  Everything up to the end of the
                # environment is copied through as character data.
                terminator = "\\end{verbatim}"
                pos = line.find(terminator)
                if pos < 0:
                    raise LaTeXFormatError(
                        "verbatim environment is not closed")
                text = line[m.end(1):pos]
                ofp.write("(verbatim\n")
                ofp.write("-%s\n" % encode(text))
                ofp.write(")verbatim\n")
                line = line[pos + len(terminator):]
                continue

            numbered = 1
            if macroname[-1] == "*":
                macroname = macroname[:-1]
                numbered = 0

            if macroname in autoclosing and macroname in stack:
                # Close everything opened since the previous occurrence
                # of this macro, then that occurrence itself.
                while stack[-1] != macroname:
                    if stack[-1] and stack[-1] not in discards:
                        ofp.write(")%s\n-\\n\n" % stack[-1])
                    del stack[-1]
                if macroname not in discards:
                    ofp.write("-\\n\n)%s\n-\\n\n" % macroname)
                del stack[-1]

            # Events of a discarded macro go to a sink until the macro
            # has been processed; the stack is still maintained.
            real_ofp = ofp
            if macroname in discards:
                ofp = _DiscardWriter()
            #
            conversion = table.get(macroname, ([], 0, 0))
            params, optional, empty = conversion
            empty = empty or knownempty(macroname)
            if empty:
                ofp.write("e\n")
            if not numbered:
                ofp.write("Anumbered TOKEN no\n")

            # rip off the macroname; m.end(1) keeps whatever followed the
            # name (brace or whitespace), m.end() drops it as well.
            if params:
                if optional and len(params) == 1:
                    line = line[m.end():]
                else:
                    line = line[m.end(1):]
            elif empty:
                line = line[m.end(1):]
            else:
                line = line[m.end():]
            #
            # Very ugly special case to deal with \item[].  The catch is
            # that this needs to occur outside the for loop that handles
            # attribute parsing so we can 'continue' the outer loop.
            #
            if optional and params and isinstance(params[0], tuple):
                # the attribute name isn't used in this special case
                stack.append(macroname)
                ofp.write("(%s\n" % macroname)
                opt = _start_optional_rx.match(line)
                if opt:
                    line = subconvert(line[opt.end():], ofp, table, discards,
                                      autoclosing, knownempty, endchar="]")
                    if line is None:
                        raise LaTeXFormatError(
                            "optional argument of \\%s is not closed"
                            % macroname)
                # A synthetic brace makes the main loop close the element.
                line = "}" + line
                ofp = real_ofp
                continue

            # handle attribute mappings here:
            for attrname in params:
                if optional:
                    # Only the first parameter can be optional.
                    optional = 0
                    if isinstance(attrname, str):
                        opt = _optional_rx.match(line)
                        if opt:
                            line = line[opt.end():]
                            ofp.write("A%s TOKEN %s\n"
                                      % (attrname, encode(opt.group(1))))
                elif isinstance(attrname, tuple):
                    # This is a sub-element; but don't place the
                    # element we found on the stack (\section-like)
                    stack.append(macroname)
                    ofp.write("(%s\n" % macroname)
                    macroname = attrname[0]
                    group = _start_group_rx.match(line)
                    if group:
                        line = line[group.end():]
                elif isinstance(attrname, list):
                    # A normal subelement.
                    attrname = attrname[0]
                    stack.append(macroname)
                    stack.append(attrname)
                    ofp.write("(%s\n" % macroname)
                    macroname = attrname
                else:
                    param = _parameter_rx.match(line)
                    if not param:
                        raise LaTeXFormatError(
                            "could not extract parameter %s for %s: %s"
                            % (attrname, macroname, repr(line[:100])))
                    value = param.group(1)
                    if _token_rx.match(value):
                        dtype = "TOKEN"
                    else:
                        dtype = "CDATA"
                    ofp.write("A%s %s %s\n"
                              % (attrname, dtype, encode(value)))
                    line = line[param.end():]

            stack.append(macroname)
            ofp.write("(%s\n" % macroname)
            if empty:
                # A synthetic brace closes the empty element right away.
                line = "}" + line
            ofp = real_ofp
            continue

        if line[0] == "}":
            # end of macro
            if not stack:
                raise LaTeXFormatError("unbalanced closing brace")
            macroname = stack[-1]
            conversion = table.get(macroname)
            if macroname \
               and macroname not in discards \
               and not isinstance(conversion, str):
                # otherwise, it was just a bare group
                ofp.write(")%s\n" % macroname)
            del stack[-1]
            line = line[1:]
            continue

        if line[0] == "{":
            # A bare group is tracked with an empty name.
            stack.append("")
            line = line[1:]
            continue

        if line[0] == "\\" and len(line) > 1 and line[1] in ESCAPED_CHARS:
            ofp.write("-%s\n" % encode(line[1]))
            line = line[2:]
            continue

        if line[:2] == r"\\":
            ofp.write("(BREAK\n)BREAK\n")
            line = line[2:]
            continue

        m = _text_rx.match(line)
        if m:
            text = encode(m.group())
            ofp.write("-%s\n" % text)
            line = line[m.end():]
            continue

        # special case because of \item[]
        if line[0] == "]":
            ofp.write("-]\n")
            line = line[1:]
            continue

        # avoid infinite loops
        extra = ""
        if len(line) > 100:
            extra = "..."
        raise LaTeXFormatError("could not identify markup: %s%s"
                               % (repr(line[:100]), extra))
+
+
def convert(ifp, ofp, table=None, discards=(), autoclosing=(),
            knownempties=()):
    """Convert the LaTeX document read from *ifp* to ESIS events on *ofp*.

    The whole input is read at once and handed to subconvert().  *table*,
    *discards* and *autoclosing* are passed through to it (a missing
    *table* means an empty one); *knownempties* lists the names of
    elements that are known to be empty.

    An IOError with errno EPIPE is swallowed; every other error,
    including LaTeXFormatError, propagates.
    """
    if table is None:
        table = {}
    # subconvert() wants a predicate; set membership supplies it.
    knownempty = frozenset(knownempties).__contains__
    try:
        subconvert(ifp.read(), ofp, table, discards, autoclosing, knownempty)
    except IOError as e:
        if e.errno != errno.EPIPE:
            raise
+
+
def usage():
    """Write a one-line summary of the command line to standard error."""
    sys.stderr.write("usage: latex2esis.py infile [outfile]\n")


def main():
    """Command-line entry point.

    Takes the input file name and an optional output file name from
    sys.argv; without an output name the events go to standard output.
    Any other number of arguments prints the usage message and exits with
    status 2.  Files opened here are closed again when conversion ends.
    """
    argc = len(sys.argv)
    if argc not in (2, 3):
        usage()
        sys.exit(2)

    table = {
        # entries are name
        #   -> ([list of attribute names], first_is_optional, empty)
        "cfuncdesc": (["type", "name", ("args",)], 0, 0),
        "chapter": ([("title",)], 0, 0),
        "chapter*": ([("title",)], 0, 0),
        "classdesc": (["name", ("constructor-args",)], 0, 0),
        "ctypedesc": (["name"], 0, 0),
        "cvardesc": (["type", "name"], 0, 0),
        "datadesc": (["name"], 0, 0),
        "declaremodule": (["id", "type", "name"], 1, 1),
        "deprecated": (["release"], 0, 1),
        "documentclass": (["classname"], 0, 1),
        "excdesc": (["name"], 0, 0),
        "funcdesc": (["name", ("args",)], 0, 0),
        "funcdescni": (["name", ("args",)], 0, 0),
        "indexii": (["ie1", "ie2"], 0, 1),
        "indexiii": (["ie1", "ie2", "ie3"], 0, 1),
        "indexiv": (["ie1", "ie2", "ie3", "ie4"], 0, 1),
        "input": (["source"], 0, 1),
        "item": ([("leader",)], 1, 0),
        "label": (["id"], 0, 1),
        "manpage": (["name", "section"], 0, 1),
        "memberdesc": (["class", "name"], 1, 0),
        "methoddesc": (["class", "name", ("args",)], 1, 0),
        "methoddescni": (["class", "name", ("args",)], 1, 0),
        "opcodedesc": (["name", "var"], 0, 0),
        "par": ([], 0, 1),
        "paragraph": ([("title",)], 0, 0),
        "rfc": (["number"], 0, 1),
        "section": ([("title",)], 0, 0),
        "seemodule": (["ref", "name"], 1, 0),
        "subparagraph": ([("title",)], 0, 0),
        "subsection": ([("title",)], 0, 0),
        "subsubsection": ([("title",)], 0, 0),
        "tableii": (["colspec", "style", "head1", "head2"], 0, 0),
        "tableiii": (["colspec", "style", "head1", "head2", "head3"], 0, 0),
        "tableiv": (["colspec", "style", "head1", "head2", "head3", "head4"],
                    0, 0),
        "versionadded": (["version"], 0, 1),
        "versionchanged": (["version"], 0, 1),
        #
        "ABC": ([], 0, 1),
        "ASCII": ([], 0, 1),
        "C": ([], 0, 1),
        "Cpp": ([], 0, 1),
        "EOF": ([], 0, 1),
        "e": ([], 0, 1),
        "ldots": ([], 0, 1),
        "NULL": ([], 0, 1),
        "POSIX": ([], 0, 1),
        "UNIX": ([], 0, 1),
        #
        # Things that will actually be going away!
        #
        "fi": ([], 0, 1),
        "ifhtml": ([], 0, 1),
        "makeindex": ([], 0, 1),
        "makemodindex": ([], 0, 1),
        "maketitle": ([], 0, 1),
        "noindent": ([], 0, 1),
        "tableofcontents": ([], 0, 1),
        }
    discards = ["fi", "ifhtml", "makeindex", "makemodindex", "maketitle",
                "noindent", "tableofcontents"]
    autoclosing = ["chapter", "section", "subsection", "subsubsection",
                   "paragraph", "subparagraph", ]
    knownempties = ["appendix",
                    "maketitle", "makeindex", "makemodindex",
                    "localmoduletable"]

    ifp = open(sys.argv[1])
    try:
        if argc == 3:
            ofp = open(sys.argv[2], "w")
        else:
            ofp = sys.stdout
        try:
            convert(ifp, ofp, table,
                    discards=discards,
                    autoclosing=autoclosing,
                    knownempties=knownempties)
        finally:
            # Standard output belongs to the caller; only close our file.
            if ofp is not sys.stdout:
                ofp.close()
    finally:
        ifp.close()


if __name__ == "__main__":
    main()