diff options
author | Fred Drake <fdrake@acm.org> | 1999-02-18 16:30:16 (GMT) |
---|---|---|
committer | Fred Drake <fdrake@acm.org> | 1999-02-18 16:30:16 (GMT) |
commit | 607aed7a2cb3bc43d3e7a4a5c94e273666da6948 (patch) | |
tree | 8767300f68e32eb3123b22a9d6af3f1a94253066 /Doc | |
parent | 4cc902f4641a49971b9b6eaea002d5b2c861ff00 (diff) | |
download | cpython-607aed7a2cb3bc43d3e7a4a5c94e273666da6948.zip cpython-607aed7a2cb3bc43d3e7a4a5c94e273666da6948.tar.gz cpython-607aed7a2cb3bc43d3e7a4a5c94e273666da6948.tar.bz2 |
Some adjustments, mostly to make it more general.
Diffstat (limited to 'Doc')
-rwxr-xr-x | Doc/tools/sgmlconv/esis2sgml.py | 118 |
1 files changed, 97 insertions, 21 deletions
diff --git a/Doc/tools/sgmlconv/esis2sgml.py b/Doc/tools/sgmlconv/esis2sgml.py index 762e5ff..aea9962 100755 --- a/Doc/tools/sgmlconv/esis2sgml.py +++ b/Doc/tools/sgmlconv/esis2sgml.py @@ -5,6 +5,10 @@ This is limited, but seems sufficient for the ESIS generated by the latex2esis.py script when run over the Python documentation. """ + +# This should have an explicit option to indicate whether the *INPUT* was +# generated from an SGML or an XML application. + __version__ = '$Revision$' import errno @@ -16,29 +20,52 @@ import string from xml.utils import escape +AUTOCLOSE = () + EMPTIES_FILENAME = "../sgml/empties.dat" LIST_EMPTIES = 0 +_elem_map = {} +_attr_map = {} +_token_map = {} + +_normalize_case = str + +def map_gi(sgmlgi, map): + uncased = _normalize_case(sgmlgi) + try: + return map[uncased] + except IndexError: + map[uncased] = sgmlgi + return sgmlgi + +def null_map_gi(sgmlgi, map): + return sgmlgi + + def format_attrs(attrs, xml=0): attrs = attrs.items() attrs.sort() - s = '' + parts = [] + append = parts.append for name, value in attrs: if xml: - s = '%s %s="%s"' % (s, name, escape(value)) + append('%s="%s"' % (name, escape(value))) else: # this is a little bogus, but should do for now if name == value and isnmtoken(value): - s = "%s %s" % (s, value) + append(value) elif istoken(value): if value == "no" + name: - s = "%s %s" % (s, value) + append(value) else: - s = "%s %s=%s" % (s, name, value) + append("%s=%s" % (name, value)) else: - s = '%s %s="%s"' % (s, name, escape(value)) - return s + append('%s="%s"' % (name, escape(value))) + if parts: + parts.insert(0, '') + return string.join(parts) _nmtoken_rx = re.compile("[a-z][-._a-z0-9]*$", re.IGNORECASE) @@ -78,6 +105,7 @@ def do_convert(ifp, ofp, xml=0, autoclose=()): if data == "COMMENT": ofp.write("<!--") continue + data = map_gi(data, _elem_map) if knownempty and xml: ofp.write("<%s%s/>" % (data, format_attrs(attrs, xml))) else: @@ -93,6 +121,7 @@ def do_convert(ifp, ofp, xml=0, autoclose=()): if data == "COMMENT": ofp.write("-->") continue + data = map_gi(data, _elem_map) if xml: if not lastempty: ofp.write("</%s>" % data) @@ -107,19 +136,24 @@ def do_convert(ifp, ofp, xml=0, autoclose=()): lastempty = 0 elif type == "A": name, type, value = string.split(data, " ", 2) + name = map_gi(name, _attr_map) attrs[name] = esistools.decode(value) elif type == "e": knownempty = 1 if LIST_EMPTIES: - knownempties.append("") - if os.path.isfile(EMPTIES_FILENAME): - mode = "a" - else: - mode = "w" - fp = open(EMPTIES_FILENAME, mode) - fp.write(string.join(knownempties, "\n")) - fp.close() + dump_empty_element_names(knownempties) + + +def dump_empty_element_names(knownempties): + knownempties.append("") + if os.path.isfile(EMPTIES_FILENAME): + mode = "a" + else: + mode = "w" + fp = open(EMPTIES_FILENAME, mode) + fp.write(string.join(knownempties, "\n")) + fp.close() def sgml_convert(ifp, ofp, autoclose): @@ -130,7 +164,13 @@ def xml_convert(ifp, ofp, autoclose): return do_convert(ifp, ofp, xml=1, autoclose=autoclose) -AUTOCLOSE = ("para", "term",) +def update_gi_map(map, names, fromsgml=1): + for name in string.split(names, ","): + if fromsgml: + uncased = string.lower(name) + else: + uncased = name + map[uncased] = name def main(): @@ -138,19 +178,39 @@ def main(): import sys # autoclose = AUTOCLOSE - convert = sgml_convert - xml = 0 + convert = xml_convert + xml = 1 xmldecl = 0 - opts, args = getopt.getopt(sys.argv[1:], "adx", - ["autoclose", "declare", "xml"]) + elem_names = '' + attr_names = '' + value_names = '' + opts, args = getopt.getopt(sys.argv[1:], "adesx", + ["autoclose=", "declare", "sgml", "xml", + "elements-map=", "attributes-map", + "values-map="]) for opt, arg in opts: if opt in ("-d", "--declare"): xmldecl = 1 + elif opt == "-e": + global LIST_EMPTIES + LIST_EMPTIES = 1 + elif opt in ("-s", "--sgml"): + xml = 0 + convert = sgml_convert elif opt in ("-x", "--xml"): xml = 1 convert = xml_convert elif opt in ("-a", "--autoclose"): autoclose = string.split(arg, ",") + elif opt == "--elements-map": + elem_names = ("%s,%s" % (elem_names, arg))[1:] + elif opt == "--attributes-map": + attr_names = ("%s,%s" % (attr_names, arg))[1:] + elif opt == "--values-map": + value_names = ("%s,%s" % (value_names, arg))[1:] + # + # open input streams: + # if len(args) == 0: ifp = sys.stdin ofp = sys.stdout @@ -163,7 +223,23 @@ def main(): else: usage() sys.exit(2) - # knownempties is ignored in the XML version + # + # setup the name maps: + # + if elem_names or attr_names or value_names: + # assume the origin was SGML; ignore case of the names from the ESIS + # stream but set up conversion tables to get the case right on output + global _normalize_case + _normalize_case = string.lower + update_gi_map(_elem_map, string.split(elem_names, ",")) + update_gi_map(_attr_map, string.split(attr_names, ",")) + update_gi_map(_values_map, string.split(value_names, ",")) + else: + global map_gi + map_gi = null_map_gi + # + # run the conversion: + # try: if xml and xmldecl: opf.write('<?xml version="1.0" encoding="iso8859-1"?>\n') |