summaryrefslogtreecommitdiffstats
path: root/Doc/tools/sgmlconv
diff options
context:
space:
mode:
author Fred Drake <fdrake@acm.org> 1999-02-18 16:30:16 (GMT)
committer Fred Drake <fdrake@acm.org> 1999-02-18 16:30:16 (GMT)
commit 607aed7a2cb3bc43d3e7a4a5c94e273666da6948 (patch)
tree 8767300f68e32eb3123b22a9d6af3f1a94253066 /Doc/tools/sgmlconv
parent 4cc902f4641a49971b9b6eaea002d5b2c861ff00 (diff)
downloadcpython-607aed7a2cb3bc43d3e7a4a5c94e273666da6948.zip
cpython-607aed7a2cb3bc43d3e7a4a5c94e273666da6948.tar.gz
cpython-607aed7a2cb3bc43d3e7a4a5c94e273666da6948.tar.bz2
Some adjustments, mostly to make it more general.
Diffstat (limited to 'Doc/tools/sgmlconv')
-rwxr-xr-x Doc/tools/sgmlconv/esis2sgml.py 118
1 files changed, 97 insertions, 21 deletions
diff --git a/Doc/tools/sgmlconv/esis2sgml.py b/Doc/tools/sgmlconv/esis2sgml.py
index 762e5ff..aea9962 100755
--- a/Doc/tools/sgmlconv/esis2sgml.py
+++ b/Doc/tools/sgmlconv/esis2sgml.py
@@ -5,6 +5,10 @@
This is limited, but seems sufficient for the ESIS generated by the
latex2esis.py script when run over the Python documentation.
"""
+
+# This should have an explicit option to indicate whether the *INPUT* was
+# generated from an SGML or an XML application.
+
__version__ = '$Revision$'
import errno
@@ -16,29 +20,52 @@ import string
from xml.utils import escape
+AUTOCLOSE = ()
+
EMPTIES_FILENAME = "../sgml/empties.dat"
LIST_EMPTIES = 0
# Tables mapping a case-normalized name to the spelling written on output.
# _elem_map is consulted for element names and _attr_map for attribute
# names; _token_map is defined here but not used by the code shown.
_elem_map = {}
_attr_map = {}
_token_map = {}

# Applied to a name before it is looked up in one of the tables above.
# The default, str, returns a string unchanged, so lookups are exact.
_normalize_case = str


def map_gi(sgmlgi, map):
    """Return the output spelling for the name `sgmlgi`.

    The name is normalized with `_normalize_case` and looked up in `map`.
    If the normalized name has no entry yet, `sgmlgi` itself is stored
    under that key and returned, so later names that normalize to the
    same key are written with the first spelling seen.
    """
    uncased = _normalize_case(sgmlgi)
    try:
        return map[uncased]
    except KeyError:
        # A dictionary lookup that misses raises KeyError (IndexError is
        # only raised by sequences), so this is the "first time seen" path.
        map[uncased] = sgmlgi
        return sgmlgi


def null_map_gi(sgmlgi, map):
    """Return `sgmlgi` unchanged; `map` is accepted but ignored.

    Takes the same arguments as map_gi() so it can be used in its place
    when no name mapping is wanted.
    """
    return sgmlgi
+
+
def format_attrs(attrs, xml=0):
attrs = attrs.items()
attrs.sort()
- s = ''
+ parts = []
+ append = parts.append
for name, value in attrs:
if xml:
- s = '%s %s="%s"' % (s, name, escape(value))
+ append('%s="%s"' % (name, escape(value)))
else:
# this is a little bogus, but should do for now
if name == value and isnmtoken(value):
- s = "%s %s" % (s, value)
+ append(value)
elif istoken(value):
if value == "no" + name:
- s = "%s %s" % (s, value)
+ append(value)
else:
- s = "%s %s=%s" % (s, name, value)
+ append("%s=%s" % (name, value))
else:
- s = '%s %s="%s"' % (s, name, escape(value))
- return s
+ append('%s="%s"' % (name, escape(value)))
+ if parts:
+ parts.insert(0, '')
+ return string.join(parts)
_nmtoken_rx = re.compile("[a-z][-._a-z0-9]*$", re.IGNORECASE)
@@ -78,6 +105,7 @@ def do_convert(ifp, ofp, xml=0, autoclose=()):
if data == "COMMENT":
ofp.write("<!--")
continue
+ data = map_gi(data, _elem_map)
if knownempty and xml:
ofp.write("<%s%s/>" % (data, format_attrs(attrs, xml)))
else:
@@ -93,6 +121,7 @@ def do_convert(ifp, ofp, xml=0, autoclose=()):
if data == "COMMENT":
ofp.write("-->")
continue
+ data = map_gi(data, _elem_map)
if xml:
if not lastempty:
ofp.write("</%s>" % data)
@@ -107,19 +136,24 @@ def do_convert(ifp, ofp, xml=0, autoclose=()):
lastempty = 0
elif type == "A":
name, type, value = string.split(data, " ", 2)
+ name = map_gi(name, _attr_map)
attrs[name] = esistools.decode(value)
elif type == "e":
knownempty = 1
if LIST_EMPTIES:
- knownempties.append("")
- if os.path.isfile(EMPTIES_FILENAME):
- mode = "a"
- else:
- mode = "w"
- fp = open(EMPTIES_FILENAME, mode)
- fp.write(string.join(knownempties, "\n"))
- fp.close()
+ dump_empty_element_names(knownempties)
+
+
def dump_empty_element_names(knownempties):
    """Write the names in `knownempties` to EMPTIES_FILENAME, one per line.

    The file is opened for appending when it already exists and is created
    otherwise.  The caller's sequence is not modified.
    """
    # Joining with one extra empty item puts a newline after the last name.
    # Working on a copy leaves the caller's list untouched.
    lines = list(knownempties) + [""]
    if os.path.isfile(EMPTIES_FILENAME):
        mode = "a"
    else:
        mode = "w"
    fp = open(EMPTIES_FILENAME, mode)
    try:
        fp.write(string.join(lines, "\n"))
    finally:
        # Close the file even when the write fails.
        fp.close()
def sgml_convert(ifp, ofp, autoclose):
@@ -130,7 +164,13 @@ def xml_convert(ifp, ofp, autoclose):
return do_convert(ifp, ofp, xml=1, autoclose=autoclose)
-AUTOCLOSE = ("para", "term",)
def update_gi_map(map, names, fromsgml=1):
    """Record the output spelling of each name in `names` in `map`.

    `names` may be a comma-separated string or a sequence of names that
    has already been split (the visible caller passes the result of
    string.split()).  Empty names, such as those produced by splitting an
    empty string, are skipped.

    When `fromsgml` is true each name is stored under its lower-cased
    form; otherwise it is stored under its exact spelling.
    """
    if type(names) is type(""):
        names = string.split(names, ",")
    for name in names:
        if not name:
            continue
        if fromsgml:
            uncased = string.lower(name)
        else:
            uncased = name
        map[uncased] = name
def main():
@@ -138,19 +178,39 @@ def main():
import sys
#
autoclose = AUTOCLOSE
- convert = sgml_convert
- xml = 0
+ convert = xml_convert
+ xml = 1
xmldecl = 0
- opts, args = getopt.getopt(sys.argv[1:], "adx",
- ["autoclose", "declare", "xml"])
+ elem_names = ''
+ attr_names = ''
+ value_names = ''
+ opts, args = getopt.getopt(sys.argv[1:], "adesx",
+ ["autoclose=", "declare", "sgml", "xml",
+ "elements-map=", "attributes-map",
+ "values-map="])
for opt, arg in opts:
if opt in ("-d", "--declare"):
xmldecl = 1
+ elif opt == "-e":
+ global LIST_EMPTIES
+ LIST_EMPTIES = 1
+ elif opt in ("-s", "--sgml"):
+ xml = 0
+ convert = sgml_convert
elif opt in ("-x", "--xml"):
xml = 1
convert = xml_convert
elif opt in ("-a", "--autoclose"):
autoclose = string.split(arg, ",")
+ elif opt == "--elements-map":
+ elem_names = ("%s,%s" % (elem_names, arg))[1:]
+ elif opt == "--attributes-map":
+ attr_names = ("%s,%s" % (attr_names, arg))[1:]
+ elif opt == "--values-map":
+ value_names = ("%s,%s" % (value_names, arg))[1:]
+ #
+ # open input streams:
+ #
if len(args) == 0:
ifp = sys.stdin
ofp = sys.stdout
@@ -163,7 +223,23 @@ def main():
else:
usage()
sys.exit(2)
- # knownempties is ignored in the XML version
+ #
+ # setup the name maps:
+ #
+ if elem_names or attr_names or value_names:
+ # assume the origin was SGML; ignore case of the names from the ESIS
+ # stream but set up conversion tables to get the case right on output
+ global _normalize_case
+ _normalize_case = string.lower
+ update_gi_map(_elem_map, string.split(elem_names, ","))
+ update_gi_map(_attr_map, string.split(attr_names, ","))
+ update_gi_map(_values_map, string.split(value_names, ","))
+ else:
+ global map_gi
+ map_gi = null_map_gi
+ #
+ # run the conversion:
+ #
try:
if xml and xmldecl:
opf.write('<?xml version="1.0" encoding="iso8859-1"?>\n')