diff options
author | Fred Drake <fdrake@acm.org> | 1998-03-27 05:25:43 (GMT) |
---|---|---|
committer | Fred Drake <fdrake@acm.org> | 1998-03-27 05:25:43 (GMT) |
commit | ec56109a0af6d3f705feffc50fc59abab1c0ccea (patch) | |
tree | 5608c14f17d16baadb67c5d840d5afcf461795e9 /Doc/tools | |
parent | 19e2ce58bf98640e8fdf009bbb2edef2ce1236ef (diff) | |
download | cpython-ec56109a0af6d3f705feffc50fc59abab1c0ccea.zip cpython-ec56109a0af6d3f705feffc50fc59abab1c0ccea.tar.gz cpython-ec56109a0af6d3f705feffc50fc59abab1c0ccea.tar.bz2 |
First (rough) cut at HTML index generation in Python. Not very pretty, but
easier to work with than the Perl code.
Diffstat (limited to 'Doc/tools')
-rwxr-xr-x | Doc/tools/buildindex.py | 211 |
1 files changed, 211 insertions, 0 deletions
#! /usr/bin/env python
# Contents of Doc/tools/buildindex.py (new file, mode 100755, blob a2c57b6).

"""Generate an HTML index from a file of index entries.

Each input line has the form ``link NUL text ### seqno`` where NUL is a
single zero byte.  ``text`` holds one or more index levels separated by
'!'; each level is either plain text or ``sortkey@displaytext``.

Run as a script with the data file name as the only argument.  The sorted
entries are written to ``<file>.dump-1``, the collapsed entries to
``<file>.dump-2``, and the HTML index to standard output.
"""
__version__ = '$Revision$'

import re
import string
import sys


def _cmp(a, b):
    """Three-way comparison: negative, zero or positive (like old cmp())."""
    return (a > b) - (a < b)


def _seqno_key(seqno):
    """Return a sort key that orders numeric sequence numbers numerically.

    load() hands sequence numbers over as strings; comparing those
    directly would put "10" before "9".  Values that are not integers
    sort after all numeric ones, by their string form.
    """
    try:
        return (0, int(seqno))
    except (TypeError, ValueError):
        return (1, "%s" % (seqno,))


def _split_entry(entry):
    """Split an entry on '!' and return one (key, text) pair per level.

    A level without '@' uses the same string for key and text.  Only the
    first '@' separates key from text, so extra '@' characters end up in
    the text instead of raising an error.
    """
    pairs = []
    for part in entry.split('!'):
        if '@' in part:
            key, text = part.split('@', 1)
        else:
            key = text = part
        pairs.append((key, text))
    return pairs


class Node:
    """One index entry.

    Attributes:
        links -- list of link strings pointing at this entry; starts with
                 one element, collapse() may append more.
        seqno -- sequence number as passed in; used only as a tie-breaker
                 when sorting.
        text  -- display text, one string per index level.
        key   -- lowercase sort key with <tt> markup removed, one string
                 per index level.
    """

    # Matches one <tt>...</tt> pair; used to drop the tags but keep the
    # contents.
    __rmtt = re.compile(r"<tt>(.*?)</tt>", re.IGNORECASE)
    __rmjunk = re.compile(r"<#\d+#>")

    # Nodes are mutable (collapse() extends links) and compare by value,
    # so they are deliberately unhashable.
    __hash__ = None

    def __init__(self, link, str, seqno):
        self.links = [link]
        self.seqno = seqno
        # remove <#\d+#> left in by moving the data out of LaTeX2HTML
        raw = self.__rmjunk.sub('', str)
        # The key source is the entry with all <tt>...</tt> markup removed
        # (contents remain) and folded to lowercase.
        kstr = self.__rmtt.sub(r"\1", raw).lower()
        self.text = [text for key, text in _split_entry(raw)]
        # Always build the key from its own source: reusing self.text
        # would discard the sort key of "sortkey@text" levels.
        self.key = [key for key, text in _split_entry(kstr)]

    def __cmp__(self, other):
        """Comparison operator includes sequence number, for use with
        list.sort()."""
        return (self.cmp_entry(other)
                or _cmp(_seqno_key(self.seqno), _seqno_key(other.seqno)))

    # Rich comparisons so that list.sort() and == also work on
    # interpreters that no longer consult __cmp__.
    def __lt__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) < 0

    def __eq__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) == 0

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number."""
        for skey, stext, okey, otext in zip(self.key, self.text,
                                            other.key, other.text):
            c = _cmp(skey, okey) or _cmp(stext, otext)
            if c:
                return c
        # All shared levels are equal; compare the full key lists.
        return _cmp(self.key, other.key)

    def __repr__(self):
        return "<Node for %s (%s)>" % ('!'.join(self.text), self.seqno)

    def __str__(self):
        return '!'.join(self.key)

    def dump(self):
        """Return the entry as one text line: the links and the text
        joined by zero bytes, then '###' and the sequence number."""
        return "%s\0%s###%s\n" \
               % ("\0".join(self.links),
                  '!'.join(self.text),
                  self.seqno)


def load(fp):
    """Read index entries from the file object fp; return a list of Nodes.

    Lines that do not match the expected layout are skipped.  The
    sequence number is passed to Node as the string read from the file.
    """
    nodes = []
    rx = re.compile(r"(.*)\0(.*)###(.*)$")
    while 1:
        line = fp.readline()
        if not line:
            break
        m = rx.match(line)
        if m:
            link, text, seqno = m.group(1, 2, 3)
            nodes.append(Node(link, text, seqno))
    return nodes


def split_letters(nodes):
    """Group consecutive nodes by the first character of their first key.

    Returns a list of (letter, nodes) tuples in input order; a new group
    starts whenever the first character changes.  An empty first key
    yields the letter ''.
    """
    letter_groups = []
    letter = None
    group = None
    for node in nodes:
        # [:1] rather than [0] so an empty key does not raise IndexError.
        nletter = node.key[0][:1]
        if group is None or nletter != letter:
            letter = nletter
            group = []
            letter_groups.append((letter, group))
        group.append(node)
    return letter_groups


def format_nodes(nodes):
    """Return the nodes formatted as nested <dl compact> HTML lists.

    The nesting depth follows the number of levels in each node's key.
    A node with several links gets its text on the first link and a
    "[Link]" anchor for each additional one.
    """
    # Does not create multiple links to multiple targets for the same entry;
    # uses a separate entry for each target.  This is a bug.
    level = 0
    strings = ["<dl compact>"]
    append = strings.append
    prev = None
    for node in nodes:
        nlevel = len(node.key) - 1
        if nlevel > level:
            # Going deeper: emit the parent text unless the previous node
            # already shares this parent.
            if prev is None or node.key[level] != prev.key[level]:
                append("%s\n<dl compact>" % node.text[level])
            else:
                append("<dl compact>")
            level = nlevel
        elif nlevel < level:
            # Coming back up: close the lists that are no longer needed.
            append("</dl>" * (level - len(node.key) + 1))
            level = nlevel
            if prev is not None and node.key[level] != prev.key[level]:
                append("</dl>")
            else:
                append("<dl compact>")
        elif level:
            # Same depth below the top level: start a new sub-list when
            # the parent changed.  prev is never None here because level
            # only becomes non-zero after a node has been processed.
            if node.key[level-1] != prev.key[level-1]:
                append("</dl>\n%s<dl compact>"
                       % node.text[level-1])
        append("%s%s</a><br>" % (node.links[0], node.text[-1]))
        for link in node.links[1:]:
            # Replace the trailing "<br>" of the previous line by a comma.
            strings[-1] = strings[-1][:-4] + ","
            append(link + "[Link]</a><br>")
        prev = node
    append("</dl>" * (level + 1))
    append("")
    append("")
    return "\n".join(strings)


def format_letter(letter):
    """Return the HTML heading (with a named anchor) for a letter group."""
    if letter == '.':
        lettername = ". (dot)"
    elif letter == '_':
        lettername = "_ (underscore)"
    else:
        lettername = letter.upper()
    return "<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n" \
           % (letter, lettername)


def format_html(nodes):
    """Return the complete HTML index for nodes.

    The output is a bar of links to the letter sections followed by one
    heading and entry list per letter group.
    """
    letter_groups = split_letters(nodes)
    items = ["<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter)
             for letter, group in letter_groups]
    pieces = ["<hr><center>\n%s</center>\n" % " |\n".join(items)]
    for letter, group in letter_groups:
        pieces.append(format_letter(letter))
        pieces.append(format_nodes(group))
    return "".join(pieces)


def collapse(nodes):
    """Collapse sequences of nodes with matching keys into a single node.
    Destructive."""
    if len(nodes) < 2:
        return
    prev = nodes[0]
    i = 1
    while i < len(nodes):
        node = nodes[i]
        if not node.cmp_entry(prev):
            # Same entry as its predecessor: keep only the extra link.
            prev.links.append(node.links[0])
            del nodes[i]
        else:
            i = i + 1
            prev = node


def dump(nodes, fp):
    """Write the dump() line of every node to the file object fp."""
    for node in nodes:
        fp.write(node.dump())


def main():
    """Build the index for the data file named on the command line."""
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s datafile\n" % sys.argv[0])
        sys.exit(2)
    fn = sys.argv[1]
    # Each file is closed as soon as it has been read or written.
    with open(fn) as fp:
        nodes = load(fp)
    nodes.sort()
    with open(fn + ".dump-1", "w") as fp:
        dump(nodes, fp)
    collapse(nodes)
    with open(fn + ".dump-2", "w") as fp:
        dump(nodes, fp)
    sys.stdout.write(format_html(nodes))


if __name__ == "__main__":
    main()