summaryrefslogtreecommitdiffstats
path: root/Doc/tools/buildindex.py
diff options
context:
space:
mode:
authorFred Drake <fdrake@acm.org>1998-03-27 05:25:43 (GMT)
committerFred Drake <fdrake@acm.org>1998-03-27 05:25:43 (GMT)
commitec56109a0af6d3f705feffc50fc59abab1c0ccea (patch)
tree5608c14f17d16baadb67c5d840d5afcf461795e9 /Doc/tools/buildindex.py
parent19e2ce58bf98640e8fdf009bbb2edef2ce1236ef (diff)
downloadcpython-ec56109a0af6d3f705feffc50fc59abab1c0ccea.zip
cpython-ec56109a0af6d3f705feffc50fc59abab1c0ccea.tar.gz
cpython-ec56109a0af6d3f705feffc50fc59abab1c0ccea.tar.bz2
First (rough) cut at HTML index generation in Python. Not very pretty, but
easier to work with than the Perl code.
Diffstat (limited to 'Doc/tools/buildindex.py')
-rwxr-xr-xDoc/tools/buildindex.py211
1 files changed, 211 insertions, 0 deletions
diff --git a/Doc/tools/buildindex.py b/Doc/tools/buildindex.py
new file mode 100755
index 0000000..a2c57b6
--- /dev/null
+++ b/Doc/tools/buildindex.py
@@ -0,0 +1,211 @@
+#! /usr/bin/env python
+
+"""
+"""
+__version__ = '$Revision$'
+
+import re
+import string
+import sys
+
+
+class Node:
+
+ __rmtt = re.compile(r"(.*)<tt>(.*)</tt>(.*)$", re.IGNORECASE)
+ __rmjunk = re.compile("<#\d+#>")
+
+ def __init__(self, link, str, seqno):
+ self.links = [link]
+ self.seqno = seqno
+ # remove <#\d+#> left in by moving the data out of LaTeX2HTML
+ str = self.__rmjunk.sub('', str)
+ # now remove <tt>...</tt> markup; contents remain.
+ if '<' in str:
+ m = self.__rmtt.match(str)
+ if m:
+ kstr = string.join(m.group(1, 2, 3), '')
+ else:
+ kstr = str
+ else:
+ kstr = str
+ kstr = string.lower(kstr)
+ # build up the text
+ self.text = []
+ parts = string.split(str, '!')
+ parts = map(string.split, parts, ['@'] * len(parts))
+ for entry in parts:
+ if len(entry) != 1:
+ key, text = entry
+ else:
+ text = entry[0]
+ self.text.append(text)
+ # Building the key must be separate since any <tt> has been stripped
+ # from the key, but can be avoided if both key and text sources are
+ # the same.
+ if kstr != str:
+ self.key = []
+ kparts = string.split(kstr, '!')
+ kparts = map(string.split, kparts, ['@'] * len(kparts))
+ for entry in kparts:
+ if len(entry) != 1:
+ key, text = entry
+ else:
+ key = entry[0]
+ self.key.append(key)
+ else:
+ self.key = self.text
+
+ def __cmp__(self, other):
+ """Comparison operator includes sequence number, for use with
+ list.sort()."""
+ return self.cmp_entry(other) or cmp(self.seqno, other.seqno)
+
+ def cmp_entry(self, other):
+ """Comparison 'operator' that ignores sequence number."""
+ for i in range(min(len(self.key), len(other.key))):
+ c = (cmp(self.key[i], other.key[i])
+ or cmp(self.text[i], other.text[i]))
+ if c:
+ return c
+ return cmp(self.key, other.key)
+
+ def __repr__(self):
+ return "<Node for %s (%s)>" % (string.join(self.text, '!'), self.seqno)
+
+ def __str__(self):
+ return string.join(self.key, '!')
+
+ def dump(self):
+ return "%s\0%s###%s\n" \
+ % (string.join(self.links, "\0"),
+ string.join(self.text, '!'),
+ self.seqno)
+
+
+def load(fp):
+ nodes = []
+ rx = re.compile(r"(.*)\0(.*)###(.*)$")
+ while 1:
+ line = fp.readline()
+ if not line:
+ break
+ m = rx.match(line)
+ if m:
+ link, str, seqno = m.group(1, 2, 3)
+ nodes.append(Node(link, str, seqno))
+ return nodes
+
+
+def split_letters(nodes):
+ letter_groups = []
+ group = []
+ append = group.append
+ if nodes:
+ letter = nodes[0].key[0][0]
+ letter_groups.append((letter, group))
+ for node in nodes:
+ nletter = node.key[0][0]
+ if letter != nletter:
+ letter = nletter
+ group = []
+ letter_groups.append((letter, group))
+ append = group.append
+ append(node)
+ return letter_groups
+
+
+def format_nodes(nodes):
+ # Does not create multiple links to multiple targets for the same entry;
+ # uses a separate entry for each target. This is a bug.
+ level = 0
+ strings = ["<dl compact>"]
+ append = strings.append
+ prev = None
+ for node in nodes:
+ nlevel = len(node.key) - 1
+ if nlevel > level:
+ if prev is None or node.key[level] != prev.key[level]:
+ append("%s\n<dl compact>" % node.text[level])
+ else:
+ append("<dl compact>")
+ level = nlevel
+ elif nlevel < level:
+ append("</dl>" * (level - len(node.key) + 1))
+ level = nlevel
+ if prev is not None and node.key[level] != prev.key[level]:
+ append("</dl>")
+ else:
+ append("<dl compact>")
+ elif level:
+ if node.key[level-1] != prev.key[level-1]:
+ append("</dl>\n%s<dl compact>"
+ % node.text[level-1])
+ append("%s%s</a><br>" % (node.links[0], node.text[-1]))
+ for link in node.links[1:]:
+ strings[-1] = strings[-1][:-4] + ","
+ append(link + "[Link]</a><br>")
+ prev = node
+ append("</dl>" * (level + 1))
+ append("")
+ append("")
+ return string.join(strings, "\n")
+
+
+def format_letter(letter):
+ if letter == '.':
+ lettername = ". (dot)"
+ elif letter == '_':
+ lettername = "_ (underscore)"
+ else:
+ lettername = string.upper(letter)
+ return "<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n" \
+ % (letter, lettername)
+
+
+def format_html(nodes):
+ letter_groups = split_letters(nodes)
+ items = []
+ for letter, nodes in letter_groups:
+ s = "<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter)
+ items.append(s)
+ s = "<hr><center>\n%s</center>\n" % string.join(items, " |\n")
+ for letter, nodes in letter_groups:
+ s = s + format_letter(letter) + format_nodes(nodes)
+ return s
+
+
+def collapse(nodes):
+ """Collapse sequences of nodes with matching keys into a single node.
+ Destructive."""
+ if len(nodes) < 2:
+ return
+ prev = nodes[0]
+ i = 1
+ while i < len(nodes):
+ node = nodes[i]
+ if not node.cmp_entry(prev):
+ prev.links.append(node.links[0])
+ del nodes[i]
+## sys.stderr.write("collapsing %s\n" % `node`)
+ else:
+ i = i + 1
+ prev = node
+
+
+def dump(nodes, fp):
+ for node in nodes:
+ fp.write(node.dump())
+
+
+def main():
+ fn = sys.argv[1]
+ nodes = load(open(fn))
+ nodes.sort()
+ dump(nodes, open(fn + ".dump-1", "w"))
+ collapse(nodes)
+ dump(nodes, open(fn + ".dump-2", "w"))
+ sys.stdout.write(format_html(nodes))
+
+
+if __name__ == "__main__":
+ main()