#! /usr/bin/env python

"""Build an HTML index page from an index data file.

Usage: pass the data file name as the only argument.  Each useful line of
the file has the form ``link NUL text###seqno``; lines that do not match
are ignored.  The sorted entries are written to ``FILE.dump-1``, the
sorted-and-collapsed entries to ``FILE.dump-2``, and the HTML index to
standard output.

The code runs on Python 2.6+ and Python 3.

NOTE(review): the copy of this file that was reviewed had lost its line
structure and every HTML tag inside its string literals.  All markup
emitted below (``<dl>``, ``<dt>``, ``<dd>``, ``<hr>``, ``<h2>``,
``<center>``, anchors, ``<br>``) and the ``<tt>`` pattern are
reconstructions -- confirm them against the project's original revision.
"""
__version__ = '$Revision$'

import re
import string  # unused by this module now; kept in case importers rely on it
import sys


def _cmp(a, b):
    """Three-way compare returning -1, 0 or 1 (the builtin cmp() is gone
    in Python 3)."""
    return (a > b) - (a < b)


def _split_entry(source, which):
    """Split an index string into its '!'-separated levels.

    Each level is either ``text`` or ``key@text``.  Returns the list of
    keys when *which* is 0 and the list of display texts when it is 1; a
    level without '@' uses the same string for both.  Only the first '@'
    separates key from text, so a further '@' stays in the text instead of
    raising ValueError.
    """
    pieces = []
    for part in source.split('!'):
        key, sep, text = part.partition('@')
        if not sep:
            text = key
        pieces.append((key, text)[which])
    return pieces


class Node:
    """One index entry.

    Attributes:
        links -- list of link strings pointing at the entry's targets
        seqno -- sequence number exactly as passed in (load() passes text)
        text  -- display text of each level of the entry
        key   -- lower-cased, <tt>-free sort key of each level
    """

    # NOTE(review): reconstructed from the attribute name, the IGNORECASE
    # flag and the surrounding comments; removes every <tt>/</tt> tag so
    # that each level of a multi-level entry gets a clean key.
    __rmtt = re.compile(r"</?tt>", re.IGNORECASE)
    __rmjunk = re.compile(r"<#\d+#>")

    def __init__(self, link, str, seqno):
        """Create an entry for *link* from the raw index string *str*.

        *str* keeps its name for compatibility with keyword callers even
        though it shadows the builtin inside this method.
        """
        self.links = [link]
        self.seqno = seqno
        # remove <#\d+#> left in by moving the data out of LaTeX2HTML
        text_source = self.__rmjunk.sub('', str)
        # now remove <tt>...</tt> markup; contents remain.
        key_source = self.__rmtt.sub('', text_source).lower()
        self.text = _split_entry(text_source, 1)
        # Always take the key from the ``key`` half of ``key@text``; reusing
        # self.text here would sort an all-lowercase entry by its text.
        self.key = _split_entry(key_source, 0)

    def __cmp__(self, other):
        """Comparison operator includes sequence number, for use with
        list.sort()."""
        return self.cmp_entry(other) or _cmp(self.seqno, other.seqno)

    def __lt__(self, other):
        """Ordering hook for list.sort() on Python 3, based on __cmp__."""
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) < 0

    def __eq__(self, other):
        """Equality consistent with __cmp__ (entry and sequence number)."""
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) == 0

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number."""
        levels = zip(self.key, self.text, other.key, other.text)
        for my_key, my_text, their_key, their_text in levels:
            c = _cmp(my_key, their_key) or _cmp(my_text, their_text)
            if c:
                return c
        # All shared levels match: the entry with fewer levels sorts first.
        return _cmp(self.key, other.key)

    def __repr__(self):
        return "<Node for %s (%s)>" % ('!'.join(self.text), self.seqno)

    def __str__(self):
        return '!'.join(self.key)

    def dump(self):
        """Return the entry as one ``links NUL text###seqno`` line."""
        return "%s\0%s###%s\n" % ("\0".join(self.links),
                                  '!'.join(self.text),
                                  self.seqno)


def load(fp):
    """Read entries from the file-like object *fp*; return a list of Nodes.

    Only fp.readline() is used.  Lines that do not match the
    ``link NUL text###seqno`` layout are skipped.
    """
    nodes = []
    rx = re.compile(r"(.*)\0(.*)###(.*)$")
    while True:
        line = fp.readline()
        if not line:
            break
        m = rx.match(line)
        if m:
            link, text, seqno = m.group(1, 2, 3)
            # NOTE(review): seqno stays a string, so ties between equal
            # entries are broken lexicographically ("10" before "9") --
            # confirm whether numeric order is wanted.
            nodes.append(Node(link, text, seqno))
    return nodes


def split_letters(nodes):
    """Group consecutive nodes by the first character of their first key.

    Returns a list of ``(letter, nodes)`` tuples in input order.  A node
    whose first key is empty is grouped under the empty string.
    """
    letter_groups = []
    for node in nodes:
        letter = node.key[0][:1]
        if not letter_groups or letter_groups[-1][0] != letter:
            letter_groups.append((letter, []))
        letter_groups[-1][1].append(node)
    return letter_groups


def format_nodes(nodes):
    """Return the nested-list HTML for *nodes* (normally one letter group).

    Every '!' level of an entry opens a nested definition list below a
    heading with that level's text.  The heading is omitted when the
    previous entry is itself that parent.  An entry with several links
    shows its text on the first link and ``[Link]`` on the others.
    Opened lists are tracked explicitly so the output stays balanced even
    when the depth changes by more than one level between entries.

    NOTE(review): tag names are reconstructed; see the module docstring.
    """
    strings = ["<dl compact>"]
    append = strings.append
    open_path = []      # keys of the parent levels whose lists are open
    prev = None
    for node in nodes:
        parents = node.key[:-1]
        # How many of the currently open parent levels still apply?
        shared = 0
        limit = min(len(open_path), len(parents))
        while shared < limit and open_path[shared] == parents[shared]:
            shared = shared + 1
        if len(open_path) > shared:
            append("</dl>" * (len(open_path) - shared))
            del open_path[shared:]
        for depth in range(shared, len(parents)):
            # The previous entry doubles as the heading when it is exactly
            # this parent; otherwise print the parent's text first.
            if prev is None or prev.key != node.key[:depth + 1]:
                append("<dt>%s" % node.text[depth])
            append("<dd><dl compact>")
            open_path.append(parents[depth])
        anchors = ["<dt>%s%s</a>" % (node.links[0], node.text[-1])]
        anchors.extend(link + "[Link]</a>" for link in node.links[1:])
        for anchor in anchors[:-1]:
            append(anchor + ",")
        append(anchors[-1] + "<br>")
        prev = node
    append("</dl>" * (len(open_path) + 1))
    append("")
    append("")
    return "\n".join(strings)


def format_letter(letter):
    """Return the section heading (with its anchor) for *letter*.

    NOTE(review): tag names are reconstructed; see the module docstring.
    """
    if letter == '.':
        lettername = ". (dot)"
    elif letter == '_':
        lettername = "_ (underscore)"
    else:
        lettername = letter.upper()
    return "<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n" \
           % (letter, lettername)


def format_html(nodes):
    """Return the complete index: a letter navigation bar followed by one
    section per letter.

    NOTE(review): tag names are reconstructed; see the module docstring.
    """
    letter_groups = split_letters(nodes)
    items = ["<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter)
             for letter, group in letter_groups]
    sections = ["<hr><center>\n%s</center>\n" % " |\n".join(items)]
    for letter, group in letter_groups:
        sections.append(format_letter(letter))
        sections.append(format_nodes(group))
    return "".join(sections)


def collapse(nodes):
    """Collapse sequences of nodes with matching keys into a single node.

    Destructive: *nodes* is modified in place and the first node of each
    run receives the first link of every node merged into it.
    """
    if len(nodes) < 2:
        return
    kept = []
    for node in nodes:
        if kept and not node.cmp_entry(kept[-1]):
            kept[-1].links.append(node.links[0])
        else:
            kept.append(node)
    # One slice assignment instead of repeated ``del nodes[i]``.
    nodes[:] = kept


def dump(nodes, fp):
    """Write every node's dump() line to the file-like object *fp*."""
    for node in nodes:
        fp.write(node.dump())


def main():
    """Command-line entry point; see the module docstring."""
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s index-data-file\n" % sys.argv[0])
        sys.exit(2)
    fn = sys.argv[1]
    with open(fn) as fp:
        nodes = load(fp)
    nodes.sort()
    with open(fn + ".dump-1", "w") as fp:
        dump(nodes, fp)
    collapse(nodes)
    with open(fn + ".dump-2", "w") as fp:
        dump(nodes, fp)
    sys.stdout.write(format_html(nodes))


if __name__ == "__main__":
    main()