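#! /usr/bin/env python
"""Build an HTML index from a file of index entry records.

Usage: pass the name of the entry file as the only command-line argument.

Each usable input line has the form ``link NUL text ### seqno``.  The
entries are sorted, entries with identical keys and text are merged, and
the resulting HTML is written to standard output.  The sorted and merged
entry lists are also written next to the input file as ``<file>.dump-1``
and ``<file>.dump-2``.
"""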
__version__ = '$Revision$'
import re
import string
import sys
def _cmp(a, b):
    """Return -1, 0 or 1 as *a* is less than, equal to or greater than *b*."""
    return (a > b) - (a < b)


def _split_entry(source, want_key):
    """Split an index string into one item per '!'-separated level.

    Within a level, 'key@text' supplies a separate sort key and display
    text; a level without '@' uses the same string for both.  *want_key*
    selects which of the two is returned for levels that contain '@'.

    Raises ValueError if a level contains more than one '@'.
    """
    items = []
    for level in source.split('!'):
        pieces = level.split('@')
        if len(pieces) == 1:
            items.append(pieces[0])
        else:
            key, text = pieces
            if want_key:
                items.append(key)
            else:
                items.append(text)
    return items


class Node:
    """A single index entry.

    Attributes:
        links -- list of link strings pointing at this entry; starts with
                 the one passed to the constructor.
        seqno -- sequence number, used only to break ties when sorting.
        text  -- display text, one item per '!'-separated level.
        key   -- lower-cased sort key, one item per level.
    """

    # NOTE(review): the tag names were missing from this pattern, which left
    # it matching every string and removing nothing.  They are restored as
    # <tt>...</tt> based on the attribute name -- confirm against the
    # upstream copy of this file.
    __rmtt = re.compile(r"(.*)<tt>(.*)</tt>(.*)$", re.IGNORECASE)
    __rmjunk = re.compile(r"<#\d+#>")

    def __init__(self, link, str, seqno):
        """Parse the index string *str* into display text and sort key."""
        self.links = [link]
        self.seqno = seqno
        # remove <#\d+#> left in by moving the data out of LaTeX2HTML
        text_source = self.__rmjunk.sub('', str)
        # The sort key ignores <tt>...</tt> markup (contents remain) and case.
        key_source = text_source
        if '<' in text_source:
            m = self.__rmtt.match(text_source)
            if m:
                key_source = ''.join(m.group(1, 2, 3))
        key_source = key_source.lower()
        self.text = _split_entry(text_source, 0)
        # Always derive the key from its own source.  Reusing self.text when
        # both sources were equal picked the text half of a 'key@text' level,
        # so the sort position depended on the case of the input.
        self.key = _split_entry(key_source, 1)

    def __cmp__(self, other):
        """Comparison operator includes sequence number, for use with
        list.sort()."""
        return self.cmp_entry(other) or _cmp(self.seqno, other.seqno)

    def __lt__(self, other):
        """Ordering for interpreters that ignore __cmp__ when sorting."""
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) < 0

    def __eq__(self, other):
        """Equality consistent with __cmp__."""
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) == 0

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number."""
        # Compare level by level over the common prefix: key first, then
        # display text.  key and text always have one item per level.
        for i in range(min(len(self.key), len(other.key))):
            c = (_cmp(self.key[i], other.key[i])
                 or _cmp(self.text[i], other.text[i]))
            if c:
                return c
        return _cmp(self.key, other.key)

    def __repr__(self):
        return "<Node for %s (%s)>" % ('!'.join(self.text), self.seqno)

    def __str__(self):
        return '!'.join(self.key)

    def dump(self):
        """Return this entry as one line: the links and the text separated by
        NUL characters, then '###' and the sequence number."""
        return "%s\0%s###%s\n" \
               % ("\0".join(self.links),
                  '!'.join(self.text),
                  self.seqno)
def load(fp):
    """Read index entries from the open file *fp*.

    Every line of the form ``link NUL text ### seqno`` becomes one Node;
    lines that do not match are skipped.  Returns the list of nodes in
    file order.
    """
    record = re.compile(r"(.*)\0(.*)###(.*)$")
    entries = []
    line = fp.readline()
    while line:
        found = record.match(line)
        if found is not None:
            entries.append(Node(*found.group(1, 2, 3)))
        line = fp.readline()
    return entries
def split_letters(nodes):
    """Group consecutive nodes by the first character of their first key.

    Returns a list of (letter, nodes) tuples in input order.  A new group
    starts whenever the leading character changes, so unsorted input can
    yield the same letter more than once.
    """
    groups = []
    bucket = None
    current = None
    for entry in nodes:
        initial = entry.key[0][0]
        if bucket is None or initial != current:
            current = initial
            bucket = []
            groups.append((current, bucket))
        bucket.append(entry)
    return groups
def format_nodes(nodes):
    """Return the HTML for one group of index nodes as nested lists.

    *nodes* is a sequence of objects with ``key``, ``text`` and ``links``
    attributes, as built by Node.  Each node contributes one line made of
    its first link, its last text item and a line break; every further
    link is appended as a separate "[Link]" anchor after a comma.

    NOTE(review): the string literals in this function had lost their
    markup and were no longer valid syntax.  The tags below are a
    reconstruction -- the only hard evidence is the ``[:-4]`` slice, which
    needs a four-character suffix such as ``<br>`` -- so confirm them
    against the upstream copy of this file.  The control flow is unchanged.
    """
    level = 0
    strings = ["<dl compact>"]
    append = strings.append
    prev = None
    for node in nodes:
        nlevel = len(node.key) - 1
        if nlevel > level:
            # Going deeper: emit the parent term unless the previous node
            # already did, then open the nested list.
            if prev is None or node.key[level] != prev.key[level]:
                append("<dt>%s\n<dd><dl compact>" % node.text[level])
            else:
                append("<dl compact>")
            level = nlevel
        elif nlevel < level:
            # Coming back up: close the nested lists that were left.
            append("</dl>" * (level - len(node.key) + 1))
            level = nlevel
            # NOTE(review): with the markup as restored here, the first
            # branch closes one more list than was opened.  Left as found
            # because the original literals could not be recovered.
            if prev is not None and node.key[level] != prev.key[level]:
                append("</dl>")
            else:
                append("<dl compact>")
        elif level:
            # Same depth but a different parent term: start a new sublist.
            if node.key[level-1] != prev.key[level-1]:
                append("</dl>\n<dt>%s<dd><dl compact>"
                       % node.text[level-1])
        append("%s%s</a><br>" % (node.links[0], node.text[-1]))
        for link in node.links[1:]:
            # Replace the trailing "<br>" of the previous line with a comma.
            strings[-1] = strings[-1][:-4] + ","
            append(link + "[Link]</a><br>")
        prev = node
    append("</dl>" * (level + 1))
    append("")
    append("")
    return "\n".join(strings)
def format_letter(letter):
    """Return the HTML heading that introduces the section for *letter*.

    '.' and '_' get a spelled-out name; any other character is shown in
    upper case.  The heading carries an anchor named ``letter-<letter>``.

    NOTE(review): the returned literal had lost its markup and had no
    placeholders left for its two arguments.  The tags below are a
    reconstruction -- confirm against the upstream copy of this file.
    """
    if letter == '.':
        lettername = ". (dot)"
    elif letter == '_':
        lettername = "_ (underscore)"
    else:
        lettername = letter.upper()
    return "<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n" \
           % (letter, lettername)
def format_html(nodes):
    """Return the complete HTML index for the sorted list *nodes*.

    The output starts with a bar of per-letter links separated by " |",
    followed by one heading and one entry list per letter group.  Each bar
    link targets an in-page anchor named ``letter-<letter>``.

    NOTE(review): the string literals here had lost their markup; one had
    a single placeholder for two arguments and another was unterminated.
    The tags below are a reconstruction -- confirm against the upstream
    copy of this file.
    """
    letter_groups = split_letters(nodes)
    items = []
    for letter, group in letter_groups:
        items.append("<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter))
    # Collect the pieces and join once instead of growing a string in a loop.
    pieces = ["<hr><center>\n%s</center>\n" % " |\n".join(items)]
    for letter, group in letter_groups:
        pieces.append(format_letter(letter))
        pieces.append(format_nodes(group))
    return "".join(pieces)
def collapse(nodes):
    """Collapse sequences of nodes with matching keys into a single node.

    Adjacent nodes for which cmp_entry() reports equality are merged: the
    first node of the run is kept and receives the first link of each node
    that follows it.  Destructive: *nodes* is modified in place and
    nothing is returned.
    """
    if len(nodes) < 2:
        return
    # Build the surviving list in one pass and splice it back in, rather
    # than deleting from the middle of the list for every duplicate.
    kept = [nodes[0]]
    for node in nodes[1:]:
        prev = kept[-1]
        if node.cmp_entry(prev):
            kept.append(node)
        else:
            prev.links.append(node.links[0])
    nodes[:] = kept
def dump(nodes, fp):
    """Write the dump() record of every node in *nodes* to *fp*, in order."""
    records = (entry.dump() for entry in nodes)
    for record in records:
        fp.write(record)
def main():
    """Build the index for the file named by the first command-line argument.

    Loads and sorts the entries, writes them to ``<file>.dump-1``, merges
    duplicates, writes the merged list to ``<file>.dump-2`` and prints the
    HTML index on standard output.
    """
    fn = sys.argv[1]
    # Close every file as soon as it has been used instead of leaving that
    # to the garbage collector.
    with open(fn) as fp:
        nodes = load(fp)
    nodes.sort()
    with open(fn + ".dump-1", "w") as fp:
        dump(nodes, fp)
    collapse(nodes)
    with open(fn + ".dump-2", "w") as fp:
        dump(nodes, fp)
    sys.stdout.write(format_html(nodes))
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()