diff options
Diffstat (limited to 'Doc/tools/buildindex.py')
-rwxr-xr-x | Doc/tools/buildindex.py | 388 |
1 files changed, 0 insertions, 388 deletions
#! /usr/bin/env python
"""Build an HTML index from LaTeX2HTML-style index entry records.

Each input line has the form ``<link>\\x01<entry text>###<seqno>``.  The
entry text may contain ``!`` to separate nesting levels and ``key@text``
pairs within a level.  Entries are sorted, entries with identical keys are
collapsed into one node carrying several links, and the result is rendered
as nested ``<dl>`` lists, optionally grouped by first letter and split into
columns.

The module runs unchanged on Python 2.6+ and Python 3.
"""

__version__ = '$Revision$'

import os.path
import re
import string
import sys

from xml.sax.saxutils import quoteattr


bang_join = "!".join
null_join = "".join

# Separator between the link(s) and the entry text in the dump format.
_FIELD_SEP = "\x01"

REPLACEMENTS = [
    # Hackish way to deal with macros replaced with simple text
    (re.compile(r"\\ABC\b"), "ABC"),
    (re.compile(r"\\ASCII\b"), "ASCII"),
    (re.compile(r"\\Cpp\b"), "C++"),
    (re.compile(r"\\EOF\b"), "EOF"),
    (re.compile(r"\\NULL\b"), "NULL"),
    (re.compile(r"\\POSIX\b"), "POSIX"),
    (re.compile(r"\\UNIX\b"), "Unix"),
    # deal with turds left over from LaTeX2HTML
    (re.compile(r"<#\d+#>"), ""),
    ]


def _cmp(a, b):
    """Three-way comparison: return -1, 0 or 1 (replacement for cmp())."""
    return (a > b) - (a < b)


class Node:
    """One index entry: its link(s), display text parts and sort key parts."""

    # Number of leading levels shared with the last entry of the previous
    # column; set by split_columns(), read by format_column().
    continuation = 0

    def __init__(self, link, str, seqno):
        """Create a node from a link, the raw entry text and a sequence id.

        `link` becomes the first element of ``self.links``.  `str` is run
        through REPLACEMENTS and then split into ``self.text`` and
        ``self.key`` (one element per ``!``-separated level).
        """
        self.links = [link]
        # NOTE(review): load() passes seqno as a string, so the tie-break in
        # __cmp__ is lexicographic ("10" < "9") -- confirm whether the input
        # is zero-padded before changing this.
        self.seqno = seqno
        entry = str
        for pattern, replacement in REPLACEMENTS:
            entry = pattern.sub(replacement, entry)
        # build up the text
        self.text = split_entry_text(entry)
        self.key = split_entry_key(entry)

    def __cmp__(self, other):
        """Comparison operator includes sequence number, for use with
        list.sort()."""
        return self.cmp_entry(other) or _cmp(self.seqno, other.seqno)

    # Python 3 ignores __cmp__, so the rich comparisons are spelled out in
    # terms of it; list.sort() only needs __lt__.
    def __lt__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) < 0

    def __le__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) <= 0

    def __gt__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) >= 0

    def __eq__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) != 0

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number."""
        c = 0
        for i in range(min(len(self.key), len(other.key))):
            c = (cmp_part(self.key[i], other.key[i])
                 or cmp_part(self.text[i], other.text[i]))
            if c:
                break
        return c or _cmp(self.key, other.key) or _cmp(self.text, other.text)

    def __repr__(self):
        return "<Node for %s (%s)>" % (bang_join(self.text), self.seqno)

    def __str__(self):
        return bang_join(self.key)

    def dump(self):
        """Return the node as one record line in the format load() reads."""
        return "%s%s%s###%s\n" \
               % (_FIELD_SEP.join(self.links),
                  _FIELD_SEP,
                  bang_join(self.text),
                  self.seqno)


def cmp_part(s1, s2):
    """Three-way compare two entry parts.

    Identical strings compare equal.  If one string is a case-insensitive
    prefix of the other, the shorter one sorts first.  Otherwise the
    comparison is case-insensitive, with a case-sensitive tie-break.
    """
    result = _cmp(s1, s2)
    if result == 0:
        return 0
    l1 = s1.lower()
    l2 = s2.lower()
    if len(s1) < len(s2) and l1 == l2[:len(s1)]:
        result = -1
    elif len(s2) < len(s1) and l2 == l1[:len(s2)]:
        result = 1
    else:
        result = _cmp(l1, l2) or _cmp(s1, s2)
    return result


def split_entry(str, which):
    """Split an entry on '!' and pick one side of each 'a@b' level.

    `which` is the index taken from a level that contains '@'; a level
    without '@' is returned unchanged.
    """
    stuff = []
    for entry in [part.split('@') for part in str.split('!')]:
        if len(entry) != 1:
            key = entry[which]
        else:
            key = entry[0]
        stuff.append(key)
    return stuff


_rmtt = re.compile(r"""(.*)<tt(?: class=['"][a-z0-9]+["'])?>(.*)</tt>(.*)$""",
                   re.IGNORECASE)
_rmparens = re.compile(r"\(\)")


def split_entry_key(str):
    """Return the list of sort-key parts for an entry."""
    parts = split_entry(str, 1)
    for i in range(len(parts)):
        m = _rmtt.match(parts[i])
        if m:
            parts[i] = null_join(m.group(1, 2, 3))
        else:
            parts[i] = parts[i].lower()
        # remove '()' from the key:
        parts[i] = _rmparens.sub('', parts[i])
    # A real list (not a lazy map object) is required: callers take len()
    # of the key and index into it.
    return [trim_ignored_letters(part) for part in parts]


def split_entry_text(str):
    """Return the list of display-text parts for an entry."""
    if '<' in str:
        m = _rmtt.match(str)
        if m:
            str = null_join(m.group(1, 2, 3))
    return split_entry(str, 1)


def load(fp):
    """Read index records from the file object `fp` and return Node objects.

    Lines that do not match the record format are skipped.
    """
    nodes = []
    rx = re.compile("(.*)" + _FIELD_SEP + "(.*)###(.*)$")
    for line in fp:
        m = rx.match(line)
        if m:
            link, str, seqno = m.group(1, 2, 3)
            nodes.append(Node(link, str, seqno))
    return nodes


def trim_ignored_letters(s):
    """Lower-case `s`, dropping one leading '$' if present."""
    # ignore $ to keep environment variables with the
    # leading letter from the name
    if s.startswith("$"):
        return s[1:].lower()
    else:
        return s.lower()


def get_first_letter(s):
    """Return the character under which the entry text `s` is grouped."""
    if s.startswith("<tex2html_percent_mark>"):
        return "%"
    else:
        return trim_ignored_letters(s)[0]


def split_letters(nodes):
    """Group consecutive nodes by first letter.

    Returns a list of ``(letter, [nodes])`` tuples in input order; a new
    group is started each time the first letter changes.
    """
    letter_groups = []
    if nodes:
        group = []
        letter = get_first_letter(nodes[0].text[0])
        letter_groups.append((letter, group))
        for node in nodes:
            nletter = get_first_letter(node.text[0])
            if letter != nletter:
                letter = nletter
                group = []
                letter_groups.append((letter, group))
            group.append(node)
    return letter_groups


def group_symbols(groups):
    """Merge the leading non-identifier letter groups into one "Symbols" group.

    Modifies `groups` in place.  An empty list, or a list made up only of
    symbol groups, is handled without error.
    """
    entries = []
    ident_letters = string.ascii_letters + "_"
    # The emptiness check keeps this from running off the end of the list
    # when every group is a symbol group.
    while groups and groups[0][0] not in ident_letters:
        entries += groups[0][1]
        del groups[0]
    if entries:
        groups.insert(0, ("Symbols", entries))


# need a function to separate the nodes into columns...
def split_columns(nodes, columns=1):
    """Split `nodes` into `columns` lists of nearly equal length.

    Returns a list of lists.  The first node of a column that continues an
    entry from the previous column gets its ``continuation`` attribute set
    to the number of leading key parts the two nodes share.  The caller's
    list is left untouched.
    """
    if columns <= 1:
        return [nodes]
    # Work on a copy so the caller's list is not consumed.
    remaining = list(nodes)
    # This is a rough height; we may have to increase to avoid breaks before
    # a subitem.
    colheight = len(remaining) // columns
    numlong = len(remaining) % columns
    if numlong:
        colheight = colheight + 1
    else:
        numlong = columns
    cols = []
    for i in range(numlong):
        start = i * colheight
        cols.append(remaining[start:start + colheight])
    del remaining[:numlong * colheight]
    colheight = colheight - 1
    if colheight == 0:
        # Nothing left for the short columns: pad with empty ones.
        cols = cols + [[] for _ in range(columns - len(cols))]
    else:
        numshort = len(remaining) // colheight
        for i in range(numshort):
            start = i * colheight
            cols.append(remaining[start:start + colheight])
    #
    # If items continue across columns, make sure they are marked
    # as continuations so the user knows to look at the previous column.
    #
    for i in range(len(cols) - 1):
        try:
            prev = cols[i][-1]
            next = cols[i + 1][0]
        except IndexError:
            return cols
        else:
            n = min(len(prev.key), len(next.key))
            for j in range(n):
                if prev.key[j] != next.key[j]:
                    break
                next.continuation = j + 1
    return cols


DL_LEVEL_INDENT = " "


def format_column(nodes):
    """Render one column of nodes as nested ``<dl>`` lists and return HTML."""
    strings = ["<dl compact='compact'>"]
    append = strings.append
    level = 0
    previous = []
    for node in nodes:
        current = node.text
        # count = number of leading levels shared with the previous node
        count = 0
        for i in range(min(len(current), len(previous))):
            if previous[i] != current[i]:
                break
            count = i + 1
        if count > level:
            append("<dl compact='compact'>" * (count - level) + "\n")
            level = count
        elif level > count:
            append("\n")
            append(level * DL_LEVEL_INDENT)
            append("</dl>" * (level - count))
            level = count
        # else: level == count
        for i in range(count, len(current) - 1):
            term = node.text[i]
            level = level + 1
            if node.continuation > i:
                extra = " (continued)"
            else:
                extra = ""
            append("\n<dt>%s%s\n<dd>\n%s<dl compact='compact'>"
                   % (term, extra, level * DL_LEVEL_INDENT))
        append("\n%s<dt>%s%s</a>"
               % (level * DL_LEVEL_INDENT, node.links[0], node.text[-1]))
        for link in node.links[1:]:
            append(",\n%s %s[Link]</a>" % (level * DL_LEVEL_INDENT, link))
        previous = current
    append("\n")
    append("</dl>" * (level + 1))
    return null_join(strings)


def format_nodes(nodes, columns=1):
    """Render `nodes` as HTML, in a table of `columns` columns if > 1."""
    strings = []
    append = strings.append
    if columns > 1:
        colwidth = 100 // columns
        append('<table width="100%"><tr valign="top">')
        for col in split_columns(nodes, columns):
            append('<td width="%d%%">\n' % colwidth)
            append(format_column(col))
            append("\n</td>")
        append("\n</tr></table>")
    else:
        append(format_column(nodes))
    return null_join(strings)


def format_letter(letter):
    """Return the HTML heading that introduces the section for `letter`."""
    if letter == '.':
        lettername = ". (dot)"
    elif letter == '_':
        lettername = "_ (underscore)"
    else:
        lettername = letter.capitalize()
    return "\n<hr />\n<h2 id=%s>%s</h2>\n\n" \
           % (quoteattr("letter-" + letter), lettername)


def format_html_letters(nodes, columns, group_symbol_nodes):
    """Render `nodes` grouped by first letter, preceded by a letter bar."""
    letter_groups = split_letters(nodes)
    if group_symbol_nodes:
        group_symbols(letter_groups)
    items = []
    for letter, _group in letter_groups:
        items.append("<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter))
    s = ["<hr /><center>\n%s</center>\n" % " |\n".join(items)]
    for letter, group in letter_groups:
        s.append(format_letter(letter))
        s.append(format_nodes(group, columns))
    return null_join(s)


def format_html(nodes, columns):
    """Render `nodes` without per-letter grouping."""
    return format_nodes(nodes, columns)


def collapse(nodes):
    """Collapse sequences of nodes with matching keys into a single node.
    Destructive."""
    if len(nodes) < 2:
        return
    prev = nodes[0]
    i = 1
    while i < len(nodes):
        node = nodes[i]
        if not node.cmp_entry(prev):
            prev.links.append(node.links[0])
            del nodes[i]
        else:
            i = i + 1
            prev = node


def dump(nodes, fp):
    """Write every node to `fp` in the record format read by load()."""
    for node in nodes:
        fp.write(node.dump())


def process_nodes(nodes, columns, letters=0, group_symbol_nodes=0):
    """Sort and collapse `nodes` in place, then return the index as HTML."""
    nodes.sort()
    collapse(nodes)
    if letters:
        return format_html_letters(nodes, columns, group_symbol_nodes)
    else:
        return format_html(nodes, columns)


def main():
    """Command-line entry point.

    Options: -c/--columns N, -l/--letters, -o/--output FILE,
    --group-symbols, --dont-group-symbols.  Positional arguments are input
    files; "-" (the default) means standard input, and an output of "-"
    (the default) means standard output.
    """
    import getopt
    ifn = "-"
    ofn = "-"
    columns = 1
    letters = 0
    group_symbol_nodes = 1
    opts, args = getopt.getopt(sys.argv[1:], "c:lo:",
                               ["columns=", "dont-group-symbols",
                                "group-symbols", "letters", "output="])
    for opt, val in opts:
        if opt in ("-o", "--output"):
            ofn = val
        elif opt in ("-c", "--columns"):
            columns = int(val, 10)
        elif opt in ("-l", "--letters"):
            letters = 1
        elif opt == "--group-symbols":
            group_symbol_nodes = 1
        elif opt == "--dont-group-symbols":
            group_symbol_nodes = 0
    if not args:
        args = [ifn]
    nodes = []
    for fn in args:
        if fn == "-":
            # Mirror the "-" convention already used for the output file.
            nodes.extend(load(sys.stdin))
        else:
            with open(fn) as fp:
                nodes.extend(load(fp))
    num_nodes = len(nodes)
    html = process_nodes(nodes, columns, letters, group_symbol_nodes)
    program = os.path.basename(sys.argv[0])
    if ofn == "-":
        sys.stdout.write(html)
        sys.stderr.write("\n%s: %d index nodes" % (program, num_nodes))
    else:
        with open(ofn, "w") as fp:
            fp.write(html)
        sys.stdout.write("\n%s: %d index nodes\n" % (program, num_nodes))


if __name__ == "__main__":
    main()