diff options
author | Guido van Rossum <guido@python.org> | 1995-04-10 11:47:11 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 1995-04-10 11:47:11 (GMT) |
commit | 5dd52d37ebc7acfd165bd8cde38e11e70a6a6252 (patch) | |
tree | 0d9370391f448f3d9be1523f05bfd81a66171e00 /Demo/ibrowse/ifile.py | |
parent | 5b98ac5b14b7f01d24aecd3d371ed899ed3f671c (diff) | |
download | cpython-5dd52d37ebc7acfd165bd8cde38e11e70a6a6252.zip cpython-5dd52d37ebc7acfd165bd8cde38e11e70a6a6252.tar.gz cpython-5dd52d37ebc7acfd165bd8cde38e11e70a6a6252.tar.bz2 |
commit -- why not
Diffstat (limited to 'Demo/ibrowse/ifile.py')
-rwxr-xr-x | Demo/ibrowse/ifile.py | 328 |
1 files changed, 328 insertions, 0 deletions
diff --git a/Demo/ibrowse/ifile.py b/Demo/ibrowse/ifile.py new file mode 100755 index 0000000..b8d59ee --- /dev/null +++ b/Demo/ibrowse/ifile.py @@ -0,0 +1,328 @@ +# Tools for info file processing. + +# XXX Need to be more careful with reading ahead searching for nodes. + + +import regexp +import string + + +# Exported exceptions. +# +NoSuchFile = 'no such file' +NoSuchNode = 'no such node' + + +# The search path for info files; this is site-specific. +# Directory names should end in a partname delimiter, +# so they can simply be concatenated to a relative pathname. +# +#INFOPATH = ['', ':Info.Ibrowse:', ':Info:'] # Mac +INFOPATH = ['', '/usr/local/emacs/info/'] # X11 on UNIX + + +# Tunable constants. +# +BLOCKSIZE = 512 # Qty to align reads to, if possible +FUZZ = 2*BLOCKSIZE # Qty to back-up before searching for a node +CHUNKSIZE = 4*BLOCKSIZE # Qty to read at once when reading lots of data + + +# Regular expressions used. +# Note that it is essential that Python leaves unrecognized backslash +# escapes in a string so they can be seen by regexp.compile! +# +findheader = regexp.compile('\037\014?\n(.*\n)').match +findescape = regexp.compile('\037').match +parseheader = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match +findfirstline = regexp.compile('^.*\n').match +findnode = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match +findprev = regexp.compile('[pP]rev[ious]*:[ \t]*([^\t,\n]*)').match +findnext = regexp.compile('[nN]ext:[ \t]*([^\t,\n]*)').match +findup = regexp.compile('[uU]p:[ \t]*([^\t,\n]*)').match +findmenu = regexp.compile('^\* [mM]enu:').match +findmenuitem = regexp.compile( \ + '^\* ([^:]+):[ \t]*(:|\([^\t]*\)[^\t,\n.]*|[^:(][^\t,\n.]*)').match +findfootnote = regexp.compile( \ + '\*[nN]ote ([^:]+):[ \t]*(:|[^:][^\t,\n.]*)').match +parsenoderef = regexp.compile('^\((.*)\)(.*)$').match + + +# Get a node and all information pertaining to it. 
# This doesn't work if there is an indirect tag table,
# and in general you are better off using icache.get_node() instead.
# Functions get_whole_file() and get_file_node() provide part
# functionality used by icache.
# Raise NoSuchFile or NoSuchNode as appropriate.
#
# All three functions return the same 6-tuple:
#   (file, node, (prev, next, up), menu, footnotes, text)
#
def get_node(curfile, ref):
    # Resolve the reference relative to curfile, then dispatch on the
    # special node name '*' (meaning: the whole file is one node).
    file, node = parse_ref(curfile, ref)
    if node == '*':
        return get_whole_file(file)
    else:
        return get_file_node(file, 0, node)
#
# Return the complete contents of a file as a single pseudo-node
# named '*', with empty links, menu and footnotes.
#
def get_whole_file(file):
    f = try_open(file) # May raise NoSuchFile
    try:
        text = f.read()
    finally:
        # Don't rely on the garbage collector to release the file.
        f.close()
    header, menu, footnotes = ('', '', ''), [], []
    return file, '*', header, menu, footnotes, text
#
# Locate one node in a file and analyze it.
# The offset is passed to find_node() as a hint (0 if unknown).
#
def get_file_node(file, offset, node):
    f = try_open(file) # May raise NoSuchFile
    try:
        text = find_node(f, offset, node) # May raise NoSuchNode
    finally:
        # Close the file even when the node isn't found.
        f.close()
    node, header, menu, footnotes = analyze_node(text)
    return file, node, header, menu, footnotes, text


# Parse a node reference into a file (possibly default) and node name.
# Possible reference formats are: "NODE", "(FILE)", "(FILE)NODE".
# Default file is the curfile argument; default node is Top.
# A node value of '*' is a special case: the whole file should
# be interpreted (by the caller!) as a single node.
# Returns the pair (file, node).
#
def parse_ref(curfile, ref):
    match = parsenoderef(ref)
    if not match:
        # No "(FILE)" prefix: the whole reference is the node name.
        file, node = curfile, ref
    else:
        # The match is unpacked as (start, end) index pairs:
        # whole match, FILE part, NODE part.
        (a, b), (a1, b1), (a2, b2) = match
        file, node = ref[a1:b1], ref[a2:b2]
    if not file:
        file = curfile # (Is this necessary?)
    if not node:
        node = 'Top'
    return file, node


# Extract node name, links, menu and footnotes from the node text.
+# +def analyze_node(text): + # + # Get node name and links from the header line + # + match = findfirstline(text) + if match: + (a, b) = match[0] + line = text[a:b] + else: + line = '' + node = get_it(text, findnode) + prev = get_it(text, findprev) + next = get_it(text, findnext) + up = get_it(text, findup) + # + # Get the menu items, if there is a menu + # + menu = [] + match = findmenu(text) + if match: + (a, b) = match[0] + while 1: + match = findmenuitem(text, b) + if not match: + break + (a, b), (a1, b1), (a2, b2) = match + topic, ref = text[a1:b1], text[a2:b2] + if ref == ':': + ref = topic + menu.append(topic, ref) + # + # Get the footnotes + # + footnotes = [] + b = 0 + while 1: + match = findfootnote(text, b) + if not match: + break + (a, b), (a1, b1), (a2, b2) = match + topic, ref = text[a1:b1], text[a2:b2] + if ref == ':': + ref = topic + footnotes.append(topic, ref) + # + return node, (prev, next, up), menu, footnotes +# +def get_it(line, matcher): + match = matcher(line) + if not match: + return '' + else: + (a, b), (a1, b1) = match + return line[a1:b1] + + +# Find a node in an open file. +# The offset (from the tags table) is a hint about the node's position. +# Pass zero if there is no tags table. +# Raise NoSuchNode if the node isn't found. +# NB: This seeks around in the file. +# +def find_node(f, offset, node): + node = string.lower(node) # Just to be sure + # + # Position a little before the given offset, + # so we may find the node even if it has moved around + # in the file a little. + # + offset = max(0, ((offset-FUZZ) / BLOCKSIZE) * BLOCKSIZE) + f.seek(offset) + # + # Loop, hunting for a matching node header. + # + while 1: + buf = f.read(CHUNKSIZE) + if not buf: + break + i = 0 + while 1: + match = findheader(buf, i) + if match: + (a,b), (a1,b1) = match + start = a1 + line = buf[a1:b1] + i = b + match = parseheader(line) + if match: + (a,b), (a1,b1) = match + key = string.lower(line[a1:b1]) + if key == node: + # Got it! Now read the rest. 
+ return read_node(f, buf[start:]) + elif findescape(buf, i): + next = f.read(CHUNKSIZE) + if not next: + break + buf = buf + next + else: + break + # + # If we get here, we didn't find it. Too bad. + # + raise NoSuchNode, node + + +# Finish off getting a node (subroutine for find_node()). +# The node begins at the start of buf and may end in buf; +# if it doesn't end there, read additional data from f. +# +def read_node(f, buf): + i = 0 + match = findescape(buf, i) + while not match: + next = f.read(CHUNKSIZE) + if not next: + end = len(buf) + break + i = len(buf) + buf = buf + next + match = findescape(buf, i) + else: + # Got a match + (a, b) = match[0] + end = a + # Strip trailing newlines + while end > 0 and buf[end-1] == '\n': + end = end-1 + buf = buf[:end] + return buf + + +# Read reverse starting at offset until the beginning of a node is found. +# Then return a buffer containing the beginning of the node, +# with f positioned just after the buffer. +# The buffer will contain at least the full header line of the node; +# the caller should finish off with read_node() if it is the right node. +# (It is also possible that the buffer extends beyond the node!) +# Return an empty string if there is no node before the given offset. +# +def backup_node(f, offset): + start = max(0, ((offset-CHUNKSIZE) / BLOCKSIZE) * BLOCKSIZE) + end = offset + while start < end: + f.seek(start) + buf = f.read(end-start) + i = 0 + hit = -1 + while 1: + match = findheader(buf, i) + if match: + (a,b), (a1,b1) = match + hit = a1 + i = b + elif end < offset and findescape(buf, i): + next = f.read(min(offset-end, BLOCKSIZE)) + if not next: + break + buf = buf + next + end = end + len(next) + else: + break + if hit >= 0: + return buf[hit:] + end = start + start = max(0, end - CHUNKSIZE) + return '' + + +# Make a tag table for the given file by scanning the file. 
+# The file must be open for reading, and positioned at the beginning +# (or wherever the hunt for tags must begin; it is read till the end). +# +def make_tags(f): + tags = {} + while 1: + offset = f.tell() + buf = f.read(CHUNKSIZE) + if not buf: + break + i = 0 + while 1: + match = findheader(buf, i) + if match: + (a,b), (a1,b1) = match + start = offset+a1 + line = buf[a1:b1] + i = b + match = parseheader(line) + if match: + (a,b), (a1,b1) = match + key = string.lower(line[a1:b1]) + if tags.has_key(key): + print 'Duplicate node:', + print key + tags[key] = '', start, line + elif findescape(buf, i): + next = f.read(CHUNKSIZE) + if not next: + break + buf = buf + next + else: + break + return tags + + +# Try to open a file, return a file object if succeeds. +# Raise NoSuchFile if the file can't be opened. +# Should treat absolute pathnames special. +# +def try_open(file): + for dir in INFOPATH: + try: + return open(dir + file, 'r') + except IOError: + pass + raise NoSuchFile, file + + +# A little test for the speed of make_tags(). +# +TESTFILE = 'texinfo-1' +def test_make_tags(): + import time + f = try_open(TESTFILE) + t1 = time.time() + tags = make_tags(f) + t2 = time.time() + print 'Making tag table for', `TESTFILE`, 'took', t2-t1, 'sec.' |