summary | refs | log | tree | commit | diff | stats
path: root/Demo/ibrowse/ifile.py
diff options
context:
space:
mode:
author Guido van Rossum <guido@python.org> 1995-04-10 11:47:11 (GMT)
committer Guido van Rossum <guido@python.org> 1995-04-10 11:47:11 (GMT)
commit 5dd52d37ebc7acfd165bd8cde38e11e70a6a6252 (patch)
tree 0d9370391f448f3d9be1523f05bfd81a66171e00 /Demo/ibrowse/ifile.py
parent 5b98ac5b14b7f01d24aecd3d371ed899ed3f671c (diff)
download cpython-5dd52d37ebc7acfd165bd8cde38e11e70a6a6252.zip
cpython-5dd52d37ebc7acfd165bd8cde38e11e70a6a6252.tar.gz
cpython-5dd52d37ebc7acfd165bd8cde38e11e70a6a6252.tar.bz2
commit -- why not
Diffstat (limited to 'Demo/ibrowse/ifile.py')
-rwxr-xr-x Demo/ibrowse/ifile.py 328
1 files changed, 328 insertions, 0 deletions
diff --git a/Demo/ibrowse/ifile.py b/Demo/ibrowse/ifile.py
new file mode 100755
index 0000000..b8d59ee
--- /dev/null
+++ b/Demo/ibrowse/ifile.py
@@ -0,0 +1,328 @@
+# Tools for info file processing.
+
+# XXX Need to be more careful with reading ahead searching for nodes.
+
+
+import regexp
+import string
+
+
+# Exported exceptions.
+# Both are plain strings, raised below as "raise NAME, value".
+NoSuchFile = 'no such file' # Raised by try_open(); the value is the file name asked for
+NoSuchNode = 'no such node' # Raised by find_node(); the value is the lowercased node name
+
+
+# The search path for info files; this is site-specific.
+# Directory names should end in a pathname delimiter,
+# so they can simply be concatenated to a relative pathname.
+# try_open() tries the entries in order; '' means the name is used as given.
+#INFOPATH = ['', ':Info.Ibrowse:', ':Info:'] # Mac
+INFOPATH = ['', '/usr/local/emacs/info/'] # X11 on UNIX
+
+
+# Tunable constants.
+# They are used as f.read() counts and in f.seek() offset arithmetic; FUZZ and CHUNKSIZE are multiples of BLOCKSIZE.
+BLOCKSIZE = 512 # Qty to align reads to, if possible
+FUZZ = 2*BLOCKSIZE # Qty to back up from the offset hint before searching for a node (see find_node)
+CHUNKSIZE = 4*BLOCKSIZE # Qty to read at once when reading lots of data
+
+
+# Regular expressions used; each name below is the bound match method of a compiled pattern.
+# Note that it is essential that Python leaves unrecognized backslash
+# escapes in a string so they can be seen by regexp.compile!
+# As used in this file, a matcher is called as m(string[, offset]) and yields a false value or a sequence of (start, end) index pairs: whole match first, then one per group.
+findheader = regexp.compile('\037\014?\n(.*\n)').match # ^_ (octal 037), optional ^L, newline; group 1 = the line that follows (the node header)
+findescape = regexp.compile('\037').match # The ^_ character alone; read_node() takes it as the end of a node
+parseheader = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match # "Node:" field; group 1 = name up to tab, comma or newline
+findfirstline = regexp.compile('^.*\n').match # First line, newline included
+findnode = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match # Same pattern as parseheader
+findprev = regexp.compile('[pP]rev[ious]*:[ \t]*([^\t,\n]*)').match # "Prev:"; the [ious]* class also admits "Previous:"; group 1 = target
+findnext = regexp.compile('[nN]ext:[ \t]*([^\t,\n]*)').match # "Next:" field; group 1 = target
+findup = regexp.compile('[uU]p:[ \t]*([^\t,\n]*)').match # "Up:" field; group 1 = target
+findmenu = regexp.compile('^\* [mM]enu:').match # The "* Menu:" line that starts a menu
+findmenuitem = regexp.compile( \
+ '^\* ([^:]+):[ \t]*(:|\([^\t]*\)[^\t,\n.]*|[^:(][^\t,\n.]*)').match # group 1 = topic; group 2 = ":" (same as topic) or the reference
+findfootnote = regexp.compile( \
+ '\*[nN]ote ([^:]+):[ \t]*(:|[^:][^\t,\n.]*)').match # "*Note topic: ref"; groups as for findmenuitem
+parsenoderef = regexp.compile('^\((.*)\)(.*)$').match # "(FILE)NODE"; group 1 = FILE, group 2 = NODE
+
+
+# Get a node and all information pertaining to it.
+# This doesn't work if there is an indirect tag table,
+# and in general you are better off using icache.get_node() instead.
+# Functions get_whole_file() and get_file_node() provide part
+# functionality used by icache.
+# Raise NoSuchFile or NoSuchNode as appropriate.
+#
+def get_node(curfile, ref):
+ file, node = parse_ref(curfile, ref)
+ if node == '*':
+ return get_whole_file(file)
+ else:
+ return get_file_node(file, 0, node)
+#
+def get_whole_file(file):
+ f = try_open(file) # May raise NoSuchFile
+ text = f.read()
+ header, menu, footnotes = ('', '', ''), [], []
+ return file, '*', header, menu, footnotes, text
+#
+def get_file_node(file, offset, node):
+ f = try_open(file) # May raise NoSuchFile
+ text = find_node(f, offset, node) # May raise NoSuchNode
+ node, header, menu, footnotes = analyze_node(text)
+ return file, node, header, menu, footnotes, text
+
+
+# Parse a node reference into a file (possibly default) and node name.
+# Possible reference formats are: "NODE", "(FILE)", "(FILE)NODE".
+# Default file is the curfile argument; default node is Top.
+# A node value of '*' is a special case: the whole file should
+# be interpreted (by the caller!) as a single node.
+#
+def parse_ref(curfile, ref):
+ match = parsenoderef(ref)
+ if not match:
+ file, node = curfile, ref
+ else:
+ (a, b), (a1, b1), (a2, b2) = match
+ file, node = ref[a1:b1], ref[a2:b2]
+ if not file:
+ file = curfile # (Is this necessary?)
+ if not node:
+ node = 'Top'
+ return file, node
+
+
+# Extract node name, links, menu and footnotes from the node text.
+#
+def analyze_node(text):
+ #
+ # Get node name and links from the header line
+ #
+ match = findfirstline(text)
+ if match:
+ (a, b) = match[0]
+ line = text[a:b]
+ else:
+ line = ''
+ node = get_it(text, findnode)
+ prev = get_it(text, findprev)
+ next = get_it(text, findnext)
+ up = get_it(text, findup)
+ #
+ # Get the menu items, if there is a menu
+ #
+ menu = []
+ match = findmenu(text)
+ if match:
+ (a, b) = match[0]
+ while 1:
+ match = findmenuitem(text, b)
+ if not match:
+ break
+ (a, b), (a1, b1), (a2, b2) = match
+ topic, ref = text[a1:b1], text[a2:b2]
+ if ref == ':':
+ ref = topic
+ menu.append(topic, ref)
+ #
+ # Get the footnotes
+ #
+ footnotes = []
+ b = 0
+ while 1:
+ match = findfootnote(text, b)
+ if not match:
+ break
+ (a, b), (a1, b1), (a2, b2) = match
+ topic, ref = text[a1:b1], text[a2:b2]
+ if ref == ':':
+ ref = topic
+ footnotes.append(topic, ref)
+ #
+ return node, (prev, next, up), menu, footnotes
+#
+def get_it(line, matcher):
+ match = matcher(line)
+ if not match:
+ return ''
+ else:
+ (a, b), (a1, b1) = match
+ return line[a1:b1]
+
+
+# Find the node named `node` in the open file f and return its text (via read_node()).
+# The offset (from the tags table) is only a hint about the node's position.
+# Pass zero if there is no tags table.
+# Raise NoSuchNode (with the lowercased name as value) if the node isn't found.
+# NB: This seeks around in the file.
+# Node names are compared after lowercasing both sides.
+def find_node(f, offset, node):
+ node = string.lower(node) # Just to be sure; header names are lowercased below as well
+ #
+ # Position a little before the given offset,
+ # so we may find the node even if it has moved around
+ # in the file a little.
+ #
+ offset = max(0, ((offset-FUZZ) / BLOCKSIZE) * BLOCKSIZE) # Back up by FUZZ, align to BLOCKSIZE, never before the start
+ f.seek(offset)
+ #
+ # Loop, hunting for a matching node header.
+ # Outer loop: one chunk per iteration; inner loop: every header in the (possibly extended) chunk.
+ while 1:
+ buf = f.read(CHUNKSIZE)
+ if not buf:
+ break # End of file
+ i = 0
+ while 1:
+ match = findheader(buf, i)
+ if match:
+ (a,b), (a1,b1) = match
+ start = a1 # Index in buf where the header line begins
+ line = buf[a1:b1]
+ i = b # Continue after this header next time round
+ match = parseheader(line)
+ if match:
+ (a,b), (a1,b1) = match
+ key = string.lower(line[a1:b1])
+ if key == node:
+ # Got it! Now read the rest.
+ return read_node(f, buf[start:])
+ elif findescape(buf, i): # A ^_ but no complete header after it: the header may continue in the next chunk
+ next = f.read(CHUNKSIZE)
+ if not next:
+ break
+ buf = buf + next
+ else:
+ break # No further ^_ in buf; go on to a fresh chunk
+ #
+ # If we get here, we didn't find it. Too bad.
+ #
+ raise NoSuchNode, node
+
+
+# Finish off getting a node (subroutine for find_node()).
+# The node begins at the start of buf and may end in buf;
+# if it doesn't end there, read additional data from f.
+# The node ends at the next ^_ (findescape) or at end of file; trailing newlines are stripped from the result.
+def read_node(f, buf):
+ i = 0
+ match = findescape(buf, i)
+ while not match:
+ next = f.read(CHUNKSIZE)
+ if not next:
+ end = len(buf) # End of file: the node runs to the end of what was read
+ break
+ i = len(buf) # Only the newly appended data needs searching
+ buf = buf + next
+ match = findescape(buf, i)
+ else: # while/else: reached only when the loop ends without break
+ # Got a match
+ (a, b) = match[0]
+ end = a # The node stops just before the ^_
+ # Strip trailing newlines (presumably at function level, after the while/else -- indentation is lost in this listing)
+ while end > 0 and buf[end-1] == '\n':
+ end = end-1
+ buf = buf[:end]
+ return buf
+
+
+# Read backwards starting at offset until the beginning of a node is found.
+# Then return a buffer containing the beginning of the node,
+# with f positioned just after the buffer.
+# The buffer will contain at least the full header line of the node;
+# the caller should finish off with read_node() if it is the right node.
+# (It is also possible that the buffer extends beyond the node!)
+# Return an empty string if there is no node before the given offset.
+# When a window contains several headers, the last one found in it is the one returned.
+def backup_node(f, offset):
+ start = max(0, ((offset-CHUNKSIZE) / BLOCKSIZE) * BLOCKSIZE) # First window: roughly CHUNKSIZE before offset, start aligned to BLOCKSIZE
+ end = offset
+ while start < end:
+ f.seek(start)
+ buf = f.read(end-start)
+ i = 0
+ hit = -1 # Index in buf of the latest header line seen; -1 means none yet
+ while 1:
+ match = findheader(buf, i)
+ if match:
+ (a,b), (a1,b1) = match
+ hit = a1
+ i = b
+ elif end < offset and findescape(buf, i): # A ^_ without a complete header: extend the window, but never beyond offset
+ next = f.read(min(offset-end, BLOCKSIZE))
+ if not next:
+ break
+ buf = buf + next
+ end = end + len(next)
+ else:
+ break
+ if hit >= 0:
+ return buf[hit:]
+ end = start # Nothing in this window: move it back towards the start of the file
+ start = max(0, end - CHUNKSIZE)
+ return ''
+
+
+# Make a tag table for the given file by scanning the file.
+# The file must be open for reading, and positioned at the beginning
+# (or wherever the hunt for tags must begin; it is read till the end).
+# Returns a dictionary: lowercased node name -> ('', position of the header line, header line text).
+def make_tags(f):
+ tags = {}
+ while 1:
+ offset = f.tell() # File position corresponding to buf[0]
+ buf = f.read(CHUNKSIZE)
+ if not buf:
+ break
+ i = 0
+ while 1:
+ match = findheader(buf, i)
+ if match:
+ (a,b), (a1,b1) = match
+ start = offset+a1 # Position of the header line, relative to where f.tell() counts from
+ line = buf[a1:b1]
+ i = b
+ match = parseheader(line)
+ if match:
+ (a,b), (a1,b1) = match
+ key = string.lower(line[a1:b1])
+ if tags.has_key(key):
+ print 'Duplicate node:',
+ print key
+ tags[key] = '', start, line # Presumably done for every parsed header, so a duplicate replaces the earlier entry
+ elif findescape(buf, i): # A ^_ but no complete header after it: read on, keeping offset as the base of buf
+ next = f.read(CHUNKSIZE)
+ if not next:
+ break
+ buf = buf + next
+ else:
+ break
+ return tags
+
+
+# Try to open a file, return a file object if succeeds.
+# Raise NoSuchFile if the file can't be opened.
+# Should treat absolute pathnames special.
+#
+def try_open(file):
+ for dir in INFOPATH:
+ try:
+ return open(dir + file, 'r')
+ except IOError:
+ pass
+ raise NoSuchFile, file
+
+
+# A little test for the speed of make_tags().
+#
+TESTFILE = 'texinfo-1'
+def test_make_tags():
+ import time
+ f = try_open(TESTFILE)
+ t1 = time.time()
+ tags = make_tags(f)
+ t2 = time.time()
+ print 'Making tag table for', `TESTFILE`, 'took', t2-t1, 'sec.'