commit -- why not

author: Guido van Rossum <guido@python.org> 1995-04-10 11:47:11 (GMT)
committer: Guido van Rossum <guido@python.org> 1995-04-10 11:47:11 (GMT)
commit: 5dd52d37ebc7acfd165bd8cde38e11e70a6a6252 (patch)
tree: 0d9370391f448f3d9be1523f05bfd81a66171e00 /Demo/ibrowse/ifile.py
parent: 5b98ac5b14b7f01d24aecd3d371ed899ed3f671c (diff)
download: cpython-5dd52d37ebc7acfd165bd8cde38e11e70a6a6252.zip
cpython-5dd52d37ebc7acfd165bd8cde38e11e70a6a6252.tar.gz
cpython-5dd52d37ebc7acfd165bd8cde38e11e70a6a6252.tar.bz2
1 files changed, 328 insertions, 0 deletions
diff --git a/Demo/ibrowse/ifile.py b/Demo/ibrowse/ifile.py
new file mode 100755
index 0000000..b8d59ee
--- /dev/null
+++ b/Demo/ibrowse/ifile.py
@@ -0,0 +1,328 @@
+# Tools for info file processing.
+
+# XXX Need to be more careful with reading ahead searching for nodes.
+
+
+import regexp
+import string
+
+
+# Exported exceptions (string exceptions, raised as "raise X, argument").
+#
+NoSuchFile = 'no such file'	# Raised by try_open() with the file name
+NoSuchNode = 'no such node'	# Raised by find_node() with the node name
+
+
+# The search path for info files; this is site-specific.
+# Directory names should end in a pathname delimiter, because
+# try_open() simply concatenates each one with the file name.
+# The empty string tries the file name exactly as given.
+#INFOPATH = ['', ':Info.Ibrowse:', ':Info:']	# Mac
+INFOPATH = ['', '/usr/local/emacs/info/']	# X11 on UNIX
+
+
+# Tunable constants (quantities handed to f.seek() and f.read()).
+#
+BLOCKSIZE = 512			# Qty to align reads to, if possible
+FUZZ = 2*BLOCKSIZE		# Qty to back-up before searching for a node
+CHUNKSIZE = 4*BLOCKSIZE		# Qty to read at once when reading lots of data
+
+
+# Regular expressions used.  Each name is the match method of a compiled
+# pattern; callers unpack its result as (start, end) pairs, whole match
+# first, then one per group.  It is essential that Python leaves unknown
+# backslash escapes in a string so they can be seen by regexp.compile!
+findheader = regexp.compile('\037\014?\n(.*\n)').match	# ^_ [^L] NL, header line
+findescape = regexp.compile('\037').match	# ^_ on its own
+parseheader = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match	# Name in header
+findfirstline = regexp.compile('^.*\n').match	# First line with its newline
+findnode = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match	# Same as parseheader
+findprev = regexp.compile('[pP]rev[ious]*:[ \t]*([^\t,\n]*)').match	# Prev/Previous
+findnext = regexp.compile('[nN]ext:[ \t]*([^\t,\n]*)').match	# Next pointer
+findup = regexp.compile('[uU]p:[ \t]*([^\t,\n]*)').match	# Up pointer
+findmenu = regexp.compile('^\* [mM]enu:').match	# Start of the menu
+findmenuitem = regexp.compile( \
+	'^\* ([^:]+):[ \t]*(:|\([^\t]*\)[^\t,\n.]*|[^:(][^\t,\n.]*)').match # Topic, ref
+findfootnote = regexp.compile( \
+	'\*[nN]ote ([^:]+):[ \t]*(:|[^:][^\t,\n.]*)').match # Topic, ref
+parsenoderef = regexp.compile('^\((.*)\)(.*)$').match	# "(FILE)NODE"
+
+
+# Fetch a node together with everything known about it.
+# The reference is resolved by parse_ref(); a node name of '*' yields
+# the whole file as one node, anything else is looked up from offset 0.
+# This doesn't work if there is an indirect tag table, and in general
+# you are better off using icache.get_node() instead; get_whole_file()
+# and get_file_node() provide the part functionality used by icache.
+# Raise NoSuchFile or NoSuchNode as appropriate.
+#
+def get_node(curfile, ref):
+	file, node = parse_ref(curfile, ref)
+	if node != '*':
+		return get_file_node(file, 0, node)
+	return get_whole_file(file)
+# Return the entire file as one node called '*', without links or menu.
+def get_whole_file(file):
+	f = try_open(file) # May raise NoSuchFile
+	try: text = f.read()
+	finally: f.close() # Release the file even if reading fails
+	return file, '*', ('', '', ''), [], [], text
+# Return (file, node, header, menu, footnotes, text) for one node of a file.
+def get_file_node(file, offset, node):
+	f = try_open(file) # May raise NoSuchFile
+	try: text = find_node(f, offset, node) # May raise NoSuchNode
+	finally: f.close() # Close promptly, also when the node is missing
+	return (file,) + analyze_node(text) + (text,) # Middle four: analyze_node
+
+
+# Split a node reference into a (file, node) pair.
+# Accepted reference formats are: "NODE", "(FILE)", "(FILE)NODE".
+# A missing or empty file part defaults to curfile, and a missing
+# node part defaults to 'Top'.  The node name '*' is special: the
+# caller (not this function) must treat the whole file as one node.
+#
+def parse_ref(curfile, ref):
+	file, node = curfile, ref # Assume the plain "NODE" format
+	pairs = parsenoderef(ref)
+	if pairs:
+		# The two groups delimit the FILE and NODE parts of ref
+		(m1, m2), (f1, f2), (n1, n2) = pairs
+		file, node = ref[f1:f2], ref[n1:n2]
+	if not file:
+		file = curfile # (Is this necessary?)
+	if not node:
+		node = 'Top'
+	return file, node
+
+
+# Extract node name, links, menu and footnotes from the node text.
+# Return (node, (prev, next, up), menu, footnotes); see comments below.
+def analyze_node(text):
+	#
+	# Get node name and links from the header line only, so that
+	# look-alike text further down in the node body is never picked up.
+	match = findfirstline(text)
+	if match:
+		(a, b) = match[0]
+		line = text[a:b]
+	else:
+		line = text # No newline at all: the node is just its header
+	node = get_it(line, findnode)
+	prev = get_it(line, findprev)
+	next = get_it(line, findnext)
+	up = get_it(line, findup)
+	#
+	# Get the menu items, if there is a menu; each item becomes a
+	# (topic, ref) pair, where a ref of ':' stands for the topic itself.
+	menu = []
+	match = findmenu(text)
+	if match:
+		(a, b) = match[0]
+		while 1:
+			match = findmenuitem(text, b)
+			if not match:
+				break
+			(a, b), (a1, b1), (a2, b2) = match
+			topic, ref = text[a1:b1], text[a2:b2]
+			if ref == ':':
+				ref = topic
+			menu.append((topic, ref)) # One tuple argument
+	#
+	# Get the footnotes ("*note" cross references) from the whole
+	# text, in the same (topic, ref) form as the menu items.
+	footnotes = []
+	b = 0
+	while 1:
+		match = findfootnote(text, b)
+		if not match:
+			break
+		(a, b), (a1, b1), (a2, b2) = match
+		topic, ref = text[a1:b1], text[a2:b2]
+		if ref == ':':
+			ref = topic
+		footnotes.append((topic, ref)) # One tuple argument
+	#
+	return node, (prev, next, up), menu, footnotes
+# Return the text of the first group found by matcher in line, or ''.
+def get_it(line, matcher):
+	pairs = matcher(line)
+	if pairs:
+		# pairs is ((start, end) of match, (start, end) of group 1)
+		(m1, m2), (lo, hi) = pairs
+		return line[lo:hi]
+	return ''
+
+
+# Find a node in an open file and return what read_node() makes of it.
+# The offset (from the tags table) is a hint about the node's position;
+# pass zero if there is no tags table.  Searching starts a little
+# before the offset and only ever moves forward from there.
+# Raise NoSuchNode (with the lowercased name) if the node isn't found.
+# NB: This seeks around in the file.
+def find_node(f, offset, node):
+	node = string.lower(node) # Header names are lowercased too (below)
+	#
+	# Position a little before the given offset (FUZZ less, aligned
+	# down to BLOCKSIZE, never negative), so we may find the node
+	# even if it has moved around in the file a little.
+	#
+	offset = max(0, ((offset-FUZZ) / BLOCKSIZE) * BLOCKSIZE)
+	f.seek(offset)
+	#
+	# Loop, hunting for a matching node header.
+	#
+	while 1:
+		buf = f.read(CHUNKSIZE)
+		if not buf:
+			break # Nothing more to read
+		i = 0 # Position in buf from which to look for a header
+		while 1:
+			match = findheader(buf, i)
+			if match:
+				(a,b), (a1,b1) = match
+				start = a1 # Start of the header line in buf
+				line = buf[a1:b1]
+				i = b # Next search resumes after this header
+				match = parseheader(line)
+				if match:
+					(a,b), (a1,b1) = match
+					key = string.lower(line[a1:b1])
+					if key == node:
+						# Got it!  Now read the rest.
+						return read_node(f, buf[start:])
+			elif findescape(buf, i): # Separator but no full header yet
+				next = f.read(CHUNKSIZE)
+				if not next:
+					break
+				buf = buf + next # Retry with more data appended
+			else:
+				break # No separator left here; fetch a fresh chunk
+	#
+	# If we get here, we didn't find it.  Too bad.
+	#
+	raise NoSuchNode, node
+
+
+# Finish off getting a node (subroutine for find_node()).
+# The node begins at the start of buf and may end in buf;
+# if it doesn't end there, read additional data from f.
+# The result runs up to (not including) the next \037 character,
+# or to end of file, with trailing newlines removed.
+def read_node(f, buf):
+	scanned = 0 # Part of buf already known to hold no \037
+	while 1:
+		hit = findescape(buf, scanned)
+		if hit:
+			# The node stops where the separator starts
+			end = hit[0][0]
+			break
+		more = f.read(CHUNKSIZE)
+		if not more:
+			# End of file: everything read so far belongs to the node
+			end = len(buf)
+			break
+		scanned = len(buf)
+		buf = buf + more
+	# Strip trailing newlines
+	while end > 0 and buf[end-1] == '\n':
+		end = end-1
+	return buf[:end]
+
+
+# Read reverse starting at offset until the beginning of a node is found.
+# Then return a buffer containing the beginning of the node,
+# with f positioned just after the buffer.
+# The buffer will contain at least the full header line of the node;
+# the caller should finish off with read_node() if it is the right node.
+# (It is also possible that the buffer extends beyond the node!)
+# Return an empty string if there is no node before the given offset.
+# The file is examined in windows [start, end) that step toward offset 0.
+def backup_node(f, offset):
+	start = max(0, ((offset-CHUNKSIZE) / BLOCKSIZE) * BLOCKSIZE)
+	end = offset # Nothing at or beyond the given offset is ever read
+	while start < end:
+		f.seek(start)
+		buf = f.read(end-start)
+		i = 0
+		hit = -1 # Index in buf of the last header line seen, if any
+		while 1:
+			match = findheader(buf, i)
+			if match:
+				(a,b), (a1,b1) = match
+				hit = a1 # A later header overrides an earlier one
+				i = b
+			elif end < offset and findescape(buf, i): # Cut-off header?
+				next = f.read(min(offset-end, BLOCKSIZE))
+				if not next:
+					break
+				buf = buf + next
+				end = end + len(next) # Window grew, still <= offset
+			else:
+				break
+		if hit >= 0:
+			return buf[hit:] # From the header line to the window's end
+		end = start # No header here; move the window toward the start
+		start = max(0, end - CHUNKSIZE)
+	return ''
+
+
+# Make a tag table for the given file by scanning the file.
+# The file must be open for reading, and positioned at the beginning
+# (or wherever the hunt for tags must begin; it is read till the end).
+# Return a dictionary: lowercased node name -> ('', offset, header line).
+def make_tags(f):
+	tags = {}
+	while 1:
+		offset = f.tell() # File position corresponding to buf[0]
+		buf = f.read(CHUNKSIZE)
+		if not buf:
+			break # Nothing more to read
+		i = 0 # Position in buf from which to look for a header
+		while 1:
+			match = findheader(buf, i)
+			if match:
+				(a,b), (a1,b1) = match
+				start = offset+a1 # File position of the header line
+				line = buf[a1:b1]
+				i = b # Next search resumes after this header
+				match = parseheader(line)
+				if match:
+					(a,b), (a1,b1) = match
+					key = string.lower(line[a1:b1])
+					if tags.has_key(key):
+						print 'Duplicate node:',
+						print key
+					tags[key] = '', start, line # A duplicate replaces the old entry
+			elif findescape(buf, i): # Separator but no full header yet
+				next = f.read(CHUNKSIZE)
+				if not next:
+					break
+				buf = buf + next # Retry with more data appended
+			else:
+				break # No separator left here; fetch a fresh chunk
+	return tags
+
+
+# Open an info file for reading, trying each INFOPATH prefix in turn.
+# Raise NoSuchFile if the file can't be opened with any of them.
+# Should treat absolute pathnames special.
+def try_open(file):
+	for prefix in INFOPATH:
+		pathname = prefix + file
+		try:
+			return open(pathname, 'r')
+		except IOError:
+			pass
+	raise NoSuchFile, file
+
+
+# Timing check: report how long make_tags() takes on TESTFILE,
+# which is located through INFOPATH by try_open().
+TESTFILE = 'texinfo-1'
+def test_make_tags():
+	import time
+	f = try_open(TESTFILE)
+	started = time.time()
+	make_tags(f) # Only the elapsed time matters; the table is dropped
+	elapsed = time.time() - started
+	print 'Making tag table for', `TESTFILE`, 'took', elapsed, 'sec.'
author	Guido van Rossum <guido@python.org>	1995-04-10 11:47:11 (GMT)
committer	Guido van Rossum <guido@python.org>	1995-04-10 11:47:11 (GMT)
commit	5dd52d37ebc7acfd165bd8cde38e11e70a6a6252 (patch)
tree	0d9370391f448f3d9be1523f05bfd81a66171e00 /Demo/ibrowse/ifile.py
parent	5b98ac5b14b7f01d24aecd3d371ed899ed3f671c (diff)
download	cpython-5dd52d37ebc7acfd165bd8cde38e11e70a6a6252.zip cpython-5dd52d37ebc7acfd165bd8cde38e11e70a6a6252.tar.gz cpython-5dd52d37ebc7acfd165bd8cde38e11e70a6a6252.tar.bz2