initial checkin of www Tk examples

author: Guido van Rossum <guido@python.org> 1995-01-10 17:05:37 (GMT)
committer: Guido van Rossum <guido@python.org> 1995-01-10 17:05:37 (GMT)
commit: dfa70a9fbc23bfef2badb6b17639354c4a9aeda8 (patch)
tree: fb4775eb6039fb91dbbb50ba7c105e54490fe8fe /Demo/tkinter/www/htmllib.py
parent: ca9b323c525d2e5975ca0fbedd158a722110ca9a (diff)
download: cpython-dfa70a9fbc23bfef2badb6b17639354c4a9aeda8.zip
cpython-dfa70a9fbc23bfef2badb6b17639354c4a9aeda8.tar.gz
cpython-dfa70a9fbc23bfef2badb6b17639354c4a9aeda8.tar.bz2
1 files changed, 639 insertions, 0 deletions
diff --git a/Demo/tkinter/www/htmllib.py b/Demo/tkinter/www/htmllib.py
new file mode 100755
index 0000000..f45657f
--- /dev/null
+++ b/Demo/tkinter/www/htmllib.py
@@ -0,0 +1,639 @@
+# A parser for HTML documents
+
+
+# HTML: HyperText Markup Language; an SGML-like syntax used by WWW to
+# describe hypertext documents
+#
+# SGML: Standard Generalized Markup Language
+#
+# WWW: World-Wide Web; a distributed hypertext system developed at CERN
+#
+# CERN: European Particle Physics Laboratory in Geneva, Switzerland
+
+
+# This file is only concerned with parsing and formatting HTML
+# documents, not with the other (hypertext and networking) aspects of
+# the WWW project.  (It does support highlighting of anchors.)
+
+
+import os
+import sys
+import regex
+import string
+import sgmllib
+
+
+class HTMLParser(sgmllib.SGMLParser):
+
+	# HTML layer on top of sgmllib.SGMLParser: adds one entity and
+	# handlers for the tags that introduce literal text (<listing>,
+	# <xmp> and <plaintext>).  Subclasses hook into literal text by
+	# overriding literal_bgn() and literal_end().
+
+	# Copy base class entities and add some
+	# (a fresh dictionary is filled, so the base class's dictionary
+	# itself is not modified)
+	entitydefs = {}
+	for key in sgmllib.SGMLParser.entitydefs.keys():
+		entitydefs[key] = sgmllib.SGMLParser.entitydefs[key]
+	entitydefs['bullet'] = '*'
+
+	# Provided -- handlers for tags introducing literal text
+	
+	# <listing>: call the inherited setliteral() with the tag name
+	# (presumably switches the base parser to literal mode -- see
+	# sgmllib), then notify literal_bgn()
+	def start_listing(self, attrs):
+		self.setliteral('listing')
+		self.literal_bgn('listing', attrs)
+
+	# </listing>: notify literal_end()
+	def end_listing(self):
+		self.literal_end('listing')
+
+	# <xmp>: handled exactly like <listing>
+	def start_xmp(self, attrs):
+		self.setliteral('xmp')
+		self.literal_bgn('xmp', attrs)
+
+	# </xmp>: notify literal_end()
+	def end_xmp(self):
+		self.literal_end('xmp')
+
+	# <plaintext>: has no end handler; calls the inherited
+	# setnomoretags() (presumably the rest of the input is then taken
+	# as text -- see sgmllib) and notifies literal_bgn()
+	def do_plaintext(self, attrs):
+		self.setnomoretags()
+		self.literal_bgn('plaintext', attrs)
+
+	# To be overridden -- begin/end literal mode
+	# tag is 'listing', 'xmp' or 'plaintext'; attrs is the attribute
+	# list that was passed to the start handler
+	def literal_bgn(self, tag, attrs): pass
+	def literal_end(self, tag): pass
+
+
+# Next level of sophistication -- collect anchors, title, nextid and isindex
+class CollectingParser(HTMLParser):
+	#
+	# Collects document-level information while parsing:
+	#   anchors, anchornames, anchortypes -- parallel lists with the
+	#       href, name and lower-cased type of every <a> tag that
+	#       has an href or a name
+	#   inanchor -- 0 outside an anchor; inside one, the 1-based
+	#       index of the anchor in the lists above, negated when the
+	#       anchor has no href
+	#   title -- the text found between <title> and </title>
+	#   nextid -- the attribute list of the last <nextid> tag seen
+	#   isindex -- set to 1 when an <isindex> tag is seen
+	#   savetext -- None, or the text accumulated so far inside
+	#       <title>
+	#
+	def __init__(self):
+		HTMLParser.__init__(self)
+		self.savetext = None
+		self.nextid = ''
+		self.isindex = 0
+		self.title = ''
+		self.inanchor = 0
+		self.anchors = []
+		self.anchornames = []
+		self.anchortypes = []
+	#
+	# <a ...>: record the anchor and set inanchor.  Anchors with
+	# neither href nor name are ignored.
+	def start_a(self, attrs):
+		self.inanchor = 0
+		href = ''
+		name = ''
+		type = ''
+		for attrname, value in attrs:
+			# Attribute names carry no trailing '='; comparing
+			# against 'name=' and 'type=' never matched, so
+			# names and types were silently dropped
+			if attrname == 'href':
+				href = value
+			if attrname == 'name':
+				name = value
+			if attrname == 'type':
+				type = string.lower(value)
+		if not (href or name):
+			return
+		self.anchors.append(href)
+		self.anchornames.append(name)
+		self.anchortypes.append(type)
+		self.inanchor = len(self.anchors)
+		if not href:
+			# Name-only anchor: flag it with a negative index
+			self.inanchor = -self.inanchor
+	#
+	# </a>: for an anchor with an href, emit its number as '[n]'
+	# through handle_data(), then leave anchor mode
+	def end_a(self):
+		if self.inanchor > 0:
+			# Don't show anchors pointing into the current document
+			if self.anchors[self.inanchor-1][:1] <> '#':
+				self.handle_data('[' + `self.inanchor` + ']')
+		self.inanchor = 0
+	#
+	# Structural tags that are accepted but need no action
+	def start_header(self, attrs): pass
+	def end_header(self): pass
+	#
+	# (head is the same as header)
+	def start_head(self, attrs): pass
+	def end_head(self): pass
+	#
+	def start_body(self, attrs): pass
+	def end_body(self): pass
+	#
+	# <nextid>: keep the whole attribute list
+	def do_nextid(self, attrs):
+		self.nextid = attrs
+	#
+	def do_isindex(self, attrs):
+		self.isindex = 1
+	#
+	# <title>: start accumulating text in savetext
+	def start_title(self, attrs):
+		self.savetext = ''
+	#
+	# </title>: move the accumulated text to title and stop saving
+	def end_title(self):
+		if self.savetext <> None:
+			self.title = self.savetext
+			self.savetext = None
+	#
+	# Text is kept only while savetext is active (inside <title>);
+	# all other text is discarded at this level
+	def handle_data(self, text):
+		if self.savetext is not None:
+			self.savetext = self.savetext + text
+
+
+# Formatting parser -- takes a formatter and a style sheet as arguments
+
+# XXX The use of style sheets should change: for each tag and end tag
+# there should be a style definition, and a style definition should
+# encompass many more parameters: font, justification, indentation,
+# vspace before, vspace after, hanging tag...
+
+# Patterns used by FormattingParser.handle_data to split text:
+# wordprog matches a (possibly empty) run of non-whitespace characters,
+# spaceprog a (possibly empty) run of spaces, tabs and newlines
+wordprog = regex.compile('[^ \t\n]*')
+spaceprog = regex.compile('[ \t\n]*')
+
+class FormattingParser(CollectingParser):
+
+	# Parser that drives a formatter object (self.fmt) using the
+	# fonts and indents of a style sheet (self.stl).
+	#
+	# State kept here:
+	#   fontset, style -- current font set and index into it
+	#   fontstack, stylestack -- saved font sets and styles
+	#   compact -- 1 inside a compact list, <dl compact> or <address>
+	#   nofill -- nesting depth of <pre>/<typewriter>
+
+	# formatter is the object that receives the output calls;
+	# stylesheet is an object (or class) with the font set and
+	# indent attributes
+	def __init__(self, formatter, stylesheet):
+		CollectingParser.__init__(self)
+		self.fmt = formatter
+		self.stl = stylesheet
+		self.savetext = None
+		self.compact = 0
+		self.nofill = 0
+		self.resetfont()
+		self.setindent(self.stl.stdindent)
+
+	# Empty both stacks and go back to the standard font set in the
+	# ROMAN style
+	def resetfont(self):
+		self.fontstack = []
+		self.stylestack = []
+		self.fontset = self.stl.stdfontset
+		self.style = ROMAN
+		self.passfont()
+
+	# Hand the font for the current font set and style to the formatter
+	def passfont(self):
+		font = self.fontset[self.style]
+		self.fmt.setfont(font)
+
+	# Save the current style and switch to a new one.  A style beyond
+	# the end of the font set is clamped to the set's last entry.
+	def pushstyle(self, style):
+		self.stylestack.append(self.style)
+		self.style = min(style, len(self.fontset)-1)
+		self.passfont()
+
+	# Restore the style saved by the matching pushstyle()
+	def popstyle(self):
+		self.style = self.stylestack[-1]
+		del self.stylestack[-1]
+		self.passfont()
+
+	# Save the current font set, switch to a new one, and push style
+	def pushfontset(self, fontset, style):
+		self.fontstack.append(self.fontset)
+		self.fontset = fontset
+		self.pushstyle(style)
+
+	# Undo the matching pushfontset(): restore font set, then style
+	def popfontset(self):
+		self.fontset = self.fontstack[-1]
+		del self.fontstack[-1]
+		self.popstyle()
+
+	# Thin wrappers around the formatter
+
+	def flush(self):
+		self.fmt.flush()
+
+	def setindent(self, n):
+		self.fmt.setleftindent(n)
+
+	def needvspace(self, n):
+		self.fmt.needvspace(n)
+
+	# Finish parsing, then flush the formatter
+	def close(self):
+		HTMLParser.close(self)
+		self.fmt.flush()
+
+	# Output literal text line by line.  Tabs are expanded to 8
+	# columns in every line except the first.  Each line but the last
+	# is flushed; the last one is left pending in the formatter.
+	def handle_literal(self, text):
+		lines = string.splitfields(text, '\n')
+		for i in range(1, len(lines)):
+			lines[i] = string.expandtabs(lines[i], 8)
+		for line in lines[:-1]:
+			self.fmt.addword(line, 0)
+			self.fmt.flush()
+			self.fmt.nospace = 0
+		for line in lines[-1:]:
+			self.fmt.addword(line, 0)
+
+	# Dispatch text: saved while savetext is active, passed to
+	# handle_literal() while self.literal is true (self.literal is not
+	# set in this file; presumably maintained by sgmllib), otherwise
+	# split into words for the formatter.
+	def handle_data(self, text):
+		if self.savetext is not None:
+			self.savetext = self.savetext + text
+			return
+		if self.literal:
+			self.handle_literal(text)
+			return
+		i = 0
+		n = len(text)
+		while i < n:
+			# match() results are added to the start position,
+			# i.e. they are used as match lengths
+			j = i + wordprog.match(text, i)
+			word = text[i:j]
+			i = j + spaceprog.match(text, j)
+			# i-j is the amount of white space after the word
+			self.fmt.addword(word, i-j)
+			if self.nofill and '\n' in text[j:i]:
+				# No-fill mode: a newline ends the line.
+				# Resume just past the first newline so the
+				# white space after it is processed again.
+				self.fmt.flush()
+				self.fmt.nospace = 0
+				i = j+1
+				while text[i-1] <> '\n': i = i+1
+
+	# Start of literal text: fixed font from the standard font set,
+	# literal indent.  <plaintext> only flushes; the other tags ask
+	# for vertical space first.
+	def literal_bgn(self, tag, attrs):
+		if tag == 'plaintext':
+			self.flush()
+		else:
+			self.needvspace(1)
+		self.pushfontset(self.stl.stdfontset, FIXED)
+		self.setindent(self.stl.literalindent)
+
+	# End of literal text: restore font set and standard indent
+	def literal_end(self, tag):
+		self.needvspace(1)
+		self.popfontset()
+		self.setindent(self.stl.stdindent)
+
+	# <title>: flush pending output, then start saving text
+	def start_title(self, attrs):
+		self.flush()
+		self.savetext = ''
+	# NB end_title is unchanged
+
+	# <p>: a line break in compact mode, else one unit of vertical space
+	def do_p(self, attrs):
+		if self.compact:
+			self.flush()
+		else:
+			self.needvspace(1)
+
+	def do_hr(self, attrs):
+		self.fmt.hrule()
+
+	# Headings: each start handler sets the indent and pushes a bold
+	# font; each end handler pops it and restores the standard indent
+
+	# <h1> is also centered ('c') and gets two units of vertical space
+	def start_h1(self, attrs):
+		self.needvspace(2)
+		self.setindent(self.stl.h1indent)
+		self.pushfontset(self.stl.h1fontset, BOLD)
+		self.fmt.setjust('c')
+
+	def end_h1(self):
+		self.popfontset()
+		self.needvspace(2)
+		self.setindent(self.stl.stdindent)
+		self.fmt.setjust('l')
+
+	def start_h2(self, attrs):
+		self.needvspace(1)
+		self.setindent(self.stl.h2indent)
+		self.pushfontset(self.stl.h2fontset, BOLD)
+
+	def end_h2(self):
+		self.popfontset()
+		self.needvspace(1)
+		self.setindent(self.stl.stdindent)
+
+	# <h3> has its own font set but uses the standard indent
+	def start_h3(self, attrs):
+		self.needvspace(1)
+		self.setindent(self.stl.stdindent)
+		self.pushfontset(self.stl.h3fontset, BOLD)
+
+	def end_h3(self):
+		self.popfontset()
+		self.needvspace(1)
+		self.setindent(self.stl.stdindent)
+
+	# <h4> and below: bold in the standard font set
+	def start_h4(self, attrs):
+		self.needvspace(1)
+		self.setindent(self.stl.stdindent)
+		self.pushfontset(self.stl.stdfontset, BOLD)
+
+	def end_h4(self):
+		self.popfontset()
+		self.needvspace(1)
+		self.setindent(self.stl.stdindent)
+
+	start_h5 = start_h4
+	end_h5 = end_h4
+
+	start_h6 = start_h5
+	end_h6 = end_h5
+
+	start_h7 = start_h6
+	end_h7 = end_h6
+
+	# Lists: a 'compact' attribute turns on compact mode with indent
+	# 0; otherwise the list indent is used (the for-loop's else
+	# clause runs only when no 'compact' attribute was found)
+	def start_ul(self, attrs):
+		self.needvspace(1)
+		for attrname, value in attrs:
+			if attrname == 'compact':
+				self.compact = 1
+				self.setindent(0)
+				break
+		else:
+			self.setindent(self.stl.ulindent)
+
+	start_dir = start_menu = start_ol = start_ul
+
+	do_li = do_p
+
+	def end_ul(self):
+		self.compact = 0
+		self.needvspace(1)
+		self.setindent(self.stl.stdindent)
+
+	end_dir = end_menu = end_ol = end_ul
+
+	# Definition lists
+
+	def start_dl(self, attrs):
+		for attrname, value in attrs:
+			if attrname == 'compact':
+				self.compact = 1
+		self.needvspace(1)
+
+	def end_dl(self):
+		self.compact = 0
+		self.needvspace(1)
+		self.setindent(self.stl.stdindent)
+
+	# <dt>: the term starts a new line at the standard indent
+	def do_dt(self, attrs):
+		if self.compact:
+			self.flush()
+		else:
+			self.needvspace(1)
+		self.setindent(self.stl.stdindent)
+
+	# <dd>: add an empty word followed by one space, then switch to
+	# the definition indent
+	def do_dd(self, attrs):
+		self.fmt.addword('', 1)
+		self.setindent(self.stl.ddindent)
+
+	# <address>: compact mode, justification 'r'
+	def start_address(self, attrs):
+		self.compact = 1
+		self.needvspace(1)
+		self.fmt.setjust('r')
+
+	def end_address(self):
+		self.compact = 0
+		self.needvspace(1)
+		self.setindent(self.stl.stdindent)
+		self.fmt.setjust('l')
+
+	# <pre>: fixed font; nofill is a counter, so nested <pre> works
+	def start_pre(self, attrs):
+		self.needvspace(1)
+		self.nofill = self.nofill + 1
+		self.pushstyle(FIXED)
+
+	def end_pre(self):
+		self.popstyle()
+		self.nofill = self.nofill - 1
+		self.needvspace(1)
+
+	start_typewriter = start_pre
+	end_typewriter = end_pre
+
+	# Images are not displayed; a placeholder word is shown instead
+	def do_img(self, attrs):
+		self.fmt.addword('(image)', 0)
+
+	# Physical styles
+
+	def start_tt(self, attrs): self.pushstyle(FIXED)
+	def end_tt(self): self.popstyle()
+
+	def start_b(self, attrs): self.pushstyle(BOLD)
+	def end_b(self): self.popstyle()
+
+	def start_i(self, attrs): self.pushstyle(ITALIC)
+	def end_i(self): self.popstyle()
+
+	def start_u(self, attrs): self.pushstyle(ITALIC) # Underline???
+	def end_u(self): self.popstyle()
+
+	def start_r(self, attrs): self.pushstyle(ROMAN) # Not official
+	def end_r(self): self.popstyle()
+
+	# Logical styles
+
+	start_em = start_i
+	end_em = end_i
+
+	start_strong = start_b
+	end_strong = end_b
+
+	start_code = start_tt
+	end_code = end_tt
+
+	start_samp = start_tt
+	end_samp = end_tt
+
+	start_kbd = start_tt
+	end_kbd = end_tt
+
+	start_file = start_tt # unofficial
+	end_file = end_tt
+
+	start_var = start_i
+	end_var = end_i
+
+	start_dfn = start_i
+	end_dfn = end_i
+
+	start_cite = start_i
+	end_cite = end_i
+
+	# end_hp1 must be the end handler: it was bound to start_i, which
+	# takes an attrs argument and pushes a style instead of popping it
+	start_hp1 = start_i
+	end_hp1 = end_i
+
+	start_hp2 = start_b
+	end_hp2 = end_b
+
+	# Tags without a handler are reported on standard output
+
+	def unknown_starttag(self, tag, attrs):
+		print '*** unknown <' + tag + '>'
+
+	def unknown_endtag(self, tag):
+		print '*** unknown </' + tag + '>'
+
+
+# An extension of the formatting parser which formats anchors differently.
+class AnchoringParser(FormattingParser):
+
+	# Instead of the '[n]' marker appended by the inherited end_a(),
+	# anchors are reported to the formatter through bgn_anchor() and
+	# end_anchor().
+
+	# Record the anchor as usual; if one was recorded (inanchor is
+	# non-zero) tell the formatter that it begins here
+	def start_a(self, attrs):
+		FormattingParser.start_a(self, attrs)
+		if self.inanchor:
+			self.fmt.bgn_anchor(self.inanchor)
+
+	# Tell the formatter that the anchor ends and leave anchor mode;
+	# the inherited end_a() is not called
+	def end_a(self):
+		if self.inanchor:
+			self.fmt.end_anchor(self.inanchor)
+			self.inanchor = 0
+
+
+# Style sheet -- this is never instantiated, but the attributes
+# of the class object itself are used to specify fonts to be used
+# for various paragraph styles.
+# A font set is a non-empty list of fonts, in the order:
+# [roman, italic, bold, fixed].
+# When a style is not available the nearest lower style is used
+
+# Style numbers; each one is used as an index into a font set
+ROMAN = 0
+ITALIC = 1
+BOLD = 2
+FIXED = 3
+
+class NullStylesheet:
+	# Base style sheet: supplies the indents and, for every font set,
+	# the single font None.  The other style sheets derive from this
+	# class and override attributes.
+	# Fonts -- none
+	stdfontset = [None]
+	h1fontset = [None]
+	h2fontset = [None]
+	h3fontset = [None]
+	# Indents
+	# (there is no h3indent; FormattingParser.start_h3 uses stdindent)
+	stdindent = 2
+	ddindent = 25
+	ulindent = 4
+	h1indent = 0
+	h2indent = 0
+	literalindent = 0
+
+
+class X11Stylesheet(NullStylesheet):
+	# Font sets given as X11 font name patterns: Helvetica for roman,
+	# italic (oblique) and bold, Courier for fixed.
+	# The heading font sets have only three entries, so a FIXED style
+	# inside a heading is clamped by FormattingParser.pushstyle to the
+	# last entry (bold).
+	stdfontset = [ \
+		'-*-helvetica-medium-r-normal-*-*-100-100-*-*-*-*-*', \
+		'-*-helvetica-medium-o-normal-*-*-100-100-*-*-*-*-*', \
+		'-*-helvetica-bold-r-normal-*-*-100-100-*-*-*-*-*', \
+		'-*-courier-medium-r-normal-*-*-100-100-*-*-*-*-*', \
+		]
+	h1fontset = [ \
+		'-*-helvetica-medium-r-normal-*-*-180-100-*-*-*-*-*', \
+		'-*-helvetica-medium-o-normal-*-*-180-100-*-*-*-*-*', \
+		'-*-helvetica-bold-r-normal-*-*-180-100-*-*-*-*-*', \
+		]
+	h2fontset = [ \
+		'-*-helvetica-medium-r-normal-*-*-140-100-*-*-*-*-*', \
+		'-*-helvetica-medium-o-normal-*-*-140-100-*-*-*-*-*', \
+		'-*-helvetica-bold-r-normal-*-*-140-100-*-*-*-*-*', \
+		]
+	h3fontset = [ \
+		'-*-helvetica-medium-r-normal-*-*-120-100-*-*-*-*-*', \
+		'-*-helvetica-medium-o-normal-*-*-120-100-*-*-*-*-*', \
+		'-*-helvetica-bold-r-normal-*-*-120-100-*-*-*-*-*', \
+		]
+	# Overrides the inherited definition indent of 25
+	ddindent = 40
+
+
+class MacStylesheet(NullStylesheet):
+	# Font sets given as (name, style letter, size) tuples, in the
+	# order [roman, italic, bold, fixed]: Geneva for the first three,
+	# Monaco for fixed.  Indents are inherited from NullStylesheet.
+	stdfontset = [ \
+		('Geneva', 'p', 10), \
+		('Geneva', 'i', 10), \
+		('Geneva', 'b', 10), \
+		('Monaco', 'p', 10), \
+		]
+	h1fontset = [ \
+		('Geneva', 'p', 18), \
+		('Geneva', 'i', 18), \
+		('Geneva', 'b', 18), \
+		('Monaco', 'p', 18), \
+		]
+	# The size-14 set belongs to <h2>.  It used to be assigned to
+	# h3fontset, where the size-12 set below overwrote it, leaving
+	# <h2> with the inherited [None].
+	h2fontset = [ \
+		('Geneva', 'p', 14), \
+		('Geneva', 'i', 14), \
+		('Geneva', 'b', 14), \
+		('Monaco', 'p', 14), \
+		]
+	h3fontset = [ \
+		('Geneva', 'p', 12), \
+		('Geneva', 'i', 12), \
+		('Geneva', 'b', 12), \
+		('Monaco', 'p', 12), \
+		]
+
+
+# Bind StdwinStylesheet to the style sheet for the current platform:
+# the Mac fonts when os.name is 'mac', the X11 fonts otherwise
+if os.name == 'mac':
+	StdwinStylesheet = MacStylesheet
+else:
+	StdwinStylesheet = X11Stylesheet
+
+
+class GLStylesheet(NullStylesheet):
+	# Font sets given as 'fontname size' strings, in the order
+	# [roman, italic, bold, fixed]: Helvetica variants for the first
+	# three, Courier for fixed.  Sizes are 10 for standard text and
+	# 18, 14 and 12 for h1, h2 and h3.  Indents are inherited from
+	# NullStylesheet.
+	stdfontset = [ \
+		'Helvetica 10', \
+		'Helvetica-Italic 10', \
+		'Helvetica-Bold 10', \
+		'Courier 10', \
+		]
+	h1fontset = [ \
+		'Helvetica 18', \
+		'Helvetica-Italic 18', \
+		'Helvetica-Bold 18', \
+		'Courier 18', \
+		]
+	h2fontset = [ \
+		'Helvetica 14', \
+		'Helvetica-Italic 14', \
+		'Helvetica-Bold 14', \
+		'Courier 14', \
+		]
+	h3fontset = [ \
+		'Helvetica 12', \
+		'Helvetica-Italic 12', \
+		'Helvetica-Bold 12', \
+		'Courier 12', \
+		]
+
+
+# Test program -- formats a document through a WritingFormatter on
+# sys.stdout and times the parsing and formatting, exclusive of reading it
+
+def test():
+	# Parse and format one HTML document through a
+	# fmt.WritingFormatter created on sys.stdout, then print the
+	# elapsed time.  The document is sys.argv[1] when given, else
+	# 'test.html'; it is opened with urllib.urlopen.
+	import fmt
+	import time
+	import urllib
+	if sys.argv[1:]: file = sys.argv[1]
+	else: file = 'test.html'
+	data = urllib.urlopen(file).read()
+	# The clock starts only after the document has been read
+	t0 = time.time()
+	fmtr = fmt.WritingFormatter(sys.stdout, 79)
+	p = FormattingParser(fmtr, NullStylesheet)
+	p.feed(data)
+	p.close()
+	t1 = time.time()
+	print
+	print '*** Formatting time:', round(t1-t0, 3), 'seconds.'
+
+
+# Test program using stdwin
+
+def testStdwin():
+	# Show a formatted HTML file in a stdwin window and handle
+	# events until the window is closed.  The file is sys.argv[1]
+	# when given, else 'test.html'.
+	import stdwin, fmt
+	from stdwinevents import *
+	if sys.argv[1:]: file = sys.argv[1]
+	else: file = 'test.html'
+	data = open(file, 'r').read()
+	window = stdwin.open('testStdwin')
+	# The back end; created on the first draw event
+	b = None
+	while 1:
+		etype, ewin, edetail = stdwin.getevent()
+		if etype == WE_CLOSE:
+			break
+		if etype == WE_SIZE:
+			window.setdocsize(0, 0)
+			window.setorigin(0, 0)
+			window.change((0, 0), (10000, 30000)) # XXX
+		if etype == WE_DRAW:
+			if not b:
+				# First draw: build the back end and format
+				# the whole document once
+				# NOTE(review): MacStylesheet is hard-coded
+				# although StdwinStylesheet is defined per
+				# platform -- confirm this is intended
+				b = fmt.StdwinBackEnd(window, 1)
+				f = fmt.BaseFormatter(b.d, b)
+				p = FormattingParser(f, \
+							    MacStylesheet)
+				p.feed(data)
+				p.close()
+				b.finish()
+			else:
+				# Later draws only redraw the event's area
+				b.redraw(edetail)
+	window.close()
+
+
+# Test program using GL
+
+def testGL():
+	# Format an HTML file into a 600x600 GL window using
+	# GLStylesheet.  The file is sys.argv[1] when given, else
+	# 'test.html'.
+	import gl, GL, fmt
+	if sys.argv[1:]: file = sys.argv[1]
+	else: file = 'test.html'
+	data = open(file, 'r').read()
+	W, H = 600, 600
+	gl.foreground()
+	gl.prefsize(W, H)
+	wid = gl.winopen('testGL')
+	# ortho2 is given (0, W, H, 0) -- presumably left, right,
+	# bottom, top, which would put y=0 at the top edge; confirm
+	gl.ortho2(0, W, H, 0)
+	# Clear the window in white, then select black
+	gl.color(GL.WHITE)
+	gl.clear()
+	gl.color(GL.BLACK)
+	b = fmt.GLBackEnd(wid)
+	f = fmt.BaseFormatter(b.d, b)
+	p = FormattingParser(f, GLStylesheet)
+	p.feed(data)
+	p.close()
+	b.finish()
+	#
+	# Wait five seconds before returning (presumably so that the
+	# window stays visible for a while)
+	import time
+	time.sleep(5)
+
+
+# Run the timing test when this file is executed as a script
+if __name__ == '__main__':
+	test()
author	Guido van Rossum <guido@python.org>	1995-01-10 17:05:37 (GMT)
committer	Guido van Rossum <guido@python.org>	1995-01-10 17:05:37 (GMT)
commit	dfa70a9fbc23bfef2badb6b17639354c4a9aeda8 (patch)
tree	fb4775eb6039fb91dbbb50ba7c105e54490fe8fe /Demo/tkinter/www/htmllib.py
parent	ca9b323c525d2e5975ca0fbedd158a722110ca9a (diff)
download	cpython-dfa70a9fbc23bfef2badb6b17639354c4a9aeda8.zip cpython-dfa70a9fbc23bfef2badb6b17639354c4a9aeda8.tar.gz cpython-dfa70a9fbc23bfef2badb6b17639354c4a9aeda8.tar.bz2