diff options
Diffstat (limited to 'Demo/tkinter/www/htmllib.py')
-rwxr-xr-x | Demo/tkinter/www/htmllib.py | 639 |
1 files changed, 0 insertions, 639 deletions
diff --git a/Demo/tkinter/www/htmllib.py b/Demo/tkinter/www/htmllib.py deleted file mode 100755 index f45657f..0000000 --- a/Demo/tkinter/www/htmllib.py +++ /dev/null @@ -1,639 +0,0 @@ -# A parser for HTML documents - - -# HTML: HyperText Markup Language; an SGML-like syntax used by WWW to -# describe hypertext documents -# -# SGML: Standard Generalized Markup Language -# -# WWW: World-Wide Web; a distributed hypertext system develped at CERN -# -# CERN: European Particle Physics Laboratory in Geneva, Switzerland - - -# This file is only concerned with parsing and formatting HTML -# documents, not with the other (hypertext and networking) aspects of -# the WWW project. (It does support highlighting of anchors.) - - -import os -import sys -import regex -import string -import sgmllib - - -class HTMLParser(sgmllib.SGMLParser): - - # Copy base class entities and add some - entitydefs = {} - for key in sgmllib.SGMLParser.entitydefs.keys(): - entitydefs[key] = sgmllib.SGMLParser.entitydefs[key] - entitydefs['bullet'] = '*' - - # Provided -- handlers for tags introducing literal text - - def start_listing(self, attrs): - self.setliteral('listing') - self.literal_bgn('listing', attrs) - - def end_listing(self): - self.literal_end('listing') - - def start_xmp(self, attrs): - self.setliteral('xmp') - self.literal_bgn('xmp', attrs) - - def end_xmp(self): - self.literal_end('xmp') - - def do_plaintext(self, attrs): - self.setnomoretags() - self.literal_bgn('plaintext', attrs) - - # To be overridden -- begin/end literal mode - def literal_bgn(self, tag, attrs): pass - def literal_end(self, tag): pass - - -# Next level of sophistication -- collect anchors, title, nextid and isindex -class CollectingParser(HTMLParser): - # - def __init__(self): - HTMLParser.__init__(self) - self.savetext = None - self.nextid = '' - self.isindex = 0 - self.title = '' - self.inanchor = 0 - self.anchors = [] - self.anchornames = [] - self.anchortypes = [] - # - def start_a(self, attrs): - self.inanchor = 0 - href = '' - name = '' - type = '' - for attrname, value in attrs: - if attrname == 'href': - href = value - if attrname == 'name=': - name = value - if attrname == 'type=': - type = string.lower(value) - if not (href or name): - return - self.anchors.append(href) - self.anchornames.append(name) - self.anchortypes.append(type) - self.inanchor = len(self.anchors) - if not href: - self.inanchor = -self.inanchor - # - def end_a(self): - if self.inanchor > 0: - # Don't show anchors pointing into the current document - if self.anchors[self.inanchor-1][:1] <> '#': - self.handle_data('[' + `self.inanchor` + ']') - self.inanchor = 0 - # - def start_header(self, attrs): pass - def end_header(self): pass - # - # (head is the same as header) - def start_head(self, attrs): pass - def end_head(self): pass - # - def start_body(self, attrs): pass - def end_body(self): pass - # - def do_nextid(self, attrs): - self.nextid = attrs - # - def do_isindex(self, attrs): - self.isindex = 1 - # - def start_title(self, attrs): - self.savetext = '' - # - def end_title(self): - if self.savetext <> None: - self.title = self.savetext - self.savetext = None - # - def handle_data(self, text): - if self.savetext is not None: - self.savetext = self.savetext + text - - -# Formatting parser -- takes a formatter and a style sheet as arguments - -# XXX The use of style sheets should change: for each tag and end tag -# there should be a style definition, and a style definition should -# encompass many more parameters: font, justification, indentation, -# vspace before, vspace after, hanging tag... - -wordprog = regex.compile('[^ \t\n]*') -spaceprog = regex.compile('[ \t\n]*') - -class FormattingParser(CollectingParser): - - def __init__(self, formatter, stylesheet): - CollectingParser.__init__(self) - self.fmt = formatter - self.stl = stylesheet - self.savetext = None - self.compact = 0 - self.nofill = 0 - self.resetfont() - self.setindent(self.stl.stdindent) - - def resetfont(self): - self.fontstack = [] - self.stylestack = [] - self.fontset = self.stl.stdfontset - self.style = ROMAN - self.passfont() - - def passfont(self): - font = self.fontset[self.style] - self.fmt.setfont(font) - - def pushstyle(self, style): - self.stylestack.append(self.style) - self.style = min(style, len(self.fontset)-1) - self.passfont() - - def popstyle(self): - self.style = self.stylestack[-1] - del self.stylestack[-1] - self.passfont() - - def pushfontset(self, fontset, style): - self.fontstack.append(self.fontset) - self.fontset = fontset - self.pushstyle(style) - - def popfontset(self): - self.fontset = self.fontstack[-1] - del self.fontstack[-1] - self.popstyle() - - def flush(self): - self.fmt.flush() - - def setindent(self, n): - self.fmt.setleftindent(n) - - def needvspace(self, n): - self.fmt.needvspace(n) - - def close(self): - HTMLParser.close(self) - self.fmt.flush() - - def handle_literal(self, text): - lines = string.splitfields(text, '\n') - for i in range(1, len(lines)): - lines[i] = string.expandtabs(lines[i], 8) - for line in lines[:-1]: - self.fmt.addword(line, 0) - self.fmt.flush() - self.fmt.nospace = 0 - for line in lines[-1:]: - self.fmt.addword(line, 0) - - def handle_data(self, text): - if self.savetext is not None: - self.savetext = self.savetext + text - return - if self.literal: - self.handle_literal(text) - return - i = 0 - n = len(text) - while i < n: - j = i + wordprog.match(text, i) - word = text[i:j] - i = j + spaceprog.match(text, j) - self.fmt.addword(word, i-j) - if self.nofill and '\n' in text[j:i]: - self.fmt.flush() - self.fmt.nospace = 0 - i = j+1 - while text[i-1] <> '\n': i = i+1 - - def literal_bgn(self, tag, attrs): - if tag == 'plaintext': - self.flush() - else: - self.needvspace(1) - self.pushfontset(self.stl.stdfontset, FIXED) - self.setindent(self.stl.literalindent) - - def literal_end(self, tag): - self.needvspace(1) - self.popfontset() - self.setindent(self.stl.stdindent) - - def start_title(self, attrs): - self.flush() - self.savetext = '' - # NB end_title is unchanged - - def do_p(self, attrs): - if self.compact: - self.flush() - else: - self.needvspace(1) - - def do_hr(self, attrs): - self.fmt.hrule() - - def start_h1(self, attrs): - self.needvspace(2) - self.setindent(self.stl.h1indent) - self.pushfontset(self.stl.h1fontset, BOLD) - self.fmt.setjust('c') - - def end_h1(self): - self.popfontset() - self.needvspace(2) - self.setindent(self.stl.stdindent) - self.fmt.setjust('l') - - def start_h2(self, attrs): - self.needvspace(1) - self.setindent(self.stl.h2indent) - self.pushfontset(self.stl.h2fontset, BOLD) - - def end_h2(self): - self.popfontset() - self.needvspace(1) - self.setindent(self.stl.stdindent) - - def start_h3(self, attrs): - self.needvspace(1) - self.setindent(self.stl.stdindent) - self.pushfontset(self.stl.h3fontset, BOLD) - - def end_h3(self): - self.popfontset() - self.needvspace(1) - self.setindent(self.stl.stdindent) - - def start_h4(self, attrs): - self.needvspace(1) - self.setindent(self.stl.stdindent) - self.pushfontset(self.stl.stdfontset, BOLD) - - def end_h4(self): - self.popfontset() - self.needvspace(1) - self.setindent(self.stl.stdindent) - - start_h5 = start_h4 - end_h5 = end_h4 - - start_h6 = start_h5 - end_h6 = end_h5 - - start_h7 = start_h6 - end_h7 = end_h6 - - def start_ul(self, attrs): - self.needvspace(1) - for attrname, value in attrs: - if attrname == 'compact': - self.compact = 1 - self.setindent(0) - break - else: - self.setindent(self.stl.ulindent) - - start_dir = start_menu = start_ol = start_ul - - do_li = do_p - - def end_ul(self): - self.compact = 0 - self.needvspace(1) - self.setindent(self.stl.stdindent) - - end_dir = end_menu = end_ol = end_ul - - def start_dl(self, attrs): - for attrname, value in attrs: - if attrname == 'compact': - self.compact = 1 - self.needvspace(1) - - def end_dl(self): - self.compact = 0 - self.needvspace(1) - self.setindent(self.stl.stdindent) - - def do_dt(self, attrs): - if self.compact: - self.flush() - else: - self.needvspace(1) - self.setindent(self.stl.stdindent) - - def do_dd(self, attrs): - self.fmt.addword('', 1) - self.setindent(self.stl.ddindent) - - def start_address(self, attrs): - self.compact = 1 - self.needvspace(1) - self.fmt.setjust('r') - - def end_address(self): - self.compact = 0 - self.needvspace(1) - self.setindent(self.stl.stdindent) - self.fmt.setjust('l') - - def start_pre(self, attrs): - self.needvspace(1) - self.nofill = self.nofill + 1 - self.pushstyle(FIXED) - - def end_pre(self): - self.popstyle() - self.nofill = self.nofill - 1 - self.needvspace(1) - - start_typewriter = start_pre - end_typewriter = end_pre - - def do_img(self, attrs): - self.fmt.addword('(image)', 0) - - # Physical styles - - def start_tt(self, attrs): self.pushstyle(FIXED) - def end_tt(self): self.popstyle() - - def start_b(self, attrs): self.pushstyle(BOLD) - def end_b(self): self.popstyle() - - def start_i(self, attrs): self.pushstyle(ITALIC) - def end_i(self): self.popstyle() - - def start_u(self, attrs): self.pushstyle(ITALIC) # Underline??? - def end_u(self): self.popstyle() - - def start_r(self, attrs): self.pushstyle(ROMAN) # Not official - def end_r(self): self.popstyle() - - # Logical styles - - start_em = start_i - end_em = end_i - - start_strong = start_b - end_strong = end_b - - start_code = start_tt - end_code = end_tt - - start_samp = start_tt - end_samp = end_tt - - start_kbd = start_tt - end_kbd = end_tt - - start_file = start_tt # unofficial - end_file = end_tt - - start_var = start_i - end_var = end_i - - start_dfn = start_i - end_dfn = end_i - - start_cite = start_i - end_cite = end_i - - start_hp1 = start_i - end_hp1 = start_i - - start_hp2 = start_b - end_hp2 = end_b - - def unknown_starttag(self, tag, attrs): - print '*** unknown <' + tag + '>' - - def unknown_endtag(self, tag): - print '*** unknown </' + tag + '>' - - -# An extension of the formatting parser which formats anchors differently. -class AnchoringParser(FormattingParser): - - def start_a(self, attrs): - FormattingParser.start_a(self, attrs) - if self.inanchor: - self.fmt.bgn_anchor(self.inanchor) - - def end_a(self): - if self.inanchor: - self.fmt.end_anchor(self.inanchor) - self.inanchor = 0 - - -# Style sheet -- this is never instantiated, but the attributes -# of the class object itself are used to specify fonts to be used -# for various paragraph styles. -# A font set is a non-empty list of fonts, in the order: -# [roman, italic, bold, fixed]. -# When a style is not available the nearest lower style is used - -ROMAN = 0 -ITALIC = 1 -BOLD = 2 -FIXED = 3 - -class NullStylesheet: - # Fonts -- none - stdfontset = [None] - h1fontset = [None] - h2fontset = [None] - h3fontset = [None] - # Indents - stdindent = 2 - ddindent = 25 - ulindent = 4 - h1indent = 0 - h2indent = 0 - literalindent = 0 - - -class X11Stylesheet(NullStylesheet): - stdfontset = [ \ - '-*-helvetica-medium-r-normal-*-*-100-100-*-*-*-*-*', \ - '-*-helvetica-medium-o-normal-*-*-100-100-*-*-*-*-*', \ - '-*-helvetica-bold-r-normal-*-*-100-100-*-*-*-*-*', \ - '-*-courier-medium-r-normal-*-*-100-100-*-*-*-*-*', \ - ] - h1fontset = [ \ - '-*-helvetica-medium-r-normal-*-*-180-100-*-*-*-*-*', \ - '-*-helvetica-medium-o-normal-*-*-180-100-*-*-*-*-*', \ - '-*-helvetica-bold-r-normal-*-*-180-100-*-*-*-*-*', \ - ] - h2fontset = [ \ - '-*-helvetica-medium-r-normal-*-*-140-100-*-*-*-*-*', \ - '-*-helvetica-medium-o-normal-*-*-140-100-*-*-*-*-*', \ - '-*-helvetica-bold-r-normal-*-*-140-100-*-*-*-*-*', \ - ] - h3fontset = [ \ - '-*-helvetica-medium-r-normal-*-*-120-100-*-*-*-*-*', \ - '-*-helvetica-medium-o-normal-*-*-120-100-*-*-*-*-*', \ - '-*-helvetica-bold-r-normal-*-*-120-100-*-*-*-*-*', \ - ] - ddindent = 40 - - -class MacStylesheet(NullStylesheet): - stdfontset = [ \ - ('Geneva', 'p', 10), \ - ('Geneva', 'i', 10), \ - ('Geneva', 'b', 10), \ - ('Monaco', 'p', 10), \ - ] - h1fontset = [ \ - ('Geneva', 'p', 18), \ - ('Geneva', 'i', 18), \ - ('Geneva', 'b', 18), \ - ('Monaco', 'p', 18), \ - ] - h3fontset = [ \ - ('Geneva', 'p', 14), \ - ('Geneva', 'i', 14), \ - ('Geneva', 'b', 14), \ - ('Monaco', 'p', 14), \ - ] - h3fontset = [ \ - ('Geneva', 'p', 12), \ - ('Geneva', 'i', 12), \ - ('Geneva', 'b', 12), \ - ('Monaco', 'p', 12), \ - ] - - -if os.name == 'mac': - StdwinStylesheet = MacStylesheet -else: - StdwinStylesheet = X11Stylesheet - - -class GLStylesheet(NullStylesheet): - stdfontset = [ \ - 'Helvetica 10', \ - 'Helvetica-Italic 10', \ - 'Helvetica-Bold 10', \ - 'Courier 10', \ - ] - h1fontset = [ \ - 'Helvetica 18', \ - 'Helvetica-Italic 18', \ - 'Helvetica-Bold 18', \ - 'Courier 18', \ - ] - h2fontset = [ \ - 'Helvetica 14', \ - 'Helvetica-Italic 14', \ - 'Helvetica-Bold 14', \ - 'Courier 14', \ - ] - h3fontset = [ \ - 'Helvetica 12', \ - 'Helvetica-Italic 12', \ - 'Helvetica-Bold 12', \ - 'Courier 12', \ - ] - - -# Test program -- produces no output but times how long it takes -# to send a document to a null formatter, exclusive of I/O - -def test(): - import fmt - import time - import urllib - if sys.argv[1:]: file = sys.argv[1] - else: file = 'test.html' - data = urllib.urlopen(file).read() - t0 = time.time() - fmtr = fmt.WritingFormatter(sys.stdout, 79) - p = FormattingParser(fmtr, NullStylesheet) - p.feed(data) - p.close() - t1 = time.time() - print - print '*** Formatting time:', round(t1-t0, 3), 'seconds.' - - -# Test program using stdwin - -def testStdwin(): - import stdwin, fmt - from stdwinevents import * - if sys.argv[1:]: file = sys.argv[1] - else: file = 'test.html' - data = open(file, 'r').read() - window = stdwin.open('testStdwin') - b = None - while 1: - etype, ewin, edetail = stdwin.getevent() - if etype == WE_CLOSE: - break - if etype == WE_SIZE: - window.setdocsize(0, 0) - window.setorigin(0, 0) - window.change((0, 0), (10000, 30000)) # XXX - if etype == WE_DRAW: - if not b: - b = fmt.StdwinBackEnd(window, 1) - f = fmt.BaseFormatter(b.d, b) - p = FormattingParser(f, \ - MacStylesheet) - p.feed(data) - p.close() - b.finish() - else: - b.redraw(edetail) - window.close() - - -# Test program using GL - -def testGL(): - import gl, GL, fmt - if sys.argv[1:]: file = sys.argv[1] - else: file = 'test.html' - data = open(file, 'r').read() - W, H = 600, 600 - gl.foreground() - gl.prefsize(W, H) - wid = gl.winopen('testGL') - gl.ortho2(0, W, H, 0) - gl.color(GL.WHITE) - gl.clear() - gl.color(GL.BLACK) - b = fmt.GLBackEnd(wid) - f = fmt.BaseFormatter(b.d, b) - p = FormattingParser(f, GLStylesheet) - p.feed(data) - p.close() - b.finish() - # - import time - time.sleep(5) - - -if __name__ == '__main__': - test() |