summaryrefslogtreecommitdiffstats
path: root/Demo/tkinter/www/htmllib.py
diff options
context:
space:
mode:
Diffstat (limited to 'Demo/tkinter/www/htmllib.py')
-rwxr-xr-xDemo/tkinter/www/htmllib.py639
1 files changed, 0 insertions, 639 deletions
diff --git a/Demo/tkinter/www/htmllib.py b/Demo/tkinter/www/htmllib.py
deleted file mode 100755
index f45657f..0000000
--- a/Demo/tkinter/www/htmllib.py
+++ /dev/null
@@ -1,639 +0,0 @@
-# A parser for HTML documents
-
-
-# HTML: HyperText Markup Language; an SGML-like syntax used by WWW to
-# describe hypertext documents
-#
-# SGML: Standard Generalized Markup Language
-#
-# WWW: World-Wide Web; a distributed hypertext system developed at CERN
-#
-# CERN: European Particle Physics Laboratory in Geneva, Switzerland
-
-
-# This file is only concerned with parsing and formatting HTML
-# documents, not with the other (hypertext and networking) aspects of
-# the WWW project. (It does support highlighting of anchors.)
-
-
-import os
-import sys
-import regex
-import string
-import sgmllib
-
-
-class HTMLParser(sgmllib.SGMLParser):
-
-    # Entity table: a private copy of the base class table (so the
-    # dictionary of sgmllib.SGMLParser itself is left untouched),
-    # extended with a 'bullet' entity.
-    entitydefs = {}
-    for key in sgmllib.SGMLParser.entitydefs.keys():
-        entitydefs[key] = sgmllib.SGMLParser.entitydefs[key]
-    entitydefs['bullet'] = '*'
-
-    # Hooks, to be overridden by subclasses -- called by the handlers
-    # below when literal text begins and ends.  By default they do
-    # nothing.
-
-    def literal_bgn(self, tag, attrs):
-        pass
-
-    def literal_end(self, tag):
-        pass
-
-    # Provided -- handlers for the tags that introduce literal text.
-    # <listing> and <xmp> switch the parser into literal mode until the
-    # matching end tag; <plaintext> has no end tag and turns tag
-    # recognition off altogether.
-
-    def do_plaintext(self, attrs):
-        self.setnomoretags()
-        self.literal_bgn('plaintext', attrs)
-
-    def start_xmp(self, attrs):
-        self.setliteral('xmp')
-        self.literal_bgn('xmp', attrs)
-
-    def end_xmp(self):
-        self.literal_end('xmp')
-
-    def start_listing(self, attrs):
-        self.setliteral('listing')
-        self.literal_bgn('listing', attrs)
-
-    def end_listing(self):
-        self.literal_end('listing')
-
-
-# Next level of sophistication -- collect anchors, title, nextid and isindex
-class CollectingParser(HTMLParser):
-    #
-    # Parser that records document-level information while parsing:
-    #   title       -- text found between <title> and </title>
-    #   nextid      -- attribute list of the last <nextid> tag ('' if none)
-    #   isindex     -- 1 once an <isindex> tag has been seen, else 0
-    #   anchors, anchornames, anchortypes
-    #               -- parallel lists holding the href, the name and the
-    #                  lowercased type of each <a> tag that has an href
-    #                  or a name
-    #   inanchor    -- 0 outside a recorded anchor; inside one, its
-    #                  1-based position in the anchor lists, negated
-    #                  when the anchor has no href
-    #   savetext    -- None, or the title text collected so far
-    #
-    def __init__(self):
-        HTMLParser.__init__(self)
-        self.savetext = None
-        self.nextid = ''
-        self.isindex = 0
-        self.title = ''
-        self.inanchor = 0
-        self.anchors = []
-        self.anchornames = []
-        self.anchortypes = []
-    #
-    # Record an anchor.  Anchors with neither href nor name are ignored.
-    def start_a(self, attrs):
-        self.inanchor = 0
-        href = ''
-        name = ''
-        type = ''
-        for attrname, value in attrs:
-            if attrname == 'href':
-                href = value
-            # Attribute names are compared without a trailing '=', the
-            # same way 'href' is; the former tests against 'name=' and
-            # 'type=' could never succeed, so names and types were lost
-            # and name-only anchors were dropped.
-            if attrname == 'name':
-                name = value
-            if attrname == 'type':
-                type = string.lower(value)
-        if not (href or name):
-            return
-        self.anchors.append(href)
-        self.anchornames.append(name)
-        self.anchortypes.append(type)
-        self.inanchor = len(self.anchors)
-        if not href:
-            # Negative: a named anchor that points nowhere
-            self.inanchor = -self.inanchor
-    #
-    # At the end of an anchor with an href, emit its number in square
-    # brackets as ordinary data.
-    def end_a(self):
-        if self.inanchor > 0:
-            # Don't show anchors pointing into the current document
-            if self.anchors[self.inanchor-1][:1] <> '#':
-                self.handle_data('[' + `self.inanchor` + ']')
-        self.inanchor = 0
-    #
-    def start_header(self, attrs): pass
-    def end_header(self): pass
-    #
-    # (head is the same as header)
-    def start_head(self, attrs): pass
-    def end_head(self): pass
-    #
-    def start_body(self, attrs): pass
-    def end_body(self): pass
-    #
-    def do_nextid(self, attrs):
-        self.nextid = attrs
-    #
-    def do_isindex(self, attrs):
-        self.isindex = 1
-    #
-    # Start collecting text for the title
-    def start_title(self, attrs):
-        self.savetext = ''
-    #
-    # Store the collected text as the title and stop collecting
-    def end_title(self):
-        if self.savetext is not None:
-            self.title = self.savetext
-            self.savetext = None
-    #
-    # Text is kept only while a title is being collected; all other
-    # text is discarded by this class.
-    def handle_data(self, text):
-        if self.savetext is not None:
-            self.savetext = self.savetext + text
-
-
-# Formatting parser -- takes a formatter and a style sheet as arguments
-
-# XXX The use of style sheets should change: for each tag and end tag
-# there should be a style definition, and a style definition should
-# encompass many more parameters: font, justification, indentation,
-# vspace before, vspace after, hanging tag...
-
-# Patterns used by FormattingParser.handle_data to cut text into words:
-# wordprog matches a (possibly empty) run of characters other than
-# blank, tab and newline; spaceprog matches a (possibly empty) run of
-# blanks, tabs and newlines.
-wordprog = regex.compile('[^ \t\n]*')
-spaceprog = regex.compile('[ \t\n]*')
-
-class FormattingParser(CollectingParser):
-
-    # Parser that drives a formatter object while parsing.
-    #
-    # Constructor arguments:
-    #   formatter  -- object receiving the output; this class calls its
-    #                 setfont, setleftindent, setjust, needvspace,
-    #                 addword, hrule and flush methods and assigns its
-    #                 nospace attribute
-    #   stylesheet -- object (normally a class such as NullStylesheet)
-    #                 whose attributes supply the font sets and indents
-    #
-    # State kept by this class:
-    #   compact    -- 1 inside a compact list or an address, else 0
-    #   nofill     -- nesting depth of <pre>-like elements
-    #   fontset, style
-    #              -- current font set and style number; the font in
-    #                 use is fontset[style]
-    #   fontstack, stylestack
-    #              -- saved outer font sets and styles
-
-    def __init__(self, formatter, stylesheet):
-        CollectingParser.__init__(self)
-        self.fmt = formatter
-        self.stl = stylesheet
-        self.savetext = None
-        self.compact = 0
-        self.nofill = 0
-        self.resetfont()
-        self.setindent(self.stl.stdindent)
-
-    # Font handling
-
-    # Forget all saved fonts and go back to standard roman
-    def resetfont(self):
-        self.fontstack = []
-        self.stylestack = []
-        self.fontset = self.stl.stdfontset
-        self.style = ROMAN
-        self.passfont()
-
-    # Tell the formatter which font is current
-    def passfont(self):
-        font = self.fontset[self.style]
-        self.fmt.setfont(font)
-
-    # Switch style within the current font set.  A style number beyond
-    # the end of the font set is replaced by the last one available.
-    def pushstyle(self, style):
-        self.stylestack.append(self.style)
-        self.style = min(style, len(self.fontset)-1)
-        self.passfont()
-
-    def popstyle(self):
-        self.style = self.stylestack[-1]
-        del self.stylestack[-1]
-        self.passfont()
-
-    # Switch to another font set and style
-    def pushfontset(self, fontset, style):
-        self.fontstack.append(self.fontset)
-        self.fontset = fontset
-        self.pushstyle(style)
-
-    def popfontset(self):
-        self.fontset = self.fontstack[-1]
-        del self.fontstack[-1]
-        self.popstyle()
-
-    # Thin wrappers around the formatter
-
-    def flush(self):
-        self.fmt.flush()
-
-    def setindent(self, n):
-        self.fmt.setleftindent(n)
-
-    def needvspace(self, n):
-        self.fmt.needvspace(n)
-
-    def close(self):
-        HTMLParser.close(self)
-        self.fmt.flush()
-
-    # Text handling
-
-    # Literal text: every line but the last is passed on as one word
-    # and flushed; the last (possibly partial) line is passed on
-    # without a flush.  Tabs are expanded in all lines except the first.
-    def handle_literal(self, text):
-        lines = string.splitfields(text, '\n')
-        for i in range(1, len(lines)):
-            lines[i] = string.expandtabs(lines[i], 8)
-        for line in lines[:-1]:
-            self.fmt.addword(line, 0)
-            self.fmt.flush()
-            self.fmt.nospace = 0
-        for line in lines[-1:]:
-            self.fmt.addword(line, 0)
-
-    # Ordinary text: while a title is being collected the text is
-    # saved; in literal mode it goes to handle_literal; otherwise it is
-    # cut into words, each passed to the formatter together with the
-    # amount of white space that follows it.
-    def handle_data(self, text):
-        if self.savetext is not None:
-            self.savetext = self.savetext + text
-            return
-        if self.literal:
-            self.handle_literal(text)
-            return
-        i = 0
-        n = len(text)
-        while i < n:
-            # The match results are added to the start index, i.e.
-            # they are used as the length of the matched text
-            j = i + wordprog.match(text, i)
-            word = text[i:j]
-            i = j + spaceprog.match(text, j)
-            self.fmt.addword(word, i-j)
-            if self.nofill and '\n' in text[j:i]:
-                # Inside <pre>: break the line at the first newline
-                # and resume scanning just after it, so that the
-                # following white space is not skipped
-                self.fmt.flush()
-                self.fmt.nospace = 0
-                i = j+1
-                while text[i-1] <> '\n': i = i+1
-
-    # Literal text is set in the fixed style of the standard font set,
-    # at the literal indent
-    def literal_bgn(self, tag, attrs):
-        if tag == 'plaintext':
-            self.flush()
-        else:
-            self.needvspace(1)
-        self.pushfontset(self.stl.stdfontset, FIXED)
-        self.setindent(self.stl.literalindent)
-
-    def literal_end(self, tag):
-        self.needvspace(1)
-        self.popfontset()
-        self.setindent(self.stl.stdindent)
-
-    def start_title(self, attrs):
-        self.flush()
-        self.savetext = ''
-    # NB end_title is unchanged
-
-    # Paragraphs and rules
-
-    def do_p(self, attrs):
-        if self.compact:
-            self.flush()
-        else:
-            self.needvspace(1)
-
-    def do_hr(self, attrs):
-        self.fmt.hrule()
-
-    # Headings
-
-    def start_h1(self, attrs):
-        self.needvspace(2)
-        self.setindent(self.stl.h1indent)
-        self.pushfontset(self.stl.h1fontset, BOLD)
-        self.fmt.setjust('c')
-
-    def end_h1(self):
-        self.popfontset()
-        self.needvspace(2)
-        self.setindent(self.stl.stdindent)
-        self.fmt.setjust('l')
-
-    def start_h2(self, attrs):
-        self.needvspace(1)
-        self.setindent(self.stl.h2indent)
-        self.pushfontset(self.stl.h2fontset, BOLD)
-
-    def end_h2(self):
-        self.popfontset()
-        self.needvspace(1)
-        self.setindent(self.stl.stdindent)
-
-    def start_h3(self, attrs):
-        self.needvspace(1)
-        self.setindent(self.stl.stdindent)
-        self.pushfontset(self.stl.h3fontset, BOLD)
-
-    def end_h3(self):
-        self.popfontset()
-        self.needvspace(1)
-        self.setindent(self.stl.stdindent)
-
-    def start_h4(self, attrs):
-        self.needvspace(1)
-        self.setindent(self.stl.stdindent)
-        self.pushfontset(self.stl.stdfontset, BOLD)
-
-    def end_h4(self):
-        self.popfontset()
-        self.needvspace(1)
-        self.setindent(self.stl.stdindent)
-
-    # Deeper headings are formatted like h4
-
-    start_h5 = start_h4
-    end_h5 = end_h4
-
-    start_h6 = start_h5
-    end_h6 = end_h5
-
-    start_h7 = start_h6
-    end_h7 = end_h6
-
-    # Lists
-
-    def start_ul(self, attrs):
-        self.needvspace(1)
-        for attrname, value in attrs:
-            if attrname == 'compact':
-                self.compact = 1
-                self.setindent(0)
-                break
-        else:
-            # Reached only when no 'compact' attribute was found
-            self.setindent(self.stl.ulindent)
-
-    start_dir = start_menu = start_ol = start_ul
-
-    do_li = do_p
-
-    def end_ul(self):
-        self.compact = 0
-        self.needvspace(1)
-        self.setindent(self.stl.stdindent)
-
-    end_dir = end_menu = end_ol = end_ul
-
-    def start_dl(self, attrs):
-        for attrname, value in attrs:
-            if attrname == 'compact':
-                self.compact = 1
-        self.needvspace(1)
-
-    def end_dl(self):
-        self.compact = 0
-        self.needvspace(1)
-        self.setindent(self.stl.stdindent)
-
-    def do_dt(self, attrs):
-        if self.compact:
-            self.flush()
-        else:
-            self.needvspace(1)
-        self.setindent(self.stl.stdindent)
-
-    def do_dd(self, attrs):
-        self.fmt.addword('', 1)
-        self.setindent(self.stl.ddindent)
-
-    # Other block elements
-
-    def start_address(self, attrs):
-        self.compact = 1
-        self.needvspace(1)
-        self.fmt.setjust('r')
-
-    def end_address(self):
-        self.compact = 0
-        self.needvspace(1)
-        self.setindent(self.stl.stdindent)
-        self.fmt.setjust('l')
-
-    # nofill is a counter so that nested <pre> elements work
-    def start_pre(self, attrs):
-        self.needvspace(1)
-        self.nofill = self.nofill + 1
-        self.pushstyle(FIXED)
-
-    def end_pre(self):
-        self.popstyle()
-        self.nofill = self.nofill - 1
-        self.needvspace(1)
-
-    start_typewriter = start_pre
-    end_typewriter = end_pre
-
-    # Images are not shown; a placeholder word is inserted instead
-    def do_img(self, attrs):
-        self.fmt.addword('(image)', 0)
-
-    # Physical styles
-
-    def start_tt(self, attrs): self.pushstyle(FIXED)
-    def end_tt(self): self.popstyle()
-
-    def start_b(self, attrs): self.pushstyle(BOLD)
-    def end_b(self): self.popstyle()
-
-    def start_i(self, attrs): self.pushstyle(ITALIC)
-    def end_i(self): self.popstyle()
-
-    def start_u(self, attrs): self.pushstyle(ITALIC) # Underline???
-    def end_u(self): self.popstyle()
-
-    def start_r(self, attrs): self.pushstyle(ROMAN) # Not official
-    def end_r(self): self.popstyle()
-
-    # Logical styles
-
-    start_em = start_i
-    end_em = end_i
-
-    start_strong = start_b
-    end_strong = end_b
-
-    start_code = start_tt
-    end_code = end_tt
-
-    start_samp = start_tt
-    end_samp = end_tt
-
-    start_kbd = start_tt
-    end_kbd = end_tt
-
-    start_file = start_tt # unofficial
-    end_file = end_tt
-
-    start_var = start_i
-    end_var = end_i
-
-    start_dfn = start_i
-    end_dfn = end_i
-
-    start_cite = start_i
-    end_cite = end_i
-
-    start_hp1 = start_i
-    # This was bound to start_i, so </hp1> did not pop the style that
-    # <hp1> had pushed
-    end_hp1 = end_i
-
-    start_hp2 = start_b
-    end_hp2 = end_b
-
-    # Tags without a handler are reported on standard output
-
-    def unknown_starttag(self, tag, attrs):
-        print '*** unknown <' + tag + '>'
-
-    def unknown_endtag(self, tag):
-        print '*** unknown </' + tag + '>'
-
-
-# An extension of the formatting parser which formats anchors differently.
-class AnchoringParser(FormattingParser):
-
-    # Record the anchor in the usual way; if it was recorded, tell the
-    # formatter that an anchor begins, passing the value of inanchor.
-    def start_a(self, attrs):
-        FormattingParser.start_a(self, attrs)
-        if not self.inanchor:
-            return
-        self.fmt.bgn_anchor(self.inanchor)
-
-    # Tell the formatter that the anchor ends.  This replaces the
-    # inherited end_a, so no bracketed anchor number is emitted.
-    def end_a(self):
-        if not self.inanchor:
-            return
-        self.fmt.end_anchor(self.inanchor)
-        self.inanchor = 0
-
-
-# Style sheet -- this is never instantiated, but the attributes
-# of the class object itself are used to specify fonts to be used
-# for various paragraph styles.
-# A font set is a non-empty list of fonts, in the order:
-# [roman, italic, bold, fixed].
-# When a style is not available the nearest lower style is used
-
-# Style numbers.  FormattingParser uses them as indices into a font
-# set, so their values follow the order [roman, italic, bold, fixed].
-ROMAN = 0
-ITALIC = 1
-BOLD = 2
-FIXED = 3
-
-class NullStylesheet:
- # Base style sheet; the other style sheets in this file derive from
- # it and override the font sets (and sometimes an indent).
- # Fonts -- none
- # Each font set has a single entry, None, so whatever style is asked
- # for, FormattingParser passes None to the formatter's setfont.
- stdfontset = [None]
- h1fontset = [None]
- h2fontset = [None]
- h3fontset = [None]
- # Indents
- # These are handed to the formatter's setleftindent; their unit is
- # whatever the formatter uses -- NOTE(review): not visible here.
- # There is no h3indent: FormattingParser sets h3 at stdindent.
- stdindent = 2
- ddindent = 25
- ulindent = 4
- h1indent = 0
- h2indent = 0
- literalindent = 0
-
-
-class X11Stylesheet(NullStylesheet):
- # Style sheet using X11 font name patterns (presumably X Logical
- # Font Descriptions with wildcards -- TODO confirm).  The standard
- # set lists roman, italic (oblique), bold and fixed (courier) fonts.
- stdfontset = [ \
- '-*-helvetica-medium-r-normal-*-*-100-100-*-*-*-*-*', \
- '-*-helvetica-medium-o-normal-*-*-100-100-*-*-*-*-*', \
- '-*-helvetica-bold-r-normal-*-*-100-100-*-*-*-*-*', \
- '-*-courier-medium-r-normal-*-*-100-100-*-*-*-*-*', \
- ]
- # The heading sets have only three entries (no fixed font); a
- # request for the fixed style in a heading therefore ends up using
- # the last entry, the bold font (see FormattingParser.pushstyle).
- h1fontset = [ \
- '-*-helvetica-medium-r-normal-*-*-180-100-*-*-*-*-*', \
- '-*-helvetica-medium-o-normal-*-*-180-100-*-*-*-*-*', \
- '-*-helvetica-bold-r-normal-*-*-180-100-*-*-*-*-*', \
- ]
- h2fontset = [ \
- '-*-helvetica-medium-r-normal-*-*-140-100-*-*-*-*-*', \
- '-*-helvetica-medium-o-normal-*-*-140-100-*-*-*-*-*', \
- '-*-helvetica-bold-r-normal-*-*-140-100-*-*-*-*-*', \
- ]
- h3fontset = [ \
- '-*-helvetica-medium-r-normal-*-*-120-100-*-*-*-*-*', \
- '-*-helvetica-medium-o-normal-*-*-120-100-*-*-*-*-*', \
- '-*-helvetica-bold-r-normal-*-*-120-100-*-*-*-*-*', \
- ]
- # Overrides the inherited ddindent of 25
- ddindent = 40
-
-
-class MacStylesheet(NullStylesheet):
-    # Style sheet for the Macintosh.  Each font is a tuple of a font
-    # name, a one-letter style code and a size; each set is in the
-    # order [roman, italic, bold, fixed].  The indents are inherited.
-    stdfontset = [ \
-        ('Geneva', 'p', 10), \
-        ('Geneva', 'i', 10), \
-        ('Geneva', 'b', 10), \
-        ('Monaco', 'p', 10), \
-        ]
-    h1fontset = [ \
-        ('Geneva', 'p', 18), \
-        ('Geneva', 'i', 18), \
-        ('Geneva', 'b', 18), \
-        ('Monaco', 'p', 18), \
-        ]
-    # The size 14 set used to be assigned to h3fontset as well, where
-    # the size 12 set below immediately replaced it; h2 headings were
-    # thus left with the inherited [None].
-    h2fontset = [ \
-        ('Geneva', 'p', 14), \
-        ('Geneva', 'i', 14), \
-        ('Geneva', 'b', 14), \
-        ('Monaco', 'p', 14), \
-        ]
-    h3fontset = [ \
-        ('Geneva', 'p', 12), \
-        ('Geneva', 'i', 12), \
-        ('Geneva', 'b', 12), \
-        ('Monaco', 'p', 12), \
-        ]
-
-
-# Style sheet for stdwin: the X11 fonts, except on the Macintosh
-StdwinStylesheet = X11Stylesheet
-if os.name == 'mac':
-    StdwinStylesheet = MacStylesheet
-
-
-class GLStylesheet(NullStylesheet):
- # Style sheet used by testGL.  Each font is a string made of a font
- # name and a number (presumably the size -- TODO confirm); each set
- # is in the order [roman, italic, bold, fixed].  The indents are
- # inherited.
- stdfontset = [ \
- 'Helvetica 10', \
- 'Helvetica-Italic 10', \
- 'Helvetica-Bold 10', \
- 'Courier 10', \
- ]
- h1fontset = [ \
- 'Helvetica 18', \
- 'Helvetica-Italic 18', \
- 'Helvetica-Bold 18', \
- 'Courier 18', \
- ]
- h2fontset = [ \
- 'Helvetica 14', \
- 'Helvetica-Italic 14', \
- 'Helvetica-Bold 14', \
- 'Courier 14', \
- ]
- h3fontset = [ \
- 'Helvetica 12', \
- 'Helvetica-Italic 12', \
- 'Helvetica-Bold 12', \
- 'Courier 12', \
- ]
-
-
-# Test program -- formats a document to sys.stdout through a
-# fmt.WritingFormatter and reports how long the formatting took,
-# exclusive of the time needed to read the document
-
-def test():
-    import fmt
-    import time
-    import urllib
-    # Document to format: the first command line argument if there is
-    # one, else test.html
-    source = 'test.html'
-    if sys.argv[1:]:
-        source = sys.argv[1]
-    data = urllib.urlopen(source).read()
-    # The clock is started only after the document has been read
-    started = time.time()
-    writer = fmt.WritingFormatter(sys.stdout, 79)
-    parser = FormattingParser(writer, NullStylesheet)
-    parser.feed(data)
-    parser.close()
-    elapsed = time.time() - started
-    print
-    print '*** Formatting time:', round(elapsed, 3), 'seconds.'
-
-
-# Test program using stdwin
-
-def testStdwin():
- # Display a document (first command line argument, default
- # test.html) in a stdwin window until the window is closed.
- import stdwin, fmt
- from stdwinevents import *
- if sys.argv[1:]: file = sys.argv[1]
- else: file = 'test.html'
- data = open(file, 'r').read()
- window = stdwin.open('testStdwin')
- # Back end; created when the first draw event arrives
- b = None
- while 1:
- etype, ewin, edetail = stdwin.getevent()
- if etype == WE_CLOSE:
- break
- if etype == WE_SIZE:
- window.setdocsize(0, 0)
- window.setorigin(0, 0)
- window.change((0, 0), (10000, 30000)) # XXX
- if etype == WE_DRAW:
- if not b:
- # First draw event: format the whole document once
- b = fmt.StdwinBackEnd(window, 1)
- f = fmt.BaseFormatter(b.d, b)
- # NOTE(review): MacStylesheet is used on every platform here,
- # although StdwinStylesheet is selected per platform above --
- # confirm which one is intended.
- p = FormattingParser(f, \
- MacStylesheet)
- p.feed(data)
- p.close()
- b.finish()
- else:
- # Later draw events: let the back end redraw
- b.redraw(edetail)
- window.close()
-
-
-# Test program using GL
-
-def testGL():
-    import gl, GL, fmt
-    # Document to show: the first command line argument if there is
-    # one, else test.html
-    path = 'test.html'
-    if sys.argv[1:]:
-        path = sys.argv[1]
-    data = open(path, 'r').read()
-    # Open a 600 by 600 window and clear it to white
-    width, height = 600, 600
-    gl.foreground()
-    gl.prefsize(width, height)
-    wid = gl.winopen('testGL')
-    gl.ortho2(0, width, height, 0)
-    gl.color(GL.WHITE)
-    gl.clear()
-    gl.color(GL.BLACK)
-    # Format the document into the window
-    backend = fmt.GLBackEnd(wid)
-    formatter = fmt.BaseFormatter(backend.d, backend)
-    parser = FormattingParser(formatter, GLStylesheet)
-    parser.feed(data)
-    parser.close()
-    backend.finish()
-    #
-    # Wait five seconds before returning
-    import time
-    time.sleep(5)
-
-
-# Run the timing test when this file is executed as a script
-if __name__ == '__main__':
- test()