summaryrefslogtreecommitdiffstats
path: root/Demo/tkinter/www/sgmllib.py
diff options
context:
space:
mode:
Diffstat (limited to 'Demo/tkinter/www/sgmllib.py')
-rwxr-xr-xDemo/tkinter/www/sgmllib.py321
1 files changed, 0 insertions, 321 deletions
diff --git a/Demo/tkinter/www/sgmllib.py b/Demo/tkinter/www/sgmllib.py
deleted file mode 100755
index af75e0d..0000000
--- a/Demo/tkinter/www/sgmllib.py
+++ /dev/null
@@ -1,321 +0,0 @@
-# A parser for SGML, using the derived class as static DTD.
-
-# XXX This only supports those SGML features used by HTML.
-
-# XXX There should be a way to distinguish between PCDATA (parsed
-# character data -- the normal case), RCDATA (replaceable character
-# data -- only char and entity references and end tags are special)
-# and CDATA (character data -- only end tags are special).
-
-
-import regex
-import string
-
-
-# Regular expressions used for parsing
-
-incomplete = regex.compile( \
- '<!-?\|</[a-zA-Z][a-zA-Z0-9]*[ \t\n]*\|</?\|' + \
- '&#[a-zA-Z0-9]*\|&[a-zA-Z][a-zA-Z0-9]*\|&')
-entityref = regex.compile('&[a-zA-Z][a-zA-Z0-9]*[;.]')
-charref = regex.compile('&#[a-zA-Z0-9]+;')
-starttagopen = regex.compile('<[a-zA-Z]')
-endtag = regex.compile('</[a-zA-Z][a-zA-Z0-9]*[ \t\n]*>')
-commentopen = regex.compile('<!--')
-
-
-# SGML parser base class -- find tags and call handler functions.
-# Usage: p = SGMLParser(); p.feed(data); ...; p.close().
-# The dtd is defined by deriving a class which defines methods
-# with special names to handle tags: start_foo and end_foo to handle
-# <foo> and </foo>, respectively, or do_foo to handle <foo> by itself.
-# (Tags are converted to lower case for this purpose.) The data
-# between tags is passed to the parser by calling self.handle_data()
-# with some data as argument (the data may be split up in arbutrary
-# chunks). Entity references are passed by calling
-# self.handle_entityref() with the entity reference as argument.
-
-class SGMLParser:
-
- # Interface -- initialize and reset this instance
- def __init__(self):
- self.reset()
-
- # Interface -- reset this instance. Loses all unprocessed data
- def reset(self):
- self.rawdata = ''
- self.stack = []
- self.nomoretags = 0
- self.literal = 0
-
- # For derived classes only -- enter literal mode (CDATA) till EOF
- def setnomoretags(self):
- self.nomoretags = self.literal = 1
-
- # For derived classes only -- enter literal mode (CDATA)
- def setliteral(self, *args):
- self.literal = 1
-
- # Interface -- feed some data to the parser. Call this as
- # often as you want, with as little or as much text as you
- # want (may include '\n'). (This just saves the text, all the
- # processing is done by process() or close().)
- def feed(self, data):
- self.rawdata = self.rawdata + data
- self.goahead(0)
-
- # Interface -- handle the remaining data
- def close(self):
- self.goahead(1)
-
- # Internal -- handle data as far as reasonable. May leave state
- # and data to be processed by a subsequent call. If 'end' is
- # true, force handling all data as if followed by EOF marker.
- def goahead(self, end):
- rawdata = self.rawdata
- i = 0
- n = len(rawdata)
- while i < n:
- if self.nomoretags:
- self.handle_data(rawdata[i:n])
- i = n
- break
- j = incomplete.search(rawdata, i)
- if j < 0: j = n
- if i < j: self.handle_data(rawdata[i:j])
- i = j
- if i == n: break
- if rawdata[i] == '<':
- if starttagopen.match(rawdata, i) >= 0:
- if self.literal:
- self.handle_data(rawdata[i])
- i = i+1
- continue
- k = self.parse_starttag(i)
- if k < 0: break
- i = i + k
- continue
- k = endtag.match(rawdata, i)
- if k >= 0:
- j = i+k
- self.parse_endtag(rawdata[i:j])
- i = j
- self.literal = 0
- continue
- if commentopen.match(rawdata, i) >= 0:
- if self.literal:
- self.handle_data(rawdata[i])
- i = i+1
- continue
- k = self.parse_comment(i)
- if k < 0: break
- i = i+k
- continue
- elif rawdata[i] == '&':
- k = charref.match(rawdata, i)
- if k >= 0:
- j = i+k
- self.handle_charref(rawdata[i+2:j-1])
- i = j
- continue
- k = entityref.match(rawdata, i)
- if k >= 0:
- j = i+k
- self.handle_entityref(rawdata[i+1:j-1])
- i = j
- continue
- else:
- raise RuntimeError, 'neither < nor & ??'
- # We get here only if incomplete matches but
- # nothing else
- k = incomplete.match(rawdata, i)
- if k < 0: raise RuntimeError, 'no incomplete match ??'
- j = i+k
- if j == n: break # Really incomplete
- self.handle_data(rawdata[i:j])
- i = j
- # end while
- if end and i < n:
- self.handle_data(rawdata[i:n])
- i = n
- self.rawdata = rawdata[i:]
- # XXX if end: check for empty stack
-
- # Internal -- parse comment, return length or -1 if not ternimated
- def parse_comment(self, i):
- rawdata = self.rawdata
- if rawdata[i:i+4] <> '<!--':
- raise RuntimeError, 'unexpected call to handle_comment'
- try:
- j = string.index(rawdata, '--', i+4)
- except string.index_error:
- return -1
- self.handle_comment(rawdata[i+4: j])
- j = j+2
- n = len(rawdata)
- while j < n and rawdata[j] in ' \t\n': j = j+1
- if j == n: return -1 # Wait for final '>'
- if rawdata[j] == '>':
- j = j+1
- else:
- print '*** comment not terminated with >'
- print repr(rawdata[j-5:j]), '*!*', repr(rawdata[j:j+5])
- return j-i
-
- # Internal -- handle starttag, return length or -1 if not terminated
- def parse_starttag(self, i):
- rawdata = self.rawdata
- try:
- j = string.index(rawdata, '>', i)
- except string.index_error:
- return -1
- # Now parse the data between i+1 and j into a tag and attrs
- attrs = []
- tagfind = regex.compile('[a-zA-Z][a-zA-Z0-9]*')
- attrfind = regex.compile( \
- '[ \t\n]+\([a-zA-Z][a-zA-Z0-9]*\)' + \
- '\([ \t\n]*=[ \t\n]*' + \
- '\(\'[^\']*\';\|"[^"]*"\|[-a-zA-Z0-9./:+*%?!()_#]+\)\)?')
- k = tagfind.match(rawdata, i+1)
- if k < 0:
- raise RuntimeError, 'unexpected call to parse_starttag'
- k = i+1+k
- tag = string.lower(rawdata[i+1:k])
- while k < j:
- l = attrfind.match(rawdata, k)
- if l < 0: break
- regs = attrfind.regs
- a1, b1 = regs[1]
- a2, b2 = regs[2]
- a3, b3 = regs[3]
- attrname = rawdata[a1:b1]
- if '=' in rawdata[k:k+l]:
- attrvalue = rawdata[a3:b3]
- if attrvalue[:1] == '\'' == attrvalue[-1:] or \
- attrvalue[:1] == '"' == attrvalue[-1:]:
- attrvalue = attrvalue[1:-1]
- else:
- attrvalue = ''
- attrs.append(string.lower(attrname), attrvalue)
- k = k + l
- j = j+1
- try:
- method = getattr(self, 'start_' + tag)
- except AttributeError:
- try:
- method = getattr(self, 'do_' + tag)
- except AttributeError:
- self.unknown_starttag(tag, attrs)
- return j-i
- method(attrs)
- return j-i
- self.stack.append(tag)
- method(attrs)
- return j-i
-
- # Internal -- parse endtag
- def parse_endtag(self, data):
- if data[:2] <> '</' or data[-1:] <> '>':
- raise RuntimeError, 'unexpected call to parse_endtag'
- tag = string.lower(string.strip(data[2:-1]))
- try:
- method = getattr(self, 'end_' + tag)
- except AttributeError:
- self.unknown_endtag(tag)
- return
- if self.stack and self.stack[-1] == tag:
- del self.stack[-1]
- else:
- print '*** Unbalanced </' + tag + '>'
- print '*** Stack:', self.stack
- found = None
- for i in range(len(self.stack)):
- if self.stack[i] == tag: found = i
- if found <> None:
- del self.stack[found:]
- method()
-
- # Example -- handle character reference, no need to override
- def handle_charref(self, name):
- try:
- n = string.atoi(name)
- except string.atoi_error:
- self.unknown_charref(name)
- return
- if not 0 <= n <= 255:
- self.unknown_charref(name)
- return
- self.handle_data(chr(n))
-
- # Definition of entities -- derived classes may override
- entitydefs = \
- {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}
-
- # Example -- handle entity reference, no need to override
- def handle_entityref(self, name):
- table = self.__class__.entitydefs
- name = string.lower(name)
- if table.has_key(name):
- self.handle_data(table[name])
- else:
- self.unknown_entityref(name)
- return
-
- # Example -- handle data, should be overridden
- def handle_data(self, data):
- pass
-
- # Example -- handle comment, could be overridden
- def handle_comment(self, data):
- pass
-
- # To be overridden -- handlers for unknown objects
- def unknown_starttag(self, tag, attrs): pass
- def unknown_endtag(self, tag): pass
- def unknown_charref(self, ref): pass
- def unknown_entityref(self, ref): pass
-
-
-class TestSGML(SGMLParser):
-
- def handle_data(self, data):
- r = repr(data)
- if len(r) > 72:
- r = r[:35] + '...' + r[-35:]
- print 'data:', r
-
- def handle_comment(self, data):
- r = repr(data)
- if len(r) > 68:
- r = r[:32] + '...' + r[-32:]
- print 'comment:', r
-
- def unknown_starttag(self, tag, attrs):
- print 'start tag: <' + tag,
- for name, value in attrs:
- print name + '=' + '"' + value + '"',
- print '>'
-
- def unknown_endtag(self, tag):
- print 'end tag: </' + tag + '>'
-
- def unknown_entityref(self, ref):
- print '*** unknown entity ref: &' + ref + ';'
-
- def unknown_charref(self, ref):
- print '*** unknown char ref: &#' + ref + ';'
-
-
-def test():
- file = 'test.html'
- f = open(file, 'r')
- x = TestSGML()
- while 1:
- line = f.readline()
- if not line:
- x.close()
- break
- x.feed(line)
-
-
-#test()