diff options
author | Guido van Rossum <guido@python.org> | 1995-08-04 04:22:39 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 1995-08-04 04:22:39 (GMT) |
commit | 145b2e0168ddd865e476b498705ea84d8c7b82b1 (patch) | |
tree | 9ff541f8b73e53aa82d1668ef54d324013804a2b /Lib/sgmllib.py | |
parent | 667d704997f26a1a22f4e981bbb3c2f8399cfc41 (diff) | |
download | cpython-145b2e0168ddd865e476b498705ea84d8c7b82b1.zip cpython-145b2e0168ddd865e476b498705ea84d8c7b82b1.tar.gz cpython-145b2e0168ddd865e476b498705ea84d8c7b82b1.tar.bz2 |
changed comment parsing
Diffstat (limited to 'Lib/sgmllib.py')
-rw-r--r-- | Lib/sgmllib.py | 27 |
1 file changed, 14 insertions, 13 deletions
```diff
diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py
index 2c92c31..695530a 100644
--- a/Lib/sgmllib.py
+++ b/Lib/sgmllib.py
@@ -21,7 +21,9 @@ entityref = regex.compile('&[a-zA-Z][a-zA-Z0-9]*[;.]')
 charref = regex.compile('&#[a-zA-Z0-9]+;')
 starttagopen = regex.compile('<[a-zA-Z]')
 endtag = regex.compile('</[a-zA-Z][a-zA-Z0-9]*[ \t\n]*>')
+special = regex.compile('<![^<>]*>')
 commentopen = regex.compile('<!--')
+commentclose = regex.compile('--[ \t\n]*>')
 
 
 # SGML parser base class -- find tags and call handler functions.
@@ -111,6 +113,14 @@ class SGMLParser:
                     if k < 0: break
                     i = i+k
                     continue
+                k = special.match(rawdata, i)
+                if k >= 0:
+                    if self.literal:
+                        self.handle_data(rawdata[i])
+                        i = i+1
+                        continue
+                    i = i+k
+                    continue
             elif rawdata[i] == '&':
                 k = charref.match(rawdata, i)
                 if k >= 0:
@@ -141,25 +151,16 @@ class SGMLParser:
         self.rawdata = rawdata[i:]
         # XXX if end: check for empty stack
 
-    # Internal -- parse comment, return length or -1 if not ternimated
+    # Internal -- parse comment, return length or -1 if not terminated
     def parse_comment(self, i):
         rawdata = self.rawdata
         if rawdata[i:i+4] <> '<!--':
             raise RuntimeError, 'unexpected call to handle_comment'
-        try:
-            j = string.index(rawdata, '--', i+4)
-        except string.index_error:
+        j = commentclose.search(rawdata, i+4)
+        if j < 0:
             return -1
         self.handle_comment(rawdata[i+4: j])
-        j = j+2
-        n = len(rawdata)
-        while j < n and rawdata[j] in ' \t\n': j = j+1
-        if j == n: return -1 # Wait for final '>'
-        if rawdata[j] == '>':
-            j = j+1
-        else:
-            print '*** comment not terminated with >'
-            print repr(rawdata[j-5:j]), '*!*', repr(rawdata[j:j+5])
+        j = j+commentclose.match(rawdata, j)
         return j-i
 
     # Internal -- handle starttag, return length or -1 if not terminated
```