summary | refs | log | tree | commit | diff | stats
path: root/Lib/sgmllib.py
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org> 1995-08-04 04:22:39 (GMT)
committer: Guido van Rossum <guido@python.org> 1995-08-04 04:22:39 (GMT)
commit: 145b2e0168ddd865e476b498705ea84d8c7b82b1 (patch)
tree: 9ff541f8b73e53aa82d1668ef54d324013804a2b /Lib/sgmllib.py
parent: 667d704997f26a1a22f4e981bbb3c2f8399cfc41 (diff)
download: cpython-145b2e0168ddd865e476b498705ea84d8c7b82b1.zip
cpython-145b2e0168ddd865e476b498705ea84d8c7b82b1.tar.gz
cpython-145b2e0168ddd865e476b498705ea84d8c7b82b1.tar.bz2
changed comment parsing
Diffstat (limited to 'Lib/sgmllib.py')
-rw-r--r-- Lib/sgmllib.py | 27
1 file changed, 14 insertions, 13 deletions
diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py
index 2c92c31..695530a 100644
--- a/Lib/sgmllib.py
+++ b/Lib/sgmllib.py
@@ -21,7 +21,9 @@ entityref = regex.compile('&[a-zA-Z][a-zA-Z0-9]*[;.]')
charref = regex.compile('&#[a-zA-Z0-9]+;')
starttagopen = regex.compile('<[a-zA-Z]')
endtag = regex.compile('</[a-zA-Z][a-zA-Z0-9]*[ \t\n]*>')
+special = regex.compile('<![^<>]*>')
commentopen = regex.compile('<!--')
+commentclose = regex.compile('--[ \t\n]*>')
# SGML parser base class -- find tags and call handler functions.
@@ -111,6 +113,14 @@ class SGMLParser:
if k < 0: break
i = i+k
continue
+ k = special.match(rawdata, i)
+ if k >= 0:
+ if self.literal:
+ self.handle_data(rawdata[i])
+ i = i+1
+ continue
+ i = i+k
+ continue
elif rawdata[i] == '&':
k = charref.match(rawdata, i)
if k >= 0:
@@ -141,25 +151,16 @@ class SGMLParser:
self.rawdata = rawdata[i:]
# XXX if end: check for empty stack
- # Internal -- parse comment, return length or -1 if not ternimated
+ # Internal -- parse comment, return length or -1 if not terminated
def parse_comment(self, i):
rawdata = self.rawdata
if rawdata[i:i+4] <> '<!--':
raise RuntimeError, 'unexpected call to handle_comment'
- try:
- j = string.index(rawdata, '--', i+4)
- except string.index_error:
+ j = commentclose.search(rawdata, i+4)
+ if j < 0:
return -1
self.handle_comment(rawdata[i+4: j])
- j = j+2
- n = len(rawdata)
- while j < n and rawdata[j] in ' \t\n': j = j+1
- if j == n: return -1 # Wait for final '>'
- if rawdata[j] == '>':
- j = j+1
- else:
- print '*** comment not terminated with >'
- print repr(rawdata[j-5:j]), '*!*', repr(rawdata[j:j+5])
+ j = j+commentclose.match(rawdata, j)
return j-i
# Internal -- handle starttag, return length or -1 if not terminated