changed comment parsing

author: Guido van Rossum <guido@python.org> 1995-08-04 04:22:39 (GMT)
committer: Guido van Rossum <guido@python.org> 1995-08-04 04:22:39 (GMT)
commit: 145b2e0168ddd865e476b498705ea84d8c7b82b1 (patch)
tree: 9ff541f8b73e53aa82d1668ef54d324013804a2b /Lib/sgmllib.py
parent: 667d704997f26a1a22f4e981bbb3c2f8399cfc41 (diff)
download: cpython-145b2e0168ddd865e476b498705ea84d8c7b82b1.zip
cpython-145b2e0168ddd865e476b498705ea84d8c7b82b1.tar.gz
cpython-145b2e0168ddd865e476b498705ea84d8c7b82b1.tar.bz2
1 files changed, 14 insertions, 13 deletions
diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py
index 2c92c31..695530a 100644
--- a/Lib/sgmllib.py
+++ b/Lib/sgmllib.py
@@ -21,7 +21,9 @@ entityref = regex.compile('&[a-zA-Z][a-zA-Z0-9]*[;.]')
 charref = regex.compile('&#[a-zA-Z0-9]+;')
 starttagopen = regex.compile('<[a-zA-Z]')
 endtag = regex.compile('</[a-zA-Z][a-zA-Z0-9]*[ \t\n]*>')
+special = regex.compile('<![^<>]*>')
 commentopen = regex.compile('<!--')
+commentclose = regex.compile('--[ \t\n]*>')
 
 
 # SGML parser base class -- find tags and call handler functions.
@@ -111,6 +113,14 @@ class SGMLParser:
 					if k < 0: break
 					i = i+k
 					continue
+				k = special.match(rawdata, i)
+				if k >= 0:
+					if self.literal:
+						self.handle_data(rawdata[i])
+						i = i+1
+						continue
+					i = i+k
+					continue
 			elif rawdata[i] == '&':
 				k = charref.match(rawdata, i)
 				if k >= 0:
@@ -141,25 +151,16 @@ class SGMLParser:
 		self.rawdata = rawdata[i:]
 		# XXX if end: check for empty stack
 
-	# Internal -- parse comment, return length or -1 if not ternimated
+	# Internal -- parse comment, return length or -1 if not terminated
 	def parse_comment(self, i):
 		rawdata = self.rawdata
 		if rawdata[i:i+4] <> '<!--':
 			raise RuntimeError, 'unexpected call to handle_comment'
-		try:
-			j = string.index(rawdata, '--', i+4)
-		except string.index_error:
+		j = commentclose.search(rawdata, i+4)
+		if j < 0:
 			return -1
 		self.handle_comment(rawdata[i+4: j])
-		j = j+2
-		n = len(rawdata)
-		while j < n and rawdata[j] in ' \t\n': j = j+1
-		if j == n: return -1 # Wait for final '>'
-		if rawdata[j] == '>':
-			j = j+1
-		else:
-			print '*** comment not terminated with >'
-			print repr(rawdata[j-5:j]), '*!*', repr(rawdata[j:j+5])
+		j = j+commentclose.match(rawdata, j)
 		return j-i
 
 	# Internal -- handle starttag, return length or -1 if not terminated
author	Guido van Rossum <guido@python.org>	1995-08-04 04:22:39 (GMT)
committer	Guido van Rossum <guido@python.org>	1995-08-04 04:22:39 (GMT)
commit	145b2e0168ddd865e476b498705ea84d8c7b82b1 (patch)
tree	9ff541f8b73e53aa82d1668ef54d324013804a2b /Lib/sgmllib.py
parent	667d704997f26a1a22f4e981bbb3c2f8399cfc41 (diff)
download	cpython-145b2e0168ddd865e476b498705ea84d8c7b82b1.zip cpython-145b2e0168ddd865e476b498705ea84d8c7b82b1.tar.gz cpython-145b2e0168ddd865e476b498705ea84d8c7b82b1.tar.bz2