summary | refs | log | tree | commit | diff | stats
path: root/Lib/sgmllib.py
diff options
context:
space:
mode:
author Guido van Rossum <guido@python.org> 2001-05-21 20:17:17 (GMT)
committer Guido van Rossum <guido@python.org> 2001-05-21 20:17:17 (GMT)
commit 39d345127e7cdf09024420596136b0b785239199 (patch)
tree cd3b5d46979d2f8b662486edad69d2b3d40f0cea /Lib/sgmllib.py
parent 2b63969a5adbc43a3843102f95b45424da229745 (diff)
download cpython-39d345127e7cdf09024420596136b0b785239199.zip
cpython-39d345127e7cdf09024420596136b0b785239199.tar.gz
cpython-39d345127e7cdf09024420596136b0b785239199.tar.bz2
parse_declaration(): be more lenient in what we accept. We now basically accept <!...> where the dots can be single- or double-quoted strings or any other character except >.
Background: I found a real-life example that failed to parse with the old assumption: http://www.opensource.org/licenses/jabberpl.html contains a few constructs of the form <![if !supportLists]>...<![endif]>.
Diffstat (limited to 'Lib/sgmllib.py')
-rw-r--r-- Lib/sgmllib.py 19
1 files changed, 7 insertions, 12 deletions
diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py
index 5388c07..a471c05 100644
--- a/Lib/sgmllib.py
+++ b/Lib/sgmllib.py
@@ -39,7 +39,7 @@ attrfind = re.compile(
r'\s*([a-zA-Z_][-.a-zA-Z_0-9]*)(\s*=\s*'
r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:;+*%?!&$\(\)_#=~]*))?')
-declname = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*')
+decldata = re.compile(r'[^>\'\"]+')
declstringlit = re.compile(r'(\'[^\']*\'|"[^"]*")\s*')
@@ -212,8 +212,8 @@ class SGMLParser:
def parse_declaration(self, i):
rawdata = self.rawdata
j = i + 2
- # in practice, this should look like: ((name|stringlit) S*)+ '>'
- while 1:
+ n = len(rawdata)
+ while j < n:
c = rawdata[j:j+1]
if c == ">":
# end of declaration syntax
@@ -225,19 +225,14 @@ class SGMLParser:
# incomplete or an error?
return -1
j = m.end()
- elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
- m = declname.match(rawdata, j)
+ else:
+ m = decldata.match(rawdata, j)
if not m:
# incomplete or an error?
return -1
j = m.end()
- elif i == len(rawdata):
- # end of buffer between tokens
- return -1
- else:
- raise SGMLParseError(
- "unexpected char in declaration: %s" % `rawdata[i]`)
- assert 0, "can't get here!"
+ # end of buffer between tokens
+ return -1
# Internal -- parse processing instr, return length or -1 if not terminated
def parse_pi(self, i):