diff options
author | Guido van Rossum <guido@python.org> | 2001-05-21 20:17:17 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2001-05-21 20:17:17 (GMT) |
commit | 39d345127e7cdf09024420596136b0b785239199 (patch) | |
tree | cd3b5d46979d2f8b662486edad69d2b3d40f0cea /Lib/sgmllib.py | |
parent | 2b63969a5adbc43a3843102f95b45424da229745 (diff) | |
download | cpython-39d345127e7cdf09024420596136b0b785239199.zip cpython-39d345127e7cdf09024420596136b0b785239199.tar.gz cpython-39d345127e7cdf09024420596136b0b785239199.tar.bz2 |
parse_declaration(): be more lenient in what we accept. We now
basically accept `<!...>`, where the dots stand for any mix of single- or
double-quoted strings (which may themselves contain `>`) and any other
characters except `>`.
Background: I found a real-life example that failed to parse under
the old assumption. The page http://www.opensource.org/licenses/jabberpl.html
contains a few constructs of the form `<![if !supportLists]>...<![endif]>`.
Diffstat (limited to 'Lib/sgmllib.py')
-rw-r--r-- | Lib/sgmllib.py | 19 |
1 file changed, 7 insertions, 12 deletions
```diff
diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py
index 5388c07..a471c05 100644
--- a/Lib/sgmllib.py
+++ b/Lib/sgmllib.py
@@ -39,7 +39,7 @@ attrfind = re.compile(
     r'\s*([a-zA-Z_][-.a-zA-Z_0-9]*)(\s*=\s*'
     r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:;+*%?!&$\(\)_#=~]*))?')
 
-declname = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*')
+decldata = re.compile(r'[^>\'\"]+')
 declstringlit = re.compile(r'(\'[^\']*\'|"[^"]*")\s*')
 
 
@@ -212,8 +212,8 @@ class SGMLParser:
     def parse_declaration(self, i):
         rawdata = self.rawdata
         j = i + 2
-        # in practice, this should look like: ((name|stringlit) S*)+ '>'
-        while 1:
+        n = len(rawdata)
+        while j < n:
             c = rawdata[j:j+1]
             if c == ">":
                 # end of declaration syntax
@@ -225,19 +225,14 @@ class SGMLParser:
                     # incomplete or an error?
                     return -1
                 j = m.end()
-            elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
-                m = declname.match(rawdata, j)
+            else:
+                m = decldata.match(rawdata, j)
                 if not m:
                     # incomplete or an error?
                     return -1
                 j = m.end()
-            elif i == len(rawdata):
-                # end of buffer between tokens
-                return -1
-            else:
-                raise SGMLParseError(
-                    "unexpected char in declaration: %s" % `rawdata[i]`)
-        assert 0, "can't get here!"
+        # end of buffer between tokens
+        return -1
 
     # Internal -- parse processing instr, return length or -1 if not terminated
     def parse_pi(self, i):
```