diff options
author | Georg Brandl <georg@python.org> | 2006-04-01 08:35:18 (GMT) |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2006-04-01 08:35:18 (GMT) |
commit | 7f6b67c2359d9b52b2eabc1e2ccff4cedb5c78b7 (patch) | |
tree | 85426998f3d08e048d4b0211361b7140b04bc414 /Lib/sgmllib.py | |
parent | 48d5e508ebc136f8f67a2cb2bdd29e55324f5a95 (diff) | |
download | cpython-7f6b67c2359d9b52b2eabc1e2ccff4cedb5c78b7.zip cpython-7f6b67c2359d9b52b2eabc1e2ccff4cedb5c78b7.tar.gz cpython-7f6b67c2359d9b52b2eabc1e2ccff4cedb5c78b7.tar.bz2 |
patch #1462498: handle entityrefs in attribute values.
Diffstat (limited to 'Lib/sgmllib.py')
-rw-r--r-- | Lib/sgmllib.py | 34 |
1 file changed, 31 insertions, 3 deletions
```diff
diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py
index 08e365b..784dbe1 100644
--- a/Lib/sgmllib.py
+++ b/Lib/sgmllib.py
@@ -269,9 +269,37 @@ class SGMLParser(markupbase.ParserBase):
             attrname, rest, attrvalue = match.group(1, 2, 3)
             if not rest:
                 attrvalue = attrname
-            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
-                 attrvalue[:1] == '"' == attrvalue[-1:]:
-                attrvalue = attrvalue[1:-1]
+            else:
+                if (attrvalue[:1] == "'" == attrvalue[-1:] or
+                    attrvalue[:1] == '"' == attrvalue[-1:]):
+                    # strip quotes
+                    attrvalue = attrvalue[1:-1]
+                l = 0
+                new_attrvalue = ''
+                while l < len(attrvalue):
+                    av_match = entityref.match(attrvalue, l)
+                    if (av_match and av_match.group(1) in self.entitydefs and
+                        attrvalue[av_match.end(1)] == ';'):
+                        # only substitute entityrefs ending in ';' since
+                        # otherwise we may break <a href='?p=x&q=y'>
+                        # which is very common
+                        new_attrvalue += self.entitydefs[av_match.group(1)]
+                        l = av_match.end(0)
+                        continue
+                    ch_match = charref.match(attrvalue, l)
+                    if ch_match:
+                        try:
+                            char = chr(int(ch_match.group(1)))
+                            new_attrvalue += char
+                            l = ch_match.end(0)
+                            continue
+                        except ValueError:
+                            # invalid character reference, don't substitute
+                            pass
+                    # all other cases
+                    new_attrvalue += attrvalue[l]
+                    l += 1
+                attrvalue = new_attrvalue
             attrs.append((attrname.lower(), attrvalue))
             k = match.end(0)
         if rawdata[j] == '>':
```