summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Georg Brandl <georg@python.org> 2006-04-01 08:35:18 (GMT)
committer: Georg Brandl <georg@python.org> 2006-04-01 08:35:18 (GMT)
commit: 7f6b67c2359d9b52b2eabc1e2ccff4cedb5c78b7 (patch)
tree: 85426998f3d08e048d4b0211361b7140b04bc414 /Lib
parent: 48d5e508ebc136f8f67a2cb2bdd29e55324f5a95 (diff)
download: cpython-7f6b67c2359d9b52b2eabc1e2ccff4cedb5c78b7.zip
cpython-7f6b67c2359d9b52b2eabc1e2ccff4cedb5c78b7.tar.gz
cpython-7f6b67c2359d9b52b2eabc1e2ccff4cedb5c78b7.tar.bz2
patch #1462498: handle entityrefs in attribute values.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/sgmllib.py 34
-rw-r--r-- Lib/test/test_sgmllib.py 14
2 files changed, 45 insertions, 3 deletions
diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py
index 08e365b..784dbe1 100644
--- a/Lib/sgmllib.py
+++ b/Lib/sgmllib.py
@@ -269,9 +269,37 @@ class SGMLParser(markupbase.ParserBase):
attrname, rest, attrvalue = match.group(1, 2, 3)
if not rest:
attrvalue = attrname
- elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
- attrvalue[:1] == '"' == attrvalue[-1:]:
- attrvalue = attrvalue[1:-1]
+ else:
+ if (attrvalue[:1] == "'" == attrvalue[-1:] or
+ attrvalue[:1] == '"' == attrvalue[-1:]):
+ # strip quotes
+ attrvalue = attrvalue[1:-1]
+ l = 0
+ new_attrvalue = ''
+ while l < len(attrvalue):
+ av_match = entityref.match(attrvalue, l)
+ if (av_match and av_match.group(1) in self.entitydefs and
+ attrvalue[av_match.end(1)] == ';'):
+ # only substitute entityrefs ending in ';' since
+ # otherwise we may break <a href='?p=x&q=y'>
+ # which is very common
+ new_attrvalue += self.entitydefs[av_match.group(1)]
+ l = av_match.end(0)
+ continue
+ ch_match = charref.match(attrvalue, l)
+ if ch_match:
+ try:
+ char = chr(int(ch_match.group(1)))
+ new_attrvalue += char
+ l = ch_match.end(0)
+ continue
+ except ValueError:
+ # invalid character reference, don't substitute
+ pass
+ # all other cases
+ new_attrvalue += attrvalue[l]
+ l += 1
+ attrvalue = new_attrvalue
attrs.append((attrname.lower(), attrvalue))
k = match.end(0)
if rawdata[j] == '>':
diff --git a/Lib/test/test_sgmllib.py b/Lib/test/test_sgmllib.py
index bc25bd0..8e8b02f 100644
--- a/Lib/test/test_sgmllib.py
+++ b/Lib/test/test_sgmllib.py
@@ -214,6 +214,20 @@ DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
("starttag", "e", [("a", "rgb(1,2,3)")]),
])
+ def test_attr_values_entities(self):
+ """Substitution of entities and charrefs in attribute values"""
+ # SF bug #1452246
+ self.check_events("""<a b=&lt; c=&lt;&gt; d=&lt-&gt; e='&lt; '
+ f="&xxx;" g='&#32;&#33;' h='&#500;' i='x?a=b&c=d;'>""",
+ [("starttag", "a", [("b", "<"),
+ ("c", "<>"),
+ ("d", "&lt->"),
+ ("e", "< "),
+ ("f", "&xxx;"),
+ ("g", " !"),
+ ("h", "&#500;"),
+ ("i", "x?a=b&c=d;"), ])])
+
def test_attr_funky_names(self):
self.check_events("""<a a.b='v' c:d=v e-f=v>""", [
("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),