summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Andrew M. Kuchling <amk@amk.ca>, 2004-06-05 15:31:45 (GMT)
committer: Andrew M. Kuchling <amk@amk.ca>, 2004-06-05 15:31:45 (GMT)
commit: b7d8ce0275d7b4c9a9c2312d0add835c6eac1730 (patch)
tree: 444603ff2dbf2002db0ff976d64f16516be4bbe7 /Lib
parent: 9021c955951c63ef3075b4751615af329427d3ec (diff)
download: cpython-b7d8ce0275d7b4c9a9c2312d0add835c6eac1730.zip
          cpython-b7d8ce0275d7b4c9a9c2312d0add835c6eac1730.tar.gz
          cpython-b7d8ce0275d7b4c9a9c2312d0add835c6eac1730.tar.bz2
[Bug #921657] Allow '@' in unquoted HTML attributes. Not strictly legal according to the HTML REC, but HTMLParser is already a pretty loose parser. Reported by Bernd Zimmermann.
Diffstat (limited to 'Lib')
-rw-r--r--  Lib/HTMLParser.py            | 2
-rwxr-xr-x  Lib/test/test_htmlparser.py  | 4
2 files changed, 5 insertions, 1 deletion
diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
index 7334581..553e842 100644
--- a/Lib/HTMLParser.py
+++ b/Lib/HTMLParser.py
@@ -26,7 +26,7 @@ commentclose = re.compile(r'--\s*>')
tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
attrfind = re.compile(
r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
- r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~]*))?')
+ r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')
locatestarttagend = re.compile(r"""
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index a830ed7..5b4bd56 100755
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -204,6 +204,10 @@ DOCTYPE html [
self._run_check("<e a=rgb(1,2,3)>", [
("starttag", "e", [("a", "rgb(1,2,3)")]),
])
+ # Regression test for SF bug #921657.
+ self._run_check("<a href=mailto:xyz@example.com>", [
+ ("starttag", "a", [("href", "mailto:xyz@example.com")]),
+ ])
def test_attr_entity_replacement(self):
self._run_check("""<a b='&amp;&gt;&lt;&quot;&apos;'>""", [