diff options
author | Andrew M. Kuchling <amk@amk.ca> | 2004-06-05 15:31:45 (GMT) |
---|---|---|
committer | Andrew M. Kuchling <amk@amk.ca> | 2004-06-05 15:31:45 (GMT) |
commit | b7d8ce0275d7b4c9a9c2312d0add835c6eac1730 (patch) | |
tree | 444603ff2dbf2002db0ff976d64f16516be4bbe7 | |
parent | 9021c955951c63ef3075b4751615af329427d3ec (diff) | |
download | cpython-b7d8ce0275d7b4c9a9c2312d0add835c6eac1730.zip cpython-b7d8ce0275d7b4c9a9c2312d0add835c6eac1730.tar.gz cpython-b7d8ce0275d7b4c9a9c2312d0add835c6eac1730.tar.bz2 |
[Bug #921657] Allow '@' in unquoted HTML attributes. Not strictly legal according to the HTML REC, but HTMLParser is already a pretty loose parser. Reported by Bernd Zimmermann.
-rw-r--r-- | Lib/HTMLParser.py | 2 | ||||
-rwxr-xr-x | Lib/test/test_htmlparser.py | 4 |
2 files changed, 5 insertions, 1 deletion
diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
index 7334581..553e842 100644
--- a/Lib/HTMLParser.py
+++ b/Lib/HTMLParser.py
@@ -26,7 +26,7 @@ commentclose = re.compile(r'--\s*>')
 tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
 attrfind = re.compile(
     r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
-    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~]*))?')
+    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')
 
 locatestarttagend = re.compile(r"""
   <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index a830ed7..5b4bd56 100755
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -204,6 +204,10 @@ DOCTYPE html [
         self._run_check("<e a=rgb(1,2,3)>", [
             ("starttag", "e", [("a", "rgb(1,2,3)")]),
             ])
+        # Regression test for SF bug #921657.
+        self._run_check("<a href=mailto:xyz@example.com>", [
+            ("starttag", "a", [("href", "mailto:xyz@example.com")]),
+            ])
 
     def test_attr_entity_replacement(self):
         self._run_check("""<a b='&><"''>""", [