From b7d8ce0275d7b4c9a9c2312d0add835c6eac1730 Mon Sep 17 00:00:00 2001 From: "Andrew M. Kuchling" Date: Sat, 5 Jun 2004 15:31:45 +0000 Subject: [Bug #921657] Allow '@' in unquoted HTML attributes. Not strictly legal according to the HTML REC, but HTMLParser is already a pretty loose parser. Reported by Bernd Zimmermann. --- Lib/HTMLParser.py | 2 +- Lib/test/test_htmlparser.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py index 7334581..553e842 100644 --- a/Lib/HTMLParser.py +++ b/Lib/HTMLParser.py @@ -26,7 +26,7 @@ commentclose = re.compile(r'--\s*>') tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*') attrfind = re.compile( r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*' - r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~]*))?') + r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?') locatestarttagend = re.compile(r""" <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index a830ed7..5b4bd56 100755 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -204,6 +204,10 @@ DOCTYPE html [ self._run_check("<e a=rgb(1,2,3)>", [ ("starttag", "e", [("a", "rgb(1,2,3)")]), ]) + # Regression test for SF bug #921657. + self._run_check("<a href=mailto:xyz@example.com>", [ + ("starttag", "a", [("href", "mailto:xyz@example.com")]), + ]) def test_attr_entity_replacement(self): self._run_check("""""", [ -- cgit v0.12