diff options
author | Fred Drake <fdrake@acm.org> | 2003-04-29 22:12:55 (GMT) |
---|---|---|
committer | Fred Drake <fdrake@acm.org> | 2003-04-29 22:12:55 (GMT) |
commit | 75ab1462d5725a2fc46cd56f07fc21f919ca93e0 (patch) | |
tree | 9fa80495c0f166f8e19c0136c6f7dd4f9f02f325 | |
parent | b616f179027da74dc61292d08431ca734c1fb4ae (diff) | |
download | cpython-75ab1462d5725a2fc46cd56f07fc21f919ca93e0.zip cpython-75ab1462d5725a2fc46cd56f07fc21f919ca93e0.tar.gz cpython-75ab1462d5725a2fc46cd56f07fc21f919ca93e0.tar.bz2 |
Allow "@" in unquoted attribute values.
Added test that checks for characters allowed in the query part of URLs.
Backport candidate.
-rw-r--r-- | Lib/sgmllib.py | 2 | ||||
-rw-r--r-- | Lib/test/test_sgmllib.py | 9 |
2 files changed, 10 insertions, 1 deletion
```diff
diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py
index 4a8c3b4..833e06f 100644
--- a/Lib/sgmllib.py
+++ b/Lib/sgmllib.py
@@ -33,7 +33,7 @@ endbracket = re.compile('[<>]')
 tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*')
 attrfind = re.compile(
     r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'
-    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"]*))?')
+    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?')
 
 
 class SGMLParseError(RuntimeError):
diff --git a/Lib/test/test_sgmllib.py b/Lib/test/test_sgmllib.py
index 6f4454f..6a77e07 100644
--- a/Lib/test/test_sgmllib.py
+++ b/Lib/test/test_sgmllib.py
@@ -200,6 +200,15 @@ DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
         self.check_events("""<a b='' c="">""", [
             ("starttag", "a", [("b", ""), ("c", "")]),
             ])
+        # URL construction stuff from RFC 1808:
+        safe = "$-_.+"
+        extra = "!*'(),"
+        reserved = ";/?:@&="
+        url = "http://example.com:8080/path/to/file?%s%s%s" % (
+            safe, extra, reserved)
+        self.check_events("""<e a=%s>""" % url, [
+            ("starttag", "e", [("a", url)]),
+            ])
         # Regression test for SF patch #669683.
         self.check_events("<e a=rgb(1,2,3)>", [
             ("starttag", "e", [("a", "rgb(1,2,3)")]),
```