From 75ab1462d5725a2fc46cd56f07fc21f919ca93e0 Mon Sep 17 00:00:00 2001 From: Fred Drake Date: Tue, 29 Apr 2003 22:12:55 +0000 Subject: Allow "@" in unquoted attribute values. Added test that checks for characters allowed in the query part of URLs. Backport candidate. --- Lib/sgmllib.py | 2 +- Lib/test/test_sgmllib.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py index 4a8c3b4..833e06f 100644 --- a/Lib/sgmllib.py +++ b/Lib/sgmllib.py @@ -33,7 +33,7 @@ endbracket = re.compile('[<>]') tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*') attrfind = re.compile( r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*' - r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"]*))?') + r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?') class SGMLParseError(RuntimeError): diff --git a/Lib/test/test_sgmllib.py b/Lib/test/test_sgmllib.py index 6f4454f..6a77e07 100644 --- a/Lib/test/test_sgmllib.py +++ b/Lib/test/test_sgmllib.py @@ -200,6 +200,15 @@ DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN' self.check_events("""<a b='' c="">""", [ ("starttag", "a", [("b", ""), ("c", "")]), ]) + # URL construction stuff from RFC 1808: + safe = "$-_.+" + extra = "!*'()," + reserved = ";/?:@&=" + url = "http://example.com:8080/path/to/file?%s%s%s" % ( + safe, extra, reserved) + self.check_events("""<e a=%s>""" % url, [ + ("starttag", "e", [("a", url)]), + ]) # Regression test for SF patch #669683. self.check_events("<e a=rgb(1,2,3)>", [ ("starttag", "e", [("a", "rgb(1,2,3)")]), -- cgit v0.12