summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Fred Drake <fdrake@acm.org>, 2003-04-29 22:12:55 (GMT)
committer: Fred Drake <fdrake@acm.org>, 2003-04-29 22:12:55 (GMT)
commit: 75ab1462d5725a2fc46cd56f07fc21f919ca93e0 (patch)
tree: 9fa80495c0f166f8e19c0136c6f7dd4f9f02f325
parent: b616f179027da74dc61292d08431ca734c1fb4ae (diff)
download: cpython-75ab1462d5725a2fc46cd56f07fc21f919ca93e0.zip
          cpython-75ab1462d5725a2fc46cd56f07fc21f919ca93e0.tar.gz
          cpython-75ab1462d5725a2fc46cd56f07fc21f919ca93e0.tar.bz2
Allow "@" in unquoted attribute values.
Added test that checks for characters allowed in the query part of URLs. Backport candidate.
-rw-r--r--  Lib/sgmllib.py            2
-rw-r--r--  Lib/test/test_sgmllib.py  9
2 files changed, 10 insertions, 1 deletion
diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py
index 4a8c3b4..833e06f 100644
--- a/Lib/sgmllib.py
+++ b/Lib/sgmllib.py
@@ -33,7 +33,7 @@ endbracket = re.compile('[<>]')
tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*')
attrfind = re.compile(
r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'
- r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"]*))?')
+ r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?')
class SGMLParseError(RuntimeError):
diff --git a/Lib/test/test_sgmllib.py b/Lib/test/test_sgmllib.py
index 6f4454f..6a77e07 100644
--- a/Lib/test/test_sgmllib.py
+++ b/Lib/test/test_sgmllib.py
@@ -200,6 +200,15 @@ DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
self.check_events("""<a b='' c="">""", [
("starttag", "a", [("b", ""), ("c", "")]),
])
+ # URL construction stuff from RFC 1808:
+ safe = "$-_.+"
+ extra = "!*'(),"
+ reserved = ";/?:@&="
+ url = "http://example.com:8080/path/to/file?%s%s%s" % (
+ safe, extra, reserved)
+ self.check_events("""<e a=%s>""" % url, [
+ ("starttag", "e", [("a", url)]),
+ ])
# Regression test for SF patch #669683.
self.check_events("<e a=rgb(1,2,3)>", [
("starttag", "e", [("a", "rgb(1,2,3)")]),