summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Fred Drake <fdrake@acm.org> 2001-07-05 18:21:57 (GMT)
committer: Fred Drake <fdrake@acm.org> 2001-07-05 18:21:57 (GMT)
commit: dc19163b18e193a729e75cb98926ff96c3154f99 (patch)
tree: 47b2b437d8774e07feac6cbd73a8a284835cb1ff
parent: 25211f5724087516f10ddb8a232d63e09a9b9bec (diff)
download: cpython-dc19163b18e193a729e75cb98926ff96c3154f99.zip
cpython-dc19163b18e193a729e75cb98926ff96c3154f99.tar.gz
cpython-dc19163b18e193a729e75cb98926ff96c3154f99.tar.bz2
Allow underscores in tag names and quote characters in unquoted attribute
values. The change for attribute values matches the way Mozilla and Navigator view the world, at least. This closes SF bug #436621.
-rw-r--r-- Lib/sgmllib.py 4
1 file changed, 2 insertions, 2 deletions
diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py
index a471c05..fe91c1b 100644
--- a/Lib/sgmllib.py
+++ b/Lib/sgmllib.py
@@ -34,10 +34,10 @@ endbracket = re.compile('[<>]')
special = re.compile('<![^<>]*>')
commentopen = re.compile('<!--')
commentclose = re.compile(r'--\s*>')
-tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9]*')
+tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*')
attrfind = re.compile(
r'\s*([a-zA-Z_][-.a-zA-Z_0-9]*)(\s*=\s*'
- r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:;+*%?!&$\(\)_#=~]*))?')
+ r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:;+*%?!&$\(\)_#=~\'"]*))?')
decldata = re.compile(r'[^>\'\"]+')
declstringlit = re.compile(r'(\'[^\']*\'|"[^"]*")\s*')