#1745761, #755670, #13357, #12629, #1200313: merge with 3.2.

author: Ezio Melotti <ezio.melotti@gmail.com> 2011-11-14 16:56:11 (GMT)
committer: Ezio Melotti <ezio.melotti@gmail.com> 2011-11-14 16:56:11 (GMT)
commit: d5d4406c8ebbbdf8a8961fc119be22b15a1c40ad (patch)
tree: 585f97fa7990268f7ef46a60d78407838e334bc3 /Lib/html/parser.py
parent: 84b48a6c46ce7720a23d92f4d64961812d00ce1b (diff)
parent: c2fe57762b6cfa8849908e1a0475036cd0b058ba (diff)
download: cpython-d5d4406c8ebbbdf8a8961fc119be22b15a1c40ad.zip
cpython-d5d4406c8ebbbdf8a8961fc119be22b15a1c40ad.tar.gz
cpython-d5d4406c8ebbbdf8a8961fc119be22b15a1c40ad.tar.bz2
1 file changed, 10 insertions, 9 deletions
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index afdb305..662e855 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -30,8 +30,8 @@ attrfind = re.compile(
     r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
     r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')
 attrfind_tolerant = re.compile(
-    r',?\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
-    r'(\'[^\']*\'|"[^"]*"|[^>\s]*))?')
+    r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
+    r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
 locatestarttagend = re.compile(r"""
   <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
   (?:\s+                             # whitespace before attribute name
@@ -49,16 +49,16 @@ locatestarttagend = re.compile(r"""
 locatestarttagend_tolerant = re.compile(r"""
   <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
   (?:\s*                             # optional whitespace before attribute name
-    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
-      (?:\s*=\s*                     # value indicator
+    (?:(?<=['"\s])[^\s/>][^\s/=>]*   # attribute name
+      (?:\s*=+\s*                    # value indicator
         (?:'[^']*'                   # LITA-enclosed value
-          |\"[^\"]*\"                # LIT-enclosed value
-          |[^'\">\s]+                # bare value
+          |"[^"]*"                   # LIT-enclosed value
+          |(?!['"])[^>\s]*           # bare value
          )
          (?:\s*,)*                   # possibly followed by a comma
-       )?
-     )
-   )*
+       )?\s*
+     )*
+   )?
   \s*                                # trailing whitespace
 """, re.VERBOSE)
 endendtag = re.compile('>')
@@ -295,6 +295,7 @@ class HTMLParser(_markupbase.ParserBase):
             elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                  attrvalue[:1] == '"' == attrvalue[-1:]:
                 attrvalue = attrvalue[1:-1]
+            if attrvalue:
                 attrvalue = self.unescape(attrvalue)
             attrs.append((attrname.lower(), attrvalue))
             k = m.end()
author	Ezio Melotti <ezio.melotti@gmail.com>	2011-11-14 16:56:11 (GMT)
committer	Ezio Melotti <ezio.melotti@gmail.com>	2011-11-14 16:56:11 (GMT)
commit	d5d4406c8ebbbdf8a8961fc119be22b15a1c40ad (patch)
tree	585f97fa7990268f7ef46a60d78407838e334bc3 /Lib/html/parser.py
parent	84b48a6c46ce7720a23d92f4d64961812d00ce1b (diff)
parent	c2fe57762b6cfa8849908e1a0475036cd0b058ba (diff)
download	cpython-d5d4406c8ebbbdf8a8961fc119be22b15a1c40ad.zip cpython-d5d4406c8ebbbdf8a8961fc119be22b15a1c40ad.tar.gz cpython-d5d4406c8ebbbdf8a8961fc119be22b15a1c40ad.tar.bz2