From f50ffa94abe67c6ef5e615198af15f72e7cd2a9b Mon Sep 17 00:00:00 2001
From: Ezio Melotti <ezio.melotti@gmail.com>
Date: Fri, 28 Oct 2011 13:21:09 +0300
Subject: #13273: fix a bug that prevented HTMLParser from properly detecting
 some tags when strict=False.

---
 Lib/html/parser.py          |  5 ++---
 Lib/test/test_htmlparser.py | 33 +++++++++++++++++++++++++++++++++
 Misc/NEWS                   |  3 +++
 3 files changed, 38 insertions(+), 3 deletions(-)
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index a6d5be9..affaf73 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -30,7 +30,7 @@ attrfind = re.compile(
     r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
     r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')
 attrfind_tolerant = re.compile(
-    r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
+    r',?\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
     r'(\'[^\']*\'|"[^"]*"|[^>\s]*))?')
 locatestarttagend = re.compile(r"""
   <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
@@ -277,12 +277,11 @@ class HTMLParser(_markupbase.ParserBase):
         assert match, 'unexpected call to parse_starttag()'
         k = match.end()
         self.lasttag = tag = rawdata[i+1:k].lower()
-
         while k < endpos:
             if self.strict:
                 m = attrfind.match(rawdata, k)
             else:
-                m = attrfind_tolerant.search(rawdata, k)
+                m = attrfind_tolerant.match(rawdata, k)
             if not m:
                 break
             attrname, rest, attrvalue = m.group(1, 2, 3)
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index d45e453..9664485 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -373,6 +373,39 @@ class HTMLParserTolerantTestCase(TestCaseBase):
                                 [('action', 'bogus|&#()value')])],
                         collector = self.collector)
 
+    def test_issue13273(self):
+        html = ('<div style=""    ><b>The <a href="some_url">rain</a> '
+                '<br /> in <span>Spain</span></b></div>')
+        expected = [
+            ('starttag', 'div', [('style', '')]),
+            ('starttag', 'b', []),
+            ('data', 'The '),
+            ('starttag', 'a', [('href', 'some_url')]),
+            ('data', 'rain'),
+            ('endtag', 'a'),
+            ('data', ' '),
+            ('startendtag', 'br', []),
+            ('data', ' in '),
+            ('starttag', 'span', []),
+            ('data', 'Spain'),
+            ('endtag', 'span'),
+            ('endtag', 'b'),
+            ('endtag', 'div')
+        ]
+        self._run_check(html, expected, collector=self.collector)
+
+    def test_issue13273_2(self):
+        html = '<div style="", foo = "bar" ><b>The <a href="some_url">rain</a>'
+        expected = [
+            ('starttag', 'div', [('style', ''), ('foo', 'bar')]),
+            ('starttag', 'b', []),
+            ('data', 'The '),
+            ('starttag', 'a', [('href', 'some_url')]),
+            ('data', 'rain'),
+            ('endtag', 'a'),
+        ]
+        self._run_check(html, expected, collector=self.collector)
+
     def test_unescape_function(self):
         p = html.parser.HTMLParser()
         self.assertEqual(p.unescape('&#bad;'),'&#bad;')
diff --git a/Misc/NEWS b/Misc/NEWS
index cd8c9bf..6be52fb 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -61,6 +61,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #13273: fix a bug that prevented HTMLParser from properly detecting
+  some tags when strict=False.
+
 - Issue #10332: multiprocessing: fix a race condition when a Pool is closed
   before all tasks have completed.
 
-- 
cgit v0.12