#20288: fix handling of invalid numeric charrefs in HTMLParser.

author: Ezio Melotti <ezio.melotti@gmail.com> 2014-02-01 19:21:01 (GMT)
committer: Ezio Melotti <ezio.melotti@gmail.com> 2014-02-01 19:21:01 (GMT)
commit: f27b9a741ac7771aa1f6c1219d86a61222fdc20a (patch)
tree: 517d3d39e33b79607e4135bdf1811a2ff908ccd1 /Lib
parent: a479b7505e88947dd8eff82b6bff604636287893 (diff)
download: cpython-f27b9a741ac7771aa1f6c1219d86a61222fdc20a.zip cpython-f27b9a741ac7771aa1f6c1219d86a61222fdc20a.tar.gz cpython-f27b9a741ac7771aa1f6c1219d86a61222fdc20a.tar.bz2
2 files changed, 9 insertions, 3 deletions
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index 2d3bef3..63fe774 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -228,9 +228,9 @@ class HTMLParser(_markupbase.ParserBase):
                     i = self.updatepos(i, k)
                     continue
                 else:
-                    if ";" in rawdata[i:]: #bail by consuming &#
-                        self.handle_data(rawdata[0:2])
-                        i = self.updatepos(i, 2)
+                    if ";" in rawdata[i:]:  # bail by consuming &#
+                        self.handle_data(rawdata[i:i+2])
+                        i = self.updatepos(i, i+2)
                     break
             elif startswith('&', i):
                 match = entityref.match(rawdata, i)
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index c977a9d..11d9c9c 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -151,6 +151,12 @@ text
             ("data", "&#bad;"),
             ("endtag", "p"),
         ])
+        # add the [] as a workaround to avoid buffering (see #20288)
+        self._run_check(["<div>&#bad;</div>"], [
+            ("starttag", "div", []),
+            ("data", "&#bad;"),
+            ("endtag", "div"),
+        ])
 
     def test_unclosed_entityref(self):
         self._run_check("&entityref foo", [
author	Ezio Melotti <ezio.melotti@gmail.com>	2014-02-01 19:21:01 (GMT)
committer	Ezio Melotti <ezio.melotti@gmail.com>	2014-02-01 19:21:01 (GMT)
commit	f27b9a741ac7771aa1f6c1219d86a61222fdc20a (patch)
tree	517d3d39e33b79607e4135bdf1811a2ff908ccd1 /Lib
parent	a479b7505e88947dd8eff82b6bff604636287893 (diff)
download	cpython-f27b9a741ac7771aa1f6c1219d86a61222fdc20a.zip cpython-f27b9a741ac7771aa1f6c1219d86a61222fdc20a.tar.gz cpython-f27b9a741ac7771aa1f6c1219d86a61222fdc20a.tar.bz2