summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Ezio Melotti <ezio.melotti@gmail.com> 2014-02-01 19:22:26 (GMT)
committer Ezio Melotti <ezio.melotti@gmail.com> 2014-02-01 19:22:26 (GMT)
commit 153d97b24e7253f344860094eb2c98ed93657720 (patch)
tree 9256d843a13b443631941b250ee4362fbbcb2ab8
parent 145dff856713d5db6984e2cd888f5f2e5a9904bc (diff)
parent f27b9a741ac7771aa1f6c1219d86a61222fdc20a (diff)
download cpython-153d97b24e7253f344860094eb2c98ed93657720.zip
cpython-153d97b24e7253f344860094eb2c98ed93657720.tar.gz
cpython-153d97b24e7253f344860094eb2c98ed93657720.tar.bz2
#20288: merge with 3.3.
-rw-r--r-- Lib/html/parser.py 6
-rw-r--r-- Lib/test/test_htmlparser.py 6
-rw-r--r-- Misc/NEWS 2
3 files changed, 11 insertions, 3 deletions
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index 12c28b8..a650d5e 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -264,9 +264,9 @@ class HTMLParser(_markupbase.ParserBase):
i = self.updatepos(i, k)
continue
else:
- if ";" in rawdata[i:]: #bail by consuming &#
- self.handle_data(rawdata[0:2])
- i = self.updatepos(i, 2)
+ if ";" in rawdata[i:]: # bail by consuming &#
+ self.handle_data(rawdata[i:i+2])
+ i = self.updatepos(i, i+2)
break
elif startswith('&', i):
match = entityref.match(rawdata, i)
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index 1a480c8..2d771a2 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -167,6 +167,12 @@ text
("data", "&#bad;"),
("endtag", "p"),
])
+ # add the [] as a workaround to avoid buffering (see #20288)
+ self._run_check(["<div>&#bad;</div>"], [
+ ("starttag", "div", []),
+ ("data", "&#bad;"),
+ ("endtag", "div"),
+ ])
def test_unclosed_entityref(self):
self._run_check("&entityref foo", [
diff --git a/Misc/NEWS b/Misc/NEWS
index 71892a6..7e96863 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -41,6 +41,8 @@ Library
ValueError instead of assert for forbidden subprocess_{shell,exec}
arguments. (More to follow -- a convenience API for subprocesses.)
+- Issue #20288: fix handling of invalid numeric charrefs in HTMLParser.
+
- Issue #20424: Python implementation of io.StringIO now supports lone surrogates.
- Issue #20308: inspect.signature now works on classes without user-defined