diff options
author | Ezio Melotti <ezio.melotti@gmail.com> | 2014-02-01 19:22:26 (GMT) |
---|---|---|
committer | Ezio Melotti <ezio.melotti@gmail.com> | 2014-02-01 19:22:26 (GMT) |
commit | 153d97b24e7253f344860094eb2c98ed93657720 (patch) | |
tree | 9256d843a13b443631941b250ee4362fbbcb2ab8 | |
parent | 145dff856713d5db6984e2cd888f5f2e5a9904bc (diff) | |
parent | f27b9a741ac7771aa1f6c1219d86a61222fdc20a (diff) | |
download | cpython-153d97b24e7253f344860094eb2c98ed93657720.zip cpython-153d97b24e7253f344860094eb2c98ed93657720.tar.gz cpython-153d97b24e7253f344860094eb2c98ed93657720.tar.bz2 |
#20288: merge with 3.3.
-rw-r--r-- | Lib/html/parser.py | 6 | ||||
-rw-r--r-- | Lib/test/test_htmlparser.py | 6 | ||||
-rw-r--r-- | Misc/NEWS | 2 |
3 files changed, 11 insertions, 3 deletions
```diff
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index 12c28b8..a650d5e 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -264,9 +264,9 @@ class HTMLParser(_markupbase.ParserBase):
                     i = self.updatepos(i, k)
                     continue
                 else:
-                    if ";" in rawdata[i:]: #bail by consuming &#
-                        self.handle_data(rawdata[0:2])
-                        i = self.updatepos(i, 2)
+                    if ";" in rawdata[i:]:  # bail by consuming &#
+                        self.handle_data(rawdata[i:i+2])
+                        i = self.updatepos(i, i+2)
                     break
             elif startswith('&', i):
                 match = entityref.match(rawdata, i)
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index 1a480c8..2d771a2 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -167,6 +167,12 @@ text
             ("data", "&#bad;"),
             ("endtag", "p"),
         ])
+        # add the [] as a workaround to avoid buffering (see #20288)
+        self._run_check(["<div>&#bad;</div>"], [
+            ("starttag", "div", []),
+            ("data", "&#bad;"),
+            ("endtag", "div"),
+        ])
 
     def test_unclosed_entityref(self):
         self._run_check("&entityref foo", [
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -41,6 +41,8 @@ Library
   ValueError instead of assert for forbidden subprocess_{shell,exec}
   arguments.  (More to follow -- a convenience API for subprocesses.)
 
+- Issue #20288: fix handling of invalid numeric charrefs in HTMLParser.
+
 - Issue #20424: Python implementation of io.StringIO now supports lone surrogates.
 
 - Issue #20308: inspect.signature now works on classes without user-defined
```