diff options
| -rw-r--r-- | Lib/HTMLParser.py | 3 | ||||
| -rw-r--r-- | Lib/test/test_htmlparser.py | 7 | ||||
| -rw-r--r-- | Misc/NEWS | 2 |
3 files changed, 12 insertions, 0 deletions
diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
index 2cbc2ec..7cee47a 100644
--- a/Lib/HTMLParser.py
+++ b/Lib/HTMLParser.py
@@ -175,6 +175,9 @@ class HTMLParser(markupbase.ParserBase):
                     i = self.updatepos(i, k)
                     continue
                 else:
+                    if ";" in rawdata[i:]: #bail by consuming &#
+                        self.handle_data(rawdata[0:2])
+                        i = self.updatepos(i, 2)
                     break
             elif startswith('&', i):
                 match = entityref.match(rawdata, i)
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index 810af6c..c45cf00 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -313,6 +313,13 @@ DOCTYPE html [
             ("starttag", "html", [("foo", u"\u20AC&aa&unsupported;")])
             ])
 
+    def test_malformatted_charref(self):
+        self._run_check("<p>&#bad;</p>", [
+            ("starttag", "p", []),
+            ("data", "&#bad;"),
+            ("endtag", "p"),
+        ])
+
 
 def test_main():
     test_support.run_unittest(HTMLParserTestCase)
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -29,6 +29,8 @@ C-API
 Library
 -------
 
+- Issue #6662: Fix parsing of malformatted charref (&#bad;)
+
 - Issue #8016: Add the CP858 codec.
 
 - Issue #3924: Ignore cookies with invalid "version" field in cookielib.
