diff options
-rw-r--r-- | Lib/html/parser.py | 3 | ||||
-rw-r--r-- | Lib/test/test_htmlparser.py | 7 | ||||
-rw-r--r-- | Misc/ACKS | 1 | ||||
-rw-r--r-- | Misc/NEWS | 3 |
4 files changed, 14 insertions, 0 deletions
diff --git a/Lib/html/parser.py b/Lib/html/parser.py index 83a5825..c2c7f6b 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -175,6 +175,9 @@ class HTMLParser(_markupbase.ParserBase): i = self.updatepos(i, k) continue else: + if ";" in rawdata[i:]: #bail by consuming &# + self.handle_data(rawdata[0:2]) + i = self.updatepos(i, 2) break elif startswith('&', i): match = entityref.match(rawdata, i) diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index dd74aac..e982218 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -136,6 +136,13 @@ text ("data", "\n"), ]) + def test_malformatted_charref(self): + self._run_check("<p>&#bad;</p>", [ + ("starttag", "p", []), + ("data", "&#bad;"), + ("endtag", "p"), + ]) + def test_unclosed_entityref(self): self._run_check("&entityref foo", [ ("entityref", "entityref"), @@ -871,3 +871,4 @@ Siebren van der Zee Uwe Zessin Tarek Ziadé Peter Åstrand +Fredrik Håård @@ -392,6 +392,9 @@ C-API Library ------- +- Issue #6662: Fix parsing of malformatted charref (&#bad;), patch written by + Fredrik Håård + - Issue #8540: Decimal module: rename the Context._clamp attribute to Context.clamp and make it public. This is useful in creating contexts that correspond to the decimal interchange formats |