summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Lib/html/parser.py 3
-rwxr-xr-x Lib/test/test_htmlparser.py 7
-rw-r--r-- Misc/ACKS 1
-rw-r--r-- Misc/NEWS 3
4 files changed, 14 insertions, 0 deletions
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index 83a5825..c2c7f6b 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -175,6 +175,9 @@ class HTMLParser(_markupbase.ParserBase):
i = self.updatepos(i, k)
continue
else:
+ if ";" in rawdata[i:]: #bail by consuming &#
+ self.handle_data(rawdata[0:2])
+ i = self.updatepos(i, 2)
break
elif startswith('&', i):
match = entityref.match(rawdata, i)
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index dd74aac..e982218 100755
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -136,6 +136,13 @@ text
("data", "\n"),
])
+ def test_malformatted_charref(self):
+ self._run_check("<p>&#bad;</p>", [
+ ("starttag", "p", []),
+ ("data", "&#bad;"),
+ ("endtag", "p"),
+ ])
+
def test_unclosed_entityref(self):
self._run_check("&entityref foo", [
("entityref", "entityref"),
diff --git a/Misc/ACKS b/Misc/ACKS
index 838f6f0..459e216 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -846,3 +846,4 @@ Siebren van der Zee
Uwe Zessin
Tarek Ziadé
Peter Åstrand
+Fredrik Håård
diff --git a/Misc/NEWS b/Misc/NEWS
index 27d16aa..2b5b791 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -54,6 +54,9 @@ C-API
Library
-------
+- Issue #6662: Fix parsing of malformatted charref (&#bad;), patch written by
+ Fredrik Håård
+
- Issue #6268: Fix seek() method of codecs.open(), don't read or write the BOM
twice after seek(0). Fix also reset() method of codecs, UTF-16, UTF-32 and
StreamWriter classes.