summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Ezio Melotti <ezio.melotti@gmail.com> 2013-05-01 13:18:25 (GMT)
committer: Ezio Melotti <ezio.melotti@gmail.com> 2013-05-01 13:18:25 (GMT)
commit: 8e596a765cf323cbd2ba31b15f2939f903d87913 (patch)
tree: 184a1f476856981e2aee50ee2ec9939994916ce6 /Lib
parent: a771a1b48e7fcf73b13e905f87f319fcd4cb25b2 (diff)
download: cpython-8e596a765cf323cbd2ba31b15f2939f903d87913.zip
cpython-8e596a765cf323cbd2ba31b15f2939f903d87913.tar.gz
cpython-8e596a765cf323cbd2ba31b15f2939f903d87913.tar.bz2
#17802: Fix an UnboundLocalError in html.parser. Initial tests by Thomas Barlow.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/html/parser.py 1
-rw-r--r-- Lib/test/test_htmlparser.py 14
2 files changed, 15 insertions, 0 deletions
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index f8ac828..60a322a 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -249,6 +249,7 @@ class HTMLParser(_markupbase.ParserBase):
if self.strict:
self.error("EOF in middle of entity or char ref")
else:
+ k = match.end()
if k <= i:
k = n
i = self.updatepos(i, i + 1)
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index c5d878d..b15b6fd 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -535,6 +535,20 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
]
self._run_check(html, expected)
+ def test_EOF_in_charref(self):
+ # see #17802
+ # This test checks that the UnboundLocalError reported in the issue
+ # is not raised; however, I'm not sure the returned values are correct.
+ # Maybe HTMLParser should use self.unescape for these.
+ data = [
+ ('a&', [('data', 'a&')]),
+ ('a&b', [('data', 'ab')]),
+ ('a&b ', [('data', 'a'), ('entityref', 'b'), ('data', ' ')]),
+ ('a&b;', [('data', 'a'), ('entityref', 'b')]),
+ ]
+ for html, expected in data:
+ self._run_check(html, expected)
+
def test_unescape_function(self):
p = self.get_collector()
self.assertEqual(p.unescape('&#bad;'),'&#bad;')