diff options
author | Senthil Kumaran <orsenthil@gmail.com> | 2010-12-28 16:05:07 (GMT) |
---|---|---|
committer | Senthil Kumaran <orsenthil@gmail.com> | 2010-12-28 16:05:07 (GMT) |
commit | 3f60f09eb23be3289ac5cc019391711dcdf800b3 (patch) | |
tree | 25930497b54b42a4c61318d5ede15fded795d149 /Lib | |
parent | 06fdbedf81c49fd9614379ebd68d6388525bf42f (diff) | |
download | cpython-3f60f09eb23be3289ac5cc019391711dcdf800b3.zip cpython-3f60f09eb23be3289ac5cc019391711dcdf800b3.tar.gz cpython-3f60f09eb23be3289ac5cc019391711dcdf800b3.tar.bz2 |
Fix Issue10759 - HTMLParser.unescape() to handle malformed charrefs.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/HTMLParser.py | 17 | ||||
-rw-r--r-- | Lib/test/test_htmlparser.py | 5 |
2 files changed, 15 insertions, 7 deletions
diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
index 7cee47a..4fdc09a 100644
--- a/Lib/HTMLParser.py
+++ b/Lib/HTMLParser.py
@@ -367,13 +367,16 @@ class HTMLParser(markupbase.ParserBase):
             return s
         def replaceEntities(s):
             s = s.groups()[0]
-            if s[0] == "#":
-                s = s[1:]
-                if s[0] in ['x','X']:
-                    c = int(s[1:], 16)
-                else:
-                    c = int(s)
-                return unichr(c)
+            try:
+                if s[0] == "#":
+                    s = s[1:]
+                    if s[0] in ['x','X']:
+                        c = int(s[1:], 16)
+                    else:
+                        c = int(s)
+                    return unichr(c)
+            except ValueError:
+                return '&#'+s+';'
             else:
                 # Cannot use name2codepoint directly, because HTMLParser supports apos,
                 # which is not part of HTML 4
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index c45cf00..717585c 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -320,6 +320,11 @@ DOCTYPE html [
             ("endtag", "p"),
         ])
 
+    def test_unescape_function(self):
+        parser = HTMLParser.HTMLParser()
+        self.assertEqual(parser.unescape('&#bad;'),'&#bad;')
+        self.assertEqual(parser.unescape('&'),'&')
+
 
 def test_main():
     test_support.run_unittest(HTMLParserTestCase)