diff options
| -rw-r--r-- | Lib/HTMLParser.py | 17 | ||||
| -rw-r--r-- | Lib/test/test_htmlparser.py | 5 |
2 files changed, 15 insertions, 7 deletions
diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py index 7cee47a..4fdc09a 100644 --- a/Lib/HTMLParser.py +++ b/Lib/HTMLParser.py @@ -367,13 +367,16 @@ class HTMLParser(markupbase.ParserBase): return s def replaceEntities(s): s = s.groups()[0] - if s[0] == "#": - s = s[1:] - if s[0] in ['x','X']: - c = int(s[1:], 16) - else: - c = int(s) - return unichr(c) + try: + if s[0] == "#": + s = s[1:] + if s[0] in ['x','X']: + c = int(s[1:], 16) + else: + c = int(s) + return unichr(c) + except ValueError: + return '&#'+s+';' else: # Cannot use name2codepoint directly, because HTMLParser supports apos, # which is not part of HTML 4 diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index c45cf00..717585c 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -320,6 +320,11 @@ DOCTYPE html [ ("endtag", "p"), ]) + def test_unescape_function(self): + parser = HTMLParser.HTMLParser() + self.assertEqual(parser.unescape('&#bad;'),'&#bad;') + self.assertEqual(parser.unescape('&#0038;'),'&') + def test_main(): test_support.run_unittest(HTMLParserTestCase) |
