diff options
author | Senthil Kumaran <orsenthil@gmail.com> | 2010-12-28 16:05:07 (GMT) |
---|---|---|
committer | Senthil Kumaran <orsenthil@gmail.com> | 2010-12-28 16:05:07 (GMT) |
commit | 3f60f09eb23be3289ac5cc019391711dcdf800b3 (patch) | |
tree | 25930497b54b42a4c61318d5ede15fded795d149 /Lib/HTMLParser.py | |
parent | 06fdbedf81c49fd9614379ebd68d6388525bf42f (diff) | |
download | cpython-3f60f09eb23be3289ac5cc019391711dcdf800b3.zip cpython-3f60f09eb23be3289ac5cc019391711dcdf800b3.tar.gz cpython-3f60f09eb23be3289ac5cc019391711dcdf800b3.tar.bz2 |
Fix Issue10759 - HTMLParser.unescape() to handle malformed charrefs.
Diffstat (limited to 'Lib/HTMLParser.py')
-rw-r--r-- | Lib/HTMLParser.py | 17 |
1 files changed, 10 insertions, 7 deletions
diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
index 7cee47a..4fdc09a 100644
--- a/Lib/HTMLParser.py
+++ b/Lib/HTMLParser.py
@@ -367,13 +367,16 @@ class HTMLParser(markupbase.ParserBase):
             return s
         def replaceEntities(s):
             s = s.groups()[0]
-            if s[0] == "#":
-                s = s[1:]
-                if s[0] in ['x','X']:
-                    c = int(s[1:], 16)
-                else:
-                    c = int(s)
-                return unichr(c)
+            try:
+                if s[0] == "#":
+                    s = s[1:]
+                    if s[0] in ['x','X']:
+                        c = int(s[1:], 16)
+                    else:
+                        c = int(s)
+                    return unichr(c)
+            except ValueError:
+                return '&#'+s+';'
             else:
                 # Cannot use name2codepoint directly, because HTMLParser supports apos,
                 # which is not part of HTML 4