summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Georg Brandl <georg@python.org> 2009-03-31 22:11:53 (GMT)
committer: Georg Brandl <georg@python.org> 2009-03-31 22:11:53 (GMT)
commit: 0c7b2c9c191a97b4aede17c15e2b5013e7edcfb5 (patch)
tree: 1119329789eadc18b6f0bb2dec9cceb7fdedc5b3
parent: 95fafec73281b58ecea37b2a61a2a44d0ce98e3a (diff)
download: cpython-0c7b2c9c191a97b4aede17c15e2b5013e7edcfb5.zip
cpython-0c7b2c9c191a97b4aede17c15e2b5013e7edcfb5.tar.gz
cpython-0c7b2c9c191a97b4aede17c15e2b5013e7edcfb5.tar.bz2
#1651995: fix _convert_ref for non-ASCII characters.
-rw-r--r-- Lib/sgmllib.py 2
-rw-r--r-- Lib/test/test_sgmllib.py 9
2 files changed, 10 insertions, 1 deletion
diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py
index e5fbd32..104b25f 100644
--- a/Lib/sgmllib.py
+++ b/Lib/sgmllib.py
@@ -396,7 +396,7 @@ class SGMLParser(markupbase.ParserBase):
n = int(name)
except ValueError:
return
- if not 0 <= n <= 255:
+ if not 0 <= n <= 127:
return
return self.convert_codepoint(n)
diff --git a/Lib/test/test_sgmllib.py b/Lib/test/test_sgmllib.py
index 34fd7f0..081e0e1 100644
--- a/Lib/test/test_sgmllib.py
+++ b/Lib/test/test_sgmllib.py
@@ -373,6 +373,15 @@ DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
if len(data) != CHUNK:
break
+ def test_only_decode_ascii(self):
+ # SF bug #1651995, make sure non-ascii character references are not decoded
+ s = '<signs exclamation="&#33" copyright="&#169" quoteleft="&#8216;">'
+ self.check_events(s, [
+ ('starttag', 'signs',
+ [('exclamation', '!'), ('copyright', '&#169'),
+ ('quoteleft', '&#8216;')]),
+ ])
+
# XXX These tests have been disabled by prefixing their names with
# an underscore. The first two exercise outstanding bugs in the
# sgmllib module, and the third exhibits questionable behavior