diff options
| author | Fred Drake <fdrake@acm.org> | 2006-06-16 23:45:06 (GMT) |
|---|---|---|
| committer | Fred Drake <fdrake@acm.org> | 2006-06-16 23:45:06 (GMT) |
| commit | fab461a4b5a2304828d578bbdd24225dd9c252e9 (patch) | |
| tree | c9b1dd064b30162d9f99e1231446ce0001c4be40 /Lib/test/test_sgmllib.py | |
| parent | 274facfd1d8f73babcc687486b907d9cad9757c5 (diff) | |
| download | cpython-fab461a4b5a2304828d578bbdd24225dd9c252e9.zip cpython-fab461a4b5a2304828d578bbdd24225dd9c252e9.tar.gz cpython-fab461a4b5a2304828d578bbdd24225dd9c252e9.tar.bz2 | |
SF patch 1504676: Make sgmllib char and entity references pluggable
(implementation/tests contributed by Sam Ruby)
Diffstat (limited to 'Lib/test/test_sgmllib.py')
| -rw-r--r-- | Lib/test/test_sgmllib.py | 27 |
1 files changed, 27 insertions, 0 deletions
diff --git a/Lib/test/test_sgmllib.py b/Lib/test/test_sgmllib.py
index ec417d0..31b54de 100644
--- a/Lib/test/test_sgmllib.py
+++ b/Lib/test/test_sgmllib.py
@@ -64,6 +64,23 @@ class CDATAEventCollector(EventCollector):
         self.setliteral()
 
 
+class HTMLEntityCollector(EventCollector):
+    import re, htmlentitydefs
+    entity_or_charref = re.compile('(?:&([a-zA-Z][-.a-zA-Z0-9]*)'
+        '|&#(x[0-9a-zA-Z]+|[0-9]+))(;?)')
+
+    def convert_charref(self, name):
+        self.append(("charref", "convert", name))
+        if name.startswith('x'):
+            return unichr(int(name[1:],16))
+        else:
+            return unichr(int(name))
+
+    def convert_entityref(self, name):
+        self.append(("entityref", "convert", name))
+        return unichr(self.htmlentitydefs.name2codepoint[name])
+
+
 class SGMLParserTestCase(unittest.TestCase):
 
     collector = EventCollector
@@ -233,6 +250,16 @@ DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
             ("k", "*"),
             ])])
 
+    def test_convert_overrides(self):
+        self.collector = HTMLEntityCollector
+        self.check_events('<a title="&ldquo;test&#x201d;">foo</a>', [
+            ('entityref', 'convert', 'ldquo'),
+            ('charref', 'convert', 'x201d'),
+            ('starttag', 'a', [('title', u'\u201ctest\u201d')]),
+            ('data', 'foo'),
+            ('endtag', 'a'),
+            ])
+
     def test_attr_funky_names(self):
         self.check_events("""<a a.b='v' c:d=v e-f=v>""", [
             ("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
