diff options
author | Ezio Melotti <ezio.melotti@gmail.com> | 2012-02-13 18:36:55 (GMT) |
---|---|---|
committer | Ezio Melotti <ezio.melotti@gmail.com> | 2012-02-13 18:36:55 (GMT) |
commit | 369cbd744ed06b3e01fe7a2e6a86ea4d85250231 (patch) | |
tree | 3cd0eacf3c320dcc5ec695529b2f68ebbf041268 | |
parent | ef18737b7f920bc9e8f406cfb71730b9f07c13f5 (diff) | |
download | cpython-369cbd744ed06b3e01fe7a2e6a86ea4d85250231.zip cpython-369cbd744ed06b3e01fe7a2e6a86ea4d85250231.tar.gz cpython-369cbd744ed06b3e01fe7a2e6a86ea4d85250231.tar.bz2 |
Fix an index, add more tests, avoid raising errors for unknown declarations, and clean up comments.
-rw-r--r-- | Lib/HTMLParser.py | 5 | ||||
-rw-r--r-- | Lib/test/test_htmlparser.py | 24 |
2 files changed, 27 insertions, 2 deletions
```diff
diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
index 6cc9ff1..f230c5f 100644
--- a/Lib/HTMLParser.py
+++ b/Lib/HTMLParser.py
@@ -229,12 +229,13 @@ class HTMLParser(markupbase.ParserBase):
         if rawdata[i:i+2] != '<!':
             self.error('unexpected call to parse_html_declaration()')
         if rawdata[i:i+4] == '<!--':
+            # this case is actually already handled in goahead()
             return self.parse_comment(i)
         elif rawdata[i:i+3] == '<![':
             return self.parse_marked_section(i)
         elif rawdata[i:i+9].lower() == '<!doctype':
             # find the closing >
-            gtpos = rawdata.find('>', 9)
+            gtpos = rawdata.find('>', i+9)
             if gtpos == -1:
                 return -1
             self.handle_decl(rawdata[i+2:gtpos])
@@ -427,7 +428,7 @@ class HTMLParser(markupbase.ParserBase):
         pass
 
     def unknown_decl(self, data):
-        self.error("unknown declaration: %r" % (data,))
+        pass
 
     # Internal -- helper to remove special character quoting
     entitydefs = None
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index c15bb66..6667512 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -215,6 +215,30 @@ text
         self._parse_error("<a foo='>'")
         self._parse_error("<a foo='>")
 
+    def test_valid_doctypes(self):
+        # from http://www.w3.org/QA/2002/04/valid-dtd-list.html
+        dtds = ['HTML',  # HTML5 doctype
+                ('HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
+                 '"http://www.w3.org/TR/html4/strict.dtd"'),
+                ('HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" '
+                 '"http://www.w3.org/TR/html4/loose.dtd"'),
+                ('html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" '
+                 '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"'),
+                ('html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" '
+                 '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"'),
+                ('math PUBLIC "-//W3C//DTD MathML 2.0//EN" '
+                 '"http://www.w3.org/Math/DTD/mathml2/mathml2.dtd"'),
+                ('html PUBLIC "-//W3C//DTD '
+                 'XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" '
+                 '"http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd"'),
+                ('svg PUBLIC "-//W3C//DTD SVG 1.1//EN" '
+                 '"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"'),
+                'html PUBLIC "-//IETF//DTD HTML 2.0//EN"',
+                'html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"']
+        for dtd in dtds:
+            self._run_check("<!DOCTYPE %s>" % dtd,
+                            [('decl', 'DOCTYPE ' + dtd)])
+
     def test_declaration_junk_chars(self):
         self._run_check("<!DOCTYPE foo $ >",
                         [('decl', 'DOCTYPE foo $ ')])
```