Fix an index, add more tests, avoid raising errors for unknown declarations, and clean up comments.

author: Ezio Melotti <ezio.melotti@gmail.com> 2012-02-13 18:36:55 (GMT)
committer: Ezio Melotti <ezio.melotti@gmail.com> 2012-02-13 18:36:55 (GMT)
commit: 369cbd744ed06b3e01fe7a2e6a86ea4d85250231 (patch)
tree: 3cd0eacf3c320dcc5ec695529b2f68ebbf041268
parent: ef18737b7f920bc9e8f406cfb71730b9f07c13f5 (diff)
download: cpython-369cbd744ed06b3e01fe7a2e6a86ea4d85250231.zip
          cpython-369cbd744ed06b3e01fe7a2e6a86ea4d85250231.tar.gz
          cpython-369cbd744ed06b3e01fe7a2e6a86ea4d85250231.tar.bz2
2 files changed, 27 insertions, 2 deletions
diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
index 6cc9ff1..f230c5f 100644
--- a/Lib/HTMLParser.py
+++ b/Lib/HTMLParser.py
@@ -229,12 +229,13 @@ class HTMLParser(markupbase.ParserBase):
         if rawdata[i:i+2] != '<!':
             self.error('unexpected call to parse_html_declaration()')
         if rawdata[i:i+4] == '<!--':
+            # this case is actually already handled in goahead()
             return self.parse_comment(i)
         elif rawdata[i:i+3] == '<![':
             return self.parse_marked_section(i)
         elif rawdata[i:i+9].lower() == '<!doctype':
             # find the closing >
-            gtpos = rawdata.find('>', 9)
+            gtpos = rawdata.find('>', i+9)
             if gtpos == -1:
                 return -1
             self.handle_decl(rawdata[i+2:gtpos])
@@ -427,7 +428,7 @@ class HTMLParser(markupbase.ParserBase):
         pass
 
     def unknown_decl(self, data):
-        self.error("unknown declaration: %r" % (data,))
+        pass
 
     # Internal -- helper to remove special character quoting
     entitydefs = None
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index c15bb66..6667512 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -215,6 +215,30 @@ text
         self._parse_error("<a foo='>'")
         self._parse_error("<a foo='>")
 
+    def test_valid_doctypes(self):
+        # from http://www.w3.org/QA/2002/04/valid-dtd-list.html
+        dtds = ['HTML',  # HTML5 doctype
+                ('HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
+                 '"http://www.w3.org/TR/html4/strict.dtd"'),
+                ('HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" '
+                 '"http://www.w3.org/TR/html4/loose.dtd"'),
+                ('html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" '
+                 '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"'),
+                ('html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" '
+                 '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"'),
+                ('math PUBLIC "-//W3C//DTD MathML 2.0//EN" '
+                 '"http://www.w3.org/Math/DTD/mathml2/mathml2.dtd"'),
+                ('html PUBLIC "-//W3C//DTD '
+                 'XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" '
+                 '"http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd"'),
+                ('svg PUBLIC "-//W3C//DTD SVG 1.1//EN" '
+                 '"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"'),
+                'html PUBLIC "-//IETF//DTD HTML 2.0//EN"',
+                'html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"']
+        for dtd in dtds:
+            self._run_check("<!DOCTYPE %s>" % dtd,
+                            [('decl', 'DOCTYPE ' + dtd)])
+
     def test_declaration_junk_chars(self):
         self._run_check("<!DOCTYPE foo $ >", [('decl', 'DOCTYPE foo $ ')])
author	Ezio Melotti <ezio.melotti@gmail.com>	2012-02-13 18:36:55 (GMT)
committer	Ezio Melotti <ezio.melotti@gmail.com>	2012-02-13 18:36:55 (GMT)
commit	369cbd744ed06b3e01fe7a2e6a86ea4d85250231 (patch)
tree	3cd0eacf3c320dcc5ec695529b2f68ebbf041268
parent	ef18737b7f920bc9e8f406cfb71730b9f07c13f5 (diff)
download	cpython-369cbd744ed06b3e01fe7a2e6a86ea4d85250231.zip cpython-369cbd744ed06b3e01fe7a2e6a86ea4d85250231.tar.gz cpython-369cbd744ed06b3e01fe7a2e6a86ea4d85250231.tar.bz2