diff options
-rw-r--r-- | Lib/_markupbase.py | 4 | ||||
-rw-r--r-- | Lib/test/test_htmlparser.py | 43 |
2 files changed, 47 insertions, 0 deletions
diff --git a/Lib/_markupbase.py b/Lib/_markupbase.py
index 98b9037..2af5f1c 100644
--- a/Lib/_markupbase.py
+++ b/Lib/_markupbase.py
@@ -107,6 +107,10 @@ class ParserBase:
                 if decltype == "doctype":
                     self.handle_decl(data)
                 else:
+                    # According to the HTML5 specs sections "8.2.4.44 Bogus
+                    # comment state" and "8.2.4.45 Markup declaration open
+                    # state", a comment token should be emitted.
+                    # Calling unknown_decl provides more flexibility though.
                     self.unknown_decl(data)
                 return j + 1
             if c in "\"'":
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index 87b5060..8c2e25e 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -323,6 +323,16 @@ DOCTYPE html [
                                 ("endtag", element_lower)],
                             collector=Collector())
 
+    def test_condcoms(self):
+        html = ('<!--[if IE & !(lte IE 8)]>aren\'t<![endif]-->'
+                '<!--[if IE 8]>condcoms<![endif]-->'
+                '<!--[if lte IE 7]>pretty?<![endif]-->')
+        expected = [('comment', "[if IE & !(lte IE 8)]>aren't<![endif]"),
+                    ('comment', '[if IE 8]>condcoms<![endif]'),
+                    ('comment', '[if lte IE 7]>pretty?<![endif]')]
+        self._run_check(html, expected)
+
+
 class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
 
     def get_collector(self):
@@ -416,6 +426,39 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
         # see #12888
         self.assertEqual(p.unescape('&#123; ' * 1050), '{ ' * 1050)
 
+    def test_broken_condcoms(self):
+        # these condcoms are missing the '--' after '<!' and before the '>'
+        html = ('<![if !(IE)]>broken condcom<![endif]>'
+                '<![if ! IE]><link href="favicon.tiff"/><![endif]>'
+                '<![if !IE 6]><img src="firefox.png" /><![endif]>'
+                '<![if !ie 6]><b>foo</b><![endif]>'
+                '<![if (!IE)|(lt IE 9)]><img src="mammoth.bmp" /><![endif]>')
+        # According to the HTML5 specs sections "8.2.4.44 Bogus comment state"
+        # and "8.2.4.45 Markup declaration open state", comment tokens should
+        # be emitted instead of 'unknown decl', but calling unknown_decl
+        # provides more flexibility.
+        # See also Lib/_markupbase.py:parse_declaration
+        expected = [
+            ('unknown decl', 'if !(IE)'),
+            ('data', 'broken condcom'),
+            ('unknown decl', 'endif'),
+            ('unknown decl', 'if ! IE'),
+            ('startendtag', 'link', [('href', 'favicon.tiff')]),
+            ('unknown decl', 'endif'),
+            ('unknown decl', 'if !IE 6'),
+            ('startendtag', 'img', [('src', 'firefox.png')]),
+            ('unknown decl', 'endif'),
+            ('unknown decl', 'if !ie 6'),
+            ('starttag', 'b', []),
+            ('data', 'foo'),
+            ('endtag', 'b'),
+            ('unknown decl', 'endif'),
+            ('unknown decl', 'if (!IE)|(lt IE 9)'),
+            ('startendtag', 'img', [('src', 'mammoth.bmp')]),
+            ('unknown decl', 'endif')
+        ]
+        self._run_check(html, expected)
+
 
 class AttributesStrictTestCase(TestCaseBase):
 