summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Lib/_markupbase.py 4
-rw-r--r-- Lib/test/test_htmlparser.py 43
2 files changed, 47 insertions, 0 deletions
diff --git a/Lib/_markupbase.py b/Lib/_markupbase.py
index 98b9037..2af5f1c 100644
--- a/Lib/_markupbase.py
+++ b/Lib/_markupbase.py
@@ -107,6 +107,10 @@ class ParserBase:
if decltype == "doctype":
self.handle_decl(data)
else:
+ # According to the HTML5 specs sections "8.2.4.44 Bogus
+ # comment state" and "8.2.4.45 Markup declaration open
+ # state", a comment token should be emitted.
+ # Calling unknown_decl provides more flexibility though.
self.unknown_decl(data)
return j + 1
if c in "\"'":
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index 87b5060..8c2e25e 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -323,6 +323,16 @@ DOCTYPE html [
("endtag", element_lower)],
collector=Collector())
+ def test_condcoms(self):
+ html = ('<!--[if IE & !(lte IE 8)]>aren\'t<![endif]-->'
+ '<!--[if IE 8]>condcoms<![endif]-->'
+ '<!--[if lte IE 7]>pretty?<![endif]-->')
+ expected = [('comment', "[if IE & !(lte IE 8)]>aren't<![endif]"),
+ ('comment', '[if IE 8]>condcoms<![endif]'),
+ ('comment', '[if lte IE 7]>pretty?<![endif]')]
+ self._run_check(html, expected)
+
+
class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
def get_collector(self):
@@ -416,6 +426,39 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
# see #12888
self.assertEqual(p.unescape('&#123; ' * 1050), '{ ' * 1050)
+ def test_broken_condcoms(self):
+ # these condcoms are missing the '--' after '<!' and before the '>'
+ html = ('<![if !(IE)]>broken condcom<![endif]>'
+ '<![if ! IE]><link href="favicon.tiff"/><![endif]>'
+ '<![if !IE 6]><img src="firefox.png" /><![endif]>'
+ '<![if !ie 6]><b>foo</b><![endif]>'
+ '<![if (!IE)|(lt IE 9)]><img src="mammoth.bmp" /><![endif]>')
+ # According to the HTML5 specs sections "8.2.4.44 Bogus comment state"
+ # and "8.2.4.45 Markup declaration open state", comment tokens should
+ # be emitted instead of 'unknown decl', but calling unknown_decl
+ # provides more flexibility.
+ # See also Lib/_markupbase.py:parse_declaration
+ expected = [
+ ('unknown decl', 'if !(IE)'),
+ ('data', 'broken condcom'),
+ ('unknown decl', 'endif'),
+ ('unknown decl', 'if ! IE'),
+ ('startendtag', 'link', [('href', 'favicon.tiff')]),
+ ('unknown decl', 'endif'),
+ ('unknown decl', 'if !IE 6'),
+ ('startendtag', 'img', [('src', 'firefox.png')]),
+ ('unknown decl', 'endif'),
+ ('unknown decl', 'if !ie 6'),
+ ('starttag', 'b', []),
+ ('data', 'foo'),
+ ('endtag', 'b'),
+ ('unknown decl', 'endif'),
+ ('unknown decl', 'if (!IE)|(lt IE 9)'),
+ ('startendtag', 'img', [('src', 'mammoth.bmp')]),
+ ('unknown decl', 'endif')
+ ]
+ self._run_check(html, expected)
+
class AttributesStrictTestCase(TestCaseBase):