summary | refs | log | tree | commit | diff | stats
path: root/Lib/test/test_htmlparser.py
diff options
context:
space:
mode:
author: Ezio Melotti <ezio.melotti@gmail.com>, 2012-02-10 08:45:44 (GMT)
committer: Ezio Melotti <ezio.melotti@gmail.com>, 2012-02-10 08:45:44 (GMT)
commit: fa3702dc28fa8aef291785c560832c9af60305a8 (patch)
tree: 3f83918e3a492d96c097ea85df7fda26559ab43d /Lib/test/test_htmlparser.py
parent: 5b14d732d8790a6a19cc8aa410740575ff94c85a (diff)
download: cpython-fa3702dc28fa8aef291785c560832c9af60305a8.zip
cpython-fa3702dc28fa8aef291785c560832c9af60305a8.tar.gz
cpython-fa3702dc28fa8aef291785c560832c9af60305a8.tar.bz2
#13960: HTMLParser is now able to handle broken comments when strict=False.
Diffstat (limited to 'Lib/test/test_htmlparser.py')
-rw-r--r-- Lib/test/test_htmlparser.py | 30
1 files changed, 30 insertions, 0 deletions
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index 8c2e25e..7af9131 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -323,6 +323,23 @@ DOCTYPE html [
("endtag", element_lower)],
collector=Collector())
+ def test_comments(self):
+ html = ("<!-- I'm a valid comment -->"
+ '<!--me too!-->'
+ '<!------>'
+ '<!---->'
+ '<!----I have many hyphens---->'
+ '<!-- I have a > in the middle -->'
+ '<!-- and I have -- in the middle! -->')
+ expected = [('comment', " I'm a valid comment "),
+ ('comment', 'me too!'),
+ ('comment', '--'),
+ ('comment', ''),
+ ('comment', '--I have many hyphens--'),
+ ('comment', ' I have a > in the middle '),
+ ('comment', ' and I have -- in the middle! ')]
+ self._run_check(html, expected)
+
def test_condcoms(self):
html = ('<!--[if IE & !(lte IE 8)]>aren\'t<![endif]-->'
'<!--[if IE 8]>condcoms<![endif]-->'
@@ -426,6 +443,19 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
# see #12888
self.assertEqual(p.unescape('&#123; ' * 1050), '{ ' * 1050)
+ def test_broken_comments(self):
+ html = ('<! not really a comment >'
+ '<! not a comment either -->'
+ '<! -- close enough -->'
+ '<!!! another bogus comment !!!>')
+ expected = [
+ ('comment', ' not really a comment '),
+ ('comment', ' not a comment either --'),
+ ('comment', ' -- close enough --'),
+ ('comment', '!! another bogus comment !!!'),
+ ]
+ self._run_check(html, expected)
+
def test_broken_condcoms(self):
# these condcoms are missing the '--' after '<!' and before the '>'
html = ('<![if !(IE)]>broken condcom<![endif]>'