diff options
author | Ezio Melotti <ezio.melotti@gmail.com> | 2012-06-23 13:27:51 (GMT) |
---|---|---|
committer | Ezio Melotti <ezio.melotti@gmail.com> | 2012-06-23 13:27:51 (GMT) |
commit | 3861d8b27127a261391ee49ff8634a4ef3ba1dd3 (patch) | |
tree | c76b34f4269c742f50be485890703200262def43 /Lib | |
parent | a4db02c7a38c5669b5678f1e972d8b9c6d3a2238 (diff) | |
download | cpython-3861d8b27127a261391ee49ff8634a4ef3ba1dd3.zip cpython-3861d8b27127a261391ee49ff8634a4ef3ba1dd3.tar.gz cpython-3861d8b27127a261391ee49ff8634a4ef3ba1dd3.tar.bz2 |
#15114: the strict mode of HTMLParser and the HTMLParseError exception are deprecated now that the parser is able to parse invalid markup.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/html/parser.py | 21 | ||||
-rw-r--r-- | Lib/test/test_htmlparser.py | 6 |
2 files changed, 16 insertions, 11 deletions
```diff
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index de504ab..494cf24 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -10,6 +10,7 @@
 
 import _markupbase
 import re
+import warnings
 
 # Regular expressions used for parsing
 
@@ -113,14 +114,16 @@ class HTMLParser(_markupbase.ParserBase):
 
     CDATA_CONTENT_ELEMENTS = ("script", "style")
 
-    def __init__(self, strict=True):
+    def __init__(self, strict=False):
         """Initialize and reset this instance.
 
-        If strict is set to True (the default), errors are raised when invalid
-        HTML is encountered. If set to False, an attempt is instead made to
-        continue parsing, making "best guesses" about the intended meaning, in
-        a fashion similar to what browsers typically do.
+        If strict is set to False (the default) the parser will parse invalid
+        markup, otherwise it will raise an error. Note that the strict mode
+        is deprecated.
         """
+        if strict:
+            warnings.warn("The strict mode is deprecated.",
+                          DeprecationWarning, stacklevel=2)
         self.strict = strict
         self.reset()
 
@@ -271,8 +274,8 @@ class HTMLParser(_markupbase.ParserBase):
     # See also parse_declaration in _markupbase
     def parse_html_declaration(self, i):
         rawdata = self.rawdata
-        if rawdata[i:i+2] != '<!':
-            self.error('unexpected call to parse_html_declaration()')
+        assert rawdata[i:i+2] == '<!', ('unexpected call to '
+                                        'parse_html_declaration()')
         if rawdata[i:i+4] == '<!--':
             # this case is actually already handled in goahead()
             return self.parse_comment(i)
@@ -292,8 +295,8 @@ class HTMLParser(_markupbase.ParserBase):
     # see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state
     def parse_bogus_comment(self, i, report=1):
         rawdata = self.rawdata
-        if rawdata[i:i+2] not in ('<!', '</'):
-            self.error('unexpected call to parse_comment()')
+        assert rawdata[i:i+2] in ('<!', '</'), ('unexpected call to '
+                                                'parse_comment()')
         pos = rawdata.find('>', i+2)
         if pos == -1:
             return -1
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index c4f80cc..64a4f5d 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -102,7 +102,8 @@ class TestCaseBase(unittest.TestCase):
 class HTMLParserStrictTestCase(TestCaseBase):
 
     def get_collector(self):
-        return EventCollector(strict=True)
+        with support.check_warnings(("", DeprecationWarning), quite=False):
+            return EventCollector(strict=True)
 
     def test_processing_instruction_only(self):
         self._run_check("<?processing instruction>", [
@@ -594,7 +595,8 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
 class AttributesStrictTestCase(TestCaseBase):
 
     def get_collector(self):
-        return EventCollector(strict=True)
+        with support.check_warnings(("", DeprecationWarning), quite=False):
+            return EventCollector(strict=True)
 
     def test_attr_syntax(self):
         output = [
```