#15114: the strict mode of HTMLParser and the HTMLParseError exception are deprecated now that the parser is able to parse invalid markup.

author: Ezio Melotti <ezio.melotti@gmail.com> 2012-06-23 13:27:51 (GMT)
committer: Ezio Melotti <ezio.melotti@gmail.com> 2012-06-23 13:27:51 (GMT)
commit: 3861d8b27127a261391ee49ff8634a4ef3ba1dd3 (patch)
tree: c76b34f4269c742f50be485890703200262def43 /Lib
parent: a4db02c7a38c5669b5678f1e972d8b9c6d3a2238 (diff)
download: cpython-3861d8b27127a261391ee49ff8634a4ef3ba1dd3.zip
cpython-3861d8b27127a261391ee49ff8634a4ef3ba1dd3.tar.gz
cpython-3861d8b27127a261391ee49ff8634a4ef3ba1dd3.tar.bz2
2 files changed, 16 insertions, 11 deletions
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index de504ab..494cf24 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -10,6 +10,7 @@
 
 import _markupbase
 import re
+import warnings
 
 # Regular expressions used for parsing
 
@@ -113,14 +114,16 @@ class HTMLParser(_markupbase.ParserBase):
 
     CDATA_CONTENT_ELEMENTS = ("script", "style")
 
-    def __init__(self, strict=True):
+    def __init__(self, strict=False):
         """Initialize and reset this instance.
 
-        If strict is set to True (the default), errors are raised when invalid
-        HTML is encountered.  If set to False, an attempt is instead made to
-        continue parsing, making "best guesses" about the intended meaning, in
-        a fashion similar to what browsers typically do.
+        If strict is set to False (the default) the parser will parse invalid
+        markup, otherwise it will raise an error.  Note that the strict mode
+        is deprecated.
         """
+        if strict:
+            warnings.warn("The strict mode is deprecated.",
+                          DeprecationWarning, stacklevel=2)
         self.strict = strict
         self.reset()
 
@@ -271,8 +274,8 @@ class HTMLParser(_markupbase.ParserBase):
     # See also parse_declaration in _markupbase
     def parse_html_declaration(self, i):
         rawdata = self.rawdata
-        if rawdata[i:i+2] != '<!':
-            self.error('unexpected call to parse_html_declaration()')
+        assert rawdata[i:i+2] == '<!', ('unexpected call to '
+                                        'parse_html_declaration()')
         if rawdata[i:i+4] == '<!--':
             # this case is actually already handled in goahead()
             return self.parse_comment(i)
@@ -292,8 +295,8 @@ class HTMLParser(_markupbase.ParserBase):
     # see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state
     def parse_bogus_comment(self, i, report=1):
         rawdata = self.rawdata
-        if rawdata[i:i+2] not in ('<!', '</'):
-            self.error('unexpected call to parse_comment()')
+        assert rawdata[i:i+2] in ('<!', '</'), ('unexpected call to '
+                                                'parse_comment()')
         pos = rawdata.find('>', i+2)
         if pos == -1:
             return -1
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index c4f80cc..64a4f5d 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -102,7 +102,8 @@ class TestCaseBase(unittest.TestCase):
 class HTMLParserStrictTestCase(TestCaseBase):
 
     def get_collector(self):
-        return EventCollector(strict=True)
+        with support.check_warnings(("", DeprecationWarning), quiet=False):
+            return EventCollector(strict=True)
 
     def test_processing_instruction_only(self):
         self._run_check("<?processing instruction>", [
@@ -594,7 +595,8 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
 class AttributesStrictTestCase(TestCaseBase):
 
     def get_collector(self):
-        return EventCollector(strict=True)
+        with support.check_warnings(("", DeprecationWarning), quiet=False):
+            return EventCollector(strict=True)
 
     def test_attr_syntax(self):
         output = [
author	Ezio Melotti <ezio.melotti@gmail.com>	2012-06-23 13:27:51 (GMT)
committer	Ezio Melotti <ezio.melotti@gmail.com>	2012-06-23 13:27:51 (GMT)
commit	3861d8b27127a261391ee49ff8634a4ef3ba1dd3 (patch)
tree	c76b34f4269c742f50be485890703200262def43 /Lib
parent	a4db02c7a38c5669b5678f1e972d8b9c6d3a2238 (diff)
download	cpython-3861d8b27127a261391ee49ff8634a4ef3ba1dd3.zip cpython-3861d8b27127a261391ee49ff8634a4ef3ba1dd3.tar.gz cpython-3861d8b27127a261391ee49ff8634a4ef3ba1dd3.tar.bz2