#21047: set the default value for the *convert_charrefs* argument of HTMLParser to True. Patch by Berker Peksag.

author: Ezio Melotti <ezio.melotti@gmail.com> 2014-08-02 15:36:12 (GMT)
committer: Ezio Melotti <ezio.melotti@gmail.com> 2014-08-02 15:36:12 (GMT)
commit: 6fc16d81af0ec11e79a17e2af12e8653382cef26 (patch)
tree: 46c2ed00a7cb5efb78bafd818c3e2b9e04a66c01
parent: 11bec7a1b83d0edffbcff29dccbabd660f5a9a7b (diff)
download: cpython-6fc16d81af0ec11e79a17e2af12e8653382cef26.zip
cpython-6fc16d81af0ec11e79a17e2af12e8653382cef26.tar.gz
cpython-6fc16d81af0ec11e79a17e2af12e8653382cef26.tar.bz2
4 files changed, 12 insertions, 17 deletions
diff --git a/Doc/library/html.parser.rst b/Doc/library/html.parser.rst
index 67ae139..b84c60b 100644
--- a/Doc/library/html.parser.rst
+++ b/Doc/library/html.parser.rst
@@ -16,15 +16,13 @@
 This module defines a class :class:`HTMLParser` which serves as the basis for
 parsing text files formatted in HTML (HyperText Mark-up Language) and XHTML.
 
-.. class:: HTMLParser(*, convert_charrefs=False)
+.. class:: HTMLParser(*, convert_charrefs=True)
 
    Create a parser instance able to parse invalid markup.
 
-   If *convert_charrefs* is ``True`` (default: ``False``), all character
+   If *convert_charrefs* is ``True`` (the default), all character
    references (except the ones in ``script``/``style`` elements) are
    automatically converted to the corresponding Unicode characters.
-   The use of ``convert_charrefs=True`` is encouraged and will become
-   the default in Python 3.5.
 
    An :class:`.HTMLParser` instance is fed HTML data and calls handler methods
    when start tags, end tags, text, comments, and other markup elements are
@@ -37,6 +35,9 @@ parsing text files formatted in HTML (HyperText Mark-up Language) and XHTML.
    .. versionchanged:: 3.4
       *convert_charrefs* keyword argument added.
 
+   .. versionchanged:: 3.5
+      The default value for argument *convert_charrefs* is now ``True``.
+
 
 Example HTML Parser Application
 -------------------------------
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index 5a4f9e1..390d4cc 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -59,7 +59,6 @@ endendtag = re.compile('>')
 endtagfind = re.compile('</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
 
 
-_default_sentinel = object()
 
 class HTMLParser(_markupbase.ParserBase):
     """Find tags and other markup and call handler functions.
@@ -85,17 +84,12 @@ class HTMLParser(_markupbase.ParserBase):
 
     CDATA_CONTENT_ELEMENTS = ("script", "style")
 
-    def __init__(self, *, convert_charrefs=_default_sentinel):
+    def __init__(self, *, convert_charrefs=True):
         """Initialize and reset this instance.
 
-        If convert_charrefs is True (default: False), all character references
+        If convert_charrefs is True (the default), all character references
         are automatically converted to the corresponding Unicode characters.
         """
-        if convert_charrefs is _default_sentinel:
-            convert_charrefs = False  # default
-            warnings.warn("The value of convert_charrefs will become True in "
-                          "3.5. You are encouraged to set the value explicitly.",
-                          DeprecationWarning, stacklevel=2)
         self.convert_charrefs = convert_charrefs
         self.reset()
 
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index 1aa150803..de8f3e8 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -346,7 +346,8 @@ text
         self._run_check(html, expected)
 
     def test_convert_charrefs(self):
-        collector = lambda: EventCollectorCharrefs(convert_charrefs=True)
+        # default value for convert_charrefs is now True
+        collector = lambda: EventCollectorCharrefs()
         self.assertTrue(collector().convert_charrefs)
         charrefs = ['&quot;', '&#34;', '&#x22;', '&quot', '&#34', '&#x22']
         # check charrefs in the middle of the text/attributes
@@ -383,10 +384,6 @@ text
         self._run_check('no charrefs here', [('data', 'no charrefs here')],
                         collector=collector())
 
-    def test_deprecation_warnings(self):
-        with self.assertWarns(DeprecationWarning):
-            EventCollector()  # convert_charrefs not passed explicitly
-
     # the remaining tests were for the "tolerant" parser (which is now
     # the default), and check various kinds of broken markup
     def test_tolerant_parsing(self):
diff --git a/Misc/NEWS b/Misc/NEWS
index 5d1b331..864f894 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -121,6 +121,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #21047: set the default value for the *convert_charrefs* argument
+  of HTMLParser to True.  Patch by Berker Peksag.
+
 - Add an __all__ to html.entities.
 
 - Issue #15114: the strict mode and argument of HTMLParser, HTMLParser.error,
author	Ezio Melotti <ezio.melotti@gmail.com>	2014-08-02 15:36:12 (GMT)
committer	Ezio Melotti <ezio.melotti@gmail.com>	2014-08-02 15:36:12 (GMT)
commit	6fc16d81af0ec11e79a17e2af12e8653382cef26 (patch)
tree	46c2ed00a7cb5efb78bafd818c3e2b9e04a66c01
parent	11bec7a1b83d0edffbcff29dccbabd660f5a9a7b (diff)
download	cpython-6fc16d81af0ec11e79a17e2af12e8653382cef26.zip cpython-6fc16d81af0ec11e79a17e2af12e8653382cef26.tar.gz cpython-6fc16d81af0ec11e79a17e2af12e8653382cef26.tar.bz2