From 582d188e6e3487180891f1fc457a80dec8be26a8 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Mon, 24 Sep 2018 14:38:31 +0200 Subject: [3.6] bpo-17239: Disable external entities in SAX parser (GH-9217) (GH-9512) The SAX parser no longer processes general external entities by default to increase security. Before, the parser created network connections to fetch remote files or loaded local files from the file system for DTD and entities. Signed-off-by: Christian Heimes https://bugs.python.org/issue17239. (cherry picked from commit 17b1d5d4e36aa57a9b25a0e694affbd1ee637e45) Co-authored-by: Christian Heimes https://bugs.python.org/issue17239 --- Doc/library/xml.dom.pulldom.rst | 14 +++++ Doc/library/xml.rst | 6 ++- Doc/library/xml.sax.rst | 8 +++ Doc/whatsnew/3.6.rst | 18 ++++++- Lib/test/test_pulldom.py | 7 +++ Lib/test/test_sax.py | 60 +++++++++++++++++++++- Lib/test/test_xml_etree.py | 13 +++++ Lib/xml/sax/expatreader.py | 2 +- .../2018-09-11-18-30-55.bpo-17239.kOpwK2.rst | 3 ++ 9 files changed, 125 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2018-09-11-18-30-55.bpo-17239.kOpwK2.rst diff --git a/Doc/library/xml.dom.pulldom.rst b/Doc/library/xml.dom.pulldom.rst index 5c0f469..2504409 100644 --- a/Doc/library/xml.dom.pulldom.rst +++ b/Doc/library/xml.dom.pulldom.rst @@ -25,6 +25,20 @@ events until either processing is finished or an error condition occurs. maliciously constructed data. If you need to parse untrusted or unauthenticated data see :ref:`xml-vulnerabilities`. +.. versionchanged:: 3.6.7 + + The SAX parser no longer processes general external entities by default to + increase security by default. To enable processing of external entities, + pass a custom parser instance in:: + + from xml.dom.pulldom import parse + from xml.sax import make_parser + from xml.sax.handler import feature_external_ges + + parser = make_parser() + parser.setFeature(feature_external_ges, True) + parse(filename, parser=parser) + Example:: diff --git a/Doc/library/xml.rst b/Doc/library/xml.rst index 63c24f8..9b8ba6b 100644 --- a/Doc/library/xml.rst +++ b/Doc/library/xml.rst @@ -65,8 +65,8 @@ kind sax etree minidom p ========================= ============== =============== ============== ============== ============== billion laughs **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable** quadratic blowup **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable** -external entity expansion **Vulnerable** Safe (1) Safe (2) **Vulnerable** Safe (3) -`DTD`_ retrieval **Vulnerable** Safe Safe **Vulnerable** Safe +external entity expansion Safe (4) Safe (1) Safe (2) Safe (4) Safe (3) +`DTD`_ retrieval Safe (4) Safe Safe Safe (4) Safe decompression bomb Safe Safe Safe Safe **Vulnerable** ========================= ============== =============== ============== ============== ============== @@ -75,6 +75,8 @@ decompression bomb Safe Safe Safe S 2. :mod:`xml.dom.minidom` doesn't expand external entities and simply returns the unexpanded entity verbatim. 3. :mod:`xmlrpclib` doesn't expand external entities and omits them. +4. Since Python 3.8.0, external general entities are no longer processed by + default since Python. billion laughs / exponential entity expansion diff --git a/Doc/library/xml.sax.rst b/Doc/library/xml.sax.rst index 78d6633..1a8f183 100644 --- a/Doc/library/xml.sax.rst +++ b/Doc/library/xml.sax.rst @@ -24,6 +24,14 @@ the SAX API. constructed data. If you need to parse untrusted or unauthenticated data see :ref:`xml-vulnerabilities`. +.. versionchanged:: 3.6.7 + + The SAX parser no longer processes general external entities by default + to increase security. Before, the parser created network connections + to fetch remote files or loaded local files from the file + system for DTD and entities. The feature can be enabled again with method + :meth:`~xml.sax.xmlreader.XMLReader.setFeature` on the parser object + and argument :data:`~xml.sax.handler.feature_external_ges`. The convenience functions are: diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst index d375cff..08d2d24 100644 --- a/Doc/whatsnew/3.6.rst +++ b/Doc/whatsnew/3.6.rst @@ -2055,6 +2055,15 @@ connected to and thus what Python interpreter will be used by the virtual environment. (Contributed by Brett Cannon in :issue:`25154`.) +xml +--- + +* As mitigation against DTD and external entity retrieval, the + :mod:`xml.dom.minidom` and mod:`xml.sax` modules no longer process + external entities by default. + (Contributed by Christian Heimes in :issue:`17239`.) + + Deprecated functions and types of the C API ------------------------------------------- @@ -2163,7 +2172,7 @@ Changes in the Python API * The functions in the :mod:`compileall` module now return booleans instead of ``1`` or ``0`` to represent success or failure, respectively. Thanks to - booleans being a subclass of integers, this should only be an issue if you + booleans being a subclass of integers, this should only be an issue if you7 were doing identity checks for ``1`` or ``0``. See :issue:`25768`. * Reading the :attr:`~urllib.parse.SplitResult.port` attribute of @@ -2408,3 +2417,10 @@ Notable changes in Python 3.6.5 The :func:`locale.localeconv` function now sets temporarily the ``LC_CTYPE`` locale to the ``LC_NUMERIC`` locale in some cases. (Contributed by Victor Stinner in :issue:`31900`.) + + +Notable changes in Python 3.6.7 +=============================== + +:mod:`xml.dom.minidom` and mod:`xml.sax` modules no longer process +external entities by default. See also :issue:`17239`. diff --git a/Lib/test/test_pulldom.py b/Lib/test/test_pulldom.py index 3d89e3a..6dc51e4 100644 --- a/Lib/test/test_pulldom.py +++ b/Lib/test/test_pulldom.py @@ -3,6 +3,7 @@ import unittest import xml.sax from xml.sax.xmlreader import AttributesImpl +from xml.sax.handler import feature_external_ges from xml.dom import pulldom from test.support import findfile @@ -159,6 +160,12 @@ class PullDOMTestCase(unittest.TestCase): self.fail( "Ran out of events, but should have received END_DOCUMENT") + def test_external_ges_default(self): + parser = pulldom.parseString(SMALL_SAMPLE) + saxparser = parser.parser + ges = saxparser.getFeature(feature_external_ges) + self.assertEqual(ges, False) + class ThoroughTestCase(unittest.TestCase): """Test the hard-to-reach parts of pulldom.""" diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index 2eb6290..3044960 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -13,13 +13,14 @@ except SAXReaderNotAvailable: from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \ XMLFilterBase, prepare_input_source from xml.sax.expatreader import create_parser -from xml.sax.handler import feature_namespaces +from xml.sax.handler import feature_namespaces, feature_external_ges from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl from io import BytesIO, StringIO import codecs import gc import os.path import shutil +from urllib.error import URLError from test import support from test.support import findfile, run_unittest, TESTFN @@ -911,6 +912,18 @@ class ExpatReaderTest(XmlTestBase): def unparsedEntityDecl(self, name, publicId, systemId, ndata): self._entities.append((name, publicId, systemId, ndata)) + + class TestEntityRecorder: + def __init__(self): + self.entities = [] + + def resolveEntity(self, publicId, systemId): + self.entities.append((publicId, systemId)) + source = InputSource() + source.setPublicId(publicId) + source.setSystemId(systemId) + return source + def test_expat_dtdhandler(self): parser = create_parser() handler = self.TestDTDHandler() @@ -927,6 +940,32 @@ class ExpatReaderTest(XmlTestBase): [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)]) self.assertEqual(handler._entities, [("img", None, "expat.gif", "GIF")]) + def test_expat_external_dtd_enabled(self): + parser = create_parser() + parser.setFeature(feature_external_ges, True) + resolver = self.TestEntityRecorder() + parser.setEntityResolver(resolver) + + with self.assertRaises(URLError): + parser.feed( + '\n' + ) + self.assertEqual( + resolver.entities, [(None, 'unsupported://non-existing')] + ) + + def test_expat_external_dtd_default(self): + parser = create_parser() + resolver = self.TestEntityRecorder() + parser.setEntityResolver(resolver) + + parser.feed( + '\n' + ) + parser.feed('') + parser.close() + self.assertEqual(resolver.entities, []) + # ===== EntityResolver support class TestEntityResolver: @@ -936,8 +975,9 @@ class ExpatReaderTest(XmlTestBase): inpsrc.setByteStream(BytesIO(b"")) return inpsrc - def test_expat_entityresolver(self): + def test_expat_entityresolver_enabled(self): parser = create_parser() + parser.setFeature(feature_external_ges, True) parser.setEntityResolver(self.TestEntityResolver()) result = BytesIO() parser.setContentHandler(XMLGenerator(result)) @@ -951,6 +991,22 @@ class ExpatReaderTest(XmlTestBase): self.assertEqual(result.getvalue(), start + b"") + def test_expat_entityresolver_default(self): + parser = create_parser() + self.assertEqual(parser.getFeature(feature_external_ges), False) + parser.setEntityResolver(self.TestEntityResolver()) + result = BytesIO() + parser.setContentHandler(XMLGenerator(result)) + + parser.feed('\n') + parser.feed(']>\n') + parser.feed('&test;') + parser.close() + + self.assertEqual(result.getvalue(), start + + b"") + # ===== Attributes support class AttrGatherer(ContentHandler): diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index c14f839..2b8c5e5 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -90,6 +90,12 @@ ENTITY_XML = """\ &entity; """ +EXTERNAL_ENTITY_XML = """\ + +]> +&entity; +""" class ModuleTest(unittest.TestCase): def test_sanity(self): @@ -846,6 +852,13 @@ class ElementTreeTest(unittest.TestCase): root = parser.close() self.serialize_check(root, 'text') + # 4) external (SYSTEM) entity + + with self.assertRaises(ET.ParseError) as cm: + ET.XML(EXTERNAL_ENTITY_XML) + self.assertEqual(str(cm.exception), + 'undefined entity &entity;: line 4, column 10') + def test_namespace(self): # Test namespace issues. diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py index 421358f..5066ffc 100644 --- a/Lib/xml/sax/expatreader.py +++ b/Lib/xml/sax/expatreader.py @@ -95,7 +95,7 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): self._lex_handler_prop = None self._parsing = 0 self._entity_stack = [] - self._external_ges = 1 + self._external_ges = 0 self._interning = None # XMLReader methods diff --git a/Misc/NEWS.d/next/Security/2018-09-11-18-30-55.bpo-17239.kOpwK2.rst b/Misc/NEWS.d/next/Security/2018-09-11-18-30-55.bpo-17239.kOpwK2.rst new file mode 100644 index 0000000..8dd0fe8 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2018-09-11-18-30-55.bpo-17239.kOpwK2.rst @@ -0,0 +1,3 @@ +The xml.sax and xml.dom.minidom parsers no longer processes external +entities by default. External DTD and ENTITY declarations no longer +load files or create network connections. -- cgit v0.12