From 7380a67267d9ec59b70617ea59ff31819f530942 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Tue, 26 Mar 2013 17:35:55 +0100 Subject: Issue 17538: Document XML vulnerabilties --- Doc/library/markup.rst | 1 + Doc/library/pyexpat.rst | 7 ++ Doc/library/xml.dom.minidom.rst | 8 +++ Doc/library/xml.dom.pulldom.rst | 8 +++ Doc/library/xml.etree.elementtree.rst | 8 +++ Doc/library/xml.rst | 131 ++++++++++++++++++++++++++++++++++ Doc/library/xml.sax.rst | 8 +++ Doc/library/xmlrpc.client.rst | 7 ++ Doc/library/xmlrpc.server.rst | 7 ++ Misc/NEWS | 2 + 10 files changed, 187 insertions(+) create mode 100644 Doc/library/xml.rst diff --git a/Doc/library/markup.rst b/Doc/library/markup.rst index 1b4cca5..ed24ba2 100644 --- a/Doc/library/markup.rst +++ b/Doc/library/markup.rst @@ -23,6 +23,7 @@ definition of the Python bindings for the DOM and SAX interfaces. html.rst html.parser.rst html.entities.rst + xml.rst xml.etree.elementtree.rst xml.dom.rst xml.dom.minidom.rst diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index 861546c..420e407 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -14,6 +14,13 @@ references to these attributes should be marked using the :member: role. +.. warning:: + + The :mod:`pyexpat` module is not secure against maliciously + constructed data. If you need to parse untrusted or unauthenticated data see + :ref:`xml-vulnerabilities`. + + .. index:: single: Expat The :mod:`xml.parsers.expat` module is a Python interface to the Expat diff --git a/Doc/library/xml.dom.minidom.rst b/Doc/library/xml.dom.minidom.rst index b512d7e..30182e4 100644 --- a/Doc/library/xml.dom.minidom.rst +++ b/Doc/library/xml.dom.minidom.rst @@ -17,6 +17,14 @@ to be simpler than the full DOM and also significantly smaller. Users who are not already proficient with the DOM should consider using the :mod:`xml.etree.ElementTree` module for their XML processing instead + +.. 
warning:: + + The :mod:`xml.dom.minidom` module is not secure against + maliciously constructed data. If you need to parse untrusted or + unauthenticated data see :ref:`xml-vulnerabilities`. + + DOM applications typically start by parsing some XML into a DOM. With :mod:`xml.dom.minidom`, this is done through the parse functions:: diff --git a/Doc/library/xml.dom.pulldom.rst b/Doc/library/xml.dom.pulldom.rst index eb16a09..8aa9cfb 100644 --- a/Doc/library/xml.dom.pulldom.rst +++ b/Doc/library/xml.dom.pulldom.rst @@ -17,6 +17,14 @@ processing model together with callbacks, the user of a pull parser is responsible for explicitly pulling events from the stream, looping over those events until either processing is finished or an error condition occurs. + +.. warning:: + + The :mod:`xml.dom.pulldom` module is not secure against + maliciously constructed data. If you need to parse untrusted or + unauthenticated data see :ref:`xml-vulnerabilities`. + + Example:: from xml.dom import pulldom diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index cf0c33f..dc9ebb9 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -13,6 +13,14 @@ The :class:`Element` type is a flexible container object, designed to store hierarchical data structures in memory. The type can be described as a cross between a list and a dictionary. + +.. warning:: + + The :mod:`xml.etree.ElementTree` module is not secure against + maliciously constructed data. If you need to parse untrusted or + unauthenticated data see :ref:`xml-vulnerabilities`. + + Each element has a number of properties associated with it: * a tag which is a string identifying what kind of data this element represents diff --git a/Doc/library/xml.rst b/Doc/library/xml.rst new file mode 100644 index 0000000..f84af58 --- /dev/null +++ b/Doc/library/xml.rst @@ -0,0 +1,131 @@ +.. _xml: + +XML Processing Modules +====================== + +.. 
module:: xml + :synopsis: Package containing XML processing modules +.. sectionauthor:: Christian Heimes +.. sectionauthor:: Georg Brandl + + +Python's interfaces for processing XML are grouped in the ``xml`` package. + +.. warning:: + + The XML modules are not secure against erroneous or maliciously + constructed data. If you need to parse untrusted or unauthenticated data see + :ref:`xml-vulnerabilities`. + +It is important to note that modules in the :mod:`xml` package require that +there be at least one SAX-compliant XML parser available. The Expat parser is +included with Python, so the :mod:`xml.parsers.expat` module will always be +available. + +The documentation for the :mod:`xml.dom` and :mod:`xml.sax` packages is the +definition of the Python bindings for the DOM and SAX interfaces. + +The XML handling submodules are: + +* :mod:`xml.etree.ElementTree`: the ElementTree API, a simple and lightweight XML processor + +.. + +* :mod:`xml.dom`: the DOM API definition +* :mod:`xml.dom.minidom`: a lightweight DOM implementation +* :mod:`xml.dom.pulldom`: support for building partial DOM trees + +.. + +* :mod:`xml.sax`: SAX2 base classes and convenience functions +* :mod:`xml.parsers.expat`: the Expat parser binding + + +.. _xml-vulnerabilities: + +XML vulnerabilities +=================== + +The XML processing modules are not secure against maliciously constructed data. +An attacker can abuse vulnerabilities for e.g. denial of service attacks, to +access local files, to generate network connections to other machines, or +to circumvent firewalls. The attacks on XML abuse unfamiliar features +like inline `DTD`_ (document type definition) with entities. 
+ + +========================= ======== ========= ========= ======== ========= +kind sax etree minidom pulldom xmlrpc +========================= ======== ========= ========= ======== ========= +billion laughs **True** **True** **True** **True** **True** +quadratic blowup **True** **True** **True** **True** **True** +external entity expansion **True** False (1) False (2) **True** False (3) +DTD retrieval **True** False False **True** False +decompression bomb False False False False **True** +========================= ======== ========= ========= ======== ========= + +1. :mod:`xml.etree.ElementTree` doesn't expand external entities and raises a + :exc:`~xml.etree.ElementTree.ParseError` when an entity occurs. +2. :mod:`xml.dom.minidom` doesn't expand external entities and simply returns + the unexpanded entity verbatim. +3. :mod:`xmlrpc.client` doesn't expand external entities and omits them. + + +billion laughs / exponential entity expansion + The `Billion Laughs`_ attack -- also known as exponential entity expansion -- + uses multiple levels of nested entities. Each entity refers to another entity + several times, and the final entity definition contains a small string. Eventually + the small string is expanded to several gigabytes. The exponential expansion + consumes lots of CPU time, too. + +quadratic blowup entity expansion + A quadratic blowup attack is similar to a `Billion Laughs`_ attack; it abuses + entity expansion, too. Instead of nested entities it repeats one large entity + with a couple of thousand chars over and over again. The attack isn't as + efficient as the exponential case but it avoids triggering countermeasures of + parsers against heavily nested entities. + +external entity expansion + Entity declarations can contain more than just text for replacement. They can + also point to external resources by public identifiers or system identifiers. + System identifiers are standard URIs or can refer to local files. The XML + parser retrieves the resource with e.g. 
HTTP or FTP requests and embeds the + content into the XML document. + +DTD retrieval + Some XML libraries like Python's :mod:`xml.dom.pulldom` retrieve document type + definitions from remote or local locations. The feature has similar + implications as the external entity expansion issue. + +decompression bomb + The issue of decompression bombs (aka `ZIP bomb`_) applies to all XML libraries + that can parse compressed XML streams like gzipped HTTP streams or LZMA-ed + files. For an attacker it can reduce the amount of transmitted data by three + orders of magnitude or more. + +The documentation of `defusedxml`_ on PyPI has further information about +all known attack vectors with examples and references. + +defused packages +---------------- + +`defusedxml`_ is a pure Python package with modified subclasses of all stdlib +XML parsers that prevent any potentially malicious operation. Use of this +package is recommended for any server code that parses untrusted XML data. The +package also ships with example exploits and extended documentation on more +XML exploits like xpath injection. + +`defusedexpat`_ provides a modified libexpat and patched replacement +:mod:`pyexpat` extension module with countermeasures against entity expansion +DoS attacks. Defusedexpat still allows a sane and configurable amount of entity +expansions. The modifications will be merged into future releases of Python. + +The workarounds and modifications are not included in patch releases as they +break backward compatibility. After all, inline DTD and entity expansion are +well-defined XML features. + + +.. _defusedxml: https://pypi.org/project/defusedxml/ +.. _defusedexpat: https://pypi.org/project/defusedexpat/ +.. _Billion Laughs: http://en.wikipedia.org/wiki/Billion_laughs +.. _ZIP bomb: http://en.wikipedia.org/wiki/Zip_bomb +.. 
_DTD: http://en.wikipedia.org/wiki/Document_Type_Definition diff --git a/Doc/library/xml.sax.rst b/Doc/library/xml.sax.rst index 1bf55b4..d5c56b6 100644 --- a/Doc/library/xml.sax.rst +++ b/Doc/library/xml.sax.rst @@ -13,6 +13,14 @@ Simple API for XML (SAX) interface for Python. The package itself provides the SAX exceptions and the convenience functions which will be most used by users of the SAX API. + +.. warning:: + + The :mod:`xml.sax` module is not secure against maliciously + constructed data. If you need to parse untrusted or unauthenticated data see + :ref:`xml-vulnerabilities`. + + The convenience functions are: diff --git a/Doc/library/xmlrpc.client.rst b/Doc/library/xmlrpc.client.rst index e72770a..4592fb4 100644 --- a/Doc/library/xmlrpc.client.rst +++ b/Doc/library/xmlrpc.client.rst @@ -21,6 +21,13 @@ supports writing XML-RPC client code; it handles all the details of translating between conformable Python objects and XML on the wire. +.. warning:: + + The :mod:`xmlrpc.client` module is not secure against maliciously + constructed data. If you need to parse untrusted or unauthenticated data see + :ref:`xml-vulnerabilities`. + + .. class:: ServerProxy(uri, transport=None, encoding=None, verbose=False, allow_none=False, use_datetime=False) A :class:`ServerProxy` instance is an object that manages communication with a diff --git a/Doc/library/xmlrpc.server.rst b/Doc/library/xmlrpc.server.rst index 67feba6..6b4c202 100644 --- a/Doc/library/xmlrpc.server.rst +++ b/Doc/library/xmlrpc.server.rst @@ -16,6 +16,13 @@ servers written in Python. Servers can either be free standing, using :class:`CGIXMLRPCRequestHandler`. +.. warning:: + + The :mod:`xmlrpc.server` module is not secure against maliciously + constructed data. If you need to parse untrusted or unauthenticated data see + :ref:`xml-vulnerabilities`. + + .. 
class:: SimpleXMLRPCServer(addr, requestHandler=SimpleXMLRPCRequestHandler, logRequests=True, allow_none=False, encoding=None, bind_and_activate=True) Create a new server instance. This class provides methods for registration of diff --git a/Misc/NEWS b/Misc/NEWS index dbc12e7..5bdca7f 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -1156,6 +1156,8 @@ Tools/Demos Documentation ------------- +- Issue #17538: Document XML vulnerabilities + - Issue #17047: remove doubled words in docs and docstrings reported by Serhiy Storchaka and Matthew Barnett. -- cgit v0.12 From 265281ac02d4ab82a8acdd38a1c38c4ecd9ce60d Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Wed, 27 Mar 2013 20:11:55 +0200 Subject: #17329: document unittest.SkipTest. Initial patch by Zachary Ware. --- Doc/library/unittest.rst | 13 ++++++++++--- Lib/unittest/case.py | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst index 53013d7..b3372f9 100644 --- a/Doc/library/unittest.rst +++ b/Doc/library/unittest.rst @@ -663,7 +663,7 @@ the test unless the passed object has a certain attribute:: def skipUnlessHasattr(obj, attr): if hasattr(obj, attr): return lambda func: func - return unittest.skip("{0!r} doesn't have {1!r}".format(obj, attr)) + return unittest.skip("{!r} doesn't have {!r}".format(obj, attr)) The following decorators implement test skipping and expected failures: @@ -685,6 +685,13 @@ The following decorators implement test skipping and expected failures: Mark the test as an expected failure. If the test fails when run, the test is not counted as a failure. +.. exception:: SkipTest(reason) + + This exception is raised to skip a test. + + Usually you can use :meth:`TestCase.skipTest` or one of the skipping + decorators instead of raising this directly. + Skipped tests will not have :meth:`setUp` or :meth:`tearDown` run around them. Skipped classes will not have :meth:`setUpClass` or :meth:`tearDownClass` run. 
@@ -2105,7 +2112,7 @@ then you must call up to them yourself. The implementations in If an exception is raised during a ``setUpClass`` then the tests in the class are not run and the ``tearDownClass`` is not run. Skipped classes will not have ``setUpClass`` or ``tearDownClass`` run. If the exception is a -``SkipTest`` exception then the class will be reported as having been skipped +:exc:`SkipTest` exception then the class will be reported as having been skipped instead of as an error. @@ -2122,7 +2129,7 @@ These should be implemented as functions:: If an exception is raised in a ``setUpModule`` then none of the tests in the module will be run and the ``tearDownModule`` will not be run. If the exception is a -``SkipTest`` exception then the module will be reported as having been skipped +:exc:`SkipTest` exception then the module will be reported as having been skipped instead of as an error. diff --git a/Lib/unittest/case.py b/Lib/unittest/case.py index bea8107..0ef3de7 100644 --- a/Lib/unittest/case.py +++ b/Lib/unittest/case.py @@ -23,7 +23,7 @@ class SkipTest(Exception): """ Raise this exception in a test to skip it. - Usually you can use TestResult.skip() or one of the skipping decorators + Usually you can use TestCase.skipTest() or one of the skipping decorators instead of raising this directly. """ -- cgit v0.12 From fd4365a4d84d064046669267123d4858926f88fa Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Thu, 28 Mar 2013 04:31:53 +0200 Subject: Update suspicious ignore file. --- Doc/faq/library.rst | 2 +- Doc/howto/logging-cookbook.rst | 2 +- Doc/library/sys.rst | 2 +- Doc/tools/sphinxext/susp-ignored.csv | 17 ++++++++--------- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/Doc/faq/library.rst b/Doc/faq/library.rst index 7385c59..cab2d7b 100644 --- a/Doc/faq/library.rst +++ b/Doc/faq/library.rst @@ -351,7 +351,7 @@ When run, this will produce the following output: Worker running with argument 5 ... 
-Consult the module's documentation for more details; the :class:`~queue.Queue`` +Consult the module's documentation for more details; the :class:`~queue.Queue` class provides a featureful interface. diff --git a/Doc/howto/logging-cookbook.rst b/Doc/howto/logging-cookbook.rst index c361938..970ebec 100644 --- a/Doc/howto/logging-cookbook.rst +++ b/Doc/howto/logging-cookbook.rst @@ -1572,7 +1572,7 @@ UTF-8, then you need to do the following: 'ASCII section\ufeffUnicode section' - The Unicode code point ``'\feff```, when encoded using UTF-8, will be + The Unicode code point ``'\ufeff'``, when encoded using UTF-8, will be encoded as a UTF-8 BOM -- the byte-string ``b'\xef\xbb\xbf'``. #. Replace the ASCII section with whatever placeholders you like, but make sure diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index a3c14e6..1166b64 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -764,7 +764,7 @@ always available. independent Python files are installed; by default, this is the string ``'/usr/local'``. This can be set at build time with the ``--prefix`` argument to the :program:`configure` script. The main collection of Python - library modules is installed in the directory :file:`{prefix}/lib/python{X.Y}`` + library modules is installed in the directory :file:`{prefix}/lib/python{X.Y}` while the platform independent header files (all except :file:`pyconfig.h`) are stored in :file:`{prefix}/include/python{X.Y}`, where *X.Y* is the version number of Python, for example ``3.2``. diff --git a/Doc/tools/sphinxext/susp-ignored.csv b/Doc/tools/sphinxext/susp-ignored.csv index 5076aed..dbf6df1 100644 --- a/Doc/tools/sphinxext/susp-ignored.csv +++ b/Doc/tools/sphinxext/susp-ignored.csv @@ -88,8 +88,8 @@ library/repr,,`,"return `obj`" library/smtplib,,:port,"as well as a regular host:port server." 
library/socket,,::,'5aef:2b::8' library/sqlite3,,:memory, -library/sqlite3,,:age,"select name_last, age from people where name_last=:who and age=:age" -library/sqlite3,,:who,"select name_last, age from people where name_last=:who and age=:age" +library/sqlite3,,:who,"cur.execute(""select * from people where name_last=:who and age=:age"", {""who"": who, ""age"": age})" +library/sqlite3,,:age,"cur.execute(""select * from people where name_last=:who and age=:age"", {""who"": who, ""age"": age})" library/ssl,,:My,"Organization Name (eg, company) [Internet Widgits Pty Ltd]:My Organization, Inc." library/ssl,,:My,"Organizational Unit Name (eg, section) []:My Group" library/ssl,,:myserver,"Common Name (eg, YOUR name) []:myserver.mygroup.myorganization.com" @@ -112,9 +112,9 @@ library/turtle,,::,Example:: library/urllib,,:port,:port library/urllib2,,:password,"""joe:password@python.org""" library/uuid,,:uuid,urn:uuid:12345678-1234-5678-1234-567812345678 -library/xmlrpclib,,:pass,http://user:pass@host:port/path -library/xmlrpclib,,:pass,user:pass -library/xmlrpclib,,:port,http://user:pass@host:port/path +library/xmlrpc.client,,:pass,http://user:pass@host:port/path +library/xmlrpc.client,,:port,http://user:pass@host:port/path +library/xmlrpc.client,,:pass,user:pass license,,`,THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND license,,:zooko,mailto:zooko@zooko.com license,,`,THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND @@ -169,10 +169,12 @@ faq/programming,,:reduce,"Sx=Sx,Sy=Sy:reduce(lambda x,y:x+y,map(lambda x,xc=Ru,y faq/programming,,:chr,">=4.0) or 1+f(xc,yc,x*x-y*y+xc,2.0*x*y+yc,k-1,f):f(xc,yc,x,y,k,f):chr(" faq/programming,,::,for x in sequence[::-1]: faq/windows,229,:EOF,@setlocal enableextensions & python -x %~f0 %* & goto :EOF +faq/windows,,:bd8afb90ebf2,"Python 3.3.0 (v3.3.0:bd8afb90ebf2, Sep 29 2012, 10:55:48) [MSC v.1600 32 bit (Intel)] on win32" faq/windows,393,:REG,.py :REG_SZ: c:\\python.exe -u %s %s 
library/bisect,32,:hi,all(val >= x for val in a[i:hi]) library/bisect,42,:hi,all(val > x for val in a[i:hi]) -library/http.client,52,:port,host:port +library/http.client,,:port,host:port +library/http.cookies,,`,!#$%&'*+-.^_`|~ library/nntplib,,:bytes,:bytes library/nntplib,,:lines,:lines library/nntplib,,:lines,"['xref', 'from', ':lines', ':bytes', 'references', 'date', 'message-id', 'subject']" @@ -185,9 +187,6 @@ library/stdtypes,,:end,s[start:end] library/urllib.request,,:close,Connection:close library/urllib.request,,:password,"""joe:password@python.org""" library/urllib.request,,:lang,"xmlns=""http://www.w3.org/1999/xhtml"" xml:lang=""en"" lang=""en"">\n\n\n" -library/xmlrpc.client,103,:pass,http://user:pass@host:port/path -library/xmlrpc.client,103,:port,http://user:pass@host:port/path -library/xmlrpc.client,103,:pass,user:pass license,,`,* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY license,,`,* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND license,,`,"``Software''), to deal in the Software without restriction, including" -- cgit v0.12