summary refs log tree commit diff stats
path: root/Modules
diff options
context:
space:
mode:
author Sebastian Pipping <sebastian@pipping.org> 2024-02-29 22:52:50 (GMT)
committer GitHub <noreply@github.com> 2024-02-29 22:52:50 (GMT)
commit 6a95676bb526261434dd068d6c49927c44d24a9b (patch)
tree e610bee9b3ca230b4e157745ebbe38c8a9923f89 /Modules
parent d01886c5c9e3a62921b304ba7e5145daaa56d3cf (diff)
download cpython-6a95676bb526261434dd068d6c49927c44d24a9b.zip
cpython-6a95676bb526261434dd068d6c49927c44d24a9b.tar.gz
cpython-6a95676bb526261434dd068d6c49927c44d24a9b.tar.bz2
gh-115398: Expose Expat >=2.6.0 reparse deferral API (CVE-2023-52425) (GH-115623)
Allow controlling Expat >=2.6.0 reparse deferral (CVE-2023-52425) by adding five new methods:

- `xml.etree.ElementTree.XMLParser.flush`
- `xml.etree.ElementTree.XMLPullParser.flush`
- `xml.parsers.expat.xmlparser.GetReparseDeferralEnabled`
- `xml.parsers.expat.xmlparser.SetReparseDeferralEnabled`
- `xml.sax.expatreader.ExpatParser.flush`

Based on the "flush" idea from https://github.com/python/cpython/pull/115138#issuecomment-1932444270 .

### Notes

- Please treat as a security fix related to CVE-2023-52425.

Includes code suggested-by: Snild Dolkow <snild@sony.com> and by core dev Serhiy Storchaka.
Diffstat (limited to 'Modules')
-rw-r--r-- Modules/_elementtree.c 35
-rw-r--r-- Modules/clinic/_elementtree.c.h 19
-rw-r--r-- Modules/clinic/pyexpat.c.h 49
-rw-r--r-- Modules/expat/pyexpatns.h 1
-rw-r--r-- Modules/pyexpat.c 53
5 files changed, 155 insertions, 2 deletions
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index 5445108..edd2f88 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -3895,6 +3895,40 @@ _elementtree_XMLParser_close_impl(XMLParserObject *self)
}
/*[clinic input]
+_elementtree.XMLParser.flush
+
+[clinic start generated code]*/
+
+static PyObject *
+_elementtree_XMLParser_flush_impl(XMLParserObject *self)
+/*[clinic end generated code: output=42fdb8795ca24509 input=effbecdb28715949]*/
+{
+ if (!_check_xmlparser(self)) {
+ return NULL;
+ }
+
+ elementtreestate *st = self->state;
+
+ if (EXPAT(st, SetReparseDeferralEnabled) == NULL) {
+ Py_RETURN_NONE;
+ }
+
+ // NOTE: The Expat parser in the C implementation of ElementTree is not
+ // exposed to the outside; as a result we know that reparse deferral
+ // is currently enabled, or we would not even have access to function
+ // XML_SetReparseDeferralEnabled in the first place (which we checked
+ // for, a few lines up).
+
+ EXPAT(st, SetReparseDeferralEnabled)(self->parser, XML_FALSE);
+
+ PyObject *res = expat_parse(st, self, "", 0, XML_FALSE);
+
+ EXPAT(st, SetReparseDeferralEnabled)(self->parser, XML_TRUE);
+
+ return res;
+}
+
+/*[clinic input]
_elementtree.XMLParser.feed
data: object
@@ -4288,6 +4322,7 @@ static PyType_Spec treebuilder_spec = {
static PyMethodDef xmlparser_methods[] = {
_ELEMENTTREE_XMLPARSER_FEED_METHODDEF
_ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
+ _ELEMENTTREE_XMLPARSER_FLUSH_METHODDEF
_ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
_ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
{NULL, NULL}
diff --git a/Modules/clinic/_elementtree.c.h b/Modules/clinic/_elementtree.c.h
index 9622591..10b2dd1 100644
--- a/Modules/clinic/_elementtree.c.h
+++ b/Modules/clinic/_elementtree.c.h
@@ -1169,6 +1169,23 @@ _elementtree_XMLParser_close(XMLParserObject *self, PyObject *Py_UNUSED(ignored)
return _elementtree_XMLParser_close_impl(self);
}
+PyDoc_STRVAR(_elementtree_XMLParser_flush__doc__,
+"flush($self, /)\n"
+"--\n"
+"\n");
+
+#define _ELEMENTTREE_XMLPARSER_FLUSH_METHODDEF \
+ {"flush", (PyCFunction)_elementtree_XMLParser_flush, METH_NOARGS, _elementtree_XMLParser_flush__doc__},
+
+static PyObject *
+_elementtree_XMLParser_flush_impl(XMLParserObject *self);
+
+static PyObject *
+_elementtree_XMLParser_flush(XMLParserObject *self, PyObject *Py_UNUSED(ignored))
+{
+ return _elementtree_XMLParser_flush_impl(self);
+}
+
PyDoc_STRVAR(_elementtree_XMLParser_feed__doc__,
"feed($self, data, /)\n"
"--\n"
@@ -1219,4 +1236,4 @@ skip_optional:
exit:
return return_value;
}
-/*[clinic end generated code: output=218ec9e6a889f796 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=aed9f53eeb0404e0 input=a9049054013a1b77]*/
diff --git a/Modules/clinic/pyexpat.c.h b/Modules/clinic/pyexpat.c.h
index a5b93e6..343cb91 100644
--- a/Modules/clinic/pyexpat.c.h
+++ b/Modules/clinic/pyexpat.c.h
@@ -8,6 +8,53 @@ preserve
#endif
#include "pycore_modsupport.h" // _PyArg_UnpackKeywords()
+PyDoc_STRVAR(pyexpat_xmlparser_SetReparseDeferralEnabled__doc__,
+"SetReparseDeferralEnabled($self, enabled, /)\n"
+"--\n"
+"\n"
+"Enable/Disable reparse deferral; enabled by default with Expat >=2.6.0.");
+
+#define PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF \
+ {"SetReparseDeferralEnabled", (PyCFunction)pyexpat_xmlparser_SetReparseDeferralEnabled, METH_O, pyexpat_xmlparser_SetReparseDeferralEnabled__doc__},
+
+static PyObject *
+pyexpat_xmlparser_SetReparseDeferralEnabled_impl(xmlparseobject *self,
+ int enabled);
+
+static PyObject *
+pyexpat_xmlparser_SetReparseDeferralEnabled(xmlparseobject *self, PyObject *arg)
+{
+ PyObject *return_value = NULL;
+ int enabled;
+
+ enabled = PyObject_IsTrue(arg);
+ if (enabled < 0) {
+ goto exit;
+ }
+ return_value = pyexpat_xmlparser_SetReparseDeferralEnabled_impl(self, enabled);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(pyexpat_xmlparser_GetReparseDeferralEnabled__doc__,
+"GetReparseDeferralEnabled($self, /)\n"
+"--\n"
+"\n"
+"Retrieve reparse deferral enabled status; always returns false with Expat <2.6.0.");
+
+#define PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF \
+ {"GetReparseDeferralEnabled", (PyCFunction)pyexpat_xmlparser_GetReparseDeferralEnabled, METH_NOARGS, pyexpat_xmlparser_GetReparseDeferralEnabled__doc__},
+
+static PyObject *
+pyexpat_xmlparser_GetReparseDeferralEnabled_impl(xmlparseobject *self);
+
+static PyObject *
+pyexpat_xmlparser_GetReparseDeferralEnabled(xmlparseobject *self, PyObject *Py_UNUSED(ignored))
+{
+ return pyexpat_xmlparser_GetReparseDeferralEnabled_impl(self);
+}
+
PyDoc_STRVAR(pyexpat_xmlparser_Parse__doc__,
"Parse($self, data, isfinal=False, /)\n"
"--\n"
@@ -498,4 +545,4 @@ exit:
#ifndef PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
#define PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
#endif /* !defined(PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF) */
-/*[clinic end generated code: output=48c4296e43777df4 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=892e48e41f9b6e4b input=a9049054013a1b77]*/
diff --git a/Modules/expat/pyexpatns.h b/Modules/expat/pyexpatns.h
index d45d9b6..8ee03ef 100644
--- a/Modules/expat/pyexpatns.h
+++ b/Modules/expat/pyexpatns.h
@@ -108,6 +108,7 @@
#define XML_SetNotStandaloneHandler PyExpat_XML_SetNotStandaloneHandler
#define XML_SetParamEntityParsing PyExpat_XML_SetParamEntityParsing
#define XML_SetProcessingInstructionHandler PyExpat_XML_SetProcessingInstructionHandler
+#define XML_SetReparseDeferralEnabled PyExpat_XML_SetReparseDeferralEnabled
#define XML_SetReturnNSTriplet PyExpat_XML_SetReturnNSTriplet
#define XML_SetSkippedEntityHandler PyExpat_XML_SetSkippedEntityHandler
#define XML_SetStartCdataSectionHandler PyExpat_XML_SetStartCdataSectionHandler
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 62cd262..f04f96b 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -7,6 +7,7 @@
#include "pycore_pyhash.h" // _Py_HashSecret
#include "pycore_traceback.h" // _PyTraceback_Add()
+#include <stdbool.h>
#include <stddef.h> // offsetof()
#include "expat.h"
#include "pyexpat.h"
@@ -81,6 +82,12 @@ typedef struct {
/* NULL if not enabled */
int buffer_size; /* Size of buffer, in XML_Char units */
int buffer_used; /* Buffer units in use */
+ bool reparse_deferral_enabled; /* Whether to defer reparsing of
+ unfinished XML tokens; a de-facto cache of
+ what Expat has the authority on, for lack
+ of a getter API function
+ "XML_GetReparseDeferralEnabled" in Expat
+ 2.6.0 */
PyObject *intern; /* Dictionary to intern strings */
PyObject **handlers;
} xmlparseobject;
@@ -704,6 +711,40 @@ get_parse_result(pyexpat_state *state, xmlparseobject *self, int rv)
#define MAX_CHUNK_SIZE (1 << 20)
/*[clinic input]
+pyexpat.xmlparser.SetReparseDeferralEnabled
+
+ enabled: bool
+ /
+
+Enable/Disable reparse deferral; enabled by default with Expat >=2.6.0.
+[clinic start generated code]*/
+
+static PyObject *
+pyexpat_xmlparser_SetReparseDeferralEnabled_impl(xmlparseobject *self,
+ int enabled)
+/*[clinic end generated code: output=5ec539e3b63c8c49 input=021eb9e0bafc32c5]*/
+{
+#if XML_COMBINED_VERSION >= 20600
+ XML_SetReparseDeferralEnabled(self->itself, enabled ? XML_TRUE : XML_FALSE);
+ self->reparse_deferral_enabled = (bool)enabled;
+#endif
+ Py_RETURN_NONE;
+}
+
+/*[clinic input]
+pyexpat.xmlparser.GetReparseDeferralEnabled
+
+Retrieve reparse deferral enabled status; always returns false with Expat <2.6.0.
+[clinic start generated code]*/
+
+static PyObject *
+pyexpat_xmlparser_GetReparseDeferralEnabled_impl(xmlparseobject *self)
+/*[clinic end generated code: output=4e91312e88a595a8 input=54b5f11d32b20f3e]*/
+{
+ return PyBool_FromLong(self->reparse_deferral_enabled);
+}
+
+/*[clinic input]
pyexpat.xmlparser.Parse
cls: defining_class
@@ -1063,6 +1104,8 @@ static struct PyMethodDef xmlparse_methods[] = {
#if XML_COMBINED_VERSION >= 19505
PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
#endif
+ PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF
+ PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF
{NULL, NULL} /* sentinel */
};
@@ -1158,6 +1201,11 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
self->ns_prefixes = 0;
self->handlers = NULL;
self->intern = Py_XNewRef(intern);
+#if XML_COMBINED_VERSION >= 20600
+ self->reparse_deferral_enabled = true;
+#else
+ self->reparse_deferral_enabled = false;
+#endif
/* namespace_separator is either NULL or contains one char + \0 */
self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
@@ -2019,6 +2067,11 @@ pyexpat_exec(PyObject *mod)
#else
capi->SetHashSalt = NULL;
#endif
+#if XML_COMBINED_VERSION >= 20600
+ capi->SetReparseDeferralEnabled = XML_SetReparseDeferralEnabled;
+#else
+ capi->SetReparseDeferralEnabled = NULL;
+#endif
/* export using capsule */
PyObject *capi_object = PyCapsule_New(capi, PyExpat_CAPSULE_NAME,