diff options
author | Sebastian Pipping <sebastian@pipping.org> | 2024-02-29 22:52:50 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-29 22:52:50 (GMT) |
commit | 6a95676bb526261434dd068d6c49927c44d24a9b (patch) | |
tree | e610bee9b3ca230b4e157745ebbe38c8a9923f89 /Modules | |
parent | d01886c5c9e3a62921b304ba7e5145daaa56d3cf (diff) | |
download | cpython-6a95676bb526261434dd068d6c49927c44d24a9b.zip cpython-6a95676bb526261434dd068d6c49927c44d24a9b.tar.gz cpython-6a95676bb526261434dd068d6c49927c44d24a9b.tar.bz2 |
gh-115398: Expose Expat >=2.6.0 reparse deferral API (CVE-2023-52425) (GH-115623)
Allow controlling Expat >=2.6.0 reparse deferral (CVE-2023-52425) by adding five new methods:
- `xml.etree.ElementTree.XMLParser.flush`
- `xml.etree.ElementTree.XMLPullParser.flush`
- `xml.parsers.expat.xmlparser.GetReparseDeferralEnabled`
- `xml.parsers.expat.xmlparser.SetReparseDeferralEnabled`
- `xml.sax.expatreader.ExpatParser.flush`
Based on the "flush" idea from https://github.com/python/cpython/pull/115138#issuecomment-1932444270 .
### Notes
- Please treat as a security fix related to CVE-2023-52425.
Includes code suggested-by: Snild Dolkow <snild@sony.com>
and by core dev Serhiy Storchaka.
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/_elementtree.c | 35 | ||||
-rw-r--r-- | Modules/clinic/_elementtree.c.h | 19 | ||||
-rw-r--r-- | Modules/clinic/pyexpat.c.h | 49 | ||||
-rw-r--r-- | Modules/expat/pyexpatns.h | 1 | ||||
-rw-r--r-- | Modules/pyexpat.c | 53 |
5 files changed, 155 insertions, 2 deletions
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 5445108..edd2f88 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -3895,6 +3895,40 @@ _elementtree_XMLParser_close_impl(XMLParserObject *self) } /*[clinic input] +_elementtree.XMLParser.flush + +[clinic start generated code]*/ + +static PyObject * +_elementtree_XMLParser_flush_impl(XMLParserObject *self) +/*[clinic end generated code: output=42fdb8795ca24509 input=effbecdb28715949]*/ +{ + if (!_check_xmlparser(self)) { + return NULL; + } + + elementtreestate *st = self->state; + + if (EXPAT(st, SetReparseDeferralEnabled) == NULL) { + Py_RETURN_NONE; + } + + // NOTE: The Expat parser in the C implementation of ElementTree is not + // exposed to the outside; as a result we known that reparse deferral + // is currently enabled, or we would not even have access to function + // XML_SetReparseDeferralEnabled in the first place (which we checked + // for, a few lines up). + + EXPAT(st, SetReparseDeferralEnabled)(self->parser, XML_FALSE); + + PyObject *res = expat_parse(st, self, "", 0, XML_FALSE); + + EXPAT(st, SetReparseDeferralEnabled)(self->parser, XML_TRUE); + + return res; +} + +/*[clinic input] _elementtree.XMLParser.feed data: object @@ -4288,6 +4322,7 @@ static PyType_Spec treebuilder_spec = { static PyMethodDef xmlparser_methods[] = { _ELEMENTTREE_XMLPARSER_FEED_METHODDEF _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF + _ELEMENTTREE_XMLPARSER_FLUSH_METHODDEF _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF {NULL, NULL} diff --git a/Modules/clinic/_elementtree.c.h b/Modules/clinic/_elementtree.c.h index 9622591..10b2dd1 100644 --- a/Modules/clinic/_elementtree.c.h +++ b/Modules/clinic/_elementtree.c.h @@ -1169,6 +1169,23 @@ _elementtree_XMLParser_close(XMLParserObject *self, PyObject *Py_UNUSED(ignored) return _elementtree_XMLParser_close_impl(self); } +PyDoc_STRVAR(_elementtree_XMLParser_flush__doc__, +"flush($self, /)\n" +"--\n" +"\n"); + +#define _ELEMENTTREE_XMLPARSER_FLUSH_METHODDEF \ + {"flush", (PyCFunction)_elementtree_XMLParser_flush, METH_NOARGS, _elementtree_XMLParser_flush__doc__}, + +static PyObject * +_elementtree_XMLParser_flush_impl(XMLParserObject *self); + +static PyObject * +_elementtree_XMLParser_flush(XMLParserObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _elementtree_XMLParser_flush_impl(self); +} + PyDoc_STRVAR(_elementtree_XMLParser_feed__doc__, "feed($self, data, /)\n" "--\n" @@ -1219,4 +1236,4 @@ skip_optional: exit: return return_value; } -/*[clinic end generated code: output=218ec9e6a889f796 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=aed9f53eeb0404e0 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/pyexpat.c.h b/Modules/clinic/pyexpat.c.h index a5b93e6..343cb91 100644 --- a/Modules/clinic/pyexpat.c.h +++ b/Modules/clinic/pyexpat.c.h @@ -8,6 +8,53 @@ preserve #endif #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() +PyDoc_STRVAR(pyexpat_xmlparser_SetReparseDeferralEnabled__doc__, +"SetReparseDeferralEnabled($self, enabled, /)\n" +"--\n" +"\n" +"Enable/Disable reparse deferral; enabled by default with Expat >=2.6.0."); + +#define PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF \ + {"SetReparseDeferralEnabled", (PyCFunction)pyexpat_xmlparser_SetReparseDeferralEnabled, METH_O, pyexpat_xmlparser_SetReparseDeferralEnabled__doc__}, + +static PyObject * +pyexpat_xmlparser_SetReparseDeferralEnabled_impl(xmlparseobject *self, + int enabled); + +static PyObject * +pyexpat_xmlparser_SetReparseDeferralEnabled(xmlparseobject *self, PyObject *arg) +{ + PyObject *return_value = NULL; + int enabled; + + enabled = PyObject_IsTrue(arg); + if (enabled < 0) { + goto exit; + } + return_value = pyexpat_xmlparser_SetReparseDeferralEnabled_impl(self, enabled); + +exit: + return return_value; +} + +PyDoc_STRVAR(pyexpat_xmlparser_GetReparseDeferralEnabled__doc__, +"GetReparseDeferralEnabled($self, /)\n" +"--\n" +"\n" +"Retrieve reparse deferral enabled status; always returns false with Expat <2.6.0."); + +#define PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF \ + {"GetReparseDeferralEnabled", (PyCFunction)pyexpat_xmlparser_GetReparseDeferralEnabled, METH_NOARGS, pyexpat_xmlparser_GetReparseDeferralEnabled__doc__}, + +static PyObject * +pyexpat_xmlparser_GetReparseDeferralEnabled_impl(xmlparseobject *self); + +static PyObject * +pyexpat_xmlparser_GetReparseDeferralEnabled(xmlparseobject *self, PyObject *Py_UNUSED(ignored)) +{ + return pyexpat_xmlparser_GetReparseDeferralEnabled_impl(self); +} + PyDoc_STRVAR(pyexpat_xmlparser_Parse__doc__, "Parse($self, data, isfinal=False, /)\n" "--\n" @@ -498,4 +545,4 @@ exit: #ifndef PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #define PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #endif /* !defined(PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF) */ -/*[clinic end generated code: output=48c4296e43777df4 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=892e48e41f9b6e4b input=a9049054013a1b77]*/ diff --git a/Modules/expat/pyexpatns.h b/Modules/expat/pyexpatns.h index d45d9b6..8ee03ef 100644 --- a/Modules/expat/pyexpatns.h +++ b/Modules/expat/pyexpatns.h @@ -108,6 +108,7 @@ #define XML_SetNotStandaloneHandler PyExpat_XML_SetNotStandaloneHandler #define XML_SetParamEntityParsing PyExpat_XML_SetParamEntityParsing #define XML_SetProcessingInstructionHandler PyExpat_XML_SetProcessingInstructionHandler +#define XML_SetReparseDeferralEnabled PyExpat_XML_SetReparseDeferralEnabled #define XML_SetReturnNSTriplet PyExpat_XML_SetReturnNSTriplet #define XML_SetSkippedEntityHandler PyExpat_XML_SetSkippedEntityHandler #define XML_SetStartCdataSectionHandler PyExpat_XML_SetStartCdataSectionHandler diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 62cd262..f04f96b 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -7,6 +7,7 @@ #include "pycore_pyhash.h" // _Py_HashSecret #include "pycore_traceback.h" // _PyTraceback_Add() +#include <stdbool.h> #include <stddef.h> // offsetof() #include "expat.h" #include "pyexpat.h" @@ -81,6 +82,12 @@ typedef struct { /* NULL if not enabled */ int buffer_size; /* Size of buffer, in XML_Char units */ int buffer_used; /* Buffer units in use */ + bool reparse_deferral_enabled; /* Whether to defer reparsing of + unfinished XML tokens; a de-facto cache of + what Expat has the authority on, for lack + of a getter API function + "XML_GetReparseDeferralEnabled" in Expat + 2.6.0 */ PyObject *intern; /* Dictionary to intern strings */ PyObject **handlers; } xmlparseobject; @@ -704,6 +711,40 @@ get_parse_result(pyexpat_state *state, xmlparseobject *self, int rv) #define MAX_CHUNK_SIZE (1 << 20) /*[clinic input] +pyexpat.xmlparser.SetReparseDeferralEnabled + + enabled: bool + / + +Enable/Disable reparse deferral; enabled by default with Expat >=2.6.0. +[clinic start generated code]*/ + +static PyObject * +pyexpat_xmlparser_SetReparseDeferralEnabled_impl(xmlparseobject *self, + int enabled) +/*[clinic end generated code: output=5ec539e3b63c8c49 input=021eb9e0bafc32c5]*/ +{ +#if XML_COMBINED_VERSION >= 20600 + XML_SetReparseDeferralEnabled(self->itself, enabled ? XML_TRUE : XML_FALSE); + self->reparse_deferral_enabled = (bool)enabled; +#endif + Py_RETURN_NONE; +} + +/*[clinic input] +pyexpat.xmlparser.GetReparseDeferralEnabled + +Retrieve reparse deferral enabled status; always returns false with Expat <2.6.0. +[clinic start generated code]*/ + +static PyObject * +pyexpat_xmlparser_GetReparseDeferralEnabled_impl(xmlparseobject *self) +/*[clinic end generated code: output=4e91312e88a595a8 input=54b5f11d32b20f3e]*/ +{ + return PyBool_FromLong(self->reparse_deferral_enabled); +} + +/*[clinic input] pyexpat.xmlparser.Parse cls: defining_class @@ -1063,6 +1104,8 @@ static struct PyMethodDef xmlparse_methods[] = { #if XML_COMBINED_VERSION >= 19505 PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #endif + PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF + PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF {NULL, NULL} /* sentinel */ }; @@ -1158,6 +1201,11 @@ newxmlparseobject(pyexpat_state *state, const char *encoding, self->ns_prefixes = 0; self->handlers = NULL; self->intern = Py_XNewRef(intern); +#if XML_COMBINED_VERSION >= 20600 + self->reparse_deferral_enabled = true; +#else + self->reparse_deferral_enabled = false; +#endif /* namespace_separator is either NULL or contains one char + \0 */ self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler, @@ -2019,6 +2067,11 @@ pyexpat_exec(PyObject *mod) #else capi->SetHashSalt = NULL; #endif +#if XML_COMBINED_VERSION >= 20600 + capi->SetReparseDeferralEnabled = XML_SetReparseDeferralEnabled; +#else + capi->SetReparseDeferralEnabled = NULL; +#endif /* export using capsule */ PyObject *capi_object = PyCapsule_New(capi, PyExpat_CAPSULE_NAME, |