diff options
author | Eli Bendersky <eliben@gmail.com> | 2013-05-25 12:25:48 (GMT) |
---|---|---|
committer | Eli Bendersky <eliben@gmail.com> | 2013-05-25 12:25:48 (GMT) |
commit | 6dc32b34ddfba0ddb990cbbb77cf8803879d20f9 (patch) | |
tree | 52b3529adfb9b0ccdf71bd5a34a1edb8bb50fdff /Modules | |
parent | 6b5a38c728bbca3273b6917308559cf22e298531 (diff) | |
download | cpython-6dc32b34ddfba0ddb990cbbb77cf8803879d20f9.zip cpython-6dc32b34ddfba0ddb990cbbb77cf8803879d20f9.tar.gz cpython-6dc32b34ddfba0ddb990cbbb77cf8803879d20f9.tar.bz2 |
Issue #13612: handle unknown encodings without a buffer overflow.
This affects pyexpat and _elementtree. PyExpat_CAPI now exposes a new
function - DefaultUnknownEncodingHandler.
Based on a patch by Serhiy Storchaka.
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/_elementtree.c | 43 | ||||
-rw-r--r-- | Modules/pyexpat.c | 58 |
2 files changed, 28 insertions, 73 deletions
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index 5078b83..d3784e8 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -3136,47 +3136,6 @@ expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
     }
 }
 
-static int
-expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
-                               XML_Encoding *info)
-{
-    PyObject* u;
-    unsigned char s[256];
-    int i;
-    void *data;
-    unsigned int kind;
-
-    memset(info, 0, sizeof(XML_Encoding));
-
-    for (i = 0; i < 256; i++)
-        s[i] = i;
-
-    u = PyUnicode_Decode((char*) s, 256, name, "replace");
-    if (!u)
-        return XML_STATUS_ERROR;
-    if (PyUnicode_READY(u))
-        return XML_STATUS_ERROR;
-
-    if (PyUnicode_GET_LENGTH(u) != 256) {
-        Py_DECREF(u);
-        return XML_STATUS_ERROR;
-    }
-
-    kind = PyUnicode_KIND(u);
-    data = PyUnicode_DATA(u);
-    for (i = 0; i < 256; i++) {
-        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
-        if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
-            info->map[i] = ch;
-        else
-            info->map[i] = -1;
-    }
-
-    Py_DECREF(u);
-
-    return XML_STATUS_OK;
-}
-
 /* -------------------------------------------------------------------- */
 
 static PyObject *
@@ -3278,7 +3237,7 @@ xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
         );
     EXPAT(SetUnknownEncodingHandler)(
         self_xp->parser,
-        (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
+        EXPAT(DefaultUnknownEncodingHandler), NULL
         );
 
     return 0;
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 4750225..01ac14e 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -1111,53 +1111,49 @@ static struct PyMethodDef xmlparse_methods[] = {
     Make it as simple as possible.
 */
 
-static char template_buffer[257];
-
-static void
-init_template_buffer(void)
-{
-    int i;
-    for (i = 0; i < 256; i++) {
-        template_buffer[i] = i;
-    }
-    template_buffer[256] = 0;
-}
-
 static int
 PyUnknownEncodingHandler(void *encodingHandlerData,
                          const XML_Char *name,
                          XML_Encoding *info)
 {
-    PyUnicodeObject *_u_string = NULL;
-    int result = 0;
+    static unsigned char template_buffer[256] = {0};
+    PyObject* u;
     int i;
-    int kind;
     void *data;
+    unsigned int kind;
 
-    /* Yes, supports only 8bit encodings */
-    _u_string = (PyUnicodeObject *)
-        PyUnicode_Decode(template_buffer, 256, name, "replace");
+    if (template_buffer[1] == 0) {
+        for (i = 0; i < 256; i++)
+            template_buffer[i] = i;
+    }
 
-    if (_u_string == NULL || PyUnicode_READY(_u_string) == -1)
-        return result;
+    u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
+    if (u == NULL || PyUnicode_READY(u))
+        return XML_STATUS_ERROR;
 
-    kind = PyUnicode_KIND(_u_string);
-    data = PyUnicode_DATA(_u_string);
+    if (PyUnicode_GET_LENGTH(u) != 256) {
+        Py_DECREF(u);
+        PyErr_SetString(PyExc_ValueError,
+                        "multi-byte encodings are not supported");
+        return XML_STATUS_ERROR;
+    }
 
+    kind = PyUnicode_KIND(u);
+    data = PyUnicode_DATA(u);
     for (i = 0; i < 256; i++) {
-        /* Stupid to access directly, but fast */
-        Py_UCS4 c = PyUnicode_READ(kind, data, i);
-        if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
-            info->map[i] = -1;
+        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+        if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
+            info->map[i] = ch;
         else
-            info->map[i] = c;
+            info->map[i] = -1;
     }
+
     info->data = NULL;
     info->convert = NULL;
     info->release = NULL;
-    result = 1;
-    Py_DECREF(_u_string);
-    return result;
+    Py_DECREF(u);
+
+    return XML_STATUS_OK;
 }
 
 
@@ -1752,7 +1748,6 @@ MODULE_INITFUNC(void)
                            Py_BuildValue("(iii)", info.major,
                                          info.minor, info.micro));
     }
-    init_template_buffer();
     /* XXX When Expat supports some way of figuring out how it was
        compiled, this should check and set native_encoding
        appropriately.
@@ -1938,6 +1933,7 @@ MODULE_INITFUNC(void)
     capi.SetUserData = XML_SetUserData;
     capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
     capi.SetEncoding = XML_SetEncoding;
+    capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
 
     /* export using capsule */
     capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);