diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2013-02-04 16:28:01 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-02-04 16:28:01 (GMT) |
commit | 36b365ccff21cdc563a3f4209b0bbaa079572487 (patch) | |
tree | 2f1c79701b06d6afc8c5b53a1e9aace985dab2e3 /Modules/pyexpat.c | |
parent | b7be42b1f949a4ddd6fe9a2d2b6451fba565c037 (diff) | |
parent | 43536e9e373f395a047403831c08acedf3c5f258 (diff) | |
download | cpython-36b365ccff21cdc563a3f4209b0bbaa079572487.zip cpython-36b365ccff21cdc563a3f4209b0bbaa079572487.tar.gz cpython-36b365ccff21cdc563a3f4209b0bbaa079572487.tar.bz2 |
Issue #17089: The Expat parser now works correctly with string input even when
the internal XML encoding is not UTF-8 or US-ASCII. It also now accepts bytes
and strings larger than 2 GiB.
Diffstat (limited to 'Modules/pyexpat.c')
-rw-r--r-- | Modules/pyexpat.c | 40 |
1 files changed, 36 insertions, 4 deletions
```diff
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 3f59f0f..022b0cb 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -778,17 +778,49 @@ PyDoc_STRVAR(xmlparse_Parse__doc__,
 "Parse(data[, isfinal])\n\
 Parse XML data. `isfinal' should be true at end of input.");
 
+#define MAX_CHUNK_SIZE (1 << 20)
+
 static PyObject *
 xmlparse_Parse(xmlparseobject *self, PyObject *args)
 {
-    char *s;
-    int slen;
+    PyObject *data;
     int isFinal = 0;
+    const char *s;
+    Py_ssize_t slen;
+    Py_buffer view;
+    int rc;
 
-    if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
+    if (!PyArg_ParseTuple(args, "O|i:Parse", &data, &isFinal))
         return NULL;
 
-    return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
+    if (PyUnicode_Check(data)) {
+        view.buf = NULL;
+        s = PyUnicode_AsUTF8AndSize(data, &slen);
+        if (s == NULL)
+            return NULL;
+        /* Explicitly set UTF-8 encoding. Return code ignored. */
+        (void)XML_SetEncoding(self->itself, "utf-8");
+    }
+    else {
+        if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
+            return NULL;
+        s = view.buf;
+        slen = view.len;
+    }
+
+    while (slen > MAX_CHUNK_SIZE) {
+        rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
+        if (!rc)
+            goto done;
+        s += MAX_CHUNK_SIZE;
+        slen -= MAX_CHUNK_SIZE;
+    }
+    rc = XML_Parse(self->itself, s, slen, isFinal);
+
+done:
+    if (view.buf != NULL)
+        PyBuffer_Release(&view);
+    return get_parse_result(self, rc);
 }
 
 /* File reading copied from cPickle */
```