summary | refs | log | tree | commit | diff | stats
path: root/Modules/pyexpat.c
diff options
context:
space:
mode:
author Serhiy Storchaka <storchaka@gmail.com> 2013-02-04 16:28:01 (GMT)
committer Serhiy Storchaka <storchaka@gmail.com> 2013-02-04 16:28:01 (GMT)
commit 36b365ccff21cdc563a3f4209b0bbaa079572487 (patch)
tree 2f1c79701b06d6afc8c5b53a1e9aace985dab2e3 /Modules/pyexpat.c
parent b7be42b1f949a4ddd6fe9a2d2b6451fba565c037 (diff)
parent 43536e9e373f395a047403831c08acedf3c5f258 (diff)
download cpython-36b365ccff21cdc563a3f4209b0bbaa079572487.zip
cpython-36b365ccff21cdc563a3f4209b0bbaa079572487.tar.gz
cpython-36b365ccff21cdc563a3f4209b0bbaa079572487.tar.bz2
Issue #17089: The Expat parser now works correctly with string input even when
the internal XML encoding is not UTF-8 or US-ASCII. It also now accepts bytes and strings larger than 2 GiB.
Diffstat (limited to 'Modules/pyexpat.c')
-rw-r--r-- Modules/pyexpat.c 40
1 files changed, 36 insertions, 4 deletions
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 3f59f0f..022b0cb 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -778,17 +778,49 @@ PyDoc_STRVAR(xmlparse_Parse__doc__,
"Parse(data[, isfinal])\n\
Parse XML data. `isfinal' should be true at end of input.");
+#define MAX_CHUNK_SIZE (1 << 20)
+
static PyObject *
xmlparse_Parse(xmlparseobject *self, PyObject *args)
{
- char *s;
- int slen;
+ PyObject *data;
int isFinal = 0;
+ const char *s;
+ Py_ssize_t slen;
+ Py_buffer view;
+ int rc;
- if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
+ if (!PyArg_ParseTuple(args, "O|i:Parse", &data, &isFinal))
return NULL;
- return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
+ if (PyUnicode_Check(data)) {
+ view.buf = NULL;
+ s = PyUnicode_AsUTF8AndSize(data, &slen);
+ if (s == NULL)
+ return NULL;
+ /* Explicitly set UTF-8 encoding. Return code ignored. */
+ (void)XML_SetEncoding(self->itself, "utf-8");
+ }
+ else {
+ if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
+ return NULL;
+ s = view.buf;
+ slen = view.len;
+ }
+
+ while (slen > MAX_CHUNK_SIZE) {
+ rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
+ if (!rc)
+ goto done;
+ s += MAX_CHUNK_SIZE;
+ slen -= MAX_CHUNK_SIZE;
+ }
+ rc = XML_Parse(self->itself, s, slen, isFinal);
+
+done:
+ if (view.buf != NULL)
+ PyBuffer_Release(&view);
+ return get_parse_result(self, rc);
}
/* File reading copied from cPickle */