summary | refs | log | tree | commit | diff | stats
path: root/Modules
diff options
context:
space:
mode:
author: Antoine Pitrou <solipsis@pitrou.net> 2013-04-07 15:38:11 (GMT)
committer: Antoine Pitrou <solipsis@pitrou.net> 2013-04-07 15:38:11 (GMT)
commit: 299978dfe387ad543706754fa5601297408240c2 (patch)
tree: 6c251029f470b388aab65815f2c893d81da635a9 /Modules
parent: 5a3c6dbe6a9c6100dcdf91e9773d50d97268e1ba (diff)
download: cpython-299978dfe387ad543706754fa5601297408240c2.zip
cpython-299978dfe387ad543706754fa5601297408240c2.tar.gz
cpython-299978dfe387ad543706754fa5601297408240c2.tar.bz2
Issue #15596: Faster pickling of unicode strings.
Diffstat (limited to 'Modules')
-rw-r--r-- Modules/_pickle.c 104
1 file changed, 69 insertions, 35 deletions
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index c213a51..f0d3e79 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -1873,63 +1873,97 @@ done:
}
static int
-save_unicode(PicklerObject *self, PyObject *obj)
+write_utf8(PicklerObject *self, char *data, Py_ssize_t size)
{
- Py_ssize_t size;
- PyObject *encoded = NULL;
+ char pdata[5];
- if (self->bin) {
- char pdata[5];
+#if SIZEOF_SIZE_T > 4
+ if (size > 0xffffffffUL) {
+ /* string too large */
+ PyErr_SetString(PyExc_OverflowError,
+ "cannot serialize a string larger than 4GB");
+ return -1;
+ }
+#endif
- encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
- if (encoded == NULL)
- goto error;
+ pdata[0] = BINUNICODE;
+ pdata[1] = (unsigned char)(size & 0xff);
+ pdata[2] = (unsigned char)((size >> 8) & 0xff);
+ pdata[3] = (unsigned char)((size >> 16) & 0xff);
+ pdata[4] = (unsigned char)((size >> 24) & 0xff);
- size = PyBytes_GET_SIZE(encoded);
- if (size > 0xffffffffL) {
- PyErr_SetString(PyExc_OverflowError,
- "cannot serialize a string larger than 4 GiB");
- goto error; /* string too large */
- }
+ if (_Pickler_Write(self, pdata, sizeof(pdata)) < 0)
+ return -1;
+
+ if (_Pickler_Write(self, data, size) < 0)
+ return -1;
- pdata[0] = BINUNICODE;
- pdata[1] = (unsigned char)(size & 0xff);
- pdata[2] = (unsigned char)((size >> 8) & 0xff);
- pdata[3] = (unsigned char)((size >> 16) & 0xff);
- pdata[4] = (unsigned char)((size >> 24) & 0xff);
+ return 0;
+}
- if (_Pickler_Write(self, pdata, 5) < 0)
- goto error;
+static int
+write_unicode_binary(PicklerObject *self, PyObject *obj)
+{
+ PyObject *encoded = NULL;
+ Py_ssize_t size;
+ char *data;
+ int r;
- if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
- goto error;
+ if (PyUnicode_READY(obj))
+ return -1;
+
+ data = PyUnicode_AsUTF8AndSize(obj, &size);
+ if (data != NULL)
+ return write_utf8(self, data, size);
+
+ /* Issue #8383: for strings with lone surrogates, fallback on the
+ "surrogatepass" error handler. */
+ PyErr_Clear();
+ encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
+ if (encoded == NULL)
+ return -1;
+
+ r = write_utf8(self, PyBytes_AS_STRING(encoded),
+ PyBytes_GET_SIZE(encoded));
+ Py_DECREF(encoded);
+ return r;
+}
+
+static int
+save_unicode(PicklerObject *self, PyObject *obj)
+{
+ if (self->bin) {
+ if (write_unicode_binary(self, obj) < 0)
+ return -1;
}
else {
+ PyObject *encoded;
+ Py_ssize_t size;
const char unicode_op = UNICODE;
encoded = raw_unicode_escape(obj);
if (encoded == NULL)
- goto error;
+ return -1;
- if (_Pickler_Write(self, &unicode_op, 1) < 0)
- goto error;
+ if (_Pickler_Write(self, &unicode_op, 1) < 0) {
+ Py_DECREF(encoded);
+ return -1;
+ }
size = PyBytes_GET_SIZE(encoded);
- if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
- goto error;
+ if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
+ Py_DECREF(encoded);
+ return -1;
+ }
+ Py_DECREF(encoded);
if (_Pickler_Write(self, "\n", 1) < 0)
- goto error;
+ return -1;
}
if (memo_put(self, obj) < 0)
- goto error;
+ return -1;
- Py_DECREF(encoded);
return 0;
-
- error:
- Py_XDECREF(encoded);
- return -1;
}
/* A helper for save_tuple. Push the len elements in tuple t on the stack. */