diff options
Diffstat (limited to 'Modules/_pickle.c')
-rw-r--r-- | Modules/_pickle.c | 139 |
1 files changed, 80 insertions, 59 deletions
diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 195ee5d..bbae2b1 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -1748,8 +1748,10 @@ save_bytes(PicklerObject *self, PyObject *obj) return -1; if (latin1 == NULL) { latin1 = PyUnicode_InternFromString("latin1"); - if (latin1 == NULL) + if (latin1 == NULL) { + Py_DECREF(unicode_str); return -1; + } } reduce_value = Py_BuildValue("(O(OO))", codecs_encode, unicode_str, latin1); @@ -1873,63 +1875,97 @@ done: } static int -save_unicode(PicklerObject *self, PyObject *obj) +write_utf8(PicklerObject *self, char *data, Py_ssize_t size) { - Py_ssize_t size; - PyObject *encoded = NULL; + char pdata[5]; - if (self->bin) { - char pdata[5]; +#if SIZEOF_SIZE_T > 4 + if (size > 0xffffffffUL) { + /* string too large */ + PyErr_SetString(PyExc_OverflowError, + "cannot serialize a string larger than 4GiB"); + return -1; + } +#endif - encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass"); - if (encoded == NULL) - goto error; + pdata[0] = BINUNICODE; + pdata[1] = (unsigned char)(size & 0xff); + pdata[2] = (unsigned char)((size >> 8) & 0xff); + pdata[3] = (unsigned char)((size >> 16) & 0xff); + pdata[4] = (unsigned char)((size >> 24) & 0xff); - size = PyBytes_GET_SIZE(encoded); - if (size > 0xffffffffL) { - PyErr_SetString(PyExc_OverflowError, - "cannot serialize a string larger than 4 GiB"); - goto error; /* string too large */ - } + if (_Pickler_Write(self, pdata, sizeof(pdata)) < 0) + return -1; - pdata[0] = BINUNICODE; - pdata[1] = (unsigned char)(size & 0xff); - pdata[2] = (unsigned char)((size >> 8) & 0xff); - pdata[3] = (unsigned char)((size >> 16) & 0xff); - pdata[4] = (unsigned char)((size >> 24) & 0xff); + if (_Pickler_Write(self, data, size) < 0) + return -1; - if (_Pickler_Write(self, pdata, 5) < 0) - goto error; + return 0; +} - if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) - goto error; +static int +write_unicode_binary(PicklerObject *self, PyObject *obj) +{ + PyObject *encoded = NULL; + Py_ssize_t size; + char *data; + int r; + + if (PyUnicode_READY(obj)) + return -1; + + data = PyUnicode_AsUTF8AndSize(obj, &size); + if (data != NULL) + return write_utf8(self, data, size); + + /* Issue #8383: for strings with lone surrogates, fallback on the + "surrogatepass" error handler. */ + PyErr_Clear(); + encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass"); + if (encoded == NULL) + return -1; + + r = write_utf8(self, PyBytes_AS_STRING(encoded), + PyBytes_GET_SIZE(encoded)); + Py_DECREF(encoded); + return r; +} + +static int +save_unicode(PicklerObject *self, PyObject *obj) +{ + if (self->bin) { + if (write_unicode_binary(self, obj) < 0) + return -1; } else { + PyObject *encoded; + Py_ssize_t size; const char unicode_op = UNICODE; encoded = raw_unicode_escape(obj); if (encoded == NULL) - goto error; + return -1; - if (_Pickler_Write(self, &unicode_op, 1) < 0) - goto error; + if (_Pickler_Write(self, &unicode_op, 1) < 0) { + Py_DECREF(encoded); + return -1; + } size = PyBytes_GET_SIZE(encoded); - if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) - goto error; + if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) { + Py_DECREF(encoded); + return -1; + } + Py_DECREF(encoded); if (_Pickler_Write(self, "\n", 1) < 0) - goto error; + return -1; } if (memo_put(self, obj) < 0) - goto error; + return -1; - Py_DECREF(encoded); return 0; - - error: - Py_XDECREF(encoded); - return -1; } /* A helper for save_tuple. Push the len elements in tuple t on the stack. */ @@ -4171,36 +4207,23 @@ load_string(UnpicklerObject *self) if ((len = _Unpickler_Readline(self, &s)) < 0) return -1; - if (len < 2) - return bad_readline(); - if ((s = strdup(s)) == NULL) { - PyErr_NoMemory(); - return -1; - } - + /* Strip the newline */ + len--; /* Strip outermost quotes */ - while (len > 0 && s[len - 1] <= ' ') - len--; - if (len > 1 && s[0] == '"' && s[len - 1] == '"') { - s[len - 1] = '\0'; - p = s + 1; - len -= 2; - } - else if (len > 1 && s[0] == '\'' && s[len - 1] == '\'') { - s[len - 1] = '\0'; + if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) { p = s + 1; len -= 2; } else { - free(s); - PyErr_SetString(PyExc_ValueError, "insecure string pickle"); + PyErr_SetString(UnpicklingError, + "the STRING opcode argument must be quoted"); return -1; } + assert(len >= 0); /* Use the PyBytes API to decode the string, since that is what is used to encode, and then coerce the result to Unicode. */ bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL); - free(s); if (bytes == NULL) return -1; str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors); @@ -4226,8 +4249,7 @@ load_binbytes(UnpicklerObject *self) if (x < 0) { PyErr_Format(PyExc_OverflowError, "BINBYTES exceeds system's maximum size of %zd bytes", - PY_SSIZE_T_MAX - ); + PY_SSIZE_T_MAX); return -1; } @@ -4351,8 +4373,7 @@ load_binunicode(UnpicklerObject *self) if (size < 0) { PyErr_Format(PyExc_OverflowError, "BINUNICODE exceeds system's maximum size of %zd bytes", - PY_SSIZE_T_MAX - ); + PY_SSIZE_T_MAX); return -1; } |