diff options
author | Alexandre Vassalotti <alexandre@peadrop.com> | 2011-12-13 18:08:09 (GMT) |
---|---|---|
committer | Alexandre Vassalotti <alexandre@peadrop.com> | 2011-12-13 18:08:09 (GMT) |
commit | 3bfc65a25ba2e010ef12db0cff006c2cbbeb18f9 (patch) | |
tree | 48d1cd0accf5405ab51196dee2f68551fd9e9175 /Modules/_pickle.c | |
parent | 7b7e39a61f0c784252f94eeaae8ee44a44968a6f (diff) | |
download | cpython-3bfc65a25ba2e010ef12db0cff006c2cbbeb18f9.zip cpython-3bfc65a25ba2e010ef12db0cff006c2cbbeb18f9.tar.gz cpython-3bfc65a25ba2e010ef12db0cff006c2cbbeb18f9.tar.bz2 |
Issue #13505: Make pickling of bytes objects compatible with Python 2.
Initial patch by sbt.
Diffstat (limited to 'Modules/_pickle.c')
-rw-r--r-- | Modules/_pickle.c | 58 |
1 file changed, 45 insertions, 13 deletions
diff --git a/Modules/_pickle.c b/Modules/_pickle.c index cb58349..fc5f871 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -369,7 +369,7 @@ typedef struct UnpicklerObject { char *errors; /* Name of errors handling scheme to used when decoding strings. The default value is "strict". */ - Py_ssize_t *marks; /* Mark stack, used for unpickling container + Py_ssize_t *marks; /* Mark stack, used for unpickling container objects. */ Py_ssize_t num_marks; /* Number of marks in the mark stack. */ Py_ssize_t marks_size; /* Current allocated size of the mark stack. */ @@ -1700,26 +1700,58 @@ save_bytes(PicklerObject *self, PyObject *obj) if (self->proto < 3) { /* Older pickle protocols do not have an opcode for pickling bytes objects. Therefore, we need to fake the copy protocol (i.e., - the __reduce__ method) to permit bytes object unpickling. */ + the __reduce__ method) to permit bytes object unpickling. + + Here we use a hack to be compatible with Python 2. Since in Python + 2 'bytes' is just an alias for 'str' (which has different + parameters than the actual bytes object), we use codecs.encode + to create the appropriate 'str' object when unpickled using + Python 2 *and* the appropriate 'bytes' object when unpickled + using Python 3. Again this is a hack and we don't need to do this + with newer protocols. 
*/ + static PyObject *codecs_encode = NULL; PyObject *reduce_value = NULL; - PyObject *bytelist = NULL; int status; - bytelist = PySequence_List(obj); - if (bytelist == NULL) - return -1; + if (codecs_encode == NULL) { + PyObject *codecs_module = PyImport_ImportModule("codecs"); + if (codecs_module == NULL) { + return -1; + } + codecs_encode = PyObject_GetAttrString(codecs_module, "encode"); + Py_DECREF(codecs_module); + if (codecs_encode == NULL) { + return -1; + } + } - reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type, - bytelist); - if (reduce_value == NULL) { - Py_DECREF(bytelist); - return -1; + if (PyBytes_GET_SIZE(obj) == 0) { + reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type); } + else { + static PyObject *latin1 = NULL; + PyObject *unicode_str = + PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj), + PyBytes_GET_SIZE(obj), + "strict"); + if (unicode_str == NULL) + return -1; + if (latin1 == NULL) { + latin1 = PyUnicode_InternFromString("latin1"); + if (latin1 == NULL) + return -1; + } + reduce_value = Py_BuildValue("(O(OO))", + codecs_encode, unicode_str, latin1); + Py_DECREF(unicode_str); + } + + if (reduce_value == NULL) + return -1; /* save_reduce() will memoize the object automatically. */ status = save_reduce(self, reduce_value, obj); Py_DECREF(reduce_value); - Py_DECREF(bytelist); return status; } else { @@ -1727,7 +1759,7 @@ save_bytes(PicklerObject *self, PyObject *obj) char header[5]; Py_ssize_t len; - size = PyBytes_Size(obj); + size = PyBytes_GET_SIZE(obj); if (size < 0) return -1; |