summary refs log tree commit diff stats
path: root/Modules/_pickle.c
diff options
context:
space:
mode:
author Alexandre Vassalotti <alexandre@peadrop.com> 2011-12-13 18:08:09 (GMT)
committer Alexandre Vassalotti <alexandre@peadrop.com> 2011-12-13 18:08:09 (GMT)
commit 3bfc65a25ba2e010ef12db0cff006c2cbbeb18f9 (patch)
tree 48d1cd0accf5405ab51196dee2f68551fd9e9175 /Modules/_pickle.c
parent 7b7e39a61f0c784252f94eeaae8ee44a44968a6f (diff)
download cpython-3bfc65a25ba2e010ef12db0cff006c2cbbeb18f9.zip
cpython-3bfc65a25ba2e010ef12db0cff006c2cbbeb18f9.tar.gz
cpython-3bfc65a25ba2e010ef12db0cff006c2cbbeb18f9.tar.bz2
Issue #13505: Make pickling of bytes object compatible with Python 2.
Initial patch by sbt.
Diffstat (limited to 'Modules/_pickle.c')
-rw-r--r-- Modules/_pickle.c 58
1 file changed, 45 insertions, 13 deletions
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index cb58349..fc5f871 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -369,7 +369,7 @@ typedef struct UnpicklerObject {
char *errors; /* Name of errors handling scheme to used when
decoding strings. The default value is
"strict". */
- Py_ssize_t *marks; /* Mark stack, used for unpickling container
+ Py_ssize_t *marks; /* Mark stack, used for unpickling container
objects. */
Py_ssize_t num_marks; /* Number of marks in the mark stack. */
Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
@@ -1700,26 +1700,58 @@ save_bytes(PicklerObject *self, PyObject *obj)
if (self->proto < 3) {
/* Older pickle protocols do not have an opcode for pickling bytes
objects. Therefore, we need to fake the copy protocol (i.e.,
- the __reduce__ method) to permit bytes object unpickling. */
+ the __reduce__ method) to permit bytes object unpickling.
+
+ Here we use a hack to be compatible with Python 2. Since in Python
+ 2 'bytes' is just an alias for 'str' (which has different
+ parameters than the actual bytes object), we use codecs.encode
+ to create the appropriate 'str' object when unpickled using
+ Python 2 *and* the appropriate 'bytes' object when unpickled
+ using Python 3. Again this is a hack and we don't need to do this
+ with newer protocols. */
+ static PyObject *codecs_encode = NULL;
PyObject *reduce_value = NULL;
- PyObject *bytelist = NULL;
int status;
- bytelist = PySequence_List(obj);
- if (bytelist == NULL)
- return -1;
+ if (codecs_encode == NULL) {
+ PyObject *codecs_module = PyImport_ImportModule("codecs");
+ if (codecs_module == NULL) {
+ return -1;
+ }
+ codecs_encode = PyObject_GetAttrString(codecs_module, "encode");
+ Py_DECREF(codecs_module);
+ if (codecs_encode == NULL) {
+ return -1;
+ }
+ }
- reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
- bytelist);
- if (reduce_value == NULL) {
- Py_DECREF(bytelist);
- return -1;
+ if (PyBytes_GET_SIZE(obj) == 0) {
+ reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
}
+ else {
+ static PyObject *latin1 = NULL;
+ PyObject *unicode_str =
+ PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
+ PyBytes_GET_SIZE(obj),
+ "strict");
+ if (unicode_str == NULL)
+ return -1;
+ if (latin1 == NULL) {
+ latin1 = PyUnicode_InternFromString("latin1");
+ if (latin1 == NULL)
+ return -1;
+ }
+ reduce_value = Py_BuildValue("(O(OO))",
+ codecs_encode, unicode_str, latin1);
+ Py_DECREF(unicode_str);
+ }
+
+ if (reduce_value == NULL)
+ return -1;
/* save_reduce() will memoize the object automatically. */
status = save_reduce(self, reduce_value, obj);
Py_DECREF(reduce_value);
- Py_DECREF(bytelist);
return status;
}
else {
@@ -1727,7 +1759,7 @@ save_bytes(PicklerObject *self, PyObject *obj)
char header[5];
Py_ssize_t len;
- size = PyBytes_Size(obj);
+ size = PyBytes_GET_SIZE(obj);
if (size < 0)
return -1;