summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Lib/pickle.py 6
-rw-r--r-- Lib/pickletools.py 70
-rw-r--r-- Lib/test/pickletester.py 12
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Modules/_pickle.c 58
5 files changed, 93 insertions, 56 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py
index ca36274..c01a6af 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -485,7 +485,11 @@ class _Pickler:
def save_bytes(self, obj, pack=struct.pack):
if self.proto < 3:
- self.save_reduce(bytes, (list(obj),), obj=obj)
+ if len(obj) == 0:
+ self.save_reduce(bytes, (), obj=obj)
+ else:
+ self.save_reduce(codecs.encode,
+ (str(obj, 'latin1'), 'latin1'), obj=obj)
return
n = len(obj)
if n < 256:
diff --git a/Lib/pickletools.py b/Lib/pickletools.py
index 9086416..66f4edd 100644
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -2082,27 +2082,22 @@ _dis_test = r"""
29: ( MARK
30: d DICT (MARK at 29)
31: p PUT 2
- 34: c GLOBAL '__builtin__ bytes'
- 53: p PUT 3
- 56: ( MARK
- 57: ( MARK
- 58: l LIST (MARK at 57)
+ 34: c GLOBAL '_codecs encode'
+ 50: p PUT 3
+ 53: ( MARK
+ 54: V UNICODE 'abc'
59: p PUT 4
- 62: L LONG 97
- 67: a APPEND
- 68: L LONG 98
- 73: a APPEND
- 74: L LONG 99
- 79: a APPEND
- 80: t TUPLE (MARK at 56)
- 81: p PUT 5
- 84: R REDUCE
- 85: p PUT 6
- 88: V UNICODE 'def'
- 93: p PUT 7
- 96: s SETITEM
- 97: a APPEND
- 98: . STOP
+ 62: V UNICODE 'latin1'
+ 70: p PUT 5
+ 73: t TUPLE (MARK at 53)
+ 74: p PUT 6
+ 77: R REDUCE
+ 78: p PUT 7
+ 81: V UNICODE 'def'
+ 86: p PUT 8
+ 89: s SETITEM
+ 90: a APPEND
+ 91: . STOP
highest protocol among opcodes = 0
Try again with a "binary" pickle.
@@ -2121,25 +2116,22 @@ Try again with a "binary" pickle.
14: q BINPUT 1
16: } EMPTY_DICT
17: q BINPUT 2
- 19: c GLOBAL '__builtin__ bytes'
- 38: q BINPUT 3
- 40: ( MARK
- 41: ] EMPTY_LIST
- 42: q BINPUT 4
- 44: ( MARK
- 45: K BININT1 97
- 47: K BININT1 98
- 49: K BININT1 99
- 51: e APPENDS (MARK at 44)
- 52: t TUPLE (MARK at 40)
- 53: q BINPUT 5
- 55: R REDUCE
- 56: q BINPUT 6
- 58: X BINUNICODE 'def'
- 66: q BINPUT 7
- 68: s SETITEM
- 69: e APPENDS (MARK at 3)
- 70: . STOP
+ 19: c GLOBAL '_codecs encode'
+ 35: q BINPUT 3
+ 37: ( MARK
+ 38: X BINUNICODE 'abc'
+ 46: q BINPUT 4
+ 48: X BINUNICODE 'latin1'
+ 59: q BINPUT 5
+ 61: t TUPLE (MARK at 37)
+ 62: q BINPUT 6
+ 64: R REDUCE
+ 65: q BINPUT 7
+ 67: X BINUNICODE 'def'
+ 75: q BINPUT 8
+ 77: s SETITEM
+ 78: e APPENDS (MARK at 3)
+ 79: . STOP
highest protocol among opcodes = 1
Exercise the INST/OBJ/BUILD family.
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 828225d..831306f 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -637,9 +637,15 @@ class AbstractPickleTests(unittest.TestCase):
def test_bytes(self):
for proto in protocols:
- for u in b'', b'xyz', b'xyz'*100:
- p = self.dumps(u)
- self.assertEqual(self.loads(p), u)
+ for s in b'', b'xyz', b'xyz'*100:
+ p = self.dumps(s)
+ self.assertEqual(self.loads(p), s)
+ for s in [bytes([i]) for i in range(256)]:
+ p = self.dumps(s)
+ self.assertEqual(self.loads(p), s)
+ for s in [bytes([i, i]) for i in range(256)]:
+ p = self.dumps(s)
+ self.assertEqual(self.loads(p), s)
def test_ints(self):
import sys
diff --git a/Misc/NEWS b/Misc/NEWS
index 81b98db..bf7399d 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.3 Alpha 1?
Core and Builtins
-----------------
+- Issue #13505: Pickle bytes objects in a way that is compatible with
+ Python 2 when using protocols <= 2.
+
- Issue #11147: Fix an unused argument in _Py_ANNOTATE_MEMORY_ORDER. (Fix
given by Campbell Barton).
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index f1bb730..a0c1029 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -369,7 +369,7 @@ typedef struct UnpicklerObject {
char *errors; /* Name of the error-handling scheme to use when
decoding strings. The default value is
"strict". */
- Py_ssize_t *marks; /* Mark stack, used for unpickling container
+ Py_ssize_t *marks; /* Mark stack, used for unpickling container
objects. */
Py_ssize_t num_marks; /* Number of marks in the mark stack. */
Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
@@ -1708,26 +1708,58 @@ save_bytes(PicklerObject *self, PyObject *obj)
if (self->proto < 3) {
/* Older pickle protocols do not have an opcode for pickling bytes
objects. Therefore, we need to fake the copy protocol (i.e.,
- the __reduce__ method) to permit bytes object unpickling. */
+ the __reduce__ method) to permit bytes object unpickling.
+
+ Here we use a hack to be compatible with Python 2. Since in Python
+ 2 'bytes' is just an alias for 'str' (which has different
+ parameters than the actual bytes object), we use codecs.encode
+ to create the appropriate 'str' object when unpickled using
+ Python 2 *and* the appropriate 'bytes' object when unpickled
+ using Python 3. Again this is a hack and we don't need to do this
+ with newer protocols. */
+ static PyObject *codecs_encode = NULL;
PyObject *reduce_value = NULL;
- PyObject *bytelist = NULL;
int status;
- bytelist = PySequence_List(obj);
- if (bytelist == NULL)
- return -1;
+ if (codecs_encode == NULL) {
+ PyObject *codecs_module = PyImport_ImportModule("codecs");
+ if (codecs_module == NULL) {
+ return -1;
+ }
+ codecs_encode = PyObject_GetAttrString(codecs_module, "encode");
+ Py_DECREF(codecs_module);
+ if (codecs_encode == NULL) {
+ return -1;
+ }
+ }
- reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
- bytelist);
- if (reduce_value == NULL) {
- Py_DECREF(bytelist);
- return -1;
+ if (PyBytes_GET_SIZE(obj) == 0) {
+ reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
}
+ else {
+ static PyObject *latin1 = NULL;
+ PyObject *unicode_str =
+ PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
+ PyBytes_GET_SIZE(obj),
+ "strict");
+ if (unicode_str == NULL)
+ return -1;
+ if (latin1 == NULL) {
+ latin1 = PyUnicode_InternFromString("latin1");
+ if (latin1 == NULL)
+ return -1;
+ }
+ reduce_value = Py_BuildValue("(O(OO))",
+ codecs_encode, unicode_str, latin1);
+ Py_DECREF(unicode_str);
+ }
+
+ if (reduce_value == NULL)
+ return -1;
/* save_reduce() will memoize the object automatically. */
status = save_reduce(self, reduce_value, obj);
Py_DECREF(reduce_value);
- Py_DECREF(bytelist);
return status;
}
else {
@@ -1735,7 +1767,7 @@ save_bytes(PicklerObject *self, PyObject *obj)
char header[5];
Py_ssize_t len;
- size = PyBytes_Size(obj);
+ size = PyBytes_GET_SIZE(obj);
if (size < 0)
return -1;