summary refs log tree commit diff stats
path: root/Lib/pickle.py
diff options
context:
space:
mode:
author Alexandre Vassalotti <alexandre@peadrop.com> 2013-12-07 09:09:27 (GMT)
committer Alexandre Vassalotti <alexandre@peadrop.com> 2013-12-07 09:09:27 (GMT)
commit d05c9ff84501d93b13de40a9c7b0360c7d2ebada (patch)
tree ae840ca5e91d21e53cc60e6c3e7fdd64b5a9fec4 /Lib/pickle.py
parent ee07b94788e5e3e79f6632e92a5295adc3937bf4 (diff)
download cpython-d05c9ff84501d93b13de40a9c7b0360c7d2ebada.zip
cpython-d05c9ff84501d93b13de40a9c7b0360c7d2ebada.tar.gz
cpython-d05c9ff84501d93b13de40a9c7b0360c7d2ebada.tar.bz2
Issue #6784: Strings from Python 2 can now be unpickled as bytes objects.
Initial patch by Merlijn van Deen. I've added a few unrelated docstring fixes in the patch while I was at it, which makes the documentation for pickle a bit more consistent.
Diffstat (limited to 'Lib/pickle.py')
-rw-r--r-- Lib/pickle.py 71
1 files changed, 42 insertions, 29 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py
index c57149a..9cd0132 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -348,24 +348,25 @@ class _Pickler:
def __init__(self, file, protocol=None, *, fix_imports=True):
"""This takes a binary file for writing a pickle data stream.
- The optional protocol argument tells the pickler to use the
+ The optional *protocol* argument tells the pickler to use the
given protocol; supported protocols are 0, 1, 2, 3 and 4. The
- default protocol is 3; a backward-incompatible protocol designed for
- Python 3.
+ default protocol is 3; a backward-incompatible protocol designed
+ for Python 3.
Specifying a negative protocol version selects the highest
protocol version supported. The higher the protocol used, the
more recent the version of Python needed to read the pickle
produced.
- The file argument must have a write() method that accepts a single
- bytes argument. It can thus be a file object opened for binary
- writing, a io.BytesIO instance, or any other custom object that
- meets this interface.
+ The *file* argument must have a write() method that accepts a
+ single bytes argument. It can thus be a file object opened for
+ binary writing, an io.BytesIO instance, or any other custom
+ object that meets this interface.
- If fix_imports is True and protocol is less than 3, pickle will try to
- map the new Python 3 names to the old module names used in Python 2,
- so that the pickle data stream is readable with Python 2.
+ If *fix_imports* is True and *protocol* is less than 3, pickle
+ will try to map the new Python 3 names to the old module names
+ used in Python 2, so that the pickle data stream is readable
+ with Python 2.
"""
if protocol is None:
protocol = DEFAULT_PROTOCOL
@@ -389,10 +390,9 @@ class _Pickler:
"""Clears the pickler's "memo".
The memo is the data structure that remembers which objects the
- pickler has already seen, so that shared or recursive objects are
- pickled by reference and not by value. This method is useful when
- re-using picklers.
-
+ pickler has already seen, so that shared or recursive objects
+ are pickled by reference and not by value. This method is
+ useful when re-using picklers.
"""
self.memo.clear()
@@ -975,8 +975,14 @@ class _Unpickler:
encoding="ASCII", errors="strict"):
"""This takes a binary file for reading a pickle data stream.
- The protocol version of the pickle is detected automatically, so no
- proto argument is needed.
+ The protocol version of the pickle is detected automatically, so
+ no proto argument is needed.
+
+ The argument *file* must have two methods, a read() method that
+ takes an integer argument, and a readline() method that requires
+ no arguments. Both methods should return bytes. Thus *file*
+ can be a binary file object opened for reading, an io.BytesIO
+ object, or any other custom object that meets this interface.
The file-like object must have two methods, a read() method
that takes an integer argument, and a readline() method that
@@ -985,13 +991,14 @@ class _Unpickler:
reading, a BytesIO object, or any other custom object that
meets this interface.
- Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
- which are used to control compatiblity support for pickle stream
- generated by Python 2.x. If *fix_imports* is True, pickle will try to
- map the old Python 2.x names to the new names used in Python 3.x. The
- *encoding* and *errors* tell pickle how to decode 8-bit string
- instances pickled by Python 2.x; these default to 'ASCII' and
- 'strict', respectively.
+ Optional keyword arguments are *fix_imports*, *encoding* and
+ *errors*, which are used to control compatibility support for
+ pickle streams generated by Python 2. If *fix_imports* is True,
+ pickle will try to map the old Python 2 names to the new names
+ used in Python 3. The *encoding* and *errors* tell pickle how
+ to decode 8-bit string instances pickled by Python 2; these
+ default to 'ASCII' and 'strict', respectively. *encoding* can be
+ 'bytes' to read these 8-bit string instances as bytes objects.
"""
self._file_readline = file.readline
self._file_read = file.read
@@ -1139,6 +1146,15 @@ class _Unpickler:
self.append(unpack('>d', self.read(8))[0])
dispatch[BINFLOAT[0]] = load_binfloat
+ def _decode_string(self, value):
+ # Used to allow strings from Python 2 to be decoded either as
+ # bytes or Unicode strings. This should be used only with the
+ # STRING, BINSTRING and SHORT_BINSTRING opcodes.
+ if self.encoding == "bytes":
+ return value
+ else:
+ return value.decode(self.encoding, self.errors)
+
def load_string(self):
data = self.readline()[:-1]
# Strip outermost quotes
@@ -1146,8 +1162,7 @@ class _Unpickler:
data = data[1:-1]
else:
raise UnpicklingError("the STRING opcode argument must be quoted")
- self.append(codecs.escape_decode(data)[0]
- .decode(self.encoding, self.errors))
+ self.append(self._decode_string(codecs.escape_decode(data)[0]))
dispatch[STRING[0]] = load_string
def load_binstring(self):
@@ -1156,8 +1171,7 @@ class _Unpickler:
if len < 0:
raise UnpicklingError("BINSTRING pickle has negative byte count")
data = self.read(len)
- value = str(data, self.encoding, self.errors)
- self.append(value)
+ self.append(self._decode_string(data))
dispatch[BINSTRING[0]] = load_binstring
def load_binbytes(self):
@@ -1191,8 +1205,7 @@ class _Unpickler:
def load_short_binstring(self):
len = self.read(1)[0]
data = self.read(len)
- value = str(data, self.encoding, self.errors)
- self.append(value)
+ self.append(self._decode_string(data))
dispatch[SHORT_BINSTRING[0]] = load_short_binstring
def load_short_binbytes(self):