diff options
author | Alexandre Vassalotti <alexandre@peadrop.com> | 2013-12-07 09:09:27 (GMT) |
---|---|---|
committer | Alexandre Vassalotti <alexandre@peadrop.com> | 2013-12-07 09:09:27 (GMT) |
commit | d05c9ff84501d93b13de40a9c7b0360c7d2ebada (patch) | |
tree | ae840ca5e91d21e53cc60e6c3e7fdd64b5a9fec4 /Lib/pickle.py | |
parent | ee07b94788e5e3e79f6632e92a5295adc3937bf4 (diff) | |
download | cpython-d05c9ff84501d93b13de40a9c7b0360c7d2ebada.zip cpython-d05c9ff84501d93b13de40a9c7b0360c7d2ebada.tar.gz cpython-d05c9ff84501d93b13de40a9c7b0360c7d2ebada.tar.bz2 |
Issue #6784: Strings from Python 2 can now be unpickled as bytes objects.
Initial patch by Merlijn van Deen.
I've added a few unrelated docstring fixes in the patch while I was at
it, which makes the documentation for pickle a bit more consistent.
Diffstat (limited to 'Lib/pickle.py')
-rw-r--r-- | Lib/pickle.py | 71 |
1 files changed, 42 insertions, 29 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py index c57149a..9cd0132 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -348,24 +348,25 @@ class _Pickler: def __init__(self, file, protocol=None, *, fix_imports=True): """This takes a binary file for writing a pickle data stream. - The optional protocol argument tells the pickler to use the + The optional *protocol* argument tells the pickler to use the given protocol; supported protocols are 0, 1, 2, 3 and 4. The - default protocol is 3; a backward-incompatible protocol designed for - Python 3. + default protocol is 3; a backward-incompatible protocol designed + for Python 3. Specifying a negative protocol version selects the highest protocol version supported. The higher the protocol used, the more recent the version of Python needed to read the pickle produced. - The file argument must have a write() method that accepts a single - bytes argument. It can thus be a file object opened for binary - writing, a io.BytesIO instance, or any other custom object that - meets this interface. + The *file* argument must have a write() method that accepts a + single bytes argument. It can thus be a file object opened for + binary writing, a io.BytesIO instance, or any other custom + object that meets this interface. - If fix_imports is True and protocol is less than 3, pickle will try to - map the new Python 3 names to the old module names used in Python 2, - so that the pickle data stream is readable with Python 2. + If *fix_imports* is True and *protocol* is less than 3, pickle + will try to map the new Python 3 names to the old module names + used in Python 2, so that the pickle data stream is readable + with Python 2. """ if protocol is None: protocol = DEFAULT_PROTOCOL @@ -389,10 +390,9 @@ class _Pickler: """Clears the pickler's "memo". The memo is the data structure that remembers which objects the - pickler has already seen, so that shared or recursive objects are - pickled by reference and not by value. 
This method is useful when - re-using picklers. - + pickler has already seen, so that shared or recursive objects + are pickled by reference and not by value. This method is + useful when re-using picklers. """ self.memo.clear() @@ -975,8 +975,14 @@ class _Unpickler: encoding="ASCII", errors="strict"): """This takes a binary file for reading a pickle data stream. - The protocol version of the pickle is detected automatically, so no - proto argument is needed. + The protocol version of the pickle is detected automatically, so + no proto argument is needed. + + The argument *file* must have two methods, a read() method that + takes an integer argument, and a readline() method that requires + no arguments. Both methods should return bytes. Thus *file* + can be a binary file object opened for reading, a io.BytesIO + object, or any other custom object that meets this interface. The file-like object must have two methods, a read() method that takes an integer argument, and a readline() method that @@ -985,13 +991,14 @@ class _Unpickler: reading, a BytesIO object, or any other custom object that meets this interface. - Optional keyword arguments are *fix_imports*, *encoding* and *errors*, - which are used to control compatiblity support for pickle stream - generated by Python 2.x. If *fix_imports* is True, pickle will try to - map the old Python 2.x names to the new names used in Python 3.x. The - *encoding* and *errors* tell pickle how to decode 8-bit string - instances pickled by Python 2.x; these default to 'ASCII' and - 'strict', respectively. + Optional keyword arguments are *fix_imports*, *encoding* and + *errors*, which are used to control compatiblity support for + pickle stream generated by Python 2. If *fix_imports* is True, + pickle will try to map the old Python 2 names to the new names + used in Python 3. The *encoding* and *errors* tell pickle how + to decode 8-bit string instances pickled by Python 2; these + default to 'ASCII' and 'strict', respectively. 
*encoding* can be + 'bytes' to read these 8-bit string instances as bytes objects. """ self._file_readline = file.readline self._file_read = file.read @@ -1139,6 +1146,15 @@ class _Unpickler: self.append(unpack('>d', self.read(8))[0]) dispatch[BINFLOAT[0]] = load_binfloat + def _decode_string(self, value): + # Used to allow strings from Python 2 to be decoded either as + # bytes or Unicode strings. This should be used only with the + # STRING, BINSTRING and SHORT_BINSTRING opcodes. + if self.encoding == "bytes": + return value + else: + return value.decode(self.encoding, self.errors) + def load_string(self): data = self.readline()[:-1] # Strip outermost quotes @@ -1146,8 +1162,7 @@ class _Unpickler: data = data[1:-1] else: raise UnpicklingError("the STRING opcode argument must be quoted") - self.append(codecs.escape_decode(data)[0] - .decode(self.encoding, self.errors)) + self.append(self._decode_string(codecs.escape_decode(data)[0])) dispatch[STRING[0]] = load_string def load_binstring(self): @@ -1156,8 +1171,7 @@ class _Unpickler: if len < 0: raise UnpicklingError("BINSTRING pickle has negative byte count") data = self.read(len) - value = str(data, self.encoding, self.errors) - self.append(value) + self.append(self._decode_string(data)) dispatch[BINSTRING[0]] = load_binstring def load_binbytes(self): @@ -1191,8 +1205,7 @@ class _Unpickler: def load_short_binstring(self): len = self.read(1)[0] data = self.read(len) - value = str(data, self.encoding, self.errors) - self.append(value) + self.append(self._decode_string(data)) dispatch[SHORT_BINSTRING[0]] = load_short_binstring def load_short_binbytes(self): |