Issue #6784: Strings from Python 2 can now be unpickled as bytes objects.

Initial patch by Merlijn van Deen. I've added a few unrelated docstring fixes in the patch while I was at it, which makes the documentation for pickle a bit more consistent.
author: Alexandre Vassalotti <alexandre@peadrop.com> 2013-12-07 09:09:27 (GMT)
committer: Alexandre Vassalotti <alexandre@peadrop.com> 2013-12-07 09:09:27 (GMT)
commit: d05c9ff84501d93b13de40a9c7b0360c7d2ebada (patch)
tree: ae840ca5e91d21e53cc60e6c3e7fdd64b5a9fec4 /Lib/pickletools.py
parent: ee07b94788e5e3e79f6632e92a5295adc3937bf4 (diff)
download: cpython-d05c9ff84501d93b13de40a9c7b0360c7d2ebada.zip
cpython-d05c9ff84501d93b13de40a9c7b0360c7d2ebada.tar.gz
cpython-d05c9ff84501d93b13de40a9c7b0360c7d2ebada.tar.bz2
1 files changed, 92 insertions, 93 deletions
diff --git a/Lib/pickletools.py b/Lib/pickletools.py
index a2480f6..71c2aa1 100644
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -969,113 +969,107 @@ class StackObject(object):
         return self.name
 
 
-pyint = StackObject(
-            name='int',
-            obtype=int,
-            doc="A short (as opposed to long) Python integer object.")
-
-pylong = StackObject(
-             name='long',
-             obtype=int,
-             doc="A long (as opposed to short) Python integer object.")
+pyint = pylong = StackObject(
+    name='int',
+    obtype=int,
+    doc="A Python integer object.")
 
 pyinteger_or_bool = StackObject(
-                        name='int_or_bool',
-                        obtype=(int, bool),
-                        doc="A Python integer object (short or long), or "
-                            "a Python bool.")
+    name='int_or_bool',
+    obtype=(int, bool),
+    doc="A Python integer or boolean object.")
 
 pybool = StackObject(
-             name='bool',
-             obtype=(bool,),
-             doc="A Python bool object.")
+    name='bool',
+    obtype=bool,
+    doc="A Python boolean object.")
 
 pyfloat = StackObject(
-              name='float',
-              obtype=float,
-              doc="A Python float object.")
+    name='float',
+    obtype=float,
+    doc="A Python float object.")
 
-pystring = StackObject(
-               name='string',
-               obtype=bytes,
-               doc="A Python (8-bit) string object.")
+pybytes_or_str = pystring = StackObject(
+    name='bytes_or_str',
+    obtype=(bytes, str),
+    doc="A Python bytes or (Unicode) string object.")
 
 pybytes = StackObject(
-               name='bytes',
-               obtype=bytes,
-               doc="A Python bytes object.")
+    name='bytes',
+    obtype=bytes,
+    doc="A Python bytes object.")
 
 pyunicode = StackObject(
-                name='str',
-                obtype=str,
-                doc="A Python (Unicode) string object.")
+    name='str',
+    obtype=str,
+    doc="A Python (Unicode) string object.")
 
 pynone = StackObject(
-             name="None",
-             obtype=type(None),
-             doc="The Python None object.")
+    name="None",
+    obtype=type(None),
+    doc="The Python None object.")
 
 pytuple = StackObject(
-              name="tuple",
-              obtype=tuple,
-              doc="A Python tuple object.")
+    name="tuple",
+    obtype=tuple,
+    doc="A Python tuple object.")
 
 pylist = StackObject(
-             name="list",
-             obtype=list,
-             doc="A Python list object.")
+    name="list",
+    obtype=list,
+    doc="A Python list object.")
 
 pydict = StackObject(
-             name="dict",
-             obtype=dict,
-             doc="A Python dict object.")
+    name="dict",
+    obtype=dict,
+    doc="A Python dict object.")
 
 pyset = StackObject(
-            name="set",
-            obtype=set,
-            doc="A Python set object.")
+    name="set",
+    obtype=set,
+    doc="A Python set object.")
 
 pyfrozenset = StackObject(
-                  name="frozenset",
-                  obtype=set,
-                  doc="A Python frozenset object.")
+    name="frozenset",
+    obtype=set,
+    doc="A Python frozenset object.")
 
 anyobject = StackObject(
-                name='any',
-                obtype=object,
-                doc="Any kind of object whatsoever.")
+    name='any',
+    obtype=object,
+    doc="Any kind of object whatsoever.")
 
 markobject = StackObject(
-                 name="mark",
-                 obtype=StackObject,
-                 doc="""'The mark' is a unique object.
-
-                 Opcodes that operate on a variable number of objects
-                 generally don't embed the count of objects in the opcode,
-                 or pull it off the stack.  Instead the MARK opcode is used
-                 to push a special marker object on the stack, and then
-                 some other opcodes grab all the objects from the top of
-                 the stack down to (but not including) the topmost marker
-                 object.
-                 """)
+    name="mark",
+    obtype=StackObject,
+    doc="""'The mark' is a unique object.
+
+Opcodes that operate on a variable number of objects
+generally don't embed the count of objects in the opcode,
+or pull it off the stack.  Instead the MARK opcode is used
+to push a special marker object on the stack, and then
+some other opcodes grab all the objects from the top of
+the stack down to (but not including) the topmost marker
+object.
+""")
 
 stackslice = StackObject(
-                 name="stackslice",
-                 obtype=StackObject,
-                 doc="""An object representing a contiguous slice of the stack.
+    name="stackslice",
+    obtype=StackObject,
+    doc="""An object representing a contiguous slice of the stack.
 
-                 This is used in conjunction with markobject, to represent all
-                 of the stack following the topmost markobject.  For example,
-                 the POP_MARK opcode changes the stack from
+This is used in conjunction with markobject, to represent all
+of the stack following the topmost markobject.  For example,
+the POP_MARK opcode changes the stack from
 
-                     [..., markobject, stackslice]
-                 to
-                     [...]
+    [..., markobject, stackslice]
+to
+    [...]
 
-                 No matter how many object are on the stack after the topmost
-                 markobject, POP_MARK gets rid of all of them (including the
-                 topmost markobject too).
-                 """)
+No matter how many objects are on the stack after the topmost
+markobject, POP_MARK gets rid of all of them (including the
+topmost markobject too).
+""")
 
 ##############################################################################
 # Descriptors for pickle opcodes.
@@ -1212,7 +1206,7 @@ opcodes = [
       code='L',
       arg=decimalnl_long,
       stack_before=[],
-      stack_after=[pylong],
+      stack_after=[pyint],
       proto=0,
       doc="""Push a long integer.
 
@@ -1230,7 +1224,7 @@ opcodes = [
       code='\x8a',
       arg=long1,
       stack_before=[],
-      stack_after=[pylong],
+      stack_after=[pyint],
       proto=2,
       doc="""Long integer using one-byte length.
 
@@ -1241,7 +1235,7 @@ opcodes = [
       code='\x8b',
       arg=long4,
       stack_before=[],
-      stack_after=[pylong],
+      stack_after=[pyint],
       proto=2,
       doc="""Long integer using found-byte length.
 
@@ -1254,45 +1248,50 @@ opcodes = [
       code='S',
       arg=stringnl,
       stack_before=[],
-      stack_after=[pystring],
+      stack_after=[pybytes_or_str],
       proto=0,
       doc="""Push a Python string object.
 
       The argument is a repr-style string, with bracketing quote characters,
       and perhaps embedded escapes.  The argument extends until the next
-      newline character.  (Actually, they are decoded into a str instance
+      newline character.  These are usually decoded into a str instance
       using the encoding given to the Unpickler constructor. or the default,
-      'ASCII'.)
+      'ASCII'.  If the encoding given was 'bytes' however, they will be
+      decoded as a bytes object instead.
       """),
 
     I(name='BINSTRING',
       code='T',
       arg=string4,
       stack_before=[],
-      stack_after=[pystring],
+      stack_after=[pybytes_or_str],
       proto=1,
       doc="""Push a Python string object.
 
-      There are two arguments:  the first is a 4-byte little-endian signed int
-      giving the number of bytes in the string, and the second is that many
-      bytes, which are taken literally as the string content.  (Actually,
-      they are decoded into a str instance using the encoding given to the
-      Unpickler constructor. or the default, 'ASCII'.)
+      There are two arguments: the first is a 4-byte little-endian
+      signed int giving the number of bytes in the string, and the
+      second is that many bytes, which are taken literally as the string
+      content.  These are usually decoded into a str instance using the
+      encoding given to the Unpickler constructor, or the default,
+      'ASCII'.  If the encoding given was 'bytes' however, they will be
+      decoded as a bytes object instead.
       """),
 
     I(name='SHORT_BINSTRING',
       code='U',
       arg=string1,
       stack_before=[],
-      stack_after=[pystring],
+      stack_after=[pybytes_or_str],
       proto=1,
       doc="""Push a Python string object.
 
-      There are two arguments:  the first is a 1-byte unsigned int giving
-      the number of bytes in the string, and the second is that many bytes,
-      which are taken literally as the string content.  (Actually, they
-      are decoded into a str instance using the encoding given to the
-      Unpickler constructor. or the default, 'ASCII'.)
+      There are two arguments: the first is a 1-byte unsigned int giving
+      the number of bytes in the string, and the second is that many
+      bytes, which are taken literally as the string content.  These are
+      usually decoded into a str instance using the encoding given to
+      the Unpickler constructor, or the default, 'ASCII'.  If the
+      encoding given was 'bytes' however, they will be decoded as a bytes
+      object instead.
       """),
 
     # Bytes (protocol 3 only; older protocols don't support bytes at all)
author	Alexandre Vassalotti <alexandre@peadrop.com>	2013-12-07 09:09:27 (GMT)
committer	Alexandre Vassalotti <alexandre@peadrop.com>	2013-12-07 09:09:27 (GMT)
commit	d05c9ff84501d93b13de40a9c7b0360c7d2ebada (patch)
tree	ae840ca5e91d21e53cc60e6c3e7fdd64b5a9fec4 /Lib/pickletools.py
parent	ee07b94788e5e3e79f6632e92a5295adc3937bf4 (diff)
download	cpython-d05c9ff84501d93b13de40a9c7b0360c7d2ebada.zip cpython-d05c9ff84501d93b13de40a9c7b0360c7d2ebada.tar.gz cpython-d05c9ff84501d93b13de40a9c7b0360c7d2ebada.tar.bz2