diff options
author | Alexandre Vassalotti <alexandre@peadrop.com> | 2013-12-07 09:09:27 (GMT) |
---|---|---|
committer | Alexandre Vassalotti <alexandre@peadrop.com> | 2013-12-07 09:09:27 (GMT) |
commit | d05c9ff84501d93b13de40a9c7b0360c7d2ebada (patch) | |
tree | ae840ca5e91d21e53cc60e6c3e7fdd64b5a9fec4 /Lib/pickletools.py | |
parent | ee07b94788e5e3e79f6632e92a5295adc3937bf4 (diff) | |
download | cpython-d05c9ff84501d93b13de40a9c7b0360c7d2ebada.zip cpython-d05c9ff84501d93b13de40a9c7b0360c7d2ebada.tar.gz cpython-d05c9ff84501d93b13de40a9c7b0360c7d2ebada.tar.bz2 |
Issue #6784: Strings from Python 2 can now be unpickled as bytes objects.
Initial patch by Merlijn van Deen.
I've added a few unrelated docstring fixes in the patch while I was at
it, which makes the documentation for pickle a bit more consistent.
Diffstat (limited to 'Lib/pickletools.py')
-rw-r--r-- | Lib/pickletools.py | 185 |
1 files changed, 92 insertions, 93 deletions
diff --git a/Lib/pickletools.py b/Lib/pickletools.py index a2480f6..71c2aa1 100644 --- a/Lib/pickletools.py +++ b/Lib/pickletools.py @@ -969,113 +969,107 @@ class StackObject(object): return self.name -pyint = StackObject( - name='int', - obtype=int, - doc="A short (as opposed to long) Python integer object.") - -pylong = StackObject( - name='long', - obtype=int, - doc="A long (as opposed to short) Python integer object.") +pyint = pylong = StackObject( + name='int', + obtype=int, + doc="A Python integer object.") pyinteger_or_bool = StackObject( - name='int_or_bool', - obtype=(int, bool), - doc="A Python integer object (short or long), or " - "a Python bool.") + name='int_or_bool', + obtype=(int, bool), + doc="A Python integer or boolean object.") pybool = StackObject( - name='bool', - obtype=(bool,), - doc="A Python bool object.") + name='bool', + obtype=bool, + doc="A Python boolean object.") pyfloat = StackObject( - name='float', - obtype=float, - doc="A Python float object.") + name='float', + obtype=float, + doc="A Python float object.") -pystring = StackObject( - name='string', - obtype=bytes, - doc="A Python (8-bit) string object.") +pybytes_or_str = pystring = StackObject( + name='bytes_or_str', + obtype=(bytes, str), + doc="A Python bytes or (Unicode) string object.") pybytes = StackObject( - name='bytes', - obtype=bytes, - doc="A Python bytes object.") + name='bytes', + obtype=bytes, + doc="A Python bytes object.") pyunicode = StackObject( - name='str', - obtype=str, - doc="A Python (Unicode) string object.") + name='str', + obtype=str, + doc="A Python (Unicode) string object.") pynone = StackObject( - name="None", - obtype=type(None), - doc="The Python None object.") + name="None", + obtype=type(None), + doc="The Python None object.") pytuple = StackObject( - name="tuple", - obtype=tuple, - doc="A Python tuple object.") + name="tuple", + obtype=tuple, + doc="A Python tuple object.") pylist = StackObject( - name="list", - obtype=list, - doc="A Python list object.") + name="list", + obtype=list, + doc="A Python list object.") pydict = StackObject( - name="dict", - obtype=dict, - doc="A Python dict object.") + name="dict", + obtype=dict, + doc="A Python dict object.") pyset = StackObject( - name="set", - obtype=set, - doc="A Python set object.") + name="set", + obtype=set, + doc="A Python set object.") pyfrozenset = StackObject( - name="frozenset", - obtype=set, - doc="A Python frozenset object.") + name="frozenset", + obtype=set, + doc="A Python frozenset object.") anyobject = StackObject( - name='any', - obtype=object, - doc="Any kind of object whatsoever.") + name='any', + obtype=object, + doc="Any kind of object whatsoever.") markobject = StackObject( - name="mark", - obtype=StackObject, - doc="""'The mark' is a unique object. - - Opcodes that operate on a variable number of objects - generally don't embed the count of objects in the opcode, - or pull it off the stack. Instead the MARK opcode is used - to push a special marker object on the stack, and then - some other opcodes grab all the objects from the top of - the stack down to (but not including) the topmost marker - object. - """) + name="mark", + obtype=StackObject, + doc="""'The mark' is a unique object. + +Opcodes that operate on a variable number of objects +generally don't embed the count of objects in the opcode, +or pull it off the stack. Instead the MARK opcode is used +to push a special marker object on the stack, and then +some other opcodes grab all the objects from the top of +the stack down to (but not including) the topmost marker +object. +""") stackslice = StackObject( - name="stackslice", - obtype=StackObject, - doc="""An object representing a contiguous slice of the stack. + name="stackslice", + obtype=StackObject, + doc="""An object representing a contiguous slice of the stack. - This is used in conjunction with markobject, to represent all - of the stack following the topmost markobject. For example, - the POP_MARK opcode changes the stack from +This is used in conjunction with markobject, to represent all +of the stack following the topmost markobject. For example, +the POP_MARK opcode changes the stack from - [..., markobject, stackslice] - to - [...] + [..., markobject, stackslice] +to + [...] - No matter how many object are on the stack after the topmost - markobject, POP_MARK gets rid of all of them (including the - topmost markobject too). - """) +No matter how many object are on the stack after the topmost +markobject, POP_MARK gets rid of all of them (including the +topmost markobject too). +""") ############################################################################## # Descriptors for pickle opcodes. @@ -1212,7 +1206,7 @@ opcodes = [ code='L', arg=decimalnl_long, stack_before=[], - stack_after=[pylong], + stack_after=[pyint], proto=0, doc="""Push a long integer. @@ -1230,7 +1224,7 @@ opcodes = [ code='\x8a', arg=long1, stack_before=[], - stack_after=[pylong], + stack_after=[pyint], proto=2, doc="""Long integer using one-byte length. @@ -1241,7 +1235,7 @@ opcodes = [ code='\x8b', arg=long4, stack_before=[], - stack_after=[pylong], + stack_after=[pyint], proto=2, doc="""Long integer using found-byte length. @@ -1254,45 +1248,50 @@ opcodes = [ code='S', arg=stringnl, stack_before=[], - stack_after=[pystring], + stack_after=[pybytes_or_str], proto=0, doc="""Push a Python string object. The argument is a repr-style string, with bracketing quote characters, and perhaps embedded escapes. The argument extends until the next - newline character. (Actually, they are decoded into a str instance + newline character. These are usually decoded into a str instance using the encoding given to the Unpickler constructor. or the default, - 'ASCII'.) + 'ASCII'. If the encoding given was 'bytes' however, they will be + decoded as bytes object instead. """), I(name='BINSTRING', code='T', arg=string4, stack_before=[], - stack_after=[pystring], + stack_after=[pybytes_or_str], proto=1, doc="""Push a Python string object. - There are two arguments: the first is a 4-byte little-endian signed int - giving the number of bytes in the string, and the second is that many - bytes, which are taken literally as the string content. (Actually, - they are decoded into a str instance using the encoding given to the - Unpickler constructor. or the default, 'ASCII'.) + There are two arguments: the first is a 4-byte little-endian + signed int giving the number of bytes in the string, and the + second is that many bytes, which are taken literally as the string + content. These are usually decoded into a str instance using the + encoding given to the Unpickler constructor. or the default, + 'ASCII'. If the encoding given was 'bytes' however, they will be + decoded as bytes object instead. """), I(name='SHORT_BINSTRING', code='U', arg=string1, stack_before=[], - stack_after=[pystring], + stack_after=[pybytes_or_str], proto=1, doc="""Push a Python string object. - There are two arguments: the first is a 1-byte unsigned int giving - the number of bytes in the string, and the second is that many bytes, - which are taken literally as the string content. (Actually, they - are decoded into a str instance using the encoding given to the - Unpickler constructor. or the default, 'ASCII'.) + There are two arguments: the first is a 1-byte unsigned int giving + the number of bytes in the string, and the second is that many + bytes, which are taken literally as the string content. These are + usually decoded into a str instance using the encoding given to + the Unpickler constructor. or the default, 'ASCII'. If the + encoding given was 'bytes' however, they will be decoded as bytes + object instead. """), # Bytes (protocol 3 only; older protocols don't support bytes at all) |