summaryrefslogtreecommitdiffstats
path: root/Lib/pickletools.py
diff options
context:
space:
mode:
authorAlexandre Vassalotti <alexandre@peadrop.com>2013-12-07 09:09:27 (GMT)
committerAlexandre Vassalotti <alexandre@peadrop.com>2013-12-07 09:09:27 (GMT)
commitd05c9ff84501d93b13de40a9c7b0360c7d2ebada (patch)
treeae840ca5e91d21e53cc60e6c3e7fdd64b5a9fec4 /Lib/pickletools.py
parentee07b94788e5e3e79f6632e92a5295adc3937bf4 (diff)
downloadcpython-d05c9ff84501d93b13de40a9c7b0360c7d2ebada.zip
cpython-d05c9ff84501d93b13de40a9c7b0360c7d2ebada.tar.gz
cpython-d05c9ff84501d93b13de40a9c7b0360c7d2ebada.tar.bz2
Issue #6784: Strings from Python 2 can now be unpickled as bytes objects.
Initial patch by Merlijn van Deen. I've added a few unrelated docstring fixes in the patch while I was at it, which makes the documentation for pickle a bit more consistent.
Diffstat (limited to 'Lib/pickletools.py')
-rw-r--r--Lib/pickletools.py185
1 files changed, 92 insertions, 93 deletions
diff --git a/Lib/pickletools.py b/Lib/pickletools.py
index a2480f6..71c2aa1 100644
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -969,113 +969,107 @@ class StackObject(object):
return self.name
-pyint = StackObject(
- name='int',
- obtype=int,
- doc="A short (as opposed to long) Python integer object.")
-
-pylong = StackObject(
- name='long',
- obtype=int,
- doc="A long (as opposed to short) Python integer object.")
+pyint = pylong = StackObject(
+ name='int',
+ obtype=int,
+ doc="A Python integer object.")
pyinteger_or_bool = StackObject(
- name='int_or_bool',
- obtype=(int, bool),
- doc="A Python integer object (short or long), or "
- "a Python bool.")
+ name='int_or_bool',
+ obtype=(int, bool),
+ doc="A Python integer or boolean object.")
pybool = StackObject(
- name='bool',
- obtype=(bool,),
- doc="A Python bool object.")
+ name='bool',
+ obtype=bool,
+ doc="A Python boolean object.")
pyfloat = StackObject(
- name='float',
- obtype=float,
- doc="A Python float object.")
+ name='float',
+ obtype=float,
+ doc="A Python float object.")
-pystring = StackObject(
- name='string',
- obtype=bytes,
- doc="A Python (8-bit) string object.")
+pybytes_or_str = pystring = StackObject(
+ name='bytes_or_str',
+ obtype=(bytes, str),
+ doc="A Python bytes or (Unicode) string object.")
pybytes = StackObject(
- name='bytes',
- obtype=bytes,
- doc="A Python bytes object.")
+ name='bytes',
+ obtype=bytes,
+ doc="A Python bytes object.")
pyunicode = StackObject(
- name='str',
- obtype=str,
- doc="A Python (Unicode) string object.")
+ name='str',
+ obtype=str,
+ doc="A Python (Unicode) string object.")
pynone = StackObject(
- name="None",
- obtype=type(None),
- doc="The Python None object.")
+ name="None",
+ obtype=type(None),
+ doc="The Python None object.")
pytuple = StackObject(
- name="tuple",
- obtype=tuple,
- doc="A Python tuple object.")
+ name="tuple",
+ obtype=tuple,
+ doc="A Python tuple object.")
pylist = StackObject(
- name="list",
- obtype=list,
- doc="A Python list object.")
+ name="list",
+ obtype=list,
+ doc="A Python list object.")
pydict = StackObject(
- name="dict",
- obtype=dict,
- doc="A Python dict object.")
+ name="dict",
+ obtype=dict,
+ doc="A Python dict object.")
pyset = StackObject(
- name="set",
- obtype=set,
- doc="A Python set object.")
+ name="set",
+ obtype=set,
+ doc="A Python set object.")
pyfrozenset = StackObject(
- name="frozenset",
- obtype=set,
- doc="A Python frozenset object.")
+ name="frozenset",
+ obtype=set,
+ doc="A Python frozenset object.")
anyobject = StackObject(
- name='any',
- obtype=object,
- doc="Any kind of object whatsoever.")
+ name='any',
+ obtype=object,
+ doc="Any kind of object whatsoever.")
markobject = StackObject(
- name="mark",
- obtype=StackObject,
- doc="""'The mark' is a unique object.
-
- Opcodes that operate on a variable number of objects
- generally don't embed the count of objects in the opcode,
- or pull it off the stack. Instead the MARK opcode is used
- to push a special marker object on the stack, and then
- some other opcodes grab all the objects from the top of
- the stack down to (but not including) the topmost marker
- object.
- """)
+ name="mark",
+ obtype=StackObject,
+ doc="""'The mark' is a unique object.
+
+Opcodes that operate on a variable number of objects
+generally don't embed the count of objects in the opcode,
+or pull it off the stack. Instead the MARK opcode is used
+to push a special marker object on the stack, and then
+some other opcodes grab all the objects from the top of
+the stack down to (but not including) the topmost marker
+object.
+""")
stackslice = StackObject(
- name="stackslice",
- obtype=StackObject,
- doc="""An object representing a contiguous slice of the stack.
+ name="stackslice",
+ obtype=StackObject,
+ doc="""An object representing a contiguous slice of the stack.
- This is used in conjunction with markobject, to represent all
- of the stack following the topmost markobject. For example,
- the POP_MARK opcode changes the stack from
+This is used in conjunction with markobject, to represent all
+of the stack following the topmost markobject. For example,
+the POP_MARK opcode changes the stack from
- [..., markobject, stackslice]
- to
- [...]
+ [..., markobject, stackslice]
+to
+ [...]
- No matter how many object are on the stack after the topmost
- markobject, POP_MARK gets rid of all of them (including the
- topmost markobject too).
- """)
+No matter how many objects are on the stack after the topmost
+markobject, POP_MARK gets rid of all of them (including the
+topmost markobject too).
+""")
##############################################################################
# Descriptors for pickle opcodes.
@@ -1212,7 +1206,7 @@ opcodes = [
code='L',
arg=decimalnl_long,
stack_before=[],
- stack_after=[pylong],
+ stack_after=[pyint],
proto=0,
doc="""Push a long integer.
@@ -1230,7 +1224,7 @@ opcodes = [
code='\x8a',
arg=long1,
stack_before=[],
- stack_after=[pylong],
+ stack_after=[pyint],
proto=2,
doc="""Long integer using one-byte length.
@@ -1241,7 +1235,7 @@ opcodes = [
code='\x8b',
arg=long4,
stack_before=[],
- stack_after=[pylong],
+ stack_after=[pyint],
proto=2,
doc="""Long integer using four-byte length.
@@ -1254,45 +1248,50 @@ opcodes = [
code='S',
arg=stringnl,
stack_before=[],
- stack_after=[pystring],
+ stack_after=[pybytes_or_str],
proto=0,
doc="""Push a Python string object.
The argument is a repr-style string, with bracketing quote characters,
and perhaps embedded escapes. The argument extends until the next
- newline character. (Actually, they are decoded into a str instance
+ newline character. These are usually decoded into a str instance
using the encoding given to the Unpickler constructor, or the default,
- 'ASCII'.)
+ 'ASCII'. If the encoding given was 'bytes', however, they will be
+ decoded as a bytes object instead.
"""),
I(name='BINSTRING',
code='T',
arg=string4,
stack_before=[],
- stack_after=[pystring],
+ stack_after=[pybytes_or_str],
proto=1,
doc="""Push a Python string object.
- There are two arguments: the first is a 4-byte little-endian signed int
- giving the number of bytes in the string, and the second is that many
- bytes, which are taken literally as the string content. (Actually,
- they are decoded into a str instance using the encoding given to the
- Unpickler constructor. or the default, 'ASCII'.)
+ There are two arguments: the first is a 4-byte little-endian
+ signed int giving the number of bytes in the string, and the
+ second is that many bytes, which are taken literally as the string
+ content. These are usually decoded into a str instance using the
+ encoding given to the Unpickler constructor, or the default,
+ 'ASCII'. If the encoding given was 'bytes', however, they will be
+ decoded as a bytes object instead.
"""),
I(name='SHORT_BINSTRING',
code='U',
arg=string1,
stack_before=[],
- stack_after=[pystring],
+ stack_after=[pybytes_or_str],
proto=1,
doc="""Push a Python string object.
- There are two arguments: the first is a 1-byte unsigned int giving
- the number of bytes in the string, and the second is that many bytes,
- which are taken literally as the string content. (Actually, they
- are decoded into a str instance using the encoding given to the
- Unpickler constructor. or the default, 'ASCII'.)
+ There are two arguments: the first is a 1-byte unsigned int giving
+ the number of bytes in the string, and the second is that many
+ bytes, which are taken literally as the string content. These are
+ usually decoded into a str instance using the encoding given to
+ the Unpickler constructor, or the default, 'ASCII'. If the
+ encoding given was 'bytes', however, they will be decoded as a
+ bytes object instead.
"""),
# Bytes (protocol 3 only; older protocols don't support bytes at all)