summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Lib/pickle.py16
-rw-r--r--Lib/pickletools.py288
2 files changed, 304 insertions, 0 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py
index d3d1dcf..79ee8af 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -127,6 +127,22 @@ BINFLOAT = 'G' # push float; arg is 8-byte float encoding
TRUE = 'I01\n' # not an opcode; see INT docs in pickletools.py
FALSE = 'I00\n' # not an opcode; see INT docs in pickletools.py
+# Protocol 2 (not yet implemented) (XXX comments will be added later)
+
+NEWOBJ = '\x81'
+PROTO = '\x80'
+EXT2 = '\x83'
+EXT1 = '\x82'
+TUPLE1 = '\x85'
+EXT4 = '\x84'
+TUPLE3 = '\x87'
+TUPLE2 = '\x86'
+NEWFALSE = '\x89'
+NEWTRUE = '\x88'
+LONG2 = '\x8b'
+LONG1 = '\x8a'
+LONG4 = '\x8c'
+
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
del x
diff --git a/Lib/pickletools.py b/Lib/pickletools.py
index e744309..59e0a15 100644
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -578,6 +578,123 @@ float8 = ArgumentDescriptor(
(may not survive roundtrip pickling intact).
""")
+# Protocol 2 formats
+
+def decode_long(data):
+ r"""Decode a long from a two's complement little-endian binary string.
+ >>> decode_long("\xff\x00")
+ 255L
+ >>> decode_long("\xff\x7f")
+ 32767L
+ >>> decode_long("\x00\xff")
+ -256L
+ >>> decode_long("\x00\x80")
+ -32768L
+ >>>
+ """
+ x = 0L
+ i = 0L
+ for c in data:
+ x |= long(ord(c)) << i
+ i += 8L
+ if i and (x & (1L << (i-1L))):
+ x -= 1L << i
+ return x
+
+def read_long1(f):
+ r"""
+ >>> import StringIO
+ >>> read_long1(StringIO.StringIO("\x02\xff\x00"))
+ 255L
+ >>> read_long1(StringIO.StringIO("\x02\xff\x7f"))
+ 32767L
+ >>> read_long1(StringIO.StringIO("\x02\x00\xff"))
+ -256L
+ >>> read_long1(StringIO.StringIO("\x02\x00\x80"))
+ -32768L
+ >>>
+ """
+
+ n = read_uint1(f)
+ data = f.read(n)
+ if len(data) != n:
+ raise ValueError("not enough data in stream to read long1")
+ return decode_long(data)
+
+long1 = ArgumentDescriptor(
+ name="long1",
+ n=TAKEN_FROM_ARGUMENT,
+ reader=read_long1,
+ doc="""A binary long, little-endian, using 1-byte size.
+
+ This first reads one byte as an unsigned size, then reads that
+ many bytes and interprets them as a little-endian long.
+ """)
+
+def read_long2(f):
+ r"""
+ >>> import StringIO
+ >>> read_long2(StringIO.StringIO("\x02\x00\xff\x00"))
+ 255L
+ >>> read_long2(StringIO.StringIO("\x02\x00\xff\x7f"))
+ 32767L
+ >>> read_long2(StringIO.StringIO("\x02\x00\x00\xff"))
+ -256L
+ >>> read_long2(StringIO.StringIO("\x02\x00\x00\x80"))
+ -32768L
+ >>>
+ """
+
+ n = read_uint2(f)
+ data = f.read(n)
+ if len(data) != n:
+ raise ValueError("not enough data in stream to read long2")
+ return decode_long(data)
+
+long2 = ArgumentDescriptor(
+ name="long2",
+ n=TAKEN_FROM_ARGUMENT,
+ reader=read_long2,
+ doc="""A binary long, little-endian, using 2-byte size.
+
+ This first reads two byte as an unsigned size, then reads that
+ many bytes and interprets them as a little-endian long.
+ """)
+
+def read_long4(f):
+ r"""
+ >>> import StringIO
+ >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\xff\x00"))
+ 255L
+ >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\xff\x7f"))
+ 32767L
+ >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\x00\xff"))
+ -256L
+ >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\x00\x80"))
+ -32768L
+ >>>
+ """
+
+ n = read_int4(f)
+ if n < 0:
+ raise ValueError("unicodestring4 byte count < 0: %d" % n)
+ data = f.read(n)
+ if len(data) != n:
+ raise ValueError("not enough data in stream to read long1")
+ return decode_long(data)
+
+long4 = ArgumentDescriptor(
+ name="long4",
+ n=TAKEN_FROM_ARGUMENT,
+ reader=read_long4,
+ doc="""A binary representation of a long, little-endian.
+
+ This first reads four bytes as a signed size (but requires the
+ size to be >= 0), then reads that many bytes and interprets them
+ as a little-endian long.
+ """)
+
+
##############################################################################
# Object descriptors. The stack used by the pickle machine holds objects,
# and in the stack_before and stack_after attributes of OpcodeInfo
@@ -627,6 +744,11 @@ pyinteger_or_bool = StackObject(
doc="A Python integer object (short or long), or "
"a Python bool.")
+pybool = StackObject(
+ name='bool',
+ obtype=(bool,),
+ doc="A Python bool object.")
+
pyfloat = StackObject(
name='float',
obtype=float,
@@ -1436,6 +1558,172 @@ opcodes = [
ID is passed to self.persistent_load(), and whatever object that
returns is pushed on the stack. See PERSID for more detail.
"""),
+
+ # Protocol 2 opcodes
+
+ I(name='PROTO',
+ code='\x80',
+ arg=uint1,
+ stack_before=[],
+ stack_after=[],
+ proto=2,
+ doc="""Protocol version indicator.
+
+ For protocol 2 and above, a pickle must start with this opcode.
+ The argument is the protocol version, an int in range(2, 256).
+ """),
+
+ I(name='NEWOBJ',
+ code='\x81',
+ arg=None,
+ stack_before=[anyobject, anyobject],
+ stack_after=[anyobject],
+ proto=2,
+ doc="""Build an object instance.
+
+ The stack before should be thought of as containing a class
+ object followed by an argument tuple (the tuple being the stack
+ top). Call these cls and args. They are popped off the stack,
+ and the value returned by cls.__new__(cls, *args) is pushed back
+ onto the stack.
+ """),
+
+ I(name='EXT1',
+ code='\x82',
+ arg=uint1,
+ stack_before=[],
+ stack_after=[anyobject],
+ proto=2,
+ doc="""Extension code.
+
+ This code and the similar EXT2 and EXT4 allow using a registry
+ of popular objects that are pickled by name, typically classes.
+ It is envisioned that through a global negotiation and
+ registration process, third parties can set up a mapping between
+ ints and object names.
+
+ In order to guarantee pickle interchangeability, the extension
+ code registry ought to be global, although a range of codes may
+ be reserved for private use.
+ """),
+
+ I(name='EXT2',
+ code='\x83',
+ arg=uint2,
+ stack_before=[],
+ stack_after=[anyobject],
+ proto=2,
+ doc="""Extension code.
+
+ See EXT1.
+ """),
+
+ I(name='EXT4',
+ code='\x84',
+ arg=int4,
+ stack_before=[],
+ stack_after=[anyobject],
+ proto=2,
+ doc="""Extension code.
+
+ See EXT1.
+ """),
+
+ I(name='TUPLE1',
+ code='\x85',
+ arg=None,
+ stack_before=[anyobject],
+ stack_after=[pytuple],
+ proto=2,
+ doc="""One-tuple.
+
+ This code pops one value off the stack and pushes a tuple of
+ length 1 whose one item is that value back onto it. IOW:
+
+ stack[-1] = tuple(stack[-1:])
+ """),
+
+ I(name='TUPLE2',
+ code='\x86',
+ arg=None,
+ stack_before=[anyobject, anyobject],
+ stack_after=[pytuple],
+ proto=2,
+ doc="""One-tuple.
+
+ This code pops two values off the stack and pushes a tuple
+ of length 2 whose items are those values back onto it. IOW:
+
+ stack[-2:] = [tuple(stack[-2:])]
+ """),
+
+ I(name='TUPLE3',
+ code='\x87',
+ arg=None,
+ stack_before=[anyobject, anyobject, anyobject],
+ stack_after=[pytuple],
+ proto=2,
+ doc="""One-tuple.
+
+ This code pops three values off the stack and pushes a tuple
+ of length 3 whose items are those values back onto it. IOW:
+
+ stack[-3:] = [tuple(stack[-3:])]
+ """),
+
+ I(name='NEWTRUE',
+ code='\x88',
+ arg=None,
+ stack_before=[],
+ stack_after=[pybool],
+ proto=2,
+ doc="""True.
+
+ Push True onto the stack."""),
+
+ I(name='NEWFALSE',
+ code='\x89',
+ arg=None,
+ stack_before=[],
+ stack_after=[pybool],
+ proto=2,
+ doc="""True.
+
+ Push False onto the stack."""),
+
+ I(name="LONG1",
+ code='\x8a',
+ arg=long1,
+ stack_before=[],
+ stack_after=[pylong],
+ proto=2,
+ doc="""Long integer using one-byte length.
+
+ A more efficient encoding of a Python long; the long1 encoding
+ says it all."""),
+
+ I(name="LONG2",
+ code='\x8b',
+ arg=long2,
+ stack_before=[],
+ stack_after=[pylong],
+ proto=2,
+ doc="""Long integer using two-byte length.
+
+ A more efficient encoding of a Python long; the long2 encoding
+ says it all."""),
+
+ I(name="LONG4",
+ code='\x8c',
+ arg=long4,
+ stack_before=[],
+ stack_after=[pylong],
+ proto=2,
+ doc="""Long integer using found-byte length.
+
+ A more efficient encoding of a Python long; the long4 encoding
+ says it all."""),
+
]
del I