summary | refs | log | tree | commit | diff | stats
path: root/Lib/pickletools.py
diff options
context:
space:
mode:
author: Alexandre Vassalotti <alexandre@peadrop.com>, 2013-04-14 10:30:35 (GMT)
committer: Alexandre Vassalotti <alexandre@peadrop.com>, 2013-04-14 10:30:35 (GMT)
commit: 8db89ca56c681de67fa4e64907dce5bbc0ac9e6b (patch)
tree: 74d594d07a7a987609f9858cc48f3da6fee3bf9e /Lib/pickletools.py
parent: 5c4874f7a24dbec473dbcfcf6863ea57ede7d233 (diff)
download: cpython-8db89ca56c681de67fa4e64907dce5bbc0ac9e6b.zip
download: cpython-8db89ca56c681de67fa4e64907dce5bbc0ac9e6b.tar.gz
download: cpython-8db89ca56c681de67fa4e64907dce5bbc0ac9e6b.tar.bz2
Issue #16550: Update the opcode descriptions of pickletools to use unsigned
integers where appropriate. Initial patch by Serhiy Storchaka.
Diffstat (limited to 'Lib/pickletools.py')
-rw-r--r-- Lib/pickletools.py 127
1 files changed, 106 insertions, 21 deletions
diff --git a/Lib/pickletools.py b/Lib/pickletools.py
index 66f4edd..69cedf7 100644
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -13,6 +13,7 @@ dis(pickle, out=None, memo=None, indentlevel=4)
import codecs
import pickle
import re
+import sys
__all__ = ['dis', 'genops', 'optimize']
@@ -165,8 +166,9 @@ UP_TO_NEWLINE = -1
# Represents the number of bytes consumed by a two-argument opcode where
# the first argument gives the number of bytes in the second argument.
-TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int
-TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int
+TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int
+TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int
+TAKEN_FROM_ARGUMENT4U = -4 # num bytes is 4-byte unsigned little-endian int
class ArgumentDescriptor(object):
__slots__ = (
@@ -194,7 +196,8 @@ class ArgumentDescriptor(object):
assert isinstance(n, int) and (n >= 0 or
n in (UP_TO_NEWLINE,
TAKEN_FROM_ARGUMENT1,
- TAKEN_FROM_ARGUMENT4))
+ TAKEN_FROM_ARGUMENT4,
+ TAKEN_FROM_ARGUMENT4U))
self.n = n
self.reader = reader
@@ -265,6 +268,27 @@ int4 = ArgumentDescriptor(
doc="Four-byte signed integer, little-endian, 2's complement.")
+def read_uint4(f):
+ r"""
+ >>> import io
+ >>> read_uint4(io.BytesIO(b'\xff\x00\x00\x00'))
+ 255
+ >>> read_uint4(io.BytesIO(b'\x00\x00\x00\x80')) == 2**31
+ True
+ """
+
+ data = f.read(4)
+ if len(data) == 4:
+ return _unpack("<I", data)[0]
+ raise ValueError("not enough data in stream to read uint4")
+
+uint4 = ArgumentDescriptor(
+ name='uint4',
+ n=4,
+ reader=read_uint4,
+ doc="Four-byte unsigned integer, little-endian.")
+
+
def read_stringnl(f, decode=True, stripquotes=True):
r"""
>>> import io
@@ -421,6 +445,67 @@ string1 = ArgumentDescriptor(
""")
+def read_bytes1(f):
+ r"""
+ >>> import io
+ >>> read_bytes1(io.BytesIO(b"\x00"))
+ b''
+ >>> read_bytes1(io.BytesIO(b"\x03abcdef"))
+ b'abc'
+ """
+
+ n = read_uint1(f)
+ assert n >= 0
+ data = f.read(n)
+ if len(data) == n:
+ return data
+ raise ValueError("expected %d bytes in a bytes1, but only %d remain" %
+ (n, len(data)))
+
+bytes1 = ArgumentDescriptor(
+ name="bytes1",
+ n=TAKEN_FROM_ARGUMENT1,
+ reader=read_bytes1,
+ doc="""A counted bytes string.
+
+ The first argument is a 1-byte unsigned int giving the number
+ of bytes, and the second argument is that many bytes.
+ """)
+
+
+def read_bytes4(f):
+ r"""
+ >>> import io
+ >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x00abc"))
+ b''
+ >>> read_bytes4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
+ b'abc'
+ >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
+ Traceback (most recent call last):
+ ...
+ ValueError: expected 50331648 bytes in a bytes4, but only 6 remain
+ """
+
+ n = read_uint4(f)
+ if n > sys.maxsize:
+ raise ValueError("bytes4 byte count > sys.maxsize: %d" % n)
+ data = f.read(n)
+ if len(data) == n:
+ return data
+ raise ValueError("expected %d bytes in a bytes4, but only %d remain" %
+ (n, len(data)))
+
+bytes4 = ArgumentDescriptor(
+ name="bytes4",
+ n=TAKEN_FROM_ARGUMENT4U,
+ reader=read_bytes4,
+ doc="""A counted bytes string.
+
+ The first argument is a 4-byte little-endian unsigned int giving
+ the number of bytes, and the second argument is that many bytes.
+ """)
+
+
def read_unicodestringnl(f):
r"""
>>> import io
@@ -464,9 +549,9 @@ def read_unicodestring4(f):
ValueError: expected 7 bytes in a unicodestring4, but only 6 remain
"""
- n = read_int4(f)
- if n < 0:
- raise ValueError("unicodestring4 byte count < 0: %d" % n)
+ n = read_uint4(f)
+ if n > sys.maxsize:
+ raise ValueError("unicodestring4 byte count > sys.maxsize: %d" % n)
data = f.read(n)
if len(data) == n:
return str(data, 'utf-8', 'surrogatepass')
@@ -475,7 +560,7 @@ def read_unicodestring4(f):
unicodestring4 = ArgumentDescriptor(
name="unicodestring4",
- n=TAKEN_FROM_ARGUMENT4,
+ n=TAKEN_FROM_ARGUMENT4U,
reader=read_unicodestring4,
doc="""A counted Unicode string.
@@ -872,7 +957,7 @@ class OpcodeInfo(object):
assert isinstance(x, StackObject)
self.stack_after = stack_after
- assert isinstance(proto, int) and 0 <= proto <= 3
+ assert isinstance(proto, int) and 0 <= proto <= pickle.HIGHEST_PROTOCOL
self.proto = proto
assert isinstance(doc, str)
@@ -1038,28 +1123,28 @@ opcodes = [
I(name='BINBYTES',
code='B',
- arg=string4,
+ arg=bytes4,
stack_before=[],
stack_after=[pybytes],
proto=3,
doc="""Push a Python bytes object.
- There are two arguments: the first is a 4-byte little-endian signed int
- giving the number of bytes in the string, and the second is that many
- bytes, which are taken literally as the bytes content.
+ There are two arguments: the first is a 4-byte little-endian unsigned int
+ giving the number of bytes, and the second is that many bytes, which are
+ taken literally as the bytes content.
"""),
I(name='SHORT_BINBYTES',
code='C',
- arg=string1,
+ arg=bytes1,
stack_before=[],
stack_after=[pybytes],
proto=3,
- doc="""Push a Python string object.
+ doc="""Push a Python bytes object.
There are two arguments: the first is a 1-byte unsigned int giving
- the number of bytes in the string, and the second is that many bytes,
- which are taken literally as the string content.
+ the number of bytes, and the second is that many bytes, which are taken
+ literally as the bytes content.
"""),
# Ways to spell None.
@@ -1118,7 +1203,7 @@ opcodes = [
proto=1,
doc="""Push a Python Unicode string object.
- There are two arguments: the first is a 4-byte little-endian signed int
+ There are two arguments: the first is a 4-byte little-endian unsigned int
giving the number of bytes in the string. The second is that many
bytes, and is the UTF-8 encoding of the Unicode string.
"""),
@@ -1422,13 +1507,13 @@ opcodes = [
I(name='LONG_BINGET',
code='j',
- arg=int4,
+ arg=uint4,
stack_before=[],
stack_after=[anyobject],
proto=1,
doc="""Read an object from the memo and push it on the stack.
- The index of the memo object to push is given by the 4-byte signed
+ The index of the memo object to push is given by the 4-byte unsigned
little-endian integer following.
"""),
@@ -1459,14 +1544,14 @@ opcodes = [
I(name='LONG_BINPUT',
code='r',
- arg=int4,
+ arg=uint4,
stack_before=[],
stack_after=[],
proto=1,
doc="""Store the stack top into the memo. The stack is not popped.
The index of the memo location to write into is given by the 4-byte
- signed little-endian integer following.
+ unsigned little-endian integer following.
"""),
# Access the extension registry (predefined objects). Akin to the GET