diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2013-11-23 17:59:12 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2013-11-23 17:59:12 (GMT) |
commit | c9dc4a2a8a6dcfe1674685bea4a4af935c0e37ca (patch) | |
tree | afbde5318538e73815668dc73a0fb91dfb88ca61 /Lib | |
parent | 95401c5f6b9f07b094924559177c9b30a1c38998 (diff) | |
download | cpython-c9dc4a2a8a6dcfe1674685bea4a4af935c0e37ca.zip cpython-c9dc4a2a8a6dcfe1674685bea4a4af935c0e37ca.tar.gz cpython-c9dc4a2a8a6dcfe1674685bea4a4af935c0e37ca.tar.bz2 |
Issue #17810: Implement PEP 3154, pickle protocol 4.
Most of the work is by Alexandre.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/copyreg.py | 6 | ||||
-rw-r--r-- | Lib/pickle.py | 582 | ||||
-rw-r--r-- | Lib/pickletools.py | 471 | ||||
-rw-r--r-- | Lib/test/pickletester.py | 487 | ||||
-rw-r--r-- | Lib/test/test_descr.py | 605 |
5 files changed, 1629 insertions, 522 deletions
diff --git a/Lib/copyreg.py b/Lib/copyreg.py index 66c0f8a..67f5bb0 100644 --- a/Lib/copyreg.py +++ b/Lib/copyreg.py @@ -87,6 +87,12 @@ def _reduce_ex(self, proto): def __newobj__(cls, *args): return cls.__new__(cls, *args) +def __newobj_ex__(cls, args, kwargs): + """Used by pickle protocol 4, instead of __newobj__ to allow classes with + keyword-only arguments to be pickled correctly. + """ + return cls.__new__(cls, *args, **kwargs) + def _slotnames(cls): """Return a list of slot names for a given class. diff --git a/Lib/pickle.py b/Lib/pickle.py index dbc196a..d1f1538 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -23,7 +23,7 @@ Misc variables: """ -from types import FunctionType, BuiltinFunctionType +from types import FunctionType, BuiltinFunctionType, ModuleType from copyreg import dispatch_table from copyreg import _extension_registry, _inverted_registry, _extension_cache from itertools import islice @@ -42,17 +42,18 @@ __all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler", bytes_types = (bytes, bytearray) # These are purely informational; no code uses these. -format_version = "3.0" # File format version we write +format_version = "4.0" # File format version we write compatible_formats = ["1.0", # Original protocol 0 "1.1", # Protocol 0 with INST added "1.2", # Original protocol 1 "1.3", # Protocol 1 with BINFLOAT added "2.0", # Protocol 2 "3.0", # Protocol 3 + "4.0", # Protocol 4 ] # Old format versions we can read # This is the highest protocol number we know how to read. -HIGHEST_PROTOCOL = 3 +HIGHEST_PROTOCOL = 4 # The protocol we write by default. May be less than HIGHEST_PROTOCOL. # We intentionally write a protocol that Python 2.x cannot read; @@ -164,7 +165,196 @@ _tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3] BINBYTES = b'B' # push bytes; counted binary string argument SHORT_BINBYTES = b'C' # " " ; " " " " < 256 bytes -__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)]) +# Protocol 4 +SHORT_BINUNICODE = b'\x8c' # push short string; UTF-8 length < 256 bytes +BINUNICODE8 = b'\x8d' # push very long string +BINBYTES8 = b'\x8e' # push very long bytes string +EMPTY_SET = b'\x8f' # push empty set on the stack +ADDITEMS = b'\x90' # modify set by adding topmost stack items +FROZENSET = b'\x91' # build frozenset from topmost stack items +NEWOBJ_EX = b'\x92' # like NEWOBJ but work with keyword only arguments +STACK_GLOBAL = b'\x93' # same as GLOBAL but using names on the stacks +MEMOIZE = b'\x94' # store top of the stack in memo +FRAME = b'\x95' # indicate the beginning of a new frame + +__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)]) + + +class _Framer: + + _FRAME_SIZE_TARGET = 64 * 1024 + + def __init__(self, file_write): + self.file_write = file_write + self.current_frame = None + + def _commit_frame(self): + f = self.current_frame + with f.getbuffer() as data: + n = len(data) + write = self.file_write + write(FRAME) + write(pack("<Q", n)) + write(data) + f.seek(0) + f.truncate() + + def start_framing(self): + self.current_frame = io.BytesIO() + + def end_framing(self): + if self.current_frame is not None: + self._commit_frame() + self.current_frame = None + + def write(self, data): + f = self.current_frame + if f is None: + return self.file_write(data) + else: + n = len(data) + if f.tell() >= self._FRAME_SIZE_TARGET: + self._commit_frame() + return f.write(data) + +class _Unframer: + + def __init__(self, file_read, file_readline, file_tell=None): + self.file_read = file_read + self.file_readline = file_readline + self.file_tell = file_tell + self.framing_enabled = False + self.current_frame = None + self.frame_start = None + + def read(self, n): + if n == 0: + return b'' + _file_read = self.file_read + if not self.framing_enabled: + return _file_read(n) + f = self.current_frame + if f is not None: + data = f.read(n) + if data: + if len(data) < n: + raise UnpicklingError( + "pickle exhausted before end of frame") + return data + frame_opcode = _file_read(1) + if frame_opcode != FRAME: + raise UnpicklingError( + "expected a FRAME opcode, got {} instead".format(frame_opcode)) + frame_size, = unpack("<Q", _file_read(8)) + if frame_size > sys.maxsize: + raise ValueError("frame size > sys.maxsize: %d" % frame_size) + if self.file_tell is not None: + self.frame_start = self.file_tell() + f = self.current_frame = io.BytesIO(_file_read(frame_size)) + self.readline = f.readline + data = f.read(n) + assert len(data) == n, (len(data), n) + return data + + def readline(self): + if not self.framing_enabled: + return self.file_readline() + else: + return self.current_frame.readline() + + def tell(self): + if self.file_tell is None: + return None + elif self.current_frame is None: + return self.file_tell() + else: + return self.frame_start + self.current_frame.tell() + + +# Tools used for pickling. + +def _getattribute(obj, name, allow_qualname=False): + dotted_path = name.split(".") + if not allow_qualname and len(dotted_path) > 1: + raise AttributeError("Can't get qualified attribute {!r} on {!r}; " + + "use protocols >= 4 to enable support" + .format(name, obj)) + for subpath in dotted_path: + if subpath == '<locals>': + raise AttributeError("Can't get local attribute {!r} on {!r}" + .format(name, obj)) + try: + obj = getattr(obj, subpath) + except AttributeError: + raise AttributeError("Can't get attribute {!r} on {!r}" + .format(name, obj)) + return obj + +def whichmodule(obj, name, allow_qualname=False): + """Find the module an object belong to.""" + module_name = getattr(obj, '__module__', None) + if module_name is not None: + return module_name + for module_name, module in sys.modules.items(): + if module_name == '__main__' or module is None: + continue + try: + if _getattribute(module, name, allow_qualname) is obj: + return module_name + except AttributeError: + pass + return '__main__' + +def encode_long(x): + r"""Encode a long to a two's complement little-endian binary string. + Note that 0 is a special case, returning an empty string, to save a + byte in the LONG1 pickling context. + + >>> encode_long(0) + b'' + >>> encode_long(255) + b'\xff\x00' + >>> encode_long(32767) + b'\xff\x7f' + >>> encode_long(-256) + b'\x00\xff' + >>> encode_long(-32768) + b'\x00\x80' + >>> encode_long(-128) + b'\x80' + >>> encode_long(127) + b'\x7f' + >>> + """ + if x == 0: + return b'' + nbytes = (x.bit_length() >> 3) + 1 + result = x.to_bytes(nbytes, byteorder='little', signed=True) + if x < 0 and nbytes > 1: + if result[-1] == 0xff and (result[-2] & 0x80) != 0: + result = result[:-1] + return result + +def decode_long(data): + r"""Decode a long from a two's complement little-endian binary string. + + >>> decode_long(b'') + 0 + >>> decode_long(b"\xff\x00") + 255 + >>> decode_long(b"\xff\x7f") + 32767 + >>> decode_long(b"\x00\xff") + -256 + >>> decode_long(b"\x00\x80") + -32768 + >>> decode_long(b"\x80") + -128 + >>> decode_long(b"\x7f") + 127 + """ + return int.from_bytes(data, byteorder='little', signed=True) + # Pickling machinery @@ -174,9 +364,9 @@ class _Pickler: """This takes a binary file for writing a pickle data stream. The optional protocol argument tells the pickler to use the - given protocol; supported protocols are 0, 1, 2, 3. The default - protocol is 3; a backward-incompatible protocol designed for - Python 3.0. + given protocol; supported protocols are 0, 1, 2, 3 and 4. The + default protocol is 3; a backward-incompatible protocol designed for + Python 3. Specifying a negative protocol version selects the highest protocol version supported. The higher the protocol used, the @@ -189,8 +379,8 @@ class _Pickler: meets this interface. If fix_imports is True and protocol is less than 3, pickle will try to - map the new Python 3.x names to the old module names used in Python - 2.x, so that the pickle data stream is readable with Python 2.x. + map the new Python 3 names to the old module names used in Python 2, + so that the pickle data stream is readable with Python 2. """ if protocol is None: protocol = DEFAULT_PROTOCOL @@ -199,7 +389,7 @@ class _Pickler: elif not 0 <= protocol <= HIGHEST_PROTOCOL: raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL) try: - self.write = file.write + self._file_write = file.write except AttributeError: raise TypeError("file must have a 'write' attribute") self.memo = {} @@ -223,13 +413,22 @@ class _Pickler: """Write a pickled representation of obj to the open file.""" # Check whether Pickler was initialized correctly. This is # only needed to mimic the behavior of _pickle.Pickler.dump(). - if not hasattr(self, "write"): + if not hasattr(self, "_file_write"): raise PicklingError("Pickler.__init__() was not called by " "%s.__init__()" % (self.__class__.__name__,)) if self.proto >= 2: - self.write(PROTO + pack("<B", self.proto)) + self._file_write(PROTO + pack("<B", self.proto)) + if self.proto >= 4: + framer = _Framer(self._file_write) + framer.start_framing() + self.write = framer.write + else: + framer = None + self.write = self._file_write self.save(obj) self.write(STOP) + if framer is not None: + framer.end_framing() def memoize(self, obj): """Store an object in the memo.""" @@ -249,19 +448,21 @@ class _Pickler: if self.fast: return assert id(obj) not in self.memo - memo_len = len(self.memo) - self.write(self.put(memo_len)) - self.memo[id(obj)] = memo_len, obj + idx = len(self.memo) + self.write(self.put(idx)) + self.memo[id(obj)] = idx, obj # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i. - def put(self, i): - if self.bin: - if i < 256: - return BINPUT + pack("<B", i) + def put(self, idx): + if self.proto >= 4: + return MEMOIZE + elif self.bin: + if idx < 256: + return BINPUT + pack("<B", idx) else: - return LONG_BINPUT + pack("<I", i) - - return PUT + repr(i).encode("ascii") + b'\n' + return LONG_BINPUT + pack("<I", idx) + else: + return PUT + repr(idx).encode("ascii") + b'\n' # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i. def get(self, i): @@ -349,24 +550,33 @@ class _Pickler: else: self.write(PERSID + str(pid).encode("ascii") + b'\n') - def save_reduce(self, func, args, state=None, - listitems=None, dictitems=None, obj=None): + def save_reduce(self, func, args, state=None, listitems=None, + dictitems=None, obj=None): # This API is called by some subclasses - # Assert that args is a tuple if not isinstance(args, tuple): - raise PicklingError("args from save_reduce() should be a tuple") - - # Assert that func is callable + raise PicklingError("args from save_reduce() must be a tuple") if not callable(func): - raise PicklingError("func from save_reduce() should be callable") + raise PicklingError("func from save_reduce() must be callable") save = self.save write = self.write - # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ - if self.proto >= 2 and getattr(func, "__name__", "") == "__newobj__": - # A __reduce__ implementation can direct protocol 2 to + func_name = getattr(func, "__name__", "") + if self.proto >= 4 and func_name == "__newobj_ex__": + cls, args, kwargs = args + if not hasattr(cls, "__new__"): + raise PicklingError("args[0] from {} args has no __new__" + .format(func_name)) + if obj is not None and cls is not obj.__class__: + raise PicklingError("args[0] from {} args has the wrong class" + .format(func_name)) + save(cls) + save(args) + save(kwargs) + write(NEWOBJ_EX) + elif self.proto >= 2 and func_name == "__newobj__": + # A __reduce__ implementation can direct protocol 2 or newer to # use the more efficient NEWOBJ opcode, while still # allowing protocol 0 and 1 to work normally. For this to # work, the function returned by __reduce__ should be @@ -409,7 +619,13 @@ class _Pickler: write(REDUCE) if obj is not None: - self.memoize(obj) + # If the object is already in the memo, this means it is + # recursive. In this case, throw away everything we put on the + # stack, and fetch the object back from the memo. + if id(obj) in self.memo: + write(POP + self.get(self.memo[id(obj)][0])) + else: + self.memoize(obj) # More new special cases (that work with older protocols as # well): when __reduce__ returns a tuple with 4 or 5 items, @@ -493,8 +709,10 @@ class _Pickler: (str(obj, 'latin1'), 'latin1'), obj=obj) return n = len(obj) - if n < 256: + if n <= 0xff: self.write(SHORT_BINBYTES + pack("<B", n) + obj) + elif n > 0xffffffff and self.proto >= 4: + self.write(BINBYTES8 + pack("<Q", n) + obj) else: self.write(BINBYTES + pack("<I", n) + obj) self.memoize(obj) @@ -504,11 +722,17 @@ class _Pickler: if self.bin: encoded = obj.encode('utf-8', 'surrogatepass') n = len(encoded) - self.write(BINUNICODE + pack("<I", n) + encoded) + if n <= 0xff and self.proto >= 4: + self.write(SHORT_BINUNICODE + pack("<B", n) + encoded) + elif n > 0xffffffff and self.proto >= 4: + self.write(BINUNICODE8 + pack("<Q", n) + encoded) + else: + self.write(BINUNICODE + pack("<I", n) + encoded) else: obj = obj.replace("\\", "\\u005c") obj = obj.replace("\n", "\\u000a") - self.write(UNICODE + obj.encode('raw-unicode-escape') + b'\n') + self.write(UNICODE + obj.encode('raw-unicode-escape') + + b'\n') self.memoize(obj) dispatch[str] = save_str @@ -647,33 +871,79 @@ class _Pickler: if n < self._BATCHSIZE: return + def save_set(self, obj): + save = self.save + write = self.write + + if self.proto < 4: + self.save_reduce(set, (list(obj),), obj=obj) + return + + write(EMPTY_SET) + self.memoize(obj) + + it = iter(obj) + while True: + batch = list(islice(it, self._BATCHSIZE)) + n = len(batch) + if n > 0: + write(MARK) + for item in batch: + save(item) + write(ADDITEMS) + if n < self._BATCHSIZE: + return + dispatch[set] = save_set + + def save_frozenset(self, obj): + save = self.save + write = self.write + + if self.proto < 4: + self.save_reduce(frozenset, (list(obj),), obj=obj) + return + + write(MARK) + for item in obj: + save(item) + + if id(obj) in self.memo: + # If the object is already in the memo, this means it is + # recursive. In this case, throw away everything we put on the + # stack, and fetch the object back from the memo. + write(POP_MARK + self.get(self.memo[id(obj)][0])) + return + + write(FROZENSET) + self.memoize(obj) + dispatch[frozenset] = save_frozenset + def save_global(self, obj, name=None): write = self.write memo = self.memo + if name is None and self.proto >= 4: + name = getattr(obj, '__qualname__', None) if name is None: name = obj.__name__ - module = getattr(obj, "__module__", None) - if module is None: - module = whichmodule(obj, name) - + module_name = whichmodule(obj, name, allow_qualname=self.proto >= 4) try: - __import__(module, level=0) - mod = sys.modules[module] - klass = getattr(mod, name) + __import__(module_name, level=0) + module = sys.modules[module_name] + obj2 = _getattribute(module, name, allow_qualname=self.proto >= 4) except (ImportError, KeyError, AttributeError): raise PicklingError( "Can't pickle %r: it's not found as %s.%s" % - (obj, module, name)) + (obj, module_name, name)) else: - if klass is not obj: + if obj2 is not obj: raise PicklingError( "Can't pickle %r: it's not the same object as %s.%s" % - (obj, module, name)) + (obj, module_name, name)) if self.proto >= 2: - code = _extension_registry.get((module, name)) + code = _extension_registry.get((module_name, name)) if code: assert code > 0 if code <= 0xff: @@ -684,17 +954,23 @@ class _Pickler: write(EXT4 + pack("<i", code)) return # Non-ASCII identifiers are supported only with protocols >= 3. - if self.proto >= 3: - write(GLOBAL + bytes(module, "utf-8") + b'\n' + + if self.proto >= 4: + self.save(module_name) + self.save(name) + write(STACK_GLOBAL) + elif self.proto >= 3: + write(GLOBAL + bytes(module_name, "utf-8") + b'\n' + bytes(name, "utf-8") + b'\n') else: if self.fix_imports: - if (module, name) in _compat_pickle.REVERSE_NAME_MAPPING: - module, name = _compat_pickle.REVERSE_NAME_MAPPING[(module, name)] - if module in _compat_pickle.REVERSE_IMPORT_MAPPING: - module = _compat_pickle.REVERSE_IMPORT_MAPPING[module] + r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING + r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING + if (module_name, name) in r_name_mapping: + module_name, name = r_name_mapping[(module_name, name)] + if module_name in r_import_mapping: + module_name = r_import_mapping[module_name] try: - write(GLOBAL + bytes(module, "ascii") + b'\n' + + write(GLOBAL + bytes(module_name, "ascii") + b'\n' + bytes(name, "ascii") + b'\n') except UnicodeEncodeError: raise PicklingError( @@ -703,40 +979,16 @@ class _Pickler: self.memoize(obj) + def save_method(self, obj): + if obj.__self__ is None or type(obj.__self__) is ModuleType: + self.save_global(obj) + else: + self.save_reduce(getattr, (obj.__self__, obj.__name__), obj=obj) + dispatch[FunctionType] = save_global - dispatch[BuiltinFunctionType] = save_global + dispatch[BuiltinFunctionType] = save_method dispatch[type] = save_global -# A cache for whichmodule(), mapping a function object to the name of -# the module in which the function was found. - -classmap = {} # called classmap for backwards compatibility - -def whichmodule(func, funcname): - """Figure out the module in which a function occurs. - - Search sys.modules for the module. - Cache in classmap. - Return a module name. - If the function cannot be found, return "__main__". - """ - # Python functions should always get an __module__ from their globals. - mod = getattr(func, "__module__", None) - if mod is not None: - return mod - if func in classmap: - return classmap[func] - - for name, module in list(sys.modules.items()): - if module is None: - continue # skip dummy package entries - if name != '__main__' and getattr(module, funcname, None) is func: - break - else: - name = '__main__' - classmap[func] = name - return name - # Unpickling machinery @@ -764,8 +1016,8 @@ class _Unpickler: instances pickled by Python 2.x; these default to 'ASCII' and 'strict', respectively. """ - self.readline = file.readline - self.read = file.read + self._file_readline = file.readline + self._file_read = file.read self.memo = {} self.encoding = encoding self.errors = errors @@ -779,12 +1031,16 @@ class _Unpickler: """ # Check whether Unpickler was initialized correctly. This is # only needed to mimic the behavior of _pickle.Unpickler.dump(). - if not hasattr(self, "read"): + if not hasattr(self, "_file_read"): raise UnpicklingError("Unpickler.__init__() was not called by " "%s.__init__()" % (self.__class__.__name__,)) + self._unframer = _Unframer(self._file_read, self._file_readline) + self.read = self._unframer.read + self.readline = self._unframer.readline self.mark = object() # any new unique object self.stack = [] self.append = self.stack.append + self.proto = 0 read = self.read dispatch = self.dispatch try: @@ -822,6 +1078,8 @@ class _Unpickler: if not 0 <= proto <= HIGHEST_PROTOCOL: raise ValueError("unsupported pickle protocol: %d" % proto) self.proto = proto + if proto >= 4: + self._unframer.framing_enabled = True dispatch[PROTO[0]] = load_proto def load_persid(self): @@ -940,6 +1198,14 @@ class _Unpickler: self.append(str(self.read(len), 'utf-8', 'surrogatepass')) dispatch[BINUNICODE[0]] = load_binunicode + def load_binunicode8(self): + len, = unpack('<Q', self.read(8)) + if len > maxsize: + raise UnpicklingError("BINUNICODE8 exceeds system's maximum size " + "of %d bytes" % maxsize) + self.append(str(self.read(len), 'utf-8', 'surrogatepass')) + dispatch[BINUNICODE8[0]] = load_binunicode8 + def load_short_binstring(self): len = self.read(1)[0] data = self.read(len) @@ -952,6 +1218,11 @@ class _Unpickler: self.append(self.read(len)) dispatch[SHORT_BINBYTES[0]] = load_short_binbytes + def load_short_binunicode(self): + len = self.read(1)[0] + self.append(str(self.read(len), 'utf-8', 'surrogatepass')) + dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode + def load_tuple(self): k = self.marker() self.stack[k:] = [tuple(self.stack[k+1:])] @@ -981,6 +1252,15 @@ class _Unpickler: self.append({}) dispatch[EMPTY_DICT[0]] = load_empty_dictionary + def load_empty_set(self): + self.append(set()) + dispatch[EMPTY_SET[0]] = load_empty_set + + def load_frozenset(self): + k = self.marker() + self.stack[k:] = [frozenset(self.stack[k+1:])] + dispatch[FROZENSET[0]] = load_frozenset + def load_list(self): k = self.marker() self.stack[k:] = [self.stack[k+1:]] @@ -1029,11 +1309,19 @@ class _Unpickler: def load_newobj(self): args = self.stack.pop() - cls = self.stack[-1] + cls = self.stack.pop() obj = cls.__new__(cls, *args) - self.stack[-1] = obj + self.append(obj) dispatch[NEWOBJ[0]] = load_newobj + def load_newobj_ex(self): + kwargs = self.stack.pop() + args = self.stack.pop() + cls = self.stack.pop() + obj = cls.__new__(cls, *args, **kwargs) + self.append(obj) + dispatch[NEWOBJ_EX[0]] = load_newobj_ex + def load_global(self): module = self.readline()[:-1].decode("utf-8") name = self.readline()[:-1].decode("utf-8") @@ -1041,6 +1329,14 @@ class _Unpickler: self.append(klass) dispatch[GLOBAL[0]] = load_global + def load_stack_global(self): + name = self.stack.pop() + module = self.stack.pop() + if type(name) is not str or type(module) is not str: + raise UnpicklingError("STACK_GLOBAL requires str") + self.append(self.find_class(module, name)) + dispatch[STACK_GLOBAL[0]] = load_stack_global + def load_ext1(self): code = self.read(1)[0] self.get_extension(code) @@ -1080,9 +1376,8 @@ class _Unpickler: if module in _compat_pickle.IMPORT_MAPPING: module = _compat_pickle.IMPORT_MAPPING[module] __import__(module, level=0) - mod = sys.modules[module] - klass = getattr(mod, name) - return klass + return _getattribute(sys.modules[module], name, + allow_qualname=self.proto >= 4) def load_reduce(self): stack = self.stack @@ -1146,6 +1441,11 @@ class _Unpickler: self.memo[i] = self.stack[-1] dispatch[LONG_BINPUT[0]] = load_long_binput + def load_memoize(self): + memo = self.memo + memo[len(memo)] = self.stack[-1] + dispatch[MEMOIZE[0]] = load_memoize + def load_append(self): stack = self.stack value = stack.pop() @@ -1185,6 +1485,20 @@ class _Unpickler: del stack[mark:] dispatch[SETITEMS[0]] = load_setitems + def load_additems(self): + stack = self.stack + mark = self.marker() + set_obj = stack[mark - 1] + items = stack[mark + 1:] + if isinstance(set_obj, set): + set_obj.update(items) + else: + add = set_obj.add + for item in items: + add(item) + del stack[mark:] + dispatch[ADDITEMS[0]] = load_additems + def load_build(self): stack = self.stack state = stack.pop() @@ -1218,86 +1532,46 @@ class _Unpickler: raise _Stop(value) dispatch[STOP[0]] = load_stop -# Encode/decode ints. - -def encode_long(x): - r"""Encode a long to a two's complement little-endian binary string. - Note that 0 is a special case, returning an empty string, to save a - byte in the LONG1 pickling context. - - >>> encode_long(0) - b'' - >>> encode_long(255) - b'\xff\x00' - >>> encode_long(32767) - b'\xff\x7f' - >>> encode_long(-256) - b'\x00\xff' - >>> encode_long(-32768) - b'\x00\x80' - >>> encode_long(-128) - b'\x80' - >>> encode_long(127) - b'\x7f' - >>> - """ - if x == 0: - return b'' - nbytes = (x.bit_length() >> 3) + 1 - result = x.to_bytes(nbytes, byteorder='little', signed=True) - if x < 0 and nbytes > 1: - if result[-1] == 0xff and (result[-2] & 0x80) != 0: - result = result[:-1] - return result - -def decode_long(data): - r"""Decode an int from a two's complement little-endian binary string. - - >>> decode_long(b'') - 0 - >>> decode_long(b"\xff\x00") - 255 - >>> decode_long(b"\xff\x7f") - 32767 - >>> decode_long(b"\x00\xff") - -256 - >>> decode_long(b"\x00\x80") - -32768 - >>> decode_long(b"\x80") - -128 - >>> decode_long(b"\x7f") - 127 - """ - return int.from_bytes(data, byteorder='little', signed=True) # Shorthands -def dump(obj, file, protocol=None, *, fix_imports=True): - Pickler(file, protocol, fix_imports=fix_imports).dump(obj) +def _dump(obj, file, protocol=None, *, fix_imports=True): + _Pickler(file, protocol, fix_imports=fix_imports).dump(obj) -def dumps(obj, protocol=None, *, fix_imports=True): +def _dumps(obj, protocol=None, *, fix_imports=True): f = io.BytesIO() - Pickler(f, protocol, fix_imports=fix_imports).dump(obj) + _Pickler(f, protocol, fix_imports=fix_imports).dump(obj) res = f.getvalue() assert isinstance(res, bytes_types) return res -def load(file, *, fix_imports=True, encoding="ASCII", errors="strict"): - return Unpickler(file, fix_imports=fix_imports, +def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict"): + return _Unpickler(file, fix_imports=fix_imports, encoding=encoding, errors=errors).load() -def loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"): +def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"): if isinstance(s, str): raise TypeError("Can't load pickle from unicode string") file = io.BytesIO(s) - return Unpickler(file, fix_imports=fix_imports, - encoding=encoding, errors=errors).load() + return _Unpickler(file, fix_imports=fix_imports, + encoding=encoding, errors=errors).load() # Use the faster _pickle if possible try: - from _pickle import * + from _pickle import ( + PickleError, + PicklingError, + UnpicklingError, + Pickler, + Unpickler, + dump, + dumps, + load, + loads + ) except ImportError: Pickler, Unpickler = _Pickler, _Unpickler + dump, dumps, load, loads = _dump, _dumps, _load, _loads # Doctest def _test(): diff --git a/Lib/pickletools.py b/Lib/pickletools.py index e92146d..d711bf0 100644 --- a/Lib/pickletools.py +++ b/Lib/pickletools.py @@ -11,6 +11,7 @@ dis(pickle, out=None, memo=None, indentlevel=4) ''' import codecs +import io import pickle import re import sys @@ -168,6 +169,7 @@ UP_TO_NEWLINE = -1 TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int TAKEN_FROM_ARGUMENT4U = -4 # num bytes is 4-byte unsigned little-endian int +TAKEN_FROM_ARGUMENT8U = -5 # num bytes is 8-byte unsigned little-endian int class ArgumentDescriptor(object): __slots__ = ( @@ -175,7 +177,7 @@ class ArgumentDescriptor(object): 'name', # length of argument, in bytes; an int; UP_TO_NEWLINE and - # TAKEN_FROM_ARGUMENT{1,4} are negative values for variable-length + # TAKEN_FROM_ARGUMENT{1,4,8} are negative values for variable-length # cases 'n', @@ -196,7 +198,8 @@ class ArgumentDescriptor(object): n in (UP_TO_NEWLINE, TAKEN_FROM_ARGUMENT1, TAKEN_FROM_ARGUMENT4, - TAKEN_FROM_ARGUMENT4U)) + TAKEN_FROM_ARGUMENT4U, + TAKEN_FROM_ARGUMENT8U)) self.n = n self.reader = reader @@ -288,6 +291,27 @@ uint4 = ArgumentDescriptor( doc="Four-byte unsigned integer, little-endian.") +def read_uint8(f): + r""" + >>> import io + >>> read_uint8(io.BytesIO(b'\xff\x00\x00\x00\x00\x00\x00\x00')) + 255 + >>> read_uint8(io.BytesIO(b'\xff' * 8)) == 2**64-1 + True + """ + + data = f.read(8) + if len(data) == 8: + return _unpack("<Q", data)[0] + raise ValueError("not enough data in stream to read uint8") + +uint8 = ArgumentDescriptor( + name='uint8', + n=8, + reader=read_uint8, + doc="Eight-byte unsigned integer, little-endian.") + + def read_stringnl(f, decode=True, stripquotes=True): r""" >>> import io @@ -381,6 +405,36 @@ stringnl_noescape_pair = ArgumentDescriptor( a single blank separating the two strings. """) + +def read_string1(f): + r""" + >>> import io + >>> read_string1(io.BytesIO(b"\x00")) + '' + >>> read_string1(io.BytesIO(b"\x03abcdef")) + 'abc' + """ + + n = read_uint1(f) + assert n >= 0 + data = f.read(n) + if len(data) == n: + return data.decode("latin-1") + raise ValueError("expected %d bytes in a string1, but only %d remain" % + (n, len(data))) + +string1 = ArgumentDescriptor( + name="string1", + n=TAKEN_FROM_ARGUMENT1, + reader=read_string1, + doc="""A counted string. + + The first argument is a 1-byte unsigned int giving the number + of bytes in the string, and the second argument is that many + bytes. + """) + + def read_string4(f): r""" >>> import io @@ -415,28 +469,28 @@ string4 = ArgumentDescriptor( """) -def read_string1(f): +def read_bytes1(f): r""" >>> import io - >>> read_string1(io.BytesIO(b"\x00")) - '' - >>> read_string1(io.BytesIO(b"\x03abcdef")) - 'abc' + >>> read_bytes1(io.BytesIO(b"\x00")) + b'' + >>> read_bytes1(io.BytesIO(b"\x03abcdef")) + b'abc' """ n = read_uint1(f) assert n >= 0 data = f.read(n) if len(data) == n: - return data.decode("latin-1") - raise ValueError("expected %d bytes in a string1, but only %d remain" % + return data + raise ValueError("expected %d bytes in a bytes1, but only %d remain" % (n, len(data))) -string1 = ArgumentDescriptor( - name="string1", +bytes1 = ArgumentDescriptor( + name="bytes1", n=TAKEN_FROM_ARGUMENT1, - reader=read_string1, - doc="""A counted string. + reader=read_bytes1, + doc="""A counted bytes string. The first argument is a 1-byte unsigned int giving the number of bytes in the string, and the second argument is that many @@ -486,6 +540,7 @@ def read_bytes4(f): """ n = read_uint4(f) + assert n >= 0 if n > sys.maxsize: raise ValueError("bytes4 byte count > sys.maxsize: %d" % n) data = f.read(n) @@ -505,6 +560,39 @@ bytes4 = ArgumentDescriptor( """) +def read_bytes8(f): + r""" + >>> import io + >>> read_bytes8(io.BytesIO(b"\x00\x00\x00\x00\x00\x00\x00\x00abc")) + b'' + >>> read_bytes8(io.BytesIO(b"\x03\x00\x00\x00\x00\x00\x00\x00abcdef")) + b'abc' + >>> read_bytes8(io.BytesIO(b"\x00\x00\x00\x00\x00\x00\x03\x00abcdef")) + Traceback (most recent call last): + ... + ValueError: expected 844424930131968 bytes in a bytes8, but only 6 remain + """ + + n = read_uint8(f) + assert n >= 0 + if n > sys.maxsize: + raise ValueError("bytes8 byte count > sys.maxsize: %d" % n) + data = f.read(n) + if len(data) == n: + return data + raise ValueError("expected %d bytes in a bytes8, but only %d remain" % + (n, len(data))) + +bytes8 = ArgumentDescriptor( + name="bytes8", + n=TAKEN_FROM_ARGUMENT8U, + reader=read_bytes8, + doc="""A counted bytes string. + + The first argument is a 8-byte little-endian unsigned int giving + the number of bytes, and the second argument is that many bytes. + """) + def read_unicodestringnl(f): r""" >>> import io @@ -530,6 +618,46 @@ unicodestringnl = ArgumentDescriptor( escape sequences. """) + +def read_unicodestring1(f): + r""" + >>> import io + >>> s = 'abcd\uabcd' + >>> enc = s.encode('utf-8') + >>> enc + b'abcd\xea\xaf\x8d' + >>> n = bytes([len(enc)]) # little-endian 1-byte length + >>> t = read_unicodestring1(io.BytesIO(n + enc + b'junk')) + >>> s == t + True + + >>> read_unicodestring1(io.BytesIO(n + enc[:-1])) + Traceback (most recent call last): + ... + ValueError: expected 7 bytes in a unicodestring1, but only 6 remain + """ + + n = read_uint1(f) + assert n >= 0 + data = f.read(n) + if len(data) == n: + return str(data, 'utf-8', 'surrogatepass') + raise ValueError("expected %d bytes in a unicodestring1, but only %d " + "remain" % (n, len(data))) + +unicodestring1 = ArgumentDescriptor( + name="unicodestring1", + n=TAKEN_FROM_ARGUMENT1, + reader=read_unicodestring1, + doc="""A counted Unicode string. + + The first argument is a 1-byte little-endian signed int + giving the number of bytes in the string, and the second + argument-- the UTF-8 encoding of the Unicode string -- + contains that many bytes. + """) + + def read_unicodestring4(f): r""" >>> import io @@ -549,6 +677,7 @@ def read_unicodestring4(f): """ n = read_uint4(f) + assert n >= 0 if n > sys.maxsize: raise ValueError("unicodestring4 byte count > sys.maxsize: %d" % n) data = f.read(n) @@ -570,6 +699,47 @@ unicodestring4 = ArgumentDescriptor( """) +def read_unicodestring8(f): + r""" + >>> import io + >>> s = 'abcd\uabcd' + >>> enc = s.encode('utf-8') + >>> enc + b'abcd\xea\xaf\x8d' + >>> n = bytes([len(enc)]) + bytes(7) # little-endian 8-byte length + >>> t = read_unicodestring8(io.BytesIO(n + enc + b'junk')) + >>> s == t + True + + >>> read_unicodestring8(io.BytesIO(n + enc[:-1])) + Traceback (most recent call last): + ... + ValueError: expected 7 bytes in a unicodestring8, but only 6 remain + """ + + n = read_uint8(f) + assert n >= 0 + if n > sys.maxsize: + raise ValueError("unicodestring8 byte count > sys.maxsize: %d" % n) + data = f.read(n) + if len(data) == n: + return str(data, 'utf-8', 'surrogatepass') + raise ValueError("expected %d bytes in a unicodestring8, but only %d " + "remain" % (n, len(data))) + +unicodestring8 = ArgumentDescriptor( + name="unicodestring8", + n=TAKEN_FROM_ARGUMENT8U, + reader=read_unicodestring8, + doc="""A counted Unicode string. + + The first argument is a 8-byte little-endian signed int + giving the number of bytes in the string, and the second + argument-- the UTF-8 encoding of the Unicode string -- + contains that many bytes. + """) + + def read_decimalnl_short(f): r""" >>> import io @@ -859,6 +1029,16 @@ pydict = StackObject( obtype=dict, doc="A Python dict object.") +pyset = StackObject( + name="set", + obtype=set, + doc="A Python set object.") + +pyfrozenset = StackObject( + name="frozenset", + obtype=set, + doc="A Python frozenset object.") + anyobject = StackObject( name='any', obtype=object, @@ -1142,6 +1322,19 @@ opcodes = [ literally as the string content. """), + I(name='BINBYTES8', + code='\x8e', + arg=bytes8, + stack_before=[], + stack_after=[pybytes], + proto=4, + doc="""Push a Python bytes object. + + There are two arguments: the first is a 8-byte unsigned int giving + the number of bytes in the string, and the second is that many bytes, + which are taken literally as the string content. + """), + # Ways to spell None. I(name='NONE', @@ -1190,6 +1383,19 @@ opcodes = [ until the next newline character. """), + I(name='SHORT_BINUNICODE', + code='\x8c', + arg=unicodestring1, + stack_before=[], + stack_after=[pyunicode], + proto=4, + doc="""Push a Python Unicode string object. + + There are two arguments: the first is a 1-byte little-endian signed int + giving the number of bytes in the string. The second is that many + bytes, and is the UTF-8 encoding of the Unicode string. + """), + I(name='BINUNICODE', code='X', arg=unicodestring4, @@ -1203,6 +1409,19 @@ opcodes = [ bytes, and is the UTF-8 encoding of the Unicode string. """), + I(name='BINUNICODE8', + code='\x8d', + arg=unicodestring8, + stack_before=[], + stack_after=[pyunicode], + proto=4, + doc="""Push a Python Unicode string object. + + There are two arguments: the first is a 8-byte little-endian signed int + giving the number of bytes in the string. The second is that many + bytes, and is the UTF-8 encoding of the Unicode string. + """), + # Ways to spell floats. I(name='FLOAT', @@ -1428,6 +1647,54 @@ opcodes = [ 1, 2, ..., n, and in that order. """), + # Ways to build sets + + I(name='EMPTY_SET', + code='\x8f', + arg=None, + stack_before=[], + stack_after=[pyset], + proto=4, + doc="Push an empty set."), + + I(name='ADDITEMS', + code='\x90', + arg=None, + stack_before=[pyset, markobject, stackslice], + stack_after=[pyset], + proto=4, + doc="""Add an arbitrary number of items to an existing set. + + The slice of the stack following the topmost markobject is taken as + a sequence of items, added to the set immediately under the topmost + markobject. Everything at and after the topmost markobject is popped, + leaving the mutated set at the top of the stack. + + Stack before: ... pyset markobject item_1 ... item_n + Stack after: ... pyset + + where pyset has been modified via pyset.add(item_i) = item_i for i in + 1, 2, ..., n, and in that order. + """), + + # Way to build frozensets + + I(name='FROZENSET', + code='\x91', + arg=None, + stack_before=[markobject, stackslice], + stack_after=[pyfrozenset], + proto=4, + doc="""Build a frozenset out of the topmost slice, after markobject. + + All the stack entries following the topmost markobject are placed into + a single Python frozenset, which single frozenset object replaces all + of the stack from the topmost markobject onward. For example, + + Stack before: ... markobject 1 2 3 + Stack after: ... frozenset({1, 2, 3}) + """), + # Stack manipulation. I(name='POP', @@ -1549,6 +1816,18 @@ opcodes = [ unsigned little-endian integer following. """), + I(name='MEMOIZE', + code='\x94', + arg=None, + stack_before=[anyobject], + stack_after=[anyobject], + proto=4, + doc="""Store the stack top into the memo. The stack is not popped. + + The index of the memo location to write is the number of + elements currently present in the memo. + """), + # Access the extension registry (predefined objects). Akin to the GET # family. @@ -1614,6 +1893,15 @@ opcodes = [ stack, so unpickling subclasses can override this form of lookup. """), + I(name='STACK_GLOBAL', + code='\x93', + arg=None, + stack_before=[pyunicode, pyunicode], + stack_after=[anyobject], + proto=0, + doc="""Push a global object (module.attr) on the stack. + """), + # Ways to build objects of classes pickle doesn't know about directly # (user-defined classes). I despair of documenting this accurately # and comprehensibly -- you really have to read the pickle code to @@ -1770,6 +2058,21 @@ opcodes = [ onto the stack. """), + I(name='NEWOBJ_EX', + code='\x92', + arg=None, + stack_before=[anyobject, anyobject, anyobject], + stack_after=[anyobject], + proto=4, + doc="""Build an object instance. + + The stack before should be thought of as containing a class + object followed by an argument tuple and by a keyword argument dict + (the dict being the stack top). Call these cls and args. They are + popped off the stack, and the value returned by + cls.__new__(cls, *args, *kwargs) is pushed back onto the stack. + """), + # Machine control. I(name='PROTO', @@ -1797,6 +2100,20 @@ opcodes = [ empty then. """), + # Framing support. + + I(name='FRAME', + code='\x95', + arg=uint8, + stack_before=[], + stack_after=[], + proto=4, + doc="""Indicate the beginning of a new frame. + + The unpickler may use this opcode to safely prefetch data from its + underlying stream. + """), + # Ways to deal with persistent IDs. I(name='PERSID', @@ -1903,6 +2220,38 @@ del assure_pickle_consistency ############################################################################## # A pickle opcode generator. +def _genops(data, yield_end_pos=False): + if isinstance(data, bytes_types): + data = io.BytesIO(data) + + if hasattr(data, "tell"): + getpos = data.tell + else: + getpos = lambda: None + + while True: + pos = getpos() + code = data.read(1) + opcode = code2op.get(code.decode("latin-1")) + if opcode is None: + if code == b"": + raise ValueError("pickle exhausted before seeing STOP") + else: + raise ValueError("at position %s, opcode %r unknown" % ( + "<unknown>" if pos is None else pos, + code)) + if opcode.arg is None: + arg = None + else: + arg = opcode.arg.reader(data) + if yield_end_pos: + yield opcode, arg, pos, getpos() + else: + yield opcode, arg, pos + if code == b'.': + assert opcode.name == 'STOP' + break + def genops(pickle): """Generate all the opcodes in a pickle. @@ -1926,62 +2275,47 @@ def genops(pickle): used. Else (the pickle doesn't have a tell(), and it's not obvious how to query its current position) pos is None. """ - - if isinstance(pickle, bytes_types): - import io - pickle = io.BytesIO(pickle) - - if hasattr(pickle, "tell"): - getpos = pickle.tell - else: - getpos = lambda: None - - while True: - pos = getpos() - code = pickle.read(1) - opcode = code2op.get(code.decode("latin-1")) - if opcode is None: - if code == b"": - raise ValueError("pickle exhausted before seeing STOP") - else: - raise ValueError("at position %s, opcode %r unknown" % ( - pos is None and "<unknown>" or pos, - code)) - if opcode.arg is None: - arg = None - else: - arg = opcode.arg.reader(pickle) - yield opcode, arg, pos - if code == b'.': - assert opcode.name == 'STOP' - break + return _genops(pickle) ############################################################################## # A pickle optimizer. def optimize(p): 'Optimize a pickle string by removing unused PUT opcodes' - gets = set() # set of args used by a GET opcode - puts = [] # (arg, startpos, stoppos) for the PUT opcodes - prevpos = None # set to pos if previous opcode was a PUT - for opcode, arg, pos in genops(p): - if prevpos is not None: - puts.append((prevarg, prevpos, pos)) - prevpos = None + not_a_put = object() + gets = { not_a_put } # set of args used by a GET opcode + opcodes = [] # (startpos, stoppos, putid) + proto = 0 + for opcode, arg, pos, end_pos in _genops(p, yield_end_pos=True): if 'PUT' in opcode.name: - prevarg, prevpos = arg, pos - elif 'GET' in opcode.name: - gets.add(arg) - - # Copy the pickle string except for PUTS without a corresponding GET - s = [] - i = 0 - for arg, start, stop in puts: - j = stop if (arg in gets) else start - s.append(p[i:j]) - i = stop - s.append(p[i:]) - return b''.join(s) + opcodes.append((pos, end_pos, arg)) + elif 'FRAME' in opcode.name: + pass + else: + if 'GET' in opcode.name: + gets.add(arg) + elif opcode.name == 'PROTO': + assert pos == 0, pos + proto = arg + opcodes.append((pos, end_pos, not_a_put)) + prevpos, prevarg = pos, None + + # Copy the opcodes except for PUTS without a corresponding GET + out = io.BytesIO() + opcodes = iter(opcodes) + if proto >= 2: + # Write the PROTO header before any framing + start, stop, _ = next(opcodes) + out.write(p[start:stop]) + buf = pickle._Framer(out.write) + if proto >= 4: + buf.start_framing() + for start, stop, putid in opcodes: + if putid in gets: + buf.write(p[start:stop]) + if proto >= 4: + buf.end_framing() + return out.getvalue() ############################################################################## # A symbolic pickle disassembler. @@ -2081,17 +2415,20 @@ def dis(pickle, out=None, memo=None, indentlevel=4, annotate=0): errormsg = markmsg = "no MARK exists on stack" # Check for correct memo usage. - if opcode.name in ("PUT", "BINPUT", "LONG_BINPUT"): - assert arg is not None - if arg in memo: + if opcode.name in ("PUT", "BINPUT", "LONG_BINPUT", "MEMOIZE"): + if opcode.name == "MEMOIZE": + memo_idx = len(memo) + else: + assert arg is not None + memo_idx = arg + if memo_idx in memo: errormsg = "memo key %r already defined" % arg elif not stack: errormsg = "stack is empty -- can't store into memo" elif stack[-1] is markobject: errormsg = "can't store markobject in the memo" else: - memo[arg] = stack[-1] - + memo[memo_idx] = stack[-1] elif opcode.name in ("GET", "BINGET", "LONG_BINGET"): if arg in memo: assert len(after) == 1 diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index 1971120..cadc5a7 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -1,9 +1,10 @@ +import copyreg import io -import unittest import pickle import pickletools +import random import sys -import copyreg +import unittest import weakref from http.cookies import SimpleCookie @@ -95,6 +96,9 @@ class E(C): def __getinitargs__(self): return () +class H(object): + pass + import __main__ __main__.C = C C.__module__ = "__main__" @@ -102,6 +106,8 @@ __main__.D = D D.__module__ = "__main__" __main__.E = E E.__module__ = "__main__" +__main__.H = H +H.__module__ = "__main__" class myint(int): def __init__(self, x): @@ -428,6 +434,7 @@ def create_data(): x.append(5) return x + class AbstractPickleTests(unittest.TestCase): # Subclass must define self.dumps, self.loads. @@ -436,23 +443,41 @@ class AbstractPickleTests(unittest.TestCase): def setUp(self): pass + def assert_is_copy(self, obj, objcopy, msg=None): + """Utility method to verify if two objects are copies of each others. + """ + if msg is None: + msg = "{!r} is not a copy of {!r}".format(obj, objcopy) + self.assertEqual(obj, objcopy, msg=msg) + self.assertIs(type(obj), type(objcopy), msg=msg) + if hasattr(obj, '__dict__'): + self.assertDictEqual(obj.__dict__, objcopy.__dict__, msg=msg) + self.assertIsNot(obj.__dict__, objcopy.__dict__, msg=msg) + if hasattr(obj, '__slots__'): + self.assertListEqual(obj.__slots__, objcopy.__slots__, msg=msg) + for slot in obj.__slots__: + self.assertEqual( + hasattr(obj, slot), hasattr(objcopy, slot), msg=msg) + self.assertEqual(getattr(obj, slot, None), + getattr(objcopy, slot, None), msg=msg) + def test_misc(self): # test various datatypes not tested by testdata for proto in protocols: x = myint(4) s = self.dumps(x, proto) y = self.loads(s) - self.assertEqual(x, y) + self.assert_is_copy(x, y) x = (1, ()) s = self.dumps(x, proto) y = self.loads(s) - self.assertEqual(x, y) + self.assert_is_copy(x, y) x = initarg(1, x) s = self.dumps(x, proto) y = self.loads(s) - self.assertEqual(x, y) + self.assert_is_copy(x, y) # XXX test __reduce__ protocol? @@ -461,16 +486,16 @@ class AbstractPickleTests(unittest.TestCase): for proto in protocols: s = self.dumps(expected, proto) got = self.loads(s) - self.assertEqual(expected, got) + self.assert_is_copy(expected, got) def test_load_from_data0(self): - self.assertEqual(self._testdata, self.loads(DATA0)) + self.assert_is_copy(self._testdata, self.loads(DATA0)) def test_load_from_data1(self): - self.assertEqual(self._testdata, self.loads(DATA1)) + self.assert_is_copy(self._testdata, self.loads(DATA1)) def test_load_from_data2(self): - self.assertEqual(self._testdata, self.loads(DATA2)) + self.assert_is_copy(self._testdata, self.loads(DATA2)) def test_load_classic_instance(self): # See issue5180. Test loading 2.x pickles that @@ -492,7 +517,7 @@ class AbstractPickleTests(unittest.TestCase): b"X\n" b"p0\n" b"(dp1\nb.").replace(b'X', xname) - self.assertEqual(X(*args), self.loads(pickle0)) + self.assert_is_copy(X(*args), self.loads(pickle0)) # Protocol 1 (binary mode pickle) """ @@ -509,7 +534,7 @@ class AbstractPickleTests(unittest.TestCase): pickle1 = (b'(c__main__\n' b'X\n' b'q\x00oq\x01}q\x02b.').replace(b'X', xname) - self.assertEqual(X(*args), self.loads(pickle1)) + self.assert_is_copy(X(*args), self.loads(pickle1)) # Protocol 2 (pickle2 = b'\x80\x02' + pickle1) """ @@ -527,7 +552,7 @@ class AbstractPickleTests(unittest.TestCase): pickle2 = (b'\x80\x02(c__main__\n' b'X\n' b'q\x00oq\x01}q\x02b.').replace(b'X', xname) - self.assertEqual(X(*args), self.loads(pickle2)) + self.assert_is_copy(X(*args), self.loads(pickle2)) # There are gratuitous differences between pickles produced by # pickle and cPickle, largely because cPickle starts PUT indices at @@ -552,6 +577,7 @@ class AbstractPickleTests(unittest.TestCase): for proto in protocols: s = self.dumps(l, proto) x = self.loads(s) + self.assertIsInstance(x, list) self.assertEqual(len(x), 1) self.assertTrue(x is x[0]) @@ -561,6 +587,7 @@ class AbstractPickleTests(unittest.TestCase): for proto in protocols: s = self.dumps(t, proto) x = self.loads(s) + self.assertIsInstance(x, tuple) self.assertEqual(len(x), 1) self.assertEqual(len(x[0]), 1) self.assertTrue(x is x[0][0]) @@ -571,15 +598,39 @@ class AbstractPickleTests(unittest.TestCase): for proto in protocols: s = self.dumps(d, proto) x = self.loads(s) + self.assertIsInstance(x, dict) self.assertEqual(list(x.keys()), [1]) self.assertTrue(x[1] is x) + def test_recursive_set(self): + h = H() + y = set({h}) + h.attr = y + for proto in protocols: + s = self.dumps(y, proto) + x = self.loads(s) + self.assertIsInstance(x, set) + self.assertIs(list(x)[0].attr, x) + self.assertEqual(len(x), 1) + + def test_recursive_frozenset(self): + h = H() + y = frozenset({h}) + h.attr = y + for proto in protocols: + s = self.dumps(y, proto) + x = self.loads(s) + self.assertIsInstance(x, frozenset) + self.assertIs(list(x)[0].attr, x) + self.assertEqual(len(x), 1) + def test_recursive_inst(self): i = C() i.attr = i for proto in protocols: s = self.dumps(i, proto) x = self.loads(s) + self.assertIsInstance(x, C) self.assertEqual(dir(x), dir(i)) self.assertIs(x.attr, x) @@ -592,6 +643,7 @@ class AbstractPickleTests(unittest.TestCase): for proto in protocols: s = self.dumps(l, proto) x = self.loads(s) + self.assertIsInstance(x, list) self.assertEqual(len(x), 1) self.assertEqual(dir(x[0]), dir(i)) self.assertEqual(list(x[0].attr.keys()), [1]) @@ -599,7 +651,8 @@ class AbstractPickleTests(unittest.TestCase): def test_get(self): self.assertRaises(KeyError, self.loads, b'g0\np0') - self.assertEqual(self.loads(b'((Kdtp0\nh\x00l.))'), [(100,), (100,)]) + self.assert_is_copy([(100,), (100,)], + self.loads(b'((Kdtp0\nh\x00l.))')) def test_unicode(self): endcases = ['', '<\\u>', '<\\\u1234>', '<\n>', @@ -610,26 +663,26 @@ class AbstractPickleTests(unittest.TestCase): for u in endcases: p = self.dumps(u, proto) u2 = self.loads(p) - self.assertEqual(u2, u) + self.assert_is_copy(u, u2) def test_unicode_high_plane(self): t = '\U00012345' for proto in protocols: p = self.dumps(t, proto) t2 = self.loads(p) - self.assertEqual(t2, t) + self.assert_is_copy(t, t2) def test_bytes(self): for proto in protocols: for s in b'', b'xyz', b'xyz'*100: p = self.dumps(s, proto) - self.assertEqual(self.loads(p), s) + self.assert_is_copy(s, self.loads(p)) for s in [bytes([i]) for i in range(256)]: p = self.dumps(s, proto) - self.assertEqual(self.loads(p), s) + self.assert_is_copy(s, self.loads(p)) for s in [bytes([i, i]) for i in range(256)]: p = self.dumps(s, proto) - self.assertEqual(self.loads(p), s) + self.assert_is_copy(s, self.loads(p)) def test_ints(self): import sys @@ -639,14 +692,14 @@ class AbstractPickleTests(unittest.TestCase): for expected in (-n, n): s = self.dumps(expected, proto) n2 = self.loads(s) - self.assertEqual(expected, n2) + self.assert_is_copy(expected, n2) n = n >> 1 def test_maxint64(self): maxint64 = (1 << 63) - 1 data = b'I' + str(maxint64).encode("ascii") + b'\n.' got = self.loads(data) - self.assertEqual(got, maxint64) + self.assert_is_copy(maxint64, got) # Try too with a bogus literal. data = b'I' + str(maxint64).encode("ascii") + b'JUNK\n.' @@ -661,7 +714,7 @@ class AbstractPickleTests(unittest.TestCase): for n in npos, -npos: pickle = self.dumps(n, proto) got = self.loads(pickle) - self.assertEqual(n, got) + self.assert_is_copy(n, got) # Try a monster. This is quadratic-time in protos 0 & 1, so don't # bother with those. nbase = int("deadbeeffeedface", 16) @@ -669,7 +722,7 @@ class AbstractPickleTests(unittest.TestCase): for n in nbase, -nbase: p = self.dumps(n, 2) got = self.loads(p) - self.assertEqual(n, got) + self.assert_is_copy(n, got) def test_float(self): test_values = [0.0, 4.94e-324, 1e-310, 7e-308, 6.626e-34, 0.1, 0.5, @@ -679,7 +732,7 @@ class AbstractPickleTests(unittest.TestCase): for value in test_values: pickle = self.dumps(value, proto) got = self.loads(pickle) - self.assertEqual(value, got) + self.assert_is_copy(value, got) @run_with_locale('LC_ALL', 'de_DE', 'fr_FR') def test_float_format(self): @@ -711,6 +764,7 @@ class AbstractPickleTests(unittest.TestCase): s = self.dumps(a, proto) b = self.loads(s) self.assertEqual(a, b) + self.assertIs(type(a), type(b)) def test_structseq(self): import time @@ -720,48 +774,48 @@ class AbstractPickleTests(unittest.TestCase): for proto in protocols: s = self.dumps(t, proto) u = self.loads(s) - self.assertEqual(t, u) + self.assert_is_copy(t, u) if hasattr(os, "stat"): t = os.stat(os.curdir) s = self.dumps(t, proto) u = self.loads(s) - self.assertEqual(t, u) + self.assert_is_copy(t, u) if hasattr(os, "statvfs"): t = os.statvfs(os.curdir) s = self.dumps(t, proto) u = self.loads(s) - self.assertEqual(t, u) + self.assert_is_copy(t, u) def test_ellipsis(self): for proto in protocols: s = self.dumps(..., proto) u = self.loads(s) - self.assertEqual(..., u) + self.assertIs(..., u) def test_notimplemented(self): for proto in protocols: s = self.dumps(NotImplemented, proto) u = self.loads(s) - self.assertEqual(NotImplemented, u) + self.assertIs(NotImplemented, u) # Tests for protocol 2 def test_proto(self): - build_none = pickle.NONE + pickle.STOP for proto in protocols: - expected = build_none + pickled = self.dumps(None, proto) if proto >= 2: - expected = pickle.PROTO + bytes([proto]) + expected - p = self.dumps(None, proto) - self.assertEqual(p, expected) + proto_header = pickle.PROTO + bytes([proto]) + self.assertTrue(pickled.startswith(proto_header)) + else: + self.assertEqual(count_opcode(pickle.PROTO, pickled), 0) oob = protocols[-1] + 1 # a future protocol + build_none = pickle.NONE + pickle.STOP badpickle = pickle.PROTO + bytes([oob]) + build_none try: self.loads(badpickle) - except ValueError as detail: - self.assertTrue(str(detail).startswith( - "unsupported pickle protocol")) + except ValueError as err: + self.assertIn("unsupported pickle protocol", str(err)) else: self.fail("expected bad protocol number to raise ValueError") @@ -770,7 +824,7 @@ class AbstractPickleTests(unittest.TestCase): for proto in protocols: s = self.dumps(x, proto) y = self.loads(s) - self.assertEqual(x, y) + self.assert_is_copy(x, y) self.assertEqual(opcode_in_pickle(pickle.LONG1, s), proto >= 2) def test_long4(self): @@ -778,7 +832,7 @@ class AbstractPickleTests(unittest.TestCase): for proto in protocols: s = self.dumps(x, proto) y = self.loads(s) - self.assertEqual(x, y) + self.assert_is_copy(x, y) self.assertEqual(opcode_in_pickle(pickle.LONG4, s), proto >= 2) def test_short_tuples(self): @@ -816,9 +870,9 @@ class AbstractPickleTests(unittest.TestCase): for x in a, b, c, d, e: s = self.dumps(x, proto) y = self.loads(s) - self.assertEqual(x, y, (proto, x, s, y)) - expected = expected_opcode[proto, len(x)] - self.assertEqual(opcode_in_pickle(expected, s), True) + self.assert_is_copy(x, y) + expected = expected_opcode[min(proto, 3), len(x)] + self.assertTrue(opcode_in_pickle(expected, s)) def test_singletons(self): # Map (proto, singleton) to expected opcode. @@ -842,8 +896,8 @@ class AbstractPickleTests(unittest.TestCase): s = self.dumps(x, proto) y = self.loads(s) self.assertTrue(x is y, (proto, x, s, y)) - expected = expected_opcode[proto, x] - self.assertEqual(opcode_in_pickle(expected, s), True) + expected = expected_opcode[min(proto, 3), x] + self.assertTrue(opcode_in_pickle(expected, s)) def test_newobj_tuple(self): x = MyTuple([1, 2, 3]) @@ -852,8 +906,7 @@ class AbstractPickleTests(unittest.TestCase): for proto in protocols: s = self.dumps(x, proto) y = self.loads(s) - self.assertEqual(tuple(x), tuple(y)) - self.assertEqual(x.__dict__, y.__dict__) + self.assert_is_copy(x, y) def test_newobj_list(self): x = MyList([1, 2, 3]) @@ -862,8 +915,7 @@ class AbstractPickleTests(unittest.TestCase): for proto in protocols: s = self.dumps(x, proto) y = self.loads(s) - self.assertEqual(list(x), list(y)) - self.assertEqual(x.__dict__, y.__dict__) + self.assert_is_copy(x, y) def test_newobj_generic(self): for proto in protocols: @@ -874,6 +926,7 @@ class AbstractPickleTests(unittest.TestCase): s = self.dumps(x, proto) y = self.loads(s) detail = (proto, C, B, x, y, type(y)) + self.assert_is_copy(x, y) # XXX revisit self.assertEqual(B(x), B(y), detail) self.assertEqual(x.__dict__, y.__dict__, detail) @@ -912,11 +965,10 @@ class AbstractPickleTests(unittest.TestCase): s1 = self.dumps(x, 1) self.assertIn(__name__.encode("utf-8"), s1) self.assertIn(b"MyList", s1) - self.assertEqual(opcode_in_pickle(opcode, s1), False) + self.assertFalse(opcode_in_pickle(opcode, s1)) y = self.loads(s1) - self.assertEqual(list(x), list(y)) - self.assertEqual(x.__dict__, y.__dict__) + self.assert_is_copy(x, y) # Dump using protocol 2 for test. s2 = self.dumps(x, 2) @@ -925,9 +977,7 @@ class AbstractPickleTests(unittest.TestCase): self.assertEqual(opcode_in_pickle(opcode, s2), True, repr(s2)) y = self.loads(s2) - self.assertEqual(list(x), list(y)) - self.assertEqual(x.__dict__, y.__dict__) - + self.assert_is_copy(x, y) finally: e.restore() @@ -951,7 +1001,7 @@ class AbstractPickleTests(unittest.TestCase): for proto in protocols: s = self.dumps(x, proto) y = self.loads(s) - self.assertEqual(x, y) + self.assert_is_copy(x, y) num_appends = count_opcode(pickle.APPENDS, s) self.assertEqual(num_appends, proto > 0) @@ -960,7 +1010,7 @@ class AbstractPickleTests(unittest.TestCase): for proto in protocols: s = self.dumps(x, proto) y = self.loads(s) - self.assertEqual(x, y) + self.assert_is_copy(x, y) num_appends = count_opcode(pickle.APPENDS, s) if proto == 0: self.assertEqual(num_appends, 0) @@ -974,7 +1024,7 @@ class AbstractPickleTests(unittest.TestCase): s = self.dumps(x, proto) self.assertIsInstance(s, bytes_types) y = self.loads(s) - self.assertEqual(x, y) + self.assert_is_copy(x, y) num_setitems = count_opcode(pickle.SETITEMS, s) self.assertEqual(num_setitems, proto > 0) @@ -983,22 +1033,49 @@ class AbstractPickleTests(unittest.TestCase): for proto in protocols: s = self.dumps(x, proto) y = self.loads(s) - self.assertEqual(x, y) + self.assert_is_copy(x, y) num_setitems = count_opcode(pickle.SETITEMS, s) if proto == 0: self.assertEqual(num_setitems, 0) else: self.assertTrue(num_setitems >= 2) + def test_set_chunking(self): + n = 10 # too small to chunk + x = set(range(n)) + for proto in protocols: + s = self.dumps(x, proto) + y = self.loads(s) + self.assert_is_copy(x, y) + num_additems = count_opcode(pickle.ADDITEMS, s) + if proto < 4: + self.assertEqual(num_additems, 0) + else: + self.assertEqual(num_additems, 1) + + n = 2500 # expect at least two chunks when proto >= 4 + x = set(range(n)) + for proto in protocols: + s = self.dumps(x, proto) + y = self.loads(s) + self.assert_is_copy(x, y) + num_additems = count_opcode(pickle.ADDITEMS, s) + if proto < 4: + self.assertEqual(num_additems, 0) + else: + self.assertGreaterEqual(num_additems, 2) + def test_simple_newobj(self): x = object.__new__(SimpleNewObj) # avoid __init__ x.abc = 666 for proto in protocols: s = self.dumps(x, proto) - self.assertEqual(opcode_in_pickle(pickle.NEWOBJ, s), proto >= 2) + self.assertEqual(opcode_in_pickle(pickle.NEWOBJ, s), + 2 <= proto < 4) + self.assertEqual(opcode_in_pickle(pickle.NEWOBJ_EX, s), + proto >= 4) y = self.loads(s) # will raise TypeError if __init__ called - self.assertEqual(y.abc, 666) - self.assertEqual(x.__dict__, y.__dict__) + self.assert_is_copy(x, y) def test_newobj_list_slots(self): x = SlotList([1, 2, 3]) @@ -1006,10 +1083,7 @@ class AbstractPickleTests(unittest.TestCase): x.bar = "hello" s = self.dumps(x, 2) y = self.loads(s) - self.assertEqual(list(x), list(y)) - self.assertEqual(x.__dict__, y.__dict__) - self.assertEqual(x.foo, y.foo) - self.assertEqual(x.bar, y.bar) + self.assert_is_copy(x, y) def test_reduce_overrides_default_reduce_ex(self): for proto in protocols: @@ -1058,11 +1132,10 @@ class AbstractPickleTests(unittest.TestCase): @no_tracing def test_bad_getattr(self): + # Issue #3514: crash when there is an infinite loop in __getattr__ x = BadGetattr() - for proto in 0, 1: + for proto in protocols: self.assertRaises(RuntimeError, self.dumps, x, proto) - # protocol 2 don't raise a RuntimeError. - d = self.dumps(x, 2) def test_reduce_bad_iterator(self): # Issue4176: crash when 4th and 5th items of __reduce__() @@ -1095,11 +1168,10 @@ class AbstractPickleTests(unittest.TestCase): obj = [dict(large_dict), dict(large_dict), dict(large_dict)] for proto in protocols: - dumped = self.dumps(obj, proto) - loaded = self.loads(dumped) - self.assertEqual(loaded, obj, - "Failed protocol %d: %r != %r" - % (proto, obj, loaded)) + with self.subTest(proto=proto): + dumped = self.dumps(obj, proto) + loaded = self.loads(dumped) + self.assert_is_copy(obj, loaded) def test_attribute_name_interning(self): # Test that attribute names of pickled objects are interned when @@ -1155,11 +1227,14 @@ class AbstractPickleTests(unittest.TestCase): def test_int_pickling_efficiency(self): # Test compacity of int representation (see issue #12744) for proto in protocols: - sizes = [len(self.dumps(2**n, proto)) for n in range(70)] - # the size function is monotonic - self.assertEqual(sorted(sizes), sizes) - if proto >= 2: - self.assertLessEqual(sizes[-1], 14) + with self.subTest(proto=proto): + pickles = [self.dumps(2**n, proto) for n in range(70)] + sizes = list(map(len, pickles)) + # the size function is monotonic + self.assertEqual(sorted(sizes), sizes) + if proto >= 2: + for p in pickles: + self.assertFalse(opcode_in_pickle(pickle.LONG, p)) def check_negative_32b_binXXX(self, dumped): if sys.maxsize > 2**32: @@ -1242,6 +1317,137 @@ class AbstractPickleTests(unittest.TestCase): else: self._check_pickling_with_opcode(obj, pickle.SETITEMS, proto) + # Exercise framing (proto >= 4) for significant workloads + + FRAME_SIZE_TARGET = 64 * 1024 + + def test_framing_many_objects(self): + obj = list(range(10**5)) + for proto in range(4, pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(proto=proto): + pickled = self.dumps(obj, proto) + unpickled = self.loads(pickled) + self.assertEqual(obj, unpickled) + # Test the framing heuristic is sane, + # assuming a given frame size target. + bytes_per_frame = (len(pickled) / + pickled.count(b'\x00\x00\x00\x00\x00')) + self.assertGreater(bytes_per_frame, + self.FRAME_SIZE_TARGET / 2) + self.assertLessEqual(bytes_per_frame, + self.FRAME_SIZE_TARGET * 1) + + def test_framing_large_objects(self): + N = 1024 * 1024 + obj = [b'x' * N, b'y' * N, b'z' * N] + for proto in range(4, pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(proto=proto): + pickled = self.dumps(obj, proto) + unpickled = self.loads(pickled) + self.assertEqual(obj, unpickled) + # At least one frame was emitted per large bytes object. + n_frames = pickled.count(b'\x00\x00\x00\x00\x00') + self.assertGreaterEqual(n_frames, len(obj)) + + def test_nested_names(self): + global Nested + class Nested: + class A: + class B: + class C: + pass + + for proto in range(4, pickle.HIGHEST_PROTOCOL + 1): + for obj in [Nested.A, Nested.A.B, Nested.A.B.C]: + with self.subTest(proto=proto, obj=obj): + unpickled = self.loads(self.dumps(obj, proto)) + self.assertIs(obj, unpickled) + + def test_py_methods(self): + global PyMethodsTest + class PyMethodsTest: + @staticmethod + def cheese(): + return "cheese" + @classmethod + def wine(cls): + assert cls is PyMethodsTest + return "wine" + def biscuits(self): + assert isinstance(self, PyMethodsTest) + return "biscuits" + class Nested: + "Nested class" + @staticmethod + def ketchup(): + return "ketchup" + @classmethod + def maple(cls): + assert cls is PyMethodsTest.Nested + return "maple" + def pie(self): + assert isinstance(self, PyMethodsTest.Nested) + return "pie" + + py_methods = ( + PyMethodsTest.cheese, + PyMethodsTest.wine, + PyMethodsTest().biscuits, + PyMethodsTest.Nested.ketchup, + PyMethodsTest.Nested.maple, + PyMethodsTest.Nested().pie + ) + py_unbound_methods = ( + (PyMethodsTest.biscuits, PyMethodsTest), + (PyMethodsTest.Nested.pie, PyMethodsTest.Nested) + ) + for proto in range(4, pickle.HIGHEST_PROTOCOL + 1): + for method in py_methods: + with self.subTest(proto=proto, method=method): + unpickled = self.loads(self.dumps(method, proto)) + self.assertEqual(method(), unpickled()) + for method, cls in py_unbound_methods: + obj = cls() + with self.subTest(proto=proto, method=method): + unpickled = self.loads(self.dumps(method, proto)) + self.assertEqual(method(obj), unpickled(obj)) + + + def test_c_methods(self): + global Subclass + class Subclass(tuple): + class Nested(str): + pass + + c_methods = ( + # bound built-in method + ("abcd".index, ("c",)), + # unbound built-in method + (str.index, ("abcd", "c")), + # bound "slot" method + ([1, 2, 3].__len__, ()), + # unbound "slot" method + (list.__len__, ([1, 2, 3],)), + # bound "coexist" method + ({1, 2}.__contains__, (2,)), + # unbound "coexist" method + (set.__contains__, ({1, 2}, 2)), + # built-in class method + (dict.fromkeys, (("a", 1), ("b", 2))), + # built-in static method + (bytearray.maketrans, (b"abc", b"xyz")), + # subclass methods + (Subclass([1,2,2]).count, (2,)), + (Subclass.count, (Subclass([1,2,2]), 2)), + (Subclass.Nested("sweet").count, ("e",)), + (Subclass.Nested.count, (Subclass.Nested("sweet"), "e")), + ) + for proto in range(4, pickle.HIGHEST_PROTOCOL + 1): + for method, args in c_methods: + with self.subTest(proto=proto, method=method): + unpickled = self.loads(self.dumps(method, proto)) + self.assertEqual(method(*args), unpickled(*args)) + class BigmemPickleTests(unittest.TestCase): @@ -1252,10 +1458,11 @@ class BigmemPickleTests(unittest.TestCase): data = 1 << (8 * size) try: for proto in protocols: - if proto < 2: - continue - with self.assertRaises((ValueError, OverflowError)): - self.dumps(data, protocol=proto) + with self.subTest(proto=proto): + if proto < 2: + continue + with self.assertRaises((ValueError, OverflowError)): + self.dumps(data, protocol=proto) finally: data = None @@ -1268,14 +1475,15 @@ class BigmemPickleTests(unittest.TestCase): data = b"abcd" * (size // 4) try: for proto in protocols: - if proto < 3: - continue - try: - pickled = self.dumps(data, protocol=proto) - self.assertTrue(b"abcd" in pickled[:15]) - self.assertTrue(b"abcd" in pickled[-15:]) - finally: - pickled = None + with self.subTest(proto=proto): + if proto < 3: + continue + try: + pickled = self.dumps(data, protocol=proto) + self.assertTrue(b"abcd" in pickled[:19]) + self.assertTrue(b"abcd" in pickled[-18:]) + finally: + pickled = None finally: data = None @@ -1284,10 +1492,11 @@ class BigmemPickleTests(unittest.TestCase): data = b"a" * size try: for proto in protocols: - if proto < 3: - continue - with self.assertRaises((ValueError, OverflowError)): - self.dumps(data, protocol=proto) + with self.subTest(proto=proto): + if proto < 3: + continue + with self.assertRaises((ValueError, OverflowError)): + self.dumps(data, protocol=proto) finally: data = None @@ -1299,27 +1508,38 @@ class BigmemPickleTests(unittest.TestCase): data = "abcd" * (size // 4) try: for proto in protocols: - try: - pickled = self.dumps(data, protocol=proto) - self.assertTrue(b"abcd" in pickled[:15]) - self.assertTrue(b"abcd" in pickled[-15:]) - finally: - pickled = None + with self.subTest(proto=proto): + try: + pickled = self.dumps(data, protocol=proto) + self.assertTrue(b"abcd" in pickled[:19]) + self.assertTrue(b"abcd" in pickled[-18:]) + finally: + pickled = None finally: data = None - # BINUNICODE (protocols 1, 2 and 3) cannot carry more than - # 2**32 - 1 bytes of utf-8 encoded unicode. + # BINUNICODE (protocols 1, 2 and 3) cannot carry more than 2**32 - 1 bytes + # of utf-8 encoded unicode. BINUNICODE8 (protocol 4) supports these huge + # unicode strings however. - @bigmemtest(size=_4G, memuse=1 + ascii_char_size, dry_run=False) + @bigmemtest(size=_4G, memuse=2 + ascii_char_size, dry_run=False) def test_huge_str_64b(self, size): - data = "a" * size + data = "abcd" * (size // 4) try: for proto in protocols: - if proto == 0: - continue - with self.assertRaises((ValueError, OverflowError)): - self.dumps(data, protocol=proto) + with self.subTest(proto=proto): + if proto == 0: + continue + if proto < 4: + with self.assertRaises((ValueError, OverflowError)): + self.dumps(data, protocol=proto) + else: + try: + pickled = self.dumps(data, protocol=proto) + self.assertTrue(b"abcd" in pickled[:19]) + self.assertTrue(b"abcd" in pickled[-18:]) + finally: + pickled = None finally: data = None @@ -1363,8 +1583,8 @@ class REX_five(object): return object.__reduce__(self) class REX_six(object): - """This class is used to check the 4th argument (list iterator) of the reduce - protocol. + """This class is used to check the 4th argument (list iterator) of + the reduce protocol. """ def __init__(self, items=None): self.items = items if items is not None else [] @@ -1376,8 +1596,8 @@ class REX_six(object): return type(self), (), None, iter(self.items), None class REX_seven(object): - """This class is used to check the 5th argument (dict iterator) of the reduce - protocol. + """This class is used to check the 5th argument (dict iterator) of + the reduce protocol. """ def __init__(self, table=None): self.table = table if table is not None else {} @@ -1415,10 +1635,16 @@ class MyList(list): class MyDict(dict): sample = {"a": 1, "b": 2} +class MySet(set): + sample = {"a", "b"} + +class MyFrozenSet(frozenset): + sample = frozenset({"a", "b"}) + myclasses = [MyInt, MyFloat, MyComplex, MyStr, MyUnicode, - MyTuple, MyList, MyDict] + MyTuple, MyList, MyDict, MySet, MyFrozenSet] class SlotList(MyList): @@ -1428,6 +1654,8 @@ class SimpleNewObj(object): def __init__(self, a, b, c): # raise an error, to make sure this isn't called raise TypeError("SimpleNewObj.__init__() didn't expect to get called") + def __eq__(self, other): + return self.__dict__ == other.__dict__ class BadGetattr: def __getattr__(self, key): @@ -1464,7 +1692,7 @@ class AbstractPickleModuleTests(unittest.TestCase): def test_highest_protocol(self): # Of course this needs to be changed when HIGHEST_PROTOCOL changes. - self.assertEqual(pickle.HIGHEST_PROTOCOL, 3) + self.assertEqual(pickle.HIGHEST_PROTOCOL, 4) def test_callapi(self): f = io.BytesIO() @@ -1645,22 +1873,23 @@ class AbstractPicklerUnpicklerObjectTests(unittest.TestCase): def _check_multiple_unpicklings(self, ioclass): for proto in protocols: - data1 = [(x, str(x)) for x in range(2000)] + [b"abcde", len] - f = ioclass() - pickler = self.pickler_class(f, protocol=proto) - pickler.dump(data1) - pickled = f.getvalue() - - N = 5 - f = ioclass(pickled * N) - unpickler = self.unpickler_class(f) - for i in range(N): - if f.seekable(): - pos = f.tell() - self.assertEqual(unpickler.load(), data1) - if f.seekable(): - self.assertEqual(f.tell(), pos + len(pickled)) - self.assertRaises(EOFError, unpickler.load) + with self.subTest(proto=proto): + data1 = [(x, str(x)) for x in range(2000)] + [b"abcde", len] + f = ioclass() + pickler = self.pickler_class(f, protocol=proto) + pickler.dump(data1) + pickled = f.getvalue() + + N = 5 + f = ioclass(pickled * N) + unpickler = self.unpickler_class(f) + for i in range(N): + if f.seekable(): + pos = f.tell() + self.assertEqual(unpickler.load(), data1) + if f.seekable(): + self.assertEqual(f.tell(), pos + len(pickled)) + self.assertRaises(EOFError, unpickler.load) def test_multiple_unpicklings_seekable(self): self._check_multiple_unpicklings(io.BytesIO) diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index 5aa4b0a..8ef51de 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -1,8 +1,11 @@ import builtins +import copyreg import gc +import itertools +import math +import pickle import sys import types -import math import unittest import weakref @@ -3153,176 +3156,6 @@ order (MRO) for bases """ self.assertEqual(e.a, 1) self.assertEqual(can_delete_dict(e), can_delete_dict(ValueError())) - def test_pickles(self): - # Testing pickling and copying new-style classes and objects... - import pickle - - def sorteditems(d): - L = list(d.items()) - L.sort() - return L - - global C - class C(object): - def __init__(self, a, b): - super(C, self).__init__() - self.a = a - self.b = b - def __repr__(self): - return "C(%r, %r)" % (self.a, self.b) - - global C1 - class C1(list): - def __new__(cls, a, b): - return super(C1, cls).__new__(cls) - def __getnewargs__(self): - return (self.a, self.b) - def __init__(self, a, b): - self.a = a - self.b = b - def __repr__(self): - return "C1(%r, %r)<%r>" % (self.a, self.b, list(self)) - - global C2 - class C2(int): - def __new__(cls, a, b, val=0): - return super(C2, cls).__new__(cls, val) - def __getnewargs__(self): - return (self.a, self.b, int(self)) - def __init__(self, a, b, val=0): - self.a = a - self.b = b - def __repr__(self): - return "C2(%r, %r)<%r>" % (self.a, self.b, int(self)) - - global C3 - class C3(object): - def __init__(self, foo): - self.foo = foo - def __getstate__(self): - return self.foo - def __setstate__(self, foo): - self.foo = foo - - global C4classic, C4 - class C4classic: # classic - pass - class C4(C4classic, object): # mixed inheritance - pass - - for bin in 0, 1: - for cls in C, C1, C2: - s = pickle.dumps(cls, bin) - cls2 = pickle.loads(s) - self.assertIs(cls2, cls) - - a = C1(1, 2); a.append(42); a.append(24) - b = C2("hello", "world", 42) - s = pickle.dumps((a, b), bin) - x, y = pickle.loads(s) - self.assertEqual(x.__class__, a.__class__) - self.assertEqual(sorteditems(x.__dict__), sorteditems(a.__dict__)) - self.assertEqual(y.__class__, b.__class__) - self.assertEqual(sorteditems(y.__dict__), sorteditems(b.__dict__)) - self.assertEqual(repr(x), repr(a)) - self.assertEqual(repr(y), repr(b)) - # Test for __getstate__ and __setstate__ on new style class - u = C3(42) - s = pickle.dumps(u, bin) - v = pickle.loads(s) - self.assertEqual(u.__class__, v.__class__) - self.assertEqual(u.foo, v.foo) - # Test for picklability of hybrid class - u = C4() - u.foo = 42 - s = pickle.dumps(u, bin) - v = pickle.loads(s) - self.assertEqual(u.__class__, v.__class__) - self.assertEqual(u.foo, v.foo) - - # Testing copy.deepcopy() - import copy - for cls in C, C1, C2: - cls2 = copy.deepcopy(cls) - self.assertIs(cls2, cls) - - a = C1(1, 2); a.append(42); a.append(24) - b = C2("hello", "world", 42) - x, y = copy.deepcopy((a, b)) - self.assertEqual(x.__class__, a.__class__) - self.assertEqual(sorteditems(x.__dict__), sorteditems(a.__dict__)) - self.assertEqual(y.__class__, b.__class__) - self.assertEqual(sorteditems(y.__dict__), sorteditems(b.__dict__)) - self.assertEqual(repr(x), repr(a)) - self.assertEqual(repr(y), repr(b)) - - def test_pickle_slots(self): - # Testing pickling of classes with __slots__ ... - import pickle - # Pickling of classes with __slots__ but without __getstate__ should fail - # (if using protocol 0 or 1) - global B, C, D, E - class B(object): - pass - for base in [object, B]: - class C(base): - __slots__ = ['a'] - class D(C): - pass - try: - pickle.dumps(C(), 0) - except TypeError: - pass - else: - self.fail("should fail: pickle C instance - %s" % base) - try: - pickle.dumps(C(), 0) - except TypeError: - pass - else: - self.fail("should fail: pickle D instance - %s" % base) - # Give C a nice generic __getstate__ and __setstate__ - class C(base): - __slots__ = ['a'] - def __getstate__(self): - try: - d = self.__dict__.copy() - except AttributeError: - d = {} - for cls in self.__class__.__mro__: - for sn in cls.__dict__.get('__slots__', ()): - try: - d[sn] = getattr(self, sn) - except AttributeError: - pass - return d - def __setstate__(self, d): - for k, v in list(d.items()): - setattr(self, k, v) - class D(C): - pass - # Now it should work - x = C() - y = pickle.loads(pickle.dumps(x)) - self.assertNotHasAttr(y, 'a') - x.a = 42 - y = pickle.loads(pickle.dumps(x)) - self.assertEqual(y.a, 42) - x = D() - x.a = 42 - x.b = 100 - y = pickle.loads(pickle.dumps(x)) - self.assertEqual(y.a + y.b, 142) - # A subclass that adds a slot should also work - class E(C): - __slots__ = ['b'] - x = E() - x.a = 42 - x.b = "foo" - y = pickle.loads(pickle.dumps(x)) - self.assertEqual(y.a, x.a) - self.assertEqual(y.b, x.b) - def test_binary_operator_override(self): # Testing overrides of binary operations... class I(int): @@ -4690,11 +4523,439 @@ class MiscTests(unittest.TestCase): self.assertEqual(X.mykey2, 'from Base2') +class PicklingTests(unittest.TestCase): + + def _check_reduce(self, proto, obj, args=(), kwargs={}, state=None, + listitems=None, dictitems=None): + if proto >= 4: + reduce_value = obj.__reduce_ex__(proto) + self.assertEqual(reduce_value[:3], + (copyreg.__newobj_ex__, + (type(obj), args, kwargs), + state)) + if listitems is not None: + self.assertListEqual(list(reduce_value[3]), listitems) + else: + self.assertIsNone(reduce_value[3]) + if dictitems is not None: + self.assertDictEqual(dict(reduce_value[4]), dictitems) + else: + self.assertIsNone(reduce_value[4]) + elif proto >= 2: + reduce_value = obj.__reduce_ex__(proto) + self.assertEqual(reduce_value[:3], + (copyreg.__newobj__, + (type(obj),) + args, + state)) + if listitems is not None: + self.assertListEqual(list(reduce_value[3]), listitems) + else: + self.assertIsNone(reduce_value[3]) + if dictitems is not None: + self.assertDictEqual(dict(reduce_value[4]), dictitems) + else: + self.assertIsNone(reduce_value[4]) + else: + base_type = type(obj).__base__ + reduce_value = (copyreg._reconstructor, + (type(obj), + base_type, + None if base_type is object else base_type(obj))) + if state is not None: + reduce_value += (state,) + self.assertEqual(obj.__reduce_ex__(proto), reduce_value) + self.assertEqual(obj.__reduce__(), reduce_value) + + def test_reduce(self): + protocols = range(pickle.HIGHEST_PROTOCOL + 1) + args = (-101, "spam") + kwargs = {'bacon': -201, 'fish': -301} + state = {'cheese': -401} + + class C1: + def __getnewargs__(self): + return args + obj = C1() + for proto in protocols: + self._check_reduce(proto, obj, args) + + for name, value in state.items(): + setattr(obj, name, value) + for proto in protocols: + self._check_reduce(proto, obj, args, state=state) + + class C2: + def __getnewargs__(self): + return "bad args" + obj = C2() + for proto in protocols: + if proto >= 2: + with self.assertRaises(TypeError): + obj.__reduce_ex__(proto) + + class C3: + def __getnewargs_ex__(self): + return (args, kwargs) + obj = C3() + for proto in protocols: + if proto >= 4: + self._check_reduce(proto, obj, args, kwargs) + elif proto >= 2: + with self.assertRaises(ValueError): + obj.__reduce_ex__(proto) + + class C4: + def __getnewargs_ex__(self): + return (args, "bad dict") + class C5: + def __getnewargs_ex__(self): + return ("bad tuple", kwargs) + class C6: + def __getnewargs_ex__(self): + return () + class C7: + def __getnewargs_ex__(self): + return "bad args" + for proto in protocols: + for cls in C4, C5, C6, C7: + obj = cls() + if proto >= 2: + with self.assertRaises((TypeError, ValueError)): + obj.__reduce_ex__(proto) + + class C8: + def __getnewargs_ex__(self): + return (args, kwargs) + obj = C8() + for proto in protocols: + if 2 <= proto < 4: + with self.assertRaises(ValueError): + obj.__reduce_ex__(proto) + class C9: + def __getnewargs_ex__(self): + return (args, {}) + obj = C9() + for proto in protocols: + self._check_reduce(proto, obj, args) + + class C10: + def __getnewargs_ex__(self): + raise IndexError + obj = C10() + for proto in protocols: + if proto >= 2: + with self.assertRaises(IndexError): + obj.__reduce_ex__(proto) + + class C11: + def __getstate__(self): + return state + obj = C11() + for proto in protocols: + self._check_reduce(proto, obj, state=state) + + class C12: + def __getstate__(self): + return "not dict" + obj = C12() + for proto in protocols: + self._check_reduce(proto, obj, state="not dict") + + class C13: + def __getstate__(self): + raise IndexError + obj = C13() + for proto in protocols: + with self.assertRaises(IndexError): + obj.__reduce_ex__(proto) + if proto < 2: + with self.assertRaises(IndexError): + obj.__reduce__() + + class C14: + __slots__ = tuple(state) + def __init__(self): + for name, value in state.items(): + setattr(self, name, value) + + obj = C14() + for proto in protocols: + if proto >= 2: + self._check_reduce(proto, obj, state=(None, state)) + else: + with self.assertRaises(TypeError): + obj.__reduce_ex__(proto) + with self.assertRaises(TypeError): + obj.__reduce__() + + class C15(dict): + pass + obj = C15({"quebec": -601}) + for proto in protocols: + self._check_reduce(proto, obj, dictitems=dict(obj)) + + class C16(list): + pass + obj = C16(["yukon"]) + for proto in protocols: + self._check_reduce(proto, obj, listitems=list(obj)) + + def _assert_is_copy(self, obj, objcopy, msg=None): + """Utility method to verify if two objects are copies of each others. + """ + if msg is None: + msg = "{!r} is not a copy of {!r}".format(obj, objcopy) + if type(obj).__repr__ is object.__repr__: + # We have this limitation for now because we use the object's repr + # to help us verify that the two objects are copies. This allows + # us to delegate the non-generic verification logic to the objects + # themselves. + raise ValueError("object passed to _assert_is_copy must " + + "override the __repr__ method.") + self.assertIsNot(obj, objcopy, msg=msg) + self.assertIs(type(obj), type(objcopy), msg=msg) + if hasattr(obj, '__dict__'): + self.assertDictEqual(obj.__dict__, objcopy.__dict__, msg=msg) + self.assertIsNot(obj.__dict__, objcopy.__dict__, msg=msg) + if hasattr(obj, '__slots__'): + self.assertListEqual(obj.__slots__, objcopy.__slots__, msg=msg) + for slot in obj.__slots__: + self.assertEqual( + hasattr(obj, slot), hasattr(objcopy, slot), msg=msg) + self.assertEqual(getattr(obj, slot, None), + getattr(objcopy, slot, None), msg=msg) + self.assertEqual(repr(obj), repr(objcopy), msg=msg) + + @staticmethod + def _generate_pickle_copiers(): + """Utility method to generate the many possible pickle configurations. + """ + class PickleCopier: + "This class copies object using pickle." + def __init__(self, proto, dumps, loads): + self.proto = proto + self.dumps = dumps + self.loads = loads + def copy(self, obj): + return self.loads(self.dumps(obj, self.proto)) + def __repr__(self): + # We try to be as descriptive as possible here since this is + # the string which we will allow us to tell the pickle + # configuration we are using during debugging. + return ("PickleCopier(proto={}, dumps={}.{}, loads={}.{})" + .format(self.proto, + self.dumps.__module__, self.dumps.__qualname__, + self.loads.__module__, self.loads.__qualname__)) + return (PickleCopier(*args) for args in + itertools.product(range(pickle.HIGHEST_PROTOCOL + 1), + {pickle.dumps, pickle._dumps}, + {pickle.loads, pickle._loads})) + + def test_pickle_slots(self): + # Tests pickling of classes with __slots__. + + # Pickling of classes with __slots__ but without __getstate__ should + # fail (if using protocol 0 or 1) + global C + class C: + __slots__ = ['a'] + with self.assertRaises(TypeError): + pickle.dumps(C(), 0) + + global D + class D(C): + pass + with self.assertRaises(TypeError): + pickle.dumps(D(), 0) + + class C: + "A class with __getstate__ and __setstate__ implemented." + __slots__ = ['a'] + def __getstate__(self): + state = getattr(self, '__dict__', {}).copy() + for cls in type(self).__mro__: + for slot in cls.__dict__.get('__slots__', ()): + try: + state[slot] = getattr(self, slot) + except AttributeError: + pass + return state + def __setstate__(self, state): + for k, v in state.items(): + setattr(self, k, v) + def __repr__(self): + return "%s()<%r>" % (type(self).__name__, self.__getstate__()) + + class D(C): + "A subclass of a class with slots." + pass + + global E + class E(C): + "A subclass with an extra slot." + __slots__ = ['b'] + + # Now it should work + for pickle_copier in self._generate_pickle_copiers(): + with self.subTest(pickle_copier=pickle_copier): + x = C() + y = pickle_copier.copy(x) + self._assert_is_copy(x, y) + + x.a = 42 + y = pickle_copier.copy(x) + self._assert_is_copy(x, y) + + x = D() + x.a = 42 + x.b = 100 + y = pickle_copier.copy(x) + self._assert_is_copy(x, y) + + x = E() + x.a = 42 + x.b = "foo" + y = pickle_copier.copy(x) + self._assert_is_copy(x, y) + + def test_reduce_copying(self): + # Tests pickling and copying new-style classes and objects. + global C1 + class C1: + "The state of this class is copyable via its instance dict." + ARGS = (1, 2) + NEED_DICT_COPYING = True + def __init__(self, a, b): + super().__init__() + self.a = a + self.b = b + def __repr__(self): + return "C1(%r, %r)" % (self.a, self.b) + + global C2 + class C2(list): + "A list subclass copyable via __getnewargs__." + ARGS = (1, 2) + NEED_DICT_COPYING = False + def __new__(cls, a, b): + self = super().__new__(cls) + self.a = a + self.b = b + return self + def __init__(self, *args): + super().__init__() + # This helps testing that __init__ is not called during the + # unpickling process, which would cause extra appends. + self.append("cheese") + @classmethod + def __getnewargs__(cls): + return cls.ARGS + def __repr__(self): + return "C2(%r, %r)<%r>" % (self.a, self.b, list(self)) + + global C3 + class C3(list): + "A list subclass copyable via __getstate__." + ARGS = (1, 2) + NEED_DICT_COPYING = False + def __init__(self, a, b): + self.a = a + self.b = b + # This helps testing that __init__ is not called during the + # unpickling process, which would cause extra appends. + self.append("cheese") + @classmethod + def __getstate__(cls): + return cls.ARGS + def __setstate__(self, state): + a, b = state + self.a = a + self.b = b + def __repr__(self): + return "C3(%r, %r)<%r>" % (self.a, self.b, list(self)) + + global C4 + class C4(int): + "An int subclass copyable via __getnewargs__." + ARGS = ("hello", "world", 1) + NEED_DICT_COPYING = False + def __new__(cls, a, b, value): + self = super().__new__(cls, value) + self.a = a + self.b = b + return self + @classmethod + def __getnewargs__(cls): + return cls.ARGS + def __repr__(self): + return "C4(%r, %r)<%r>" % (self.a, self.b, int(self)) + + global C5 + class C5(int): + "An int subclass copyable via __getnewargs_ex__." + ARGS = (1, 2) + KWARGS = {'value': 3} + NEED_DICT_COPYING = False + def __new__(cls, a, b, *, value=0): + self = super().__new__(cls, value) + self.a = a + self.b = b + return self + @classmethod + def __getnewargs_ex__(cls): + return (cls.ARGS, cls.KWARGS) + def __repr__(self): + return "C5(%r, %r)<%r>" % (self.a, self.b, int(self)) + + test_classes = (C1, C2, C3, C4, C5) + # Testing copying through pickle + pickle_copiers = self._generate_pickle_copiers() + for cls, pickle_copier in itertools.product(test_classes, pickle_copiers): + with self.subTest(cls=cls, pickle_copier=pickle_copier): + kwargs = getattr(cls, 'KWARGS', {}) + obj = cls(*cls.ARGS, **kwargs) + proto = pickle_copier.proto + if 2 <= proto < 4 and hasattr(cls, '__getnewargs_ex__'): + with self.assertRaises(ValueError): + pickle_copier.dumps(obj, proto) + continue + objcopy = pickle_copier.copy(obj) + self._assert_is_copy(obj, objcopy) + # For test classes that supports this, make sure we didn't go + # around the reduce protocol by simply copying the attribute + # dictionary. We clear attributes using the previous copy to + # not mutate the original argument. + if proto >= 2 and not cls.NEED_DICT_COPYING: + objcopy.__dict__.clear() + objcopy2 = pickle_copier.copy(objcopy) + self._assert_is_copy(obj, objcopy2) + + # Testing copying through copy.deepcopy() + for cls in test_classes: + with self.subTest(cls=cls): + kwargs = getattr(cls, 'KWARGS', {}) + obj = cls(*cls.ARGS, **kwargs) + # XXX: We need to modify the copy module to support PEP 3154's + # reduce protocol 4. + if hasattr(cls, '__getnewargs_ex__'): + continue + objcopy = deepcopy(obj) + self._assert_is_copy(obj, objcopy) + # For test classes that supports this, make sure we didn't go + # around the reduce protocol by simply copying the attribute + # dictionary. We clear attributes using the previous copy to + # not mutate the original argument. + if not cls.NEED_DICT_COPYING: + objcopy.__dict__.clear() + objcopy2 = deepcopy(objcopy) + self._assert_is_copy(obj, objcopy2) + + def test_main(): # Run all local test cases, with PTypesLongInitTest first. support.run_unittest(PTypesLongInitTest, OperatorsTest, ClassPropertiesAndMethods, DictProxyTests, - MiscTests) + MiscTests, PicklingTests) if __name__ == "__main__": test_main() |