diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2009-06-04 20:32:06 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2009-06-04 20:32:06 (GMT) |
commit | d9dfaa948775061ef6218b181dfb617206db9e8c (patch) | |
tree | 1dd832a72a230d64f5e3a6fb6b153ec18c0a073d /Lib | |
parent | 751899a59f27e84547c454cf10dec71a8cdf8171 (diff) | |
download | cpython-d9dfaa948775061ef6218b181dfb617206db9e8c.zip cpython-d9dfaa948775061ef6218b181dfb617206db9e8c.tar.gz cpython-d9dfaa948775061ef6218b181dfb617206db9e8c.tar.bz2 |
Issue #6137: The pickle module now translates module names when loading
or dumping pickles with a 2.x-compatible protocol, in order to make data
sharing and migration easier. This behaviour can be disabled using the
new `fix_imports` optional argument.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/_compat_pickle.py | 81 | ||||
-rw-r--r-- | Lib/pickle.py | 56 | ||||
-rw-r--r-- | Lib/pickletools.py | 170 | ||||
-rw-r--r-- | Lib/test/pickletester.py | 42 | ||||
-rw-r--r-- | Lib/test/test_pickletools.py | 3 |
5 files changed, 251 insertions, 101 deletions
diff --git a/Lib/_compat_pickle.py b/Lib/_compat_pickle.py new file mode 100644 index 0000000..700c80c --- /dev/null +++ b/Lib/_compat_pickle.py @@ -0,0 +1,81 @@ +# This module is used to map the old Python 2 names to the new names used in +# Python 3 for the pickle module. This needed to make pickle streams +# generated with Python 2 loadable by Python 3. + +# This is a copy of lib2to3.fixes.fix_imports.MAPPING. We cannot import +# lib2to3 and use the mapping defined there, because lib2to3 uses pickle. +# Thus, this could cause the module to be imported recursively. +IMPORT_MAPPING = { + 'StringIO': 'io', + 'cStringIO': 'io', + 'cPickle': 'pickle', + '__builtin__' : 'builtins', + 'copy_reg': 'copyreg', + 'Queue': 'queue', + 'SocketServer': 'socketserver', + 'ConfigParser': 'configparser', + 'repr': 'reprlib', + 'FileDialog': 'tkinter.filedialog', + 'tkFileDialog': 'tkinter.filedialog', + 'SimpleDialog': 'tkinter.simpledialog', + 'tkSimpleDialog': 'tkinter.simpledialog', + 'tkColorChooser': 'tkinter.colorchooser', + 'tkCommonDialog': 'tkinter.commondialog', + 'Dialog': 'tkinter.dialog', + 'Tkdnd': 'tkinter.dnd', + 'tkFont': 'tkinter.font', + 'tkMessageBox': 'tkinter.messagebox', + 'ScrolledText': 'tkinter.scrolledtext', + 'Tkconstants': 'tkinter.constants', + 'Tix': 'tkinter.tix', + 'ttk': 'tkinter.ttk', + 'Tkinter': 'tkinter', + 'markupbase': '_markupbase', + '_winreg': 'winreg', + 'thread': '_thread', + 'dummy_thread': '_dummy_thread', + 'dbhash': 'dbm.bsd', + 'dumbdbm': 'dbm.dumb', + 'dbm': 'dbm.ndbm', + 'gdbm': 'dbm.gnu', + 'xmlrpclib': 'xmlrpc.client', + 'DocXMLRPCServer': 'xmlrpc.server', + 'SimpleXMLRPCServer': 'xmlrpc.server', + 'httplib': 'http.client', + 'htmlentitydefs' : 'html.entities', + 'HTMLParser' : 'html.parser', + 'Cookie': 'http.cookies', + 'cookielib': 'http.cookiejar', + 'BaseHTTPServer': 'http.server', + 'SimpleHTTPServer': 'http.server', + 'CGIHTTPServer': 'http.server', + 'test.test_support': 'test.support', + 'commands': 'subprocess', + 'UserString' : 'collections', + 'UserList' : 'collections', + 'urlparse' : 'urllib.parse', + 'robotparser' : 'urllib.robotparser', + 'whichdb': 'dbm', + 'anydbm': 'dbm' +} + + +# This contains rename rules that are easy to handle. We ignore the more +# complex stuff (e.g. mapping the names in the urllib and types modules). +# These rules should be run before import names are fixed. +NAME_MAPPING = { + ('__builtin__', 'xrange'): ('builtins', 'range'), + ('__builtin__', 'reduce'): ('functools', 'reduce'), + ('__builtin__', 'intern'): ('sys', 'intern'), + ('__builtin__', 'unichr'): ('builtins', 'chr'), + ('__builtin__', 'basestring'): ('builtins', 'str'), + ('__builtin__', 'long'): ('builtins', 'int'), + ('itertools', 'izip'): ('builtins', 'zip'), + ('itertools', 'imap'): ('builtins', 'map'), + ('itertools', 'ifilter'): ('builtins', 'filter'), + ('itertools', 'ifilterfalse'): ('itertools', 'filterfalse'), +} + +# Same, but for 3.x to 2.x +REVERSE_IMPORT_MAPPING = dict((v, k) for (k, v) in IMPORT_MAPPING.items()) +REVERSE_NAME_MAPPING = dict((v, k) for (k, v) in NAME_MAPPING.items()) diff --git a/Lib/pickle.py b/Lib/pickle.py index 720c1a0..7af4ce9 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -34,6 +34,7 @@ import struct import re import io import codecs +import _compat_pickle __all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler", "Unpickler", "dump", "dumps", "load", "loads"] @@ -171,12 +172,11 @@ SHORT_BINBYTES = b'C' # " " ; " " " " < 256 bytes __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)]) - # Pickling machinery class _Pickler: - def __init__(self, file, protocol=None): + def __init__(self, file, protocol=None, *, fix_imports=True): """This takes a binary file for writing a pickle data stream. The optional protocol argument tells the pickler to use the @@ -193,6 +193,10 @@ class _Pickler: bytes argument. It can thus be a file object opened for binary writing, a io.BytesIO instance, or any other custom object that meets this interface. + + If fix_imports is True and protocol is less than 3, pickle will try to + map the new Python 3.x names to the old module names used in Python + 2.x, so that the pickle data stream is readable with Python 2.x. """ if protocol is None: protocol = DEFAULT_PROTOCOL @@ -208,6 +212,7 @@ class _Pickler: self.proto = int(protocol) self.bin = protocol >= 1 self.fast = 0 + self.fix_imports = fix_imports and protocol < 3 def clear_memo(self): """Clears the pickler's "memo". @@ -698,6 +703,11 @@ class _Pickler: write(GLOBAL + bytes(module, "utf-8") + b'\n' + bytes(name, "utf-8") + b'\n') else: + if self.fix_imports: + if (module, name) in _compat_pickle.REVERSE_NAME_MAPPING: + module, name = _compat_pickle.REVERSE_NAME_MAPPING[(module, name)] + if module in _compat_pickle.REVERSE_IMPORT_MAPPING: + module = _compat_pickle.REVERSE_IMPORT_MAPPING[module] try: write(GLOBAL + bytes(module, "ascii") + b'\n' + bytes(name, "ascii") + b'\n') @@ -766,7 +776,8 @@ def whichmodule(func, funcname): class _Unpickler: - def __init__(self, file, *, encoding="ASCII", errors="strict"): + def __init__(self, file, *, fix_imports=True, + encoding="ASCII", errors="strict"): """This takes a binary file for reading a pickle data stream. The protocol version of the pickle is detected automatically, so no @@ -779,15 +790,21 @@ class _Unpickler: reading, a BytesIO object, or any other custom object that meets this interface. - Optional keyword arguments are encoding and errors, which are - used to decode 8-bit string instances pickled by Python 2.x. - These default to 'ASCII' and 'strict', respectively. + Optional keyword arguments are *fix_imports*, *encoding* and *errors*, + which are used to control compatiblity support for pickle stream + generated by Python 2.x. If *fix_imports* is True, pickle will try to + map the old Python 2.x names to the new names used in Python 3.x. The + *encoding* and *errors* tell pickle how to decode 8-bit string + instances pickled by Python 2.x; these default to 'ASCII' and + 'strict', respectively. """ self.readline = file.readline self.read = file.read self.memo = {} self.encoding = encoding self.errors = errors + self.proto = 0 + self.fix_imports = fix_imports def load(self): """Read a pickled object representation from the open file. @@ -838,6 +855,7 @@ class _Unpickler: proto = ord(self.read(1)) if not 0 <= proto <= HIGHEST_PROTOCOL: raise ValueError("unsupported pickle protocol: %d" % proto) + self.proto = proto dispatch[PROTO[0]] = load_proto def load_persid(self): @@ -1088,7 +1106,12 @@ class _Unpickler: self.append(obj) def find_class(self, module, name): - # Subclasses may override this + # Subclasses may override this. + if self.proto < 3 and self.fix_imports: + if (module, name) in _compat_pickle.NAME_MAPPING: + module, name = _compat_pickle.NAME_MAPPING[(module, name)] + if module in _compat_pickle.IMPORT_MAPPING: + module = _compat_pickle.IMPORT_MAPPING[module] __import__(module, level=0) mod = sys.modules[module] klass = getattr(mod, name) @@ -1327,27 +1350,28 @@ except ImportError: # Shorthands -def dump(obj, file, protocol=None): - Pickler(file, protocol).dump(obj) +def dump(obj, file, protocol=None, *, fix_imports=True): + Pickler(file, protocol, fix_imports=fix_imports).dump(obj) -def dumps(obj, protocol=None): +def dumps(obj, protocol=None, *, fix_imports=True): f = io.BytesIO() - Pickler(f, protocol).dump(obj) + Pickler(f, protocol, fix_imports=fix_imports).dump(obj) res = f.getvalue() assert isinstance(res, bytes_types) return res -def load(file, *, encoding="ASCII", errors="strict"): - return Unpickler(file, encoding=encoding, errors=errors).load() +def load(file, *, fix_imports=True, encoding="ASCII", errors="strict"): + return Unpickler(file, fix_imports=fix_imports, + encoding=encoding, errors=errors).load() -def loads(s, *, encoding="ASCII", errors="strict"): +def loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"): if isinstance(s, str): raise TypeError("Can't load pickle from unicode string") file = io.BytesIO(s) - return Unpickler(file, encoding=encoding, errors=errors).load() + return Unpickler(file, fix_imports=fix_imports, + encoding=encoding, errors=errors).load() # Doctest - def _test(): import doctest return doctest.testmod() diff --git a/Lib/pickletools.py b/Lib/pickletools.py index 2bb69d1..ca11aa3 100644 --- a/Lib/pickletools.py +++ b/Lib/pickletools.py @@ -2066,27 +2066,27 @@ _dis_test = r""" 29: ( MARK 30: d DICT (MARK at 29) 31: p PUT 2 - 34: c GLOBAL 'builtins bytes' - 50: p PUT 3 - 53: ( MARK - 54: ( MARK - 55: l LIST (MARK at 54) - 56: p PUT 4 - 59: L LONG 97 - 64: a APPEND - 65: L LONG 98 - 70: a APPEND - 71: L LONG 99 - 76: a APPEND - 77: t TUPLE (MARK at 53) - 78: p PUT 5 - 81: R REDUCE - 82: p PUT 6 - 85: V UNICODE 'def' - 90: p PUT 7 - 93: s SETITEM - 94: a APPEND - 95: . STOP + 34: c GLOBAL '__builtin__ bytes' + 53: p PUT 3 + 56: ( MARK + 57: ( MARK + 58: l LIST (MARK at 57) + 59: p PUT 4 + 62: L LONG 97 + 67: a APPEND + 68: L LONG 98 + 73: a APPEND + 74: L LONG 99 + 79: a APPEND + 80: t TUPLE (MARK at 56) + 81: p PUT 5 + 84: R REDUCE + 85: p PUT 6 + 88: V UNICODE 'def' + 93: p PUT 7 + 96: s SETITEM + 97: a APPEND + 98: . STOP highest protocol among opcodes = 0 Try again with a "binary" pickle. @@ -2105,25 +2105,25 @@ Try again with a "binary" pickle. 14: q BINPUT 1 16: } EMPTY_DICT 17: q BINPUT 2 - 19: c GLOBAL 'builtins bytes' - 35: q BINPUT 3 - 37: ( MARK - 38: ] EMPTY_LIST - 39: q BINPUT 4 - 41: ( MARK - 42: K BININT1 97 - 44: K BININT1 98 - 46: K BININT1 99 - 48: e APPENDS (MARK at 41) - 49: t TUPLE (MARK at 37) - 50: q BINPUT 5 - 52: R REDUCE - 53: q BINPUT 6 - 55: X BINUNICODE 'def' - 63: q BINPUT 7 - 65: s SETITEM - 66: e APPENDS (MARK at 3) - 67: . STOP + 19: c GLOBAL '__builtin__ bytes' + 38: q BINPUT 3 + 40: ( MARK + 41: ] EMPTY_LIST + 42: q BINPUT 4 + 44: ( MARK + 45: K BININT1 97 + 47: K BININT1 98 + 49: K BININT1 99 + 51: e APPENDS (MARK at 44) + 52: t TUPLE (MARK at 40) + 53: q BINPUT 5 + 55: R REDUCE + 56: q BINPUT 6 + 58: X BINUNICODE 'def' + 66: q BINPUT 7 + 68: s SETITEM + 69: e APPENDS (MARK at 3) + 70: . STOP highest protocol among opcodes = 1 Exercise the INST/OBJ/BUILD family. @@ -2141,58 +2141,58 @@ highest protocol among opcodes = 0 0: ( MARK 1: l LIST (MARK at 0) 2: p PUT 0 - 5: c GLOBAL 'copyreg _reconstructor' - 29: p PUT 1 - 32: ( MARK - 33: c GLOBAL 'pickletools _Example' - 55: p PUT 2 - 58: c GLOBAL 'builtins object' - 75: p PUT 3 - 78: N NONE - 79: t TUPLE (MARK at 32) - 80: p PUT 4 - 83: R REDUCE - 84: p PUT 5 - 87: ( MARK - 88: d DICT (MARK at 87) - 89: p PUT 6 - 92: V UNICODE 'value' - 99: p PUT 7 - 102: L LONG 42 - 107: s SETITEM - 108: b BUILD - 109: a APPEND - 110: g GET 5 + 5: c GLOBAL 'copy_reg _reconstructor' + 30: p PUT 1 + 33: ( MARK + 34: c GLOBAL 'pickletools _Example' + 56: p PUT 2 + 59: c GLOBAL '__builtin__ object' + 79: p PUT 3 + 82: N NONE + 83: t TUPLE (MARK at 33) + 84: p PUT 4 + 87: R REDUCE + 88: p PUT 5 + 91: ( MARK + 92: d DICT (MARK at 91) + 93: p PUT 6 + 96: V UNICODE 'value' + 103: p PUT 7 + 106: L LONG 42 + 111: s SETITEM + 112: b BUILD 113: a APPEND - 114: . STOP + 114: g GET 5 + 117: a APPEND + 118: . STOP highest protocol among opcodes = 0 >>> dis(pickle.dumps(x, 1)) 0: ] EMPTY_LIST 1: q BINPUT 0 3: ( MARK - 4: c GLOBAL 'copyreg _reconstructor' - 28: q BINPUT 1 - 30: ( MARK - 31: c GLOBAL 'pickletools _Example' - 53: q BINPUT 2 - 55: c GLOBAL 'builtins object' - 72: q BINPUT 3 - 74: N NONE - 75: t TUPLE (MARK at 30) - 76: q BINPUT 4 - 78: R REDUCE - 79: q BINPUT 5 - 81: } EMPTY_DICT - 82: q BINPUT 6 - 84: X BINUNICODE 'value' - 94: q BINPUT 7 - 96: K BININT1 42 - 98: s SETITEM - 99: b BUILD - 100: h BINGET 5 - 102: e APPENDS (MARK at 3) - 103: . STOP + 4: c GLOBAL 'copy_reg _reconstructor' + 29: q BINPUT 1 + 31: ( MARK + 32: c GLOBAL 'pickletools _Example' + 54: q BINPUT 2 + 56: c GLOBAL '__builtin__ object' + 76: q BINPUT 3 + 78: N NONE + 79: t TUPLE (MARK at 31) + 80: q BINPUT 4 + 82: R REDUCE + 83: q BINPUT 5 + 85: } EMPTY_DICT + 86: q BINPUT 6 + 88: X BINUNICODE 'value' + 98: q BINPUT 7 + 100: K BININT1 42 + 102: s SETITEM + 103: b BUILD + 104: h BINGET 5 + 106: e APPENDS (MARK at 3) + 107: . STOP highest protocol among opcodes = 1 Try "the canonical" recursive-object test. diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index 58ce3b5..3ed26b8 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -3,6 +3,7 @@ import unittest import pickle import pickletools import copyreg +from http.cookies import SimpleCookie from test.support import TestFailed, TESTFN, run_with_locale @@ -342,6 +343,24 @@ DATA2_DIS = """\ highest protocol among opcodes = 2 """ +# set([1,2]) pickled from 2.x with protocol 2 +DATA3 = b'\x80\x02c__builtin__\nset\nq\x00]q\x01(K\x01K\x02e\x85q\x02Rq\x03.' + +# xrange(5) pickled from 2.x with protocol 2 +DATA4 = b'\x80\x02c__builtin__\nxrange\nq\x00K\x00K\x05K\x01\x87q\x01Rq\x02.' + +# a SimpleCookie() object pickled from 2.x with protocol 2 +DATA5 = (b'\x80\x02cCookie\nSimpleCookie\nq\x00)\x81q\x01U\x03key' + b'q\x02cCookie\nMorsel\nq\x03)\x81q\x04(U\x07commentq\x05U' + b'\x00q\x06U\x06domainq\x07h\x06U\x06secureq\x08h\x06U\x07' + b'expiresq\th\x06U\x07max-ageq\nh\x06U\x07versionq\x0bh\x06U' + b'\x04pathq\x0ch\x06U\x08httponlyq\rh\x06u}q\x0e(U\x0b' + b'coded_valueq\x0fU\x05valueq\x10h\x10h\x10h\x02h\x02ubs}q\x11b.') + +# set([3]) pickled from 2.x with protocol 2 +DATA6 = b'\x80\x02c__builtin__\nset\nq\x00]q\x01K\x03a\x85q\x02Rq\x03.' + + def create_data(): c = C() c.foo = 1 @@ -956,6 +975,29 @@ class AbstractPickleTests(unittest.TestCase): for x_key, y_key in zip(x_keys, y_keys): self.assertIs(x_key, y_key) + def test_unpickle_from_2x(self): + # Unpickle non-trivial data from Python 2.x. + loaded = self.loads(DATA3) + self.assertEqual(loaded, set([1, 2])) + loaded = self.loads(DATA4) + self.assertEqual(type(loaded), type(range(0))) + self.assertEqual(list(loaded), list(range(5))) + loaded = self.loads(DATA5) + self.assertEqual(type(loaded), SimpleCookie) + self.assertEqual(list(loaded.keys()), ["key"]) + self.assertEqual(loaded["key"].value, "Set-Cookie: key=value") + + def test_pickle_to_2x(self): + # Pickle non-trivial data with protocol 2, expecting that it yields + # the same result as Python 2.x did. + # NOTE: this test is a bit too strong since we can produce different + # bytecode that 2.x will still understand. + dumped = self.dumps(range(5), 2) + self.assertEqual(dumped, DATA4) + dumped = self.dumps(set([3]), 2) + self.assertEqual(dumped, DATA6) + + # Test classes for reduce_ex class REX_one(object): diff --git a/Lib/test/test_pickletools.py b/Lib/test/test_pickletools.py index 3e701b0..823b0c2 100644 --- a/Lib/test/test_pickletools.py +++ b/Lib/test/test_pickletools.py @@ -12,6 +12,9 @@ class OptimizedPickleTests(AbstractPickleTests, AbstractPickleModuleTests): def loads(self, buf): return pickle.loads(buf) + # Test relies on precise output of dumps() + test_pickle_to_2x = None + def test_main(): support.run_unittest(OptimizedPickleTests) |