summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
authorAntoine Pitrou <solipsis@pitrou.net>2009-06-04 20:32:06 (GMT)
committerAntoine Pitrou <solipsis@pitrou.net>2009-06-04 20:32:06 (GMT)
commitd9dfaa948775061ef6218b181dfb617206db9e8c (patch)
tree1dd832a72a230d64f5e3a6fb6b153ec18c0a073d /Lib
parent751899a59f27e84547c454cf10dec71a8cdf8171 (diff)
downloadcpython-d9dfaa948775061ef6218b181dfb617206db9e8c.zip
cpython-d9dfaa948775061ef6218b181dfb617206db9e8c.tar.gz
cpython-d9dfaa948775061ef6218b181dfb617206db9e8c.tar.bz2
Issue #6137: The pickle module now translates module names when loading
or dumping pickles with a 2.x-compatible protocol, in order to make data sharing and migration easier. This behaviour can be disabled using the new `fix_imports` optional argument.
Diffstat (limited to 'Lib')
-rw-r--r--Lib/_compat_pickle.py81
-rw-r--r--Lib/pickle.py56
-rw-r--r--Lib/pickletools.py170
-rw-r--r--Lib/test/pickletester.py42
-rw-r--r--Lib/test/test_pickletools.py3
5 files changed, 251 insertions, 101 deletions
diff --git a/Lib/_compat_pickle.py b/Lib/_compat_pickle.py
new file mode 100644
index 0000000..700c80c
--- /dev/null
+++ b/Lib/_compat_pickle.py
@@ -0,0 +1,81 @@
+# This module is used to map the old Python 2 names to the new names used in
+# Python 3 for the pickle module. This is needed to make pickle streams
+# generated with Python 2 loadable by Python 3.
+
+# This is a copy of lib2to3.fixes.fix_imports.MAPPING. We cannot import
+# lib2to3 and use the mapping defined there, because lib2to3 uses pickle.
+# Thus, this could cause the module to be imported recursively.
+IMPORT_MAPPING = {
+ 'StringIO': 'io',
+ 'cStringIO': 'io',
+ 'cPickle': 'pickle',
+ '__builtin__' : 'builtins',
+ 'copy_reg': 'copyreg',
+ 'Queue': 'queue',
+ 'SocketServer': 'socketserver',
+ 'ConfigParser': 'configparser',
+ 'repr': 'reprlib',
+ 'FileDialog': 'tkinter.filedialog',
+ 'tkFileDialog': 'tkinter.filedialog',
+ 'SimpleDialog': 'tkinter.simpledialog',
+ 'tkSimpleDialog': 'tkinter.simpledialog',
+ 'tkColorChooser': 'tkinter.colorchooser',
+ 'tkCommonDialog': 'tkinter.commondialog',
+ 'Dialog': 'tkinter.dialog',
+ 'Tkdnd': 'tkinter.dnd',
+ 'tkFont': 'tkinter.font',
+ 'tkMessageBox': 'tkinter.messagebox',
+ 'ScrolledText': 'tkinter.scrolledtext',
+ 'Tkconstants': 'tkinter.constants',
+ 'Tix': 'tkinter.tix',
+ 'ttk': 'tkinter.ttk',
+ 'Tkinter': 'tkinter',
+ 'markupbase': '_markupbase',
+ '_winreg': 'winreg',
+ 'thread': '_thread',
+ 'dummy_thread': '_dummy_thread',
+ 'dbhash': 'dbm.bsd',
+ 'dumbdbm': 'dbm.dumb',
+ 'dbm': 'dbm.ndbm',
+ 'gdbm': 'dbm.gnu',
+ 'xmlrpclib': 'xmlrpc.client',
+ 'DocXMLRPCServer': 'xmlrpc.server',
+ 'SimpleXMLRPCServer': 'xmlrpc.server',
+ 'httplib': 'http.client',
+ 'htmlentitydefs' : 'html.entities',
+ 'HTMLParser' : 'html.parser',
+ 'Cookie': 'http.cookies',
+ 'cookielib': 'http.cookiejar',
+ 'BaseHTTPServer': 'http.server',
+ 'SimpleHTTPServer': 'http.server',
+ 'CGIHTTPServer': 'http.server',
+ 'test.test_support': 'test.support',
+ 'commands': 'subprocess',
+ 'UserString' : 'collections',
+ 'UserList' : 'collections',
+ 'urlparse' : 'urllib.parse',
+ 'robotparser' : 'urllib.robotparser',
+ 'whichdb': 'dbm',
+ 'anydbm': 'dbm'
+}
+
+
+# This contains rename rules that are easy to handle. We ignore the more
+# complex stuff (e.g. mapping the names in the urllib and types modules).
+# These rules should be run before import names are fixed.
+NAME_MAPPING = {
+ ('__builtin__', 'xrange'): ('builtins', 'range'),
+ ('__builtin__', 'reduce'): ('functools', 'reduce'),
+ ('__builtin__', 'intern'): ('sys', 'intern'),
+ ('__builtin__', 'unichr'): ('builtins', 'chr'),
+ ('__builtin__', 'basestring'): ('builtins', 'str'),
+ ('__builtin__', 'long'): ('builtins', 'int'),
+ ('itertools', 'izip'): ('builtins', 'zip'),
+ ('itertools', 'imap'): ('builtins', 'map'),
+ ('itertools', 'ifilter'): ('builtins', 'filter'),
+ ('itertools', 'ifilterfalse'): ('itertools', 'filterfalse'),
+}
+
+# Same, but for 3.x to 2.x
+REVERSE_IMPORT_MAPPING = dict((v, k) for (k, v) in IMPORT_MAPPING.items())
+REVERSE_NAME_MAPPING = dict((v, k) for (k, v) in NAME_MAPPING.items())
diff --git a/Lib/pickle.py b/Lib/pickle.py
index 720c1a0..7af4ce9 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -34,6 +34,7 @@ import struct
import re
import io
import codecs
+import _compat_pickle
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
"Unpickler", "dump", "dumps", "load", "loads"]
@@ -171,12 +172,11 @@ SHORT_BINBYTES = b'C' # " " ; " " " " < 256 bytes
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
-
# Pickling machinery
class _Pickler:
- def __init__(self, file, protocol=None):
+ def __init__(self, file, protocol=None, *, fix_imports=True):
"""This takes a binary file for writing a pickle data stream.
The optional protocol argument tells the pickler to use the
@@ -193,6 +193,10 @@ class _Pickler:
bytes argument. It can thus be a file object opened for binary
writing, a io.BytesIO instance, or any other custom object that
meets this interface.
+
+ If fix_imports is True and protocol is less than 3, pickle will try to
+ map the new Python 3.x names to the old module names used in Python
+ 2.x, so that the pickle data stream is readable with Python 2.x.
"""
if protocol is None:
protocol = DEFAULT_PROTOCOL
@@ -208,6 +212,7 @@ class _Pickler:
self.proto = int(protocol)
self.bin = protocol >= 1
self.fast = 0
+ self.fix_imports = fix_imports and protocol < 3
def clear_memo(self):
"""Clears the pickler's "memo".
@@ -698,6 +703,11 @@ class _Pickler:
write(GLOBAL + bytes(module, "utf-8") + b'\n' +
bytes(name, "utf-8") + b'\n')
else:
+ if self.fix_imports:
+ if (module, name) in _compat_pickle.REVERSE_NAME_MAPPING:
+ module, name = _compat_pickle.REVERSE_NAME_MAPPING[(module, name)]
+ if module in _compat_pickle.REVERSE_IMPORT_MAPPING:
+ module = _compat_pickle.REVERSE_IMPORT_MAPPING[module]
try:
write(GLOBAL + bytes(module, "ascii") + b'\n' +
bytes(name, "ascii") + b'\n')
@@ -766,7 +776,8 @@ def whichmodule(func, funcname):
class _Unpickler:
- def __init__(self, file, *, encoding="ASCII", errors="strict"):
+ def __init__(self, file, *, fix_imports=True,
+ encoding="ASCII", errors="strict"):
"""This takes a binary file for reading a pickle data stream.
The protocol version of the pickle is detected automatically, so no
@@ -779,15 +790,21 @@ class _Unpickler:
reading, a BytesIO object, or any other custom object that
meets this interface.
- Optional keyword arguments are encoding and errors, which are
- used to decode 8-bit string instances pickled by Python 2.x.
- These default to 'ASCII' and 'strict', respectively.
+ Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
+ which are used to control compatibility support for pickle streams
+ generated by Python 2.x. If *fix_imports* is True, pickle will try to
+ map the old Python 2.x names to the new names used in Python 3.x. The
+ *encoding* and *errors* tell pickle how to decode 8-bit string
+ instances pickled by Python 2.x; these default to 'ASCII' and
+ 'strict', respectively.
"""
self.readline = file.readline
self.read = file.read
self.memo = {}
self.encoding = encoding
self.errors = errors
+ self.proto = 0
+ self.fix_imports = fix_imports
def load(self):
"""Read a pickled object representation from the open file.
@@ -838,6 +855,7 @@ class _Unpickler:
proto = ord(self.read(1))
if not 0 <= proto <= HIGHEST_PROTOCOL:
raise ValueError("unsupported pickle protocol: %d" % proto)
+ self.proto = proto
dispatch[PROTO[0]] = load_proto
def load_persid(self):
@@ -1088,7 +1106,12 @@ class _Unpickler:
self.append(obj)
def find_class(self, module, name):
- # Subclasses may override this
+ # Subclasses may override this.
+ if self.proto < 3 and self.fix_imports:
+ if (module, name) in _compat_pickle.NAME_MAPPING:
+ module, name = _compat_pickle.NAME_MAPPING[(module, name)]
+ if module in _compat_pickle.IMPORT_MAPPING:
+ module = _compat_pickle.IMPORT_MAPPING[module]
__import__(module, level=0)
mod = sys.modules[module]
klass = getattr(mod, name)
@@ -1327,27 +1350,28 @@ except ImportError:
# Shorthands
-def dump(obj, file, protocol=None):
- Pickler(file, protocol).dump(obj)
+def dump(obj, file, protocol=None, *, fix_imports=True):
+ Pickler(file, protocol, fix_imports=fix_imports).dump(obj)
-def dumps(obj, protocol=None):
+def dumps(obj, protocol=None, *, fix_imports=True):
f = io.BytesIO()
- Pickler(f, protocol).dump(obj)
+ Pickler(f, protocol, fix_imports=fix_imports).dump(obj)
res = f.getvalue()
assert isinstance(res, bytes_types)
return res
-def load(file, *, encoding="ASCII", errors="strict"):
- return Unpickler(file, encoding=encoding, errors=errors).load()
+def load(file, *, fix_imports=True, encoding="ASCII", errors="strict"):
+ return Unpickler(file, fix_imports=fix_imports,
+ encoding=encoding, errors=errors).load()
-def loads(s, *, encoding="ASCII", errors="strict"):
+def loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"):
if isinstance(s, str):
raise TypeError("Can't load pickle from unicode string")
file = io.BytesIO(s)
- return Unpickler(file, encoding=encoding, errors=errors).load()
+ return Unpickler(file, fix_imports=fix_imports,
+ encoding=encoding, errors=errors).load()
# Doctest
-
def _test():
import doctest
return doctest.testmod()
diff --git a/Lib/pickletools.py b/Lib/pickletools.py
index 2bb69d1..ca11aa3 100644
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -2066,27 +2066,27 @@ _dis_test = r"""
29: ( MARK
30: d DICT (MARK at 29)
31: p PUT 2
- 34: c GLOBAL 'builtins bytes'
- 50: p PUT 3
- 53: ( MARK
- 54: ( MARK
- 55: l LIST (MARK at 54)
- 56: p PUT 4
- 59: L LONG 97
- 64: a APPEND
- 65: L LONG 98
- 70: a APPEND
- 71: L LONG 99
- 76: a APPEND
- 77: t TUPLE (MARK at 53)
- 78: p PUT 5
- 81: R REDUCE
- 82: p PUT 6
- 85: V UNICODE 'def'
- 90: p PUT 7
- 93: s SETITEM
- 94: a APPEND
- 95: . STOP
+ 34: c GLOBAL '__builtin__ bytes'
+ 53: p PUT 3
+ 56: ( MARK
+ 57: ( MARK
+ 58: l LIST (MARK at 57)
+ 59: p PUT 4
+ 62: L LONG 97
+ 67: a APPEND
+ 68: L LONG 98
+ 73: a APPEND
+ 74: L LONG 99
+ 79: a APPEND
+ 80: t TUPLE (MARK at 56)
+ 81: p PUT 5
+ 84: R REDUCE
+ 85: p PUT 6
+ 88: V UNICODE 'def'
+ 93: p PUT 7
+ 96: s SETITEM
+ 97: a APPEND
+ 98: . STOP
highest protocol among opcodes = 0
Try again with a "binary" pickle.
@@ -2105,25 +2105,25 @@ Try again with a "binary" pickle.
14: q BINPUT 1
16: } EMPTY_DICT
17: q BINPUT 2
- 19: c GLOBAL 'builtins bytes'
- 35: q BINPUT 3
- 37: ( MARK
- 38: ] EMPTY_LIST
- 39: q BINPUT 4
- 41: ( MARK
- 42: K BININT1 97
- 44: K BININT1 98
- 46: K BININT1 99
- 48: e APPENDS (MARK at 41)
- 49: t TUPLE (MARK at 37)
- 50: q BINPUT 5
- 52: R REDUCE
- 53: q BINPUT 6
- 55: X BINUNICODE 'def'
- 63: q BINPUT 7
- 65: s SETITEM
- 66: e APPENDS (MARK at 3)
- 67: . STOP
+ 19: c GLOBAL '__builtin__ bytes'
+ 38: q BINPUT 3
+ 40: ( MARK
+ 41: ] EMPTY_LIST
+ 42: q BINPUT 4
+ 44: ( MARK
+ 45: K BININT1 97
+ 47: K BININT1 98
+ 49: K BININT1 99
+ 51: e APPENDS (MARK at 44)
+ 52: t TUPLE (MARK at 40)
+ 53: q BINPUT 5
+ 55: R REDUCE
+ 56: q BINPUT 6
+ 58: X BINUNICODE 'def'
+ 66: q BINPUT 7
+ 68: s SETITEM
+ 69: e APPENDS (MARK at 3)
+ 70: . STOP
highest protocol among opcodes = 1
Exercise the INST/OBJ/BUILD family.
@@ -2141,58 +2141,58 @@ highest protocol among opcodes = 0
0: ( MARK
1: l LIST (MARK at 0)
2: p PUT 0
- 5: c GLOBAL 'copyreg _reconstructor'
- 29: p PUT 1
- 32: ( MARK
- 33: c GLOBAL 'pickletools _Example'
- 55: p PUT 2
- 58: c GLOBAL 'builtins object'
- 75: p PUT 3
- 78: N NONE
- 79: t TUPLE (MARK at 32)
- 80: p PUT 4
- 83: R REDUCE
- 84: p PUT 5
- 87: ( MARK
- 88: d DICT (MARK at 87)
- 89: p PUT 6
- 92: V UNICODE 'value'
- 99: p PUT 7
- 102: L LONG 42
- 107: s SETITEM
- 108: b BUILD
- 109: a APPEND
- 110: g GET 5
+ 5: c GLOBAL 'copy_reg _reconstructor'
+ 30: p PUT 1
+ 33: ( MARK
+ 34: c GLOBAL 'pickletools _Example'
+ 56: p PUT 2
+ 59: c GLOBAL '__builtin__ object'
+ 79: p PUT 3
+ 82: N NONE
+ 83: t TUPLE (MARK at 33)
+ 84: p PUT 4
+ 87: R REDUCE
+ 88: p PUT 5
+ 91: ( MARK
+ 92: d DICT (MARK at 91)
+ 93: p PUT 6
+ 96: V UNICODE 'value'
+ 103: p PUT 7
+ 106: L LONG 42
+ 111: s SETITEM
+ 112: b BUILD
113: a APPEND
- 114: . STOP
+ 114: g GET 5
+ 117: a APPEND
+ 118: . STOP
highest protocol among opcodes = 0
>>> dis(pickle.dumps(x, 1))
0: ] EMPTY_LIST
1: q BINPUT 0
3: ( MARK
- 4: c GLOBAL 'copyreg _reconstructor'
- 28: q BINPUT 1
- 30: ( MARK
- 31: c GLOBAL 'pickletools _Example'
- 53: q BINPUT 2
- 55: c GLOBAL 'builtins object'
- 72: q BINPUT 3
- 74: N NONE
- 75: t TUPLE (MARK at 30)
- 76: q BINPUT 4
- 78: R REDUCE
- 79: q BINPUT 5
- 81: } EMPTY_DICT
- 82: q BINPUT 6
- 84: X BINUNICODE 'value'
- 94: q BINPUT 7
- 96: K BININT1 42
- 98: s SETITEM
- 99: b BUILD
- 100: h BINGET 5
- 102: e APPENDS (MARK at 3)
- 103: . STOP
+ 4: c GLOBAL 'copy_reg _reconstructor'
+ 29: q BINPUT 1
+ 31: ( MARK
+ 32: c GLOBAL 'pickletools _Example'
+ 54: q BINPUT 2
+ 56: c GLOBAL '__builtin__ object'
+ 76: q BINPUT 3
+ 78: N NONE
+ 79: t TUPLE (MARK at 31)
+ 80: q BINPUT 4
+ 82: R REDUCE
+ 83: q BINPUT 5
+ 85: } EMPTY_DICT
+ 86: q BINPUT 6
+ 88: X BINUNICODE 'value'
+ 98: q BINPUT 7
+ 100: K BININT1 42
+ 102: s SETITEM
+ 103: b BUILD
+ 104: h BINGET 5
+ 106: e APPENDS (MARK at 3)
+ 107: . STOP
highest protocol among opcodes = 1
Try "the canonical" recursive-object test.
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 58ce3b5..3ed26b8 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -3,6 +3,7 @@ import unittest
import pickle
import pickletools
import copyreg
+from http.cookies import SimpleCookie
from test.support import TestFailed, TESTFN, run_with_locale
@@ -342,6 +343,24 @@ DATA2_DIS = """\
highest protocol among opcodes = 2
"""
+# set([1,2]) pickled from 2.x with protocol 2
+DATA3 = b'\x80\x02c__builtin__\nset\nq\x00]q\x01(K\x01K\x02e\x85q\x02Rq\x03.'
+
+# xrange(5) pickled from 2.x with protocol 2
+DATA4 = b'\x80\x02c__builtin__\nxrange\nq\x00K\x00K\x05K\x01\x87q\x01Rq\x02.'
+
+# a SimpleCookie() object pickled from 2.x with protocol 2
+DATA5 = (b'\x80\x02cCookie\nSimpleCookie\nq\x00)\x81q\x01U\x03key'
+ b'q\x02cCookie\nMorsel\nq\x03)\x81q\x04(U\x07commentq\x05U'
+ b'\x00q\x06U\x06domainq\x07h\x06U\x06secureq\x08h\x06U\x07'
+ b'expiresq\th\x06U\x07max-ageq\nh\x06U\x07versionq\x0bh\x06U'
+ b'\x04pathq\x0ch\x06U\x08httponlyq\rh\x06u}q\x0e(U\x0b'
+ b'coded_valueq\x0fU\x05valueq\x10h\x10h\x10h\x02h\x02ubs}q\x11b.')
+
+# set([3]) pickled from 2.x with protocol 2
+DATA6 = b'\x80\x02c__builtin__\nset\nq\x00]q\x01K\x03a\x85q\x02Rq\x03.'
+
+
def create_data():
c = C()
c.foo = 1
@@ -956,6 +975,29 @@ class AbstractPickleTests(unittest.TestCase):
for x_key, y_key in zip(x_keys, y_keys):
self.assertIs(x_key, y_key)
+ def test_unpickle_from_2x(self):
+ # Unpickle non-trivial data from Python 2.x.
+ loaded = self.loads(DATA3)
+ self.assertEqual(loaded, set([1, 2]))
+ loaded = self.loads(DATA4)
+ self.assertEqual(type(loaded), type(range(0)))
+ self.assertEqual(list(loaded), list(range(5)))
+ loaded = self.loads(DATA5)
+ self.assertEqual(type(loaded), SimpleCookie)
+ self.assertEqual(list(loaded.keys()), ["key"])
+ self.assertEqual(loaded["key"].value, "Set-Cookie: key=value")
+
+ def test_pickle_to_2x(self):
+ # Pickle non-trivial data with protocol 2, expecting that it yields
+ # the same result as Python 2.x did.
+ # NOTE: this test is a bit too strong since we can produce different
+ # bytecode that 2.x will still understand.
+ dumped = self.dumps(range(5), 2)
+ self.assertEqual(dumped, DATA4)
+ dumped = self.dumps(set([3]), 2)
+ self.assertEqual(dumped, DATA6)
+
+
# Test classes for reduce_ex
class REX_one(object):
diff --git a/Lib/test/test_pickletools.py b/Lib/test/test_pickletools.py
index 3e701b0..823b0c2 100644
--- a/Lib/test/test_pickletools.py
+++ b/Lib/test/test_pickletools.py
@@ -12,6 +12,9 @@ class OptimizedPickleTests(AbstractPickleTests, AbstractPickleModuleTests):
def loads(self, buf):
return pickle.loads(buf)
+ # Test relies on precise output of dumps()
+ test_pickle_to_2x = None
+
def test_main():
support.run_unittest(OptimizedPickleTests)