diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2009-06-04 20:32:06 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2009-06-04 20:32:06 (GMT) |
commit | d9dfaa948775061ef6218b181dfb617206db9e8c (patch) | |
tree | 1dd832a72a230d64f5e3a6fb6b153ec18c0a073d /Modules | |
parent | 751899a59f27e84547c454cf10dec71a8cdf8171 (diff) | |
download | cpython-d9dfaa948775061ef6218b181dfb617206db9e8c.zip cpython-d9dfaa948775061ef6218b181dfb617206db9e8c.tar.gz cpython-d9dfaa948775061ef6218b181dfb617206db9e8c.tar.bz2 |
Issue #6137: The pickle module now translates module names when loading
or dumping pickles with a 2.x-compatible protocol, in order to make data
sharing and migration easier. This behaviour can be disabled using the
new `fix_imports` optional argument.
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/_pickle.c | 281 |
1 file changed, 240 insertions, 41 deletions
diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 3ad55b5..0e6df34 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -103,25 +103,33 @@ enum { /* Exception classes for pickle. These should override the ones defined in pickle.py, when the C-optimized Pickler and Unpickler are used. */ -static PyObject *PickleError; -static PyObject *PicklingError; -static PyObject *UnpicklingError; +static PyObject *PickleError = NULL; +static PyObject *PicklingError = NULL; +static PyObject *UnpicklingError = NULL; /* copyreg.dispatch_table, {type_object: pickling_function} */ -static PyObject *dispatch_table; +static PyObject *dispatch_table = NULL; /* For EXT[124] opcodes. */ /* copyreg._extension_registry, {(module_name, function_name): code} */ -static PyObject *extension_registry; +static PyObject *extension_registry = NULL; /* copyreg._inverted_registry, {code: (module_name, function_name)} */ -static PyObject *inverted_registry; +static PyObject *inverted_registry = NULL; /* copyreg._extension_cache, {code: object} */ -static PyObject *extension_cache; +static PyObject *extension_cache = NULL; + +/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */ +static PyObject *name_mapping_2to3 = NULL; +/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */ +static PyObject *import_mapping_2to3 = NULL; +/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */ +static PyObject *name_mapping_3to2 = NULL; +static PyObject *import_mapping_3to2 = NULL; /* XXX: Are these really nescessary? */ /* As the name says, an empty tuple. */ -static PyObject *empty_tuple; +static PyObject *empty_tuple = NULL; /* For looking up name pairs in copyreg._extension_registry. */ -static PyObject *two_tuple; +static PyObject *two_tuple = NULL; static int stack_underflow(void) @@ -315,6 +323,8 @@ typedef struct PicklerObject { should not be used if with self-referential objects. 
*/ int fast_nesting; + int fix_imports; /* Indicate whether Pickler should fix + the name of globals for Python 2.x. */ PyObject *fast_memo; } PicklerObject; @@ -340,6 +350,9 @@ typedef struct UnpicklerObject { objects. */ Py_ssize_t num_marks; /* Number of marks in the mark stack. */ Py_ssize_t marks_size; /* Current allocated size of the mark stack. */ + int proto; /* Protocol of the pickle loaded. */ + int fix_imports; /* Indicate whether Unpickler should fix + the name of globals pickled by Python 2.x. */ } UnpicklerObject; /* Forward declarations */ @@ -1972,6 +1985,63 @@ save_global(PicklerObject *self, PyObject *obj, PyObject *name) unicode_encoder = PyUnicode_AsASCIIString; } + /* For protocol < 3 and if the user didn't request against doing so, + we convert module names to the old 2.x module names. */ + if (self->fix_imports) { + PyObject *key; + PyObject *item; + + key = PyTuple_Pack(2, module_name, global_name); + if (key == NULL) + goto error; + item = PyDict_GetItemWithError(name_mapping_3to2, key); + Py_DECREF(key); + if (item) { + if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_NAME_MAPPING values " + "should be 2-tuples, not %.200s", + Py_TYPE(item)->tp_name); + goto error; + } + Py_CLEAR(module_name); + Py_CLEAR(global_name); + module_name = PyTuple_GET_ITEM(item, 0); + global_name = PyTuple_GET_ITEM(item, 1); + if (!PyUnicode_Check(module_name) || + !PyUnicode_Check(global_name)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_NAME_MAPPING values " + "should be pairs of str, not (%.200s, %.200s)", + Py_TYPE(module_name)->tp_name, + Py_TYPE(global_name)->tp_name); + goto error; + } + Py_INCREF(module_name); + Py_INCREF(global_name); + } + else if (PyErr_Occurred()) { + goto error; + } + + item = PyDict_GetItemWithError(import_mapping_3to2, module_name); + if (item) { + if (!PyUnicode_Check(item)) { + PyErr_Format(PyExc_RuntimeError, + 
"_compat_pickle.REVERSE_IMPORT_MAPPING values " + "should be strings, not %.200s", + Py_TYPE(item)->tp_name); + goto error; + } + Py_CLEAR(module_name); + module_name = item; + Py_INCREF(module_name); + } + else if (PyErr_Occurred()) { + goto error; + } + } + /* Save the name of the module. */ encoded = unicode_encoder(module_name); if (encoded == NULL) { @@ -2608,18 +2678,23 @@ PyDoc_STRVAR(Pickler_doc, "The file argument must have a write() method that accepts a single\n" "bytes argument. It can thus be a file object opened for binary\n" "writing, a io.BytesIO instance, or any other custom object that\n" -"meets this interface.\n"); +"meets this interface.\n" +"\n" +"If fix_imports is True and protocol is less than 3, pickle will try to\n" +"map the new Python 3.x names to the old module names used in Python\n" +"2.x, so that the pickle data stream is readable with Python 2.x.\n"); static int Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"file", "protocol", 0}; + static char *kwlist[] = {"file", "protocol", "fix_imports", 0}; PyObject *file; PyObject *proto_obj = NULL; long proto = 0; + int fix_imports = 1; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler", - kwlist, &file, &proto_obj)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oi:Pickler", + kwlist, &file, &proto_obj, &fix_imports)) return -1; /* In case of multiple __init__() calls, clear previous content. 
*/ @@ -2628,8 +2703,11 @@ Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds) if (proto_obj == NULL || proto_obj == Py_None) proto = DEFAULT_PROTOCOL; - else + else { proto = PyLong_AsLong(proto_obj); + if (proto == -1 && PyErr_Occurred()) + return -1; + } if (proto < 0) proto = HIGHEST_PROTOCOL; @@ -2639,12 +2717,13 @@ Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds) return -1; } - self->proto = proto; - self->bin = proto > 0; - self->arg = NULL; - self->fast = 0; - self->fast_nesting = 0; - self->fast_memo = NULL; + self->proto = proto; + self->bin = proto > 0; + self->arg = NULL; + self->fast = 0; + self->fast_nesting = 0; + self->fast_memo = NULL; + self->fix_imports = fix_imports && proto < 3; if (!PyObject_HasAttrString(file, "write")) { PyErr_SetString(PyExc_TypeError, @@ -4220,8 +4299,10 @@ load_proto(UnpicklerObject *self) return -1; i = (unsigned char)s[0]; - if (i <= HIGHEST_PROTOCOL) + if (i <= HIGHEST_PROTOCOL) { + self->proto = i; return 0; + } PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i); return -1; @@ -4383,12 +4464,67 @@ Unpickler_find_class(UnpicklerObject *self, PyObject *args) &module_name, &global_name)) return NULL; + /* Try to map the old names used in Python 2.x to the new ones used in + Python 3.x. We do this only with old pickle protocols and when the + user has not disabled the feature. */ + if (self->proto < 3 && self->fix_imports) { + PyObject *key; + PyObject *item; + + /* Check if the global (i.e., a function or a class) was renamed + or moved to another module. 
*/ + key = PyTuple_Pack(2, module_name, global_name); + if (key == NULL) + return NULL; + item = PyDict_GetItemWithError(name_mapping_2to3, key); + Py_DECREF(key); + if (item) { + if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.NAME_MAPPING values should be " + "2-tuples, not %.200s", Py_TYPE(item)->tp_name); + return NULL; + } + module_name = PyTuple_GET_ITEM(item, 0); + global_name = PyTuple_GET_ITEM(item, 1); + if (!PyUnicode_Check(module_name) || + !PyUnicode_Check(global_name)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.NAME_MAPPING values should be " + "pairs of str, not (%.200s, %.200s)", + Py_TYPE(module_name)->tp_name, + Py_TYPE(global_name)->tp_name); + return NULL; + } + } + else if (PyErr_Occurred()) { + return NULL; + } + + /* Check if the module was renamed. */ + item = PyDict_GetItemWithError(import_mapping_2to3, module_name); + if (item) { + if (!PyUnicode_Check(item)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.IMPORT_MAPPING values should be " + "strings, not %.200s", Py_TYPE(item)->tp_name); + return NULL; + } + module_name = item; + } + else if (PyErr_Occurred()) { + return NULL; + } + } + modules_dict = PySys_GetObject("modules"); if (modules_dict == NULL) return NULL; - module = PyDict_GetItem(modules_dict, module_name); + module = PyDict_GetItemWithError(modules_dict, module_name); if (module == NULL) { + if (PyErr_Occurred()) + return NULL; module = PyImport_Import(module_name); if (module == NULL) return NULL; @@ -4477,15 +4613,20 @@ PyDoc_STRVAR(Unpickler_doc, "reading, a BytesIO object, or any other custom object that\n" "meets this interface.\n" "\n" -"Optional keyword arguments are encoding and errors, which are\n" -"used to decode 8-bit string instances pickled by Python 2.x.\n" -"These default to 'ASCII' and 'strict', respectively.\n"); +"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n" +"which are used to control 
compatiblity support for pickle stream\n" +"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n" +"map the old Python 2.x names to the new names used in Python 3.x. The\n" +"*encoding* and *errors* tell pickle how to decode 8-bit string\n" +"instances pickled by Python 2.x; these default to 'ASCII' and\n" +"'strict', respectively.\n"); static int Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"file", "encoding", "errors", 0}; + static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0}; PyObject *file; + int fix_imports = 1; char *encoding = NULL; char *errors = NULL; @@ -4504,8 +4645,8 @@ Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds) extra careful in the other Unpickler methods, since a subclass could forget to call Unpickler.__init__() thus breaking our internal invariants. */ - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist, - &file, &encoding, &errors)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|iss:Unpickler", kwlist, + &file, &fix_imports, &encoding, &errors)) return -1; /* In case of multiple __init__() calls, clear previous content. */ @@ -4549,6 +4690,8 @@ Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds) self->last_string = NULL; self->arg = NULL; + self->proto = 0; + self->fix_imports = fix_imports; return 0; } @@ -4672,40 +4815,85 @@ static PyTypeObject Unpickler_Type = { }; static int -init_stuff(void) +initmodule(void) { - PyObject *copyreg; + PyObject *copyreg = NULL; + PyObject *compat_pickle = NULL; + + /* XXX: We should ensure that the types of the dictionaries imported are + exactly PyDict objects. Otherwise, it is possible to crash the pickle + since we use the PyDict API directly to access these dictionaries. 
*/ copyreg = PyImport_ImportModule("copyreg"); if (!copyreg) - return -1; - + goto error; dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table"); if (!dispatch_table) goto error; - extension_registry = \ PyObject_GetAttrString(copyreg, "_extension_registry"); if (!extension_registry) goto error; - inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry"); if (!inverted_registry) goto error; - extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache"); if (!extension_cache) goto error; + Py_CLEAR(copyreg); - Py_DECREF(copyreg); + /* Load the 2.x -> 3.x stdlib module mapping tables */ + compat_pickle = PyImport_ImportModule("_compat_pickle"); + if (!compat_pickle) + goto error; + name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING"); + if (!name_mapping_2to3) + goto error; + if (!PyDict_CheckExact(name_mapping_2to3)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.NAME_MAPPING should be a dict, not %.200s", + Py_TYPE(name_mapping_2to3)->tp_name); + goto error; + } + import_mapping_2to3 = PyObject_GetAttrString(compat_pickle, + "IMPORT_MAPPING"); + if (!import_mapping_2to3) + goto error; + if (!PyDict_CheckExact(import_mapping_2to3)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.IMPORT_MAPPING should be a dict, " + "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name); + goto error; + } + /* ... 
and the 3.x -> 2.x mapping tables */ + name_mapping_3to2 = PyObject_GetAttrString(compat_pickle, + "REVERSE_NAME_MAPPING"); + if (!name_mapping_3to2) + goto error; + if (!PyDict_CheckExact(name_mapping_3to2)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_NAME_MAPPING shouldbe a dict, " + "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name); + goto error; + } + import_mapping_3to2 = PyObject_GetAttrString(compat_pickle, + "REVERSE_IMPORT_MAPPING"); + if (!import_mapping_3to2) + goto error; + if (!PyDict_CheckExact(import_mapping_3to2)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, " + "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name); + goto error; + } + Py_CLEAR(compat_pickle); empty_tuple = PyTuple_New(0); if (empty_tuple == NULL) - return -1; - + goto error; two_tuple = PyTuple_New(2); if (two_tuple == NULL) - return -1; + goto error; /* We use this temp container with no regard to refcounts, or to * keeping containees alive. Exempt from GC, because we don't * want anything looking at two_tuple() by magic. @@ -4715,7 +4903,18 @@ init_stuff(void) return 0; error: - Py_DECREF(copyreg); + Py_CLEAR(copyreg); + Py_CLEAR(dispatch_table); + Py_CLEAR(extension_registry); + Py_CLEAR(inverted_registry); + Py_CLEAR(extension_cache); + Py_CLEAR(compat_pickle); + Py_CLEAR(name_mapping_2to3); + Py_CLEAR(import_mapping_2to3); + Py_CLEAR(name_mapping_3to2); + Py_CLEAR(import_mapping_3to2); + Py_CLEAR(empty_tuple); + Py_CLEAR(two_tuple); return -1; } @@ -4773,7 +4972,7 @@ PyInit__pickle(void) if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0) return NULL; - if (init_stuff() < 0) + if (initmodule() < 0) return NULL; return m; |