diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2024-08-05 13:21:32 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-08-05 13:21:32 (GMT) |
commit | 1bb955a2fe0237721c141fdfe520fd3ba46db11e (patch) | |
tree | 56bfc52f28cbdfc12d252e39b969e9ae4691cefb | |
parent | 1422500d020bd199b26357fc387f8b79b82226cd (diff) | |
download | cpython-1bb955a2fe0237721c141fdfe520fd3ba46db11e.zip cpython-1bb955a2fe0237721c141fdfe520fd3ba46db11e.tar.gz cpython-1bb955a2fe0237721c141fdfe520fd3ba46db11e.tar.bz2 |
gh-122459: Optimize pickling by name objects without __module__ (GH-122460)
-rw-r--r-- | Lib/pickle.py | 97 | ||||
-rw-r--r-- | Lib/test/pickletester.py | 2 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2024-07-30-15-57-07.gh-issue-122459.AYIoeN.rst | 2 | ||||
-rw-r--r-- | Modules/_pickle.c | 249 |
4 files changed, 173 insertions, 177 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py index 299c9e0..b8e114a 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -313,38 +313,45 @@ class _Unframer: # Tools used for pickling. -def _getattribute(obj, name): - top = obj - for subpath in name.split('.'): - if subpath == '<locals>': - raise AttributeError("Can't get local attribute {!r} on {!r}" - .format(name, top)) - try: - parent = obj - obj = getattr(obj, subpath) - except AttributeError: - raise AttributeError("Can't get attribute {!r} on {!r}" - .format(name, top)) from None - return obj, parent +def _getattribute(obj, dotted_path): + for subpath in dotted_path: + obj = getattr(obj, subpath) + return obj def whichmodule(obj, name): """Find the module an object belong to.""" + dotted_path = name.split('.') module_name = getattr(obj, '__module__', None) - if module_name is not None: - return module_name - # Protect the iteration by using a list copy of sys.modules against dynamic - # modules that trigger imports of other modules upon calls to getattr. - for module_name, module in sys.modules.copy().items(): - if (module_name == '__main__' - or module_name == '__mp_main__' # bpo-42406 - or module is None): - continue - try: - if _getattribute(module, name)[0] is obj: - return module_name - except AttributeError: - pass - return '__main__' + if module_name is None and '<locals>' not in dotted_path: + # Protect the iteration by using a list copy of sys.modules against dynamic + # modules that trigger imports of other modules upon calls to getattr. + for module_name, module in sys.modules.copy().items(): + if (module_name == '__main__' + or module_name == '__mp_main__' # bpo-42406 + or module is None): + continue + try: + if _getattribute(module, dotted_path) is obj: + return module_name + except AttributeError: + pass + module_name = '__main__' + elif module_name is None: + module_name = '__main__' + + try: + __import__(module_name, level=0) + module = sys.modules[module_name] + if _getattribute(module, dotted_path) is obj: + return module_name + except (ImportError, KeyError, AttributeError): + raise PicklingError( + "Can't pickle %r: it's not found as %s.%s" % + (obj, module_name, name)) from None + + raise PicklingError( + "Can't pickle %r: it's not the same object as %s.%s" % + (obj, module_name, name)) def encode_long(x): r"""Encode a long to a two's complement little-endian binary string. @@ -1074,24 +1081,10 @@ class _Pickler: if name is None: name = getattr(obj, '__qualname__', None) - if name is None: - name = obj.__name__ + if name is None: + name = obj.__name__ module_name = whichmodule(obj, name) - try: - __import__(module_name, level=0) - module = sys.modules[module_name] - obj2, parent = _getattribute(module, name) - except (ImportError, KeyError, AttributeError): - raise PicklingError( - "Can't pickle %r: it's not found as %s.%s" % - (obj, module_name, name)) from None - else: - if obj2 is not obj: - raise PicklingError( - "Can't pickle %r: it's not the same object as %s.%s" % - (obj, module_name, name)) - if self.proto >= 2: code = _extension_registry.get((module_name, name)) if code: @@ -1103,10 +1096,7 @@ class _Pickler: else: write(EXT4 + pack("<i", code)) return - lastname = name.rpartition('.')[2] - if parent is module: - name = lastname - # Non-ASCII identifiers are supported only with protocols >= 3. + if self.proto >= 4: self.save(module_name) self.save(name) @@ -1616,7 +1606,16 @@ class _Unpickler: module = _compat_pickle.IMPORT_MAPPING[module] __import__(module, level=0) if self.proto >= 4: - return _getattribute(sys.modules[module], name)[0] + module = sys.modules[module] + dotted_path = name.split('.') + if '<locals>' in dotted_path: + raise AttributeError( + f"Can't get local attribute {name!r} on {module!r}") + try: + return _getattribute(module, dotted_path) + except AttributeError: + raise AttributeError( + f"Can't get attribute {name!r} on {module!r}") from None else: return getattr(sys.modules[module], name) diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index 3c936b3..db42f13 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -2068,7 +2068,7 @@ class AbstractPicklingErrorTests: self.dumps(f, proto) self.assertIn(str(cm.exception), { f"Can't pickle {f!r}: it's not found as {__name__}.{f.__qualname__}", - f"Can't get local object {f.__qualname__!r}"}) + f"Can't get local attribute {f.__qualname__!r} on {sys.modules[__name__]}"}) # Same without a __module__ attribute (exercises a different path # in _pickle.c). del f.__module__ diff --git a/Misc/NEWS.d/next/Library/2024-07-30-15-57-07.gh-issue-122459.AYIoeN.rst b/Misc/NEWS.d/next/Library/2024-07-30-15-57-07.gh-issue-122459.AYIoeN.rst new file mode 100644 index 0000000..5955040 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-30-15-57-07.gh-issue-122459.AYIoeN.rst @@ -0,0 +1,2 @@ +Optimize :mod:`pickling <pickle>` by name objects without the ``__module__`` +attribute. diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 50c73dc..5d9ee8c 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -1803,13 +1803,15 @@ memo_put(PickleState *st, PicklerObject *self, PyObject *obj) } static PyObject * -get_dotted_path(PyObject *obj, PyObject *name) +get_dotted_path(PyObject *name) +{ + return PyUnicode_Split(name, _Py_LATIN1_CHR('.'), -1); +} + +static int +check_dotted_path(PyObject *obj, PyObject *name, PyObject *dotted_path) { - PyObject *dotted_path; Py_ssize_t i, n; - dotted_path = PyUnicode_Split(name, _Py_LATIN1_CHR('.'), -1); - if (dotted_path == NULL) - return NULL; n = PyList_GET_SIZE(dotted_path); assert(n >= 1); for (i = 0; i < n; i++) { @@ -1821,61 +1823,33 @@ get_dotted_path(PyObject *obj, PyObject *name) else PyErr_Format(PyExc_AttributeError, "Can't get local attribute %R on %R", name, obj); - Py_DECREF(dotted_path); - return NULL; + return -1; } } - return dotted_path; + return 0; } static PyObject * -get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent) +getattribute(PyObject *obj, PyObject *names) { Py_ssize_t i, n; - PyObject *parent = NULL; assert(PyList_CheckExact(names)); Py_INCREF(obj); n = PyList_GET_SIZE(names); for (i = 0; i < n; i++) { PyObject *name = PyList_GET_ITEM(names, i); - Py_XSETREF(parent, obj); + PyObject *parent = obj; (void)PyObject_GetOptionalAttr(parent, name, &obj); + Py_DECREF(parent); if (obj == NULL) { - Py_DECREF(parent); return NULL; } } - if (pparent != NULL) - *pparent = parent; - else - Py_XDECREF(parent); return obj; } -static PyObject * -getattribute(PyObject *obj, PyObject *name, int allow_qualname) -{ - PyObject *dotted_path, *attr; - - if (allow_qualname) { - dotted_path = get_dotted_path(obj, name); - if (dotted_path == NULL) - return NULL; - attr = get_deep_attribute(obj, dotted_path, NULL); - Py_DECREF(dotted_path); - } - else { - (void)PyObject_GetOptionalAttr(obj, name, &attr); - } - if (attr == NULL && !PyErr_Occurred()) { - PyErr_Format(PyExc_AttributeError, - "Can't get attribute %R on %R", name, obj); - } - return attr; -} - static int _checkmodule(PyObject *module_name, PyObject *module, PyObject *global, PyObject *dotted_path) @@ -1888,7 +1862,7 @@ _checkmodule(PyObject *module_name, PyObject *module, return -1; } - PyObject *candidate = get_deep_attribute(module, dotted_path, NULL); + PyObject *candidate = getattribute(module, dotted_path); if (candidate == NULL) { return -1; } @@ -1901,7 +1875,7 @@ _checkmodule(PyObject *module_name, PyObject *module, } static PyObject * -whichmodule(PyObject *global, PyObject *dotted_path) +whichmodule(PickleState *st, PyObject *global, PyObject *global_name, PyObject *dotted_path) { PyObject *module_name; PyObject *module = NULL; @@ -1911,63 +1885,106 @@ whichmodule(PyObject *global, PyObject *dotted_path) if (PyObject_GetOptionalAttr(global, &_Py_ID(__module__), &module_name) < 0) { return NULL; } - if (module_name) { + if (module_name == NULL || module_name == Py_None) { /* In some rare cases (e.g., bound methods of extension types), __module__ can be None. If it is so, then search sys.modules for the module of global. */ - if (module_name != Py_None) - return module_name; Py_CLEAR(module_name); - } - assert(module_name == NULL); - - /* Fallback on walking sys.modules */ - PyThreadState *tstate = _PyThreadState_GET(); - modules = _PySys_GetAttr(tstate, &_Py_ID(modules)); - if (modules == NULL) { - PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules"); - return NULL; - } - if (PyDict_CheckExact(modules)) { - i = 0; - while (PyDict_Next(modules, &i, &module_name, &module)) { - if (_checkmodule(module_name, module, global, dotted_path) == 0) { - return Py_NewRef(module_name); - } - if (PyErr_Occurred()) { - return NULL; - } + if (check_dotted_path(NULL, global_name, dotted_path) < 0) { + return NULL; } - } - else { - PyObject *iterator = PyObject_GetIter(modules); - if (iterator == NULL) { + PyThreadState *tstate = _PyThreadState_GET(); + modules = _PySys_GetAttr(tstate, &_Py_ID(modules)); + if (modules == NULL) { + PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules"); return NULL; } - while ((module_name = PyIter_Next(iterator))) { - module = PyObject_GetItem(modules, module_name); - if (module == NULL) { + if (PyDict_CheckExact(modules)) { + i = 0; + while (PyDict_Next(modules, &i, &module_name, &module)) { + Py_INCREF(module_name); + Py_INCREF(module); + if (_checkmodule(module_name, module, global, dotted_path) == 0) { + Py_DECREF(module); + return module_name; + } + Py_DECREF(module); Py_DECREF(module_name); - Py_DECREF(iterator); + if (PyErr_Occurred()) { + return NULL; + } + } + } + else { + PyObject *iterator = PyObject_GetIter(modules); + if (iterator == NULL) { return NULL; } - if (_checkmodule(module_name, module, global, dotted_path) == 0) { + while ((module_name = PyIter_Next(iterator))) { + module = PyObject_GetItem(modules, module_name); + if (module == NULL) { + Py_DECREF(module_name); + Py_DECREF(iterator); + return NULL; + } + if (_checkmodule(module_name, module, global, dotted_path) == 0) { + Py_DECREF(module); + Py_DECREF(iterator); + return module_name; + } Py_DECREF(module); - Py_DECREF(iterator); - return module_name; - } - Py_DECREF(module); - Py_DECREF(module_name); - if (PyErr_Occurred()) { - Py_DECREF(iterator); - return NULL; + Py_DECREF(module_name); + if (PyErr_Occurred()) { + Py_DECREF(iterator); + return NULL; + } } + Py_DECREF(iterator); + } + if (PyErr_Occurred()) { + return NULL; } - Py_DECREF(iterator); + + /* If no module is found, use __main__. */ + module_name = Py_NewRef(&_Py_ID(__main__)); } - /* If no module is found, use __main__. */ - return &_Py_ID(__main__); + /* XXX: Change to use the import C API directly with level=0 to disallow + relative imports. + + XXX: PyImport_ImportModuleLevel could be used. However, this bypasses + builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore + custom import functions (IMHO, this would be a nice security + feature). The import C API would need to be extended to support the + extra parameters of __import__ to fix that. */ + module = PyImport_Import(module_name); + if (module == NULL) { + PyErr_Format(st->PicklingError, + "Can't pickle %R: import of module %R failed", + global, module_name); + return NULL; + } + if (check_dotted_path(module, global_name, dotted_path) < 0) { + Py_DECREF(module); + return NULL; + } + PyObject *actual = getattribute(module, dotted_path); + Py_DECREF(module); + if (actual == NULL) { + PyErr_Format(st->PicklingError, + "Can't pickle %R: attribute lookup %S on %S failed", + global, global_name, module_name); + return NULL; + } + if (actual != global) { + Py_DECREF(actual); + PyErr_Format(st->PicklingError, + "Can't pickle %R: it's not the same object as %S.%S", + global, module_name, global_name); + return NULL; + } + Py_DECREF(actual); + return module_name; } /* fast_save_enter() and fast_save_leave() are guards against recursive @@ -3590,10 +3607,7 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, { PyObject *global_name = NULL; PyObject *module_name = NULL; - PyObject *module = NULL; - PyObject *parent = NULL; PyObject *dotted_path = NULL; - PyObject *cls; int status = 0; const char global_op = GLOBAL; @@ -3611,44 +3625,13 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, } } - dotted_path = get_dotted_path(module, global_name); + dotted_path = get_dotted_path(global_name); if (dotted_path == NULL) goto error; - module_name = whichmodule(obj, dotted_path); + module_name = whichmodule(st, obj, global_name, dotted_path); if (module_name == NULL) goto error; - /* XXX: Change to use the import C API directly with level=0 to disallow - relative imports. - - XXX: PyImport_ImportModuleLevel could be used. However, this bypasses - builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore - custom import functions (IMHO, this would be a nice security - feature). The import C API would need to be extended to support the - extra parameters of __import__ to fix that. */ - module = PyImport_Import(module_name); - if (module == NULL) { - PyErr_Format(st->PicklingError, - "Can't pickle %R: import of module %R failed", - obj, module_name); - goto error; - } - cls = get_deep_attribute(module, dotted_path, &parent); - if (cls == NULL) { - PyErr_Format(st->PicklingError, - "Can't pickle %R: attribute lookup %S on %S failed", - obj, global_name, module_name); - goto error; - } - if (cls != obj) { - Py_DECREF(cls); - PyErr_Format(st->PicklingError, - "Can't pickle %R: it's not the same object as %S.%S", - obj, module_name, global_name); - goto error; - } - Py_DECREF(cls); - if (self->proto >= 2) { /* See whether this is in the extension registry, and if * so generate an EXT opcode. @@ -3720,12 +3703,6 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, } else { gen_global: - if (parent == module) { - Py_SETREF(global_name, - Py_NewRef(PyList_GET_ITEM(dotted_path, - PyList_GET_SIZE(dotted_path) - 1))); - Py_CLEAR(dotted_path); - } if (self->proto >= 4) { const char stack_global_op = STACK_GLOBAL; @@ -3845,8 +3822,6 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, } Py_XDECREF(module_name); Py_XDECREF(global_name); - Py_XDECREF(module); - Py_XDECREF(parent); Py_XDECREF(dotted_path); return status; @@ -7063,7 +7038,27 @@ _pickle_Unpickler_find_class_impl(UnpicklerObject *self, PyTypeObject *cls, if (module == NULL) { return NULL; } - global = getattribute(module, global_name, self->proto >= 4); + if (self->proto >= 4) { + PyObject *dotted_path = get_dotted_path(global_name); + if (dotted_path == NULL) { + Py_DECREF(module); + return NULL; + } + if (check_dotted_path(module, global_name, dotted_path) < 0) { + Py_DECREF(dotted_path); + Py_DECREF(module); + return NULL; + } + global = getattribute(module, dotted_path); + Py_DECREF(dotted_path); + if (global == NULL && !PyErr_Occurred()) { + PyErr_Format(PyExc_AttributeError, + "Can't get attribute %R on %R", global_name, module); + } + } + else { + global = PyObject_GetAttr(module, global_name); + } Py_DECREF(module); return global; } |