summaryrefslogtreecommitdiffstats
path: root/Objects/dictobject.c
diff options
context:
space:
mode:
authorYury Selivanov <yury@magic.io>2018-01-22 16:54:41 (GMT)
committerGitHub <noreply@github.com>2018-01-22 16:54:41 (GMT)
commitb0a7a037b8fde56b62f886d5188bced7776777b4 (patch)
treea9722f72b836b19446e9e716fbf7b45702664e20 /Objects/dictobject.c
parenta4b1bb4801f7a941ff9e86b96da539be1c288833 (diff)
downloadcpython-b0a7a037b8fde56b62f886d5188bced7776777b4.zip
cpython-b0a7a037b8fde56b62f886d5188bced7776777b4.tar.gz
cpython-b0a7a037b8fde56b62f886d5188bced7776777b4.tar.bz2
bpo-31179: Make dict.copy() up to 5.5 times faster. (#3067)
Diffstat (limited to 'Objects/dictobject.c')
-rw-r--r--Objects/dictobject.c73
1 files changed, 73 insertions, 0 deletions
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index b20b85c..259fa25 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -615,6 +615,52 @@ new_dict_with_shared_keys(PyDictKeysObject *keys)
return new_dict(keys, values);
}
+
+static PyObject *
+clone_combined_dict(PyDictObject *orig)
+{
+ assert(PyDict_CheckExact(orig));
+ assert(orig->ma_values == NULL);
+ assert(orig->ma_keys->dk_refcnt == 1);
+
+ Py_ssize_t keys_size = _PyDict_KeysSize(orig->ma_keys);
+ PyDictKeysObject *keys = PyObject_Malloc(keys_size);
+ if (keys == NULL) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+
+ memcpy(keys, orig->ma_keys, keys_size);
+
+ /* After copying key/value pairs, we need to incref all
+ keys and values, as they are about to be co-owned by a
+ new dict object. */
+ PyDictKeyEntry *ep0 = DK_ENTRIES(keys);
+ Py_ssize_t n = keys->dk_nentries;
+ for (Py_ssize_t i = 0; i < n; i++) {
+ PyDictKeyEntry *entry = &ep0[i];
+ PyObject *value = entry->me_value;
+ if (value != NULL) {
+ Py_INCREF(value);
+ Py_INCREF(entry->me_key);
+ }
+ }
+
+ PyDictObject *new = (PyDictObject *)new_dict(keys, NULL);
+ if (new == NULL) {
+ /* In case of an error, `new_dict()` takes care of
+ cleaning up `keys`. */
+ return NULL;
+ }
+ new->ma_used = orig->ma_used;
+ assert(_PyDict_CheckConsistency(new));
+ if (_PyObject_GC_IS_TRACKED(orig)) {
+ /* Maintain tracking. */
+ _PyObject_GC_TRACK(new);
+ }
+ return (PyObject *)new;
+}
+
PyObject *
PyDict_New(void)
{
@@ -2484,7 +2530,13 @@ PyDict_Copy(PyObject *o)
PyErr_BadInternalCall();
return NULL;
}
+
mp = (PyDictObject *)o;
+ if (mp->ma_used == 0) {
+ /* The dict is empty; just return a new dict. */
+ return PyDict_New();
+ }
+
if (_PyDict_HasSplitTable(mp)) {
PyDictObject *split_copy;
Py_ssize_t size = USABLE_FRACTION(DK_SIZE(mp->ma_keys));
@@ -2510,6 +2562,27 @@ PyDict_Copy(PyObject *o)
_PyObject_GC_TRACK(split_copy);
return (PyObject *)split_copy;
}
+
+ if (PyDict_CheckExact(mp) && mp->ma_values == NULL &&
+ (mp->ma_used >= (mp->ma_keys->dk_nentries * 2) / 3))
+ {
+ /* Use fast-copy if:
+
+ (1) 'mp' is an exact dict instance (not a dict subclass); and
+
+ (2) 'mp' is not a split-dict; and
+
+ (3) if 'mp' is non-compact ('del' operation does not resize dicts),
+ do fast-copy only if it has at most 1/3 non-used keys.
+
+ The last condition (3) is important to guard against a pathological
+ case when a large dict is almost emptied with multiple del/pop
+ operations and copied after that. In cases like this, we defer to
+ PyDict_Merge, which produces a compacted copy.
+ */
+ return clone_combined_dict(mp);
+ }
+
copy = PyDict_New();
if (copy == NULL)
return NULL;