diff options
author | Tim Peters <tim.peters@gmail.com> | 2001-09-14 00:25:33 (GMT) |
---|---|---|
committer | Tim Peters <tim.peters@gmail.com> | 2001-09-14 00:25:33 (GMT) |
commit | 0ab085c4cba79c1743288a300425b3c9050250ba (patch) | |
tree | aa0436b634cc45a83f710bdf94289deefd540d9b | |
parent | 742dfd6f178c3880248c32d64322e2cff8cea23f (diff) | |
download | cpython-0ab085c4cba79c1743288a300425b3c9050250ba.zip cpython-0ab085c4cba79c1743288a300425b3c9050250ba.tar.gz cpython-0ab085c4cba79c1743288a300425b3c9050250ba.tar.bz2 |
Changed the dict implementation to take "string shortcuts" only when
keys are true strings -- no subclasses need apply. This may be debatable.
The problem is that a str subclass may very well want to override __eq__
and/or __hash__ (see the new example of case-insensitive strings in
test_descr), but go-fast shortcuts for strings are ubiquitous in our dicts
(and subclass overrides aren't even looked for then). Another go-fast
reason for the change is that PyString_CheckExact() is a quicker test
than PyString_Check(), and we make such a test on virtually every access
to every dict.
OTOH, a str subclass may also be perfectly happy using the base str eq
and hash, and this change slows them a lot. But those cases are still
hypothetical, while Python's own reliance on true-string dicts is not.
-rw-r--r-- | Lib/test/test_descr.py | 38 | ||||
-rw-r--r-- | Objects/dictobject.c | 29 |
2 files changed, 52 insertions, 15 deletions
diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index f1af5b9..06631dc 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -1533,8 +1533,8 @@ def inherits(): verify(str(s) == base) verify(str(s).__class__ is str) verify(hash(s) == hash(base)) - verify({s: 1}[base] == 1) - verify({base: 1}[s] == 1) + #XXX verify({s: 1}[base] == 1) + #XXX verify({base: 1}[s] == 1) verify((s + "").__class__ is str) verify(s + "" == base) verify(("" + s).__class__ is str) @@ -1758,6 +1758,39 @@ f = t(%r, 'w') # rexec can't catch this by itself except: pass +def str_subclass_as_dict_key(): + if verbose: + print "Testing a str subclass used as dict key .." + + class cistr(str): + """Sublcass of str that computes __eq__ case-insensitively. + + Also computes a hash code of the string in canonical form. + """ + + def __init__(self, value): + self.canonical = value.lower() + self.hashcode = hash(self.canonical) + + def __eq__(self, other): + if not isinstance(other, cistr): + other = cistr(other) + return self.canonical == other.canonical + + def __hash__(self): + return self.hashcode + + verify('aBc' == cistr('ABC') == 'abc') + verify(str(cistr('ABC')) == 'ABC') + + d = {cistr('one'): 1, cistr('two'): 2, cistr('tHree'): 3} + verify(d[cistr('one')] == 1) + verify(d[cistr('tWo')] == 2) + verify(d[cistr('THrEE')] == 3) + verify(cistr('ONe') in d) + verify(d.get(cistr('thrEE')) == 3) + + def all(): lists() dicts() @@ -1794,6 +1827,7 @@ def all(): inherits() keywords() restricted() + str_subclass_as_dict_key() all() diff --git a/Objects/dictobject.c b/Objects/dictobject.c index b98cccf..f68a964 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -298,8 +298,8 @@ Done: * means we don't need to go through PyObject_Compare(); we can always use * _PyString_Eq directly. * - * This really only becomes meaningful if proper error handling in lookdict() - * is too expensive. 
+ * This is valuable because the general-case error handling in lookdict() is + * expensive, and dicts with pure-string keys are very common. */ static dictentry * lookdict_string(dictobject *mp, PyObject *key, register long hash) @@ -311,8 +311,11 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash) dictentry *ep0 = mp->ma_table; register dictentry *ep; - /* make sure this function doesn't have to handle non-string keys */ - if (!PyString_Check(key)) { + /* Make sure this function doesn't have to handle non-string keys, + including subclasses of str; e.g., one reason to subclass + strings is to override __eq__, and for speed we don't cater to + that here. */ + if (!PyString_CheckExact(key)) { #ifdef SHOW_CONVERSION_COUNTS ++converted; #endif @@ -478,7 +481,7 @@ PyDict_GetItem(PyObject *op, PyObject *key) return NULL; } #ifdef CACHE_HASH - if (!PyString_Check(key) || + if (!PyString_CheckExact(key) || (hash = ((PyStringObject *) key)->ob_shash) == -1) #endif { @@ -510,7 +513,7 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value) } mp = (dictobject *)op; #ifdef CACHE_HASH - if (PyString_Check(key)) { + if (PyString_CheckExact(key)) { #ifdef INTERN_STRINGS if (((PyStringObject *)key)->ob_sinterned != NULL) { key = ((PyStringObject *)key)->ob_sinterned; @@ -562,7 +565,7 @@ PyDict_DelItem(PyObject *op, PyObject *key) return -1; } #ifdef CACHE_HASH - if (!PyString_Check(key) || + if (!PyString_CheckExact(key) || (hash = ((PyStringObject *) key)->ob_shash) == -1) #endif { @@ -820,7 +823,7 @@ dict_repr(dictobject *mp) if (s == NULL) goto Done; result = _PyString_Join(s, pieces); - Py_DECREF(s); + Py_DECREF(s); Done: Py_XDECREF(pieces); @@ -842,7 +845,7 @@ dict_subscript(dictobject *mp, register PyObject *key) long hash; assert(mp->ma_table != NULL); #ifdef CACHE_HASH - if (!PyString_Check(key) || + if (!PyString_CheckExact(key) || (hash = ((PyStringObject *) key)->ob_shash) == -1) #endif { @@ -1358,7 +1361,7 @@ dict_has_key(register 
dictobject *mp, PyObject *key) long hash; register long ok; #ifdef CACHE_HASH - if (!PyString_Check(key) || + if (!PyString_CheckExact(key) || (hash = ((PyStringObject *) key)->ob_shash) == -1) #endif { @@ -1382,7 +1385,7 @@ dict_get(register dictobject *mp, PyObject *args) return NULL; #ifdef CACHE_HASH - if (!PyString_Check(key) || + if (!PyString_CheckExact(key) || (hash = ((PyStringObject *) key)->ob_shash) == -1) #endif { @@ -1411,7 +1414,7 @@ dict_setdefault(register dictobject *mp, PyObject *args) return NULL; #ifdef CACHE_HASH - if (!PyString_Check(key) || + if (!PyString_CheckExact(key) || (hash = ((PyStringObject *) key)->ob_shash) == -1) #endif { @@ -1647,7 +1650,7 @@ dict_contains(dictobject *mp, PyObject *key) long hash; #ifdef CACHE_HASH - if (!PyString_Check(key) || + if (!PyString_CheckExact(key) || (hash = ((PyStringObject *) key)->ob_shash) == -1) #endif { |