summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTim Peters <tim.peters@gmail.com>2006-05-30 04:16:25 (GMT)
committerTim Peters <tim.peters@gmail.com>2006-05-30 04:16:25 (GMT)
commit9b10f7e0cbacad9be6375f0e6dbf70cf18574e6b (patch)
tree000aabf5a76a04c9166578c1c8c22363c1ae8d12
parent1e44ca94acc5e93ab21aacded7046ee5171c40cd (diff)
downloadcpython-9b10f7e0cbacad9be6375f0e6dbf70cf18574e6b.zip
cpython-9b10f7e0cbacad9be6375f0e6dbf70cf18574e6b.tar.gz
cpython-9b10f7e0cbacad9be6375f0e6dbf70cf18574e6b.tar.bz2
Convert relevant dict internals to Py_ssize_t.
I don't have a box with nearly enough RAM, or an OS, that could get close to tickling this, though (requires a dict w/ at least 2**31 entries).
-rw-r--r--Include/dictobject.h14
-rw-r--r--Objects/dictobject.c98
2 files changed, 64 insertions, 48 deletions
diff --git a/Include/dictobject.h b/Include/dictobject.h
index c917782..fd3d1fc 100644
--- a/Include/dictobject.h
+++ b/Include/dictobject.h
@@ -8,7 +8,7 @@ extern "C" {
/* Dictionary object type -- mapping from hashable object to object */
/* The distribution includes a separate file, Objects/dictnotes.txt,
- describing explorations into dictionary design and optimization.
+ describing explorations into dictionary design and optimization.
It covers typical dictionary use patterns, the parameters for
tuning dictionaries, and several ideas for possible optimizations.
*/
@@ -48,7 +48,11 @@ meaning otherwise.
#define PyDict_MINSIZE 8
typedef struct {
- long me_hash; /* cached hash code of me_key */
+ /* Cached hash code of me_key. Note that hash codes are C longs.
+ * We have to use Py_ssize_t instead because dict_popitem() abuses
+ * me_hash to hold a search finger.
+ */
+ Py_ssize_t me_hash;
PyObject *me_key;
PyObject *me_value;
} PyDictEntry;
@@ -65,14 +69,14 @@ it's two-thirds full.
typedef struct _dictobject PyDictObject;
struct _dictobject {
PyObject_HEAD
- int ma_fill; /* # Active + # Dummy */
- int ma_used; /* # Active */
+ Py_ssize_t ma_fill; /* # Active + # Dummy */
+ Py_ssize_t ma_used; /* # Active */
/* The table contains ma_mask + 1 slots, and that's a power of 2.
* We store the mask instead of the size because the mask is more
* frequently needed.
*/
- int ma_mask;
+ Py_ssize_t ma_mask;
/* ma_table points to ma_smalltable for small tables, else to
* additional malloc'ed memory. ma_table is never NULL! This rule
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index f5799ee..dd369a9 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -110,6 +110,16 @@ above, and then shifting perturb can be done while the table index is being
masked); and the dictobject struct required a member to hold the table's
polynomial. In Tim's experiments the current scheme ran faster, produced
equally good collision statistics, needed less code & used less memory.
+
+Theoretical Python 2.5 headache: hash codes are only C "long", but
+sizeof(Py_ssize_t) > sizeof(long) may be possible. In that case, and if a
+dict is genuinely huge, then only the slots directly reachable via indexing
+by a C long can be the first slot in a probe sequence. The probe sequence
+will still eventually reach every slot in the table, but the collision rate
+on initial probes may be much higher than this scheme was designed for.
+Getting a hash code as fat as Py_ssize_t is the only real cure. But in
+practice, this probably won't make a lick of difference for many years (at
+which point everyone will have terabytes of RAM on 64-bit boxes).
*/
/* Object used as dummy key to fill deleted entries */
@@ -228,7 +238,7 @@ lookdict(dictobject *mp, PyObject *key, register long hash)
register Py_ssize_t i;
register size_t perturb;
register dictentry *freeslot;
- register unsigned int mask = mp->ma_mask;
+ register Py_ssize_t mask = mp->ma_mask;
dictentry *ep0 = mp->ma_table;
register dictentry *ep;
register int restore_error;
@@ -339,7 +349,7 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash)
register Py_ssize_t i;
register size_t perturb;
register dictentry *freeslot;
- register unsigned int mask = mp->ma_mask;
+ register Py_ssize_t mask = mp->ma_mask;
dictentry *ep0 = mp->ma_table;
register dictentry *ep;
@@ -413,7 +423,7 @@ insertdict(register dictobject *mp, PyObject *key, long hash, PyObject *value)
Py_DECREF(dummy);
}
ep->me_key = key;
- ep->me_hash = hash;
+ ep->me_hash = (Py_ssize_t)hash;
ep->me_value = value;
mp->ma_used++;
}
@@ -425,11 +435,11 @@ items again. When entries have been deleted, the new table may
actually be smaller than the old one.
*/
static int
-dictresize(dictobject *mp, int minused)
+dictresize(dictobject *mp, Py_ssize_t minused)
{
- int newsize;
+ Py_ssize_t newsize;
dictentry *oldtable, *newtable, *ep;
- int i;
+ Py_ssize_t i;
int is_oldtable_malloced;
dictentry small_copy[PyDict_MINSIZE];
@@ -537,7 +547,7 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
{
register dictobject *mp;
register long hash;
- register int n_used;
+ register Py_ssize_t n_used;
if (!PyDict_Check(op)) {
PyErr_BadInternalCall();
@@ -568,14 +578,14 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
* Quadrupling the size improves average dictionary sparseness
* (reducing collisions) at the cost of some memory and iteration
* speed (which loops over every possible entry). It also halves
- * the number of expensive resize operations in a growing dictionary.
+| * the number of expensive resize operations in a growing dictionary.
*
* Very large dictionaries (over 50K items) use doubling instead.
* This may help applications with severe memory constraints.
*/
if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2))
return 0;
- return dictresize(mp, (mp->ma_used>50000 ? mp->ma_used*2 : mp->ma_used*4));
+ return dictresize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
}
int
@@ -619,10 +629,10 @@ PyDict_Clear(PyObject *op)
dictobject *mp;
dictentry *ep, *table;
int table_is_malloced;
- int fill;
+ Py_ssize_t fill;
dictentry small_copy[PyDict_MINSIZE];
#ifdef Py_DEBUG
- int i, n;
+ Py_ssize_t i, n;
#endif
if (!PyDict_Check(op))
@@ -685,7 +695,7 @@ PyDict_Clear(PyObject *op)
/*
* Iterate over a dict. Use like so:
*
- * int i;
+ * Py_ssize_t i;
* PyObject *key, *value;
* i = 0; # important! i should not otherwise be changed by you
* while (PyDict_Next(yourdict, &i, &key, &value)) {
@@ -701,7 +711,7 @@ int
PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
{
register Py_ssize_t i;
- register int mask;
+ register Py_ssize_t mask;
register dictentry *ep;
if (!PyDict_Check(op))
@@ -729,7 +739,7 @@ static void
dict_dealloc(register dictobject *mp)
{
register dictentry *ep;
- int fill = mp->ma_fill;
+ Py_ssize_t fill = mp->ma_fill;
PyObject_GC_UnTrack(mp);
Py_TRASHCAN_SAFE_BEGIN(mp)
for (ep = mp->ma_table; fill > 0; ep++) {
@@ -751,10 +761,10 @@ dict_dealloc(register dictobject *mp)
static int
dict_print(register dictobject *mp, register FILE *fp, register int flags)
{
- register int i;
- register int any;
+ register Py_ssize_t i;
+ register Py_ssize_t any;
- i = Py_ReprEnter((PyObject*)mp);
+ i = (int)Py_ReprEnter((PyObject*)mp);
if (i != 0) {
if (i < 0)
return i;
@@ -896,7 +906,7 @@ dict_subscript(dictobject *mp, register PyObject *key)
PyObject *missing;
static PyObject *missing_str = NULL;
if (missing_str == NULL)
- missing_str =
+ missing_str =
PyString_InternFromString("__missing__");
missing = _PyType_Lookup(mp->ob_type, missing_str);
if (missing != NULL)
@@ -930,9 +940,9 @@ static PyObject *
dict_keys(register dictobject *mp)
{
register PyObject *v;
- register int i, j;
+ register Py_ssize_t i, j;
dictentry *ep;
- int mask, n;
+ Py_ssize_t mask, n;
again:
n = mp->ma_used;
@@ -964,9 +974,9 @@ static PyObject *
dict_values(register dictobject *mp)
{
register PyObject *v;
- register int i, j;
+ register Py_ssize_t i, j;
dictentry *ep;
- int mask, n;
+ Py_ssize_t mask, n;
again:
n = mp->ma_used;
@@ -998,8 +1008,8 @@ static PyObject *
dict_items(register dictobject *mp)
{
register PyObject *v;
- register int i, j, n;
- int mask;
+ register Py_ssize_t i, j, n;
+ Py_ssize_t mask;
PyObject *item, *key, *value;
dictentry *ep;
@@ -1132,7 +1142,7 @@ int
PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override)
{
PyObject *it; /* iter(seq2) */
- int i; /* index into seq2 of current element */
+ Py_ssize_t i; /* index into seq2 of current element */
PyObject *item; /* seq2[i] */
PyObject *fast; /* item as a 2-tuple or 2-list */
@@ -1195,7 +1205,7 @@ Fail:
i = -1;
Return:
Py_DECREF(it);
- return i;
+ return (int)i;
}
int
@@ -1208,7 +1218,7 @@ int
PyDict_Merge(PyObject *a, PyObject *b, int override)
{
register PyDictObject *mp, *other;
- register int i;
+ register Py_ssize_t i;
dictentry *entry;
/* We accept for the argument either a concrete dictionary object,
@@ -1247,7 +1257,8 @@ PyDict_Merge(PyObject *a, PyObject *b, int override)
PyDict_GetItem(a, entry->me_key) == NULL)) {
Py_INCREF(entry->me_key);
Py_INCREF(entry->me_value);
- insertdict(mp, entry->me_key, entry->me_hash,
+ insertdict(mp, entry->me_key,
+ (long)entry->me_hash,
entry->me_value);
}
}
@@ -1376,7 +1387,8 @@ characterize(dictobject *a, dictobject *b, PyObject **pval)
{
PyObject *akey = NULL; /* smallest key in a s.t. a[akey] != b[akey] */
PyObject *aval = NULL; /* a[akey] */
- int i, cmp;
+ Py_ssize_t i;
+ int cmp;
for (i = 0; i <= a->ma_mask; i++) {
PyObject *thiskey, *thisaval, *thisbval;
@@ -1399,7 +1411,7 @@ characterize(dictobject *a, dictobject *b, PyObject **pval)
* find its associated value anymore; or
* maybe it is but the compare deleted the
* a[thiskey] entry.
- */
+| */
Py_DECREF(thiskey);
continue;
}
@@ -1499,7 +1511,7 @@ Finished:
static int
dict_equal(dictobject *a, dictobject *b)
{
- int i;
+ Py_ssize_t i;
if (a->ma_used != b->ma_used)
/* can't be equal if # of entries differ */
@@ -1673,7 +1685,7 @@ dict_pop(dictobject *mp, PyObject *args)
static PyObject *
dict_popitem(dictobject *mp)
{
- int i = 0;
+ Py_ssize_t i = 0;
dictentry *ep;
PyObject *res;
@@ -1683,7 +1695,7 @@ dict_popitem(dictobject *mp)
* happened, the result would be an infinite loop (searching for an
* entry that no longer exists). Note that the usual popitem()
* idiom is "while d: k, v = d.popitem()". so needing to throw the
- * tuple away if the dict *is* empty isn't a significant
+ * tuple away if the dict *is* empty isn't a significant
* inefficiency -- possible, but unlikely in practice.
*/
res = PyTuple_New(2);
@@ -1699,11 +1711,11 @@ dict_popitem(dictobject *mp)
* field of slot 0 to hold a search finger:
* If slot 0 has a value, use slot 0.
* Else slot 0 is being used to hold a search finger,
- * and we use its hash value as the first index to look.
+| * and we use its hash value as the first index to look.
*/
ep = &mp->ma_table[0];
if (ep->me_value == NULL) {
- i = (int)ep->me_hash;
+ i = ep->me_hash;
/* The hash field may be a real hash value, or it may be a
* legit search finger, or it may be a once-legit search
* finger that's out of bounds now because it wrapped around
@@ -2035,10 +2047,10 @@ PyDict_DelItemString(PyObject *v, const char *key)
typedef struct {
PyObject_HEAD
dictobject *di_dict; /* Set to NULL when iterator is exhausted */
- int di_used;
- int di_pos;
+ Py_ssize_t di_used;
+ Py_ssize_t di_pos;
PyObject* di_result; /* reusable result tuple for iteritems */
- long len;
+ Py_ssize_t len;
} dictiterobject;
static PyObject *
@@ -2076,10 +2088,10 @@ dictiter_dealloc(dictiterobject *di)
static PyObject *
dictiter_len(dictiterobject *di)
{
- long len = 0;
+ Py_ssize_t len = 0;
if (di->di_dict != NULL && di->di_used == di->di_dict->ma_used)
len = di->len;
- return PyInt_FromLong(len);
+ return PyInt_FromSize_t(len);
}
PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
@@ -2092,7 +2104,7 @@ static PyMethodDef dictiter_methods[] = {
static PyObject *dictiter_iternextkey(dictiterobject *di)
{
PyObject *key;
- register int i, mask;
+ register Py_ssize_t i, mask;
register dictentry *ep;
dictobject *d = di->di_dict;
@@ -2165,7 +2177,7 @@ PyTypeObject PyDictIterKey_Type = {
static PyObject *dictiter_iternextvalue(dictiterobject *di)
{
PyObject *value;
- register int i, mask;
+ register Py_ssize_t i, mask;
register dictentry *ep;
dictobject *d = di->di_dict;
@@ -2238,7 +2250,7 @@ PyTypeObject PyDictIterValue_Type = {
static PyObject *dictiter_iternextitem(dictiterobject *di)
{
PyObject *key, *value, *result = di->di_result;
- register int i, mask;
+ register Py_ssize_t i, mask;
register dictentry *ep;
dictobject *d = di->di_dict;