summaryrefslogtreecommitdiffstats
path: root/Modules/hashtable.c
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2016-03-21 21:00:58 (GMT)
committerVictor Stinner <victor.stinner@gmail.com>2016-03-21 21:00:58 (GMT)
commit285cf0a6014af147b82a3446d9e088ad0332720d (patch)
tree829fa2b00f39bf7ff31496cca47ddd127b135e4f /Modules/hashtable.c
parent928bff0b26adb643a7078575c9075b4b709c1b16 (diff)
downloadcpython-285cf0a6014af147b82a3446d9e088ad0332720d.zip
cpython-285cf0a6014af147b82a3446d9e088ad0332720d.tar.gz
cpython-285cf0a6014af147b82a3446d9e088ad0332720d.tar.bz2
hashtable.h now supports keys of any size
Issue #26588: hashtable.h now supports keys of any size, not only sizeof(void*). It allows to support key larger than sizeof(void*), but also to use less memory for key smaller than sizeof(void*).
Diffstat (limited to 'Modules/hashtable.c')
-rw-r--r--Modules/hashtable.c147
1 files changed, 89 insertions, 58 deletions
diff --git a/Modules/hashtable.c b/Modules/hashtable.c
index 7de154b..d33f0d7 100644
--- a/Modules/hashtable.c
+++ b/Modules/hashtable.c
@@ -1,5 +1,5 @@
-/* The implementation of the hash table (_Py_hashtable_t) is based on the cfuhash
- project:
+/* The implementation of the hash table (_Py_hashtable_t) is based on the
+ cfuhash project:
http://sourceforge.net/projects/libcfu/
Copyright of cfuhash:
@@ -59,7 +59,7 @@
#define ENTRY_NEXT(ENTRY) \
((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
#define HASHTABLE_ITEM_SIZE(HT) \
- (sizeof(_Py_hashtable_entry_t) + (HT)->data_size)
+ (sizeof(_Py_hashtable_entry_t) + (HT)->key_size + (HT)->data_size)
/* Forward declaration */
static void hashtable_rehash(_Py_hashtable_t *ht);
@@ -70,6 +70,7 @@ _Py_slist_init(_Py_slist_t *list)
list->head = NULL;
}
+
static void
_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
{
@@ -77,6 +78,7 @@ _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
list->head = item;
}
+
static void
_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
_Py_slist_item_t *item)
@@ -87,24 +89,26 @@ _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
list->head = item->next;
}
-Py_uhash_t
-_Py_hashtable_hash_int(const void *key)
-{
- return (Py_uhash_t)key;
-}
Py_uhash_t
-_Py_hashtable_hash_ptr(const void *key)
+_Py_hashtable_hash_ptr(size_t key_size, const void *pkey)
{
+ void *key;
+
+ _Py_HASHTABLE_READ_KEY(key_size, pkey, key);
return (Py_uhash_t)_Py_HashPointer((void *)key);
}
+
int
-_Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry)
+_Py_hashtable_compare_direct(size_t key_size, const void *pkey,
+ const _Py_hashtable_entry_t *entry)
{
- return entry->key == key;
+ const void *pkey2 = _Py_HASHTABLE_ENTRY_KEY(entry);
+ return (memcmp(pkey, pkey2, key_size) == 0);
}
+
/* makes sure the real size of the buckets array is a power of 2 */
static size_t
round_size(size_t s)
@@ -118,8 +122,10 @@ round_size(size_t s)
return i;
}
+
_Py_hashtable_t *
-_Py_hashtable_new_full(size_t data_size, size_t init_size,
+_Py_hashtable_new_full(size_t key_size, size_t data_size,
+ size_t init_size,
_Py_hashtable_hash_func hash_func,
_Py_hashtable_compare_func compare_func,
_Py_hashtable_copy_data_func copy_data_func,
@@ -144,6 +150,7 @@ _Py_hashtable_new_full(size_t data_size, size_t init_size,
ht->num_buckets = round_size(init_size);
ht->entries = 0;
+ ht->key_size = key_size;
ht->data_size = data_size;
buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
@@ -163,16 +170,19 @@ _Py_hashtable_new_full(size_t data_size, size_t init_size,
return ht;
}
+
_Py_hashtable_t *
-_Py_hashtable_new(size_t data_size,
+_Py_hashtable_new(size_t key_size, size_t data_size,
_Py_hashtable_hash_func hash_func,
_Py_hashtable_compare_func compare_func)
{
- return _Py_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE,
+ return _Py_hashtable_new_full(key_size, data_size,
+ HASHTABLE_MIN_SIZE,
hash_func, compare_func,
NULL, NULL, NULL, NULL);
}
+
size_t
_Py_hashtable_size(_Py_hashtable_t *ht)
{
@@ -195,7 +205,7 @@ _Py_hashtable_size(_Py_hashtable_t *ht)
for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
void *data;
- data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
+ data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry);
size += ht->get_data_size_func(data);
}
}
@@ -203,6 +213,7 @@ _Py_hashtable_size(_Py_hashtable_t *ht)
return size;
}
+
#ifdef Py_DEBUG
void
_Py_hashtable_print_stats(_Py_hashtable_t *ht)
@@ -243,38 +254,47 @@ _Py_hashtable_print_stats(_Py_hashtable_t *ht)
}
#endif
-/* Get an entry. Return NULL if the key does not exist. */
+
_Py_hashtable_entry_t *
-_Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key)
+_Py_hashtable_get_entry(_Py_hashtable_t *ht,
+ size_t key_size, const void *pkey)
{
Py_uhash_t key_hash;
size_t index;
_Py_hashtable_entry_t *entry;
- key_hash = ht->hash_func(key);
+ assert(key_size == ht->key_size);
+
+ key_hash = ht->hash_func(key_size, pkey);
index = key_hash & (ht->num_buckets - 1);
for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
- if (entry->key_hash == key_hash && ht->compare_func(key, entry))
+ if (entry->key_hash == key_hash
+ && ht->compare_func(key_size, pkey, entry))
break;
}
return entry;
}
+
static int
-_hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
+_Py_hashtable_pop_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
+ void *data, size_t data_size)
{
Py_uhash_t key_hash;
size_t index;
_Py_hashtable_entry_t *entry, *previous;
- key_hash = ht->hash_func(key);
+ assert(key_size == ht->key_size);
+
+ key_hash = ht->hash_func(key_size, pkey);
index = key_hash & (ht->num_buckets - 1);
previous = NULL;
for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
- if (entry->key_hash == key_hash && ht->compare_func(key, entry))
+ if (entry->key_hash == key_hash
+ && ht->compare_func(key_size, pkey, entry))
break;
previous = entry;
}
@@ -287,7 +307,7 @@ _hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t da
ht->entries--;
if (data != NULL)
- _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
+ _Py_HASHTABLE_ENTRY_READ_DATA(ht, entry, data_size, data);
ht->alloc.free(entry);
if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW)
@@ -295,26 +315,27 @@ _hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t da
return 1;
}
-/* Add a new entry to the hash. The key must not be present in the hash table.
- Return 0 on success, -1 on memory error. */
+
int
-_Py_hashtable_set(_Py_hashtable_t *ht, const void *key,
- void *data, size_t data_size)
+_Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
+ size_t data_size, void *data)
{
Py_uhash_t key_hash;
size_t index;
_Py_hashtable_entry_t *entry;
+ assert(key_size == ht->key_size);
+
assert(data != NULL || data_size == 0);
#ifndef NDEBUG
/* Don't write the assertion on a single line because it is interesting
to know the duplicated entry if the assertion failed. The entry can
be read using a debugger. */
- entry = _Py_hashtable_get_entry(ht, key);
+ entry = _Py_hashtable_get_entry(ht, key_size, pkey);
assert(entry == NULL);
#endif
- key_hash = ht->hash_func(key);
+ key_hash = ht->hash_func(key_size, pkey);
index = key_hash & (ht->num_buckets - 1);
entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht));
@@ -323,11 +344,11 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key,
return -1;
}
- entry->key = (void *)key;
entry->key_hash = key_hash;
+ memcpy((void *)_Py_HASHTABLE_ENTRY_KEY(entry), pkey, key_size);
assert(data_size == ht->data_size);
- memcpy(_Py_HASHTABLE_ENTRY_DATA(entry), data, data_size);
+ memcpy(_Py_HASHTABLE_ENTRY_DATA(ht, entry), data, data_size);
_Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
ht->entries++;
@@ -337,48 +358,48 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key,
return 0;
}
-/* Get data from an entry. Copy entry data into data and return 1 if the entry
- exists, return 0 if the entry does not exist. */
+
int
-_Py_hashtable_get(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
+_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey,
+ size_t data_size, void *data)
{
_Py_hashtable_entry_t *entry;
assert(data != NULL);
- entry = _Py_hashtable_get_entry(ht, key);
+ entry = _Py_hashtable_get_entry(ht, key_size, pkey);
if (entry == NULL)
return 0;
- _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
+ _Py_HASHTABLE_ENTRY_READ_DATA(ht, entry, data_size, data);
return 1;
}
+
int
-_Py_hashtable_pop(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
+_Py_hashtable_pop(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
+ size_t data_size, void *data)
{
assert(data != NULL);
assert(ht->free_data_func == NULL);
- return _hashtable_pop_entry(ht, key, data, data_size);
+ return _Py_hashtable_pop_entry(ht, key_size, pkey, data, data_size);
}
-/* Delete an entry. The entry must exist. */
+
void
-_Py_hashtable_delete(_Py_hashtable_t *ht, const void *key)
+_Py_hashtable_delete(_Py_hashtable_t *ht, size_t key_size, const void *pkey)
{
#ifndef NDEBUG
- int found = _hashtable_pop_entry(ht, key, NULL, 0);
+ int found = _Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
assert(found);
#else
- (void)_hashtable_pop_entry(ht, key, NULL, 0);
+ (void)_Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
#endif
}
-/* Prototype for a pointer to a function to be called foreach
- key/value pair in the hash by hashtable_foreach(). Iteration
- stops if a non-zero value is returned. */
+
int
_Py_hashtable_foreach(_Py_hashtable_t *ht,
- int (*func) (_Py_hashtable_entry_t *entry, void *arg),
+ _Py_hashtable_foreach_func func,
void *arg)
{
_Py_hashtable_entry_t *entry;
@@ -386,7 +407,7 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht,
for (hv = 0; hv < ht->num_buckets; hv++) {
for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
- int res = func(entry, arg);
+ int res = func(ht, entry, arg);
if (res)
return res;
}
@@ -394,9 +415,11 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht,
return 0;
}
+
static void
hashtable_rehash(_Py_hashtable_t *ht)
{
+ const size_t key_size = ht->key_size;
size_t buckets_size, new_size, bucket;
_Py_slist_t *old_buckets = NULL;
size_t old_num_buckets;
@@ -425,7 +448,8 @@ hashtable_rehash(_Py_hashtable_t *ht)
for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
size_t entry_index;
- assert(ht->hash_func(entry->key) == entry->key_hash);
+
+ assert(ht->hash_func(key_size, _Py_HASHTABLE_ENTRY_KEY(entry)) == entry->key_hash);
next = ENTRY_NEXT(entry);
entry_index = entry->key_hash & (new_size - 1);
@@ -436,6 +460,7 @@ hashtable_rehash(_Py_hashtable_t *ht)
ht->alloc.free(old_buckets);
}
+
void
_Py_hashtable_clear(_Py_hashtable_t *ht)
{
@@ -446,7 +471,7 @@ _Py_hashtable_clear(_Py_hashtable_t *ht)
for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
next = ENTRY_NEXT(entry);
if (ht->free_data_func)
- ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
+ ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry));
ht->alloc.free(entry);
}
_Py_slist_init(&ht->buckets[i]);
@@ -455,6 +480,7 @@ _Py_hashtable_clear(_Py_hashtable_t *ht)
hashtable_rehash(ht);
}
+
void
_Py_hashtable_destroy(_Py_hashtable_t *ht)
{
@@ -465,7 +491,7 @@ _Py_hashtable_destroy(_Py_hashtable_t *ht)
while (entry) {
_Py_slist_item_t *entry_next = entry->next;
if (ht->free_data_func)
- ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
+ ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry));
ht->alloc.free(entry);
entry = entry_next;
}
@@ -475,17 +501,20 @@ _Py_hashtable_destroy(_Py_hashtable_t *ht)
ht->alloc.free(ht);
}
-/* Return a copy of the hash table */
+
_Py_hashtable_t *
_Py_hashtable_copy(_Py_hashtable_t *src)
{
+ const size_t key_size = src->key_size;
+ const size_t data_size = src->data_size;
_Py_hashtable_t *dst;
_Py_hashtable_entry_t *entry;
size_t bucket;
int err;
void *data, *new_data;
- dst = _Py_hashtable_new_full(src->data_size, src->num_buckets,
+ dst = _Py_hashtable_new_full(key_size, data_size,
+ src->num_buckets,
src->hash_func, src->compare_func,
src->copy_data_func, src->free_data_func,
src->get_data_size_func, &src->alloc);
@@ -496,17 +525,20 @@ _Py_hashtable_copy(_Py_hashtable_t *src)
entry = TABLE_HEAD(src, bucket);
for (; entry; entry = ENTRY_NEXT(entry)) {
if (src->copy_data_func) {
- data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
+ data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(src, entry);
new_data = src->copy_data_func(data);
if (new_data != NULL)
- err = _Py_hashtable_set(dst, entry->key,
- &new_data, src->data_size);
+ err = _Py_hashtable_set(dst, key_size,
+ _Py_HASHTABLE_ENTRY_KEY(entry),
+ data_size, &new_data);
else
err = 1;
}
else {
- data = _Py_HASHTABLE_ENTRY_DATA(entry);
- err = _Py_hashtable_set(dst, entry->key, data, src->data_size);
+ data = _Py_HASHTABLE_ENTRY_DATA(src, entry);
+ err = _Py_hashtable_set(dst, key_size,
+ _Py_HASHTABLE_ENTRY_KEY(entry),
+ data_size, data);
}
if (err) {
_Py_hashtable_destroy(dst);
@@ -516,4 +548,3 @@ _Py_hashtable_copy(_Py_hashtable_t *src)
}
return dst;
}
-