diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2016-03-21 21:00:58 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2016-03-21 21:00:58 (GMT) |
commit | 285cf0a6014af147b82a3446d9e088ad0332720d (patch) | |
tree | 829fa2b00f39bf7ff31496cca47ddd127b135e4f /Modules/hashtable.c | |
parent | 928bff0b26adb643a7078575c9075b4b709c1b16 (diff) | |
download | cpython-285cf0a6014af147b82a3446d9e088ad0332720d.zip cpython-285cf0a6014af147b82a3446d9e088ad0332720d.tar.gz cpython-285cf0a6014af147b82a3446d9e088ad0332720d.tar.bz2 |
hashtable.h now supports keys of any size
Issue #26588: hashtable.h now supports keys of any size, not only
sizeof(void*). This makes it possible to use keys larger than sizeof(void*),
and to use less memory for keys smaller than sizeof(void*).
Diffstat (limited to 'Modules/hashtable.c')
-rw-r--r-- | Modules/hashtable.c | 147 |
1 file changed, 89 insertions, 58 deletions
diff --git a/Modules/hashtable.c b/Modules/hashtable.c index 7de154b..d33f0d7 100644 --- a/Modules/hashtable.c +++ b/Modules/hashtable.c @@ -1,5 +1,5 @@ -/* The implementation of the hash table (_Py_hashtable_t) is based on the cfuhash - project: +/* The implementation of the hash table (_Py_hashtable_t) is based on the + cfuhash project: http://sourceforge.net/projects/libcfu/ Copyright of cfuhash: @@ -59,7 +59,7 @@ #define ENTRY_NEXT(ENTRY) \ ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY)) #define HASHTABLE_ITEM_SIZE(HT) \ - (sizeof(_Py_hashtable_entry_t) + (HT)->data_size) + (sizeof(_Py_hashtable_entry_t) + (HT)->key_size + (HT)->data_size) /* Forward declaration */ static void hashtable_rehash(_Py_hashtable_t *ht); @@ -70,6 +70,7 @@ _Py_slist_init(_Py_slist_t *list) list->head = NULL; } + static void _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item) { @@ -77,6 +78,7 @@ _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item) list->head = item; } + static void _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous, _Py_slist_item_t *item) @@ -87,24 +89,26 @@ _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous, list->head = item->next; } -Py_uhash_t -_Py_hashtable_hash_int(const void *key) -{ - return (Py_uhash_t)key; -} Py_uhash_t -_Py_hashtable_hash_ptr(const void *key) +_Py_hashtable_hash_ptr(size_t key_size, const void *pkey) { + void *key; + + _Py_HASHTABLE_READ_KEY(key_size, pkey, key); return (Py_uhash_t)_Py_HashPointer((void *)key); } + int -_Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry) +_Py_hashtable_compare_direct(size_t key_size, const void *pkey, + const _Py_hashtable_entry_t *entry) { - return entry->key == key; + const void *pkey2 = _Py_HASHTABLE_ENTRY_KEY(entry); + return (memcmp(pkey, pkey2, key_size) == 0); } + /* makes sure the real size of the buckets array is a power of 2 */ static size_t round_size(size_t s) @@ -118,8 +122,10 @@ round_size(size_t s) return i; } + 
_Py_hashtable_t * -_Py_hashtable_new_full(size_t data_size, size_t init_size, +_Py_hashtable_new_full(size_t key_size, size_t data_size, + size_t init_size, _Py_hashtable_hash_func hash_func, _Py_hashtable_compare_func compare_func, _Py_hashtable_copy_data_func copy_data_func, @@ -144,6 +150,7 @@ _Py_hashtable_new_full(size_t data_size, size_t init_size, ht->num_buckets = round_size(init_size); ht->entries = 0; + ht->key_size = key_size; ht->data_size = data_size; buckets_size = ht->num_buckets * sizeof(ht->buckets[0]); @@ -163,16 +170,19 @@ _Py_hashtable_new_full(size_t data_size, size_t init_size, return ht; } + _Py_hashtable_t * -_Py_hashtable_new(size_t data_size, +_Py_hashtable_new(size_t key_size, size_t data_size, _Py_hashtable_hash_func hash_func, _Py_hashtable_compare_func compare_func) { - return _Py_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE, + return _Py_hashtable_new_full(key_size, data_size, + HASHTABLE_MIN_SIZE, hash_func, compare_func, NULL, NULL, NULL, NULL); } + size_t _Py_hashtable_size(_Py_hashtable_t *ht) { @@ -195,7 +205,7 @@ _Py_hashtable_size(_Py_hashtable_t *ht) for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { void *data; - data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); + data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry); size += ht->get_data_size_func(data); } } @@ -203,6 +213,7 @@ _Py_hashtable_size(_Py_hashtable_t *ht) return size; } + #ifdef Py_DEBUG void _Py_hashtable_print_stats(_Py_hashtable_t *ht) @@ -243,38 +254,47 @@ _Py_hashtable_print_stats(_Py_hashtable_t *ht) } #endif -/* Get an entry. Return NULL if the key does not exist. 
*/ + _Py_hashtable_entry_t * -_Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key) +_Py_hashtable_get_entry(_Py_hashtable_t *ht, + size_t key_size, const void *pkey) { Py_uhash_t key_hash; size_t index; _Py_hashtable_entry_t *entry; - key_hash = ht->hash_func(key); + assert(key_size == ht->key_size); + + key_hash = ht->hash_func(key_size, pkey); index = key_hash & (ht->num_buckets - 1); for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { - if (entry->key_hash == key_hash && ht->compare_func(key, entry)) + if (entry->key_hash == key_hash + && ht->compare_func(key_size, pkey, entry)) break; } return entry; } + static int -_hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) +_Py_hashtable_pop_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey, + void *data, size_t data_size) { Py_uhash_t key_hash; size_t index; _Py_hashtable_entry_t *entry, *previous; - key_hash = ht->hash_func(key); + assert(key_size == ht->key_size); + + key_hash = ht->hash_func(key_size, pkey); index = key_hash & (ht->num_buckets - 1); previous = NULL; for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { - if (entry->key_hash == key_hash && ht->compare_func(key, entry)) + if (entry->key_hash == key_hash + && ht->compare_func(key_size, pkey, entry)) break; previous = entry; } @@ -287,7 +307,7 @@ _hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t da ht->entries--; if (data != NULL) - _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry); + _Py_HASHTABLE_ENTRY_READ_DATA(ht, entry, data_size, data); ht->alloc.free(entry); if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW) @@ -295,26 +315,27 @@ _hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t da return 1; } -/* Add a new entry to the hash. The key must not be present in the hash table. - Return 0 on success, -1 on memory error. 
*/ + int -_Py_hashtable_set(_Py_hashtable_t *ht, const void *key, - void *data, size_t data_size) +_Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey, + size_t data_size, void *data) { Py_uhash_t key_hash; size_t index; _Py_hashtable_entry_t *entry; + assert(key_size == ht->key_size); + assert(data != NULL || data_size == 0); #ifndef NDEBUG /* Don't write the assertion on a single line because it is interesting to know the duplicated entry if the assertion failed. The entry can be read using a debugger. */ - entry = _Py_hashtable_get_entry(ht, key); + entry = _Py_hashtable_get_entry(ht, key_size, pkey); assert(entry == NULL); #endif - key_hash = ht->hash_func(key); + key_hash = ht->hash_func(key_size, pkey); index = key_hash & (ht->num_buckets - 1); entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht)); @@ -323,11 +344,11 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, return -1; } - entry->key = (void *)key; entry->key_hash = key_hash; + memcpy((void *)_Py_HASHTABLE_ENTRY_KEY(entry), pkey, key_size); assert(data_size == ht->data_size); - memcpy(_Py_HASHTABLE_ENTRY_DATA(entry), data, data_size); + memcpy(_Py_HASHTABLE_ENTRY_DATA(ht, entry), data, data_size); _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry); ht->entries++; @@ -337,48 +358,48 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, return 0; } -/* Get data from an entry. Copy entry data into data and return 1 if the entry - exists, return 0 if the entry does not exist. 
*/ + int -_Py_hashtable_get(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) +_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey, + size_t data_size, void *data) { _Py_hashtable_entry_t *entry; assert(data != NULL); - entry = _Py_hashtable_get_entry(ht, key); + entry = _Py_hashtable_get_entry(ht, key_size, pkey); if (entry == NULL) return 0; - _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry); + _Py_HASHTABLE_ENTRY_READ_DATA(ht, entry, data_size, data); return 1; } + int -_Py_hashtable_pop(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) +_Py_hashtable_pop(_Py_hashtable_t *ht, size_t key_size, const void *pkey, + size_t data_size, void *data) { assert(data != NULL); assert(ht->free_data_func == NULL); - return _hashtable_pop_entry(ht, key, data, data_size); + return _Py_hashtable_pop_entry(ht, key_size, pkey, data, data_size); } -/* Delete an entry. The entry must exist. */ + void -_Py_hashtable_delete(_Py_hashtable_t *ht, const void *key) +_Py_hashtable_delete(_Py_hashtable_t *ht, size_t key_size, const void *pkey) { #ifndef NDEBUG - int found = _hashtable_pop_entry(ht, key, NULL, 0); + int found = _Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0); assert(found); #else - (void)_hashtable_pop_entry(ht, key, NULL, 0); + (void)_Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0); #endif } -/* Prototype for a pointer to a function to be called foreach - key/value pair in the hash by hashtable_foreach(). Iteration - stops if a non-zero value is returned. 
*/ + int _Py_hashtable_foreach(_Py_hashtable_t *ht, - int (*func) (_Py_hashtable_entry_t *entry, void *arg), + _Py_hashtable_foreach_func func, void *arg) { _Py_hashtable_entry_t *entry; @@ -386,7 +407,7 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht, for (hv = 0; hv < ht->num_buckets; hv++) { for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { - int res = func(entry, arg); + int res = func(ht, entry, arg); if (res) return res; } @@ -394,9 +415,11 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht, return 0; } + static void hashtable_rehash(_Py_hashtable_t *ht) { + const size_t key_size = ht->key_size; size_t buckets_size, new_size, bucket; _Py_slist_t *old_buckets = NULL; size_t old_num_buckets; @@ -425,7 +448,8 @@ hashtable_rehash(_Py_hashtable_t *ht) for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) { size_t entry_index; - assert(ht->hash_func(entry->key) == entry->key_hash); + + assert(ht->hash_func(key_size, _Py_HASHTABLE_ENTRY_KEY(entry)) == entry->key_hash); next = ENTRY_NEXT(entry); entry_index = entry->key_hash & (new_size - 1); @@ -436,6 +460,7 @@ hashtable_rehash(_Py_hashtable_t *ht) ht->alloc.free(old_buckets); } + void _Py_hashtable_clear(_Py_hashtable_t *ht) { @@ -446,7 +471,7 @@ _Py_hashtable_clear(_Py_hashtable_t *ht) for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) { next = ENTRY_NEXT(entry); if (ht->free_data_func) - ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry)); + ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry)); ht->alloc.free(entry); } _Py_slist_init(&ht->buckets[i]); @@ -455,6 +480,7 @@ _Py_hashtable_clear(_Py_hashtable_t *ht) hashtable_rehash(ht); } + void _Py_hashtable_destroy(_Py_hashtable_t *ht) { @@ -465,7 +491,7 @@ _Py_hashtable_destroy(_Py_hashtable_t *ht) while (entry) { _Py_slist_item_t *entry_next = entry->next; if (ht->free_data_func) - ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry)); + 
ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry)); ht->alloc.free(entry); entry = entry_next; } @@ -475,17 +501,20 @@ _Py_hashtable_destroy(_Py_hashtable_t *ht) ht->alloc.free(ht); } -/* Return a copy of the hash table */ + _Py_hashtable_t * _Py_hashtable_copy(_Py_hashtable_t *src) { + const size_t key_size = src->key_size; + const size_t data_size = src->data_size; _Py_hashtable_t *dst; _Py_hashtable_entry_t *entry; size_t bucket; int err; void *data, *new_data; - dst = _Py_hashtable_new_full(src->data_size, src->num_buckets, + dst = _Py_hashtable_new_full(key_size, data_size, + src->num_buckets, src->hash_func, src->compare_func, src->copy_data_func, src->free_data_func, src->get_data_size_func, &src->alloc); @@ -496,17 +525,20 @@ _Py_hashtable_copy(_Py_hashtable_t *src) entry = TABLE_HEAD(src, bucket); for (; entry; entry = ENTRY_NEXT(entry)) { if (src->copy_data_func) { - data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); + data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(src, entry); new_data = src->copy_data_func(data); if (new_data != NULL) - err = _Py_hashtable_set(dst, entry->key, - &new_data, src->data_size); + err = _Py_hashtable_set(dst, key_size, + _Py_HASHTABLE_ENTRY_KEY(entry), + data_size, &new_data); else err = 1; } else { - data = _Py_HASHTABLE_ENTRY_DATA(entry); - err = _Py_hashtable_set(dst, entry->key, data, src->data_size); + data = _Py_HASHTABLE_ENTRY_DATA(src, entry); + err = _Py_hashtable_set(dst, key_size, + _Py_HASHTABLE_ENTRY_KEY(entry), + data_size, data); } if (err) { _Py_hashtable_destroy(dst); @@ -516,4 +548,3 @@ _Py_hashtable_copy(_Py_hashtable_t *src) } return dst; } - |