diff options
Diffstat (limited to 'Modules/hashtable.c')
-rw-r--r-- | Modules/hashtable.c | 196 |
1 files changed, 100 insertions, 96 deletions
diff --git a/Modules/hashtable.c b/Modules/hashtable.c index 133f313..b53cc24 100644 --- a/Modules/hashtable.c +++ b/Modules/hashtable.c @@ -1,5 +1,5 @@ -/* The implementation of the hash table (_Py_hashtable_t) is based on the cfuhash - project: +/* The implementation of the hash table (_Py_hashtable_t) is based on the + cfuhash project: http://sourceforge.net/projects/libcfu/ Copyright of cfuhash: @@ -59,7 +59,21 @@ #define ENTRY_NEXT(ENTRY) \ ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY)) #define HASHTABLE_ITEM_SIZE(HT) \ - (sizeof(_Py_hashtable_entry_t) + (HT)->data_size) + (sizeof(_Py_hashtable_entry_t) + (HT)->key_size + (HT)->data_size) + +#define ENTRY_READ_PDATA(TABLE, ENTRY, DATA_SIZE, PDATA) \ + do { \ + assert((DATA_SIZE) == (TABLE)->data_size); \ + Py_MEMCPY((PDATA), _Py_HASHTABLE_ENTRY_PDATA(TABLE, (ENTRY)), \ + (DATA_SIZE)); \ + } while (0) + +#define ENTRY_WRITE_PDATA(TABLE, ENTRY, DATA_SIZE, PDATA) \ + do { \ + assert((DATA_SIZE) == (TABLE)->data_size); \ + Py_MEMCPY((void *)_Py_HASHTABLE_ENTRY_PDATA((TABLE), (ENTRY)), \ + (PDATA), (DATA_SIZE)); \ + } while (0) /* Forward declaration */ static void hashtable_rehash(_Py_hashtable_t *ht); @@ -70,6 +84,7 @@ _Py_slist_init(_Py_slist_t *list) list->head = NULL; } + static void _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item) { @@ -77,6 +92,7 @@ _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item) list->head = item; } + static void _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous, _Py_slist_item_t *item) @@ -87,24 +103,26 @@ _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous, list->head = item->next; } -Py_uhash_t -_Py_hashtable_hash_int(const void *key) -{ - return (Py_uhash_t)key; -} Py_uhash_t -_Py_hashtable_hash_ptr(const void *key) +_Py_hashtable_hash_ptr(struct _Py_hashtable_t *ht, const void *pkey) { - return (Py_uhash_t)_Py_HashPointer((void *)key); + void *key; + + _Py_HASHTABLE_READ_KEY(ht, pkey, key); + return (Py_uhash_t)_Py_HashPointer(key); } + int -_Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry) +_Py_hashtable_compare_direct(_Py_hashtable_t *ht, const void *pkey, + const _Py_hashtable_entry_t *entry) { - return entry->key == key; + const void *pkey2 = _Py_HASHTABLE_ENTRY_PKEY(entry); + return (memcmp(pkey, pkey2, ht->key_size) == 0); } + /* makes sure the real size of the buckets array is a power of 2 */ static size_t round_size(size_t s) @@ -118,13 +136,12 @@ round_size(size_t s) return i; } + _Py_hashtable_t * -_Py_hashtable_new_full(size_t data_size, size_t init_size, +_Py_hashtable_new_full(size_t key_size, size_t data_size, + size_t init_size, _Py_hashtable_hash_func hash_func, _Py_hashtable_compare_func compare_func, - _Py_hashtable_copy_data_func copy_data_func, - _Py_hashtable_free_data_func free_data_func, - _Py_hashtable_get_data_size_func get_data_size_func, _Py_hashtable_allocator_t *allocator) { _Py_hashtable_t *ht; @@ -144,6 +161,7 @@ _Py_hashtable_new_full(size_t data_size, size_t init_size, ht->num_buckets = round_size(init_size); ht->entries = 0; + ht->key_size = key_size; ht->data_size = data_size; buckets_size = ht->num_buckets * sizeof(ht->buckets[0]); @@ -156,28 +174,27 @@ _Py_hashtable_new_full(size_t data_size, size_t init_size, ht->hash_func = hash_func; ht->compare_func = compare_func; - ht->copy_data_func = copy_data_func; - ht->free_data_func = free_data_func; - ht->get_data_size_func = get_data_size_func; ht->alloc = alloc; return ht; } + _Py_hashtable_t * -_Py_hashtable_new(size_t data_size, +_Py_hashtable_new(size_t key_size, size_t data_size, _Py_hashtable_hash_func hash_func, _Py_hashtable_compare_func compare_func) { - return _Py_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE, + return _Py_hashtable_new_full(key_size, data_size, + HASHTABLE_MIN_SIZE, hash_func, compare_func, - NULL, NULL, NULL, NULL); + NULL); } + size_t _Py_hashtable_size(_Py_hashtable_t *ht) { size_t size; - size_t hv; size = sizeof(_Py_hashtable_t); @@ -187,22 +204,10 @@ _Py_hashtable_size(_Py_hashtable_t *ht) /* entries */ size += ht->entries * HASHTABLE_ITEM_SIZE(ht); - /* data linked from entries */ - if (ht->get_data_size_func) { - for (hv = 0; hv < ht->num_buckets; hv++) { - _Py_hashtable_entry_t *entry; - - for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { - void *data; - - data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); - size += ht->get_data_size_func(data); - } - } - } return size; } + #ifdef Py_DEBUG void _Py_hashtable_print_stats(_Py_hashtable_t *ht) @@ -243,38 +248,45 @@ _Py_hashtable_print_stats(_Py_hashtable_t *ht) } #endif -/* Get an entry. Return NULL if the key does not exist. */ + _Py_hashtable_entry_t * -_Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key) +_Py_hashtable_get_entry(_Py_hashtable_t *ht, + size_t key_size, const void *pkey) { Py_uhash_t key_hash; size_t index; _Py_hashtable_entry_t *entry; - key_hash = ht->hash_func(key); + assert(key_size == ht->key_size); + + key_hash = ht->hash_func(ht, pkey); index = key_hash & (ht->num_buckets - 1); for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { - if (entry->key_hash == key_hash && ht->compare_func(key, entry)) + if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry)) break; } return entry; } + static int -_hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) +_Py_hashtable_pop_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey, + void *data, size_t data_size) { Py_uhash_t key_hash; size_t index; _Py_hashtable_entry_t *entry, *previous; - key_hash = ht->hash_func(key); + assert(key_size == ht->key_size); + + key_hash = ht->hash_func(ht, pkey); index = key_hash & (ht->num_buckets - 1); previous = NULL; for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { - if (entry->key_hash == key_hash && ht->compare_func(key, entry)) + if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry)) break; previous = entry; } @@ -287,7 +299,7 @@ _hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t da ht->entries--; if (data != NULL) - _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry); + ENTRY_READ_PDATA(ht, entry, data_size, data); ht->alloc.free(entry); if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW) @@ -295,26 +307,27 @@ _hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t da return 1; } -/* Add a new entry to the hash. The key must not be present in the hash table. - Return 0 on success, -1 on memory error. */ + int -_Py_hashtable_set(_Py_hashtable_t *ht, const void *key, - void *data, size_t data_size) +_Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey, + size_t data_size, const void *data) { Py_uhash_t key_hash; size_t index; _Py_hashtable_entry_t *entry; + assert(key_size == ht->key_size); + assert(data != NULL || data_size == 0); #ifndef NDEBUG /* Don't write the assertion on a single line because it is interesting to know the duplicated entry if the assertion failed. The entry can be read using a debugger. */ - entry = _Py_hashtable_get_entry(ht, key); + entry = _Py_hashtable_get_entry(ht, key_size, pkey); assert(entry == NULL); #endif - key_hash = ht->hash_func(key); + key_hash = ht->hash_func(ht, pkey); index = key_hash & (ht->num_buckets - 1); entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht)); @@ -323,11 +336,9 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, return -1; } - entry->key = (void *)key; entry->key_hash = key_hash; - - assert(data_size == ht->data_size); - memcpy(_Py_HASHTABLE_ENTRY_DATA(entry), data, data_size); + Py_MEMCPY((void *)_Py_HASHTABLE_ENTRY_PKEY(entry), pkey, ht->key_size); + ENTRY_WRITE_PDATA(ht, entry, data_size, data); _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry); ht->entries++; @@ -337,48 +348,50 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, return 0; } -/* Get data from an entry. Copy entry data into data and return 1 if the entry - exists, return 0 if the entry does not exist. */ + int -_Py_hashtable_get(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) +_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey, + size_t data_size, void *data) { _Py_hashtable_entry_t *entry; assert(data != NULL); - entry = _Py_hashtable_get_entry(ht, key); + entry = _Py_hashtable_get_entry(ht, key_size, pkey); if (entry == NULL) return 0; - _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry); + ENTRY_READ_PDATA(ht, entry, data_size, data); return 1; } + int -_Py_hashtable_pop(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) +_Py_hashtable_pop(_Py_hashtable_t *ht, size_t key_size, const void *pkey, + size_t data_size, void *data) { assert(data != NULL); - assert(ht->free_data_func == NULL); - return _hashtable_pop_entry(ht, key, data, data_size); + return _Py_hashtable_pop_entry(ht, key_size, pkey, data, data_size); } -/* Delete an entry. The entry must exist. */ + +/* Code commented since the function is not needed in Python */ +#if 0 void -_Py_hashtable_delete(_Py_hashtable_t *ht, const void *key) +_Py_hashtable_delete(_Py_hashtable_t *ht, size_t key_size, const void *pkey) { #ifndef NDEBUG - int found = _hashtable_pop_entry(ht, key, NULL, 0); + int found = _Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0); assert(found); #else - (void)_hashtable_pop_entry(ht, key, NULL, 0); + (void)_Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0); #endif } +#endif + -/* Prototype for a pointer to a function to be called foreach - key/value pair in the hash by hashtable_foreach(). Iteration - stops if a non-zero value is returned. */ int _Py_hashtable_foreach(_Py_hashtable_t *ht, - int (*func) (_Py_hashtable_entry_t *entry, void *arg), + _Py_hashtable_foreach_func func, void *arg) { _Py_hashtable_entry_t *entry; @@ -386,7 +399,7 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht, for (hv = 0; hv < ht->num_buckets; hv++) { for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { - int res = func(entry, arg); + int res = func(ht, entry, arg); if (res) return res; } @@ -394,6 +407,7 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht, return 0; } + static void hashtable_rehash(_Py_hashtable_t *ht) { @@ -425,7 +439,8 @@ hashtable_rehash(_Py_hashtable_t *ht) for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) { size_t entry_index; - assert(ht->hash_func(entry->key) == entry->key_hash); + + assert(ht->hash_func(ht, _Py_HASHTABLE_ENTRY_PKEY(entry)) == entry->key_hash); next = ENTRY_NEXT(entry); entry_index = entry->key_hash & (new_size - 1); @@ -436,6 +451,7 @@ hashtable_rehash(_Py_hashtable_t *ht) ht->alloc.free(old_buckets); } + void _Py_hashtable_clear(_Py_hashtable_t *ht) { @@ -445,8 +461,6 @@ _Py_hashtable_clear(_Py_hashtable_t *ht) for (i=0; i < ht->num_buckets; i++) { for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) { next = ENTRY_NEXT(entry); - if (ht->free_data_func) - ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry)); ht->alloc.free(entry); } _Py_slist_init(&ht->buckets[i]); @@ -455,6 +469,7 @@ _Py_hashtable_clear(_Py_hashtable_t *ht) hashtable_rehash(ht); } + void _Py_hashtable_destroy(_Py_hashtable_t *ht) { @@ -464,8 +479,6 @@ _Py_hashtable_destroy(_Py_hashtable_t *ht) _Py_slist_item_t *entry = ht->buckets[i].head; while (entry) { _Py_slist_item_t *entry_next = entry->next; - if (ht->free_data_func) - ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry)); ht->alloc.free(entry); entry = entry_next; } @@ -475,39 +488,31 @@ _Py_hashtable_destroy(_Py_hashtable_t *ht) ht->alloc.free(ht); } -/* Return a copy of the hash table */ + _Py_hashtable_t * _Py_hashtable_copy(_Py_hashtable_t *src) { + const size_t key_size = src->key_size; + const size_t data_size = src->data_size; _Py_hashtable_t *dst; _Py_hashtable_entry_t *entry; size_t bucket; int err; - void *data, *new_data; - dst = _Py_hashtable_new_full(src->data_size, src->num_buckets, - src->hash_func, src->compare_func, - src->copy_data_func, src->free_data_func, - src->get_data_size_func, &src->alloc); + dst = _Py_hashtable_new_full(key_size, data_size, + src->num_buckets, + src->hash_func, + src->compare_func, + &src->alloc); if (dst == NULL) return NULL; for (bucket=0; bucket < src->num_buckets; bucket++) { entry = TABLE_HEAD(src, bucket); for (; entry; entry = ENTRY_NEXT(entry)) { - if (src->copy_data_func) { - data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); - new_data = src->copy_data_func(data); - if (new_data != NULL) - err = _Py_hashtable_set(dst, entry->key, - &new_data, src->data_size); - else - err = 1; - } - else { - data = _Py_HASHTABLE_ENTRY_DATA(entry); - err = _Py_hashtable_set(dst, entry->key, data, src->data_size); - } + const void *pkey = _Py_HASHTABLE_ENTRY_PKEY(entry); + const void *pdata = _Py_HASHTABLE_ENTRY_PDATA(src, entry); + err = _Py_hashtable_set(dst, key_size, pkey, data_size, pdata); if (err) { _Py_hashtable_destroy(dst); return NULL; @@ -516,4 +521,3 @@ _Py_hashtable_copy(_Py_hashtable_t *src) } return dst; } - |