diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2013-11-23 11:27:24 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2013-11-23 11:27:24 (GMT) |
commit | ed3b0bca3ef9d7bdbb8bd8e67e60e85f5a336da0 (patch) | |
tree | fd4390855d293372f73048fdf4b3e6b4a7cdf440 /Modules/hashtable.c | |
parent | 0fb6072fad411eba171b53037bcc04d07c7b0770 (diff) | |
download | cpython-ed3b0bca3ef9d7bdbb8bd8e67e60e85f5a336da0.zip cpython-ed3b0bca3ef9d7bdbb8bd8e67e60e85f5a336da0.tar.gz cpython-ed3b0bca3ef9d7bdbb8bd8e67e60e85f5a336da0.tar.bz2 |
Issue #18874: Implement the PEP 454 (tracemalloc)
Diffstat (limited to 'Modules/hashtable.c')
-rw-r--r-- | Modules/hashtable.c | 518 |
1 files changed, 518 insertions, 0 deletions
diff --git a/Modules/hashtable.c b/Modules/hashtable.c new file mode 100644 index 0000000..221ed53 --- /dev/null +++ b/Modules/hashtable.c @@ -0,0 +1,518 @@ +/* The implementation of the hash table (_Py_hashtable_t) is based on the cfuhash + project: + http://sourceforge.net/projects/libcfu/ + + Copyright of cfuhash: + ---------------------------------- + Creation date: 2005-06-24 21:22:40 + Authors: Don + Change log: + + Copyright (c) 2005 Don Owens + All rights reserved. + + This code is released under the BSD license: + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + OF THE POSSIBILITY OF SUCH DAMAGE. + ---------------------------------- +*/ + +#include "Python.h" +#include "hashtable.h" + +#define HASHTABLE_MIN_SIZE 16 +#define HASHTABLE_HIGH 0.50 +#define HASHTABLE_LOW 0.10 +#define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH) + +#define BUCKETS_HEAD(SLIST) \ + ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST))) +#define TABLE_HEAD(HT, BUCKET) \ + ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET])) +#define ENTRY_NEXT(ENTRY) \ + ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY)) +#define HASHTABLE_ITEM_SIZE(HT) \ + (sizeof(_Py_hashtable_entry_t) + (HT)->data_size) + +/* Forward declaration */ +static void hashtable_rehash(_Py_hashtable_t *ht); + +static void +_Py_slist_init(_Py_slist_t *list) +{ + list->head = NULL; +} + +static void +_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item) +{ + item->next = list->head; + list->head = item; +} + +static void +_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous, + _Py_slist_item_t *item) +{ + if (previous != NULL) + previous->next = item->next; + else + list->head = item->next; +} + +Py_uhash_t +_Py_hashtable_hash_int(const void *key) +{ + return (Py_uhash_t)key; +} + +Py_uhash_t +_Py_hashtable_hash_ptr(const void *key) +{ + return (Py_uhash_t)_Py_HashPointer((void *)key); +} + +int +_Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry) +{ + return entry->key == key; +} + +/* makes sure the real size of the buckets array is a power of 2 */ +static size_t +round_size(size_t s) +{ + size_t i; + if (s < HASHTABLE_MIN_SIZE) + return HASHTABLE_MIN_SIZE; + i = 1; + while (i < s) + i <<= 1; + return i; +} + +_Py_hashtable_t * +_Py_hashtable_new_full(size_t data_size, size_t init_size, + _Py_hashtable_hash_func hash_func, + _Py_hashtable_compare_func compare_func, + _Py_hashtable_copy_data_func copy_data_func, + _Py_hashtable_free_data_func free_data_func, + _Py_hashtable_get_data_size_func get_data_size_func, + _Py_hashtable_allocator_t *allocator) +{ + _Py_hashtable_t *ht; + size_t buckets_size; + _Py_hashtable_allocator_t alloc; + + if (allocator == NULL) { + alloc.malloc = PyMem_RawMalloc; + alloc.free = PyMem_RawFree; + } + else + alloc = *allocator; + + ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t)); + if (ht == NULL) + return ht; + + ht->num_buckets = round_size(init_size); + ht->entries = 0; + ht->data_size = data_size; + + buckets_size = ht->num_buckets * sizeof(ht->buckets[0]); + ht->buckets = alloc.malloc(buckets_size); + if (ht->buckets == NULL) { + alloc.free(ht); + return NULL; + } + memset(ht->buckets, 0, buckets_size); + + ht->hash_func = hash_func; + ht->compare_func = compare_func; + ht->copy_data_func = copy_data_func; + ht->free_data_func = free_data_func; + ht->get_data_size_func = get_data_size_func; + ht->alloc = alloc; + return ht; +} + +_Py_hashtable_t * +_Py_hashtable_new(size_t data_size, + _Py_hashtable_hash_func hash_func, + _Py_hashtable_compare_func compare_func) +{ + return _Py_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE, + hash_func, compare_func, + NULL, NULL, NULL, NULL); +} + +size_t +_Py_hashtable_size(_Py_hashtable_t *ht) +{ + size_t size; + size_t hv; + + size = sizeof(_Py_hashtable_t); + + /* buckets */ + size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *); + + /* entries */ + size += ht->entries * HASHTABLE_ITEM_SIZE(ht); + + /* data linked from entries */ + if (ht->get_data_size_func) { + for (hv = 0; hv < ht->num_buckets; hv++) { + _Py_hashtable_entry_t *entry; + + for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { + void *data; + + data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); + size += ht->get_data_size_func(data); + } + } + } + return size; +} + +#ifdef Py_DEBUG +void +_Py_hashtable_print_stats(_Py_hashtable_t *ht) +{ + size_t size; + size_t chain_len, max_chain_len, total_chain_len, nchains; + _Py_hashtable_entry_t *entry; + size_t hv; + double load; + + size = _Py_hashtable_size(ht); + + load = (double)ht->entries / ht->num_buckets; + + max_chain_len = 0; + total_chain_len = 0; + nchains = 0; + for (hv = 0; hv < ht->num_buckets; hv++) { + entry = TABLE_HEAD(ht, hv); + if (entry != NULL) { + chain_len = 0; + for (; entry; entry = ENTRY_NEXT(entry)) { + chain_len++; + } + if (chain_len > max_chain_len) + max_chain_len = chain_len; + total_chain_len += chain_len; + nchains++; + } + } + printf("hash table %p: entries=%zu/%zu (%.0f%%), ", + ht, ht->entries, ht->num_buckets, load * 100.0); + if (nchains) + printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains); + printf("max_chain_len=%zu, %zu kB\n", + max_chain_len, size / 1024); +} +#endif + +/* Get an entry. Return NULL if the key does not exist. */ +_Py_hashtable_entry_t * +_Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key) +{ + Py_uhash_t key_hash; + size_t index; + _Py_hashtable_entry_t *entry; + + key_hash = ht->hash_func(key); + index = key_hash & (ht->num_buckets - 1); + + for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { + if (entry->key_hash == key_hash && ht->compare_func(key, entry)) + break; + } + + return entry; +} + +static int +_hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) +{ + Py_uhash_t key_hash; + size_t index; + _Py_hashtable_entry_t *entry, *previous; + + key_hash = ht->hash_func(key); + index = key_hash & (ht->num_buckets - 1); + + previous = NULL; + for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { + if (entry->key_hash == key_hash && ht->compare_func(key, entry)) + break; + previous = entry; + } + + if (entry == NULL) + return 0; + + _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous, + (_Py_slist_item_t *)entry); + ht->entries--; + + if (data != NULL) + _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry); + ht->alloc.free(entry); + + if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW) + hashtable_rehash(ht); + return 1; +} + +/* Add a new entry to the hash. The key must not be present in the hash table. + Return 0 on success, -1 on memory error. */ +int +_Py_hashtable_set(_Py_hashtable_t *ht, const void *key, + void *data, size_t data_size) +{ + Py_uhash_t key_hash; + size_t index; + _Py_hashtable_entry_t *entry; + + assert(data != NULL || data_size == 0); +#ifndef NDEBUG + /* Don't write the assertion on a single line because it is interesting + to know the duplicated entry if the assertion failed. The entry can + be read using a debugger. */ + entry = _Py_hashtable_get_entry(ht, key); + assert(entry == NULL); +#endif + + key_hash = ht->hash_func(key); + index = key_hash & (ht->num_buckets - 1); + + entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht)); + if (entry == NULL) { + /* memory allocation failed */ + return -1; + } + + entry->key = (void *)key; + entry->key_hash = key_hash; + + assert(data_size == ht->data_size); + memcpy(_PY_HASHTABLE_ENTRY_DATA(entry), data, data_size); + + _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry); + ht->entries++; + + if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH) + hashtable_rehash(ht); + return 0; +} + +/* Get data from an entry. Copy entry data into data and return 1 if the entry + exists, return 0 if the entry does not exist. */ +int +_Py_hashtable_get(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) +{ + _Py_hashtable_entry_t *entry; + + assert(data != NULL); + + entry = _Py_hashtable_get_entry(ht, key); + if (entry == NULL) + return 0; + _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry); + return 1; +} + +int +_Py_hashtable_pop(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) +{ + assert(data != NULL); + assert(ht->free_data_func == NULL); + return _hashtable_pop_entry(ht, key, data, data_size); +} + +/* Delete an entry. The entry must exist. */ +void +_Py_hashtable_delete(_Py_hashtable_t *ht, const void *key) +{ +#ifndef NDEBUG + int found = _hashtable_pop_entry(ht, key, NULL, 0); + assert(found); +#else + (void)_hashtable_pop_entry(ht, key, NULL, 0); +#endif +} + +/* Prototype for a pointer to a function to be called foreach + key/value pair in the hash by hashtable_foreach(). Iteration + stops if a non-zero value is returned. */ +int +_Py_hashtable_foreach(_Py_hashtable_t *ht, + int (*func) (_Py_hashtable_entry_t *entry, void *arg), + void *arg) +{ + _Py_hashtable_entry_t *entry; + size_t hv; + + for (hv = 0; hv < ht->num_buckets; hv++) { + for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { + int res = func(entry, arg); + if (res) + return res; + } + } + return 0; +} + +static void +hashtable_rehash(_Py_hashtable_t *ht) +{ + size_t buckets_size, new_size, bucket; + _Py_slist_t *old_buckets = NULL; + size_t old_num_buckets; + + new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR)); + if (new_size == ht->num_buckets) + return; + + old_num_buckets = ht->num_buckets; + + buckets_size = new_size * sizeof(ht->buckets[0]); + old_buckets = ht->buckets; + ht->buckets = ht->alloc.malloc(buckets_size); + if (ht->buckets == NULL) { + /* cancel rehash on memory allocation failure */ + ht->buckets = old_buckets ; + /* memory allocation failed */ + return; + } + memset(ht->buckets, 0, buckets_size); + + ht->num_buckets = new_size; + + for (bucket = 0; bucket < old_num_buckets; bucket++) { + _Py_hashtable_entry_t *entry, *next; + for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) { + size_t entry_index; + + assert(ht->hash_func(entry->key) == entry->key_hash); + next = ENTRY_NEXT(entry); + entry_index = entry->key_hash & (new_size - 1); + + _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry); + } + } + + ht->alloc.free(old_buckets); +} + +void +_Py_hashtable_clear(_Py_hashtable_t *ht) +{ + _Py_hashtable_entry_t *entry, *next; + size_t i; + + for (i=0; i < ht->num_buckets; i++) { + for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) { + next = ENTRY_NEXT(entry); + if (ht->free_data_func) + ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry)); + ht->alloc.free(entry); + } + _Py_slist_init(&ht->buckets[i]); + } + ht->entries = 0; + hashtable_rehash(ht); +} + +void +_Py_hashtable_destroy(_Py_hashtable_t *ht) +{ + size_t i; + + for (i = 0; i < ht->num_buckets; i++) { + _Py_slist_item_t *entry = ht->buckets[i].head; + while (entry) { + _Py_slist_item_t *entry_next = entry->next; + if (ht->free_data_func) + ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry)); + ht->alloc.free(entry); + entry = entry_next; + } + } + + ht->alloc.free(ht->buckets); + ht->alloc.free(ht); +} + +/* Return a copy of the hash table */ +_Py_hashtable_t * +_Py_hashtable_copy(_Py_hashtable_t *src) +{ + _Py_hashtable_t *dst; + _Py_hashtable_entry_t *entry; + size_t bucket; + int err; + void *data, *new_data; + + dst = _Py_hashtable_new_full(src->data_size, src->num_buckets, + src->hash_func, src->compare_func, + src->copy_data_func, src->free_data_func, + src->get_data_size_func, &src->alloc); + if (dst == NULL) + return NULL; + + for (bucket=0; bucket < src->num_buckets; bucket++) { + entry = TABLE_HEAD(src, bucket); + for (; entry; entry = ENTRY_NEXT(entry)) { + if (src->copy_data_func) { + data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); + new_data = src->copy_data_func(data); + if (new_data != NULL) + err = _Py_hashtable_set(dst, entry->key, + &new_data, src->data_size); + else + err = 1; + } + else { + data = _PY_HASHTABLE_ENTRY_DATA(entry); + err = _Py_hashtable_set(dst, entry->key, data, src->data_size); + } + if (err) { + _Py_hashtable_destroy(dst); + return NULL; + } + } + } + return dst; +} + |