path: root/Modules/hashtable.c
author     Victor Stinner <victor.stinner@gmail.com>    2013-11-23 11:27:24 (GMT)
committer  Victor Stinner <victor.stinner@gmail.com>    2013-11-23 11:27:24 (GMT)
commit     ed3b0bca3ef9d7bdbb8bd8e67e60e85f5a336da0 (patch)
tree       fd4390855d293372f73048fdf4b3e6b4a7cdf440 /Modules/hashtable.c
parent     0fb6072fad411eba171b53037bcc04d07c7b0770 (diff)
download   cpython-ed3b0bca3ef9d7bdbb8bd8e67e60e85f5a336da0.zip
           cpython-ed3b0bca3ef9d7bdbb8bd8e67e60e85f5a336da0.tar.gz
           cpython-ed3b0bca3ef9d7bdbb8bd8e67e60e85f5a336da0.tar.bz2
Issue #18874: Implement the PEP 454 (tracemalloc)
Diffstat (limited to 'Modules/hashtable.c')
-rw-r--r--   Modules/hashtable.c   518
1 file changed, 518 insertions, 0 deletions
diff --git a/Modules/hashtable.c b/Modules/hashtable.c
new file mode 100644
index 0000000..221ed53
--- /dev/null
+++ b/Modules/hashtable.c
@@ -0,0 +1,518 @@
+/* The implementation of the hash table (_Py_hashtable_t) is based on the cfuhash
+ project:
+ http://sourceforge.net/projects/libcfu/
+
+ Copyright of cfuhash:
+ ----------------------------------
+ Creation date: 2005-06-24 21:22:40
+ Authors: Don
+ Change log:
+
+ Copyright (c) 2005 Don Owens
+ All rights reserved.
+
+ This code is released under the BSD license:
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ * Neither the name of the author nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ OF THE POSSIBILITY OF SUCH DAMAGE.
+ ----------------------------------
+*/
+
+#include "Python.h"
+#include "hashtable.h"
+
+#define HASHTABLE_MIN_SIZE 16
+#define HASHTABLE_HIGH 0.50
+#define HASHTABLE_LOW 0.10
+#define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH)
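+/* rehashing targets a load factor of (HASHTABLE_LOW + HASHTABLE_HIGH) / 2,
+   i.e. about 30%, before the bucket count is rounded up to a power of 2 */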
+
+#define BUCKETS_HEAD(SLIST) \
+ ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
+#define TABLE_HEAD(HT, BUCKET) \
+ ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
+#define ENTRY_NEXT(ENTRY) \
+ ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
+#define HASHTABLE_ITEM_SIZE(HT) \
+ (sizeof(_Py_hashtable_entry_t) + (HT)->data_size)
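+
+/* Each entry is allocated as one block: the _Py_hashtable_entry_t header
+   followed by data_size bytes of data copied inline, so
+   HASHTABLE_ITEM_SIZE(HT) is the total allocation size of a single entry. */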
+
+/* Forward declaration */
+static void hashtable_rehash(_Py_hashtable_t *ht);
+
+static void
+_Py_slist_init(_Py_slist_t *list)
+{
+ list->head = NULL;
+}
+
+static void
+_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
+{
+ item->next = list->head;
+ list->head = item;
+}
+
+static void
+_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
+ _Py_slist_item_t *item)
+{
+ if (previous != NULL)
+ previous->next = item->next;
+ else
+ list->head = item->next;
+}
+
+Py_uhash_t
+_Py_hashtable_hash_int(const void *key)
+{
+ return (Py_uhash_t)key;
+}
+
+Py_uhash_t
+_Py_hashtable_hash_ptr(const void *key)
+{
+ return (Py_uhash_t)_Py_HashPointer((void *)key);
+}
+
+int
+_Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry)
+{
+ return entry->key == key;
+}
+
+/* makes sure the real size of the buckets array is a power of 2 */
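+/* (e.g. round_size(100) returns 128; any value below HASHTABLE_MIN_SIZE
+   returns HASHTABLE_MIN_SIZE, i.e. 16) */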
+static size_t
+round_size(size_t s)
+{
+ size_t i;
+ if (s < HASHTABLE_MIN_SIZE)
+ return HASHTABLE_MIN_SIZE;
+ i = 1;
+ while (i < s)
+ i <<= 1;
+ return i;
+}
+
+_Py_hashtable_t *
+_Py_hashtable_new_full(size_t data_size, size_t init_size,
+ _Py_hashtable_hash_func hash_func,
+ _Py_hashtable_compare_func compare_func,
+ _Py_hashtable_copy_data_func copy_data_func,
+ _Py_hashtable_free_data_func free_data_func,
+ _Py_hashtable_get_data_size_func get_data_size_func,
+ _Py_hashtable_allocator_t *allocator)
+{
+ _Py_hashtable_t *ht;
+ size_t buckets_size;
+ _Py_hashtable_allocator_t alloc;
+
+ if (allocator == NULL) {
+ alloc.malloc = PyMem_RawMalloc;
+ alloc.free = PyMem_RawFree;
+ }
+ else
+ alloc = *allocator;
+
+ ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
+ if (ht == NULL)
+ return ht;
+
+ ht->num_buckets = round_size(init_size);
+ ht->entries = 0;
+ ht->data_size = data_size;
+
+ buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
+ ht->buckets = alloc.malloc(buckets_size);
+ if (ht->buckets == NULL) {
+ alloc.free(ht);
+ return NULL;
+ }
+ memset(ht->buckets, 0, buckets_size);
+
+ ht->hash_func = hash_func;
+ ht->compare_func = compare_func;
+ ht->copy_data_func = copy_data_func;
+ ht->free_data_func = free_data_func;
+ ht->get_data_size_func = get_data_size_func;
+ ht->alloc = alloc;
+ return ht;
+}
+
+_Py_hashtable_t *
+_Py_hashtable_new(size_t data_size,
+ _Py_hashtable_hash_func hash_func,
+ _Py_hashtable_compare_func compare_func)
+{
+ return _Py_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE,
+ hash_func, compare_func,
+ NULL, NULL, NULL, NULL);
+}
+
+size_t
+_Py_hashtable_size(_Py_hashtable_t *ht)
+{
+ size_t size;
+ size_t hv;
+
+ size = sizeof(_Py_hashtable_t);
+
+ /* buckets */
+ size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *);
+
+ /* entries */
+ size += ht->entries * HASHTABLE_ITEM_SIZE(ht);
+
+ /* data linked from entries */
+ if (ht->get_data_size_func) {
+ for (hv = 0; hv < ht->num_buckets; hv++) {
+ _Py_hashtable_entry_t *entry;
+
+ for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
+ void *data;
+
+ data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
+ size += ht->get_data_size_func(data);
+ }
+ }
+ }
+ return size;
+}
+
+#ifdef Py_DEBUG
+void
+_Py_hashtable_print_stats(_Py_hashtable_t *ht)
+{
+ size_t size;
+ size_t chain_len, max_chain_len, total_chain_len, nchains;
+ _Py_hashtable_entry_t *entry;
+ size_t hv;
+ double load;
+
+ size = _Py_hashtable_size(ht);
+
+ load = (double)ht->entries / ht->num_buckets;
+
+ max_chain_len = 0;
+ total_chain_len = 0;
+ nchains = 0;
+ for (hv = 0; hv < ht->num_buckets; hv++) {
+ entry = TABLE_HEAD(ht, hv);
+ if (entry != NULL) {
+ chain_len = 0;
+ for (; entry; entry = ENTRY_NEXT(entry)) {
+ chain_len++;
+ }
+ if (chain_len > max_chain_len)
+ max_chain_len = chain_len;
+ total_chain_len += chain_len;
+ nchains++;
+ }
+ }
+ printf("hash table %p: entries=%zu/%zu (%.0f%%), ",
+ ht, ht->entries, ht->num_buckets, load * 100.0);
+ if (nchains)
+ printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains);
+ printf("max_chain_len=%zu, %zu kB\n",
+ max_chain_len, size / 1024);
+}
+#endif
+
+/* Get an entry. Return NULL if the key does not exist. */
+_Py_hashtable_entry_t *
+_Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key)
+{
+ Py_uhash_t key_hash;
+ size_t index;
+ _Py_hashtable_entry_t *entry;
+
+ key_hash = ht->hash_func(key);
+ index = key_hash & (ht->num_buckets - 1);
+
+ for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
+ if (entry->key_hash == key_hash && ht->compare_func(key, entry))
+ break;
+ }
+
+ return entry;
+}
+
+static int
+_hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
+{
+ Py_uhash_t key_hash;
+ size_t index;
+ _Py_hashtable_entry_t *entry, *previous;
+
+ key_hash = ht->hash_func(key);
+ index = key_hash & (ht->num_buckets - 1);
+
+ previous = NULL;
+ for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
+ if (entry->key_hash == key_hash && ht->compare_func(key, entry))
+ break;
+ previous = entry;
+ }
+
+ if (entry == NULL)
+ return 0;
+
+ _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
+ (_Py_slist_item_t *)entry);
+ ht->entries--;
+
+ if (data != NULL)
+ _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
+ ht->alloc.free(entry);
+
+ if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW)
+ hashtable_rehash(ht);
+ return 1;
+}
+
+/* Add a new entry to the hash. The key must not be present in the hash table.
+ Return 0 on success, -1 on memory error. */
+int
+_Py_hashtable_set(_Py_hashtable_t *ht, const void *key,
+ void *data, size_t data_size)
+{
+ Py_uhash_t key_hash;
+ size_t index;
+ _Py_hashtable_entry_t *entry;
+
+ assert(data != NULL || data_size == 0);
+#ifndef NDEBUG
+    /* Keep the lookup on a separate line so that, if the assertion fails,
+       the duplicated entry can still be inspected in a debugger. */
+ entry = _Py_hashtable_get_entry(ht, key);
+ assert(entry == NULL);
+#endif
+
+ key_hash = ht->hash_func(key);
+ index = key_hash & (ht->num_buckets - 1);
+
+ entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht));
+ if (entry == NULL) {
+ /* memory allocation failed */
+ return -1;
+ }
+
+ entry->key = (void *)key;
+ entry->key_hash = key_hash;
+
+ assert(data_size == ht->data_size);
+    memcpy(_Py_HASHTABLE_ENTRY_DATA(entry), data, data_size);
+
+ _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
+ ht->entries++;
+
+ if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH)
+ hashtable_rehash(ht);
+ return 0;
+}
+
+/* Get data from an entry. Copy entry data into data and return 1 if the entry
+ exists, return 0 if the entry does not exist. */
+int
+_Py_hashtable_get(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
+{
+ _Py_hashtable_entry_t *entry;
+
+ assert(data != NULL);
+
+ entry = _Py_hashtable_get_entry(ht, key);
+ if (entry == NULL)
+ return 0;
+ _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
+ return 1;
+}
+
+int
+_Py_hashtable_pop(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
+{
+ assert(data != NULL);
+ assert(ht->free_data_func == NULL);
+ return _hashtable_pop_entry(ht, key, data, data_size);
+}
+
+/* Delete an entry. The entry must exist. */
+void
+_Py_hashtable_delete(_Py_hashtable_t *ht, const void *key)
+{
+#ifndef NDEBUG
+ int found = _hashtable_pop_entry(ht, key, NULL, 0);
+ assert(found);
+#else
+ (void)_hashtable_pop_entry(ht, key, NULL, 0);
+#endif
+}
+
+/* Call func() on each key/value pair of the hash table.
+   Iteration stops if func() returns a non-zero value, which is then
+   returned to the caller. */
+int
+_Py_hashtable_foreach(_Py_hashtable_t *ht,
+ int (*func) (_Py_hashtable_entry_t *entry, void *arg),
+ void *arg)
+{
+ _Py_hashtable_entry_t *entry;
+ size_t hv;
+
+ for (hv = 0; hv < ht->num_buckets; hv++) {
+ for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
+ int res = func(entry, arg);
+ if (res)
+ return res;
+ }
+ }
+ return 0;
+}
+
+static void
+hashtable_rehash(_Py_hashtable_t *ht)
+{
+ size_t buckets_size, new_size, bucket;
+ _Py_slist_t *old_buckets = NULL;
+ size_t old_num_buckets;
+
+ new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR));
+ if (new_size == ht->num_buckets)
+ return;
+
+ old_num_buckets = ht->num_buckets;
+
+ buckets_size = new_size * sizeof(ht->buckets[0]);
+ old_buckets = ht->buckets;
+ ht->buckets = ht->alloc.malloc(buckets_size);
+ if (ht->buckets == NULL) {
+        /* cancel the rehash on memory allocation failure */
+        ht->buckets = old_buckets;
+ return;
+ }
+ memset(ht->buckets, 0, buckets_size);
+
+ ht->num_buckets = new_size;
+
+ for (bucket = 0; bucket < old_num_buckets; bucket++) {
+ _Py_hashtable_entry_t *entry, *next;
+ for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
+ size_t entry_index;
+
+ assert(ht->hash_func(entry->key) == entry->key_hash);
+ next = ENTRY_NEXT(entry);
+ entry_index = entry->key_hash & (new_size - 1);
+
+ _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry);
+ }
+ }
+
+ ht->alloc.free(old_buckets);
+}
+
+void
+_Py_hashtable_clear(_Py_hashtable_t *ht)
+{
+ _Py_hashtable_entry_t *entry, *next;
+ size_t i;
+
+    for (i = 0; i < ht->num_buckets; i++) {
+ for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
+ next = ENTRY_NEXT(entry);
+ if (ht->free_data_func)
+ ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
+ ht->alloc.free(entry);
+ }
+ _Py_slist_init(&ht->buckets[i]);
+ }
+ ht->entries = 0;
+ hashtable_rehash(ht);
+}
+
+void
+_Py_hashtable_destroy(_Py_hashtable_t *ht)
+{
+ size_t i;
+
+ for (i = 0; i < ht->num_buckets; i++) {
+ _Py_slist_item_t *entry = ht->buckets[i].head;
+ while (entry) {
+ _Py_slist_item_t *entry_next = entry->next;
+ if (ht->free_data_func)
+ ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
+ ht->alloc.free(entry);
+ entry = entry_next;
+ }
+ }
+
+ ht->alloc.free(ht->buckets);
+ ht->alloc.free(ht);
+}
+
+/* Return a copy of the hash table */
+_Py_hashtable_t *
+_Py_hashtable_copy(_Py_hashtable_t *src)
+{
+ _Py_hashtable_t *dst;
+ _Py_hashtable_entry_t *entry;
+ size_t bucket;
+ int err;
+ void *data, *new_data;
+
+ dst = _Py_hashtable_new_full(src->data_size, src->num_buckets,
+ src->hash_func, src->compare_func,
+ src->copy_data_func, src->free_data_func,
+ src->get_data_size_func, &src->alloc);
+ if (dst == NULL)
+ return NULL;
+
+    for (bucket = 0; bucket < src->num_buckets; bucket++) {
+ entry = TABLE_HEAD(src, bucket);
+ for (; entry; entry = ENTRY_NEXT(entry)) {
+ if (src->copy_data_func) {
+ data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
+ new_data = src->copy_data_func(data);
+ if (new_data != NULL)
+ err = _Py_hashtable_set(dst, entry->key,
+ &new_data, src->data_size);
+ else
+ err = 1;
+ }
+ else {
+            data = _Py_HASHTABLE_ENTRY_DATA(entry);
+ err = _Py_hashtable_set(dst, entry->key, data, src->data_size);
+ }
+ if (err) {
+ _Py_hashtable_destroy(dst);
+ return NULL;
+ }
+ }
+ }
+ return dst;
+}
+
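
The following usage sketch is not part of the commit; it illustrates how the
API defined above might be exercised, assuming the prototypes in
Modules/hashtable.h match the definitions in this file. The example() function
and its variable names are hypothetical.

#include "Python.h"
#include <assert.h>
#include "hashtable.h"

/* Map a pointer key to a fixed-size int value. */
static void
example(void)
{
    _Py_hashtable_t *ht;
    int value = 42, got = 0;
    void *key = &value;                /* any pointer can serve as a key */

    /* data_size fixes the number of bytes copied into each entry */
    ht = _Py_hashtable_new(sizeof(int),
                           _Py_hashtable_hash_ptr,
                           _Py_hashtable_compare_direct);
    if (ht == NULL)
        return;

    /* the key must not already be present; the value is copied with memcpy */
    if (_Py_hashtable_set(ht, key, &value, sizeof(value)) < 0) {
        _Py_hashtable_destroy(ht);
        return;
    }

    /* copies the stored value back out; returns 1 if the key was found */
    if (_Py_hashtable_get(ht, key, &got, sizeof(got)))
        assert(got == 42);

    _Py_hashtable_destroy(ht);         /* frees all entries and the table */
}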