summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@gmail.com> 2016-03-21 21:00:58 (GMT)
committer: Victor Stinner <victor.stinner@gmail.com> 2016-03-21 21:00:58 (GMT)
commit: 285cf0a6014af147b82a3446d9e088ad0332720d (patch)
tree: 829fa2b00f39bf7ff31496cca47ddd127b135e4f
parent: 928bff0b26adb643a7078575c9075b4b709c1b16 (diff)
download: cpython-285cf0a6014af147b82a3446d9e088ad0332720d.zip
cpython-285cf0a6014af147b82a3446d9e088ad0332720d.tar.gz
cpython-285cf0a6014af147b82a3446d9e088ad0332720d.tar.bz2
hashtable.h now supports keys of any size
Issue #26588: hashtable.h now supports keys of any size, not only sizeof(void*). This makes it possible to use keys larger than sizeof(void*), and also to use less memory for keys smaller than sizeof(void*).
-rw-r--r-- Modules/_tracemalloc.c 105
-rw-r--r-- Modules/hashtable.c 147
-rw-r--r-- Modules/hashtable.h 165
-rw-r--r-- Python/marshal.c 15
4 files changed, 294 insertions, 138 deletions
diff --git a/Modules/_tracemalloc.c b/Modules/_tracemalloc.c
index 5752904..6799eb6 100644
--- a/Modules/_tracemalloc.c
+++ b/Modules/_tracemalloc.c
@@ -196,23 +196,38 @@ set_reentrant(int reentrant)
}
#endif
+static Py_uhash_t
+hashtable_hash_pyobject(size_t key_size, const void *pkey)
+{
+ PyObject *obj;
+
+ _Py_HASHTABLE_READ_KEY(key_size, pkey, obj);
+ return PyObject_Hash(obj);
+}
+
static int
-hashtable_compare_unicode(const void *key, const _Py_hashtable_entry_t *entry)
+hashtable_compare_unicode(size_t key_size, const void *pkey,
+ const _Py_hashtable_entry_t *entry)
{
- if (key != NULL && entry->key != NULL)
- return (PyUnicode_Compare((PyObject *)key, (PyObject *)entry->key) == 0);
+ PyObject *key, *entry_key;
+
+ _Py_HASHTABLE_READ_KEY(key_size, pkey, key);
+ _Py_HASHTABLE_ENTRY_READ_KEY(key_size, entry, entry_key);
+
+ if (key != NULL && entry_key != NULL)
+ return (PyUnicode_Compare(key, entry_key) == 0);
else
- return key == entry->key;
+ return key == entry_key;
}
static _Py_hashtable_allocator_t hashtable_alloc = {malloc, free};
static _Py_hashtable_t *
-hashtable_new(size_t data_size,
+hashtable_new(size_t key_size, size_t data_size,
_Py_hashtable_hash_func hash_func,
_Py_hashtable_compare_func compare_func)
{
- return _Py_hashtable_new_full(data_size, 0,
+ return _Py_hashtable_new_full(key_size, data_size, 0,
hash_func, compare_func,
NULL, NULL, NULL, &hashtable_alloc);
}
@@ -230,20 +245,25 @@ raw_free(void *ptr)
}
static Py_uhash_t
-hashtable_hash_traceback(const void *key)
+hashtable_hash_traceback(size_t key_size, const void *pkey)
{
- const traceback_t *traceback = key;
+ const traceback_t *traceback;
+
+ _Py_HASHTABLE_READ_KEY(key_size, pkey, traceback);
return traceback->hash;
}
static int
-hashtable_compare_traceback(const traceback_t *traceback1,
+hashtable_compare_traceback(size_t key_size, const void *pkey,
const _Py_hashtable_entry_t *he)
{
- const traceback_t *traceback2 = he->key;
+ traceback_t *traceback1, *traceback2;
const frame_t *frame1, *frame2;
int i;
+ _Py_HASHTABLE_READ_KEY(key_size, pkey, traceback1);
+ _Py_HASHTABLE_ENTRY_READ_KEY(key_size, he, traceback2);
+
if (traceback1->nframe != traceback2->nframe)
return 0;
@@ -312,15 +332,16 @@ tracemalloc_get_frame(PyFrameObject *pyframe, frame_t *frame)
}
/* intern the filename */
- entry = _Py_hashtable_get_entry(tracemalloc_filenames, filename);
+ entry = _Py_HASHTABLE_GET_ENTRY(tracemalloc_filenames, filename);
if (entry != NULL) {
- filename = (PyObject *)entry->key;
+ _Py_HASHTABLE_ENTRY_READ_KEY(tracemalloc_filenames->key_size, entry,
+ filename);
}
else {
/* tracemalloc_filenames is responsible to keep a reference
to the filename */
Py_INCREF(filename);
- if (_Py_hashtable_set(tracemalloc_filenames, filename, NULL, 0) < 0) {
+ if (_Py_HASHTABLE_SET_NODATA(tracemalloc_filenames, filename) < 0) {
Py_DECREF(filename);
#ifdef TRACE_DEBUG
tracemalloc_error("failed to intern the filename");
@@ -403,9 +424,10 @@ traceback_new(void)
traceback->hash = traceback_hash(traceback);
/* intern the traceback */
- entry = _Py_hashtable_get_entry(tracemalloc_tracebacks, traceback);
+ entry = _Py_HASHTABLE_GET_ENTRY(tracemalloc_tracebacks, traceback);
if (entry != NULL) {
- traceback = (traceback_t *)entry->key;
+ _Py_HASHTABLE_ENTRY_READ_KEY(tracemalloc_tracebacks->key_size, entry,
+ traceback);
}
else {
traceback_t *copy;
@@ -422,7 +444,7 @@ traceback_new(void)
}
memcpy(copy, traceback, traceback_size);
- if (_Py_hashtable_set(tracemalloc_tracebacks, copy, NULL, 0) < 0) {
+ if (_Py_HASHTABLE_SET_NODATA(tracemalloc_tracebacks, copy) < 0) {
raw_free(copy);
#ifdef TRACE_DEBUG
tracemalloc_error("failed to intern the traceback: putdata failed");
@@ -464,7 +486,7 @@ tracemalloc_remove_trace(void *ptr)
{
trace_t trace;
- if (_Py_hashtable_pop(tracemalloc_traces, ptr, &trace, sizeof(trace))) {
+ if (_Py_HASHTABLE_POP(tracemalloc_traces, ptr, trace)) {
assert(tracemalloc_traced_memory >= trace.size);
tracemalloc_traced_memory -= trace.size;
}
@@ -714,17 +736,23 @@ tracemalloc_raw_realloc(void *ctx, void *ptr, size_t new_size)
#endif /* TRACE_RAW_MALLOC */
static int
-tracemalloc_clear_filename(_Py_hashtable_entry_t *entry, void *user_data)
+tracemalloc_clear_filename(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
+ void *user_data)
{
- PyObject *filename = (PyObject *)entry->key;
+ PyObject *filename;
+
+ _Py_HASHTABLE_ENTRY_READ_KEY(ht->key_size, entry, filename);
Py_DECREF(filename);
return 0;
}
static int
-traceback_free_traceback(_Py_hashtable_entry_t *entry, void *user_data)
+traceback_free_traceback(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
+ void *user_data)
{
- traceback_t *traceback = (traceback_t *)entry->key;
+ traceback_t *traceback;
+
+ _Py_HASHTABLE_ENTRY_READ_KEY(ht->key_size, entry, traceback);
raw_free(traceback);
return 0;
}
@@ -791,21 +819,20 @@ tracemalloc_init(void)
}
#endif
- tracemalloc_filenames = hashtable_new(0,
- (_Py_hashtable_hash_func)PyObject_Hash,
+ tracemalloc_filenames = hashtable_new(sizeof(PyObject *), 0,
+ hashtable_hash_pyobject,
hashtable_compare_unicode);
- tracemalloc_tracebacks = hashtable_new(0,
- (_Py_hashtable_hash_func)hashtable_hash_traceback,
- (_Py_hashtable_compare_func)hashtable_compare_traceback);
+ tracemalloc_tracebacks = hashtable_new(sizeof(traceback_t *), 0,
+ hashtable_hash_traceback,
+ hashtable_compare_traceback);
- tracemalloc_traces = hashtable_new(sizeof(trace_t),
+ tracemalloc_traces = hashtable_new(sizeof(void*), sizeof(trace_t),
_Py_hashtable_hash_ptr,
_Py_hashtable_compare_direct);
if (tracemalloc_filenames == NULL || tracemalloc_tracebacks == NULL
- || tracemalloc_traces == NULL)
- {
+ || tracemalloc_traces == NULL) {
PyErr_NoMemory();
return -1;
}
@@ -840,9 +867,9 @@ tracemalloc_deinit(void)
tracemalloc_stop();
/* destroy hash tables */
- _Py_hashtable_destroy(tracemalloc_traces);
_Py_hashtable_destroy(tracemalloc_tracebacks);
_Py_hashtable_destroy(tracemalloc_filenames);
+ _Py_hashtable_destroy(tracemalloc_traces);
#if defined(WITH_THREAD) && defined(TRACE_RAW_MALLOC)
if (tables_lock != NULL) {
@@ -935,8 +962,9 @@ tracemalloc_stop(void)
PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &allocators.mem);
PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &allocators.obj);
- /* release memory */
tracemalloc_clear_traces();
+
+ /* release memory */
raw_free(tracemalloc_traceback);
tracemalloc_traceback = NULL;
}
@@ -1065,14 +1093,15 @@ typedef struct {
} get_traces_t;
static int
-tracemalloc_get_traces_fill(_Py_hashtable_entry_t *entry, void *user_data)
+tracemalloc_get_traces_fill(_Py_hashtable_t *traces, _Py_hashtable_entry_t *entry,
+ void *user_data)
{
get_traces_t *get_traces = user_data;
trace_t *trace;
PyObject *tracemalloc_obj;
int res;
- trace = (trace_t *)_Py_HASHTABLE_ENTRY_DATA(entry);
+ trace = (trace_t *)_Py_HASHTABLE_ENTRY_DATA(traces, entry);
tracemalloc_obj = trace_to_pyobject(trace, get_traces->tracebacks);
if (tracemalloc_obj == NULL)
@@ -1087,9 +1116,11 @@ tracemalloc_get_traces_fill(_Py_hashtable_entry_t *entry, void *user_data)
}
static int
-tracemalloc_pyobject_decref_cb(_Py_hashtable_entry_t *entry, void *user_data)
+tracemalloc_pyobject_decref_cb(_Py_hashtable_t *tracebacks,
+ _Py_hashtable_entry_t *entry,
+ void *user_data)
{
- PyObject *obj = (PyObject *)_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
+ PyObject *obj = (PyObject *)_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(tracebacks, entry);
Py_DECREF(obj);
return 0;
}
@@ -1120,7 +1151,7 @@ py_tracemalloc_get_traces(PyObject *self, PyObject *obj)
/* the traceback hash table is used temporarily to intern traceback tuple
of (filename, lineno) tuples */
- get_traces.tracebacks = hashtable_new(sizeof(PyObject *),
+ get_traces.tracebacks = hashtable_new(sizeof(traceback_t *), sizeof(PyObject *),
_Py_hashtable_hash_ptr,
_Py_hashtable_compare_direct);
if (get_traces.tracebacks == NULL) {
@@ -1152,7 +1183,7 @@ error:
finally:
if (get_traces.tracebacks != NULL) {
_Py_hashtable_foreach(get_traces.tracebacks,
- tracemalloc_pyobject_decref_cb, NULL);
+ tracemalloc_pyobject_decref_cb, NULL);
_Py_hashtable_destroy(get_traces.tracebacks);
}
if (get_traces.traces != NULL)
diff --git a/Modules/hashtable.c b/Modules/hashtable.c
index 7de154b..d33f0d7 100644
--- a/Modules/hashtable.c
+++ b/Modules/hashtable.c
@@ -1,5 +1,5 @@
-/* The implementation of the hash table (_Py_hashtable_t) is based on the cfuhash
- project:
+/* The implementation of the hash table (_Py_hashtable_t) is based on the
+ cfuhash project:
http://sourceforge.net/projects/libcfu/
Copyright of cfuhash:
@@ -59,7 +59,7 @@
#define ENTRY_NEXT(ENTRY) \
((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
#define HASHTABLE_ITEM_SIZE(HT) \
- (sizeof(_Py_hashtable_entry_t) + (HT)->data_size)
+ (sizeof(_Py_hashtable_entry_t) + (HT)->key_size + (HT)->data_size)
/* Forward declaration */
static void hashtable_rehash(_Py_hashtable_t *ht);
@@ -70,6 +70,7 @@ _Py_slist_init(_Py_slist_t *list)
list->head = NULL;
}
+
static void
_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
{
@@ -77,6 +78,7 @@ _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
list->head = item;
}
+
static void
_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
_Py_slist_item_t *item)
@@ -87,24 +89,26 @@ _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
list->head = item->next;
}
-Py_uhash_t
-_Py_hashtable_hash_int(const void *key)
-{
- return (Py_uhash_t)key;
-}
Py_uhash_t
-_Py_hashtable_hash_ptr(const void *key)
+_Py_hashtable_hash_ptr(size_t key_size, const void *pkey)
{
+ void *key;
+
+ _Py_HASHTABLE_READ_KEY(key_size, pkey, key);
return (Py_uhash_t)_Py_HashPointer((void *)key);
}
+
int
-_Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry)
+_Py_hashtable_compare_direct(size_t key_size, const void *pkey,
+ const _Py_hashtable_entry_t *entry)
{
- return entry->key == key;
+ const void *pkey2 = _Py_HASHTABLE_ENTRY_KEY(entry);
+ return (memcmp(pkey, pkey2, key_size) == 0);
}
+
/* makes sure the real size of the buckets array is a power of 2 */
static size_t
round_size(size_t s)
@@ -118,8 +122,10 @@ round_size(size_t s)
return i;
}
+
_Py_hashtable_t *
-_Py_hashtable_new_full(size_t data_size, size_t init_size,
+_Py_hashtable_new_full(size_t key_size, size_t data_size,
+ size_t init_size,
_Py_hashtable_hash_func hash_func,
_Py_hashtable_compare_func compare_func,
_Py_hashtable_copy_data_func copy_data_func,
@@ -144,6 +150,7 @@ _Py_hashtable_new_full(size_t data_size, size_t init_size,
ht->num_buckets = round_size(init_size);
ht->entries = 0;
+ ht->key_size = key_size;
ht->data_size = data_size;
buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
@@ -163,16 +170,19 @@ _Py_hashtable_new_full(size_t data_size, size_t init_size,
return ht;
}
+
_Py_hashtable_t *
-_Py_hashtable_new(size_t data_size,
+_Py_hashtable_new(size_t key_size, size_t data_size,
_Py_hashtable_hash_func hash_func,
_Py_hashtable_compare_func compare_func)
{
- return _Py_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE,
+ return _Py_hashtable_new_full(key_size, data_size,
+ HASHTABLE_MIN_SIZE,
hash_func, compare_func,
NULL, NULL, NULL, NULL);
}
+
size_t
_Py_hashtable_size(_Py_hashtable_t *ht)
{
@@ -195,7 +205,7 @@ _Py_hashtable_size(_Py_hashtable_t *ht)
for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
void *data;
- data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
+ data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry);
size += ht->get_data_size_func(data);
}
}
@@ -203,6 +213,7 @@ _Py_hashtable_size(_Py_hashtable_t *ht)
return size;
}
+
#ifdef Py_DEBUG
void
_Py_hashtable_print_stats(_Py_hashtable_t *ht)
@@ -243,38 +254,47 @@ _Py_hashtable_print_stats(_Py_hashtable_t *ht)
}
#endif
-/* Get an entry. Return NULL if the key does not exist. */
+
_Py_hashtable_entry_t *
-_Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key)
+_Py_hashtable_get_entry(_Py_hashtable_t *ht,
+ size_t key_size, const void *pkey)
{
Py_uhash_t key_hash;
size_t index;
_Py_hashtable_entry_t *entry;
- key_hash = ht->hash_func(key);
+ assert(key_size == ht->key_size);
+
+ key_hash = ht->hash_func(key_size, pkey);
index = key_hash & (ht->num_buckets - 1);
for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
- if (entry->key_hash == key_hash && ht->compare_func(key, entry))
+ if (entry->key_hash == key_hash
+ && ht->compare_func(key_size, pkey, entry))
break;
}
return entry;
}
+
static int
-_hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
+_Py_hashtable_pop_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
+ void *data, size_t data_size)
{
Py_uhash_t key_hash;
size_t index;
_Py_hashtable_entry_t *entry, *previous;
- key_hash = ht->hash_func(key);
+ assert(key_size == ht->key_size);
+
+ key_hash = ht->hash_func(key_size, pkey);
index = key_hash & (ht->num_buckets - 1);
previous = NULL;
for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
- if (entry->key_hash == key_hash && ht->compare_func(key, entry))
+ if (entry->key_hash == key_hash
+ && ht->compare_func(key_size, pkey, entry))
break;
previous = entry;
}
@@ -287,7 +307,7 @@ _hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t da
ht->entries--;
if (data != NULL)
- _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
+ _Py_HASHTABLE_ENTRY_READ_DATA(ht, entry, data_size, data);
ht->alloc.free(entry);
if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW)
@@ -295,26 +315,27 @@ _hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t da
return 1;
}
-/* Add a new entry to the hash. The key must not be present in the hash table.
- Return 0 on success, -1 on memory error. */
+
int
-_Py_hashtable_set(_Py_hashtable_t *ht, const void *key,
- void *data, size_t data_size)
+_Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
+ size_t data_size, void *data)
{
Py_uhash_t key_hash;
size_t index;
_Py_hashtable_entry_t *entry;
+ assert(key_size == ht->key_size);
+
assert(data != NULL || data_size == 0);
#ifndef NDEBUG
/* Don't write the assertion on a single line because it is interesting
to know the duplicated entry if the assertion failed. The entry can
be read using a debugger. */
- entry = _Py_hashtable_get_entry(ht, key);
+ entry = _Py_hashtable_get_entry(ht, key_size, pkey);
assert(entry == NULL);
#endif
- key_hash = ht->hash_func(key);
+ key_hash = ht->hash_func(key_size, pkey);
index = key_hash & (ht->num_buckets - 1);
entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht));
@@ -323,11 +344,11 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key,
return -1;
}
- entry->key = (void *)key;
entry->key_hash = key_hash;
+ memcpy((void *)_Py_HASHTABLE_ENTRY_KEY(entry), pkey, key_size);
assert(data_size == ht->data_size);
- memcpy(_Py_HASHTABLE_ENTRY_DATA(entry), data, data_size);
+ memcpy(_Py_HASHTABLE_ENTRY_DATA(ht, entry), data, data_size);
_Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
ht->entries++;
@@ -337,48 +358,48 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key,
return 0;
}
-/* Get data from an entry. Copy entry data into data and return 1 if the entry
- exists, return 0 if the entry does not exist. */
+
int
-_Py_hashtable_get(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
+_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey,
+ size_t data_size, void *data)
{
_Py_hashtable_entry_t *entry;
assert(data != NULL);
- entry = _Py_hashtable_get_entry(ht, key);
+ entry = _Py_hashtable_get_entry(ht, key_size, pkey);
if (entry == NULL)
return 0;
- _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
+ _Py_HASHTABLE_ENTRY_READ_DATA(ht, entry, data_size, data);
return 1;
}
+
int
-_Py_hashtable_pop(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
+_Py_hashtable_pop(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
+ size_t data_size, void *data)
{
assert(data != NULL);
assert(ht->free_data_func == NULL);
- return _hashtable_pop_entry(ht, key, data, data_size);
+ return _Py_hashtable_pop_entry(ht, key_size, pkey, data, data_size);
}
-/* Delete an entry. The entry must exist. */
+
void
-_Py_hashtable_delete(_Py_hashtable_t *ht, const void *key)
+_Py_hashtable_delete(_Py_hashtable_t *ht, size_t key_size, const void *pkey)
{
#ifndef NDEBUG
- int found = _hashtable_pop_entry(ht, key, NULL, 0);
+ int found = _Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
assert(found);
#else
- (void)_hashtable_pop_entry(ht, key, NULL, 0);
+ (void)_Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
#endif
}
-/* Prototype for a pointer to a function to be called foreach
- key/value pair in the hash by hashtable_foreach(). Iteration
- stops if a non-zero value is returned. */
+
int
_Py_hashtable_foreach(_Py_hashtable_t *ht,
- int (*func) (_Py_hashtable_entry_t *entry, void *arg),
+ _Py_hashtable_foreach_func func,
void *arg)
{
_Py_hashtable_entry_t *entry;
@@ -386,7 +407,7 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht,
for (hv = 0; hv < ht->num_buckets; hv++) {
for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
- int res = func(entry, arg);
+ int res = func(ht, entry, arg);
if (res)
return res;
}
@@ -394,9 +415,11 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht,
return 0;
}
+
static void
hashtable_rehash(_Py_hashtable_t *ht)
{
+ const size_t key_size = ht->key_size;
size_t buckets_size, new_size, bucket;
_Py_slist_t *old_buckets = NULL;
size_t old_num_buckets;
@@ -425,7 +448,8 @@ hashtable_rehash(_Py_hashtable_t *ht)
for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
size_t entry_index;
- assert(ht->hash_func(entry->key) == entry->key_hash);
+
+ assert(ht->hash_func(key_size, _Py_HASHTABLE_ENTRY_KEY(entry)) == entry->key_hash);
next = ENTRY_NEXT(entry);
entry_index = entry->key_hash & (new_size - 1);
@@ -436,6 +460,7 @@ hashtable_rehash(_Py_hashtable_t *ht)
ht->alloc.free(old_buckets);
}
+
void
_Py_hashtable_clear(_Py_hashtable_t *ht)
{
@@ -446,7 +471,7 @@ _Py_hashtable_clear(_Py_hashtable_t *ht)
for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
next = ENTRY_NEXT(entry);
if (ht->free_data_func)
- ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
+ ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry));
ht->alloc.free(entry);
}
_Py_slist_init(&ht->buckets[i]);
@@ -455,6 +480,7 @@ _Py_hashtable_clear(_Py_hashtable_t *ht)
hashtable_rehash(ht);
}
+
void
_Py_hashtable_destroy(_Py_hashtable_t *ht)
{
@@ -465,7 +491,7 @@ _Py_hashtable_destroy(_Py_hashtable_t *ht)
while (entry) {
_Py_slist_item_t *entry_next = entry->next;
if (ht->free_data_func)
- ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
+ ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry));
ht->alloc.free(entry);
entry = entry_next;
}
@@ -475,17 +501,20 @@ _Py_hashtable_destroy(_Py_hashtable_t *ht)
ht->alloc.free(ht);
}
-/* Return a copy of the hash table */
+
_Py_hashtable_t *
_Py_hashtable_copy(_Py_hashtable_t *src)
{
+ const size_t key_size = src->key_size;
+ const size_t data_size = src->data_size;
_Py_hashtable_t *dst;
_Py_hashtable_entry_t *entry;
size_t bucket;
int err;
void *data, *new_data;
- dst = _Py_hashtable_new_full(src->data_size, src->num_buckets,
+ dst = _Py_hashtable_new_full(key_size, data_size,
+ src->num_buckets,
src->hash_func, src->compare_func,
src->copy_data_func, src->free_data_func,
src->get_data_size_func, &src->alloc);
@@ -496,17 +525,20 @@ _Py_hashtable_copy(_Py_hashtable_t *src)
entry = TABLE_HEAD(src, bucket);
for (; entry; entry = ENTRY_NEXT(entry)) {
if (src->copy_data_func) {
- data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
+ data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(src, entry);
new_data = src->copy_data_func(data);
if (new_data != NULL)
- err = _Py_hashtable_set(dst, entry->key,
- &new_data, src->data_size);
+ err = _Py_hashtable_set(dst, key_size,
+ _Py_HASHTABLE_ENTRY_KEY(entry),
+ data_size, &new_data);
else
err = 1;
}
else {
- data = _Py_HASHTABLE_ENTRY_DATA(entry);
- err = _Py_hashtable_set(dst, entry->key, data, src->data_size);
+ data = _Py_HASHTABLE_ENTRY_DATA(src, entry);
+ err = _Py_hashtable_set(dst, key_size,
+ _Py_HASHTABLE_ENTRY_KEY(entry),
+ data_size, data);
}
if (err) {
_Py_hashtable_destroy(dst);
@@ -516,4 +548,3 @@ _Py_hashtable_copy(_Py_hashtable_t *src)
}
return dst;
}
-
diff --git a/Modules/hashtable.h b/Modules/hashtable.h
index a9f9993..6eb5737 100644
--- a/Modules/hashtable.h
+++ b/Modules/hashtable.h
@@ -1,9 +1,10 @@
#ifndef Py_HASHTABLE_H
#define Py_HASHTABLE_H
-
/* The whole API is private */
#ifndef Py_LIMITED_API
+/* Single linked list */
+
typedef struct _Py_slist_item_s {
struct _Py_slist_item_s *next;
} _Py_slist_item_t;
@@ -16,30 +17,55 @@ typedef struct {
#define _Py_SLIST_HEAD(SLIST) (((_Py_slist_t *)SLIST)->head)
+
+/* _Py_hashtable: table entry */
+
typedef struct {
/* used by _Py_hashtable_t.buckets to link entries */
_Py_slist_item_t _Py_slist_item;
- const void *key;
Py_uhash_t key_hash;
- /* data follows */
+ /* key (key_size bytes) and then data (data_size bytes) follows */
} _Py_hashtable_entry_t;
-#define _Py_HASHTABLE_ENTRY_DATA(ENTRY) \
- ((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t))
+#define _Py_HASHTABLE_ENTRY_KEY(ENTRY) \
+ ((const void *)((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t)))
+
+#define _Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY) \
+ ((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t) + (TABLE)->key_size)
+
+#define _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(TABLE, ENTRY) \
+ (*(void **)_Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY))
+
+/* Get a key value from pkey: use memcpy() rather than a pointer dereference
+ to avoid memory alignment issues. */
+#define _Py_HASHTABLE_READ_KEY(KEY_SIZE, PKEY, DST_KEY) \
+ do { \
+ assert(sizeof(DST_KEY) == (KEY_SIZE)); \
+ memcpy(&(DST_KEY), (PKEY), sizeof(DST_KEY)); \
+ } while (0)
-#define _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ENTRY) \
- (*(void **)_Py_HASHTABLE_ENTRY_DATA(ENTRY))
+#define _Py_HASHTABLE_ENTRY_READ_KEY(KEY_SIZE, ENTRY, KEY) \
+ do { \
+ assert(sizeof(KEY) == (KEY_SIZE)); \
+ memcpy(&(KEY), _Py_HASHTABLE_ENTRY_KEY(ENTRY), sizeof(KEY)); \
+ } while (0)
-#define _Py_HASHTABLE_ENTRY_READ_DATA(TABLE, DATA, DATA_SIZE, ENTRY) \
+#define _Py_HASHTABLE_ENTRY_READ_DATA(TABLE, ENTRY, DATA_SIZE, DATA) \
do { \
assert((DATA_SIZE) == (TABLE)->data_size); \
- memcpy(DATA, _Py_HASHTABLE_ENTRY_DATA(ENTRY), DATA_SIZE); \
+ memcpy(DATA, _Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY), DATA_SIZE); \
} while (0)
-typedef Py_uhash_t (*_Py_hashtable_hash_func) (const void *key);
-typedef int (*_Py_hashtable_compare_func) (const void *key, const _Py_hashtable_entry_t *he);
+
+/* _Py_hashtable: prototypes */
+
+typedef Py_uhash_t (*_Py_hashtable_hash_func) (size_t key_size,
+ const void *pkey);
+typedef int (*_Py_hashtable_compare_func) (size_t key_size,
+ const void *pkey,
+ const _Py_hashtable_entry_t *he);
typedef void* (*_Py_hashtable_copy_data_func)(void *data);
typedef void (*_Py_hashtable_free_data_func)(void *data);
typedef size_t (*_Py_hashtable_get_data_size_func)(void *data);
@@ -52,10 +78,14 @@ typedef struct {
void (*free) (void *ptr);
} _Py_hashtable_allocator_t;
+
+/* _Py_hashtable: table */
+
typedef struct {
size_t num_buckets;
size_t entries; /* Total number of entries in the table. */
_Py_slist_t *buckets;
+ size_t key_size;
size_t data_size;
_Py_hashtable_hash_func hash_func;
@@ -66,16 +96,25 @@ typedef struct {
_Py_hashtable_allocator_t alloc;
} _Py_hashtable_t;
-/* hash and compare functions for integers and pointers */
-PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(const void *key);
-PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_int(const void *key);
-PyAPI_FUNC(int) _Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry);
+/* hash a pointer (void*) */
+PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(
+ size_t key_size,
+ const void *pkey);
+
+/* comparison using memcmp() */
+PyAPI_FUNC(int) _Py_hashtable_compare_direct(
+ size_t key_size,
+ const void *pkey,
+ const _Py_hashtable_entry_t *entry);
PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new(
+ size_t key_size,
size_t data_size,
_Py_hashtable_hash_func hash_func,
_Py_hashtable_compare_func compare_func);
+
PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new_full(
+ size_t key_size,
size_t data_size,
size_t init_size,
_Py_hashtable_hash_func hash_func,
@@ -84,45 +123,95 @@ PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new_full(
_Py_hashtable_free_data_func free_data_func,
_Py_hashtable_get_data_size_func get_data_size_func,
_Py_hashtable_allocator_t *allocator);
+
+PyAPI_FUNC(void) _Py_hashtable_destroy(_Py_hashtable_t *ht);
+
+/* Return a copy of the hash table */
PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_copy(_Py_hashtable_t *src);
+
PyAPI_FUNC(void) _Py_hashtable_clear(_Py_hashtable_t *ht);
-PyAPI_FUNC(void) _Py_hashtable_destroy(_Py_hashtable_t *ht);
-typedef int (*_Py_hashtable_foreach_func) (_Py_hashtable_entry_t *entry, void *arg);
+typedef int (*_Py_hashtable_foreach_func) (_Py_hashtable_t *ht,
+ _Py_hashtable_entry_t *entry,
+ void *arg);
+/* Call func() on each entry of the hashtable.
+ Iteration stops if func() result is non-zero, in this case it's the result
+ of the call. Otherwise, the function returns 0. */
PyAPI_FUNC(int) _Py_hashtable_foreach(
_Py_hashtable_t *ht,
- _Py_hashtable_foreach_func func, void *arg);
+ _Py_hashtable_foreach_func func,
+ void *arg);
+
PyAPI_FUNC(size_t) _Py_hashtable_size(_Py_hashtable_t *ht);
-PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
- _Py_hashtable_t *ht,
- const void *key);
+/* Add a new entry to the hash. The key must not be present in the hash table.
+ Return 0 on success, -1 on memory error.
+
+ Don't call directly this function,
+ but use _Py_HASHTABLE_SET() and _Py_HASHTABLE_SET_NODATA() macros */
PyAPI_FUNC(int) _Py_hashtable_set(
_Py_hashtable_t *ht,
- const void *key,
- void *data,
- size_t data_size);
+ size_t key_size,
+ const void *pkey,
+ size_t data_size,
+ void *data);
+
+#define _Py_HASHTABLE_SET(TABLE, KEY, DATA) \
+ _Py_hashtable_set(TABLE, sizeof(KEY), &KEY, sizeof(DATA), &(DATA))
+
+#define _Py_HASHTABLE_SET_NODATA(TABLE, KEY) \
+ _Py_hashtable_set(TABLE, sizeof(KEY), &KEY, 0, NULL)
+
+
+/* Get an entry.
+ Return NULL if the key does not exist.
+
+ Don't call directly this function, but use _Py_HASHTABLE_GET_ENTRY()
+ macro */
+PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
+ _Py_hashtable_t *ht,
+ size_t key_size,
+ const void *pkey);
+
+#define _Py_HASHTABLE_GET_ENTRY(TABLE, KEY) \
+ _Py_hashtable_get_entry(TABLE, sizeof(KEY), &(KEY))
+
+
+/* Get data from an entry. Copy entry data into data and return 1 if the entry
+ exists, return 0 if the entry does not exist.
+
+ Don't call directly this function, but use _Py_HASHTABLE_GET() macro */
PyAPI_FUNC(int) _Py_hashtable_get(
_Py_hashtable_t *ht,
- const void *key,
- void *data,
- size_t data_size);
+ size_t key_size,
+ const void *pkey,
+ size_t data_size,
+ void *data);
+
+#define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \
+ _Py_hashtable_get(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA))
+
+
+/* Don't call directly this function, but use _Py_HASHTABLE_POP() macro */
PyAPI_FUNC(int) _Py_hashtable_pop(
_Py_hashtable_t *ht,
- const void *key,
- void *data,
- size_t data_size);
-PyAPI_FUNC(void) _Py_hashtable_delete(
- _Py_hashtable_t *ht,
- const void *key);
+ size_t key_size,
+ const void *pkey,
+ size_t data_size,
+ void *data);
-#define _Py_HASHTABLE_SET(TABLE, KEY, DATA) \
- _Py_hashtable_set(TABLE, KEY, &(DATA), sizeof(DATA))
+#define _Py_HASHTABLE_POP(TABLE, KEY, DATA) \
+ _Py_hashtable_pop(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA))
-#define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \
- _Py_hashtable_get(TABLE, KEY, &(DATA), sizeof(DATA))
-#endif /* Py_LIMITED_API */
+/* Delete an entry.
+
+ WARNING: The entry must exist. */
+PyAPI_FUNC(void) _Py_hashtable_delete(
+ _Py_hashtable_t *ht,
+ size_t key_size,
+ const void *pkey);
+#endif /* Py_LIMITED_API */
#endif
diff --git a/Python/marshal.c b/Python/marshal.c
index 7a4b9d2..83a1885 100644
--- a/Python/marshal.c
+++ b/Python/marshal.c
@@ -263,10 +263,10 @@ w_ref(PyObject *v, char *flag, WFILE *p)
if (Py_REFCNT(v) == 1)
return 0;
- entry = _Py_hashtable_get_entry(p->hashtable, v);
+ entry = _Py_HASHTABLE_GET_ENTRY(p->hashtable, v);
if (entry != NULL) {
/* write the reference index to the stream */
- _Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, &w, sizeof(w), entry);
+ _Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, entry, sizeof(w), &w);
/* we don't store "long" indices in the dict */
assert(0 <= w && w <= 0x7fffffff);
w_byte(TYPE_REF, p);
@@ -571,7 +571,8 @@ static int
w_init_refs(WFILE *wf, int version)
{
if (version >= 3) {
- wf->hashtable = _Py_hashtable_new(sizeof(int), _Py_hashtable_hash_ptr,
+ wf->hashtable = _Py_hashtable_new(sizeof(PyObject *), sizeof(int),
+ _Py_hashtable_hash_ptr,
_Py_hashtable_compare_direct);
if (wf->hashtable == NULL) {
PyErr_NoMemory();
@@ -582,9 +583,13 @@ w_init_refs(WFILE *wf, int version)
}
static int
-w_decref_entry(_Py_hashtable_entry_t *entry, void *Py_UNUSED(data))
+w_decref_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
+ void *Py_UNUSED(data))
{
- Py_XDECREF(entry->key);
+ PyObject *entry_key;
+
+ _Py_HASHTABLE_ENTRY_READ_KEY(ht->key_size, entry, entry_key);
+ Py_XDECREF(entry_key);
return 0;
}