diff options
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/Setup.dist | 17 | ||||
-rw-r--r-- | Modules/_tracemalloc.c | 1407 | ||||
-rw-r--r-- | Modules/hashtable.c | 518 | ||||
-rw-r--r-- | Modules/hashtable.h | 128 |
4 files changed, 2063 insertions, 7 deletions
diff --git a/Modules/Setup.dist b/Modules/Setup.dist index ebf8172..01fb85f 100644 --- a/Modules/Setup.dist +++ b/Modules/Setup.dist @@ -102,7 +102,7 @@ PYTHONPATH=$(COREPYTHONPATH) # various reasons; therefore they are listed here instead of in the # normal order. -# This only contains the minimal set of modules required to run the +# This only contains the minimal set of modules required to run the # setup.py script in the root of the Python source tree. posix posixmodule.c # posix (UNIX) system calls @@ -115,7 +115,7 @@ _weakref _weakref.c # weak references _functools _functoolsmodule.c # Tools for working with functions and callable objects _operator _operator.c # operator.add() and similar goodies _collections _collectionsmodule.c # Container types -itertools itertoolsmodule.c # Functions creating iterators for efficient looping +itertools itertoolsmodule.c # Functions creating iterators for efficient looping atexit atexitmodule.c # Register functions to be run at interpreter-shutdown _stat _stat.c # stat.h interface @@ -132,12 +132,15 @@ zipimport zipimport.c # faulthandler module faulthandler faulthandler.c +# debug tool to trace memory blocks allocated by Python +_tracemalloc _tracemalloc.c hashtable.c + # The rest of the modules listed in this file are all commented out by # default. Usually they can be detected and built as dynamically # loaded modules by the new setup.py script added in Python 2.1. If -# you're on a platform that doesn't support dynamic loading, want to -# compile modules statically into the Python binary, or need to -# specify some odd set of compiler switches, you can uncomment the +# you're on a platform that doesn't support dynamic loading, want to +# compile modules statically into the Python binary, or need to +# specify some odd set of compiler switches, you can uncomment the # appropriate lines below. # ====================================================================== @@ -186,7 +189,7 @@ _symtable symtablemodule.c # supported...) #fcntl fcntlmodule.c # fcntl(2) and ioctl(2) -#spwd spwdmodule.c # spwd(3) +#spwd spwdmodule.c # spwd(3) #grp grpmodule.c # grp(3) #select selectmodule.c # select(2); not on ancient System V @@ -302,7 +305,7 @@ _symtable symtablemodule.c #_curses _cursesmodule.c -lcurses -ltermcap # Wrapper for the panel library that's part of ncurses and SYSV curses. -#_curses_panel _curses_panel.c -lpanel -lncurses +#_curses_panel _curses_panel.c -lpanel -lncurses # Modules that provide persistent dictionary-like semantics. You will diff --git a/Modules/_tracemalloc.c b/Modules/_tracemalloc.c new file mode 100644 index 0000000..15ed734 --- /dev/null +++ b/Modules/_tracemalloc.c @@ -0,0 +1,1407 @@ +#include "Python.h" +#include "hashtable.h" +#include "frameobject.h" +#include "pythread.h" +#include "osdefs.h" + +/* Trace memory blocks allocated by PyMem_RawMalloc() */ +#define TRACE_RAW_MALLOC + +/* Forward declaration */ +static void tracemalloc_stop(void); +static int tracemalloc_atexit_register(void); +static void* raw_malloc(size_t size); +static void raw_free(void *ptr); + +#ifdef Py_DEBUG +# define TRACE_DEBUG +#endif + +#define _STR(VAL) #VAL +#define STR(VAL) _STR(VAL) + +/* Protected by the GIL */ +static struct { + PyMemAllocator mem; + PyMemAllocator raw; + PyMemAllocator obj; +} allocators; + +/* Arbitrary limit of the number of frames in a traceback. The value was chosen + to not allocate too much memory on the stack (see TRACEBACK_STACK_SIZE + below). */ +#define MAX_NFRAME 100 + +static struct { + /* Module initialized? + Variable protected by the GIL */ + enum { + TRACEMALLOC_NOT_INITIALIZED, + TRACEMALLOC_INITIALIZED, + TRACEMALLOC_FINALIZED + } initialized; + + /* atexit handler registered? */ + int atexit_registered; + + /* Is tracemalloc tracing memory allocations? + Variable protected by the GIL */ + int tracing; + + /* limit of the number of frames in a traceback, 1 by default. + Variable protected by the GIL. */ + int max_nframe; +} tracemalloc_config = {TRACEMALLOC_NOT_INITIALIZED, 0, 0, 1}; + +#if defined(TRACE_RAW_MALLOC) && defined(WITH_THREAD) +/* This lock is needed because tracemalloc_free() is called without + the GIL held from PyMem_RawFree(). It cannot acquire the lock because it + would introduce a deadlock in PyThreadState_DeleteCurrent(). */ +static PyThread_type_lock tables_lock; +# define TABLES_LOCK() PyThread_acquire_lock(tables_lock, 1) +# define TABLES_UNLOCK() PyThread_release_lock(tables_lock) +#else + /* variables are protected by the GIL */ +# define TABLES_LOCK() +# define TABLES_UNLOCK() +#endif + +/* Pack the frame_t structure to reduce the memory footprint on 64-bit + architectures: 12 bytes instead of 16. This optimization might produce + SIGBUS on architectures not supporting unaligned memory accesses (64-bit + IPS CPU?): on such architecture, the structure must not be packed. */ +#pragma pack(4) +typedef struct +#ifdef __GNUC__ +__attribute__((packed)) +#endif +{ + PyObject *filename; + int lineno; +} frame_t; + +typedef struct { + Py_uhash_t hash; + int nframe; + frame_t frames[1]; +} traceback_t; + +#define TRACEBACK_SIZE(NFRAME) \ + (sizeof(traceback_t) + sizeof(frame_t) * (NFRAME - 1)) +#define TRACEBACK_STACK_SIZE TRACEBACK_SIZE(MAX_NFRAME) + +static PyObject *unknown_filename = NULL; +static traceback_t tracemalloc_empty_traceback; + +typedef struct { + size_t size; + traceback_t *traceback; +} trace_t; + +/* Size in bytes of currently traced memory. + Protected by TABLES_LOCK(). */ +static size_t tracemalloc_traced_memory = 0; + +/* Maximum size in bytes of traced memory. + Protected by TABLES_LOCK(). */ +static size_t tracemalloc_max_traced_memory = 0; + +/* Hash table used as a set to to intern filenames: + PyObject* => PyObject*. + Protected by the GIL */ +static _Py_hashtable_t *tracemalloc_filenames = NULL; + +/* Hash table used as a set to intern tracebacks: + traceback_t* => traceback_t* + Protected by the GIL */ +static _Py_hashtable_t *tracemalloc_tracebacks = NULL; + +/* pointer (void*) => trace (trace_t). + Protected by TABLES_LOCK(). */ +static _Py_hashtable_t *tracemalloc_traces = NULL; + +#ifdef TRACE_DEBUG +static void +tracemalloc_error(const char *format, ...) +{ + va_list ap; + fprintf(stderr, "tracemalloc: "); + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); + fprintf(stderr, "\n"); + fflush(stderr); +} +#endif + +#if defined(WITH_THREAD) && defined(TRACE_RAW_MALLOC) +#define REENTRANT_THREADLOCAL + +/* If your OS does not provide native thread local storage, you can implement + it manually using a lock. Functions of thread.c cannot be used because + they use PyMem_RawMalloc() which leads to a reentrant call. */ +#if !(defined(_POSIX_THREADS) || defined(NT_THREADS)) +# error "need native thread local storage (TLS)" +#endif + +static int tracemalloc_reentrant_key; + +/* Any non-NULL pointer can be used */ +#define REENTRANT Py_True + +static int +get_reentrant(void) +{ + void *ptr = PyThread_get_key_value(tracemalloc_reentrant_key); + if (ptr != NULL) { + assert(ptr == REENTRANT); + return 1; + } + else + return 0; +} + +static void +set_reentrant(int reentrant) +{ + if (reentrant) { + assert(PyThread_get_key_value(tracemalloc_reentrant_key) == NULL); + PyThread_set_key_value(tracemalloc_reentrant_key, + REENTRANT); + } + else { + /* FIXME: PyThread_set_key_value() cannot be used to set the flag + to zero, because it does nothing if the variable has already + a value set. */ + PyThread_delete_key_value(tracemalloc_reentrant_key); + } +} + +#else + +/* WITH_THREAD not defined: Python compiled without threads, + or TRACE_RAW_MALLOC not defined: variable protected by the GIL */ +static int tracemalloc_reentrant = 0; + +static int +get_reentrant(void) +{ + return tracemalloc_reentrant; +} + +static void +set_reentrant(int reentrant) +{ + assert(!reentrant || !get_reentrant()); + tracemalloc_reentrant = reentrant; +} +#endif + +static int +hashtable_compare_unicode(const void *key, const _Py_hashtable_entry_t *entry) +{ + if (key != NULL && entry->key != NULL) + return (PyUnicode_Compare((PyObject *)key, (PyObject *)entry->key) == 0); + else + return key == entry->key; +} + +static _Py_hashtable_allocator_t hashtable_alloc = {malloc, free}; + +static _Py_hashtable_t * +hashtable_new(size_t data_size, + _Py_hashtable_hash_func hash_func, + _Py_hashtable_compare_func compare_func) +{ + return _Py_hashtable_new_full(data_size, 0, + hash_func, compare_func, + NULL, NULL, NULL, &hashtable_alloc); +} + +static void* +raw_malloc(size_t size) +{ + return allocators.raw.malloc(allocators.raw.ctx, size); +} + +static void +raw_free(void *ptr) +{ + allocators.raw.free(allocators.raw.ctx, ptr); +} + +static Py_uhash_t +hashtable_hash_traceback(const void *key) +{ + const traceback_t *traceback = key; + return traceback->hash; +} + +static int +hashtable_compare_traceback(const traceback_t *traceback1, + const _Py_hashtable_entry_t *he) +{ + const traceback_t *traceback2 = he->key; + const frame_t *frame1, *frame2; + int i; + + if (traceback1->nframe != traceback2->nframe) + return 0; + + for (i=0; i < traceback1->nframe; i++) { + frame1 = &traceback1->frames[i]; + frame2 = &traceback2->frames[i]; + + if (frame1->lineno != frame2->lineno) + return 0; + + if (frame1->filename != frame2->filename) { + assert(PyUnicode_Compare(frame1->filename, frame2->filename) != 0); + return 0; + } + } + return 1; +} + +static void +tracemalloc_get_frame(PyFrameObject *pyframe, frame_t *frame) +{ + PyCodeObject *code; + PyObject *filename; + _Py_hashtable_entry_t *entry; + + frame->filename = unknown_filename; + frame->lineno = PyFrame_GetLineNumber(pyframe); + assert(frame->lineno >= 0); + if (frame->lineno < 0) + frame->lineno = 0; + + code = pyframe->f_code; + if (code == NULL) { +#ifdef TRACE_DEBUG + tracemalloc_error("failed to get the code object of the a frame"); +#endif + return; + } + + if (code->co_filename == NULL) { +#ifdef TRACE_DEBUG + tracemalloc_error("failed to get the filename of the code object"); +#endif + return; + } + + filename = code->co_filename; + assert(filename != NULL); + if (filename == NULL) + return; + + if (!PyUnicode_Check(filename)) { +#ifdef TRACE_DEBUG + tracemalloc_error("filename is not an unicode string"); +#endif + return; + } + if (!PyUnicode_IS_READY(filename)) { + /* Don't make a Unicode string ready to avoid reentrant calls + to tracemalloc_malloc() or tracemalloc_realloc() */ +#ifdef TRACE_DEBUG + tracemalloc_error("filename is not a ready unicode string"); +#endif + return; + } + + /* intern the filename */ + entry = _Py_hashtable_get_entry(tracemalloc_filenames, filename); + if (entry != NULL) { + filename = (PyObject *)entry->key; + } + else { + /* tracemalloc_filenames is responsible to keep a reference + to the filename */ + Py_INCREF(filename); + if (_Py_hashtable_set(tracemalloc_filenames, filename, NULL, 0) < 0) { + Py_DECREF(filename); +#ifdef TRACE_DEBUG + tracemalloc_error("failed to intern the filename"); +#endif + return; + } + } + + /* the tracemalloc_filenames table keeps a reference to the filename */ + frame->filename = filename; +} + +static Py_uhash_t +traceback_hash(traceback_t *traceback) +{ + /* code based on tuplehash() of Objects/tupleobject.c */ + Py_uhash_t x; /* Unsigned for defined overflow behavior. */ + Py_hash_t y; + int len = traceback->nframe; + Py_uhash_t mult = _PyHASH_MULTIPLIER; + frame_t *frame; + + x = 0x345678UL; + frame = traceback->frames; + while (--len >= 0) { + y = PyObject_Hash(frame->filename); + y ^= frame->lineno; + frame++; + + x = (x ^ y) * mult; + /* the cast might truncate len; that doesn't change hash stability */ + mult += (Py_hash_t)(82520UL + len + len); + } + x += 97531UL; + return x; +} + +static void +traceback_get_frames(traceback_t *traceback) +{ + PyThreadState *tstate; + PyFrameObject *pyframe; + +#ifdef WITH_THREAD + tstate = PyGILState_GetThisThreadState(); +#else + tstate = PyThreadState_Get(); +#endif + if (tstate == NULL) { +#ifdef TRACE_DEBUG + tracemalloc_error("failed to get the current thread state"); +#endif + return; + } + + for (pyframe = tstate->frame; pyframe != NULL; pyframe = pyframe->f_back) { + tracemalloc_get_frame(pyframe, &traceback->frames[traceback->nframe]); + assert(traceback->frames[traceback->nframe].filename != NULL); + assert(traceback->frames[traceback->nframe].lineno >= 0); + traceback->nframe++; + if (traceback->nframe == tracemalloc_config.max_nframe) + break; + } +} + +static traceback_t * +traceback_new(void) +{ + char stack_buffer[TRACEBACK_STACK_SIZE]; + traceback_t *traceback = (traceback_t *)stack_buffer; + _Py_hashtable_entry_t *entry; + +#ifdef WITH_THREAD + assert(PyGILState_Check()); +#endif + + /* get frames */ + traceback->nframe = 0; + traceback_get_frames(traceback); + if (traceback->nframe == 0) + return &tracemalloc_empty_traceback; + traceback->hash = traceback_hash(traceback); + + /* intern the traceback */ + entry = _Py_hashtable_get_entry(tracemalloc_tracebacks, traceback); + if (entry != NULL) { + traceback = (traceback_t *)entry->key; + } + else { + traceback_t *copy; + size_t traceback_size; + + traceback_size = TRACEBACK_SIZE(traceback->nframe); + + copy = raw_malloc(traceback_size); + if (copy == NULL) { +#ifdef TRACE_DEBUG + tracemalloc_error("failed to intern the traceback: malloc failed"); +#endif + return NULL; + } + memcpy(copy, traceback, traceback_size); + + if (_Py_hashtable_set(tracemalloc_tracebacks, copy, NULL, 0) < 0) { + raw_free(copy); +#ifdef TRACE_DEBUG + tracemalloc_error("failed to intern the traceback: putdata failed"); +#endif + return NULL; + } + traceback = copy; + } + return traceback; +} + +static void +tracemalloc_log_alloc(void *ptr, size_t size) +{ + traceback_t *traceback; + trace_t trace; + +#ifdef WITH_THREAD + assert(PyGILState_Check()); +#endif + + traceback = traceback_new(); + if (traceback == NULL) + return; + + trace.size = size; + trace.traceback = traceback; + + TABLES_LOCK(); + assert(tracemalloc_traced_memory <= PY_SIZE_MAX - size); + tracemalloc_traced_memory += size; + if (tracemalloc_traced_memory > tracemalloc_max_traced_memory) + tracemalloc_max_traced_memory = tracemalloc_traced_memory; + + _Py_HASHTABLE_SET(tracemalloc_traces, ptr, trace); + TABLES_UNLOCK(); +} + +static void +tracemalloc_log_free(void *ptr) +{ + trace_t trace; + + TABLES_LOCK(); + if (_Py_hashtable_pop(tracemalloc_traces, ptr, &trace, sizeof(trace))) { + assert(tracemalloc_traced_memory >= trace.size); + tracemalloc_traced_memory -= trace.size; + } + TABLES_UNLOCK(); +} + +static void* +tracemalloc_malloc(void *ctx, size_t size, int gil_held) +{ + PyMemAllocator *alloc = (PyMemAllocator *)ctx; +#if defined(TRACE_RAW_MALLOC) && defined(WITH_THREAD) + PyGILState_STATE gil_state; +#endif + void *ptr; + + if (get_reentrant()) { + return alloc->malloc(alloc->ctx, size); + } + + /* Ignore reentrant call. PyObjet_Malloc() calls PyMem_Malloc() + for allocations larger than 512 bytes. PyGILState_Ensure() may call + PyMem_RawMalloc() indirectly which would call PyGILState_Ensure() if + reentrant are not disabled. */ + set_reentrant(1); +#ifdef WITH_THREAD +#ifdef TRACE_RAW_MALLOC + if (!gil_held) + gil_state = PyGILState_Ensure(); +#else + assert(gil_held); +#endif +#endif + ptr = alloc->malloc(alloc->ctx, size); + set_reentrant(0); + + if (ptr != NULL) + tracemalloc_log_alloc(ptr, size); + +#if defined(TRACE_RAW_MALLOC) && defined(WITH_THREAD) + if (!gil_held) + PyGILState_Release(gil_state); +#endif + + return ptr; +} + +static void* +tracemalloc_realloc(void *ctx, void *ptr, size_t new_size, int gil_held) +{ + PyMemAllocator *alloc = (PyMemAllocator *)ctx; +#if defined(TRACE_RAW_MALLOC) && defined(WITH_THREAD) + PyGILState_STATE gil_state; +#endif + void *ptr2; + + if (get_reentrant()) { + /* Reentrant call to PyMem_Realloc() and PyMem_RawRealloc(). + Example: PyMem_RawRealloc() is called internally by pymalloc + (_PyObject_Malloc() and _PyObject_Realloc()) to allocate a new + arena (new_arena()). */ + ptr2 = alloc->realloc(alloc->ctx, ptr, new_size); + + if (ptr2 != NULL && ptr != NULL) + tracemalloc_log_free(ptr); + + return ptr2; + } + + /* Ignore reentrant call. PyObjet_Realloc() calls PyMem_Realloc() for + allocations larger than 512 bytes. PyGILState_Ensure() may call + PyMem_RawMalloc() indirectly which would call PyGILState_Ensure() if + reentrant are not disabled. */ + set_reentrant(1); +#ifdef WITH_THREAD +#ifdef TRACE_RAW_MALLOC + if (!gil_held) + gil_state = PyGILState_Ensure(); +#else + assert(gil_held); +#endif +#endif + ptr2 = alloc->realloc(alloc->ctx, ptr, new_size); + set_reentrant(0); + + if (ptr2 != NULL) { + if (ptr != NULL) + tracemalloc_log_free(ptr); + + tracemalloc_log_alloc(ptr2, new_size); + } + +#if defined(TRACE_RAW_MALLOC) && defined(WITH_THREAD) + if (!gil_held) + PyGILState_Release(gil_state); +#endif + + return ptr2; +} + +static void +tracemalloc_free(void *ctx, void *ptr) +{ + PyMemAllocator *alloc = (PyMemAllocator *)ctx; + + if (ptr == NULL) + return; + + /* GIL cannot be locked in PyMem_RawFree() because it would introduce + a deadlock in PyThreadState_DeleteCurrent(). */ + + alloc->free(alloc->ctx, ptr); + tracemalloc_log_free(ptr); +} + +static void* +tracemalloc_malloc_gil(void *ctx, size_t size) +{ + return tracemalloc_malloc(ctx, size, 1); +} + +static void* +tracemalloc_realloc_gil(void *ctx, void *ptr, size_t new_size) +{ + return tracemalloc_realloc(ctx, ptr, new_size, 1); +} + +#ifdef TRACE_RAW_MALLOC +static void* +tracemalloc_raw_malloc(void *ctx, size_t size) +{ + return tracemalloc_malloc(ctx, size, 0); +} + +static void* +tracemalloc_raw_realloc(void *ctx, void *ptr, size_t new_size) +{ + return tracemalloc_realloc(ctx, ptr, new_size, 0); +} +#endif + +static int +tracemalloc_clear_filename(_Py_hashtable_entry_t *entry, void *user_data) +{ + PyObject *filename = (PyObject *)entry->key; + Py_DECREF(filename); + return 0; +} + +static int +traceback_free_traceback(_Py_hashtable_entry_t *entry, void *user_data) +{ + traceback_t *traceback = (traceback_t *)entry->key; + raw_free(traceback); + return 0; +} + +/* reentrant flag must be set to call this function and GIL must be held */ +static void +tracemalloc_clear_traces(void) +{ +#ifdef WITH_THREAD + /* The GIL protects variables againt concurrent access */ + assert(PyGILState_Check()); +#endif + + /* Disable also reentrant calls to tracemalloc_malloc() to not add a new + trace while we are clearing traces */ + assert(get_reentrant()); + + TABLES_LOCK(); + _Py_hashtable_clear(tracemalloc_traces); + tracemalloc_traced_memory = 0; + tracemalloc_max_traced_memory = 0; + TABLES_UNLOCK(); + + _Py_hashtable_foreach(tracemalloc_tracebacks, traceback_free_traceback, NULL); + _Py_hashtable_clear(tracemalloc_tracebacks); + + _Py_hashtable_foreach(tracemalloc_filenames, tracemalloc_clear_filename, NULL); + _Py_hashtable_clear(tracemalloc_filenames); +} + +static int +tracemalloc_init(void) +{ + if (tracemalloc_config.initialized == TRACEMALLOC_FINALIZED) { + PyErr_SetString(PyExc_RuntimeError, + "the tracemalloc module has been unloaded"); + return -1; + } + + if (tracemalloc_config.initialized == TRACEMALLOC_INITIALIZED) + return 0; + + PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &allocators.raw); + +#ifdef REENTRANT_THREADLOCAL + tracemalloc_reentrant_key = PyThread_create_key(); + if (tracemalloc_reentrant_key == -1) { +#ifdef MS_WINDOWS + PyErr_SetFromWindowsErr(0); +#else + PyErr_SetFromErrno(PyExc_OSError); +#endif + return -1; + } +#endif + +#if defined(WITH_THREAD) && defined(TRACE_RAW_MALLOC) + if (tables_lock == NULL) { + tables_lock = PyThread_allocate_lock(); + if (tables_lock == NULL) { + PyErr_SetString(PyExc_RuntimeError, "cannot allocate lock"); + return -1; + } + } +#endif + + tracemalloc_filenames = hashtable_new(0, + (_Py_hashtable_hash_func)PyObject_Hash, + hashtable_compare_unicode); + + tracemalloc_tracebacks = hashtable_new(0, + (_Py_hashtable_hash_func)hashtable_hash_traceback, + (_Py_hashtable_compare_func)hashtable_compare_traceback); + + tracemalloc_traces = hashtable_new(sizeof(trace_t), + _Py_hashtable_hash_ptr, + _Py_hashtable_compare_direct); + + if (tracemalloc_filenames == NULL || tracemalloc_tracebacks == NULL + || tracemalloc_traces == NULL) + { + PyErr_NoMemory(); + return -1; + } + + unknown_filename = PyUnicode_FromString("<unknown>"); + if (unknown_filename == NULL) + return -1; + PyUnicode_InternInPlace(&unknown_filename); + + tracemalloc_empty_traceback.nframe = 1; + /* borrowed reference */ + tracemalloc_empty_traceback.frames[0].filename = unknown_filename; + tracemalloc_empty_traceback.frames[0].lineno = 0; + tracemalloc_empty_traceback.hash = traceback_hash(&tracemalloc_empty_traceback); + + /* Disable tracing allocations until hooks are installed. Set + also the reentrant flag to detect bugs: fail with an assertion error + if set_reentrant(1) is called while tracing is disabled. */ + set_reentrant(1); + + tracemalloc_config.initialized = TRACEMALLOC_INITIALIZED; + return 0; +} + +static void +tracemalloc_deinit(void) +{ + if (tracemalloc_config.initialized != TRACEMALLOC_INITIALIZED) + return; + tracemalloc_config.initialized = TRACEMALLOC_FINALIZED; + + tracemalloc_stop(); + + /* destroy hash tables */ + _Py_hashtable_destroy(tracemalloc_traces); + _Py_hashtable_destroy(tracemalloc_tracebacks); + _Py_hashtable_destroy(tracemalloc_filenames); + +#if defined(WITH_THREAD) && defined(TRACE_RAW_MALLOC) + if (tables_lock != NULL) { + PyThread_free_lock(tables_lock); + tables_lock = NULL; + } +#endif + +#ifdef REENTRANT_THREADLOCAL + PyThread_delete_key(tracemalloc_reentrant_key); +#endif + + Py_XDECREF(unknown_filename); +} + +static int +tracemalloc_start(void) +{ + PyMemAllocator alloc; + + if (tracemalloc_init() < 0) + return -1; + + if (tracemalloc_config.tracing) { + /* hook already installed: do nothing */ + return 0; + } + + if (tracemalloc_atexit_register() < 0) + return -1; + +#ifdef TRACE_RAW_MALLOC + alloc.malloc = tracemalloc_raw_malloc; + alloc.realloc = tracemalloc_raw_realloc; + alloc.free = tracemalloc_free; + + alloc.ctx = &allocators.raw; + PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &allocators.raw); + PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc); +#endif + + alloc.malloc = tracemalloc_malloc_gil; + alloc.realloc = tracemalloc_realloc_gil; + alloc.free = tracemalloc_free; + + alloc.ctx = &allocators.mem; + PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &allocators.mem); + PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc); + + alloc.ctx = &allocators.obj; + PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &allocators.obj); + PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc); + + /* everything is ready: start tracing Python memory allocations */ + tracemalloc_config.tracing = 1; + set_reentrant(0); + + return 0; +} + +static void +tracemalloc_stop(void) +{ + if (!tracemalloc_config.tracing) + return; + + /* stop tracing Python memory allocations */ + tracemalloc_config.tracing = 0; + + /* set the reentrant flag to detect bugs: fail with an assertion error if + set_reentrant(1) is called while tracing is disabled. */ + set_reentrant(1); + + /* unregister the hook on memory allocators */ +#ifdef TRACE_RAW_MALLOC + PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &allocators.raw); +#endif + PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &allocators.mem); + PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &allocators.obj); + + /* release memory */ + tracemalloc_clear_traces(); +} + + +static PyObject* +lineno_as_obj(int lineno) +{ + if (lineno >= 0) + return PyLong_FromLong(lineno); + else + Py_RETURN_NONE; +} + +PyDoc_STRVAR(tracemalloc_is_tracing_doc, + "is_tracing()->bool\n" + "\n" + "True if the tracemalloc module is tracing Python memory allocations,\n" + "False otherwise."); + +static PyObject* +py_tracemalloc_is_tracing(PyObject *self) +{ + return PyBool_FromLong(tracemalloc_config.tracing); +} + +PyDoc_STRVAR(tracemalloc_clear_traces_doc, + "clear_traces()\n" + "\n" + "Clear traces of memory blocks allocated by Python."); + +static PyObject* +py_tracemalloc_clear_traces(PyObject *self) +{ + if (!tracemalloc_config.tracing) + Py_RETURN_NONE; + + set_reentrant(1); + tracemalloc_clear_traces(); + set_reentrant(0); + + Py_RETURN_NONE; +} + +static PyObject* +frame_to_pyobject(frame_t *frame) +{ + PyObject *frame_obj, *lineno_obj; + + frame_obj = PyTuple_New(2); + if (frame_obj == NULL) + return NULL; + + if (frame->filename == NULL) + frame->filename = Py_None; + Py_INCREF(frame->filename); + PyTuple_SET_ITEM(frame_obj, 0, frame->filename); + + assert(frame->lineno >= 0); + lineno_obj = lineno_as_obj(frame->lineno); + if (lineno_obj == NULL) { + Py_DECREF(frame_obj); + return NULL; + } + PyTuple_SET_ITEM(frame_obj, 1, lineno_obj); + + return frame_obj; +} + +static PyObject* +traceback_to_pyobject(traceback_t *traceback, _Py_hashtable_t *intern_table) +{ + int i; + PyObject *frames, *frame; + + if (intern_table != NULL) { + if (_Py_HASHTABLE_GET(intern_table, traceback, frames)) { + Py_INCREF(frames); + return frames; + } + } + + frames = PyTuple_New(traceback->nframe); + if (frames == NULL) + return NULL; + + for (i=0; i < traceback->nframe; i++) { + frame = frame_to_pyobject(&traceback->frames[i]); + if (frame == NULL) { + Py_DECREF(frames); + return NULL; + } + PyTuple_SET_ITEM(frames, i, frame); + } + + if (intern_table != NULL) { + if (_Py_HASHTABLE_SET(intern_table, traceback, frames) < 0) { + Py_DECREF(frames); + PyErr_NoMemory(); + return NULL; + } + /* intern_table keeps a new reference to frames */ + Py_INCREF(frames); + } + return frames; +} + +static PyObject* +trace_to_pyobject(trace_t *trace, _Py_hashtable_t *intern_tracebacks) +{ + PyObject *trace_obj = NULL; + PyObject *size, *traceback; + + trace_obj = PyTuple_New(2); + if (trace_obj == NULL) + return NULL; + + size = PyLong_FromSize_t(trace->size); + if (size == NULL) { + Py_DECREF(trace_obj); + return NULL; + } + PyTuple_SET_ITEM(trace_obj, 0, size); + + traceback = traceback_to_pyobject(trace->traceback, intern_tracebacks); + if (traceback == NULL) { + Py_DECREF(trace_obj); + return NULL; + } + PyTuple_SET_ITEM(trace_obj, 1, traceback); + + return trace_obj; +} + +typedef struct { + _Py_hashtable_t *traces; + _Py_hashtable_t *tracebacks; + PyObject *list; +} get_traces_t; + +static int +tracemalloc_get_traces_fill(_Py_hashtable_entry_t *entry, void *user_data) +{ + get_traces_t *get_traces = user_data; + trace_t *trace; + PyObject *tracemalloc_obj; + int res; + + trace = (trace_t *)_PY_HASHTABLE_ENTRY_DATA(entry); + + tracemalloc_obj = trace_to_pyobject(trace, get_traces->tracebacks); + if (tracemalloc_obj == NULL) + return 1; + + res = PyList_Append(get_traces->list, tracemalloc_obj); + Py_DECREF(tracemalloc_obj); + if (res < 0) + return 1; + + return 0; +} + +static int +tracemalloc_pyobject_decref_cb(_Py_hashtable_entry_t *entry, void *user_data) +{ + PyObject *obj = (PyObject *)_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); + Py_DECREF(obj); + return 0; +} + +PyDoc_STRVAR(tracemalloc_get_traces_doc, + "get_traces() -> list\n" + "\n" + "Get traces of all memory blocks allocated by Python.\n" + "Return a list of (size: int, traceback: tuple) tuples.\n" + "traceback is a tuple of (filename: str, lineno: int) tuples.\n" + "\n" + "Return an empty list if the tracemalloc module is disabled."); + +static PyObject* +py_tracemalloc_get_traces(PyObject *self, PyObject *obj) +{ + get_traces_t get_traces; + int err; + + get_traces.traces = NULL; + get_traces.tracebacks = NULL; + get_traces.list = PyList_New(0); + if (get_traces.list == NULL) + goto error; + + if (!tracemalloc_config.tracing) + return get_traces.list; + + get_traces.tracebacks = hashtable_new(sizeof(PyObject *), + _Py_hashtable_hash_ptr, + _Py_hashtable_compare_direct); + if (get_traces.tracebacks == NULL) { + PyErr_NoMemory(); + goto error; + } + + TABLES_LOCK(); + get_traces.traces = _Py_hashtable_copy(tracemalloc_traces); + TABLES_UNLOCK(); + + if (get_traces.traces == NULL) { + PyErr_NoMemory(); + goto error; + } + + set_reentrant(1); + err = _Py_hashtable_foreach(get_traces.traces, + tracemalloc_get_traces_fill, &get_traces); + set_reentrant(0); + if (err) + goto error; + + goto finally; + +error: + Py_CLEAR(get_traces.list); + +finally: + if (get_traces.tracebacks != NULL) { + _Py_hashtable_foreach(get_traces.tracebacks, + tracemalloc_pyobject_decref_cb, NULL); + _Py_hashtable_destroy(get_traces.tracebacks); + } + if (get_traces.traces != NULL) + _Py_hashtable_destroy(get_traces.traces); + + return get_traces.list; +} + +PyDoc_STRVAR(tracemalloc_get_object_traceback_doc, + "get_object_traceback(obj)\n" + "\n" + "Get the traceback where the Python object obj was allocated.\n" + "Return a tuple of (filename: str, lineno: int) tuples.\n" + "\n" + "Return None if the tracemalloc module is disabled or did not\n" + "trace the allocation of the object."); + +static PyObject* +py_tracemalloc_get_object_traceback(PyObject *self, PyObject *obj) +{ + PyTypeObject *type; + void *ptr; + trace_t trace; + int found; + + if (!tracemalloc_config.tracing) + Py_RETURN_NONE; + + type = Py_TYPE(obj); + if (PyType_IS_GC(type)) + ptr = (void *)((char *)obj - sizeof(PyGC_Head)); + else + ptr = (void *)obj; + + TABLES_LOCK(); + found = _Py_HASHTABLE_GET(tracemalloc_traces, ptr, trace); + TABLES_UNLOCK(); + + if (!found) + Py_RETURN_NONE; + + return traceback_to_pyobject(trace.traceback, NULL); +} + +static PyObject* +tracemalloc_atexit(PyObject *self) +{ +#ifdef WITH_THREAD + assert(PyGILState_Check()); +#endif + tracemalloc_deinit(); + Py_RETURN_NONE; +} + +static PyMethodDef atexit_method = { + "_atexit", (PyCFunction)tracemalloc_atexit, METH_NOARGS, NULL}; + +static int +tracemalloc_atexit_register(void) +{ + PyObject *method = NULL, *atexit = NULL, *func = NULL; + PyObject *result; + int ret = -1; + + if (tracemalloc_config.atexit_registered) + return 0; + tracemalloc_config.atexit_registered = 1; + + /* private functions */ + method = PyCFunction_New(&atexit_method, NULL); + if (method == NULL) + goto done; + + atexit = PyImport_ImportModule("atexit"); + if (atexit == NULL) { + if (!PyErr_Warn(PyExc_ImportWarning, + "atexit module is missing: " + "cannot automatically disable tracemalloc at exit")) + { + PyErr_Clear(); + return 0; + } + goto done; + } + + func = PyObject_GetAttrString(atexit, "register"); + if (func == NULL) + goto done; + + result = PyObject_CallFunction(func, "O", method); + if (result == NULL) + goto done; + Py_DECREF(result); + + ret = 0; + +done: + Py_XDECREF(method); + Py_XDECREF(func); + Py_XDECREF(atexit); + return ret; +} + +PyDoc_STRVAR(tracemalloc_start_doc, + "start()\n" + "\n" + "Start tracing Python memory allocations."); + +static PyObject* +py_tracemalloc_start(PyObject *self) +{ + if (tracemalloc_start() < 0) + return NULL; + + Py_RETURN_NONE; +} + +PyDoc_STRVAR(tracemalloc_stop_doc, + "stop()\n" + "\n" + "Stop tracing Python memory allocations and clear traces\n" + "of memory blocks allocated by Python."); + +static PyObject* +py_tracemalloc_stop(PyObject *self) +{ + tracemalloc_stop(); + Py_RETURN_NONE; +} + +PyDoc_STRVAR(tracemalloc_get_traceback_limit_doc, + "get_traceback_limit() -> int\n" + "\n" + "Get the maximum number of frames stored in the traceback\n" + "of a trace.\n" + "\n" + "By default, a trace of an allocated memory block only stores\n" + "the most recent frame: the limit is 1."); + +static PyObject* +py_tracemalloc_get_traceback_limit(PyObject *self) +{ + return PyLong_FromLong(tracemalloc_config.max_nframe); +} + +PyDoc_STRVAR(tracemalloc_set_traceback_limit_doc, + "set_traceback_limit(nframe: int)\n" + "\n" + "Set the maximum number of frames stored in the traceback of a trace."); + +static PyObject* +tracemalloc_set_traceback_limit(PyObject *self, PyObject *args) +{ + Py_ssize_t nframe; + + if (!PyArg_ParseTuple(args, "n:set_traceback_limit", + &nframe)) + return NULL; + + if (nframe < 1 || nframe > MAX_NFRAME) { + PyErr_Format(PyExc_ValueError, + "the number of frames must be in range [1; %i]", + MAX_NFRAME); + return NULL; + } + tracemalloc_config.max_nframe = Py_SAFE_DOWNCAST(nframe, Py_ssize_t, int); + + Py_RETURN_NONE; +} + +PyDoc_STRVAR(tracemalloc_get_tracemalloc_memory_doc, + "get_tracemalloc_memory() -> int\n" + "\n" + "Get the memory usage in bytes of the tracemalloc module\n" + "used internally to trace memory allocations."); + +static PyObject* +tracemalloc_get_tracemalloc_memory(PyObject *self) +{ + size_t size; + PyObject *size_obj; + + size = _Py_hashtable_size(tracemalloc_tracebacks); + size += _Py_hashtable_size(tracemalloc_filenames); + + TABLES_LOCK(); + size += _Py_hashtable_size(tracemalloc_traces); + TABLES_UNLOCK(); + + size_obj = PyLong_FromSize_t(size); + return Py_BuildValue("N", size_obj); +} + +PyDoc_STRVAR(tracemalloc_get_traced_memory_doc, + "get_traced_memory() -> int\n" + "\n" + "Get the current size and maximum size of memory blocks traced\n" + "by the tracemalloc module as a tuple: (size: int, max_size: int)."); + +static PyObject* +tracemalloc_get_traced_memory(PyObject *self) +{ + Py_ssize_t size, max_size; + PyObject *size_obj, *max_size_obj; + + if (!tracemalloc_config.tracing) + return Py_BuildValue("ii", 0, 0); + + TABLES_LOCK(); + size = tracemalloc_traced_memory; + max_size = tracemalloc_max_traced_memory; + TABLES_UNLOCK(); + + size_obj = PyLong_FromSize_t(size); + max_size_obj = PyLong_FromSize_t(max_size); + return Py_BuildValue("NN", size_obj, max_size_obj); +} + +static PyMethodDef module_methods[] = { + {"is_tracing", (PyCFunction)py_tracemalloc_is_tracing, + METH_NOARGS, tracemalloc_is_tracing_doc}, + {"clear_traces", (PyCFunction)py_tracemalloc_clear_traces, + METH_NOARGS, tracemalloc_clear_traces_doc}, + {"_get_traces", (PyCFunction)py_tracemalloc_get_traces, + METH_NOARGS, tracemalloc_get_traces_doc}, + {"_get_object_traceback", (PyCFunction)py_tracemalloc_get_object_traceback, + METH_O, tracemalloc_get_object_traceback_doc}, + {"start", (PyCFunction)py_tracemalloc_start, + METH_NOARGS, tracemalloc_start_doc}, + {"stop", (PyCFunction)py_tracemalloc_stop, + METH_NOARGS, tracemalloc_stop_doc}, + {"get_traceback_limit", (PyCFunction)py_tracemalloc_get_traceback_limit, + METH_NOARGS, tracemalloc_get_traceback_limit_doc}, + {"set_traceback_limit", (PyCFunction)tracemalloc_set_traceback_limit, + METH_VARARGS, tracemalloc_set_traceback_limit_doc}, + {"get_tracemalloc_memory", (PyCFunction)tracemalloc_get_tracemalloc_memory, + METH_NOARGS, tracemalloc_get_tracemalloc_memory_doc}, + {"get_traced_memory", (PyCFunction)tracemalloc_get_traced_memory, + METH_NOARGS, tracemalloc_get_traced_memory_doc}, + + /* sentinel */ + {NULL, NULL} +}; + +PyDoc_STRVAR(module_doc, +"Debug module to trace memory blocks allocated by Python."); + +static struct PyModuleDef module_def = { + PyModuleDef_HEAD_INIT, + "_tracemalloc", + module_doc, + 0, /* non-negative size to be able to unload the module */ + module_methods, + NULL, +}; + +PyMODINIT_FUNC +PyInit__tracemalloc(void) +{ + PyObject *m; + m = PyModule_Create(&module_def); + if (m == NULL) + return NULL; + + if (tracemalloc_init() < 0) + return NULL; + + return m; +} + +static int +parse_sys_xoptions(PyObject *value) +{ + PyObject *valuelong; + long nframe; + + if (value == Py_True) + return 1; + + assert(PyUnicode_Check(value)); + if (PyUnicode_GetLength(value) == 0) + return -1; + + valuelong = PyLong_FromUnicodeObject(value, 10); + if (valuelong == NULL) + return -1; + + nframe = PyLong_AsLong(valuelong); + Py_DECREF(valuelong); + if (nframe == -1 && PyErr_Occurred()) + return -1; + + if (nframe < 1 || nframe > MAX_NFRAME) + return -1; + + return Py_SAFE_DOWNCAST(nframe, long, int); +} + +int +_PyTraceMalloc_Init(void) +{ + char *p; + int nframe; + +#ifdef WITH_THREAD + assert(PyGILState_Check()); +#endif + + if ((p = Py_GETENV("PYTHONTRACEMALLOC")) && *p != '\0') { + char *endptr = p; + unsigned long value; + + value = strtoul(p, &endptr, 10); + if (*endptr != '\0' + || value < 1 + || value > MAX_NFRAME + || (errno == ERANGE && value == ULONG_MAX)) + { + Py_FatalError("PYTHONTRACEMALLOC must be an integer " + "in range [1; " STR(MAX_NFRAME) "]"); + return -1; + } + + nframe = (int)value; + } + else { + PyObject *xoptions, *key, *value; + + xoptions = PySys_GetXOptions(); + if (xoptions == NULL) + return -1; + + key = PyUnicode_FromString("tracemalloc"); + if (key == NULL) + return -1; + + value = PyDict_GetItemWithError(xoptions, key); + Py_DECREF(key); + if (value == NULL) { + if (PyErr_Occurred()) + return -1; + + /* -X tracemalloc is not used */ + return 0; + } + + nframe = parse_sys_xoptions(value); + Py_DECREF(value); + if (nframe < 0) { + Py_FatalError("-X tracemalloc=NFRAME: number of frame must be " + "an integer in range [1; " STR(MAX_NFRAME) "]"); + } + } + + tracemalloc_config.max_nframe = nframe; + return tracemalloc_start(); +} + diff --git a/Modules/hashtable.c b/Modules/hashtable.c new file mode 100644 index 0000000..221ed53 --- /dev/null +++ b/Modules/hashtable.c @@ -0,0 +1,518 @@ +/* The implementation of the hash table (_Py_hashtable_t) is based on the cfuhash + project: + http://sourceforge.net/projects/libcfu/ + + Copyright of cfuhash: + ---------------------------------- + Creation date: 2005-06-24 21:22:40 + Authors: Don + Change log: + + Copyright (c) 2005 Don Owens + All rights reserved. + + This code is released under the BSD license: + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + OF THE POSSIBILITY OF SUCH DAMAGE. + ---------------------------------- +*/ + +#include "Python.h" +#include "hashtable.h" + +#define HASHTABLE_MIN_SIZE 16 +#define HASHTABLE_HIGH 0.50 +#define HASHTABLE_LOW 0.10 +#define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH) + +#define BUCKETS_HEAD(SLIST) \ + ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST))) +#define TABLE_HEAD(HT, BUCKET) \ + ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET])) +#define ENTRY_NEXT(ENTRY) \ + ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY)) +#define HASHTABLE_ITEM_SIZE(HT) \ + (sizeof(_Py_hashtable_entry_t) + (HT)->data_size) + +/* Forward declaration */ +static void hashtable_rehash(_Py_hashtable_t *ht); + +static void +_Py_slist_init(_Py_slist_t *list) +{ + list->head = NULL; +} + +static void +_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item) +{ + item->next = list->head; + list->head = item; +} + +static void +_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous, + _Py_slist_item_t *item) +{ + if (previous != NULL) + previous->next = item->next; + else + list->head = item->next; +} + +Py_uhash_t +_Py_hashtable_hash_int(const void *key) +{ + return (Py_uhash_t)key; +} + +Py_uhash_t +_Py_hashtable_hash_ptr(const void *key) +{ + return (Py_uhash_t)_Py_HashPointer((void *)key); +} + +int +_Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry) +{ + return entry->key == key; +} + +/* makes sure the real size of the buckets array is a power of 2 */ +static size_t +round_size(size_t s) +{ + size_t i; + if (s < HASHTABLE_MIN_SIZE) + return HASHTABLE_MIN_SIZE; + i = 1; + while (i < s) + i <<= 1; + return i; +} + +_Py_hashtable_t * +_Py_hashtable_new_full(size_t data_size, size_t init_size, + _Py_hashtable_hash_func hash_func, + _Py_hashtable_compare_func compare_func, + _Py_hashtable_copy_data_func copy_data_func, + _Py_hashtable_free_data_func free_data_func, + _Py_hashtable_get_data_size_func get_data_size_func, + _Py_hashtable_allocator_t *allocator) +{ + _Py_hashtable_t *ht; + size_t buckets_size; + _Py_hashtable_allocator_t alloc; + + if (allocator == NULL) { + alloc.malloc = PyMem_RawMalloc; + alloc.free = PyMem_RawFree; + } + else + alloc = *allocator; + + ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t)); + if (ht == NULL) + return ht; + + ht->num_buckets = round_size(init_size); + ht->entries = 0; + ht->data_size = data_size; + + buckets_size = ht->num_buckets * sizeof(ht->buckets[0]); + ht->buckets = alloc.malloc(buckets_size); + if (ht->buckets == NULL) { + alloc.free(ht); + return NULL; + } + memset(ht->buckets, 0, buckets_size); + + ht->hash_func = hash_func; + ht->compare_func = compare_func; + ht->copy_data_func = copy_data_func; + ht->free_data_func = free_data_func; + ht->get_data_size_func = get_data_size_func; + ht->alloc = alloc; + return ht; +} + +_Py_hashtable_t * +_Py_hashtable_new(size_t data_size, + _Py_hashtable_hash_func hash_func, + _Py_hashtable_compare_func compare_func) +{ + return _Py_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE, + hash_func, compare_func, + NULL, NULL, NULL, NULL); +} + +size_t +_Py_hashtable_size(_Py_hashtable_t *ht) +{ + size_t size; + size_t hv; + + size = sizeof(_Py_hashtable_t); + + /* buckets */ + size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *); + + /* entries */ + size += ht->entries * HASHTABLE_ITEM_SIZE(ht); + + /* data linked from entries */ + if (ht->get_data_size_func) { + for (hv = 0; hv < ht->num_buckets; hv++) { + _Py_hashtable_entry_t *entry; + + for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { + void *data; + + data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); + size += ht->get_data_size_func(data); + } + } + } + return size; +} + +#ifdef Py_DEBUG +void +_Py_hashtable_print_stats(_Py_hashtable_t *ht) +{ + size_t size; + size_t chain_len, max_chain_len, total_chain_len, nchains; + _Py_hashtable_entry_t *entry; + size_t hv; + double load; + + size = _Py_hashtable_size(ht); + + load = (double)ht->entries / ht->num_buckets; + + max_chain_len = 0; + total_chain_len = 0; + nchains = 0; + for (hv = 0; hv < ht->num_buckets; hv++) { + entry = TABLE_HEAD(ht, hv); + if (entry != NULL) { + chain_len = 0; + for (; entry; entry = ENTRY_NEXT(entry)) { + chain_len++; + } + if (chain_len > max_chain_len) + max_chain_len = chain_len; + total_chain_len += chain_len; + nchains++; + } + } + printf("hash table %p: entries=%zu/%zu (%.0f%%), ", + ht, ht->entries, ht->num_buckets, load * 100.0); + if (nchains) + printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains); + printf("max_chain_len=%zu, %zu kB\n", + max_chain_len, size / 1024); +} +#endif + +/* Get an entry. Return NULL if the key does not exist. */ +_Py_hashtable_entry_t * +_Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key) +{ + Py_uhash_t key_hash; + size_t index; + _Py_hashtable_entry_t *entry; + + key_hash = ht->hash_func(key); + index = key_hash & (ht->num_buckets - 1); + + for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { + if (entry->key_hash == key_hash && ht->compare_func(key, entry)) + break; + } + + return entry; +} + +static int +_hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) +{ + Py_uhash_t key_hash; + size_t index; + _Py_hashtable_entry_t *entry, *previous; + + key_hash = ht->hash_func(key); + index = key_hash & (ht->num_buckets - 1); + + previous = NULL; + for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { + if (entry->key_hash == key_hash && ht->compare_func(key, entry)) + break; + previous = entry; + } + + if (entry == NULL) + return 0; + + _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous, + (_Py_slist_item_t *)entry); + ht->entries--; + + if (data != NULL) + _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry); + ht->alloc.free(entry); + + if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW) + hashtable_rehash(ht); + return 1; +} + +/* Add a new entry to the hash. The key must not be present in the hash table. + Return 0 on success, -1 on memory error. */ +int +_Py_hashtable_set(_Py_hashtable_t *ht, const void *key, + void *data, size_t data_size) +{ + Py_uhash_t key_hash; + size_t index; + _Py_hashtable_entry_t *entry; + + assert(data != NULL || data_size == 0); +#ifndef NDEBUG + /* Don't write the assertion on a single line because it is interesting + to know the duplicated entry if the assertion failed. The entry can + be read using a debugger. */ + entry = _Py_hashtable_get_entry(ht, key); + assert(entry == NULL); +#endif + + key_hash = ht->hash_func(key); + index = key_hash & (ht->num_buckets - 1); + + entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht)); + if (entry == NULL) { + /* memory allocation failed */ + return -1; + } + + entry->key = (void *)key; + entry->key_hash = key_hash; + + assert(data_size == ht->data_size); + memcpy(_PY_HASHTABLE_ENTRY_DATA(entry), data, data_size); + + _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry); + ht->entries++; + + if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH) + hashtable_rehash(ht); + return 0; +} + +/* Get data from an entry. Copy entry data into data and return 1 if the entry + exists, return 0 if the entry does not exist. */ +int +_Py_hashtable_get(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) +{ + _Py_hashtable_entry_t *entry; + + assert(data != NULL); + + entry = _Py_hashtable_get_entry(ht, key); + if (entry == NULL) + return 0; + _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry); + return 1; +} + +int +_Py_hashtable_pop(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) +{ + assert(data != NULL); + assert(ht->free_data_func == NULL); + return _hashtable_pop_entry(ht, key, data, data_size); +} + +/* Delete an entry. The entry must exist. */ +void +_Py_hashtable_delete(_Py_hashtable_t *ht, const void *key) +{ +#ifndef NDEBUG + int found = _hashtable_pop_entry(ht, key, NULL, 0); + assert(found); +#else + (void)_hashtable_pop_entry(ht, key, NULL, 0); +#endif +} + +/* Prototype for a pointer to a function to be called foreach + key/value pair in the hash by hashtable_foreach(). Iteration + stops if a non-zero value is returned. */ +int +_Py_hashtable_foreach(_Py_hashtable_t *ht, + int (*func) (_Py_hashtable_entry_t *entry, void *arg), + void *arg) +{ + _Py_hashtable_entry_t *entry; + size_t hv; + + for (hv = 0; hv < ht->num_buckets; hv++) { + for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { + int res = func(entry, arg); + if (res) + return res; + } + } + return 0; +} + +static void +hashtable_rehash(_Py_hashtable_t *ht) +{ + size_t buckets_size, new_size, bucket; + _Py_slist_t *old_buckets = NULL; + size_t old_num_buckets; + + new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR)); + if (new_size == ht->num_buckets) + return; + + old_num_buckets = ht->num_buckets; + + buckets_size = new_size * sizeof(ht->buckets[0]); + old_buckets = ht->buckets; + ht->buckets = ht->alloc.malloc(buckets_size); + if (ht->buckets == NULL) { + /* cancel rehash on memory allocation failure */ + ht->buckets = old_buckets ; + /* memory allocation failed */ + return; + } + memset(ht->buckets, 0, buckets_size); + + ht->num_buckets = new_size; + + for (bucket = 0; bucket < old_num_buckets; bucket++) { + _Py_hashtable_entry_t *entry, *next; + for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) { + size_t entry_index; + + assert(ht->hash_func(entry->key) == entry->key_hash); + next = ENTRY_NEXT(entry); + entry_index = entry->key_hash & (new_size - 1); + + _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry); + } + } + + ht->alloc.free(old_buckets); +} + +void +_Py_hashtable_clear(_Py_hashtable_t *ht) +{ + _Py_hashtable_entry_t *entry, *next; + size_t i; + + for (i=0; i < ht->num_buckets; i++) { + for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) { + next = ENTRY_NEXT(entry); + if (ht->free_data_func) + ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry)); + ht->alloc.free(entry); + } + _Py_slist_init(&ht->buckets[i]); + } + ht->entries = 0; + hashtable_rehash(ht); +} + +void +_Py_hashtable_destroy(_Py_hashtable_t *ht) +{ + size_t i; + + for (i = 0; i < ht->num_buckets; i++) { + _Py_slist_item_t *entry = ht->buckets[i].head; + while (entry) { + _Py_slist_item_t *entry_next = entry->next; + if (ht->free_data_func) + ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry)); + ht->alloc.free(entry); + entry = entry_next; + } + } + + ht->alloc.free(ht->buckets); + ht->alloc.free(ht); +} + +/* Return a copy of the hash table */ +_Py_hashtable_t * +_Py_hashtable_copy(_Py_hashtable_t *src) +{ + _Py_hashtable_t *dst; + _Py_hashtable_entry_t *entry; + size_t bucket; + int err; + void *data, *new_data; + + dst = _Py_hashtable_new_full(src->data_size, src->num_buckets, + src->hash_func, src->compare_func, + src->copy_data_func, src->free_data_func, + src->get_data_size_func, &src->alloc); + if (dst == NULL) + return NULL; + + for (bucket=0; bucket < src->num_buckets; bucket++) { + entry = TABLE_HEAD(src, bucket); + for (; entry; entry = ENTRY_NEXT(entry)) { + if (src->copy_data_func) { + data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); + new_data = src->copy_data_func(data); + if (new_data != NULL) + err = _Py_hashtable_set(dst, entry->key, + &new_data, src->data_size); + else + err = 1; + } + else { + data = _PY_HASHTABLE_ENTRY_DATA(entry); + err = _Py_hashtable_set(dst, entry->key, data, src->data_size); + } + if (err) { + _Py_hashtable_destroy(dst); + return NULL; + } + } + } + return dst; +} + diff --git a/Modules/hashtable.h b/Modules/hashtable.h new file mode 100644 index 0000000..539e490 --- /dev/null +++ b/Modules/hashtable.h @@ -0,0 +1,128 @@ +#ifndef Py_HASHTABLE_H +#define Py_HASHTABLE_H + +/* The whole API is private */ +#ifndef Py_LIMITED_API + +typedef struct _Py_slist_item_s { + struct _Py_slist_item_s *next; +} _Py_slist_item_t; + +typedef struct { + _Py_slist_item_t *head; +} _Py_slist_t; + +#define _Py_SLIST_ITEM_NEXT(ITEM) (((_Py_slist_item_t *)ITEM)->next) + +#define _Py_SLIST_HEAD(SLIST) (((_Py_slist_t *)SLIST)->head) + +typedef struct { + /* used by _Py_hashtable_t.buckets to link entries */ + _Py_slist_item_t _Py_slist_item; + + const void *key; + Py_uhash_t key_hash; + + /* data follows */ +} _Py_hashtable_entry_t; + +#define _PY_HASHTABLE_ENTRY_DATA(ENTRY) \ + ((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t)) + +#define _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ENTRY) \ + (*(void **)_PY_HASHTABLE_ENTRY_DATA(ENTRY)) + +#define _Py_HASHTABLE_ENTRY_READ_DATA(TABLE, DATA, DATA_SIZE, ENTRY) \ + do { \ + assert((DATA_SIZE) == (TABLE)->data_size); \ + memcpy(DATA, _PY_HASHTABLE_ENTRY_DATA(ENTRY), DATA_SIZE); \ + } while (0) + +typedef Py_uhash_t (*_Py_hashtable_hash_func) (const void *key); +typedef int (*_Py_hashtable_compare_func) (const void *key, const _Py_hashtable_entry_t *he); +typedef void* (*_Py_hashtable_copy_data_func)(void *data); +typedef void (*_Py_hashtable_free_data_func)(void *data); +typedef size_t (*_Py_hashtable_get_data_size_func)(void *data); + +typedef struct { + /* allocate a memory block */ + void* (*malloc) (size_t size); + + /* release a memory block */ + void (*free) (void *ptr); +} _Py_hashtable_allocator_t; + +typedef struct { + size_t num_buckets; + size_t entries; /* Total number of entries in the table. */ + _Py_slist_t *buckets; + size_t data_size; + + _Py_hashtable_hash_func hash_func; + _Py_hashtable_compare_func compare_func; + _Py_hashtable_copy_data_func copy_data_func; + _Py_hashtable_free_data_func free_data_func; + _Py_hashtable_get_data_size_func get_data_size_func; + _Py_hashtable_allocator_t alloc; +} _Py_hashtable_t; + +/* hash and compare functions for integers and pointers */ +PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(const void *key); +PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_int(const void *key); +PyAPI_FUNC(int) _Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry); + +PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new( + size_t data_size, + _Py_hashtable_hash_func hash_func, + _Py_hashtable_compare_func compare_func); +PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new_full( + size_t data_size, + size_t init_size, + _Py_hashtable_hash_func hash_func, + _Py_hashtable_compare_func compare_func, + _Py_hashtable_copy_data_func copy_data_func, + _Py_hashtable_free_data_func free_data_func, + _Py_hashtable_get_data_size_func get_data_size_func, + _Py_hashtable_allocator_t *allocator); +PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_copy(_Py_hashtable_t *src); +PyAPI_FUNC(void) _Py_hashtable_clear(_Py_hashtable_t *ht); +PyAPI_FUNC(void) _Py_hashtable_destroy(_Py_hashtable_t *ht); + +typedef int (*_Py_hashtable_foreach_func) (_Py_hashtable_entry_t *entry, void *arg); + +PyAPI_FUNC(int) _Py_hashtable_foreach( + _Py_hashtable_t *ht, + _Py_hashtable_foreach_func func, void *arg); +PyAPI_FUNC(size_t) _Py_hashtable_size(_Py_hashtable_t *ht); + +PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry( + _Py_hashtable_t *ht, + const void *key); +PyAPI_FUNC(int) _Py_hashtable_set( + _Py_hashtable_t *ht, + const void *key, + void *data, + size_t data_size); +PyAPI_FUNC(int) _Py_hashtable_get( + _Py_hashtable_t *ht, + const void *key, + void *data, + size_t data_size); +PyAPI_FUNC(int) _Py_hashtable_pop( + _Py_hashtable_t *ht, + const void *key, + void *data, + size_t data_size); +PyAPI_FUNC(void) _Py_hashtable_delete( + _Py_hashtable_t *ht, + const void *key); + +#define _Py_HASHTABLE_SET(TABLE, KEY, DATA) \ + _Py_hashtable_set(TABLE, KEY, &(DATA), sizeof(DATA)) + +#define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \ + _Py_hashtable_get(TABLE, KEY, &(DATA), sizeof(DATA)) + +#endif /* Py_LIMITED_API */ + +#endif |