summaryrefslogtreecommitdiffstats
path: root/Modules
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2013-11-23 11:27:24 (GMT)
committerVictor Stinner <victor.stinner@gmail.com>2013-11-23 11:27:24 (GMT)
commited3b0bca3ef9d7bdbb8bd8e67e60e85f5a336da0 (patch)
treefd4390855d293372f73048fdf4b3e6b4a7cdf440 /Modules
parent0fb6072fad411eba171b53037bcc04d07c7b0770 (diff)
downloadcpython-ed3b0bca3ef9d7bdbb8bd8e67e60e85f5a336da0.zip
cpython-ed3b0bca3ef9d7bdbb8bd8e67e60e85f5a336da0.tar.gz
cpython-ed3b0bca3ef9d7bdbb8bd8e67e60e85f5a336da0.tar.bz2
Issue #18874: Implement the PEP 454 (tracemalloc)
Diffstat (limited to 'Modules')
-rw-r--r--Modules/Setup.dist17
-rw-r--r--Modules/_tracemalloc.c1407
-rw-r--r--Modules/hashtable.c518
-rw-r--r--Modules/hashtable.h128
4 files changed, 2063 insertions, 7 deletions
diff --git a/Modules/Setup.dist b/Modules/Setup.dist
index ebf8172..01fb85f 100644
--- a/Modules/Setup.dist
+++ b/Modules/Setup.dist
@@ -102,7 +102,7 @@ PYTHONPATH=$(COREPYTHONPATH)
# various reasons; therefore they are listed here instead of in the
# normal order.
-# This only contains the minimal set of modules required to run the
+# This only contains the minimal set of modules required to run the
# setup.py script in the root of the Python source tree.
posix posixmodule.c # posix (UNIX) system calls
@@ -115,7 +115,7 @@ _weakref _weakref.c # weak references
_functools _functoolsmodule.c # Tools for working with functions and callable objects
_operator _operator.c # operator.add() and similar goodies
_collections _collectionsmodule.c # Container types
-itertools itertoolsmodule.c # Functions creating iterators for efficient looping
+itertools itertoolsmodule.c # Functions creating iterators for efficient looping
atexit atexitmodule.c # Register functions to be run at interpreter-shutdown
_stat _stat.c # stat.h interface
@@ -132,12 +132,15 @@ zipimport zipimport.c
# faulthandler module
faulthandler faulthandler.c
+# debug tool to trace memory blocks allocated by Python
+_tracemalloc _tracemalloc.c hashtable.c
+
# The rest of the modules listed in this file are all commented out by
# default. Usually they can be detected and built as dynamically
# loaded modules by the new setup.py script added in Python 2.1. If
-# you're on a platform that doesn't support dynamic loading, want to
-# compile modules statically into the Python binary, or need to
-# specify some odd set of compiler switches, you can uncomment the
+# you're on a platform that doesn't support dynamic loading, want to
+# compile modules statically into the Python binary, or need to
+# specify some odd set of compiler switches, you can uncomment the
# appropriate lines below.
# ======================================================================
@@ -186,7 +189,7 @@ _symtable symtablemodule.c
# supported...)
#fcntl fcntlmodule.c # fcntl(2) and ioctl(2)
-#spwd spwdmodule.c # spwd(3)
+#spwd spwdmodule.c # spwd(3)
#grp grpmodule.c # grp(3)
#select selectmodule.c # select(2); not on ancient System V
@@ -302,7 +305,7 @@ _symtable symtablemodule.c
#_curses _cursesmodule.c -lcurses -ltermcap
# Wrapper for the panel library that's part of ncurses and SYSV curses.
-#_curses_panel _curses_panel.c -lpanel -lncurses
+#_curses_panel _curses_panel.c -lpanel -lncurses
# Modules that provide persistent dictionary-like semantics. You will
diff --git a/Modules/_tracemalloc.c b/Modules/_tracemalloc.c
new file mode 100644
index 0000000..15ed734
--- /dev/null
+++ b/Modules/_tracemalloc.c
@@ -0,0 +1,1407 @@
+#include "Python.h"
+#include "hashtable.h"
+#include "frameobject.h"
+#include "pythread.h"
+#include "osdefs.h"
+
+/* Trace memory blocks allocated by PyMem_RawMalloc() */
+#define TRACE_RAW_MALLOC
+
+/* Forward declaration */
+static void tracemalloc_stop(void);
+static int tracemalloc_atexit_register(void);
+static void* raw_malloc(size_t size);
+static void raw_free(void *ptr);
+
+#ifdef Py_DEBUG
+# define TRACE_DEBUG
+#endif
+
+#define _STR(VAL) #VAL
+#define STR(VAL) _STR(VAL)
+
+/* Protected by the GIL */
+static struct {
+ PyMemAllocator mem;
+ PyMemAllocator raw;
+ PyMemAllocator obj;
+} allocators;
+
+/* Arbitrary limit of the number of frames in a traceback. The value was chosen
+ to not allocate too much memory on the stack (see TRACEBACK_STACK_SIZE
+ below). */
+#define MAX_NFRAME 100
+
+static struct {
+ /* Module initialized?
+ Variable protected by the GIL */
+ enum {
+ TRACEMALLOC_NOT_INITIALIZED,
+ TRACEMALLOC_INITIALIZED,
+ TRACEMALLOC_FINALIZED
+ } initialized;
+
+ /* atexit handler registered? */
+ int atexit_registered;
+
+ /* Is tracemalloc tracing memory allocations?
+ Variable protected by the GIL */
+ int tracing;
+
+ /* limit of the number of frames in a traceback, 1 by default.
+ Variable protected by the GIL. */
+ int max_nframe;
+} tracemalloc_config = {TRACEMALLOC_NOT_INITIALIZED, 0, 0, 1};
+
+#if defined(TRACE_RAW_MALLOC) && defined(WITH_THREAD)
+/* This lock is needed because tracemalloc_free() is called without
+ the GIL held from PyMem_RawFree(). It cannot acquire the lock because it
+ would introduce a deadlock in PyThreadState_DeleteCurrent(). */
+static PyThread_type_lock tables_lock;
+# define TABLES_LOCK() PyThread_acquire_lock(tables_lock, 1)
+# define TABLES_UNLOCK() PyThread_release_lock(tables_lock)
+#else
+ /* variables are protected by the GIL */
+# define TABLES_LOCK()
+# define TABLES_UNLOCK()
+#endif
+
+/* Pack the frame_t structure to reduce the memory footprint on 64-bit
+ architectures: 12 bytes instead of 16. This optimization might produce
+ SIGBUS on architectures not supporting unaligned memory accesses (64-bit
+ IPS CPU?): on such architecture, the structure must not be packed. */
+#pragma pack(4)
+typedef struct
+#ifdef __GNUC__
+__attribute__((packed))
+#endif
+{
+ PyObject *filename;
+ int lineno;
+} frame_t;
+
+typedef struct {
+ Py_uhash_t hash;
+ int nframe;
+ frame_t frames[1];
+} traceback_t;
+
+#define TRACEBACK_SIZE(NFRAME) \
+ (sizeof(traceback_t) + sizeof(frame_t) * (NFRAME - 1))
+#define TRACEBACK_STACK_SIZE TRACEBACK_SIZE(MAX_NFRAME)
+
+static PyObject *unknown_filename = NULL;
+static traceback_t tracemalloc_empty_traceback;
+
+typedef struct {
+ size_t size;
+ traceback_t *traceback;
+} trace_t;
+
+/* Size in bytes of currently traced memory.
+ Protected by TABLES_LOCK(). */
+static size_t tracemalloc_traced_memory = 0;
+
+/* Maximum size in bytes of traced memory.
+ Protected by TABLES_LOCK(). */
+static size_t tracemalloc_max_traced_memory = 0;
+
+/* Hash table used as a set to to intern filenames:
+ PyObject* => PyObject*.
+ Protected by the GIL */
+static _Py_hashtable_t *tracemalloc_filenames = NULL;
+
+/* Hash table used as a set to intern tracebacks:
+ traceback_t* => traceback_t*
+ Protected by the GIL */
+static _Py_hashtable_t *tracemalloc_tracebacks = NULL;
+
+/* pointer (void*) => trace (trace_t).
+ Protected by TABLES_LOCK(). */
+static _Py_hashtable_t *tracemalloc_traces = NULL;
+
+#ifdef TRACE_DEBUG
/* Write a tracemalloc error message to stderr and flush it.
   Only compiled in debug builds (TRACE_DEBUG). */
static void
tracemalloc_error(const char *format, ...)
{
    va_list args;

    fputs("tracemalloc: ", stderr);
    va_start(args, format);
    vfprintf(stderr, format, args);
    va_end(args);
    fputc('\n', stderr);
    fflush(stderr);
}
+#endif
+
+#if defined(WITH_THREAD) && defined(TRACE_RAW_MALLOC)
+#define REENTRANT_THREADLOCAL
+
+/* If your OS does not provide native thread local storage, you can implement
+ it manually using a lock. Functions of thread.c cannot be used because
+ they use PyMem_RawMalloc() which leads to a reentrant call. */
+#if !(defined(_POSIX_THREADS) || defined(NT_THREADS))
+# error "need native thread local storage (TLS)"
+#endif
+
+static int tracemalloc_reentrant_key;
+
+/* Any non-NULL pointer can be used */
+#define REENTRANT Py_True
+
/* Return 1 if the current thread is already inside a tracemalloc hook,
   0 otherwise. Uses native TLS: the key holds either NULL or REENTRANT. */
static int
get_reentrant(void)
{
    void *ptr = PyThread_get_key_value(tracemalloc_reentrant_key);
    if (ptr != NULL) {
        /* only REENTRANT is ever stored under this key */
        assert(ptr == REENTRANT);
        return 1;
    }
    else
        return 0;
}
+
/* Set or clear the per-thread reentrant flag.
   Setting it twice in a row is a bug (checked by the assertion). */
static void
set_reentrant(int reentrant)
{
    if (reentrant) {
        assert(PyThread_get_key_value(tracemalloc_reentrant_key) == NULL);
        PyThread_set_key_value(tracemalloc_reentrant_key,
                               REENTRANT);
    }
    else {
        /* FIXME: PyThread_set_key_value() cannot be used to set the flag
           to zero, because it does nothing if the variable has already
           a value set. */
        PyThread_delete_key_value(tracemalloc_reentrant_key);
    }
}
+
+#else
+
+/* WITH_THREAD not defined: Python compiled without threads,
+ or TRACE_RAW_MALLOC not defined: variable protected by the GIL */
+static int tracemalloc_reentrant = 0;
+
/* Read the reentrant flag (single global: GIL-protected, no TLS needed). */
static int
get_reentrant(void)
{
    return tracemalloc_reentrant;
}

/* Set or clear the reentrant flag; setting it twice in a row is a bug. */
static void
set_reentrant(int reentrant)
{
    assert(!reentrant || !get_reentrant());
    tracemalloc_reentrant = reentrant;
}
+#endif
+
+static int
+hashtable_compare_unicode(const void *key, const _Py_hashtable_entry_t *entry)
+{
+ if (key != NULL && entry->key != NULL)
+ return (PyUnicode_Compare((PyObject *)key, (PyObject *)entry->key) == 0);
+ else
+ return key == entry->key;
+}
+
+static _Py_hashtable_allocator_t hashtable_alloc = {malloc, free};
+
/* Create a hash table that allocates with the libc malloc/free
   (hashtable_alloc), so its own memory is never routed through the
   tracemalloc hooks. */
static _Py_hashtable_t *
hashtable_new(size_t data_size,
              _Py_hashtable_hash_func hash_func,
              _Py_hashtable_compare_func compare_func)
{
    return _Py_hashtable_new_full(data_size, 0,
                                  hash_func, compare_func,
                                  NULL, NULL, NULL, &hashtable_alloc);
}
+
/* Allocate memory with the original (unhooked) raw allocator saved in
   allocators.raw; used for tracemalloc's internal data. */
static void*
raw_malloc(size_t size)
{
    return allocators.raw.malloc(allocators.raw.ctx, size);
}

/* Release memory allocated by raw_malloc(). */
static void
raw_free(void *ptr)
{
    allocators.raw.free(allocators.raw.ctx, ptr);
}
+
+static Py_uhash_t
+hashtable_hash_traceback(const void *key)
+{
+ const traceback_t *traceback = key;
+ return traceback->hash;
+}
+
+static int
+hashtable_compare_traceback(const traceback_t *traceback1,
+ const _Py_hashtable_entry_t *he)
+{
+ const traceback_t *traceback2 = he->key;
+ const frame_t *frame1, *frame2;
+ int i;
+
+ if (traceback1->nframe != traceback2->nframe)
+ return 0;
+
+ for (i=0; i < traceback1->nframe; i++) {
+ frame1 = &traceback1->frames[i];
+ frame2 = &traceback2->frames[i];
+
+ if (frame1->lineno != frame2->lineno)
+ return 0;
+
+ if (frame1->filename != frame2->filename) {
+ assert(PyUnicode_Compare(frame1->filename, frame2->filename) != 0);
+ return 0;
+ }
+ }
+ return 1;
+}
+
+static void
+tracemalloc_get_frame(PyFrameObject *pyframe, frame_t *frame)
+{
+ PyCodeObject *code;
+ PyObject *filename;
+ _Py_hashtable_entry_t *entry;
+
+ frame->filename = unknown_filename;
+ frame->lineno = PyFrame_GetLineNumber(pyframe);
+ assert(frame->lineno >= 0);
+ if (frame->lineno < 0)
+ frame->lineno = 0;
+
+ code = pyframe->f_code;
+ if (code == NULL) {
+#ifdef TRACE_DEBUG
+ tracemalloc_error("failed to get the code object of the a frame");
+#endif
+ return;
+ }
+
+ if (code->co_filename == NULL) {
+#ifdef TRACE_DEBUG
+ tracemalloc_error("failed to get the filename of the code object");
+#endif
+ return;
+ }
+
+ filename = code->co_filename;
+ assert(filename != NULL);
+ if (filename == NULL)
+ return;
+
+ if (!PyUnicode_Check(filename)) {
+#ifdef TRACE_DEBUG
+ tracemalloc_error("filename is not an unicode string");
+#endif
+ return;
+ }
+ if (!PyUnicode_IS_READY(filename)) {
+ /* Don't make a Unicode string ready to avoid reentrant calls
+ to tracemalloc_malloc() or tracemalloc_realloc() */
+#ifdef TRACE_DEBUG
+ tracemalloc_error("filename is not a ready unicode string");
+#endif
+ return;
+ }
+
+ /* intern the filename */
+ entry = _Py_hashtable_get_entry(tracemalloc_filenames, filename);
+ if (entry != NULL) {
+ filename = (PyObject *)entry->key;
+ }
+ else {
+ /* tracemalloc_filenames is responsible to keep a reference
+ to the filename */
+ Py_INCREF(filename);
+ if (_Py_hashtable_set(tracemalloc_filenames, filename, NULL, 0) < 0) {
+ Py_DECREF(filename);
+#ifdef TRACE_DEBUG
+ tracemalloc_error("failed to intern the filename");
+#endif
+ return;
+ }
+ }
+
+ /* the tracemalloc_filenames table keeps a reference to the filename */
+ frame->filename = filename;
+}
+
/* Compute the hash of a traceback by folding in the filename hash and
   line number of each frame. */
static Py_uhash_t
traceback_hash(traceback_t *traceback)
{
    /* code based on tuplehash() of Objects/tupleobject.c */
    Py_uhash_t x;  /* Unsigned for defined overflow behavior. */
    Py_hash_t y;
    int len = traceback->nframe;
    Py_uhash_t mult = _PyHASH_MULTIPLIER;
    frame_t *frame;

    x = 0x345678UL;
    frame = traceback->frames;
    while (--len >= 0) {
        y = PyObject_Hash(frame->filename);
        y ^= frame->lineno;
        frame++;

        x = (x ^ y) * mult;
        /* the cast might truncate len; that doesn't change hash stability */
        mult += (Py_hash_t)(82520UL + len + len);
    }
    x += 97531UL;
    return x;
}
+
/* Walk the current thread's Python frame stack and fill traceback->frames,
   most recent frame first, up to tracemalloc_config.max_nframe entries.
   traceback->nframe must be 0 on entry; it is left at the number of frames
   actually collected (0 if no thread state is available). */
static void
traceback_get_frames(traceback_t *traceback)
{
    PyThreadState *tstate;
    PyFrameObject *pyframe;

#ifdef WITH_THREAD
    /* may be called without the GIL held (raw domain), so don't use
       PyThreadState_Get() which fails without a current thread state */
    tstate = PyGILState_GetThisThreadState();
#else
    tstate = PyThreadState_Get();
#endif
    if (tstate == NULL) {
#ifdef TRACE_DEBUG
        tracemalloc_error("failed to get the current thread state");
#endif
        return;
    }

    for (pyframe = tstate->frame; pyframe != NULL; pyframe = pyframe->f_back) {
        tracemalloc_get_frame(pyframe, &traceback->frames[traceback->nframe]);
        assert(traceback->frames[traceback->nframe].filename != NULL);
        assert(traceback->frames[traceback->nframe].lineno >= 0);
        traceback->nframe++;
        if (traceback->nframe == tracemalloc_config.max_nframe)
            break;
    }
}
+
/* Build the traceback of the current Python thread and intern it in
   tracemalloc_tracebacks. Returns a pointer owned by the table (or the
   static empty traceback if no frame could be collected), or NULL on
   memory error. The traceback is first built in a stack buffer so nothing
   is allocated unless the traceback is new. */
static traceback_t *
traceback_new(void)
{
    char stack_buffer[TRACEBACK_STACK_SIZE];
    traceback_t *traceback = (traceback_t *)stack_buffer;
    _Py_hashtable_entry_t *entry;

#ifdef WITH_THREAD
    assert(PyGILState_Check());
#endif

    /* get frames */
    traceback->nframe = 0;
    traceback_get_frames(traceback);
    if (traceback->nframe == 0)
        return &tracemalloc_empty_traceback;
    traceback->hash = traceback_hash(traceback);

    /* intern the traceback */
    entry = _Py_hashtable_get_entry(tracemalloc_tracebacks, traceback);
    if (entry != NULL) {
        /* already interned: reuse the table's copy */
        traceback = (traceback_t *)entry->key;
    }
    else {
        traceback_t *copy;
        size_t traceback_size;

        /* copy only the frames actually collected, not MAX_NFRAME */
        traceback_size = TRACEBACK_SIZE(traceback->nframe);

        copy = raw_malloc(traceback_size);
        if (copy == NULL) {
#ifdef TRACE_DEBUG
            tracemalloc_error("failed to intern the traceback: malloc failed");
#endif
            return NULL;
        }
        memcpy(copy, traceback, traceback_size);

        if (_Py_hashtable_set(tracemalloc_tracebacks, copy, NULL, 0) < 0) {
            raw_free(copy);
#ifdef TRACE_DEBUG
            tracemalloc_error("failed to intern the traceback: putdata failed");
#endif
            return NULL;
        }
        traceback = copy;
    }
    return traceback;
}
+
/* Record a new allocation of 'size' bytes at 'ptr': store its traceback in
   tracemalloc_traces and update the traced-memory counters. Requires the
   GIL; silently does nothing if the traceback cannot be interned. */
static void
tracemalloc_log_alloc(void *ptr, size_t size)
{
    traceback_t *traceback;
    trace_t trace;

#ifdef WITH_THREAD
    assert(PyGILState_Check());
#endif

    traceback = traceback_new();
    if (traceback == NULL)
        return;

    trace.size = size;
    trace.traceback = traceback;

    TABLES_LOCK();
    assert(tracemalloc_traced_memory <= PY_SIZE_MAX - size);
    tracemalloc_traced_memory += size;
    if (tracemalloc_traced_memory > tracemalloc_max_traced_memory)
        tracemalloc_max_traced_memory = tracemalloc_traced_memory;

    _Py_HASHTABLE_SET(tracemalloc_traces, ptr, trace);
    TABLES_UNLOCK();
}
+
/* Forget the trace of 'ptr' (if any) and decrease the traced-memory
   counter. Safe to call without the GIL: only takes TABLES_LOCK(). */
static void
tracemalloc_log_free(void *ptr)
{
    trace_t trace;

    TABLES_LOCK();
    if (_Py_hashtable_pop(tracemalloc_traces, ptr, &trace, sizeof(trace))) {
        assert(tracemalloc_traced_memory >= trace.size);
        tracemalloc_traced_memory -= trace.size;
    }
    TABLES_UNLOCK();
}
+
/* malloc hook shared by all domains. gil_held tells whether the caller
   already holds the GIL (MEM/OBJ domains) or not (RAW domain, which must
   acquire it via PyGILState_Ensure() before logging). */
static void*
tracemalloc_malloc(void *ctx, size_t size, int gil_held)
{
    PyMemAllocator *alloc = (PyMemAllocator *)ctx;
#if defined(TRACE_RAW_MALLOC) && defined(WITH_THREAD)
    PyGILState_STATE gil_state;
#endif
    void *ptr;

    if (get_reentrant()) {
        /* reentrant call: delegate without logging to avoid recursion */
        return alloc->malloc(alloc->ctx, size);
    }

    /* Ignore reentrant call. PyObject_Malloc() calls PyMem_Malloc()
       for allocations larger than 512 bytes. PyGILState_Ensure() may call
       PyMem_RawMalloc() indirectly which would call PyGILState_Ensure() if
       reentrant are not disabled. */
    set_reentrant(1);
#ifdef WITH_THREAD
#ifdef TRACE_RAW_MALLOC
    if (!gil_held)
        gil_state = PyGILState_Ensure();
#else
    assert(gil_held);
#endif
#endif
    ptr = alloc->malloc(alloc->ctx, size);
    set_reentrant(0);

    if (ptr != NULL)
        tracemalloc_log_alloc(ptr, size);

#if defined(TRACE_RAW_MALLOC) && defined(WITH_THREAD)
    if (!gil_held)
        PyGILState_Release(gil_state);
#endif

    return ptr;
}
+
/* realloc hook shared by all domains; see tracemalloc_malloc() for the
   meaning of gil_held. On success the old trace is dropped and a new one
   is logged for the (possibly moved) block. */
static void*
tracemalloc_realloc(void *ctx, void *ptr, size_t new_size, int gil_held)
{
    PyMemAllocator *alloc = (PyMemAllocator *)ctx;
#if defined(TRACE_RAW_MALLOC) && defined(WITH_THREAD)
    PyGILState_STATE gil_state;
#endif
    void *ptr2;

    if (get_reentrant()) {
        /* Reentrant call to PyMem_Realloc() and PyMem_RawRealloc().
           Example: PyMem_RawRealloc() is called internally by pymalloc
           (_PyObject_Malloc() and _PyObject_Realloc()) to allocate a new
           arena (new_arena()). */
        ptr2 = alloc->realloc(alloc->ctx, ptr, new_size);

        /* cannot log the new block here (reentrant), but at least drop
           the stale trace of the old block */
        if (ptr2 != NULL && ptr != NULL)
            tracemalloc_log_free(ptr);

        return ptr2;
    }

    /* Ignore reentrant call. PyObject_Realloc() calls PyMem_Realloc() for
       allocations larger than 512 bytes. PyGILState_Ensure() may call
       PyMem_RawMalloc() indirectly which would call PyGILState_Ensure() if
       reentrant are not disabled. */
    set_reentrant(1);
#ifdef WITH_THREAD
#ifdef TRACE_RAW_MALLOC
    if (!gil_held)
        gil_state = PyGILState_Ensure();
#else
    assert(gil_held);
#endif
#endif
    ptr2 = alloc->realloc(alloc->ctx, ptr, new_size);
    set_reentrant(0);

    if (ptr2 != NULL) {
        if (ptr != NULL)
            tracemalloc_log_free(ptr);

        tracemalloc_log_alloc(ptr2, new_size);
    }

#if defined(TRACE_RAW_MALLOC) && defined(WITH_THREAD)
    if (!gil_held)
        PyGILState_Release(gil_state);
#endif

    return ptr2;
}
+
/* free hook shared by all domains. Never acquires the GIL (see comment
   below), which is safe because tracemalloc_log_free() only needs
   TABLES_LOCK(). */
static void
tracemalloc_free(void *ctx, void *ptr)
{
    PyMemAllocator *alloc = (PyMemAllocator *)ctx;

    if (ptr == NULL)
        return;

    /* GIL cannot be locked in PyMem_RawFree() because it would introduce
       a deadlock in PyThreadState_DeleteCurrent(). */

    alloc->free(alloc->ctx, ptr);
    tracemalloc_log_free(ptr);
}
+
/* Thin adapters binding the shared hooks to the two calling conventions:
   *_gil variants are installed for the MEM and OBJ domains (GIL already
   held by the caller), *_raw variants for the RAW domain (GIL not
   guaranteed). */
static void*
tracemalloc_malloc_gil(void *ctx, size_t size)
{
    return tracemalloc_malloc(ctx, size, 1);
}

static void*
tracemalloc_realloc_gil(void *ctx, void *ptr, size_t new_size)
{
    return tracemalloc_realloc(ctx, ptr, new_size, 1);
}

#ifdef TRACE_RAW_MALLOC
static void*
tracemalloc_raw_malloc(void *ctx, size_t size)
{
    return tracemalloc_malloc(ctx, size, 0);
}

static void*
tracemalloc_raw_realloc(void *ctx, void *ptr, size_t new_size)
{
    return tracemalloc_realloc(ctx, ptr, new_size, 0);
}
#endif
+
/* _Py_hashtable_foreach() callback: drop the reference that
   tracemalloc_filenames holds on an interned filename. */
static int
tracemalloc_clear_filename(_Py_hashtable_entry_t *entry, void *user_data)
{
    PyObject *filename = (PyObject *)entry->key;
    Py_DECREF(filename);
    return 0;
}

/* _Py_hashtable_foreach() callback: free an interned traceback
   allocated by raw_malloc() in traceback_new(). */
static int
traceback_free_traceback(_Py_hashtable_entry_t *entry, void *user_data)
{
    traceback_t *traceback = (traceback_t *)entry->key;
    raw_free(traceback);
    return 0;
}
+
/* reentrant flag must be set to call this function and GIL must be held.
   Empty all three tables and reset the traced-memory counters, releasing
   the interned tracebacks and filename references. */
static void
tracemalloc_clear_traces(void)
{
#ifdef WITH_THREAD
    /* The GIL protects variables against concurrent access */
    assert(PyGILState_Check());
#endif

    /* Disable also reentrant calls to tracemalloc_malloc() to not add a new
       trace while we are clearing traces */
    assert(get_reentrant());

    TABLES_LOCK();
    _Py_hashtable_clear(tracemalloc_traces);
    tracemalloc_traced_memory = 0;
    tracemalloc_max_traced_memory = 0;
    TABLES_UNLOCK();

    _Py_hashtable_foreach(tracemalloc_tracebacks, traceback_free_traceback, NULL);
    _Py_hashtable_clear(tracemalloc_tracebacks);

    _Py_hashtable_foreach(tracemalloc_filenames, tracemalloc_clear_filename, NULL);
    _Py_hashtable_clear(tracemalloc_filenames);
}
+
/* One-time initialization: save the raw allocator, create the TLS key and
   the tables lock, allocate the three hash tables and the <unknown>
   filename. Idempotent; fails permanently once finalized.
   Return 0 on success, -1 (with an exception set) on error. */
static int
tracemalloc_init(void)
{
    if (tracemalloc_config.initialized == TRACEMALLOC_FINALIZED) {
        PyErr_SetString(PyExc_RuntimeError,
                        "the tracemalloc module has been unloaded");
        return -1;
    }

    if (tracemalloc_config.initialized == TRACEMALLOC_INITIALIZED)
        return 0;

    /* needed by raw_malloc()/raw_free() before the hooks are installed */
    PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &allocators.raw);

#ifdef REENTRANT_THREADLOCAL
    tracemalloc_reentrant_key = PyThread_create_key();
    if (tracemalloc_reentrant_key == -1) {
#ifdef MS_WINDOWS
        PyErr_SetFromWindowsErr(0);
#else
        PyErr_SetFromErrno(PyExc_OSError);
#endif
        return -1;
    }
#endif

#if defined(WITH_THREAD) && defined(TRACE_RAW_MALLOC)
    if (tables_lock == NULL) {
        tables_lock = PyThread_allocate_lock();
        if (tables_lock == NULL) {
            PyErr_SetString(PyExc_RuntimeError, "cannot allocate lock");
            return -1;
        }
    }
#endif

    tracemalloc_filenames = hashtable_new(0,
                              (_Py_hashtable_hash_func)PyObject_Hash,
                              hashtable_compare_unicode);

    tracemalloc_tracebacks = hashtable_new(0,
                               (_Py_hashtable_hash_func)hashtable_hash_traceback,
                               (_Py_hashtable_compare_func)hashtable_compare_traceback);

    tracemalloc_traces = hashtable_new(sizeof(trace_t),
                                       _Py_hashtable_hash_ptr,
                                       _Py_hashtable_compare_direct);

    /* NOTE(review): on partial failure the tables that were created are
       not destroyed here — verify this is acceptable (init failure is
       treated as fatal by callers). */
    if (tracemalloc_filenames == NULL || tracemalloc_tracebacks == NULL
       || tracemalloc_traces == NULL)
    {
        PyErr_NoMemory();
        return -1;
    }

    unknown_filename = PyUnicode_FromString("<unknown>");
    if (unknown_filename == NULL)
        return -1;
    PyUnicode_InternInPlace(&unknown_filename);

    tracemalloc_empty_traceback.nframe = 1;
    /* borrowed reference */
    tracemalloc_empty_traceback.frames[0].filename = unknown_filename;
    tracemalloc_empty_traceback.frames[0].lineno = 0;
    tracemalloc_empty_traceback.hash = traceback_hash(&tracemalloc_empty_traceback);

    /* Disable tracing allocations until hooks are installed. Set
       also the reentrant flag to detect bugs: fail with an assertion error
       if set_reentrant(1) is called while tracing is disabled. */
    set_reentrant(1);

    tracemalloc_config.initialized = TRACEMALLOC_INITIALIZED;
    return 0;
}
+
/* Final teardown (run from the atexit handler): stop tracing, destroy the
   hash tables, the tables lock and the TLS key. The module cannot be
   re-initialized afterwards (state goes to FINALIZED). */
static void
tracemalloc_deinit(void)
{
    if (tracemalloc_config.initialized != TRACEMALLOC_INITIALIZED)
        return;
    tracemalloc_config.initialized = TRACEMALLOC_FINALIZED;

    tracemalloc_stop();

    /* destroy hash tables */
    _Py_hashtable_destroy(tracemalloc_traces);
    _Py_hashtable_destroy(tracemalloc_tracebacks);
    _Py_hashtable_destroy(tracemalloc_filenames);

#if defined(WITH_THREAD) && defined(TRACE_RAW_MALLOC)
    if (tables_lock != NULL) {
        PyThread_free_lock(tables_lock);
        tables_lock = NULL;
    }
#endif

#ifdef REENTRANT_THREADLOCAL
    PyThread_delete_key(tracemalloc_reentrant_key);
#endif

    Py_XDECREF(unknown_filename);
}
+
/* Install the tracemalloc hooks on the RAW, MEM and OBJ allocator domains
   and start tracing. Saves the previous allocators so tracemalloc_stop()
   can restore them. No-op if already tracing.
   Return 0 on success, -1 (with an exception set) on error. */
static int
tracemalloc_start(void)
{
    PyMemAllocator alloc;

    if (tracemalloc_init() < 0)
        return -1;

    if (tracemalloc_config.tracing) {
        /* hook already installed: do nothing */
        return 0;
    }

    if (tracemalloc_atexit_register() < 0)
        return -1;

#ifdef TRACE_RAW_MALLOC
    alloc.malloc = tracemalloc_raw_malloc;
    alloc.realloc = tracemalloc_raw_realloc;
    alloc.free = tracemalloc_free;

    /* ctx points at the saved allocator, filled just below */
    alloc.ctx = &allocators.raw;
    PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &allocators.raw);
    PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
#endif

    alloc.malloc = tracemalloc_malloc_gil;
    alloc.realloc = tracemalloc_realloc_gil;
    alloc.free = tracemalloc_free;

    alloc.ctx = &allocators.mem;
    PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &allocators.mem);
    PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);

    alloc.ctx = &allocators.obj;
    PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &allocators.obj);
    PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);

    /* everything is ready: start tracing Python memory allocations */
    tracemalloc_config.tracing = 1;
    set_reentrant(0);

    return 0;
}
+
/* Stop tracing: restore the saved allocators and release all traces.
   No-op if not currently tracing. */
static void
tracemalloc_stop(void)
{
    if (!tracemalloc_config.tracing)
        return;

    /* stop tracing Python memory allocations */
    tracemalloc_config.tracing = 0;

    /* set the reentrant flag to detect bugs: fail with an assertion error if
       set_reentrant(1) is called while tracing is disabled. */
    set_reentrant(1);

    /* unregister the hook on memory allocators */
#ifdef TRACE_RAW_MALLOC
    PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &allocators.raw);
#endif
    PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &allocators.mem);
    PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &allocators.obj);

    /* release memory */
    tracemalloc_clear_traces();
}
+
+
+static PyObject*
+lineno_as_obj(int lineno)
+{
+ if (lineno >= 0)
+ return PyLong_FromLong(lineno);
+ else
+ Py_RETURN_NONE;
+}
+
+PyDoc_STRVAR(tracemalloc_is_tracing_doc,
+ "is_tracing()->bool\n"
+ "\n"
+ "True if the tracemalloc module is tracing Python memory allocations,\n"
+ "False otherwise.");
+
/* tracemalloc.is_tracing(): return True if tracing is active. */
static PyObject*
py_tracemalloc_is_tracing(PyObject *self)
{
    return PyBool_FromLong(tracemalloc_config.tracing);
}
+
+PyDoc_STRVAR(tracemalloc_clear_traces_doc,
+ "clear_traces()\n"
+ "\n"
+ "Clear traces of memory blocks allocated by Python.");
+
/* tracemalloc.clear_traces(): drop all collected traces.
   No-op when tracing is disabled. */
static PyObject*
py_tracemalloc_clear_traces(PyObject *self)
{
    if (!tracemalloc_config.tracing)
        Py_RETURN_NONE;

    /* block the hooks while the tables are being emptied */
    set_reentrant(1);
    tracemalloc_clear_traces();
    set_reentrant(0);

    Py_RETURN_NONE;
}
+
/* Convert a frame_t to a (filename: str, lineno: int) tuple.
   Return a new reference, or NULL on error. */
static PyObject*
frame_to_pyobject(frame_t *frame)
{
    PyObject *frame_obj, *lineno_obj;

    frame_obj = PyTuple_New(2);
    if (frame_obj == NULL)
        return NULL;

    /* defensive: substitute None for a missing filename */
    if (frame->filename == NULL)
        frame->filename = Py_None;
    Py_INCREF(frame->filename);
    PyTuple_SET_ITEM(frame_obj, 0, frame->filename);

    assert(frame->lineno >= 0);
    lineno_obj = lineno_as_obj(frame->lineno);
    if (lineno_obj == NULL) {
        Py_DECREF(frame_obj);
        return NULL;
    }
    PyTuple_SET_ITEM(frame_obj, 1, lineno_obj);

    return frame_obj;
}
+
/* Convert a traceback_t to a tuple of (filename, lineno) frame tuples.
   If intern_table is non-NULL it caches traceback -> tuple so repeated
   tracebacks share one tuple (the table owns one reference per entry).
   Return a new reference, or NULL on error. */
static PyObject*
traceback_to_pyobject(traceback_t *traceback, _Py_hashtable_t *intern_table)
{
    int i;
    PyObject *frames, *frame;

    if (intern_table != NULL) {
        if (_Py_HASHTABLE_GET(intern_table, traceback, frames)) {
            /* cache hit: return a new reference to the shared tuple */
            Py_INCREF(frames);
            return frames;
        }
    }

    frames = PyTuple_New(traceback->nframe);
    if (frames == NULL)
        return NULL;

    for (i=0; i < traceback->nframe; i++) {
        frame = frame_to_pyobject(&traceback->frames[i]);
        if (frame == NULL) {
            Py_DECREF(frames);
            return NULL;
        }
        PyTuple_SET_ITEM(frames, i, frame);
    }

    if (intern_table != NULL) {
        if (_Py_HASHTABLE_SET(intern_table, traceback, frames) < 0) {
            Py_DECREF(frames);
            PyErr_NoMemory();
            return NULL;
        }
        /* intern_table keeps a new reference to frames */
        Py_INCREF(frames);
    }
    return frames;
}
+
/* Convert a trace_t to a (size: int, traceback: tuple) tuple.
   Return a new reference, or NULL on error. */
static PyObject*
trace_to_pyobject(trace_t *trace, _Py_hashtable_t *intern_tracebacks)
{
    PyObject *trace_obj = NULL;
    PyObject *size, *traceback;

    trace_obj = PyTuple_New(2);
    if (trace_obj == NULL)
        return NULL;

    size = PyLong_FromSize_t(trace->size);
    if (size == NULL) {
        Py_DECREF(trace_obj);
        return NULL;
    }
    PyTuple_SET_ITEM(trace_obj, 0, size);

    traceback = traceback_to_pyobject(trace->traceback, intern_tracebacks);
    if (traceback == NULL) {
        Py_DECREF(trace_obj);
        return NULL;
    }
    PyTuple_SET_ITEM(trace_obj, 1, traceback);

    return trace_obj;
}
+
+typedef struct {
+ _Py_hashtable_t *traces;
+ _Py_hashtable_t *tracebacks;
+ PyObject *list;
+} get_traces_t;
+
/* _Py_hashtable_foreach() callback for get_traces(): convert one trace to
   a Python tuple and append it to the result list. Return non-zero to
   abort the iteration on error. */
static int
tracemalloc_get_traces_fill(_Py_hashtable_entry_t *entry, void *user_data)
{
    get_traces_t *get_traces = user_data;
    trace_t *trace;
    PyObject *tracemalloc_obj;
    int res;

    trace = (trace_t *)_PY_HASHTABLE_ENTRY_DATA(entry);

    tracemalloc_obj = trace_to_pyobject(trace, get_traces->tracebacks);
    if (tracemalloc_obj == NULL)
        return 1;

    res = PyList_Append(get_traces->list, tracemalloc_obj);
    Py_DECREF(tracemalloc_obj);
    if (res < 0)
        return 1;

    return 0;
}
+
/* _Py_hashtable_foreach() callback: release the reference held in the
   entry data (used to empty the traceback intern table). */
static int
tracemalloc_pyobject_decref_cb(_Py_hashtable_entry_t *entry, void *user_data)
{
    PyObject *obj = (PyObject *)_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
    Py_DECREF(obj);
    return 0;
}
+
+PyDoc_STRVAR(tracemalloc_get_traces_doc,
+ "get_traces() -> list\n"
+ "\n"
+ "Get traces of all memory blocks allocated by Python.\n"
+ "Return a list of (size: int, traceback: tuple) tuples.\n"
+ "traceback is a tuple of (filename: str, lineno: int) tuples.\n"
+ "\n"
+ "Return an empty list if the tracemalloc module is disabled.");
+
/* tracemalloc.get_traces(): return a list of (size, traceback) tuples for
   all currently traced blocks, or an empty list if tracing is disabled.
   Works on a snapshot copy of the traces table so the hooks can keep
   running; the 'obj' argument is unused (METH_NOARGS-style). */
static PyObject*
py_tracemalloc_get_traces(PyObject *self, PyObject *obj)
{
    get_traces_t get_traces;
    int err;

    get_traces.traces = NULL;
    get_traces.tracebacks = NULL;
    get_traces.list = PyList_New(0);
    if (get_traces.list == NULL)
        goto error;

    if (!tracemalloc_config.tracing)
        return get_traces.list;

    /* intern table: one Python tuple per distinct traceback */
    get_traces.tracebacks = hashtable_new(sizeof(PyObject *),
                                          _Py_hashtable_hash_ptr,
                                          _Py_hashtable_compare_direct);
    if (get_traces.tracebacks == NULL) {
        PyErr_NoMemory();
        goto error;
    }

    /* snapshot the traces under the lock */
    TABLES_LOCK();
    get_traces.traces = _Py_hashtable_copy(tracemalloc_traces);
    TABLES_UNLOCK();

    if (get_traces.traces == NULL) {
        PyErr_NoMemory();
        goto error;
    }

    /* don't trace the allocations done while building the result */
    set_reentrant(1);
    err = _Py_hashtable_foreach(get_traces.traces,
                                tracemalloc_get_traces_fill, &get_traces);
    set_reentrant(0);
    if (err)
        goto error;

    goto finally;

error:
    Py_CLEAR(get_traces.list);

finally:
    if (get_traces.tracebacks != NULL) {
        _Py_hashtable_foreach(get_traces.tracebacks,
                              tracemalloc_pyobject_decref_cb, NULL);
        _Py_hashtable_destroy(get_traces.tracebacks);
    }
    if (get_traces.traces != NULL)
        _Py_hashtable_destroy(get_traces.traces);

    return get_traces.list;
}
+
+PyDoc_STRVAR(tracemalloc_get_object_traceback_doc,
+ "get_object_traceback(obj)\n"
+ "\n"
+ "Get the traceback where the Python object obj was allocated.\n"
+ "Return a tuple of (filename: str, lineno: int) tuples.\n"
+ "\n"
+ "Return None if the tracemalloc module is disabled or did not\n"
+ "trace the allocation of the object.");
+
/* tracemalloc.get_object_traceback(obj): look up the trace of the memory
   block holding obj (adjusting for the GC header of GC-tracked types) and
   return its traceback, or None if unknown / tracing disabled. */
static PyObject*
py_tracemalloc_get_object_traceback(PyObject *self, PyObject *obj)
{
    PyTypeObject *type;
    void *ptr;
    trace_t trace;
    int found;

    if (!tracemalloc_config.tracing)
        Py_RETURN_NONE;

    type = Py_TYPE(obj);
    if (PyType_IS_GC(type))
        /* GC objects are allocated with a PyGC_Head prefix */
        ptr = (void *)((char *)obj - sizeof(PyGC_Head));
    else
        ptr = (void *)obj;

    TABLES_LOCK();
    found = _Py_HASHTABLE_GET(tracemalloc_traces, ptr, trace);
    TABLES_UNLOCK();

    if (!found)
        Py_RETURN_NONE;

    return traceback_to_pyobject(trace.traceback, NULL);
}
+
/* Private _atexit callback: fully tear down tracemalloc at interpreter
   exit. */
static PyObject*
tracemalloc_atexit(PyObject *self)
{
#ifdef WITH_THREAD
    assert(PyGILState_Check());
#endif
    tracemalloc_deinit();
    Py_RETURN_NONE;
}
+
+static PyMethodDef atexit_method = {
+ "_atexit", (PyCFunction)tracemalloc_atexit, METH_NOARGS, NULL};
+
+static int
+tracemalloc_atexit_register(void)
+{
+ PyObject *method = NULL, *atexit = NULL, *func = NULL;
+ PyObject *result;
+ int ret = -1;
+
+ if (tracemalloc_config.atexit_registered)
+ return 0;
+ tracemalloc_config.atexit_registered = 1;
+
+ /* private functions */
+ method = PyCFunction_New(&atexit_method, NULL);
+ if (method == NULL)
+ goto done;
+
+ atexit = PyImport_ImportModule("atexit");
+ if (atexit == NULL) {
+ if (!PyErr_Warn(PyExc_ImportWarning,
+ "atexit module is missing: "
+ "cannot automatically disable tracemalloc at exit"))
+ {
+ PyErr_Clear();
+ return 0;
+ }
+ goto done;
+ }
+
+ func = PyObject_GetAttrString(atexit, "register");
+ if (func == NULL)
+ goto done;
+
+ result = PyObject_CallFunction(func, "O", method);
+ if (result == NULL)
+ goto done;
+ Py_DECREF(result);
+
+ ret = 0;
+
+done:
+ Py_XDECREF(method);
+ Py_XDECREF(func);
+ Py_XDECREF(atexit);
+ return ret;
+}
+
+PyDoc_STRVAR(tracemalloc_start_doc,
+ "start()\n"
+ "\n"
+ "Start tracing Python memory allocations.");
+
/* tracemalloc.start(): begin tracing memory allocations. */
static PyObject*
py_tracemalloc_start(PyObject *self)
{
    if (tracemalloc_start() < 0)
        return NULL;

    Py_RETURN_NONE;
}
+
+PyDoc_STRVAR(tracemalloc_stop_doc,
+ "stop()\n"
+ "\n"
+ "Stop tracing Python memory allocations and clear traces\n"
+ "of memory blocks allocated by Python.");
+
/* tracemalloc.stop(): stop tracing and clear all collected traces. */
static PyObject*
py_tracemalloc_stop(PyObject *self)
{
    tracemalloc_stop();
    Py_RETURN_NONE;
}
+
PyDoc_STRVAR(tracemalloc_get_traceback_limit_doc,
    "get_traceback_limit() -> int\n"
    "\n"
    "Get the maximum number of frames stored in the traceback\n"
    "of a trace.\n"
    "\n"
    "By default, a trace of an allocated memory block only stores\n"
    "the most recent frame: the limit is 1.");

/* Python binding of tracemalloc.get_traceback_limit(): return the
   configured maximum number of frames per traceback. */
static PyObject*
py_tracemalloc_get_traceback_limit(PyObject *self)
{
    return PyLong_FromLong(tracemalloc_config.max_nframe);
}
+
+PyDoc_STRVAR(tracemalloc_set_traceback_limit_doc,
+ "set_traceback_limit(nframe: int)\n"
+ "\n"
+ "Set the maximum number of frames stored in the traceback of a trace.");
+
+static PyObject*
+tracemalloc_set_traceback_limit(PyObject *self, PyObject *args)
+{
+ Py_ssize_t nframe;
+
+ if (!PyArg_ParseTuple(args, "n:set_traceback_limit",
+ &nframe))
+ return NULL;
+
+ if (nframe < 1 || nframe > MAX_NFRAME) {
+ PyErr_Format(PyExc_ValueError,
+ "the number of frames must be in range [1; %i]",
+ MAX_NFRAME);
+ return NULL;
+ }
+ tracemalloc_config.max_nframe = Py_SAFE_DOWNCAST(nframe, Py_ssize_t, int);
+
+ Py_RETURN_NONE;
+}
+
+PyDoc_STRVAR(tracemalloc_get_tracemalloc_memory_doc,
+ "get_tracemalloc_memory() -> int\n"
+ "\n"
+ "Get the memory usage in bytes of the tracemalloc module\n"
+ "used internally to trace memory allocations.");
+
+static PyObject*
+tracemalloc_get_tracemalloc_memory(PyObject *self)
+{
+ size_t size;
+ PyObject *size_obj;
+
+ size = _Py_hashtable_size(tracemalloc_tracebacks);
+ size += _Py_hashtable_size(tracemalloc_filenames);
+
+ TABLES_LOCK();
+ size += _Py_hashtable_size(tracemalloc_traces);
+ TABLES_UNLOCK();
+
+ size_obj = PyLong_FromSize_t(size);
+ return Py_BuildValue("N", size_obj);
+}
+
+PyDoc_STRVAR(tracemalloc_get_traced_memory_doc,
+ "get_traced_memory() -> int\n"
+ "\n"
+ "Get the current size and maximum size of memory blocks traced\n"
+ "by the tracemalloc module as a tuple: (size: int, max_size: int).");
+
+static PyObject*
+tracemalloc_get_traced_memory(PyObject *self)
+{
+ Py_ssize_t size, max_size;
+ PyObject *size_obj, *max_size_obj;
+
+ if (!tracemalloc_config.tracing)
+ return Py_BuildValue("ii", 0, 0);
+
+ TABLES_LOCK();
+ size = tracemalloc_traced_memory;
+ max_size = tracemalloc_max_traced_memory;
+ TABLES_UNLOCK();
+
+ size_obj = PyLong_FromSize_t(size);
+ max_size_obj = PyLong_FromSize_t(max_size);
+ return Py_BuildValue("NN", size_obj, max_size_obj);
+}
+
/* Method table of the _tracemalloc module. Names starting with an
   underscore are private helpers wrapped by Lib/tracemalloc.py. */
static PyMethodDef module_methods[] = {
    {"is_tracing", (PyCFunction)py_tracemalloc_is_tracing,
     METH_NOARGS, tracemalloc_is_tracing_doc},
    {"clear_traces", (PyCFunction)py_tracemalloc_clear_traces,
     METH_NOARGS, tracemalloc_clear_traces_doc},
    {"_get_traces", (PyCFunction)py_tracemalloc_get_traces,
     METH_NOARGS, tracemalloc_get_traces_doc},
    {"_get_object_traceback", (PyCFunction)py_tracemalloc_get_object_traceback,
     METH_O, tracemalloc_get_object_traceback_doc},
    {"start", (PyCFunction)py_tracemalloc_start,
      METH_NOARGS, tracemalloc_start_doc},
    {"stop", (PyCFunction)py_tracemalloc_stop,
      METH_NOARGS, tracemalloc_stop_doc},
    {"get_traceback_limit", (PyCFunction)py_tracemalloc_get_traceback_limit,
     METH_NOARGS, tracemalloc_get_traceback_limit_doc},
    {"set_traceback_limit", (PyCFunction)tracemalloc_set_traceback_limit,
     METH_VARARGS, tracemalloc_set_traceback_limit_doc},
    {"get_tracemalloc_memory", (PyCFunction)tracemalloc_get_tracemalloc_memory,
     METH_NOARGS, tracemalloc_get_tracemalloc_memory_doc},
    {"get_traced_memory", (PyCFunction)tracemalloc_get_traced_memory,
     METH_NOARGS, tracemalloc_get_traced_memory_doc},

    /* sentinel */
    {NULL, NULL}
};
+
PyDoc_STRVAR(module_doc,
"Debug module to trace memory blocks allocated by Python.");

/* Module definition of the _tracemalloc extension module */
static struct PyModuleDef module_def = {
    PyModuleDef_HEAD_INIT,
    "_tracemalloc",
    module_doc,
    0, /* non-negative size to be able to unload the module */
    module_methods,
    NULL,
};
+
+PyMODINIT_FUNC
+PyInit__tracemalloc(void)
+{
+ PyObject *m;
+ m = PyModule_Create(&module_def);
+ if (m == NULL)
+ return NULL;
+
+ if (tracemalloc_init() < 0)
+ return NULL;
+
+ return m;
+}
+
/* Parse the value of the "-X tracemalloc[=NFRAME]" command line option.

   value is the object stored in sys._xoptions: Py_True when the option has
   no explicit value, otherwise a str. Return the number of frames (>= 1,
   1 for a bare "-X tracemalloc") on success, or -1 if the value is empty,
   not a valid integer, or outside [1; MAX_NFRAME]. On -1, an exception may
   or may not be set depending on the failure. */
static int
parse_sys_xoptions(PyObject *value)
{
    PyObject *valuelong;
    long nframe;

    /* "-X tracemalloc" without a value: default to 1 frame */
    if (value == Py_True)
        return 1;

    assert(PyUnicode_Check(value));
    if (PyUnicode_GetLength(value) == 0)
        return -1;

    valuelong = PyLong_FromUnicodeObject(value, 10);
    if (valuelong == NULL)
        return -1;

    nframe = PyLong_AsLong(valuelong);
    Py_DECREF(valuelong);
    if (nframe == -1 && PyErr_Occurred())
        return -1;

    if (nframe < 1 || nframe > MAX_NFRAME)
        return -1;

    return Py_SAFE_DOWNCAST(nframe, long, int);
}
+
+int
+_PyTraceMalloc_Init(void)
+{
+ char *p;
+ int nframe;
+
+#ifdef WITH_THREAD
+ assert(PyGILState_Check());
+#endif
+
+ if ((p = Py_GETENV("PYTHONTRACEMALLOC")) && *p != '\0') {
+ char *endptr = p;
+ unsigned long value;
+
+ value = strtoul(p, &endptr, 10);
+ if (*endptr != '\0'
+ || value < 1
+ || value > MAX_NFRAME
+ || (errno == ERANGE && value == ULONG_MAX))
+ {
+ Py_FatalError("PYTHONTRACEMALLOC must be an integer "
+ "in range [1; " STR(MAX_NFRAME) "]");
+ return -1;
+ }
+
+ nframe = (int)value;
+ }
+ else {
+ PyObject *xoptions, *key, *value;
+
+ xoptions = PySys_GetXOptions();
+ if (xoptions == NULL)
+ return -1;
+
+ key = PyUnicode_FromString("tracemalloc");
+ if (key == NULL)
+ return -1;
+
+ value = PyDict_GetItemWithError(xoptions, key);
+ Py_DECREF(key);
+ if (value == NULL) {
+ if (PyErr_Occurred())
+ return -1;
+
+ /* -X tracemalloc is not used */
+ return 0;
+ }
+
+ nframe = parse_sys_xoptions(value);
+ Py_DECREF(value);
+ if (nframe < 0) {
+ Py_FatalError("-X tracemalloc=NFRAME: number of frame must be "
+ "an integer in range [1; " STR(MAX_NFRAME) "]");
+ }
+ }
+
+ tracemalloc_config.max_nframe = nframe;
+ return tracemalloc_start();
+}
+
diff --git a/Modules/hashtable.c b/Modules/hashtable.c
new file mode 100644
index 0000000..221ed53
--- /dev/null
+++ b/Modules/hashtable.c
@@ -0,0 +1,518 @@
+/* The implementation of the hash table (_Py_hashtable_t) is based on the cfuhash
+ project:
+ http://sourceforge.net/projects/libcfu/
+
+ Copyright of cfuhash:
+ ----------------------------------
+ Creation date: 2005-06-24 21:22:40
+ Authors: Don
+ Change log:
+
+ Copyright (c) 2005 Don Owens
+ All rights reserved.
+
+ This code is released under the BSD license:
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ * Neither the name of the author nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ OF THE POSSIBILITY OF SUCH DAMAGE.
+ ----------------------------------
+*/
+
+#include "Python.h"
+#include "hashtable.h"
+
/* Tuning parameters of the hash table: minimum number of buckets, and the
   load-factor bounds that trigger growing/shrinking rehashes. */
#define HASHTABLE_MIN_SIZE 16
#define HASHTABLE_HIGH 0.50
#define HASHTABLE_LOW 0.10
/* bug fix: fully parenthesize the expansion; the original unparenthesized
   "2.0 / (LOW + HIGH)" changed meaning inside larger expressions
   (e.g. "x * FACTOR" expanded to "(x * 2.0) / ..."). */
#define HASHTABLE_REHASH_FACTOR (2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH))
+
/* Accessors for the intrusive singly-linked lists used as buckets:
   head of a list, head of bucket BUCKET, next entry, and the full
   allocation size of one entry (header + data_size bytes). */
#define BUCKETS_HEAD(SLIST) \
    ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
#define TABLE_HEAD(HT, BUCKET) \
    ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
#define ENTRY_NEXT(ENTRY) \
    ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
#define HASHTABLE_ITEM_SIZE(HT) \
    (sizeof(_Py_hashtable_entry_t) + (HT)->data_size)

/* Forward declaration */
static void hashtable_rehash(_Py_hashtable_t *ht);
+
/* Initialize a singly-linked list to the empty list. */
static void
_Py_slist_init(_Py_slist_t *list)
{
    list->head = NULL;
}
+
/* Insert item at the head of the list (O(1)). */
static void
_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
{
    item->next = list->head;
    list->head = item;
}
+
+static void
+_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
+ _Py_slist_item_t *item)
+{
+ if (previous != NULL)
+ previous->next = item->next;
+ else
+ list->head = item->next;
+}
+
/* Hash function for keys that are integers stored directly in the key
   pointer: the pointer value itself is the hash. */
Py_uhash_t
_Py_hashtable_hash_int(const void *key)
{
    return (Py_uhash_t)key;
}
+
/* Hash function for pointer keys: hash the pointer value with Python's
   generic pointer hash. */
Py_uhash_t
_Py_hashtable_hash_ptr(const void *key)
{
    return (Py_uhash_t)_Py_HashPointer((void *)key);
}
+
/* Key comparison by direct pointer/value equality (no dereference). */
int
_Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry)
{
    return entry->key == key;
}
+
+/* makes sure the real size of the buckets array is a power of 2 */
+static size_t
+round_size(size_t s)
+{
+ size_t i;
+ if (s < HASHTABLE_MIN_SIZE)
+ return HASHTABLE_MIN_SIZE;
+ i = 1;
+ while (i < s)
+ i <<= 1;
+ return i;
+}
+
+_Py_hashtable_t *
+_Py_hashtable_new_full(size_t data_size, size_t init_size,
+ _Py_hashtable_hash_func hash_func,
+ _Py_hashtable_compare_func compare_func,
+ _Py_hashtable_copy_data_func copy_data_func,
+ _Py_hashtable_free_data_func free_data_func,
+ _Py_hashtable_get_data_size_func get_data_size_func,
+ _Py_hashtable_allocator_t *allocator)
+{
+ _Py_hashtable_t *ht;
+ size_t buckets_size;
+ _Py_hashtable_allocator_t alloc;
+
+ if (allocator == NULL) {
+ alloc.malloc = PyMem_RawMalloc;
+ alloc.free = PyMem_RawFree;
+ }
+ else
+ alloc = *allocator;
+
+ ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
+ if (ht == NULL)
+ return ht;
+
+ ht->num_buckets = round_size(init_size);
+ ht->entries = 0;
+ ht->data_size = data_size;
+
+ buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
+ ht->buckets = alloc.malloc(buckets_size);
+ if (ht->buckets == NULL) {
+ alloc.free(ht);
+ return NULL;
+ }
+ memset(ht->buckets, 0, buckets_size);
+
+ ht->hash_func = hash_func;
+ ht->compare_func = compare_func;
+ ht->copy_data_func = copy_data_func;
+ ht->free_data_func = free_data_func;
+ ht->get_data_size_func = get_data_size_func;
+ ht->alloc = alloc;
+ return ht;
+}
+
/* Convenience constructor: default initial size, default allocator,
   and no data callbacks. Return NULL on memory error. */
_Py_hashtable_t *
_Py_hashtable_new(size_t data_size,
                  _Py_hashtable_hash_func hash_func,
                  _Py_hashtable_compare_func compare_func)
{
    return _Py_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE,
                                  hash_func, compare_func,
                                  NULL, NULL, NULL, NULL);
}
+
/* Return the memory usage of the hash table in bytes: the table object,
   the buckets array, all entries, and (if get_data_size_func is set) the
   memory referenced by each entry's data. */
size_t
_Py_hashtable_size(_Py_hashtable_t *ht)
{
    size_t size;
    size_t hv;

    size = sizeof(_Py_hashtable_t);

    /* buckets */
    size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *);

    /* entries */
    size += ht->entries * HASHTABLE_ITEM_SIZE(ht);

    /* data linked from entries */
    if (ht->get_data_size_func) {
        for (hv = 0; hv < ht->num_buckets; hv++) {
            _Py_hashtable_entry_t *entry;

            for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
                void *data;

                data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
                size += ht->get_data_size_func(data);
            }
        }
    }
    return size;
}
+
#ifdef Py_DEBUG
/* Debug helper: print load factor, chain-length statistics and memory
   usage of the hash table to stdout. Only built in debug mode. */
void
_Py_hashtable_print_stats(_Py_hashtable_t *ht)
{
    size_t size;
    size_t chain_len, max_chain_len, total_chain_len, nchains;
    _Py_hashtable_entry_t *entry;
    size_t hv;
    double load;

    size = _Py_hashtable_size(ht);

    load = (double)ht->entries / ht->num_buckets;

    max_chain_len = 0;
    total_chain_len = 0;
    nchains = 0;
    for (hv = 0; hv < ht->num_buckets; hv++) {
        entry = TABLE_HEAD(ht, hv);
        if (entry != NULL) {
            chain_len = 0;
            for (; entry; entry = ENTRY_NEXT(entry)) {
                chain_len++;
            }
            if (chain_len > max_chain_len)
                max_chain_len = chain_len;
            total_chain_len += chain_len;
            nchains++;
        }
    }
    /* NOTE(review): ht should arguably be cast to (void *) for %p */
    printf("hash table %p: entries=%zu/%zu (%.0f%%), ",
           ht, ht->entries, ht->num_buckets, load * 100.0);
    if (nchains)
        printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains);
    printf("max_chain_len=%zu, %zu kB\n",
           max_chain_len, size / 1024);
}
#endif
+
+/* Get an entry. Return NULL if the key does not exist. */
+_Py_hashtable_entry_t *
+_Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key)
+{
+ Py_uhash_t key_hash;
+ size_t index;
+ _Py_hashtable_entry_t *entry;
+
+ key_hash = ht->hash_func(key);
+ index = key_hash & (ht->num_buckets - 1);
+
+ for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
+ if (entry->key_hash == key_hash && ht->compare_func(key, entry))
+ break;
+ }
+
+ return entry;
+}
+
/* Remove the entry for key from the table. If data is not NULL, copy the
   entry's data into it before the entry is freed. Return 1 if the key was
   found and removed, 0 otherwise. May shrink the table if the load factor
   drops below HASHTABLE_LOW. */
static int
_hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
{
    Py_uhash_t key_hash;
    size_t index;
    _Py_hashtable_entry_t *entry, *previous;

    key_hash = ht->hash_func(key);
    index = key_hash & (ht->num_buckets - 1);

    /* track the previous entry: needed to unlink from the singly-linked
       bucket list */
    previous = NULL;
    for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
        if (entry->key_hash == key_hash && ht->compare_func(key, entry))
            break;
        previous = entry;
    }

    if (entry == NULL)
        return 0;

    _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
                     (_Py_slist_item_t *)entry);
    ht->entries--;

    /* copy the data out before freeing the entry that holds it */
    if (data != NULL)
        _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
    ht->alloc.free(entry);

    if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW)
        hashtable_rehash(ht);
    return 1;
}
+
/* Add a new entry to the hash. The key must not be present in the hash table.
   data_size bytes are copied from data into the entry (data_size must equal
   ht->data_size). Return 0 on success, -1 on memory error. May grow the
   table if the load factor exceeds HASHTABLE_HIGH. */
int
_Py_hashtable_set(_Py_hashtable_t *ht, const void *key,
                  void *data, size_t data_size)
{
    Py_uhash_t key_hash;
    size_t index;
    _Py_hashtable_entry_t *entry;

    assert(data != NULL || data_size == 0);
#ifndef NDEBUG
    /* Don't write the assertion on a single line because it is interesting
       to know the duplicated entry if the assertion failed. The entry can
       be read using a debugger. */
    entry = _Py_hashtable_get_entry(ht, key);
    assert(entry == NULL);
#endif

    key_hash = ht->hash_func(key);
    index = key_hash & (ht->num_buckets - 1);

    /* one allocation holds the entry header plus its inline data */
    entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht));
    if (entry == NULL) {
        /* memory allocation failed */
        return -1;
    }

    entry->key = (void *)key;
    entry->key_hash = key_hash;

    assert(data_size == ht->data_size);
    memcpy(_PY_HASHTABLE_ENTRY_DATA(entry), data, data_size);

    _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
    ht->entries++;

    if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH)
        hashtable_rehash(ht);
    return 0;
}
+
/* Get data from an entry. Copy entry data into data and return 1 if the entry
   exists, return 0 if the entry does not exist. data_size must equal
   ht->data_size. */
int
_Py_hashtable_get(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
{
    _Py_hashtable_entry_t *entry;

    assert(data != NULL);

    entry = _Py_hashtable_get_entry(ht, key);
    if (entry == NULL)
        return 0;
    _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
    return 1;
}
+
/* Remove key and copy its data into data. Return 1 if the key was found,
   0 otherwise. Forbidden when free_data_func is set, since ownership of
   the data transfers to the caller without the callback running. */
int
_Py_hashtable_pop(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
{
    assert(data != NULL);
    assert(ht->free_data_func == NULL);
    return _hashtable_pop_entry(ht, key, data, data_size);
}
+
/* Delete an entry. The entry must exist (asserted in debug builds;
   silently ignored in release builds). */
void
_Py_hashtable_delete(_Py_hashtable_t *ht, const void *key)
{
#ifndef NDEBUG
    int found = _hashtable_pop_entry(ht, key, NULL, 0);
    assert(found);
#else
    (void)_hashtable_pop_entry(ht, key, NULL, 0);
#endif
}
+
+/* Prototype for a pointer to a function to be called foreach
+ key/value pair in the hash by hashtable_foreach(). Iteration
+ stops if a non-zero value is returned. */
+int
+_Py_hashtable_foreach(_Py_hashtable_t *ht,
+ int (*func) (_Py_hashtable_entry_t *entry, void *arg),
+ void *arg)
+{
+ _Py_hashtable_entry_t *entry;
+ size_t hv;
+
+ for (hv = 0; hv < ht->num_buckets; hv++) {
+ for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
+ int res = func(entry, arg);
+ if (res)
+ return res;
+ }
+ }
+ return 0;
+}
+
/* Resize the buckets array to fit the current number of entries (targeting
   a load factor between HASHTABLE_LOW and HASHTABLE_HIGH) and redistribute
   all entries. On memory allocation failure the table is left unchanged. */
static void
hashtable_rehash(_Py_hashtable_t *ht)
{
    size_t buckets_size, new_size, bucket;
    _Py_slist_t *old_buckets = NULL;
    size_t old_num_buckets;

    new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR));
    if (new_size == ht->num_buckets)
        return;

    old_num_buckets = ht->num_buckets;

    buckets_size = new_size * sizeof(ht->buckets[0]);
    old_buckets = ht->buckets;
    ht->buckets = ht->alloc.malloc(buckets_size);
    if (ht->buckets == NULL) {
        /* cancel rehash on memory allocation failure */
        ht->buckets = old_buckets ;
        /* memory allocation failed */
        return;
    }
    memset(ht->buckets, 0, buckets_size);

    ht->num_buckets = new_size;

    /* move every entry into its bucket in the new array; entries are
       relinked in place, not reallocated */
    for (bucket = 0; bucket < old_num_buckets; bucket++) {
        _Py_hashtable_entry_t *entry, *next;
        for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
            size_t entry_index;

            assert(ht->hash_func(entry->key) == entry->key_hash);
            next = ENTRY_NEXT(entry);
            entry_index = entry->key_hash & (new_size - 1);

            _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry);
        }
    }

    ht->alloc.free(old_buckets);
}
+
/* Remove all entries from the table, calling free_data_func (if set) on
   each entry's data. The table itself stays usable and is shrunk back
   towards its minimum size. */
void
_Py_hashtable_clear(_Py_hashtable_t *ht)
{
    _Py_hashtable_entry_t *entry, *next;
    size_t i;

    for (i=0; i < ht->num_buckets; i++) {
        for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
            next = ENTRY_NEXT(entry);
            if (ht->free_data_func)
                ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
            ht->alloc.free(entry);
        }
        _Py_slist_init(&ht->buckets[i]);
    }
    ht->entries = 0;
    /* shrink the buckets array now that the table is empty */
    hashtable_rehash(ht);
}
+
+void
+_Py_hashtable_destroy(_Py_hashtable_t *ht)
+{
+ size_t i;
+
+ for (i = 0; i < ht->num_buckets; i++) {
+ _Py_slist_item_t *entry = ht->buckets[i].head;
+ while (entry) {
+ _Py_slist_item_t *entry_next = entry->next;
+ if (ht->free_data_func)
+ ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
+ ht->alloc.free(entry);
+ entry = entry_next;
+ }
+ }
+
+ ht->alloc.free(ht->buckets);
+ ht->alloc.free(ht);
+}
+
+/* Return a copy of the hash table */
+_Py_hashtable_t *
+_Py_hashtable_copy(_Py_hashtable_t *src)
+{
+ _Py_hashtable_t *dst;
+ _Py_hashtable_entry_t *entry;
+ size_t bucket;
+ int err;
+ void *data, *new_data;
+
+ dst = _Py_hashtable_new_full(src->data_size, src->num_buckets,
+ src->hash_func, src->compare_func,
+ src->copy_data_func, src->free_data_func,
+ src->get_data_size_func, &src->alloc);
+ if (dst == NULL)
+ return NULL;
+
+ for (bucket=0; bucket < src->num_buckets; bucket++) {
+ entry = TABLE_HEAD(src, bucket);
+ for (; entry; entry = ENTRY_NEXT(entry)) {
+ if (src->copy_data_func) {
+ data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
+ new_data = src->copy_data_func(data);
+ if (new_data != NULL)
+ err = _Py_hashtable_set(dst, entry->key,
+ &new_data, src->data_size);
+ else
+ err = 1;
+ }
+ else {
+ data = _PY_HASHTABLE_ENTRY_DATA(entry);
+ err = _Py_hashtable_set(dst, entry->key, data, src->data_size);
+ }
+ if (err) {
+ _Py_hashtable_destroy(dst);
+ return NULL;
+ }
+ }
+ }
+ return dst;
+}
+
diff --git a/Modules/hashtable.h b/Modules/hashtable.h
new file mode 100644
index 0000000..539e490
--- /dev/null
+++ b/Modules/hashtable.h
@@ -0,0 +1,128 @@
#ifndef Py_HASHTABLE_H
#define Py_HASHTABLE_H

/* The whole API is private */
#ifndef Py_LIMITED_API

/* Item of a singly-linked list, embedded as the first member of the
   structures it links (intrusive list). */
typedef struct _Py_slist_item_s {
    struct _Py_slist_item_s *next;
} _Py_slist_item_t;

/* Head of a singly-linked list. */
typedef struct {
    _Py_slist_item_t *head;
} _Py_slist_t;

#define _Py_SLIST_ITEM_NEXT(ITEM) (((_Py_slist_item_t *)ITEM)->next)

#define _Py_SLIST_HEAD(SLIST) (((_Py_slist_t *)SLIST)->head)

/* Hash table entry: header followed directly by data_size bytes of data. */
typedef struct {
    /* used by _Py_hashtable_t.buckets to link entries */
    _Py_slist_item_t _Py_slist_item;

    const void *key;
    Py_uhash_t key_hash;

    /* data follows */
} _Py_hashtable_entry_t;

/* Pointer to the data stored just after the entry header. */
#define _PY_HASHTABLE_ENTRY_DATA(ENTRY) \
    ((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t))

/* Entry data reinterpreted as a single stored void pointer. */
#define _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ENTRY) \
    (*(void **)_PY_HASHTABLE_ENTRY_DATA(ENTRY))

/* Copy DATA_SIZE bytes of entry data into DATA (DATA_SIZE must equal
   the table's data_size). */
#define _Py_HASHTABLE_ENTRY_READ_DATA(TABLE, DATA, DATA_SIZE, ENTRY) \
    do { \
        assert((DATA_SIZE) == (TABLE)->data_size); \
        memcpy(DATA, _PY_HASHTABLE_ENTRY_DATA(ENTRY), DATA_SIZE); \
    } while (0)

/* User callbacks: key hashing, key comparison, and optional deep-copy,
   free and size accounting of entry data. */
typedef Py_uhash_t (*_Py_hashtable_hash_func) (const void *key);
typedef int (*_Py_hashtable_compare_func) (const void *key, const _Py_hashtable_entry_t *he);
typedef void* (*_Py_hashtable_copy_data_func)(void *data);
typedef void (*_Py_hashtable_free_data_func)(void *data);
typedef size_t (*_Py_hashtable_get_data_size_func)(void *data);

/* Custom memory allocator used for the table, buckets and entries. */
typedef struct {
    /* allocate a memory block */
    void* (*malloc) (size_t size);

    /* release a memory block */
    void (*free) (void *ptr);
} _Py_hashtable_allocator_t;

/* Hash table mapping opaque keys to data_size bytes of inline data,
   with separate chaining for collision resolution. */
typedef struct {
    size_t num_buckets;
    size_t entries; /* Total number of entries in the table. */
    _Py_slist_t *buckets;
    size_t data_size;

    _Py_hashtable_hash_func hash_func;
    _Py_hashtable_compare_func compare_func;
    _Py_hashtable_copy_data_func copy_data_func;
    _Py_hashtable_free_data_func free_data_func;
    _Py_hashtable_get_data_size_func get_data_size_func;
    _Py_hashtable_allocator_t alloc;
} _Py_hashtable_t;

/* hash and compare functions for integers and pointers */
PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(const void *key);
PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_int(const void *key);
PyAPI_FUNC(int) _Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry);

/* Constructors, deep copy and destructors. All return NULL on memory
   error where they return a pointer. */
PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new(
    size_t data_size,
    _Py_hashtable_hash_func hash_func,
    _Py_hashtable_compare_func compare_func);
PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new_full(
    size_t data_size,
    size_t init_size,
    _Py_hashtable_hash_func hash_func,
    _Py_hashtable_compare_func compare_func,
    _Py_hashtable_copy_data_func copy_data_func,
    _Py_hashtable_free_data_func free_data_func,
    _Py_hashtable_get_data_size_func get_data_size_func,
    _Py_hashtable_allocator_t *allocator);
PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_copy(_Py_hashtable_t *src);
PyAPI_FUNC(void) _Py_hashtable_clear(_Py_hashtable_t *ht);
PyAPI_FUNC(void) _Py_hashtable_destroy(_Py_hashtable_t *ht);

/* Callback for _Py_hashtable_foreach(): iteration stops at the first
   non-zero return value. */
typedef int (*_Py_hashtable_foreach_func) (_Py_hashtable_entry_t *entry, void *arg);

PyAPI_FUNC(int) _Py_hashtable_foreach(
    _Py_hashtable_t *ht,
    _Py_hashtable_foreach_func func, void *arg);
PyAPI_FUNC(size_t) _Py_hashtable_size(_Py_hashtable_t *ht);

/* Entry lookup and modification. set/get/pop return status codes: see
   hashtable.c for the exact contracts. */
PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
    _Py_hashtable_t *ht,
    const void *key);
PyAPI_FUNC(int) _Py_hashtable_set(
    _Py_hashtable_t *ht,
    const void *key,
    void *data,
    size_t data_size);
PyAPI_FUNC(int) _Py_hashtable_get(
    _Py_hashtable_t *ht,
    const void *key,
    void *data,
    size_t data_size);
PyAPI_FUNC(int) _Py_hashtable_pop(
    _Py_hashtable_t *ht,
    const void *key,
    void *data,
    size_t data_size);
PyAPI_FUNC(void) _Py_hashtable_delete(
    _Py_hashtable_t *ht,
    const void *key);

/* Convenience wrappers passing sizeof(DATA) as the data size. */
#define _Py_HASHTABLE_SET(TABLE, KEY, DATA) \
    _Py_hashtable_set(TABLE, KEY, &(DATA), sizeof(DATA))

#define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \
    _Py_hashtable_get(TABLE, KEY, &(DATA), sizeof(DATA))

#endif /* Py_LIMITED_API */

#endif