From 76d5abc8684bac4f2fc7cccfe2cd940923357351 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 5 Sep 2017 18:26:16 -0700 Subject: bpo-30860: Consolidate stateful runtime globals. (#2594) * group the (stateful) runtime globals into various topical structs * consolidate the topical structs under a single top-level _PyRuntimeState struct * add a check-c-globals.py script that helps identify runtime globals Other globals are excluded (see globals.txt and check-c-globals.py). --- Include/Python.h | 4 + Include/ceval.h | 7 +- Include/internal/_Python.h | 16 + Include/internal/_ceval.h | 71 ++ Include/internal/_condvar.h | 91 +++ Include/internal/_gil.h | 48 ++ Include/internal/_mem.h | 197 ++++++ Include/internal/_pymalloc.h | 443 ++++++++++++ Include/internal/_pystate.h | 93 +++ Include/internal/_warnings.h | 21 + Include/object.h | 2 - Include/pylifecycle.h | 5 +- Include/pystate.h | 30 +- Makefile.pre.in | 7 + .../2017-09-05-13-47-49.bpo-30860.MROpZw.rst | 2 + Modules/_io/bufferedio.c | 2 +- Modules/_threadmodule.c | 11 +- Modules/_winapi.c | 2 +- Modules/gcmodule.c | 309 +++------ Modules/main.c | 8 +- Objects/object.c | 22 +- Objects/obmalloc.c | 772 +++++---------------- Objects/setobject.c | 1 + Objects/typeobject.c | 10 +- PCbuild/pythoncore.vcxproj | 8 + PCbuild/pythoncore.vcxproj.filters | 24 + Parser/pgenmain.c | 4 + Python/_warnings.c | 79 +-- Python/ceval.c | 190 +++-- Python/ceval_gil.h | 159 ++--- Python/condvar.h | 75 +- Python/pylifecycle.c | 88 ++- Python/pystate.c | 196 +++--- Python/sysmodule.c | 59 +- Python/thread.c | 7 +- Python/thread_nt.h | 9 +- Python/thread_pthread.h | 9 +- Tools/c-globals/README | 41 ++ Tools/c-globals/check-c-globals.py | 446 ++++++++++++ Tools/c-globals/ignored-globals.txt | 494 +++++++++++++ 40 files changed, 2731 insertions(+), 1331 deletions(-) create mode 100644 Include/internal/_Python.h create mode 100644 Include/internal/_ceval.h create mode 100644 Include/internal/_condvar.h create mode 100644 
Include/internal/_gil.h create mode 100644 Include/internal/_mem.h create mode 100644 Include/internal/_pymalloc.h create mode 100644 Include/internal/_pystate.h create mode 100644 Include/internal/_warnings.h create mode 100644 Misc/NEWS.d/next/Core and Builtins/2017-09-05-13-47-49.bpo-30860.MROpZw.rst create mode 100644 Tools/c-globals/README create mode 100644 Tools/c-globals/check-c-globals.py create mode 100644 Tools/c-globals/ignored-globals.txt diff --git a/Include/Python.h b/Include/Python.h index 061d693..3ab9fe9 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -133,4 +133,8 @@ #include "fileutils.h" #include "pyfpe.h" +#ifdef Py_BUILD_CORE +#include "internal/_Python.h" +#endif + #endif /* !Py_PYTHON_H */ diff --git a/Include/ceval.h b/Include/ceval.h index b2d57cb..7cbbf7c 100644 --- a/Include/ceval.h +++ b/Include/ceval.h @@ -93,7 +93,12 @@ PyAPI_FUNC(int) Py_GetRecursionLimit(void); PyThreadState_GET()->overflowed = 0; \ } while(0) PyAPI_FUNC(int) _Py_CheckRecursiveCall(const char *where); -PyAPI_DATA(int) _Py_CheckRecursionLimit; +#ifdef Py_BUILD_CORE +#define _Py_CheckRecursionLimit _PyRuntime.ceval.check_recursion_limit +#else +PyAPI_FUNC(int) _PyEval_CheckRecursionLimit(void); +#define _Py_CheckRecursionLimit _PyEval_CheckRecursionLimit() +#endif #ifdef USE_STACKCHECK /* With USE_STACKCHECK, we artificially decrement the recursion limit in order diff --git a/Include/internal/_Python.h b/Include/internal/_Python.h new file mode 100644 index 0000000..c56e98f --- /dev/null +++ b/Include/internal/_Python.h @@ -0,0 +1,16 @@ +#ifndef _Py_PYTHON_H +#define _Py_PYTHON_H +/* Since this is a "meta-include" file, no #ifdef __cplusplus / extern "C" { */ + +/* Include all internal Python header files */ + +#ifndef Py_BUILD_CORE +#error "Internal headers are not available externally." 
+#endif + +#include "_mem.h" +#include "_ceval.h" +#include "_warnings.h" +#include "_pystate.h" + +#endif /* !_Py_PYTHON_H */ diff --git a/Include/internal/_ceval.h b/Include/internal/_ceval.h new file mode 100644 index 0000000..c2343f1 --- /dev/null +++ b/Include/internal/_ceval.h @@ -0,0 +1,71 @@ +#ifndef _Py_CEVAL_H +#define _Py_CEVAL_H +#ifdef __cplusplus +extern "C" { +#endif + +#include "ceval.h" +#include "compile.h" +#include "pyatomic.h" + +#ifdef WITH_THREAD +#include "pythread.h" +#endif + +struct _pending_calls { + unsigned long main_thread; +#ifdef WITH_THREAD + PyThread_type_lock lock; + /* Request for running pending calls. */ + _Py_atomic_int calls_to_do; + /* Request for looking at the `async_exc` field of the current + thread state. + Guarded by the GIL. */ + int async_exc; +#define NPENDINGCALLS 32 + struct { + int (*func)(void *); + void *arg; + } calls[NPENDINGCALLS]; + int first; + int last; +#else /* ! WITH_THREAD */ + _Py_atomic_int calls_to_do; +#define NPENDINGCALLS 32 + struct { + int (*func)(void *); + void *arg; + } calls[NPENDINGCALLS]; + volatile int first; + volatile int last; +#endif /* WITH_THREAD */ +}; + +#include "_gil.h" + +struct _ceval_runtime_state { + int recursion_limit; + int check_recursion_limit; + /* Records whether tracing is on for any thread. Counts the number + of threads for which tstate->c_tracefunc is non-NULL, so if the + value is 0, we know we don't have to check this thread's + c_tracefunc. This speeds up the if statement in + PyEval_EvalFrameEx() after fast_next_opcode. */ + int tracing_possible; + /* This single variable consolidates all requests to break out of + the fast path in the eval loop. 
*/ + _Py_atomic_int eval_breaker; +#ifdef WITH_THREAD + /* Request for dropping the GIL */ + _Py_atomic_int gil_drop_request; +#endif + struct _pending_calls pending; + struct _gil_runtime_state gil; +}; + +PyAPI_FUNC(void) _PyEval_Initialize(struct _ceval_runtime_state *); + +#ifdef __cplusplus +} +#endif +#endif /* !_Py_CEVAL_H */ diff --git a/Include/internal/_condvar.h b/Include/internal/_condvar.h new file mode 100644 index 0000000..6827db7 --- /dev/null +++ b/Include/internal/_condvar.h @@ -0,0 +1,91 @@ +#ifndef _CONDVAR_H_ +#define _CONDVAR_H_ + +#ifndef _POSIX_THREADS +/* This means pthreads are not implemented in libc headers, hence the macro + not present in unistd.h. But they still can be implemented as an external + library (e.g. gnu pth in pthread emulation) */ +# ifdef HAVE_PTHREAD_H +# include <pthread.h> /* _POSIX_THREADS */ +# endif +#endif + +#ifdef _POSIX_THREADS +/* + * POSIX support + */ +#define Py_HAVE_CONDVAR + +#include <pthread.h> + +#define PyMUTEX_T pthread_mutex_t +#define PyCOND_T pthread_cond_t + +#elif defined(NT_THREADS) +/* + * Windows (XP, 2003 server and later, as well as (hopefully) CE) support + * + * Emulated condition variables that work with XP and later, plus + * example native support on VISTA and onwards. + */ +#define Py_HAVE_CONDVAR + +/* include windows if it hasn't been done before */ +#define WIN32_LEAN_AND_MEAN +#include <windows.h> + +/* options */ +/* non-emulated condition variables are provided for those that want + * to target Windows Vista. Modify this macro to enable them. + */ +#ifndef _PY_EMULATED_WIN_CV +#define _PY_EMULATED_WIN_CV 1 /* use emulated condition variables */ +#endif + +/* fall back to emulation if not targeting Vista */ +#if !defined NTDDI_VISTA || NTDDI_VERSION < NTDDI_VISTA +#undef _PY_EMULATED_WIN_CV +#define _PY_EMULATED_WIN_CV 1 +#endif + +#if _PY_EMULATED_WIN_CV + +typedef CRITICAL_SECTION PyMUTEX_T; + +/* The ConditionVariable object. From XP onwards it is easily emulated with a Semaphore.
+ Semaphores are available on Windows XP (2003 server) and later. + We use a Semaphore rather than an auto-reset event, because although + an auto-reset event might appear to solve the lost-wakeup bug (race + condition between releasing the outer lock and waiting) because it + maintains state even though a wait hasn't happened, there is still + a lost wakeup problem if more than one thread is interrupted in the + critical place. A semaphore solves that, because its state is + counted, not Boolean. + Because it is ok to signal a condition variable with no one + waiting, we need to keep track of the number of + waiting threads. Otherwise, the semaphore's state could rise + without bound. This also helps reduce the number of "spurious wakeups" + that would otherwise happen. + */ + +typedef struct _PyCOND_T +{ + HANDLE sem; + int waiting; /* to allow PyCOND_SIGNAL to be a no-op */ +} PyCOND_T; + +#else /* !_PY_EMULATED_WIN_CV */ + +/* Use native Win7 primitives if build target is Win7 or higher */ + +/* SRWLOCK is faster and better than CriticalSection */ +typedef SRWLOCK PyMUTEX_T; + +typedef CONDITION_VARIABLE PyCOND_T; + +#endif /* _PY_EMULATED_WIN_CV */ + +#endif /* _POSIX_THREADS, NT_THREADS */ + +#endif /* _CONDVAR_H_ */ diff --git a/Include/internal/_gil.h b/Include/internal/_gil.h new file mode 100644 index 0000000..42301bf --- /dev/null +++ b/Include/internal/_gil.h @@ -0,0 +1,48 @@ +#ifndef _Py_GIL_H +#define _Py_GIL_H +#ifdef __cplusplus +extern "C" { +#endif + +#include "pyatomic.h" + +#include "internal/_condvar.h" +#ifndef Py_HAVE_CONDVAR +#error You need either a POSIX-compatible or a Windows system! +#endif + +/* Enable if you want to force the switching of threads at least + every `interval`. */ +#undef FORCE_SWITCHING +#define FORCE_SWITCHING + +struct _gil_runtime_state { + /* microseconds (the Python API uses seconds, though) */ + unsigned long interval; + /* Last PyThreadState holding / having held the GIL.
This helps us + know whether anyone else was scheduled after we dropped the GIL. */ + _Py_atomic_address last_holder; + /* Whether the GIL is already taken (-1 if uninitialized). This is + atomic because it can be read without any lock taken in ceval.c. */ + _Py_atomic_int locked; + /* Number of GIL switches since the beginning. */ + unsigned long switch_number; +#ifdef WITH_THREAD + /* This condition variable allows one or several threads to wait + until the GIL is released. In addition, the mutex also protects + the above variables. */ + PyCOND_T cond; + PyMUTEX_T mutex; +#ifdef FORCE_SWITCHING + /* This condition variable helps the GIL-releasing thread wait for + a GIL-awaiting thread to be scheduled and take the GIL. */ + PyCOND_T switch_cond; + PyMUTEX_T switch_mutex; +#endif +#endif /* WITH_THREAD */ +}; + +#ifdef __cplusplus +} +#endif +#endif /* !_Py_GIL_H */ diff --git a/Include/internal/_mem.h b/Include/internal/_mem.h new file mode 100644 index 0000000..2932377 --- /dev/null +++ b/Include/internal/_mem.h @@ -0,0 +1,197 @@ +#ifndef _Py_MEM_H +#define _Py_MEM_H +#ifdef __cplusplus +extern "C" { +#endif + +#include "objimpl.h" +#include "pymem.h" + +#ifdef WITH_PYMALLOC +#include "_pymalloc.h" +#endif + +/* Low-level memory runtime state */ + +struct _pymem_runtime_state { + struct _allocator_runtime_state { + PyMemAllocatorEx mem; + PyMemAllocatorEx obj; + PyMemAllocatorEx raw; + } allocators; +#ifdef WITH_PYMALLOC + /* Array of objects used to track chunks of memory (arenas). */ + struct arena_object* arenas; + /* The head of the singly-linked, NULL-terminated list of available + arena_objects. */ + struct arena_object* unused_arena_objects; + /* The head of the doubly-linked, NULL-terminated at each end, + list of arena_objects associated with arenas that have pools + available. */ + struct arena_object* usable_arenas; + /* Number of slots currently allocated in the `arenas` vector. 
*/ + unsigned int maxarenas; + /* Number of arenas allocated that haven't been free()'d. */ + size_t narenas_currently_allocated; + /* High water mark (max value ever seen) for + * narenas_currently_allocated. */ + size_t narenas_highwater; + /* Total number of times malloc() called to allocate an arena. */ + size_t ntimes_arena_allocated; + poolp usedpools[MAX_POOLS]; + Py_ssize_t num_allocated_blocks; + size_t serialno; /* incremented on each debug {m,re}alloc */ +#endif /* WITH_PYMALLOC */ +}; + +PyAPI_FUNC(void) _PyMem_Initialize(struct _pymem_runtime_state *); + + +/* High-level memory runtime state */ + +struct _pyobj_runtime_state { + PyObjectArenaAllocator allocator_arenas; +}; + +PyAPI_FUNC(void) _PyObject_Initialize(struct _pyobj_runtime_state *); + + +/* GC runtime state */ + +/* If we change this, we need to change the default value in the + signature of gc.collect. */ +#define NUM_GENERATIONS 3 + +/* + NOTE: about the counting of long-lived objects. + + To limit the cost of garbage collection, there are two strategies; + - make each collection faster, e.g. by scanning fewer objects + - do fewer collections + This heuristic is about the latter strategy. + + In addition to the various configurable thresholds, we only trigger a + full collection if the ratio + long_lived_pending / long_lived_total + is above a given value (hardwired to 25%). + + The reason is that, while "non-full" collections (i.e., collections of + the young and middle generations) will always examine roughly the same + number of objects -- determined by the aforementioned thresholds --, + the cost of a full collection is proportional to the total number of + long-lived objects, which is virtually unbounded. + + Indeed, it has been remarked that doing a full collection every + <constant number> of object creations entails a dramatic performance + degradation in workloads which consist in creating and storing lots of + long-lived objects (e.g.
building a large list of GC-tracked objects would + show quadratic performance, instead of linear as expected: see issue #4074). + + Using the above ratio, instead, yields amortized linear performance in + the total number of objects (the effect of which can be summarized + thusly: "each full garbage collection is more and more costly as the + number of objects grows, but we do fewer and fewer of them"). + + This heuristic was suggested by Martin von Löwis on python-dev in + June 2008. His original analysis and proposal can be found at: + http://mail.python.org/pipermail/python-dev/2008-June/080579.html +*/ + +/* + NOTE: about untracking of mutable objects. + + Certain types of container cannot participate in a reference cycle, and + so do not need to be tracked by the garbage collector. Untracking these + objects reduces the cost of garbage collections. However, determining + which objects may be untracked is not free, and the costs must be + weighed against the benefits for garbage collection. + + There are two possible strategies for when to untrack a container: + + i) When the container is created. + ii) When the container is examined by the garbage collector. + + Tuples containing only immutable objects (integers, strings etc, and + recursively, tuples of immutable objects) do not need to be tracked. + The interpreter creates a large number of tuples, many of which will + not survive until garbage collection. It is therefore not worthwhile + to untrack eligible tuples at creation time. + + Instead, all tuples except the empty tuple are tracked when created. + During garbage collection it is determined whether any surviving tuples + can be untracked. A tuple can be untracked if all of its contents are + already not tracked. Tuples are examined for untracking in all garbage + collection cycles. It may take more than one cycle to untrack a tuple. + + Dictionaries containing only immutable objects also do not need to be + tracked. 
Dictionaries are untracked when created. If a tracked item is + inserted into a dictionary (either as a key or value), the dictionary + becomes tracked. During a full garbage collection (all generations), + the collector will untrack any dictionaries whose contents are not + tracked. + + The module provides the python function is_tracked(obj), which returns + the CURRENT tracking status of the object. Subsequent garbage + collections may change the tracking status of the object. + + Untracking of certain containers was introduced in issue #4688, and + the algorithm was refined in response to issue #14775. +*/ + +struct gc_generation { + PyGC_Head head; + int threshold; /* collection threshold */ + int count; /* count of allocations or collections of younger + generations */ +}; + +/* Running stats per generation */ +struct gc_generation_stats { + /* total number of collections */ + Py_ssize_t collections; + /* total number of collected objects */ + Py_ssize_t collected; + /* total number of uncollectable objects (put into gc.garbage) */ + Py_ssize_t uncollectable; +}; + +struct _gc_runtime_state { + /* List of objects that still need to be cleaned up, singly linked + * via their gc headers' gc_prev pointers. */ + PyObject *trash_delete_later; + /* Current call-stack depth of tp_dealloc calls. */ + int trash_delete_nesting; + + int enabled; + int debug; + /* linked lists of container objects */ + struct gc_generation generations[NUM_GENERATIONS]; + PyGC_Head *generation0; + struct gc_generation_stats generation_stats[NUM_GENERATIONS]; + /* true if we are currently running the collector */ + int collecting; + /* list of uncollectable objects */ + PyObject *garbage; + /* a list of callbacks to be invoked when collection is performed */ + PyObject *callbacks; + /* This is the number of objects that survived the last full + collection. It approximates the number of long lived objects + tracked by the GC. 
+ + (by "full collection", we mean a collection of the oldest + generation). */ + Py_ssize_t long_lived_total; + /* This is the number of objects that survived all "non-full" + collections, and are awaiting to undergo a full collection for + the first time. */ + Py_ssize_t long_lived_pending; +}; + +PyAPI_FUNC(void) _PyGC_Initialize(struct _gc_runtime_state *); + +#define _PyGC_generation0 _PyRuntime.gc.generation0 + +#ifdef __cplusplus +} +#endif +#endif /* !_Py_MEM_H */ diff --git a/Include/internal/_pymalloc.h b/Include/internal/_pymalloc.h new file mode 100644 index 0000000..764edf9 --- /dev/null +++ b/Include/internal/_pymalloc.h @@ -0,0 +1,443 @@ + +/* An object allocator for Python. + + Here is an introduction to the layers of the Python memory architecture, + showing where the object allocator is actually used (layer +2), It is + called for every object allocation and deallocation (PyObject_New/Del), + unless the object-specific allocators implement a proprietary allocation + scheme (ex.: ints use a simple free list). This is also the place where + the cyclic garbage collector operates selectively on container objects. + + + Object-specific allocators + _____ ______ ______ ________ + [ int ] [ dict ] [ list ] ... 
[ string ] Python core | ++3 | <----- Object-specific memory -----> | <-- Non-object memory --> | + _______________________________ | | + [ Python's object allocator ] | | ++2 | ####### Object memory ####### | <------ Internal buffers ------> | + ______________________________________________________________ | + [ Python's raw memory allocator (PyMem_ API) ] | ++1 | <----- Python memory (under PyMem manager's control) ------> | | + __________________________________________________________________ + [ Underlying general-purpose allocator (ex: C library malloc) ] + 0 | <------ Virtual memory allocated for the python process -------> | + + ========================================================================= + _______________________________________________________________________ + [ OS-specific Virtual Memory Manager (VMM) ] +-1 | <--- Kernel dynamic storage allocation & management (page-based) ---> | + __________________________________ __________________________________ + [ ] [ ] +-2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> | + +*/ +/*==========================================================================*/ + +/* A fast, special-purpose memory allocator for small blocks, to be used + on top of a general-purpose malloc -- heavily based on previous art. */ + +/* Vladimir Marangozov -- August 2000 */ + +/* + * "Memory management is where the rubber meets the road -- if we do the wrong + * thing at any level, the results will not be good. And if we don't make the + * levels work well together, we are in serious trouble." (1) + * + * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles, + * "Dynamic Storage Allocation: A Survey and Critical Review", + * in Proc. 1995 Int'l. Workshop on Memory Management, September 1995. 
+ */ + +#ifndef _Py_PYMALLOC_H +#define _Py_PYMALLOC_H + +/* #undef WITH_MEMORY_LIMITS */ /* disable mem limit checks */ + +/*==========================================================================*/ + +/* + * Allocation strategy abstract: + * + * For small requests, the allocator sub-allocates blocks of memory. + * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the + * system's allocator. + * + * Small requests are grouped in size classes spaced 8 bytes apart, due + * to the required valid alignment of the returned address. Requests of + * a particular size are serviced from memory pools of 4K (one VMM page). + * Pools are fragmented on demand and contain free lists of blocks of one + * particular size class. In other words, there is a fixed-size allocator + * for each size class. Free pools are shared by the different allocators + * thus minimizing the space reserved for a particular size class. + * + * This allocation strategy is a variant of what is known as "simple + * segregated storage based on array of free lists". The main drawback of + * simple segregated storage is that we might end up with lot of reserved + * memory for the different free lists, which degenerate in time. To avoid + * this, we partition each free list in pools and we share dynamically the + * reserved space between all free lists. This technique is quite efficient + * for memory intensive programs which allocate mainly small-sized blocks. + * + * For small requests we have the following table: + * + * Request in bytes Size of allocated block Size class idx + * ---------------------------------------------------------------- + * 1-8 8 0 + * 9-16 16 1 + * 17-24 24 2 + * 25-32 32 3 + * 33-40 40 4 + * 41-48 48 5 + * 49-56 56 6 + * 57-64 64 7 + * 65-72 72 8 + * ... ... ... + * 497-504 504 62 + * 505-512 512 63 + * + * 0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying + * allocator. 
+ */ + +/*==========================================================================*/ + +/* + * -- Main tunable settings section -- + */ + +/* + * Alignment of addresses returned to the user. 8-bytes alignment works + * on most current architectures (with 32-bit or 64-bit address busses). + * The alignment value is also used for grouping small requests in size + * classes spaced ALIGNMENT bytes apart. + * + * You shouldn't change this unless you know what you are doing. + */ +#define ALIGNMENT 8 /* must be 2^N */ +#define ALIGNMENT_SHIFT 3 + +/* Return the number of bytes in size class I, as a uint. */ +#define INDEX2SIZE(I) (((unsigned int)(I) + 1) << ALIGNMENT_SHIFT) + +/* + * Max size threshold below which malloc requests are considered to be + * small enough in order to use preallocated memory pools. You can tune + * this value according to your application behaviour and memory needs. + * + * Note: a size threshold of 512 guarantees that newly created dictionaries + * will be allocated from preallocated memory pools on 64-bit. + * + * The following invariants must hold: + * 1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512 + * 2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT + * + * Although not required, for better performance and space efficiency, + * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2. + */ +#define SMALL_REQUEST_THRESHOLD 512 +#define NB_SMALL_SIZE_CLASSES (SMALL_REQUEST_THRESHOLD / ALIGNMENT) + +#if NB_SMALL_SIZE_CLASSES > 64 +#error "NB_SMALL_SIZE_CLASSES should be less than 64" +#endif /* NB_SMALL_SIZE_CLASSES > 64 */ + +/* + * The system's VMM page size can be obtained on most unices with a + * getpagesize() call or deduced from various header files. To make + * things simpler, we assume that it is 4K, which is OK for most systems. + * It is probably better if this is the native page size, but it doesn't + * have to be. 
In theory, if SYSTEM_PAGE_SIZE is larger than the native page + * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation + * violation fault. 4K is apparently OK for all the platforms that python + * currently targets. + */ +#define SYSTEM_PAGE_SIZE (4 * 1024) +#define SYSTEM_PAGE_SIZE_MASK (SYSTEM_PAGE_SIZE - 1) + +/* + * Maximum amount of memory managed by the allocator for small requests. + */ +#ifdef WITH_MEMORY_LIMITS +#ifndef SMALL_MEMORY_LIMIT +#define SMALL_MEMORY_LIMIT (64 * 1024 * 1024) /* 64 MB -- more? */ +#endif +#endif + +/* + * The allocator sub-allocates blocks of memory (called arenas) aligned + * on a page boundary. This is a reserved virtual address space for the + * current process (obtained through a malloc()/mmap() call). In no way this + * means that the memory arenas will be used entirely. A malloc(<size>) is + * usually an address range reservation for <size> bytes, unless all pages within + * this space are referenced subsequently. So malloc'ing big blocks and not + * using them does not mean "wasting memory". It's an addressable range + * wastage... + * + * Arenas are allocated with mmap() on systems supporting anonymous memory + * mappings to reduce heap fragmentation. + */ +#define ARENA_SIZE (256 << 10) /* 256KB */ + +#ifdef WITH_MEMORY_LIMITS +#define MAX_ARENAS (SMALL_MEMORY_LIMIT / ARENA_SIZE) +#endif + +/* + * Size of the pools used for small blocks. Should be a power of 2, + * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k. + */ +#define POOL_SIZE SYSTEM_PAGE_SIZE /* must be 2^N */ +#define POOL_SIZE_MASK SYSTEM_PAGE_SIZE_MASK + +/* + * -- End of tunable settings section -- + */ + +/*==========================================================================*/ + +/* + * Locking + * + * To reduce lock contention, it would probably be better to refine the + * crude function locking with per size class locking.
I'm not positive + * however, whether it's worth switching to such locking policy because + * of the performance penalty it might introduce. + * + * The following macros describe the simplest (should also be the fastest) + * lock object on a particular platform and the init/fini/lock/unlock + * operations on it. The locks defined here are not expected to be recursive + * because it is assumed that they will always be called in the order: + * INIT, [LOCK, UNLOCK]*, FINI. + */ + +/* + * Python's threads are serialized, so object malloc locking is disabled. + */ +#define SIMPLELOCK_DECL(lock) /* simple lock declaration */ +#define SIMPLELOCK_INIT(lock) /* allocate (if needed) and initialize */ +#define SIMPLELOCK_FINI(lock) /* free/destroy an existing lock */ +#define SIMPLELOCK_LOCK(lock) /* acquire released lock */ +#define SIMPLELOCK_UNLOCK(lock) /* release acquired lock */ + +/* When you say memory, my mind reasons in terms of (pointers to) blocks */ +typedef uint8_t pyblock; + +/* Pool for small blocks. */ +struct pool_header { + union { pyblock *_padding; + unsigned int count; } ref; /* number of allocated blocks */ + pyblock *freeblock; /* pool's free list head */ + struct pool_header *nextpool; /* next pool of this size class */ + struct pool_header *prevpool; /* previous pool "" */ + unsigned int arenaindex; /* index into arenas of base adr */ + unsigned int szidx; /* block size class index */ + unsigned int nextoffset; /* bytes to virgin block */ + unsigned int maxnextoffset; /* largest valid nextoffset */ +}; + +typedef struct pool_header *poolp; + +/* Record keeping for arenas. */ +struct arena_object { + /* The address of the arena, as returned by malloc. Note that 0 + * will never be returned by a successful malloc, and is used + * here to mark an arena_object that doesn't correspond to an + * allocated arena. + */ + uintptr_t address; + + /* Pool-aligned pointer to the next pool to be carved off. 
*/ + pyblock* pool_address; + + /* The number of available pools in the arena: free pools + never- + * allocated pools. + */ + unsigned int nfreepools; + + /* The total number of pools in the arena, whether or not available. */ + unsigned int ntotalpools; + + /* Singly-linked list of available pools. */ + struct pool_header* freepools; + + /* Whenever this arena_object is not associated with an allocated + * arena, the nextarena member is used to link all unassociated + * arena_objects in the singly-linked `unused_arena_objects` list. + * The prevarena member is unused in this case. + * + * When this arena_object is associated with an allocated arena + * with at least one available pool, both members are used in the + * doubly-linked `usable_arenas` list, which is maintained in + * increasing order of `nfreepools` values. + * + * Else this arena_object is associated with an allocated arena + * all of whose pools are in use. `nextarena` and `prevarena` + * are both meaningless in this case. + */ + struct arena_object* nextarena; + struct arena_object* prevarena; +}; + +#define POOL_OVERHEAD _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT) + +#define DUMMY_SIZE_IDX 0xffff /* size class of newly cached pools */ + +/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */ +#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE)) + +/* Return total number of blocks in pool of size index I, as a uint. */ +#define NUMBLOCKS(I) \ + ((unsigned int)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I)) + +/*==========================================================================*/ + +/* + * This malloc lock + */ +SIMPLELOCK_DECL(_malloc_lock) +#define LOCK() SIMPLELOCK_LOCK(_malloc_lock) +#define UNLOCK() SIMPLELOCK_UNLOCK(_malloc_lock) +#define LOCK_INIT() SIMPLELOCK_INIT(_malloc_lock) +#define LOCK_FINI() SIMPLELOCK_FINI(_malloc_lock) + +/* + * Pool table -- headed, circular, doubly-linked lists of partially used pools. + +This is involved. 
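For an index i, usedpools[i+i] is the header for a list of +all partially used pools holding small blocks with "size class idx" i. So +usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size +16, and so on: index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT. + +Pools are carved off an arena's highwater mark (an arena_object's pool_address +member) as needed. Once carved off, a pool is in one of three states forever +after: + +used == partially used, neither empty nor full + At least one block in the pool is currently allocated, and at least one + block in the pool is not currently allocated (note this implies a pool + has room for at least two blocks). + This is a pool's initial state, as a pool is created only when malloc + needs space. + The pool holds blocks of a fixed size, and is in the circular list headed + at usedpools[i] (see above). It's linked to the other used pools of the + same size class via the pool_header's nextpool and prevpool members. + If all but one block is currently allocated, a malloc can cause a + transition to the full state. If all but one block is not currently + allocated, a free can cause a transition to the empty state. + +full == all the pool's blocks are currently allocated + On transition to full, a pool is unlinked from its usedpools[] list. + It's not linked to from anything then anymore, and its nextpool and + prevpool members are meaningless until it transitions back to used. + A free of a block in a full pool puts the pool back in the used state. + Then it's linked in at the front of the appropriate usedpools[] list, so + that the next allocation for its size class will reuse the freed block. + +empty == all the pool's blocks are currently available for allocation + On transition to empty, a pool is unlinked from its usedpools[] list, + and linked to the front of its arena_object's singly-linked freepools list, + via its nextpool member. The prevpool member has no meaning in this case. + Empty pools have no inherent size class: the next time a malloc finds + an empty list in usedpools[], it takes the first pool off of freepools. + If the size class needed happens to be the same as the size class the pool + last had, some pool initialization can be skipped. + + +Block Management + +Blocks within pools are again carved out as needed. pool->freeblock points to +the start of a singly-linked list of free blocks within the pool. When a +block is freed, it's inserted at the front of its pool's freeblock list. Note +that the available blocks in a pool are *not* linked all together when a pool +is initialized. Instead only "the first two" (lowest addresses) blocks are +set up, returning the first such block, and setting pool->freeblock to a +one-block list holding the second such block. This is consistent with that +pymalloc strives at all levels (arena, pool, and block) never to touch a piece +of memory until it's actually needed. + +So long as a pool is in the used state, we're certain there *is* a block +available for allocating, and pool->freeblock is not NULL. If pool->freeblock +points to the end of the free list before we've carved the entire pool into +blocks, that means we simply haven't yet gotten to one of the higher-address +blocks. The offset from the pool_header to the start of "the next" virgin +block is stored in the pool_header nextoffset member, and the largest value +of nextoffset that makes sense is stored in the maxnextoffset member when a +pool is initialized. All the blocks in a pool have been passed out at least +once when and only when nextoffset > maxnextoffset. + + +Major obscurity: While the usedpools vector is declared to have poolp +entries, it doesn't really. It really contains two pointers per (conceptual) +poolp entry, the nextpool and prevpool members of a pool_header. The +excruciating initialization code below fools C so that + + usedpools[i+i] + +"acts like" a genuine poolp, but only so long as you only reference its +nextpool and prevpool members.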
The "- 2*sizeof(block *)" gibberish is +compensating for that a pool_header's nextpool and prevpool members +immediately follow a pool_header's first two members: + + union { block *_padding; + uint count; } ref; + block *freeblock; + +each of which consume sizeof(block *) bytes. So what usedpools[i+i] really +contains is a fudged-up pointer p such that *if* C believes it's a poolp +pointer, then p->nextpool and p->prevpool are both p (meaning that the headed +circular list is empty). + +It's unclear why the usedpools setup is so convoluted. It could be to +minimize the amount of cache required to hold this heavily-referenced table +(which only *needs* the two interpool pointer members of a pool_header). OTOH, +referencing code has to remember to "double the index" and doing so isn't +free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying +on that C doesn't insert any padding anywhere in a pool_header at or before +the prevpool member. +**************************************************************************** */ + +#define MAX_POOLS (2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8) + +/*========================================================================== +Arena management. + +`arenas` is a vector of arena_objects. It contains maxarenas entries, some of +which may not be currently used (== they're arena_objects that aren't +currently associated with an allocated arena). Note that arenas proper are +separately malloc'ed. + +Prior to Python 2.5, arenas were never free()'ed. Starting with Python 2.5, +we do try to free() arenas, and use some mild heuristic strategies to increase +the likelihood that arenas eventually can be freed. + +unused_arena_objects + + This is a singly-linked list of the arena_objects that are currently not + being used (no arena is associated with them). Objects are taken off the + head of the list in new_arena(), and are pushed on the head of the list in + PyObject_Free() when the arena is empty. 
Key invariant: an arena_object + is on this list if and only if its .address member is 0. + +usable_arenas + + This is a doubly-linked list of the arena_objects associated with arenas + that have pools available. These pools are either waiting to be reused, + or have not been used before. The list is sorted to have the most- + allocated arenas first (ascending order based on the nfreepools member). + This means that the next allocation will come from a heavily used arena, + which gives the nearly empty arenas a chance to be returned to the system. + In my unscientific tests this dramatically improved the number of arenas + that could be freed. + +Note that an arena_object associated with an arena all of whose pools are +currently in use isn't on either list. +*/ + +/* How many arena_objects do we initially allocate? + * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the + * `arenas` vector. + */ +#define INITIAL_ARENA_OBJECTS 16 + +#endif /* _Py_PYMALLOC_H */ diff --git a/Include/internal/_pystate.h b/Include/internal/_pystate.h new file mode 100644 index 0000000..9f2dea1 --- /dev/null +++ b/Include/internal/_pystate.h @@ -0,0 +1,93 @@ +#ifndef _Py_PYSTATE_H +#define _Py_PYSTATE_H +#ifdef __cplusplus +extern "C" { +#endif + +#include "pystate.h" +#include "pyatomic.h" + +#ifdef WITH_THREAD +#include "pythread.h" +#endif + +#include "_mem.h" +#include "_ceval.h" +#include "_warnings.h" + + +/* GIL state */ + +struct _gilstate_runtime_state { + int check_enabled; + /* Assuming the current thread holds the GIL, this is the + PyThreadState for the current thread. 
*/ + _Py_atomic_address tstate_current; + PyThreadFrameGetter getframe; +#ifdef WITH_THREAD + /* The single PyInterpreterState used by this process' + GILState implementation + */ + /* TODO: Given interp_main, it may be possible to kill this ref */ + PyInterpreterState *autoInterpreterState; + int autoTLSkey; +#endif /* WITH_THREAD */ +}; + +/* hook for PyEval_GetFrame(), requested for Psyco */ +#define _PyThreadState_GetFrame _PyRuntime.gilstate.getframe + +/* Issue #26558: Flag to disable PyGILState_Check(). + If set to non-zero, PyGILState_Check() always return 1. */ +#define _PyGILState_check_enabled _PyRuntime.gilstate.check_enabled + + +/* Full Python runtime state */ + +typedef struct pyruntimestate { + int initialized; + int core_initialized; + PyThreadState *finalizing; + + struct pyinterpreters { +#ifdef WITH_THREAD + PyThread_type_lock mutex; +#endif + PyInterpreterState *head; + PyInterpreterState *main; + /* _next_interp_id is an auto-numbered sequence of small + integers. It gets initialized in _PyInterpreterState_Init(), + which is called in Py_Initialize(), and used in + PyInterpreterState_New(). A negative interpreter ID + indicates an error occurred. The main interpreter will + always have an ID of 0. Overflow results in a RuntimeError. + If that becomes a problem later then we can adjust, e.g. by + using a Python int. */ + int64_t next_id; + } interpreters; + +#define NEXITFUNCS 32 + void (*exitfuncs[NEXITFUNCS])(void); + int nexitfuncs; + void (*pyexitfunc)(void); + + struct _pyobj_runtime_state obj; + struct _gc_runtime_state gc; + struct _pymem_runtime_state mem; + struct _warnings_runtime_state warnings; + struct _ceval_runtime_state ceval; + struct _gilstate_runtime_state gilstate; + + // XXX Consolidate globals found via the check-c-globals script. 
+} _PyRuntimeState; + +PyAPI_DATA(_PyRuntimeState) _PyRuntime; +PyAPI_FUNC(void) _PyRuntimeState_Init(_PyRuntimeState *); +PyAPI_FUNC(void) _PyRuntimeState_Fini(_PyRuntimeState *); + +PyAPI_FUNC(void) _PyInterpreterState_Enable(_PyRuntimeState *); + +#ifdef __cplusplus +} +#endif +#endif /* !_Py_PYSTATE_H */ diff --git a/Include/internal/_warnings.h b/Include/internal/_warnings.h new file mode 100644 index 0000000..2a1abb2 --- /dev/null +++ b/Include/internal/_warnings.h @@ -0,0 +1,21 @@ +#ifndef _Py_WARNINGS_H +#define _Py_WARNINGS_H +#ifdef __cplusplus +extern "C" { +#endif + +#include "object.h" + +struct _warnings_runtime_state { + /* Both 'filters' and 'onceregistry' can be set in warnings.py; + get_warnings_attr() will reset these variables accordingly. */ + PyObject *filters; /* List */ + PyObject *once_registry; /* Dict */ + PyObject *default_action; /* String */ + long filters_version; +}; + +#ifdef __cplusplus +} +#endif +#endif /* !_Py_WARNINGS_H */ diff --git a/Include/object.h b/Include/object.h index f5ed70b..b46d4c3 100644 --- a/Include/object.h +++ b/Include/object.h @@ -1038,8 +1038,6 @@ with the call stack never exceeding a depth of PyTrash_UNWIND_LEVEL. Kept for binary compatibility of extensions using the stable ABI. */ PyAPI_FUNC(void) _PyTrash_deposit_object(PyObject*); PyAPI_FUNC(void) _PyTrash_destroy_chain(void); -PyAPI_DATA(int) _PyTrash_delete_nesting; -PyAPI_DATA(PyObject *) _PyTrash_delete_later; #endif /* !Py_LIMITED_API */ /* The new thread-safe private API, invoked by the macros below. 
*/ diff --git a/Include/pylifecycle.h b/Include/pylifecycle.h index 0d609ec..b02cd4c 100644 --- a/Include/pylifecycle.h +++ b/Include/pylifecycle.h @@ -119,7 +119,10 @@ PyAPI_FUNC(void) _PyType_Fini(void); PyAPI_FUNC(void) _Py_HashRandomization_Fini(void); PyAPI_FUNC(void) PyAsyncGen_Fini(void); -PyAPI_DATA(PyThreadState *) _Py_Finalizing; +#define _Py_IS_FINALIZING() \ + (_PyRuntime.finalizing != NULL) +#define _Py_CURRENTLY_FINALIZING(tstate) \ + (_PyRuntime.finalizing == tstate) #endif /* Signals */ diff --git a/Include/pystate.h b/Include/pystate.h index 8a92f3e..90081c5 100644 --- a/Include/pystate.h +++ b/Include/pystate.h @@ -29,9 +29,10 @@ typedef struct { int use_hash_seed; unsigned long hash_seed; int _disable_importlib; /* Needed by freeze_importlib */ + char *allocator; } _PyCoreConfig; -#define _PyCoreConfig_INIT {0, -1, 0, 0} +#define _PyCoreConfig_INIT {0, -1, 0, 0, NULL} /* Placeholders while working on the new configuration API * @@ -57,6 +58,19 @@ typedef struct _is { PyObject *builtins; PyObject *importlib; + /* Used in Python/sysmodule.c. */ + int check_interval; + PyObject *warnoptions; + PyObject *xoptions; + + /* Used in Modules/_threadmodule.c. */ + long num_threads; + /* Support for runtime thread stack size tuning. + A value of 0 means using the platform's default stack size + or the size specified by the THREAD_STACK_SIZE macro. */ + /* Used in Python/thread.c. 
*/ + size_t pythread_stacksize; + PyObject *codec_search_path; PyObject *codec_search_cache; PyObject *codec_error_registry; @@ -185,9 +199,6 @@ typedef struct _ts { #endif -#ifndef Py_LIMITED_API -PyAPI_FUNC(void) _PyInterpreterState_Init(void); -#endif /* !Py_LIMITED_API */ PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_New(void); PyAPI_FUNC(void) PyInterpreterState_Clear(PyInterpreterState *); PyAPI_FUNC(void) PyInterpreterState_Delete(PyInterpreterState *); @@ -246,7 +257,7 @@ PyAPI_FUNC(int) PyThreadState_SetAsyncExc(unsigned long, PyObject *); /* Assuming the current thread holds the GIL, this is the PyThreadState for the current thread. */ #ifdef Py_BUILD_CORE -PyAPI_DATA(_Py_atomic_address) _PyThreadState_Current; +# define _PyThreadState_Current _PyRuntime.gilstate.tstate_current # define PyThreadState_GET() \ ((PyThreadState*)_Py_atomic_load_relaxed(&_PyThreadState_Current)) #else @@ -301,10 +312,6 @@ PyAPI_FUNC(void) PyGILState_Release(PyGILState_STATE); PyAPI_FUNC(PyThreadState *) PyGILState_GetThisThreadState(void); #ifndef Py_LIMITED_API -/* Issue #26558: Flag to disable PyGILState_Check(). - If set to non-zero, PyGILState_Check() always return 1. */ -PyAPI_DATA(int) _PyGILState_check_enabled; - /* Helper/diagnostic function - return 1 if the current thread currently holds the GIL, 0 otherwise. 
@@ -340,11 +347,6 @@ PyAPI_FUNC(PyThreadState *) PyThreadState_Next(PyThreadState *); typedef struct _frame *(*PyThreadFrameGetter)(PyThreadState *self_); #endif -/* hook for PyEval_GetFrame(), requested for Psyco */ -#ifndef Py_LIMITED_API -PyAPI_DATA(PyThreadFrameGetter) _PyThreadState_GetFrame; -#endif - #ifdef __cplusplus } #endif diff --git a/Makefile.pre.in b/Makefile.pre.in index 57d2ab7..d6ebf85 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -987,6 +987,13 @@ PYTHON_HEADERS= \ pyconfig.h \ $(PARSER_HEADERS) \ $(srcdir)/Include/Python-ast.h \ + $(srcdir)/Include/internal/_Python.h \ + $(srcdir)/Include/internal/_ceval.h \ + $(srcdir)/Include/internal/_gil.h \ + $(srcdir)/Include/internal/_mem.h \ + $(srcdir)/Include/internal/_pymalloc.h \ + $(srcdir)/Include/internal/_pystate.h \ + $(srcdir)/Include/internal/_warnings.h \ $(DTRACE_HEADERS) $(LIBRARY_OBJS) $(MODOBJS) Programs/python.o: $(PYTHON_HEADERS) diff --git a/Misc/NEWS.d/next/Core and Builtins/2017-09-05-13-47-49.bpo-30860.MROpZw.rst b/Misc/NEWS.d/next/Core and Builtins/2017-09-05-13-47-49.bpo-30860.MROpZw.rst new file mode 100644 index 0000000..d8e9d5e --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2017-09-05-13-47-49.bpo-30860.MROpZw.rst @@ -0,0 +1,2 @@ +Consolidate CPython's global runtime state under a single struct. This +improves discoverability of the runtime state. 
diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index 189b1cd..3f57041 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -279,7 +279,7 @@ _enter_buffered_busy(buffered *self) "reentrant call inside %R", self); return 0; } - relax_locking = (_Py_Finalizing != NULL); + relax_locking = _Py_IS_FINALIZING(); Py_BEGIN_ALLOW_THREADS if (!relax_locking) st = PyThread_acquire_lock(self->lock, 1); diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index da750c0..89be96c 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -14,7 +14,6 @@ #include "pythread.h" static PyObject *ThreadError; -static long nb_threads = 0; static PyObject *str_dict; _Py_IDENTIFIER(stderr); @@ -993,7 +992,7 @@ t_bootstrap(void *boot_raw) tstate->thread_id = PyThread_get_thread_ident(); _PyThreadState_Init(tstate); PyEval_AcquireThread(tstate); - nb_threads++; + tstate->interp->num_threads++; res = PyObject_Call(boot->func, boot->args, boot->keyw); if (res == NULL) { if (PyErr_ExceptionMatches(PyExc_SystemExit)) @@ -1020,7 +1019,7 @@ t_bootstrap(void *boot_raw) Py_DECREF(boot->args); Py_XDECREF(boot->keyw); PyMem_DEL(boot_raw); - nb_threads--; + tstate->interp->num_threads--; PyThreadState_Clear(tstate); PyThreadState_DeleteCurrent(); PyThread_exit_thread(); @@ -1159,7 +1158,8 @@ A thread's identity may be reused for another thread after it exits."); static PyObject * thread__count(PyObject *self) { - return PyLong_FromLong(nb_threads); + PyThreadState *tstate = PyThreadState_Get(); + return PyLong_FromLong(tstate->interp->num_threads); } PyDoc_STRVAR(_count_doc, @@ -1352,6 +1352,7 @@ PyInit__thread(void) PyObject *m, *d, *v; double time_max; double timeout_max; + PyThreadState *tstate = PyThreadState_Get(); /* Initialize types: */ if (PyType_Ready(&localdummytype) < 0) @@ -1396,7 +1397,7 @@ PyInit__thread(void) if (PyModule_AddObject(m, "_local", (PyObject *)&localtype) < 0) return NULL; - nb_threads = 0; + 
tstate->interp->num_threads = 0; str_dict = PyUnicode_InternFromString("__dict__"); if (str_dict == NULL) diff --git a/Modules/_winapi.c b/Modules/_winapi.c index 682d0a3..6556d99 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -114,7 +114,7 @@ overlapped_dealloc(OverlappedObject *self) { /* The operation is no longer pending -- nothing to do. */ } - else if (_Py_Finalizing == NULL) + else if _Py_IS_FINALIZING() { /* The operation is still pending -- give a warning. This will probably only happen on Windows XP. */ diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 4e5acf3..fa67f7f 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -39,133 +39,9 @@ module gc /* Get the object given the GC head */ #define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1)) -/*** Global GC state ***/ - -struct gc_generation { - PyGC_Head head; - int threshold; /* collection threshold */ - int count; /* count of allocations or collections of younger - generations */ -}; - -/* If we change this, we need to change the default value in the signature of - gc.collect. */ -#define NUM_GENERATIONS 3 -#define GEN_HEAD(n) (&generations[n].head) - -/* linked lists of container objects */ -static struct gc_generation generations[NUM_GENERATIONS] = { - /* PyGC_Head, threshold, count */ - {{{GEN_HEAD(0), GEN_HEAD(0), 0}}, 700, 0}, - {{{GEN_HEAD(1), GEN_HEAD(1), 0}}, 10, 0}, - {{{GEN_HEAD(2), GEN_HEAD(2), 0}}, 10, 0}, -}; - -PyGC_Head *_PyGC_generation0 = GEN_HEAD(0); - -static int enabled = 1; /* automatic collection enabled? */ - -/* true if we are currently running the collector */ -static int collecting = 0; - -/* list of uncollectable objects */ -static PyObject *garbage = NULL; - /* Python string to use if unhandled exception occurs */ static PyObject *gc_str = NULL; -/* a list of callbacks to be invoked when collection is performed */ -static PyObject *callbacks = NULL; - -/* This is the number of objects that survived the last full collection. 
It - approximates the number of long lived objects tracked by the GC. - - (by "full collection", we mean a collection of the oldest generation). -*/ -static Py_ssize_t long_lived_total = 0; - -/* This is the number of objects that survived all "non-full" collections, - and are awaiting to undergo a full collection for the first time. - -*/ -static Py_ssize_t long_lived_pending = 0; - -/* - NOTE: about the counting of long-lived objects. - - To limit the cost of garbage collection, there are two strategies; - - make each collection faster, e.g. by scanning fewer objects - - do less collections - This heuristic is about the latter strategy. - - In addition to the various configurable thresholds, we only trigger a - full collection if the ratio - long_lived_pending / long_lived_total - is above a given value (hardwired to 25%). - - The reason is that, while "non-full" collections (i.e., collections of - the young and middle generations) will always examine roughly the same - number of objects -- determined by the aforementioned thresholds --, - the cost of a full collection is proportional to the total number of - long-lived objects, which is virtually unbounded. - - Indeed, it has been remarked that doing a full collection every - <constant number> of object creations entails a dramatic performance - degradation in workloads which consist in creating and storing lots of - long-lived objects (e.g. building a large list of GC-tracked objects would - show quadratic performance, instead of linear as expected: see issue #4074). - - Using the above ratio, instead, yields amortized linear performance in - the total number of objects (the effect of which can be summarized - thusly: "each full garbage collection is more and more costly as the - number of objects grows, but we do fewer and fewer of them"). - - This heuristic was suggested by Martin von Löwis on python-dev in - June 2008.
His original analysis and proposal can be found at: - http://mail.python.org/pipermail/python-dev/2008-June/080579.html -*/ - -/* - NOTE: about untracking of mutable objects. - - Certain types of container cannot participate in a reference cycle, and - so do not need to be tracked by the garbage collector. Untracking these - objects reduces the cost of garbage collections. However, determining - which objects may be untracked is not free, and the costs must be - weighed against the benefits for garbage collection. - - There are two possible strategies for when to untrack a container: - - i) When the container is created. - ii) When the container is examined by the garbage collector. - - Tuples containing only immutable objects (integers, strings etc, and - recursively, tuples of immutable objects) do not need to be tracked. - The interpreter creates a large number of tuples, many of which will - not survive until garbage collection. It is therefore not worthwhile - to untrack eligible tuples at creation time. - - Instead, all tuples except the empty tuple are tracked when created. - During garbage collection it is determined whether any surviving tuples - can be untracked. A tuple can be untracked if all of its contents are - already not tracked. Tuples are examined for untracking in all garbage - collection cycles. It may take more than one cycle to untrack a tuple. - - Dictionaries containing only immutable objects also do not need to be - tracked. Dictionaries are untracked when created. If a tracked item is - inserted into a dictionary (either as a key or value), the dictionary - becomes tracked. During a full garbage collection (all generations), - the collector will untrack any dictionaries whose contents are not - tracked. - - The module provides the python function is_tracked(obj), which returns - the CURRENT tracking status of the object. Subsequent garbage - collections may change the tracking status of the object. 
- - Untracking of certain containers was introduced in issue #4688, and - the algorithm was refined in response to issue #14775. -*/ - /* set for debugging information */ #define DEBUG_STATS (1<<0) /* print collection statistics */ #define DEBUG_COLLECTABLE (1<<1) /* print collectable objects */ @@ -174,19 +50,26 @@ static Py_ssize_t long_lived_pending = 0; #define DEBUG_LEAK DEBUG_COLLECTABLE | \ DEBUG_UNCOLLECTABLE | \ DEBUG_SAVEALL -static int debug; - -/* Running stats per generation */ -struct gc_generation_stats { - /* total number of collections */ - Py_ssize_t collections; - /* total number of collected objects */ - Py_ssize_t collected; - /* total number of uncollectable objects (put into gc.garbage) */ - Py_ssize_t uncollectable; -}; -static struct gc_generation_stats generation_stats[NUM_GENERATIONS]; +#define GEN_HEAD(n) (&_PyRuntime.gc.generations[n].head) + +void +_PyGC_Initialize(struct _gc_runtime_state *state) +{ + state->enabled = 1; /* automatic collection enabled? */ + +#define _GEN_HEAD(n) (&state->generations[n].head) + struct gc_generation generations[NUM_GENERATIONS] = { + /* PyGC_Head, threshold, count */ + {{{_GEN_HEAD(0), _GEN_HEAD(0), 0}}, 700, 0}, + {{{_GEN_HEAD(1), _GEN_HEAD(1), 0}}, 10, 0}, + {{{_GEN_HEAD(2), _GEN_HEAD(2), 0}}, 10, 0}, + }; + for (int i = 0; i < NUM_GENERATIONS; i++) { + state->generations[i] = generations[i]; + }; + state->generation0 = GEN_HEAD(0); +} /*-------------------------------------------------------------------------- gc_refs values. 
@@ -766,16 +649,16 @@ handle_legacy_finalizers(PyGC_Head *finalizers, PyGC_Head *old) { PyGC_Head *gc = finalizers->gc.gc_next; - if (garbage == NULL) { - garbage = PyList_New(0); - if (garbage == NULL) + if (_PyRuntime.gc.garbage == NULL) { + _PyRuntime.gc.garbage = PyList_New(0); + if (_PyRuntime.gc.garbage == NULL) Py_FatalError("gc couldn't create gc.garbage list"); } for (; gc != finalizers; gc = gc->gc.gc_next) { PyObject *op = FROM_GC(gc); - if ((debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) { - if (PyList_Append(garbage, op) < 0) + if ((_PyRuntime.gc.debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) { + if (PyList_Append(_PyRuntime.gc.garbage, op) < 0) return -1; } } @@ -865,8 +748,8 @@ delete_garbage(PyGC_Head *collectable, PyGC_Head *old) PyGC_Head *gc = collectable->gc.gc_next; PyObject *op = FROM_GC(gc); - if (debug & DEBUG_SAVEALL) { - PyList_Append(garbage, op); + if (_PyRuntime.gc.debug & DEBUG_SAVEALL) { + PyList_Append(_PyRuntime.gc.garbage, op); } else { if ((clear = Py_TYPE(op)->tp_clear) != NULL) { @@ -919,9 +802,9 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, PyGC_Head *gc; _PyTime_t t1 = 0; /* initialize to prevent a compiler warning */ - struct gc_generation_stats *stats = &generation_stats[generation]; + struct gc_generation_stats *stats = &_PyRuntime.gc.generation_stats[generation]; - if (debug & DEBUG_STATS) { + if (_PyRuntime.gc.debug & DEBUG_STATS) { PySys_WriteStderr("gc: collecting generation %d...\n", generation); PySys_WriteStderr("gc: objects in each generation:"); @@ -938,9 +821,9 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, /* update collection and allocation counters */ if (generation+1 < NUM_GENERATIONS) - generations[generation+1].count += 1; + _PyRuntime.gc.generations[generation+1].count += 1; for (i = 0; i <= generation; i++) - generations[i].count = 0; + _PyRuntime.gc.generations[i].count = 0; /* merge younger generations with one we are 
currently collecting */ for (i = 0; i < generation; i++) { @@ -974,7 +857,7 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, /* Move reachable objects to next generation. */ if (young != old) { if (generation == NUM_GENERATIONS - 2) { - long_lived_pending += gc_list_size(young); + _PyRuntime.gc.long_lived_pending += gc_list_size(young); } gc_list_merge(young, old); } @@ -982,8 +865,8 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, /* We only untrack dicts in full collections, to avoid quadratic dict build-up. See issue #14775. */ untrack_dicts(young); - long_lived_pending = 0; - long_lived_total = gc_list_size(young); + _PyRuntime.gc.long_lived_pending = 0; + _PyRuntime.gc.long_lived_total = gc_list_size(young); } /* All objects in unreachable are trash, but objects reachable from @@ -1003,7 +886,7 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, for (gc = unreachable.gc.gc_next; gc != &unreachable; gc = gc->gc.gc_next) { m++; - if (debug & DEBUG_COLLECTABLE) { + if (_PyRuntime.gc.debug & DEBUG_COLLECTABLE) { debug_cycle("collectable", FROM_GC(gc)); } } @@ -1032,10 +915,10 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, gc != &finalizers; gc = gc->gc.gc_next) { n++; - if (debug & DEBUG_UNCOLLECTABLE) + if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE) debug_cycle("uncollectable", FROM_GC(gc)); } - if (debug & DEBUG_STATS) { + if (_PyRuntime.gc.debug & DEBUG_STATS) { _PyTime_t t2 = _PyTime_GetMonotonicClock(); if (m == 0 && n == 0) @@ -1098,11 +981,11 @@ invoke_gc_callback(const char *phase, int generation, PyObject *info = NULL; /* we may get called very early */ - if (callbacks == NULL) + if (_PyRuntime.gc.callbacks == NULL) return; /* The local variable cannot be rebound, check it for sanity */ - assert(callbacks != NULL && PyList_CheckExact(callbacks)); - if (PyList_GET_SIZE(callbacks) != 0) { + assert(_PyRuntime.gc.callbacks != NULL && 
PyList_CheckExact(_PyRuntime.gc.callbacks)); + if (PyList_GET_SIZE(_PyRuntime.gc.callbacks) != 0) { info = Py_BuildValue("{sisnsn}", "generation", generation, "collected", collected, @@ -1112,8 +995,8 @@ invoke_gc_callback(const char *phase, int generation, return; } } - for (i=0; i= 0; i--) { - if (generations[i].count > generations[i].threshold) { + if (_PyRuntime.gc.generations[i].count > _PyRuntime.gc.generations[i].threshold) { /* Avoid quadratic performance degradation in number of tracked objects. See comments at the beginning of this file, and issue #4074. */ if (i == NUM_GENERATIONS - 1 - && long_lived_pending < long_lived_total / 4) + && _PyRuntime.gc.long_lived_pending < _PyRuntime.gc.long_lived_total / 4) continue; n = collect_with_callback(i); break; @@ -1174,7 +1057,7 @@ static PyObject * gc_enable_impl(PyObject *module) /*[clinic end generated code: output=45a427e9dce9155c input=81ac4940ca579707]*/ { - enabled = 1; + _PyRuntime.gc.enabled = 1; Py_RETURN_NONE; } @@ -1188,7 +1071,7 @@ static PyObject * gc_disable_impl(PyObject *module) /*[clinic end generated code: output=97d1030f7aa9d279 input=8c2e5a14e800d83b]*/ { - enabled = 0; + _PyRuntime.gc.enabled = 0; Py_RETURN_NONE; } @@ -1202,7 +1085,7 @@ static int gc_isenabled_impl(PyObject *module) /*[clinic end generated code: output=1874298331c49130 input=30005e0422373b31]*/ { - return enabled; + return _PyRuntime.gc.enabled; } /*[clinic input] @@ -1230,12 +1113,12 @@ gc_collect_impl(PyObject *module, int generation) return -1; } - if (collecting) + if (_PyRuntime.gc.collecting) n = 0; /* already collecting, don't do anything */ else { - collecting = 1; + _PyRuntime.gc.collecting = 1; n = collect_with_callback(generation); - collecting = 0; + _PyRuntime.gc.collecting = 0; } return n; @@ -1263,7 +1146,7 @@ static PyObject * gc_set_debug_impl(PyObject *module, int flags) /*[clinic end generated code: output=7c8366575486b228 input=5e5ce15e84fbed15]*/ { - debug = flags; + _PyRuntime.gc.debug = flags; 
Py_RETURN_NONE; } @@ -1278,7 +1161,7 @@ static int gc_get_debug_impl(PyObject *module) /*[clinic end generated code: output=91242f3506cd1e50 input=91a101e1c3b98366]*/ { - return debug; + return _PyRuntime.gc.debug; } PyDoc_STRVAR(gc_set_thresh__doc__, @@ -1292,13 +1175,13 @@ gc_set_thresh(PyObject *self, PyObject *args) { int i; if (!PyArg_ParseTuple(args, "i|ii:set_threshold", - &generations[0].threshold, - &generations[1].threshold, - &generations[2].threshold)) + &_PyRuntime.gc.generations[0].threshold, + &_PyRuntime.gc.generations[1].threshold, + &_PyRuntime.gc.generations[2].threshold)) return NULL; for (i = 2; i < NUM_GENERATIONS; i++) { /* generations higher than 2 get the same threshold */ - generations[i].threshold = generations[2].threshold; + _PyRuntime.gc.generations[i].threshold = _PyRuntime.gc.generations[2].threshold; } Py_RETURN_NONE; @@ -1315,9 +1198,9 @@ gc_get_threshold_impl(PyObject *module) /*[clinic end generated code: output=7902bc9f41ecbbd8 input=286d79918034d6e6]*/ { return Py_BuildValue("(iii)", - generations[0].threshold, - generations[1].threshold, - generations[2].threshold); + _PyRuntime.gc.generations[0].threshold, + _PyRuntime.gc.generations[1].threshold, + _PyRuntime.gc.generations[2].threshold); } /*[clinic input] @@ -1331,9 +1214,9 @@ gc_get_count_impl(PyObject *module) /*[clinic end generated code: output=354012e67b16398f input=a392794a08251751]*/ { return Py_BuildValue("(iii)", - generations[0].count, - generations[1].count, - generations[2].count); + _PyRuntime.gc.generations[0].count, + _PyRuntime.gc.generations[1].count, + _PyRuntime.gc.generations[2].count); } static int @@ -1464,7 +1347,7 @@ gc_get_stats_impl(PyObject *module) /* To get consistent values despite allocations while constructing the result list, we use a snapshot of the running stats. 
*/ for (i = 0; i < NUM_GENERATIONS; i++) { - stats[i] = generation_stats[i]; + stats[i] = _PyRuntime.gc.generation_stats[i]; } result = PyList_New(0); @@ -1581,22 +1464,22 @@ PyInit_gc(void) if (m == NULL) return NULL; - if (garbage == NULL) { - garbage = PyList_New(0); - if (garbage == NULL) + if (_PyRuntime.gc.garbage == NULL) { + _PyRuntime.gc.garbage = PyList_New(0); + if (_PyRuntime.gc.garbage == NULL) return NULL; } - Py_INCREF(garbage); - if (PyModule_AddObject(m, "garbage", garbage) < 0) + Py_INCREF(_PyRuntime.gc.garbage); + if (PyModule_AddObject(m, "garbage", _PyRuntime.gc.garbage) < 0) return NULL; - if (callbacks == NULL) { - callbacks = PyList_New(0); - if (callbacks == NULL) + if (_PyRuntime.gc.callbacks == NULL) { + _PyRuntime.gc.callbacks = PyList_New(0); + if (_PyRuntime.gc.callbacks == NULL) return NULL; } - Py_INCREF(callbacks); - if (PyModule_AddObject(m, "callbacks", callbacks) < 0) + Py_INCREF(_PyRuntime.gc.callbacks); + if (PyModule_AddObject(m, "callbacks", _PyRuntime.gc.callbacks) < 0) return NULL; #define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return NULL @@ -1615,12 +1498,12 @@ PyGC_Collect(void) { Py_ssize_t n; - if (collecting) + if (_PyRuntime.gc.collecting) n = 0; /* already collecting, don't do anything */ else { - collecting = 1; + _PyRuntime.gc.collecting = 1; n = collect_with_callback(NUM_GENERATIONS - 1); - collecting = 0; + _PyRuntime.gc.collecting = 0; } return n; @@ -1629,7 +1512,7 @@ PyGC_Collect(void) Py_ssize_t _PyGC_CollectIfEnabled(void) { - if (!enabled) + if (!_PyRuntime.gc.enabled) return 0; return PyGC_Collect(); @@ -1646,12 +1529,12 @@ _PyGC_CollectNoFail(void) during interpreter shutdown (and then never finish it). See http://bugs.python.org/issue8713#msg195178 for an example. 
*/ - if (collecting) + if (_PyRuntime.gc.collecting) n = 0; else { - collecting = 1; + _PyRuntime.gc.collecting = 1; n = collect(NUM_GENERATIONS - 1, NULL, NULL, 1); - collecting = 0; + _PyRuntime.gc.collecting = 0; } return n; } @@ -1659,10 +1542,10 @@ _PyGC_CollectNoFail(void) void _PyGC_DumpShutdownStats(void) { - if (!(debug & DEBUG_SAVEALL) - && garbage != NULL && PyList_GET_SIZE(garbage) > 0) { + if (!(_PyRuntime.gc.debug & DEBUG_SAVEALL) + && _PyRuntime.gc.garbage != NULL && PyList_GET_SIZE(_PyRuntime.gc.garbage) > 0) { char *message; - if (debug & DEBUG_UNCOLLECTABLE) + if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE) message = "gc: %zd uncollectable objects at " \ "shutdown"; else @@ -1673,13 +1556,13 @@ _PyGC_DumpShutdownStats(void) already. */ if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0, "gc", NULL, message, - PyList_GET_SIZE(garbage))) + PyList_GET_SIZE(_PyRuntime.gc.garbage))) PyErr_WriteUnraisable(NULL); - if (debug & DEBUG_UNCOLLECTABLE) { + if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE) { PyObject *repr = NULL, *bytes = NULL; - repr = PyObject_Repr(garbage); + repr = PyObject_Repr(_PyRuntime.gc.garbage); if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr))) - PyErr_WriteUnraisable(garbage); + PyErr_WriteUnraisable(_PyRuntime.gc.garbage); else { PySys_WriteStderr( " %s\n", @@ -1695,7 +1578,7 @@ _PyGC_DumpShutdownStats(void) void _PyGC_Fini(void) { - Py_CLEAR(callbacks); + Py_CLEAR(_PyRuntime.gc.callbacks); } /* for debugging */ @@ -1746,15 +1629,15 @@ _PyObject_GC_Alloc(int use_calloc, size_t basicsize) return PyErr_NoMemory(); g->gc.gc_refs = 0; _PyGCHead_SET_REFS(g, GC_UNTRACKED); - generations[0].count++; /* number of allocated GC objects */ - if (generations[0].count > generations[0].threshold && - enabled && - generations[0].threshold && - !collecting && + _PyRuntime.gc.generations[0].count++; /* number of allocated GC objects */ + if (_PyRuntime.gc.generations[0].count > _PyRuntime.gc.generations[0].threshold && + 
_PyRuntime.gc.enabled && + _PyRuntime.gc.generations[0].threshold && + !_PyRuntime.gc.collecting && !PyErr_Occurred()) { - collecting = 1; + _PyRuntime.gc.collecting = 1; collect_generations(); - collecting = 0; + _PyRuntime.gc.collecting = 0; } op = FROM_GC(g); return op; @@ -1819,8 +1702,8 @@ PyObject_GC_Del(void *op) PyGC_Head *g = AS_GC(op); if (IS_TRACKED(op)) gc_list_remove(g); - if (generations[0].count > 0) { - generations[0].count--; + if (_PyRuntime.gc.generations[0].count > 0) { + _PyRuntime.gc.generations[0].count--; } PyObject_FREE(g); } diff --git a/Modules/main.c b/Modules/main.c index 08b2276..3e347dc 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -598,16 +598,10 @@ Py_Main(int argc, wchar_t **argv) } } - char *pymalloc = Py_GETENV("PYTHONMALLOC"); - if (_PyMem_SetupAllocators(pymalloc) < 0) { - fprintf(stderr, - "Error in PYTHONMALLOC: unknown allocator \"%s\"!\n", pymalloc); - exit(1); - } - /* Initialize the core language runtime */ Py_IgnoreEnvironmentFlag = core_config.ignore_environment; core_config._disable_importlib = 0; + core_config.allocator = Py_GETENV("PYTHONMALLOC"); _Py_InitializeCore(&core_config); /* Reprocess the command line with the language runtime available */ diff --git a/Objects/object.c b/Objects/object.c index 2ba6e57..68a90c2 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2028,14 +2028,6 @@ finally: /* Trashcan support. */ -/* Current call-stack depth of tp_dealloc calls. */ -int _PyTrash_delete_nesting = 0; - -/* List of objects that still need to be cleaned up, singly linked via their - * gc headers' gc_prev pointers. - */ -PyObject *_PyTrash_delete_later = NULL; - /* Add op to the _PyTrash_delete_later list. Called when the current * call-stack depth gets large. op must be a currently untracked gc'ed * object, with refcount 0. Py_DECREF must already have been called on it. 
@@ -2046,8 +2038,8 @@ _PyTrash_deposit_object(PyObject *op) assert(PyObject_IS_GC(op)); assert(_PyGC_REFS(op) == _PyGC_REFS_UNTRACKED); assert(op->ob_refcnt == 0); - _Py_AS_GC(op)->gc.gc_prev = (PyGC_Head *)_PyTrash_delete_later; - _PyTrash_delete_later = op; + _Py_AS_GC(op)->gc.gc_prev = (PyGC_Head *)_PyRuntime.gc.trash_delete_later; + _PyRuntime.gc.trash_delete_later = op; } /* The equivalent API, using per-thread state recursion info */ @@ -2068,11 +2060,11 @@ _PyTrash_thread_deposit_object(PyObject *op) void _PyTrash_destroy_chain(void) { - while (_PyTrash_delete_later) { - PyObject *op = _PyTrash_delete_later; + while (_PyRuntime.gc.trash_delete_later) { + PyObject *op = _PyRuntime.gc.trash_delete_later; destructor dealloc = Py_TYPE(op)->tp_dealloc; - _PyTrash_delete_later = + _PyRuntime.gc.trash_delete_later = (PyObject*) _Py_AS_GC(op)->gc.gc_prev; /* Call the deallocator directly. This used to try to @@ -2082,9 +2074,9 @@ _PyTrash_destroy_chain(void) * up distorting allocation statistics. 
*/ assert(op->ob_refcnt == 0); - ++_PyTrash_delete_nesting; + ++_PyRuntime.gc.trash_delete_nesting; (*dealloc)(op); - --_PyTrash_delete_nesting; + --_PyRuntime.gc.trash_delete_nesting; } } diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 32e7ecb..3698cfc 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -178,7 +178,9 @@ static struct { #define PYDBG_FUNCS \ _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree -static PyMemAllocatorEx _PyMem_Raw = { + +#define _PyMem_Raw _PyRuntime.mem.allocators.raw +static const PyMemAllocatorEx _pymem_raw = { #ifdef Py_DEBUG &_PyMem_Debug.raw, PYRAWDBG_FUNCS #else @@ -186,7 +188,8 @@ static PyMemAllocatorEx _PyMem_Raw = { #endif }; -static PyMemAllocatorEx _PyMem = { +#define _PyMem _PyRuntime.mem.allocators.mem +static const PyMemAllocatorEx _pymem = { #ifdef Py_DEBUG &_PyMem_Debug.mem, PYDBG_FUNCS #else @@ -194,7 +197,8 @@ static PyMemAllocatorEx _PyMem = { #endif }; -static PyMemAllocatorEx _PyObject = { +#define _PyObject _PyRuntime.mem.allocators.obj +static const PyMemAllocatorEx _pyobject = { #ifdef Py_DEBUG &_PyMem_Debug.obj, PYDBG_FUNCS #else @@ -267,7 +271,7 @@ _PyMem_SetupAllocators(const char *opt) #undef PYRAWDBG_FUNCS #undef PYDBG_FUNCS -static PyObjectArenaAllocator _PyObject_Arena = {NULL, +static const PyObjectArenaAllocator _PyObject_Arena = {NULL, #ifdef MS_WINDOWS _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree #elif defined(ARENAS_USE_MMAP) @@ -277,6 +281,34 @@ static PyObjectArenaAllocator _PyObject_Arena = {NULL, #endif }; +void +_PyObject_Initialize(struct _pyobj_runtime_state *state) +{ + state->allocator_arenas = _PyObject_Arena; +} + +void +_PyMem_Initialize(struct _pymem_runtime_state *state) +{ + state->allocators.raw = _pymem_raw; + state->allocators.mem = _pymem; + state->allocators.obj = _pyobject; + +#ifdef WITH_PYMALLOC + for (int i = 0; i < 8; i++) { + if (NB_SMALL_SIZE_CLASSES <= i * 8) + break; + for (int j = 0; j < 8; j++) { + int x = i * 8 
+ j; + poolp *addr = &(state->usedpools[2*(x)]); + poolp val = (poolp)((uint8_t *)addr - 2*sizeof(pyblock *)); + state->usedpools[x * 2] = val; + state->usedpools[x * 2 + 1] = val; + }; + }; +#endif /* WITH_PYMALLOC */ +} + #ifdef WITH_PYMALLOC static int _PyMem_DebugEnabled(void) @@ -363,13 +395,13 @@ PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator) void PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator) { - *allocator = _PyObject_Arena; + *allocator = _PyRuntime.obj.allocator_arenas; } void PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator) { - _PyObject_Arena = *allocator; + _PyRuntime.obj.allocator_arenas = *allocator; } void * @@ -404,7 +436,8 @@ PyMem_RawRealloc(void *ptr, size_t new_size) return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size); } -void PyMem_RawFree(void *ptr) +void +PyMem_RawFree(void *ptr) { _PyMem_Raw.free(_PyMem_Raw.ctx, ptr); } @@ -521,497 +554,10 @@ PyObject_Free(void *ptr) static int running_on_valgrind = -1; #endif -/* An object allocator for Python. - - Here is an introduction to the layers of the Python memory architecture, - showing where the object allocator is actually used (layer +2), It is - called for every object allocation and deallocation (PyObject_New/Del), - unless the object-specific allocators implement a proprietary allocation - scheme (ex.: ints use a simple free list). This is also the place where - the cyclic garbage collector operates selectively on container objects. - - - Object-specific allocators - _____ ______ ______ ________ - [ int ] [ dict ] [ list ] ... 
[ string ] Python core | -+3 | <----- Object-specific memory -----> | <-- Non-object memory --> | - _______________________________ | | - [ Python's object allocator ] | | -+2 | ####### Object memory ####### | <------ Internal buffers ------> | - ______________________________________________________________ | - [ Python's raw memory allocator (PyMem_ API) ] | -+1 | <----- Python memory (under PyMem manager's control) ------> | | - __________________________________________________________________ - [ Underlying general-purpose allocator (ex: C library malloc) ] - 0 | <------ Virtual memory allocated for the python process -------> | - - ========================================================================= - _______________________________________________________________________ - [ OS-specific Virtual Memory Manager (VMM) ] --1 | <--- Kernel dynamic storage allocation & management (page-based) ---> | - __________________________________ __________________________________ - [ ] [ ] --2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> | - -*/ -/*==========================================================================*/ - -/* A fast, special-purpose memory allocator for small blocks, to be used - on top of a general-purpose malloc -- heavily based on previous art. */ - -/* Vladimir Marangozov -- August 2000 */ - -/* - * "Memory management is where the rubber meets the road -- if we do the wrong - * thing at any level, the results will not be good. And if we don't make the - * levels work well together, we are in serious trouble." (1) - * - * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles, - * "Dynamic Storage Allocation: A Survey and Critical Review", - * in Proc. 1995 Int'l. Workshop on Memory Management, September 1995. 
- */ - -/* #undef WITH_MEMORY_LIMITS */ /* disable mem limit checks */ - -/*==========================================================================*/ - -/* - * Allocation strategy abstract: - * - * For small requests, the allocator sub-allocates blocks of memory. - * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the - * system's allocator. - * - * Small requests are grouped in size classes spaced 8 bytes apart, due - * to the required valid alignment of the returned address. Requests of - * a particular size are serviced from memory pools of 4K (one VMM page). - * Pools are fragmented on demand and contain free lists of blocks of one - * particular size class. In other words, there is a fixed-size allocator - * for each size class. Free pools are shared by the different allocators - * thus minimizing the space reserved for a particular size class. - * - * This allocation strategy is a variant of what is known as "simple - * segregated storage based on array of free lists". The main drawback of - * simple segregated storage is that we might end up with lot of reserved - * memory for the different free lists, which degenerate in time. To avoid - * this, we partition each free list in pools and we share dynamically the - * reserved space between all free lists. This technique is quite efficient - * for memory intensive programs which allocate mainly small-sized blocks. - * - * For small requests we have the following table: - * - * Request in bytes Size of allocated block Size class idx - * ---------------------------------------------------------------- - * 1-8 8 0 - * 9-16 16 1 - * 17-24 24 2 - * 25-32 32 3 - * 33-40 40 4 - * 41-48 48 5 - * 49-56 56 6 - * 57-64 64 7 - * 65-72 72 8 - * ... ... ... - * 497-504 504 62 - * 505-512 512 63 - * - * 0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying - * allocator. 
- */ - -/*==========================================================================*/ - -/* - * -- Main tunable settings section -- - */ - -/* - * Alignment of addresses returned to the user. 8-bytes alignment works - * on most current architectures (with 32-bit or 64-bit address busses). - * The alignment value is also used for grouping small requests in size - * classes spaced ALIGNMENT bytes apart. - * - * You shouldn't change this unless you know what you are doing. - */ -#define ALIGNMENT 8 /* must be 2^N */ -#define ALIGNMENT_SHIFT 3 - -/* Return the number of bytes in size class I, as a uint. */ -#define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT) - -/* - * Max size threshold below which malloc requests are considered to be - * small enough in order to use preallocated memory pools. You can tune - * this value according to your application behaviour and memory needs. - * - * Note: a size threshold of 512 guarantees that newly created dictionaries - * will be allocated from preallocated memory pools on 64-bit. - * - * The following invariants must hold: - * 1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512 - * 2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT - * - * Although not required, for better performance and space efficiency, - * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2. - */ -#define SMALL_REQUEST_THRESHOLD 512 -#define NB_SMALL_SIZE_CLASSES (SMALL_REQUEST_THRESHOLD / ALIGNMENT) - -/* - * The system's VMM page size can be obtained on most unices with a - * getpagesize() call or deduced from various header files. To make - * things simpler, we assume that it is 4K, which is OK for most systems. - * It is probably better if this is the native page size, but it doesn't - * have to be. In theory, if SYSTEM_PAGE_SIZE is larger than the native page - * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation - * violation fault. 
4K is apparently OK for all the platforms that python - * currently targets. - */ -#define SYSTEM_PAGE_SIZE (4 * 1024) -#define SYSTEM_PAGE_SIZE_MASK (SYSTEM_PAGE_SIZE - 1) - -/* - * Maximum amount of memory managed by the allocator for small requests. - */ -#ifdef WITH_MEMORY_LIMITS -#ifndef SMALL_MEMORY_LIMIT -#define SMALL_MEMORY_LIMIT (64 * 1024 * 1024) /* 64 MB -- more? */ -#endif -#endif - -/* - * The allocator sub-allocates blocks of memory (called arenas) aligned - * on a page boundary. This is a reserved virtual address space for the - * current process (obtained through a malloc()/mmap() call). In no way this - * means that the memory arenas will be used entirely. A malloc() is - * usually an address range reservation for bytes, unless all pages within - * this space are referenced subsequently. So malloc'ing big blocks and not - * using them does not mean "wasting memory". It's an addressable range - * wastage... - * - * Arenas are allocated with mmap() on systems supporting anonymous memory - * mappings to reduce heap fragmentation. - */ -#define ARENA_SIZE (256 << 10) /* 256KB */ - -#ifdef WITH_MEMORY_LIMITS -#define MAX_ARENAS (SMALL_MEMORY_LIMIT / ARENA_SIZE) -#endif - -/* - * Size of the pools used for small blocks. Should be a power of 2, - * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k. - */ -#define POOL_SIZE SYSTEM_PAGE_SIZE /* must be 2^N */ -#define POOL_SIZE_MASK SYSTEM_PAGE_SIZE_MASK - -/* - * -- End of tunable settings section -- - */ - -/*==========================================================================*/ - -/* - * Locking - * - * To reduce lock contention, it would probably be better to refine the - * crude function locking with per size class locking. I'm not positive - * however, whether it's worth switching to such locking policy because - * of the performance penalty it might introduce. 
- * - * The following macros describe the simplest (should also be the fastest) - * lock object on a particular platform and the init/fini/lock/unlock - * operations on it. The locks defined here are not expected to be recursive - * because it is assumed that they will always be called in the order: - * INIT, [LOCK, UNLOCK]*, FINI. - */ - -/* - * Python's threads are serialized, so object malloc locking is disabled. - */ -#define SIMPLELOCK_DECL(lock) /* simple lock declaration */ -#define SIMPLELOCK_INIT(lock) /* allocate (if needed) and initialize */ -#define SIMPLELOCK_FINI(lock) /* free/destroy an existing lock */ -#define SIMPLELOCK_LOCK(lock) /* acquire released lock */ -#define SIMPLELOCK_UNLOCK(lock) /* release acquired lock */ - -/* When you say memory, my mind reasons in terms of (pointers to) blocks */ -typedef uint8_t block; - -/* Pool for small blocks. */ -struct pool_header { - union { block *_padding; - uint count; } ref; /* number of allocated blocks */ - block *freeblock; /* pool's free list head */ - struct pool_header *nextpool; /* next pool of this size class */ - struct pool_header *prevpool; /* previous pool "" */ - uint arenaindex; /* index into arenas of base adr */ - uint szidx; /* block size class index */ - uint nextoffset; /* bytes to virgin block */ - uint maxnextoffset; /* largest valid nextoffset */ -}; - -typedef struct pool_header *poolp; - -/* Record keeping for arenas. */ -struct arena_object { - /* The address of the arena, as returned by malloc. Note that 0 - * will never be returned by a successful malloc, and is used - * here to mark an arena_object that doesn't correspond to an - * allocated arena. - */ - uintptr_t address; - - /* Pool-aligned pointer to the next pool to be carved off. */ - block* pool_address; - - /* The number of available pools in the arena: free pools + never- - * allocated pools. - */ - uint nfreepools; - - /* The total number of pools in the arena, whether or not available. 
*/ - uint ntotalpools; - - /* Singly-linked list of available pools. */ - struct pool_header* freepools; - - /* Whenever this arena_object is not associated with an allocated - * arena, the nextarena member is used to link all unassociated - * arena_objects in the singly-linked `unused_arena_objects` list. - * The prevarena member is unused in this case. - * - * When this arena_object is associated with an allocated arena - * with at least one available pool, both members are used in the - * doubly-linked `usable_arenas` list, which is maintained in - * increasing order of `nfreepools` values. - * - * Else this arena_object is associated with an allocated arena - * all of whose pools are in use. `nextarena` and `prevarena` - * are both meaningless in this case. - */ - struct arena_object* nextarena; - struct arena_object* prevarena; -}; - -#define POOL_OVERHEAD _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT) - -#define DUMMY_SIZE_IDX 0xffff /* size class of newly cached pools */ - -/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */ -#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE)) - -/* Return total number of blocks in pool of size index I, as a uint. */ -#define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I)) - -/*==========================================================================*/ - -/* - * This malloc lock - */ -SIMPLELOCK_DECL(_malloc_lock) -#define LOCK() SIMPLELOCK_LOCK(_malloc_lock) -#define UNLOCK() SIMPLELOCK_UNLOCK(_malloc_lock) -#define LOCK_INIT() SIMPLELOCK_INIT(_malloc_lock) -#define LOCK_FINI() SIMPLELOCK_FINI(_malloc_lock) - -/* - * Pool table -- headed, circular, doubly-linked lists of partially used pools. - -This is involved. For an index i, usedpools[i+i] is the header for a list of -all partially used pools holding small blocks with "size class idx" i. 
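So -usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size -16, and so on: index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT. - -Pools are carved off an arena's highwater mark (an arena_object's pool_address -member) as needed. Once carved off, a pool is in one of three states forever -after: - -used == partially used, neither empty nor full - At least one block in the pool is currently allocated, and at least one - block in the pool is not currently allocated (note this implies a pool - has room for at least two blocks). - This is a pool's initial state, as a pool is created only when malloc - needs space. - The pool holds blocks of a fixed size, and is in the circular list headed - at usedpools[i] (see above). It's linked to the other used pools of the - same size class via the pool_header's nextpool and prevpool members. - If all but one block is currently allocated, a malloc can cause a - transition to the full state. If all but one block is not currently - allocated, a free can cause a transition to the empty state. - -full == all the pool's blocks are currently allocated - On transition to full, a pool is unlinked from its usedpools[] list. - It's not linked to from anything then anymore, and its nextpool and - prevpool members are meaningless until it transitions back to used. - A free of a block in a full pool puts the pool back in the used state. - Then it's linked in at the front of the appropriate usedpools[] list, so - that the next allocation for its size class will reuse the freed block. - -empty == all the pool's blocks are currently available for allocation - On transition to empty, a pool is unlinked from its usedpools[] list, - and linked to the front of its arena_object's singly-linked freepools list, - via its nextpool member. The prevpool member has no meaning in this case. - Empty pools have no inherent size class: the next time a malloc finds - an empty list in usedpools[], it takes the first pool off of freepools. - If the size class needed happens to be the same as the size class the pool - last had, some pool initialization can be skipped. - - -Block Management - -Blocks within pools are again carved out as needed. pool->freeblock points to -the start of a singly-linked list of free blocks within the pool. When a -block is freed, it's inserted at the front of its pool's freeblock list. Note -that the available blocks in a pool are *not* linked all together when a pool -is initialized. Instead only "the first two" (lowest addresses) blocks are -set up, returning the first such block, and setting pool->freeblock to a -one-block list holding the second such block. This is consistent with that -pymalloc strives at all levels (arena, pool, and block) never to touch a piece -of memory until it's actually needed. - -So long as a pool is in the used state, we're certain there *is* a block -available for allocating, and pool->freeblock is not NULL. If pool->freeblock -points to the end of the free list before we've carved the entire pool into -blocks, that means we simply haven't yet gotten to one of the higher-address -blocks. The offset from the pool_header to the start of "the next" virgin -block is stored in the pool_header nextoffset member, and the largest value -of nextoffset that makes sense is stored in the maxnextoffset member when a -pool is initialized. All the blocks in a pool have been passed out at least -once when and only when nextoffset > maxnextoffset. - - -Major obscurity: While the usedpools vector is declared to have poolp -entries, it doesn't really. It really contains two pointers per (conceptual) -poolp entry, the nextpool and prevpool members of a pool_header. The -excruciating initialization code below fools C so that - - usedpool[i+i] - -"acts like" a genuine poolp, but only so long as you only reference its -nextpool and prevpool members.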
The "- 2*sizeof(block *)" gibberish is -compensating for that a pool_header's nextpool and prevpool members -immediately follow a pool_header's first two members: - - union { block *_padding; - uint count; } ref; - block *freeblock; - -each of which consume sizeof(block *) bytes. So what usedpools[i+i] really -contains is a fudged-up pointer p such that *if* C believes it's a poolp -pointer, then p->nextpool and p->prevpool are both p (meaning that the headed -circular list is empty). - -It's unclear why the usedpools setup is so convoluted. It could be to -minimize the amount of cache required to hold this heavily-referenced table -(which only *needs* the two interpool pointer members of a pool_header). OTOH, -referencing code has to remember to "double the index" and doing so isn't -free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying -on that C doesn't insert any padding anywhere in a pool_header at or before -the prevpool member. -**************************************************************************** */ - -#define PTA(x) ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *))) -#define PT(x) PTA(x), PTA(x) - -static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = { - PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7) -#if NB_SMALL_SIZE_CLASSES > 8 - , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15) -#if NB_SMALL_SIZE_CLASSES > 16 - , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23) -#if NB_SMALL_SIZE_CLASSES > 24 - , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31) -#if NB_SMALL_SIZE_CLASSES > 32 - , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39) -#if NB_SMALL_SIZE_CLASSES > 40 - , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47) -#if NB_SMALL_SIZE_CLASSES > 48 - , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55) -#if NB_SMALL_SIZE_CLASSES > 56 - , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63) -#if 
NB_SMALL_SIZE_CLASSES > 64 -#error "NB_SMALL_SIZE_CLASSES should be less than 64" -#endif /* NB_SMALL_SIZE_CLASSES > 64 */ -#endif /* NB_SMALL_SIZE_CLASSES > 56 */ -#endif /* NB_SMALL_SIZE_CLASSES > 48 */ -#endif /* NB_SMALL_SIZE_CLASSES > 40 */ -#endif /* NB_SMALL_SIZE_CLASSES > 32 */ -#endif /* NB_SMALL_SIZE_CLASSES > 24 */ -#endif /* NB_SMALL_SIZE_CLASSES > 16 */ -#endif /* NB_SMALL_SIZE_CLASSES > 8 */ -}; - -/*========================================================================== -Arena management. - -`arenas` is a vector of arena_objects. It contains maxarenas entries, some of -which may not be currently used (== they're arena_objects that aren't -currently associated with an allocated arena). Note that arenas proper are -separately malloc'ed. - -Prior to Python 2.5, arenas were never free()'ed. Starting with Python 2.5, -we do try to free() arenas, and use some mild heuristic strategies to increase -the likelihood that arenas eventually can be freed. - -unused_arena_objects - - This is a singly-linked list of the arena_objects that are currently not - being used (no arena is associated with them). Objects are taken off the - head of the list in new_arena(), and are pushed on the head of the list in - PyObject_Free() when the arena is empty. Key invariant: an arena_object - is on this list if and only if its .address member is 0. - -usable_arenas - - This is a doubly-linked list of the arena_objects associated with arenas - that have pools available. These pools are either waiting to be reused, - or have not been used before. The list is sorted to have the most- - allocated arenas first (ascending order based on the nfreepools member). - This means that the next allocation will come from a heavily used arena, - which gives the nearly empty arenas a chance to be returned to the system. - In my unscientific tests this dramatically improved the number of arenas - that could be freed. 
- -Note that an arena_object associated with an arena all of whose pools are -currently in use isn't on either list. -*/ - -/* Array of objects used to track chunks of memory (arenas). */ -static struct arena_object* arenas = NULL; -/* Number of slots currently allocated in the `arenas` vector. */ -static uint maxarenas = 0; - -/* The head of the singly-linked, NULL-terminated list of available - * arena_objects. - */ -static struct arena_object* unused_arena_objects = NULL; - -/* The head of the doubly-linked, NULL-terminated at each end, list of - * arena_objects associated with arenas that have pools available. - */ -static struct arena_object* usable_arenas = NULL; - -/* How many arena_objects do we initially allocate? - * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the - * `arenas` vector. - */ -#define INITIAL_ARENA_OBJECTS 16 - -/* Number of arenas allocated that haven't been free()'d. */ -static size_t narenas_currently_allocated = 0; - -/* Total number of times malloc() called to allocate an arena. */ -static size_t ntimes_arena_allocated = 0; -/* High water mark (max value ever seen) for narenas_currently_allocated. */ -static size_t narenas_highwater = 0; - -static Py_ssize_t _Py_AllocatedBlocks = 0; - Py_ssize_t _Py_GetAllocatedBlocks(void) { - return _Py_AllocatedBlocks; + return _PyRuntime.mem.num_allocated_blocks; } @@ -1035,7 +581,7 @@ new_arena(void) if (debug_stats) _PyObject_DebugMallocStats(stderr); - if (unused_arena_objects == NULL) { + if (_PyRuntime.mem.unused_arena_objects == NULL) { uint i; uint numarenas; size_t nbytes; @@ -1043,18 +589,18 @@ new_arena(void) /* Double the number of arena objects on each allocation. * Note that it's possible for `numarenas` to overflow. */ - numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS; - if (numarenas <= maxarenas) + numarenas = _PyRuntime.mem.maxarenas ? 
_PyRuntime.mem.maxarenas << 1 : INITIAL_ARENA_OBJECTS; + if (numarenas <= _PyRuntime.mem.maxarenas) return NULL; /* overflow */ #if SIZEOF_SIZE_T <= SIZEOF_INT - if (numarenas > SIZE_MAX / sizeof(*arenas)) + if (numarenas > SIZE_MAX / sizeof(*_PyRuntime.mem.arenas)) return NULL; /* overflow */ #endif - nbytes = numarenas * sizeof(*arenas); - arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes); + nbytes = numarenas * sizeof(*_PyRuntime.mem.arenas); + arenaobj = (struct arena_object *)PyMem_RawRealloc(_PyRuntime.mem.arenas, nbytes); if (arenaobj == NULL) return NULL; - arenas = arenaobj; + _PyRuntime.mem.arenas = arenaobj; /* We might need to fix pointers that were copied. However, * new_arena only gets called when all the pages in the @@ -1062,45 +608,45 @@ new_arena(void) * into the old array. Thus, we don't have to worry about * invalid pointers. Just to be sure, some asserts: */ - assert(usable_arenas == NULL); - assert(unused_arena_objects == NULL); + assert(_PyRuntime.mem.usable_arenas == NULL); + assert(_PyRuntime.mem.unused_arena_objects == NULL); /* Put the new arenas on the unused_arena_objects list. */ - for (i = maxarenas; i < numarenas; ++i) { - arenas[i].address = 0; /* mark as unassociated */ - arenas[i].nextarena = i < numarenas - 1 ? - &arenas[i+1] : NULL; + for (i = _PyRuntime.mem.maxarenas; i < numarenas; ++i) { + _PyRuntime.mem.arenas[i].address = 0; /* mark as unassociated */ + _PyRuntime.mem.arenas[i].nextarena = i < numarenas - 1 ? + &_PyRuntime.mem.arenas[i+1] : NULL; } /* Update globals. */ - unused_arena_objects = &arenas[maxarenas]; - maxarenas = numarenas; + _PyRuntime.mem.unused_arena_objects = &_PyRuntime.mem.arenas[_PyRuntime.mem.maxarenas]; + _PyRuntime.mem.maxarenas = numarenas; } /* Take the next available arena object off the head of the list. 
*/ - assert(unused_arena_objects != NULL); - arenaobj = unused_arena_objects; - unused_arena_objects = arenaobj->nextarena; + assert(_PyRuntime.mem.unused_arena_objects != NULL); + arenaobj = _PyRuntime.mem.unused_arena_objects; + _PyRuntime.mem.unused_arena_objects = arenaobj->nextarena; assert(arenaobj->address == 0); - address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE); + address = _PyRuntime.obj.allocator_arenas.alloc(_PyRuntime.obj.allocator_arenas.ctx, ARENA_SIZE); if (address == NULL) { /* The allocation failed: return NULL after putting the * arenaobj back. */ - arenaobj->nextarena = unused_arena_objects; - unused_arena_objects = arenaobj; + arenaobj->nextarena = _PyRuntime.mem.unused_arena_objects; + _PyRuntime.mem.unused_arena_objects = arenaobj; return NULL; } arenaobj->address = (uintptr_t)address; - ++narenas_currently_allocated; - ++ntimes_arena_allocated; - if (narenas_currently_allocated > narenas_highwater) - narenas_highwater = narenas_currently_allocated; + ++_PyRuntime.mem.narenas_currently_allocated; + ++_PyRuntime.mem.ntimes_arena_allocated; + if (_PyRuntime.mem.narenas_currently_allocated > _PyRuntime.mem.narenas_highwater) + _PyRuntime.mem.narenas_highwater = _PyRuntime.mem.narenas_currently_allocated; arenaobj->freepools = NULL; /* pool_address <- first pool-aligned address in the arena nfreepools <- number of whole pools that fit after alignment */ - arenaobj->pool_address = (block*)arenaobj->address; + arenaobj->pool_address = (pyblock*)arenaobj->address; arenaobj->nfreepools = ARENA_SIZE / POOL_SIZE; assert(POOL_SIZE * arenaobj->nfreepools == ARENA_SIZE); excess = (uint)(arenaobj->address & POOL_SIZE_MASK); @@ -1197,9 +743,9 @@ address_in_range(void *p, poolp pool) // the GIL. The following dance forces the compiler to read pool->arenaindex // only once. 
uint arenaindex = *((volatile uint *)&pool->arenaindex); - return arenaindex < maxarenas && - (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE && - arenas[arenaindex].address != 0; + return arenaindex < _PyRuntime.mem.maxarenas && + (uintptr_t)p - _PyRuntime.mem.arenas[arenaindex].address < ARENA_SIZE && + _PyRuntime.mem.arenas[arenaindex].address != 0; } /*==========================================================================*/ @@ -1220,12 +766,12 @@ static void * _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) { size_t nbytes; - block *bp; + pyblock *bp; poolp pool; poolp next; uint size; - _Py_AllocatedBlocks++; + _PyRuntime.mem.num_allocated_blocks++; assert(elsize == 0 || nelem <= PY_SSIZE_T_MAX / elsize); nbytes = nelem * elsize; @@ -1246,7 +792,7 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) * Most frequent paths first */ size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT; - pool = usedpools[size + size]; + pool = _PyRuntime.mem.usedpools[size + size]; if (pool != pool->nextpool) { /* * There is a used pool for this size class. @@ -1255,7 +801,7 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) ++pool->ref.count; bp = pool->freeblock; assert(bp != NULL); - if ((pool->freeblock = *(block **)bp) != NULL) { + if ((pool->freeblock = *(pyblock **)bp) != NULL) { UNLOCK(); if (use_calloc) memset(bp, 0, nbytes); @@ -1266,10 +812,10 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) */ if (pool->nextoffset <= pool->maxnextoffset) { /* There is room for another block. 
*/ - pool->freeblock = (block*)pool + + pool->freeblock = (pyblock*)pool + pool->nextoffset; pool->nextoffset += INDEX2SIZE(size); - *(block **)(pool->freeblock) = NULL; + *(pyblock **)(pool->freeblock) = NULL; UNLOCK(); if (use_calloc) memset(bp, 0, nbytes); @@ -1289,29 +835,29 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) /* There isn't a pool of the right size class immediately * available: use a free pool. */ - if (usable_arenas == NULL) { + if (_PyRuntime.mem.usable_arenas == NULL) { /* No arena has a free pool: allocate a new arena. */ #ifdef WITH_MEMORY_LIMITS - if (narenas_currently_allocated >= MAX_ARENAS) { + if (_PyRuntime.mem.narenas_currently_allocated >= MAX_ARENAS) { UNLOCK(); goto redirect; } #endif - usable_arenas = new_arena(); - if (usable_arenas == NULL) { + _PyRuntime.mem.usable_arenas = new_arena(); + if (_PyRuntime.mem.usable_arenas == NULL) { UNLOCK(); goto redirect; } - usable_arenas->nextarena = - usable_arenas->prevarena = NULL; + _PyRuntime.mem.usable_arenas->nextarena = + _PyRuntime.mem.usable_arenas->prevarena = NULL; } - assert(usable_arenas->address != 0); + assert(_PyRuntime.mem.usable_arenas->address != 0); /* Try to get a cached free pool. */ - pool = usable_arenas->freepools; + pool = _PyRuntime.mem.usable_arenas->freepools; if (pool != NULL) { /* Unlink from cached pools. */ - usable_arenas->freepools = pool->nextpool; + _PyRuntime.mem.usable_arenas->freepools = pool->nextpool; /* This arena already had the smallest nfreepools * value, so decreasing nfreepools doesn't change @@ -1320,18 +866,18 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) * become wholly allocated, we need to remove its * arena_object from usable_arenas. */ - --usable_arenas->nfreepools; - if (usable_arenas->nfreepools == 0) { + --_PyRuntime.mem.usable_arenas->nfreepools; + if (_PyRuntime.mem.usable_arenas->nfreepools == 0) { /* Wholly allocated: remove. 
*/ - assert(usable_arenas->freepools == NULL); - assert(usable_arenas->nextarena == NULL || - usable_arenas->nextarena->prevarena == - usable_arenas); - - usable_arenas = usable_arenas->nextarena; - if (usable_arenas != NULL) { - usable_arenas->prevarena = NULL; - assert(usable_arenas->address != 0); + assert(_PyRuntime.mem.usable_arenas->freepools == NULL); + assert(_PyRuntime.mem.usable_arenas->nextarena == NULL || + _PyRuntime.mem.usable_arenas->nextarena->prevarena == + _PyRuntime.mem.usable_arenas); + + _PyRuntime.mem.usable_arenas = _PyRuntime.mem.usable_arenas->nextarena; + if (_PyRuntime.mem.usable_arenas != NULL) { + _PyRuntime.mem.usable_arenas->prevarena = NULL; + assert(_PyRuntime.mem.usable_arenas->address != 0); } } else { @@ -1340,14 +886,14 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) * off all the arena's pools for the first * time. */ - assert(usable_arenas->freepools != NULL || - usable_arenas->pool_address <= - (block*)usable_arenas->address + + assert(_PyRuntime.mem.usable_arenas->freepools != NULL || + _PyRuntime.mem.usable_arenas->pool_address <= + (pyblock*)_PyRuntime.mem.usable_arenas->address + ARENA_SIZE - POOL_SIZE); } init_pool: /* Frontlink to used pools. 
*/ - next = usedpools[size + size]; /* == prev */ + next = _PyRuntime.mem.usedpools[size + size]; /* == prev */ pool->nextpool = next; pool->prevpool = next; next->nextpool = pool; @@ -1360,7 +906,7 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) */ bp = pool->freeblock; assert(bp != NULL); - pool->freeblock = *(block **)bp; + pool->freeblock = *(pyblock **)bp; UNLOCK(); if (use_calloc) memset(bp, 0, nbytes); @@ -1373,11 +919,11 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) */ pool->szidx = size; size = INDEX2SIZE(size); - bp = (block *)pool + POOL_OVERHEAD; + bp = (pyblock *)pool + POOL_OVERHEAD; pool->nextoffset = POOL_OVERHEAD + (size << 1); pool->maxnextoffset = POOL_SIZE - size; pool->freeblock = bp + size; - *(block **)(pool->freeblock) = NULL; + *(pyblock **)(pool->freeblock) = NULL; UNLOCK(); if (use_calloc) memset(bp, 0, nbytes); @@ -1385,26 +931,26 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) } /* Carve off a new pool. 
*/ - assert(usable_arenas->nfreepools > 0); - assert(usable_arenas->freepools == NULL); - pool = (poolp)usable_arenas->pool_address; - assert((block*)pool <= (block*)usable_arenas->address + - ARENA_SIZE - POOL_SIZE); - pool->arenaindex = (uint)(usable_arenas - arenas); - assert(&arenas[pool->arenaindex] == usable_arenas); + assert(_PyRuntime.mem.usable_arenas->nfreepools > 0); + assert(_PyRuntime.mem.usable_arenas->freepools == NULL); + pool = (poolp)_PyRuntime.mem.usable_arenas->pool_address; + assert((pyblock*)pool <= (pyblock*)_PyRuntime.mem.usable_arenas->address + + ARENA_SIZE - POOL_SIZE); + pool->arenaindex = (uint)(_PyRuntime.mem.usable_arenas - _PyRuntime.mem.arenas); + assert(&_PyRuntime.mem.arenas[pool->arenaindex] == _PyRuntime.mem.usable_arenas); pool->szidx = DUMMY_SIZE_IDX; - usable_arenas->pool_address += POOL_SIZE; - --usable_arenas->nfreepools; + _PyRuntime.mem.usable_arenas->pool_address += POOL_SIZE; + --_PyRuntime.mem.usable_arenas->nfreepools; - if (usable_arenas->nfreepools == 0) { - assert(usable_arenas->nextarena == NULL || - usable_arenas->nextarena->prevarena == - usable_arenas); + if (_PyRuntime.mem.usable_arenas->nfreepools == 0) { + assert(_PyRuntime.mem.usable_arenas->nextarena == NULL || + _PyRuntime.mem.usable_arenas->nextarena->prevarena == + _PyRuntime.mem.usable_arenas); /* Unlink the arena: it is completely allocated. 
*/ - usable_arenas = usable_arenas->nextarena; - if (usable_arenas != NULL) { - usable_arenas->prevarena = NULL; - assert(usable_arenas->address != 0); + _PyRuntime.mem.usable_arenas = _PyRuntime.mem.usable_arenas->nextarena; + if (_PyRuntime.mem.usable_arenas != NULL) { + _PyRuntime.mem.usable_arenas->prevarena = NULL; + assert(_PyRuntime.mem.usable_arenas->address != 0); } } @@ -1426,7 +972,7 @@ redirect: else result = PyMem_RawMalloc(nbytes); if (!result) - _Py_AllocatedBlocks--; + _PyRuntime.mem.num_allocated_blocks--; return result; } } @@ -1449,14 +995,14 @@ static void _PyObject_Free(void *ctx, void *p) { poolp pool; - block *lastfree; + pyblock *lastfree; poolp next, prev; uint size; if (p == NULL) /* free(NULL) has no effect */ return; - _Py_AllocatedBlocks--; + _PyRuntime.mem.num_allocated_blocks--; #ifdef WITH_VALGRIND if (UNLIKELY(running_on_valgrind > 0)) @@ -1474,8 +1020,8 @@ _PyObject_Free(void *ctx, void *p) * list in any case). */ assert(pool->ref.count > 0); /* else it was empty */ - *(block **)p = lastfree = pool->freeblock; - pool->freeblock = (block *)p; + *(pyblock **)p = lastfree = pool->freeblock; + pool->freeblock = (pyblock *)p; if (lastfree) { struct arena_object* ao; uint nf; /* ao->nfreepools */ @@ -1501,7 +1047,7 @@ _PyObject_Free(void *ctx, void *p) /* Link the pool to freepools. This is a singly-linked * list, and pool->prevpool isn't used there. */ - ao = &arenas[pool->arenaindex]; + ao = &_PyRuntime.mem.arenas[pool->arenaindex]; pool->nextpool = ao->freepools; ao->freepools = pool; nf = ++ao->nfreepools; @@ -1530,9 +1076,9 @@ _PyObject_Free(void *ctx, void *p) * usable_arenas pointer. 
*/ if (ao->prevarena == NULL) { - usable_arenas = ao->nextarena; - assert(usable_arenas == NULL || - usable_arenas->address != 0); + _PyRuntime.mem.usable_arenas = ao->nextarena; + assert(_PyRuntime.mem.usable_arenas == NULL || + _PyRuntime.mem.usable_arenas->address != 0); } else { assert(ao->prevarena->nextarena == ao); @@ -1548,14 +1094,14 @@ _PyObject_Free(void *ctx, void *p) /* Record that this arena_object slot is * available to be reused. */ - ao->nextarena = unused_arena_objects; - unused_arena_objects = ao; + ao->nextarena = _PyRuntime.mem.unused_arena_objects; + _PyRuntime.mem.unused_arena_objects = ao; /* Free the entire arena. */ - _PyObject_Arena.free(_PyObject_Arena.ctx, + _PyRuntime.obj.allocator_arenas.free(_PyRuntime.obj.allocator_arenas.ctx, (void *)ao->address, ARENA_SIZE); ao->address = 0; /* mark unassociated */ - --narenas_currently_allocated; + --_PyRuntime.mem.narenas_currently_allocated; UNLOCK(); return; @@ -1566,12 +1112,12 @@ _PyObject_Free(void *ctx, void *p) * ao->nfreepools was 0 before, ao isn't * currently on the usable_arenas list. 
*/ - ao->nextarena = usable_arenas; + ao->nextarena = _PyRuntime.mem.usable_arenas; ao->prevarena = NULL; - if (usable_arenas) - usable_arenas->prevarena = ao; - usable_arenas = ao; - assert(usable_arenas->address != 0); + if (_PyRuntime.mem.usable_arenas) + _PyRuntime.mem.usable_arenas->prevarena = ao; + _PyRuntime.mem.usable_arenas = ao; + assert(_PyRuntime.mem.usable_arenas->address != 0); UNLOCK(); return; @@ -1601,8 +1147,8 @@ _PyObject_Free(void *ctx, void *p) } else { /* ao is at the head of the list */ - assert(usable_arenas == ao); - usable_arenas = ao->nextarena; + assert(_PyRuntime.mem.usable_arenas == ao); + _PyRuntime.mem.usable_arenas = ao->nextarena; } ao->nextarena->prevarena = ao->prevarena; @@ -1631,7 +1177,7 @@ _PyObject_Free(void *ctx, void *p) nf > ao->prevarena->nfreepools); assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao); - assert((usable_arenas == ao && + assert((_PyRuntime.mem.usable_arenas == ao && ao->prevarena == NULL) || ao->prevarena->nextarena == ao); @@ -1647,7 +1193,7 @@ _PyObject_Free(void *ctx, void *p) --pool->ref.count; assert(pool->ref.count > 0); /* else the pool is empty */ size = pool->szidx; - next = usedpools[size + size]; + next = _PyRuntime.mem.usedpools[size + size]; prev = next->prevpool; /* insert pool before next: prev <-> pool <-> next */ pool->nextpool = next; @@ -1769,15 +1315,13 @@ _Py_GetAllocatedBlocks(void) #define DEADBYTE 0xDB /* dead (newly freed) memory */ #define FORBIDDENBYTE 0xFB /* untouchable bytes at each end of a block */ -static size_t serialno = 0; /* incremented on each debug {m,re}alloc */ - /* serialno is always incremented via calling this routine. The point is * to supply a single place to set a breakpoint. 
*/ static void bumpserialno(void) { - ++serialno; + ++_PyRuntime.mem.serialno; } #define SST SIZEOF_SIZE_T @@ -1868,7 +1412,7 @@ _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes) /* at tail, write pad (SST bytes) and serialno (SST bytes) */ tail = p + 2*SST + nbytes; memset(tail, FORBIDDENBYTE, SST); - write_size_t(tail + SST, serialno); + write_size_t(tail + SST, _PyRuntime.mem.serialno); return p + 2*SST; } @@ -1953,7 +1497,7 @@ _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes) tail = q + nbytes; memset(tail, FORBIDDENBYTE, SST); - write_size_t(tail + SST, serialno); + write_size_t(tail + SST, _PyRuntime.mem.serialno); if (nbytes > original_nbytes) { /* growing: mark new extra memory clean */ @@ -2285,16 +1829,16 @@ _PyObject_DebugMallocStats(FILE *out) * to march over all the arenas. If we're lucky, most of the memory * will be living in full pools -- would be a shame to miss them. */ - for (i = 0; i < maxarenas; ++i) { + for (i = 0; i < _PyRuntime.mem.maxarenas; ++i) { uint j; - uintptr_t base = arenas[i].address; + uintptr_t base = _PyRuntime.mem.arenas[i].address; /* Skip arenas which are not allocated. 
*/ - if (arenas[i].address == (uintptr_t)NULL) + if (_PyRuntime.mem.arenas[i].address == (uintptr_t)NULL) continue; narenas += 1; - numfreepools += arenas[i].nfreepools; + numfreepools += _PyRuntime.mem.arenas[i].nfreepools; /* round up to pool alignment */ if (base & (uintptr_t)POOL_SIZE_MASK) { @@ -2304,8 +1848,8 @@ _PyObject_DebugMallocStats(FILE *out) } /* visit every pool in the arena */ - assert(base <= (uintptr_t) arenas[i].pool_address); - for (j = 0; base < (uintptr_t) arenas[i].pool_address; + assert(base <= (uintptr_t) _PyRuntime.mem.arenas[i].pool_address); + for (j = 0; base < (uintptr_t) _PyRuntime.mem.arenas[i].pool_address; ++j, base += POOL_SIZE) { poolp p = (poolp)base; const uint sz = p->szidx; @@ -2314,7 +1858,7 @@ _PyObject_DebugMallocStats(FILE *out) if (p->ref.count == 0) { /* currently unused */ #ifdef Py_DEBUG - assert(pool_is_in_list(p, arenas[i].freepools)); + assert(pool_is_in_list(p, _PyRuntime.mem.arenas[i].freepools)); #endif continue; } @@ -2324,11 +1868,11 @@ _PyObject_DebugMallocStats(FILE *out) numfreeblocks[sz] += freeblocks; #ifdef Py_DEBUG if (freeblocks > 0) - assert(pool_is_in_list(p, usedpools[sz + sz])); + assert(pool_is_in_list(p, _PyRuntime.mem.usedpools[sz + sz])); #endif } } - assert(narenas == narenas_currently_allocated); + assert(narenas == _PyRuntime.mem.narenas_currently_allocated); fputc('\n', out); fputs("class size num pools blocks in use avail blocks\n" @@ -2356,10 +1900,10 @@ _PyObject_DebugMallocStats(FILE *out) } fputc('\n', out); if (_PyMem_DebugEnabled()) - (void)printone(out, "# times object malloc called", serialno); - (void)printone(out, "# arenas allocated total", ntimes_arena_allocated); - (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas); - (void)printone(out, "# arenas highwater mark", narenas_highwater); + (void)printone(out, "# times object malloc called", _PyRuntime.mem.serialno); + (void)printone(out, "# arenas allocated total", _PyRuntime.mem.ntimes_arena_allocated); + 
(void)printone(out, "# arenas reclaimed", _PyRuntime.mem.ntimes_arena_allocated - narenas); + (void)printone(out, "# arenas highwater mark", _PyRuntime.mem.narenas_highwater); (void)printone(out, "# arenas allocated current", narenas); PyOS_snprintf(buf, sizeof(buf), diff --git a/Objects/setobject.c b/Objects/setobject.c index 219e81d..6001f7b 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -1115,6 +1115,7 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds) } /* The empty frozenset is a singleton */ if (emptyfrozenset == NULL) + /* There is a possible (relatively harmless) race here. */ emptyfrozenset = make_new_set(type, NULL); Py_XINCREF(emptyfrozenset); return emptyfrozenset; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 1d963aa..6bf474a 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1157,10 +1157,10 @@ subtype_dealloc(PyObject *self) /* UnTrack and re-Track around the trashcan macro, alas */ /* See explanation at end of function for full disclosure */ PyObject_GC_UnTrack(self); - ++_PyTrash_delete_nesting; + ++_PyRuntime.gc.trash_delete_nesting; ++ tstate->trash_delete_nesting; Py_TRASHCAN_SAFE_BEGIN(self); - --_PyTrash_delete_nesting; + --_PyRuntime.gc.trash_delete_nesting; -- tstate->trash_delete_nesting; /* Find the nearest base with a different tp_dealloc */ @@ -1254,10 +1254,10 @@ subtype_dealloc(PyObject *self) Py_DECREF(type); endlabel: - ++_PyTrash_delete_nesting; + ++_PyRuntime.gc.trash_delete_nesting; ++ tstate->trash_delete_nesting; Py_TRASHCAN_SAFE_END(self); - --_PyTrash_delete_nesting; + --_PyRuntime.gc.trash_delete_nesting; -- tstate->trash_delete_nesting; /* Explanation of the weirdness around the trashcan macros: @@ -1297,7 +1297,7 @@ subtype_dealloc(PyObject *self) a subtle disaster. Q. Why the bizarre (net-zero) manipulation of - _PyTrash_delete_nesting around the trashcan macros? + _PyRuntime.gc.trash_delete_nesting around the trashcan macros? A. Some base classes (e.g. 
list) also use the trashcan mechanism. The following scenario used to be possible: diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 8ebb22e..5db80b6 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -106,6 +106,14 @@ + + + + + + + + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index cbe1a39..e5a9b62 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -129,6 +129,30 @@ Include + + Include + + + Include + + + Include + + + Include + + + Include + + + Include + + + Include + + + Include + Include diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c index e386248..fd927c0 100644 --- a/Parser/pgenmain.c +++ b/Parser/pgenmain.c @@ -21,10 +21,12 @@ #include "node.h" #include "parsetok.h" #include "pgen.h" +#include "internal/_mem.h" int Py_DebugFlag; int Py_VerboseFlag; int Py_IgnoreEnvironmentFlag; +struct pyruntimestate _PyRuntime = {}; /* Forward */ grammar *getgrammar(const char *filename); @@ -61,6 +63,8 @@ main(int argc, char **argv) filename = argv[1]; graminit_h = argv[2]; graminit_c = argv[3]; + _PyObject_Initialize(&_PyRuntime.obj); + _PyMem_Initialize(&_PyRuntime.mem); g = getgrammar(filename); fp = fopen(graminit_c, "w"); if (fp == NULL) { diff --git a/Python/_warnings.c b/Python/_warnings.c index 8616195..a5e42a3 100644 --- a/Python/_warnings.c +++ b/Python/_warnings.c @@ -8,13 +8,6 @@ PyDoc_STRVAR(warnings__doc__, MODULE_NAME " provides basic warning filtering support.\n" "It is a helper module to speed up interpreter start-up."); -/* Both 'filters' and 'onceregistry' can be set in warnings.py; - get_warnings_attr() will reset these variables accordingly. 
*/ -static PyObject *_filters; /* List */ -static PyObject *_once_registry; /* Dict */ -static PyObject *_default_action; /* String */ -static long _filters_version; - _Py_IDENTIFIER(argv); _Py_IDENTIFIER(stderr); @@ -53,7 +46,7 @@ get_warnings_attr(const char *attr, int try_import) } /* don't try to import after the start of the Python finallization */ - if (try_import && _Py_Finalizing == NULL) { + if (try_import && !_Py_IS_FINALIZING()) { warnings_module = PyImport_Import(warnings_str); if (warnings_module == NULL) { /* Fallback to the C implementation if we cannot get @@ -90,10 +83,10 @@ get_once_registry(void) if (registry == NULL) { if (PyErr_Occurred()) return NULL; - return _once_registry; + return _PyRuntime.warnings.once_registry; } - Py_DECREF(_once_registry); - _once_registry = registry; + Py_DECREF(_PyRuntime.warnings.once_registry); + _PyRuntime.warnings.once_registry = registry; return registry; } @@ -108,11 +101,11 @@ get_default_action(void) if (PyErr_Occurred()) { return NULL; } - return _default_action; + return _PyRuntime.warnings.default_action; } - Py_DECREF(_default_action); - _default_action = default_action; + Py_DECREF(_PyRuntime.warnings.default_action); + _PyRuntime.warnings.default_action = default_action; return default_action; } @@ -132,23 +125,24 @@ get_filter(PyObject *category, PyObject *text, Py_ssize_t lineno, return NULL; } else { - Py_DECREF(_filters); - _filters = warnings_filters; + Py_DECREF(_PyRuntime.warnings.filters); + _PyRuntime.warnings.filters = warnings_filters; } - if (_filters == NULL || !PyList_Check(_filters)) { + PyObject *filters = _PyRuntime.warnings.filters; + if (filters == NULL || !PyList_Check(filters)) { PyErr_SetString(PyExc_ValueError, MODULE_NAME ".filters must be a list"); return NULL; } - /* _filters could change while we are iterating over it. */ - for (i = 0; i < PyList_GET_SIZE(_filters); i++) { + /* _PyRuntime.warnings.filters could change while we are iterating over it. 
*/ + for (i = 0; i < PyList_GET_SIZE(filters); i++) { PyObject *tmp_item, *action, *msg, *cat, *mod, *ln_obj; Py_ssize_t ln; int is_subclass, good_msg, good_mod; - tmp_item = PyList_GET_ITEM(_filters, i); + tmp_item = PyList_GET_ITEM(filters, i); if (!PyTuple_Check(tmp_item) || PyTuple_GET_SIZE(tmp_item) != 5) { PyErr_Format(PyExc_ValueError, MODULE_NAME ".filters item %zd isn't a 5-tuple", i); @@ -220,9 +214,9 @@ already_warned(PyObject *registry, PyObject *key, int should_set) version_obj = _PyDict_GetItemId(registry, &PyId_version); if (version_obj == NULL || !PyLong_CheckExact(version_obj) - || PyLong_AsLong(version_obj) != _filters_version) { + || PyLong_AsLong(version_obj) != _PyRuntime.warnings.filters_version) { PyDict_Clear(registry); - version_obj = PyLong_FromLong(_filters_version); + version_obj = PyLong_FromLong(_PyRuntime.warnings.filters_version); if (version_obj == NULL) return -1; if (_PyDict_SetItemId(registry, &PyId_version, version_obj) < 0) { @@ -520,7 +514,7 @@ warn_explicit(PyObject *category, PyObject *message, if (registry == NULL) goto cleanup; } - /* _once_registry[(text, category)] = 1 */ + /* _PyRuntime.warnings.once_registry[(text, category)] = 1 */ rc = update_registry(registry, text, category, 0); } else if (_PyUnicode_EqualToASCIIString(action, "module")) { @@ -910,7 +904,7 @@ warnings_warn_explicit(PyObject *self, PyObject *args, PyObject *kwds) static PyObject * warnings_filters_mutated(PyObject *self, PyObject *args) { - _filters_version++; + _PyRuntime.warnings.filters_version++; Py_RETURN_NONE; } @@ -1160,7 +1154,8 @@ create_filter(PyObject *category, const char *action) } /* This assumes the line number is zero for now. 
*/ - return PyTuple_Pack(5, action_obj, Py_None, category, Py_None, _PyLong_Zero); + return PyTuple_Pack(5, action_obj, Py_None, + category, Py_None, _PyLong_Zero); } static PyObject * @@ -1228,33 +1223,35 @@ _PyWarnings_Init(void) if (m == NULL) return NULL; - if (_filters == NULL) { - _filters = init_filters(); - if (_filters == NULL) + if (_PyRuntime.warnings.filters == NULL) { + _PyRuntime.warnings.filters = init_filters(); + if (_PyRuntime.warnings.filters == NULL) return NULL; } - Py_INCREF(_filters); - if (PyModule_AddObject(m, "filters", _filters) < 0) + Py_INCREF(_PyRuntime.warnings.filters); + if (PyModule_AddObject(m, "filters", _PyRuntime.warnings.filters) < 0) return NULL; - if (_once_registry == NULL) { - _once_registry = PyDict_New(); - if (_once_registry == NULL) + if (_PyRuntime.warnings.once_registry == NULL) { + _PyRuntime.warnings.once_registry = PyDict_New(); + if (_PyRuntime.warnings.once_registry == NULL) return NULL; } - Py_INCREF(_once_registry); - if (PyModule_AddObject(m, "_onceregistry", _once_registry) < 0) + Py_INCREF(_PyRuntime.warnings.once_registry); + if (PyModule_AddObject(m, "_onceregistry", + _PyRuntime.warnings.once_registry) < 0) return NULL; - if (_default_action == NULL) { - _default_action = PyUnicode_FromString("default"); - if (_default_action == NULL) + if (_PyRuntime.warnings.default_action == NULL) { + _PyRuntime.warnings.default_action = PyUnicode_FromString("default"); + if (_PyRuntime.warnings.default_action == NULL) return NULL; } - Py_INCREF(_default_action); - if (PyModule_AddObject(m, "_defaultaction", _default_action) < 0) + Py_INCREF(_PyRuntime.warnings.default_action); + if (PyModule_AddObject(m, "_defaultaction", + _PyRuntime.warnings.default_action) < 0) return NULL; - _filters_version = 0; + _PyRuntime.warnings.filters_version = 0; return m; } diff --git a/Python/ceval.c b/Python/ceval.c index 436e5ca..9741c15 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -36,7 +36,8 @@ extern int 
_PyObject_GetMethod(PyObject *, PyObject *, PyObject **); typedef PyObject *(*callproc)(PyObject *, PyObject *, PyObject *); /* Forward declarations */ -Py_LOCAL_INLINE(PyObject *) call_function(PyObject ***, Py_ssize_t, PyObject *); +Py_LOCAL_INLINE(PyObject *) call_function(PyObject ***, Py_ssize_t, + PyObject *); static PyObject * do_call_core(PyObject *, PyObject *, PyObject *); #ifdef LLTRACE @@ -52,13 +53,15 @@ static int call_trace_protected(Py_tracefunc, PyObject *, static void call_exc_trace(Py_tracefunc, PyObject *, PyThreadState *, PyFrameObject *); static int maybe_call_line_trace(Py_tracefunc, PyObject *, - PyThreadState *, PyFrameObject *, int *, int *, int *); + PyThreadState *, PyFrameObject *, + int *, int *, int *); static void maybe_dtrace_line(PyFrameObject *, int *, int *, int *); static void dtrace_function_entry(PyFrameObject *); static void dtrace_function_return(PyFrameObject *); static PyObject * cmp_outcome(int, PyObject *, PyObject *); -static PyObject * import_name(PyFrameObject *, PyObject *, PyObject *, PyObject *); +static PyObject * import_name(PyFrameObject *, PyObject *, PyObject *, + PyObject *); static PyObject * import_from(PyObject *, PyObject *); static int import_all_from(PyObject *, PyObject *); static void format_exc_check_arg(PyObject *, const char *, PyObject *); @@ -88,7 +91,7 @@ static long dxp[256]; #endif #ifdef WITH_THREAD -#define GIL_REQUEST _Py_atomic_load_relaxed(&gil_drop_request) +#define GIL_REQUEST _Py_atomic_load_relaxed(&_PyRuntime.ceval.gil_drop_request) #else #define GIL_REQUEST 0 #endif @@ -98,22 +101,22 @@ static long dxp[256]; the GIL eventually anyway. 
*/ #define COMPUTE_EVAL_BREAKER() \ _Py_atomic_store_relaxed( \ - &eval_breaker, \ + &_PyRuntime.ceval.eval_breaker, \ GIL_REQUEST | \ - _Py_atomic_load_relaxed(&pendingcalls_to_do) | \ - pending_async_exc) + _Py_atomic_load_relaxed(&_PyRuntime.ceval.pending.calls_to_do) | \ + _PyRuntime.ceval.pending.async_exc) #ifdef WITH_THREAD #define SET_GIL_DROP_REQUEST() \ do { \ - _Py_atomic_store_relaxed(&gil_drop_request, 1); \ - _Py_atomic_store_relaxed(&eval_breaker, 1); \ + _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil_drop_request, 1); \ + _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \ } while (0) #define RESET_GIL_DROP_REQUEST() \ do { \ - _Py_atomic_store_relaxed(&gil_drop_request, 0); \ + _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil_drop_request, 0); \ COMPUTE_EVAL_BREAKER(); \ } while (0) @@ -122,34 +125,28 @@ static long dxp[256]; /* Pending calls are only modified under pending_lock */ #define SIGNAL_PENDING_CALLS() \ do { \ - _Py_atomic_store_relaxed(&pendingcalls_to_do, 1); \ - _Py_atomic_store_relaxed(&eval_breaker, 1); \ + _Py_atomic_store_relaxed(&_PyRuntime.ceval.pending.calls_to_do, 1); \ + _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \ } while (0) #define UNSIGNAL_PENDING_CALLS() \ do { \ - _Py_atomic_store_relaxed(&pendingcalls_to_do, 0); \ + _Py_atomic_store_relaxed(&_PyRuntime.ceval.pending.calls_to_do, 0); \ COMPUTE_EVAL_BREAKER(); \ } while (0) #define SIGNAL_ASYNC_EXC() \ do { \ - pending_async_exc = 1; \ - _Py_atomic_store_relaxed(&eval_breaker, 1); \ + _PyRuntime.ceval.pending.async_exc = 1; \ + _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \ } while (0) #define UNSIGNAL_ASYNC_EXC() \ - do { pending_async_exc = 0; COMPUTE_EVAL_BREAKER(); } while (0) - + do { \ + _PyRuntime.ceval.pending.async_exc = 0; \ + COMPUTE_EVAL_BREAKER(); \ + } while (0) -/* This single variable consolidates all requests to break out of the fast path - in the eval loop. 
*/ -static _Py_atomic_int eval_breaker = {0}; -/* Request for running pending calls. */ -static _Py_atomic_int pendingcalls_to_do = {0}; -/* Request for looking at the `async_exc` field of the current thread state. - Guarded by the GIL. */ -static int pending_async_exc = 0; #ifdef WITH_THREAD @@ -157,12 +154,6 @@ static int pending_async_exc = 0; #include #endif #include "pythread.h" - -static PyThread_type_lock pending_lock = 0; /* for pending calls */ -static unsigned long main_thread = 0; -/* Request for dropping the GIL */ -static _Py_atomic_int gil_drop_request = {0}; - #include "ceval_gil.h" int @@ -178,9 +169,9 @@ PyEval_InitThreads(void) return; create_gil(); take_gil(PyThreadState_GET()); - main_thread = PyThread_get_thread_ident(); - if (!pending_lock) - pending_lock = PyThread_allocate_lock(); + _PyRuntime.ceval.pending.main_thread = PyThread_get_thread_ident(); + if (!_PyRuntime.ceval.pending.lock) + _PyRuntime.ceval.pending.lock = PyThread_allocate_lock(); } void @@ -248,9 +239,9 @@ PyEval_ReInitThreads(void) if (!gil_created()) return; recreate_gil(); - pending_lock = PyThread_allocate_lock(); + _PyRuntime.ceval.pending.lock = PyThread_allocate_lock(); take_gil(current_tstate); - main_thread = PyThread_get_thread_ident(); + _PyRuntime.ceval.pending.main_thread = PyThread_get_thread_ident(); /* Destroy all threads except the current one */ _PyThreadState_DeleteExcept(current_tstate); @@ -294,7 +285,7 @@ PyEval_RestoreThread(PyThreadState *tstate) int err = errno; take_gil(tstate); /* _Py_Finalizing is protected by the GIL */ - if (_Py_Finalizing && tstate != _Py_Finalizing) { + if (_Py_IS_FINALIZING() && !_Py_CURRENTLY_FINALIZING(tstate)) { drop_gil(tstate); PyThread_exit_thread(); assert(0); /* unreachable */ @@ -346,19 +337,11 @@ _PyEval_SignalReceived(void) callback. 
*/ -#define NPENDINGCALLS 32 -static struct { - int (*func)(void *); - void *arg; -} pendingcalls[NPENDINGCALLS]; -static int pendingfirst = 0; -static int pendinglast = 0; - int Py_AddPendingCall(int (*func)(void *), void *arg) { int i, j, result=0; - PyThread_type_lock lock = pending_lock; + PyThread_type_lock lock = _PyRuntime.ceval.pending.lock; /* try a few times for the lock. Since this mechanism is used * for signal handling (on the main thread), there is a (slim) @@ -380,14 +363,14 @@ Py_AddPendingCall(int (*func)(void *), void *arg) return -1; } - i = pendinglast; + i = _PyRuntime.ceval.pending.last; j = (i + 1) % NPENDINGCALLS; - if (j == pendingfirst) { + if (j == _PyRuntime.ceval.pending.first) { result = -1; /* Queue full */ } else { - pendingcalls[i].func = func; - pendingcalls[i].arg = arg; - pendinglast = j; + _PyRuntime.ceval.pending.calls[i].func = func; + _PyRuntime.ceval.pending.calls[i].arg = arg; + _PyRuntime.ceval.pending.last = j; } /* signal main loop */ SIGNAL_PENDING_CALLS(); @@ -405,16 +388,19 @@ Py_MakePendingCalls(void) assert(PyGILState_Check()); - if (!pending_lock) { + if (!_PyRuntime.ceval.pending.lock) { /* initial allocation of the lock */ - pending_lock = PyThread_allocate_lock(); - if (pending_lock == NULL) + _PyRuntime.ceval.pending.lock = PyThread_allocate_lock(); + if (_PyRuntime.ceval.pending.lock == NULL) return -1; } /* only service pending calls on main thread */ - if (main_thread && PyThread_get_thread_ident() != main_thread) + if (_PyRuntime.ceval.pending.main_thread && + PyThread_get_thread_ident() != _PyRuntime.ceval.pending.main_thread) + { return 0; + } /* don't perform recursive pending calls */ if (busy) return 0; @@ -436,16 +422,16 @@ Py_MakePendingCalls(void) void *arg = NULL; /* pop one item off the queue while holding the lock */ - PyThread_acquire_lock(pending_lock, WAIT_LOCK); - j = pendingfirst; - if (j == pendinglast) { + PyThread_acquire_lock(_PyRuntime.ceval.pending.lock, WAIT_LOCK); + j = 
_PyRuntime.ceval.pending.first; + if (j == _PyRuntime.ceval.pending.last) { func = NULL; /* Queue empty */ } else { - func = pendingcalls[j].func; - arg = pendingcalls[j].arg; - pendingfirst = (j + 1) % NPENDINGCALLS; + func = _PyRuntime.ceval.pending.calls[j].func; + arg = _PyRuntime.ceval.pending.calls[j].arg; + _PyRuntime.ceval.pending.first = (j + 1) % NPENDINGCALLS; } - PyThread_release_lock(pending_lock); + PyThread_release_lock(_PyRuntime.ceval.pending.lock); /* having released the lock, perform the callback */ if (func == NULL) break; @@ -489,14 +475,6 @@ error: The two threads could theoretically wiggle around the "busy" variable. */ -#define NPENDINGCALLS 32 -static struct { - int (*func)(void *); - void *arg; -} pendingcalls[NPENDINGCALLS]; -static volatile int pendingfirst = 0; -static volatile int pendinglast = 0; - int Py_AddPendingCall(int (*func)(void *), void *arg) { @@ -506,15 +484,15 @@ Py_AddPendingCall(int (*func)(void *), void *arg) if (busy) return -1; busy = 1; - i = pendinglast; + i = _PyRuntime.ceval.pending.last; j = (i + 1) % NPENDINGCALLS; - if (j == pendingfirst) { + if (j == _PyRuntime.ceval.pending.first) { busy = 0; return -1; /* Queue full */ } - pendingcalls[i].func = func; - pendingcalls[i].arg = arg; - pendinglast = j; + _PyRuntime.ceval.pending.calls[i].func = func; + _PyRuntime.ceval.pending.calls[i].arg = arg; + _PyRuntime.ceval.pending.last = j; SIGNAL_PENDING_CALLS(); busy = 0; @@ -543,12 +521,12 @@ Py_MakePendingCalls(void) int i; int (*func)(void *); void *arg; - i = pendingfirst; - if (i == pendinglast) + i = _PyRuntime.ceval.pending.first; + if (i == _PyRuntime.ceval.pending.last) break; /* Queue empty */ - func = pendingcalls[i].func; - arg = pendingcalls[i].arg; - pendingfirst = (i + 1) % NPENDINGCALLS; + func = _PyRuntime.ceval.pending.calls[i].func; + arg = _PyRuntime.ceval.pending.calls[i].arg; + _PyRuntime.ceval.pending.first = (i + 1) % NPENDINGCALLS; if (func(arg) < 0) { goto error; } @@ -570,20 +548,32 @@ 
error: #ifndef Py_DEFAULT_RECURSION_LIMIT #define Py_DEFAULT_RECURSION_LIMIT 1000 #endif -static int recursion_limit = Py_DEFAULT_RECURSION_LIMIT; -int _Py_CheckRecursionLimit = Py_DEFAULT_RECURSION_LIMIT; + +void +_PyEval_Initialize(struct _ceval_runtime_state *state) +{ + state->recursion_limit = Py_DEFAULT_RECURSION_LIMIT; + state->check_recursion_limit = Py_DEFAULT_RECURSION_LIMIT; + _gil_initialize(&state->gil); +} + +int +_PyEval_CheckRecursionLimit(void) +{ + return _PyRuntime.ceval.check_recursion_limit; +} int Py_GetRecursionLimit(void) { - return recursion_limit; + return _PyRuntime.ceval.recursion_limit; } void Py_SetRecursionLimit(int new_limit) { - recursion_limit = new_limit; - _Py_CheckRecursionLimit = recursion_limit; + _PyRuntime.ceval.recursion_limit = new_limit; + _PyRuntime.ceval.check_recursion_limit = _PyRuntime.ceval.recursion_limit; } /* the macro Py_EnterRecursiveCall() only calls _Py_CheckRecursiveCall() @@ -595,6 +585,7 @@ int _Py_CheckRecursiveCall(const char *where) { PyThreadState *tstate = PyThreadState_GET(); + int recursion_limit = _PyRuntime.ceval.recursion_limit; #ifdef USE_STACKCHECK if (PyOS_CheckStack()) { @@ -603,7 +594,7 @@ _Py_CheckRecursiveCall(const char *where) return -1; } #endif - _Py_CheckRecursionLimit = recursion_limit; + _PyRuntime.ceval.check_recursion_limit = recursion_limit; if (tstate->recursion_critical) /* Somebody asked that we don't check for recursion. */ return 0; @@ -642,13 +633,7 @@ static void restore_and_clear_exc_state(PyThreadState *, PyFrameObject *); static int do_raise(PyObject *, PyObject *); static int unpack_iterable(PyObject *, int, int, PyObject **); -/* Records whether tracing is on for any thread. Counts the number of - threads for which tstate->c_tracefunc is non-NULL, so if the value - is 0, we know we don't have to check this thread's c_tracefunc. 
- This speeds up the if statement in PyEval_EvalFrameEx() after - fast_next_opcode*/ -static int _Py_TracingPossible = 0; - +#define _Py_TracingPossible _PyRuntime.ceval.tracing_possible PyObject * @@ -779,7 +764,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) #define DISPATCH() \ { \ - if (!_Py_atomic_load_relaxed(&eval_breaker)) { \ + if (!_Py_atomic_load_relaxed(&_PyRuntime.ceval.eval_breaker)) { \ FAST_DISPATCH(); \ } \ continue; \ @@ -827,7 +812,8 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) /* Code access macros */ /* The integer overflow is checked by an assertion below. */ -#define INSTR_OFFSET() (sizeof(_Py_CODEUNIT) * (int)(next_instr - first_instr)) +#define INSTR_OFFSET() \ + (sizeof(_Py_CODEUNIT) * (int)(next_instr - first_instr)) #define NEXTOPARG() do { \ _Py_CODEUNIT word = *next_instr; \ opcode = _Py_OPCODE(word); \ @@ -1080,7 +1066,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) async I/O handler); see Py_AddPendingCall() and Py_MakePendingCalls() above. */ - if (_Py_atomic_load_relaxed(&eval_breaker)) { + if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.eval_breaker)) { if (_Py_OPCODE(*next_instr) == SETUP_FINALLY || _Py_OPCODE(*next_instr) == YIELD_FROM) { /* Two cases where we skip running signal handlers and other @@ -1097,12 +1083,16 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) */ goto fast_next_opcode; } - if (_Py_atomic_load_relaxed(&pendingcalls_to_do)) { + if (_Py_atomic_load_relaxed( + &_PyRuntime.ceval.pending.calls_to_do)) + { if (Py_MakePendingCalls() < 0) goto error; } #ifdef WITH_THREAD - if (_Py_atomic_load_relaxed(&gil_drop_request)) { + if (_Py_atomic_load_relaxed( + &_PyRuntime.ceval.gil_drop_request)) + { /* Give another thread a chance */ if (PyThreadState_Swap(NULL) != tstate) Py_FatalError("ceval: tstate mix-up"); @@ -1113,7 +1103,9 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) take_gil(tstate); /* Check if we should make a quick exit. 
*/ - if (_Py_Finalizing && _Py_Finalizing != tstate) { + if (_Py_IS_FINALIZING() && + !_Py_CURRENTLY_FINALIZING(tstate)) + { drop_gil(tstate); PyThread_exit_thread(); } diff --git a/Python/ceval_gil.h b/Python/ceval_gil.h index a3b450b..ef51890 100644 --- a/Python/ceval_gil.h +++ b/Python/ceval_gil.h @@ -8,20 +8,13 @@ /* First some general settings */ -/* microseconds (the Python API uses seconds, though) */ -#define DEFAULT_INTERVAL 5000 -static unsigned long gil_interval = DEFAULT_INTERVAL; -#define INTERVAL (gil_interval >= 1 ? gil_interval : 1) - -/* Enable if you want to force the switching of threads at least every `gil_interval` */ -#undef FORCE_SWITCHING -#define FORCE_SWITCHING +#define INTERVAL (_PyRuntime.ceval.gil.interval >= 1 ? _PyRuntime.ceval.gil.interval : 1) /* Notes about the implementation: - - The GIL is just a boolean variable (gil_locked) whose access is protected + - The GIL is just a boolean variable (locked) whose access is protected by a mutex (gil_mutex), and whose changes are signalled by a condition variable (gil_cond). gil_mutex is taken for short periods of time, and therefore mostly uncontended. @@ -48,7 +41,7 @@ static unsigned long gil_interval = DEFAULT_INTERVAL; - When a thread releases the GIL and gil_drop_request is set, that thread ensures that another GIL-awaiting thread gets scheduled. It does so by waiting on a condition variable (switch_cond) until - the value of gil_last_holder is changed to something else than its + the value of last_holder is changed to something else than its own thread state pointer, indicating that another thread was able to take the GIL. @@ -60,11 +53,7 @@ static unsigned long gil_interval = DEFAULT_INTERVAL; */ #include "condvar.h" -#ifndef Py_HAVE_CONDVAR -#error You need either a POSIX-compatible or a Windows system! 
-#endif -#define MUTEX_T PyMUTEX_T #define MUTEX_INIT(mut) \ if (PyMUTEX_INIT(&(mut))) { \ Py_FatalError("PyMUTEX_INIT(" #mut ") failed"); }; @@ -78,7 +67,6 @@ static unsigned long gil_interval = DEFAULT_INTERVAL; if (PyMUTEX_UNLOCK(&(mut))) { \ Py_FatalError("PyMUTEX_UNLOCK(" #mut ") failed"); }; -#define COND_T PyCOND_T #define COND_INIT(cond) \ if (PyCOND_INIT(&(cond))) { \ Py_FatalError("PyCOND_INIT(" #cond ") failed"); }; @@ -103,48 +91,36 @@ static unsigned long gil_interval = DEFAULT_INTERVAL; } \ +#define DEFAULT_INTERVAL 5000 -/* Whether the GIL is already taken (-1 if uninitialized). This is atomic - because it can be read without any lock taken in ceval.c. */ -static _Py_atomic_int gil_locked = {-1}; -/* Number of GIL switches since the beginning. */ -static unsigned long gil_switch_number = 0; -/* Last PyThreadState holding / having held the GIL. This helps us know - whether anyone else was scheduled after we dropped the GIL. */ -static _Py_atomic_address gil_last_holder = {0}; - -/* This condition variable allows one or several threads to wait until - the GIL is released. In addition, the mutex also protects the above - variables. */ -static COND_T gil_cond; -static MUTEX_T gil_mutex; - -#ifdef FORCE_SWITCHING -/* This condition variable helps the GIL-releasing thread wait for - a GIL-awaiting thread to be scheduled and take the GIL. 
*/ -static COND_T switch_cond; -static MUTEX_T switch_mutex; -#endif - +static void _gil_initialize(struct _gil_runtime_state *state) +{ + _Py_atomic_int uninitialized = {-1}; + state->locked = uninitialized; + state->interval = DEFAULT_INTERVAL; +} static int gil_created(void) { - return _Py_atomic_load_explicit(&gil_locked, _Py_memory_order_acquire) >= 0; + return (_Py_atomic_load_explicit(&_PyRuntime.ceval.gil.locked, + _Py_memory_order_acquire) + ) >= 0; } static void create_gil(void) { - MUTEX_INIT(gil_mutex); + MUTEX_INIT(_PyRuntime.ceval.gil.mutex); #ifdef FORCE_SWITCHING - MUTEX_INIT(switch_mutex); + MUTEX_INIT(_PyRuntime.ceval.gil.switch_mutex); #endif - COND_INIT(gil_cond); + COND_INIT(_PyRuntime.ceval.gil.cond); #ifdef FORCE_SWITCHING - COND_INIT(switch_cond); + COND_INIT(_PyRuntime.ceval.gil.switch_cond); #endif - _Py_atomic_store_relaxed(&gil_last_holder, 0); - _Py_ANNOTATE_RWLOCK_CREATE(&gil_locked); - _Py_atomic_store_explicit(&gil_locked, 0, _Py_memory_order_release); + _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.last_holder, 0); + _Py_ANNOTATE_RWLOCK_CREATE(&_PyRuntime.ceval.gil.locked); + _Py_atomic_store_explicit(&_PyRuntime.ceval.gil.locked, 0, + _Py_memory_order_release); } static void destroy_gil(void) @@ -152,54 +128,62 @@ static void destroy_gil(void) /* some pthread-like implementations tie the mutex to the cond * and must have the cond destroyed first. 
*/ - COND_FINI(gil_cond); - MUTEX_FINI(gil_mutex); + COND_FINI(_PyRuntime.ceval.gil.cond); + MUTEX_FINI(_PyRuntime.ceval.gil.mutex); #ifdef FORCE_SWITCHING - COND_FINI(switch_cond); - MUTEX_FINI(switch_mutex); + COND_FINI(_PyRuntime.ceval.gil.switch_cond); + MUTEX_FINI(_PyRuntime.ceval.gil.switch_mutex); #endif - _Py_atomic_store_explicit(&gil_locked, -1, _Py_memory_order_release); - _Py_ANNOTATE_RWLOCK_DESTROY(&gil_locked); + _Py_atomic_store_explicit(&_PyRuntime.ceval.gil.locked, -1, + _Py_memory_order_release); + _Py_ANNOTATE_RWLOCK_DESTROY(&_PyRuntime.ceval.gil.locked); } static void recreate_gil(void) { - _Py_ANNOTATE_RWLOCK_DESTROY(&gil_locked); + _Py_ANNOTATE_RWLOCK_DESTROY(&_PyRuntime.ceval.gil.locked); /* XXX should we destroy the old OS resources here? */ create_gil(); } static void drop_gil(PyThreadState *tstate) { - if (!_Py_atomic_load_relaxed(&gil_locked)) + if (!_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked)) Py_FatalError("drop_gil: GIL is not locked"); /* tstate is allowed to be NULL (early interpreter init) */ if (tstate != NULL) { /* Sub-interpreter support: threads might have been switched under our feet using PyThreadState_Swap(). Fix the GIL last holder variable so that our heuristics work. 
*/ - _Py_atomic_store_relaxed(&gil_last_holder, (uintptr_t)tstate); + _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.last_holder, + (uintptr_t)tstate); } - MUTEX_LOCK(gil_mutex); - _Py_ANNOTATE_RWLOCK_RELEASED(&gil_locked, /*is_write=*/1); - _Py_atomic_store_relaxed(&gil_locked, 0); - COND_SIGNAL(gil_cond); - MUTEX_UNLOCK(gil_mutex); + MUTEX_LOCK(_PyRuntime.ceval.gil.mutex); + _Py_ANNOTATE_RWLOCK_RELEASED(&_PyRuntime.ceval.gil.locked, /*is_write=*/1); + _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.locked, 0); + COND_SIGNAL(_PyRuntime.ceval.gil.cond); + MUTEX_UNLOCK(_PyRuntime.ceval.gil.mutex); #ifdef FORCE_SWITCHING - if (_Py_atomic_load_relaxed(&gil_drop_request) && tstate != NULL) { - MUTEX_LOCK(switch_mutex); + if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil_drop_request) && + tstate != NULL) + { + MUTEX_LOCK(_PyRuntime.ceval.gil.switch_mutex); /* Not switched yet => wait */ - if ((PyThreadState*)_Py_atomic_load_relaxed(&gil_last_holder) == tstate) { + if (((PyThreadState*)_Py_atomic_load_relaxed( + &_PyRuntime.ceval.gil.last_holder) + ) == tstate) + { RESET_GIL_DROP_REQUEST(); /* NOTE: if COND_WAIT does not atomically start waiting when releasing the mutex, another thread can run through, take the GIL and drop it again, and reset the condition before we even had a chance to wait for it. 
*/ - COND_WAIT(switch_cond, switch_mutex); + COND_WAIT(_PyRuntime.ceval.gil.switch_cond, + _PyRuntime.ceval.gil.switch_mutex); } - MUTEX_UNLOCK(switch_mutex); + MUTEX_UNLOCK(_PyRuntime.ceval.gil.switch_mutex); } #endif } @@ -211,60 +195,65 @@ static void take_gil(PyThreadState *tstate) Py_FatalError("take_gil: NULL tstate"); err = errno; - MUTEX_LOCK(gil_mutex); + MUTEX_LOCK(_PyRuntime.ceval.gil.mutex); - if (!_Py_atomic_load_relaxed(&gil_locked)) + if (!_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked)) goto _ready; - while (_Py_atomic_load_relaxed(&gil_locked)) { + while (_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked)) { int timed_out = 0; unsigned long saved_switchnum; - saved_switchnum = gil_switch_number; - COND_TIMED_WAIT(gil_cond, gil_mutex, INTERVAL, timed_out); + saved_switchnum = _PyRuntime.ceval.gil.switch_number; + COND_TIMED_WAIT(_PyRuntime.ceval.gil.cond, _PyRuntime.ceval.gil.mutex, + INTERVAL, timed_out); /* If we timed out and no switch occurred in the meantime, it is time to ask the GIL-holding thread to drop it. */ if (timed_out && - _Py_atomic_load_relaxed(&gil_locked) && - gil_switch_number == saved_switchnum) { + _Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked) && + _PyRuntime.ceval.gil.switch_number == saved_switchnum) { SET_GIL_DROP_REQUEST(); } } _ready: #ifdef FORCE_SWITCHING - /* This mutex must be taken before modifying gil_last_holder (see drop_gil()). */ - MUTEX_LOCK(switch_mutex); + /* This mutex must be taken before modifying + _PyRuntime.ceval.gil.last_holder (see drop_gil()). 
*/ + MUTEX_LOCK(_PyRuntime.ceval.gil.switch_mutex); #endif /* We now hold the GIL */ - _Py_atomic_store_relaxed(&gil_locked, 1); - _Py_ANNOTATE_RWLOCK_ACQUIRED(&gil_locked, /*is_write=*/1); - - if (tstate != (PyThreadState*)_Py_atomic_load_relaxed(&gil_last_holder)) { - _Py_atomic_store_relaxed(&gil_last_holder, (uintptr_t)tstate); - ++gil_switch_number; + _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.locked, 1); + _Py_ANNOTATE_RWLOCK_ACQUIRED(&_PyRuntime.ceval.gil.locked, /*is_write=*/1); + + if (tstate != (PyThreadState*)_Py_atomic_load_relaxed( + &_PyRuntime.ceval.gil.last_holder)) + { + _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.last_holder, + (uintptr_t)tstate); + ++_PyRuntime.ceval.gil.switch_number; } #ifdef FORCE_SWITCHING - COND_SIGNAL(switch_cond); - MUTEX_UNLOCK(switch_mutex); + COND_SIGNAL(_PyRuntime.ceval.gil.switch_cond); + MUTEX_UNLOCK(_PyRuntime.ceval.gil.switch_mutex); #endif - if (_Py_atomic_load_relaxed(&gil_drop_request)) { + if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil_drop_request)) { RESET_GIL_DROP_REQUEST(); } if (tstate->async_exc != NULL) { _PyEval_SignalAsyncExc(); } - MUTEX_UNLOCK(gil_mutex); + MUTEX_UNLOCK(_PyRuntime.ceval.gil.mutex); errno = err; } void _PyEval_SetSwitchInterval(unsigned long microseconds) { - gil_interval = microseconds; + _PyRuntime.ceval.gil.interval = microseconds; } unsigned long _PyEval_GetSwitchInterval() { - return gil_interval; + return _PyRuntime.ceval.gil.interval; } diff --git a/Python/condvar.h b/Python/condvar.h index 9a71b17..aaa8043 100644 --- a/Python/condvar.h +++ b/Python/condvar.h @@ -37,27 +37,16 @@ * Condition Variable. */ -#ifndef _CONDVAR_H_ -#define _CONDVAR_H_ +#ifndef _CONDVAR_IMPL_H_ +#define _CONDVAR_IMPL_H_ #include "Python.h" - -#ifndef _POSIX_THREADS -/* This means pthreads are not implemented in libc headers, hence the macro - not present in unistd.h. But they still can be implemented as an external - library (e.g. 
gnu pth in pthread emulation) */ -# ifdef HAVE_PTHREAD_H -# include <pthread.h> /* _POSIX_THREADS */ -# endif -#endif +#include "internal/_condvar.h" #ifdef _POSIX_THREADS /* * POSIX support */ -#define Py_HAVE_CONDVAR - -#include <pthread.h> #define PyCOND_ADD_MICROSECONDS(tv, interval) \ do { /* TODO: add overflow and truncation checks */ \ @@ -74,13 +63,11 @@ do { /* TODO: add overflow and truncation checks */ \ #endif /* The following functions return 0 on success, nonzero on error */ -#define PyMUTEX_T pthread_mutex_t #define PyMUTEX_INIT(mut) pthread_mutex_init((mut), NULL) #define PyMUTEX_FINI(mut) pthread_mutex_destroy(mut) #define PyMUTEX_LOCK(mut) pthread_mutex_lock(mut) #define PyMUTEX_UNLOCK(mut) pthread_mutex_unlock(mut) -#define PyCOND_T pthread_cond_t #define PyCOND_INIT(cond) pthread_cond_init((cond), NULL) #define PyCOND_FINI(cond) pthread_cond_destroy(cond) #define PyCOND_SIGNAL(cond) pthread_cond_signal(cond) @@ -116,45 +103,11 @@ PyCOND_TIMEDWAIT(PyCOND_T *cond, PyMUTEX_T *mut, long long us) * Emulated condition variables ones that work with XP and later, plus * example native support on VISTA and onwards. */ -#define Py_HAVE_CONDVAR - - -/* include windows if it hasn't been done before */ -#define WIN32_LEAN_AND_MEAN -#include <windows.h> - -/* options */ -/* non-emulated condition variables are provided for those that want - * to target Windows Vista. Modify this macro to enable them. - */ -#ifndef _PY_EMULATED_WIN_CV -#define _PY_EMULATED_WIN_CV 1 /* use emulated condition variables */ -#endif - -/* fall back to emulation if not targeting Vista */ -#if !defined NTDDI_VISTA || NTDDI_VERSION < NTDDI_VISTA -#undef _PY_EMULATED_WIN_CV -#define _PY_EMULATED_WIN_CV 1 -#endif - #if _PY_EMULATED_WIN_CV /* The mutex is a CriticalSection object and The condition variables is emulated with the help of a semaphore. - Semaphores are available on Windows XP (2003 server) and later.
- We use a Semaphore rather than an auto-reset event, because although - an auto-resent event might appear to solve the lost-wakeup bug (race - condition between releasing the outer lock and waiting) because it - maintains state even though a wait hasn't happened, there is still - a lost wakeup problem if more than one thread are interrupted in the - critical place. A semaphore solves that, because its state is counted, - not Boolean. - Because it is ok to signal a condition variable with no one - waiting, we need to keep track of the number of - waiting threads. Otherwise, the semaphore's state could rise - without bound. This also helps reduce the number of "spurious wakeups" - that would otherwise happen. This implementation still has the problem that the threads woken with a "signal" aren't necessarily those that are already @@ -168,8 +121,6 @@ PyCOND_TIMEDWAIT(PyCOND_T *cond, PyMUTEX_T *mut, long long us) http://www.cse.wustl.edu/~schmidt/win32-cv-1.html */ -typedef CRITICAL_SECTION PyMUTEX_T; - Py_LOCAL_INLINE(int) PyMUTEX_INIT(PyMUTEX_T *cs) { @@ -198,15 +149,6 @@ PyMUTEX_UNLOCK(PyMUTEX_T *cs) return 0; } -/* The ConditionVariable object. 
From XP onwards it is easily emulated with - * a Semaphore - */ - -typedef struct _PyCOND_T -{ - HANDLE sem; - int waiting; /* to allow PyCOND_SIGNAL to be a no-op */ -} PyCOND_T; Py_LOCAL_INLINE(int) PyCOND_INIT(PyCOND_T *cv) @@ -304,12 +246,7 @@ PyCOND_BROADCAST(PyCOND_T *cv) return 0; } -#else - -/* Use native Win7 primitives if build target is Win7 or higher */ - -/* SRWLOCK is faster and better than CriticalSection */ -typedef SRWLOCK PyMUTEX_T; +#else /* !_PY_EMULATED_WIN_CV */ Py_LOCAL_INLINE(int) PyMUTEX_INIT(PyMUTEX_T *cs) @@ -339,8 +276,6 @@ PyMUTEX_UNLOCK(PyMUTEX_T *cs) } -typedef CONDITION_VARIABLE PyCOND_T; - Py_LOCAL_INLINE(int) PyCOND_INIT(PyCOND_T *cv) { @@ -387,4 +322,4 @@ PyCOND_BROADCAST(PyCOND_T *cv) #endif /* _POSIX_THREADS, NT_THREADS */ -#endif /* _CONDVAR_H_ */ +#endif /* _CONDVAR_IMPL_H_ */ diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 662405b..f33e920 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -77,6 +77,30 @@ extern void _PyGILState_Init(PyInterpreterState *, PyThreadState *); extern void _PyGILState_Fini(void); #endif /* WITH_THREAD */ +_PyRuntimeState _PyRuntime = {}; + +void +_PyRuntime_Initialize(void) +{ + /* XXX We only initialize once in the process, which aligns with + the static initialization of the former globals now found in + _PyRuntime. However, _PyRuntime *should* be initialized with + every Py_Initialize() call, but doing so breaks the runtime. + This is because the runtime state is not properly finalized + currently. */ + static int initialized = 0; + if (initialized) + return; + initialized = 1; + _PyRuntimeState_Init(&_PyRuntime); +} + +void +_PyRuntime_Finalize(void) +{ + _PyRuntimeState_Fini(&_PyRuntime); +} + /* Global configuration variable declarations are in pydebug.h */ /* XXX (ncoghlan): move those declarations to pylifecycle.h? 
*/ int Py_DebugFlag; /* Needed by parser.c */ @@ -100,8 +124,6 @@ int Py_LegacyWindowsFSEncodingFlag = 0; /* Uses mbcs instead of utf-8 */ int Py_LegacyWindowsStdioFlag = 0; /* Uses FileIO instead of WindowsConsoleIO */ #endif -PyThreadState *_Py_Finalizing = NULL; - /* Hack to force loading of object files */ int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t) = \ PyOS_mystrnicmp; /* Python/pystrcmp.o */ @@ -119,19 +141,17 @@ PyModule_GetWarningsModule(void) * * Can be called prior to Py_Initialize. */ -int _Py_CoreInitialized = 0; -int _Py_Initialized = 0; int _Py_IsCoreInitialized(void) { - return _Py_CoreInitialized; + return _PyRuntime.core_initialized; } int Py_IsInitialized(void) { - return _Py_Initialized; + return _PyRuntime.initialized; } /* Helper to allow an embedding application to override the normal @@ -544,14 +564,16 @@ void _Py_InitializeCore(const _PyCoreConfig *config) _PyCoreConfig core_config = _PyCoreConfig_INIT; _PyMainInterpreterConfig preinit_config = _PyMainInterpreterConfig_INIT; + _PyRuntime_Initialize(); + if (config != NULL) { core_config = *config; } - if (_Py_Initialized) { + if (_PyRuntime.initialized) { Py_FatalError("Py_InitializeCore: main interpreter already initialized"); } - if (_Py_CoreInitialized) { + if (_PyRuntime.core_initialized) { Py_FatalError("Py_InitializeCore: runtime core already initialized"); } @@ -564,7 +586,14 @@ void _Py_InitializeCore(const _PyCoreConfig *config) * threads still hanging around from a previous Py_Initialize/Finalize * pair :( */ - _Py_Finalizing = NULL; + _PyRuntime.finalizing = NULL; + + if (_PyMem_SetupAllocators(core_config.allocator) < 0) { + fprintf(stderr, + "Error in PYTHONMALLOC: unknown allocator \"%s\"!\n", + core_config.allocator); + exit(1); + } #ifdef __ANDROID__ /* Passing "" to setlocale() on Android requests the C locale rather @@ -606,7 +635,7 @@ void _Py_InitializeCore(const _PyCoreConfig *config) Py_HashRandomizationFlag = 1; } - _PyInterpreterState_Init(); 
+ _PyInterpreterState_Enable(&_PyRuntime); interp = PyInterpreterState_New(); if (interp == NULL) Py_FatalError("Py_InitializeCore: can't make main interpreter"); @@ -698,7 +727,7 @@ void _Py_InitializeCore(const _PyCoreConfig *config) } /* Only when we get here is the runtime core fully initialized */ - _Py_CoreInitialized = 1; + _PyRuntime.core_initialized = 1; } /* Read configuration settings from standard locations @@ -739,10 +768,10 @@ int _Py_InitializeMainInterpreter(const _PyMainInterpreterConfig *config) PyInterpreterState *interp; PyThreadState *tstate; - if (!_Py_CoreInitialized) { + if (!_PyRuntime.core_initialized) { Py_FatalError("Py_InitializeMainInterpreter: runtime core not initialized"); } - if (_Py_Initialized) { + if (_PyRuntime.initialized) { Py_FatalError("Py_InitializeMainInterpreter: main interpreter already initialized"); } @@ -763,7 +792,7 @@ int _Py_InitializeMainInterpreter(const _PyMainInterpreterConfig *config) * This means anything which needs support from extension modules * or pure Python code in the standard library won't work. */ - _Py_Initialized = 1; + _PyRuntime.initialized = 1; return 0; } /* TODO: Report exceptions rather than fatal errors below here */ @@ -808,7 +837,7 @@ int _Py_InitializeMainInterpreter(const _PyMainInterpreterConfig *config) Py_XDECREF(warnings_module); } - _Py_Initialized = 1; + _PyRuntime.initialized = 1; if (!Py_NoSiteFlag) initsite(); /* Module site */ @@ -924,7 +953,7 @@ Py_FinalizeEx(void) PyThreadState *tstate; int status = 0; - if (!_Py_Initialized) + if (!_PyRuntime.initialized) return status; wait_for_thread_shutdown(); @@ -946,9 +975,9 @@ Py_FinalizeEx(void) /* Remaining threads (e.g. daemon threads) will automatically exit after taking the GIL (in PyEval_RestoreThread()). 
*/ - _Py_Finalizing = tstate; - _Py_Initialized = 0; - _Py_CoreInitialized = 0; + _PyRuntime.finalizing = tstate; + _PyRuntime.initialized = 0; + _PyRuntime.core_initialized = 0; /* Flush sys.stdout and sys.stderr */ if (flush_std_files() < 0) { @@ -1110,6 +1139,7 @@ Py_FinalizeEx(void) #endif call_ll_exitfuncs(); + _PyRuntime_Finalize(); return status; } @@ -1139,7 +1169,7 @@ Py_NewInterpreter(void) PyThreadState *tstate, *save_tstate; PyObject *bimod, *sysmod; - if (!_Py_Initialized) + if (!_PyRuntime.initialized) Py_FatalError("Py_NewInterpreter: call Py_Initialize first"); #ifdef WITH_THREAD @@ -1854,20 +1884,19 @@ exit: # include "pythread.h" #endif -static void (*pyexitfunc)(void) = NULL; /* For the atexit module. */ void _Py_PyAtExit(void (*func)(void)) { - pyexitfunc = func; + _PyRuntime.pyexitfunc = func; } static void call_py_exitfuncs(void) { - if (pyexitfunc == NULL) + if (_PyRuntime.pyexitfunc == NULL) return; - (*pyexitfunc)(); + (*_PyRuntime.pyexitfunc)(); PyErr_Clear(); } @@ -1900,22 +1929,19 @@ wait_for_thread_shutdown(void) } #define NEXITFUNCS 32 -static void (*exitfuncs[NEXITFUNCS])(void); -static int nexitfuncs = 0; - int Py_AtExit(void (*func)(void)) { - if (nexitfuncs >= NEXITFUNCS) + if (_PyRuntime.nexitfuncs >= NEXITFUNCS) return -1; - exitfuncs[nexitfuncs++] = func; + _PyRuntime.exitfuncs[_PyRuntime.nexitfuncs++] = func; return 0; } static void call_ll_exitfuncs(void) { - while (nexitfuncs > 0) - (*exitfuncs[--nexitfuncs])(); + while (_PyRuntime.nexitfuncs > 0) + (*_PyRuntime.exitfuncs[--_PyRuntime.nexitfuncs])(); fflush(stdout); fflush(stderr); diff --git a/Python/pystate.c b/Python/pystate.c index 30a3722..3d32077 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -34,55 +34,66 @@ to avoid the expense of doing their own locking). 
extern "C" { #endif -int _PyGILState_check_enabled = 1; +void +_PyRuntimeState_Init(_PyRuntimeState *runtime) +{ + _PyRuntimeState initial = {}; + *runtime = initial; + + _PyObject_Initialize(&runtime->obj); + _PyMem_Initialize(&runtime->mem); + _PyGC_Initialize(&runtime->gc); + _PyEval_Initialize(&runtime->ceval); + + runtime->gilstate.check_enabled = 1; + runtime->gilstate.autoTLSkey = -1; #ifdef WITH_THREAD -#include "pythread.h" -static PyThread_type_lock head_mutex = NULL; /* Protects interp->tstate_head */ -#define HEAD_INIT() (void)(head_mutex || (head_mutex = PyThread_allocate_lock())) -#define HEAD_LOCK() PyThread_acquire_lock(head_mutex, WAIT_LOCK) -#define HEAD_UNLOCK() PyThread_release_lock(head_mutex) - -/* The single PyInterpreterState used by this process' - GILState implementation -*/ -/* TODO: Given interp_main, it may be possible to kill this ref */ -static PyInterpreterState *autoInterpreterState = NULL; -static int autoTLSkey = -1; + runtime->interpreters.mutex = PyThread_allocate_lock(); + if (runtime->interpreters.mutex == NULL) + Py_FatalError("Can't initialize threads for interpreter"); +#endif + runtime->interpreters.next_id = -1; +} + +void +_PyRuntimeState_Fini(_PyRuntimeState *runtime) +{ +#ifdef WITH_THREAD + if (runtime->interpreters.mutex != NULL) { + PyThread_free_lock(runtime->interpreters.mutex); + runtime->interpreters.mutex = NULL; + } +#endif +} + +#ifdef WITH_THREAD +#define HEAD_LOCK() PyThread_acquire_lock(_PyRuntime.interpreters.mutex, \ + WAIT_LOCK) +#define HEAD_UNLOCK() PyThread_release_lock(_PyRuntime.interpreters.mutex) #else -#define HEAD_INIT() /* Nothing */ #define HEAD_LOCK() /* Nothing */ #define HEAD_UNLOCK() /* Nothing */ #endif -static PyInterpreterState *interp_head = NULL; -static PyInterpreterState *interp_main = NULL; - -/* Assuming the current thread holds the GIL, this is the - PyThreadState for the current thread. 
*/ -_Py_atomic_address _PyThreadState_Current = {0}; -PyThreadFrameGetter _PyThreadState_GetFrame = NULL; - #ifdef WITH_THREAD static void _PyGILState_NoteThreadState(PyThreadState* tstate); #endif -/* _next_interp_id is an auto-numbered sequence of small integers. - It gets initialized in _PyInterpreterState_Init(), which is called - in Py_Initialize(), and used in PyInterpreterState_New(). A negative - interpreter ID indicates an error occurred. The main interpreter - will always have an ID of 0. Overflow results in a RuntimeError. - If that becomes a problem later then we can adjust, e.g. by using - a Python int. - - We initialize this to -1 so that the pre-Py_Initialize() value - results in an error. */ -static int64_t _next_interp_id = -1; - void -_PyInterpreterState_Init(void) +_PyInterpreterState_Enable(_PyRuntimeState *runtime) { - _next_interp_id = 0; + runtime->interpreters.next_id = 0; +#ifdef WITH_THREAD + /* Since we only call _PyRuntimeState_Init() once per process + (see _PyRuntime_Initialize()), we make sure the mutex is + initialized here. 
*/ + if (runtime->interpreters.mutex == NULL) { + runtime->interpreters.mutex = PyThread_allocate_lock(); + if (runtime->interpreters.mutex == NULL) + Py_FatalError("Can't initialize threads for interpreter"); + } +#endif } PyInterpreterState * @@ -92,16 +103,16 @@ PyInterpreterState_New(void) PyMem_RawMalloc(sizeof(PyInterpreterState)); if (interp != NULL) { - HEAD_INIT(); -#ifdef WITH_THREAD - if (head_mutex == NULL) - Py_FatalError("Can't initialize threads for interpreter"); -#endif interp->modules_by_index = NULL; interp->sysdict = NULL; interp->builtins = NULL; interp->builtins_copy = NULL; interp->tstate_head = NULL; + interp->check_interval = 100; + interp->warnoptions = NULL; + interp->xoptions = NULL; + interp->num_threads = 0; + interp->pythread_stacksize = 0; interp->codec_search_path = NULL; interp->codec_search_cache = NULL; interp->codec_error_registry = NULL; @@ -125,19 +136,19 @@ PyInterpreterState_New(void) #endif HEAD_LOCK(); - interp->next = interp_head; - if (interp_main == NULL) { - interp_main = interp; + interp->next = _PyRuntime.interpreters.head; + if (_PyRuntime.interpreters.main == NULL) { + _PyRuntime.interpreters.main = interp; } - interp_head = interp; - if (_next_interp_id < 0) { + _PyRuntime.interpreters.head = interp; + if (_PyRuntime.interpreters.next_id < 0) { /* overflow or Py_Initialize() not called! 
*/ PyErr_SetString(PyExc_RuntimeError, "failed to get an interpreter ID"); interp = NULL; } else { - interp->id = _next_interp_id; - _next_interp_id += 1; + interp->id = _PyRuntime.interpreters.next_id; + _PyRuntime.interpreters.next_id += 1; } HEAD_UNLOCK(); } @@ -189,7 +200,7 @@ PyInterpreterState_Delete(PyInterpreterState *interp) PyInterpreterState **p; zapthreads(interp); HEAD_LOCK(); - for (p = &interp_head; ; p = &(*p)->next) { + for (p = &_PyRuntime.interpreters.head; ; p = &(*p)->next) { if (*p == NULL) Py_FatalError( "PyInterpreterState_Delete: invalid interp"); @@ -199,19 +210,13 @@ PyInterpreterState_Delete(PyInterpreterState *interp) if (interp->tstate_head != NULL) Py_FatalError("PyInterpreterState_Delete: remaining threads"); *p = interp->next; - if (interp_main == interp) { - interp_main = NULL; - if (interp_head != NULL) + if (_PyRuntime.interpreters.main == interp) { + _PyRuntime.interpreters.main = NULL; + if (_PyRuntime.interpreters.head != NULL) Py_FatalError("PyInterpreterState_Delete: remaining subinterpreters"); } HEAD_UNLOCK(); PyMem_RawFree(interp); -#ifdef WITH_THREAD - if (interp_head == NULL && head_mutex != NULL) { - PyThread_free_lock(head_mutex); - head_mutex = NULL; - } -#endif } @@ -499,8 +504,11 @@ PyThreadState_Delete(PyThreadState *tstate) if (tstate == GET_TSTATE()) Py_FatalError("PyThreadState_Delete: tstate is still current"); #ifdef WITH_THREAD - if (autoInterpreterState && PyThread_get_key_value(autoTLSkey) == tstate) - PyThread_delete_key_value(autoTLSkey); + if (_PyRuntime.gilstate.autoInterpreterState && + PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == tstate) + { + PyThread_delete_key_value(_PyRuntime.gilstate.autoTLSkey); + } #endif /* WITH_THREAD */ tstate_delete_common(tstate); } @@ -515,8 +523,11 @@ PyThreadState_DeleteCurrent() Py_FatalError( "PyThreadState_DeleteCurrent: no current tstate"); tstate_delete_common(tstate); - if (autoInterpreterState && PyThread_get_key_value(autoTLSkey) == tstate) - 
PyThread_delete_key_value(autoTLSkey); + if (_PyRuntime.gilstate.autoInterpreterState && + PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == tstate) + { + PyThread_delete_key_value(_PyRuntime.gilstate.autoTLSkey); + } SET_TSTATE(NULL); PyEval_ReleaseLock(); } @@ -676,13 +687,13 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc) PyInterpreterState * PyInterpreterState_Head(void) { - return interp_head; + return _PyRuntime.interpreters.head; } PyInterpreterState * PyInterpreterState_Main(void) { - return interp_main; + return _PyRuntime.interpreters.main; } PyInterpreterState * @@ -722,7 +733,7 @@ _PyThread_CurrentFrames(void) * need to grab head_mutex for the duration. */ HEAD_LOCK(); - for (i = interp_head; i != NULL; i = i->next) { + for (i = _PyRuntime.interpreters.head; i != NULL; i = i->next) { PyThreadState *t; for (t = i->tstate_head; t != NULL; t = t->next) { PyObject *id; @@ -774,11 +785,11 @@ void _PyGILState_Init(PyInterpreterState *i, PyThreadState *t) { assert(i && t); /* must init with valid states */ - autoTLSkey = PyThread_create_key(); - if (autoTLSkey == -1) + _PyRuntime.gilstate.autoTLSkey = PyThread_create_key(); + if (_PyRuntime.gilstate.autoTLSkey == -1) Py_FatalError("Could not allocate TLS entry"); - autoInterpreterState = i; - assert(PyThread_get_key_value(autoTLSkey) == NULL); + _PyRuntime.gilstate.autoInterpreterState = i; + assert(PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == NULL); assert(t->gilstate_counter == 0); _PyGILState_NoteThreadState(t); @@ -787,15 +798,15 @@ _PyGILState_Init(PyInterpreterState *i, PyThreadState *t) PyInterpreterState * _PyGILState_GetInterpreterStateUnsafe(void) { - return autoInterpreterState; + return _PyRuntime.gilstate.autoInterpreterState; } void _PyGILState_Fini(void) { - PyThread_delete_key(autoTLSkey); - autoTLSkey = -1; - autoInterpreterState = NULL; + PyThread_delete_key(_PyRuntime.gilstate.autoTLSkey); + _PyRuntime.gilstate.autoTLSkey = -1; + 
_PyRuntime.gilstate.autoInterpreterState = NULL; } /* Reset the TLS key - called by PyOS_AfterFork_Child(). @@ -806,17 +817,19 @@ void _PyGILState_Reinit(void) { #ifdef WITH_THREAD - head_mutex = NULL; - HEAD_INIT(); + _PyRuntime.interpreters.mutex = PyThread_allocate_lock(); + if (_PyRuntime.interpreters.mutex == NULL) + Py_FatalError("Can't initialize threads for interpreter"); #endif PyThreadState *tstate = PyGILState_GetThisThreadState(); - PyThread_delete_key(autoTLSkey); - if ((autoTLSkey = PyThread_create_key()) == -1) + PyThread_delete_key(_PyRuntime.gilstate.autoTLSkey); + if ((_PyRuntime.gilstate.autoTLSkey = PyThread_create_key()) == -1) Py_FatalError("Could not allocate TLS entry"); /* If the thread had an associated auto thread state, reassociate it with * the new key. */ - if (tstate && PyThread_set_key_value(autoTLSkey, (void *)tstate) < 0) + if (tstate && PyThread_set_key_value(_PyRuntime.gilstate.autoTLSkey, + (void *)tstate) < 0) Py_FatalError("Couldn't create autoTLSkey mapping"); } @@ -831,7 +844,7 @@ _PyGILState_NoteThreadState(PyThreadState* tstate) /* If autoTLSkey isn't initialized, this must be the very first threadstate created in Py_Initialize(). Don't do anything for now (we'll be back here when _PyGILState_Init is called). */ - if (!autoInterpreterState) + if (!_PyRuntime.gilstate.autoInterpreterState) return; /* Stick the thread state for this thread in thread local storage. @@ -846,9 +859,13 @@ _PyGILState_NoteThreadState(PyThreadState* tstate) The first thread state created for that given OS level thread will "win", which seems reasonable behaviour. 
*/ - if (PyThread_get_key_value(autoTLSkey) == NULL) { - if (PyThread_set_key_value(autoTLSkey, (void *)tstate) < 0) + if (PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == NULL) { + if ((PyThread_set_key_value(_PyRuntime.gilstate.autoTLSkey, + (void *)tstate) + ) < 0) + { Py_FatalError("Couldn't create autoTLSkey mapping"); + } } /* PyGILState_Release must not try to delete this thread state. */ @@ -859,9 +876,10 @@ _PyGILState_NoteThreadState(PyThreadState* tstate) PyThreadState * PyGILState_GetThisThreadState(void) { - if (autoInterpreterState == NULL) + if (_PyRuntime.gilstate.autoInterpreterState == NULL) return NULL; - return (PyThreadState *)PyThread_get_key_value(autoTLSkey); + return (PyThreadState *)PyThread_get_key_value( + _PyRuntime.gilstate.autoTLSkey); } int @@ -872,7 +890,7 @@ PyGILState_Check(void) if (!_PyGILState_check_enabled) return 1; - if (autoTLSkey == -1) + if (_PyRuntime.gilstate.autoTLSkey == -1) return 1; tstate = GET_TSTATE(); @@ -892,8 +910,10 @@ PyGILState_Ensure(void) spells out other issues. Embedders are expected to have called Py_Initialize() and usually PyEval_InitThreads(). */ - assert(autoInterpreterState); /* Py_Initialize() hasn't been called! */ - tcur = (PyThreadState *)PyThread_get_key_value(autoTLSkey); + /* Py_Initialize() hasn't been called! */ + assert(_PyRuntime.gilstate.autoInterpreterState); + tcur = (PyThreadState *)PyThread_get_key_value( + _PyRuntime.gilstate.autoTLSkey); if (tcur == NULL) { /* At startup, Python has no concrete GIL. If PyGILState_Ensure() is called from a new thread for the first time, we need the create the @@ -901,7 +921,7 @@ PyGILState_Ensure(void) PyEval_InitThreads(); /* Create a new thread state for this thread */ - tcur = PyThreadState_New(autoInterpreterState); + tcur = PyThreadState_New(_PyRuntime.gilstate.autoInterpreterState); if (tcur == NULL) Py_FatalError("Couldn't create thread-state for new thread"); /* This is our thread state! 
We'll need to delete it in the @@ -926,7 +946,7 @@ void PyGILState_Release(PyGILState_STATE oldstate) { PyThreadState *tcur = (PyThreadState *)PyThread_get_key_value( - autoTLSkey); + _PyRuntime.gilstate.autoTLSkey); if (tcur == NULL) Py_FatalError("auto-releasing thread-state, " "but no thread-state for this thread"); diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 852babb..080c541 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -519,8 +519,6 @@ Return the profiling function set with sys.setprofile.\n\ See the profiler chapter in the library manual." ); -static int _check_interval = 100; - static PyObject * sys_setcheckinterval(PyObject *self, PyObject *args) { @@ -529,7 +527,8 @@ sys_setcheckinterval(PyObject *self, PyObject *args) "are deprecated. Use sys.setswitchinterval() " "instead.", 1) < 0) return NULL; - if (!PyArg_ParseTuple(args, "i:setcheckinterval", &_check_interval)) + PyInterpreterState *interp = PyThreadState_GET()->interp; + if (!PyArg_ParseTuple(args, "i:setcheckinterval", &interp->check_interval)) return NULL; Py_RETURN_NONE; } @@ -549,7 +548,8 @@ sys_getcheckinterval(PyObject *self, PyObject *args) "are deprecated. 
Use sys.getswitchinterval() " "instead.", 1) < 0) return NULL; - return PyLong_FromLong(_check_interval); + PyInterpreterState *interp = PyThreadState_GET()->interp; + return PyLong_FromLong(interp->check_interval); } PyDoc_STRVAR(getcheckinterval_doc, @@ -1339,7 +1339,7 @@ Clear the internal type lookup cache."); static PyObject * sys_is_finalizing(PyObject* self, PyObject* args) { - return PyBool_FromLong(_Py_Finalizing != NULL); + return PyBool_FromLong(_Py_IS_FINALIZING()); } PyDoc_STRVAR(is_finalizing_doc, @@ -1479,11 +1479,24 @@ list_builtin_module_names(void) return list; } -static PyObject *warnoptions = NULL; +static PyObject * +get_warnoptions(void) +{ + PyObject *warnoptions = PyThreadState_GET()->interp->warnoptions; + if (warnoptions == NULL || !PyList_Check(warnoptions)) { + Py_XDECREF(warnoptions); + warnoptions = PyList_New(0); + if (warnoptions == NULL) + return NULL; + PyThreadState_GET()->interp->warnoptions = warnoptions; + } + return warnoptions; +} void PySys_ResetWarnOptions(void) { + PyObject *warnoptions = PyThreadState_GET()->interp->warnoptions; if (warnoptions == NULL || !PyList_Check(warnoptions)) return; PyList_SetSlice(warnoptions, 0, PyList_GET_SIZE(warnoptions), NULL); @@ -1492,12 +1505,9 @@ PySys_ResetWarnOptions(void) void PySys_AddWarnOptionUnicode(PyObject *unicode) { - if (warnoptions == NULL || !PyList_Check(warnoptions)) { - Py_XDECREF(warnoptions); - warnoptions = PyList_New(0); - if (warnoptions == NULL) - return; - } + PyObject *warnoptions = get_warnoptions(); + if (warnoptions == NULL) + return; PyList_Append(warnoptions, unicode); } @@ -1515,17 +1525,20 @@ PySys_AddWarnOption(const wchar_t *s) int PySys_HasWarnOptions(void) { + PyObject *warnoptions = PyThreadState_GET()->interp->warnoptions; return (warnoptions != NULL && (PyList_Size(warnoptions) > 0)) ? 
1 : 0; } -static PyObject *xoptions = NULL; - static PyObject * get_xoptions(void) { + PyObject *xoptions = PyThreadState_GET()->interp->xoptions; if (xoptions == NULL || !PyDict_Check(xoptions)) { Py_XDECREF(xoptions); xoptions = PyDict_New(); + if (xoptions == NULL) + return NULL; + PyThreadState_GET()->interp->xoptions = xoptions; } return xoptions; } @@ -2130,17 +2143,15 @@ _PySys_EndInit(PyObject *sysdict) SET_SYS_FROM_STRING_INT_RESULT("base_exec_prefix", PyUnicode_FromWideChar(Py_GetExecPrefix(), -1)); - if (warnoptions == NULL) { - warnoptions = PyList_New(0); - if (warnoptions == NULL) - return -1; - } - - SET_SYS_FROM_STRING_INT_RESULT("warnoptions", - PyList_GetSlice(warnoptions, - 0, Py_SIZE(warnoptions))); + PyObject *warnoptions = get_warnoptions(); + if (warnoptions == NULL) + return -1; + SET_SYS_FROM_STRING_BORROW_INT_RESULT("warnoptions", warnoptions); - SET_SYS_FROM_STRING_BORROW_INT_RESULT("_xoptions", get_xoptions()); + PyObject *xoptions = get_xoptions(); + if (xoptions == NULL) + return -1; + SET_SYS_FROM_STRING_BORROW_INT_RESULT("_xoptions", xoptions); if (PyErr_Occurred()) return -1; diff --git a/Python/thread.c b/Python/thread.c index 4d2f2c3..6fd594f 100644 --- a/Python/thread.c +++ b/Python/thread.c @@ -76,11 +76,6 @@ PyThread_init_thread(void) PyThread__init_thread(); } -/* Support for runtime thread stack size tuning. - A value of 0 means using the platform's default stack size - or the size specified by the THREAD_STACK_SIZE macro. 
*/ -static size_t _pythread_stacksize = 0; - #if defined(_POSIX_THREADS) # define PYTHREAD_NAME "pthread" # include "thread_pthread.h" @@ -96,7 +91,7 @@ static size_t _pythread_stacksize = 0; size_t PyThread_get_stacksize(void) { - return _pythread_stacksize; + return PyThreadState_GET()->interp->pythread_stacksize; } /* Only platforms defining a THREAD_SET_STACKSIZE() macro diff --git a/Python/thread_nt.h b/Python/thread_nt.h index 47eb4b6..2f3a71b 100644 --- a/Python/thread_nt.h +++ b/Python/thread_nt.h @@ -189,9 +189,10 @@ PyThread_start_new_thread(void (*func)(void *), void *arg) return PYTHREAD_INVALID_THREAD_ID; obj->func = func; obj->arg = arg; + PyThreadState *tstate = PyThreadState_GET(); + size_t stacksize = tstate ? tstate->interp->pythread_stacksize : 0; hThread = (HANDLE)_beginthreadex(0, - Py_SAFE_DOWNCAST(_pythread_stacksize, - Py_ssize_t, unsigned int), + Py_SAFE_DOWNCAST(stacksize, Py_ssize_t, unsigned int), bootstrap, obj, 0, &threadID); if (hThread == 0) { @@ -332,13 +333,13 @@ _pythread_nt_set_stacksize(size_t size) { /* set to default */ if (size == 0) { - _pythread_stacksize = 0; + PyThreadState_GET()->interp->pythread_stacksize = 0; return 0; } /* valid range? */ if (size >= THREAD_MIN_STACKSIZE && size < THREAD_MAX_STACKSIZE) { - _pythread_stacksize = size; + PyThreadState_GET()->interp->pythread_stacksize = size; return 0; } diff --git a/Python/thread_pthread.h b/Python/thread_pthread.h index 268dec4..ea05b6f 100644 --- a/Python/thread_pthread.h +++ b/Python/thread_pthread.h @@ -205,8 +205,9 @@ PyThread_start_new_thread(void (*func)(void *), void *arg) return PYTHREAD_INVALID_THREAD_ID; #endif #if defined(THREAD_STACK_SIZE) - tss = (_pythread_stacksize != 0) ? _pythread_stacksize - : THREAD_STACK_SIZE; + PyThreadState *tstate = PyThreadState_GET(); + size_t stacksize = tstate ? tstate->interp->pythread_stacksize : 0; + tss = (stacksize != 0) ? 
stacksize : THREAD_STACK_SIZE; if (tss != 0) { if (pthread_attr_setstacksize(&attrs, tss) != 0) { pthread_attr_destroy(&attrs); @@ -578,7 +579,7 @@ _pythread_pthread_set_stacksize(size_t size) /* set to default */ if (size == 0) { - _pythread_stacksize = 0; + PyThreadState_GET()->interp->pythread_stacksize = 0; return 0; } @@ -595,7 +596,7 @@ _pythread_pthread_set_stacksize(size_t size) rc = pthread_attr_setstacksize(&attrs, size); pthread_attr_destroy(&attrs); if (rc == 0) { - _pythread_stacksize = size; + PyThreadState_GET()->interp->pythread_stacksize = size; return 0; } } diff --git a/Tools/c-globals/README b/Tools/c-globals/README new file mode 100644 index 0000000..d0e6e8e --- /dev/null +++ b/Tools/c-globals/README @@ -0,0 +1,41 @@ +####################################### +# C Globals and CPython Runtime State. + +CPython's C code makes extensive use of global variables. Each global +falls into one of several categories: + +* (effectively) constants (incl. static types) +* globals used exclusively in main or in the REPL +* freelists, caches, and counters +* process-global state +* module state +* Python runtime state + +The ignored-globals.txt file is organized similarly. Of the different +categories, the last two are problematic and generally should not exist +in the codebase. + +Globals that hold module state (i.e. in Modules/*.c) cause problems +when multiple interpreters are in use. For more info, see PEP 3121, +which addresses the situation for extension modules in general. + +Globals in the last category should be avoided as well. The problem +isn't with the Python runtime having state. Rather, the problem is with +that state being spread throughout the codebase in dozens of individual +globals. Unlike the other globals, the runtime state represents a set +of values that are constantly shifting in a complex way. When they are +spread out it's harder to get a clear picture of what the runtime +involves.
Furthermore, when they are spread out it complicates efforts +that change the runtime. + +Consequently, the globals for Python's runtime state have been +consolidated under a single top-level _PyRuntime global. No new globals +should be added for runtime state. Instead, they should be added to +_PyRuntimeState or one of its sub-structs. The check-c-globals script +should be run to ensure that no new globals have been added: + + python3 Tools/c-globals/check-c-globals.py + +If it reports any globals then they should be resolved. If the globals +are runtime state then they should be folded into _PyRuntimeState. +Otherwise they should be added to ignored-globals.txt. diff --git a/Tools/c-globals/check-c-globals.py b/Tools/c-globals/check-c-globals.py new file mode 100644 index 0000000..1de69a8 --- /dev/null +++ b/Tools/c-globals/check-c-globals.py @@ -0,0 +1,446 @@ + +from collections import namedtuple +import glob +import os.path +import re +import shutil +import sys +import subprocess + + +VERBOSITY = 2 + +C_GLOBALS_DIR = os.path.abspath(os.path.dirname(__file__)) +TOOLS_DIR = os.path.dirname(C_GLOBALS_DIR) +ROOT_DIR = os.path.dirname(TOOLS_DIR) +GLOBALS_FILE = os.path.join(C_GLOBALS_DIR, 'ignored-globals.txt') + +SOURCE_DIRS = ['Include', 'Objects', 'Modules', 'Parser', 'Python'] + +CAPI_REGEX = re.compile(r'^ *PyAPI_DATA\([^)]*\) \W*(_?Py\w+(?:, \w+)*\w).*;.*$') + + +IGNORED_VARS = { + '_DYNAMIC', + '_GLOBAL_OFFSET_TABLE_', + '__JCR_LIST__', + '__JCR_END__', + '__TMC_END__', + '__bss_start', + '__data_start', + '__dso_handle', + '_edata', + '_end', + } + + +def find_capi_vars(root): + capi_vars = {} + for dirname in SOURCE_DIRS: + for filename in glob.glob(os.path.join(ROOT_DIR, dirname, '**/*.[hc]'), + recursive=True): + with open(filename) as file: + for name in _find_capi_vars(file): + if name in capi_vars: + assert not filename.endswith('.c') + assert capi_vars[name].endswith('.c') + capi_vars[name] = filename + return capi_vars + + +def 
_find_capi_vars(lines): + for line in lines: + if not line.startswith('PyAPI_DATA'): + continue + assert '{' not in line + match = CAPI_REGEX.match(line) + assert match + names, = match.groups() + for name in names.split(', '): + yield name + + +def _read_global_names(filename): + # These variables are shared between all interpreters in the process. + with open(filename) as file: + return {line.partition('#')[0].strip() + for line in file + if line.strip() and not line.startswith('#')} + + +def _is_global_var(name, globalnames): + if _is_autogen_var(name): + return True + if _is_type_var(name): + return True + if _is_module(name): + return True + if _is_exception(name): + return True + if _is_compiler(name): + return True + return name in globalnames + + +def _is_autogen_var(name): + return ( + name.startswith('PyId_') or + '.' in name or + # Objects/typeobject.c + name.startswith('op_id.') or + name.startswith('rop_id.') or + # Python/graminit.c + name.startswith('arcs_') or + name.startswith('states_') + ) + + +def _is_type_var(name): + if name.endswith(('Type', '_Type', '_type')): # XXX Always a static type? + return True + if name.endswith('_desc'): # for structseq types + return True + return ( + name.startswith('doc_') or + name.endswith(('_doc', '__doc__', '_docstring')) or + name.endswith('_methods') or + name.endswith('_fields') or + name.endswith(('_memberlist', '_members')) or + name.endswith('_slots') or + name.endswith(('_getset', '_getsets', '_getsetlist')) or + name.endswith('_as_mapping') or + name.endswith('_as_number') or + name.endswith('_as_sequence') or + name.endswith('_as_buffer') or + name.endswith('_as_async') + ) + + +def _is_module(name): + if name.endswith(('_functions', 'Methods', '_Methods')): + return True + if name == 'module_def': + return True + if name == 'initialized': + return True + return name.endswith(('module', '_Module')) + + +def _is_exception(name): + # Other vars are enumerated in ignored-globals.txt.
+ if not name.startswith(('PyExc_', '_PyExc_')): + return False + return name.endswith(('Error', 'Warning')) + + +def _is_compiler(name): + return ( + # Python/Python-ast.c + name.endswith('_type') or + name.endswith('_singleton') or + name.endswith('_attributes') + ) + + +class Var(namedtuple('Var', 'name kind scope capi filename')): + + @classmethod + def parse_nm(cls, line, expected, ignored, capi_vars, globalnames): + _, _, line = line.partition(' ') # strip off the address + line = line.strip() + kind, _, line = line.partition(' ') + if kind in ignored or (): + return None + elif kind not in expected or (): + raise RuntimeError('unsupported NM type {!r}'.format(kind)) + + name, _, filename = line.partition('\t') + name = name.strip() + if _is_autogen_var(name): + return None + if _is_global_var(name, globalnames): + scope = 'global' + else: + scope = None + capi = (name in capi_vars or ()) + if filename: + filename = os.path.relpath(filename.partition(':')[0]) + return cls(name, kind, scope, capi, filename or '~???~') + + @property + def external(self): + return self.kind.isupper() + + +def find_vars(root, globals_filename=GLOBALS_FILE): + python = os.path.join(root, 'python') + if not os.path.exists(python): + raise RuntimeError('python binary missing (need to build it first?)') + capi_vars = find_capi_vars(root) + globalnames = _read_global_names(globals_filename) + + nm = shutil.which('nm') + if nm is None: + # XXX Use dumpbin.exe /SYMBOLS on Windows.
+ raise NotImplementedError + else: + yield from (var + for var in _find_var_symbols(python, nm, capi_vars, + globalnames) + if var.name not in IGNORED_VARS) + + +NM_FUNCS = set('Tt') +NM_PUBLIC_VARS = set('BD') +NM_PRIVATE_VARS = set('bd') +NM_VARS = NM_PUBLIC_VARS | NM_PRIVATE_VARS +NM_DATA = set('Rr') +NM_OTHER = set('ACGgiINpSsuUVvWw-?') +NM_IGNORED = NM_FUNCS | NM_DATA | NM_OTHER + + +def _find_var_symbols(python, nm, capi_vars, globalnames): + args = [nm, + '--line-numbers', + python] + out = subprocess.check_output(args) + for line in out.decode('utf-8').splitlines(): + var = Var.parse_nm(line, NM_VARS, NM_IGNORED, capi_vars, globalnames) + if var is None: + continue + yield var + + +####################################### + +class Filter(namedtuple('Filter', 'name op value action')): + + @classmethod + def parse(cls, raw): + action = '+' + if raw.startswith(('+', '-')): + action = raw[0] + raw = raw[1:] + # XXX Support < and >? + name, op, value = raw.partition('=') + return cls(name, op, value, action) + + def check(self, var): + value = getattr(var, self.name, None) + if not self.op: + matched = bool(value) + elif self.op == '=': + matched = (value == self.value) + else: + raise NotImplementedError + + if self.action == '+': + return matched + elif self.action == '-': + return not matched + else: + raise NotImplementedError + + +def filter_var(var, filters): + for filter in filters: + if not filter.check(var): + return False + return True + + +def make_sort_key(spec): + columns = [(col.strip('_'), '_' if col.startswith('_') else '') + for col in spec] + def sort_key(var): + return tuple(getattr(var, col).lstrip(prefix) + for col, prefix in columns) + return sort_key + + +def make_groups(allvars, spec): + group = spec + groups = {} + for var in allvars: + value = getattr(var, group) + key = '{}: {}'.format(group, value) + try: + groupvars = groups[key] + except KeyError: + groupvars = groups[key] = [] + groupvars.append(var) + return groups + + +def 
format_groups(groups, columns, fmts, widths): + for group in sorted(groups): + groupvars = groups[group] + yield '', 0 + yield ' # {}'.format(group), 0 + yield from format_vars(groupvars, columns, fmts, widths) + + +def format_vars(allvars, columns, fmts, widths): + fmt = ' '.join(fmts[col] for col in columns) + fmt = ' ' + fmt.replace(' ', ' ') + ' ' # for div margin + header = fmt.replace(':', ':^').format(*(col.upper() for col in columns)) + yield header, 0 + div = ' '.join('-'*(widths[col]+2) for col in columns) + yield div, 0 + for var in allvars: + values = (getattr(var, col) for col in columns) + row = fmt.format(*('X' if val is True else val or '' + for val in values)) + yield row, 1 + yield div, 0 + + +####################################### + +COLUMNS = 'name,external,capi,scope,filename' +COLUMN_NAMES = COLUMNS.split(',') + +COLUMN_WIDTHS = {col: len(col) + for col in COLUMN_NAMES} +COLUMN_WIDTHS.update({ + 'name': 50, + 'scope': 7, + 'filename': 40, + }) +COLUMN_FORMATS = {col: '{:%s}' % width + for col, width in COLUMN_WIDTHS.items()} +for col in COLUMN_FORMATS: + if COLUMN_WIDTHS[col] == len(col): + COLUMN_FORMATS[col] = COLUMN_FORMATS[col].replace(':', ':^') + + +def _parse_filters_arg(raw, error): + filters = [] + for value in raw.split(','): + value=value.strip() + if not value: + continue + try: + filter = Filter.parse(value) + if filter.name not in COLUMN_NAMES: + raise Exception('unsupported column {!r}'.format(filter.name)) + except Exception as e: + error('bad filter {!r}: {}'.format(raw, e)) + filters.append(filter) + return filters + + +def _parse_columns_arg(raw, error): + columns = raw.split(',') + for column in columns: + if column not in COLUMN_NAMES: + error('unsupported column {!r}'.format(column)) + return columns + + +def _parse_sort_arg(raw, error): + sort = raw.split(',') + for column in sort: + if column.lstrip('_') not in COLUMN_NAMES: + error('unsupported column {!r}'.format(column)) + return sort + + +def _parse_group_arg(raw, 
error): + if not raw: + return raw + group = raw + if group not in COLUMN_NAMES: + error('unsupported column {!r}'.format(group)) + if group != 'filename': + error('unsupported group {!r}'.format(group)) + return group + + +def parse_args(argv=None): + if argv is None: + argv = sys.argv[1:] + + import argparse + parser = argparse.ArgumentParser() + + parser.add_argument('-v', '--verbose', action='count', default=0) + parser.add_argument('-q', '--quiet', action='count', default=0) + + parser.add_argument('--filters', default='-scope', + help='[[-][=]] ...') + + parser.add_argument('--columns', default=COLUMNS, + help='a comma-separated list of columns to show') + parser.add_argument('--sort', default='filename,_name', + help='a comma-separated list of columns to sort') + parser.add_argument('--group', + help='group by the given column name (- to not group)') + + parser.add_argument('--rc-on-match', dest='rc', type=int) + + parser.add_argument('filename', nargs='?', default=GLOBALS_FILE) + + args = parser.parse_args(argv) + + verbose = vars(args).pop('verbose', 0) + quiet = vars(args).pop('quiet', 0) + args.verbosity = max(0, VERBOSITY + verbose - quiet) + + if args.sort.startswith('filename') and not args.group: + args.group = 'filename' + + if args.rc is None: + if '-scope=core' in args.filters or 'core' not in args.filters: + args.rc = 0 + else: + args.rc = 1 + + args.filters = _parse_filters_arg(args.filters, parser.error) + args.columns = _parse_columns_arg(args.columns, parser.error) + args.sort = _parse_sort_arg(args.sort, parser.error) + args.group = _parse_group_arg(args.group, parser.error) + + return args + + +def main(root=ROOT_DIR, filename=GLOBALS_FILE, + filters=None, columns=COLUMN_NAMES, sort=None, group=None, + verbosity=VERBOSITY, rc=1): + + log = lambda msg: ... 
+ if verbosity >= 2: + log = lambda msg: print(msg) + + allvars = (var + for var in find_vars(root, filename) + if filter_var(var, filters)) + if sort: + allvars = sorted(allvars, key=make_sort_key(sort)) + + if group: + try: + columns.remove(group) + except ValueError: + pass + grouped = make_groups(allvars, group) + lines = format_groups(grouped, columns, COLUMN_FORMATS, COLUMN_WIDTHS) + else: + lines = format_vars(allvars, columns, COLUMN_FORMATS, COLUMN_WIDTHS) + + total = 0 + for line, count in lines: + total += count + log(line) + log('\ntotal: {}'.format(total)) + + if total and rc: + print('ERROR: found unsafe globals', file=sys.stderr) + return rc + return 0 + + +if __name__ == '__main__': + args = parse_args() + sys.exit( + main(**vars(args))) diff --git a/Tools/c-globals/ignored-globals.txt b/Tools/c-globals/ignored-globals.txt new file mode 100644 index 0000000..4fafba6 --- /dev/null +++ b/Tools/c-globals/ignored-globals.txt @@ -0,0 +1,494 @@ +# All variables declared here are shared between all interpreters +# in a single process. That means that they must not be changed +# unless that change should apply to all interpreters. +# +# See check-c-globals.py. 
+# +# Many generic names are handled via the script: +# +# * most exceptions and all warnings handled via _is_exception() +# * for builtin modules, generic names are handled via _is_module() +# * generic names for static types handled via _is_type_var() +# * AST vars handled via _is_compiler() + + +####################################### +# main + +# Modules/getpath.c +exec_prefix +module_search_path +prefix +progpath + +# Modules/main.c +orig_argc +orig_argv + +# Python/getopt.c +opt_ptr +_PyOS_optarg +_PyOS_opterr +_PyOS_optind + + +####################################### +# REPL + +# Parser/myreadline.c +PyOS_InputHook +PyOS_ReadlineFunctionPointer +_PyOS_ReadlineLock +_PyOS_ReadlineTState + + +####################################### +# state + +# Python/dtoa.c +p5s +pmem_next # very slight race +private_mem # very slight race + +# Python/import.c +# For the moment the import lock stays global. Ultimately there should +# be a global lock for extension modules and a per-interpreter lock. 
+import_lock +import_lock_level +import_lock_thread + +# Python/pylifecycle.c +_PyRuntime + + +#--------------------------------- +# module globals (PyObject) + +# Modules/_functoolsmodule.c +kwd_mark + +# Modules/_localemodule.c +Error + +# Modules/_threadmodule.c +ThreadError + +# Modules/_tracemalloc.c +unknown_filename + +# Modules/gcmodule.c +gc_str + +# Modules/posixmodule.c +billion +posix_putenv_garbage + +# Modules/signalmodule.c +DefaultHandler +IgnoreHandler +IntHandler +ItimerError + +# Modules/zipimport.c +ZipImportError +zip_directory_cache + + +#--------------------------------- +# module globals (other) + +# Modules/_tracemalloc.c +allocators +tables_lock +tracemalloc_config +tracemalloc_empty_traceback +tracemalloc_filenames +tracemalloc_peak_traced_memory +tracemalloc_reentrant_key +tracemalloc_traceback +tracemalloc_tracebacks +tracemalloc_traced_memory +tracemalloc_traces + +# Modules/faulthandler.c +fatal_error +faulthandler_handlers +old_stack +stack +thread +user_signals + +# Modules/posixmodule.c +posix_constants_confstr +posix_constants_pathconf +posix_constants_sysconf +_stat_float_times # deprecated, __main__-only +structseq_new +ticks_per_second + +# Modules/signalmodule.c +Handlers # main thread only +is_tripped # main thread only +main_pid +main_thread +old_siginthandler +wakeup_fd # main thread only + +# Modules/zipimport.c +zip_searchorder + +# Python/bltinmodule.c +Py_FileSystemDefaultEncodeErrors +Py_FileSystemDefaultEncoding +Py_HasFileSystemDefaultEncoding + +# Python/sysmodule.c +_PySys_ImplCacheTag +_PySys_ImplName + + +#--------------------------------- +# freelists + +# Modules/_collectionsmodule.c +freeblocks +numfreeblocks + +# Objects/classobject.c +free_list +numfree + +# Objects/dictobject.c +free_list +keys_free_list +numfree +numfreekeys + +# Objects/exceptions.c +memerrors_freelist +memerrors_numfree + +# Objects/floatobject.c +free_list +numfree + +# Objects/frameobject.c +free_list +numfree + +# Objects/genobject.c 
+ag_asend_freelist +ag_asend_freelist_free +ag_value_freelist +ag_value_freelist_free + +# Objects/listobject.c +free_list +numfree + +# Objects/methodobject.c +free_list +numfree + +# Objects/sliceobject.c +slice_cache # slight race + +# Objects/tupleobject.c +free_list +numfree + +# Python/dtoa.c +freelist # very slight race + + +#--------------------------------- +# caches (PyObject) + +# Objects/typeobject.c +method_cache # only for static types +next_version_tag # only for static types + +# Python/dynload_shlib.c +handles # slight race during import +nhandles # slight race during import + +# Python/import.c +extensions # slight race on init during import + + +#--------------------------------- +# caches (other) + +# Python/bootstrap_hash.c +urandom_cache + +# Python/modsupport.c +_Py_PackageContext # Slight race during import! Move to PyThreadState? + + +#--------------------------------- +# counters + +# Objects/bytesobject.c +null_strings +one_strings + +# Objects/dictobject.c +pydict_global_version + +# Objects/moduleobject.c +max_module_number # slight race during import + + +####################################### +# constants + +#--------------------------------- +# singletons + +# Objects/boolobject.c +_Py_FalseStruct +_Py_TrueStruct + +# Objects/object.c +_Py_NoneStruct +_Py_NotImplementedStruct + +# Objects/sliceobject.c +_Py_EllipsisObject + + +#--------------------------------- +# constants (other) + +# Modules/config.c +_PyImport_Inittab + +# Objects/bytearrayobject.c +_PyByteArray_empty_string + +# Objects/dictobject.c +empty_keys_struct +empty_values + +# Objects/floatobject.c +detected_double_format +detected_float_format +double_format +float_format + +# Objects/longobject.c +_PyLong_DigitValue + +# Objects/object.c +_Py_SwappedOp + +# Objects/obmalloc.c +_PyMem_Debug + +# Objects/setobject.c +_dummy_struct + +# Objects/structseq.c +PyStructSequence_UnnamedField + +# Objects/typeobject.c +name_op +slotdefs # almost +slotdefs_initialized # 
almost +subtype_getsets_dict_only +subtype_getsets_full +subtype_getsets_weakref_only +tp_new_methoddef + +# Objects/unicodeobject.c +bloom_linebreak +static_strings # slight race + +# Parser/tokenizer.c +_PyParser_TokenNames + +# Python/Python-ast.c +alias_fields + +# Python/codecs.c +Py_hexdigits +ucnhash_CAPI # slight performance-only race + +# Python/dynload_shlib.c +_PyImport_DynLoadFiletab + +# Python/fileutils.c +_Py_open_cloexec_works +force_ascii + +# Python/frozen.c +M___hello__ +PyImport_FrozenModules + +# Python/graminit.c +_PyParser_Grammar +dfas +labels + +# Python/import.c +PyImport_Inittab + +# Python/pylifecycle.c +_TARGET_LOCALES + + +#--------------------------------- +# initialized (PyObject) + +# Objects/bytesobject.c +characters +nullstring + +# Objects/exceptions.c +PyExc_RecursionErrorInst +errnomap + +# Objects/longobject.c +_PyLong_One +_PyLong_Zero +small_ints + +# Objects/setobject.c +emptyfrozenset + +# Objects/unicodeobject.c +interned # slight race on init in PyUnicode_InternInPlace() +unicode_empty +unicode_latin1 + + +#--------------------------------- +# initialized (other) + +# Python/getargs.c +static_arg_parsers + +# Python/pyhash.c +PyHash_Func +_Py_HashSecret +_Py_HashSecret_Initialized + +# Python/pylifecycle.c +_Py_StandardStreamEncoding +_Py_StandardStreamErrors +default_home +env_home +progname +Py_BytesWarningFlag +Py_DebugFlag +Py_DontWriteBytecodeFlag +Py_FrozenFlag +Py_HashRandomizationFlag +Py_IgnoreEnvironmentFlag +Py_InspectFlag +Py_InteractiveFlag +Py_IsolatedFlag +Py_NoSiteFlag +Py_NoUserSiteDirectory +Py_OptimizeFlag +Py_QuietFlag +Py_UnbufferedStdioFlag +Py_UseClassExceptionsFlag +Py_VerboseFlag + + +#--------------------------------- +# types + +# Modules/_threadmodule.c +Locktype +RLocktype +localdummytype +localtype + +# Objects/exceptions.c +PyExc_BaseException +PyExc_Exception +PyExc_GeneratorExit +PyExc_KeyboardInterrupt +PyExc_StopAsyncIteration +PyExc_StopIteration +PyExc_SystemExit +_PyExc_BaseException 
+_PyExc_Exception +_PyExc_GeneratorExit +_PyExc_KeyboardInterrupt +_PyExc_StopAsyncIteration +_PyExc_StopIteration +_PyExc_SystemExit + +# Objects/structseq.c +_struct_sequence_template + + +#--------------------------------- +# interned strings/bytes + +# Modules/_io/_iomodule.c +_PyIO_empty_bytes +_PyIO_empty_str +_PyIO_str_close +_PyIO_str_closed +_PyIO_str_decode +_PyIO_str_encode +_PyIO_str_fileno +_PyIO_str_flush +_PyIO_str_getstate +_PyIO_str_isatty +_PyIO_str_newlines +_PyIO_str_nl +_PyIO_str_read +_PyIO_str_read1 +_PyIO_str_readable +_PyIO_str_readall +_PyIO_str_readinto +_PyIO_str_readline +_PyIO_str_reset +_PyIO_str_seek +_PyIO_str_seekable +_PyIO_str_setstate +_PyIO_str_tell +_PyIO_str_truncate +_PyIO_str_writable +_PyIO_str_write + +# Modules/_threadmodule.c +str_dict + +# Objects/boolobject.c +false_str +true_str + +# Objects/listobject.c +indexerr + +# Python/symtable.c +__class__ +dictcomp +genexpr +lambda +listcomp +setcomp +top + +# Python/sysmodule.c +whatstrings + + +####################################### +# hacks + +# Objects/object.c +_Py_abstract_hack + +# Objects/setobject.c +_PySet_Dummy + +# Python/pylifecycle.c +_PyOS_mystrnicmp_hack -- cgit v0.12