author     Eric Snow <ericsnowcurrently@gmail.com>   2017-09-06 04:43:08 (GMT)
committer  GitHub <noreply@github.com>               2017-09-06 04:43:08 (GMT)
commit     05351c1bd8b70d1878527762174cdaaba3572395
tree       e97ef4ba0ae7ffe5bd2c8969199616bffbbc4d6f
parent     833860615bedfd2484ac0623d6f01ff0578ba09f
Revert "bpo-30860: Consolidate stateful runtime globals." (#3379)
Windows buildbots started failing due to include-related errors.
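For context: bpo-30860 had gathered CPython's per-subsystem global state into a single _PyRuntimeState aggregate, and this commit restores the older file-local statics. The standalone C sketch below (the demo_* names are hypothetical, not CPython's definitions) contrasts the two layouts the diff toggles between:

/* Minimal sketch of the two layouts, assuming one subsystem (gc)
 * keeps an "enabled" flag.  Illustrative only, not CPython code. */
#include <stdio.h>

struct demo_gc_state { int enabled; };
struct demo_runtime_state { struct demo_gc_state gc; };

/* Consolidated layout (what bpo-30860 introduced): one aggregate. */
static struct demo_runtime_state demo_runtime = { { 1 } };

/* Reverted layout: each module keeps its own file-local static. */
static int enabled = 1;

int main(void)
{
    /* Same information, two access paths. */
    printf("consolidated: %d, file-local static: %d\n",
           demo_runtime.gc.enabled, enabled);
    return 0;
}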
40 files changed, 1331 insertions, 2730 deletions
diff --git a/Include/Python.h b/Include/Python.h
index 3ab9fe9..061d693 100644
--- a/Include/Python.h
+++ b/Include/Python.h
@@ -133,8 +133,4 @@
 #include "fileutils.h"
 #include "pyfpe.h"
 
-#ifdef Py_BUILD_CORE
-#include "internal/_Python.h"
-#endif
-
 #endif /* !Py_PYTHON_H */
diff --git a/Include/ceval.h b/Include/ceval.h
index 7cbbf7c..b2d57cb 100644
--- a/Include/ceval.h
+++ b/Include/ceval.h
@@ -93,12 +93,7 @@ PyAPI_FUNC(int) Py_GetRecursionLimit(void);
         PyThreadState_GET()->overflowed = 0;  \
     } while(0)
 PyAPI_FUNC(int) _Py_CheckRecursiveCall(const char *where);
-#ifdef Py_BUILD_CORE
-#define _Py_CheckRecursionLimit _PyRuntime.ceval.check_recursion_limit
-#else
-PyAPI_FUNC(int) _PyEval_CheckRecursionLimit(void);
-#define _Py_CheckRecursionLimit _PyEval_CheckRecursionLimit()
-#endif
+PyAPI_DATA(int) _Py_CheckRecursionLimit;
 
 #ifdef USE_STACKCHECK
 /* With USE_STACKCHECK, we artificially decrement the recursion limit in order
diff --git a/Include/internal/_Python.h b/Include/internal/_Python.h
deleted file mode 100644
index c56e98f..0000000
--- a/Include/internal/_Python.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _Py_PYTHON_H
-#define _Py_PYTHON_H
-/* Since this is a "meta-include" file, no #ifdef __cplusplus / extern "C" { */
-
-/* Include all internal Python header files */
-
-#ifndef Py_BUILD_CORE
-#error "Internal headers are not available externally."
-#endif
-
-#include "_mem.h"
-#include "_ceval.h"
-#include "_warnings.h"
-#include "_pystate.h"
-
-#endif /* !_Py_PYTHON_H */
diff --git a/Include/internal/_ceval.h b/Include/internal/_ceval.h
deleted file mode 100644
index c2343f1..0000000
--- a/Include/internal/_ceval.h
+++ /dev/null
@@ -1,71 +0,0 @@
-#ifndef _Py_CEVAL_H
-#define _Py_CEVAL_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "ceval.h"
-#include "compile.h"
-#include "pyatomic.h"
-
-#ifdef WITH_THREAD
-#include "pythread.h"
-#endif
-
-struct _pending_calls {
-    unsigned long main_thread;
-#ifdef WITH_THREAD
-    PyThread_type_lock lock;
-    /* Request for running pending calls. */
-    _Py_atomic_int calls_to_do;
-    /* Request for looking at the `async_exc` field of the current
-       thread state.
-       Guarded by the GIL. */
-    int async_exc;
-#define NPENDINGCALLS 32
-    struct {
-        int (*func)(void *);
-        void *arg;
-    } calls[NPENDINGCALLS];
-    int first;
-    int last;
-#else /* ! WITH_THREAD */
-    _Py_atomic_int calls_to_do;
-#define NPENDINGCALLS 32
-    struct {
-        int (*func)(void *);
-        void *arg;
-    } calls[NPENDINGCALLS];
-    volatile int first;
-    volatile int last;
-#endif /* WITH_THREAD */
-};
-
-#include "_gil.h"
-
-struct _ceval_runtime_state {
-    int recursion_limit;
-    int check_recursion_limit;
-    /* Records whether tracing is on for any thread.  Counts the number
-       of threads for which tstate->c_tracefunc is non-NULL, so if the
-       value is 0, we know we don't have to check this thread's
-       c_tracefunc.  This speeds up the if statement in
-       PyEval_EvalFrameEx() after fast_next_opcode. */
-    int tracing_possible;
-    /* This single variable consolidates all requests to break out of
-       the fast path in the eval loop. */
-    _Py_atomic_int eval_breaker;
-#ifdef WITH_THREAD
-    /* Request for dropping the GIL */
-    _Py_atomic_int gil_drop_request;
-#endif
-    struct _pending_calls pending;
-    struct _gil_runtime_state gil;
-};
-
-PyAPI_FUNC(void) _PyEval_Initialize(struct _ceval_runtime_state *);
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* !_Py_CEVAL_H */
diff --git a/Include/internal/_condvar.h b/Include/internal/_condvar.h
deleted file mode 100644
index 6827db7..0000000
--- a/Include/internal/_condvar.h
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef _CONDVAR_H_
-#define _CONDVAR_H_
-
-#ifndef _POSIX_THREADS
-/* This means pthreads are not implemented in libc headers, hence the macro
-   not present in unistd.h. But they still can be implemented as an external
-   library (e.g. gnu pth in pthread emulation) */
-# ifdef HAVE_PTHREAD_H
-#  include <pthread.h> /* _POSIX_THREADS */
-# endif
-#endif
-
-#ifdef _POSIX_THREADS
-/*
- * POSIX support
- */
-#define Py_HAVE_CONDVAR
-
-#include <pthread.h>
-
-#define PyMUTEX_T pthread_mutex_t
-#define PyCOND_T pthread_cond_t
-
-#elif defined(NT_THREADS)
-/*
- * Windows (XP, 2003 server and later, as well as (hopefully) CE) support
- *
- * Emulated condition variables ones that work with XP and later, plus
- * example native support on VISTA and onwards.
- */
-#define Py_HAVE_CONDVAR
-
-/* include windows if it hasn't been done before */
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-
-/* options */
-/* non-emulated condition variables are provided for those that want
- * to target Windows Vista.  Modify this macro to enable them.
- */
-#ifndef _PY_EMULATED_WIN_CV
-#define _PY_EMULATED_WIN_CV 1  /* use emulated condition variables */
-#endif
-
-/* fall back to emulation if not targeting Vista */
-#if !defined NTDDI_VISTA || NTDDI_VERSION < NTDDI_VISTA
-#undef _PY_EMULATED_WIN_CV
-#define _PY_EMULATED_WIN_CV 1
-#endif
-
-#if _PY_EMULATED_WIN_CV
-
-typedef CRITICAL_SECTION PyMUTEX_T;
-
-/* The ConditionVariable object. From XP onwards it is easily emulated
-   with a Semaphore.
-   Semaphores are available on Windows XP (2003 server) and later.
-   We use a Semaphore rather than an auto-reset event, because although
-   an auto-resent event might appear to solve the lost-wakeup bug (race
-   condition between releasing the outer lock and waiting) because it
-   maintains state even though a wait hasn't happened, there is still
-   a lost wakeup problem if more than one thread are interrupted in the
-   critical place.  A semaphore solves that, because its state is
-   counted, not Boolean.
-   Because it is ok to signal a condition variable with no one
-   waiting, we need to keep track of the number of
-   waiting threads.  Otherwise, the semaphore's state could rise
-   without bound.  This also helps reduce the number of "spurious wakeups"
-   that would otherwise happen.
- */
-
-typedef struct _PyCOND_T
-{
-    HANDLE sem;
-    int waiting; /* to allow PyCOND_SIGNAL to be a no-op */
-} PyCOND_T;
-
-#else /* !_PY_EMULATED_WIN_CV */
-
-/* Use native Win7 primitives if build target is Win7 or higher */
-
-/* SRWLOCK is faster and better than CriticalSection */
-typedef SRWLOCK PyMUTEX_T;
-
-typedef CONDITION_VARIABLE PyCOND_T;
-
-#endif /* _PY_EMULATED_WIN_CV */
-
-#endif /* _POSIX_THREADS, NT_THREADS */
-
-#endif /* _CONDVAR_H_ */
diff --git a/Include/internal/_gil.h b/Include/internal/_gil.h
deleted file mode 100644
index 42301bf..0000000
--- a/Include/internal/_gil.h
+++ /dev/null
@@ -1,48 +0,0 @@
-#ifndef _Py_GIL_H
-#define _Py_GIL_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "pyatomic.h"
-
-#include "internal/_condvar.h"
-#ifndef Py_HAVE_CONDVAR
-#error You need either a POSIX-compatible or a Windows system!
-#endif
-
-/* Enable if you want to force the switching of threads at least
-   every `interval`. */
-#undef FORCE_SWITCHING
-#define FORCE_SWITCHING
-
-struct _gil_runtime_state {
-    /* microseconds (the Python API uses seconds, though) */
-    unsigned long interval;
-    /* Last PyThreadState holding / having held the GIL. This helps us
-       know whether anyone else was scheduled after we dropped the GIL. */
-    _Py_atomic_address last_holder;
-    /* Whether the GIL is already taken (-1 if uninitialized). This is
-       atomic because it can be read without any lock taken in ceval.c. */
-    _Py_atomic_int locked;
-    /* Number of GIL switches since the beginning. */
-    unsigned long switch_number;
-#ifdef WITH_THREAD
-    /* This condition variable allows one or several threads to wait
-       until the GIL is released. In addition, the mutex also protects
-       the above variables. */
-    PyCOND_T cond;
-    PyMUTEX_T mutex;
-#ifdef FORCE_SWITCHING
-    /* This condition variable helps the GIL-releasing thread wait for
-       a GIL-awaiting thread to be scheduled and take the GIL. */
-    PyCOND_T switch_cond;
-    PyMUTEX_T switch_mutex;
-#endif
-#endif /* WITH_THREAD */
-};
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* !_Py_GIL_H */
diff --git a/Include/internal/_mem.h b/Include/internal/_mem.h
deleted file mode 100644
index 2932377..0000000
--- a/Include/internal/_mem.h
+++ /dev/null
@@ -1,197 +0,0 @@
-#ifndef _Py_MEM_H
-#define _Py_MEM_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "objimpl.h"
-#include "pymem.h"
-
-#ifdef WITH_PYMALLOC
-#include "_pymalloc.h"
-#endif
-
-/* Low-level memory runtime state */
-
-struct _pymem_runtime_state {
-    struct _allocator_runtime_state {
-        PyMemAllocatorEx mem;
-        PyMemAllocatorEx obj;
-        PyMemAllocatorEx raw;
-    } allocators;
-#ifdef WITH_PYMALLOC
-    /* Array of objects used to track chunks of memory (arenas). */
-    struct arena_object* arenas;
-    /* The head of the singly-linked, NULL-terminated list of available
-       arena_objects. */
-    struct arena_object* unused_arena_objects;
-    /* The head of the doubly-linked, NULL-terminated at each end,
-       list of arena_objects associated with arenas that have pools
-       available. */
-    struct arena_object* usable_arenas;
-    /* Number of slots currently allocated in the `arenas` vector. */
-    unsigned int maxarenas;
-    /* Number of arenas allocated that haven't been free()'d. */
-    size_t narenas_currently_allocated;
-    /* High water mark (max value ever seen) for
-     * narenas_currently_allocated. */
-    size_t narenas_highwater;
-    /* Total number of times malloc() called to allocate an arena. */
-    size_t ntimes_arena_allocated;
-    poolp usedpools[MAX_POOLS];
-    Py_ssize_t num_allocated_blocks;
-    size_t serialno;     /* incremented on each debug {m,re}alloc */
-#endif /* WITH_PYMALLOC */
-};
-
-PyAPI_FUNC(void) _PyMem_Initialize(struct _pymem_runtime_state *);
-
-
-/* High-level memory runtime state */
-
-struct _pyobj_runtime_state {
-    PyObjectArenaAllocator allocator_arenas;
-};
-
-PyAPI_FUNC(void) _PyObject_Initialize(struct _pyobj_runtime_state *);
-
-
-/* GC runtime state */
-
-/* If we change this, we need to change the default value in the
-   signature of gc.collect. */
-#define NUM_GENERATIONS 3
-
-/*
-   NOTE: about the counting of long-lived objects.
-
-   To limit the cost of garbage collection, there are two strategies;
-     - make each collection faster, e.g. by scanning fewer objects
-     - do less collections
-   This heuristic is about the latter strategy.
-
-   In addition to the various configurable thresholds, we only trigger a
-   full collection if the ratio
-    long_lived_pending / long_lived_total
-   is above a given value (hardwired to 25%).
-
-   The reason is that, while "non-full" collections (i.e., collections of
-   the young and middle generations) will always examine roughly the same
-   number of objects -- determined by the aforementioned thresholds --,
-   the cost of a full collection is proportional to the total number of
-   long-lived objects, which is virtually unbounded.
-
-   Indeed, it has been remarked that doing a full collection every
-   <constant number> of object creations entails a dramatic performance
-   degradation in workloads which consist in creating and storing lots of
-   long-lived objects (e.g. building a large list of GC-tracked objects would
-   show quadratic performance, instead of linear as expected: see issue #4074).
-
-   Using the above ratio, instead, yields amortized linear performance in
-   the total number of objects (the effect of which can be summarized
-   thusly: "each full garbage collection is more and more costly as the
-   number of objects grows, but we do fewer and fewer of them").
-
-   This heuristic was suggested by Martin von Löwis on python-dev in
-   June 2008. His original analysis and proposal can be found at:
-   http://mail.python.org/pipermail/python-dev/2008-June/080579.html
-*/
-
-/*
-   NOTE: about untracking of mutable objects.
-
-   Certain types of container cannot participate in a reference cycle, and
-   so do not need to be tracked by the garbage collector. Untracking these
-   objects reduces the cost of garbage collections. However, determining
-   which objects may be untracked is not free, and the costs must be
-   weighed against the benefits for garbage collection.
-
-   There are two possible strategies for when to untrack a container:
-
-   i) When the container is created.
-   ii) When the container is examined by the garbage collector.
-
-   Tuples containing only immutable objects (integers, strings etc, and
-   recursively, tuples of immutable objects) do not need to be tracked.
-   The interpreter creates a large number of tuples, many of which will
-   not survive until garbage collection. It is therefore not worthwhile
-   to untrack eligible tuples at creation time.
-
-   Instead, all tuples except the empty tuple are tracked when created.
-   During garbage collection it is determined whether any surviving tuples
-   can be untracked. A tuple can be untracked if all of its contents are
-   already not tracked. Tuples are examined for untracking in all garbage
-   collection cycles. It may take more than one cycle to untrack a tuple.
-
-   Dictionaries containing only immutable objects also do not need to be
-   tracked. Dictionaries are untracked when created. If a tracked item is
-   inserted into a dictionary (either as a key or value), the dictionary
-   becomes tracked. During a full garbage collection (all generations),
-   the collector will untrack any dictionaries whose contents are not
-   tracked.
-
-   The module provides the python function is_tracked(obj), which returns
-   the CURRENT tracking status of the object. Subsequent garbage
-   collections may change the tracking status of the object.
-
-   Untracking of certain containers was introduced in issue #4688, and
-   the algorithm was refined in response to issue #14775.
-*/
-
-struct gc_generation {
-    PyGC_Head head;
-    int threshold; /* collection threshold */
-    int count; /* count of allocations or collections of younger
-                  generations */
-};
-
-/* Running stats per generation */
-struct gc_generation_stats {
-    /* total number of collections */
-    Py_ssize_t collections;
-    /* total number of collected objects */
-    Py_ssize_t collected;
-    /* total number of uncollectable objects (put into gc.garbage) */
-    Py_ssize_t uncollectable;
-};
-
-struct _gc_runtime_state {
-    /* List of objects that still need to be cleaned up, singly linked
-     * via their gc headers' gc_prev pointers.  */
-    PyObject *trash_delete_later;
-    /* Current call-stack depth of tp_dealloc calls. */
-    int trash_delete_nesting;
-
-    int enabled;
-    int debug;
-    /* linked lists of container objects */
-    struct gc_generation generations[NUM_GENERATIONS];
-    PyGC_Head *generation0;
-    struct gc_generation_stats generation_stats[NUM_GENERATIONS];
-    /* true if we are currently running the collector */
-    int collecting;
-    /* list of uncollectable objects */
-    PyObject *garbage;
-    /* a list of callbacks to be invoked when collection is performed */
-    PyObject *callbacks;
-    /* This is the number of objects that survived the last full
-       collection. It approximates the number of long lived objects
-       tracked by the GC.
-
-       (by "full collection", we mean a collection of the oldest
-       generation). */
-    Py_ssize_t long_lived_total;
-    /* This is the number of objects that survived all "non-full"
-       collections, and are awaiting to undergo a full collection for
-       the first time. */
-    Py_ssize_t long_lived_pending;
-};
-
-PyAPI_FUNC(void) _PyGC_Initialize(struct _gc_runtime_state *);
-
-#define _PyGC_generation0 _PyRuntime.gc.generation0
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* !_Py_MEM_H */
diff --git a/Include/internal/_pymalloc.h b/Include/internal/_pymalloc.h
deleted file mode 100644
index 764edf9..0000000
--- a/Include/internal/_pymalloc.h
+++ /dev/null
@@ -1,443 +0,0 @@
-
-/* An object allocator for Python.
-
-   Here is an introduction to the layers of the Python memory architecture,
-   showing where the object allocator is actually used (layer +2), It is
-   called for every object allocation and deallocation (PyObject_New/Del),
-   unless the object-specific allocators implement a proprietary allocation
-   scheme (ex.: ints use a simple free list). This is also the place where
-   the cyclic garbage collector operates selectively on container objects.
-
-
-    Object-specific allocators
-        _____   ______   ______       ________
-       [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
-+3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
-     _______________________________              |                    |
-    [   Python's object allocator   ]             |                    |
-+2 | ####### Object memory ####### | <------ Internal buffers ------> |
-     ______________________________________________________________   |
-    [          Python's raw memory allocator (PyMem_ API)          ]  |
-+1 | <----- Python memory (under PyMem manager's control) ------> |   |
-     __________________________________________________________________
-    [    Underlying general-purpose allocator (ex: C library malloc)   ]
- 0 | <------ Virtual memory allocated for the python process -------> |
-
-   =========================================================================
-     _______________________________________________________________________
-    [                OS-specific Virtual Memory Manager (VMM)               ]
--1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
-     __________________________________   __________________________________
-    [                                  ] [                                  ]
--2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
-
-*/
-/*==========================================================================*/
-
-/* A fast, special-purpose memory allocator for small blocks, to be used
-   on top of a general-purpose malloc -- heavily based on previous art. */
-
-/* Vladimir Marangozov -- August 2000 */
-
-/*
- * "Memory management is where the rubber meets the road -- if we do the wrong
- *  thing at any level, the results will not be good. And if we don't make the
- *  levels work well together, we are in serious trouble." (1)
- *
- * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
- *    "Dynamic Storage Allocation: A Survey and Critical Review",
- *    in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
- */
-
-#ifndef _Py_PYMALLOC_H
-#define _Py_PYMALLOC_H
-
-/* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */
-
-/*==========================================================================*/
-
-/*
- * Allocation strategy abstract:
- *
- * For small requests, the allocator sub-allocates <Big> blocks of memory.
- * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
- * system's allocator.
- *
- * Small requests are grouped in size classes spaced 8 bytes apart, due
- * to the required valid alignment of the returned address. Requests of
- * a particular size are serviced from memory pools of 4K (one VMM page).
- * Pools are fragmented on demand and contain free lists of blocks of one
- * particular size class. In other words, there is a fixed-size allocator
- * for each size class. Free pools are shared by the different allocators
- * thus minimizing the space reserved for a particular size class.
- *
- * This allocation strategy is a variant of what is known as "simple
- * segregated storage based on array of free lists". The main drawback of
- * simple segregated storage is that we might end up with lot of reserved
- * memory for the different free lists, which degenerate in time. To avoid
- * this, we partition each free list in pools and we share dynamically the
- * reserved space between all free lists. This technique is quite efficient
- * for memory intensive programs which allocate mainly small-sized blocks.
- *
- * For small requests we have the following table:
- *
- * Request in bytes     Size of allocated block      Size class idx
- * ----------------------------------------------------------------
- *        1-8                     8                       0
- *        9-16                   16                       1
- *       17-24                   24                       2
- *       25-32                   32                       3
- *       33-40                   40                       4
- *       41-48                   48                       5
- *       49-56                   56                       6
- *       57-64                   64                       7
- *       65-72                   72                       8
- *        ...                   ...                     ...
- *      497-504                 504                      62
- *      505-512                 512                      63
- *
- *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
- *      allocator.
- */
-
-/*==========================================================================*/
-
-/*
- * -- Main tunable settings section --
- */
-
-/*
- * Alignment of addresses returned to the user. 8-bytes alignment works
- * on most current architectures (with 32-bit or 64-bit address busses).
- * The alignment value is also used for grouping small requests in size
- * classes spaced ALIGNMENT bytes apart.
- *
- * You shouldn't change this unless you know what you are doing.
- */
-#define ALIGNMENT               8               /* must be 2^N */
-#define ALIGNMENT_SHIFT         3
-
-/* Return the number of bytes in size class I, as a uint. */
-#define INDEX2SIZE(I) (((unsigned int)(I) + 1) << ALIGNMENT_SHIFT)
-
-/*
- * Max size threshold below which malloc requests are considered to be
- * small enough in order to use preallocated memory pools. You can tune
- * this value according to your application behaviour and memory needs.
- *
- * Note: a size threshold of 512 guarantees that newly created dictionaries
- * will be allocated from preallocated memory pools on 64-bit.
- *
- * The following invariants must hold:
- *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
- *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
- *
- * Although not required, for better performance and space efficiency,
- * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
- */
-#define SMALL_REQUEST_THRESHOLD 512
-#define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
-
-#if NB_SMALL_SIZE_CLASSES > 64
-#error "NB_SMALL_SIZE_CLASSES should be less than 64"
-#endif /* NB_SMALL_SIZE_CLASSES > 64 */
-
-/*
- * The system's VMM page size can be obtained on most unices with a
- * getpagesize() call or deduced from various header files. To make
- * things simpler, we assume that it is 4K, which is OK for most systems.
- * It is probably better if this is the native page size, but it doesn't
- * have to be.  In theory, if SYSTEM_PAGE_SIZE is larger than the native page
- * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
- * violation fault.  4K is apparently OK for all the platforms that python
- * currently targets.
- */
-#define SYSTEM_PAGE_SIZE        (4 * 1024)
-#define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)
-
-/*
- * Maximum amount of memory managed by the allocator for small requests.
- */
-#ifdef WITH_MEMORY_LIMITS
-#ifndef SMALL_MEMORY_LIMIT
-#define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
-#endif
-#endif
-
-/*
- * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
- * on a page boundary. This is a reserved virtual address space for the
- * current process (obtained through a malloc()/mmap() call). In no way this
- * means that the memory arenas will be used entirely. A malloc(<Big>) is
- * usually an address range reservation for <Big> bytes, unless all pages within
- * this space are referenced subsequently. So malloc'ing big blocks and not
- * using them does not mean "wasting memory". It's an addressable range
- * wastage...
- *
- * Arenas are allocated with mmap() on systems supporting anonymous memory
- * mappings to reduce heap fragmentation.
- */
-#define ARENA_SIZE              (256 << 10)     /* 256KB */
-
-#ifdef WITH_MEMORY_LIMITS
-#define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
-#endif
-
-/*
- * Size of the pools used for small blocks. Should be a power of 2,
- * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
- */
-#define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
-#define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK
-
-/*
- * -- End of tunable settings section --
- */
-
-/*==========================================================================*/
-
-/*
- * Locking
- *
- * To reduce lock contention, it would probably be better to refine the
- * crude function locking with per size class locking. I'm not positive
- * however, whether it's worth switching to such locking policy because
- * of the performance penalty it might introduce.
- *
- * The following macros describe the simplest (should also be the fastest)
- * lock object on a particular platform and the init/fini/lock/unlock
- * operations on it. The locks defined here are not expected to be recursive
- * because it is assumed that they will always be called in the order:
- * INIT, [LOCK, UNLOCK]*, FINI.
- */
-
-/*
- * Python's threads are serialized, so object malloc locking is disabled.
- */
-#define SIMPLELOCK_DECL(lock)   /* simple lock declaration              */
-#define SIMPLELOCK_INIT(lock)   /* allocate (if needed) and initialize  */
-#define SIMPLELOCK_FINI(lock)   /* free/destroy an existing lock        */
-#define SIMPLELOCK_LOCK(lock)   /* acquire released lock                */
-#define SIMPLELOCK_UNLOCK(lock) /* release acquired lock                */
-
-/* When you say memory, my mind reasons in terms of (pointers to) blocks */
-typedef uint8_t pyblock;
-
-/* Pool for small blocks. */
-struct pool_header {
-    union { pyblock *_padding;
-            unsigned int count; } ref;  /* number of allocated blocks    */
-    pyblock *freeblock;                 /* pool's free list head         */
-    struct pool_header *nextpool;       /* next pool of this size class  */
-    struct pool_header *prevpool;       /* previous pool       ""        */
-    unsigned int arenaindex;            /* index into arenas of base adr */
-    unsigned int szidx;                 /* block size class index        */
-    unsigned int nextoffset;            /* bytes to virgin block         */
-    unsigned int maxnextoffset;         /* largest valid nextoffset      */
-};
-
-typedef struct pool_header *poolp;
-
-/* Record keeping for arenas. */
-struct arena_object {
-    /* The address of the arena, as returned by malloc.  Note that 0
-     * will never be returned by a successful malloc, and is used
-     * here to mark an arena_object that doesn't correspond to an
-     * allocated arena.
-     */
-    uintptr_t address;
-
-    /* Pool-aligned pointer to the next pool to be carved off. */
-    pyblock* pool_address;
-
-    /* The number of available pools in the arena:  free pools + never-
-     * allocated pools.
-     */
-    unsigned int nfreepools;
-
-    /* The total number of pools in the arena, whether or not available. */
-    unsigned int ntotalpools;
-
-    /* Singly-linked list of available pools. */
-    struct pool_header* freepools;
-
-    /* Whenever this arena_object is not associated with an allocated
-     * arena, the nextarena member is used to link all unassociated
-     * arena_objects in the singly-linked `unused_arena_objects` list.
-     * The prevarena member is unused in this case.
-     *
-     * When this arena_object is associated with an allocated arena
-     * with at least one available pool, both members are used in the
-     * doubly-linked `usable_arenas` list, which is maintained in
-     * increasing order of `nfreepools` values.
-     *
-     * Else this arena_object is associated with an allocated arena
-     * all of whose pools are in use.  `nextarena` and `prevarena`
-     * are both meaningless in this case.
-     */
-    struct arena_object* nextarena;
-    struct arena_object* prevarena;
-};
-
-#define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
-
-#define DUMMY_SIZE_IDX  0xffff  /* size class of newly cached pools */
-
-/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
-#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
-
-/* Return total number of blocks in pool of size index I, as a uint. */
-#define NUMBLOCKS(I) \
-    ((unsigned int)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
-
-/*==========================================================================*/
-
-/*
- * This malloc lock
- */
-SIMPLELOCK_DECL(_malloc_lock)
-#define LOCK()          SIMPLELOCK_LOCK(_malloc_lock)
-#define UNLOCK()        SIMPLELOCK_UNLOCK(_malloc_lock)
-#define LOCK_INIT()     SIMPLELOCK_INIT(_malloc_lock)
-#define LOCK_FINI()     SIMPLELOCK_FINI(_malloc_lock)
-
-/*
- * Pool table -- headed, circular, doubly-linked lists of partially used pools.
-
-This is involved.  For an index i, usedpools[i+i] is the header for a list of
-all partially used pools holding small blocks with "size class idx" i. So
-usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
-16, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
-
-Pools are carved off an arena's highwater mark (an arena_object's pool_address
-member) as needed.  Once carved off, a pool is in one of three states forever
-after:
-
-used == partially used, neither empty nor full
-    At least one block in the pool is currently allocated, and at least one
-    block in the pool is not currently allocated (note this implies a pool
-    has room for at least two blocks).
-    This is a pool's initial state, as a pool is created only when malloc
-    needs space.
-    The pool holds blocks of a fixed size, and is in the circular list headed
-    at usedpools[i] (see above).  It's linked to the other used pools of the
-    same size class via the pool_header's nextpool and prevpool members.
-    If all but one block is currently allocated, a malloc can cause a
-    transition to the full state.  If all but one block is not currently
-    allocated, a free can cause a transition to the empty state.
-
-full == all the pool's blocks are currently allocated
-    On transition to full, a pool is unlinked from its usedpools[] list.
-    It's not linked to from anything then anymore, and its nextpool and
-    prevpool members are meaningless until it transitions back to used.
-    A free of a block in a full pool puts the pool back in the used state.
-    Then it's linked in at the front of the appropriate usedpools[] list, so
-    that the next allocation for its size class will reuse the freed block.
-
-empty == all the pool's blocks are currently available for allocation
-    On transition to empty, a pool is unlinked from its usedpools[] list,
-    and linked to the front of its arena_object's singly-linked freepools list,
-    via its nextpool member.  The prevpool member has no meaning in this case.
-    Empty pools have no inherent size class:  the next time a malloc finds
-    an empty list in usedpools[], it takes the first pool off of freepools.
-    If the size class needed happens to be the same as the size class the pool
-    last had, some pool initialization can be skipped.
-
-
-Block Management
-
-Blocks within pools are again carved out as needed.  pool->freeblock points to
-the start of a singly-linked list of free blocks within the pool.  When a
-block is freed, it's inserted at the front of its pool's freeblock list.  Note
-that the available blocks in a pool are *not* linked all together when a pool
-is initialized.  Instead only "the first two" (lowest addresses) blocks are
-set up, returning the first such block, and setting pool->freeblock to a
-one-block list holding the second such block.  This is consistent with that
-pymalloc strives at all levels (arena, pool, and block) never to touch a piece
-of memory until it's actually needed.
-
-So long as a pool is in the used state, we're certain there *is* a block
-available for allocating, and pool->freeblock is not NULL.  If pool->freeblock
-points to the end of the free list before we've carved the entire pool into
-blocks, that means we simply haven't yet gotten to one of the higher-address
-blocks.  The offset from the pool_header to the start of "the next" virgin
-block is stored in the pool_header nextoffset member, and the largest value
-of nextoffset that makes sense is stored in the maxnextoffset member when a
-pool is initialized.  All the blocks in a pool have been passed out at least
-once when and only when nextoffset > maxnextoffset.
-
-
-Major obscurity:  While the usedpools vector is declared to have poolp
-entries, it doesn't really.  It really contains two pointers per (conceptual)
-poolp entry, the nextpool and prevpool members of a pool_header.  The
-excruciating initialization code below fools C so that
-
-    usedpool[i+i]
-
-"acts like" a genuine poolp, but only so long as you only reference its
-nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
-compensating for that a pool_header's nextpool and prevpool members
-immediately follow a pool_header's first two members:
-
-    union { block *_padding;
-            uint count; } ref;
-    block *freeblock;
-
-each of which consume sizeof(block *) bytes.  So what usedpools[i+i] really
-contains is a fudged-up pointer p such that *if* C believes it's a poolp
-pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
-circular list is empty).
-
-It's unclear why the usedpools setup is so convoluted.  It could be to
-minimize the amount of cache required to hold this heavily-referenced table
-(which only *needs* the two interpool pointer members of a pool_header). OTOH,
-referencing code has to remember to "double the index" and doing so isn't
-free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
-on that C doesn't insert any padding anywhere in a pool_header at or before
-the prevpool member.
-**************************************************************************** */
-
-#define MAX_POOLS  (2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8)
-
-/*==========================================================================
-Arena management.
-
-`arenas` is a vector of arena_objects.  It contains maxarenas entries, some of
-which may not be currently used (== they're arena_objects that aren't
-currently associated with an allocated arena).  Note that arenas proper are
-separately malloc'ed.
-
-Prior to Python 2.5, arenas were never free()'ed.  Starting with Python 2.5,
-we do try to free() arenas, and use some mild heuristic strategies to increase
-the likelihood that arenas eventually can be freed.
-
-unused_arena_objects
-
-    This is a singly-linked list of the arena_objects that are currently not
-    being used (no arena is associated with them).  Objects are taken off the
-    head of the list in new_arena(), and are pushed on the head of the list in
-    PyObject_Free() when the arena is empty.  Key invariant:  an arena_object
-    is on this list if and only if its .address member is 0.
-
-usable_arenas
-
-    This is a doubly-linked list of the arena_objects associated with arenas
-    that have pools available.  These pools are either waiting to be reused,
-    or have not been used before.  The list is sorted to have the most-
-    allocated arenas first (ascending order based on the nfreepools member).
-    This means that the next allocation will come from a heavily used arena,
-    which gives the nearly empty arenas a chance to be returned to the system.
-    In my unscientific tests this dramatically improved the number of arenas
-    that could be freed.
-
-Note that an arena_object associated with an arena all of whose pools are
-currently in use isn't on either list.
-*/
-
-/* How many arena_objects do we initially allocate?
- * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
- * `arenas` vector.
- */
-#define INITIAL_ARENA_OBJECTS 16
-
-#endif /* _Py_PYMALLOC_H */
diff --git a/Include/internal/_pystate.h b/Include/internal/_pystate.h
deleted file mode 100644
index 9f2dea1..0000000
--- a/Include/internal/_pystate.h
+++ /dev/null
@@ -1,93 +0,0 @@
-#ifndef _Py_PYSTATE_H
-#define _Py_PYSTATE_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "pystate.h"
-#include "pyatomic.h"
-
-#ifdef WITH_THREAD
-#include "pythread.h"
-#endif
-
-#include "_mem.h"
-#include "_ceval.h"
-#include "_warnings.h"
-
-
-/* GIL state */
-
-struct _gilstate_runtime_state {
-    int check_enabled;
-    /* Assuming the current thread holds the GIL, this is the
-       PyThreadState for the current thread. */
-    _Py_atomic_address tstate_current;
-    PyThreadFrameGetter getframe;
-#ifdef WITH_THREAD
-    /* The single PyInterpreterState used by this process'
-       GILState implementation
-    */
-    /* TODO: Given interp_main, it may be possible to kill this ref */
-    PyInterpreterState *autoInterpreterState;
-    int autoTLSkey;
-#endif /* WITH_THREAD */
-};
-
-/* hook for PyEval_GetFrame(), requested for Psyco */
-#define _PyThreadState_GetFrame _PyRuntime.gilstate.getframe
-
-/* Issue #26558: Flag to disable PyGILState_Check().
-   If set to non-zero, PyGILState_Check() always return 1. */
-#define _PyGILState_check_enabled _PyRuntime.gilstate.check_enabled
-
-
-/* Full Python runtime state */
-
-typedef struct pyruntimestate {
-    int initialized;
-    int core_initialized;
-    PyThreadState *finalizing;
-
-    struct pyinterpreters {
-#ifdef WITH_THREAD
-        PyThread_type_lock mutex;
-#endif
-        PyInterpreterState *head;
-        PyInterpreterState *main;
-        /* _next_interp_id is an auto-numbered sequence of small
-           integers.  It gets initialized in _PyInterpreterState_Init(),
-           which is called in Py_Initialize(), and used in
-           PyInterpreterState_New().  A negative interpreter ID
-           indicates an error occurred.  The main interpreter will
-           always have an ID of 0.  Overflow results in a RuntimeError.
-           If that becomes a problem later then we can adjust, e.g. by
-           using a Python int. */
-        int64_t next_id;
-    } interpreters;
-
-#define NEXITFUNCS 32
-    void (*exitfuncs[NEXITFUNCS])(void);
-    int nexitfuncs;
-    void (*pyexitfunc)(void);
-
-    struct _pyobj_runtime_state obj;
-    struct _gc_runtime_state gc;
-    struct _pymem_runtime_state mem;
-    struct _warnings_runtime_state warnings;
-    struct _ceval_runtime_state ceval;
-    struct _gilstate_runtime_state gilstate;
-
-    // XXX Consolidate globals found via the check-c-globals script.
-} _PyRuntimeState;
-
-PyAPI_DATA(_PyRuntimeState) _PyRuntime;
-PyAPI_FUNC(void) _PyRuntimeState_Init(_PyRuntimeState *);
-PyAPI_FUNC(void) _PyRuntimeState_Fini(_PyRuntimeState *);
-
-PyAPI_FUNC(void) _PyInterpreterState_Enable(_PyRuntimeState *);
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* !_Py_PYSTATE_H */
diff --git a/Include/internal/_warnings.h b/Include/internal/_warnings.h
deleted file mode 100644
index 2a1abb2..0000000
--- a/Include/internal/_warnings.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef _Py_WARNINGS_H
-#define _Py_WARNINGS_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "object.h"
-
-struct _warnings_runtime_state {
-    /* Both 'filters' and 'onceregistry' can be set in warnings.py;
-       get_warnings_attr() will reset these variables accordingly. */
-    PyObject *filters;  /* List */
-    PyObject *once_registry;  /* Dict */
-    PyObject *default_action; /* String */
-    long filters_version;
-};
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* !_Py_WARNINGS_H */
diff --git a/Include/object.h b/Include/object.h
index b46d4c3..f5ed70b 100644
--- a/Include/object.h
+++ b/Include/object.h
@@ -1038,6 +1038,8 @@ with the call stack never exceeding a depth of PyTrash_UNWIND_LEVEL.
    Kept for binary compatibility of extensions using the stable ABI. */
 PyAPI_FUNC(void) _PyTrash_deposit_object(PyObject*);
 PyAPI_FUNC(void) _PyTrash_destroy_chain(void);
+PyAPI_DATA(int) _PyTrash_delete_nesting;
+PyAPI_DATA(PyObject *) _PyTrash_delete_later;
 #endif /* !Py_LIMITED_API */
 
 /* The new thread-safe private API, invoked by the macros below. */
diff --git a/Include/pylifecycle.h b/Include/pylifecycle.h
index b02cd4c..0d609ec 100644
--- a/Include/pylifecycle.h
+++ b/Include/pylifecycle.h
@@ -119,10 +119,7 @@ PyAPI_FUNC(void) _PyType_Fini(void);
 PyAPI_FUNC(void) _Py_HashRandomization_Fini(void);
 PyAPI_FUNC(void) PyAsyncGen_Fini(void);
 
-#define _Py_IS_FINALIZING() \
-    (_PyRuntime.finalizing != NULL)
-#define _Py_CURRENTLY_FINALIZING(tstate) \
-    (_PyRuntime.finalizing == tstate)
+PyAPI_DATA(PyThreadState *) _Py_Finalizing;
 #endif
 
 /* Signals */
diff --git a/Include/pystate.h b/Include/pystate.h
index 90081c5..8a92f3e 100644
--- a/Include/pystate.h
+++ b/Include/pystate.h
@@ -29,10 +29,9 @@ typedef struct {
     int use_hash_seed;
     unsigned long hash_seed;
     int _disable_importlib; /* Needed by freeze_importlib */
-    char *allocator;
 } _PyCoreConfig;
 
-#define _PyCoreConfig_INIT {0, -1, 0, 0, NULL}
+#define _PyCoreConfig_INIT {0, -1, 0, 0}
 
 /* Placeholders while working on the new configuration API
  *
@@ -58,19 +57,6 @@ typedef struct _is {
     PyObject *builtins;
     PyObject *importlib;
 
-    /* Used in Python/sysmodule.c. */
-    int check_interval;
-    PyObject *warnoptions;
-    PyObject *xoptions;
-
-    /* Used in Modules/_threadmodule.c. */
-    long num_threads;
-    /* Support for runtime thread stack size tuning.
-       A value of 0 means using the platform's default stack size
-       or the size specified by the THREAD_STACK_SIZE macro. */
-    /* Used in Python/thread.c. */
-    size_t pythread_stacksize;
-
     PyObject *codec_search_path;
     PyObject *codec_search_cache;
     PyObject *codec_error_registry;
@@ -199,6 +185,9 @@ typedef struct _ts {
 
 #endif
 
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(void) _PyInterpreterState_Init(void);
+#endif /* !Py_LIMITED_API */
 PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_New(void);
 PyAPI_FUNC(void) PyInterpreterState_Clear(PyInterpreterState *);
 PyAPI_FUNC(void) PyInterpreterState_Delete(PyInterpreterState *);
@@ -257,7 +246,7 @@ PyAPI_FUNC(int) PyThreadState_SetAsyncExc(unsigned long, PyObject *);
 /* Assuming the current thread holds the GIL, this is the
    PyThreadState for the current thread. */
 #ifdef Py_BUILD_CORE
-#  define _PyThreadState_Current _PyRuntime.gilstate.tstate_current
+PyAPI_DATA(_Py_atomic_address) _PyThreadState_Current;
 #  define PyThreadState_GET() \
      ((PyThreadState*)_Py_atomic_load_relaxed(&_PyThreadState_Current))
 #else
@@ -312,6 +301,10 @@ PyAPI_FUNC(void) PyGILState_Release(PyGILState_STATE);
 PyAPI_FUNC(PyThreadState *) PyGILState_GetThisThreadState(void);
 
 #ifndef Py_LIMITED_API
+/* Issue #26558: Flag to disable PyGILState_Check().
+   If set to non-zero, PyGILState_Check() always return 1. */
+PyAPI_DATA(int) _PyGILState_check_enabled;
+
 /* Helper/diagnostic function - return 1 if the current thread
    currently holds the GIL, 0 otherwise.
 
@@ -347,6 +340,11 @@ PyAPI_FUNC(PyThreadState *) PyThreadState_Next(PyThreadState *);
 typedef struct _frame *(*PyThreadFrameGetter)(PyThreadState *self_);
 #endif
 
+/* hook for PyEval_GetFrame(), requested for Psyco */
+#ifndef Py_LIMITED_API
+PyAPI_DATA(PyThreadFrameGetter) _PyThreadState_GetFrame;
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/Makefile.pre.in b/Makefile.pre.in
index d6ebf85..57d2ab7 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -987,13 +987,6 @@ PYTHON_HEADERS= \
 		pyconfig.h \
 		$(PARSER_HEADERS) \
 		$(srcdir)/Include/Python-ast.h \
-		$(srcdir)/Include/internal/_Python.h \
-		$(srcdir)/Include/internal/_ceval.h \
-		$(srcdir)/Include/internal/_gil.h \
-		$(srcdir)/Include/internal/_mem.h \
-		$(srcdir)/Include/internal/_pymalloc.h \
-		$(srcdir)/Include/internal/_pystate.h \
-		$(srcdir)/Include/internal/_warnings.h \
 		$(DTRACE_HEADERS)
 
 $(LIBRARY_OBJS) $(MODOBJS) Programs/python.o: $(PYTHON_HEADERS)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2017-09-05-13-47-49.bpo-30860.MROpZw.rst b/Misc/NEWS.d/next/Core and Builtins/2017-09-05-13-47-49.bpo-30860.MROpZw.rst
deleted file mode 100644
index d8e9d5e..0000000
--- a/Misc/NEWS.d/next/Core and Builtins/2017-09-05-13-47-49.bpo-30860.MROpZw.rst
+++ /dev/null
@@ -1,2 +0,0 @@
-Consolidate CPython's global runtime state under a single struct.  This
-improves discoverability of the runtime state.
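Aside: the size-class table documented in the deleted Include/internal/_pymalloc.h above can be reproduced with a few lines of standalone C. INDEX2SIZE and the alignment constants mirror that header; size2index is an illustrative helper matching the table, not necessarily CPython's exact code:

/* Sketch of pymalloc's size-class arithmetic (illustrative only). */
#include <stdio.h>

#define ALIGNMENT        8   /* must be 2^N */
#define ALIGNMENT_SHIFT  3

/* Number of bytes in size class I -- same formula as the header. */
#define INDEX2SIZE(I) (((unsigned int)(I) + 1) << ALIGNMENT_SHIFT)

/* Map a request of nbytes (1..512) to its size class index:
   1-8 -> 0, 9-16 -> 1, ..., 505-512 -> 63. */
static unsigned int size2index(size_t nbytes)
{
    return (unsigned int)((nbytes - 1) >> ALIGNMENT_SHIFT);
}

int main(void)
{
    size_t requests[] = {1, 8, 9, 24, 505, 512};
    for (size_t i = 0; i < sizeof(requests) / sizeof(requests[0]); i++) {
        unsigned int idx = size2index(requests[i]);
        printf("request %3zu -> class %2u (block %3u bytes)\n",
               requests[i], idx, INDEX2SIZE(idx));
    }
    return 0;
}

Compiled and run, this prints the same mapping as the table (e.g. a 24-byte request lands in class 2 and is served an exact 24-byte block).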
diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c
index 3f57041..189b1cd 100644
--- a/Modules/_io/bufferedio.c
+++ b/Modules/_io/bufferedio.c
@@ -279,7 +279,7 @@ _enter_buffered_busy(buffered *self)
                      "reentrant call inside %R", self);
         return 0;
     }
-    relax_locking = _Py_IS_FINALIZING();
+    relax_locking = (_Py_Finalizing != NULL);
     Py_BEGIN_ALLOW_THREADS
     if (!relax_locking)
         st = PyThread_acquire_lock(self->lock, 1);
diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c
index 89be96c..da750c0 100644
--- a/Modules/_threadmodule.c
+++ b/Modules/_threadmodule.c
@@ -14,6 +14,7 @@
 #include "pythread.h"
 
 static PyObject *ThreadError;
+static long nb_threads = 0;
 static PyObject *str_dict;
 
 _Py_IDENTIFIER(stderr);
@@ -992,7 +993,7 @@ t_bootstrap(void *boot_raw)
     tstate->thread_id = PyThread_get_thread_ident();
     _PyThreadState_Init(tstate);
     PyEval_AcquireThread(tstate);
-    tstate->interp->num_threads++;
+    nb_threads++;
     res = PyObject_Call(boot->func, boot->args, boot->keyw);
     if (res == NULL) {
         if (PyErr_ExceptionMatches(PyExc_SystemExit))
@@ -1019,7 +1020,7 @@ t_bootstrap(void *boot_raw)
     Py_DECREF(boot->args);
     Py_XDECREF(boot->keyw);
     PyMem_DEL(boot_raw);
-    tstate->interp->num_threads--;
+    nb_threads--;
     PyThreadState_Clear(tstate);
     PyThreadState_DeleteCurrent();
     PyThread_exit_thread();
@@ -1158,8 +1159,7 @@ A thread's identity may be reused for another thread after it exits.");
 static PyObject *
 thread__count(PyObject *self)
 {
-    PyThreadState *tstate = PyThreadState_Get();
-    return PyLong_FromLong(tstate->interp->num_threads);
+    return PyLong_FromLong(nb_threads);
 }
 
 PyDoc_STRVAR(_count_doc,
@@ -1352,7 +1352,6 @@ PyInit__thread(void)
     PyObject *m, *d, *v;
     double time_max;
     double timeout_max;
-    PyThreadState *tstate = PyThreadState_Get();
 
     /* Initialize types: */
     if (PyType_Ready(&localdummytype) < 0)
@@ -1397,7 +1396,7 @@ PyInit__thread(void)
     if (PyModule_AddObject(m, "_local", (PyObject *)&localtype) < 0)
         return NULL;
 
-    tstate->interp->num_threads = 0;
+    nb_threads = 0;
 
     str_dict = PyUnicode_InternFromString("__dict__");
     if (str_dict == NULL)
diff --git a/Modules/_winapi.c b/Modules/_winapi.c
index 6556d99..682d0a3 100644
--- a/Modules/_winapi.c
+++ b/Modules/_winapi.c
@@ -114,7 +114,7 @@ overlapped_dealloc(OverlappedObject *self)
         {
             /* The operation is no longer pending -- nothing to do. */
         }
-        else if _Py_IS_FINALIZING()
+        else if (_Py_Finalizing == NULL)
         {
             /* The operation is still pending -- give a warning.  This
                will probably only happen on Windows XP. */
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index fa67f7f..4e5acf3 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -39,9 +39,133 @@ module gc
 /* Get the object given the GC head */
 #define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1))
 
+/*** Global GC state ***/
+
+struct gc_generation {
+    PyGC_Head head;
+    int threshold; /* collection threshold */
+    int count; /* count of allocations or collections of younger
+                  generations */
+};
+
+/* If we change this, we need to change the default value in the signature of
+   gc.collect. */
+#define NUM_GENERATIONS 3
+#define GEN_HEAD(n) (&generations[n].head)
+
+/* linked lists of container objects */
+static struct gc_generation generations[NUM_GENERATIONS] = {
+    /* PyGC_Head,                                 threshold,    count */
+    {{{GEN_HEAD(0), GEN_HEAD(0), 0}},             700,          0},
+    {{{GEN_HEAD(1), GEN_HEAD(1), 0}},             10,           0},
+    {{{GEN_HEAD(2), GEN_HEAD(2), 0}},             10,           0},
+};
+
+PyGC_Head *_PyGC_generation0 = GEN_HEAD(0);
+
+static int enabled = 1; /* automatic collection enabled? */
+
+/* true if we are currently running the collector */
+static int collecting = 0;
+
+/* list of uncollectable objects */
+static PyObject *garbage = NULL;
+
 /* Python string to use if unhandled exception occurs */
 static PyObject *gc_str = NULL;
 
+/* a list of callbacks to be invoked when collection is performed */
+static PyObject *callbacks = NULL;
+
+/* This is the number of objects that survived the last full collection. It
+   approximates the number of long lived objects tracked by the GC.
+
+   (by "full collection", we mean a collection of the oldest generation).
+*/
+static Py_ssize_t long_lived_total = 0;
+
+/* This is the number of objects that survived all "non-full" collections,
+   and are awaiting to undergo a full collection for the first time.
+
+*/
+static Py_ssize_t long_lived_pending = 0;
+
+/*
+   NOTE: about the counting of long-lived objects.
+
+   To limit the cost of garbage collection, there are two strategies;
+     - make each collection faster, e.g. by scanning fewer objects
+     - do less collections
+   This heuristic is about the latter strategy.
+
+   In addition to the various configurable thresholds, we only trigger a
+   full collection if the ratio
+    long_lived_pending / long_lived_total
+   is above a given value (hardwired to 25%).
+
+   The reason is that, while "non-full" collections (i.e., collections of
+   the young and middle generations) will always examine roughly the same
+   number of objects -- determined by the aforementioned thresholds --,
+   the cost of a full collection is proportional to the total number of
+   long-lived objects, which is virtually unbounded.
+
+   Indeed, it has been remarked that doing a full collection every
+   <constant number> of object creations entails a dramatic performance
+   degradation in workloads which consist in creating and storing lots of
+   long-lived objects (e.g. building a large list of GC-tracked objects would
+   show quadratic performance, instead of linear as expected: see issue #4074).
+
+   Using the above ratio, instead, yields amortized linear performance in
+   the total number of objects (the effect of which can be summarized
+   thusly: "each full garbage collection is more and more costly as the
+   number of objects grows, but we do fewer and fewer of them").
+
+   This heuristic was suggested by Martin von Löwis on python-dev in
+   June 2008. His original analysis and proposal can be found at:
+   http://mail.python.org/pipermail/python-dev/2008-June/080579.html
+*/
+
+/*
+   NOTE: about untracking of mutable objects.
+
+   Certain types of container cannot participate in a reference cycle, and
+   so do not need to be tracked by the garbage collector. Untracking these
+   objects reduces the cost of garbage collections. However, determining
+   which objects may be untracked is not free, and the costs must be
+   weighed against the benefits for garbage collection.
+
+   There are two possible strategies for when to untrack a container:
+
+   i) When the container is created.
+   ii) When the container is examined by the garbage collector.
+
+   Tuples containing only immutable objects (integers, strings etc, and
+   recursively, tuples of immutable objects) do not need to be tracked.
+   The interpreter creates a large number of tuples, many of which will
+   not survive until garbage collection. It is therefore not worthwhile
+   to untrack eligible tuples at creation time.
+
+   Instead, all tuples except the empty tuple are tracked when created.
+   During garbage collection it is determined whether any surviving tuples
+   can be untracked. A tuple can be untracked if all of its contents are
+   already not tracked. Tuples are examined for untracking in all garbage
+   collection cycles. It may take more than one cycle to untrack a tuple.
+
+   Dictionaries containing only immutable objects also do not need to be
+   tracked. Dictionaries are untracked when created. If a tracked item is
+   inserted into a dictionary (either as a key or value), the dictionary
+   becomes tracked. During a full garbage collection (all generations),
+   the collector will untrack any dictionaries whose contents are not
+   tracked.
+
+   The module provides the python function is_tracked(obj), which returns
+   the CURRENT tracking status of the object. Subsequent garbage
+   collections may change the tracking status of the object.
+
+   Untracking of certain containers was introduced in issue #4688, and
+   the algorithm was refined in response to issue #14775.
+*/
+
 /* set for debugging information */
 #define DEBUG_STATS             (1<<0) /* print collection statistics */
 #define DEBUG_COLLECTABLE       (1<<1) /* print collectable objects */
@@ -50,26 +174,19 @@ static PyObject *gc_str = NULL;
 #define DEBUG_LEAK              DEBUG_COLLECTABLE | \
                 DEBUG_UNCOLLECTABLE | \
                 DEBUG_SAVEALL
+static int debug;
+
+/* Running stats per generation */
+struct gc_generation_stats {
+    /* total number of collections */
+    Py_ssize_t collections;
+    /* total number of collected objects */
+    Py_ssize_t collected;
+    /* total number of uncollectable objects (put into gc.garbage) */
+    Py_ssize_t uncollectable;
+};
 
-#define GEN_HEAD(n) (&_PyRuntime.gc.generations[n].head)
-
-void
-_PyGC_Initialize(struct _gc_runtime_state *state)
-{
-    state->enabled = 1; /* automatic collection enabled? */
-
-#define _GEN_HEAD(n) (&state->generations[n].head)
-    struct gc_generation generations[NUM_GENERATIONS] = {
-        /* PyGC_Head,                                 threshold,    count */
-        {{{_GEN_HEAD(0), _GEN_HEAD(0), 0}},           700,          0},
-        {{{_GEN_HEAD(1), _GEN_HEAD(1), 0}},           10,           0},
-        {{{_GEN_HEAD(2), _GEN_HEAD(2), 0}},           10,           0},
-    };
-    for (int i = 0; i < NUM_GENERATIONS; i++) {
-        state->generations[i] = generations[i];
-    };
-    state->generation0 = GEN_HEAD(0);
-}
+static struct gc_generation_stats generation_stats[NUM_GENERATIONS];
 
 /*--------------------------------------------------------------------------
 gc_refs values.
@@ -649,16 +766,16 @@ handle_legacy_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
 {
     PyGC_Head *gc = finalizers->gc.gc_next;
 
-    if (_PyRuntime.gc.garbage == NULL) {
-        _PyRuntime.gc.garbage = PyList_New(0);
-        if (_PyRuntime.gc.garbage == NULL)
+    if (garbage == NULL) {
+        garbage = PyList_New(0);
+        if (garbage == NULL)
             Py_FatalError("gc couldn't create gc.garbage list");
     }
     for (; gc != finalizers; gc = gc->gc.gc_next) {
         PyObject *op = FROM_GC(gc);
 
-        if ((_PyRuntime.gc.debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
-            if (PyList_Append(_PyRuntime.gc.garbage, op) < 0)
+        if ((debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
+            if (PyList_Append(garbage, op) < 0)
                 return -1;
         }
     }
@@ -748,8 +865,8 @@ delete_garbage(PyGC_Head *collectable, PyGC_Head *old)
         PyGC_Head *gc = collectable->gc.gc_next;
         PyObject *op = FROM_GC(gc);
 
-        if (_PyRuntime.gc.debug & DEBUG_SAVEALL) {
-            PyList_Append(_PyRuntime.gc.garbage, op);
+        if (debug & DEBUG_SAVEALL) {
+            PyList_Append(garbage, op);
         }
         else {
             if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
@@ -802,9 +919,9 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
     PyGC_Head *gc;
     _PyTime_t t1 = 0;   /* initialize to prevent a compiler warning */
 
-    struct gc_generation_stats *stats = &_PyRuntime.gc.generation_stats[generation];
+    struct gc_generation_stats *stats = &generation_stats[generation];
 
-    if (_PyRuntime.gc.debug & DEBUG_STATS) {
+    if (debug & DEBUG_STATS) {
         PySys_WriteStderr("gc: collecting generation %d...\n",
                           generation);
         PySys_WriteStderr("gc: objects in each generation:");
@@ -821,9 +938,9 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
 
     /* update collection and allocation counters */
     if (generation+1 < NUM_GENERATIONS)
-        _PyRuntime.gc.generations[generation+1].count += 1;
+        generations[generation+1].count += 1;
     for (i = 0; i <= generation; i++)
-        _PyRuntime.gc.generations[i].count = 0;
+        generations[i].count = 0;
 
     /* merge younger generations with one we are currently collecting */
     for (i = 0; i < generation; i++) {
@@ -857,7 +974,7 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
     /* Move reachable objects to next generation. */
     if (young != old) {
         if (generation == NUM_GENERATIONS - 2) {
-            _PyRuntime.gc.long_lived_pending += gc_list_size(young);
+            long_lived_pending += gc_list_size(young);
         }
         gc_list_merge(young, old);
     }
@@ -865,8 +982,8 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
         /* We only untrack dicts in full collections, to avoid quadratic
            dict build-up. See issue #14775. */
         untrack_dicts(young);
-        _PyRuntime.gc.long_lived_pending = 0;
-        _PyRuntime.gc.long_lived_total = gc_list_size(young);
+        long_lived_pending = 0;
+        long_lived_total = gc_list_size(young);
     }
 
     /* All objects in unreachable are trash, but objects reachable from
@@ -886,7 +1003,7 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
     for (gc = unreachable.gc.gc_next; gc != &unreachable;
                     gc = gc->gc.gc_next) {
         m++;
-        if (_PyRuntime.gc.debug & DEBUG_COLLECTABLE) {
+        if (debug & DEBUG_COLLECTABLE) {
             debug_cycle("collectable", FROM_GC(gc));
         }
     }
@@ -915,10 +1032,10 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
          gc != &finalizers;
          gc = gc->gc.gc_next) {
         n++;
-        if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE)
+        if (debug & DEBUG_UNCOLLECTABLE)
             debug_cycle("uncollectable", FROM_GC(gc));
     }
-    if (_PyRuntime.gc.debug & DEBUG_STATS) {
+    if (debug & DEBUG_STATS) {
         _PyTime_t t2 = _PyTime_GetMonotonicClock();
 
         if (m == 0 && n == 0)
@@ -981,11 +1098,11 @@ invoke_gc_callback(const char *phase, int generation,
     PyObject *info = NULL;
 
     /* we may get called very early */
-    if (_PyRuntime.gc.callbacks == NULL)
+    if (callbacks == NULL)
         return;
     /* The local variable cannot be rebound, check it for sanity */
-    assert(_PyRuntime.gc.callbacks != NULL && PyList_CheckExact(_PyRuntime.gc.callbacks));
-    if (PyList_GET_SIZE(_PyRuntime.gc.callbacks) != 0) {
+    assert(callbacks != NULL && PyList_CheckExact(callbacks));
+    if (PyList_GET_SIZE(callbacks) != 0) {
         info = Py_BuildValue("{sisnsn}",
             "generation", generation,
             "collected", collected,
@@ -995,8 +1112,8 @@ invoke_gc_callback(const char *phase, int generation,
             return;
         }
     }
-    for (i=0; i<PyList_GET_SIZE(_PyRuntime.gc.callbacks); i++) {
-        PyObject *r, *cb = PyList_GET_ITEM(_PyRuntime.gc.callbacks, i);
+    for (i=0; i<PyList_GET_SIZE(callbacks); i++) {
+        PyObject *r, *cb = PyList_GET_ITEM(callbacks, i);
         Py_INCREF(cb); /* make sure cb doesn't go away */
         r = PyObject_CallFunction(cb, "sO", phase, info);
         Py_XDECREF(r);
@@ -1030,13 +1147,13 @@ collect_generations(void)
      * exceeds the threshold.  Objects in the that generation and
      * generations younger than it will be collected. */
     for (i = NUM_GENERATIONS-1; i >= 0; i--) {
-        if (_PyRuntime.gc.generations[i].count > _PyRuntime.gc.generations[i].threshold) {
+        if (generations[i].count > generations[i].threshold) {
             /* Avoid quadratic performance degradation in number
                of tracked objects. See comments at the beginning
               of this file, and issue #4074. */
*/ if (i == NUM_GENERATIONS - 1 - && _PyRuntime.gc.long_lived_pending < _PyRuntime.gc.long_lived_total / 4) + && long_lived_pending < long_lived_total / 4) continue; n = collect_with_callback(i); break; @@ -1057,7 +1174,7 @@ static PyObject * gc_enable_impl(PyObject *module) /*[clinic end generated code: output=45a427e9dce9155c input=81ac4940ca579707]*/ { - _PyRuntime.gc.enabled = 1; + enabled = 1; Py_RETURN_NONE; } @@ -1071,7 +1188,7 @@ static PyObject * gc_disable_impl(PyObject *module) /*[clinic end generated code: output=97d1030f7aa9d279 input=8c2e5a14e800d83b]*/ { - _PyRuntime.gc.enabled = 0; + enabled = 0; Py_RETURN_NONE; } @@ -1085,7 +1202,7 @@ static int gc_isenabled_impl(PyObject *module) /*[clinic end generated code: output=1874298331c49130 input=30005e0422373b31]*/ { - return _PyRuntime.gc.enabled; + return enabled; } /*[clinic input] @@ -1113,12 +1230,12 @@ gc_collect_impl(PyObject *module, int generation) return -1; } - if (_PyRuntime.gc.collecting) + if (collecting) n = 0; /* already collecting, don't do anything */ else { - _PyRuntime.gc.collecting = 1; + collecting = 1; n = collect_with_callback(generation); - _PyRuntime.gc.collecting = 0; + collecting = 0; } return n; @@ -1146,7 +1263,7 @@ static PyObject * gc_set_debug_impl(PyObject *module, int flags) /*[clinic end generated code: output=7c8366575486b228 input=5e5ce15e84fbed15]*/ { - _PyRuntime.gc.debug = flags; + debug = flags; Py_RETURN_NONE; } @@ -1161,7 +1278,7 @@ static int gc_get_debug_impl(PyObject *module) /*[clinic end generated code: output=91242f3506cd1e50 input=91a101e1c3b98366]*/ { - return _PyRuntime.gc.debug; + return debug; } PyDoc_STRVAR(gc_set_thresh__doc__, @@ -1175,13 +1292,13 @@ gc_set_thresh(PyObject *self, PyObject *args) { int i; if (!PyArg_ParseTuple(args, "i|ii:set_threshold", - &_PyRuntime.gc.generations[0].threshold, - &_PyRuntime.gc.generations[1].threshold, - &_PyRuntime.gc.generations[2].threshold)) + &generations[0].threshold, + &generations[1].threshold, + &generations[2].threshold)) return NULL; for (i = 2; i < NUM_GENERATIONS; i++) { /* generations higher than 2 get the same threshold */ - _PyRuntime.gc.generations[i].threshold = _PyRuntime.gc.generations[2].threshold; + generations[i].threshold = generations[2].threshold; } Py_RETURN_NONE; @@ -1198,9 +1315,9 @@ gc_get_threshold_impl(PyObject *module) /*[clinic end generated code: output=7902bc9f41ecbbd8 input=286d79918034d6e6]*/ { return Py_BuildValue("(iii)", - _PyRuntime.gc.generations[0].threshold, - _PyRuntime.gc.generations[1].threshold, - _PyRuntime.gc.generations[2].threshold); + generations[0].threshold, + generations[1].threshold, + generations[2].threshold); } /*[clinic input] @@ -1214,9 +1331,9 @@ gc_get_count_impl(PyObject *module) /*[clinic end generated code: output=354012e67b16398f input=a392794a08251751]*/ { return Py_BuildValue("(iii)", - _PyRuntime.gc.generations[0].count, - _PyRuntime.gc.generations[1].count, - _PyRuntime.gc.generations[2].count); + generations[0].count, + generations[1].count, + generations[2].count); } static int @@ -1347,7 +1464,7 @@ gc_get_stats_impl(PyObject *module) /* To get consistent values despite allocations while constructing the result list, we use a snapshot of the running stats. 
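The snapshot comment above is about reentrancy: building the Python result list allocates, allocation can trigger a collection, and a collection updates the very stats being reported. Copying the counters into a local array first makes the reported values internally consistent. A stand-alone sketch (field names simplified from the real struct):

    #include <stdio.h>
    #include <string.h>

    #define NUM_GENERATIONS 3

    struct gc_generation_stats {
        long collections, collected, uncollectable;
    };

    static struct gc_generation_stats generation_stats[NUM_GENERATIONS];

    /* Building the result may allocate, and allocation can run the GC,
     * which mutates generation_stats -- so snapshot first. */
    static void report_stats(void)
    {
        struct gc_generation_stats snap[NUM_GENERATIONS];
        memcpy(snap, generation_stats, sizeof snap);

        for (int i = 0; i < NUM_GENERATIONS; i++)
            printf("gen %d: %ld collections\n", i, snap[i].collections);
    }

    int main(void) { report_stats(); return 0; }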
*/ for (i = 0; i < NUM_GENERATIONS; i++) { - stats[i] = _PyRuntime.gc.generation_stats[i]; + stats[i] = generation_stats[i]; } result = PyList_New(0); @@ -1464,22 +1581,22 @@ PyInit_gc(void) if (m == NULL) return NULL; - if (_PyRuntime.gc.garbage == NULL) { - _PyRuntime.gc.garbage = PyList_New(0); - if (_PyRuntime.gc.garbage == NULL) + if (garbage == NULL) { + garbage = PyList_New(0); + if (garbage == NULL) return NULL; } - Py_INCREF(_PyRuntime.gc.garbage); - if (PyModule_AddObject(m, "garbage", _PyRuntime.gc.garbage) < 0) + Py_INCREF(garbage); + if (PyModule_AddObject(m, "garbage", garbage) < 0) return NULL; - if (_PyRuntime.gc.callbacks == NULL) { - _PyRuntime.gc.callbacks = PyList_New(0); - if (_PyRuntime.gc.callbacks == NULL) + if (callbacks == NULL) { + callbacks = PyList_New(0); + if (callbacks == NULL) return NULL; } - Py_INCREF(_PyRuntime.gc.callbacks); - if (PyModule_AddObject(m, "callbacks", _PyRuntime.gc.callbacks) < 0) + Py_INCREF(callbacks); + if (PyModule_AddObject(m, "callbacks", callbacks) < 0) return NULL; #define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return NULL @@ -1498,12 +1615,12 @@ PyGC_Collect(void) { Py_ssize_t n; - if (_PyRuntime.gc.collecting) + if (collecting) n = 0; /* already collecting, don't do anything */ else { - _PyRuntime.gc.collecting = 1; + collecting = 1; n = collect_with_callback(NUM_GENERATIONS - 1); - _PyRuntime.gc.collecting = 0; + collecting = 0; } return n; @@ -1512,7 +1629,7 @@ PyGC_Collect(void) Py_ssize_t _PyGC_CollectIfEnabled(void) { - if (!_PyRuntime.gc.enabled) + if (!enabled) return 0; return PyGC_Collect(); @@ -1529,12 +1646,12 @@ _PyGC_CollectNoFail(void) during interpreter shutdown (and then never finish it). See http://bugs.python.org/issue8713#msg195178 for an example. */ - if (_PyRuntime.gc.collecting) + if (collecting) n = 0; else { - _PyRuntime.gc.collecting = 1; + collecting = 1; n = collect(NUM_GENERATIONS - 1, NULL, NULL, 1); - _PyRuntime.gc.collecting = 0; + collecting = 0; } return n; } @@ -1542,10 +1659,10 @@ _PyGC_CollectNoFail(void) void _PyGC_DumpShutdownStats(void) { - if (!(_PyRuntime.gc.debug & DEBUG_SAVEALL) - && _PyRuntime.gc.garbage != NULL && PyList_GET_SIZE(_PyRuntime.gc.garbage) > 0) { + if (!(debug & DEBUG_SAVEALL) + && garbage != NULL && PyList_GET_SIZE(garbage) > 0) { char *message; - if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE) + if (debug & DEBUG_UNCOLLECTABLE) message = "gc: %zd uncollectable objects at " \ "shutdown"; else @@ -1556,13 +1673,13 @@ _PyGC_DumpShutdownStats(void) already. 
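PyGC_Collect(), gc.collect() and _PyGC_CollectNoFail all share the `collecting` flag pattern visible above: if a collection is already running, a nested request reports 0 collected objects instead of recursing. A minimal sketch of the guard, with do_collect() standing in for the real collect():

    #include <stdio.h>

    static int collecting = 0;      /* nonzero while a collection runs */

    static long do_collect(void) { return 42; /* stand-in for collect() */ }

    /* A reentrant call (e.g. an allocation inside a finalizer crossing a
     * threshold) must not start a second, nested collection. */
    static long collect_once(void)
    {
        long n;
        if (collecting)
            n = 0;                  /* already collecting, don't do anything */
        else {
            collecting = 1;
            n = do_collect();
            collecting = 0;
        }
        return n;
    }

    int main(void)
    {
        printf("collected %ld\n", collect_once());
        return 0;
    }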
*/ if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0, "gc", NULL, message, - PyList_GET_SIZE(_PyRuntime.gc.garbage))) + PyList_GET_SIZE(garbage))) PyErr_WriteUnraisable(NULL); - if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE) { + if (debug & DEBUG_UNCOLLECTABLE) { PyObject *repr = NULL, *bytes = NULL; - repr = PyObject_Repr(_PyRuntime.gc.garbage); + repr = PyObject_Repr(garbage); if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr))) - PyErr_WriteUnraisable(_PyRuntime.gc.garbage); + PyErr_WriteUnraisable(garbage); else { PySys_WriteStderr( " %s\n", @@ -1578,7 +1695,7 @@ _PyGC_DumpShutdownStats(void) void _PyGC_Fini(void) { - Py_CLEAR(_PyRuntime.gc.callbacks); + Py_CLEAR(callbacks); } /* for debugging */ @@ -1629,15 +1746,15 @@ _PyObject_GC_Alloc(int use_calloc, size_t basicsize) return PyErr_NoMemory(); g->gc.gc_refs = 0; _PyGCHead_SET_REFS(g, GC_UNTRACKED); - _PyRuntime.gc.generations[0].count++; /* number of allocated GC objects */ - if (_PyRuntime.gc.generations[0].count > _PyRuntime.gc.generations[0].threshold && - _PyRuntime.gc.enabled && - _PyRuntime.gc.generations[0].threshold && - !_PyRuntime.gc.collecting && + generations[0].count++; /* number of allocated GC objects */ + if (generations[0].count > generations[0].threshold && + enabled && + generations[0].threshold && + !collecting && !PyErr_Occurred()) { - _PyRuntime.gc.collecting = 1; + collecting = 1; collect_generations(); - _PyRuntime.gc.collecting = 0; + collecting = 0; } op = FROM_GC(g); return op; @@ -1702,8 +1819,8 @@ PyObject_GC_Del(void *op) PyGC_Head *g = AS_GC(op); if (IS_TRACKED(op)) gc_list_remove(g); - if (_PyRuntime.gc.generations[0].count > 0) { - _PyRuntime.gc.generations[0].count--; + if (generations[0].count > 0) { + generations[0].count--; } PyObject_FREE(g); } diff --git a/Modules/main.c b/Modules/main.c index 3e347dc..08b2276 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -598,10 +598,16 @@ Py_Main(int argc, wchar_t **argv) } } + char *pymalloc = Py_GETENV("PYTHONMALLOC"); + if (_PyMem_SetupAllocators(pymalloc) < 0) { + fprintf(stderr, + "Error in PYTHONMALLOC: unknown allocator \"%s\"!\n", pymalloc); + exit(1); + } + /* Initialize the core language runtime */ Py_IgnoreEnvironmentFlag = core_config.ignore_environment; core_config._disable_importlib = 0; - core_config.allocator = Py_GETENV("PYTHONMALLOC"); _Py_InitializeCore(&core_config); /* Reprocess the command line with the language runtime available */ diff --git a/Objects/object.c b/Objects/object.c index 68a90c2..2ba6e57 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2028,6 +2028,14 @@ finally: /* Trashcan support. */ +/* Current call-stack depth of tp_dealloc calls. */ +int _PyTrash_delete_nesting = 0; + +/* List of objects that still need to be cleaned up, singly linked via their + * gc headers' gc_prev pointers. + */ +PyObject *_PyTrash_delete_later = NULL; + /* Add op to the _PyTrash_delete_later list. Called when the current * call-stack depth gets large. op must be a currently untracked gc'ed * object, with refcount 0. Py_DECREF must already have been called on it. 
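The trashcan machinery being moved back to file statics above exists to bound C stack depth: when deallocations nest too deeply, an object is parked on a singly linked list (threaded through its gc header's gc_prev slot in the real code) and destroyed later from a flat loop. A self-contained sketch of the mechanism; the `obj` type, the TRASH_THRESHOLD value, and the `delete_later` field are simplifications, not the CPython definitions:

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct obj {
        struct obj *delete_later;   /* link in the pending-delete list */
        int id;
    } obj;

    static obj *delete_later = NULL;    /* head of the pending list */
    static int delete_nesting = 0;      /* dealloc call-stack depth */

    #define TRASH_THRESHOLD 50

    static void deposit(obj *op)        /* defer: push onto the list */
    {
        op->delete_later = delete_later;
        delete_later = op;
    }

    static void dealloc(obj *op);

    static void destroy_chain(void)     /* drain deferred deallocations */
    {
        while (delete_later) {
            obj *op = delete_later;
            delete_later = op->delete_later;
            ++delete_nesting;
            dealloc(op);                /* the loop, not recursion, drives
                                           cleanup, so depth stays bounded */
            --delete_nesting;
        }
    }

    static void dealloc(obj *op)
    {
        if (delete_nesting >= TRASH_THRESHOLD) {
            deposit(op);                /* too deep: finish later */
            return;
        }
        printf("freeing %d\n", op->id);
        free(op);
    }

    int main(void)
    {
        obj *a = malloc(sizeof *a);
        a->id = 1;
        deposit(a);
        destroy_chain();
        return 0;
    }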
@@ -2038,8 +2046,8 @@ _PyTrash_deposit_object(PyObject *op) assert(PyObject_IS_GC(op)); assert(_PyGC_REFS(op) == _PyGC_REFS_UNTRACKED); assert(op->ob_refcnt == 0); - _Py_AS_GC(op)->gc.gc_prev = (PyGC_Head *)_PyRuntime.gc.trash_delete_later; - _PyRuntime.gc.trash_delete_later = op; + _Py_AS_GC(op)->gc.gc_prev = (PyGC_Head *)_PyTrash_delete_later; + _PyTrash_delete_later = op; } /* The equivalent API, using per-thread state recursion info */ @@ -2060,11 +2068,11 @@ _PyTrash_thread_deposit_object(PyObject *op) void _PyTrash_destroy_chain(void) { - while (_PyRuntime.gc.trash_delete_later) { - PyObject *op = _PyRuntime.gc.trash_delete_later; + while (_PyTrash_delete_later) { + PyObject *op = _PyTrash_delete_later; destructor dealloc = Py_TYPE(op)->tp_dealloc; - _PyRuntime.gc.trash_delete_later = + _PyTrash_delete_later = (PyObject*) _Py_AS_GC(op)->gc.gc_prev; /* Call the deallocator directly. This used to try to @@ -2074,9 +2082,9 @@ _PyTrash_destroy_chain(void) * up distorting allocation statistics. */ assert(op->ob_refcnt == 0); - ++_PyRuntime.gc.trash_delete_nesting; + ++_PyTrash_delete_nesting; (*dealloc)(op); - --_PyRuntime.gc.trash_delete_nesting; + --_PyTrash_delete_nesting; } } diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 3698cfc..32e7ecb 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -178,9 +178,7 @@ static struct { #define PYDBG_FUNCS \ _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree - -#define _PyMem_Raw _PyRuntime.mem.allocators.raw -static const PyMemAllocatorEx _pymem_raw = { +static PyMemAllocatorEx _PyMem_Raw = { #ifdef Py_DEBUG &_PyMem_Debug.raw, PYRAWDBG_FUNCS #else @@ -188,8 +186,7 @@ static const PyMemAllocatorEx _pymem_raw = { #endif }; -#define _PyMem _PyRuntime.mem.allocators.mem -static const PyMemAllocatorEx _pymem = { +static PyMemAllocatorEx _PyMem = { #ifdef Py_DEBUG &_PyMem_Debug.mem, PYDBG_FUNCS #else @@ -197,8 +194,7 @@ static const PyMemAllocatorEx _pymem = { #endif }; -#define _PyObject _PyRuntime.mem.allocators.obj -static const PyMemAllocatorEx _pyobject = { +static PyMemAllocatorEx _PyObject = { #ifdef Py_DEBUG &_PyMem_Debug.obj, PYDBG_FUNCS #else @@ -271,7 +267,7 @@ _PyMem_SetupAllocators(const char *opt) #undef PYRAWDBG_FUNCS #undef PYDBG_FUNCS -static const PyObjectArenaAllocator _PyObject_Arena = {NULL, +static PyObjectArenaAllocator _PyObject_Arena = {NULL, #ifdef MS_WINDOWS _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree #elif defined(ARENAS_USE_MMAP) @@ -281,34 +277,6 @@ static const PyObjectArenaAllocator _PyObject_Arena = {NULL, #endif }; -void -_PyObject_Initialize(struct _pyobj_runtime_state *state) -{ - state->allocator_arenas = _PyObject_Arena; -} - -void -_PyMem_Initialize(struct _pymem_runtime_state *state) -{ - state->allocators.raw = _pymem_raw; - state->allocators.mem = _pymem; - state->allocators.obj = _pyobject; - -#ifdef WITH_PYMALLOC - for (int i = 0; i < 8; i++) { - if (NB_SMALL_SIZE_CLASSES <= i * 8) - break; - for (int j = 0; j < 8; j++) { - int x = i * 8 + j; - poolp *addr = &(state->usedpools[2*(x)]); - poolp val = (poolp)((uint8_t *)addr - 2*sizeof(pyblock *)); - state->usedpools[x * 2] = val; - state->usedpools[x * 2 + 1] = val; - }; - }; -#endif /* WITH_PYMALLOC */ -} - #ifdef WITH_PYMALLOC static int _PyMem_DebugEnabled(void) @@ -395,13 +363,13 @@ PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator) void PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator) { - *allocator = _PyRuntime.obj.allocator_arenas; + *allocator = 
_PyObject_Arena; } void PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator) { - _PyRuntime.obj.allocator_arenas = *allocator; + _PyObject_Arena = *allocator; } void * @@ -436,8 +404,7 @@ PyMem_RawRealloc(void *ptr, size_t new_size) return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size); } -void -PyMem_RawFree(void *ptr) +void PyMem_RawFree(void *ptr) { _PyMem_Raw.free(_PyMem_Raw.ctx, ptr); } @@ -554,10 +521,497 @@ PyObject_Free(void *ptr) static int running_on_valgrind = -1; #endif +/* An object allocator for Python. + + Here is an introduction to the layers of the Python memory architecture, + showing where the object allocator is actually used (layer +2), It is + called for every object allocation and deallocation (PyObject_New/Del), + unless the object-specific allocators implement a proprietary allocation + scheme (ex.: ints use a simple free list). This is also the place where + the cyclic garbage collector operates selectively on container objects. + + + Object-specific allocators + _____ ______ ______ ________ + [ int ] [ dict ] [ list ] ... [ string ] Python core | ++3 | <----- Object-specific memory -----> | <-- Non-object memory --> | + _______________________________ | | + [ Python's object allocator ] | | ++2 | ####### Object memory ####### | <------ Internal buffers ------> | + ______________________________________________________________ | + [ Python's raw memory allocator (PyMem_ API) ] | ++1 | <----- Python memory (under PyMem manager's control) ------> | | + __________________________________________________________________ + [ Underlying general-purpose allocator (ex: C library malloc) ] + 0 | <------ Virtual memory allocated for the python process -------> | + + ========================================================================= + _______________________________________________________________________ + [ OS-specific Virtual Memory Manager (VMM) ] +-1 | <--- Kernel dynamic storage allocation & management (page-based) ---> | + __________________________________ __________________________________ + [ ] [ ] +-2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> | + +*/ +/*==========================================================================*/ + +/* A fast, special-purpose memory allocator for small blocks, to be used + on top of a general-purpose malloc -- heavily based on previous art. */ + +/* Vladimir Marangozov -- August 2000 */ + +/* + * "Memory management is where the rubber meets the road -- if we do the wrong + * thing at any level, the results will not be good. And if we don't make the + * levels work well together, we are in serious trouble." (1) + * + * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles, + * "Dynamic Storage Allocation: A Survey and Critical Review", + * in Proc. 1995 Int'l. Workshop on Memory Management, September 1995. + */ + +/* #undef WITH_MEMORY_LIMITS */ /* disable mem limit checks */ + +/*==========================================================================*/ + +/* + * Allocation strategy abstract: + * + * For small requests, the allocator sub-allocates <Big> blocks of memory. + * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the + * system's allocator. + * + * Small requests are grouped in size classes spaced 8 bytes apart, due + * to the required valid alignment of the returned address. Requests of + * a particular size are serviced from memory pools of 4K (one VMM page). 
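The size-class table above is pure arithmetic: round the request up to the next multiple of ALIGNMENT, and the class index is that multiple minus one. A compilable demonstration of the mapping (same constants as the code, stand-alone otherwise):

    #include <stdio.h>

    #define ALIGNMENT        8
    #define ALIGNMENT_SHIFT  3
    #define INDEX2SIZE(I)    (((unsigned)(I) + 1) << ALIGNMENT_SHIFT)

    /* The mapping the table describes: request size -> size class index.
     * This is the same (nbytes - 1) >> ALIGNMENT_SHIFT expression that
     * _PyObject_Alloc uses below. */
    static unsigned size_class(size_t nbytes)
    {
        return (unsigned)((nbytes - 1) >> ALIGNMENT_SHIFT);
    }

    int main(void)
    {
        size_t sizes[] = {1, 8, 9, 16, 17, 505, 512};
        for (size_t i = 0; i < sizeof sizes / sizeof *sizes; i++) {
            unsigned idx = size_class(sizes[i]);
            printf("request %3zu -> class %2u -> block %3u bytes\n",
                   sizes[i], idx, INDEX2SIZE(idx));
        }
        return 0;
    }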
+ * Pools are fragmented on demand and contain free lists of blocks of one + * particular size class. In other words, there is a fixed-size allocator + * for each size class. Free pools are shared by the different allocators + * thus minimizing the space reserved for a particular size class. + * + * This allocation strategy is a variant of what is known as "simple + * segregated storage based on array of free lists". The main drawback of + * simple segregated storage is that we might end up with lot of reserved + * memory for the different free lists, which degenerate in time. To avoid + * this, we partition each free list in pools and we share dynamically the + * reserved space between all free lists. This technique is quite efficient + * for memory intensive programs which allocate mainly small-sized blocks. + * + * For small requests we have the following table: + * + * Request in bytes Size of allocated block Size class idx + * ---------------------------------------------------------------- + * 1-8 8 0 + * 9-16 16 1 + * 17-24 24 2 + * 25-32 32 3 + * 33-40 40 4 + * 41-48 48 5 + * 49-56 56 6 + * 57-64 64 7 + * 65-72 72 8 + * ... ... ... + * 497-504 504 62 + * 505-512 512 63 + * + * 0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying + * allocator. + */ + +/*==========================================================================*/ + +/* + * -- Main tunable settings section -- + */ + +/* + * Alignment of addresses returned to the user. 8-bytes alignment works + * on most current architectures (with 32-bit or 64-bit address busses). + * The alignment value is also used for grouping small requests in size + * classes spaced ALIGNMENT bytes apart. + * + * You shouldn't change this unless you know what you are doing. + */ +#define ALIGNMENT 8 /* must be 2^N */ +#define ALIGNMENT_SHIFT 3 + +/* Return the number of bytes in size class I, as a uint. */ +#define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT) + +/* + * Max size threshold below which malloc requests are considered to be + * small enough in order to use preallocated memory pools. You can tune + * this value according to your application behaviour and memory needs. + * + * Note: a size threshold of 512 guarantees that newly created dictionaries + * will be allocated from preallocated memory pools on 64-bit. + * + * The following invariants must hold: + * 1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512 + * 2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT + * + * Although not required, for better performance and space efficiency, + * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2. + */ +#define SMALL_REQUEST_THRESHOLD 512 +#define NB_SMALL_SIZE_CLASSES (SMALL_REQUEST_THRESHOLD / ALIGNMENT) + +/* + * The system's VMM page size can be obtained on most unices with a + * getpagesize() call or deduced from various header files. To make + * things simpler, we assume that it is 4K, which is OK for most systems. + * It is probably better if this is the native page size, but it doesn't + * have to be. In theory, if SYSTEM_PAGE_SIZE is larger than the native page + * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation + * violation fault. 4K is apparently OK for all the platforms that python + * currently targets. + */ +#define SYSTEM_PAGE_SIZE (4 * 1024) +#define SYSTEM_PAGE_SIZE_MASK (SYSTEM_PAGE_SIZE - 1) + +/* + * Maximum amount of memory managed by the allocator for small requests. 
+ */ +#ifdef WITH_MEMORY_LIMITS +#ifndef SMALL_MEMORY_LIMIT +#define SMALL_MEMORY_LIMIT (64 * 1024 * 1024) /* 64 MB -- more? */ +#endif +#endif + +/* + * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned + * on a page boundary. This is a reserved virtual address space for the + * current process (obtained through a malloc()/mmap() call). In no way this + * means that the memory arenas will be used entirely. A malloc(<Big>) is + * usually an address range reservation for <Big> bytes, unless all pages within + * this space are referenced subsequently. So malloc'ing big blocks and not + * using them does not mean "wasting memory". It's an addressable range + * wastage... + * + * Arenas are allocated with mmap() on systems supporting anonymous memory + * mappings to reduce heap fragmentation. + */ +#define ARENA_SIZE (256 << 10) /* 256KB */ + +#ifdef WITH_MEMORY_LIMITS +#define MAX_ARENAS (SMALL_MEMORY_LIMIT / ARENA_SIZE) +#endif + +/* + * Size of the pools used for small blocks. Should be a power of 2, + * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k. + */ +#define POOL_SIZE SYSTEM_PAGE_SIZE /* must be 2^N */ +#define POOL_SIZE_MASK SYSTEM_PAGE_SIZE_MASK + +/* + * -- End of tunable settings section -- + */ + +/*==========================================================================*/ + +/* + * Locking + * + * To reduce lock contention, it would probably be better to refine the + * crude function locking with per size class locking. I'm not positive + * however, whether it's worth switching to such locking policy because + * of the performance penalty it might introduce. + * + * The following macros describe the simplest (should also be the fastest) + * lock object on a particular platform and the init/fini/lock/unlock + * operations on it. The locks defined here are not expected to be recursive + * because it is assumed that they will always be called in the order: + * INIT, [LOCK, UNLOCK]*, FINI. + */ + +/* + * Python's threads are serialized, so object malloc locking is disabled. + */ +#define SIMPLELOCK_DECL(lock) /* simple lock declaration */ +#define SIMPLELOCK_INIT(lock) /* allocate (if needed) and initialize */ +#define SIMPLELOCK_FINI(lock) /* free/destroy an existing lock */ +#define SIMPLELOCK_LOCK(lock) /* acquire released lock */ +#define SIMPLELOCK_UNLOCK(lock) /* release acquired lock */ + +/* When you say memory, my mind reasons in terms of (pointers to) blocks */ +typedef uint8_t block; + +/* Pool for small blocks. */ +struct pool_header { + union { block *_padding; + uint count; } ref; /* number of allocated blocks */ + block *freeblock; /* pool's free list head */ + struct pool_header *nextpool; /* next pool of this size class */ + struct pool_header *prevpool; /* previous pool "" */ + uint arenaindex; /* index into arenas of base adr */ + uint szidx; /* block size class index */ + uint nextoffset; /* bytes to virgin block */ + uint maxnextoffset; /* largest valid nextoffset */ +}; + +typedef struct pool_header *poolp; + +/* Record keeping for arenas. */ +struct arena_object { + /* The address of the arena, as returned by malloc. Note that 0 + * will never be returned by a successful malloc, and is used + * here to mark an arena_object that doesn't correspond to an + * allocated arena. + */ + uintptr_t address; + + /* Pool-aligned pointer to the next pool to be carved off. */ + block* pool_address; + + /* The number of available pools in the arena: free pools + never- + * allocated pools. 
+ */ + uint nfreepools; + + /* The total number of pools in the arena, whether or not available. */ + uint ntotalpools; + + /* Singly-linked list of available pools. */ + struct pool_header* freepools; + + /* Whenever this arena_object is not associated with an allocated + * arena, the nextarena member is used to link all unassociated + * arena_objects in the singly-linked `unused_arena_objects` list. + * The prevarena member is unused in this case. + * + * When this arena_object is associated with an allocated arena + * with at least one available pool, both members are used in the + * doubly-linked `usable_arenas` list, which is maintained in + * increasing order of `nfreepools` values. + * + * Else this arena_object is associated with an allocated arena + * all of whose pools are in use. `nextarena` and `prevarena` + * are both meaningless in this case. + */ + struct arena_object* nextarena; + struct arena_object* prevarena; +}; + +#define POOL_OVERHEAD _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT) + +#define DUMMY_SIZE_IDX 0xffff /* size class of newly cached pools */ + +/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */ +#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE)) + +/* Return total number of blocks in pool of size index I, as a uint. */ +#define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I)) + +/*==========================================================================*/ + +/* + * This malloc lock + */ +SIMPLELOCK_DECL(_malloc_lock) +#define LOCK() SIMPLELOCK_LOCK(_malloc_lock) +#define UNLOCK() SIMPLELOCK_UNLOCK(_malloc_lock) +#define LOCK_INIT() SIMPLELOCK_INIT(_malloc_lock) +#define LOCK_FINI() SIMPLELOCK_FINI(_malloc_lock) + +/* + * Pool table -- headed, circular, doubly-linked lists of partially used pools. + +This is involved. For an index i, usedpools[i+i] is the header for a list of +all partially used pools holding small blocks with "size class idx" i. So +usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size +16, and so on: index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT. + +Pools are carved off an arena's highwater mark (an arena_object's pool_address +member) as needed. Once carved off, a pool is in one of three states forever +after: + +used == partially used, neither empty nor full + At least one block in the pool is currently allocated, and at least one + block in the pool is not currently allocated (note this implies a pool + has room for at least two blocks). + This is a pool's initial state, as a pool is created only when malloc + needs space. + The pool holds blocks of a fixed size, and is in the circular list headed + at usedpools[i] (see above). It's linked to the other used pools of the + same size class via the pool_header's nextpool and prevpool members. + If all but one block is currently allocated, a malloc can cause a + transition to the full state. If all but one block is not currently + allocated, a free can cause a transition to the empty state. + +full == all the pool's blocks are currently allocated + On transition to full, a pool is unlinked from its usedpools[] list. + It's not linked to from anything then anymore, and its nextpool and + prevpool members are meaningless until it transitions back to used. + A free of a block in a full pool puts the pool back in the used state. + Then it's linked in at the front of the appropriate usedpools[] list, so + that the next allocation for its size class will reuse the freed block. 
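POOL_ADDR() above is the trick that lets free() find a pool header without any lookup table: pools are carved on POOL_SIZE boundaries, so masking off the low bits of any block address lands on its pool's base. NUMBLOCKS() then says how many blocks of a size class fit after that header. A stand-alone sketch; the 48-byte POOL_OVERHEAD is a typical 64-bit value, not a guarantee:

    #include <stdint.h>
    #include <stdio.h>

    typedef unsigned int uint;

    #define ALIGNMENT_SHIFT  3
    #define INDEX2SIZE(I)    (((uint)(I) + 1) << ALIGNMENT_SHIFT)
    #define POOL_SIZE        (4 * 1024)
    #define POOL_OVERHEAD    48   /* sizeof(struct pool_header) rounded up;
                                     48 is typical on 64-bit builds */

    /* round P down to the pool-aligned base holding its header
     * (the same idea as _Py_ALIGN_DOWN) */
    #define POOL_ADDR(P) ((void *)((uintptr_t)(P) & ~(uintptr_t)(POOL_SIZE - 1)))

    /* whole blocks of size class I that fit after the header */
    #define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))

    int main(void)
    {
        void *p = (void *)(uintptr_t)0x12345678;
        printf("pool base of %p is %p\n", p, POOL_ADDR(p));
        printf("8-byte blocks per pool:   %u\n", NUMBLOCKS(0));
        printf("512-byte blocks per pool: %u\n", NUMBLOCKS(63));
        return 0;
    }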
+ +empty == all the pool's blocks are currently available for allocation + On transition to empty, a pool is unlinked from its usedpools[] list, + and linked to the front of its arena_object's singly-linked freepools list, + via its nextpool member. The prevpool member has no meaning in this case. + Empty pools have no inherent size class: the next time a malloc finds + an empty list in usedpools[], it takes the first pool off of freepools. + If the size class needed happens to be the same as the size class the pool + last had, some pool initialization can be skipped. + + +Block Management + +Blocks within pools are again carved out as needed. pool->freeblock points to +the start of a singly-linked list of free blocks within the pool. When a +block is freed, it's inserted at the front of its pool's freeblock list. Note +that the available blocks in a pool are *not* linked all together when a pool +is initialized. Instead only "the first two" (lowest addresses) blocks are +set up, returning the first such block, and setting pool->freeblock to a +one-block list holding the second such block. This is consistent with that +pymalloc strives at all levels (arena, pool, and block) never to touch a piece +of memory until it's actually needed. + +So long as a pool is in the used state, we're certain there *is* a block +available for allocating, and pool->freeblock is not NULL. If pool->freeblock +points to the end of the free list before we've carved the entire pool into +blocks, that means we simply haven't yet gotten to one of the higher-address +blocks. The offset from the pool_header to the start of "the next" virgin +block is stored in the pool_header nextoffset member, and the largest value +of nextoffset that makes sense is stored in the maxnextoffset member when a +pool is initialized. All the blocks in a pool have been passed out at least +once when and only when nextoffset > maxnextoffset. + + +Major obscurity: While the usedpools vector is declared to have poolp +entries, it doesn't really. It really contains two pointers per (conceptual) +poolp entry, the nextpool and prevpool members of a pool_header. The +excruciating initialization code below fools C so that + + usedpool[i+i] + +"acts like" a genuine poolp, but only so long as you only reference its +nextpool and prevpool members. The "- 2*sizeof(block *)" gibberish is +compensating for that a pool_header's nextpool and prevpool members +immediately follow a pool_header's first two members: + + union { block *_padding; + uint count; } ref; + block *freeblock; + +each of which consume sizeof(block *) bytes. So what usedpools[i+i] really +contains is a fudged-up pointer p such that *if* C believes it's a poolp +pointer, then p->nextpool and p->prevpool are both p (meaning that the headed +circular list is empty). + +It's unclear why the usedpools setup is so convoluted. It could be to +minimize the amount of cache required to hold this heavily-referenced table +(which only *needs* the two interpool pointer members of a pool_header). OTOH, +referencing code has to remember to "double the index" and doing so isn't +free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying +on that C doesn't insert any padding anywhere in a pool_header at or before +the prevpool member. 
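The "major obscurity" above can be checked mechanically: the fudged usedpools entries only behave like pool_headers because nextpool sits exactly two pointer widths into the struct. The sketch below reproduces the PTA() arithmetic for a single size class and asserts both the layout assumption and the empty-circular-list behavior; as the original comment itself admits, the head pointer is not strictly legal C, so treat this purely as an illustration:

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint8_t block;
    typedef unsigned int uint;

    struct pool_header {
        union { block *_padding; uint count; } ref;
        block *freeblock;
        struct pool_header *nextpool;
        struct pool_header *prevpool;
        uint arenaindex, szidx, nextoffset, maxnextoffset;
    };
    typedef struct pool_header *poolp;

    static poolp usedpools[2];   /* one size class: [nextpool, prevpool] */

    #define PTA(x) ((poolp)((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))

    int main(void)
    {
        /* the layout assumption the whole trick rests on */
        assert(offsetof(struct pool_header, nextpool) == 2 * sizeof(block *));

        usedpools[0] = PTA(0);   /* set up an empty headed circular list */
        usedpools[1] = PTA(0);

        poolp head = PTA(0);     /* the "not strictly legal" fudged pointer */
        assert(head->nextpool == head && head->prevpool == head);
        puts("usedpools[0..1] acts like an empty circular pool list");
        return 0;
    }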
+**************************************************************************** */ + +#define PTA(x) ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *))) +#define PT(x) PTA(x), PTA(x) + +static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = { + PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7) +#if NB_SMALL_SIZE_CLASSES > 8 + , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15) +#if NB_SMALL_SIZE_CLASSES > 16 + , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23) +#if NB_SMALL_SIZE_CLASSES > 24 + , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31) +#if NB_SMALL_SIZE_CLASSES > 32 + , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39) +#if NB_SMALL_SIZE_CLASSES > 40 + , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47) +#if NB_SMALL_SIZE_CLASSES > 48 + , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55) +#if NB_SMALL_SIZE_CLASSES > 56 + , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63) +#if NB_SMALL_SIZE_CLASSES > 64 +#error "NB_SMALL_SIZE_CLASSES should be less than 64" +#endif /* NB_SMALL_SIZE_CLASSES > 64 */ +#endif /* NB_SMALL_SIZE_CLASSES > 56 */ +#endif /* NB_SMALL_SIZE_CLASSES > 48 */ +#endif /* NB_SMALL_SIZE_CLASSES > 40 */ +#endif /* NB_SMALL_SIZE_CLASSES > 32 */ +#endif /* NB_SMALL_SIZE_CLASSES > 24 */ +#endif /* NB_SMALL_SIZE_CLASSES > 16 */ +#endif /* NB_SMALL_SIZE_CLASSES > 8 */ +}; + +/*========================================================================== +Arena management. + +`arenas` is a vector of arena_objects. It contains maxarenas entries, some of +which may not be currently used (== they're arena_objects that aren't +currently associated with an allocated arena). Note that arenas proper are +separately malloc'ed. + +Prior to Python 2.5, arenas were never free()'ed. Starting with Python 2.5, +we do try to free() arenas, and use some mild heuristic strategies to increase +the likelihood that arenas eventually can be freed. + +unused_arena_objects + + This is a singly-linked list of the arena_objects that are currently not + being used (no arena is associated with them). Objects are taken off the + head of the list in new_arena(), and are pushed on the head of the list in + PyObject_Free() when the arena is empty. Key invariant: an arena_object + is on this list if and only if its .address member is 0. + +usable_arenas + + This is a doubly-linked list of the arena_objects associated with arenas + that have pools available. These pools are either waiting to be reused, + or have not been used before. The list is sorted to have the most- + allocated arenas first (ascending order based on the nfreepools member). + This means that the next allocation will come from a heavily used arena, + which gives the nearly empty arenas a chance to be returned to the system. + In my unscientific tests this dramatically improved the number of arenas + that could be freed. + +Note that an arena_object associated with an arena all of whose pools are +currently in use isn't on either list. +*/ + +/* Array of objects used to track chunks of memory (arenas). */ +static struct arena_object* arenas = NULL; +/* Number of slots currently allocated in the `arenas` vector. */ +static uint maxarenas = 0; + +/* The head of the singly-linked, NULL-terminated list of available + * arena_objects. 
+ */ +static struct arena_object* unused_arena_objects = NULL; + +/* The head of the doubly-linked, NULL-terminated at each end, list of + * arena_objects associated with arenas that have pools available. + */ +static struct arena_object* usable_arenas = NULL; + +/* How many arena_objects do we initially allocate? + * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the + * `arenas` vector. + */ +#define INITIAL_ARENA_OBJECTS 16 + +/* Number of arenas allocated that haven't been free()'d. */ +static size_t narenas_currently_allocated = 0; + +/* Total number of times malloc() called to allocate an arena. */ +static size_t ntimes_arena_allocated = 0; +/* High water mark (max value ever seen) for narenas_currently_allocated. */ +static size_t narenas_highwater = 0; + +static Py_ssize_t _Py_AllocatedBlocks = 0; + Py_ssize_t _Py_GetAllocatedBlocks(void) { - return _PyRuntime.mem.num_allocated_blocks; + return _Py_AllocatedBlocks; } @@ -581,7 +1035,7 @@ new_arena(void) if (debug_stats) _PyObject_DebugMallocStats(stderr); - if (_PyRuntime.mem.unused_arena_objects == NULL) { + if (unused_arena_objects == NULL) { uint i; uint numarenas; size_t nbytes; @@ -589,18 +1043,18 @@ new_arena(void) /* Double the number of arena objects on each allocation. * Note that it's possible for `numarenas` to overflow. */ - numarenas = _PyRuntime.mem.maxarenas ? _PyRuntime.mem.maxarenas << 1 : INITIAL_ARENA_OBJECTS; - if (numarenas <= _PyRuntime.mem.maxarenas) + numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS; + if (numarenas <= maxarenas) return NULL; /* overflow */ #if SIZEOF_SIZE_T <= SIZEOF_INT - if (numarenas > SIZE_MAX / sizeof(*_PyRuntime.mem.arenas)) + if (numarenas > SIZE_MAX / sizeof(*arenas)) return NULL; /* overflow */ #endif - nbytes = numarenas * sizeof(*_PyRuntime.mem.arenas); - arenaobj = (struct arena_object *)PyMem_RawRealloc(_PyRuntime.mem.arenas, nbytes); + nbytes = numarenas * sizeof(*arenas); + arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes); if (arenaobj == NULL) return NULL; - _PyRuntime.mem.arenas = arenaobj; + arenas = arenaobj; /* We might need to fix pointers that were copied. However, * new_arena only gets called when all the pages in the @@ -608,45 +1062,45 @@ new_arena(void) * into the old array. Thus, we don't have to worry about * invalid pointers. Just to be sure, some asserts: */ - assert(_PyRuntime.mem.usable_arenas == NULL); - assert(_PyRuntime.mem.unused_arena_objects == NULL); + assert(usable_arenas == NULL); + assert(unused_arena_objects == NULL); /* Put the new arenas on the unused_arena_objects list. */ - for (i = _PyRuntime.mem.maxarenas; i < numarenas; ++i) { - _PyRuntime.mem.arenas[i].address = 0; /* mark as unassociated */ - _PyRuntime.mem.arenas[i].nextarena = i < numarenas - 1 ? - &_PyRuntime.mem.arenas[i+1] : NULL; + for (i = maxarenas; i < numarenas; ++i) { + arenas[i].address = 0; /* mark as unassociated */ + arenas[i].nextarena = i < numarenas - 1 ? + &arenas[i+1] : NULL; } /* Update globals. */ - _PyRuntime.mem.unused_arena_objects = &_PyRuntime.mem.arenas[_PyRuntime.mem.maxarenas]; - _PyRuntime.mem.maxarenas = numarenas; + unused_arena_objects = &arenas[maxarenas]; + maxarenas = numarenas; } /* Take the next available arena object off the head of the list. 
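new_arena() below grows the arenas vector by doubling and has to guard two distinct overflows: the left shift wrapping around, and the byte-count multiplication exceeding SIZE_MAX (CPython compiles the second check only where size_t is no wider than int; this sketch checks it unconditionally). A stand-alone version of the growth step:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define INITIAL_ARENA_OBJECTS 16

    struct arena_object { uintptr_t address; /* ... */ };

    static struct arena_object *arenas = NULL;
    static unsigned maxarenas = 0;

    static int grow_arenas(void)
    {
        unsigned numarenas = maxarenas ? maxarenas << 1
                                       : INITIAL_ARENA_OBJECTS;
        if (numarenas <= maxarenas)
            return -1;                       /* the shift wrapped around */
        if (numarenas > SIZE_MAX / sizeof(*arenas))
            return -1;                       /* byte count would overflow */
        size_t nbytes = numarenas * sizeof(*arenas);
        struct arena_object *p = realloc(arenas, nbytes);
        if (p == NULL)
            return -1;
        arenas = p;
        maxarenas = numarenas;
        return 0;
    }

    int main(void)
    {
        if (grow_arenas() == 0)
            printf("arenas vector now holds %u slots\n", maxarenas);
        free(arenas);
        return 0;
    }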
*/ - assert(_PyRuntime.mem.unused_arena_objects != NULL); - arenaobj = _PyRuntime.mem.unused_arena_objects; - _PyRuntime.mem.unused_arena_objects = arenaobj->nextarena; + assert(unused_arena_objects != NULL); + arenaobj = unused_arena_objects; + unused_arena_objects = arenaobj->nextarena; assert(arenaobj->address == 0); - address = _PyRuntime.obj.allocator_arenas.alloc(_PyRuntime.obj.allocator_arenas.ctx, ARENA_SIZE); + address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE); if (address == NULL) { /* The allocation failed: return NULL after putting the * arenaobj back. */ - arenaobj->nextarena = _PyRuntime.mem.unused_arena_objects; - _PyRuntime.mem.unused_arena_objects = arenaobj; + arenaobj->nextarena = unused_arena_objects; + unused_arena_objects = arenaobj; return NULL; } arenaobj->address = (uintptr_t)address; - ++_PyRuntime.mem.narenas_currently_allocated; - ++_PyRuntime.mem.ntimes_arena_allocated; - if (_PyRuntime.mem.narenas_currently_allocated > _PyRuntime.mem.narenas_highwater) - _PyRuntime.mem.narenas_highwater = _PyRuntime.mem.narenas_currently_allocated; + ++narenas_currently_allocated; + ++ntimes_arena_allocated; + if (narenas_currently_allocated > narenas_highwater) + narenas_highwater = narenas_currently_allocated; arenaobj->freepools = NULL; /* pool_address <- first pool-aligned address in the arena nfreepools <- number of whole pools that fit after alignment */ - arenaobj->pool_address = (pyblock*)arenaobj->address; + arenaobj->pool_address = (block*)arenaobj->address; arenaobj->nfreepools = ARENA_SIZE / POOL_SIZE; assert(POOL_SIZE * arenaobj->nfreepools == ARENA_SIZE); excess = (uint)(arenaobj->address & POOL_SIZE_MASK); @@ -743,9 +1197,9 @@ address_in_range(void *p, poolp pool) // the GIL. The following dance forces the compiler to read pool->arenaindex // only once. uint arenaindex = *((volatile uint *)&pool->arenaindex); - return arenaindex < _PyRuntime.mem.maxarenas && - (uintptr_t)p - _PyRuntime.mem.arenas[arenaindex].address < ARENA_SIZE && - _PyRuntime.mem.arenas[arenaindex].address != 0; + return arenaindex < maxarenas && + (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE && + arenas[arenaindex].address != 0; } /*==========================================================================*/ @@ -766,12 +1220,12 @@ static void * _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) { size_t nbytes; - pyblock *bp; + block *bp; poolp pool; poolp next; uint size; - _PyRuntime.mem.num_allocated_blocks++; + _Py_AllocatedBlocks++; assert(elsize == 0 || nelem <= PY_SSIZE_T_MAX / elsize); nbytes = nelem * elsize; @@ -792,7 +1246,7 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) * Most frequent paths first */ size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT; - pool = _PyRuntime.mem.usedpools[size + size]; + pool = usedpools[size + size]; if (pool != pool->nextpool) { /* * There is a used pool for this size class. @@ -801,7 +1255,7 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) ++pool->ref.count; bp = pool->freeblock; assert(bp != NULL); - if ((pool->freeblock = *(pyblock **)bp) != NULL) { + if ((pool->freeblock = *(block **)bp) != NULL) { UNLOCK(); if (use_calloc) memset(bp, 0, nbytes); @@ -812,10 +1266,10 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) */ if (pool->nextoffset <= pool->maxnextoffset) { /* There is room for another block. 
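address_in_range() above is called on pointers that may not belong to pymalloc at all, so pool->arenaindex can be garbage. The function forces a single read of it (the volatile cast, so a racing new_arena() can't be observed twice with different values), bounds-checks the index before trusting it, and relies on unsigned wraparound so one comparison covers both "below the arena" and "past the arena". A simplified, compilable rendition:

    #include <stdint.h>
    #include <stdio.h>

    #define ARENA_SIZE (256 << 10)
    typedef unsigned int uint;

    struct arena_object { uintptr_t address; };

    static struct arena_object arenas[4];
    static uint maxarenas = 4;

    struct pool_header { uint arenaindex; };

    static int address_in_range(void *p, struct pool_header *pool)
    {
        /* one forced read; arenaindex may be garbage for foreign memory */
        uint arenaindex = *((volatile uint *)&pool->arenaindex);
        return arenaindex < maxarenas &&
               /* unsigned subtraction: p below the arena wraps to a huge
                * value, so a single < ARENA_SIZE test suffices */
               (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
               arenas[arenaindex].address != 0;
    }

    int main(void)
    {
        arenas[1].address = 0x100000;
        struct pool_header ph = { 1 };
        void *p = (void *)(uintptr_t)0x100010;
        printf("in range: %d\n", address_in_range(p, &ph));
        return 0;
    }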
*/ - pool->freeblock = (pyblock*)pool + + pool->freeblock = (block*)pool + pool->nextoffset; pool->nextoffset += INDEX2SIZE(size); - *(pyblock **)(pool->freeblock) = NULL; + *(block **)(pool->freeblock) = NULL; UNLOCK(); if (use_calloc) memset(bp, 0, nbytes); @@ -835,29 +1289,29 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) /* There isn't a pool of the right size class immediately * available: use a free pool. */ - if (_PyRuntime.mem.usable_arenas == NULL) { + if (usable_arenas == NULL) { /* No arena has a free pool: allocate a new arena. */ #ifdef WITH_MEMORY_LIMITS - if (_PyRuntime.mem.narenas_currently_allocated >= MAX_ARENAS) { + if (narenas_currently_allocated >= MAX_ARENAS) { UNLOCK(); goto redirect; } #endif - _PyRuntime.mem.usable_arenas = new_arena(); - if (_PyRuntime.mem.usable_arenas == NULL) { + usable_arenas = new_arena(); + if (usable_arenas == NULL) { UNLOCK(); goto redirect; } - _PyRuntime.mem.usable_arenas->nextarena = - _PyRuntime.mem.usable_arenas->prevarena = NULL; + usable_arenas->nextarena = + usable_arenas->prevarena = NULL; } - assert(_PyRuntime.mem.usable_arenas->address != 0); + assert(usable_arenas->address != 0); /* Try to get a cached free pool. */ - pool = _PyRuntime.mem.usable_arenas->freepools; + pool = usable_arenas->freepools; if (pool != NULL) { /* Unlink from cached pools. */ - _PyRuntime.mem.usable_arenas->freepools = pool->nextpool; + usable_arenas->freepools = pool->nextpool; /* This arena already had the smallest nfreepools * value, so decreasing nfreepools doesn't change @@ -866,18 +1320,18 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) * become wholly allocated, we need to remove its * arena_object from usable_arenas. */ - --_PyRuntime.mem.usable_arenas->nfreepools; - if (_PyRuntime.mem.usable_arenas->nfreepools == 0) { + --usable_arenas->nfreepools; + if (usable_arenas->nfreepools == 0) { /* Wholly allocated: remove. */ - assert(_PyRuntime.mem.usable_arenas->freepools == NULL); - assert(_PyRuntime.mem.usable_arenas->nextarena == NULL || - _PyRuntime.mem.usable_arenas->nextarena->prevarena == - _PyRuntime.mem.usable_arenas); - - _PyRuntime.mem.usable_arenas = _PyRuntime.mem.usable_arenas->nextarena; - if (_PyRuntime.mem.usable_arenas != NULL) { - _PyRuntime.mem.usable_arenas->prevarena = NULL; - assert(_PyRuntime.mem.usable_arenas->address != 0); + assert(usable_arenas->freepools == NULL); + assert(usable_arenas->nextarena == NULL || + usable_arenas->nextarena->prevarena == + usable_arenas); + + usable_arenas = usable_arenas->nextarena; + if (usable_arenas != NULL) { + usable_arenas->prevarena = NULL; + assert(usable_arenas->address != 0); } } else { @@ -886,14 +1340,14 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) * off all the arena's pools for the first * time. */ - assert(_PyRuntime.mem.usable_arenas->freepools != NULL || - _PyRuntime.mem.usable_arenas->pool_address <= - (pyblock*)_PyRuntime.mem.usable_arenas->address + + assert(usable_arenas->freepools != NULL || + usable_arenas->pool_address <= + (block*)usable_arenas->address + ARENA_SIZE - POOL_SIZE); } init_pool: /* Frontlink to used pools. 
*/ - next = _PyRuntime.mem.usedpools[size + size]; /* == prev */ + next = usedpools[size + size]; /* == prev */ pool->nextpool = next; pool->prevpool = next; next->nextpool = pool; @@ -906,7 +1360,7 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) */ bp = pool->freeblock; assert(bp != NULL); - pool->freeblock = *(pyblock **)bp; + pool->freeblock = *(block **)bp; UNLOCK(); if (use_calloc) memset(bp, 0, nbytes); @@ -919,11 +1373,11 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) */ pool->szidx = size; size = INDEX2SIZE(size); - bp = (pyblock *)pool + POOL_OVERHEAD; + bp = (block *)pool + POOL_OVERHEAD; pool->nextoffset = POOL_OVERHEAD + (size << 1); pool->maxnextoffset = POOL_SIZE - size; pool->freeblock = bp + size; - *(pyblock **)(pool->freeblock) = NULL; + *(block **)(pool->freeblock) = NULL; UNLOCK(); if (use_calloc) memset(bp, 0, nbytes); @@ -931,26 +1385,26 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) } /* Carve off a new pool. */ - assert(_PyRuntime.mem.usable_arenas->nfreepools > 0); - assert(_PyRuntime.mem.usable_arenas->freepools == NULL); - pool = (poolp)_PyRuntime.mem.usable_arenas->pool_address; - assert((pyblock*)pool <= (pyblock*)_PyRuntime.mem.usable_arenas->address + - ARENA_SIZE - POOL_SIZE); - pool->arenaindex = (uint)(_PyRuntime.mem.usable_arenas - _PyRuntime.mem.arenas); - assert(&_PyRuntime.mem.arenas[pool->arenaindex] == _PyRuntime.mem.usable_arenas); + assert(usable_arenas->nfreepools > 0); + assert(usable_arenas->freepools == NULL); + pool = (poolp)usable_arenas->pool_address; + assert((block*)pool <= (block*)usable_arenas->address + + ARENA_SIZE - POOL_SIZE); + pool->arenaindex = (uint)(usable_arenas - arenas); + assert(&arenas[pool->arenaindex] == usable_arenas); pool->szidx = DUMMY_SIZE_IDX; - _PyRuntime.mem.usable_arenas->pool_address += POOL_SIZE; - --_PyRuntime.mem.usable_arenas->nfreepools; + usable_arenas->pool_address += POOL_SIZE; + --usable_arenas->nfreepools; - if (_PyRuntime.mem.usable_arenas->nfreepools == 0) { - assert(_PyRuntime.mem.usable_arenas->nextarena == NULL || - _PyRuntime.mem.usable_arenas->nextarena->prevarena == - _PyRuntime.mem.usable_arenas); + if (usable_arenas->nfreepools == 0) { + assert(usable_arenas->nextarena == NULL || + usable_arenas->nextarena->prevarena == + usable_arenas); /* Unlink the arena: it is completely allocated. */ - _PyRuntime.mem.usable_arenas = _PyRuntime.mem.usable_arenas->nextarena; - if (_PyRuntime.mem.usable_arenas != NULL) { - _PyRuntime.mem.usable_arenas->prevarena = NULL; - assert(_PyRuntime.mem.usable_arenas->address != 0); + usable_arenas = usable_arenas->nextarena; + if (usable_arenas != NULL) { + usable_arenas->prevarena = NULL; + assert(usable_arenas->address != 0); } } @@ -972,7 +1426,7 @@ redirect: else result = PyMem_RawMalloc(nbytes); if (!result) - _PyRuntime.mem.num_allocated_blocks--; + _Py_AllocatedBlocks--; return result; } } @@ -995,14 +1449,14 @@ static void _PyObject_Free(void *ctx, void *p) { poolp pool; - pyblock *lastfree; + block *lastfree; poolp next, prev; uint size; if (p == NULL) /* free(NULL) has no effect */ return; - _PyRuntime.mem.num_allocated_blocks--; + _Py_AllocatedBlocks--; #ifdef WITH_VALGRIND if (UNLIKELY(running_on_valgrind > 0)) @@ -1020,8 +1474,8 @@ _PyObject_Free(void *ctx, void *p) * list in any case). 
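pool->freeblock, manipulated just above, is a free list with zero space overhead: the link pointer is stored in the first bytes of the free block itself. A toy version of the push (the free path) and pop (the malloc fast path), using pointer-aligned stand-in blocks:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint8_t block;

    /* a toy "pool": the free list lives inside the blocks themselves,
     * exactly like pool->freeblock in the hunks above */
    static block *freeblock = NULL;

    static void free_block(void *p)
    {
        *(block **)p = freeblock;       /* old head stored in the block */
        freeblock = (block *)p;         /* freed block becomes the head */
    }

    static void *alloc_block(void)
    {
        block *bp = freeblock;
        if (bp != NULL)
            freeblock = *(block **)bp;  /* pop: follow the embedded link */
        return bp;
    }

    int main(void)
    {
        static void *slab[2][4];        /* two pointer-aligned "blocks" */
        free_block(slab[0]);
        free_block(slab[1]);
        printf("pop %p, pop %p\n", alloc_block(), alloc_block());
        return 0;
    }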
*/ assert(pool->ref.count > 0); /* else it was empty */ - *(pyblock **)p = lastfree = pool->freeblock; - pool->freeblock = (pyblock *)p; + *(block **)p = lastfree = pool->freeblock; + pool->freeblock = (block *)p; if (lastfree) { struct arena_object* ao; uint nf; /* ao->nfreepools */ @@ -1047,7 +1501,7 @@ _PyObject_Free(void *ctx, void *p) /* Link the pool to freepools. This is a singly-linked * list, and pool->prevpool isn't used there. */ - ao = &_PyRuntime.mem.arenas[pool->arenaindex]; + ao = &arenas[pool->arenaindex]; pool->nextpool = ao->freepools; ao->freepools = pool; nf = ++ao->nfreepools; @@ -1076,9 +1530,9 @@ _PyObject_Free(void *ctx, void *p) * usable_arenas pointer. */ if (ao->prevarena == NULL) { - _PyRuntime.mem.usable_arenas = ao->nextarena; - assert(_PyRuntime.mem.usable_arenas == NULL || - _PyRuntime.mem.usable_arenas->address != 0); + usable_arenas = ao->nextarena; + assert(usable_arenas == NULL || + usable_arenas->address != 0); } else { assert(ao->prevarena->nextarena == ao); @@ -1094,14 +1548,14 @@ _PyObject_Free(void *ctx, void *p) /* Record that this arena_object slot is * available to be reused. */ - ao->nextarena = _PyRuntime.mem.unused_arena_objects; - _PyRuntime.mem.unused_arena_objects = ao; + ao->nextarena = unused_arena_objects; + unused_arena_objects = ao; /* Free the entire arena. */ - _PyRuntime.obj.allocator_arenas.free(_PyRuntime.obj.allocator_arenas.ctx, + _PyObject_Arena.free(_PyObject_Arena.ctx, (void *)ao->address, ARENA_SIZE); ao->address = 0; /* mark unassociated */ - --_PyRuntime.mem.narenas_currently_allocated; + --narenas_currently_allocated; UNLOCK(); return; @@ -1112,12 +1566,12 @@ _PyObject_Free(void *ctx, void *p) * ao->nfreepools was 0 before, ao isn't * currently on the usable_arenas list. */ - ao->nextarena = _PyRuntime.mem.usable_arenas; + ao->nextarena = usable_arenas; ao->prevarena = NULL; - if (_PyRuntime.mem.usable_arenas) - _PyRuntime.mem.usable_arenas->prevarena = ao; - _PyRuntime.mem.usable_arenas = ao; - assert(_PyRuntime.mem.usable_arenas->address != 0); + if (usable_arenas) + usable_arenas->prevarena = ao; + usable_arenas = ao; + assert(usable_arenas->address != 0); UNLOCK(); return; @@ -1147,8 +1601,8 @@ _PyObject_Free(void *ctx, void *p) } else { /* ao is at the head of the list */ - assert(_PyRuntime.mem.usable_arenas == ao); - _PyRuntime.mem.usable_arenas = ao->nextarena; + assert(usable_arenas == ao); + usable_arenas = ao->nextarena; } ao->nextarena->prevarena = ao->prevarena; @@ -1177,7 +1631,7 @@ _PyObject_Free(void *ctx, void *p) nf > ao->prevarena->nfreepools); assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao); - assert((_PyRuntime.mem.usable_arenas == ao && + assert((usable_arenas == ao && ao->prevarena == NULL) || ao->prevarena->nextarena == ao); @@ -1193,7 +1647,7 @@ _PyObject_Free(void *ctx, void *p) --pool->ref.count; assert(pool->ref.count > 0); /* else the pool is empty */ size = pool->szidx; - next = _PyRuntime.mem.usedpools[size + size]; + next = usedpools[size + size]; prev = next->prevpool; /* insert pool before next: prev <-> pool <-> next */ pool->nextpool = next; @@ -1315,13 +1769,15 @@ _Py_GetAllocatedBlocks(void) #define DEADBYTE 0xDB /* dead (newly freed) memory */ #define FORBIDDENBYTE 0xFB /* untouchable bytes at each end of a block */ +static size_t serialno = 0; /* incremented on each debug {m,re}alloc */ + /* serialno is always incremented via calling this routine. The point is * to supply a single place to set a breakpoint. 
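The debug-allocator pieces above (serialno, FORBIDDENBYTE, SST) implement a bracketed block layout: SST bytes recording the requested size, SST guard bytes, the user data, SST more guard bytes, then the allocation's serial number. The sketch below reproduces that layout with plain memcpy (the real code writes the size and serial byte-by-byte, big-endian, via write_size_t); guards_intact() shows the overrun check:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define SST sizeof(size_t)
    #define FORBIDDENBYTE 0xFB

    static size_t serialno = 0;
    static void bumpserialno(void) { ++serialno; }  /* breakpoint anchor */

    /* layout: [size | guard | user bytes | guard | serialno] */
    static void *debug_malloc(size_t nbytes)
    {
        uint8_t *p = malloc(2 * SST + nbytes + 2 * SST);
        if (p == NULL)
            return NULL;
        bumpserialno();
        memcpy(p, &nbytes, SST);                 /* requested size */
        memset(p + SST, FORBIDDENBYTE, SST);     /* leading guard */
        uint8_t *tail = p + 2 * SST + nbytes;
        memset(tail, FORBIDDENBYTE, SST);        /* trailing guard */
        memcpy(tail + SST, &serialno, SST);      /* who allocated this */
        return p + 2 * SST;                      /* user pointer */
    }

    static int guards_intact(void *data, size_t nbytes)
    {
        uint8_t *tail = (uint8_t *)data + nbytes;
        for (size_t i = 0; i < SST; i++)
            if (tail[i] != FORBIDDENBYTE)
                return 0;                        /* buffer overrun */
        return 1;
    }

    int main(void)
    {
        char *s = debug_malloc(8);
        printf("guards intact: %d\n", guards_intact(s, 8));
        free(s - 2 * SST);
        return 0;
    }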
*/ static void bumpserialno(void) { - ++_PyRuntime.mem.serialno; + ++serialno; } #define SST SIZEOF_SIZE_T @@ -1412,7 +1868,7 @@ _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes) /* at tail, write pad (SST bytes) and serialno (SST bytes) */ tail = p + 2*SST + nbytes; memset(tail, FORBIDDENBYTE, SST); - write_size_t(tail + SST, _PyRuntime.mem.serialno); + write_size_t(tail + SST, serialno); return p + 2*SST; } @@ -1497,7 +1953,7 @@ _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes) tail = q + nbytes; memset(tail, FORBIDDENBYTE, SST); - write_size_t(tail + SST, _PyRuntime.mem.serialno); + write_size_t(tail + SST, serialno); if (nbytes > original_nbytes) { /* growing: mark new extra memory clean */ @@ -1829,16 +2285,16 @@ _PyObject_DebugMallocStats(FILE *out) * to march over all the arenas. If we're lucky, most of the memory * will be living in full pools -- would be a shame to miss them. */ - for (i = 0; i < _PyRuntime.mem.maxarenas; ++i) { + for (i = 0; i < maxarenas; ++i) { uint j; - uintptr_t base = _PyRuntime.mem.arenas[i].address; + uintptr_t base = arenas[i].address; /* Skip arenas which are not allocated. */ - if (_PyRuntime.mem.arenas[i].address == (uintptr_t)NULL) + if (arenas[i].address == (uintptr_t)NULL) continue; narenas += 1; - numfreepools += _PyRuntime.mem.arenas[i].nfreepools; + numfreepools += arenas[i].nfreepools; /* round up to pool alignment */ if (base & (uintptr_t)POOL_SIZE_MASK) { @@ -1848,8 +2304,8 @@ _PyObject_DebugMallocStats(FILE *out) } /* visit every pool in the arena */ - assert(base <= (uintptr_t) _PyRuntime.mem.arenas[i].pool_address); - for (j = 0; base < (uintptr_t) _PyRuntime.mem.arenas[i].pool_address; + assert(base <= (uintptr_t) arenas[i].pool_address); + for (j = 0; base < (uintptr_t) arenas[i].pool_address; ++j, base += POOL_SIZE) { poolp p = (poolp)base; const uint sz = p->szidx; @@ -1858,7 +2314,7 @@ _PyObject_DebugMallocStats(FILE *out) if (p->ref.count == 0) { /* currently unused */ #ifdef Py_DEBUG - assert(pool_is_in_list(p, _PyRuntime.mem.arenas[i].freepools)); + assert(pool_is_in_list(p, arenas[i].freepools)); #endif continue; } @@ -1868,11 +2324,11 @@ _PyObject_DebugMallocStats(FILE *out) numfreeblocks[sz] += freeblocks; #ifdef Py_DEBUG if (freeblocks > 0) - assert(pool_is_in_list(p, _PyRuntime.mem.usedpools[sz + sz])); + assert(pool_is_in_list(p, usedpools[sz + sz])); #endif } } - assert(narenas == _PyRuntime.mem.narenas_currently_allocated); + assert(narenas == narenas_currently_allocated); fputc('\n', out); fputs("class size num pools blocks in use avail blocks\n" @@ -1900,10 +2356,10 @@ _PyObject_DebugMallocStats(FILE *out) } fputc('\n', out); if (_PyMem_DebugEnabled()) - (void)printone(out, "# times object malloc called", _PyRuntime.mem.serialno); - (void)printone(out, "# arenas allocated total", _PyRuntime.mem.ntimes_arena_allocated); - (void)printone(out, "# arenas reclaimed", _PyRuntime.mem.ntimes_arena_allocated - narenas); - (void)printone(out, "# arenas highwater mark", _PyRuntime.mem.narenas_highwater); + (void)printone(out, "# times object malloc called", serialno); + (void)printone(out, "# arenas allocated total", ntimes_arena_allocated); + (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas); + (void)printone(out, "# arenas highwater mark", narenas_highwater); (void)printone(out, "# arenas allocated current", narenas); PyOS_snprintf(buf, sizeof(buf), diff --git a/Objects/setobject.c b/Objects/setobject.c index 6001f7b..219e81d 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c 
@@ -1115,7 +1115,6 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds) } /* The empty frozenset is a singleton */ if (emptyfrozenset == NULL) - /* There is a possible (relatively harmless) race here. */ emptyfrozenset = make_new_set(type, NULL); Py_XINCREF(emptyfrozenset); return emptyfrozenset; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 6bf474a..1d963aa 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1157,10 +1157,10 @@ subtype_dealloc(PyObject *self) /* UnTrack and re-Track around the trashcan macro, alas */ /* See explanation at end of function for full disclosure */ PyObject_GC_UnTrack(self); - ++_PyRuntime.gc.trash_delete_nesting; + ++_PyTrash_delete_nesting; ++ tstate->trash_delete_nesting; Py_TRASHCAN_SAFE_BEGIN(self); - --_PyRuntime.gc.trash_delete_nesting; + --_PyTrash_delete_nesting; -- tstate->trash_delete_nesting; /* Find the nearest base with a different tp_dealloc */ @@ -1254,10 +1254,10 @@ subtype_dealloc(PyObject *self) Py_DECREF(type); endlabel: - ++_PyRuntime.gc.trash_delete_nesting; + ++_PyTrash_delete_nesting; ++ tstate->trash_delete_nesting; Py_TRASHCAN_SAFE_END(self); - --_PyRuntime.gc.trash_delete_nesting; + --_PyTrash_delete_nesting; -- tstate->trash_delete_nesting; /* Explanation of the weirdness around the trashcan macros: @@ -1297,7 +1297,7 @@ subtype_dealloc(PyObject *self) a subtle disaster. Q. Why the bizarre (net-zero) manipulation of - _PyRuntime.trash_delete_nesting around the trashcan macros? + _PyTrash_delete_nesting around the trashcan macros? A. Some base classes (e.g. list) also use the trashcan mechanism. The following scenario used to be possible: diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 5db80b6..8ebb22e 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -106,14 +106,6 @@ <ClInclude Include="..\Include\graminit.h" /> <ClInclude Include="..\Include\grammar.h" /> <ClInclude Include="..\Include\import.h" /> - <ClInclude Include="..\Include\internal\_Python.h" /> - <ClInclude Include="..\Include\internal\_ceval.h" /> - <ClInclude Include="..\Include\internal\_condvar.h" /> - <ClInclude Include="..\Include\internal\_gil.h" /> - <ClInclude Include="..\Include\internal\_mem.h" /> - <ClInclude Include="..\Include\internal\_pymalloc.h" /> - <ClInclude Include="..\Include\internal\_pystate.h" /> - <ClInclude Include="..\Include\internal\_warnings.h" /> <ClInclude Include="..\Include\intrcheck.h" /> <ClInclude Include="..\Include\iterobject.h" /> <ClInclude Include="..\Include\listobject.h" /> diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index e5a9b62..cbe1a39 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -129,30 +129,6 @@ <ClInclude Include="..\Include\import.h"> <Filter>Include</Filter> </ClInclude> - <ClInclude Include="..\Include\internal\_Python.h"> - <Filter>Include</Filter> - </ClInclude> - <ClInclude Include="..\Include\internal\_ceval.h"> - <Filter>Include</Filter> - </ClInclude> - <ClInclude Include="..\Include\internal\_condvar.h"> - <Filter>Include</Filter> - </ClInclude> - <ClInclude Include="..\Include\internal\_gil.h"> - <Filter>Include</Filter> - </ClInclude> - <ClInclude Include="..\Include\internal\_mem.h"> - <Filter>Include</Filter> - </ClInclude> - <ClInclude Include="..\Include\internal\_pymalloc.h"> - <Filter>Include</Filter> - </ClInclude> - <ClInclude Include="..\Include\internal\_pystate.h"> - <Filter>Include</Filter> - </ClInclude> - 
<ClInclude Include="..\Include\internal\_warnings.h"> - <Filter>Include</Filter> - </ClInclude> <ClInclude Include="..\Include\intrcheck.h"> <Filter>Include</Filter> </ClInclude> diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c index fd927c0..e386248 100644 --- a/Parser/pgenmain.c +++ b/Parser/pgenmain.c @@ -21,12 +21,10 @@ #include "node.h" #include "parsetok.h" #include "pgen.h" -#include "internal/_mem.h" int Py_DebugFlag; int Py_VerboseFlag; int Py_IgnoreEnvironmentFlag; -struct pyruntimestate _PyRuntime = {}; /* Forward */ grammar *getgrammar(const char *filename); @@ -63,8 +61,6 @@ main(int argc, char **argv) filename = argv[1]; graminit_h = argv[2]; graminit_c = argv[3]; - _PyObject_Initialize(&_PyRuntime.obj); - _PyMem_Initialize(&_PyRuntime.mem); g = getgrammar(filename); fp = fopen(graminit_c, "w"); if (fp == NULL) { diff --git a/Python/_warnings.c b/Python/_warnings.c index a5e42a3..8616195 100644 --- a/Python/_warnings.c +++ b/Python/_warnings.c @@ -8,6 +8,13 @@ PyDoc_STRVAR(warnings__doc__, MODULE_NAME " provides basic warning filtering support.\n" "It is a helper module to speed up interpreter start-up."); +/* Both 'filters' and 'onceregistry' can be set in warnings.py; + get_warnings_attr() will reset these variables accordingly. */ +static PyObject *_filters; /* List */ +static PyObject *_once_registry; /* Dict */ +static PyObject *_default_action; /* String */ +static long _filters_version; + _Py_IDENTIFIER(argv); _Py_IDENTIFIER(stderr); @@ -46,7 +53,7 @@ get_warnings_attr(const char *attr, int try_import) } /* don't try to import after the start of the Python finallization */ - if (try_import && !_Py_IS_FINALIZING()) { + if (try_import && _Py_Finalizing == NULL) { warnings_module = PyImport_Import(warnings_str); if (warnings_module == NULL) { /* Fallback to the C implementation if we cannot get @@ -83,10 +90,10 @@ get_once_registry(void) if (registry == NULL) { if (PyErr_Occurred()) return NULL; - return _PyRuntime.warnings.once_registry; + return _once_registry; } - Py_DECREF(_PyRuntime.warnings.once_registry); - _PyRuntime.warnings.once_registry = registry; + Py_DECREF(_once_registry); + _once_registry = registry; return registry; } @@ -101,11 +108,11 @@ get_default_action(void) if (PyErr_Occurred()) { return NULL; } - return _PyRuntime.warnings.default_action; + return _default_action; } - Py_DECREF(_PyRuntime.warnings.default_action); - _PyRuntime.warnings.default_action = default_action; + Py_DECREF(_default_action); + _default_action = default_action; return default_action; } @@ -125,24 +132,23 @@ get_filter(PyObject *category, PyObject *text, Py_ssize_t lineno, return NULL; } else { - Py_DECREF(_PyRuntime.warnings.filters); - _PyRuntime.warnings.filters = warnings_filters; + Py_DECREF(_filters); + _filters = warnings_filters; } - PyObject *filters = _PyRuntime.warnings.filters; - if (filters == NULL || !PyList_Check(filters)) { + if (_filters == NULL || !PyList_Check(_filters)) { PyErr_SetString(PyExc_ValueError, MODULE_NAME ".filters must be a list"); return NULL; } - /* _PyRuntime.warnings.filters could change while we are iterating over it. */ - for (i = 0; i < PyList_GET_SIZE(filters); i++) { + /* _filters could change while we are iterating over it. 
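The _warnings.c hunks below move filters, the once registry, the default action, and _filters_version back from _PyRuntime.warnings to file statics. The version counter drives a simple cache-invalidation scheme: already_warned() compares the version stored in a module's warning registry against the global counter and flushes the registry when they differ. A minimal sketch of that pattern, with a toy struct in place of the real registry dict and PyLong version object:

    #include <stdio.h>

    static long filters_version = 0;   /* bumped when warnings.filters mutates */

    struct toy_registry {
        long version;                  /* filters_version at last fill */
        int warned;                    /* toy payload: "already warned" flag */
    };

    static int already_warned(struct toy_registry *reg)
    {
        if (reg->version != filters_version) {
            reg->warned = 0;           /* filters changed: cache is stale */
            reg->version = filters_version;
        }
        return reg->warned;
    }

    int main(void)
    {
        struct toy_registry reg = {0, 0};
        reg.warned = 1;                          /* pretend a warning fired */
        printf("%d\n", already_warned(&reg));    /* 1: cache still valid */
        filters_version++;                       /* filters_mutated() */
        printf("%d\n", already_warned(&reg));    /* 0: registry flushed */
        return 0;
    }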
*/ + for (i = 0; i < PyList_GET_SIZE(_filters); i++) { PyObject *tmp_item, *action, *msg, *cat, *mod, *ln_obj; Py_ssize_t ln; int is_subclass, good_msg, good_mod; - tmp_item = PyList_GET_ITEM(filters, i); + tmp_item = PyList_GET_ITEM(_filters, i); if (!PyTuple_Check(tmp_item) || PyTuple_GET_SIZE(tmp_item) != 5) { PyErr_Format(PyExc_ValueError, MODULE_NAME ".filters item %zd isn't a 5-tuple", i); @@ -214,9 +220,9 @@ already_warned(PyObject *registry, PyObject *key, int should_set) version_obj = _PyDict_GetItemId(registry, &PyId_version); if (version_obj == NULL || !PyLong_CheckExact(version_obj) - || PyLong_AsLong(version_obj) != _PyRuntime.warnings.filters_version) { + || PyLong_AsLong(version_obj) != _filters_version) { PyDict_Clear(registry); - version_obj = PyLong_FromLong(_PyRuntime.warnings.filters_version); + version_obj = PyLong_FromLong(_filters_version); if (version_obj == NULL) return -1; if (_PyDict_SetItemId(registry, &PyId_version, version_obj) < 0) { @@ -514,7 +520,7 @@ warn_explicit(PyObject *category, PyObject *message, if (registry == NULL) goto cleanup; } - /* _PyRuntime.warnings.once_registry[(text, category)] = 1 */ + /* _once_registry[(text, category)] = 1 */ rc = update_registry(registry, text, category, 0); } else if (_PyUnicode_EqualToASCIIString(action, "module")) { @@ -904,7 +910,7 @@ warnings_warn_explicit(PyObject *self, PyObject *args, PyObject *kwds) static PyObject * warnings_filters_mutated(PyObject *self, PyObject *args) { - _PyRuntime.warnings.filters_version++; + _filters_version++; Py_RETURN_NONE; } @@ -1154,8 +1160,7 @@ create_filter(PyObject *category, const char *action) } /* This assumes the line number is zero for now. */ - return PyTuple_Pack(5, action_obj, Py_None, - category, Py_None, _PyLong_Zero); + return PyTuple_Pack(5, action_obj, Py_None, category, Py_None, _PyLong_Zero); } static PyObject * @@ -1223,35 +1228,33 @@ _PyWarnings_Init(void) if (m == NULL) return NULL; - if (_PyRuntime.warnings.filters == NULL) { - _PyRuntime.warnings.filters = init_filters(); - if (_PyRuntime.warnings.filters == NULL) + if (_filters == NULL) { + _filters = init_filters(); + if (_filters == NULL) return NULL; } - Py_INCREF(_PyRuntime.warnings.filters); - if (PyModule_AddObject(m, "filters", _PyRuntime.warnings.filters) < 0) + Py_INCREF(_filters); + if (PyModule_AddObject(m, "filters", _filters) < 0) return NULL; - if (_PyRuntime.warnings.once_registry == NULL) { - _PyRuntime.warnings.once_registry = PyDict_New(); - if (_PyRuntime.warnings.once_registry == NULL) + if (_once_registry == NULL) { + _once_registry = PyDict_New(); + if (_once_registry == NULL) return NULL; } - Py_INCREF(_PyRuntime.warnings.once_registry); - if (PyModule_AddObject(m, "_onceregistry", - _PyRuntime.warnings.once_registry) < 0) + Py_INCREF(_once_registry); + if (PyModule_AddObject(m, "_onceregistry", _once_registry) < 0) return NULL; - if (_PyRuntime.warnings.default_action == NULL) { - _PyRuntime.warnings.default_action = PyUnicode_FromString("default"); - if (_PyRuntime.warnings.default_action == NULL) + if (_default_action == NULL) { + _default_action = PyUnicode_FromString("default"); + if (_default_action == NULL) return NULL; } - Py_INCREF(_PyRuntime.warnings.default_action); - if (PyModule_AddObject(m, "_defaultaction", - _PyRuntime.warnings.default_action) < 0) + Py_INCREF(_default_action); + if (PyModule_AddObject(m, "_defaultaction", _default_action) < 0) return NULL; - _PyRuntime.warnings.filters_version = 0; + _filters_version = 0; return m; } diff --git a/Python/ceval.c 
b/Python/ceval.c index 9741c15..436e5ca 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -36,8 +36,7 @@ extern int _PyObject_GetMethod(PyObject *, PyObject *, PyObject **); typedef PyObject *(*callproc)(PyObject *, PyObject *, PyObject *); /* Forward declarations */ -Py_LOCAL_INLINE(PyObject *) call_function(PyObject ***, Py_ssize_t, - PyObject *); +Py_LOCAL_INLINE(PyObject *) call_function(PyObject ***, Py_ssize_t, PyObject *); static PyObject * do_call_core(PyObject *, PyObject *, PyObject *); #ifdef LLTRACE @@ -53,15 +52,13 @@ static int call_trace_protected(Py_tracefunc, PyObject *, static void call_exc_trace(Py_tracefunc, PyObject *, PyThreadState *, PyFrameObject *); static int maybe_call_line_trace(Py_tracefunc, PyObject *, - PyThreadState *, PyFrameObject *, - int *, int *, int *); + PyThreadState *, PyFrameObject *, int *, int *, int *); static void maybe_dtrace_line(PyFrameObject *, int *, int *, int *); static void dtrace_function_entry(PyFrameObject *); static void dtrace_function_return(PyFrameObject *); static PyObject * cmp_outcome(int, PyObject *, PyObject *); -static PyObject * import_name(PyFrameObject *, PyObject *, PyObject *, - PyObject *); +static PyObject * import_name(PyFrameObject *, PyObject *, PyObject *, PyObject *); static PyObject * import_from(PyObject *, PyObject *); static int import_all_from(PyObject *, PyObject *); static void format_exc_check_arg(PyObject *, const char *, PyObject *); @@ -91,7 +88,7 @@ static long dxp[256]; #endif #ifdef WITH_THREAD -#define GIL_REQUEST _Py_atomic_load_relaxed(&_PyRuntime.ceval.gil_drop_request) +#define GIL_REQUEST _Py_atomic_load_relaxed(&gil_drop_request) #else #define GIL_REQUEST 0 #endif @@ -101,22 +98,22 @@ static long dxp[256]; the GIL eventually anyway. */ #define COMPUTE_EVAL_BREAKER() \ _Py_atomic_store_relaxed( \ - &_PyRuntime.ceval.eval_breaker, \ + &eval_breaker, \ GIL_REQUEST | \ - _Py_atomic_load_relaxed(&_PyRuntime.ceval.pending.calls_to_do) | \ - _PyRuntime.ceval.pending.async_exc) + _Py_atomic_load_relaxed(&pendingcalls_to_do) | \ + pending_async_exc) #ifdef WITH_THREAD #define SET_GIL_DROP_REQUEST() \ do { \ - _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil_drop_request, 1); \ - _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \ + _Py_atomic_store_relaxed(&gil_drop_request, 1); \ + _Py_atomic_store_relaxed(&eval_breaker, 1); \ } while (0) #define RESET_GIL_DROP_REQUEST() \ do { \ - _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil_drop_request, 0); \ + _Py_atomic_store_relaxed(&gil_drop_request, 0); \ COMPUTE_EVAL_BREAKER(); \ } while (0) @@ -125,35 +122,47 @@ static long dxp[256]; /* Pending calls are only modified under pending_lock */ #define SIGNAL_PENDING_CALLS() \ do { \ - _Py_atomic_store_relaxed(&_PyRuntime.ceval.pending.calls_to_do, 1); \ - _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \ + _Py_atomic_store_relaxed(&pendingcalls_to_do, 1); \ + _Py_atomic_store_relaxed(&eval_breaker, 1); \ } while (0) #define UNSIGNAL_PENDING_CALLS() \ do { \ - _Py_atomic_store_relaxed(&_PyRuntime.ceval.pending.calls_to_do, 0); \ + _Py_atomic_store_relaxed(&pendingcalls_to_do, 0); \ COMPUTE_EVAL_BREAKER(); \ } while (0) #define SIGNAL_ASYNC_EXC() \ do { \ - _PyRuntime.ceval.pending.async_exc = 1; \ - _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \ + pending_async_exc = 1; \ + _Py_atomic_store_relaxed(&eval_breaker, 1); \ } while (0) #define UNSIGNAL_ASYNC_EXC() \ - do { \ - _PyRuntime.ceval.pending.async_exc = 0; \ - COMPUTE_EVAL_BREAKER(); \ - } while (0) + do { 
pending_async_exc = 0; COMPUTE_EVAL_BREAKER(); } while (0) +/* This single variable consolidates all requests to break out of the fast path + in the eval loop. */ +static _Py_atomic_int eval_breaker = {0}; +/* Request for running pending calls. */ +static _Py_atomic_int pendingcalls_to_do = {0}; +/* Request for looking at the `async_exc` field of the current thread state. + Guarded by the GIL. */ +static int pending_async_exc = 0; + #ifdef WITH_THREAD #ifdef HAVE_ERRNO_H #include <errno.h> #endif #include "pythread.h" + +static PyThread_type_lock pending_lock = 0; /* for pending calls */ +static unsigned long main_thread = 0; +/* Request for dropping the GIL */ +static _Py_atomic_int gil_drop_request = {0}; + #include "ceval_gil.h" int @@ -169,9 +178,9 @@ PyEval_InitThreads(void) return; create_gil(); take_gil(PyThreadState_GET()); - _PyRuntime.ceval.pending.main_thread = PyThread_get_thread_ident(); - if (!_PyRuntime.ceval.pending.lock) - _PyRuntime.ceval.pending.lock = PyThread_allocate_lock(); + main_thread = PyThread_get_thread_ident(); + if (!pending_lock) + pending_lock = PyThread_allocate_lock(); } void @@ -239,9 +248,9 @@ PyEval_ReInitThreads(void) if (!gil_created()) return; recreate_gil(); - _PyRuntime.ceval.pending.lock = PyThread_allocate_lock(); + pending_lock = PyThread_allocate_lock(); take_gil(current_tstate); - _PyRuntime.ceval.pending.main_thread = PyThread_get_thread_ident(); + main_thread = PyThread_get_thread_ident(); /* Destroy all threads except the current one */ _PyThreadState_DeleteExcept(current_tstate); @@ -285,7 +294,7 @@ PyEval_RestoreThread(PyThreadState *tstate) int err = errno; take_gil(tstate); /* _Py_Finalizing is protected by the GIL */ - if (_Py_IS_FINALIZING() && !_Py_CURRENTLY_FINALIZING(tstate)) { + if (_Py_Finalizing && tstate != _Py_Finalizing) { drop_gil(tstate); PyThread_exit_thread(); assert(0); /* unreachable */ @@ -337,11 +346,19 @@ _PyEval_SignalReceived(void) callback. */ +#define NPENDINGCALLS 32 +static struct { + int (*func)(void *); + void *arg; +} pendingcalls[NPENDINGCALLS]; +static int pendingfirst = 0; +static int pendinglast = 0; + int Py_AddPendingCall(int (*func)(void *), void *arg) { int i, j, result=0; - PyThread_type_lock lock = _PyRuntime.ceval.pending.lock; + PyThread_type_lock lock = pending_lock; /* try a few times for the lock. 
Since this mechanism is used * for signal handling (on the main thread), there is a (slim) @@ -363,14 +380,14 @@ Py_AddPendingCall(int (*func)(void *), void *arg) return -1; } - i = _PyRuntime.ceval.pending.last; + i = pendinglast; j = (i + 1) % NPENDINGCALLS; - if (j == _PyRuntime.ceval.pending.first) { + if (j == pendingfirst) { result = -1; /* Queue full */ } else { - _PyRuntime.ceval.pending.calls[i].func = func; - _PyRuntime.ceval.pending.calls[i].arg = arg; - _PyRuntime.ceval.pending.last = j; + pendingcalls[i].func = func; + pendingcalls[i].arg = arg; + pendinglast = j; } /* signal main loop */ SIGNAL_PENDING_CALLS(); @@ -388,19 +405,16 @@ Py_MakePendingCalls(void) assert(PyGILState_Check()); - if (!_PyRuntime.ceval.pending.lock) { + if (!pending_lock) { /* initial allocation of the lock */ - _PyRuntime.ceval.pending.lock = PyThread_allocate_lock(); - if (_PyRuntime.ceval.pending.lock == NULL) + pending_lock = PyThread_allocate_lock(); + if (pending_lock == NULL) return -1; } /* only service pending calls on main thread */ - if (_PyRuntime.ceval.pending.main_thread && - PyThread_get_thread_ident() != _PyRuntime.ceval.pending.main_thread) - { + if (main_thread && PyThread_get_thread_ident() != main_thread) return 0; - } /* don't perform recursive pending calls */ if (busy) return 0; @@ -422,16 +436,16 @@ Py_MakePendingCalls(void) void *arg = NULL; /* pop one item off the queue while holding the lock */ - PyThread_acquire_lock(_PyRuntime.ceval.pending.lock, WAIT_LOCK); - j = _PyRuntime.ceval.pending.first; - if (j == _PyRuntime.ceval.pending.last) { + PyThread_acquire_lock(pending_lock, WAIT_LOCK); + j = pendingfirst; + if (j == pendinglast) { func = NULL; /* Queue empty */ } else { - func = _PyRuntime.ceval.pending.calls[j].func; - arg = _PyRuntime.ceval.pending.calls[j].arg; - _PyRuntime.ceval.pending.first = (j + 1) % NPENDINGCALLS; + func = pendingcalls[j].func; + arg = pendingcalls[j].arg; + pendingfirst = (j + 1) % NPENDINGCALLS; } - PyThread_release_lock(_PyRuntime.ceval.pending.lock); + PyThread_release_lock(pending_lock); /* having released the lock, perform the callback */ if (func == NULL) break; @@ -475,6 +489,14 @@ error: The two threads could theoretically wiggle around the "busy" variable. 
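Py_AddPendingCall and Py_MakePendingCalls in the surrounding hunks implement a fixed-size ring buffer: the queue is full when advancing last would collide with first, so one slot is always sacrificed. Here is a stripped-down, single-threaded sketch of that buffer (the real code additionally guards the indices with pending_lock and wakes the eval loop via SIGNAL_PENDING_CALLS()):

    #include <stdio.h>

    #define NPENDING 8

    static struct { int (*func)(void *); void *arg; } calls[NPENDING];
    static int first = 0, last = 0;

    static int add_call(int (*func)(void *), void *arg)
    {
        int i = last, j = (i + 1) % NPENDING;
        if (j == first)
            return -1;                /* queue full: one slot stays empty */
        calls[i].func = func;
        calls[i].arg = arg;
        last = j;
        return 0;
    }

    static void make_calls(void)
    {
        while (first != last) {
            int i = first;
            first = (first + 1) % NPENDING;   /* pop before calling */
            calls[i].func(calls[i].arg);
        }
    }

    static int hello(void *arg) { printf("pending: %s\n", (char *)arg); return 0; }

    int main(void)
    {
        add_call(hello, "a");
        add_call(hello, "b");
        make_calls();
        return 0;
    }

Popping before invoking the callback keeps the queue consistent even if the callback itself adds new pending calls.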
*/ +#define NPENDINGCALLS 32 +static struct { + int (*func)(void *); + void *arg; +} pendingcalls[NPENDINGCALLS]; +static volatile int pendingfirst = 0; +static volatile int pendinglast = 0; + int Py_AddPendingCall(int (*func)(void *), void *arg) { @@ -484,15 +506,15 @@ Py_AddPendingCall(int (*func)(void *), void *arg) if (busy) return -1; busy = 1; - i = _PyRuntime.ceval.pending.last; + i = pendinglast; j = (i + 1) % NPENDINGCALLS; - if (j == _PyRuntime.ceval.pending.first) { + if (j == pendingfirst) { busy = 0; return -1; /* Queue full */ } - _PyRuntime.ceval.pending.calls[i].func = func; - _PyRuntime.ceval.pending.calls[i].arg = arg; - _PyRuntime.ceval.pending.last = j; + pendingcalls[i].func = func; + pendingcalls[i].arg = arg; + pendinglast = j; SIGNAL_PENDING_CALLS(); busy = 0; @@ -521,12 +543,12 @@ Py_MakePendingCalls(void) int i; int (*func)(void *); void *arg; - i = _PyRuntime.ceval.pending.first; - if (i == _PyRuntime.ceval.pending.last) + i = pendingfirst; + if (i == pendinglast) break; /* Queue empty */ - func = _PyRuntime.ceval.pending.calls[i].func; - arg = _PyRuntime.ceval.pending.calls[i].arg; - _PyRuntime.ceval.pending.first = (i + 1) % NPENDINGCALLS; + func = pendingcalls[i].func; + arg = pendingcalls[i].arg; + pendingfirst = (i + 1) % NPENDINGCALLS; if (func(arg) < 0) { goto error; } @@ -548,32 +570,20 @@ error: #ifndef Py_DEFAULT_RECURSION_LIMIT #define Py_DEFAULT_RECURSION_LIMIT 1000 #endif - -void -_PyEval_Initialize(struct _ceval_runtime_state *state) -{ - state->recursion_limit = Py_DEFAULT_RECURSION_LIMIT; - state->check_recursion_limit = Py_DEFAULT_RECURSION_LIMIT; - _gil_initialize(&state->gil); -} - -int -_PyEval_CheckRecursionLimit(void) -{ - return _PyRuntime.ceval.check_recursion_limit; -} +static int recursion_limit = Py_DEFAULT_RECURSION_LIMIT; +int _Py_CheckRecursionLimit = Py_DEFAULT_RECURSION_LIMIT; int Py_GetRecursionLimit(void) { - return _PyRuntime.ceval.recursion_limit; + return recursion_limit; } void Py_SetRecursionLimit(int new_limit) { - _PyRuntime.ceval.recursion_limit = new_limit; - _PyRuntime.ceval.check_recursion_limit = _PyRuntime.ceval.recursion_limit; + recursion_limit = new_limit; + _Py_CheckRecursionLimit = recursion_limit; } /* the macro Py_EnterRecursiveCall() only calls _Py_CheckRecursiveCall() @@ -585,7 +595,6 @@ int _Py_CheckRecursiveCall(const char *where) { PyThreadState *tstate = PyThreadState_GET(); - int recursion_limit = _PyRuntime.ceval.recursion_limit; #ifdef USE_STACKCHECK if (PyOS_CheckStack()) { @@ -594,7 +603,7 @@ _Py_CheckRecursiveCall(const char *where) return -1; } #endif - _PyRuntime.ceval.check_recursion_limit = recursion_limit; + _Py_CheckRecursionLimit = recursion_limit; if (tstate->recursion_critical) /* Somebody asked that we don't check for recursion. */ return 0; @@ -633,7 +642,13 @@ static void restore_and_clear_exc_state(PyThreadState *, PyFrameObject *); static int do_raise(PyObject *, PyObject *); static int unpack_iterable(PyObject *, int, int, PyObject **); -#define _Py_TracingPossible _PyRuntime.ceval.tracing_possible +/* Records whether tracing is on for any thread. Counts the number of + threads for which tstate->c_tracefunc is non-NULL, so if the value + is 0, we know we don't have to check this thread's c_tracefunc. 
+ This speeds up the if statement in PyEval_EvalFrameEx() after + fast_next_opcode*/ +static int _Py_TracingPossible = 0; + PyObject * @@ -764,7 +779,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) #define DISPATCH() \ { \ - if (!_Py_atomic_load_relaxed(&_PyRuntime.ceval.eval_breaker)) { \ + if (!_Py_atomic_load_relaxed(&eval_breaker)) { \ FAST_DISPATCH(); \ } \ continue; \ @@ -812,8 +827,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) /* Code access macros */ /* The integer overflow is checked by an assertion below. */ -#define INSTR_OFFSET() \ - (sizeof(_Py_CODEUNIT) * (int)(next_instr - first_instr)) +#define INSTR_OFFSET() (sizeof(_Py_CODEUNIT) * (int)(next_instr - first_instr)) #define NEXTOPARG() do { \ _Py_CODEUNIT word = *next_instr; \ opcode = _Py_OPCODE(word); \ @@ -1066,7 +1080,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) async I/O handler); see Py_AddPendingCall() and Py_MakePendingCalls() above. */ - if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.eval_breaker)) { + if (_Py_atomic_load_relaxed(&eval_breaker)) { if (_Py_OPCODE(*next_instr) == SETUP_FINALLY || _Py_OPCODE(*next_instr) == YIELD_FROM) { /* Two cases where we skip running signal handlers and other @@ -1083,16 +1097,12 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) */ goto fast_next_opcode; } - if (_Py_atomic_load_relaxed( - &_PyRuntime.ceval.pending.calls_to_do)) - { + if (_Py_atomic_load_relaxed(&pendingcalls_to_do)) { if (Py_MakePendingCalls() < 0) goto error; } #ifdef WITH_THREAD - if (_Py_atomic_load_relaxed( - &_PyRuntime.ceval.gil_drop_request)) - { + if (_Py_atomic_load_relaxed(&gil_drop_request)) { /* Give another thread a chance */ if (PyThreadState_Swap(NULL) != tstate) Py_FatalError("ceval: tstate mix-up"); @@ -1103,9 +1113,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) take_gil(tstate); /* Check if we should make a quick exit. */ - if (_Py_IS_FINALIZING() && - !_Py_CURRENTLY_FINALIZING(tstate)) - { + if (_Py_Finalizing && _Py_Finalizing != tstate) { drop_gil(tstate); PyThread_exit_thread(); } diff --git a/Python/ceval_gil.h b/Python/ceval_gil.h index ef51890..a3b450b 100644 --- a/Python/ceval_gil.h +++ b/Python/ceval_gil.h @@ -8,13 +8,20 @@ /* First some general settings */ -#define INTERVAL (_PyRuntime.ceval.gil.interval >= 1 ? _PyRuntime.ceval.gil.interval : 1) +/* microseconds (the Python API uses seconds, though) */ +#define DEFAULT_INTERVAL 5000 +static unsigned long gil_interval = DEFAULT_INTERVAL; +#define INTERVAL (gil_interval >= 1 ? gil_interval : 1) + +/* Enable if you want to force the switching of threads at least every `gil_interval` */ +#undef FORCE_SWITCHING +#define FORCE_SWITCHING /* Notes about the implementation: - - The GIL is just a boolean variable (locked) whose access is protected + - The GIL is just a boolean variable (gil_locked) whose access is protected by a mutex (gil_mutex), and whose changes are signalled by a condition variable (gil_cond). gil_mutex is taken for short periods of time, and therefore mostly uncontended. @@ -41,7 +48,7 @@ - When a thread releases the GIL and gil_drop_request is set, that thread ensures that another GIL-awaiting thread gets scheduled. It does so by waiting on a condition variable (switch_cond) until - the value of last_holder is changed to something else than its + the value of gil_last_holder is changed to something else than its own thread state pointer, indicating that another thread was able to take the GIL. 
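The eval_breaker logic restored above is a composite flag: gil_drop_request, pendingcalls_to_do, and pending_async_exc are OR-ed into one word so the hot path in _PyEval_EvalFrameDefault() tests a single atomic load. A sketch of the same idea using C11 <stdatomic.h> in place of CPython's pyatomic.h wrappers:

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int gil_drop_request = 0;
    static atomic_int pendingcalls_to_do = 0;
    static int pending_async_exc = 0;       /* guarded by the GIL */
    static atomic_int eval_breaker = 0;     /* the one word the loop tests */

    static void compute_eval_breaker(void)
    {
        atomic_store_explicit(&eval_breaker,
            atomic_load_explicit(&gil_drop_request, memory_order_relaxed)
            | atomic_load_explicit(&pendingcalls_to_do, memory_order_relaxed)
            | pending_async_exc,
            memory_order_relaxed);
    }

    int main(void)
    {
        atomic_store(&pendingcalls_to_do, 1);
        compute_eval_breaker();
        printf("breaker=%d\n", atomic_load(&eval_breaker));  /* 1 */
        atomic_store(&pendingcalls_to_do, 0);
        compute_eval_breaker();
        printf("breaker=%d\n", atomic_load(&eval_breaker));  /* 0 */
        return 0;
    }

Each SET_ macro above stores 1 into the breaker eagerly, while each UNSIGNAL_ macro recomputes it, so the breaker only clears once no request of any kind remains.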
@@ -53,7 +60,11 @@ */ #include "condvar.h" +#ifndef Py_HAVE_CONDVAR +#error You need either a POSIX-compatible or a Windows system! +#endif +#define MUTEX_T PyMUTEX_T #define MUTEX_INIT(mut) \ if (PyMUTEX_INIT(&(mut))) { \ Py_FatalError("PyMUTEX_INIT(" #mut ") failed"); }; @@ -67,6 +78,7 @@ if (PyMUTEX_UNLOCK(&(mut))) { \ Py_FatalError("PyMUTEX_UNLOCK(" #mut ") failed"); }; +#define COND_T PyCOND_T #define COND_INIT(cond) \ if (PyCOND_INIT(&(cond))) { \ Py_FatalError("PyCOND_INIT(" #cond ") failed"); }; @@ -91,36 +103,48 @@ } \ -#define DEFAULT_INTERVAL 5000 -static void _gil_initialize(struct _gil_runtime_state *state) -{ - _Py_atomic_int uninitialized = {-1}; - state->locked = uninitialized; - state->interval = DEFAULT_INTERVAL; -} +/* Whether the GIL is already taken (-1 if uninitialized). This is atomic + because it can be read without any lock taken in ceval.c. */ +static _Py_atomic_int gil_locked = {-1}; +/* Number of GIL switches since the beginning. */ +static unsigned long gil_switch_number = 0; +/* Last PyThreadState holding / having held the GIL. This helps us know + whether anyone else was scheduled after we dropped the GIL. */ +static _Py_atomic_address gil_last_holder = {0}; + +/* This condition variable allows one or several threads to wait until + the GIL is released. In addition, the mutex also protects the above + variables. */ +static COND_T gil_cond; +static MUTEX_T gil_mutex; + +#ifdef FORCE_SWITCHING +/* This condition variable helps the GIL-releasing thread wait for + a GIL-awaiting thread to be scheduled and take the GIL. */ +static COND_T switch_cond; +static MUTEX_T switch_mutex; +#endif + static int gil_created(void) { - return (_Py_atomic_load_explicit(&_PyRuntime.ceval.gil.locked, - _Py_memory_order_acquire) - ) >= 0; + return _Py_atomic_load_explicit(&gil_locked, _Py_memory_order_acquire) >= 0; } static void create_gil(void) { - MUTEX_INIT(_PyRuntime.ceval.gil.mutex); + MUTEX_INIT(gil_mutex); #ifdef FORCE_SWITCHING - MUTEX_INIT(_PyRuntime.ceval.gil.switch_mutex); + MUTEX_INIT(switch_mutex); #endif - COND_INIT(_PyRuntime.ceval.gil.cond); + COND_INIT(gil_cond); #ifdef FORCE_SWITCHING - COND_INIT(_PyRuntime.ceval.gil.switch_cond); + COND_INIT(switch_cond); #endif - _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.last_holder, 0); - _Py_ANNOTATE_RWLOCK_CREATE(&_PyRuntime.ceval.gil.locked); - _Py_atomic_store_explicit(&_PyRuntime.ceval.gil.locked, 0, - _Py_memory_order_release); + _Py_atomic_store_relaxed(&gil_last_holder, 0); + _Py_ANNOTATE_RWLOCK_CREATE(&gil_locked); + _Py_atomic_store_explicit(&gil_locked, 0, _Py_memory_order_release); } static void destroy_gil(void) @@ -128,62 +152,54 @@ static void destroy_gil(void) /* some pthread-like implementations tie the mutex to the cond * and must have the cond destroyed first. 
*/ - COND_FINI(_PyRuntime.ceval.gil.cond); - MUTEX_FINI(_PyRuntime.ceval.gil.mutex); + COND_FINI(gil_cond); + MUTEX_FINI(gil_mutex); #ifdef FORCE_SWITCHING - COND_FINI(_PyRuntime.ceval.gil.switch_cond); - MUTEX_FINI(_PyRuntime.ceval.gil.switch_mutex); + COND_FINI(switch_cond); + MUTEX_FINI(switch_mutex); #endif - _Py_atomic_store_explicit(&_PyRuntime.ceval.gil.locked, -1, - _Py_memory_order_release); - _Py_ANNOTATE_RWLOCK_DESTROY(&_PyRuntime.ceval.gil.locked); + _Py_atomic_store_explicit(&gil_locked, -1, _Py_memory_order_release); + _Py_ANNOTATE_RWLOCK_DESTROY(&gil_locked); } static void recreate_gil(void) { - _Py_ANNOTATE_RWLOCK_DESTROY(&_PyRuntime.ceval.gil.locked); + _Py_ANNOTATE_RWLOCK_DESTROY(&gil_locked); /* XXX should we destroy the old OS resources here? */ create_gil(); } static void drop_gil(PyThreadState *tstate) { - if (!_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked)) + if (!_Py_atomic_load_relaxed(&gil_locked)) Py_FatalError("drop_gil: GIL is not locked"); /* tstate is allowed to be NULL (early interpreter init) */ if (tstate != NULL) { /* Sub-interpreter support: threads might have been switched under our feet using PyThreadState_Swap(). Fix the GIL last holder variable so that our heuristics work. */ - _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.last_holder, - (uintptr_t)tstate); + _Py_atomic_store_relaxed(&gil_last_holder, (uintptr_t)tstate); } - MUTEX_LOCK(_PyRuntime.ceval.gil.mutex); - _Py_ANNOTATE_RWLOCK_RELEASED(&_PyRuntime.ceval.gil.locked, /*is_write=*/1); - _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.locked, 0); - COND_SIGNAL(_PyRuntime.ceval.gil.cond); - MUTEX_UNLOCK(_PyRuntime.ceval.gil.mutex); + MUTEX_LOCK(gil_mutex); + _Py_ANNOTATE_RWLOCK_RELEASED(&gil_locked, /*is_write=*/1); + _Py_atomic_store_relaxed(&gil_locked, 0); + COND_SIGNAL(gil_cond); + MUTEX_UNLOCK(gil_mutex); #ifdef FORCE_SWITCHING - if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil_drop_request) && - tstate != NULL) - { - MUTEX_LOCK(_PyRuntime.ceval.gil.switch_mutex); + if (_Py_atomic_load_relaxed(&gil_drop_request) && tstate != NULL) { + MUTEX_LOCK(switch_mutex); /* Not switched yet => wait */ - if (((PyThreadState*)_Py_atomic_load_relaxed( - &_PyRuntime.ceval.gil.last_holder) - ) == tstate) - { + if ((PyThreadState*)_Py_atomic_load_relaxed(&gil_last_holder) == tstate) { RESET_GIL_DROP_REQUEST(); /* NOTE: if COND_WAIT does not atomically start waiting when releasing the mutex, another thread can run through, take the GIL and drop it again, and reset the condition before we even had a chance to wait for it. 
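take_gil(), a little further down, relies on a switch counter to decide when to demand the GIL: a waiter does a timed wait on gil_cond, and if the wait timed out, the GIL is still held, and gil_switch_number has not moved, it sets gil_drop_request. The decision itself reduces to a small predicate; a hedged sketch (the names mirror the statics in this file, but this is an illustration, not the real control flow):

    #include <stdio.h>

    /* Sketch: should a GIL waiter ask the holder to drop it? */
    static int should_request_drop(int timed_out, int gil_locked,
                                   unsigned long switch_number,
                                   unsigned long saved_switchnum)
    {
        /* We waited a full interval, the GIL is still held, and no
           thread switch happened in the meantime: time to ask. */
        return timed_out && gil_locked && switch_number == saved_switchnum;
    }

    int main(void)
    {
        printf("%d\n", should_request_drop(1, 1, 7, 7)); /* 1: request drop */
        printf("%d\n", should_request_drop(1, 1, 8, 7)); /* 0: a switch happened */
        printf("%d\n", should_request_drop(0, 1, 7, 7)); /* 0: signalled in time */
        return 0;
    }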
*/ - COND_WAIT(_PyRuntime.ceval.gil.switch_cond, - _PyRuntime.ceval.gil.switch_mutex); + COND_WAIT(switch_cond, switch_mutex); } - MUTEX_UNLOCK(_PyRuntime.ceval.gil.switch_mutex); + MUTEX_UNLOCK(switch_mutex); } #endif } @@ -195,65 +211,60 @@ static void take_gil(PyThreadState *tstate) Py_FatalError("take_gil: NULL tstate"); err = errno; - MUTEX_LOCK(_PyRuntime.ceval.gil.mutex); + MUTEX_LOCK(gil_mutex); - if (!_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked)) + if (!_Py_atomic_load_relaxed(&gil_locked)) goto _ready; - while (_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked)) { + while (_Py_atomic_load_relaxed(&gil_locked)) { int timed_out = 0; unsigned long saved_switchnum; - saved_switchnum = _PyRuntime.ceval.gil.switch_number; - COND_TIMED_WAIT(_PyRuntime.ceval.gil.cond, _PyRuntime.ceval.gil.mutex, - INTERVAL, timed_out); + saved_switchnum = gil_switch_number; + COND_TIMED_WAIT(gil_cond, gil_mutex, INTERVAL, timed_out); /* If we timed out and no switch occurred in the meantime, it is time to ask the GIL-holding thread to drop it. */ if (timed_out && - _Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked) && - _PyRuntime.ceval.gil.switch_number == saved_switchnum) { + _Py_atomic_load_relaxed(&gil_locked) && + gil_switch_number == saved_switchnum) { SET_GIL_DROP_REQUEST(); } } _ready: #ifdef FORCE_SWITCHING - /* This mutex must be taken before modifying - _PyRuntime.ceval.gil.last_holder (see drop_gil()). */ - MUTEX_LOCK(_PyRuntime.ceval.gil.switch_mutex); + /* This mutex must be taken before modifying gil_last_holder (see drop_gil()). */ + MUTEX_LOCK(switch_mutex); #endif /* We now hold the GIL */ - _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.locked, 1); - _Py_ANNOTATE_RWLOCK_ACQUIRED(&_PyRuntime.ceval.gil.locked, /*is_write=*/1); - - if (tstate != (PyThreadState*)_Py_atomic_load_relaxed( - &_PyRuntime.ceval.gil.last_holder)) - { - _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.last_holder, - (uintptr_t)tstate); - ++_PyRuntime.ceval.gil.switch_number; + _Py_atomic_store_relaxed(&gil_locked, 1); + _Py_ANNOTATE_RWLOCK_ACQUIRED(&gil_locked, /*is_write=*/1); + + if (tstate != (PyThreadState*)_Py_atomic_load_relaxed(&gil_last_holder)) { + _Py_atomic_store_relaxed(&gil_last_holder, (uintptr_t)tstate); + ++gil_switch_number; } #ifdef FORCE_SWITCHING - COND_SIGNAL(_PyRuntime.ceval.gil.switch_cond); - MUTEX_UNLOCK(_PyRuntime.ceval.gil.switch_mutex); + COND_SIGNAL(switch_cond); + MUTEX_UNLOCK(switch_mutex); #endif - if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil_drop_request)) { + if (_Py_atomic_load_relaxed(&gil_drop_request)) { RESET_GIL_DROP_REQUEST(); } if (tstate->async_exc != NULL) { _PyEval_SignalAsyncExc(); } - MUTEX_UNLOCK(_PyRuntime.ceval.gil.mutex); + MUTEX_UNLOCK(gil_mutex); errno = err; } void _PyEval_SetSwitchInterval(unsigned long microseconds) { - _PyRuntime.ceval.gil.interval = microseconds; + gil_interval = microseconds; } unsigned long _PyEval_GetSwitchInterval() { - return _PyRuntime.ceval.gil.interval; + return gil_interval; } diff --git a/Python/condvar.h b/Python/condvar.h index aaa8043..9a71b17 100644 --- a/Python/condvar.h +++ b/Python/condvar.h @@ -37,16 +37,27 @@ * Condition Variable. */ -#ifndef _CONDVAR_IMPL_H_ -#define _CONDVAR_IMPL_H_ +#ifndef _CONDVAR_H_ +#define _CONDVAR_H_ #include "Python.h" -#include "internal/_condvar.h" + +#ifndef _POSIX_THREADS +/* This means pthreads are not implemented in libc headers, hence the macro + not present in unistd.h. But they still can be implemented as an external + library (e.g. 
gnu pth in pthread emulation) */ +# ifdef HAVE_PTHREAD_H +# include <pthread.h> /* _POSIX_THREADS */ +# endif +#endif #ifdef _POSIX_THREADS /* * POSIX support */ +#define Py_HAVE_CONDVAR + +#include <pthread.h> #define PyCOND_ADD_MICROSECONDS(tv, interval) \ do { /* TODO: add overflow and truncation checks */ \ @@ -63,11 +74,13 @@ do { /* TODO: add overflow and truncation checks */ \ #endif /* The following functions return 0 on success, nonzero on error */ +#define PyMUTEX_T pthread_mutex_t #define PyMUTEX_INIT(mut) pthread_mutex_init((mut), NULL) #define PyMUTEX_FINI(mut) pthread_mutex_destroy(mut) #define PyMUTEX_LOCK(mut) pthread_mutex_lock(mut) #define PyMUTEX_UNLOCK(mut) pthread_mutex_unlock(mut) +#define PyCOND_T pthread_cond_t #define PyCOND_INIT(cond) pthread_cond_init((cond), NULL) #define PyCOND_FINI(cond) pthread_cond_destroy(cond) #define PyCOND_SIGNAL(cond) pthread_cond_signal(cond) @@ -103,11 +116,45 @@ PyCOND_TIMEDWAIT(PyCOND_T *cond, PyMUTEX_T *mut, long long us) * Emulated condition variables ones that work with XP and later, plus * example native support on VISTA and onwards. */ +#define Py_HAVE_CONDVAR + + +/* include windows if it hasn't been done before */ +#define WIN32_LEAN_AND_MEAN +#include <windows.h> + +/* options */ +/* non-emulated condition variables are provided for those that want + * to target Windows Vista. Modify this macro to enable them. + */ +#ifndef _PY_EMULATED_WIN_CV +#define _PY_EMULATED_WIN_CV 1 /* use emulated condition variables */ +#endif + +/* fall back to emulation if not targeting Vista */ +#if !defined NTDDI_VISTA || NTDDI_VERSION < NTDDI_VISTA +#undef _PY_EMULATED_WIN_CV +#define _PY_EMULATED_WIN_CV 1 +#endif + #if _PY_EMULATED_WIN_CV /* The mutex is a CriticalSection object and The condition variables is emulated with the help of a semaphore. + Semaphores are available on Windows XP (2003 server) and later. + We use a Semaphore rather than an auto-reset event, because although + an auto-resent event might appear to solve the lost-wakeup bug (race + condition between releasing the outer lock and waiting) because it + maintains state even though a wait hasn't happened, there is still + a lost wakeup problem if more than one thread are interrupted in the + critical place. A semaphore solves that, because its state is counted, + not Boolean. + Because it is ok to signal a condition variable with no one + waiting, we need to keep track of the number of + waiting threads. Otherwise, the semaphore's state could rise + without bound. This also helps reduce the number of "spurious wakeups" + that would otherwise happen. This implementation still has the problem that the threads woken with a "signal" aren't necessarily those that are already @@ -121,6 +168,8 @@ PyCOND_TIMEDWAIT(PyCOND_T *cond, PyMUTEX_T *mut, long long us) http://www.cse.wustl.edu/~schmidt/win32-cv-1.html */ +typedef CRITICAL_SECTION PyMUTEX_T; + Py_LOCAL_INLINE(int) PyMUTEX_INIT(PyMUTEX_T *cs) { @@ -149,6 +198,15 @@ PyMUTEX_UNLOCK(PyMUTEX_T *cs) return 0; } +/* The ConditionVariable object. 
From XP onwards it is easily emulated with + * a Semaphore + */ + +typedef struct _PyCOND_T +{ + HANDLE sem; + int waiting; /* to allow PyCOND_SIGNAL to be a no-op */ +} PyCOND_T; Py_LOCAL_INLINE(int) PyCOND_INIT(PyCOND_T *cv) @@ -246,7 +304,12 @@ PyCOND_BROADCAST(PyCOND_T *cv) return 0; } -#else /* !_PY_EMULATED_WIN_CV */ +#else + +/* Use native Win7 primitives if build target is Win7 or higher */ + +/* SRWLOCK is faster and better than CriticalSection */ +typedef SRWLOCK PyMUTEX_T; Py_LOCAL_INLINE(int) PyMUTEX_INIT(PyMUTEX_T *cs) @@ -276,6 +339,8 @@ PyMUTEX_UNLOCK(PyMUTEX_T *cs) } +typedef CONDITION_VARIABLE PyCOND_T; + Py_LOCAL_INLINE(int) PyCOND_INIT(PyCOND_T *cv) { @@ -322,4 +387,4 @@ PyCOND_BROADCAST(PyCOND_T *cv) #endif /* _POSIX_THREADS, NT_THREADS */ -#endif /* _CONDVAR_IMPL_H_ */ +#endif /* _CONDVAR_H_ */ diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 3f405b1..662405b 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -77,30 +77,6 @@ extern void _PyGILState_Init(PyInterpreterState *, PyThreadState *); extern void _PyGILState_Fini(void); #endif /* WITH_THREAD */ -_PyRuntimeState _PyRuntime = {0, 0}; - -void -_PyRuntime_Initialize(void) -{ - /* XXX We only initialize once in the process, which aligns with - the static initialization of the former globals now found in - _PyRuntime. However, _PyRuntime *should* be initialized with - every Py_Initialize() call, but doing so breaks the runtime. - This is because the runtime state is not properly finalized - currently. */ - static int initialized = 0; - if (initialized) - return; - initialized = 1; - _PyRuntimeState_Init(&_PyRuntime); -} - -void -_PyRuntime_Finalize(void) -{ - _PyRuntimeState_Fini(&_PyRuntime); -} - /* Global configuration variable declarations are in pydebug.h */ /* XXX (ncoghlan): move those declarations to pylifecycle.h? */ int Py_DebugFlag; /* Needed by parser.c */ @@ -124,6 +100,8 @@ int Py_LegacyWindowsFSEncodingFlag = 0; /* Uses mbcs instead of utf-8 */ int Py_LegacyWindowsStdioFlag = 0; /* Uses FileIO instead of WindowsConsoleIO */ #endif +PyThreadState *_Py_Finalizing = NULL; + /* Hack to force loading of object files */ int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t) = \ PyOS_mystrnicmp; /* Python/pystrcmp.o */ @@ -141,17 +119,19 @@ PyModule_GetWarningsModule(void) * * Can be called prior to Py_Initialize. 
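Below, _Py_CoreInitialized and _Py_Initialized come back as plain int globals tracking CPython's two-phase startup: core initialization must complete exactly once before main-interpreter initialization runs. A toy sketch of the invariant those flags enforce (fprintf/exit stands in for Py_FatalError; the init bodies are elided):

    #include <stdio.h>
    #include <stdlib.h>

    static int core_initialized = 0;
    static int initialized = 0;

    static void init_core(void)
    {
        if (initialized || core_initialized) {
            fprintf(stderr, "core already initialized\n");
            exit(1);
        }
        /* ... set up allocators, main interpreter, builtins ... */
        core_initialized = 1;
    }

    static void init_main_interpreter(void)
    {
        if (!core_initialized) {
            fprintf(stderr, "core not initialized\n");
            exit(1);
        }
        /* ... import site, finish sys module setup ... */
        initialized = 1;
    }

    int main(void)
    {
        init_core();
        init_main_interpreter();
        printf("core=%d main=%d\n", core_initialized, initialized);
        return 0;
    }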
*/ +int _Py_CoreInitialized = 0; +int _Py_Initialized = 0; int _Py_IsCoreInitialized(void) { - return _PyRuntime.core_initialized; + return _Py_CoreInitialized; } int Py_IsInitialized(void) { - return _PyRuntime.initialized; + return _Py_Initialized; } /* Helper to allow an embedding application to override the normal @@ -564,16 +544,14 @@ void _Py_InitializeCore(const _PyCoreConfig *config) _PyCoreConfig core_config = _PyCoreConfig_INIT; _PyMainInterpreterConfig preinit_config = _PyMainInterpreterConfig_INIT; - _PyRuntime_Initialize(); - if (config != NULL) { core_config = *config; } - if (_PyRuntime.initialized) { + if (_Py_Initialized) { Py_FatalError("Py_InitializeCore: main interpreter already initialized"); } - if (_PyRuntime.core_initialized) { + if (_Py_CoreInitialized) { Py_FatalError("Py_InitializeCore: runtime core already initialized"); } @@ -586,14 +564,7 @@ void _Py_InitializeCore(const _PyCoreConfig *config) * threads still hanging around from a previous Py_Initialize/Finalize * pair :( */ - _PyRuntime.finalizing = NULL; - - if (_PyMem_SetupAllocators(core_config.allocator) < 0) { - fprintf(stderr, - "Error in PYTHONMALLOC: unknown allocator \"%s\"!\n", - core_config.allocator); - exit(1); - } + _Py_Finalizing = NULL; #ifdef __ANDROID__ /* Passing "" to setlocale() on Android requests the C locale rather @@ -635,7 +606,7 @@ void _Py_InitializeCore(const _PyCoreConfig *config) Py_HashRandomizationFlag = 1; } - _PyInterpreterState_Enable(&_PyRuntime); + _PyInterpreterState_Init(); interp = PyInterpreterState_New(); if (interp == NULL) Py_FatalError("Py_InitializeCore: can't make main interpreter"); @@ -727,7 +698,7 @@ void _Py_InitializeCore(const _PyCoreConfig *config) } /* Only when we get here is the runtime core fully initialized */ - _PyRuntime.core_initialized = 1; + _Py_CoreInitialized = 1; } /* Read configuration settings from standard locations @@ -768,10 +739,10 @@ int _Py_InitializeMainInterpreter(const _PyMainInterpreterConfig *config) PyInterpreterState *interp; PyThreadState *tstate; - if (!_PyRuntime.core_initialized) { + if (!_Py_CoreInitialized) { Py_FatalError("Py_InitializeMainInterpreter: runtime core not initialized"); } - if (_PyRuntime.initialized) { + if (_Py_Initialized) { Py_FatalError("Py_InitializeMainInterpreter: main interpreter already initialized"); } @@ -792,7 +763,7 @@ int _Py_InitializeMainInterpreter(const _PyMainInterpreterConfig *config) * This means anything which needs support from extension modules * or pure Python code in the standard library won't work. */ - _PyRuntime.initialized = 1; + _Py_Initialized = 1; return 0; } /* TODO: Report exceptions rather than fatal errors below here */ @@ -837,7 +808,7 @@ int _Py_InitializeMainInterpreter(const _PyMainInterpreterConfig *config) Py_XDECREF(warnings_module); } - _PyRuntime.initialized = 1; + _Py_Initialized = 1; if (!Py_NoSiteFlag) initsite(); /* Module site */ @@ -953,7 +924,7 @@ Py_FinalizeEx(void) PyThreadState *tstate; int status = 0; - if (!_PyRuntime.initialized) + if (!_Py_Initialized) return status; wait_for_thread_shutdown(); @@ -975,9 +946,9 @@ Py_FinalizeEx(void) /* Remaining threads (e.g. daemon threads) will automatically exit after taking the GIL (in PyEval_RestoreThread()). 
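Setting _Py_Finalizing before teardown, as the hunk below restores, is what lets daemon threads kill themselves: after re-acquiring the GIL in PyEval_RestoreThread(), any thread that is not the finalizing thread exits instead of running more bytecode. A schematic sketch of just that guard (the GIL calls are left as comments; restore_thread is a hypothetical stand-in):

    #include <stdio.h>

    static void *finalizing = NULL;   /* stand-in for _Py_Finalizing */

    static int restore_thread(void *tstate)
    {
        /* take_gil(tstate) happens first in the real code */
        if (finalizing != NULL && tstate != finalizing) {
            /* real code: drop_gil(tstate); PyThread_exit_thread(); */
            return 0;   /* must not keep running Python code */
        }
        return 1;
    }

    int main(void)
    {
        int main_ts = 0, daemon_ts = 0;     /* dummy thread states */
        finalizing = &main_ts;              /* Py_FinalizeEx() set this */
        printf("main keeps running: %d\n", restore_thread(&main_ts));     /* 1 */
        printf("daemon keeps running: %d\n", restore_thread(&daemon_ts)); /* 0 */
        return 0;
    }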
*/ - _PyRuntime.finalizing = tstate; - _PyRuntime.initialized = 0; - _PyRuntime.core_initialized = 0; + _Py_Finalizing = tstate; + _Py_Initialized = 0; + _Py_CoreInitialized = 0; /* Flush sys.stdout and sys.stderr */ if (flush_std_files() < 0) { @@ -1139,7 +1110,6 @@ Py_FinalizeEx(void) #endif call_ll_exitfuncs(); - _PyRuntime_Finalize(); return status; } @@ -1169,7 +1139,7 @@ Py_NewInterpreter(void) PyThreadState *tstate, *save_tstate; PyObject *bimod, *sysmod; - if (!_PyRuntime.initialized) + if (!_Py_Initialized) Py_FatalError("Py_NewInterpreter: call Py_Initialize first"); #ifdef WITH_THREAD @@ -1884,19 +1854,20 @@ exit: # include "pythread.h" #endif +static void (*pyexitfunc)(void) = NULL; /* For the atexit module. */ void _Py_PyAtExit(void (*func)(void)) { - _PyRuntime.pyexitfunc = func; + pyexitfunc = func; } static void call_py_exitfuncs(void) { - if (_PyRuntime.pyexitfunc == NULL) + if (pyexitfunc == NULL) return; - (*_PyRuntime.pyexitfunc)(); + (*pyexitfunc)(); PyErr_Clear(); } @@ -1929,19 +1900,22 @@ wait_for_thread_shutdown(void) } #define NEXITFUNCS 32 +static void (*exitfuncs[NEXITFUNCS])(void); +static int nexitfuncs = 0; + int Py_AtExit(void (*func)(void)) { - if (_PyRuntime.nexitfuncs >= NEXITFUNCS) + if (nexitfuncs >= NEXITFUNCS) return -1; - _PyRuntime.exitfuncs[_PyRuntime.nexitfuncs++] = func; + exitfuncs[nexitfuncs++] = func; return 0; } static void call_ll_exitfuncs(void) { - while (_PyRuntime.nexitfuncs > 0) - (*_PyRuntime.exitfuncs[--_PyRuntime.nexitfuncs])(); + while (nexitfuncs > 0) + (*exitfuncs[--nexitfuncs])(); fflush(stdout); fflush(stderr); diff --git a/Python/pystate.c b/Python/pystate.c index 2d92637..30a3722 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -34,65 +34,55 @@ to avoid the expense of doing their own locking). 
extern "C" { #endif -void -_PyRuntimeState_Init(_PyRuntimeState *runtime) -{ - memset(runtime, 0, sizeof(*runtime)); - - _PyObject_Initialize(&runtime->obj); - _PyMem_Initialize(&runtime->mem); - _PyGC_Initialize(&runtime->gc); - _PyEval_Initialize(&runtime->ceval); - - runtime->gilstate.check_enabled = 1; - runtime->gilstate.autoTLSkey = -1; +int _PyGILState_check_enabled = 1; #ifdef WITH_THREAD - runtime->interpreters.mutex = PyThread_allocate_lock(); - if (runtime->interpreters.mutex == NULL) - Py_FatalError("Can't initialize threads for interpreter"); -#endif - runtime->interpreters.next_id = -1; -} - -void -_PyRuntimeState_Fini(_PyRuntimeState *runtime) -{ -#ifdef WITH_THREAD - if (runtime->interpreters.mutex != NULL) { - PyThread_free_lock(runtime->interpreters.mutex); - runtime->interpreters.mutex = NULL; - } -#endif -} - -#ifdef WITH_THREAD -#define HEAD_LOCK() PyThread_acquire_lock(_PyRuntime.interpreters.mutex, \ - WAIT_LOCK) -#define HEAD_UNLOCK() PyThread_release_lock(_PyRuntime.interpreters.mutex) +#include "pythread.h" +static PyThread_type_lock head_mutex = NULL; /* Protects interp->tstate_head */ +#define HEAD_INIT() (void)(head_mutex || (head_mutex = PyThread_allocate_lock())) +#define HEAD_LOCK() PyThread_acquire_lock(head_mutex, WAIT_LOCK) +#define HEAD_UNLOCK() PyThread_release_lock(head_mutex) + +/* The single PyInterpreterState used by this process' + GILState implementation +*/ +/* TODO: Given interp_main, it may be possible to kill this ref */ +static PyInterpreterState *autoInterpreterState = NULL; +static int autoTLSkey = -1; #else +#define HEAD_INIT() /* Nothing */ #define HEAD_LOCK() /* Nothing */ #define HEAD_UNLOCK() /* Nothing */ #endif +static PyInterpreterState *interp_head = NULL; +static PyInterpreterState *interp_main = NULL; + +/* Assuming the current thread holds the GIL, this is the + PyThreadState for the current thread. */ +_Py_atomic_address _PyThreadState_Current = {0}; +PyThreadFrameGetter _PyThreadState_GetFrame = NULL; + #ifdef WITH_THREAD static void _PyGILState_NoteThreadState(PyThreadState* tstate); #endif +/* _next_interp_id is an auto-numbered sequence of small integers. + It gets initialized in _PyInterpreterState_Init(), which is called + in Py_Initialize(), and used in PyInterpreterState_New(). A negative + interpreter ID indicates an error occurred. The main interpreter + will always have an ID of 0. Overflow results in a RuntimeError. + If that becomes a problem later then we can adjust, e.g. by using + a Python int. + + We initialize this to -1 so that the pre-Py_Initialize() value + results in an error. */ +static int64_t _next_interp_id = -1; + void -_PyInterpreterState_Enable(_PyRuntimeState *runtime) +_PyInterpreterState_Init(void) { - runtime->interpreters.next_id = 0; -#ifdef WITH_THREAD - /* Since we only call _PyRuntimeState_Init() once per process - (see _PyRuntime_Initialize()), we make sure the mutex is - initialized here. 
*/ - if (runtime->interpreters.mutex == NULL) { - runtime->interpreters.mutex = PyThread_allocate_lock(); - if (runtime->interpreters.mutex == NULL) - Py_FatalError("Can't initialize threads for interpreter"); - } -#endif + _next_interp_id = 0; } PyInterpreterState * @@ -102,16 +92,16 @@ PyInterpreterState_New(void) PyMem_RawMalloc(sizeof(PyInterpreterState)); if (interp != NULL) { + HEAD_INIT(); +#ifdef WITH_THREAD + if (head_mutex == NULL) + Py_FatalError("Can't initialize threads for interpreter"); +#endif interp->modules_by_index = NULL; interp->sysdict = NULL; interp->builtins = NULL; interp->builtins_copy = NULL; interp->tstate_head = NULL; - interp->check_interval = 100; - interp->warnoptions = NULL; - interp->xoptions = NULL; - interp->num_threads = 0; - interp->pythread_stacksize = 0; interp->codec_search_path = NULL; interp->codec_search_cache = NULL; interp->codec_error_registry = NULL; @@ -135,19 +125,19 @@ PyInterpreterState_New(void) #endif HEAD_LOCK(); - interp->next = _PyRuntime.interpreters.head; - if (_PyRuntime.interpreters.main == NULL) { - _PyRuntime.interpreters.main = interp; + interp->next = interp_head; + if (interp_main == NULL) { + interp_main = interp; } - _PyRuntime.interpreters.head = interp; - if (_PyRuntime.interpreters.next_id < 0) { + interp_head = interp; + if (_next_interp_id < 0) { /* overflow or Py_Initialize() not called! */ PyErr_SetString(PyExc_RuntimeError, "failed to get an interpreter ID"); interp = NULL; } else { - interp->id = _PyRuntime.interpreters.next_id; - _PyRuntime.interpreters.next_id += 1; + interp->id = _next_interp_id; + _next_interp_id += 1; } HEAD_UNLOCK(); } @@ -199,7 +189,7 @@ PyInterpreterState_Delete(PyInterpreterState *interp) PyInterpreterState **p; zapthreads(interp); HEAD_LOCK(); - for (p = &_PyRuntime.interpreters.head; ; p = &(*p)->next) { + for (p = &interp_head; ; p = &(*p)->next) { if (*p == NULL) Py_FatalError( "PyInterpreterState_Delete: invalid interp"); @@ -209,13 +199,19 @@ PyInterpreterState_Delete(PyInterpreterState *interp) if (interp->tstate_head != NULL) Py_FatalError("PyInterpreterState_Delete: remaining threads"); *p = interp->next; - if (_PyRuntime.interpreters.main == interp) { - _PyRuntime.interpreters.main = NULL; - if (_PyRuntime.interpreters.head != NULL) + if (interp_main == interp) { + interp_main = NULL; + if (interp_head != NULL) Py_FatalError("PyInterpreterState_Delete: remaining subinterpreters"); } HEAD_UNLOCK(); PyMem_RawFree(interp); +#ifdef WITH_THREAD + if (interp_head == NULL && head_mutex != NULL) { + PyThread_free_lock(head_mutex); + head_mutex = NULL; + } +#endif } @@ -503,11 +499,8 @@ PyThreadState_Delete(PyThreadState *tstate) if (tstate == GET_TSTATE()) Py_FatalError("PyThreadState_Delete: tstate is still current"); #ifdef WITH_THREAD - if (_PyRuntime.gilstate.autoInterpreterState && - PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == tstate) - { - PyThread_delete_key_value(_PyRuntime.gilstate.autoTLSkey); - } + if (autoInterpreterState && PyThread_get_key_value(autoTLSkey) == tstate) + PyThread_delete_key_value(autoTLSkey); #endif /* WITH_THREAD */ tstate_delete_common(tstate); } @@ -522,11 +515,8 @@ PyThreadState_DeleteCurrent() Py_FatalError( "PyThreadState_DeleteCurrent: no current tstate"); tstate_delete_common(tstate); - if (_PyRuntime.gilstate.autoInterpreterState && - PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == tstate) - { - PyThread_delete_key_value(_PyRuntime.gilstate.autoTLSkey); - } + if (autoInterpreterState && PyThread_get_key_value(autoTLSkey) == 
tstate) + PyThread_delete_key_value(autoTLSkey); SET_TSTATE(NULL); PyEval_ReleaseLock(); } @@ -686,13 +676,13 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc) PyInterpreterState * PyInterpreterState_Head(void) { - return _PyRuntime.interpreters.head; + return interp_head; } PyInterpreterState * PyInterpreterState_Main(void) { - return _PyRuntime.interpreters.main; + return interp_main; } PyInterpreterState * @@ -732,7 +722,7 @@ _PyThread_CurrentFrames(void) * need to grab head_mutex for the duration. */ HEAD_LOCK(); - for (i = _PyRuntime.interpreters.head; i != NULL; i = i->next) { + for (i = interp_head; i != NULL; i = i->next) { PyThreadState *t; for (t = i->tstate_head; t != NULL; t = t->next) { PyObject *id; @@ -784,11 +774,11 @@ void _PyGILState_Init(PyInterpreterState *i, PyThreadState *t) { assert(i && t); /* must init with valid states */ - _PyRuntime.gilstate.autoTLSkey = PyThread_create_key(); - if (_PyRuntime.gilstate.autoTLSkey == -1) + autoTLSkey = PyThread_create_key(); + if (autoTLSkey == -1) Py_FatalError("Could not allocate TLS entry"); - _PyRuntime.gilstate.autoInterpreterState = i; - assert(PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == NULL); + autoInterpreterState = i; + assert(PyThread_get_key_value(autoTLSkey) == NULL); assert(t->gilstate_counter == 0); _PyGILState_NoteThreadState(t); @@ -797,15 +787,15 @@ _PyGILState_Init(PyInterpreterState *i, PyThreadState *t) PyInterpreterState * _PyGILState_GetInterpreterStateUnsafe(void) { - return _PyRuntime.gilstate.autoInterpreterState; + return autoInterpreterState; } void _PyGILState_Fini(void) { - PyThread_delete_key(_PyRuntime.gilstate.autoTLSkey); - _PyRuntime.gilstate.autoTLSkey = -1; - _PyRuntime.gilstate.autoInterpreterState = NULL; + PyThread_delete_key(autoTLSkey); + autoTLSkey = -1; + autoInterpreterState = NULL; } /* Reset the TLS key - called by PyOS_AfterFork_Child(). @@ -816,19 +806,17 @@ void _PyGILState_Reinit(void) { #ifdef WITH_THREAD - _PyRuntime.interpreters.mutex = PyThread_allocate_lock(); - if (_PyRuntime.interpreters.mutex == NULL) - Py_FatalError("Can't initialize threads for interpreter"); + head_mutex = NULL; + HEAD_INIT(); #endif PyThreadState *tstate = PyGILState_GetThisThreadState(); - PyThread_delete_key(_PyRuntime.gilstate.autoTLSkey); - if ((_PyRuntime.gilstate.autoTLSkey = PyThread_create_key()) == -1) + PyThread_delete_key(autoTLSkey); + if ((autoTLSkey = PyThread_create_key()) == -1) Py_FatalError("Could not allocate TLS entry"); /* If the thread had an associated auto thread state, reassociate it with * the new key. */ - if (tstate && PyThread_set_key_value(_PyRuntime.gilstate.autoTLSkey, - (void *)tstate) < 0) + if (tstate && PyThread_set_key_value(autoTLSkey, (void *)tstate) < 0) Py_FatalError("Couldn't create autoTLSkey mapping"); } @@ -843,7 +831,7 @@ _PyGILState_NoteThreadState(PyThreadState* tstate) /* If autoTLSkey isn't initialized, this must be the very first threadstate created in Py_Initialize(). Don't do anything for now (we'll be back here when _PyGILState_Init is called). */ - if (!_PyRuntime.gilstate.autoInterpreterState) + if (!autoInterpreterState) return; /* Stick the thread state for this thread in thread local storage. @@ -858,13 +846,9 @@ _PyGILState_NoteThreadState(PyThreadState* tstate) The first thread state created for that given OS level thread will "win", which seems reasonable behaviour. 
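The autoTLSkey machinery in these hunks maps each OS thread to its PyThreadState via thread-local storage, with the first-association-wins rule the comment above describes. A small pthread sketch of the same idea (compile with -pthread; toy_tstate and note_thread_state are illustrative names):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_key_t auto_key;

    struct toy_tstate { int id; };

    static void note_thread_state(struct toy_tstate *ts)
    {
        if (pthread_getspecific(auto_key) == NULL)
            pthread_setspecific(auto_key, ts);   /* first one wins */
    }

    static void *worker(void *arg)
    {
        struct toy_tstate ts = { 2 };
        note_thread_state(&ts);
        struct toy_tstate *cur = pthread_getspecific(auto_key);
        printf("worker sees tstate %d\n", cur->id);   /* 2 */
        return NULL;
    }

    int main(void)
    {
        pthread_key_create(&auto_key, NULL);
        struct toy_tstate main_ts = { 1 };
        note_thread_state(&main_ts);

        pthread_t t;
        pthread_create(&t, NULL, worker, NULL);
        pthread_join(t, NULL);

        struct toy_tstate *cur = pthread_getspecific(auto_key);
        printf("main sees tstate %d\n", cur->id);     /* 1: per-thread value */
        return 0;
    }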
*/ - if (PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == NULL) { - if ((PyThread_set_key_value(_PyRuntime.gilstate.autoTLSkey, - (void *)tstate) - ) < 0) - { + if (PyThread_get_key_value(autoTLSkey) == NULL) { + if (PyThread_set_key_value(autoTLSkey, (void *)tstate) < 0) Py_FatalError("Couldn't create autoTLSkey mapping"); - } } /* PyGILState_Release must not try to delete this thread state. */ @@ -875,10 +859,9 @@ _PyGILState_NoteThreadState(PyThreadState* tstate) PyThreadState * PyGILState_GetThisThreadState(void) { - if (_PyRuntime.gilstate.autoInterpreterState == NULL) + if (autoInterpreterState == NULL) return NULL; - return (PyThreadState *)PyThread_get_key_value( - _PyRuntime.gilstate.autoTLSkey); + return (PyThreadState *)PyThread_get_key_value(autoTLSkey); } int @@ -889,7 +872,7 @@ PyGILState_Check(void) if (!_PyGILState_check_enabled) return 1; - if (_PyRuntime.gilstate.autoTLSkey == -1) + if (autoTLSkey == -1) return 1; tstate = GET_TSTATE(); @@ -909,10 +892,8 @@ PyGILState_Ensure(void) spells out other issues. Embedders are expected to have called Py_Initialize() and usually PyEval_InitThreads(). */ - /* Py_Initialize() hasn't been called! */ - assert(_PyRuntime.gilstate.autoInterpreterState); - tcur = (PyThreadState *)PyThread_get_key_value( - _PyRuntime.gilstate.autoTLSkey); + assert(autoInterpreterState); /* Py_Initialize() hasn't been called! */ + tcur = (PyThreadState *)PyThread_get_key_value(autoTLSkey); if (tcur == NULL) { /* At startup, Python has no concrete GIL. If PyGILState_Ensure() is called from a new thread for the first time, we need the create the @@ -920,7 +901,7 @@ PyGILState_Ensure(void) PyEval_InitThreads(); /* Create a new thread state for this thread */ - tcur = PyThreadState_New(_PyRuntime.gilstate.autoInterpreterState); + tcur = PyThreadState_New(autoInterpreterState); if (tcur == NULL) Py_FatalError("Couldn't create thread-state for new thread"); /* This is our thread state! We'll need to delete it in the @@ -945,7 +926,7 @@ void PyGILState_Release(PyGILState_STATE oldstate) { PyThreadState *tcur = (PyThreadState *)PyThread_get_key_value( - _PyRuntime.gilstate.autoTLSkey); + autoTLSkey); if (tcur == NULL) Py_FatalError("auto-releasing thread-state, " "but no thread-state for this thread"); diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 080c541..852babb 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -519,6 +519,8 @@ Return the profiling function set with sys.setprofile.\n\ See the profiler chapter in the library manual." ); +static int _check_interval = 100; + static PyObject * sys_setcheckinterval(PyObject *self, PyObject *args) { @@ -527,8 +529,7 @@ sys_setcheckinterval(PyObject *self, PyObject *args) "are deprecated. Use sys.setswitchinterval() " "instead.", 1) < 0) return NULL; - PyInterpreterState *interp = PyThreadState_GET()->interp; - if (!PyArg_ParseTuple(args, "i:setcheckinterval", &interp->check_interval)) + if (!PyArg_ParseTuple(args, "i:setcheckinterval", &_check_interval)) return NULL; Py_RETURN_NONE; } @@ -548,8 +549,7 @@ sys_getcheckinterval(PyObject *self, PyObject *args) "are deprecated. 
Use sys.getswitchinterval() " "instead.", 1) < 0) return NULL; - PyInterpreterState *interp = PyThreadState_GET()->interp; - return PyLong_FromLong(interp->check_interval); + return PyLong_FromLong(_check_interval); } PyDoc_STRVAR(getcheckinterval_doc, @@ -1339,7 +1339,7 @@ Clear the internal type lookup cache."); static PyObject * sys_is_finalizing(PyObject* self, PyObject* args) { - return PyBool_FromLong(_Py_IS_FINALIZING()); + return PyBool_FromLong(_Py_Finalizing != NULL); } PyDoc_STRVAR(is_finalizing_doc, @@ -1479,24 +1479,11 @@ list_builtin_module_names(void) return list; } -static PyObject * -get_warnoptions(void) -{ - PyObject *warnoptions = PyThreadState_GET()->interp->warnoptions; - if (warnoptions == NULL || !PyList_Check(warnoptions)) { - Py_XDECREF(warnoptions); - warnoptions = PyList_New(0); - if (warnoptions == NULL) - return NULL; - PyThreadState_GET()->interp->warnoptions = warnoptions; - } - return warnoptions; -} +static PyObject *warnoptions = NULL; void PySys_ResetWarnOptions(void) { - PyObject *warnoptions = PyThreadState_GET()->interp->warnoptions; if (warnoptions == NULL || !PyList_Check(warnoptions)) return; PyList_SetSlice(warnoptions, 0, PyList_GET_SIZE(warnoptions), NULL); @@ -1505,9 +1492,12 @@ PySys_ResetWarnOptions(void) void PySys_AddWarnOptionUnicode(PyObject *unicode) { - PyObject *warnoptions = get_warnoptions(); - if (warnoptions == NULL) - return; + if (warnoptions == NULL || !PyList_Check(warnoptions)) { + Py_XDECREF(warnoptions); + warnoptions = PyList_New(0); + if (warnoptions == NULL) + return; + } PyList_Append(warnoptions, unicode); } @@ -1525,20 +1515,17 @@ PySys_AddWarnOption(const wchar_t *s) int PySys_HasWarnOptions(void) { - PyObject *warnoptions = PyThreadState_GET()->interp->warnoptions; return (warnoptions != NULL && (PyList_Size(warnoptions) > 0)) ? 1 : 0; } +static PyObject *xoptions = NULL; + static PyObject * get_xoptions(void) { - PyObject *xoptions = PyThreadState_GET()->interp->xoptions; if (xoptions == NULL || !PyDict_Check(xoptions)) { Py_XDECREF(xoptions); xoptions = PyDict_New(); - if (xoptions == NULL) - return NULL; - PyThreadState_GET()->interp->xoptions = xoptions; } return xoptions; } @@ -2143,15 +2130,17 @@ _PySys_EndInit(PyObject *sysdict) SET_SYS_FROM_STRING_INT_RESULT("base_exec_prefix", PyUnicode_FromWideChar(Py_GetExecPrefix(), -1)); - PyObject *warnoptions = get_warnoptions(); - if (warnoptions == NULL) - return -1; - SET_SYS_FROM_STRING_BORROW_INT_RESULT("warnoptions", warnoptions); + if (warnoptions == NULL) { + warnoptions = PyList_New(0); + if (warnoptions == NULL) + return -1; + } - PyObject *xoptions = get_xoptions(); - if (xoptions == NULL) - return -1; - SET_SYS_FROM_STRING_BORROW_INT_RESULT("_xoptions", xoptions); + SET_SYS_FROM_STRING_INT_RESULT("warnoptions", + PyList_GetSlice(warnoptions, + 0, Py_SIZE(warnoptions))); + + SET_SYS_FROM_STRING_BORROW_INT_RESULT("_xoptions", get_xoptions()); if (PyErr_Occurred()) return -1; diff --git a/Python/thread.c b/Python/thread.c index 6fd594f..4d2f2c3 100644 --- a/Python/thread.c +++ b/Python/thread.c @@ -76,6 +76,11 @@ PyThread_init_thread(void) PyThread__init_thread(); } +/* Support for runtime thread stack size tuning. + A value of 0 means using the platform's default stack size + or the size specified by the THREAD_STACK_SIZE macro. 
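The thread stack size interface restored below treats 0 as "use the platform default" and otherwise validates the request against a range, as _pythread_nt_set_stacksize() and _pythread_pthread_set_stacksize() do. A sketch with made-up bounds (the real limits are platform macros such as THREAD_MIN_STACKSIZE):

    #include <stdio.h>

    #define MIN_STACKSIZE 0x8000u        /* 32 KiB, hypothetical */
    #define MAX_STACKSIZE 0x10000000u    /* 256 MiB, hypothetical */

    static size_t stacksize = 0;         /* 0 => platform default */

    static int set_stacksize(size_t size)
    {
        if (size == 0 || (size >= MIN_STACKSIZE && size < MAX_STACKSIZE)) {
            stacksize = size;
            return 0;
        }
        return -1;                       /* out of range: keep old value */
    }

    int main(void)
    {
        printf("%d\n", set_stacksize(1 << 20));  /* 0: 1 MiB accepted */
        printf("%d\n", set_stacksize(1));        /* -1: too small */
        printf("stacksize=%zu\n", stacksize);    /* still 1 MiB */
        return 0;
    }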
*/ +static size_t _pythread_stacksize = 0; + #if defined(_POSIX_THREADS) # define PYTHREAD_NAME "pthread" # include "thread_pthread.h" @@ -91,7 +96,7 @@ PyThread_init_thread(void) size_t PyThread_get_stacksize(void) { - return PyThreadState_GET()->interp->pythread_stacksize; + return _pythread_stacksize; } /* Only platforms defining a THREAD_SET_STACKSIZE() macro diff --git a/Python/thread_nt.h b/Python/thread_nt.h index 2f3a71b..47eb4b6 100644 --- a/Python/thread_nt.h +++ b/Python/thread_nt.h @@ -189,10 +189,9 @@ PyThread_start_new_thread(void (*func)(void *), void *arg) return PYTHREAD_INVALID_THREAD_ID; obj->func = func; obj->arg = arg; - PyThreadState *tstate = PyThreadState_GET(); - size_t stacksize = tstate ? tstate->interp->pythread_stacksize : 0; hThread = (HANDLE)_beginthreadex(0, - Py_SAFE_DOWNCAST(stacksize, Py_ssize_t, unsigned int), + Py_SAFE_DOWNCAST(_pythread_stacksize, + Py_ssize_t, unsigned int), bootstrap, obj, 0, &threadID); if (hThread == 0) { @@ -333,13 +332,13 @@ _pythread_nt_set_stacksize(size_t size) { /* set to default */ if (size == 0) { - PyThreadState_GET()->interp->pythread_stacksize = 0; + _pythread_stacksize = 0; return 0; } /* valid range? */ if (size >= THREAD_MIN_STACKSIZE && size < THREAD_MAX_STACKSIZE) { - PyThreadState_GET()->interp->pythread_stacksize = size; + _pythread_stacksize = size; return 0; } diff --git a/Python/thread_pthread.h b/Python/thread_pthread.h index ea05b6f..268dec4 100644 --- a/Python/thread_pthread.h +++ b/Python/thread_pthread.h @@ -205,9 +205,8 @@ PyThread_start_new_thread(void (*func)(void *), void *arg) return PYTHREAD_INVALID_THREAD_ID; #endif #if defined(THREAD_STACK_SIZE) - PyThreadState *tstate = PyThreadState_GET(); - size_t stacksize = tstate ? tstate->interp->pythread_stacksize : 0; - tss = (stacksize != 0) ? stacksize : THREAD_STACK_SIZE; + tss = (_pythread_stacksize != 0) ? _pythread_stacksize + : THREAD_STACK_SIZE; if (tss != 0) { if (pthread_attr_setstacksize(&attrs, tss) != 0) { pthread_attr_destroy(&attrs); @@ -579,7 +578,7 @@ _pythread_pthread_set_stacksize(size_t size) /* set to default */ if (size == 0) { - PyThreadState_GET()->interp->pythread_stacksize = 0; + _pythread_stacksize = 0; return 0; } @@ -596,7 +595,7 @@ _pythread_pthread_set_stacksize(size_t size) rc = pthread_attr_setstacksize(&attrs, size); pthread_attr_destroy(&attrs); if (rc == 0) { - PyThreadState_GET()->interp->pythread_stacksize = size; + _pythread_stacksize = size; return 0; } } diff --git a/Tools/c-globals/README b/Tools/c-globals/README deleted file mode 100644 index d0e6e8e..0000000 --- a/Tools/c-globals/README +++ /dev/null @@ -1,41 +0,0 @@ -####################################### -# C Globals and CPython Runtime State. - -CPython's C code makes extensive use of global variables. Each global -falls into one of several categories: - -* (effectively) constants (incl. static types) -* globals used exclusively in main or in the REPL -* freelists, caches, and counters -* process-global state -* module state -* Python runtime state - -The ignored-globals.txt file is organized similarly. Of the different -categories, the last two are problematic and generally should not exist -in the codebase. - -Globals that hold module state (i.e. in Modules/*.c) cause problems -when multiple interpreters are in use. For more info, see PEP 3121, -which addresses the situation for extension modules in general. - -Globals in the last category should be avoided as well. The problem -isn't with the Python runtime having state. 
Rather, the problem is with -that state being spread throughout the codebase in dozens of individual -globals. Unlike the other globals, the runtime state represents a set -of values that are constantly shifting in a complex way. When they are -spread out it's harder to get a clear picture of what the runtime -involves. Furthermore, when they are spread out it complicates efforts -to change the runtime. - -Consequently, the globals for Python's runtime state have been -consolidated under a single top-level _PyRuntime global. No new globals -should be added for runtime state. Instead, they should be added to -_PyRuntimeState or one of its sub-structs. The check-c-globals script -should be run to ensure that no new globals have been added: - - python3 Tools/c-globals/check-c-globals.py - -If it reports any globals then they should be resolved. If the globals -are runtime state then they should be folded into _PyRuntimeState. -Otherwise they should be added to ignored-globals.txt. diff --git a/Tools/c-globals/check-c-globals.py b/Tools/c-globals/check-c-globals.py deleted file mode 100644 index 1de69a8..0000000 --- a/Tools/c-globals/check-c-globals.py +++ /dev/null @@ -1,446 +0,0 @@ - -from collections import namedtuple -import glob -import os.path -import re -import shutil -import sys -import subprocess - - -VERBOSITY = 2 - -C_GLOBALS_DIR = os.path.abspath(os.path.dirname(__file__)) -TOOLS_DIR = os.path.dirname(C_GLOBALS_DIR) -ROOT_DIR = os.path.dirname(TOOLS_DIR) -GLOBALS_FILE = os.path.join(C_GLOBALS_DIR, 'ignored-globals.txt') - -SOURCE_DIRS = ['Include', 'Objects', 'Modules', 'Parser', 'Python'] - -CAPI_REGEX = re.compile(r'^ *PyAPI_DATA\([^)]*\) \W*(_?Py\w+(?:, \w+)*\w).*;.*$') - - -IGNORED_VARS = { - '_DYNAMIC', - '_GLOBAL_OFFSET_TABLE_', - '__JCR_LIST__', - '__JCR_END__', - '__TMC_END__', - '__bss_start', - '__data_start', - '__dso_handle', - '_edata', - '_end', - } - - -def find_capi_vars(root): - capi_vars = {} - for dirname in SOURCE_DIRS: - for filename in glob.glob(os.path.join(ROOT_DIR, dirname, '**/*.[hc]'), - recursive=True): - with open(filename) as file: - for name in _find_capi_vars(file): - if name in capi_vars: - assert not filename.endswith('.c') - assert capi_vars[name].endswith('.c') - capi_vars[name] = filename - return capi_vars - - -def _find_capi_vars(lines): - for line in lines: - if not line.startswith('PyAPI_DATA'): - continue - assert '{' not in line - match = CAPI_REGEX.match(line) - assert match - names, = match.groups() - for name in names.split(', '): - yield name - - -def _read_global_names(filename): - # These variables are shared between all interpreters in the process. - with open(filename) as file: - return {line.partition('#')[0].strip() - for line in file - if line.strip() and not line.startswith('#')} - - -def _is_global_var(name, globalnames): - if _is_autogen_var(name): - return True - if _is_type_var(name): - return True - if _is_module(name): - return True - if _is_exception(name): - return True - if _is_compiler(name): - return True - return name in globalnames - - -def _is_autogen_var(name): - return ( - name.startswith('PyId_') or - '.' in name or - # Objects/typeobject.c - name.startswith('op_id.') or - name.startswith('rop_id.') or - # Python/graminit.c - name.startswith('arcs_') or - name.startswith('states_') - ) - - -def _is_type_var(name): - if name.endswith(('Type', '_Type', '_type')): # XXX Always a static type? 
- return True - if name.endswith('_desc'): # for structseq types - return True - return ( - name.startswith('doc_') or - name.endswith(('_doc', '__doc__', '_docstring')) or - name.endswith('_methods') or - name.endswith('_fields') or - name.endswith(('_memberlist', '_members')) or - name.endswith('_slots') or - name.endswith(('_getset', '_getsets', '_getsetlist')) or - name.endswith('_as_mapping') or - name.endswith('_as_number') or - name.endswith('_as_sequence') or - name.endswith('_as_buffer') or - name.endswith('_as_async') - ) - - -def _is_module(name): - if name.endswith(('_functions', 'Methods', '_Methods')): - return True - if name == 'module_def': - return True - if name == 'initialized': - return True - return name.endswith(('module', '_Module')) - - -def _is_exception(name): - # Other vars are enumerated in globals-core.txt. - if not name.startswith(('PyExc_', '_PyExc_')): - return False - return name.endswith(('Error', 'Warning')) - - -def _is_compiler(name): - return ( - # Python/Python-ast.c - name.endswith('_type') or - name.endswith('_singleton') or - name.endswith('_attributes') - ) - - -class Var(namedtuple('Var', 'name kind scope capi filename')): - - @classmethod - def parse_nm(cls, line, expected, ignored, capi_vars, globalnames): - _, _, line = line.partition(' ') # strip off the address - line = line.strip() - kind, _, line = line.partition(' ') - if kind in ignored or (): - return None - elif kind not in expected or (): - raise RuntimeError('unsupported NM type {!r}'.format(kind)) - - name, _, filename = line.partition('\t') - name = name.strip() - if _is_autogen_var(name): - return None - if _is_global_var(name, globalnames): - scope = 'global' - else: - scope = None - capi = (name in capi_vars or ()) - if filename: - filename = os.path.relpath(filename.partition(':')[0]) - return cls(name, kind, scope, capi, filename or '~???~') - - @property - def external(self): - return self.kind.isupper() - - -def find_vars(root, globals_filename=GLOBALS_FILE): - python = os.path.join(root, 'python') - if not os.path.exists(python): - raise RuntimeError('python binary missing (need to build it first?)') - capi_vars = find_capi_vars(root) - globalnames = _read_global_names(globals_filename) - - nm = shutil.which('nm') - if nm is None: - # XXX Use dumpbin.exe /SYMBOLS on Windows. - raise NotImplementedError - else: - yield from (var - for var in _find_var_symbols(python, nm, capi_vars, - globalnames) - if var.name not in IGNORED_VARS) - - -NM_FUNCS = set('Tt') -NM_PUBLIC_VARS = set('BD') -NM_PRIVATE_VARS = set('bd') -NM_VARS = NM_PUBLIC_VARS | NM_PRIVATE_VARS -NM_DATA = set('Rr') -NM_OTHER = set('ACGgiINpSsuUVvWw-?') -NM_IGNORED = NM_FUNCS | NM_DATA | NM_OTHER - - -def _find_var_symbols(python, nm, capi_vars, globalnames): - args = [nm, - '--line-numbers', - python] - out = subprocess.check_output(args) - for line in out.decode('utf-8').splitlines(): - var = Var.parse_nm(line, NM_VARS, NM_IGNORED, capi_vars, globalnames) - if var is None: - continue - yield var - - -####################################### - -class Filter(namedtuple('Filter', 'name op value action')): - - @classmethod - def parse(cls, raw): - action = '+' - if raw.startswith(('+', '-')): - action = raw[0] - raw = raw[1:] - # XXX Support < and >? 
- name, op, value = raw.partition('=') - return cls(name, op, value, action) - - def check(self, var): - value = getattr(var, self.name, None) - if not self.op: - matched = bool(value) - elif self.op == '=': - matched = (value == self.value) - else: - raise NotImplementedError - - if self.action == '+': - return matched - elif self.action == '-': - return not matched - else: - raise NotImplementedError - - -def filter_var(var, filters): - for filter in filters: - if not filter.check(var): - return False - return True - - -def make_sort_key(spec): - columns = [(col.strip('_'), '_' if col.startswith('_') else '') - for col in spec] - def sort_key(var): - return tuple(getattr(var, col).lstrip(prefix) - for col, prefix in columns) - return sort_key - - -def make_groups(allvars, spec): - group = spec - groups = {} - for var in allvars: - value = getattr(var, group) - key = '{}: {}'.format(group, value) - try: - groupvars = groups[key] - except KeyError: - groupvars = groups[key] = [] - groupvars.append(var) - return groups - - -def format_groups(groups, columns, fmts, widths): - for group in sorted(groups): - groupvars = groups[group] - yield '', 0 - yield ' # {}'.format(group), 0 - yield from format_vars(groupvars, columns, fmts, widths) - - -def format_vars(allvars, columns, fmts, widths): - fmt = ' '.join(fmts[col] for col in columns) - fmt = ' ' + fmt.replace(' ', ' ') + ' ' # for div margin - header = fmt.replace(':', ':^').format(*(col.upper() for col in columns)) - yield header, 0 - div = ' '.join('-'*(widths[col]+2) for col in columns) - yield div, 0 - for var in allvars: - values = (getattr(var, col) for col in columns) - row = fmt.format(*('X' if val is True else val or '' - for val in values)) - yield row, 1 - yield div, 0 - - -####################################### - -COLUMNS = 'name,external,capi,scope,filename' -COLUMN_NAMES = COLUMNS.split(',') - -COLUMN_WIDTHS = {col: len(col) - for col in COLUMN_NAMES} -COLUMN_WIDTHS.update({ - 'name': 50, - 'scope': 7, - 'filename': 40, - }) -COLUMN_FORMATS = {col: '{:%s}' % width - for col, width in COLUMN_WIDTHS.items()} -for col in COLUMN_FORMATS: - if COLUMN_WIDTHS[col] == len(col): - COLUMN_FORMATS[col] = COLUMN_FORMATS[col].replace(':', ':^') - - -def _parse_filters_arg(raw, error): - filters = [] - for value in raw.split(','): - value=value.strip() - if not value: - continue - try: - filter = Filter.parse(value) - if filter.name not in COLUMN_NAMES: - raise Exception('unsupported column {!r}'.format(filter.name)) - except Exception as e: - error('bad filter {!r}: {}'.format(raw, e)) - filters.append(filter) - return filters - - -def _parse_columns_arg(raw, error): - columns = raw.split(',') - for column in columns: - if column not in COLUMN_NAMES: - error('unsupported column {!r}'.format(column)) - return columns - - -def _parse_sort_arg(raw, error): - sort = raw.split(',') - for column in sort: - if column.lstrip('_') not in COLUMN_NAMES: - error('unsupported column {!r}'.format(column)) - return sort - - -def _parse_group_arg(raw, error): - if not raw: - return raw - group = raw - if group not in COLUMN_NAMES: - error('unsupported column {!r}'.format(group)) - if group != 'filename': - error('unsupported group {!r}'.format(group)) - return group - - -def parse_args(argv=None): - if argv is None: - argv = sys.argv[1:] - - import argparse - parser = argparse.ArgumentParser() - - parser.add_argument('-v', '--verbose', action='count', default=0) - parser.add_argument('-q', '--quiet', action='count', default=0) - - 
parser.add_argument('--filters', default='-scope', - help='[[-]<COLUMN>[=<GLOB>]] ...') - - parser.add_argument('--columns', default=COLUMNS, - help='a comma-separated list of columns to show') - parser.add_argument('--sort', default='filename,_name', - help='a comma-separated list of columns to sort') - parser.add_argument('--group', - help='group by the given column name (- to not group)') - - parser.add_argument('--rc-on-match', dest='rc', type=int) - - parser.add_argument('filename', nargs='?', default=GLOBALS_FILE) - - args = parser.parse_args(argv) - - verbose = vars(args).pop('verbose', 0) - quiet = vars(args).pop('quiet', 0) - args.verbosity = max(0, VERBOSITY + verbose - quiet) - - if args.sort.startswith('filename') and not args.group: - args.group = 'filename' - - if args.rc is None: - if '-scope=core' in args.filters or 'core' not in args.filters: - args.rc = 0 - else: - args.rc = 1 - - args.filters = _parse_filters_arg(args.filters, parser.error) - args.columns = _parse_columns_arg(args.columns, parser.error) - args.sort = _parse_sort_arg(args.sort, parser.error) - args.group = _parse_group_arg(args.group, parser.error) - - return args - - -def main(root=ROOT_DIR, filename=GLOBALS_FILE, - filters=None, columns=COLUMN_NAMES, sort=None, group=None, - verbosity=VERBOSITY, rc=1): - - log = lambda msg: ... - if verbosity >= 2: - log = lambda msg: print(msg) - - allvars = (var - for var in find_vars(root, filename) - if filter_var(var, filters)) - if sort: - allvars = sorted(allvars, key=make_sort_key(sort)) - - if group: - try: - columns.remove(group) - except ValueError: - pass - grouped = make_groups(allvars, group) - lines = format_groups(grouped, columns, COLUMN_FORMATS, COLUMN_WIDTHS) - else: - lines = format_vars(allvars, columns, COLUMN_FORMATS, COLUMN_WIDTHS) - - total = 0 - for line, count in lines: - total += count - log(line) - log('\ntotal: {}'.format(total)) - - if total and rc: - print('ERROR: found unsafe globals', file=sys.stderr) - return rc - return 0 - - -if __name__ == '__main__': - args = parse_args() - sys.exit( - main(**vars(args))) diff --git a/Tools/c-globals/ignored-globals.txt b/Tools/c-globals/ignored-globals.txt deleted file mode 100644 index 4fafba6..0000000 --- a/Tools/c-globals/ignored-globals.txt +++ /dev/null @@ -1,494 +0,0 @@ -# All variables declared here are shared between all interpreters -# in a single process. That means that they must not be changed -# unless that change should apply to all interpreters. -# -# See check-c-globals.py. -# -# Many generic names are handled via the script: -# -# * most exceptions and all warnings handled via _is_exception() -# * for builtin modules, generic names are handled via _is_module() -# * generic names for static types handled via _is_type_var() -# * AST vars handled via _is_compiler() - - -####################################### -# main - -# Modules/getpath.c -exec_prefix -module_search_path -prefix -progpath - -# Modules/main.c -orig_argc -orig_argv - -# Python/getopt.c -opt_ptr -_PyOS_optarg -_PyOS_opterr -_PyOS_optind - - -####################################### -# REPL - -# Parser/myreadline.c -PyOS_InputHook -PyOS_ReadlineFunctionPointer -_PyOS_ReadlineLock -_PyOS_ReadlineTState - - -####################################### -# state - -# Python/dtoa.c -p5s -pmem_next # very slight race -private_mem # very slight race - -# Python/import.c -# For the moment the import lock stays global. Ultimately there should -# be a global lock for extension modules and a per-interpreter lock. 
-import_lock -import_lock_level -import_lock_thread - -# Python/pylifecycle.c -_PyRuntime - - -#--------------------------------- -# module globals (PyObject) - -# Modules/_functoolsmodule.c -kwd_mark - -# Modules/_localemodule.c -Error - -# Modules/_threadmodule.c -ThreadError - -# Modules/_tracemalloc.c -unknown_filename - -# Modules/gcmodule.c -gc_str - -# Modules/posixmodule.c -billion -posix_putenv_garbage - -# Modules/signalmodule.c -DefaultHandler -IgnoreHandler -IntHandler -ItimerError - -# Modules/zipimport.c -ZipImportError -zip_directory_cache - - -#--------------------------------- -# module globals (other) - -# Modules/_tracemalloc.c -allocators -tables_lock -tracemalloc_config -tracemalloc_empty_traceback -tracemalloc_filenames -tracemalloc_peak_traced_memory -tracemalloc_reentrant_key -tracemalloc_traceback -tracemalloc_tracebacks -tracemalloc_traced_memory -tracemalloc_traces - -# Modules/faulthandler.c -fatal_error -faulthandler_handlers -old_stack -stack -thread -user_signals - -# Modules/posixmodule.c -posix_constants_confstr -posix_constants_pathconf -posix_constants_sysconf -_stat_float_times # deprecated, __main__-only -structseq_new -ticks_per_second - -# Modules/signalmodule.c -Handlers # main thread only -is_tripped # main thread only -main_pid -main_thread -old_siginthandler -wakeup_fd # main thread only - -# Modules/zipimport.c -zip_searchorder - -# Python/bltinmodule.c -Py_FileSystemDefaultEncodeErrors -Py_FileSystemDefaultEncoding -Py_HasFileSystemDefaultEncoding - -# Python/sysmodule.c -_PySys_ImplCacheTag -_PySys_ImplName - - -#--------------------------------- -# freelists - -# Modules/_collectionsmodule.c -freeblocks -numfreeblocks - -# Objects/classobject.c -free_list -numfree - -# Objects/dictobject.c -free_list -keys_free_list -numfree -numfreekeys - -# Objects/exceptions.c -memerrors_freelist -memerrors_numfree - -# Objects/floatobject.c -free_list -numfree - -# Objects/frameobject.c -free_list -numfree - -# Objects/genobject.c -ag_asend_freelist -ag_asend_freelist_free -ag_value_freelist -ag_value_freelist_free - -# Objects/listobject.c -free_list -numfree - -# Objects/methodobject.c -free_list -numfree - -# Objects/sliceobject.c -slice_cache # slight race - -# Objects/tupleobject.c -free_list -numfree - -# Python/dtoa.c -freelist # very slight race - - -#--------------------------------- -# caches (PyObject) - -# Objects/typeobject.c -method_cache # only for static types -next_version_tag # only for static types - -# Python/dynload_shlib.c -handles # slight race during import -nhandles # slight race during import - -# Python/import.c -extensions # slight race on init during import - - -#--------------------------------- -# caches (other) - -# Python/bootstrap_hash.c -urandom_cache - -# Python/modsupport.c -_Py_PackageContext # Slight race during import! Move to PyThreadState? 
- - -#--------------------------------- -# counters - -# Objects/bytesobject.c -null_strings -one_strings - -# Objects/dictobject.c -pydict_global_version - -# Objects/moduleobject.c -max_module_number # slight race during import - - -####################################### -# constants - -#--------------------------------- -# singletons - -# Objects/boolobject.c -_Py_FalseStruct -_Py_TrueStruct - -# Objects/object.c -_Py_NoneStruct -_Py_NotImplementedStruct - -# Objects/sliceobject.c -_Py_EllipsisObject - - -#--------------------------------- -# constants (other) - -# Modules/config.c -_PyImport_Inittab - -# Objects/bytearrayobject.c -_PyByteArray_empty_string - -# Objects/dictobject.c -empty_keys_struct -empty_values - -# Objects/floatobject.c -detected_double_format -detected_float_format -double_format -float_format - -# Objects/longobject.c -_PyLong_DigitValue - -# Objects/object.c -_Py_SwappedOp - -# Objects/obmalloc.c -_PyMem_Debug - -# Objects/setobject.c -_dummy_struct - -# Objects/structseq.c -PyStructSequence_UnnamedField - -# Objects/typeobject.c -name_op -slotdefs # almost -slotdefs_initialized # almost -subtype_getsets_dict_only -subtype_getsets_full -subtype_getsets_weakref_only -tp_new_methoddef - -# Objects/unicodeobject.c -bloom_linebreak -static_strings # slight race - -# Parser/tokenizer.c -_PyParser_TokenNames - -# Python/Python-ast.c -alias_fields - -# Python/codecs.c -Py_hexdigits -ucnhash_CAPI # slight performance-only race - -# Python/dynload_shlib.c -_PyImport_DynLoadFiletab - -# Python/fileutils.c -_Py_open_cloexec_works -force_ascii - -# Python/frozen.c -M___hello__ -PyImport_FrozenModules - -# Python/graminit.c -_PyParser_Grammar -dfas -labels - -# Python/import.c -PyImport_Inittab - -# Python/pylifecycle.c -_TARGET_LOCALES - - -#--------------------------------- -# initialized (PyObject) - -# Objects/bytesobject.c -characters -nullstring - -# Objects/exceptions.c -PyExc_RecursionErrorInst -errnomap - -# Objects/longobject.c -_PyLong_One -_PyLong_Zero -small_ints - -# Objects/setobject.c -emptyfrozenset - -# Objects/unicodeobject.c -interned # slight race on init in PyUnicode_InternInPlace() -unicode_empty -unicode_latin1 - - -#--------------------------------- -# initialized (other) - -# Python/getargs.c -static_arg_parsers - -# Python/pyhash.c -PyHash_Func -_Py_HashSecret -_Py_HashSecret_Initialized - -# Python/pylifecycle.c -_Py_StandardStreamEncoding -_Py_StandardStreamErrors -default_home -env_home -progname -Py_BytesWarningFlag -Py_DebugFlag -Py_DontWriteBytecodeFlag -Py_FrozenFlag -Py_HashRandomizationFlag -Py_IgnoreEnvironmentFlag -Py_InspectFlag -Py_InteractiveFlag -Py_IsolatedFlag -Py_NoSiteFlag -Py_NoUserSiteDirectory -Py_OptimizeFlag -Py_QuietFlag -Py_UnbufferedStdioFlag -Py_UseClassExceptionsFlag -Py_VerboseFlag - - -#--------------------------------- -# types - -# Modules/_threadmodule.c -Locktype -RLocktype -localdummytype -localtype - -# Objects/exceptions.c -PyExc_BaseException -PyExc_Exception -PyExc_GeneratorExit -PyExc_KeyboardInterrupt -PyExc_StopAsyncIteration -PyExc_StopIteration -PyExc_SystemExit -_PyExc_BaseException -_PyExc_Exception -_PyExc_GeneratorExit -_PyExc_KeyboardInterrupt -_PyExc_StopAsyncIteration -_PyExc_StopIteration -_PyExc_SystemExit - -# Objects/structseq.c -_struct_sequence_template - - -#--------------------------------- -# interned strings/bytes - -# Modules/_io/_iomodule.c -_PyIO_empty_bytes -_PyIO_empty_str -_PyIO_str_close -_PyIO_str_closed -_PyIO_str_decode -_PyIO_str_encode -_PyIO_str_fileno -_PyIO_str_flush 
-_PyIO_str_getstate -_PyIO_str_isatty -_PyIO_str_newlines -_PyIO_str_nl -_PyIO_str_read -_PyIO_str_read1 -_PyIO_str_readable -_PyIO_str_readall -_PyIO_str_readinto -_PyIO_str_readline -_PyIO_str_reset -_PyIO_str_seek -_PyIO_str_seekable -_PyIO_str_setstate -_PyIO_str_tell -_PyIO_str_truncate -_PyIO_str_writable -_PyIO_str_write - -# Modules/_threadmodule.c -str_dict - -# Objects/boolobject.c -false_str -true_str - -# Objects/listobject.c -indexerr - -# Python/symtable.c -__class__ -dictcomp -genexpr -lambda -listcomp -setcomp -top - -# Python/sysmodule.c -whatstrings - - -####################################### -# hacks - -# Objects/object.c -_Py_abstract_hack - -# Objects/setobject.c -_PySet_Dummy - -# Python/pylifecycle.c -_PyOS_mystrnicmp_hack
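The pystate.c hunks above swap _PyRuntime.gilstate.* back to the file statics autoTLSkey and autoInterpreterState, but the embedding protocol built on them is unchanged. A minimal sketch of that protocol from a thread CPython did not create, using only the documented PyGILState API (the function name is illustrative):

    #include <Python.h>

    /* Runs Python code on a thread that CPython did not create.
       PyGILState_Ensure() finds or creates this thread's state via the
       auto-TLS key patched above, then acquires the GIL. */
    static void
    call_into_python(void)
    {
        PyGILState_STATE gstate = PyGILState_Ensure();

        PyRun_SimpleString("print('hello from a foreign thread')");

        /* Must balance Ensure() on the same thread; the GILState
           machinery decides whether the thread state gets deleted. */
        PyGILState_Release(gstate);
    }

Each Ensure() must be paired with a Release() on the same thread, which is why PyGILState_Release() in the hunk above calls Py_FatalError() when no thread state is mapped for the current thread.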
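The sys_setcheckinterval()/sys_getcheckinterval() hunks revert to parsing straight into a file static's address. A minimal sketch of that PyArg_ParseTuple idiom, with illustrative names standing in for _check_interval and the real sys functions:

    #include <Python.h>

    static int demo_interval = 100;   /* stand-in for _check_interval */

    /* "i:setinterval" converts the argument to a C int and stores it
       directly at &demo_interval, as sys_setcheckinterval does. */
    static PyObject *
    demo_setinterval(PyObject *self, PyObject *args)
    {
        if (!PyArg_ParseTuple(args, "i:setinterval", &demo_interval))
            return NULL;
        Py_RETURN_NONE;
    }

    static PyObject *
    demo_getinterval(PyObject *self, PyObject *args)
    {
        return PyLong_FromLong(demo_interval);
    }

The trade-off this revert reintroduces is visible here: the static is process-wide, so every interpreter in the process shares the one value, whereas the reverted code kept it per-interpreter.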
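The thread.c, thread_nt.h, and thread_pthread.h hunks restore _pythread_stacksize as the process-wide value consulted when new threads start. A sketch of the public accessors layered on it (both functions are real; the 512 KiB figure is arbitrary):

    #include <stdio.h>
    #include <Python.h>
    #include "pythread.h"

    static void
    tune_thread_stack(void)
    {
        size_t previous = PyThread_get_stacksize();  /* 0 == platform default */

        /* Returns 0 on success; the thread_nt.h / thread_pthread.h hunks
           above reject sizes outside the platform's valid range. */
        if (PyThread_set_stacksize(512 * 1024) != 0) {
            fprintf(stderr, "unsupported stack size\n");
            return;
        }

        /* ... start threads that get the 512 KiB stack ... */

        PyThread_set_stacksize(previous);            /* restore */
    }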
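The deleted README above prescribes folding runtime state into _PyRuntimeState rather than adding new file statics. A hedged sketch of that consolidation pattern; every name below is illustrative and does not match CPython's actual _PyRuntimeState layout:

    /* Instead of:  static int check_interval = 100;  hidden in one file,
       the state hangs off a single top-level global, so a symbol scan
       (as check-c-globals.py does with nm) sees one runtime variable. */

    struct demo_ceval_state {
        int check_interval;
    };

    struct demo_runtime_state {
        struct demo_ceval_state ceval;
    };

    static struct demo_runtime_state demo_runtime = {
        .ceval = { .check_interval = 100 },
    };

    int
    demo_get_check_interval(void)
    {
        return demo_runtime.ceval.check_interval;
    }

This mirrors the _PyRuntime.ceval.* accesses that the hunks above are reverting back to per-file statics.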