diff options
author | Eric Snow <ericsnowcurrently@gmail.com> | 2023-07-27 19:56:59 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-27 19:56:59 (GMT) |
commit | b72947a8d26915156323ccfd04d273199ecb870c (patch) | |
tree | ede8eba6e7824886303d482e746bbb92ea09e172 /Include/internal | |
parent | 4f67921ad28194155e3d4c16255fb140a6a4d89a (diff) | |
download | cpython-b72947a8d26915156323ccfd04d273199ecb870c.zip cpython-b72947a8d26915156323ccfd04d273199ecb870c.tar.gz cpython-b72947a8d26915156323ccfd04d273199ecb870c.tar.bz2 |
gh-106931: Intern Statically Allocated Strings Globally (gh-107272)
We tried this before with a dict and for all interned strings. That ran into problems due to interpreter isolation. However, exclusively using a per-interpreter cache caused some inconsistency that can eliminate the benefit of interning. Here we circle back to using a global cache, but only for statically allocated strings. We also use a more-basic _Py_hashtable_t for that global cache instead of a dict.
Ideally we would only have the global cache, but the optional isolation of each interpreter's allocator means that a non-static string object must not outlive its interpreter. Thus we would have to store a copy of each such interned string in the global cache, tied to the main interpreter.
Diffstat (limited to 'Include/internal')
-rw-r--r-- | Include/internal/pycore_global_objects.h | 6 | ||||
-rw-r--r-- | Include/internal/pycore_hashtable.h | 1 | ||||
-rw-r--r-- | Include/internal/pycore_runtime.h | 1 | ||||
-rw-r--r-- | Include/internal/pycore_runtime_init.h | 1 |
4 files changed, 9 insertions, 0 deletions
diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h index 5a3fb13..442f851 100644 --- a/Include/internal/pycore_global_objects.h +++ b/Include/internal/pycore_global_objects.h @@ -8,6 +8,7 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#include "pycore_hashtable.h" // _Py_hashtable_t #include "pycore_gc.h" // PyGC_Head #include "pycore_global_strings.h" // struct _Py_global_strings #include "pycore_hamt.h" // PyHamtNode_Bitmap @@ -28,6 +29,11 @@ extern "C" { #define _Py_SINGLETON(NAME) \ _Py_GLOBAL_OBJECT(singletons.NAME) +struct _Py_cached_objects { + // XXX We could statically allocate the hashtable. + _Py_hashtable_t *interned_strings; +}; + struct _Py_static_objects { struct { /* Small integers are preallocated in this array so that they diff --git a/Include/internal/pycore_hashtable.h b/Include/internal/pycore_hashtable.h index 6501ab1..f57978a 100644 --- a/Include/internal/pycore_hashtable.h +++ b/Include/internal/pycore_hashtable.h @@ -106,6 +106,7 @@ PyAPI_FUNC(int) _Py_hashtable_foreach( void *user_data); PyAPI_FUNC(size_t) _Py_hashtable_size(const _Py_hashtable_t *ht); +PyAPI_FUNC(size_t) _Py_hashtable_len(const _Py_hashtable_t *ht); /* Add a new entry to the hash. The key must not be present in the hash table. Return 0 on success, -1 on memory error. */ diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h index eb6b66b..0ec86ee 100644 --- a/Include/internal/pycore_runtime.h +++ b/Include/internal/pycore_runtime.h @@ -249,6 +249,7 @@ typedef struct pyruntimestate { struct _types_runtime_state types; /* All the objects that are shared by the runtime's interpreters. */ + struct _Py_cached_objects cached_objects; struct _Py_static_objects static_objects; /* The following fields are here to avoid allocation during init. diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index e72e742..840e41c 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -214,6 +214,7 @@ extern PyTypeObject _PyExc_MemoryError; .kind = 1, \ .compact = 1, \ .ascii = (ASCII), \ + .statically_allocated = 1, \ }, \ } #define _PyASCIIObject_INIT(LITERAL) \ |