summary | refs | log | tree | commit | diff | stats
path: root/Include/internal
diff options
context:
space:
mode:
author: Eric Snow <ericsnowcurrently@gmail.com> 2023-07-27 19:56:59 (GMT)
committer: GitHub <noreply@github.com> 2023-07-27 19:56:59 (GMT)
commit: b72947a8d26915156323ccfd04d273199ecb870c (patch)
tree: ede8eba6e7824886303d482e746bbb92ea09e172 /Include/internal
parent: 4f67921ad28194155e3d4c16255fb140a6a4d89a (diff)
download: cpython-b72947a8d26915156323ccfd04d273199ecb870c.zip
cpython-b72947a8d26915156323ccfd04d273199ecb870c.tar.gz
cpython-b72947a8d26915156323ccfd04d273199ecb870c.tar.bz2
gh-106931: Intern Statically Allocated Strings Globally (gh-107272)
We tried this before, using a dict and covering all interned strings. That ran into problems due to interpreter isolation. However, exclusively using a per-interpreter cache caused some inconsistency that can eliminate the benefit of interning. Here we circle back to using a global cache, but only for statically allocated strings. We also use a more basic _Py_hashtable_t for that global cache instead of a dict. Ideally we would only have the global cache, but the optional isolation of each interpreter's allocator means that a non-static string object must not outlive its interpreter. Thus we would have to store a copy of each such interned string in the global cache, tied to the main interpreter.
Diffstat (limited to 'Include/internal')
-rw-r--r-- Include/internal/pycore_global_objects.h | 6
-rw-r--r-- Include/internal/pycore_hashtable.h | 1
-rw-r--r-- Include/internal/pycore_runtime.h | 1
-rw-r--r-- Include/internal/pycore_runtime_init.h | 1
4 files changed, 9 insertions, 0 deletions
diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h
index 5a3fb13..442f851 100644
--- a/Include/internal/pycore_global_objects.h
+++ b/Include/internal/pycore_global_objects.h
@@ -8,6 +8,7 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif
+#include "pycore_hashtable.h" // _Py_hashtable_t
#include "pycore_gc.h" // PyGC_Head
#include "pycore_global_strings.h" // struct _Py_global_strings
#include "pycore_hamt.h" // PyHamtNode_Bitmap
@@ -28,6 +29,11 @@ extern "C" {
#define _Py_SINGLETON(NAME) \
_Py_GLOBAL_OBJECT(singletons.NAME)
+struct _Py_cached_objects {  // runtime-wide object caches; embedded in pyruntimestate as `cached_objects`
+ // Global intern cache, used only for statically allocated strings. XXX We could statically allocate the hashtable.
+ _Py_hashtable_t *interned_strings;  // held by pointer, not embedded; NOTE(review): presumably created during runtime init -- confirm
+};
+
struct _Py_static_objects {
struct {
/* Small integers are preallocated in this array so that they
diff --git a/Include/internal/pycore_hashtable.h b/Include/internal/pycore_hashtable.h
index 6501ab1..f57978a 100644
--- a/Include/internal/pycore_hashtable.h
+++ b/Include/internal/pycore_hashtable.h
@@ -106,6 +106,7 @@ PyAPI_FUNC(int) _Py_hashtable_foreach(
void *user_data);
PyAPI_FUNC(size_t) _Py_hashtable_size(const _Py_hashtable_t *ht);
+PyAPI_FUNC(size_t) _Py_hashtable_len(const _Py_hashtable_t *ht);
/* Add a new entry to the hash. The key must not be present in the hash table.
Return 0 on success, -1 on memory error. */
diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h
index eb6b66b..0ec86ee 100644
--- a/Include/internal/pycore_runtime.h
+++ b/Include/internal/pycore_runtime.h
@@ -249,6 +249,7 @@ typedef struct pyruntimestate {
struct _types_runtime_state types;
/* All the objects that are shared by the runtime's interpreters. */
+ struct _Py_cached_objects cached_objects;
struct _Py_static_objects static_objects;
/* The following fields are here to avoid allocation during init.
diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h
index e72e742..840e41c 100644
--- a/Include/internal/pycore_runtime_init.h
+++ b/Include/internal/pycore_runtime_init.h
@@ -214,6 +214,7 @@ extern PyTypeObject _PyExc_MemoryError;
.kind = 1, \
.compact = 1, \
.ascii = (ASCII), \
+ .statically_allocated = 1, \
}, \
}
#define _PyASCIIObject_INIT(LITERAL) \