summary refs log tree commit diff stats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c 72
1 files changed, 69 insertions, 3 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index fe2660c..cc979b2 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -236,15 +236,54 @@ static inline PyObject *get_interned_dict(PyInterpreterState *interp)
return _Py_INTERP_CACHED_OBJECT(interp, interned_strings);
}
+#define INTERNED_STRINGS _PyRuntime.cached_objects.interned_strings  /* table reached through _PyRuntime rather than through an interpreter state */
+
Py_ssize_t
_PyUnicode_InternedSize(void)
{
- return PyObject_Length(get_interned_dict(_PyInterpreterState_GET()));
+ PyObject *dict = get_interned_dict(_PyInterpreterState_GET());  /* interned dict cached on the current interpreter */
+ return _Py_hashtable_len(INTERNED_STRINGS) + PyDict_GET_SIZE(dict);  /* total = entries in the INTERNED_STRINGS hashtable + entries in this interpreter's dict */
+}
+
+static Py_hash_t unicode_hash(PyObject *);
+static int unicode_compare_eq(PyObject *, PyObject *);
+
+static Py_uhash_t  /* hash callback passed to _Py_hashtable_new_full() when INTERNED_STRINGS is created */
+hashtable_unicode_hash(const void *key)
+{
+ return unicode_hash((PyObject *)key);  /* key is cast to PyObject * and hashed with unicode_hash() */
+}
+
+static int  /* key-equality callback passed to _Py_hashtable_new_full() when INTERNED_STRINGS is created */
+hashtable_unicode_compare(const void *key1, const void *key2)
+{
+ PyObject *obj1 = (PyObject *)key1;
+ PyObject *obj2 = (PyObject *)key2;
+ if (obj1 != NULL && obj2 != NULL) {
+ return unicode_compare_eq(obj1, obj2);  /* both keys non-NULL: defer to unicode_compare_eq() */
+ }
+ else {
+ return obj1 == obj2;  /* at least one key is NULL: nonzero only when both are NULL */
+ }
}
static int
init_interned_dict(PyInterpreterState *interp)
{
+ if (_Py_IsMainInterpreter(interp)) {
+ assert(INTERNED_STRINGS == NULL);
+ _Py_hashtable_allocator_t hashtable_alloc = {PyMem_RawMalloc, PyMem_RawFree};
+ INTERNED_STRINGS = _Py_hashtable_new_full(
+ hashtable_unicode_hash,
+ hashtable_unicode_compare,
+ NULL,
+ NULL,
+ &hashtable_alloc
+ );
+ if (INTERNED_STRINGS == NULL) {
+ return -1;
+ }
+ }
assert(get_interned_dict(interp) == NULL);
PyObject *interned = interned = PyDict_New();
if (interned == NULL) {
@@ -263,6 +302,10 @@ clear_interned_dict(PyInterpreterState *interp)
Py_DECREF(interned);
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL;
}
+ if (_Py_IsMainInterpreter(interp) && INTERNED_STRINGS != NULL) {
+ _Py_hashtable_destroy(INTERNED_STRINGS);
+ INTERNED_STRINGS = NULL;
+ }
}
#define _Py_RETURN_UNICODE_EMPTY() \
@@ -1223,6 +1266,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
_PyUnicode_STATE(unicode).kind = kind;
_PyUnicode_STATE(unicode).compact = 1;
_PyUnicode_STATE(unicode).ascii = is_ascii;
+ _PyUnicode_STATE(unicode).statically_allocated = 0;
if (is_ascii) {
((char*)data)[size] = 0;
}
@@ -1553,7 +1597,9 @@ unicode_dealloc(PyObject *unicode)
* we accidentally decref an immortal string out of existence. Since
* the string is an immortal object, just re-set the reference count.
*/
- if (PyUnicode_CHECK_INTERNED(unicode)) {
+ if (PyUnicode_CHECK_INTERNED(unicode)
+ || _PyUnicode_STATE(unicode).statically_allocated)
+ {
_Py_SetImmortal(unicode);
return;
}
@@ -14503,6 +14549,7 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
_PyUnicode_STATE(self).kind = kind;
_PyUnicode_STATE(self).compact = 0;
_PyUnicode_STATE(self).ascii = _PyUnicode_STATE(unicode).ascii;
+ _PyUnicode_STATE(self).statically_allocated = 0;
_PyUnicode_UTF8_LENGTH(self) = 0;
_PyUnicode_UTF8(self) = NULL;
_PyUnicode_DATA_ANY(self) = NULL;
@@ -14726,6 +14773,23 @@ _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p)
return;
}
+ /* Look in the global cache first. */
+ PyObject *r = (PyObject *)_Py_hashtable_get(INTERNED_STRINGS, s);
+ if (r != NULL && r != s) {
+ Py_SETREF(*p, Py_NewRef(r));
+ return;
+ }
+
+ /* Handle statically allocated strings. */
+ if (_PyUnicode_STATE(s).statically_allocated) {
+ assert(_Py_IsImmortal(s));
+ if (_Py_hashtable_set(INTERNED_STRINGS, s, s) == 0) {
+ _PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL_STATIC;
+ }
+ return;
+ }
+
+ /* Look in the per-interpreter cache. */
PyObject *interned = get_interned_dict(interp);
assert(interned != NULL);
@@ -14741,9 +14805,11 @@ _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p)
}
if (_Py_IsImmortal(s)) {
+ // XXX Restrict this to the main interpreter?
_PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL_STATIC;
- return;
+ return;
}
+
#ifdef Py_REF_DEBUG
/* The reference count value excluding the 2 references from the
interned dictionary should be excluded from the RefTotal. The