diff options
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 72 |
1 file changed, 69 insertions, 3 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index fe2660c..cc979b2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -236,15 +236,54 @@ static inline PyObject *get_interned_dict(PyInterpreterState *interp) return _Py_INTERP_CACHED_OBJECT(interp, interned_strings); } +#define INTERNED_STRINGS _PyRuntime.cached_objects.interned_strings + Py_ssize_t _PyUnicode_InternedSize(void) { - return PyObject_Length(get_interned_dict(_PyInterpreterState_GET())); + PyObject *dict = get_interned_dict(_PyInterpreterState_GET()); + return _Py_hashtable_len(INTERNED_STRINGS) + PyDict_GET_SIZE(dict); +} + +static Py_hash_t unicode_hash(PyObject *); +static int unicode_compare_eq(PyObject *, PyObject *); + +static Py_uhash_t +hashtable_unicode_hash(const void *key) +{ + return unicode_hash((PyObject *)key); +} + +static int +hashtable_unicode_compare(const void *key1, const void *key2) +{ + PyObject *obj1 = (PyObject *)key1; + PyObject *obj2 = (PyObject *)key2; + if (obj1 != NULL && obj2 != NULL) { + return unicode_compare_eq(obj1, obj2); + } + else { + return obj1 == obj2; + } } static int init_interned_dict(PyInterpreterState *interp) { + if (_Py_IsMainInterpreter(interp)) { + assert(INTERNED_STRINGS == NULL); + _Py_hashtable_allocator_t hashtable_alloc = {PyMem_RawMalloc, PyMem_RawFree}; + INTERNED_STRINGS = _Py_hashtable_new_full( + hashtable_unicode_hash, + hashtable_unicode_compare, + NULL, + NULL, + &hashtable_alloc + ); + if (INTERNED_STRINGS == NULL) { + return -1; + } + } assert(get_interned_dict(interp) == NULL); PyObject *interned = interned = PyDict_New(); if (interned == NULL) { @@ -263,6 +302,10 @@ clear_interned_dict(PyInterpreterState *interp) Py_DECREF(interned); _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL; } + if (_Py_IsMainInterpreter(interp) && INTERNED_STRINGS != NULL) { + _Py_hashtable_destroy(INTERNED_STRINGS); + INTERNED_STRINGS = NULL; + } } #define _Py_RETURN_UNICODE_EMPTY() \ @@ -1223,6 +1266,7 @@ 
PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) _PyUnicode_STATE(unicode).kind = kind; _PyUnicode_STATE(unicode).compact = 1; _PyUnicode_STATE(unicode).ascii = is_ascii; + _PyUnicode_STATE(unicode).statically_allocated = 0; if (is_ascii) { ((char*)data)[size] = 0; } @@ -1553,7 +1597,9 @@ unicode_dealloc(PyObject *unicode) * we accidentally decref an immortal string out of existence. Since * the string is an immortal object, just re-set the reference count. */ - if (PyUnicode_CHECK_INTERNED(unicode)) { + if (PyUnicode_CHECK_INTERNED(unicode) + || _PyUnicode_STATE(unicode).statically_allocated) + { _Py_SetImmortal(unicode); return; } @@ -14503,6 +14549,7 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode) _PyUnicode_STATE(self).kind = kind; _PyUnicode_STATE(self).compact = 0; _PyUnicode_STATE(self).ascii = _PyUnicode_STATE(unicode).ascii; + _PyUnicode_STATE(self).statically_allocated = 0; _PyUnicode_UTF8_LENGTH(self) = 0; _PyUnicode_UTF8(self) = NULL; _PyUnicode_DATA_ANY(self) = NULL; @@ -14726,6 +14773,23 @@ _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p) return; } + /* Look in the global cache first. */ + PyObject *r = (PyObject *)_Py_hashtable_get(INTERNED_STRINGS, s); + if (r != NULL && r != s) { + Py_SETREF(*p, Py_NewRef(r)); + return; + } + + /* Handle statically allocated strings. */ + if (_PyUnicode_STATE(s).statically_allocated) { + assert(_Py_IsImmortal(s)); + if (_Py_hashtable_set(INTERNED_STRINGS, s, s) == 0) { + _PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL_STATIC; + } + return; + } + + /* Look in the per-interpreter cache. */ PyObject *interned = get_interned_dict(interp); assert(interned != NULL); @@ -14741,9 +14805,11 @@ _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p) } if (_Py_IsImmortal(s)) { + // XXX Restrict this to the main interpreter? 
_PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL_STATIC; - return; + return; } + #ifdef Py_REF_DEBUG /* The reference count value excluding the 2 references from the interned dictionary should be excluded from the RefTotal. The |