diff options
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 78 |
1 files changed, 63 insertions, 15 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index aba7407..18b9458 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -198,6 +198,11 @@ extern "C" { # define OVERALLOCATE_FACTOR 4 #endif +/* bpo-40521: Interned strings are shared by all interpreters. */ +#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS +# define INTERNED_STRINGS +#endif + /* This dictionary holds all interned unicode strings. Note that references to strings in this dictionary are *not* counted in the string's ob_refcnt. When the interned string reaches a refcnt of 0 the string deallocation @@ -206,7 +211,9 @@ extern "C" { Another way to look at this is that to say that the actual reference count of a string is: s->ob_refcnt + (s->state ? 2 : 0) */ +#ifdef INTERNED_STRINGS static PyObject *interned = NULL; +#endif /* The empty Unicode object is shared to improve performance. */ static PyObject *unicode_empty = NULL; @@ -281,9 +288,16 @@ unicode_decode_utf8(const char *s, Py_ssize_t size, /* List of static strings. */ static _Py_Identifier *static_strings = NULL; +/* bpo-40521: Latin1 singletons are shared by all interpreters. */ +#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS +# define LATIN1_SINGLETONS +#endif + +#ifdef LATIN1_SINGLETONS /* Single character Unicode strings in the Latin-1 range are being shared as well. */ static PyObject *unicode_latin1[256] = {NULL}; +#endif /* Fast detection of the most frequent whitespace characters */ const unsigned char _Py_ascii_whitespace[] = { @@ -662,6 +676,7 @@ unicode_result_ready(PyObject *unicode) return unicode_empty; } +#ifdef LATIN1_SINGLETONS if (length == 1) { const void *data = PyUnicode_DATA(unicode); int kind = PyUnicode_KIND(unicode); @@ -683,6 +698,7 @@ unicode_result_ready(PyObject *unicode) } } } +#endif assert(_PyUnicode_CheckConsistency(unicode, 1)); return unicode; @@ -1913,10 +1929,12 @@ unicode_dealloc(PyObject *unicode) case SSTATE_INTERNED_MORTAL: /* revive dead object temporarily for DelItem */ Py_SET_REFCNT(unicode, 3); +#ifdef INTERNED_STRINGS if (PyDict_DelItem(interned, unicode) != 0) { _PyErr_WriteUnraisableMsg("deletion of interned string failed", NULL); } +#endif break; case SSTATE_INTERNED_IMMORTAL: @@ -1944,15 +1962,18 @@ unicode_dealloc(PyObject *unicode) static int unicode_is_singleton(PyObject *unicode) { - PyASCIIObject *ascii = (PyASCIIObject *)unicode; - if (unicode == unicode_empty) + if (unicode == unicode_empty) { return 1; + } +#ifdef LATIN1_SINGLETONS + PyASCIIObject *ascii = (PyASCIIObject *)unicode; if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1) { Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0); if (ch < 256 && unicode_latin1[ch] == unicode) return 1; } +#endif return 0; } #endif @@ -2094,16 +2115,28 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index, static PyObject* get_latin1_char(unsigned char ch) { - PyObject *unicode = unicode_latin1[ch]; + PyObject *unicode; + +#ifdef LATIN1_SINGLETONS + unicode = unicode_latin1[ch]; + if (unicode) { + Py_INCREF(unicode); + return unicode; + } +#endif + + unicode = PyUnicode_New(1, ch); if (!unicode) { - unicode = PyUnicode_New(1, ch); - if (!unicode) - return NULL; - PyUnicode_1BYTE_DATA(unicode)[0] = ch; - assert(_PyUnicode_CheckConsistency(unicode, 1)); - unicode_latin1[ch] = unicode; + return NULL; } + + PyUnicode_1BYTE_DATA(unicode)[0] = ch; + assert(_PyUnicode_CheckConsistency(unicode, 1)); + +#ifdef LATIN1_SINGLETONS Py_INCREF(unicode); + unicode_latin1[ch] = unicode; +#endif return unicode; } @@ -11270,7 +11303,6 @@ int _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right) { PyObject *right_uni; - Py_hash_t hash; assert(_PyUnicode_CHECK(left)); assert(right->string); @@ -11302,10 +11334,12 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right) if (PyUnicode_CHECK_INTERNED(left)) return 0; +#ifdef INTERNED_STRINGS assert(_PyUnicode_HASH(right_uni) != -1); - hash = _PyUnicode_HASH(left); + Py_hash_t hash = _PyUnicode_HASH(left); if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) return 0; +#endif return unicode_compare_eq(left, right_uni); } @@ -15487,20 +15521,26 @@ void PyUnicode_InternInPlace(PyObject **p) { PyObject *s = *p; - PyObject *t; #ifdef Py_DEBUG assert(s != NULL); assert(_PyUnicode_CHECK(s)); #else - if (s == NULL || !PyUnicode_Check(s)) + if (s == NULL || !PyUnicode_Check(s)) { return; + } #endif + /* If it's a subclass, we don't really know what putting it in the interned dict might do. */ - if (!PyUnicode_CheckExact(s)) + if (!PyUnicode_CheckExact(s)) { return; - if (PyUnicode_CHECK_INTERNED(s)) + } + + if (PyUnicode_CHECK_INTERNED(s)) { return; + } + +#ifdef INTERNED_STRINGS if (interned == NULL) { interned = PyDict_New(); if (interned == NULL) { @@ -15508,22 +15548,28 @@ PyUnicode_InternInPlace(PyObject **p) return; } } + + PyObject *t; Py_ALLOW_RECURSION t = PyDict_SetDefault(interned, s, s); Py_END_ALLOW_RECURSION + if (t == NULL) { PyErr_Clear(); return; } + if (t != s) { Py_INCREF(t); Py_SETREF(*p, t); return; } + /* The two references in interned are not counted by refcnt. The deallocator will take care of this */ Py_SET_REFCNT(s, Py_REFCNT(s) - 2); _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL; +#endif } void @@ -16109,9 +16155,11 @@ _PyUnicode_Fini(PyThreadState *tstate) Py_CLEAR(unicode_empty); +#ifdef LATIN1_SINGLETONS for (Py_ssize_t i = 0; i < 256; i++) { Py_CLEAR(unicode_latin1[i]); } +#endif _PyUnicode_ClearStaticStrings(); } |