summaryrefslogtreecommitdiffstats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r--Objects/unicodeobject.c78
1 files changed, 63 insertions, 15 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index aba7407..18b9458 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -198,6 +198,11 @@ extern "C" {
# define OVERALLOCATE_FACTOR 4
#endif
+/* bpo-40521: Interned strings are shared by all interpreters. */
+#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
+# define INTERNED_STRINGS
+#endif
+
/* This dictionary holds all interned unicode strings. Note that references
to strings in this dictionary are *not* counted in the string's ob_refcnt.
When the interned string reaches a refcnt of 0 the string deallocation
@@ -206,7 +211,9 @@ extern "C" {
Another way to look at this is that to say that the actual reference
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
*/
+#ifdef INTERNED_STRINGS
static PyObject *interned = NULL;
+#endif
/* The empty Unicode object is shared to improve performance. */
static PyObject *unicode_empty = NULL;
@@ -281,9 +288,16 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
/* List of static strings. */
static _Py_Identifier *static_strings = NULL;
+/* bpo-40521: Latin1 singletons are shared by all interpreters. */
+#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
+# define LATIN1_SINGLETONS
+#endif
+
+#ifdef LATIN1_SINGLETONS
/* Single character Unicode strings in the Latin-1 range are being
shared as well. */
static PyObject *unicode_latin1[256] = {NULL};
+#endif
/* Fast detection of the most frequent whitespace characters */
const unsigned char _Py_ascii_whitespace[] = {
@@ -662,6 +676,7 @@ unicode_result_ready(PyObject *unicode)
return unicode_empty;
}
+#ifdef LATIN1_SINGLETONS
if (length == 1) {
const void *data = PyUnicode_DATA(unicode);
int kind = PyUnicode_KIND(unicode);
@@ -683,6 +698,7 @@ unicode_result_ready(PyObject *unicode)
}
}
}
+#endif
assert(_PyUnicode_CheckConsistency(unicode, 1));
return unicode;
@@ -1913,10 +1929,12 @@ unicode_dealloc(PyObject *unicode)
case SSTATE_INTERNED_MORTAL:
/* revive dead object temporarily for DelItem */
Py_SET_REFCNT(unicode, 3);
+#ifdef INTERNED_STRINGS
if (PyDict_DelItem(interned, unicode) != 0) {
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
NULL);
}
+#endif
break;
case SSTATE_INTERNED_IMMORTAL:
@@ -1944,15 +1962,18 @@ unicode_dealloc(PyObject *unicode)
static int
unicode_is_singleton(PyObject *unicode)
{
- PyASCIIObject *ascii = (PyASCIIObject *)unicode;
- if (unicode == unicode_empty)
+ if (unicode == unicode_empty) {
return 1;
+ }
+#ifdef LATIN1_SINGLETONS
+ PyASCIIObject *ascii = (PyASCIIObject *)unicode;
if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1)
{
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
if (ch < 256 && unicode_latin1[ch] == unicode)
return 1;
}
+#endif
return 0;
}
#endif
@@ -2094,16 +2115,28 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
static PyObject*
get_latin1_char(unsigned char ch)
{
- PyObject *unicode = unicode_latin1[ch];
+ PyObject *unicode;
+
+#ifdef LATIN1_SINGLETONS
+ unicode = unicode_latin1[ch];
+ if (unicode) {
+ Py_INCREF(unicode);
+ return unicode;
+ }
+#endif
+
+ unicode = PyUnicode_New(1, ch);
if (!unicode) {
- unicode = PyUnicode_New(1, ch);
- if (!unicode)
- return NULL;
- PyUnicode_1BYTE_DATA(unicode)[0] = ch;
- assert(_PyUnicode_CheckConsistency(unicode, 1));
- unicode_latin1[ch] = unicode;
+ return NULL;
}
+
+ PyUnicode_1BYTE_DATA(unicode)[0] = ch;
+ assert(_PyUnicode_CheckConsistency(unicode, 1));
+
+#ifdef LATIN1_SINGLETONS
Py_INCREF(unicode);
+ unicode_latin1[ch] = unicode;
+#endif
return unicode;
}
@@ -11270,7 +11303,6 @@ int
_PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
{
PyObject *right_uni;
- Py_hash_t hash;
assert(_PyUnicode_CHECK(left));
assert(right->string);
@@ -11302,10 +11334,12 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
if (PyUnicode_CHECK_INTERNED(left))
return 0;
+#ifdef INTERNED_STRINGS
assert(_PyUnicode_HASH(right_uni) != -1);
- hash = _PyUnicode_HASH(left);
+ Py_hash_t hash = _PyUnicode_HASH(left);
if (hash != -1 && hash != _PyUnicode_HASH(right_uni))
return 0;
+#endif
return unicode_compare_eq(left, right_uni);
}
@@ -15487,20 +15521,26 @@ void
PyUnicode_InternInPlace(PyObject **p)
{
PyObject *s = *p;
- PyObject *t;
#ifdef Py_DEBUG
assert(s != NULL);
assert(_PyUnicode_CHECK(s));
#else
- if (s == NULL || !PyUnicode_Check(s))
+ if (s == NULL || !PyUnicode_Check(s)) {
return;
+ }
#endif
+
/* If it's a subclass, we don't really know what putting
it in the interned dict might do. */
- if (!PyUnicode_CheckExact(s))
+ if (!PyUnicode_CheckExact(s)) {
return;
- if (PyUnicode_CHECK_INTERNED(s))
+ }
+
+ if (PyUnicode_CHECK_INTERNED(s)) {
return;
+ }
+
+#ifdef INTERNED_STRINGS
if (interned == NULL) {
interned = PyDict_New();
if (interned == NULL) {
@@ -15508,22 +15548,28 @@ PyUnicode_InternInPlace(PyObject **p)
return;
}
}
+
+ PyObject *t;
Py_ALLOW_RECURSION
t = PyDict_SetDefault(interned, s, s);
Py_END_ALLOW_RECURSION
+
if (t == NULL) {
PyErr_Clear();
return;
}
+
if (t != s) {
Py_INCREF(t);
Py_SETREF(*p, t);
return;
}
+
/* The two references in interned are not counted by refcnt.
The deallocator will take care of this */
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
+#endif
}
void
@@ -16109,9 +16155,11 @@ _PyUnicode_Fini(PyThreadState *tstate)
Py_CLEAR(unicode_empty);
+#ifdef LATIN1_SINGLETONS
for (Py_ssize_t i = 0; i < 256; i++) {
Py_CLEAR(unicode_latin1[i]);
}
+#endif
_PyUnicode_ClearStaticStrings();
}