1 files changed, 63 insertions, 15 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index aba7407..18b9458 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -198,6 +198,11 @@ extern "C" {
 #  define OVERALLOCATE_FACTOR 4
 #endif
 
+/* bpo-40521: Interned strings are shared by all interpreters. */
+#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
+#  define INTERNED_STRINGS
+#endif
+
 /* This dictionary holds all interned unicode strings.  Note that references
    to strings in this dictionary are *not* counted in the string's ob_refcnt.
    When the interned string reaches a refcnt of 0 the string deallocation
@@ -206,7 +211,9 @@ extern "C" {
    Another way to look at this is that to say that the actual reference
    count of a string is:  s->ob_refcnt + (s->state ? 2 : 0)
 */
+#ifdef INTERNED_STRINGS
 static PyObject *interned = NULL;
+#endif
 
 /* The empty Unicode object is shared to improve performance. */
 static PyObject *unicode_empty = NULL;
@@ -281,9 +288,16 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
 /* List of static strings. */
 static _Py_Identifier *static_strings = NULL;
 
+/* bpo-40521: Latin1 singletons are shared by all interpreters. */
+#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
+#  define LATIN1_SINGLETONS
+#endif
+
+#ifdef LATIN1_SINGLETONS
 /* Single character Unicode strings in the Latin-1 range are being
    shared as well. */
 static PyObject *unicode_latin1[256] = {NULL};
+#endif
 
 /* Fast detection of the most frequent whitespace characters */
 const unsigned char _Py_ascii_whitespace[] = {
@@ -662,6 +676,7 @@ unicode_result_ready(PyObject *unicode)
         return unicode_empty;
     }
 
+#ifdef LATIN1_SINGLETONS
     if (length == 1) {
         const void *data = PyUnicode_DATA(unicode);
         int kind = PyUnicode_KIND(unicode);
@@ -683,6 +698,7 @@ unicode_result_ready(PyObject *unicode)
             }
         }
     }
+#endif
 
     assert(_PyUnicode_CheckConsistency(unicode, 1));
     return unicode;
@@ -1913,10 +1929,12 @@ unicode_dealloc(PyObject *unicode)
     case SSTATE_INTERNED_MORTAL:
         /* revive dead object temporarily for DelItem */
         Py_SET_REFCNT(unicode, 3);
+#ifdef INTERNED_STRINGS
         if (PyDict_DelItem(interned, unicode) != 0) {
             _PyErr_WriteUnraisableMsg("deletion of interned string failed",
                                       NULL);
         }
+#endif
         break;
 
     case SSTATE_INTERNED_IMMORTAL:
@@ -1944,15 +1962,18 @@ unicode_dealloc(PyObject *unicode)
 static int
 unicode_is_singleton(PyObject *unicode)
 {
-    PyASCIIObject *ascii = (PyASCIIObject *)unicode;
-    if (unicode == unicode_empty)
+    if (unicode == unicode_empty) {
         return 1;
+    }
+#ifdef LATIN1_SINGLETONS
+    PyASCIIObject *ascii = (PyASCIIObject *)unicode;
     if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1)
     {
         Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
         if (ch < 256 && unicode_latin1[ch] == unicode)
             return 1;
     }
+#endif
     return 0;
 }
 #endif
@@ -2094,16 +2115,28 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
 static PyObject*
 get_latin1_char(unsigned char ch)
 {
-    PyObject *unicode = unicode_latin1[ch];
+    PyObject *unicode;
+
+#ifdef LATIN1_SINGLETONS
+    unicode = unicode_latin1[ch];
+    if (unicode) {
+        Py_INCREF(unicode);
+        return unicode;
+    }
+#endif
+
+    unicode = PyUnicode_New(1, ch);
     if (!unicode) {
-        unicode = PyUnicode_New(1, ch);
-        if (!unicode)
-            return NULL;
-        PyUnicode_1BYTE_DATA(unicode)[0] = ch;
-        assert(_PyUnicode_CheckConsistency(unicode, 1));
-        unicode_latin1[ch] = unicode;
+        return NULL;
     }
+
+    PyUnicode_1BYTE_DATA(unicode)[0] = ch;
+    assert(_PyUnicode_CheckConsistency(unicode, 1));
+
+#ifdef LATIN1_SINGLETONS
     Py_INCREF(unicode);
+    unicode_latin1[ch] = unicode;
+#endif
     return unicode;
 }
 
@@ -11270,7 +11303,6 @@ int
 _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
 {
     PyObject *right_uni;
-    Py_hash_t hash;
 
     assert(_PyUnicode_CHECK(left));
     assert(right->string);
@@ -11302,10 +11334,12 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
     if (PyUnicode_CHECK_INTERNED(left))
         return 0;
 
+#ifdef INTERNED_STRINGS
     assert(_PyUnicode_HASH(right_uni) != -1);
-    hash = _PyUnicode_HASH(left);
+    Py_hash_t hash = _PyUnicode_HASH(left);
     if (hash != -1 && hash != _PyUnicode_HASH(right_uni))
         return 0;
+#endif
 
     return unicode_compare_eq(left, right_uni);
 }
@@ -15487,20 +15521,26 @@ void
 PyUnicode_InternInPlace(PyObject **p)
 {
     PyObject *s = *p;
-    PyObject *t;
 #ifdef Py_DEBUG
     assert(s != NULL);
     assert(_PyUnicode_CHECK(s));
 #else
-    if (s == NULL || !PyUnicode_Check(s))
+    if (s == NULL || !PyUnicode_Check(s)) {
         return;
+    }
 #endif
+
     /* If it's a subclass, we don't really know what putting
        it in the interned dict might do. */
-    if (!PyUnicode_CheckExact(s))
+    if (!PyUnicode_CheckExact(s)) {
         return;
-    if (PyUnicode_CHECK_INTERNED(s))
+    }
+
+    if (PyUnicode_CHECK_INTERNED(s)) {
         return;
+    }
+
+#ifdef INTERNED_STRINGS
     if (interned == NULL) {
         interned = PyDict_New();
         if (interned == NULL) {
@@ -15508,22 +15548,28 @@ PyUnicode_InternInPlace(PyObject **p)
             return;
         }
     }
+
+    PyObject *t;
     Py_ALLOW_RECURSION
     t = PyDict_SetDefault(interned, s, s);
     Py_END_ALLOW_RECURSION
+
     if (t == NULL) {
         PyErr_Clear();
         return;
     }
+
     if (t != s) {
         Py_INCREF(t);
         Py_SETREF(*p, t);
         return;
     }
+
     /* The two references in interned are not counted by refcnt.
        The deallocator will take care of this */
     Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
     _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
+#endif
 }
 
 void
@@ -16109,9 +16155,11 @@ _PyUnicode_Fini(PyThreadState *tstate)
 
         Py_CLEAR(unicode_empty);
 
+#ifdef LATIN1_SINGLETONS
         for (Py_ssize_t i = 0; i < 256; i++) {
             Py_CLEAR(unicode_latin1[i]);
         }
+#endif
         _PyUnicode_ClearStaticStrings();
     }