Issue #10156: In the interpreter's initialization phase, unicode globals are now initialized dynamically as needed.
author: Serhiy Storchaka <storchaka@gmail.com> 2013-01-26 10:18:17 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2013-01-26 10:18:17 (GMT)
commit: ed3c4128c061aef01a19bdfa4ac8b87e43f9d768 (patch)
tree: 510fc49f7b8fb527c67dbe8677789c00964a613d /Objects
parent: dd399af438e02506fb2e1978ae040c3f3e4a21d7 (diff)
parent: 678db84b3724f99d6e4950ecf4eb7c1f79a47b2f (diff)
download: cpython-ed3c4128c061aef01a19bdfa4ac8b87e43f9d768.zip
cpython-ed3c4128c061aef01a19bdfa4ac8b87e43f9d768.tar.gz
cpython-ed3c4128c061aef01a19bdfa4ac8b87e43f9d768.tar.bz2
1 file changed, 83 insertions, 110 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 5030e8d..b4f4185 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -49,8 +49,9 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
 /* --- Globals ------------------------------------------------------------
 
-   The globals are initialized by the _PyUnicode_Init() API and should
-   not be used before calling that API.
+NOTE: In the interpreter's initialization phase, some globals are currently
+      initialized dynamically as needed. In the process Unicode objects may
+      be created before the Unicode type is ready.
 
 */
 
@@ -171,17 +172,36 @@ extern "C" {
    Another way to look at this is that to say that the actual reference
    count of a string is:  s->ob_refcnt + (s->state ? 2 : 0)
 */
-static PyObject *interned;
+static PyObject *interned = NULL;
 
 /* The empty Unicode object is shared to improve performance. */
-static PyObject *unicode_empty;
+static PyObject *unicode_empty = NULL;
+
+#define _Py_INCREF_UNICODE_EMPTY()                      \
+    do {                                                \
+        if (unicode_empty != NULL)                      \
+            Py_INCREF(unicode_empty);                   \
+        else {                                          \
+            unicode_empty = PyUnicode_New(0, 0);        \
+            if (unicode_empty != NULL) {                \
+                Py_INCREF(unicode_empty);               \
+                assert(_PyUnicode_CheckConsistency(unicode_empty, 1)); \
+            }                                           \
+        }                                               \
+    } while (0)
+
+#define _Py_RETURN_UNICODE_EMPTY()                      \
+    do {                                                \
+        _Py_INCREF_UNICODE_EMPTY();                     \
+        return unicode_empty;                           \
+    } while (0)
 
 /* List of static strings. */
-static _Py_Identifier *static_strings;
+static _Py_Identifier *static_strings = NULL;
 
 /* Single character Unicode strings in the Latin-1 range are being
    shared as well. */
-static PyObject *unicode_latin1[256];
+static PyObject *unicode_latin1[256] = {NULL};
 
 /* Fast detection of the most frequent whitespace characters */
 const unsigned char _Py_ascii_whitespace[] = {
@@ -406,9 +426,8 @@ unicode_result_wchar(PyObject *unicode)
 
     len = _PyUnicode_WSTR_LENGTH(unicode);
     if (len == 0) {
-        Py_INCREF(unicode_empty);
         Py_DECREF(unicode);
-        return unicode_empty;
+        _Py_RETURN_UNICODE_EMPTY();
     }
 
     if (len == 1) {
@@ -442,8 +461,8 @@ unicode_result_ready(PyObject *unicode)
     length = PyUnicode_GET_LENGTH(unicode);
     if (length == 0) {
         if (unicode != unicode_empty) {
-            Py_INCREF(unicode_empty);
             Py_DECREF(unicode);
+            _Py_RETURN_UNICODE_EMPTY();
         }
         return unicode_empty;
     }
@@ -520,7 +539,7 @@ static OSVERSIONINFOEX winver;
 
 #define BLOOM_MASK unsigned long
 
-static BLOOM_MASK bloom_linebreak;
+static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
 
 #define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
 #define BLOOM(mask, ch)     ((mask &  (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
@@ -1602,9 +1621,11 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length)
         return 0;
 
     if (length == 0) {
+        _Py_INCREF_UNICODE_EMPTY();
+        if (!unicode_empty)
+            return -1;
         Py_DECREF(*p_unicode);
         *p_unicode = unicode_empty;
-        Py_INCREF(*p_unicode);
         return 0;
     }
 
@@ -1727,10 +1748,8 @@ PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size)
        some optimizations which share commonly used objects. */
 
     /* Optimization for empty strings */
-    if (size == 0 && unicode_empty != NULL) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
 
     /* Single character Unicode objects in the Latin-1 range are
        shared when using this constructor */
@@ -1889,10 +1908,8 @@ _PyUnicode_FromUCS1(const unsigned char* u, Py_ssize_t size)
     PyObject *res;
     unsigned char max_char;
 
-    if (size == 0) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
     assert(size > 0);
     if (size == 1)
         return get_latin1_char(u[0]);
@@ -1912,10 +1929,8 @@ _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size)
     PyObject *res;
     Py_UCS2 max_char;
 
-    if (size == 0) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
     assert(size > 0);
     if (size == 1) {
         Py_UCS4 ch = u[0];
@@ -1950,10 +1965,8 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
     PyObject *res;
     Py_UCS4 max_char;
 
-    if (size == 0) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
     assert(size > 0);
     if (size == 1) {
         Py_UCS4 ch = u[0];
@@ -2245,10 +2258,8 @@ PyObject *
 PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size)
 {
     if (w == NULL) {
-        if (size == 0) {
-            Py_INCREF(unicode_empty);
-            return unicode_empty;
-        }
+        if (size == 0)
+            _Py_RETURN_UNICODE_EMPTY();
         PyErr_BadInternalCall();
         return NULL;
     }
@@ -2825,15 +2836,11 @@ PyUnicode_FromEncodedObject(register PyObject *obj,
 
     /* Decoding bytes objects is the most common case and should be fast */
     if (PyBytes_Check(obj)) {
-        if (PyBytes_GET_SIZE(obj) == 0) {
-            Py_INCREF(unicode_empty);
-            v = unicode_empty;
-        }
-        else {
-            v = PyUnicode_Decode(
-                    PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
-                    encoding, errors);
-        }
+        if (PyBytes_GET_SIZE(obj) == 0)
+            _Py_RETURN_UNICODE_EMPTY();
+        v = PyUnicode_Decode(
+                PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
+                encoding, errors);
         return v;
     }
 
@@ -2853,12 +2860,11 @@ PyUnicode_FromEncodedObject(register PyObject *obj,
     }
 
     if (buffer.len == 0) {
-        Py_INCREF(unicode_empty);
-        v = unicode_empty;
+        PyBuffer_Release(&buffer);
+        _Py_RETURN_UNICODE_EMPTY();
     }
-    else
-        v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors);
 
+    v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors);
     PyBuffer_Release(&buffer);
     return v;
 }
@@ -4201,8 +4207,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
     if (size == 0) {
         if (consumed)
             *consumed = 0;
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
+        _Py_RETURN_UNICODE_EMPTY();
     }
 
     /* Start off assuming it's all ASCII. Widen later as necessary. */
@@ -4609,8 +4614,7 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
     if (size == 0) {
         if (consumed)
             *consumed = 0;
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
+        _Py_RETURN_UNICODE_EMPTY();
     }
 
     /* ASCII is equivalent to the first 128 ordinals in Unicode. */
@@ -4868,8 +4872,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s,
     if (q == e) {
         if (consumed)
             *consumed = size;
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
+        _Py_RETURN_UNICODE_EMPTY();
     }
 
 #ifdef WORDS_BIGENDIAN
@@ -5108,8 +5111,7 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
     if (q == e) {
         if (consumed)
             *consumed = size;
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
+        _Py_RETURN_UNICODE_EMPTY();
     }
 
 #if PY_LITTLE_ENDIAN
@@ -5386,10 +5388,8 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
     Py_ssize_t len;
 
     len = length_of_escaped_ascii_string(s, size);
-    if (len == 0) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (len == 0)
+        _Py_RETURN_UNICODE_EMPTY();
 
     /* After length_of_escaped_ascii_string() there are two alternatives,
        either the string is pure ASCII with named escapes like \n, etc.
@@ -5781,10 +5781,8 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
 
-    if (size == 0) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
 
     /* Escaped strings will always be longer than the resulting
        Unicode string, so we start with size here and then reduce the
@@ -5988,10 +5986,8 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
                      1))
         return NULL;
 
-    if (size == 0) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
 
     /* XXX overflow detection missing */
     _PyUnicodeWriter_Init(&writer, 0);
@@ -6439,10 +6435,8 @@ PyUnicode_DecodeASCII(const char *s,
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
 
-    if (size == 0) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
 
     /* ASCII is equivalent to the first 128 ordinals in Unicode. */
     if (size == 1 && (unsigned char)s[0] < 128)
@@ -6820,8 +6814,7 @@ decode_code_page_stateful(int code_page,
         if (chunk_size == 0 && done) {
             if (v != NULL)
                 break;
-            Py_INCREF(unicode_empty);
-            return unicode_empty;
+            _Py_RETURN_UNICODE_EMPTY();
         }
 
 
@@ -7298,10 +7291,8 @@ PyUnicode_DecodeCharmap(const char *s,
     if (mapping == NULL)
         return PyUnicode_DecodeLatin1(s, size, errors);
 
-    if (size == 0) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
     _PyUnicodeWriter_Init(&writer, 0);
     if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1)
         goto onError;
@@ -9354,9 +9345,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
     /* If empty sequence, return u"". */
     if (seqlen == 0) {
         Py_DECREF(fseq);
-        Py_INCREF(unicode_empty);
-        res = unicode_empty;
-        return res;
+        _Py_RETURN_UNICODE_EMPTY();
     }
 
     /* If singleton sequence with an exact Unicode, return that. */
@@ -10056,7 +10045,9 @@ replace(PyObject *self, PyObject *str1,
         }
         new_size = slen + n * (len2 - len1);
         if (new_size == 0) {
-            Py_INCREF(unicode_empty);
+            _Py_INCREF_UNICODE_EMPTY();
+            if (!unicode_empty)
+                goto error;
             u = unicode_empty;
             goto done;
         }
@@ -11559,10 +11550,8 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
         PyErr_SetString(PyExc_IndexError, "string index out of range");
         return NULL;
     }
-    if (start >= length || end < start) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (start >= length || end < start)
+        _Py_RETURN_UNICODE_EMPTY();
 
     length = end - start;
     if (PyUnicode_IS_ASCII(self)) {
@@ -11689,10 +11678,8 @@ unicode_repeat(PyObject *str, Py_ssize_t len)
     PyObject *u;
     Py_ssize_t nchars, n;
 
-    if (len < 1) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (len < 1)
+        _Py_RETURN_UNICODE_EMPTY();
 
     /* no repeat, return original string */
     if (len == 1)
@@ -12832,8 +12819,7 @@ _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
 {
     if (writer->pos == 0) {
         Py_XDECREF(writer->buffer);
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
+        _Py_RETURN_UNICODE_EMPTY();
     }
     if (writer->readonly) {
         assert(PyUnicode_GET_LENGTH(writer->buffer) == writer->pos);
@@ -13051,8 +13037,7 @@ unicode_subscript(PyObject* self, PyObject* item)
         }
 
         if (slicelength <= 0) {
-            Py_INCREF(unicode_empty);
-            return unicode_empty;
+            _Py_RETURN_UNICODE_EMPTY();
         } else if (start == 0 && step == 1 &&
                    slicelength == PyUnicode_GET_LENGTH(self)) {
             return unicode_result_unchanged(self);
@@ -14056,10 +14041,8 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str",
                                      kwlist, &x, &encoding, &errors))
         return NULL;
-    if (x == NULL) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (x == NULL)
+        _Py_RETURN_UNICODE_EMPTY();
     if (encoding == NULL && errors == NULL)
         return PyObject_Str(x);
     else
@@ -14228,8 +14211,6 @@ PyTypeObject PyUnicode_Type = {
 
 int _PyUnicode_Init(void)
 {
-    int i;
-
     /* XXX - move this array to unicodectype.c ? */
     Py_UCS2 linebreak[] = {
         0x000A, /* LINE FEED */
@@ -14243,13 +14224,11 @@ int _PyUnicode_Init(void)
     };
 
     /* Init the implementation */
-    unicode_empty = PyUnicode_New(0, 0);
+    _Py_INCREF_UNICODE_EMPTY();
     if (!unicode_empty)
         Py_FatalError("Can't create empty string");
-    assert(_PyUnicode_CheckConsistency(unicode_empty, 1));
+    Py_DECREF(unicode_empty);
 
-    for (i = 0; i < 256; i++)
-        unicode_latin1[i] = NULL;
     if (PyType_Ready(&PyUnicode_Type) < 0)
         Py_FatalError("Can't initialize 'unicode'");
 
@@ -14289,15 +14268,10 @@ _PyUnicode_Fini(void)
 {
     int i;
 
-    Py_XDECREF(unicode_empty);
-    unicode_empty = NULL;
+    Py_CLEAR(unicode_empty);
 
-    for (i = 0; i < 256; i++) {
-        if (unicode_latin1[i]) {
-            Py_DECREF(unicode_latin1[i]);
-            unicode_latin1[i] = NULL;
-        }
-    }
+    for (i = 0; i < 256; i++)
+        Py_CLEAR(unicode_latin1[i]);
     _PyUnicode_ClearStaticStrings();
     (void)PyUnicode_ClearFreeList();
 }
@@ -14426,8 +14400,7 @@ _Py_ReleaseInternedUnicodeStrings(void)
             "mortal/immortal\n", mortal_size, immortal_size);
     Py_DECREF(keys);
     PyDict_Clear(interned);
-    Py_DECREF(interned);
-    interned = NULL;
+    Py_CLEAR(interned);
 }
author	Serhiy Storchaka <storchaka@gmail.com>	2013-01-26 10:18:17 (GMT)
committer	Serhiy Storchaka <storchaka@gmail.com>	2013-01-26 10:18:17 (GMT)
commit	ed3c4128c061aef01a19bdfa4ac8b87e43f9d768 (patch)
tree	510fc49f7b8fb527c67dbe8677789c00964a613d /Objects
parent	dd399af438e02506fb2e1978ae040c3f3e4a21d7 (diff)
parent	678db84b3724f99d6e4950ecf4eb7c1f79a47b2f (diff)
download	cpython-ed3c4128c061aef01a19bdfa4ac8b87e43f9d768.zip cpython-ed3c4128c061aef01a19bdfa4ac8b87e43f9d768.tar.gz cpython-ed3c4128c061aef01a19bdfa4ac8b87e43f9d768.tar.bz2