diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2013-01-26 10:18:17 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-01-26 10:18:17 (GMT) |
commit | ed3c4128c061aef01a19bdfa4ac8b87e43f9d768 (patch) | |
tree | 510fc49f7b8fb527c67dbe8677789c00964a613d /Objects | |
parent | dd399af438e02506fb2e1978ae040c3f3e4a21d7 (diff) | |
parent | 678db84b3724f99d6e4950ecf4eb7c1f79a47b2f (diff) | |
download | cpython-ed3c4128c061aef01a19bdfa4ac8b87e43f9d768.zip cpython-ed3c4128c061aef01a19bdfa4ac8b87e43f9d768.tar.gz cpython-ed3c4128c061aef01a19bdfa4ac8b87e43f9d768.tar.bz2 |
Issue #10156: In the interpreter's initialization phase, unicode globals
are now initialized dynamically as needed.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 193 |
1 file changed, 83 insertions, 110 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5030e8d..b4f4185 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -49,8 +49,9 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. /* --- Globals ------------------------------------------------------------ - The globals are initialized by the _PyUnicode_Init() API and should - not be used before calling that API. +NOTE: In the interpreter's initialization phase, some globals are currently + initialized dynamically as needed. In the process Unicode objects may + be created before the Unicode type is ready. */ @@ -171,17 +172,36 @@ extern "C" { Another way to look at this is that to say that the actual reference count of a string is: s->ob_refcnt + (s->state ? 2 : 0) */ -static PyObject *interned; +static PyObject *interned = NULL; /* The empty Unicode object is shared to improve performance. */ -static PyObject *unicode_empty; +static PyObject *unicode_empty = NULL; + +#define _Py_INCREF_UNICODE_EMPTY() \ + do { \ + if (unicode_empty != NULL) \ + Py_INCREF(unicode_empty); \ + else { \ + unicode_empty = PyUnicode_New(0, 0); \ + if (unicode_empty != NULL) { \ + Py_INCREF(unicode_empty); \ + assert(_PyUnicode_CheckConsistency(unicode_empty, 1)); \ + } \ + } \ + } while (0) + +#define _Py_RETURN_UNICODE_EMPTY() \ + do { \ + _Py_INCREF_UNICODE_EMPTY(); \ + return unicode_empty; \ + } while (0) /* List of static strings. */ -static _Py_Identifier *static_strings; +static _Py_Identifier *static_strings = NULL; /* Single character Unicode strings in the Latin-1 range are being shared as well. 
*/ -static PyObject *unicode_latin1[256]; +static PyObject *unicode_latin1[256] = {NULL}; /* Fast detection of the most frequent whitespace characters */ const unsigned char _Py_ascii_whitespace[] = { @@ -406,9 +426,8 @@ unicode_result_wchar(PyObject *unicode) len = _PyUnicode_WSTR_LENGTH(unicode); if (len == 0) { - Py_INCREF(unicode_empty); Py_DECREF(unicode); - return unicode_empty; + _Py_RETURN_UNICODE_EMPTY(); } if (len == 1) { @@ -442,8 +461,8 @@ unicode_result_ready(PyObject *unicode) length = PyUnicode_GET_LENGTH(unicode); if (length == 0) { if (unicode != unicode_empty) { - Py_INCREF(unicode_empty); Py_DECREF(unicode); + _Py_RETURN_UNICODE_EMPTY(); } return unicode_empty; } @@ -520,7 +539,7 @@ static OSVERSIONINFOEX winver; #define BLOOM_MASK unsigned long -static BLOOM_MASK bloom_linebreak; +static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0; #define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1))))) #define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1))))) @@ -1602,9 +1621,11 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length) return 0; if (length == 0) { + _Py_INCREF_UNICODE_EMPTY(); + if (!unicode_empty) + return -1; Py_DECREF(*p_unicode); *p_unicode = unicode_empty; - Py_INCREF(*p_unicode); return 0; } @@ -1727,10 +1748,8 @@ PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size) some optimizations which share commonly used objects. 
*/ /* Optimization for empty strings */ - if (size == 0 && unicode_empty != NULL) { - Py_INCREF(unicode_empty); - return unicode_empty; - } + if (size == 0) + _Py_RETURN_UNICODE_EMPTY(); /* Single character Unicode objects in the Latin-1 range are shared when using this constructor */ @@ -1889,10 +1908,8 @@ _PyUnicode_FromUCS1(const unsigned char* u, Py_ssize_t size) PyObject *res; unsigned char max_char; - if (size == 0) { - Py_INCREF(unicode_empty); - return unicode_empty; - } + if (size == 0) + _Py_RETURN_UNICODE_EMPTY(); assert(size > 0); if (size == 1) return get_latin1_char(u[0]); @@ -1912,10 +1929,8 @@ _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size) PyObject *res; Py_UCS2 max_char; - if (size == 0) { - Py_INCREF(unicode_empty); - return unicode_empty; - } + if (size == 0) + _Py_RETURN_UNICODE_EMPTY(); assert(size > 0); if (size == 1) { Py_UCS4 ch = u[0]; @@ -1950,10 +1965,8 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size) PyObject *res; Py_UCS4 max_char; - if (size == 0) { - Py_INCREF(unicode_empty); - return unicode_empty; - } + if (size == 0) + _Py_RETURN_UNICODE_EMPTY(); assert(size > 0); if (size == 1) { Py_UCS4 ch = u[0]; @@ -2245,10 +2258,8 @@ PyObject * PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size) { if (w == NULL) { - if (size == 0) { - Py_INCREF(unicode_empty); - return unicode_empty; - } + if (size == 0) + _Py_RETURN_UNICODE_EMPTY(); PyErr_BadInternalCall(); return NULL; } @@ -2825,15 +2836,11 @@ PyUnicode_FromEncodedObject(register PyObject *obj, /* Decoding bytes objects is the most common case and should be fast */ if (PyBytes_Check(obj)) { - if (PyBytes_GET_SIZE(obj) == 0) { - Py_INCREF(unicode_empty); - v = unicode_empty; - } - else { - v = PyUnicode_Decode( - PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj), - encoding, errors); - } + if (PyBytes_GET_SIZE(obj) == 0) + _Py_RETURN_UNICODE_EMPTY(); + v = PyUnicode_Decode( + PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj), + encoding, errors); return v; } @@ 
-2853,12 +2860,11 @@ PyUnicode_FromEncodedObject(register PyObject *obj, } if (buffer.len == 0) { - Py_INCREF(unicode_empty); - v = unicode_empty; + PyBuffer_Release(&buffer); + _Py_RETURN_UNICODE_EMPTY(); } - else - v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors); + v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors); PyBuffer_Release(&buffer); return v; } @@ -4201,8 +4207,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s, if (size == 0) { if (consumed) *consumed = 0; - Py_INCREF(unicode_empty); - return unicode_empty; + _Py_RETURN_UNICODE_EMPTY(); } /* Start off assuming it's all ASCII. Widen later as necessary. */ @@ -4609,8 +4614,7 @@ PyUnicode_DecodeUTF8Stateful(const char *s, if (size == 0) { if (consumed) *consumed = 0; - Py_INCREF(unicode_empty); - return unicode_empty; + _Py_RETURN_UNICODE_EMPTY(); } /* ASCII is equivalent to the first 128 ordinals in Unicode. */ @@ -4868,8 +4872,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s, if (q == e) { if (consumed) *consumed = size; - Py_INCREF(unicode_empty); - return unicode_empty; + _Py_RETURN_UNICODE_EMPTY(); } #ifdef WORDS_BIGENDIAN @@ -5108,8 +5111,7 @@ PyUnicode_DecodeUTF16Stateful(const char *s, if (q == e) { if (consumed) *consumed = size; - Py_INCREF(unicode_empty); - return unicode_empty; + _Py_RETURN_UNICODE_EMPTY(); } #if PY_LITTLE_ENDIAN @@ -5386,10 +5388,8 @@ PyUnicode_DecodeUnicodeEscape(const char *s, Py_ssize_t len; len = length_of_escaped_ascii_string(s, size); - if (len == 0) { - Py_INCREF(unicode_empty); - return unicode_empty; - } + if (len == 0) + _Py_RETURN_UNICODE_EMPTY(); /* After length_of_escaped_ascii_string() there are two alternatives, either the string is pure ASCII with named escapes like \n, etc. 
@@ -5781,10 +5781,8 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s, PyObject *errorHandler = NULL; PyObject *exc = NULL; - if (size == 0) { - Py_INCREF(unicode_empty); - return unicode_empty; - } + if (size == 0) + _Py_RETURN_UNICODE_EMPTY(); /* Escaped strings will always be longer than the resulting Unicode string, so we start with size here and then reduce the @@ -5988,10 +5986,8 @@ _PyUnicode_DecodeUnicodeInternal(const char *s, 1)) return NULL; - if (size == 0) { - Py_INCREF(unicode_empty); - return unicode_empty; - } + if (size == 0) + _Py_RETURN_UNICODE_EMPTY(); /* XXX overflow detection missing */ _PyUnicodeWriter_Init(&writer, 0); @@ -6439,10 +6435,8 @@ PyUnicode_DecodeASCII(const char *s, PyObject *errorHandler = NULL; PyObject *exc = NULL; - if (size == 0) { - Py_INCREF(unicode_empty); - return unicode_empty; - } + if (size == 0) + _Py_RETURN_UNICODE_EMPTY(); /* ASCII is equivalent to the first 128 ordinals in Unicode. */ if (size == 1 && (unsigned char)s[0] < 128) @@ -6820,8 +6814,7 @@ decode_code_page_stateful(int code_page, if (chunk_size == 0 && done) { if (v != NULL) break; - Py_INCREF(unicode_empty); - return unicode_empty; + _Py_RETURN_UNICODE_EMPTY(); } @@ -7298,10 +7291,8 @@ PyUnicode_DecodeCharmap(const char *s, if (mapping == NULL) return PyUnicode_DecodeLatin1(s, size, errors); - if (size == 0) { - Py_INCREF(unicode_empty); - return unicode_empty; - } + if (size == 0) + _Py_RETURN_UNICODE_EMPTY(); _PyUnicodeWriter_Init(&writer, 0); if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1) goto onError; @@ -9354,9 +9345,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) /* If empty sequence, return u"". */ if (seqlen == 0) { Py_DECREF(fseq); - Py_INCREF(unicode_empty); - res = unicode_empty; - return res; + _Py_RETURN_UNICODE_EMPTY(); } /* If singleton sequence with an exact Unicode, return that. 
*/ @@ -10056,7 +10045,9 @@ replace(PyObject *self, PyObject *str1, } new_size = slen + n * (len2 - len1); if (new_size == 0) { - Py_INCREF(unicode_empty); + _Py_INCREF_UNICODE_EMPTY(); + if (!unicode_empty) + goto error; u = unicode_empty; goto done; } @@ -11559,10 +11550,8 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end) PyErr_SetString(PyExc_IndexError, "string index out of range"); return NULL; } - if (start >= length || end < start) { - Py_INCREF(unicode_empty); - return unicode_empty; - } + if (start >= length || end < start) + _Py_RETURN_UNICODE_EMPTY(); length = end - start; if (PyUnicode_IS_ASCII(self)) { @@ -11689,10 +11678,8 @@ unicode_repeat(PyObject *str, Py_ssize_t len) PyObject *u; Py_ssize_t nchars, n; - if (len < 1) { - Py_INCREF(unicode_empty); - return unicode_empty; - } + if (len < 1) + _Py_RETURN_UNICODE_EMPTY(); /* no repeat, return original string */ if (len == 1) @@ -12832,8 +12819,7 @@ _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer) { if (writer->pos == 0) { Py_XDECREF(writer->buffer); - Py_INCREF(unicode_empty); - return unicode_empty; + _Py_RETURN_UNICODE_EMPTY(); } if (writer->readonly) { assert(PyUnicode_GET_LENGTH(writer->buffer) == writer->pos); @@ -13051,8 +13037,7 @@ unicode_subscript(PyObject* self, PyObject* item) } if (slicelength <= 0) { - Py_INCREF(unicode_empty); - return unicode_empty; + _Py_RETURN_UNICODE_EMPTY(); } else if (start == 0 && step == 1 && slicelength == PyUnicode_GET_LENGTH(self)) { return unicode_result_unchanged(self); @@ -14056,10 +14041,8 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str", kwlist, &x, &encoding, &errors)) return NULL; - if (x == NULL) { - Py_INCREF(unicode_empty); - return unicode_empty; - } + if (x == NULL) + _Py_RETURN_UNICODE_EMPTY(); if (encoding == NULL && errors == NULL) return PyObject_Str(x); else @@ -14228,8 +14211,6 @@ PyTypeObject PyUnicode_Type = { int _PyUnicode_Init(void) { - int i; 
- /* XXX - move this array to unicodectype.c ? */ Py_UCS2 linebreak[] = { 0x000A, /* LINE FEED */ @@ -14243,13 +14224,11 @@ int _PyUnicode_Init(void) }; /* Init the implementation */ - unicode_empty = PyUnicode_New(0, 0); + _Py_INCREF_UNICODE_EMPTY(); if (!unicode_empty) Py_FatalError("Can't create empty string"); - assert(_PyUnicode_CheckConsistency(unicode_empty, 1)); + Py_DECREF(unicode_empty); - for (i = 0; i < 256; i++) - unicode_latin1[i] = NULL; if (PyType_Ready(&PyUnicode_Type) < 0) Py_FatalError("Can't initialize 'unicode'"); @@ -14289,15 +14268,10 @@ _PyUnicode_Fini(void) { int i; - Py_XDECREF(unicode_empty); - unicode_empty = NULL; + Py_CLEAR(unicode_empty); - for (i = 0; i < 256; i++) { - if (unicode_latin1[i]) { - Py_DECREF(unicode_latin1[i]); - unicode_latin1[i] = NULL; - } - } + for (i = 0; i < 256; i++) + Py_CLEAR(unicode_latin1[i]); _PyUnicode_ClearStaticStrings(); (void)PyUnicode_ClearFreeList(); } @@ -14426,8 +14400,7 @@ _Py_ReleaseInternedUnicodeStrings(void) "mortal/immortal\n", mortal_size, immortal_size); Py_DECREF(keys); PyDict_Clear(interned); - Py_DECREF(interned); - interned = NULL; + Py_CLEAR(interned); } |