diff options
-rw-r--r-- | Misc/NEWS | 3 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 97 |
2 files changed, 48 insertions, 52 deletions
@@ -10,6 +10,9 @@ What's New in Python 3.2.4 Core and Builtins ----------------- +- Issue #10156: In the interpreter's initialization phase, unicode globals + are now initialized dynamically as needed. + - Issue #16975: Fix error handling bug in the escape-decode bytes decoder. - Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping" diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ddd8d53..92d1777 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -80,8 +80,9 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. /* --- Globals ------------------------------------------------------------ - The globals are initialized by the _PyUnicode_Init() API and should - not be used before calling that API. +NOTE: In the interpreter's initialization phase, some globals are currently + initialized dynamically as needed. In the process Unicode objects may + be created before the Unicode type is ready. */ @@ -98,18 +99,30 @@ extern "C" { Another way to look at this is that to say that the actual reference count of a string is: s->ob_refcnt + (s->state ? 2 : 0) */ -static PyObject *interned; +static PyObject *interned = NULL; /* Free list for Unicode objects */ -static PyUnicodeObject *free_list; -static int numfree; +static PyUnicodeObject *free_list = NULL; +static int numfree = 0; /* The empty Unicode object is shared to improve performance. */ -static PyUnicodeObject *unicode_empty; +static PyUnicodeObject *unicode_empty = NULL; + +#define _Py_RETURN_UNICODE_EMPTY() \ + do { \ + if (unicode_empty != NULL) \ + Py_INCREF(unicode_empty); \ + else { \ + unicode_empty = _PyUnicode_New(0); \ + if (unicode_empty != NULL) \ + Py_INCREF(unicode_empty); \ + } \ + return (PyObject *)unicode_empty; \ + } while (0) /* Single character Unicode strings in the Latin-1 range are being shared as well. */ -static PyUnicodeObject *unicode_latin1[256]; +static PyUnicodeObject *unicode_latin1[256] = {NULL}; /* Fast detection of the most frequent whitespace characters */ const unsigned char _Py_ascii_whitespace[] = { @@ -214,7 +227,7 @@ PyUnicode_GetMax(void) #define BLOOM_MASK unsigned long -static BLOOM_MASK bloom_linebreak; +static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0; #define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1))))) #define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1))))) @@ -479,10 +492,8 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u, if (u != NULL) { /* Optimization for empty strings */ - if (size == 0 && unicode_empty != NULL) { - Py_INCREF(unicode_empty); - return (PyObject *)unicode_empty; - } + if (size == 0) + _Py_RETURN_UNICODE_EMPTY(); /* Single character Unicode objects in the Latin-1 range are shared when using this constructor */ @@ -528,10 +539,8 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) if (u != NULL) { /* Optimization for empty strings */ - if (size == 0 && unicode_empty != NULL) { - Py_INCREF(unicode_empty); - return (PyObject *)unicode_empty; - } + if (size == 0) + _Py_RETURN_UNICODE_EMPTY(); /* Single characters are shared when using this constructor. Restrict to ASCII, since the input must be UTF-8. */ @@ -1393,15 +1402,11 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj, /* Decoding bytes objects is the most common case and should be fast */ if (PyBytes_Check(obj)) { - if (PyBytes_GET_SIZE(obj) == 0) { - Py_INCREF(unicode_empty); - v = (PyObject *) unicode_empty; - } - else { - v = PyUnicode_Decode( - PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj), - encoding, errors); - } + if (PyBytes_GET_SIZE(obj) == 0) + _Py_RETURN_UNICODE_EMPTY(); + v = PyUnicode_Decode( + PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj), + encoding, errors); return v; } @@ -1421,12 +1426,11 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj, } if (buffer.len == 0) { - Py_INCREF(unicode_empty); - v = (PyObject *) unicode_empty; + PyBuffer_Release(&buffer); + _Py_RETURN_UNICODE_EMPTY(); } - else - v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors); + v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors); PyBuffer_Release(&buffer); return v; } @@ -8323,10 +8327,8 @@ unicode_repeat(PyUnicodeObject *str, Py_ssize_t len) Py_ssize_t nchars; size_t nbytes; - if (len < 1) { - Py_INCREF(unicode_empty); - return (PyObject *)unicode_empty; - } + if (len < 1) + _Py_RETURN_UNICODE_EMPTY(); if (len == 1 && PyUnicode_CheckExact(str)) { /* no repeat, return original string */ @@ -10056,8 +10058,6 @@ PyTypeObject PyUnicode_Type = { void _PyUnicode_Init(void) { - int i; - /* XXX - move this array to unicodectype.c ? */ Py_UNICODE linebreak[] = { 0x000A, /* LINE FEED */ @@ -10071,14 +10071,12 @@ void _PyUnicode_Init(void) }; /* Init the implementation */ - free_list = NULL; - numfree = 0; - unicode_empty = _PyUnicode_New(0); - if (!unicode_empty) - return; + if (!unicode_empty) { + unicode_empty = _PyUnicode_New(0); + if (!unicode_empty) + return; + } - for (i = 0; i < 256; i++) - unicode_latin1[i] = NULL; if (PyType_Ready(&PyUnicode_Type) < 0) Py_FatalError("Can't initialize 'unicode'"); @@ -10123,15 +10121,11 @@ _PyUnicode_Fini(void) { int i; - Py_XDECREF(unicode_empty); - unicode_empty = NULL; + Py_CLEAR(unicode_empty); + + for (i = 0; i < 256; i++) + Py_CLEAR(unicode_latin1[i]); - for (i = 0; i < 256; i++) { - if (unicode_latin1[i]) { - Py_DECREF(unicode_latin1[i]); - unicode_latin1[i] = NULL; - } - } (void)PyUnicode_ClearFreeList(); } @@ -10250,8 +10244,7 @@ void _Py_ReleaseInternedUnicodeStrings(void) "mortal/immortal\n", mortal_size, immortal_size); Py_DECREF(keys); PyDict_Clear(interned); - Py_DECREF(interned); - interned = NULL; + Py_CLEAR(interned); } |