diff options
author | Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> | 2022-03-09 23:02:00 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-03-09 23:02:00 (GMT) |
commit | 8714b6fa27271035dd6dd3514e283f92d669321d (patch) | |
tree | ab3d0c0a10753d3a28515831a22817a348b53c91 /Objects | |
parent | e801e88744f34508aa338f9f7f3f3baee012f813 (diff) | |
download | cpython-8714b6fa27271035dd6dd3514e283f92d669321d.zip cpython-8714b6fa27271035dd6dd3514e283f92d669321d.tar.gz cpython-8714b6fa27271035dd6dd3514e283f92d669321d.tar.bz2 |
bpo-46881: Statically allocate and initialize the latin1 characters. (GH-31616)
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 64 |
1 files changed, 14 insertions, 50 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 908ad51..9052c53 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -206,6 +206,11 @@ extern "C" {
         *_to++ = (to_type) *_iter++; \
     } while (0)
 
+#define LATIN1(ch) \
+    (ch < 128 \
+     ? (PyObject*)&_Py_SINGLETON(strings).ascii[ch] \
+     : (PyObject*)&_Py_SINGLETON(strings).latin1[ch - 128])
+
 #ifdef MS_WINDOWS
    /* On Windows, overallocate by 50% is the best factor */
 #  define OVERALLOCATE_FACTOR 2
@@ -249,14 +254,6 @@ static int unicode_is_singleton(PyObject *unicode);
 #endif
 
 
-static struct _Py_unicode_state*
-get_unicode_state(void)
-{
-    PyInterpreterState *interp = _PyInterpreterState_GET();
-    return &interp->unicode;
-}
-
-
 // Return a borrowed reference to the empty string singleton.
 static inline PyObject* unicode_get_empty(void)
 {
@@ -680,24 +677,10 @@ unicode_result_ready(PyObject *unicode)
         if (kind == PyUnicode_1BYTE_KIND) {
             const Py_UCS1 *data = PyUnicode_1BYTE_DATA(unicode);
             Py_UCS1 ch = data[0];
-            struct _Py_unicode_state *state = get_unicode_state();
-            PyObject *latin1_char = state->latin1[ch];
-            if (latin1_char != NULL) {
-                if (unicode != latin1_char) {
-                    Py_INCREF(latin1_char);
-                    Py_DECREF(unicode);
-                }
-                return latin1_char;
+            if (unicode != LATIN1(ch)) {
+                Py_DECREF(unicode);
             }
-            else {
-                assert(_PyUnicode_CheckConsistency(unicode, 1));
-                Py_INCREF(unicode);
-                state->latin1[ch] = unicode;
-                return unicode;
-            }
-        }
-        else {
-            assert(PyUnicode_READ_CHAR(unicode, 0) >= 256);
+            return get_latin1_char(ch);
         }
     }
 
@@ -1990,11 +1973,10 @@ unicode_is_singleton(PyObject *unicode)
         return 1;
     }
 
-    struct _Py_unicode_state *state = get_unicode_state();
     PyASCIIObject *ascii = (PyASCIIObject *)unicode;
     if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1) {
         Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
-        if (ch < 256 && state->latin1[ch] == unicode) {
+        if (ch < 256 && LATIN1(ch) == unicode) {
             return 1;
         }
     }
@@ -2137,25 +2119,7 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
 static PyObject*
 get_latin1_char(Py_UCS1 ch)
 {
-    struct _Py_unicode_state *state = get_unicode_state();
-
-    PyObject *unicode = state->latin1[ch];
-    if (unicode) {
-        Py_INCREF(unicode);
-        return unicode;
-    }
-
-    unicode = PyUnicode_New(1, ch);
-    if (!unicode) {
-        return NULL;
-    }
-
-    PyUnicode_1BYTE_DATA(unicode)[0] = ch;
-    assert(_PyUnicode_CheckConsistency(unicode, 1));
-
-    Py_INCREF(unicode);
-    state->latin1[ch] = unicode;
-    return unicode;
+    return Py_NewRef(LATIN1(ch));
 }
 
 static PyObject*
@@ -15535,6 +15499,10 @@ _PyUnicode_InitGlobalObjects(PyInterpreterState *interp)
 
 #ifdef Py_DEBUG
     assert(_PyUnicode_CheckConsistency(&_Py_STR(empty), 1));
+
+    for (int i = 0; i < 256; i++) {
+        assert(_PyUnicode_CheckConsistency(LATIN1(i), 1));
+    }
 #endif
 
     return _PyStatus_OK();
@@ -16113,10 +16081,6 @@ _PyUnicode_Fini(PyInterpreterState *interp)
     _PyUnicode_FiniEncodings(&state->fs_codec);
 
     unicode_clear_identifiers(state);
-
-    for (Py_ssize_t i = 0; i < 256; i++) {
-        Py_CLEAR(state->latin1[i]);
-    }
 }