summary | refs | log | tree | commit | diff | stats
path: root/Objects
diff options
context:
space:
mode:
author Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> 2022-03-09 23:02:00 (GMT)
committer GitHub <noreply@github.com> 2022-03-09 23:02:00 (GMT)
commit 8714b6fa27271035dd6dd3514e283f92d669321d (patch)
tree ab3d0c0a10753d3a28515831a22817a348b53c91 /Objects
parent e801e88744f34508aa338f9f7f3f3baee012f813 (diff)
download cpython-8714b6fa27271035dd6dd3514e283f92d669321d.zip
cpython-8714b6fa27271035dd6dd3514e283f92d669321d.tar.gz
cpython-8714b6fa27271035dd6dd3514e283f92d669321d.tar.bz2
bpo-46881: Statically allocate and initialize the latin1 characters. (GH-31616)
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/unicodeobject.c 64
1 files changed, 14 insertions, 50 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 908ad51..9052c53 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -206,6 +206,11 @@ extern "C" {
*_to++ = (to_type) *_iter++; \
} while (0)
+#define LATIN1(ch) \
+ (ch < 128 \
+ ? (PyObject*)&_Py_SINGLETON(strings).ascii[ch] \
+ : (PyObject*)&_Py_SINGLETON(strings).latin1[ch - 128])
+
#ifdef MS_WINDOWS
/* On Windows, overallocate by 50% is the best factor */
# define OVERALLOCATE_FACTOR 2
@@ -249,14 +254,6 @@ static int unicode_is_singleton(PyObject *unicode);
#endif
-static struct _Py_unicode_state*
-get_unicode_state(void)
-{
- PyInterpreterState *interp = _PyInterpreterState_GET();
- return &interp->unicode;
-}
-
-
// Return a borrowed reference to the empty string singleton.
static inline PyObject* unicode_get_empty(void)
{
@@ -680,24 +677,10 @@ unicode_result_ready(PyObject *unicode)
if (kind == PyUnicode_1BYTE_KIND) {
const Py_UCS1 *data = PyUnicode_1BYTE_DATA(unicode);
Py_UCS1 ch = data[0];
- struct _Py_unicode_state *state = get_unicode_state();
- PyObject *latin1_char = state->latin1[ch];
- if (latin1_char != NULL) {
- if (unicode != latin1_char) {
- Py_INCREF(latin1_char);
- Py_DECREF(unicode);
- }
- return latin1_char;
+ if (unicode != LATIN1(ch)) {
+ Py_DECREF(unicode);
}
- else {
- assert(_PyUnicode_CheckConsistency(unicode, 1));
- Py_INCREF(unicode);
- state->latin1[ch] = unicode;
- return unicode;
- }
- }
- else {
- assert(PyUnicode_READ_CHAR(unicode, 0) >= 256);
+ return get_latin1_char(ch);
}
}
@@ -1990,11 +1973,10 @@ unicode_is_singleton(PyObject *unicode)
return 1;
}
- struct _Py_unicode_state *state = get_unicode_state();
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1) {
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
- if (ch < 256 && state->latin1[ch] == unicode) {
+ if (ch < 256 && LATIN1(ch) == unicode) {
return 1;
}
}
@@ -2137,25 +2119,7 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
static PyObject*
get_latin1_char(Py_UCS1 ch)
{
- struct _Py_unicode_state *state = get_unicode_state();
-
- PyObject *unicode = state->latin1[ch];
- if (unicode) {
- Py_INCREF(unicode);
- return unicode;
- }
-
- unicode = PyUnicode_New(1, ch);
- if (!unicode) {
- return NULL;
- }
-
- PyUnicode_1BYTE_DATA(unicode)[0] = ch;
- assert(_PyUnicode_CheckConsistency(unicode, 1));
-
- Py_INCREF(unicode);
- state->latin1[ch] = unicode;
- return unicode;
+ return Py_NewRef(LATIN1(ch));
}
static PyObject*
@@ -15535,6 +15499,10 @@ _PyUnicode_InitGlobalObjects(PyInterpreterState *interp)
#ifdef Py_DEBUG
assert(_PyUnicode_CheckConsistency(&_Py_STR(empty), 1));
+
+ for (int i = 0; i < 256; i++) {
+ assert(_PyUnicode_CheckConsistency(LATIN1(i), 1));
+ }
#endif
return _PyStatus_OK();
@@ -16113,10 +16081,6 @@ _PyUnicode_Fini(PyInterpreterState *interp)
_PyUnicode_FiniEncodings(&state->fs_codec);
unicode_clear_identifiers(state);
-
- for (Py_ssize_t i = 0; i < 256; i++) {
- Py_CLEAR(state->latin1[i]);
- }
}