summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Victor Stinner <vstinner@python.org>, 2020-06-23 22:34:07 (GMT)
committer: GitHub <noreply@github.com>, 2020-06-23 22:34:07 (GMT)
commit: 90ed8a6d71b2d6e0853c14e8e6f85fe730a4329a (patch)
tree: d0142c2ad764ad5381218ecb462493f78872e137
parent: f363d0a6e9cfa50677a6de203735fbc0d06c2f49 (diff)
downloadcpython-90ed8a6d71b2d6e0853c14e8e6f85fe730a4329a.zip
cpython-90ed8a6d71b2d6e0853c14e8e6f85fe730a4329a.tar.gz
cpython-90ed8a6d71b2d6e0853c14e8e6f85fe730a4329a.tar.bz2
bpo-40521: Optimize PyUnicode_New(0, maxchar) (GH-21099)
Functions of unicodeobject.c, like PyUnicode_New(), no longer check whether the empty Unicode singleton has been initialized; they now assume that it always is. Consequently, the Unicode API must not be used before _PyUnicode_Init() or after _PyUnicode_Fini().
-rw-r--r-- Objects/unicodeobject.c 80
1 file changed, 25 insertions, 55 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 06ca7a5..e4235b1 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -231,28 +231,19 @@ get_unicode_state(void)
// Return a borrowed reference to the empty string singleton.
-// Return NULL if the singleton was not created yet.
static inline PyObject* unicode_get_empty(void)
{
struct _Py_unicode_state *state = get_unicode_state();
+ // unicode_get_empty() must not be called before _PyUnicode_Init()
+ // or after _PyUnicode_Fini()
+ assert(state->empty != NULL);
return state->empty;
}
static inline PyObject* unicode_new_empty(void)
{
- struct _Py_unicode_state *state = get_unicode_state();
- PyObject *empty = state->empty;
- if (empty != NULL) {
- Py_INCREF(empty);
- }
- else {
- empty = PyUnicode_New(0, 0);
- if (empty != NULL) {
- Py_INCREF(empty);
- assert(_PyUnicode_CheckConsistency(empty, 1));
- state->empty = empty;
- }
- }
+ PyObject *empty = unicode_get_empty();
+ Py_INCREF(empty);
return empty;
}
@@ -696,12 +687,9 @@ unicode_result_ready(PyObject *unicode)
PyObject *empty = unicode_get_empty();
if (unicode != empty) {
Py_DECREF(unicode);
-
Py_INCREF(empty);
- return empty;
}
- // unicode is the empty string singleton
- return unicode;
+ return empty;
}
#ifdef LATIN1_SINGLETONS
@@ -959,7 +947,7 @@ ensure_unicode(PyObject *obj)
/* Compilation of templated routines */
-#define STRINGLIB_GET_EMPTY() unicode_get_empty()
+#define STRINGLIB_GET_EMPTY() unicode_get_empty()
#include "stringlib/asciilib.h"
#include "stringlib/fastsearch.h"
@@ -1260,11 +1248,7 @@ _PyUnicode_New(Py_ssize_t length)
/* Optimization for empty strings */
if (length == 0) {
- PyObject *empty = unicode_get_empty();
- if (empty != NULL) {
- Py_INCREF(empty);
- return (PyUnicodeObject *)empty;
- }
+ return (PyUnicodeObject *)unicode_new_empty();
}
/* Ensure we won't overflow the size. */
@@ -1416,11 +1400,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
{
/* Optimization for empty strings */
if (size == 0) {
- PyObject *empty = unicode_get_empty();
- if (empty != NULL) {
- Py_INCREF(empty);
- return empty;
- }
+ return unicode_new_empty();
}
PyObject *obj;
@@ -2001,8 +1981,7 @@ unicode_dealloc(PyObject *unicode)
static int
unicode_is_singleton(PyObject *unicode)
{
- struct _Py_unicode_state *state = get_unicode_state();
- if (unicode == state->empty) {
+ if (unicode == unicode_get_empty()) {
return 1;
}
#ifdef LATIN1_SINGLETONS
@@ -2059,8 +2038,6 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length)
if (length == 0) {
PyObject *empty = unicode_new_empty();
- if (!empty)
- return -1;
Py_SETREF(*p_unicode, empty);
return 0;
}
@@ -10868,10 +10845,7 @@ replace(PyObject *self, PyObject *str1,
}
new_size = slen + n * (len2 - len1);
if (new_size == 0) {
- PyObject *empty = unicode_new_empty();
- if (!empty)
- goto error;
- u = empty;
+ u = unicode_new_empty();
goto done;
}
if (new_size > (PY_SSIZE_T_MAX / rkind)) {
@@ -13293,13 +13267,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
len2 = PyUnicode_GET_LENGTH(sep_obj);
if (kind1 < kind2 || len1 < len2) {
PyObject *empty = unicode_get_empty(); // Borrowed reference
- if (!empty) {
- out = NULL;
- }
- else {
- out = PyTuple_Pack(3, str_obj, empty, empty);
- }
- return out;
+ return PyTuple_Pack(3, str_obj, empty, empty);
}
buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sep_obj);
@@ -13351,13 +13319,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
len2 = PyUnicode_GET_LENGTH(sep_obj);
if (kind1 < kind2 || len1 < len2) {
PyObject *empty = unicode_get_empty(); // Borrowed reference
- if (!empty) {
- out = NULL;
- }
- else {
- out = PyTuple_Pack(3, empty, empty, str_obj);
- }
- return out;
+ return PyTuple_Pack(3, empty, empty, str_obj);
}
buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sep_obj);
@@ -15589,12 +15551,20 @@ _PyUnicode_Init(PyThreadState *tstate)
0x2029, /* PARAGRAPH SEPARATOR */
};
- /* Init the implementation */
- PyObject *empty = unicode_new_empty();
- if (!empty) {
+ // Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be
+ // optimized to always use state->empty without having to check if it is
+ // NULL or not.
+ PyObject *empty = PyUnicode_New(1, 0);
+ if (empty == NULL) {
return _PyStatus_NO_MEMORY();
}
- Py_DECREF(empty);
+ PyUnicode_1BYTE_DATA(empty)[0] = 0;
+ _PyUnicode_LENGTH(empty) = 0;
+ assert(_PyUnicode_CheckConsistency(empty, 1));
+
+ struct _Py_unicode_state *state = &tstate->interp->unicode;
+ assert(state->empty == NULL);
+ state->empty = empty;
if (_Py_IsMainInterpreter(tstate)) {
/* initialize the linebreak bloom filter */