diff options
author | Eric Snow <ericsnowcurrently@gmail.com> | 2022-01-11 16:37:24 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-01-11 16:37:24 (GMT) |
commit | cf496d657a1a82eaf9ebfb47d721676fef6effa5 (patch) | |
tree | 6fde71cbde5f2713d77015f1c86baede744a239f | |
parent | 6f05e1ec193c132015e9a23d1137b1731596f186 (diff) | |
download | cpython-cf496d657a1a82eaf9ebfb47d721676fef6effa5.zip cpython-cf496d657a1a82eaf9ebfb47d721676fef6effa5.tar.gz cpython-cf496d657a1a82eaf9ebfb47d721676fef6effa5.tar.bz2 |
bpo-45953: Statically allocate and initialize global bytes objects. (gh-30096)
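Previously, the empty bytes object (b'') was allocated per interpreter during runtime initialization, and each of the 256 one-character bytes objects was allocated and cached per interpreter the first time it was created. Now all of these objects are statically allocated and initialized as singletons in the runtime's global objects struct (`_Py_global_objects`), so `_PyBytes_InitGlobalObjects()` and `_PyBytes_Fini()` are no longer needed and have been removed.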
https://bugs.python.org/issue45953
-rw-r--r-- | Include/internal/pycore_bytesobject.h | 10 | ||||
-rw-r--r-- | Include/internal/pycore_global_objects.h | 280 | ||||
-rw-r--r-- | Include/internal/pycore_interp.h | 2 | ||||
-rw-r--r-- | Objects/bytesobject.c | 92 | ||||
-rw-r--r-- | Python/pylifecycle.c | 6 |
5 files changed, 294 insertions, 96 deletions
diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h index b00ed97..18d9530 100644 --- a/Include/internal/pycore_bytesobject.h +++ b/Include/internal/pycore_bytesobject.h @@ -11,17 +11,7 @@ extern "C" { /* runtime lifecycle */ -extern PyStatus _PyBytes_InitGlobalObjects(PyInterpreterState *); extern PyStatus _PyBytes_InitTypes(PyInterpreterState *); -extern void _PyBytes_Fini(PyInterpreterState *); - - -/* other API */ - -struct _Py_bytes_state { - PyObject *empty_string; - PyBytesObject *characters[256]; -}; #ifdef __cplusplus diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h index 6cae3bc..d2dc907 100644 --- a/Include/internal/pycore_global_objects.h +++ b/Include/internal/pycore_global_objects.h @@ -34,6 +34,20 @@ extern "C" { } +/* bytes objects */ + +#define _PyBytes_SIMPLE_INIT(CH, LEN) \ + { \ + _PyVarObject_IMMORTAL_INIT(&PyBytes_Type, LEN), \ + .ob_shash = -1, \ + .ob_sval = { CH }, \ + } +#define _PyBytes_CHAR_INIT(CH) \ + { \ + _PyBytes_SIMPLE_INIT(CH, 1) \ + } + + /********************** * the global objects * **********************/ @@ -54,6 +68,12 @@ struct _Py_global_objects { * -_PY_NSMALLNEGINTS (inclusive) to _PY_NSMALLPOSINTS (exclusive). 
*/ PyLongObject small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS]; + + PyBytesObject bytes_empty; + struct { + PyBytesObject ob; + char eos; + } bytes_characters[256]; } singletons; }; @@ -323,6 +343,266 @@ struct _Py_global_objects { _PyLong_DIGIT_INIT(255), \ _PyLong_DIGIT_INIT(256), \ }, \ + \ + .bytes_empty = _PyBytes_SIMPLE_INIT(0, 0), \ + .bytes_characters = { \ + _PyBytes_CHAR_INIT(0), \ + _PyBytes_CHAR_INIT(1), \ + _PyBytes_CHAR_INIT(2), \ + _PyBytes_CHAR_INIT(3), \ + _PyBytes_CHAR_INIT(4), \ + _PyBytes_CHAR_INIT(5), \ + _PyBytes_CHAR_INIT(6), \ + _PyBytes_CHAR_INIT(7), \ + _PyBytes_CHAR_INIT(8), \ + _PyBytes_CHAR_INIT(9), \ + _PyBytes_CHAR_INIT(10), \ + _PyBytes_CHAR_INIT(11), \ + _PyBytes_CHAR_INIT(12), \ + _PyBytes_CHAR_INIT(13), \ + _PyBytes_CHAR_INIT(14), \ + _PyBytes_CHAR_INIT(15), \ + _PyBytes_CHAR_INIT(16), \ + _PyBytes_CHAR_INIT(17), \ + _PyBytes_CHAR_INIT(18), \ + _PyBytes_CHAR_INIT(19), \ + _PyBytes_CHAR_INIT(20), \ + _PyBytes_CHAR_INIT(21), \ + _PyBytes_CHAR_INIT(22), \ + _PyBytes_CHAR_INIT(23), \ + _PyBytes_CHAR_INIT(24), \ + _PyBytes_CHAR_INIT(25), \ + _PyBytes_CHAR_INIT(26), \ + _PyBytes_CHAR_INIT(27), \ + _PyBytes_CHAR_INIT(28), \ + _PyBytes_CHAR_INIT(29), \ + _PyBytes_CHAR_INIT(30), \ + _PyBytes_CHAR_INIT(31), \ + _PyBytes_CHAR_INIT(32), \ + _PyBytes_CHAR_INIT(33), \ + _PyBytes_CHAR_INIT(34), \ + _PyBytes_CHAR_INIT(35), \ + _PyBytes_CHAR_INIT(36), \ + _PyBytes_CHAR_INIT(37), \ + _PyBytes_CHAR_INIT(38), \ + _PyBytes_CHAR_INIT(39), \ + _PyBytes_CHAR_INIT(40), \ + _PyBytes_CHAR_INIT(41), \ + _PyBytes_CHAR_INIT(42), \ + _PyBytes_CHAR_INIT(43), \ + _PyBytes_CHAR_INIT(44), \ + _PyBytes_CHAR_INIT(45), \ + _PyBytes_CHAR_INIT(46), \ + _PyBytes_CHAR_INIT(47), \ + _PyBytes_CHAR_INIT(48), \ + _PyBytes_CHAR_INIT(49), \ + _PyBytes_CHAR_INIT(50), \ + _PyBytes_CHAR_INIT(51), \ + _PyBytes_CHAR_INIT(52), \ + _PyBytes_CHAR_INIT(53), \ + _PyBytes_CHAR_INIT(54), \ + _PyBytes_CHAR_INIT(55), \ + _PyBytes_CHAR_INIT(56), \ + _PyBytes_CHAR_INIT(57), \ + 
_PyBytes_CHAR_INIT(58), \ + _PyBytes_CHAR_INIT(59), \ + _PyBytes_CHAR_INIT(60), \ + _PyBytes_CHAR_INIT(61), \ + _PyBytes_CHAR_INIT(62), \ + _PyBytes_CHAR_INIT(63), \ + _PyBytes_CHAR_INIT(64), \ + _PyBytes_CHAR_INIT(65), \ + _PyBytes_CHAR_INIT(66), \ + _PyBytes_CHAR_INIT(67), \ + _PyBytes_CHAR_INIT(68), \ + _PyBytes_CHAR_INIT(69), \ + _PyBytes_CHAR_INIT(70), \ + _PyBytes_CHAR_INIT(71), \ + _PyBytes_CHAR_INIT(72), \ + _PyBytes_CHAR_INIT(73), \ + _PyBytes_CHAR_INIT(74), \ + _PyBytes_CHAR_INIT(75), \ + _PyBytes_CHAR_INIT(76), \ + _PyBytes_CHAR_INIT(77), \ + _PyBytes_CHAR_INIT(78), \ + _PyBytes_CHAR_INIT(79), \ + _PyBytes_CHAR_INIT(80), \ + _PyBytes_CHAR_INIT(81), \ + _PyBytes_CHAR_INIT(82), \ + _PyBytes_CHAR_INIT(83), \ + _PyBytes_CHAR_INIT(84), \ + _PyBytes_CHAR_INIT(85), \ + _PyBytes_CHAR_INIT(86), \ + _PyBytes_CHAR_INIT(87), \ + _PyBytes_CHAR_INIT(88), \ + _PyBytes_CHAR_INIT(89), \ + _PyBytes_CHAR_INIT(90), \ + _PyBytes_CHAR_INIT(91), \ + _PyBytes_CHAR_INIT(92), \ + _PyBytes_CHAR_INIT(93), \ + _PyBytes_CHAR_INIT(94), \ + _PyBytes_CHAR_INIT(95), \ + _PyBytes_CHAR_INIT(96), \ + _PyBytes_CHAR_INIT(97), \ + _PyBytes_CHAR_INIT(98), \ + _PyBytes_CHAR_INIT(99), \ + _PyBytes_CHAR_INIT(100), \ + _PyBytes_CHAR_INIT(101), \ + _PyBytes_CHAR_INIT(102), \ + _PyBytes_CHAR_INIT(103), \ + _PyBytes_CHAR_INIT(104), \ + _PyBytes_CHAR_INIT(105), \ + _PyBytes_CHAR_INIT(106), \ + _PyBytes_CHAR_INIT(107), \ + _PyBytes_CHAR_INIT(108), \ + _PyBytes_CHAR_INIT(109), \ + _PyBytes_CHAR_INIT(110), \ + _PyBytes_CHAR_INIT(111), \ + _PyBytes_CHAR_INIT(112), \ + _PyBytes_CHAR_INIT(113), \ + _PyBytes_CHAR_INIT(114), \ + _PyBytes_CHAR_INIT(115), \ + _PyBytes_CHAR_INIT(116), \ + _PyBytes_CHAR_INIT(117), \ + _PyBytes_CHAR_INIT(118), \ + _PyBytes_CHAR_INIT(119), \ + _PyBytes_CHAR_INIT(120), \ + _PyBytes_CHAR_INIT(121), \ + _PyBytes_CHAR_INIT(122), \ + _PyBytes_CHAR_INIT(123), \ + _PyBytes_CHAR_INIT(124), \ + _PyBytes_CHAR_INIT(125), \ + _PyBytes_CHAR_INIT(126), \ + _PyBytes_CHAR_INIT(127), \ + 
_PyBytes_CHAR_INIT(128), \ + _PyBytes_CHAR_INIT(129), \ + _PyBytes_CHAR_INIT(130), \ + _PyBytes_CHAR_INIT(131), \ + _PyBytes_CHAR_INIT(132), \ + _PyBytes_CHAR_INIT(133), \ + _PyBytes_CHAR_INIT(134), \ + _PyBytes_CHAR_INIT(135), \ + _PyBytes_CHAR_INIT(136), \ + _PyBytes_CHAR_INIT(137), \ + _PyBytes_CHAR_INIT(138), \ + _PyBytes_CHAR_INIT(139), \ + _PyBytes_CHAR_INIT(140), \ + _PyBytes_CHAR_INIT(141), \ + _PyBytes_CHAR_INIT(142), \ + _PyBytes_CHAR_INIT(143), \ + _PyBytes_CHAR_INIT(144), \ + _PyBytes_CHAR_INIT(145), \ + _PyBytes_CHAR_INIT(146), \ + _PyBytes_CHAR_INIT(147), \ + _PyBytes_CHAR_INIT(148), \ + _PyBytes_CHAR_INIT(149), \ + _PyBytes_CHAR_INIT(150), \ + _PyBytes_CHAR_INIT(151), \ + _PyBytes_CHAR_INIT(152), \ + _PyBytes_CHAR_INIT(153), \ + _PyBytes_CHAR_INIT(154), \ + _PyBytes_CHAR_INIT(155), \ + _PyBytes_CHAR_INIT(156), \ + _PyBytes_CHAR_INIT(157), \ + _PyBytes_CHAR_INIT(158), \ + _PyBytes_CHAR_INIT(159), \ + _PyBytes_CHAR_INIT(160), \ + _PyBytes_CHAR_INIT(161), \ + _PyBytes_CHAR_INIT(162), \ + _PyBytes_CHAR_INIT(163), \ + _PyBytes_CHAR_INIT(164), \ + _PyBytes_CHAR_INIT(165), \ + _PyBytes_CHAR_INIT(166), \ + _PyBytes_CHAR_INIT(167), \ + _PyBytes_CHAR_INIT(168), \ + _PyBytes_CHAR_INIT(169), \ + _PyBytes_CHAR_INIT(170), \ + _PyBytes_CHAR_INIT(171), \ + _PyBytes_CHAR_INIT(172), \ + _PyBytes_CHAR_INIT(173), \ + _PyBytes_CHAR_INIT(174), \ + _PyBytes_CHAR_INIT(175), \ + _PyBytes_CHAR_INIT(176), \ + _PyBytes_CHAR_INIT(177), \ + _PyBytes_CHAR_INIT(178), \ + _PyBytes_CHAR_INIT(179), \ + _PyBytes_CHAR_INIT(180), \ + _PyBytes_CHAR_INIT(181), \ + _PyBytes_CHAR_INIT(182), \ + _PyBytes_CHAR_INIT(183), \ + _PyBytes_CHAR_INIT(184), \ + _PyBytes_CHAR_INIT(185), \ + _PyBytes_CHAR_INIT(186), \ + _PyBytes_CHAR_INIT(187), \ + _PyBytes_CHAR_INIT(188), \ + _PyBytes_CHAR_INIT(189), \ + _PyBytes_CHAR_INIT(190), \ + _PyBytes_CHAR_INIT(191), \ + _PyBytes_CHAR_INIT(192), \ + _PyBytes_CHAR_INIT(193), \ + _PyBytes_CHAR_INIT(194), \ + _PyBytes_CHAR_INIT(195), \ + _PyBytes_CHAR_INIT(196), \ 
+ _PyBytes_CHAR_INIT(197), \ + _PyBytes_CHAR_INIT(198), \ + _PyBytes_CHAR_INIT(199), \ + _PyBytes_CHAR_INIT(200), \ + _PyBytes_CHAR_INIT(201), \ + _PyBytes_CHAR_INIT(202), \ + _PyBytes_CHAR_INIT(203), \ + _PyBytes_CHAR_INIT(204), \ + _PyBytes_CHAR_INIT(205), \ + _PyBytes_CHAR_INIT(206), \ + _PyBytes_CHAR_INIT(207), \ + _PyBytes_CHAR_INIT(208), \ + _PyBytes_CHAR_INIT(209), \ + _PyBytes_CHAR_INIT(210), \ + _PyBytes_CHAR_INIT(211), \ + _PyBytes_CHAR_INIT(212), \ + _PyBytes_CHAR_INIT(213), \ + _PyBytes_CHAR_INIT(214), \ + _PyBytes_CHAR_INIT(215), \ + _PyBytes_CHAR_INIT(216), \ + _PyBytes_CHAR_INIT(217), \ + _PyBytes_CHAR_INIT(218), \ + _PyBytes_CHAR_INIT(219), \ + _PyBytes_CHAR_INIT(220), \ + _PyBytes_CHAR_INIT(221), \ + _PyBytes_CHAR_INIT(222), \ + _PyBytes_CHAR_INIT(223), \ + _PyBytes_CHAR_INIT(224), \ + _PyBytes_CHAR_INIT(225), \ + _PyBytes_CHAR_INIT(226), \ + _PyBytes_CHAR_INIT(227), \ + _PyBytes_CHAR_INIT(228), \ + _PyBytes_CHAR_INIT(229), \ + _PyBytes_CHAR_INIT(230), \ + _PyBytes_CHAR_INIT(231), \ + _PyBytes_CHAR_INIT(232), \ + _PyBytes_CHAR_INIT(233), \ + _PyBytes_CHAR_INIT(234), \ + _PyBytes_CHAR_INIT(235), \ + _PyBytes_CHAR_INIT(236), \ + _PyBytes_CHAR_INIT(237), \ + _PyBytes_CHAR_INIT(238), \ + _PyBytes_CHAR_INIT(239), \ + _PyBytes_CHAR_INIT(240), \ + _PyBytes_CHAR_INIT(241), \ + _PyBytes_CHAR_INIT(242), \ + _PyBytes_CHAR_INIT(243), \ + _PyBytes_CHAR_INIT(244), \ + _PyBytes_CHAR_INIT(245), \ + _PyBytes_CHAR_INIT(246), \ + _PyBytes_CHAR_INIT(247), \ + _PyBytes_CHAR_INIT(248), \ + _PyBytes_CHAR_INIT(249), \ + _PyBytes_CHAR_INIT(250), \ + _PyBytes_CHAR_INIT(251), \ + _PyBytes_CHAR_INIT(252), \ + _PyBytes_CHAR_INIT(253), \ + _PyBytes_CHAR_INIT(254), \ + _PyBytes_CHAR_INIT(255), \ + }, \ }, \ } diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index e4d7b1b..d48ea87 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -10,7 +10,6 @@ extern "C" { #include "pycore_atomic.h" // _Py_atomic_address 
#include "pycore_ast_state.h" // struct ast_state -#include "pycore_bytesobject.h" // struct _Py_bytes_state #include "pycore_context.h" // struct _Py_context_state #include "pycore_dict.h" // struct _Py_dict_state #include "pycore_exceptions.h" // struct _Py_exc_state @@ -152,7 +151,6 @@ struct _is { PyObject *audit_hooks; - struct _Py_bytes_state bytes; struct _Py_unicode_state unicode; struct _Py_float_state float_state; /* Using a cache is very effective since typically only a single slice is diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 2f7e0a6..85d6912 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -5,9 +5,9 @@ #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() #include "pycore_bytes_methods.h" // _Py_bytes_startswith() -#include "pycore_bytesobject.h" // struct _Py_bytes_state #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_format.h" // F_LJUST +#include "pycore_global_objects.h" // _Py_GET_GLOBAL_OBJECT() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_long.h" // _PyLong_DigitValue #include "pycore_object.h" // _PyObject_GC_TRACK @@ -38,49 +38,24 @@ Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str); -static struct _Py_bytes_state* -get_bytes_state(void) -{ - PyInterpreterState *interp = _PyInterpreterState_GET(); - return &interp->bytes; -} +#define CHARACTERS _Py_SINGLETON(bytes_characters) +#define CHARACTER(ch) \ + ((PyBytesObject *)&(CHARACTERS[ch])); +#define EMPTY (&_Py_SINGLETON(bytes_empty)) // Return a borrowed reference to the empty bytes string singleton. static inline PyObject* bytes_get_empty(void) { - struct _Py_bytes_state *state = get_bytes_state(); - // bytes_get_empty() must not be called before _PyBytes_Init() - // or after _PyBytes_Fini() - assert(state->empty_string != NULL); - return state->empty_string; + return &EMPTY->ob_base.ob_base; } // Return a strong reference to the empty bytes string singleton. 
static inline PyObject* bytes_new_empty(void) { - PyObject *empty = bytes_get_empty(); - Py_INCREF(empty); - return (PyObject *)empty; -} - - -static int -bytes_create_empty_string_singleton(struct _Py_bytes_state *state) -{ - // Create the empty bytes string singleton - PyBytesObject *op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE); - if (op == NULL) { - return -1; - } - _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, 0); - op->ob_shash = -1; - op->ob_sval[0] = '\0'; - - assert(state->empty_string == NULL); - state->empty_string = (PyObject *)op; - return 0; + Py_INCREF(EMPTY); + return (PyObject *)EMPTY; } @@ -148,12 +123,9 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) return NULL; } if (size == 1 && str != NULL) { - struct _Py_bytes_state *state = get_bytes_state(); - op = state->characters[*str & UCHAR_MAX]; - if (op != NULL) { - Py_INCREF(op); - return (PyObject *)op; - } + op = CHARACTER(*str & 255); + Py_INCREF(op); + return (PyObject *)op; } if (size == 0) { return bytes_new_empty(); @@ -166,12 +138,6 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) return (PyObject *) op; memcpy(op->ob_sval, str, size); - /* share short strings */ - if (size == 1) { - struct _Py_bytes_state *state = get_bytes_state(); - Py_INCREF(op); - state->characters[*str & UCHAR_MAX] = op; - } return (PyObject *) op; } @@ -189,16 +155,13 @@ PyBytes_FromString(const char *str) return NULL; } - struct _Py_bytes_state *state = get_bytes_state(); if (size == 0) { return bytes_new_empty(); } else if (size == 1) { - op = state->characters[*str & UCHAR_MAX]; - if (op != NULL) { - Py_INCREF(op); - return (PyObject *)op; - } + op = CHARACTER(*str & 255); + Py_INCREF(op); + return (PyObject *)op; } /* Inline PyObject_NewVar */ @@ -209,12 +172,6 @@ PyBytes_FromString(const char *str) _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size); op->ob_shash = -1; memcpy(op->ob_sval, str, size+1); - /* share short strings */ - if (size == 1) { - 
assert(state->characters[*str & UCHAR_MAX] == NULL); - Py_INCREF(op); - state->characters[*str & UCHAR_MAX] = op; - } return (PyObject *) op; } @@ -3087,17 +3044,6 @@ error: PyStatus -_PyBytes_InitGlobalObjects(PyInterpreterState *interp) -{ - struct _Py_bytes_state *state = &interp->bytes; - if (bytes_create_empty_string_singleton(state) < 0) { - return _PyStatus_NO_MEMORY(); - } - return _PyStatus_OK(); -} - - -PyStatus _PyBytes_InitTypes(PyInterpreterState *interp) { if (!_Py_IsMainInterpreter(interp)) { @@ -3116,16 +3062,6 @@ _PyBytes_InitTypes(PyInterpreterState *interp) } -void -_PyBytes_Fini(PyInterpreterState *interp) -{ - struct _Py_bytes_state* state = &interp->bytes; - for (int i = 0; i < UCHAR_MAX + 1; i++) { - Py_CLEAR(state->characters[i]); - } - Py_CLEAR(state->empty_string); -} - /*********************** Bytes Iterator ****************************/ typedef struct { diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index b2f58f4..284cfac 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -678,11 +678,6 @@ pycore_init_global_objects(PyInterpreterState *interp) _PyFloat_InitState(interp); - status = _PyBytes_InitGlobalObjects(interp); - if (_PyStatus_EXCEPTION(status)) { - return status; - } - status = _PyUnicode_InitGlobalObjects(interp); if (_PyStatus_EXCEPTION(status)) { return status; @@ -1685,7 +1680,6 @@ finalize_interp_types(PyInterpreterState *interp) _PySlice_Fini(interp); - _PyBytes_Fini(interp); _PyUnicode_Fini(interp); _PyFloat_Fini(interp); } |