From 6dab8c95bd8db18e09619d804a938ab3e46042fc Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Sat, 3 Sep 2022 12:13:08 +0530 Subject: GH-96458: Statically initialize utf8 representation of static strings (#96481) --- Include/internal/pycore_runtime_init.h | 4 +- Include/internal/pycore_runtime_init_generated.h | 256 +++++++++++------------ Include/internal/pycore_unicodeobject.h | 1 - Objects/unicodeobject.c | 33 --- Tools/scripts/deepfreeze.py | 4 +- Tools/scripts/generate_global_objects.py | 6 +- 6 files changed, 139 insertions(+), 165 deletions(-) diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index c14d259..621d5cc 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -113,10 +113,12 @@ extern "C" { ._ ## NAME = _PyASCIIObject_INIT(LITERAL) #define INIT_ID(NAME) \ ._ ## NAME = _PyASCIIObject_INIT(#NAME) -#define _PyUnicode_LATIN1_INIT(LITERAL) \ +#define _PyUnicode_LATIN1_INIT(LITERAL, UTF8) \ { \ ._latin1 = { \ ._base = _PyUnicode_ASCII_BASE_INIT((LITERAL), 0), \ + .utf8 = (UTF8), \ + .utf8_length = sizeof(UTF8) - 1, \ }, \ ._data = (LITERAL), \ } diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 65ab098..7a76077 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1287,134 +1287,134 @@ extern "C" { _PyASCIIObject_INIT("\x7f"), \ }, \ .latin1 = { \ - _PyUnicode_LATIN1_INIT("\x80"), \ - _PyUnicode_LATIN1_INIT("\x81"), \ - _PyUnicode_LATIN1_INIT("\x82"), \ - _PyUnicode_LATIN1_INIT("\x83"), \ - _PyUnicode_LATIN1_INIT("\x84"), \ - _PyUnicode_LATIN1_INIT("\x85"), \ - _PyUnicode_LATIN1_INIT("\x86"), \ - _PyUnicode_LATIN1_INIT("\x87"), \ - _PyUnicode_LATIN1_INIT("\x88"), \ - _PyUnicode_LATIN1_INIT("\x89"), \ - _PyUnicode_LATIN1_INIT("\x8a"), \ - _PyUnicode_LATIN1_INIT("\x8b"), \ - _PyUnicode_LATIN1_INIT("\x8c"), \ - _PyUnicode_LATIN1_INIT("\x8d"), \ - _PyUnicode_LATIN1_INIT("\x8e"), \ - _PyUnicode_LATIN1_INIT("\x8f"), \ - _PyUnicode_LATIN1_INIT("\x90"), \ - _PyUnicode_LATIN1_INIT("\x91"), \ - _PyUnicode_LATIN1_INIT("\x92"), \ - _PyUnicode_LATIN1_INIT("\x93"), \ - _PyUnicode_LATIN1_INIT("\x94"), \ - _PyUnicode_LATIN1_INIT("\x95"), \ - _PyUnicode_LATIN1_INIT("\x96"), \ - _PyUnicode_LATIN1_INIT("\x97"), \ - _PyUnicode_LATIN1_INIT("\x98"), \ - _PyUnicode_LATIN1_INIT("\x99"), \ - _PyUnicode_LATIN1_INIT("\x9a"), \ - _PyUnicode_LATIN1_INIT("\x9b"), \ - _PyUnicode_LATIN1_INIT("\x9c"), \ - _PyUnicode_LATIN1_INIT("\x9d"), \ - _PyUnicode_LATIN1_INIT("\x9e"), \ - _PyUnicode_LATIN1_INIT("\x9f"), \ - _PyUnicode_LATIN1_INIT("\xa0"), \ - _PyUnicode_LATIN1_INIT("\xa1"), \ - _PyUnicode_LATIN1_INIT("\xa2"), \ - _PyUnicode_LATIN1_INIT("\xa3"), \ - _PyUnicode_LATIN1_INIT("\xa4"), \ - _PyUnicode_LATIN1_INIT("\xa5"), \ - _PyUnicode_LATIN1_INIT("\xa6"), \ - _PyUnicode_LATIN1_INIT("\xa7"), \ - _PyUnicode_LATIN1_INIT("\xa8"), \ - _PyUnicode_LATIN1_INIT("\xa9"), \ - _PyUnicode_LATIN1_INIT("\xaa"), \ - _PyUnicode_LATIN1_INIT("\xab"), \ - _PyUnicode_LATIN1_INIT("\xac"), \ - _PyUnicode_LATIN1_INIT("\xad"), \ - _PyUnicode_LATIN1_INIT("\xae"), \ - _PyUnicode_LATIN1_INIT("\xaf"), \ - _PyUnicode_LATIN1_INIT("\xb0"), \ - _PyUnicode_LATIN1_INIT("\xb1"), \ - _PyUnicode_LATIN1_INIT("\xb2"), \ - _PyUnicode_LATIN1_INIT("\xb3"), \ - _PyUnicode_LATIN1_INIT("\xb4"), \ - _PyUnicode_LATIN1_INIT("\xb5"), \ - _PyUnicode_LATIN1_INIT("\xb6"), \ - _PyUnicode_LATIN1_INIT("\xb7"), \ - _PyUnicode_LATIN1_INIT("\xb8"), \ - _PyUnicode_LATIN1_INIT("\xb9"), \ - _PyUnicode_LATIN1_INIT("\xba"), \ - _PyUnicode_LATIN1_INIT("\xbb"), \ - _PyUnicode_LATIN1_INIT("\xbc"), \ - _PyUnicode_LATIN1_INIT("\xbd"), \ - _PyUnicode_LATIN1_INIT("\xbe"), \ - _PyUnicode_LATIN1_INIT("\xbf"), \ - _PyUnicode_LATIN1_INIT("\xc0"), \ - _PyUnicode_LATIN1_INIT("\xc1"), \ - _PyUnicode_LATIN1_INIT("\xc2"), \ - _PyUnicode_LATIN1_INIT("\xc3"), \ - _PyUnicode_LATIN1_INIT("\xc4"), \ - _PyUnicode_LATIN1_INIT("\xc5"), \ - _PyUnicode_LATIN1_INIT("\xc6"), \ - _PyUnicode_LATIN1_INIT("\xc7"), \ - _PyUnicode_LATIN1_INIT("\xc8"), \ - _PyUnicode_LATIN1_INIT("\xc9"), \ - _PyUnicode_LATIN1_INIT("\xca"), \ - _PyUnicode_LATIN1_INIT("\xcb"), \ - _PyUnicode_LATIN1_INIT("\xcc"), \ - _PyUnicode_LATIN1_INIT("\xcd"), \ - _PyUnicode_LATIN1_INIT("\xce"), \ - _PyUnicode_LATIN1_INIT("\xcf"), \ - _PyUnicode_LATIN1_INIT("\xd0"), \ - _PyUnicode_LATIN1_INIT("\xd1"), \ - _PyUnicode_LATIN1_INIT("\xd2"), \ - _PyUnicode_LATIN1_INIT("\xd3"), \ - _PyUnicode_LATIN1_INIT("\xd4"), \ - _PyUnicode_LATIN1_INIT("\xd5"), \ - _PyUnicode_LATIN1_INIT("\xd6"), \ - _PyUnicode_LATIN1_INIT("\xd7"), \ - _PyUnicode_LATIN1_INIT("\xd8"), \ - _PyUnicode_LATIN1_INIT("\xd9"), \ - _PyUnicode_LATIN1_INIT("\xda"), \ - _PyUnicode_LATIN1_INIT("\xdb"), \ - _PyUnicode_LATIN1_INIT("\xdc"), \ - _PyUnicode_LATIN1_INIT("\xdd"), \ - _PyUnicode_LATIN1_INIT("\xde"), \ - _PyUnicode_LATIN1_INIT("\xdf"), \ - _PyUnicode_LATIN1_INIT("\xe0"), \ - _PyUnicode_LATIN1_INIT("\xe1"), \ - _PyUnicode_LATIN1_INIT("\xe2"), \ - _PyUnicode_LATIN1_INIT("\xe3"), \ - _PyUnicode_LATIN1_INIT("\xe4"), \ - _PyUnicode_LATIN1_INIT("\xe5"), \ - _PyUnicode_LATIN1_INIT("\xe6"), \ - _PyUnicode_LATIN1_INIT("\xe7"), \ - _PyUnicode_LATIN1_INIT("\xe8"), \ - _PyUnicode_LATIN1_INIT("\xe9"), \ - _PyUnicode_LATIN1_INIT("\xea"), \ - _PyUnicode_LATIN1_INIT("\xeb"), \ - _PyUnicode_LATIN1_INIT("\xec"), \ - _PyUnicode_LATIN1_INIT("\xed"), \ - _PyUnicode_LATIN1_INIT("\xee"), \ - _PyUnicode_LATIN1_INIT("\xef"), \ - _PyUnicode_LATIN1_INIT("\xf0"), \ - _PyUnicode_LATIN1_INIT("\xf1"), \ - _PyUnicode_LATIN1_INIT("\xf2"), \ - _PyUnicode_LATIN1_INIT("\xf3"), \ - _PyUnicode_LATIN1_INIT("\xf4"), \ - _PyUnicode_LATIN1_INIT("\xf5"), \ - _PyUnicode_LATIN1_INIT("\xf6"), \ - _PyUnicode_LATIN1_INIT("\xf7"), \ - _PyUnicode_LATIN1_INIT("\xf8"), \ - _PyUnicode_LATIN1_INIT("\xf9"), \ - _PyUnicode_LATIN1_INIT("\xfa"), \ - _PyUnicode_LATIN1_INIT("\xfb"), \ - _PyUnicode_LATIN1_INIT("\xfc"), \ - _PyUnicode_LATIN1_INIT("\xfd"), \ - _PyUnicode_LATIN1_INIT("\xfe"), \ - _PyUnicode_LATIN1_INIT("\xff"), \ + _PyUnicode_LATIN1_INIT("\x80", "\xc2\x80"), \ + _PyUnicode_LATIN1_INIT("\x81", "\xc2\x81"), \ + _PyUnicode_LATIN1_INIT("\x82", "\xc2\x82"), \ + _PyUnicode_LATIN1_INIT("\x83", "\xc2\x83"), \ + _PyUnicode_LATIN1_INIT("\x84", "\xc2\x84"), \ + _PyUnicode_LATIN1_INIT("\x85", "\xc2\x85"), \ + _PyUnicode_LATIN1_INIT("\x86", "\xc2\x86"), \ + _PyUnicode_LATIN1_INIT("\x87", "\xc2\x87"), \ + _PyUnicode_LATIN1_INIT("\x88", "\xc2\x88"), \ + _PyUnicode_LATIN1_INIT("\x89", "\xc2\x89"), \ + _PyUnicode_LATIN1_INIT("\x8a", "\xc2\x8a"), \ + _PyUnicode_LATIN1_INIT("\x8b", "\xc2\x8b"), \ + _PyUnicode_LATIN1_INIT("\x8c", "\xc2\x8c"), \ + _PyUnicode_LATIN1_INIT("\x8d", "\xc2\x8d"), \ + _PyUnicode_LATIN1_INIT("\x8e", "\xc2\x8e"), \ + _PyUnicode_LATIN1_INIT("\x8f", "\xc2\x8f"), \ + _PyUnicode_LATIN1_INIT("\x90", "\xc2\x90"), \ + _PyUnicode_LATIN1_INIT("\x91", "\xc2\x91"), \ + _PyUnicode_LATIN1_INIT("\x92", "\xc2\x92"), \ + _PyUnicode_LATIN1_INIT("\x93", "\xc2\x93"), \ + _PyUnicode_LATIN1_INIT("\x94", "\xc2\x94"), \ + _PyUnicode_LATIN1_INIT("\x95", "\xc2\x95"), \ + _PyUnicode_LATIN1_INIT("\x96", "\xc2\x96"), \ + _PyUnicode_LATIN1_INIT("\x97", "\xc2\x97"), \ + _PyUnicode_LATIN1_INIT("\x98", "\xc2\x98"), \ + _PyUnicode_LATIN1_INIT("\x99", "\xc2\x99"), \ + _PyUnicode_LATIN1_INIT("\x9a", "\xc2\x9a"), \ + _PyUnicode_LATIN1_INIT("\x9b", "\xc2\x9b"), \ + _PyUnicode_LATIN1_INIT("\x9c", "\xc2\x9c"), \ + _PyUnicode_LATIN1_INIT("\x9d", "\xc2\x9d"), \ + _PyUnicode_LATIN1_INIT("\x9e", "\xc2\x9e"), \ + _PyUnicode_LATIN1_INIT("\x9f", "\xc2\x9f"), \ + _PyUnicode_LATIN1_INIT("\xa0", "\xc2\xa0"), \ + _PyUnicode_LATIN1_INIT("\xa1", "\xc2\xa1"), \ + _PyUnicode_LATIN1_INIT("\xa2", "\xc2\xa2"), \ + _PyUnicode_LATIN1_INIT("\xa3", "\xc2\xa3"), \ + _PyUnicode_LATIN1_INIT("\xa4", "\xc2\xa4"), \ + _PyUnicode_LATIN1_INIT("\xa5", "\xc2\xa5"), \ + _PyUnicode_LATIN1_INIT("\xa6", "\xc2\xa6"), \ + _PyUnicode_LATIN1_INIT("\xa7", "\xc2\xa7"), \ + _PyUnicode_LATIN1_INIT("\xa8", "\xc2\xa8"), \ + _PyUnicode_LATIN1_INIT("\xa9", "\xc2\xa9"), \ + _PyUnicode_LATIN1_INIT("\xaa", "\xc2\xaa"), \ + _PyUnicode_LATIN1_INIT("\xab", "\xc2\xab"), \ + _PyUnicode_LATIN1_INIT("\xac", "\xc2\xac"), \ + _PyUnicode_LATIN1_INIT("\xad", "\xc2\xad"), \ + _PyUnicode_LATIN1_INIT("\xae", "\xc2\xae"), \ + _PyUnicode_LATIN1_INIT("\xaf", "\xc2\xaf"), \ + _PyUnicode_LATIN1_INIT("\xb0", "\xc2\xb0"), \ + _PyUnicode_LATIN1_INIT("\xb1", "\xc2\xb1"), \ + _PyUnicode_LATIN1_INIT("\xb2", "\xc2\xb2"), \ + _PyUnicode_LATIN1_INIT("\xb3", "\xc2\xb3"), \ + _PyUnicode_LATIN1_INIT("\xb4", "\xc2\xb4"), \ + _PyUnicode_LATIN1_INIT("\xb5", "\xc2\xb5"), \ + _PyUnicode_LATIN1_INIT("\xb6", "\xc2\xb6"), \ + _PyUnicode_LATIN1_INIT("\xb7", "\xc2\xb7"), \ + _PyUnicode_LATIN1_INIT("\xb8", "\xc2\xb8"), \ + _PyUnicode_LATIN1_INIT("\xb9", "\xc2\xb9"), \ + _PyUnicode_LATIN1_INIT("\xba", "\xc2\xba"), \ + _PyUnicode_LATIN1_INIT("\xbb", "\xc2\xbb"), \ + _PyUnicode_LATIN1_INIT("\xbc", "\xc2\xbc"), \ + _PyUnicode_LATIN1_INIT("\xbd", "\xc2\xbd"), \ + _PyUnicode_LATIN1_INIT("\xbe", "\xc2\xbe"), \ + _PyUnicode_LATIN1_INIT("\xbf", "\xc2\xbf"), \ + _PyUnicode_LATIN1_INIT("\xc0", "\xc3\x80"), \ + _PyUnicode_LATIN1_INIT("\xc1", "\xc3\x81"), \ + _PyUnicode_LATIN1_INIT("\xc2", "\xc3\x82"), \ + _PyUnicode_LATIN1_INIT("\xc3", "\xc3\x83"), \ + _PyUnicode_LATIN1_INIT("\xc4", "\xc3\x84"), \ + _PyUnicode_LATIN1_INIT("\xc5", "\xc3\x85"), \ + _PyUnicode_LATIN1_INIT("\xc6", "\xc3\x86"), \ + _PyUnicode_LATIN1_INIT("\xc7", "\xc3\x87"), \ + _PyUnicode_LATIN1_INIT("\xc8", "\xc3\x88"), \ + _PyUnicode_LATIN1_INIT("\xc9", "\xc3\x89"), \ + _PyUnicode_LATIN1_INIT("\xca", "\xc3\x8a"), \ + _PyUnicode_LATIN1_INIT("\xcb", "\xc3\x8b"), \ + _PyUnicode_LATIN1_INIT("\xcc", "\xc3\x8c"), \ + _PyUnicode_LATIN1_INIT("\xcd", "\xc3\x8d"), \ + _PyUnicode_LATIN1_INIT("\xce", "\xc3\x8e"), \ + _PyUnicode_LATIN1_INIT("\xcf", "\xc3\x8f"), \ + _PyUnicode_LATIN1_INIT("\xd0", "\xc3\x90"), \ + _PyUnicode_LATIN1_INIT("\xd1", "\xc3\x91"), \ + _PyUnicode_LATIN1_INIT("\xd2", "\xc3\x92"), \ + _PyUnicode_LATIN1_INIT("\xd3", "\xc3\x93"), \ + _PyUnicode_LATIN1_INIT("\xd4", "\xc3\x94"), \ + _PyUnicode_LATIN1_INIT("\xd5", "\xc3\x95"), \ + _PyUnicode_LATIN1_INIT("\xd6", "\xc3\x96"), \ + _PyUnicode_LATIN1_INIT("\xd7", "\xc3\x97"), \ + _PyUnicode_LATIN1_INIT("\xd8", "\xc3\x98"), \ + _PyUnicode_LATIN1_INIT("\xd9", "\xc3\x99"), \ + _PyUnicode_LATIN1_INIT("\xda", "\xc3\x9a"), \ + _PyUnicode_LATIN1_INIT("\xdb", "\xc3\x9b"), \ + _PyUnicode_LATIN1_INIT("\xdc", "\xc3\x9c"), \ + _PyUnicode_LATIN1_INIT("\xdd", "\xc3\x9d"), \ + _PyUnicode_LATIN1_INIT("\xde", "\xc3\x9e"), \ + _PyUnicode_LATIN1_INIT("\xdf", "\xc3\x9f"), \ + _PyUnicode_LATIN1_INIT("\xe0", "\xc3\xa0"), \ + _PyUnicode_LATIN1_INIT("\xe1", "\xc3\xa1"), \ + _PyUnicode_LATIN1_INIT("\xe2", "\xc3\xa2"), \ + _PyUnicode_LATIN1_INIT("\xe3", "\xc3\xa3"), \ + _PyUnicode_LATIN1_INIT("\xe4", "\xc3\xa4"), \ + _PyUnicode_LATIN1_INIT("\xe5", "\xc3\xa5"), \ + _PyUnicode_LATIN1_INIT("\xe6", "\xc3\xa6"), \ + _PyUnicode_LATIN1_INIT("\xe7", "\xc3\xa7"), \ + _PyUnicode_LATIN1_INIT("\xe8", "\xc3\xa8"), \ + _PyUnicode_LATIN1_INIT("\xe9", "\xc3\xa9"), \ + _PyUnicode_LATIN1_INIT("\xea", "\xc3\xaa"), \ + _PyUnicode_LATIN1_INIT("\xeb", "\xc3\xab"), \ + _PyUnicode_LATIN1_INIT("\xec", "\xc3\xac"), \ + _PyUnicode_LATIN1_INIT("\xed", "\xc3\xad"), \ + _PyUnicode_LATIN1_INIT("\xee", "\xc3\xae"), \ + _PyUnicode_LATIN1_INIT("\xef", "\xc3\xaf"), \ + _PyUnicode_LATIN1_INIT("\xf0", "\xc3\xb0"), \ + _PyUnicode_LATIN1_INIT("\xf1", "\xc3\xb1"), \ + _PyUnicode_LATIN1_INIT("\xf2", "\xc3\xb2"), \ + _PyUnicode_LATIN1_INIT("\xf3", "\xc3\xb3"), \ + _PyUnicode_LATIN1_INIT("\xf4", "\xc3\xb4"), \ + _PyUnicode_LATIN1_INIT("\xf5", "\xc3\xb5"), \ + _PyUnicode_LATIN1_INIT("\xf6", "\xc3\xb6"), \ + _PyUnicode_LATIN1_INIT("\xf7", "\xc3\xb7"), \ + _PyUnicode_LATIN1_INIT("\xf8", "\xc3\xb8"), \ + _PyUnicode_LATIN1_INIT("\xf9", "\xc3\xb9"), \ + _PyUnicode_LATIN1_INIT("\xfa", "\xc3\xba"), \ + _PyUnicode_LATIN1_INIT("\xfb", "\xc3\xbb"), \ + _PyUnicode_LATIN1_INIT("\xfc", "\xc3\xbc"), \ + _PyUnicode_LATIN1_INIT("\xfd", "\xc3\xbd"), \ + _PyUnicode_LATIN1_INIT("\xfe", "\xc3\xbe"), \ + _PyUnicode_LATIN1_INIT("\xff", "\xc3\xbf"), \ }, \ }, \ \ diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 4bee241..63bf04b 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -19,7 +19,6 @@ extern PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *); extern PyStatus _PyUnicode_InitTypes(PyInterpreterState *); extern void _PyUnicode_Fini(PyInterpreterState *); extern void _PyUnicode_FiniTypes(PyInterpreterState *); -extern void _PyStaticUnicode_Dealloc(PyObject *); extern PyTypeObject _PyUnicodeASCIIIter_Type; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 13f2c5b..bd169ed 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15184,23 +15184,6 @@ _PyUnicode_FiniTypes(PyInterpreterState *interp) } -static void unicode_static_dealloc(PyObject *op) -{ - PyASCIIObject *ascii = _PyASCIIObject_CAST(op); - - assert(ascii->state.compact); - - if (!ascii->state.ascii) { - PyCompactUnicodeObject* compact = (PyCompactUnicodeObject*)op; - if (compact->utf8) { - PyObject_Free(compact->utf8); - compact->utf8 = NULL; - compact->utf8_length = 0; - } - } -} - - void _PyUnicode_Fini(PyInterpreterState *interp) { @@ -15217,24 +15200,8 @@ _PyUnicode_Fini(PyInterpreterState *interp) _PyUnicode_FiniEncodings(&state->fs_codec); unicode_clear_identifiers(state); - - // Clear the single character singletons - for (int i = 0; i < 128; i++) { - unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).ascii[i]); - } - for (int i = 0; i < 128; i++) { - unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).latin1[i]); - } } - -void -_PyStaticUnicode_Dealloc(PyObject *op) -{ - unicode_static_dealloc(op); -} - - /* A _string module, to export formatter_parser and formatter_field_name_split to the string.Formatter class implemented in Python. */ diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py index 62eeafa..d9c6030 100644 --- a/Tools/scripts/deepfreeze.py +++ b/Tools/scripts/deepfreeze.py @@ -195,7 +195,6 @@ class Printer: else: self.write("PyCompactUnicodeObject _compact;") self.write(f"{datatype} _data[{len(s)+1}];") - self.deallocs.append(f"_PyStaticUnicode_Dealloc((PyObject *)&{name});") with self.block(f"{name} =", ";"): if ascii: with self.block("._ascii =", ","): @@ -218,6 +217,9 @@ class Printer: self.write(f".kind = {kind},") self.write(".compact = 1,") self.write(".ascii = 0,") + utf8 = s.encode('utf-8') + self.write(f'.utf8 = {make_string_literal(utf8)},') + self.write(f'.utf8_length = {len(utf8)},') with self.block(f"._data =", ","): for i in range(0, len(s), 16): data = s[i:i+16] diff --git a/Tools/scripts/generate_global_objects.py b/Tools/scripts/generate_global_objects.py index f3a11f5..a50f3ba 100644 --- a/Tools/scripts/generate_global_objects.py +++ b/Tools/scripts/generate_global_objects.py @@ -287,7 +287,11 @@ def generate_runtime_init(identifiers, strings): immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).ascii[{i}]') with printer.block('.latin1 =', ','): for i in range(128, 256): - printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}"),') + utf8 = ['"'] + for c in chr(i).encode('utf-8'): + utf8.append(f"\\x{c:02x}") + utf8.append('"') + printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}", {"".join(utf8)}),') immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).latin1[{i} - 128]') printer.write('') with printer.block('.tuple_empty =', ','): -- cgit v0.12