From ab0d35d70dfe0b4c11583f8f735a8cc49b58c58b Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Tue, 19 Apr 2022 23:11:36 +0530 Subject: bpo-46712: share more global strings in deepfreeze (gh-32152) (for gh-90868) --- Modules/_io/textio.c | 1 + Modules/_pickle.c | 2 +- Objects/unicodeobject.c | 1 + Python/compile.c | 3 +++ Tools/scripts/deepfreeze.py | 4 +++- Tools/scripts/generate_global_objects.py | 19 ++++++------------- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index f45a697..f1cd6d0 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1959,6 +1959,7 @@ _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n) if (chunks != NULL) { if (result != NULL && PyList_Append(chunks, result) < 0) goto fail; + _Py_DECLARE_STR(empty, ""); Py_XSETREF(result, PyUnicode_Join(&_Py_STR(empty), chunks)); if (result == NULL) goto fail; diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 84f469d..a5595eb 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -1812,7 +1812,7 @@ get_dotted_path(PyObject *obj, PyObject *name) { PyObject *dotted_path; Py_ssize_t i, n; - + _Py_DECLARE_STR(dot, "."); dotted_path = PyUnicode_Split(name, &_Py_STR(dot), -1); if (dotted_path == NULL) return NULL; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index d46a52c..dd0c022 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -258,6 +258,7 @@ static int unicode_is_singleton(PyObject *unicode); // Return a borrowed reference to the empty string singleton. static inline PyObject* unicode_get_empty(void) { + _Py_DECLARE_STR(empty, ""); return &_Py_STR(empty); } diff --git a/Python/compile.c b/Python/compile.c index 4108b89..ceaf852 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -782,6 +782,7 @@ compiler_set_qualname(struct compiler *c) } if (base != NULL) { + _Py_DECLARE_STR(dot, "."); name = PyUnicode_Concat(base, &_Py_STR(dot)); Py_DECREF(base); if (name == NULL) @@ -3945,6 +3946,7 @@ compiler_from_import(struct compiler *c, stmt_ty s) ADDOP_NAME(c, IMPORT_NAME, s->v.ImportFrom.module, names); } else { + _Py_DECLARE_STR(empty, ""); ADDOP_NAME(c, IMPORT_NAME, &_Py_STR(empty), names); } for (i = 0; i < n; i++) { @@ -4885,6 +4887,7 @@ compiler_joined_str(struct compiler *c, expr_ty e) Py_ssize_t value_count = asdl_seq_LEN(e->v.JoinedStr.values); if (value_count > STACK_USE_GUIDELINE) { + _Py_DECLARE_STR(empty, ""); ADDOP_LOAD_CONST_NEW(c, &_Py_STR(empty)); ADDOP_NAME(c, LOAD_METHOD, &_Py_ID(join), names); ADDOP_I(c, BUILD_LIST, 0); diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py index dfa4b3a..3c48bac 100644 --- a/Tools/scripts/deepfreeze.py +++ b/Tools/scripts/deepfreeze.py @@ -18,7 +18,7 @@ import umarshal from generate_global_objects import get_identifiers_and_strings verbose = False -identifiers = get_identifiers_and_strings()[0] +identifiers, strings = get_identifiers_and_strings() def isprintable(b: bytes) -> bool: return all(0x20 <= c < 0x7f for c in b) @@ -168,6 +168,8 @@ class Printer: return f"& {name}.ob_base.ob_base" def generate_unicode(self, name: str, s: str) -> str: + if s in strings: + return f"&_Py_STR({strings[s]})" if s in identifiers: return f"&_Py_ID({s})" if re.match(r'\A[A-Za-z0-9_]+\Z', s): diff --git a/Tools/scripts/generate_global_objects.py b/Tools/scripts/generate_global_objects.py index 826f4c4..2180acd 100644 --- a/Tools/scripts/generate_global_objects.py +++ b/Tools/scripts/generate_global_objects.py @@ -1,20 +1,13 @@ import contextlib -import glob import io import os.path import re -import sys - __file__ = os.path.abspath(__file__) ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) INTERNAL = os.path.join(ROOT, 'Include', 'internal') -STRING_LITERALS = { - 'empty': '', - 'dot': '.', -} IGNORED = { 'ACTION', # Python/_warnings.c 'ATTR', # Python/_warnings.c and Objects/funcobject.c @@ -211,7 +204,7 @@ def generate_global_strings(identifiers, strings): printer.write(START) with printer.block('struct _Py_global_strings', ';'): with printer.block('struct', ' literals;'): - for name, literal in sorted(strings.items()): + for literal, name in sorted(strings.items(), key=lambda x: x[1]): printer.write(f'STRUCT_FOR_STR({name}, "{literal}")') outfile.write('\n') with printer.block('struct', ' identifiers;'): @@ -276,7 +269,7 @@ def generate_runtime_init(identifiers, strings): # Global strings. with printer.block('.strings =', ','): with printer.block('.literals =', ','): - for name, literal in sorted(strings.items()): + for literal, name in sorted(strings.items(), key=lambda x: x[1]): printer.write(f'INIT_STR({name}, "{literal}"),') with printer.block('.identifiers =', ','): for name in sorted(identifiers): @@ -297,15 +290,15 @@ def generate_runtime_init(identifiers, strings): def get_identifiers_and_strings() -> 'tuple[set[str], dict[str, str]]': identifiers = set(IDENTIFIERS) - strings = dict(STRING_LITERALS) + strings = {} for name, string, *_ in iter_global_strings(): if string is None: if name not in IGNORED: identifiers.add(name) else: - if name not in strings: - strings[name] = string - elif string != strings[name]: + if string not in strings: + strings[string] = name + elif name != strings[string]: raise ValueError(f'string mismatch for {name!r} ({string!r} != {strings[name]!r}') return identifiers, strings -- cgit v0.12