From a2a4b9f1ec86b9762a5d35895ac5b528e03d5b98 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 14 Aug 2023 14:41:27 -0700 Subject: Attempt to speed up deepfreeze.py (#107887) * Instead of calling get_identifiers_and_strings(), extract identifiers and strings from pycore_global_strings.h. * Avoid ast.literal_eval(), it's very slow. --- Makefile.pre.in | 2 +- Tools/build/deepfreeze.py | 35 +++++++++++++++++++++++------------ 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/Makefile.pre.in b/Makefile.pre.in index 52236f7..3a628bf 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1253,7 +1253,7 @@ regen-frozen: Tools/build/freeze_modules.py $(FROZEN_FILES_IN) .PHONY: regen-deepfreeze regen-deepfreeze: $(DEEPFREEZE_OBJS) -DEEPFREEZE_DEPS=$(srcdir)/Tools/build/deepfreeze.py $(FREEZE_MODULE_DEPS) $(FROZEN_FILES_OUT) +DEEPFREEZE_DEPS=$(srcdir)/Tools/build/deepfreeze.py Include/internal/pycore_global_strings.h $(FREEZE_MODULE_DEPS) $(FROZEN_FILES_OUT) # BEGIN: deepfreeze modules Python/deepfreeze/deepfreeze.c: $(DEEPFREEZE_DEPS) diff --git a/Tools/build/deepfreeze.py b/Tools/build/deepfreeze.py index a11fe6a..ce609bd 100644 --- a/Tools/build/deepfreeze.py +++ b/Tools/build/deepfreeze.py @@ -6,7 +6,6 @@ On Windows, and in cross-compilation cases, it is executed by Python 3.10, and 3.11 features are not available. """ import argparse -import ast import builtins import collections import contextlib @@ -17,10 +16,10 @@ import types from typing import Dict, FrozenSet, TextIO, Tuple import umarshal -from generate_global_objects import get_identifiers_and_strings + +ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) verbose = False -identifiers, strings = get_identifiers_and_strings() # This must be kept in sync with opcode.py RESUME = 151 @@ -114,6 +113,7 @@ class Printer: self.hits, self.misses = 0, 0 self.finis: list[str] = [] self.inits: list[str] = [] + self.identifiers, self.strings = self.get_identifiers_and_strings() self.write('#include "Python.h"') self.write('#include "internal/pycore_gc.h"') self.write('#include "internal/pycore_code.h"') @@ -121,6 +121,19 @@ class Printer: self.write('#include "internal/pycore_long.h"') self.write("") + def get_identifiers_and_strings(self) -> tuple[set[str], dict[str, str]]: + filename = os.path.join(ROOT, "Include", "internal", "pycore_global_strings.h") + with open(filename) as fp: + lines = fp.readlines() + identifiers: set[str] = set() + strings: dict[str, str] = {} + for line in lines: + if m := re.search(r"STRUCT_FOR_ID\((\w+)\)", line): + identifiers.add(m.group(1)) + if m := re.search(r'STRUCT_FOR_STR\((\w+), "(.*?)"\)', line): + strings[m.group(2)] = m.group(1) + return identifiers, strings + @contextlib.contextmanager def indent(self) -> None: save_level = self.level @@ -171,9 +184,9 @@ class Printer: return f"& {name}.ob_base.ob_base" def generate_unicode(self, name: str, s: str) -> str: - if s in strings: - return f"&_Py_STR({strings[s]})" - if s in identifiers: + if s in self.strings: + return f"&_Py_STR({self.strings[s]})" + if s in self.identifiers: return f"&_Py_ID({s})" if len(s) == 1: c = ord(s) @@ -441,12 +454,10 @@ def is_frozen_header(source: str) -> bool: def decode_frozen_data(source: str) -> types.CodeType: - lines = source.splitlines() - while lines and re.match(FROZEN_DATA_LINE, lines[0]) is None: - del lines[0] - while lines and re.match(FROZEN_DATA_LINE, lines[-1]) is None: - del lines[-1] - values: Tuple[int, ...] = ast.literal_eval("".join(lines).strip()) + values: list[int] = [] + for line in source.splitlines(): + if re.match(FROZEN_DATA_LINE, line): + values.extend([int(x) for x in line.split(",") if x.strip()]) data = bytes(values) return umarshal.loads(data) -- cgit v0.12