summaryrefslogtreecommitdiffstats
path: root/Tools/build/deepfreeze.py
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org> 2023-08-14 21:41:27 (GMT)
committer: GitHub <noreply@github.com> 2023-08-14 21:41:27 (GMT)
commit: a2a4b9f1ec86b9762a5d35895ac5b528e03d5b98 (patch)
tree: aa2e7a28e2634e473cdd591873f1d7750968b0fa /Tools/build/deepfreeze.py
parent: 39745347f645ac99021f4ab981ff02ab5647b19c (diff)
download: cpython-a2a4b9f1ec86b9762a5d35895ac5b528e03d5b98.zip
cpython-a2a4b9f1ec86b9762a5d35895ac5b528e03d5b98.tar.gz
cpython-a2a4b9f1ec86b9762a5d35895ac5b528e03d5b98.tar.bz2
Attempt to speed up deepfreeze.py (#107887)
* Instead of calling get_identifiers_and_strings(), extract identifiers and strings from pycore_global_strings.h.
* Avoid ast.literal_eval(), it's very slow.
Diffstat (limited to 'Tools/build/deepfreeze.py')
-rw-r--r-- Tools/build/deepfreeze.py 35
1 files changed, 23 insertions, 12 deletions
diff --git a/Tools/build/deepfreeze.py b/Tools/build/deepfreeze.py
index a11fe6a..ce609bd 100644
--- a/Tools/build/deepfreeze.py
+++ b/Tools/build/deepfreeze.py
@@ -6,7 +6,6 @@ On Windows, and in cross-compilation cases, it is executed
by Python 3.10, and 3.11 features are not available.
"""
import argparse
-import ast
import builtins
import collections
import contextlib
@@ -17,10 +16,10 @@ import types
from typing import Dict, FrozenSet, TextIO, Tuple
import umarshal
-from generate_global_objects import get_identifiers_and_strings
+
+ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
verbose = False
-identifiers, strings = get_identifiers_and_strings()
# This must be kept in sync with opcode.py
RESUME = 151
@@ -114,6 +113,7 @@ class Printer:
self.hits, self.misses = 0, 0
self.finis: list[str] = []
self.inits: list[str] = []
+ self.identifiers, self.strings = self.get_identifiers_and_strings()
self.write('#include "Python.h"')
self.write('#include "internal/pycore_gc.h"')
self.write('#include "internal/pycore_code.h"')
@@ -121,6 +121,19 @@ class Printer:
self.write('#include "internal/pycore_long.h"')
self.write("")
+ def get_identifiers_and_strings(self) -> tuple[set[str], dict[str, str]]:
+ filename = os.path.join(ROOT, "Include", "internal", "pycore_global_strings.h")
+ with open(filename) as fp:
+ lines = fp.readlines()
+ identifiers: set[str] = set()
+ strings: dict[str, str] = {}
+ for line in lines:
+ if m := re.search(r"STRUCT_FOR_ID\((\w+)\)", line):
+ identifiers.add(m.group(1))
+ if m := re.search(r'STRUCT_FOR_STR\((\w+), "(.*?)"\)', line):
+ strings[m.group(2)] = m.group(1)
+ return identifiers, strings
+
@contextlib.contextmanager
def indent(self) -> None:
save_level = self.level
@@ -171,9 +184,9 @@ class Printer:
return f"& {name}.ob_base.ob_base"
def generate_unicode(self, name: str, s: str) -> str:
- if s in strings:
- return f"&_Py_STR({strings[s]})"
- if s in identifiers:
+ if s in self.strings:
+ return f"&_Py_STR({self.strings[s]})"
+ if s in self.identifiers:
return f"&_Py_ID({s})"
if len(s) == 1:
c = ord(s)
@@ -441,12 +454,10 @@ def is_frozen_header(source: str) -> bool:
def decode_frozen_data(source: str) -> types.CodeType:
- lines = source.splitlines()
- while lines and re.match(FROZEN_DATA_LINE, lines[0]) is None:
- del lines[0]
- while lines and re.match(FROZEN_DATA_LINE, lines[-1]) is None:
- del lines[-1]
- values: Tuple[int, ...] = ast.literal_eval("".join(lines).strip())
+ values: list[int] = []
+ for line in source.splitlines():
+ if re.match(FROZEN_DATA_LINE, line):
+ values.extend([int(x) for x in line.split(",") if x.strip()])
data = bytes(values)
return umarshal.loads(data)