diff options
author | Guido van Rossum <guido@python.org> | 2021-11-22 18:09:48 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-11-22 18:09:48 (GMT) |
commit | 1037ca5a8ea001bfa2a198e08655620234e9befd (patch) | |
tree | dcf9b1966caca1eab0437f730f487701a960d851 | |
parent | 4d6c0c0cce05befa06e0cad7351b1303ac048277 (diff) | |
download | cpython-1037ca5a8ea001bfa2a198e08655620234e9befd.zip cpython-1037ca5a8ea001bfa2a198e08655620234e9befd.tar.gz cpython-1037ca5a8ea001bfa2a198e08655620234e9befd.tar.bz2 |
bpo-45850: Implement deep-freeze on Windows (#29648)
Implement changes to build with deep-frozen modules on Windows.
Note that we now require Python 3.10 as the "bootstrap" or "host" Python.
This yields a modest startup speedup (around 7%) on Windows.
-rw-r--r-- | Misc/NEWS.d/next/Windows/2021-11-20-00-06-59.bpo-45850.q9lofz.rst | 2 | ||||
-rw-r--r-- | PCbuild/_freeze_module.vcxproj | 58 | ||||
-rw-r--r-- | PCbuild/find_python.bat | 6 | ||||
-rw-r--r-- | PCbuild/pythoncore.vcxproj | 24 | ||||
-rw-r--r-- | Python/frozen.c | 5 | ||||
-rw-r--r-- | Tools/scripts/deepfreeze.py | 58 | ||||
-rw-r--r-- | Tools/scripts/freeze_modules.py | 21 | ||||
-rw-r--r-- | Tools/scripts/startuptime.py | 22 | ||||
-rw-r--r-- | Tools/scripts/umarshal.py | 328 |
9 files changed, 500 insertions, 24 deletions
diff --git a/Misc/NEWS.d/next/Windows/2021-11-20-00-06-59.bpo-45850.q9lofz.rst b/Misc/NEWS.d/next/Windows/2021-11-20-00-06-59.bpo-45850.q9lofz.rst new file mode 100644 index 0000000..a84e1fe --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2021-11-20-00-06-59.bpo-45850.q9lofz.rst @@ -0,0 +1,2 @@ +Implement changes to build with deep-frozen modules on Windows. +Note that we now require Python 3.10 as the "bootstrap" or "host" Python. diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index 6a91776..54fef9c 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -236,101 +236,141 @@ <ModName>importlib._bootstrap</ModName> <IntFile>$(IntDir)importlib._bootstrap.g.h</IntFile> <OutFile>$(PySourcePath)Python\frozen_modules\importlib._bootstrap.h</OutFile> + <DeepIntFile>$(IntDir)importlib._bootstrap.g.c</DeepIntFile> + <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.importlib._bootstrap.c</DeepOutFile> </None> <None Include="..\Lib\importlib\_bootstrap_external.py"> <ModName>importlib._bootstrap_external</ModName> <IntFile>$(IntDir)importlib._bootstrap_external.g.h</IntFile> <OutFile>$(PySourcePath)Python\frozen_modules\importlib._bootstrap_external.h</OutFile> + <DeepIntFile>$(IntDir)importlib._bootstrap_external.g.c</DeepIntFile> + <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.importlib._bootstrap_external.c</DeepOutFile> </None> <None Include="..\Lib\zipimport.py"> <ModName>zipimport</ModName> <IntFile>$(IntDir)zipimport.g.h</IntFile> <OutFile>$(PySourcePath)Python\frozen_modules\zipimport.h</OutFile> + <DeepIntFile>$(IntDir)zipimport.g.c</DeepIntFile> + <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.zipimport.c</DeepOutFile> </None> <None Include="..\Lib\abc.py"> <ModName>abc</ModName> <IntFile>$(IntDir)abc.g.h</IntFile> <OutFile>$(PySourcePath)Python\frozen_modules\abc.h</OutFile> + <DeepIntFile>$(IntDir)abc.g.c</DeepIntFile> + <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.abc.c</DeepOutFile> </None> <None 
Include="..\Lib\codecs.py"> <ModName>codecs</ModName> <IntFile>$(IntDir)codecs.g.h</IntFile> <OutFile>$(PySourcePath)Python\frozen_modules\codecs.h</OutFile> + <DeepIntFile>$(IntDir)codecs.g.c</DeepIntFile> + <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.codecs.c</DeepOutFile> </None> <None Include="..\Lib\io.py"> <ModName>io</ModName> <IntFile>$(IntDir)io.g.h</IntFile> <OutFile>$(PySourcePath)Python\frozen_modules\io.h</OutFile> + <DeepIntFile>$(IntDir)io.g.c</DeepIntFile> + <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.io.c</DeepOutFile> </None> <None Include="..\Lib\_collections_abc.py"> <ModName>_collections_abc</ModName> <IntFile>$(IntDir)_collections_abc.g.h</IntFile> <OutFile>$(PySourcePath)Python\frozen_modules\_collections_abc.h</OutFile> + <DeepIntFile>$(IntDir)_collections_abc.g.c</DeepIntFile> + <DeepOutFile>$(PySourcePath)Python\deepfreeze\df._collections_abc.c</DeepOutFile> </None> <None Include="..\Lib\_sitebuiltins.py"> <ModName>_sitebuiltins</ModName> <IntFile>$(IntDir)_sitebuiltins.g.h</IntFile> <OutFile>$(PySourcePath)Python\frozen_modules\_sitebuiltins.h</OutFile> + <DeepIntFile>$(IntDir)_sitebuiltins.g.c</DeepIntFile> + <DeepOutFile>$(PySourcePath)Python\deepfreeze\df._sitebuiltins.c</DeepOutFile> </None> <None Include="..\Lib\genericpath.py"> <ModName>genericpath</ModName> <IntFile>$(IntDir)genericpath.g.h</IntFile> <OutFile>$(PySourcePath)Python\frozen_modules\genericpath.h</OutFile> + <DeepIntFile>$(IntDir)genericpath.g.c</DeepIntFile> + <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.genericpath.c</DeepOutFile> </None> <None Include="..\Lib\ntpath.py"> <ModName>ntpath</ModName> <IntFile>$(IntDir)ntpath.g.h</IntFile> <OutFile>$(PySourcePath)Python\frozen_modules\ntpath.h</OutFile> + <DeepIntFile>$(IntDir)ntpath.g.c</DeepIntFile> + <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.ntpath.c</DeepOutFile> </None> <None Include="..\Lib\posixpath.py"> <ModName>posixpath</ModName> <IntFile>$(IntDir)posixpath.g.h</IntFile> 
<OutFile>$(PySourcePath)Python\frozen_modules\posixpath.h</OutFile> + <DeepIntFile>$(IntDir)posixpath.g.c</DeepIntFile> + <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.posixpath.c</DeepOutFile> </None> <None Include="..\Lib\os.py"> <ModName>os</ModName> <IntFile>$(IntDir)os.g.h</IntFile> <OutFile>$(PySourcePath)Python\frozen_modules\os.h</OutFile> + <DeepIntFile>$(IntDir)os.g.c</DeepIntFile> + <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.os.c</DeepOutFile> </None> <None Include="..\Lib\site.py"> <ModName>site</ModName> <IntFile>$(IntDir)site.g.h</IntFile> <OutFile>$(PySourcePath)Python\frozen_modules\site.h</OutFile> + <DeepIntFile>$(IntDir)site.g.c</DeepIntFile> + <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.site.c</DeepOutFile> </None> <None Include="..\Lib\stat.py"> <ModName>stat</ModName> <IntFile>$(IntDir)stat.g.h</IntFile> <OutFile>$(PySourcePath)Python\frozen_modules\stat.h</OutFile> + <DeepIntFile>$(IntDir)stat.g.c</DeepIntFile> + <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.stat.c</DeepOutFile> </None> <None Include="..\Lib\__hello__.py"> <ModName>__hello__</ModName> <IntFile>$(IntDir)__hello__.g.h</IntFile> <OutFile>$(PySourcePath)Python\frozen_modules\__hello__.h</OutFile> + <DeepIntFile>$(IntDir)__hello__.g.c</DeepIntFile> + <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.__hello__.c</DeepOutFile> </None> <None Include="..\Lib\__phello__\__init__.py"> <ModName>__phello__</ModName> <IntFile>$(IntDir)__phello__.g.h</IntFile> <OutFile>$(PySourcePath)Python\frozen_modules\__phello__.h</OutFile> + <DeepIntFile>$(IntDir)__phello__.g.c</DeepIntFile> + <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.__phello__.c</DeepOutFile> </None> <None Include="..\Lib\__phello__\ham\__init__.py"> <ModName>__phello__.ham</ModName> <IntFile>$(IntDir)__phello__.ham.g.h</IntFile> <OutFile>$(PySourcePath)Python\frozen_modules\__phello__.ham.h</OutFile> + <DeepIntFile>$(IntDir)__phello__.ham.g.c</DeepIntFile> + 
<DeepOutFile>$(PySourcePath)Python\deepfreeze\df.__phello__.ham.c</DeepOutFile> </None> <None Include="..\Lib\__phello__\ham\eggs.py"> <ModName>__phello__.ham.eggs</ModName> <IntFile>$(IntDir)__phello__.ham.eggs.g.h</IntFile> <OutFile>$(PySourcePath)Python\frozen_modules\__phello__.ham.eggs.h</OutFile> + <DeepIntFile>$(IntDir)__phello__.ham.eggs.g.c</DeepIntFile> + <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.__phello__.ham.eggs.c</DeepOutFile> </None> <None Include="..\Lib\__phello__\spam.py"> <ModName>__phello__.spam</ModName> <IntFile>$(IntDir)__phello__.spam.g.h</IntFile> <OutFile>$(PySourcePath)Python\frozen_modules\__phello__.spam.h</OutFile> + <DeepIntFile>$(IntDir)__phello__.spam.g.c</DeepIntFile> + <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.__phello__.spam.c</DeepOutFile> </None> <None Include="..\Tools\freeze\flag.py"> <ModName>frozen_only</ModName> <IntFile>$(IntDir)frozen_only.g.h</IntFile> <OutFile>$(PySourcePath)Python\frozen_modules\frozen_only.h</OutFile> + <DeepIntFile>$(IntDir)frozen_only.g.c</DeepIntFile> + <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.frozen_only.c</DeepOutFile> </None> <!-- END frozen modules --> </ItemGroup> @@ -338,17 +378,29 @@ <ImportGroup Label="ExtensionTargets"> </ImportGroup> <Target Name="_RebuildFrozen" AfterTargets="AfterBuild" Condition="$(Configuration) != 'PGUpdate'"> - <Exec Command='"$(TargetPath)" "%(None.ModName)" "%(None.FullPath)" "%(None.IntFile)"' /> + <Exec Command='"$(TargetPath)" "%(None.ModName)" "%(None.FullPath)" "%(None.DeepIntFile)"' /> - <Copy SourceFiles="%(None.IntFile)" + <Copy SourceFiles="%(None.DeepIntFile)" DestinationFiles="%(None.OutFile)" - Condition="!Exists(%(None.OutFile)) or (Exists(%(None.IntFile)) and '$([System.IO.File]::ReadAllText(%(None.OutFile)).Replace(`
`, `
`))' != '$([System.IO.File]::ReadAllText(%(None.IntFile)).Replace(`
`, `
`))')"> + Condition="!Exists(%(None.OutFile)) or (Exists(%(None.DeepIntFile)) and '$([System.IO.File]::ReadAllText(%(None.OutFile)).Replace(`
`, `
`))' != '$([System.IO.File]::ReadAllText(%(None.DeepIntFile)).Replace(`
`, `
`))')"> <Output TaskParameter="CopiedFiles" ItemName="_Updated" /> </Copy> <Message Text="Updated files: @(_Updated->'%(Filename)%(Extension)',', ')" Condition="'@(_Updated)' != ''" Importance="high" /> </Target> + <Target Name="_RebuildDeepFrozen" AfterTargets="_RebuildFrozen" Condition="$(Configuration) != 'PGUpdate'"> + <Exec Command='$(PythonForBuild) "$(PySourcePath)Tools\scripts\deepfreeze.py" "%(None.OutFile)" "-m" "%(None.ModName)" -o "%(None.IntFile)"' /> + + <Copy SourceFiles="%(None.IntFile)" + DestinationFiles="%(None.DeepOutFile)" + Condition="!Exists(%(None.DeepOutFile)) or (Exists(%(None.IntFile)) and '$([System.IO.File]::ReadAllText(%(None.DeepOutFile)).Replace(`
`, `
`))' != '$([System.IO.File]::ReadAllText(%(None.IntFile)).Replace(`
`, `
`))')"> + <Output TaskParameter="CopiedFiles" ItemName="_DeepUpdated" /> + </Copy> + + <Message Text="Updated files: @(_DeepUpdated->'%(Filename)%(Extension)',', ')" + Condition="'@(_DeepUpdated)' != ''" Importance="high" /> + </Target> <Target Name="_CleanFrozen" BeforeTargets="CoreClean" Condition="$(Configuration) != 'PGUpdate'"> <ItemGroup> <Clean Include="%(None.IntFile)" /> diff --git a/PCbuild/find_python.bat b/PCbuild/find_python.bat index d0e4a86..a9f14c5 100644 --- a/PCbuild/find_python.bat +++ b/PCbuild/find_python.bat @@ -31,13 +31,13 @@ @if "%_Py_EXTERNALS_DIR%"=="" (set _Py_EXTERNALS_DIR=%~dp0\..\externals) @rem If we have Python in externals, use that one -@if exist "%_Py_EXTERNALS_DIR%\pythonx86\tools\python.exe" (set PYTHON="%_Py_EXTERNALS_DIR%\pythonx86\tools\python.exe") & (set _Py_Python_Source=found in externals directory) & goto :found +@if exist "%_Py_EXTERNALS_DIR%\pythonx86\tools\python.exe" ("%_Py_EXTERNALS_DIR%\pythonx86\tools\python.exe" -Ec "import sys; assert sys.version_info[:2] >= (3, 10)" >nul 2>nul) && (set PYTHON="%_Py_EXTERNALS_DIR%\pythonx86\tools\python.exe") && (set _Py_Python_Source=found in externals directory) && goto :found || rmdir /Q /S "%_Py_EXTERNALS_DIR%\pythonx86" @rem If HOST_PYTHON is recent enough, use that -@if NOT "%HOST_PYTHON%"=="" @%HOST_PYTHON% -Ec "import sys; assert sys.version_info[:2] >= (3, 8)" >nul 2>nul && (set PYTHON="%HOST_PYTHON%") && (set _Py_Python_Source=found as HOST_PYTHON) && goto :found +@if NOT "%HOST_PYTHON%"=="" @%HOST_PYTHON% -Ec "import sys; assert sys.version_info[:2] >= (3, 10)" >nul 2>nul && (set PYTHON="%HOST_PYTHON%") && (set _Py_Python_Source=found as HOST_PYTHON) && goto :found @rem If py.exe finds a recent enough version, use that one -@for %%p in (3.9 3.8) do @py -%%p -EV >nul 2>&1 && (set PYTHON=py -%%p) && (set _Py_Python_Source=found %%p with py.exe) && goto :found +@for %%p in (3.10) do @py -%%p -EV >nul 2>&1 && (set PYTHON=py -%%p) && (set _Py_Python_Source=found %%p with 
py.exe) && goto :found @if NOT exist "%_Py_EXTERNALS_DIR%" mkdir "%_Py_EXTERNALS_DIR%" @set _Py_NUGET=%NUGET% diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 70f0556..e1d59de 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -502,6 +502,30 @@ <ClCompile Include="..\Python\thread.c" /> <ClCompile Include="..\Python\traceback.c" /> </ItemGroup> + <ItemGroup> + <!-- BEGIN deepfreeze --> + <ClCompile Include="..\Python\deepfreeze\df.importlib._bootstrap.c" /> + <ClCompile Include="..\Python\deepfreeze\df.importlib._bootstrap_external.c" /> + <ClCompile Include="..\Python\deepfreeze\df.zipimport.c" /> + <ClCompile Include="..\Python\deepfreeze\df.abc.c" /> + <ClCompile Include="..\Python\deepfreeze\df.codecs.c" /> + <ClCompile Include="..\Python\deepfreeze\df.io.c" /> + <ClCompile Include="..\Python\deepfreeze\df._collections_abc.c" /> + <ClCompile Include="..\Python\deepfreeze\df._sitebuiltins.c" /> + <ClCompile Include="..\Python\deepfreeze\df.genericpath.c" /> + <ClCompile Include="..\Python\deepfreeze\df.ntpath.c" /> + <ClCompile Include="..\Python\deepfreeze\df.posixpath.c" /> + <ClCompile Include="..\Python\deepfreeze\df.os.c" /> + <ClCompile Include="..\Python\deepfreeze\df.site.c" /> + <ClCompile Include="..\Python\deepfreeze\df.stat.c" /> + <ClCompile Include="..\Python\deepfreeze\df.__hello__.c" /> + <ClCompile Include="..\Python\deepfreeze\df.__phello__.c" /> + <ClCompile Include="..\Python\deepfreeze\df.__phello__.ham.c" /> + <ClCompile Include="..\Python\deepfreeze\df.__phello__.ham.eggs.c" /> + <ClCompile Include="..\Python\deepfreeze\df.__phello__.spam.c" /> + <ClCompile Include="..\Python\deepfreeze\df.frozen_only.c" /> + <!-- END deepfreeze --> + </ItemGroup> <ItemGroup Condition="$(IncludeExternals)"> <ClCompile Include="..\Modules\zlibmodule.c" /> <ClCompile Include="$(zlibDir)\adler32.c" /> diff --git a/Python/frozen.c b/Python/frozen.c index 1565c9a..9f43db7 100644 --- a/Python/frozen.c +++ 
b/Python/frozen.c @@ -61,12 +61,7 @@ #include "frozen_modules/frozen_only.h" /* End includes */ -#ifdef MS_WINDOWS -/* Deepfreeze isn't supported on Windows yet. */ -#define GET_CODE(name) NULL -#else #define GET_CODE(name) _Py_get_##name##_toplevel -#endif /* Start extern declarations */ extern PyObject *_Py_get_importlib__bootstrap_toplevel(void); diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py index 074127f..b6d52b7 100644 --- a/Tools/scripts/deepfreeze.py +++ b/Tools/scripts/deepfreeze.py @@ -1,13 +1,16 @@ import argparse +import ast import builtins import collections import contextlib import os -import sys +import re import time import types import typing +import umarshal + verbose = False @@ -55,7 +58,8 @@ def get_localsplus_counts(code: types.CodeType, nplaincellvars += 1 elif kind & CO_FAST_FREE: nfreevars += 1 - assert nlocals == len(code.co_varnames) == code.co_nlocals + assert nlocals == len(code.co_varnames) == code.co_nlocals, \ + (nlocals, len(code.co_varnames), code.co_nlocals) assert ncellvars == len(code.co_cellvars) assert nfreevars == len(code.co_freevars) assert len(names) == nlocals + nplaincellvars + nfreevars @@ -274,14 +278,7 @@ class Printer: self.write(item + ",") return f"& {name}._object.ob_base.ob_base" - def generate_int(self, name: str, i: int) -> str: - maxint = sys.maxsize - if maxint == 2**31 - 1: - digit = 2**15 - elif maxint == 2**63 - 1: - digit = 2**30 - else: - assert False, f"What int size is this system?!? 
{maxint=}" + def _generate_int_for_bits(self, name: str, i: int, digit: int) -> None: sign = -1 if i < 0 else 0 if i == 0 else +1 i = abs(i) digits: list[int] = [] @@ -298,6 +295,20 @@ class Printer: if digits: ds = ", ".join(map(str, digits)) self.write(f".ob_digit = {{ {ds} }},") + + def generate_int(self, name: str, i: int) -> str: + if abs(i) < 2**15: + self._generate_int_for_bits(name, i, 2**15) + else: + connective = "if" + for bits_in_digit in 15, 30: + self.write(f"#{connective} PYLONG_BITS_IN_DIGIT == {bits_in_digit}") + self._generate_int_for_bits(name, i, 2**bits_in_digit) + connective = "elif" + self.write("#else") + self.write('#error "PYLONG_BITS_IN_DIGIT should be 15 or 30"') + self.write("#endif") + # If neither clause applies, it won't compile return f"& {name}.ob_base.ob_base" def generate_float(self, name: str, x: float) -> str: @@ -326,7 +337,7 @@ class Printer: return self.cache[key] self.misses += 1 match obj: - case types.CodeType() as code: + case types.CodeType() | umarshal.Code() as code: val = self.generate_code(name, code) case tuple(t): val = self.generate_tuple(name, t) @@ -367,8 +378,31 @@ _Py_get_%%NAME%%_toplevel(void) } """ +FROZEN_COMMENT = "/* Auto-generated by Programs/_freeze_module.c */" + +FROZEN_DATA_LINE = r"\s*(\d+,\s*)+\s*" + + +def is_frozen_header(source: str) -> bool: + return source.startswith(FROZEN_COMMENT) + + +def decode_frozen_data(source: str) -> types.CodeType: + lines = source.splitlines() + while lines and re.match(FROZEN_DATA_LINE, lines[0]) is None: + del lines[0] + while lines and re.match(FROZEN_DATA_LINE, lines[-1]) is None: + del lines[-1] + values: tuple[int, ...] 
= ast.literal_eval("".join(lines)) + data = bytes(values) + return umarshal.loads(data) + + def generate(source: str, filename: str, modname: str, file: typing.TextIO) -> None: - code = compile(source, filename, "exec") + if is_frozen_header(source): + code = decode_frozen_data(source) + else: + code = compile(source, filename, "exec") printer = Printer(file) printer.generate("toplevel", code) printer.write("") diff --git a/Tools/scripts/freeze_modules.py b/Tools/scripts/freeze_modules.py index ccea4e1..61ccae6 100644 --- a/Tools/scripts/freeze_modules.py +++ b/Tools/scripts/freeze_modules.py @@ -11,7 +11,6 @@ import posixpath import platform import subprocess import sys -import textwrap import time from update_file import updating_file_with_tmpfile, update_file_with_tmpfile @@ -55,6 +54,7 @@ FROZEN_FILE = os.path.join(ROOT_DIR, 'Python', 'frozen.c') MAKEFILE = os.path.join(ROOT_DIR, 'Makefile.pre.in') PCBUILD_PROJECT = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj') PCBUILD_FILTERS = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj.filters') +PCBUILD_PYTHONCORE = os.path.join(ROOT_DIR, 'PCbuild', 'pythoncore.vcxproj') OS_PATH = 'ntpath' if os.name == 'nt' else 'posixpath' @@ -717,20 +717,28 @@ def regen_makefile(modules): def regen_pcbuild(modules): projlines = [] filterlines = [] + corelines = [] for src in _iter_sources(modules): pyfile = relpath_for_windows_display(src.pyfile, ROOT_DIR) header = relpath_for_windows_display(src.frozenfile, ROOT_DIR) + deepbase = "df." 
+ src.id + deepoutfile = f"Python\\deepfreeze\\{deepbase}.c" intfile = ntpath.splitext(ntpath.basename(header))[0] + '.g.h' + deepintfile = ntpath.splitext(ntpath.basename(header))[0] + '.g.c' projlines.append(f' <None Include="..\\{pyfile}">') projlines.append(f' <ModName>{src.frozenid}</ModName>') projlines.append(f' <IntFile>$(IntDir){intfile}</IntFile>') projlines.append(f' <OutFile>$(PySourcePath){header}</OutFile>') + projlines.append(f' <DeepIntFile>$(IntDir){deepintfile}</DeepIntFile>') + projlines.append(f' <DeepOutFile>$(PySourcePath){deepoutfile}</DeepOutFile>') projlines.append(f' </None>') filterlines.append(f' <None Include="..\\{pyfile}">') filterlines.append(' <Filter>Python Files</Filter>') filterlines.append(' </None>') + corelines.append(f' <ClCompile Include="..\\{deepoutfile}" />') + print(f'# Updating {os.path.relpath(PCBUILD_PROJECT)}') with updating_file_with_tmpfile(PCBUILD_PROJECT) as (infile, outfile): lines = infile.readlines() @@ -753,6 +761,17 @@ def regen_pcbuild(modules): PCBUILD_FILTERS, ) outfile.writelines(lines) + print(f'# Updating {os.path.relpath(PCBUILD_PYTHONCORE)}') + with updating_file_with_tmpfile(PCBUILD_PYTHONCORE) as (infile, outfile): + lines = infile.readlines() + lines = replace_block( + lines, + '<!-- BEGIN deepfreeze -->', + '<!-- END deepfreeze -->', + corelines, + PCBUILD_FILTERS, + ) + outfile.writelines(lines) ####################################### diff --git a/Tools/scripts/startuptime.py b/Tools/scripts/startuptime.py new file mode 100644 index 0000000..1bb5b20 --- /dev/null +++ b/Tools/scripts/startuptime.py @@ -0,0 +1,22 @@ +# Quick script to time startup for various binaries + +import subprocess +import sys +import time + +NREPS = 100 + + +def main(): + binaries = sys.argv[1:] + for bin in binaries: + t0 = time.time() + for _ in range(NREPS): + result = subprocess.run([bin, "-c", "pass"]) + result.check_returncode() + t1 = time.time() + print(f"{(t1-t0)/NREPS:6.3f} {bin}") + + +if __name__ == "__main__": 
+ main() diff --git a/Tools/scripts/umarshal.py b/Tools/scripts/umarshal.py new file mode 100644 index 0000000..e0d18c8 --- /dev/null +++ b/Tools/scripts/umarshal.py @@ -0,0 +1,328 @@ +# Implementat marshal.loads() in pure Python + +import ast + +from typing import Any + + +class Type: + # Adapted from marshal.c + NULL = ord('0') + NONE = ord('N') + FALSE = ord('F') + TRUE = ord('T') + STOPITER = ord('S') + ELLIPSIS = ord('.') + INT = ord('i') + INT64 = ord('I') + FLOAT = ord('f') + BINARY_FLOAT = ord('g') + COMPLEX = ord('x') + BINARY_COMPLEX = ord('y') + LONG = ord('l') + STRING = ord('s') + INTERNED = ord('t') + REF = ord('r') + TUPLE = ord('(') + LIST = ord('[') + DICT = ord('{') + CODE = ord('c') + UNICODE = ord('u') + UNKNOWN = ord('?') + SET = ord('<') + FROZENSET = ord('>') + ASCII = ord('a') + ASCII_INTERNED = ord('A') + SMALL_TUPLE = ord(')') + SHORT_ASCII = ord('z') + SHORT_ASCII_INTERNED = ord('Z') + + +FLAG_REF = 0x80 # with a type, add obj to index + +NULL = object() # marker + +# Cell kinds +CO_FAST_LOCAL = 0x20 +CO_FAST_CELL = 0x40 +CO_FAST_FREE = 0x80 + + +class Code: + def __init__(self, **kwds: Any): + self.__dict__.update(kwds) + + def __repr__(self) -> str: + return f"Code(**{self.__dict__})" + + co_localsplusnames: tuple[str] + co_localspluskinds: tuple[int] + + def get_localsplus_names(self, select_kind: int) -> tuple[str, ...]: + varnames: list[str] = [] + for name, kind in zip(self.co_localsplusnames, + self.co_localspluskinds): + if kind & select_kind: + varnames.append(name) + return tuple(varnames) + + @property + def co_varnames(self) -> tuple[str, ...]: + return self.get_localsplus_names(CO_FAST_LOCAL) + + @property + def co_cellvars(self) -> tuple[str, ...]: + return self.get_localsplus_names(CO_FAST_CELL) + + @property + def co_freevars(self) -> tuple[str, ...]: + return self.get_localsplus_names(CO_FAST_FREE) + + @property + def co_nlocals(self) -> int: + return len(self.co_varnames) + + +class Reader: + # A fairly literal 
translation of the marshal reader. + + def __init__(self, data: bytes): + self.data: bytes = data + self.end: int = len(self.data) + self.pos: int = 0 + self.refs: list[Any] = [] + self.level: int = 0 + + def r_string(self, n: int) -> bytes: + assert 0 <= n <= self.end - self.pos + buf = self.data[self.pos : self.pos + n] + self.pos += n + return buf + + def r_byte(self) -> int: + buf = self.r_string(1) + return buf[0] + + def r_short(self) -> int: + buf = self.r_string(2) + x = buf[0] + x |= buf[1] << 8 + x |= -(x & (1<<15)) # Sign-extend + return x + + def r_long(self) -> int: + buf = self.r_string(4) + x = buf[0] + x |= buf[1] << 8 + x |= buf[2] << 16 + x |= buf[3] << 24 + x |= -(x & (1<<31)) # Sign-extend + return x + + def r_long64(self) -> int: + buf = self.r_string(8) + x = buf[0] + x |= buf[1] << 8 + x |= buf[2] << 16 + x |= buf[3] << 24 + x |= buf[1] << 32 + x |= buf[1] << 40 + x |= buf[1] << 48 + x |= buf[1] << 56 + x |= -(x & (1<<63)) # Sign-extend + return x + + def r_PyLong(self) -> int: + n = self.r_long() + size = abs(n) + x = 0 + # Pray this is right + for i in range(size): + x |= self.r_short() << i*15 + if n < 0: + x = -x + return x + + def r_float_bin(self) -> float: + buf = self.r_string(8) + import struct # Lazy import to avoid breaking UNIX build + return struct.unpack("d", buf)[0] + + def r_float_str(self) -> float: + n = self.r_byte() + buf = self.r_string(n) + return ast.literal_eval(buf.decode("ascii")) + + def r_ref_reserve(self, flag: int) -> int: + if flag: + idx = len(self.refs) + self.refs.append(None) + return idx + else: + return 0 + + def r_ref_insert(self, obj: Any, idx: int, flag: int) -> Any: + if flag: + self.refs[idx] = obj + return obj + + def r_ref(self, obj: Any, flag: int) -> Any: + assert flag & FLAG_REF + self.refs.append(obj) + return obj + + def r_object(self) -> Any: + old_level = self.level + try: + return self._r_object() + finally: + self.level = old_level + + def _r_object(self) -> Any: + code = self.r_byte() + 
flag = code & FLAG_REF + type = code & ~FLAG_REF + # print(" "*self.level + f"{code} {flag} {type} {chr(type)!r}") + self.level += 1 + + def R_REF(obj: Any) -> Any: + if flag: + obj = self.r_ref(obj, flag) + return obj + + match type: + case Type.NULL: + return NULL + case Type.NONE: + return None + case Type.ELLIPSIS: + return Ellipsis + case Type.FALSE: + return False + case Type.TRUE: + return True + case Type.INT: + return R_REF(self.r_long()) + case Type.INT64: + return R_REF(self.r_long64()) + case Type.LONG: + return R_REF(self.r_PyLong()) + case Type.FLOAT: + return R_REF(self.r_float_str()) + case Type.BINARY_FLOAT: + return R_REF(self.r_float_bin()) + case Type.COMPLEX: + return R_REF(complex(self.r_float_str(), + self.r_float_str())) + case Type.BINARY_COMPLEX: + return R_REF(complex(self.r_float_bin(), + self.r_float_bin())) + case Type.STRING: + n = self.r_long() + return R_REF(self.r_string(n)) + case Type.ASCII_INTERNED | Type.ASCII: + n = self.r_long() + return R_REF(self.r_string(n).decode("ascii")) + case Type.SHORT_ASCII_INTERNED | Type.SHORT_ASCII: + n = self.r_byte() + return R_REF(self.r_string(n).decode("ascii")) + case Type.INTERNED | Type.UNICODE: + n = self.r_long() + return R_REF(self.r_string(n).decode("utf8", "surrogatepass")) + case Type.SMALL_TUPLE: + n = self.r_byte() + idx = self.r_ref_reserve(flag) + retval: Any = tuple(self.r_object() for _ in range(n)) + self.r_ref_insert(retval, idx, flag) + return retval + case Type.TUPLE: + n = self.r_long() + idx = self.r_ref_reserve(flag) + retval = tuple(self.r_object() for _ in range(n)) + self.r_ref_insert(retval, idx, flag) + return retval + case Type.LIST: + n = self.r_long() + retval = R_REF([]) + for _ in range(n): + retval.append(self.r_object()) + return retval + case Type.DICT: + retval = R_REF({}) + while True: + key = self.r_object() + if key == NULL: + break + val = self.r_object() + retval[key] = val + return retval + case Type.SET: + n = self.r_long() + retval = R_REF(set()) + 
for _ in range(n): + v = self.r_object() + retval.add(v) + return retval + case Type.FROZENSET: + n = self.r_long() + s: set[Any] = set() + idx = self.r_ref_reserve(flag) + for _ in range(n): + v = self.r_object() + s.add(v) + retval = frozenset(s) + self.r_ref_insert(retval, idx, flag) + return retval + case Type.CODE: + retval = R_REF(Code()) + retval.co_argcount = self.r_long() + retval.co_posonlyargcount = self.r_long() + retval.co_kwonlyargcount = self.r_long() + retval.co_stacksize = self.r_long() + retval.co_flags = self.r_long() + retval.co_code = self.r_object() + retval.co_consts = self.r_object() + retval.co_names = self.r_object() + retval.co_localsplusnames = self.r_object() + retval.co_localspluskinds = self.r_object() + retval.co_filename = self.r_object() + retval.co_name = self.r_object() + retval.co_qualname = self.r_object() + retval.co_firstlineno = self.r_long() + retval.co_linetable = self.r_object() + retval.co_endlinetable = self.r_object() + retval.co_columntable = self.r_object() + retval.co_exceptiontable = self.r_object() + return retval + case Type.REF: + n = self.r_long() + retval = self.refs[n] + assert retval is not None + return retval + case _: + breakpoint() + raise AssertionError(f"Unknown type {type} {chr(type)!r}") + + +def loads(data: bytes) -> Any: + assert isinstance(data, bytes) + r = Reader(data) + return r.r_object() + + +def main(): + # Test + import marshal, pprint + sample = {'foo': {(42, "bar", 3.14)}} + data = marshal.dumps(sample) + retval = loads(data) + assert retval == sample, retval + sample = main.__code__ + data = marshal.dumps(sample) + retval = loads(data) + assert isinstance(retval, Code), retval + pprint.pprint(retval.__dict__) + + +if __name__ == "__main__": + main() |