From 1863302d61a7a5dd8b8d345a00f0ee242c7c10bf Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 17 Oct 2022 12:01:00 +0200 Subject: gh-97669: Create Tools/build/ directory (#97963) Create Tools/build/ directory. Move the following scripts from Tools/scripts/ to Tools/build/: * check_extension_modules.py * deepfreeze.py * freeze_modules.py * generate_global_objects.py * generate_levenshtein_examples.py * generate_opcode_h.py * generate_re_casefix.py * generate_sre_constants.py * generate_stdlib_module_names.py * generate_token.py * parse_html5_entities.py * smelly.py * stable_abi.py * umarshal.py * update_file.py * verify_ensurepip_wheels.py Update references to these scripts. --- .github/CODEOWNERS | 2 +- .github/workflows/verify-ensurepip-wheels.yml | 6 +- .gitignore | 2 +- Doc/library/token-list.inc | 2 +- Include/internal/pycore_global_strings.h | 4 +- Include/internal/pycore_opcode.h | 2 +- Include/internal/pycore_runtime_init_generated.h | 2 +- Include/internal/pycore_token.h | 2 +- Include/opcode.h | 2 +- Lib/html/entities.py | 2 +- Lib/token.py | 2 +- Makefile.pre.in | 62 +- Misc/stable_abi.toml | 2 +- Modules/_sre/sre_constants.h | 2 +- Modules/_sre/sre_targets.h | 2 +- PC/python3dll.c | 2 +- PCbuild/_freeze_module.vcxproj | 2 +- PCbuild/regen.targets | 8 +- Parser/token.c | 2 +- Programs/_bootstrap_python.c | 2 +- Programs/_freeze_module.c | 2 +- Python/deepfreeze/README.txt | 2 +- Python/frozen.c | 2 +- Python/frozen_modules/README.txt | 2 +- Python/stdlib_module_names.h | 2 +- Tools/build/check_extension_modules.py | 484 +++++++++++++++ Tools/build/deepfreeze.py | 504 +++++++++++++++ Tools/build/freeze_modules.py | 733 ++++++++++++++++++++++ Tools/build/generate_global_objects.py | 382 ++++++++++++ Tools/build/generate_levenshtein_examples.py | 70 +++ Tools/build/generate_opcode_h.py | 199 ++++++ Tools/build/generate_re_casefix.py | 96 +++ Tools/build/generate_sre_constants.py | 80 +++ Tools/build/generate_stdlib_module_names.py | 139 +++++ Tools/build/generate_token.py | 282 +++++++++ Tools/build/parse_html5_entities.py | 115 ++++ Tools/build/smelly.py | 173 ++++++ Tools/build/stable_abi.py | 757 +++++++++++++++++++++++ Tools/build/umarshal.py | 325 ++++++++++ Tools/build/update_file.py | 92 +++ Tools/build/verify_ensurepip_wheels.py | 98 +++ Tools/scripts/check_extension_modules.py | 484 --------------- Tools/scripts/deepfreeze.py | 504 --------------- Tools/scripts/freeze_modules.py | 733 ---------------------- Tools/scripts/generate_global_objects.py | 381 ------------ Tools/scripts/generate_levenshtein_examples.py | 70 --- Tools/scripts/generate_opcode_h.py | 199 ------ Tools/scripts/generate_re_casefix.py | 94 --- Tools/scripts/generate_sre_constants.py | 78 --- Tools/scripts/generate_stdlib_module_names.py | 137 ---- Tools/scripts/generate_token.py | 275 -------- Tools/scripts/parse_html5_entities.py | 114 ---- Tools/scripts/smelly.py | 173 ------ Tools/scripts/stable_abi.py | 754 ---------------------- Tools/scripts/umarshal.py | 325 ---------- Tools/scripts/update_file.py | 92 --- Tools/scripts/verify_ensurepip_wheels.py | 98 --- 57 files changed, 4590 insertions(+), 4572 deletions(-) create mode 100644 Tools/build/check_extension_modules.py create mode 100644 Tools/build/deepfreeze.py create mode 100644 Tools/build/freeze_modules.py create mode 100644 Tools/build/generate_global_objects.py create mode 100644 Tools/build/generate_levenshtein_examples.py create mode 100644 Tools/build/generate_opcode_h.py create mode 100755 Tools/build/generate_re_casefix.py 
create mode 100755 Tools/build/generate_sre_constants.py create mode 100644 Tools/build/generate_stdlib_module_names.py create mode 100755 Tools/build/generate_token.py create mode 100755 Tools/build/parse_html5_entities.py create mode 100755 Tools/build/smelly.py create mode 100644 Tools/build/stable_abi.py create mode 100644 Tools/build/umarshal.py create mode 100644 Tools/build/update_file.py create mode 100755 Tools/build/verify_ensurepip_wheels.py delete mode 100644 Tools/scripts/check_extension_modules.py delete mode 100644 Tools/scripts/deepfreeze.py delete mode 100644 Tools/scripts/freeze_modules.py delete mode 100644 Tools/scripts/generate_global_objects.py delete mode 100644 Tools/scripts/generate_levenshtein_examples.py delete mode 100644 Tools/scripts/generate_opcode_h.py delete mode 100755 Tools/scripts/generate_re_casefix.py delete mode 100755 Tools/scripts/generate_sre_constants.py delete mode 100644 Tools/scripts/generate_stdlib_module_names.py delete mode 100755 Tools/scripts/generate_token.py delete mode 100755 Tools/scripts/parse_html5_entities.py delete mode 100755 Tools/scripts/smelly.py delete mode 100755 Tools/scripts/stable_abi.py delete mode 100644 Tools/scripts/umarshal.py delete mode 100644 Tools/scripts/update_file.py delete mode 100755 Tools/scripts/verify_ensurepip_wheels.py diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 585589d..2fd933a 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -55,7 +55,7 @@ Python/traceback.c @iritkatriel /Lib/html/ @ezio-melotti /Lib/_markupbase.py @ezio-melotti /Lib/test/test_html*.py @ezio-melotti -/Tools/scripts/*html5* @ezio-melotti +/Tools/build/parse_html5_entities.py @ezio-melotti # Import (including importlib). # Ignoring importlib.h so as to not get flagged on diff --git a/.github/workflows/verify-ensurepip-wheels.yml b/.github/workflows/verify-ensurepip-wheels.yml index 9f4754f..969515e 100644 --- a/.github/workflows/verify-ensurepip-wheels.yml +++ b/.github/workflows/verify-ensurepip-wheels.yml @@ -6,12 +6,12 @@ on: paths: - 'Lib/ensurepip/_bundled/**' - '.github/workflows/verify-ensurepip-wheels.yml' - - 'Tools/scripts/verify_ensurepip_wheels.py' + - 'Tools/build/verify_ensurepip_wheels.py' pull_request: paths: - 'Lib/ensurepip/_bundled/**' - '.github/workflows/verify-ensurepip-wheels.yml' - - 'Tools/scripts/verify_ensurepip_wheels.py' + - 'Tools/build/verify_ensurepip_wheels.py' permissions: contents: read @@ -29,4 +29,4 @@ jobs: with: python-version: '3' - name: Compare checksums of bundled pip and setuptools to ones published on PyPI - run: ./Tools/scripts/verify_ensurepip_wheels.py + run: ./Tools/build/verify_ensurepip_wheels.py diff --git a/.gitignore b/.gitignore index 924c136..6934faa 100644 --- a/.gitignore +++ b/.gitignore @@ -143,7 +143,7 @@ Tools/ssl/win32 Tools/freeze/test/outdir # The frozen modules are always generated by the build so we don't -# keep them in the repo. Also see Tools/scripts/freeze_modules.py. +# keep them in the repo. Also see Tools/build/freeze_modules.py. Python/frozen_modules/*.h # The manifest can be generated at any time with "make regen-frozen". Python/frozen_modules/MANIFEST diff --git a/Doc/library/token-list.inc b/Doc/library/token-list.inc index 1a99f05..2739d5b 100644 --- a/Doc/library/token-list.inc +++ b/Doc/library/token-list.inc @@ -1,4 +1,4 @@ -.. Auto-generated by Tools/scripts/generate_token.py +.. Auto-generated by Tools/build/generate_token.py .. data:: ENDMARKER .. 
data:: NAME diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index f646979..811cfc1 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -8,7 +8,7 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif -// The data structure & init here are inspired by Tools/scripts/deepfreeze.py. +// The data structure & init here are inspired by Tools/build/deepfreeze.py. // All field names generated by ASCII_STR() have a common prefix, // to help avoid collisions with keywords, etc. @@ -25,7 +25,7 @@ extern "C" { // XXX Order by frequency of use? -/* The following is auto-generated by Tools/scripts/generate_global_objects.py. */ +/* The following is auto-generated by Tools/build/generate_global_objects.py. */ struct _Py_global_strings { struct { STRUCT_FOR_STR(anon_dictcomp, "") diff --git a/Include/internal/pycore_opcode.h b/Include/internal/pycore_opcode.h index 1592551..c8ef5dd 100644 --- a/Include/internal/pycore_opcode.h +++ b/Include/internal/pycore_opcode.h @@ -1,4 +1,4 @@ -// Auto-generated by Tools/scripts/generate_opcode_h.py from Lib/opcode.py +// Auto-generated by Tools/build/generate_opcode_h.py from Lib/opcode.py #ifndef Py_INTERNAL_OPCODE_H #define Py_INTERNAL_OPCODE_H diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index bd1fede..8ce9588 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -8,7 +8,7 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif -/* The following is auto-generated by Tools/scripts/generate_global_objects.py. */ +/* The following is auto-generated by Tools/build/generate_global_objects.py. */ #define _Py_global_objects_INIT { \ .singletons = { \ .small_ints = { \ diff --git a/Include/internal/pycore_token.h b/Include/internal/pycore_token.h index f9b8240..95459ab 100644 --- a/Include/internal/pycore_token.h +++ b/Include/internal/pycore_token.h @@ -1,4 +1,4 @@ -/* Auto-generated by Tools/scripts/generate_token.py */ +/* Auto-generated by Tools/build/generate_token.py */ /* Token types */ #ifndef Py_INTERNAL_TOKEN_H diff --git a/Include/opcode.h b/Include/opcode.h index 42825df..0871eb1 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -1,4 +1,4 @@ -// Auto-generated by Tools/scripts/generate_opcode_h.py from Lib/opcode.py +// Auto-generated by Tools/build/generate_opcode_h.py from Lib/opcode.py #ifndef Py_OPCODE_H #define Py_OPCODE_H diff --git a/Lib/html/entities.py b/Lib/html/entities.py index cc59bc3..eb6dc12 100644 --- a/Lib/html/entities.py +++ b/Lib/html/entities.py @@ -261,7 +261,7 @@ name2codepoint = { # HTML5 named character references -# Generated by 'Tools/scripts/parse_html5_entities.py' +# Generated by Tools/build/parse_html5_entities.py # from https://html.spec.whatwg.org/entities.json and # https://html.spec.whatwg.org/multipage/named-characters.html. # Map HTML5 named character references to the equivalent Unicode character(s). 
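Since the point of this change is that every reference to the moved scripts gets updated, a tree-wide scan for stragglers is a useful sanity check. The helper below is illustrative only and not part of this patch; the script names are copied from the commit message above:

    #!/usr/bin/env python3
    """Illustrative only (not part of this patch): report leftover
    references to the scripts moved from Tools/scripts/ to Tools/build/."""
    import re
    import sys
    from pathlib import Path

    # Script names as listed in the commit message above.
    MOVED = [
        "check_extension_modules.py", "deepfreeze.py", "freeze_modules.py",
        "generate_global_objects.py", "generate_levenshtein_examples.py",
        "generate_opcode_h.py", "generate_re_casefix.py",
        "generate_sre_constants.py", "generate_stdlib_module_names.py",
        "generate_token.py", "parse_html5_entities.py", "smelly.py",
        "stable_abi.py", "umarshal.py", "update_file.py",
        "verify_ensurepip_wheels.py",
    ]
    STALE = re.compile(r"Tools[/\\]scripts[/\\](%s)" % "|".join(map(re.escape, MOVED)))

    def main(root: str = ".") -> int:
        hits = 0
        for path in Path(root).rglob("*"):
            if not path.is_file() or ".git" in path.parts:
                continue
            try:
                text = path.read_text(encoding="utf-8")
            except (UnicodeDecodeError, OSError):
                continue  # skip binary and unreadable files
            for lineno, line in enumerate(text.splitlines(), 1):
                if STALE.search(line):
                    print(f"{path}:{lineno}: {line.strip()}")
                    hits += 1
        return 1 if hits else 0

    if __name__ == "__main__":
        sys.exit(main(*sys.argv[1:]))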
diff --git a/Lib/token.py b/Lib/token.py index 9d0c0bf..95b107c 100644 --- a/Lib/token.py +++ b/Lib/token.py @@ -1,5 +1,5 @@ """Token constants.""" -# Auto-generated by Tools/scripts/generate_token.py +# Auto-generated by Tools/build/generate_token.py __all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF'] diff --git a/Makefile.pre.in b/Makefile.pre.in index 7e25671..5b4bf15 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -285,7 +285,7 @@ BUILDPYTHON= python$(BUILDEXE) HOSTRUNNER= @HOSTRUNNER@ PYTHON_FOR_REGEN?=@PYTHON_FOR_REGEN@ -UPDATE_FILE=$(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/update_file.py +UPDATE_FILE=$(PYTHON_FOR_REGEN) $(srcdir)/Tools/build/update_file.py PYTHON_FOR_BUILD=@PYTHON_FOR_BUILD@ # Single-platform builds depend on $(BUILDPYTHON). Cross builds use an # external "build Python" and have an empty PYTHON_FOR_BUILD_DEPS. @@ -705,7 +705,7 @@ coverage-report: regen-token regen-frozen .PHONY=clinic clinic: check-clean-src $(srcdir)/Modules/_blake2/blake2s_impl.c $(PYTHON_FOR_REGEN) $(srcdir)/Tools/clinic/clinic.py --make --srcdir $(srcdir) - $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_global_objects.py + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/build/generate_global_objects.py # Build the interpreter $(BUILDPYTHON): Programs/python.o $(LINK_PYTHON_DEPS) @@ -907,7 +907,7 @@ sharedmods: $(SHAREDMODS) pybuilddir.txt # dependency on BUILDPYTHON ensures that the target is run last checksharedmods: sharedmods $(PYTHON_FOR_BUILD_DEPS) $(BUILDPYTHON) - @$(RUNSHARED) $(PYTHON_FOR_BUILD) $(srcdir)/Tools/scripts/check_extension_modules.py + @$(RUNSHARED) $(PYTHON_FOR_BUILD) $(srcdir)/Tools/build/check_extension_modules.py rundsymutil: sharedmods $(PYTHON_FOR_BUILD_DEPS) $(BUILDPYTHON) @if [ ! -z $(DSYMUTIL) ] ; then \ @@ -961,13 +961,13 @@ regen-test-frozenmain: $(BUILDPYTHON) .PHONY: regen-test-levenshtein regen-test-levenshtein: # Regenerate Lib/test/levenshtein_examples.json - $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_levenshtein_examples.py Lib/test/levenshtein_examples.json + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/build/generate_levenshtein_examples.py Lib/test/levenshtein_examples.json .PHONY: regen-re regen-re: $(BUILDPYTHON) # Regenerate Lib/re/_casefix.py - # using Tools/scripts/generate_re_casefix.py - $(RUNSHARED) ./$(BUILDPYTHON) $(srcdir)/Tools/scripts/generate_re_casefix.py $(srcdir)/Lib/re/_casefix.py + # using Tools/build/generate_re_casefix.py + $(RUNSHARED) ./$(BUILDPYTHON) $(srcdir)/Tools/build/generate_re_casefix.py $(srcdir)/Lib/re/_casefix.py Programs/_testembed: Programs/_testembed.o $(LINK_PYTHON_DEPS) $(LINKCC) $(PY_CORE_LDFLAGS) $(LINKFORSHARED) -o $@ Programs/_testembed.o $(LINK_PYTHON_OBJS) $(LIBS) $(MODLIBS) $(SYSLIBS) @@ -1013,7 +1013,7 @@ _bootstrap_python: $(LIBRARY_OBJS_OMIT_FROZEN) Programs/_bootstrap_python.o Modu # 2) deepfreeze modules with external build Python. # -# FROZEN_FILES_* are auto-generated by Tools/scripts/freeze_modules.py. +# FROZEN_FILES_* are auto-generated by Tools/build/freeze_modules.py. 
FROZEN_FILES_IN = \ Lib/importlib/_bootstrap.py \ Lib/importlib/_bootstrap_external.py \ @@ -1149,11 +1149,11 @@ Python/frozen_modules/frozen_only.h: Tools/freeze/flag.py $(FREEZE_MODULE_DEPS) # END: freezing modules -Tools/scripts/freeze_modules.py: $(FREEZE_MODULE) +Tools/build/freeze_modules.py: $(FREEZE_MODULE) .PHONY: regen-frozen -regen-frozen: Tools/scripts/freeze_modules.py $(FROZEN_FILES_IN) - $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/freeze_modules.py +regen-frozen: Tools/build/freeze_modules.py $(FROZEN_FILES_IN) + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/build/freeze_modules.py @echo "The Makefile was updated, you may need to re-run make." ############################################################################ @@ -1162,11 +1162,11 @@ regen-frozen: Tools/scripts/freeze_modules.py $(FROZEN_FILES_IN) .PHONY: regen-deepfreeze regen-deepfreeze: $(DEEPFREEZE_OBJS) -DEEPFREEZE_DEPS=$(srcdir)/Tools/scripts/deepfreeze.py $(FREEZE_MODULE_DEPS) $(FROZEN_FILES_OUT) +DEEPFREEZE_DEPS=$(srcdir)/Tools/build/deepfreeze.py $(FREEZE_MODULE_DEPS) $(FROZEN_FILES_OUT) # BEGIN: deepfreeze modules Python/deepfreeze/deepfreeze.c: $(DEEPFREEZE_DEPS) - $(PYTHON_FOR_FREEZE) $(srcdir)/Tools/scripts/deepfreeze.py \ + $(PYTHON_FOR_FREEZE) $(srcdir)/Tools/build/deepfreeze.py \ Python/frozen_modules/importlib._bootstrap.h:importlib._bootstrap \ Python/frozen_modules/importlib._bootstrap_external.h:importlib._bootstrap_external \ Python/frozen_modules/zipimport.h:zipimport \ @@ -1203,8 +1203,8 @@ regen-importlib: regen-frozen # Global objects .PHONY: regen-global-objects -regen-global-objects: $(srcdir)/Tools/scripts/generate_global_objects.py - $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_global_objects.py +regen-global-objects: $(srcdir)/Tools/build/generate_global_objects.py + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/build/generate_global_objects.py @echo "Note: Global objects can be added or removed by other tools (e.g. deepfreeze), " @echo " so be sure to re-run regen-global-objects after those tools." 
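The deepfreeze.c rule above hands each frozen header to deepfreeze.py as a single "file:modname" argument. As a minimal sketch of that convention: Tools/build/deepfreeze.py (added later in this patch) splits each argument on the *last* colon via arg.rsplit(':', 1), so a path containing a colon, such as a Windows drive letter, is still handled. The helper name here is made up for illustration:

    # Sketch of the "<file>:<modname>" argument convention used by the
    # deepfreeze rule above; equivalent to deepfreeze.py's rsplit(':', 1).
    def split_deepfreeze_arg(arg: str) -> tuple[str, str]:
        file, _, modname = arg.rpartition(':')
        if not file:
            raise ValueError(f"expected <file>:<modname>, got {arg!r}")
        return file, modname

    assert split_deepfreeze_arg(
        "Python/frozen_modules/importlib._bootstrap.h:importlib._bootstrap"
    ) == ("Python/frozen_modules/importlib._bootstrap.h", "importlib._bootstrap")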
@@ -1220,7 +1220,7 @@ check-abidump: all abidiff $(srcdir)/Doc/data/python$(LDVERSION).abi "libpython$(LDVERSION).so" --drop-private-types --no-architecture --no-added-syms regen-limited-abi: all - $(RUNSHARED) ./$(BUILDPYTHON) $(srcdir)/Tools/scripts/stable_abi.py --generate-all $(srcdir)/Misc/stable_abi.toml + $(RUNSHARED) ./$(BUILDPYTHON) $(srcdir)/Tools/build/stable_abi.py --generate-all $(srcdir)/Misc/stable_abi.toml ############################################################################ # Regenerate all generated files @@ -1331,8 +1331,8 @@ regen-ast: .PHONY: regen-opcode regen-opcode: # Regenerate Include/opcode.h from Lib/opcode.py - # using Tools/scripts/generate_opcode_h.py - $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_opcode_h.py \ + # using Tools/build/generate_opcode_h.py + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/build/generate_opcode_h.py \ $(srcdir)/Lib/opcode.py \ $(srcdir)/Include/opcode.h.new \ $(srcdir)/Include/internal/pycore_opcode.h.new @@ -1342,23 +1342,23 @@ regen-opcode: .PHONY: regen-token regen-token: # Regenerate Doc/library/token-list.inc from Grammar/Tokens - # using Tools/scripts/generate_token.py - $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token.py rst \ + # using Tools/build/generate_token.py + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/build/generate_token.py rst \ $(srcdir)/Grammar/Tokens \ $(srcdir)/Doc/library/token-list.inc # Regenerate Include/internal/pycore_token.h from Grammar/Tokens - # using Tools/scripts/generate_token.py - $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token.py h \ + # using Tools/build/generate_token.py + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/build/generate_token.py h \ $(srcdir)/Grammar/Tokens \ $(srcdir)/Include/internal/pycore_token.h # Regenerate Parser/token.c from Grammar/Tokens - # using Tools/scripts/generate_token.py - $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token.py c \ + # using Tools/build/generate_token.py + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/build/generate_token.py c \ $(srcdir)/Grammar/Tokens \ $(srcdir)/Parser/token.c # Regenerate Lib/token.py from Grammar/Tokens - # using Tools/scripts/generate_token.py - $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token.py py \ + # using Tools/build/generate_token.py + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/build/generate_token.py py \ $(srcdir)/Grammar/Tokens \ $(srcdir)/Lib/token.py @@ -1375,16 +1375,16 @@ regen-keyword: .PHONY: regen-stdlib-module-names regen-stdlib-module-names: all Programs/_testembed # Regenerate Python/stdlib_module_names.h - # using Tools/scripts/generate_stdlib_module_names.py + # using Tools/build/generate_stdlib_module_names.py $(RUNSHARED) ./$(BUILDPYTHON) \ - $(srcdir)/Tools/scripts/generate_stdlib_module_names.py \ + $(srcdir)/Tools/build/generate_stdlib_module_names.py \ > $(srcdir)/Python/stdlib_module_names.h.new $(UPDATE_FILE) $(srcdir)/Python/stdlib_module_names.h $(srcdir)/Python/stdlib_module_names.h.new regen-sre: # Regenerate Modules/_sre/sre_constants.h and Modules/_sre/sre_targets.h - # from Lib/re/_constants.py using Tools/scripts/generate_sre_constants.py - $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_sre_constants.py \ + # from Lib/re/_constants.py using Tools/build/generate_sre_constants.py + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/build/generate_sre_constants.py \ $(srcdir)/Lib/re/_constants.py \ $(srcdir)/Modules/_sre/sre_constants.h \ $(srcdir)/Modules/_sre/sre_targets.h @@ -2511,7 +2511,7 @@ distclean: clobber docclean # Check that all symbols exported by libpython start with "Py" or 
"_Py" smelly: all - $(RUNSHARED) ./$(BUILDPYTHON) $(srcdir)/Tools/scripts/smelly.py + $(RUNSHARED) ./$(BUILDPYTHON) $(srcdir)/Tools/build/smelly.py # Find files with funny names funny: @@ -2549,7 +2549,7 @@ patchcheck: all $(RUNSHARED) ./$(BUILDPYTHON) $(srcdir)/Tools/patchcheck/patchcheck.py check-limited-abi: all - $(RUNSHARED) ./$(BUILDPYTHON) $(srcdir)/Tools/scripts/stable_abi.py --all $(srcdir)/Misc/stable_abi.toml + $(RUNSHARED) ./$(BUILDPYTHON) $(srcdir)/Tools/build/stable_abi.py --all $(srcdir)/Misc/stable_abi.toml .PHONY: update-config update-config: diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index a8920d9..e78646f 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -2,7 +2,7 @@ # Please append new items at the end. # The syntax of this file is not fixed. -# It is designed to be read only by Tools/stable_abi.py, which can change +# It is designed to be read only by Tools/build/stable_abi.py, which can change # without notice. # For the history of the stable ABI prior to this file, diff --git a/Modules/_sre/sre_constants.h b/Modules/_sre/sre_constants.h index c633514..f030815 100644 --- a/Modules/_sre/sre_constants.h +++ b/Modules/_sre/sre_constants.h @@ -3,7 +3,7 @@ * * regular expression matching engine * - * Auto-generated by Tools/scripts/generate_sre_constants.py from + * Auto-generated by Tools/build/generate_sre_constants.py from * Lib/re/_constants.py. * * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. diff --git a/Modules/_sre/sre_targets.h b/Modules/_sre/sre_targets.h index 25b6edd..62761a0 100644 --- a/Modules/_sre/sre_targets.h +++ b/Modules/_sre/sre_targets.h @@ -3,7 +3,7 @@ * * regular expression matching engine * - * Auto-generated by Tools/scripts/generate_sre_constants.py from + * Auto-generated by Tools/build/generate_sre_constants.py from * Lib/re/_constants.py. * * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. 
diff --git a/PC/python3dll.c b/PC/python3dll.c index 89bbd05..c1b88c6 100755 --- a/PC/python3dll.c +++ b/PC/python3dll.c @@ -1,7 +1,7 @@ /* Re-export stable Python ABI */ -/* Generated by Tools/scripts/stable_abi.py */ +/* Generated by Tools/build/stable_abi.py */ #ifdef _M_IX86 #define DECORATE "_" diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index 39939a7..49e5cc8 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -395,7 +395,7 @@ DependsOnTargets="FindPythonForBuild" Condition="$(Configuration) != 'PGUpdate'"> - -C - <_OpcodeSources Include="$(PySourcePath)Tools\scripts\generate_opcode_h.py;$(PySourcePath)Lib\opcode.py" /> + <_OpcodeSources Include="$(PySourcePath)Tools\build\generate_opcode_h.py;$(PySourcePath)Lib\opcode.py" /> <_OpcodeOutputs Include="$(PySourcePath)Include\opcode.h;$(PySourcePath)Include\internal\pycore_opcode.h;$(PySourcePath)Python\opcode_targets.h" /> <_TokenSources Include="$(PySourcePath)Grammar\Tokens" /> <_TokenOutputs Include="$(PySourcePath)Doc\library\token-list.inc"> @@ -59,7 +59,7 @@ Inputs="@(_OpcodeSources)" Outputs="@(_OpcodeOutputs)" DependsOnTargets="FindPythonForBuild"> - @@ -69,7 +69,7 @@ Inputs="@(_TokenSources)" Outputs="@(_TokenOutputs)" DependsOnTargets="FindPythonForBuild"> - @@ -85,7 +85,7 @@ - diff --git a/Parser/token.c b/Parser/token.c index fa03fbc..6299ad2f 100644 --- a/Parser/token.c +++ b/Parser/token.c @@ -1,4 +1,4 @@ -/* Auto-generated by Tools/scripts/generate_token.py */ +/* Auto-generated by Tools/build/generate_token.py */ #include "Python.h" #include "pycore_token.h" diff --git a/Programs/_bootstrap_python.c b/Programs/_bootstrap_python.c index 6ecbf0c..bbac0c4 100644 --- a/Programs/_bootstrap_python.c +++ b/Programs/_bootstrap_python.c @@ -2,7 +2,7 @@ /* Frozen modules bootstrap * * Limited and restricted Python interpreter to run - * "Tools/scripts/deepfreeze.py" on systems with no or older Python + * "Tools/build/deepfreeze.py" on systems with no or older Python * interpreter. */ diff --git a/Programs/_freeze_module.c b/Programs/_freeze_module.c index 3d27b79..d6d737d 100644 --- a/Programs/_freeze_module.c +++ b/Programs/_freeze_module.c @@ -2,7 +2,7 @@ modules into frozen modules (like Lib/importlib/_bootstrap.py into Python/importlib.h). - This is used directly by Tools/scripts/freeze_modules.py, and indirectly by "make regen-frozen". + This is used directly by Tools/build/freeze_modules.py, and indirectly by "make regen-frozen". See Python/frozen.c for more info. diff --git a/Python/deepfreeze/README.txt b/Python/deepfreeze/README.txt index da55d4e..276ab51 100644 --- a/Python/deepfreeze/README.txt +++ b/Python/deepfreeze/README.txt @@ -3,4 +3,4 @@ modules. Python/frozen.c depends on these files. None of these files are committed into the repo. -See Tools/scripts/freeze_modules.py for more info. +See Tools/build/freeze_modules.py for more info. diff --git a/Python/frozen.c b/Python/frozen.c index 8a2a724..48b4295 100644 --- a/Python/frozen.c +++ b/Python/frozen.c @@ -8,7 +8,7 @@ * These files must be regenerated any time the corresponding .pyc * file would change (including with changes to the compiler, bytecode * format, marshal format). This can be done with "make regen-frozen". - * That make target just runs Tools/scripts/freeze_modules.py. + * That make target just runs Tools/build/freeze_modules.py. * * The freeze_modules.py script also determines which modules get * frozen. 
Update the list at the top of the script to add, remove,
diff --git a/Python/frozen_modules/README.txt b/Python/frozen_modules/README.txt
index 444167c..795bb0e 100644
--- a/Python/frozen_modules/README.txt
+++ b/Python/frozen_modules/README.txt
@@ -4,4 +4,4 @@ modules. Python/frozen.c depends on these files.
 Note that, other than the required frozen modules, none of these
 files are committed into the repo.
 
-See Tools/scripts/freeze_modules.py for more info.
+See Tools/build/freeze_modules.py for more info.
diff --git a/Python/stdlib_module_names.h b/Python/stdlib_module_names.h
index b281566..12827e7 100644
--- a/Python/stdlib_module_names.h
+++ b/Python/stdlib_module_names.h
@@ -1,4 +1,4 @@
-// Auto-generated by Tools/scripts/generate_stdlib_module_names.py.
+// Auto-generated by Tools/build/generate_stdlib_module_names.py.
 // List used to create sys.stdlib_module_names.
 
 static const char* _Py_stdlib_module_names[] = {
diff --git a/Tools/build/check_extension_modules.py b/Tools/build/check_extension_modules.py
new file mode 100644
index 0000000..59239c6
--- /dev/null
+++ b/Tools/build/check_extension_modules.py
@@ -0,0 +1,484 @@
+"""Check extension modules
+
+The script checks shared and built-in extension modules. It verifies that the
+modules have been built and that they can be imported successfully. Missing
+modules and failed imports are reported to the user. Shared extension
+files are renamed on failed import.
+
+Module information is parsed from several sources:
+
+- core modules hard-coded in Modules/config.c.in
+- Windows-specific modules that are hard-coded in PC/config.c
+- MODULE_{name}_STATE entries in Makefile (provided through sysconfig)
+- Various makesetup files:
+  - $(srcdir)/Modules/Setup
+  - Modules/Setup.[local|bootstrap|stdlib] files, which are generated
+    from $(srcdir)/Modules/Setup.*.in files
+
+See --help for more information
+"""
+import argparse
+import collections
+import enum
+import logging
+import os
+import pathlib
+import re
+import sys
+import sysconfig
+import warnings
+
+from importlib._bootstrap import _load as bootstrap_load
+from importlib.machinery import BuiltinImporter, ExtensionFileLoader, ModuleSpec
+from importlib.util import spec_from_file_location, spec_from_loader
+from typing import Iterable
+
+SRC_DIR = pathlib.Path(__file__).parent.parent.parent
+
+# core modules, hard-coded in Modules/config.c.in
+CORE_MODULES = {
+    "_ast",
+    "_imp",
+    "_string",
+    "_tokenize",
+    "_warnings",
+    "builtins",
+    "gc",
+    "marshal",
+    "sys",
+}
+
+# Windows-only modules
+WINDOWS_MODULES = {
+    "_msi",
+    "_overlapped",
+    "_testconsole",
+    "_winapi",
+    "msvcrt",
+    "nt",
+    "winreg",
+    "winsound",
+}
+
+
+logger = logging.getLogger(__name__)
+
+parser = argparse.ArgumentParser(
+    prog="check_extension_modules",
+    description=__doc__,
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+)
+
+parser.add_argument(
+    "--verbose",
+    action="store_true",
+    help="Verbose, report builtin, shared, and unavailable modules",
+)
+
+parser.add_argument(
+    "--debug",
+    action="store_true",
+    help="Enable debug logging",
+)
+
+parser.add_argument(
+    "--strict",
+    action=argparse.BooleanOptionalAction,
+    help=(
+        "Strict check, fail when a module is missing or fails to import "
+        "(default: no, unless env var PYTHONSTRICTEXTENSIONBUILD is set)"
+    ),
+    default=bool(os.environ.get("PYTHONSTRICTEXTENSIONBUILD")),
+)
+
+parser.add_argument(
+    "--cross-compiling",
+    action=argparse.BooleanOptionalAction,
+    help=(
+        "Use cross-compiling checks "
+        "(default: no, unless env var
_PYTHON_HOST_PLATFORM is set)." + ), + default="_PYTHON_HOST_PLATFORM" in os.environ, +) + +parser.add_argument( + "--list-module-names", + action="store_true", + help="Print a list of module names to stdout and exit", +) + + +class ModuleState(enum.Enum): + # Makefile state "yes" + BUILTIN = "builtin" + SHARED = "shared" + + DISABLED = "disabled" + MISSING = "missing" + NA = "n/a" + # disabled by Setup / makesetup rule + DISABLED_SETUP = "disabled_setup" + + def __bool__(self): + return self.value in {"builtin", "shared"} + + +ModuleInfo = collections.namedtuple("ModuleInfo", "name state") + + +class ModuleChecker: + pybuilddir_txt = "pybuilddir.txt" + + setup_files = ( + # see end of configure.ac + "Modules/Setup.local", + "Modules/Setup.stdlib", + "Modules/Setup.bootstrap", + SRC_DIR / "Modules/Setup", + ) + + def __init__(self, cross_compiling: bool = False, strict: bool = False): + self.cross_compiling = cross_compiling + self.strict_extensions_build = strict + self.ext_suffix = sysconfig.get_config_var("EXT_SUFFIX") + self.platform = sysconfig.get_platform() + self.builddir = self.get_builddir() + self.modules = self.get_modules() + + self.builtin_ok = [] + self.shared_ok = [] + self.failed_on_import = [] + self.missing = [] + self.disabled_configure = [] + self.disabled_setup = [] + self.notavailable = [] + + def check(self): + for modinfo in self.modules: + logger.debug("Checking '%s' (%s)", modinfo.name, self.get_location(modinfo)) + if modinfo.state == ModuleState.DISABLED: + self.disabled_configure.append(modinfo) + elif modinfo.state == ModuleState.DISABLED_SETUP: + self.disabled_setup.append(modinfo) + elif modinfo.state == ModuleState.MISSING: + self.missing.append(modinfo) + elif modinfo.state == ModuleState.NA: + self.notavailable.append(modinfo) + else: + try: + if self.cross_compiling: + self.check_module_cross(modinfo) + else: + self.check_module_import(modinfo) + except (ImportError, FileNotFoundError): + self.rename_module(modinfo) + self.failed_on_import.append(modinfo) + else: + if modinfo.state == ModuleState.BUILTIN: + self.builtin_ok.append(modinfo) + else: + assert modinfo.state == ModuleState.SHARED + self.shared_ok.append(modinfo) + + def summary(self, *, verbose: bool = False): + longest = max([len(e.name) for e in self.modules], default=0) + + def print_three_column(modinfos: list[ModuleInfo]): + names = [modinfo.name for modinfo in modinfos] + names.sort(key=str.lower) + # guarantee zip() doesn't drop anything + while len(names) % 3: + names.append("") + for l, m, r in zip(names[::3], names[1::3], names[2::3]): + print("%-*s %-*s %-*s" % (longest, l, longest, m, longest, r)) + + if verbose and self.builtin_ok: + print("The following *built-in* modules have been successfully built:") + print_three_column(self.builtin_ok) + print() + + if verbose and self.shared_ok: + print("The following *shared* modules have been successfully built:") + print_three_column(self.shared_ok) + print() + + if self.disabled_configure: + print("The following modules are *disabled* in configure script:") + print_three_column(self.disabled_configure) + print() + + if self.disabled_setup: + print("The following modules are *disabled* in Modules/Setup files:") + print_three_column(self.disabled_setup) + print() + + if verbose and self.notavailable: + print( + f"The following modules are not available on platform '{self.platform}':" + ) + print_three_column(self.notavailable) + print() + + if self.missing: + print("The necessary bits to build these optional modules were not found:") + 
print_three_column(self.missing)
+            print("To find the necessary bits, look in configure.ac and config.log.")
+            print()
+
+        if self.failed_on_import:
+            print(
+                "The following modules built successfully "
+                "but were removed because they could not be imported:"
+            )
+            print_three_column(self.failed_on_import)
+            print()
+
+        if any(
+            modinfo.name == "_ssl" for modinfo in self.missing + self.failed_on_import
+        ):
+            print("Could not build the ssl module!")
+            print("Python requires OpenSSL 1.1.1 or newer")
+            if sysconfig.get_config_var("OPENSSL_LDFLAGS"):
+                print("Custom linker flags may require --with-openssl-rpath=auto")
+            print()
+
+        disabled = len(self.disabled_configure) + len(self.disabled_setup)
+        print(
+            f"Checked {len(self.modules)} modules ("
+            f"{len(self.builtin_ok)} built-in, "
+            f"{len(self.shared_ok)} shared, "
+            f"{len(self.notavailable)} n/a on {self.platform}, "
+            f"{disabled} disabled, "
+            f"{len(self.missing)} missing, "
+            f"{len(self.failed_on_import)} failed on import)"
+        )
+
+    def check_strict_build(self):
+        """Fail if modules are missing and it's a strict build"""
+        if self.strict_extensions_build and (self.failed_on_import or self.missing):
+            raise RuntimeError("Failed to build some stdlib modules")
+
+    def list_module_names(self, *, all: bool = False) -> set:
+        names = {modinfo.name for modinfo in self.modules}
+        if all:
+            names.update(WINDOWS_MODULES)
+        return names
+
+    def get_builddir(self) -> pathlib.Path:
+        try:
+            with open(self.pybuilddir_txt, encoding="utf-8") as f:
+                builddir = f.read()
+        except FileNotFoundError:
+            logger.error("%s must be run from the top build directory", __file__)
+            raise
+        builddir = pathlib.Path(builddir)
+        logger.debug("%s: %s", self.pybuilddir_txt, builddir)
+        return builddir
+
+    def get_modules(self) -> list[ModuleInfo]:
+        """Get module info from sysconfig and Modules/Setup* files"""
+        seen = set()
+        modules = []
+        # parsing order is important, first entry wins
+        for modinfo in self.get_core_modules():
+            modules.append(modinfo)
+            seen.add(modinfo.name)
+        for setup_file in self.setup_files:
+            for modinfo in self.parse_setup_file(setup_file):
+                if modinfo.name not in seen:
+                    modules.append(modinfo)
+                    seen.add(modinfo.name)
+        for modinfo in self.get_sysconfig_modules():
+            if modinfo.name not in seen:
+                modules.append(modinfo)
+                seen.add(modinfo.name)
+        logger.debug("Found %i modules in total", len(modules))
+        modules.sort()
+        return modules
+
+    def get_core_modules(self) -> Iterable[ModuleInfo]:
+        """Get hard-coded core modules"""
+        for name in CORE_MODULES:
+            modinfo = ModuleInfo(name, ModuleState.BUILTIN)
+            logger.debug("Found core module %s", modinfo)
+            yield modinfo
+
+    def get_sysconfig_modules(self) -> Iterable[ModuleInfo]:
+        """Get modules defined in Makefile through sysconfig
+
+        MODBUILT_NAMES: modules in *static* block
+        MODSHARED_NAMES: modules in *shared* block
+        MODDISABLED_NAMES: modules in *disabled* block
+        """
+        moddisabled = set(sysconfig.get_config_var("MODDISABLED_NAMES").split())
+        if self.cross_compiling:
+            modbuiltin = set(sysconfig.get_config_var("MODBUILT_NAMES").split())
+        else:
+            modbuiltin = set(sys.builtin_module_names)
+
+        for key, value in sysconfig.get_config_vars().items():
+            if not key.startswith("MODULE_") or not key.endswith("_STATE"):
+                continue
+            if value not in {"yes", "disabled", "missing", "n/a"}:
+                raise ValueError(f"Unsupported value '{value}' for {key}")
+
+            modname = key[7:-6].lower()
+            if modname in moddisabled:
+                # Setup "*disabled*" rule
+                state = ModuleState.DISABLED_SETUP
+            elif value in
{"disabled", "missing", "n/a"}: + state = ModuleState(value) + elif modname in modbuiltin: + assert value == "yes" + state = ModuleState.BUILTIN + else: + assert value == "yes" + state = ModuleState.SHARED + + modinfo = ModuleInfo(modname, state) + logger.debug("Found %s in Makefile", modinfo) + yield modinfo + + def parse_setup_file(self, setup_file: pathlib.Path) -> Iterable[ModuleInfo]: + """Parse a Modules/Setup file""" + assign_var = re.compile(r"^\w+=") # EGG_SPAM=foo + # default to static module + state = ModuleState.BUILTIN + logger.debug("Parsing Setup file %s", setup_file) + with open(setup_file, encoding="utf-8") as f: + for line in f: + line = line.strip() + if not line or line.startswith("#") or assign_var.match(line): + continue + match line.split(): + case ["*shared*"]: + state = ModuleState.SHARED + case ["*static*"]: + state = ModuleState.BUILTIN + case ["*disabled*"]: + state = ModuleState.DISABLED + case ["*noconfig*"]: + state = None + case [*items]: + if state == ModuleState.DISABLED: + # *disabled* can disable multiple modules per line + for item in items: + modinfo = ModuleInfo(item, state) + logger.debug("Found %s in %s", modinfo, setup_file) + yield modinfo + elif state in {ModuleState.SHARED, ModuleState.BUILTIN}: + # *shared* and *static*, first item is the name of the module. + modinfo = ModuleInfo(items[0], state) + logger.debug("Found %s in %s", modinfo, setup_file) + yield modinfo + + def get_spec(self, modinfo: ModuleInfo) -> ModuleSpec: + """Get ModuleSpec for builtin or extension module""" + if modinfo.state == ModuleState.SHARED: + location = os.fspath(self.get_location(modinfo)) + loader = ExtensionFileLoader(modinfo.name, location) + return spec_from_file_location(modinfo.name, location, loader=loader) + elif modinfo.state == ModuleState.BUILTIN: + return spec_from_loader(modinfo.name, loader=BuiltinImporter) + else: + raise ValueError(modinfo) + + def get_location(self, modinfo: ModuleInfo) -> pathlib.Path: + """Get shared library location in build directory""" + if modinfo.state == ModuleState.SHARED: + return self.builddir / f"{modinfo.name}{self.ext_suffix}" + else: + return None + + def _check_file(self, modinfo: ModuleInfo, spec: ModuleSpec): + """Check that the module file is present and not empty""" + if spec.loader is BuiltinImporter: + return + try: + st = os.stat(spec.origin) + except FileNotFoundError: + logger.error("%s (%s) is missing", modinfo.name, spec.origin) + raise + if not st.st_size: + raise ImportError(f"{spec.origin} is an empty file") + + def check_module_import(self, modinfo: ModuleInfo): + """Attempt to import module and report errors""" + spec = self.get_spec(modinfo) + self._check_file(modinfo, spec) + try: + with warnings.catch_warnings(): + # ignore deprecation warning from deprecated modules + warnings.simplefilter("ignore", DeprecationWarning) + bootstrap_load(spec) + except ImportError as e: + logger.error("%s failed to import: %s", modinfo.name, e) + raise + except Exception as e: + logger.exception("Importing extension '%s' failed!", modinfo.name) + raise + + def check_module_cross(self, modinfo: ModuleInfo): + """Sanity check for cross compiling""" + spec = self.get_spec(modinfo) + self._check_file(modinfo, spec) + + def rename_module(self, modinfo: ModuleInfo) -> None: + """Rename module file""" + if modinfo.state == ModuleState.BUILTIN: + logger.error("Cannot mark builtin module '%s' as failed!", modinfo.name) + return + + failed_name = f"{modinfo.name}_failed{self.ext_suffix}" + builddir_path = 
self.get_location(modinfo) + if builddir_path.is_symlink(): + symlink = builddir_path + module_path = builddir_path.resolve().relative_to(os.getcwd()) + failed_path = module_path.parent / failed_name + else: + symlink = None + module_path = builddir_path + failed_path = self.builddir / failed_name + + # remove old failed file + failed_path.unlink(missing_ok=True) + # remove symlink + if symlink is not None: + symlink.unlink(missing_ok=True) + # rename shared extension file + try: + module_path.rename(failed_path) + except FileNotFoundError: + logger.debug("Shared extension file '%s' does not exist.", module_path) + else: + logger.debug("Rename '%s' -> '%s'", module_path, failed_path) + + +def main(): + args = parser.parse_args() + if args.debug: + args.verbose = True + logging.basicConfig( + level=logging.DEBUG if args.debug else logging.INFO, + format="[%(levelname)s] %(message)s", + ) + + checker = ModuleChecker( + cross_compiling=args.cross_compiling, + strict=args.strict, + ) + if args.list_module_names: + names = checker.list_module_names(all=True) + for name in sorted(names): + print(name) + else: + checker.check() + checker.summary(verbose=args.verbose) + try: + checker.check_strict_build() + except RuntimeError as e: + parser.exit(1, f"\nError: {e}\n") + + +if __name__ == "__main__": + main() diff --git a/Tools/build/deepfreeze.py b/Tools/build/deepfreeze.py new file mode 100644 index 0000000..28ac2b1 --- /dev/null +++ b/Tools/build/deepfreeze.py @@ -0,0 +1,504 @@ +"""Deep freeze + +The script may be executed by _bootstrap_python interpreter. +Shared library extension modules are not available in that case. +On Windows, and in cross-compilation cases, it is executed +by Python 3.10, and 3.11 features are not available. +""" +import argparse +import ast +import builtins +import collections +import contextlib +import os +import re +import time +import types +from typing import Dict, FrozenSet, TextIO, Tuple + +import umarshal +from generate_global_objects import get_identifiers_and_strings + +verbose = False +identifiers, strings = get_identifiers_and_strings() + +# This must be kept in sync with opcode.py +RESUME = 151 + +def isprintable(b: bytes) -> bool: + return all(0x20 <= c < 0x7f for c in b) + + +def make_string_literal(b: bytes) -> str: + res = ['"'] + if isprintable(b): + res.append(b.decode("ascii").replace("\\", "\\\\").replace("\"", "\\\"")) + else: + for i in b: + res.append(f"\\x{i:02x}") + res.append('"') + return "".join(res) + + +CO_FAST_LOCAL = 0x20 +CO_FAST_CELL = 0x40 +CO_FAST_FREE = 0x80 + + +def get_localsplus(code: types.CodeType): + a = collections.defaultdict(int) + for name in code.co_varnames: + a[name] |= CO_FAST_LOCAL + for name in code.co_cellvars: + a[name] |= CO_FAST_CELL + for name in code.co_freevars: + a[name] |= CO_FAST_FREE + return tuple(a.keys()), bytes(a.values()) + + +def get_localsplus_counts(code: types.CodeType, + names: Tuple[str, ...], + kinds: bytes) -> Tuple[int, int, int, int]: + nlocals = 0 + nplaincellvars = 0 + ncellvars = 0 + nfreevars = 0 + assert len(names) == len(kinds) + for name, kind in zip(names, kinds): + if kind & CO_FAST_LOCAL: + nlocals += 1 + if kind & CO_FAST_CELL: + ncellvars += 1 + elif kind & CO_FAST_CELL: + ncellvars += 1 + nplaincellvars += 1 + elif kind & CO_FAST_FREE: + nfreevars += 1 + assert nlocals == len(code.co_varnames) == code.co_nlocals, \ + (nlocals, len(code.co_varnames), code.co_nlocals) + assert ncellvars == len(code.co_cellvars) + assert nfreevars == len(code.co_freevars) + assert len(names) == 
nlocals + nplaincellvars + nfreevars + return nlocals, nplaincellvars, ncellvars, nfreevars + + +PyUnicode_1BYTE_KIND = 1 +PyUnicode_2BYTE_KIND = 2 +PyUnicode_4BYTE_KIND = 4 + + +def analyze_character_width(s: str) -> Tuple[int, bool]: + maxchar = ' ' + for c in s: + maxchar = max(maxchar, c) + ascii = False + if maxchar <= '\xFF': + kind = PyUnicode_1BYTE_KIND + ascii = maxchar <= '\x7F' + elif maxchar <= '\uFFFF': + kind = PyUnicode_2BYTE_KIND + else: + kind = PyUnicode_4BYTE_KIND + return kind, ascii + + +def removesuffix(base: str, suffix: str) -> str: + if base.endswith(suffix): + return base[:len(base) - len(suffix)] + return base + +class Printer: + + def __init__(self, file: TextIO) -> None: + self.level = 0 + self.file = file + self.cache: Dict[tuple[type, object, str], str] = {} + self.hits, self.misses = 0, 0 + self.patchups: list[str] = [] + self.deallocs: list[str] = [] + self.interns: list[str] = [] + self.write('#include "Python.h"') + self.write('#include "internal/pycore_gc.h"') + self.write('#include "internal/pycore_code.h"') + self.write('#include "internal/pycore_frame.h"') + self.write('#include "internal/pycore_long.h"') + self.write("") + + @contextlib.contextmanager + def indent(self) -> None: + save_level = self.level + try: + self.level += 1 + yield + finally: + self.level = save_level + + def write(self, arg: str) -> None: + self.file.writelines((" "*self.level, arg, "\n")) + + @contextlib.contextmanager + def block(self, prefix: str, suffix: str = "") -> None: + self.write(prefix + " {") + with self.indent(): + yield + self.write("}" + suffix) + + def object_head(self, typename: str) -> None: + with self.block(".ob_base =", ","): + self.write(f".ob_refcnt = 999999999,") + self.write(f".ob_type = &{typename},") + + def object_var_head(self, typename: str, size: int) -> None: + with self.block(".ob_base =", ","): + self.object_head(typename) + self.write(f".ob_size = {size},") + + def field(self, obj: object, name: str) -> None: + self.write(f".{name} = {getattr(obj, name)},") + + def generate_bytes(self, name: str, b: bytes) -> str: + if b == b"": + return "(PyObject *)&_Py_SINGLETON(bytes_empty)" + if len(b) == 1: + return f"(PyObject *)&_Py_SINGLETON(bytes_characters[{b[0]}])" + self.write("static") + with self.indent(): + with self.block("struct"): + self.write("PyObject_VAR_HEAD") + self.write("Py_hash_t ob_shash;") + self.write(f"char ob_sval[{len(b) + 1}];") + with self.block(f"{name} =", ";"): + self.object_var_head("PyBytes_Type", len(b)) + self.write(".ob_shash = -1,") + self.write(f".ob_sval = {make_string_literal(b)},") + return f"& {name}.ob_base.ob_base" + + def generate_unicode(self, name: str, s: str) -> str: + if s in strings: + return f"&_Py_STR({strings[s]})" + if s in identifiers: + return f"&_Py_ID({s})" + if re.match(r'\A[A-Za-z0-9_]+\Z', s): + name = f"const_str_{s}" + kind, ascii = analyze_character_width(s) + if kind == PyUnicode_1BYTE_KIND: + datatype = "uint8_t" + elif kind == PyUnicode_2BYTE_KIND: + datatype = "uint16_t" + else: + datatype = "uint32_t" + self.write("static") + with self.indent(): + with self.block("struct"): + if ascii: + self.write("PyASCIIObject _ascii;") + else: + self.write("PyCompactUnicodeObject _compact;") + self.write(f"{datatype} _data[{len(s)+1}];") + with self.block(f"{name} =", ";"): + if ascii: + with self.block("._ascii =", ","): + self.object_head("PyUnicode_Type") + self.write(f".length = {len(s)},") + self.write(".hash = -1,") + with self.block(".state =", ","): + self.write(".kind = 1,") + 
self.write(".compact = 1,") + self.write(".ascii = 1,") + self.write(f"._data = {make_string_literal(s.encode('ascii'))},") + return f"& {name}._ascii.ob_base" + else: + with self.block("._compact =", ","): + with self.block("._base =", ","): + self.object_head("PyUnicode_Type") + self.write(f".length = {len(s)},") + self.write(".hash = -1,") + with self.block(".state =", ","): + self.write(f".kind = {kind},") + self.write(".compact = 1,") + self.write(".ascii = 0,") + utf8 = s.encode('utf-8') + self.write(f'.utf8 = {make_string_literal(utf8)},') + self.write(f'.utf8_length = {len(utf8)},') + with self.block(f"._data =", ","): + for i in range(0, len(s), 16): + data = s[i:i+16] + self.write(", ".join(map(str, map(ord, data))) + ",") + return f"& {name}._compact._base.ob_base" + + + def generate_code(self, name: str, code: types.CodeType) -> str: + # The ordering here matches PyCode_NewWithPosOnlyArgs() + # (but see below). + co_consts = self.generate(name + "_consts", code.co_consts) + co_names = self.generate(name + "_names", code.co_names) + co_filename = self.generate(name + "_filename", code.co_filename) + co_name = self.generate(name + "_name", code.co_name) + co_qualname = self.generate(name + "_qualname", code.co_qualname) + co_linetable = self.generate(name + "_linetable", code.co_linetable) + co_exceptiontable = self.generate(name + "_exceptiontable", code.co_exceptiontable) + # These fields are not directly accessible + localsplusnames, localspluskinds = get_localsplus(code) + co_localsplusnames = self.generate(name + "_localsplusnames", localsplusnames) + co_localspluskinds = self.generate(name + "_localspluskinds", localspluskinds) + # Derived values + nlocals, nplaincellvars, ncellvars, nfreevars = \ + get_localsplus_counts(code, localsplusnames, localspluskinds) + co_code_adaptive = make_string_literal(code.co_code) + self.write("static") + with self.indent(): + self.write(f"struct _PyCode_DEF({len(code.co_code)})") + with self.block(f"{name} =", ";"): + self.object_var_head("PyCode_Type", len(code.co_code) // 2) + # But the ordering here must match that in cpython/code.h + # (which is a pain because we tend to reorder those for perf) + # otherwise MSVC doesn't like it. 
+ self.write(f".co_consts = {co_consts},") + self.write(f".co_names = {co_names},") + self.write(f".co_exceptiontable = {co_exceptiontable},") + self.field(code, "co_flags") + self.write(".co_warmup = QUICKENING_INITIAL_WARMUP_VALUE,") + self.write("._co_linearray_entry_size = 0,") + self.field(code, "co_argcount") + self.field(code, "co_posonlyargcount") + self.field(code, "co_kwonlyargcount") + self.write(f".co_framesize = {code.co_stacksize + len(localsplusnames)} + FRAME_SPECIALS_SIZE,") + self.field(code, "co_stacksize") + self.field(code, "co_firstlineno") + self.write(f".co_nlocalsplus = {len(localsplusnames)},") + self.field(code, "co_nlocals") + self.write(f".co_nplaincellvars = {nplaincellvars},") + self.write(f".co_ncellvars = {ncellvars},") + self.write(f".co_nfreevars = {nfreevars},") + self.write(f".co_localsplusnames = {co_localsplusnames},") + self.write(f".co_localspluskinds = {co_localspluskinds},") + self.write(f".co_filename = {co_filename},") + self.write(f".co_name = {co_name},") + self.write(f".co_qualname = {co_qualname},") + self.write(f".co_linetable = {co_linetable},") + self.write(f"._co_cached = NULL,") + self.write("._co_linearray = NULL,") + self.write(f".co_code_adaptive = {co_code_adaptive},") + for i, op in enumerate(code.co_code[::2]): + if op == RESUME: + self.write(f"._co_firsttraceable = {i},") + break + name_as_code = f"(PyCodeObject *)&{name}" + self.deallocs.append(f"_PyStaticCode_Dealloc({name_as_code});") + self.interns.append(f"_PyStaticCode_InternStrings({name_as_code})") + return f"& {name}.ob_base.ob_base" + + def generate_tuple(self, name: str, t: Tuple[object, ...]) -> str: + if len(t) == 0: + return f"(PyObject *)& _Py_SINGLETON(tuple_empty)" + items = [self.generate(f"{name}_{i}", it) for i, it in enumerate(t)] + self.write("static") + with self.indent(): + with self.block("struct"): + self.write("PyGC_Head _gc_head;") + with self.block("struct", "_object;"): + self.write("PyObject_VAR_HEAD") + if t: + self.write(f"PyObject *ob_item[{len(t)}];") + with self.block(f"{name} =", ";"): + with self.block("._object =", ","): + self.object_var_head("PyTuple_Type", len(t)) + if items: + with self.block(f".ob_item =", ","): + for item in items: + self.write(item + ",") + return f"& {name}._object.ob_base.ob_base" + + def _generate_int_for_bits(self, name: str, i: int, digit: int) -> None: + sign = -1 if i < 0 else 0 if i == 0 else +1 + i = abs(i) + digits: list[int] = [] + while i: + i, rem = divmod(i, digit) + digits.append(rem) + self.write("static") + with self.indent(): + with self.block("struct"): + self.write("PyObject_VAR_HEAD") + self.write(f"digit ob_digit[{max(1, len(digits))}];") + with self.block(f"{name} =", ";"): + self.object_var_head("PyLong_Type", sign*len(digits)) + if digits: + ds = ", ".join(map(str, digits)) + self.write(f".ob_digit = {{ {ds} }},") + + def generate_int(self, name: str, i: int) -> str: + if -5 <= i <= 256: + return f"(PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + {i}]" + if i >= 0: + name = f"const_int_{i}" + else: + name = f"const_int_negative_{abs(i)}" + if abs(i) < 2**15: + self._generate_int_for_bits(name, i, 2**15) + else: + connective = "if" + for bits_in_digit in 15, 30: + self.write(f"#{connective} PYLONG_BITS_IN_DIGIT == {bits_in_digit}") + self._generate_int_for_bits(name, i, 2**bits_in_digit) + connective = "elif" + self.write("#else") + self.write('#error "PYLONG_BITS_IN_DIGIT should be 15 or 30"') + self.write("#endif") + # If neither clause applies, it won't compile + return f"& 
{name}.ob_base.ob_base" + + def generate_float(self, name: str, x: float) -> str: + with self.block(f"static PyFloatObject {name} =", ";"): + self.object_head("PyFloat_Type") + self.write(f".ob_fval = {x},") + return f"&{name}.ob_base" + + def generate_complex(self, name: str, z: complex) -> str: + with self.block(f"static PyComplexObject {name} =", ";"): + self.object_head("PyComplex_Type") + self.write(f".cval = {{ {z.real}, {z.imag} }},") + return f"&{name}.ob_base" + + def generate_frozenset(self, name: str, fs: FrozenSet[object]) -> str: + try: + fs = sorted(fs) + except TypeError: + # frozen set with incompatible types, fallback to repr() + fs = sorted(fs, key=repr) + ret = self.generate_tuple(name, tuple(fs)) + self.write("// TODO: The above tuple should be a frozenset") + return ret + + def generate_file(self, module: str, code: object)-> None: + module = module.replace(".", "_") + self.generate(f"{module}_toplevel", code) + with self.block(f"static void {module}_do_patchups(void)"): + for p in self.patchups: + self.write(p) + self.patchups.clear() + self.write(EPILOGUE.replace("%%NAME%%", module)) + + def generate(self, name: str, obj: object) -> str: + # Use repr() in the key to distinguish -0.0 from +0.0 + key = (type(obj), obj, repr(obj)) + if key in self.cache: + self.hits += 1 + # print(f"Cache hit {key!r:.40}: {self.cache[key]!r:.40}") + return self.cache[key] + self.misses += 1 + if isinstance(obj, (types.CodeType, umarshal.Code)) : + val = self.generate_code(name, obj) + elif isinstance(obj, tuple): + val = self.generate_tuple(name, obj) + elif isinstance(obj, str): + val = self.generate_unicode(name, obj) + elif isinstance(obj, bytes): + val = self.generate_bytes(name, obj) + elif obj is True: + return "Py_True" + elif obj is False: + return "Py_False" + elif isinstance(obj, int): + val = self.generate_int(name, obj) + elif isinstance(obj, float): + val = self.generate_float(name, obj) + elif isinstance(obj, complex): + val = self.generate_complex(name, obj) + elif isinstance(obj, frozenset): + val = self.generate_frozenset(name, obj) + elif obj is builtins.Ellipsis: + return "Py_Ellipsis" + elif obj is None: + return "Py_None" + else: + raise TypeError( + f"Cannot generate code for {type(obj).__name__} object") + # print(f"Cache store {key!r:.40}: {val!r:.40}") + self.cache[key] = val + return val + + +EPILOGUE = """ +PyObject * +_Py_get_%%NAME%%_toplevel(void) +{ + %%NAME%%_do_patchups(); + return Py_NewRef((PyObject *) &%%NAME%%_toplevel); +} +""" + +FROZEN_COMMENT_C = "/* Auto-generated by Programs/_freeze_module.c */" +FROZEN_COMMENT_PY = "/* Auto-generated by Programs/_freeze_module.py */" + +FROZEN_DATA_LINE = r"\s*(\d+,\s*)+\s*" + + +def is_frozen_header(source: str) -> bool: + return source.startswith((FROZEN_COMMENT_C, FROZEN_COMMENT_PY)) + + +def decode_frozen_data(source: str) -> types.CodeType: + lines = source.splitlines() + while lines and re.match(FROZEN_DATA_LINE, lines[0]) is None: + del lines[0] + while lines and re.match(FROZEN_DATA_LINE, lines[-1]) is None: + del lines[-1] + values: Tuple[int, ...] 
= ast.literal_eval("".join(lines).strip())
+    data = bytes(values)
+    return umarshal.loads(data)
+
+
+def generate(args: list[str], output: TextIO) -> None:
+    printer = Printer(output)
+    for arg in args:
+        file, modname = arg.rsplit(':', 1)
+        with open(file, "r", encoding="utf8") as fd:
+            source = fd.read()
+            if is_frozen_header(source):
+                code = decode_frozen_data(source)
+            else:
+                # compile the already-read source; a second fd.read()
+                # would return an empty string here
+                code = compile(source, f"<frozen {modname}>", "exec")
+            printer.generate_file(modname, code)
+    with printer.block(f"void\n_Py_Deepfreeze_Fini(void)"):
+        for p in printer.deallocs:
+            printer.write(p)
+    with printer.block(f"int\n_Py_Deepfreeze_Init(void)"):
+        for p in printer.interns:
+            with printer.block(f"if ({p} < 0)"):
+                printer.write("return -1;")
+        printer.write("return 0;")
+    if verbose:
+        print(f"Cache hits: {printer.hits}, misses: {printer.misses}")
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("-o", "--output", help="Defaults to deepfreeze.c", default="deepfreeze.c")
+parser.add_argument("-v", "--verbose", action="store_true", help="Print diagnostics")
+parser.add_argument('args', nargs="+", help="Input file and module name (required) in file:modname format")
+
+@contextlib.contextmanager
+def report_time(label: str):
+    t0 = time.time()
+    try:
+        yield
+    finally:
+        t1 = time.time()
+    if verbose:
+        print(f"{label}: {t1-t0:.3f} sec")
+
+
+def main() -> None:
+    global verbose
+    args = parser.parse_args()
+    verbose = args.verbose
+    output = args.output
+    with open(output, "w", encoding="utf-8") as file:
+        with report_time("generate"):
+            generate(args.args, file)
+    if verbose:
+        print(f"Wrote {os.path.getsize(output)} bytes to {output}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Tools/build/freeze_modules.py b/Tools/build/freeze_modules.py
new file mode 100644
index 0000000..810224b
--- /dev/null
+++ b/Tools/build/freeze_modules.py
@@ -0,0 +1,733 @@
+"""Freeze modules and regen related files (e.g. Python/frozen.c).
+
+See the notes at the top of Python/frozen.c for more info.
+"""
+
+from collections import namedtuple
+import hashlib
+import os
+import ntpath
+import posixpath
+import argparse
+from update_file import updating_file_with_tmpfile
+
+
+ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+ROOT_DIR = os.path.abspath(ROOT_DIR)
+FROZEN_ONLY = os.path.join(ROOT_DIR, 'Tools', 'freeze', 'flag.py')
+
+STDLIB_DIR = os.path.join(ROOT_DIR, 'Lib')
+# If FROZEN_MODULES_DIR or DEEPFROZEN_MODULES_DIR is changed then the
+# .gitattributes and .gitignore files need to be updated.
+FROZEN_MODULES_DIR = os.path.join(ROOT_DIR, 'Python', 'frozen_modules')
+DEEPFROZEN_MODULES_DIR = os.path.join(ROOT_DIR, 'Python', 'deepfreeze')
+
+FROZEN_FILE = os.path.join(ROOT_DIR, 'Python', 'frozen.c')
+MAKEFILE = os.path.join(ROOT_DIR, 'Makefile.pre.in')
+PCBUILD_PROJECT = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj')
+PCBUILD_FILTERS = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj.filters')
+PCBUILD_PYTHONCORE = os.path.join(ROOT_DIR, 'PCbuild', 'pythoncore.vcxproj')
+
+
+OS_PATH = 'ntpath' if os.name == 'nt' else 'posixpath'
+
+# These are modules that get frozen.
+TESTS_SECTION = 'Test module'
+FROZEN = [
+    # See parse_frozen_spec() for the format.
+    # In cases where the frozenid is duplicated, the first one is re-used.
+    ('import system', [
+        # These frozen modules are necessary for bootstrapping
+        # the import system.
+        'importlib._bootstrap : _frozen_importlib',
+        'importlib._bootstrap_external : _frozen_importlib_external',
+        # This module is important because some Python builds rely
+        # on a builtin zip file instead of a filesystem.
+        'zipimport',
+    ]),
+    ('stdlib - startup, without site (python -S)', [
+        'abc',
+        'codecs',
+        # For now we do not freeze the encodings, due to the noise all
+        # those extra modules add to the text printed during the build.
+        # (See https://github.com/python/cpython/pull/28398#pullrequestreview-756856469.)
+        #'<encodings.*>',
+        'io',
+    ]),
+    ('stdlib - startup, with site', [
+        '_collections_abc',
+        '_sitebuiltins',
+        'genericpath',
+        'ntpath',
+        'posixpath',
+        # We must explicitly mark os.path as a frozen module
+        # even though it will never be imported.
+        f'{OS_PATH} : os.path',
+        'os',
+        'site',
+        'stat',
+    ]),
+    ('runpy - run module with -m', [
+        "importlib.util",
+        "importlib.machinery",
+        "runpy",
+    ]),
+    (TESTS_SECTION, [
+        '__hello__',
+        '__hello__ : __hello_alias__',
+        '__hello__ : <__phello_alias__>',
+        '__hello__ : __phello_alias__.spam',
+        '<__phello__.**.*>',
+        f'frozen_only : __hello_only__ = {FROZEN_ONLY}',
+    ]),
+]
+BOOTSTRAP = {
+    'importlib._bootstrap',
+    'importlib._bootstrap_external',
+    'zipimport',
+}
+
+
+#######################################
+# platform-specific helpers
+
+if os.path is posixpath:
+    relpath_for_posix_display = os.path.relpath
+
+    def relpath_for_windows_display(path, base):
+        return ntpath.relpath(
+            ntpath.join(*path.split(os.path.sep)),
+            ntpath.join(*base.split(os.path.sep)),
+        )
+
+else:
+    relpath_for_windows_display = ntpath.relpath
+
+    def relpath_for_posix_display(path, base):
+        return posixpath.relpath(
+            posixpath.join(*path.split(os.path.sep)),
+            posixpath.join(*base.split(os.path.sep)),
+        )
+
+
+#######################################
+# specs
+
+def parse_frozen_specs():
+    seen = {}
+    for section, specs in FROZEN:
+        parsed = _parse_specs(specs, section, seen)
+        for item in parsed:
+            frozenid, pyfile, modname, ispkg, section = item
+            try:
+                source = seen[frozenid]
+            except KeyError:
+                source = FrozenSource.from_id(frozenid, pyfile)
+                seen[frozenid] = source
+            else:
+                assert not pyfile or pyfile == source.pyfile, item
+            yield FrozenModule(modname, ispkg, section, source)
+
+
+def _parse_specs(specs, section, seen):
+    for spec in specs:
+        info, subs = _parse_spec(spec, seen, section)
+        yield info
+        for info in subs or ():
+            yield info
+
+
+def _parse_spec(spec, knownids=None, section=None):
+    """Yield an info tuple for each module corresponding to the given spec.
+
+    The info consists of: (frozenid, pyfile, modname, ispkg, section).
+
+    Supported formats:
+
+      frozenid
+      frozenid : modname
+      frozenid : modname = pyfile
+
+    "frozenid" and "modname" must be valid module names (dot-separated
+    identifiers).  If "modname" is not provided then "frozenid" is used.
+    If "pyfile" is not provided then the filename of the module
+    corresponding to "frozenid" is used.
+
+    Angle brackets around a frozenid (e.g. '<encodings>') indicate
+    it is a package.  This also means it must be an actual module
+    (i.e. "pyfile" cannot have been provided).  Such values can have
+    patterns to expand submodules:
+
+      <encodings.*>    - also freeze all direct submodules
+      <encodings.**.*> - also freeze the full submodule tree
+
+    As with "frozenid", angle brackets around "modname" indicate
+    it is a package.  However, in this case "pyfile" should not
+    have been provided and patterns in "modname" are not supported.
+ Also, if "modname" has brackets then "frozenid" should not, + and "pyfile" should have been provided.. + """ + frozenid, _, remainder = spec.partition(':') + modname, _, pyfile = remainder.partition('=') + frozenid = frozenid.strip() + modname = modname.strip() + pyfile = pyfile.strip() + + submodules = None + if modname.startswith('<') and modname.endswith('>'): + assert check_modname(frozenid), spec + modname = modname[1:-1] + assert check_modname(modname), spec + if frozenid in knownids: + pass + elif pyfile: + assert not os.path.isdir(pyfile), spec + else: + pyfile = _resolve_module(frozenid, ispkg=False) + ispkg = True + elif pyfile: + assert check_modname(frozenid), spec + assert not knownids or frozenid not in knownids, spec + assert check_modname(modname), spec + assert not os.path.isdir(pyfile), spec + ispkg = False + elif knownids and frozenid in knownids: + assert check_modname(frozenid), spec + assert check_modname(modname), spec + ispkg = False + else: + assert not modname or check_modname(modname), spec + resolved = iter(resolve_modules(frozenid)) + frozenid, pyfile, ispkg = next(resolved) + if not modname: + modname = frozenid + if ispkg: + pkgid = frozenid + pkgname = modname + pkgfiles = {pyfile: pkgid} + def iter_subs(): + for frozenid, pyfile, ispkg in resolved: + if pkgname: + modname = frozenid.replace(pkgid, pkgname, 1) + else: + modname = frozenid + if pyfile: + if pyfile in pkgfiles: + frozenid = pkgfiles[pyfile] + pyfile = None + elif ispkg: + pkgfiles[pyfile] = frozenid + yield frozenid, pyfile, modname, ispkg, section + submodules = iter_subs() + + info = (frozenid, pyfile or None, modname, ispkg, section) + return info, submodules + + +####################################### +# frozen source files + +class FrozenSource(namedtuple('FrozenSource', 'id pyfile frozenfile deepfreezefile')): + + @classmethod + def from_id(cls, frozenid, pyfile=None): + if not pyfile: + pyfile = os.path.join(STDLIB_DIR, *frozenid.split('.')) + '.py' + #assert os.path.exists(pyfile), (frozenid, pyfile) + frozenfile = resolve_frozen_file(frozenid, FROZEN_MODULES_DIR) + deepfreezefile = resolve_frozen_file(frozenid, DEEPFROZEN_MODULES_DIR) + return cls(frozenid, pyfile, frozenfile, deepfreezefile) + + @property + def frozenid(self): + return self.id + + @property + def modname(self): + if self.pyfile.startswith(STDLIB_DIR): + return self.id + return None + + @property + def symbol(self): + # This matches what we do in Programs/_freeze_module.c: + name = self.frozenid.replace('.', '_') + return '_Py_M__' + name + + @property + def ispkg(self): + if not self.pyfile: + return False + elif self.frozenid.endswith('.__init__'): + return False + else: + return os.path.basename(self.pyfile) == '__init__.py' + + @property + def isbootstrap(self): + return self.id in BOOTSTRAP + + +def resolve_frozen_file(frozenid, destdir): + """Return the filename corresponding to the given frozen ID. + + For stdlib modules the ID will always be the full name + of the source module. + """ + if not isinstance(frozenid, str): + try: + frozenid = frozenid.frozenid + except AttributeError: + raise ValueError(f'unsupported frozenid {frozenid!r}') + # We use a consistent naming convention for all frozen modules. 
+ frozenfile = f'{frozenid}.h' + if not destdir: + return frozenfile + return os.path.join(destdir, frozenfile) + + +####################################### +# frozen modules + +class FrozenModule(namedtuple('FrozenModule', 'name ispkg section source')): + + def __getattr__(self, name): + return getattr(self.source, name) + + @property + def modname(self): + return self.name + + @property + def orig(self): + return self.source.modname + + @property + def isalias(self): + orig = self.source.modname + if not orig: + return True + return self.name != orig + + def summarize(self): + source = self.source.modname + if source: + source = f'<{source}>' + else: + source = relpath_for_posix_display(self.pyfile, ROOT_DIR) + return { + 'module': self.name, + 'ispkg': self.ispkg, + 'source': source, + 'frozen': os.path.basename(self.frozenfile), + 'checksum': _get_checksum(self.frozenfile), + } + + +def _iter_sources(modules): + seen = set() + for mod in modules: + if mod.source not in seen: + yield mod.source + seen.add(mod.source) + + +####################################### +# generic helpers + +def _get_checksum(filename): + with open(filename, "rb") as infile: + contents = infile.read() + m = hashlib.sha256() + m.update(contents) + return m.hexdigest() + + +def resolve_modules(modname, pyfile=None): + if modname.startswith('<') and modname.endswith('>'): + if pyfile: + assert os.path.isdir(pyfile) or os.path.basename(pyfile) == '__init__.py', pyfile + ispkg = True + modname = modname[1:-1] + rawname = modname + # For now, we only expect match patterns at the end of the name. + _modname, sep, match = modname.rpartition('.') + if sep: + if _modname.endswith('.**'): + modname = _modname[:-3] + match = f'**.{match}' + elif match and not match.isidentifier(): + modname = _modname + # Otherwise it's a plain name so we leave it alone. + else: + match = None + else: + ispkg = False + rawname = modname + match = None + + if not check_modname(modname): + raise ValueError(f'not a valid module name ({rawname})') + + if not pyfile: + pyfile = _resolve_module(modname, ispkg=ispkg) + elif os.path.isdir(pyfile): + pyfile = _resolve_module(modname, pyfile, ispkg) + yield modname, pyfile, ispkg + + if match: + pkgdir = os.path.dirname(pyfile) + yield from iter_submodules(modname, pkgdir, match) + + +def check_modname(modname): + return all(n.isidentifier() for n in modname.split('.')) + + +def iter_submodules(pkgname, pkgdir=None, match='*'): + if not pkgdir: + pkgdir = os.path.join(STDLIB_DIR, *pkgname.split('.')) + if not match: + match = '**.*' + match_modname = _resolve_modname_matcher(match, pkgdir) + + def _iter_submodules(pkgname, pkgdir): + for entry in sorted(os.scandir(pkgdir), key=lambda e: e.name): + matched, recursive = match_modname(entry.name) + if not matched: + continue + modname = f'{pkgname}.{entry.name}' + if modname.endswith('.py'): + yield modname[:-3], entry.path, False + elif entry.is_dir(): + pyfile = os.path.join(entry.path, '__init__.py') + # We ignore namespace packages. 
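+                # (A directory without an __init__.py would be a namespace
+                # package, which has no single source file to freeze.)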
+ if os.path.exists(pyfile): + yield modname, pyfile, True + if recursive: + yield from _iter_submodules(modname, entry.path) + + return _iter_submodules(pkgname, pkgdir) + + +def _resolve_modname_matcher(match, rootdir=None): + if isinstance(match, str): + if match.startswith('**.'): + recursive = True + pat = match[3:] + assert match + else: + recursive = False + pat = match + + if pat == '*': + def match_modname(modname): + return True, recursive + else: + raise NotImplementedError(match) + elif callable(match): + match_modname = match(rootdir) + else: + raise ValueError(f'unsupported matcher {match!r}') + return match_modname + + +def _resolve_module(modname, pathentry=STDLIB_DIR, ispkg=False): + assert pathentry, pathentry + pathentry = os.path.normpath(pathentry) + assert os.path.isabs(pathentry) + if ispkg: + return os.path.join(pathentry, *modname.split('.'), '__init__.py') + return os.path.join(pathentry, *modname.split('.')) + '.py' + + +####################################### +# regenerating dependent files + +def find_marker(lines, marker, file): + for pos, line in enumerate(lines): + if marker in line: + return pos + raise Exception(f"Can't find {marker!r} in file {file}") + + +def replace_block(lines, start_marker, end_marker, replacements, file): + start_pos = find_marker(lines, start_marker, file) + end_pos = find_marker(lines, end_marker, file) + if end_pos <= start_pos: + raise Exception(f"End marker {end_marker!r} " + f"occurs before start marker {start_marker!r} " + f"in file {file}") + replacements = [line.rstrip() + '\n' for line in replacements] + return lines[:start_pos + 1] + replacements + lines[end_pos:] + + +def regen_frozen(modules, frozen_modules: bool): + headerlines = [] + parentdir = os.path.dirname(FROZEN_FILE) + if frozen_modules: + for src in _iter_sources(modules): + # Adding a comment to separate sections here doesn't add much, + # so we don't. + header = relpath_for_posix_display(src.frozenfile, parentdir) + headerlines.append(f'#include "{header}"') + + externlines = [] + bootstraplines = [] + stdliblines = [] + testlines = [] + aliaslines = [] + indent = ' ' + lastsection = None + for mod in modules: + if mod.isbootstrap: + lines = bootstraplines + elif mod.section == TESTS_SECTION: + lines = testlines + else: + lines = stdliblines + if mod.section != lastsection: + if lastsection is not None: + lines.append('') + lines.append(f'/* {mod.section} */') + lastsection = mod.section + + # Also add a extern declaration for the corresponding + # deepfreeze-generated function. + orig_name = mod.source.id + code_name = orig_name.replace(".", "_") + get_code_name = "_Py_get_%s_toplevel" % code_name + externlines.append("extern PyObject *%s(void);" % get_code_name) + + symbol = mod.symbol + pkg = 'true' if mod.ispkg else 'false' + if not frozen_modules: + line = ('{"%s", NULL, 0, %s, GET_CODE(%s)},' + ) % (mod.name, pkg, code_name) + else: + line = ('{"%s", %s, (int)sizeof(%s), %s, GET_CODE(%s)},' + ) % (mod.name, symbol, symbol, pkg, code_name) + lines.append(line) + + if mod.isalias: + if not mod.orig: + entry = '{"%s", NULL},' % (mod.name,) + elif mod.source.ispkg: + entry = '{"%s", "<%s"},' % (mod.name, mod.orig) + else: + entry = '{"%s", "%s"},' % (mod.name, mod.orig) + aliaslines.append(indent + entry) + + for lines in (bootstraplines, stdliblines, testlines): + # TODO: Is this necessary any more? 
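+        # The leading entry, when present, is the blank separator line that
+        # was appended before a section header after the first; drop it,
+        # then indent the rest to fit inside the generated C array literals.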
+        if not lines[0]:
+            del lines[0]
+        for i, line in enumerate(lines):
+            if line:
+                lines[i] = indent + line
+
+    print(f'# Updating {os.path.relpath(FROZEN_FILE)}')
+    with updating_file_with_tmpfile(FROZEN_FILE) as (infile, outfile):
+        lines = infile.readlines()
+        # TODO: Use more obvious markers, e.g.
+        # $START GENERATED FOOBAR$ / $END GENERATED FOOBAR$
+        lines = replace_block(
+            lines,
+            "/* Includes for frozen modules: */",
+            "/* End includes */",
+            headerlines,
+            FROZEN_FILE,
+        )
+        lines = replace_block(
+            lines,
+            "/* Start extern declarations */",
+            "/* End extern declarations */",
+            externlines,
+            FROZEN_FILE,
+        )
+        lines = replace_block(
+            lines,
+            "static const struct _frozen bootstrap_modules[] =",
+            "/* bootstrap sentinel */",
+            bootstraplines,
+            FROZEN_FILE,
+        )
+        lines = replace_block(
+            lines,
+            "static const struct _frozen stdlib_modules[] =",
+            "/* stdlib sentinel */",
+            stdliblines,
+            FROZEN_FILE,
+        )
+        lines = replace_block(
+            lines,
+            "static const struct _frozen test_modules[] =",
+            "/* test sentinel */",
+            testlines,
+            FROZEN_FILE,
+        )
+        lines = replace_block(
+            lines,
+            "const struct _module_alias aliases[] =",
+            "/* aliases sentinel */",
+            aliaslines,
+            FROZEN_FILE,
+        )
+        outfile.writelines(lines)
+
+
+def regen_makefile(modules):
+    pyfiles = []
+    frozenfiles = []
+    rules = ['']
+    deepfreezerules = ["Python/deepfreeze/deepfreeze.c: $(DEEPFREEZE_DEPS)",
+                       "\t$(PYTHON_FOR_FREEZE) $(srcdir)/Tools/build/deepfreeze.py \\"]
+    for src in _iter_sources(modules):
+        frozen_header = relpath_for_posix_display(src.frozenfile, ROOT_DIR)
+        frozenfiles.append(f'\t\t{frozen_header} \\')
+
+        pyfile = relpath_for_posix_display(src.pyfile, ROOT_DIR)
+        pyfiles.append(f'\t\t{pyfile} \\')
+
+        if src.isbootstrap:
+            freezecmd = '$(FREEZE_MODULE_BOOTSTRAP)'
+            freezedep = '$(FREEZE_MODULE_BOOTSTRAP_DEPS)'
+        else:
+            freezecmd = '$(FREEZE_MODULE)'
+            freezedep = '$(FREEZE_MODULE_DEPS)'
+
+        freeze = (f'{freezecmd} {src.frozenid} '
+                  f'$(srcdir)/{pyfile} {frozen_header}')
+        rules.extend([
+            f'{frozen_header}: {pyfile} {freezedep}',
+            f'\t{freeze}',
+            '',
+        ])
+        deepfreezerules.append(f"\t{frozen_header}:{src.frozenid} \\")
+    deepfreezerules.append('\t-o Python/deepfreeze/deepfreeze.c')
+    pyfiles[-1] = pyfiles[-1].rstrip(" \\")
+    frozenfiles[-1] = frozenfiles[-1].rstrip(" \\")
+
+    print(f'# Updating {os.path.relpath(MAKEFILE)}')
+    with updating_file_with_tmpfile(MAKEFILE) as (infile, outfile):
+        lines = infile.readlines()
+        lines = replace_block(
+            lines,
+            "FROZEN_FILES_IN =",
+            "# End FROZEN_FILES_IN",
+            pyfiles,
+            MAKEFILE,
+        )
+        lines = replace_block(
+            lines,
+            "FROZEN_FILES_OUT =",
+            "# End FROZEN_FILES_OUT",
+            frozenfiles,
+            MAKEFILE,
+        )
+        lines = replace_block(
+            lines,
+            "# BEGIN: freezing modules",
+            "# END: freezing modules",
+            rules,
+            MAKEFILE,
+        )
+        lines = replace_block(
+            lines,
+            "# BEGIN: deepfreeze modules",
+            "# END: deepfreeze modules",
+            deepfreezerules,
+            MAKEFILE,
+        )
+        outfile.writelines(lines)
+
+
+def regen_pcbuild(modules):
+    projlines = []
+    filterlines = []
+    corelines = []
+    deepfreezerules = ['\t<Exec Command=\'$(PythonForBuild) "$(PySourcePath)Tools\\build\\deepfreeze.py" ^']
+    for src in _iter_sources(modules):
+        pyfile = relpath_for_windows_display(src.pyfile, ROOT_DIR)
+        header = relpath_for_windows_display(src.frozenfile, ROOT_DIR)
+        intfile = ntpath.splitext(ntpath.basename(header))[0] + '.g.h'
+        projlines.append(f'    <None Include="..\\{pyfile}">')
+        projlines.append(f'      <ModName>{src.frozenid}</ModName>')
+        projlines.append(f'      <IntFile>$(IntDir){intfile}</IntFile>')
+        projlines.append(f'      <OutFile>$(PySourcePath){header}</OutFile>')
+        projlines.append(f'    </None>')
+
+        filterlines.append(f'    <None Include="..\\{pyfile}">')
+        filterlines.append('      <Filter>Python Files</Filter>')
+        filterlines.append('    </None>')
+        deepfreezerules.append(f'\t\t "$(PySourcePath){header}:{src.frozenid}" ^')
+    deepfreezerules.append('\t\t "-o" "$(PySourcePath)Python\\deepfreeze\\deepfreeze.c"\'/>')
+
+    corelines.append(f'    <ClCompile Include="..\\Python\\deepfreeze\\deepfreeze.c" />')
+
+    print(f'# Updating {os.path.relpath(PCBUILD_PROJECT)}')
+    with updating_file_with_tmpfile(PCBUILD_PROJECT) as (infile, outfile):
+        lines = infile.readlines()
+        lines = replace_block(
+            lines,
+            '<!-- BEGIN frozen modules -->',
+            '<!-- END frozen modules -->',
+            projlines,
+            PCBUILD_PROJECT,
+        )
+        outfile.writelines(lines)
+    with updating_file_with_tmpfile(PCBUILD_PROJECT) as (infile, outfile):
+        lines = infile.readlines()
+        lines = replace_block(
+            lines,
+            '<!-- BEGIN deepfreeze rule -->',
+            '<!-- END deepfreeze rule -->',
+            deepfreezerules,
+            PCBUILD_PROJECT,
+        )
+        outfile.writelines(lines)
+    print(f'# Updating {os.path.relpath(PCBUILD_FILTERS)}')
+    with updating_file_with_tmpfile(PCBUILD_FILTERS) as (infile, outfile):
+        lines = infile.readlines()
+        lines = replace_block(
+            lines,
+            '<!-- BEGIN frozen modules -->',
+            '<!-- END frozen modules -->',
+            filterlines,
+            PCBUILD_FILTERS,
+        )
+        outfile.writelines(lines)
+    print(f'# Updating {os.path.relpath(PCBUILD_PYTHONCORE)}')
+    with updating_file_with_tmpfile(PCBUILD_PYTHONCORE) as (infile, outfile):
+        lines = infile.readlines()
+        lines = replace_block(
+            lines,
+            '<!-- BEGIN deepfreeze -->',
+            '<!-- END deepfreeze -->',
+            corelines,
+            PCBUILD_PYTHONCORE,
+        )
+        outfile.writelines(lines)
+
+
+#######################################
+# the script
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--frozen-modules", action="store_true",
+                    help="Use both frozen and deepfrozen modules. (default: uses only deepfrozen modules)")
+
+def main():
+    args = parser.parse_args()
+    frozen_modules: bool = args.frozen_modules
+    # Expand the raw specs, preserving order.
+    modules = list(parse_frozen_specs())
+
+    # Regen build-related files.
+    regen_makefile(modules)
+    regen_pcbuild(modules)
+    regen_frozen(modules, frozen_modules)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/Tools/build/generate_global_objects.py b/Tools/build/generate_global_objects.py
new file mode 100644
index 0000000..dd67cfe
--- /dev/null
+++ b/Tools/build/generate_global_objects.py
@@ -0,0 +1,382 @@
+import contextlib
+import io
+import os.path
+import re
+
+SCRIPT_NAME = 'Tools/build/generate_global_objects.py'
+__file__ = os.path.abspath(__file__)
+ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+INTERNAL = os.path.join(ROOT, 'Include', 'internal')
+
+
+IGNORED = {
+    'ACTION',  # Python/_warnings.c
+    'ATTR',  # Python/_warnings.c and Objects/funcobject.c
+    'DUNDER',  # Objects/typeobject.c
+    'RDUNDER',  # Objects/typeobject.c
+    'SPECIAL',  # Objects/weakrefobject.c
+    'NAME',  # Objects/typeobject.c
+}
+IDENTIFIERS = [
+    # from ADD() Python/_warnings.c
+    'default',
+    'ignore',
+
+    # from GET_WARNINGS_ATTR() in Python/_warnings.c
+    'WarningMessage',
+    '_showwarnmsg',
+    '_warn_unawaited_coroutine',
+    'defaultaction',
+    'filters',
+    'onceregistry',
+
+    # from WRAP_METHOD() in Objects/weakrefobject.c
+    '__bytes__',
+    '__reversed__',
+
+    # from COPY_ATTR() in Objects/funcobject.c
+    '__module__',
+    '__name__',
+    '__qualname__',
+    '__doc__',
+    '__annotations__',
+
+    # from SLOT* in Objects/typeobject.c
+    '__abs__',
+    '__add__',
+    '__aiter__',
+    '__and__',
+    '__anext__',
+    '__await__',
+    '__bool__',
+    '__call__',
+    '__contains__',
+    '__del__',
+    '__delattr__',
+    '__delete__',
+    '__delitem__',
+    '__eq__',
+    '__float__',
+    '__floordiv__',
+    '__ge__',
+    '__get__',
+    '__getattr__',
+    '__getattribute__',
+    '__getitem__',
+    '__gt__',
+    '__hash__',
+    '__iadd__',
+    '__iand__',
+    '__ifloordiv__',
+    '__ilshift__',
+    '__imatmul__',
+    '__imod__',
+    '__imul__',
+    '__index__',
+    '__init__',
+    '__int__',
+    '__invert__',
+    '__ior__',
+    '__ipow__',
+    '__irshift__',
+    '__isub__',
+    '__iter__',
+    '__itruediv__',
+    '__ixor__',
+    '__le__',
+    '__len__',
+    '__lshift__',
+    '__lt__',
+    '__matmul__',
+    '__mod__',
+
'__mul__', + '__ne__', + '__neg__', + '__new__', + '__next__', + '__or__', + '__pos__', + '__pow__', + '__radd__', + '__rand__', + '__repr__', + '__rfloordiv__', + '__rlshift__', + '__rmatmul__', + '__rmod__', + '__rmul__', + '__ror__', + '__rpow__', + '__rrshift__', + '__rshift__', + '__rsub__', + '__rtruediv__', + '__rxor__', + '__set__', + '__setattr__', + '__setitem__', + '__str__', + '__sub__', + '__truediv__', + '__xor__', + '__divmod__', + '__rdivmod__', +] + + +####################################### +# helpers + +def iter_files(): + for name in ('Modules', 'Objects', 'Parser', 'PC', 'Programs', 'Python'): + root = os.path.join(ROOT, name) + for dirname, _, files in os.walk(root): + for name in files: + if not name.endswith(('.c', '.h')): + continue + yield os.path.join(dirname, name) + + +def iter_global_strings(): + id_regex = re.compile(r'\b_Py_ID\((\w+)\)') + str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)') + for filename in iter_files(): + try: + infile = open(filename, encoding='utf-8') + except FileNotFoundError: + # The file must have been a temporary file. + continue + with infile: + for lno, line in enumerate(infile, 1): + for m in id_regex.finditer(line): + identifier, = m.groups() + yield identifier, None, filename, lno, line + for m in str_regex.finditer(line): + varname, string = m.groups() + yield varname, string, filename, lno, line + + +def iter_to_marker(lines, marker): + for line in lines: + if line.rstrip() == marker: + break + yield line + + +class Printer: + + def __init__(self, file): + self.level = 0 + self.file = file + self.continuation = [False] + + @contextlib.contextmanager + def indent(self): + save_level = self.level + try: + self.level += 1 + yield + finally: + self.level = save_level + + def write(self, arg): + eol = '\n' + if self.continuation[-1]: + eol = f' \\{eol}' if arg else f'\\{eol}' + self.file.writelines((" "*self.level, arg, eol)) + + @contextlib.contextmanager + def block(self, prefix, suffix="", *, continuation=None): + if continuation is None: + continuation = self.continuation[-1] + self.continuation.append(continuation) + + self.write(prefix + " {") + with self.indent(): + yield + self.continuation.pop() + self.write("}" + suffix) + + +@contextlib.contextmanager +def open_for_changes(filename, orig): + """Like open() but only write to the file if it changed.""" + outfile = io.StringIO() + yield outfile + text = outfile.getvalue() + if text != orig: + with open(filename, 'w', encoding='utf-8') as outfile: + outfile.write(text) + else: + print(f'# not changed: {filename}') + + +####################################### +# the global objects + +START = f'/* The following is auto-generated by {SCRIPT_NAME}. */' +END = '/* End auto-generated code */' + + +def generate_global_strings(identifiers, strings): + filename = os.path.join(INTERNAL, 'pycore_global_strings.h') + + # Read the non-generated part of the file. + with open(filename) as infile: + orig = infile.read() + lines = iter(orig.rstrip().splitlines()) + before = '\n'.join(iter_to_marker(lines, START)) + for _ in iter_to_marker(lines, END): + pass + after = '\n'.join(lines) + + # Generate the file. 
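+    # Each string becomes one STRUCT_FOR_STR() line and each identifier one
+    # STRUCT_FOR_ID() line in the generated struct, e.g. (illustrative):
+    #   STRUCT_FOR_STR(empty, "")
+    #   STRUCT_FOR_ID(__name__)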
+ with open_for_changes(filename, orig) as outfile: + printer = Printer(outfile) + printer.write(before) + printer.write(START) + with printer.block('struct _Py_global_strings', ';'): + with printer.block('struct', ' literals;'): + for literal, name in sorted(strings.items(), key=lambda x: x[1]): + printer.write(f'STRUCT_FOR_STR({name}, "{literal}")') + outfile.write('\n') + with printer.block('struct', ' identifiers;'): + for name in sorted(identifiers): + assert name.isidentifier(), name + printer.write(f'STRUCT_FOR_ID({name})') + with printer.block('struct', ' ascii[128];'): + printer.write("PyASCIIObject _ascii;") + printer.write("uint8_t _data[2];") + with printer.block('struct', ' latin1[128];'): + printer.write("PyCompactUnicodeObject _latin1;") + printer.write("uint8_t _data[2];") + printer.write(END) + printer.write(after) + + +def generate_runtime_init(identifiers, strings): + # First get some info from the declarations. + nsmallposints = None + nsmallnegints = None + with open(os.path.join(INTERNAL, 'pycore_global_objects.h')) as infile: + for line in infile: + if line.startswith('#define _PY_NSMALLPOSINTS'): + nsmallposints = int(line.split()[-1]) + elif line.startswith('#define _PY_NSMALLNEGINTS'): + nsmallnegints = int(line.split()[-1]) + break + else: + raise NotImplementedError + assert nsmallposints and nsmallnegints + + # Then target the runtime initializer. + filename = os.path.join(INTERNAL, 'pycore_runtime_init_generated.h') + + # Read the non-generated part of the file. + with open(filename) as infile: + orig = infile.read() + lines = iter(orig.rstrip().splitlines()) + before = '\n'.join(iter_to_marker(lines, START)) + for _ in iter_to_marker(lines, END): + pass + after = '\n'.join(lines) + + # Generate the file. + with open_for_changes(filename, orig) as outfile: + immortal_objects = [] + printer = Printer(outfile) + printer.write(before) + printer.write(START) + with printer.block('#define _Py_global_objects_INIT', continuation=True): + with printer.block('.singletons =', ','): + # Global int objects. + with printer.block('.small_ints =', ','): + for i in range(-nsmallnegints, nsmallposints): + printer.write(f'_PyLong_DIGIT_INIT({i}),') + immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + {i}]') + printer.write('') + # Global bytes objects. + printer.write('.bytes_empty = _PyBytes_SIMPLE_INIT(0, 0),') + immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(bytes_empty)') + with printer.block('.bytes_characters =', ','): + for i in range(256): + printer.write(f'_PyBytes_CHAR_INIT({i}),') + immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(bytes_characters)[{i}]') + printer.write('') + # Global strings. 
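+                # Mirrors the layout declared in pycore_global_strings.h
+                # above: one INIT_STR() per literal and one INIT_ID() per
+                # identifier.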
+ with printer.block('.strings =', ','): + with printer.block('.literals =', ','): + for literal, name in sorted(strings.items(), key=lambda x: x[1]): + printer.write(f'INIT_STR({name}, "{literal}"),') + immortal_objects.append(f'(PyObject *)&_Py_STR({name})') + with printer.block('.identifiers =', ','): + for name in sorted(identifiers): + assert name.isidentifier(), name + printer.write(f'INIT_ID({name}),') + immortal_objects.append(f'(PyObject *)&_Py_ID({name})') + with printer.block('.ascii =', ','): + for i in range(128): + printer.write(f'_PyASCIIObject_INIT("\\x{i:02x}"),') + immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).ascii[{i}]') + with printer.block('.latin1 =', ','): + for i in range(128, 256): + utf8 = ['"'] + for c in chr(i).encode('utf-8'): + utf8.append(f"\\x{c:02x}") + utf8.append('"') + printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}", {"".join(utf8)}),') + immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).latin1[{i} - 128]') + printer.write('') + with printer.block('.tuple_empty =', ','): + printer.write('.ob_base = _PyVarObject_IMMORTAL_INIT(&PyTuple_Type, 0)') + immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(tuple_empty)') + printer.write('') + printer.write("static inline void") + with printer.block("_PyUnicode_InitStaticStrings(void)"): + printer.write(f'PyObject *string;') + for i in sorted(identifiers): + # This use of _Py_ID() is ignored by iter_global_strings() + # since iter_files() ignores .h files. + printer.write(f'string = &_Py_ID({i});') + printer.write(f'PyUnicode_InternInPlace(&string);') + printer.write('') + printer.write('#ifdef Py_DEBUG') + printer.write("static inline void") + with printer.block("_PyStaticObjects_CheckRefcnt(void)"): + for i in immortal_objects: + with printer.block(f'if (Py_REFCNT({i}) < _PyObject_IMMORTAL_REFCNT)', ';'): + printer.write(f'_PyObject_Dump({i});') + printer.write(f'Py_FatalError("immortal object has less refcnt than ' + 'expected _PyObject_IMMORTAL_REFCNT");') + printer.write('#endif') + printer.write(END) + printer.write(after) + + +def get_identifiers_and_strings() -> 'tuple[set[str], dict[str, str]]': + identifiers = set(IDENTIFIERS) + strings = {} + for name, string, *_ in iter_global_strings(): + if string is None: + if name not in IGNORED: + identifiers.add(name) + else: + if string not in strings: + strings[string] = name + elif name != strings[string]: + raise ValueError(f'string mismatch for {name!r} ({string!r} != {strings[name]!r}') + return identifiers, strings + + +####################################### +# the script + +def main() -> None: + identifiers, strings = get_identifiers_and_strings() + + generate_global_strings(identifiers, strings) + generate_runtime_init(identifiers, strings) + + +if __name__ == '__main__': + main() diff --git a/Tools/build/generate_levenshtein_examples.py b/Tools/build/generate_levenshtein_examples.py new file mode 100644 index 0000000..5a8360f --- /dev/null +++ b/Tools/build/generate_levenshtein_examples.py @@ -0,0 +1,70 @@ +"""Generate 10,000 unique examples for the Levenshtein short-circuit tests.""" + +import argparse +from functools import cache +import json +import os.path +from random import choices, randrange + + +# This should be in sync with Lib/traceback.py. It's not importing those values +# because this script is being executed by PYTHON_FOR_REGEN and not by the in-tree +# build of Python. 
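+# A case-only substitution costs _CASE_COST (1) while an insertion,
+# deletion, or full substitution costs _MOVE_COST (2); e.g.
+# levenshtein("a", "A") == 1 and levenshtein("ab", "b") == 2.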
+_MOVE_COST = 2 +_CASE_COST = 1 + + +def _substitution_cost(ch_a, ch_b): + if ch_a == ch_b: + return 0 + if ch_a.lower() == ch_b.lower(): + return _CASE_COST + return _MOVE_COST + + +@cache +def levenshtein(a, b): + if not a or not b: + return (len(a) + len(b)) * _MOVE_COST + option1 = levenshtein(a[:-1], b[:-1]) + _substitution_cost(a[-1], b[-1]) + option2 = levenshtein(a[:-1], b) + _MOVE_COST + option3 = levenshtein(a, b[:-1]) + _MOVE_COST + return min(option1, option2, option3) + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument('output_path', metavar='FILE', type=str) + parser.add_argument('--overwrite', dest='overwrite', action='store_const', + const=True, default=False, + help='overwrite an existing test file') + + args = parser.parse_args() + output_path = os.path.realpath(args.output_path) + if not args.overwrite and os.path.isfile(output_path): + print(f"{output_path} already exists, skipping regeneration.") + print( + "To force, add --overwrite to the invocation of this tool or" + " delete the existing file." + ) + return + + examples = set() + # Create a lot of non-empty examples, which should end up with a Gauss-like + # distribution for even costs (moves) and odd costs (case substitutions). + while len(examples) < 9990: + a = ''.join(choices("abcABC", k=randrange(1, 10))) + b = ''.join(choices("abcABC", k=randrange(1, 10))) + expected = levenshtein(a, b) + examples.add((a, b, expected)) + # Create one empty case each for strings between 0 and 9 in length. + for i in range(10): + b = ''.join(choices("abcABC", k=i)) + expected = levenshtein("", b) + examples.add(("", b, expected)) + with open(output_path, "w") as f: + json.dump(sorted(examples), f, indent=2) + + +if __name__ == "__main__": + main() diff --git a/Tools/build/generate_opcode_h.py b/Tools/build/generate_opcode_h.py new file mode 100644 index 0000000..372221a --- /dev/null +++ b/Tools/build/generate_opcode_h.py @@ -0,0 +1,199 @@ +# This script generates the opcode.h header file. 
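+# Typical invocation, as wired into the build (paths illustrative):
+#   python3 Tools/build/generate_opcode_h.py Lib/opcode.py \
+#       Include/opcode.h Include/internal/pycore_opcode.h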
+
+import sys
+import tokenize
+
+SCRIPT_NAME = "Tools/build/generate_opcode_h.py"
+PYTHON_OPCODE = "Lib/opcode.py"
+
+header = f"""
+// Auto-generated by {SCRIPT_NAME} from {PYTHON_OPCODE}
+
+#ifndef Py_OPCODE_H
+#define Py_OPCODE_H
+#ifdef __cplusplus
+extern "C" {{
+#endif
+
+
+/* Instruction opcodes for compiled code */
+""".lstrip()
+
+footer = """
+
+#define IS_PSEUDO_OPCODE(op) (((op) >= MIN_PSEUDO_OPCODE) && ((op) <= MAX_PSEUDO_OPCODE))
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_OPCODE_H */
+"""
+
+internal_header = f"""
+// Auto-generated by {SCRIPT_NAME} from {PYTHON_OPCODE}
+
+#ifndef Py_INTERNAL_OPCODE_H
+#define Py_INTERNAL_OPCODE_H
+#ifdef __cplusplus
+extern "C" {{
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "opcode.h"
+""".lstrip()
+
+internal_footer = """
+#ifdef __cplusplus
+}
+#endif
+#endif // !Py_INTERNAL_OPCODE_H
+"""
+
+DEFINE = "#define {:<38} {:>3}\n"
+
+UINT32_MASK = (1<<32)-1
+
+def write_int_array_from_ops(name, ops, out):
+    bits = 0
+    for op in ops:
+        bits |= 1 << op
+    out.write(f"const uint32_t {name}[9] = {{\n")
+    for i in range(9):
+        out.write(f"    {bits & UINT32_MASK}U,\n")
+        bits >>= 32
+    assert bits == 0
+    out.write(f"}};\n")
+
+def main(opcode_py, outfile='Include/opcode.h', internaloutfile='Include/internal/pycore_opcode.h'):
+    opcode = {}
+    if hasattr(tokenize, 'open'):
+        fp = tokenize.open(opcode_py)  # Python 3.2+
+    else:
+        fp = open(opcode_py)  # Python 2.7
+    with fp:
+        code = fp.read()
+    exec(code, opcode)
+    opmap = opcode['opmap']
+    opname = opcode['opname']
+    hasarg = opcode['hasarg']
+    hasconst = opcode['hasconst']
+    hasjrel = opcode['hasjrel']
+    hasjabs = opcode['hasjabs']
+    is_pseudo = opcode['is_pseudo']
+    _pseudo_ops = opcode['_pseudo_ops']
+
+    HAVE_ARGUMENT = opcode["HAVE_ARGUMENT"]
+    MIN_PSEUDO_OPCODE = opcode["MIN_PSEUDO_OPCODE"]
+    MAX_PSEUDO_OPCODE = opcode["MAX_PSEUDO_OPCODE"]
+
+    NUM_OPCODES = len(opname)
+    used = [ False ] * len(opname)
+    next_op = 1
+
+    for name, op in opmap.items():
+        used[op] = True
+
+    specialized_opmap = {}
+    opname_including_specialized = opname.copy()
+    for name in opcode['_specialized_instructions']:
+        while used[next_op]:
+            next_op += 1
+        specialized_opmap[name] = next_op
+        opname_including_specialized[next_op] = name
+        used[next_op] = True
+    specialized_opmap['DO_TRACING'] = 255
+    opname_including_specialized[255] = 'DO_TRACING'
+    used[255] = True
+
+    with (open(outfile, 'w') as fobj, open(internaloutfile, 'w') as iobj):
+        fobj.write(header)
+        iobj.write(internal_header)
+
+        for name in opname:
+            if name in opmap:
+                op = opmap[name]
+                if op == HAVE_ARGUMENT:
+                    fobj.write(DEFINE.format("HAVE_ARGUMENT", HAVE_ARGUMENT))
+                if op == MIN_PSEUDO_OPCODE:
+                    fobj.write(DEFINE.format("MIN_PSEUDO_OPCODE", MIN_PSEUDO_OPCODE))
+
+                fobj.write(DEFINE.format(name, op))
+
+                if op == MAX_PSEUDO_OPCODE:
+                    fobj.write(DEFINE.format("MAX_PSEUDO_OPCODE", MAX_PSEUDO_OPCODE))
+
+
+        for name, op in specialized_opmap.items():
+            fobj.write(DEFINE.format(name, op))
+
+        iobj.write("\nextern const uint8_t _PyOpcode_Caches[256];\n")
+        iobj.write("\nextern const uint8_t _PyOpcode_Deopt[256];\n")
+        iobj.write("\n#ifdef NEED_OPCODE_TABLES\n")
+        write_int_array_from_ops("_PyOpcode_RelativeJump", opcode['hasjrel'], iobj)
+        write_int_array_from_ops("_PyOpcode_Jump", opcode['hasjrel'] + opcode['hasjabs'], iobj)
+
+        iobj.write("\nconst uint8_t _PyOpcode_Caches[256] = {\n")
+        for i, entries in enumerate(opcode["_inline_cache_entries"]):
+            if entries:
+                iobj.write(f"    [{opname[i]}] = {entries},\n")
+        iobj.write("};\n")
+
+        deoptcodes = {}
+        for basic, op in opmap.items():
+            if not is_pseudo(op):
+                deoptcodes[basic] = basic
+        for basic, family in opcode["_specializations"].items():
+            for specialized in family:
+                deoptcodes[specialized] = basic
+        iobj.write("\nconst uint8_t _PyOpcode_Deopt[256] = {\n")
+        for opt, deopt in sorted(deoptcodes.items()):
+            iobj.write(f"    [{opt}] = {deopt},\n")
+        iobj.write("};\n")
+        iobj.write("#endif // NEED_OPCODE_TABLES\n")
+
+        fobj.write("\n")
+        fobj.write("#define HAS_ARG(op) ((((op) >= HAVE_ARGUMENT) && (!IS_PSEUDO_OPCODE(op)))\\")
+        for op in _pseudo_ops:
+            if opmap[op] in hasarg:
+                fobj.write(f"\n    || ((op) == {op}) \\")
+        fobj.write("\n    )\n")
+
+        fobj.write("\n")
+        fobj.write("#define HAS_CONST(op) (false\\")
+        for op in hasconst:
+            fobj.write(f"\n    || ((op) == {opname[op]}) \\")
+        fobj.write("\n    )\n")
+
+        fobj.write("\n")
+        for i, (op, _) in enumerate(opcode["_nb_ops"]):
+            fobj.write(DEFINE.format(op, i))
+
+        iobj.write("\n")
+        iobj.write("#ifdef Py_DEBUG\n")
+        iobj.write(f"static const char *const _PyOpcode_OpName[{NUM_OPCODES}] = {{\n")
+        for op, name in enumerate(opname_including_specialized):
+            if name[0] != "<":
+                op = name
+            iobj.write(f'''    [{op}] = "{name}",\n''')
+        iobj.write("};\n")
+        iobj.write("#endif\n")
+
+        iobj.write("\n")
+        iobj.write("#define EXTRA_CASES \\\n")
+        for i, flag in enumerate(used):
+            if not flag:
+                iobj.write(f"    case {i}: \\\n")
+        iobj.write("        ;\n")
+
+        fobj.write(footer)
+        iobj.write(internal_footer)
+
+    print(f"{outfile} regenerated from {opcode_py}")
+
+
+if __name__ == '__main__':
+    main(sys.argv[1], sys.argv[2], sys.argv[3])
diff --git a/Tools/build/generate_re_casefix.py b/Tools/build/generate_re_casefix.py
new file mode 100755
index 0000000..b57ac07
--- /dev/null
+++ b/Tools/build/generate_re_casefix.py
@@ -0,0 +1,96 @@
+#! /usr/bin/env python3
+# This script generates Lib/re/_casefix.py.
+
+import collections
+import sys
+import unicodedata
+
+SCRIPT_NAME = 'Tools/build/generate_re_casefix.py'
+
+def update_file(file, content):
+    try:
+        with open(file, 'r', encoding='utf-8') as fobj:
+            if fobj.read() == content:
+                return False
+    except (OSError, ValueError):
+        pass
+    with open(file, 'w', encoding='utf-8') as fobj:
+        fobj.write(content)
+    return True
+
+re_casefix_template = f"""\
+# Auto-generated by {SCRIPT_NAME}.
+
+# Maps the code of a lowercased character to codes of different lowercased
+# characters which have the same uppercase.
+_EXTRA_CASES = {{
+%s
+}}
+"""
+
+def uname(i):
+    return unicodedata.name(chr(i), r'U+%04X' % i)
+
+class hexint(int):
+    def __repr__(self):
+        return '%#06x' % self
+
+def alpha(i):
+    c = chr(i)
+    return c if c.isalpha() else ascii(c)[1:-1]
+
+
+def main(outfile='Lib/re/_casefix.py'):
+    # Find sets of characters which have the same uppercase.
+    equivalent_chars = collections.defaultdict(str)
+    for c in map(chr, range(sys.maxunicode + 1)):
+        equivalent_chars[c.upper()] += c
+    equivalent_chars = [t for t in equivalent_chars.values() if len(t) > 1]
+
+    # List of codes of lowercased characters which have the same uppercase.
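+    # For example, U+00B5 (MICRO SIGN) and U+03BC (GREEK SMALL LETTER MU)
+    # both uppercase to U+039C, so each one ends up mapped to the other.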
+ equivalent_lower_codes = [sorted(t) + for s in equivalent_chars + for t in [set(ord(c.lower()) for c in s)] + if len(t) > 1] + + bad_codes = [] + for t in equivalent_lower_codes: + for i in t: + if i > 0xffff: + bad_codes.extend(t) + try: + bad_codes.append(ord(chr(i).upper())) + except (ValueError, TypeError): + pass + break + if bad_codes: + print('Case-insensitive matching may not work correctly for character:', + file=sys.stderr) + for i in sorted(bad_codes): + print(" '%s' (U+%04x, %s)" % (alpha(i), i, uname(i)), + file=sys.stderr) + sys.exit(1) + + mapping = {i: tuple(j for j in t if i != j) + for t in equivalent_lower_codes + for i in t} + + items = [] + for i, t in sorted(mapping.items()): + items.append(' # %s: %s' % ( + uname(i), + ', '.join(map(uname, t)), + )) + items.append(" %r: %r, # '%s': '%s'" % ( + hexint(i), + tuple(map(hexint, t)), + alpha(i), + ''.join(map(alpha, t)), + )) + + update_file(outfile, re_casefix_template % '\n'.join(items)) + + +if __name__ == '__main__': + import sys + main(*sys.argv[1:]) diff --git a/Tools/build/generate_sre_constants.py b/Tools/build/generate_sre_constants.py new file mode 100755 index 0000000..abea069 --- /dev/null +++ b/Tools/build/generate_sre_constants.py @@ -0,0 +1,80 @@ +#! /usr/bin/env python3 +# This script generates Modules/_sre/sre_constants.h from Lib/re/_constants.py. + +SCRIPT_NAME = 'Tools/build/generate_sre_constants.py' + + +def update_file(file, content): + try: + with open(file, 'r') as fobj: + if fobj.read() == content: + return False + except (OSError, ValueError): + pass + with open(file, 'w') as fobj: + fobj.write(content) + return True + +sre_constants_header = f"""\ +/* + * Secret Labs' Regular Expression Engine + * + * regular expression matching engine + * + * Auto-generated by {SCRIPT_NAME} from + * Lib/re/_constants.py. + * + * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. + * + * See the sre.c file for information on usage and redistribution. 
+ */
+
+"""
+
+def main(
+    infile="Lib/re/_constants.py",
+    outfile_constants="Modules/_sre/sre_constants.h",
+    outfile_targets="Modules/_sre/sre_targets.h",
+):
+    ns = {}
+    with open(infile) as fp:
+        code = fp.read()
+    exec(code, ns)
+
+    def dump(d, prefix):
+        items = sorted(d)
+        for item in items:
+            yield "#define %s_%s %d\n" % (prefix, item, item)
+
+    def dump2(d, prefix):
+        items = [(value, name) for name, value in d.items()
+                 if name.startswith(prefix)]
+        for value, name in sorted(items):
+            yield "#define %s %d\n" % (name, value)
+
+    def dump_gotos(d, prefix):
+        for i, item in enumerate(sorted(d)):
+            assert i == item
+            yield f"    &&{prefix}_{item},\n"
+
+    content = [sre_constants_header]
+    content.append("#define SRE_MAGIC %d\n" % ns["MAGIC"])
+    content.extend(dump(ns["OPCODES"], "SRE_OP"))
+    content.extend(dump(ns["ATCODES"], "SRE"))
+    content.extend(dump(ns["CHCODES"], "SRE"))
+    content.extend(dump2(ns, "SRE_FLAG_"))
+    content.extend(dump2(ns, "SRE_INFO_"))
+
+    update_file(outfile_constants, ''.join(content))
+
+    content = [sre_constants_header]
+    content.append(f"static void *sre_targets[{len(ns['OPCODES'])}] = {{\n")
+    content.extend(dump_gotos(ns["OPCODES"], "TARGET_SRE_OP"))
+    content.append("};\n")
+
+    update_file(outfile_targets, ''.join(content))
+
+
+if __name__ == '__main__':
+    import sys
+    main(*sys.argv[1:])
diff --git a/Tools/build/generate_stdlib_module_names.py b/Tools/build/generate_stdlib_module_names.py
new file mode 100644
index 0000000..e4f09f8
--- /dev/null
+++ b/Tools/build/generate_stdlib_module_names.py
@@ -0,0 +1,139 @@
+# This script lists the names of standard library modules
+# to update Python/stdlib_module_names.h
+import _imp
+import os.path
+import re
+import subprocess
+import sys
+import sysconfig
+
+from check_extension_modules import ModuleChecker
+
+
+SCRIPT_NAME = 'Tools/build/generate_stdlib_module_names.py'
+
+SRC_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+STDLIB_PATH = os.path.join(SRC_DIR, 'Lib')
+
+IGNORE = {
+    '__init__',
+    '__pycache__',
+    'site-packages',
+
+    # Test modules and packages
+    '__hello__',
+    '__phello__',
+    '__hello_alias__',
+    '__phello_alias__',
+    '__hello_only__',
+    '_ctypes_test',
+    '_testbuffer',
+    '_testcapi',
+    '_testconsole',
+    '_testimportmultiple',
+    '_testinternalcapi',
+    '_testmultiphase',
+    '_xxsubinterpreters',
+    '_xxtestfuzz',
+    'distutils.tests',
+    'idlelib.idle_test',
+    'test',
+    'xxlimited',
+    'xxlimited_35',
+    'xxsubtype',
+}
+
+# Pure Python modules (Lib/*.py)
+def list_python_modules(names):
+    for filename in os.listdir(STDLIB_PATH):
+        if not filename.endswith(".py"):
+            continue
+        name = filename.removesuffix(".py")
+        names.add(name)
+
+
+# Packages in Lib/
+def list_packages(names):
+    for name in os.listdir(STDLIB_PATH):
+        if name in IGNORE:
+            continue
+        package_path = os.path.join(STDLIB_PATH, name)
+        if not os.path.isdir(package_path):
+            continue
+        if any(package_file.endswith(".py")
+               for package_file in os.listdir(package_path)):
+            names.add(name)
+
+
+# Built-in and extension modules built by Modules/Setup*,
+# including Windows and macOS extensions.
+def list_modules_setup_extensions(names):
+    checker = ModuleChecker()
+    names.update(checker.list_module_names(all=True))
+
+
+# List frozen modules of the PyImport_FrozenModules list (Python/frozen.c).
+# Use the "./Programs/_testembed list_frozen" command.
+def list_frozen(names):
+    submodules = set()
+    for name in _imp._frozen_module_names():
+        # Skip __hello__, __hello_alias__, etc.
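+        # Real frozen stdlib modules such as "zipimport" or "os" never use
+        # dunder-style names, so this only drops the frozen test helpers.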
+ if name.startswith('__'): + continue + if '.' in name: + submodules.add(name) + else: + names.add(name) + # Make sure all frozen submodules have a known parent. + for name in list(submodules): + if name.partition('.')[0] in names: + submodules.remove(name) + if submodules: + raise Exception(f'unexpected frozen submodules: {sorted(submodules)}') + + +def list_modules(): + names = set(sys.builtin_module_names) + list_modules_setup_extensions(names) + list_packages(names) + list_python_modules(names) + list_frozen(names) + + # Remove ignored packages and modules + for name in list(names): + package_name = name.split('.')[0] + # package_name can be equal to name + if package_name in IGNORE: + names.discard(name) + + for name in names: + if "." in name: + raise Exception("sub-modules must not be listed") + + return names + + +def write_modules(fp, names): + print(f"// Auto-generated by {SCRIPT_NAME}.", + file=fp) + print("// List used to create sys.stdlib_module_names.", file=fp) + print(file=fp) + print("static const char* _Py_stdlib_module_names[] = {", file=fp) + for name in sorted(names): + print(f'"{name}",', file=fp) + print("};", file=fp) + + +def main(): + if not sysconfig.is_python_build(): + print(f"ERROR: {sys.executable} is not a Python build", + file=sys.stderr) + sys.exit(1) + + fp = sys.stdout + names = list_modules() + write_modules(fp, names) + + +if __name__ == "__main__": + main() diff --git a/Tools/build/generate_token.py b/Tools/build/generate_token.py new file mode 100755 index 0000000..fc12835 --- /dev/null +++ b/Tools/build/generate_token.py @@ -0,0 +1,282 @@ +#! /usr/bin/env python3 +# This script generates token related files from Grammar/Tokens: +# +# Doc/library/token-list.inc +# Include/token.h +# Parser/token.c +# Lib/token.py + + +SCRIPT_NAME = 'Tools/build/generate_token.py' +AUTO_GENERATED_BY_SCRIPT = f'Auto-generated by {SCRIPT_NAME}' +NT_OFFSET = 256 + +def load_tokens(path): + tok_names = [] + string_to_tok = {} + ERRORTOKEN = None + with open(path) as fp: + for line in fp: + line = line.strip() + # strip comments + i = line.find('#') + if i >= 0: + line = line[:i].strip() + if not line: + continue + fields = line.split() + name = fields[0] + value = len(tok_names) + if name == 'ERRORTOKEN': + ERRORTOKEN = value + string = fields[1] if len(fields) > 1 else None + if string: + string = eval(string) + string_to_tok[string] = value + tok_names.append(name) + return tok_names, ERRORTOKEN, string_to_tok + + +def update_file(file, content): + try: + with open(file, 'r') as fobj: + if fobj.read() == content: + return False + except (OSError, ValueError): + pass + with open(file, 'w') as fobj: + fobj.write(content) + return True + + +token_h_template = f"""\ +/* {AUTO_GENERATED_BY_SCRIPT} */ +""" +token_h_template += """\ + +/* Token types */ +#ifndef Py_INTERNAL_TOKEN_H +#define Py_INTERNAL_TOKEN_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#undef TILDE /* Prevent clash of our definition with system macro. 
Ex AIX, ioctl.h */ + +%s\ +#define N_TOKENS %d +#define NT_OFFSET %d + +/* Special definitions for cooperation with parser */ + +#define ISTERMINAL(x) ((x) < NT_OFFSET) +#define ISNONTERMINAL(x) ((x) >= NT_OFFSET) +#define ISEOF(x) ((x) == ENDMARKER) +#define ISWHITESPACE(x) ((x) == ENDMARKER || \\ + (x) == NEWLINE || \\ + (x) == INDENT || \\ + (x) == DEDENT) + + +// Symbols exported for test_peg_generator +PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */ +PyAPI_FUNC(int) _PyToken_OneChar(int); +PyAPI_FUNC(int) _PyToken_TwoChars(int, int); +PyAPI_FUNC(int) _PyToken_ThreeChars(int, int, int); + +#ifdef __cplusplus +} +#endif +#endif // !Py_INTERNAL_TOKEN_H +""" + +def make_h(infile, outfile='Include/internal/pycore_token.h'): + tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile) + + defines = [] + for value, name in enumerate(tok_names[:ERRORTOKEN + 1]): + defines.append("#define %-15s %d\n" % (name, value)) + + if update_file(outfile, token_h_template % ( + ''.join(defines), + len(tok_names), + NT_OFFSET + )): + print("%s regenerated from %s" % (outfile, infile)) + + +token_c_template = f"""\ +/* {AUTO_GENERATED_BY_SCRIPT} */ +""" +token_c_template += """\ + +#include "Python.h" +#include "pycore_token.h" + +/* Token names */ + +const char * const _PyParser_TokenNames[] = { +%s\ +}; + +/* Return the token corresponding to a single character */ + +int +_PyToken_OneChar(int c1) +{ +%s\ + return OP; +} + +int +_PyToken_TwoChars(int c1, int c2) +{ +%s\ + return OP; +} + +int +_PyToken_ThreeChars(int c1, int c2, int c3) +{ +%s\ + return OP; +} +""" + +def generate_chars_to_token(mapping, n=1): + result = [] + write = result.append + indent = ' ' * n + write(indent) + write('switch (c%d) {\n' % (n,)) + for c in sorted(mapping): + write(indent) + value = mapping[c] + if isinstance(value, dict): + write("case '%s':\n" % (c,)) + write(generate_chars_to_token(value, n + 1)) + write(indent) + write(' break;\n') + else: + write("case '%s': return %s;\n" % (c, value)) + write(indent) + write('}\n') + return ''.join(result) + +def make_c(infile, outfile='Parser/token.c'): + tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile) + string_to_tok['<>'] = string_to_tok['!='] + chars_to_token = {} + for string, value in string_to_tok.items(): + assert 1 <= len(string) <= 3 + name = tok_names[value] + m = chars_to_token.setdefault(len(string), {}) + for c in string[:-1]: + m = m.setdefault(c, {}) + m[string[-1]] = name + + names = [] + for value, name in enumerate(tok_names): + if value >= ERRORTOKEN: + name = '<%s>' % name + names.append(' "%s",\n' % name) + names.append(' "",\n') + + if update_file(outfile, token_c_template % ( + ''.join(names), + generate_chars_to_token(chars_to_token[1]), + generate_chars_to_token(chars_to_token[2]), + generate_chars_to_token(chars_to_token[3]) + )): + print("%s regenerated from %s" % (outfile, infile)) + + +token_inc_template = f"""\ +.. {AUTO_GENERATED_BY_SCRIPT} +%s +.. data:: N_TOKENS + +.. data:: NT_OFFSET +""" + +def make_rst(infile, outfile='Doc/library/token-list.inc'): + tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile) + tok_to_string = {value: s for s, value in string_to_tok.items()} + + names = [] + for value, name in enumerate(tok_names[:ERRORTOKEN + 1]): + names.append('.. data:: %s' % (name,)) + if value in tok_to_string: + names.append('') + names.append(' Token value for ``"%s"``.' 
% tok_to_string[value]) + names.append('') + + if update_file(outfile, token_inc_template % '\n'.join(names)): + print("%s regenerated from %s" % (outfile, infile)) + + +token_py_template = f'''\ +"""Token constants.""" +# {AUTO_GENERATED_BY_SCRIPT} +''' +token_py_template += ''' +__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF'] + +%s +N_TOKENS = %d +# Special definitions for cooperation with parser +NT_OFFSET = %d + +tok_name = {value: name + for name, value in globals().items() + if isinstance(value, int) and not name.startswith('_')} +__all__.extend(tok_name.values()) + +EXACT_TOKEN_TYPES = { +%s +} + +def ISTERMINAL(x): + return x < NT_OFFSET + +def ISNONTERMINAL(x): + return x >= NT_OFFSET + +def ISEOF(x): + return x == ENDMARKER +''' + +def make_py(infile, outfile='Lib/token.py'): + tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile) + + constants = [] + for value, name in enumerate(tok_names): + constants.append('%s = %d' % (name, value)) + constants.insert(ERRORTOKEN, + "# These aren't used by the C tokenizer but are needed for tokenize.py") + + token_types = [] + for s, value in sorted(string_to_tok.items()): + token_types.append(' %r: %s,' % (s, tok_names[value])) + + if update_file(outfile, token_py_template % ( + '\n'.join(constants), + len(tok_names), + NT_OFFSET, + '\n'.join(token_types), + )): + print("%s regenerated from %s" % (outfile, infile)) + + +def main(op, infile='Grammar/Tokens', *args): + make = globals()['make_' + op] + make(infile, *args) + + +if __name__ == '__main__': + import sys + main(*sys.argv[1:]) diff --git a/Tools/build/parse_html5_entities.py b/Tools/build/parse_html5_entities.py new file mode 100755 index 0000000..d2bf290 --- /dev/null +++ b/Tools/build/parse_html5_entities.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python3 +""" +Utility for parsing HTML5 entity definitions available from: + + https://html.spec.whatwg.org/entities.json + https://html.spec.whatwg.org/multipage/named-characters.html + +The page now contains the following note: + + "This list is static and will not be expanded or changed in the future." + +Written by Ezio Melotti and Iuliia Proskurnia. 
+""" + +import os +import sys +import json +from urllib.request import urlopen +from html.entities import html5 + +SCRIPT_NAME = 'Tools/build/parse_html5_entities.py' +PAGE_URL = 'https://html.spec.whatwg.org/multipage/named-characters.html' +ENTITIES_URL = 'https://html.spec.whatwg.org/entities.json' +HTML5_SECTION_START = '# HTML5 named character references' + +def get_json(url): + """Download the json file from the url and returns a decoded object.""" + with urlopen(url) as f: + data = f.read().decode('utf-8') + return json.loads(data) + +def create_dict(entities): + """Create the html5 dict from the decoded json object.""" + new_html5 = {} + for name, value in entities.items(): + new_html5[name.lstrip('&')] = value['characters'] + return new_html5 + +def compare_dicts(old, new): + """Compare the old and new dicts and print the differences.""" + added = new.keys() - old.keys() + if added: + print('{} entitie(s) have been added:'.format(len(added))) + for name in sorted(added): + print(' {!r}: {!r}'.format(name, new[name])) + removed = old.keys() - new.keys() + if removed: + print('{} entitie(s) have been removed:'.format(len(removed))) + for name in sorted(removed): + print(' {!r}: {!r}'.format(name, old[name])) + changed = set() + for name in (old.keys() & new.keys()): + if old[name] != new[name]: + changed.add((name, old[name], new[name])) + if changed: + print('{} entitie(s) have been modified:'.format(len(changed))) + for item in sorted(changed): + print(' {!r}: {!r} -> {!r}'.format(*item)) + +def write_items(entities, file=sys.stdout): + """Write the items of the dictionary in the specified file.""" + # The keys in the generated dictionary should be sorted + # in a case-insensitive way, however, when two keys are equal, + # the uppercase version should come first so that the result + # looks like: ['Aacute', 'aacute', 'Aacute;', 'aacute;', ...] + # To do this we first sort in a case-sensitive way (so all the + # uppercase chars come first) and then sort with key=str.lower. + # Since the sorting is stable the uppercase keys will eventually + # be before their equivalent lowercase version. 
+    keys = sorted(entities.keys())
+    keys = sorted(keys, key=str.lower)
+    print(HTML5_SECTION_START, file=file)
+    print(f'# Generated by {SCRIPT_NAME}\n'
+          f'# from {ENTITIES_URL} and\n'
+          f'# {PAGE_URL}.\n'
+          f'# Map HTML5 named character references to the '
+          f'equivalent Unicode character(s).', file=file)
+    print('html5 = {', file=file)
+    for name in keys:
+        print(f'    {name!r}: {entities[name]!a},', file=file)
+    print('}', file=file)
+
+
+if __name__ == '__main__':
+    # without args print a diff between html.entities.html5 and new_html5
+    # with --create print the new html5 dict
+    # with --patch patch the Lib/html/entities.py file
+    new_html5 = create_dict(get_json(ENTITIES_URL))
+    if '--create' in sys.argv:
+        write_items(new_html5)
+    elif '--patch' in sys.argv:
+        fname = 'Lib/html/entities.py'
+        temp_fname = fname + '.temp'
+        with open(fname) as f1, open(temp_fname, 'w') as f2:
+            skip = False
+            for line in f1:
+                if line.startswith(HTML5_SECTION_START):
+                    write_items(new_html5, file=f2)
+                    skip = True
+                    continue
+                if skip:
+                    # skip the old items until the }
+                    if line.startswith('}'):
+                        skip = False
+                    continue
+                f2.write(line)
+        os.remove(fname)
+        os.rename(temp_fname, fname)
+    else:
+        if html5 == new_html5:
+            print('The current dictionary is up to date.')
+        else:
+            compare_dicts(html5, new_html5)
+            print('Run "./python {0} --patch" to update Lib/html/entities.py '
+                  'or "./python {0} --create" to see the generated '
+                  'dictionary.'.format(__file__))
diff --git a/Tools/build/smelly.py b/Tools/build/smelly.py
new file mode 100755
index 0000000..276a5ab
--- /dev/null
+++ b/Tools/build/smelly.py
@@ -0,0 +1,173 @@
+#!/usr/bin/env python
+# Script checking that all symbols exported by libpython start with Py or _Py
+
+import os.path
+import subprocess
+import sys
+import sysconfig
+
+
+ALLOWED_PREFIXES = ('Py', '_Py')
+if sys.platform == 'darwin':
+    ALLOWED_PREFIXES += ('__Py',)
+
+IGNORED_EXTENSION = "_ctypes_test"
+# Ignore constructor and destructor functions
+IGNORED_SYMBOLS = {'_init', '_fini'}
+
+
+def is_local_symbol_type(symtype):
+    # Ignore local symbols.
+
+    # If lowercase, the symbol is usually local; if uppercase, the symbol
+    # is global (external).  There are however a few lowercase symbols that
+    # are shown for special global symbols ("u", "v" and "w").
+    if symtype.islower() and symtype not in "uvw":
+        return True
+
+    # Ignore the initialized data section (d and D) and the BSS data
+    # section.  For example, ignore "__bss_start (type: B)"
+    # and "_edata (type: D)".
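+    # The type letters are those reported by nm(1); e.g. "T"/"t" mark
+    # symbols in the text (code) section, where exported Py* functions live.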
+ if symtype in "bBdD": + return True + + return False + + +def get_exported_symbols(library, dynamic=False): + print(f"Check that {library} only exports symbols starting with Py or _Py") + + # Only look at dynamic symbols + args = ['nm', '--no-sort'] + if dynamic: + args.append('--dynamic') + args.append(library) + print("+ %s" % ' '.join(args)) + proc = subprocess.run(args, stdout=subprocess.PIPE, universal_newlines=True) + if proc.returncode: + sys.stdout.write(proc.stdout) + sys.exit(proc.returncode) + + stdout = proc.stdout.rstrip() + if not stdout: + raise Exception("command output is empty") + return stdout + + +def get_smelly_symbols(stdout): + smelly_symbols = [] + python_symbols = [] + local_symbols = [] + + for line in stdout.splitlines(): + # Split line '0000000000001b80 D PyTextIOWrapper_Type' + if not line: + continue + + parts = line.split(maxsplit=2) + if len(parts) < 3: + continue + + symtype = parts[1].strip() + symbol = parts[-1] + result = '%s (type: %s)' % (symbol, symtype) + + if symbol.startswith(ALLOWED_PREFIXES): + python_symbols.append(result) + continue + + if is_local_symbol_type(symtype): + local_symbols.append(result) + elif symbol in IGNORED_SYMBOLS: + local_symbols.append(result) + else: + smelly_symbols.append(result) + + if local_symbols: + print(f"Ignore {len(local_symbols)} local symbols") + return smelly_symbols, python_symbols + + +def check_library(library, dynamic=False): + nm_output = get_exported_symbols(library, dynamic) + smelly_symbols, python_symbols = get_smelly_symbols(nm_output) + + if not smelly_symbols: + print(f"OK: no smelly symbol found ({len(python_symbols)} Python symbols)") + return 0 + + print() + smelly_symbols.sort() + for symbol in smelly_symbols: + print("Smelly symbol: %s" % symbol) + + print() + print("ERROR: Found %s smelly symbols!" % len(smelly_symbols)) + return len(smelly_symbols) + + +def check_extensions(): + print(__file__) + # This assumes pybuilddir.txt is in same directory as pyconfig.h. + # In the case of out-of-tree builds, we can't assume pybuilddir.txt is + # in the source folder. 
+ config_dir = os.path.dirname(sysconfig.get_config_h_filename()) + filename = os.path.join(config_dir, "pybuilddir.txt") + try: + with open(filename, encoding="utf-8") as fp: + pybuilddir = fp.readline() + except FileNotFoundError: + print(f"Cannot check extensions because {filename} does not exist") + return True + + print(f"Check extension modules from {pybuilddir} directory") + builddir = os.path.join(config_dir, pybuilddir) + nsymbol = 0 + for name in os.listdir(builddir): + if not name.endswith(".so"): + continue + if IGNORED_EXTENSION in name: + print() + print(f"Ignore extension: {name}") + continue + + print() + filename = os.path.join(builddir, name) + nsymbol += check_library(filename, dynamic=True) + + return nsymbol + + +def main(): + nsymbol = 0 + + # static library + LIBRARY = sysconfig.get_config_var('LIBRARY') + if not LIBRARY: + raise Exception("failed to get LIBRARY variable from sysconfig") + if os.path.exists(LIBRARY): + nsymbol += check_library(LIBRARY) + + # dynamic library + LDLIBRARY = sysconfig.get_config_var('LDLIBRARY') + if not LDLIBRARY: + raise Exception("failed to get LDLIBRARY variable from sysconfig") + if LDLIBRARY != LIBRARY: + print() + nsymbol += check_library(LDLIBRARY, dynamic=True) + + # Check extension modules like _ssl.cpython-310d-x86_64-linux-gnu.so + nsymbol += check_extensions() + + if nsymbol: + print() + print(f"ERROR: Found {nsymbol} smelly symbols in total!") + sys.exit(1) + + print() + print(f"OK: all exported symbols of all libraries " + f"are prefixed with {' or '.join(map(repr, ALLOWED_PREFIXES))}") + + +if __name__ == "__main__": + main() diff --git a/Tools/build/stable_abi.py b/Tools/build/stable_abi.py new file mode 100644 index 0000000..88db93e --- /dev/null +++ b/Tools/build/stable_abi.py @@ -0,0 +1,757 @@ +"""Check the stable ABI manifest or generate files from it + +By default, the tool only checks existing files/libraries. +Pass --generate to recreate auto-generated files instead. + +For actions that take a FILENAME, the filename can be left out to use a default +(relative to the manifest file, as they appear in the CPython codebase). +""" + +from functools import partial +from pathlib import Path +import dataclasses +import subprocess +import sysconfig +import argparse +import textwrap +import tomllib +import difflib +import pprint +import sys +import os +import os.path +import io +import re +import csv + +SCRIPT_NAME = 'Tools/build/stable_abi.py' +MISSING = object() + +EXCLUDED_HEADERS = { + "bytes_methods.h", + "cellobject.h", + "classobject.h", + "code.h", + "compile.h", + "datetime.h", + "dtoa.h", + "frameobject.h", + "genobject.h", + "longintrepr.h", + "parsetok.h", + "pyatomic.h", + "pytime.h", + "token.h", + "ucnhash.h", +} +MACOS = (sys.platform == "darwin") +UNIXY = MACOS or (sys.platform == "linux") # XXX should this be "not Windows"? + + +# The stable ABI manifest (Misc/stable_abi.toml) exists only to fill the +# following dataclasses. +# Feel free to change its syntax (and the `parse_manifest` function) +# to better serve that purpose (while keeping it human-readable). + +class Manifest: + """Collection of `ABIItem`s forming the stable ABI/limited API.""" + def __init__(self): + self.contents = dict() + + def add(self, item): + if item.name in self.contents: + # We assume that stable ABI items do not share names, + # even if they're different kinds (e.g. function vs. macro). 
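+            # For example, declaring both [function.Py_Spam] and
+            # [macro.Py_Spam] in the manifest (a hypothetical name)
+            # would end up here.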
+ raise ValueError(f'duplicate ABI item {item.name}') + self.contents[item.name] = item + + def select(self, kinds, *, include_abi_only=True, ifdef=None): + """Yield selected items of the manifest + + kinds: set of requested kinds, e.g. {'function', 'macro'} + include_abi_only: if True (default), include all items of the + stable ABI. + If False, include only items from the limited API + (i.e. items people should use today) + ifdef: set of feature macros (e.g. {'HAVE_FORK', 'MS_WINDOWS'}). + If None (default), items are not filtered by this. (This is + different from the empty set, which filters out all such + conditional items.) + """ + for name, item in sorted(self.contents.items()): + if item.kind not in kinds: + continue + if item.abi_only and not include_abi_only: + continue + if (ifdef is not None + and item.ifdef is not None + and item.ifdef not in ifdef): + continue + yield item + + def dump(self): + """Yield lines to recreate the manifest file (sans comments/newlines)""" + for item in self.contents.values(): + fields = dataclasses.fields(item) + yield f"[{item.kind}.{item.name}]" + for field in fields: + if field.name in {'name', 'value', 'kind'}: + continue + value = getattr(item, field.name) + if value == field.default: + pass + elif value is True: + yield f" {field.name} = true" + elif value: + yield f" {field.name} = {value!r}" + + +itemclasses = {} +def itemclass(kind): + """Register the decorated class in `itemclasses`""" + def decorator(cls): + itemclasses[kind] = cls + return cls + return decorator + +@itemclass('function') +@itemclass('macro') +@itemclass('data') +@itemclass('const') +@itemclass('typedef') +@dataclasses.dataclass +class ABIItem: + """Information on one item (function, macro, struct, etc.)""" + + name: str + kind: str + added: str = None + abi_only: bool = False + ifdef: str = None + +@itemclass('feature_macro') +@dataclasses.dataclass(kw_only=True) +class FeatureMacro(ABIItem): + name: str + doc: str + windows: bool = False + abi_only: bool = True + +@itemclass('struct') +@dataclasses.dataclass(kw_only=True) +class Struct(ABIItem): + struct_abi_kind: str + members: list = None + + +def parse_manifest(file): + """Parse the given file (iterable of lines) to a Manifest""" + + manifest = Manifest() + + data = tomllib.load(file) + + for kind, itemclass in itemclasses.items(): + for name, item_data in data[kind].items(): + try: + item = itemclass(name=name, kind=kind, **item_data) + manifest.add(item) + except BaseException as exc: + exc.add_note(f'in {kind} {name}') + raise + + return manifest + +# The tool can run individual "actions". +# Most actions are "generators", which generate a single file from the +# manifest. (Checking works by generating a temp file & comparing.) +# Other actions, like "--unixy-check", don't work on a single file. 
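+#
+# A hypothetical generator would be registered and used like this
+# (sketch only, not one of the real generators below):
+#
+#     @generator("example_list", 'Doc/data/example.dat')
+#     def gen_example(manifest, args, outfile):
+#         """Generate/check a plain list of limited API functions"""
+#         for item in manifest.select({'function'}, include_abi_only=False):
+#             print(item.name, file=outfile)
+#
+# generate_or_check() then renders the output to a buffer and either
+# writes it to the default path (with --generate) or diffs it against
+# the existing file.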
+ +generators = [] +def generator(var_name, default_path): + """Decorates a file generator: function that writes to a file""" + def _decorator(func): + func.var_name = var_name + func.arg_name = '--' + var_name.replace('_', '-') + func.default_path = default_path + generators.append(func) + return func + return _decorator + + +@generator("python3dll", 'PC/python3dll.c') +def gen_python3dll(manifest, args, outfile): + """Generate/check the source for the Windows stable ABI library""" + write = partial(print, file=outfile) + content = f""" + /* Re-export stable Python ABI */ + + /* Generated by {SCRIPT_NAME} */ + """ + content += r""" + #ifdef _M_IX86 + #define DECORATE "_" + #else + #define DECORATE + #endif + + #define EXPORT_FUNC(name) \ + __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name)) + #define EXPORT_DATA(name) \ + __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name ",DATA")) + """ + write(textwrap.dedent(content)) + + def sort_key(item): + return item.name.lower() + + windows_feature_macros = { + item.name for item in manifest.select({'feature_macro'}) if item.windows + } + for item in sorted( + manifest.select( + {'function'}, + include_abi_only=True, + ifdef=windows_feature_macros), + key=sort_key): + write(f'EXPORT_FUNC({item.name})') + + write() + + for item in sorted( + manifest.select( + {'data'}, + include_abi_only=True, + ifdef=windows_feature_macros), + key=sort_key): + write(f'EXPORT_DATA({item.name})') + +REST_ROLES = { + 'function': 'function', + 'data': 'var', + 'struct': 'type', + 'macro': 'macro', + # 'const': 'const', # all undocumented + 'typedef': 'type', +} + +@generator("doc_list", 'Doc/data/stable_abi.dat') +def gen_doc_annotations(manifest, args, outfile): + """Generate/check the stable ABI list for documentation annotations""" + writer = csv.DictWriter( + outfile, + ['role', 'name', 'added', 'ifdef_note', 'struct_abi_kind'], + lineterminator='\n') + writer.writeheader() + for item in manifest.select(REST_ROLES.keys(), include_abi_only=False): + if item.ifdef: + ifdef_note = manifest.contents[item.ifdef].doc + else: + ifdef_note = None + row = { + 'role': REST_ROLES[item.kind], + 'name': item.name, + 'added': item.added, + 'ifdef_note': ifdef_note} + rows = [row] + if item.kind == 'struct': + row['struct_abi_kind'] = item.struct_abi_kind + for member_name in item.members or (): + rows.append({ + 'role': 'member', + 'name': f'{item.name}.{member_name}', + 'added': item.added}) + writer.writerows(rows) + +@generator("ctypes_test", 'Lib/test/test_stable_abi_ctypes.py') +def gen_ctypes_test(manifest, args, outfile): + """Generate/check the ctypes-based test for exported symbols""" + write = partial(print, file=outfile) + write(textwrap.dedent(''' + # Generated by Tools/scripts/stable_abi.py + + """Test that all symbols of the Stable ABI are accessible using ctypes + """ + + import sys + import unittest + from test.support.import_helper import import_module + from _testcapi import get_feature_macros + + feature_macros = get_feature_macros() + ctypes_test = import_module('ctypes') + + class TestStableABIAvailability(unittest.TestCase): + def test_available_symbols(self): + + for symbol_name in SYMBOL_NAMES: + with self.subTest(symbol_name): + ctypes_test.pythonapi[symbol_name] + + def test_feature_macros(self): + self.assertEqual( + set(get_feature_macros()), EXPECTED_FEATURE_MACROS) + + # The feature macros for Windows are used in creating the DLL + # definition, so they must be known on all platforms. 
+ # If we are on Windows, we check that the hardcoded data matches + # the reality. + @unittest.skipIf(sys.platform != "win32", "Windows specific test") + def test_windows_feature_macros(self): + for name, value in WINDOWS_FEATURE_MACROS.items(): + if value != 'maybe': + with self.subTest(name): + self.assertEqual(feature_macros[name], value) + + SYMBOL_NAMES = ( + ''')) + items = manifest.select( + {'function', 'data'}, + include_abi_only=True, + ) + optional_items = {} + for item in items: + if item.name in ( + # Some symbols aren't exported on all platforms. + # This is a bug: https://bugs.python.org/issue44133 + 'PyModule_Create2', 'PyModule_FromDefAndSpec2', + ): + continue + if item.ifdef: + optional_items.setdefault(item.ifdef, []).append(item.name) + else: + write(f' "{item.name}",') + write(")") + for ifdef, names in optional_items.items(): + write(f"if feature_macros[{ifdef!r}]:") + write(f" SYMBOL_NAMES += (") + for name in names: + write(f" {name!r},") + write(" )") + write("") + feature_macros = list(manifest.select({'feature_macro'})) + feature_names = sorted(m.name for m in feature_macros) + write(f"EXPECTED_FEATURE_MACROS = set({pprint.pformat(feature_names)})") + + windows_feature_macros = {m.name: m.windows for m in feature_macros} + write(f"WINDOWS_FEATURE_MACROS = {pprint.pformat(windows_feature_macros)}") + + +@generator("testcapi_feature_macros", 'Modules/_testcapi_feature_macros.inc') +def gen_testcapi_feature_macros(manifest, args, outfile): + """Generate/check the stable ABI list for documentation annotations""" + write = partial(print, file=outfile) + write('// Generated by Tools/scripts/stable_abi.py') + write() + write('// Add an entry in dict `result` for each Stable ABI feature macro.') + write() + for macro in manifest.select({'feature_macro'}): + name = macro.name + write(f'#ifdef {name}') + write(f' res = PyDict_SetItemString(result, "{name}", Py_True);') + write('#else') + write(f' res = PyDict_SetItemString(result, "{name}", Py_False);') + write('#endif') + write('if (res) {') + write(' Py_DECREF(result); return NULL;') + write('}') + write() + + +def generate_or_check(manifest, args, path, func): + """Generate/check a file with a single generator + + Return True if successful; False if a comparison failed. 
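+
+    Checking works by rendering the generator's output into an in-memory
+    buffer and comparing it with the file on disk; with --generate the
+    file is overwritten instead.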
+ """ + + outfile = io.StringIO() + func(manifest, args, outfile) + generated = outfile.getvalue() + existing = path.read_text() + + if generated != existing: + if args.generate: + path.write_text(generated) + else: + print(f'File {path} differs from expected!') + diff = difflib.unified_diff( + generated.splitlines(), existing.splitlines(), + str(path), '', + lineterm='', + ) + for line in diff: + print(line) + return False + return True + + +def do_unixy_check(manifest, args): + """Check headers & library using "Unixy" tools (GCC/clang, binutils)""" + okay = True + + # Get all macros first: we'll need feature macros like HAVE_FORK and + # MS_WINDOWS for everything else + present_macros = gcc_get_limited_api_macros(['Include/Python.h']) + feature_macros = set(m.name for m in manifest.select({'feature_macro'})) + feature_macros &= present_macros + + # Check that we have all needed macros + expected_macros = set( + item.name for item in manifest.select({'macro'}) + ) + missing_macros = expected_macros - present_macros + okay &= _report_unexpected_items( + missing_macros, + 'Some macros from are not defined from "Include/Python.h"' + + 'with Py_LIMITED_API:') + + expected_symbols = set(item.name for item in manifest.select( + {'function', 'data'}, include_abi_only=True, ifdef=feature_macros, + )) + + # Check the static library (*.a) + LIBRARY = sysconfig.get_config_var("LIBRARY") + if not LIBRARY: + raise Exception("failed to get LIBRARY variable from sysconfig") + if os.path.exists(LIBRARY): + okay &= binutils_check_library( + manifest, LIBRARY, expected_symbols, dynamic=False) + + # Check the dynamic library (*.so) + LDLIBRARY = sysconfig.get_config_var("LDLIBRARY") + if not LDLIBRARY: + raise Exception("failed to get LDLIBRARY variable from sysconfig") + okay &= binutils_check_library( + manifest, LDLIBRARY, expected_symbols, dynamic=False) + + # Check definitions in the header files + expected_defs = set(item.name for item in manifest.select( + {'function', 'data'}, include_abi_only=False, ifdef=feature_macros, + )) + found_defs = gcc_get_limited_api_definitions(['Include/Python.h']) + missing_defs = expected_defs - found_defs + okay &= _report_unexpected_items( + missing_defs, + 'Some expected declarations were not declared in ' + + '"Include/Python.h" with Py_LIMITED_API:') + + # Some Limited API macros are defined in terms of private symbols. + # These are not part of Limited API (even though they're defined with + # Py_LIMITED_API). They must be part of the Stable ABI, though. 
+ private_symbols = {n for n in expected_symbols if n.startswith('_')} + extra_defs = found_defs - expected_defs - private_symbols + okay &= _report_unexpected_items( + extra_defs, + 'Some extra declarations were found in "Include/Python.h" ' + + 'with Py_LIMITED_API:') + + return okay + + +def _report_unexpected_items(items, msg): + """If there are any `items`, report them using "msg" and return false""" + if items: + print(msg, file=sys.stderr) + for item in sorted(items): + print(' -', item, file=sys.stderr) + return False + return True + + +def binutils_get_exported_symbols(library, dynamic=False): + """Retrieve exported symbols using the nm(1) tool from binutils""" + # Only look at dynamic symbols + args = ["nm", "--no-sort"] + if dynamic: + args.append("--dynamic") + args.append(library) + proc = subprocess.run(args, stdout=subprocess.PIPE, universal_newlines=True) + if proc.returncode: + sys.stdout.write(proc.stdout) + sys.exit(proc.returncode) + + stdout = proc.stdout.rstrip() + if not stdout: + raise Exception("command output is empty") + + for line in stdout.splitlines(): + # Split line '0000000000001b80 D PyTextIOWrapper_Type' + if not line: + continue + + parts = line.split(maxsplit=2) + if len(parts) < 3: + continue + + symbol = parts[-1] + if MACOS and symbol.startswith("_"): + yield symbol[1:] + else: + yield symbol + + +def binutils_check_library(manifest, library, expected_symbols, dynamic): + """Check that library exports all expected_symbols""" + available_symbols = set(binutils_get_exported_symbols(library, dynamic)) + missing_symbols = expected_symbols - available_symbols + if missing_symbols: + print(textwrap.dedent(f"""\ + Some symbols from the limited API are missing from {library}: + {', '.join(missing_symbols)} + + This error means that there are some missing symbols among the + ones exported in the library. + This normally means that some symbol, function implementation or + a prototype belonging to a symbol in the limited API has been + deleted or is missing. + """), file=sys.stderr) + return False + return True + + +def gcc_get_limited_api_macros(headers): + """Get all limited API macros from headers. + + Runs the preprocessor over all the header files in "Include" setting + "-DPy_LIMITED_API" to the correct value for the running version of the + interpreter and extracting all macro definitions (via adding -dM to the + compiler arguments). + + Requires Python built with a GCC-compatible compiler. (clang might work) + """ + + api_hexversion = sys.version_info.major << 24 | sys.version_info.minor << 16 + + preprocesor_output_with_macros = subprocess.check_output( + sysconfig.get_config_var("CC").split() + + [ + # Prevent the expansion of the exported macros so we can + # capture them later + "-DSIZEOF_WCHAR_T=4", # The actual value is not important + f"-DPy_LIMITED_API={api_hexversion}", + "-I.", + "-I./Include", + "-dM", + "-E", + ] + + [str(file) for file in headers], + text=True, + ) + + return { + target + for target in re.findall( + r"#define (\w+)", preprocesor_output_with_macros + ) + } + + +def gcc_get_limited_api_definitions(headers): + """Get all limited API definitions from headers. + + Run the preprocessor over all the header files in "Include" setting + "-DPy_LIMITED_API" to the correct value for the running version of the + interpreter. + + The limited API symbols will be extracted from the output of this command + as it includes the prototypes and definitions of all the exported symbols + that are in the limited api. 
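+
+    For example, a declaration like "PyAPI_FUNC(PyObject *) PySpam(void);"
+    survives preprocessing as "__PyAPI_FUNC(PyObject *) PySpam(void);",
+    from which the regular expressions below capture the name "PySpam"
+    (a hypothetical symbol).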
+
+    This function does *NOT* extract the macros defined in the limited API.
+
+    Requires Python built with a GCC-compatible compiler. (clang might work)
+    """
+    api_hexversion = sys.version_info.major << 24 | sys.version_info.minor << 16
+    preprocesor_output = subprocess.check_output(
+        sysconfig.get_config_var("CC").split()
+        + [
+            # Prevent the expansion of the exported macros so we can capture
+            # them later
+            "-DPyAPI_FUNC=__PyAPI_FUNC",
+            "-DPyAPI_DATA=__PyAPI_DATA",
+            "-DEXPORT_DATA=__EXPORT_DATA",
+            "-D_Py_NO_RETURN=",
+            "-DSIZEOF_WCHAR_T=4",  # The actual value is not important
+            f"-DPy_LIMITED_API={api_hexversion}",
+            "-I.",
+            "-I./Include",
+            "-E",
+        ]
+        + [str(file) for file in headers],
+        text=True,
+        stderr=subprocess.DEVNULL,
+    )
+    stable_functions = set(
+        re.findall(r"__PyAPI_FUNC\(.*?\)\s*(.*?)\s*\(", preprocesor_output)
+    )
+    stable_exported_data = set(
+        re.findall(r"__EXPORT_DATA\((.*?)\)", preprocesor_output)
+    )
+    stable_data = set(
+        re.findall(r"__PyAPI_DATA\(.*?\)[\s\*\(]*([^);]*)\)?.*;", preprocesor_output)
+    )
+    return stable_data | stable_exported_data | stable_functions
+
+def check_private_names(manifest):
+    """Ensure limited API doesn't contain private names
+
+    Names prefixed by an underscore are private by definition.
+    """
+    for name, item in manifest.contents.items():
+        if name.startswith('_') and not item.abi_only:
+            raise ValueError(
+                f'`{name}` is private (underscore-prefixed) and should be '
+                + 'removed from the stable ABI list or marked `abi_only`')
+
+def check_dump(manifest, filename):
+    """Check that manifest.dump() corresponds to the data.
+
+    Mainly useful when debugging this script.
+    """
+    dumped = tomllib.loads('\n'.join(manifest.dump()))
+    with filename.open('rb') as file:
+        from_file = tomllib.load(file)
+    if dumped != from_file:
+        print('Dump differs from loaded data!', file=sys.stderr)
+        diff = difflib.unified_diff(
+            pprint.pformat(dumped).splitlines(),
+            pprint.pformat(from_file).splitlines(),
+            '', str(filename),
+            lineterm='',
+        )
+        for line in diff:
+            print(line, file=sys.stderr)
+        return False
+    else:
+        return True
+
+def main():
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument(
+        "file", type=Path, metavar='FILE',
+        help="file with the stable abi manifest",
+    )
+    parser.add_argument(
+        "--generate", action='store_true',
+        help="generate file(s), rather than just checking them",
+    )
+    parser.add_argument(
+        "--generate-all", action='store_true',
+        help="as --generate, but generate all file(s) using default filenames."
+        + " (unlike --all, does not run any extra checks)",
+    )
+    parser.add_argument(
+        "-a", "--all", action='store_true',
+        help="run all available checks using default filenames",
+    )
+    parser.add_argument(
+        "-l", "--list", action='store_true',
+        help="list available generators and their default filenames; then exit",
+    )
+    parser.add_argument(
+        "--dump", action='store_true',
+        help="dump the manifest contents (used for debugging the parser)",
+    )
+
+    actions_group = parser.add_argument_group('actions')
+    for gen in generators:
+        actions_group.add_argument(
+            gen.arg_name, dest=gen.var_name,
+            type=str, nargs="?", default=MISSING,
+            metavar='FILENAME',
+            help=gen.__doc__,
+        )
+    actions_group.add_argument(
+        '--unixy-check', action='store_true',
+        help=do_unixy_check.__doc__,
+    )
+    args = parser.parse_args()
+
+    base_path = args.file.parent.parent
+
+    if args.list:
+        for gen in generators:
+            print(f'{gen.arg_name}: {base_path / gen.default_path}')
+        sys.exit(0)
+
+    run_all_generators = args.generate_all
+
+    if args.generate_all:
+        args.generate = True
+
+    if args.all:
+        run_all_generators = True
+        args.unixy_check = True
+
+    try:
+        file = args.file.open('rb')
+    except FileNotFoundError as err:
+        if args.file.suffix == '.txt':
+            # Provide a better error message
+            suggestion = args.file.with_suffix('.toml')
+            raise FileNotFoundError(
+                f'{args.file} not found. Did you mean {suggestion}?') from err
+        raise
+    with file:
+        manifest = parse_manifest(file)
+
+    check_private_names(manifest)
+
+    # Remember results of all actions (as booleans).
+    # At the end we'll check that at least one action was run,
+    # and also fail if any are false.
+    results = {}
+
+    if args.dump:
+        for line in manifest.dump():
+            print(line)
+        results['dump'] = check_dump(manifest, args.file)
+
+    for gen in generators:
+        filename = getattr(args, gen.var_name)
+        if filename is None or (run_all_generators and filename is MISSING):
+            filename = base_path / gen.default_path
+        elif filename is MISSING:
+            continue
+
+        results[gen.var_name] = generate_or_check(manifest, args, filename, gen)
+
+    if args.unixy_check:
+        results['unixy_check'] = do_unixy_check(manifest, args)
+
+    if not results:
+        if args.generate:
+            parser.error('No file specified. Use --help for usage.')
+        parser.error('No check specified. Use --help for usage.')
+
+    failed_results = [name for name, result in results.items() if not result]

+    if failed_results:
+        raise Exception(f"""
+        These checks related to the stable ABI did not succeed:
+            {', '.join(failed_results)}
+
+        If you see diffs in the output, files derived from the stable
+        ABI manifest were not regenerated.
+        Run `make regen-limited-abi` to fix this.
+
+        Otherwise, see the error(s) above.
+
+        The stable ABI manifest is at: {args.file}
+        Note that there is a process to follow when modifying it.
+
+        You can read more about the limited API and its contracts at:
+
+        https://docs.python.org/3/c-api/stable.html
+
+        And in PEP 384:
+
+        https://peps.python.org/pep-0384/
+        """)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Tools/build/umarshal.py b/Tools/build/umarshal.py
new file mode 100644
index 0000000..f61570c
--- /dev/null
+++ b/Tools/build/umarshal.py
@@ -0,0 +1,325 @@
+# Implement marshal.loads() in pure Python
+
+import ast
+
+from typing import Any, Tuple
+
+
+class Type:
+    # Adapted from marshal.c
+    NULL = ord('0')
+    NONE = ord('N')
+    FALSE = ord('F')
+    TRUE = ord('T')
+    STOPITER = ord('S')
+    ELLIPSIS = ord('.')
+    INT = ord('i')
+    INT64 = ord('I')
+    FLOAT = ord('f')
+    BINARY_FLOAT = ord('g')
+    COMPLEX = ord('x')
+    BINARY_COMPLEX = ord('y')
+    LONG = ord('l')
+    STRING = ord('s')
+    INTERNED = ord('t')
+    REF = ord('r')
+    TUPLE = ord('(')
+    LIST = ord('[')
+    DICT = ord('{')
+    CODE = ord('c')
+    UNICODE = ord('u')
+    UNKNOWN = ord('?')
+    SET = ord('<')
+    FROZENSET = ord('>')
+    ASCII = ord('a')
+    ASCII_INTERNED = ord('A')
+    SMALL_TUPLE = ord(')')
+    SHORT_ASCII = ord('z')
+    SHORT_ASCII_INTERNED = ord('Z')
+
+
+FLAG_REF = 0x80  # with a type, add obj to index
+
+NULL = object()  # marker
+
+# Cell kinds
+CO_FAST_LOCAL = 0x20
+CO_FAST_CELL = 0x40
+CO_FAST_FREE = 0x80
+
+
+class Code:
+    def __init__(self, **kwds: Any):
+        self.__dict__.update(kwds)
+
+    def __repr__(self) -> str:
+        return f"Code(**{self.__dict__})"
+
+    co_localsplusnames: Tuple[str, ...]
+    co_localspluskinds: Tuple[int, ...]
+
+    def get_localsplus_names(self, select_kind: int) -> Tuple[str, ...]:
+        varnames: list[str] = []
+        for name, kind in zip(self.co_localsplusnames,
+                              self.co_localspluskinds):
+            if kind & select_kind:
+                varnames.append(name)
+        return tuple(varnames)
+
+    @property
+    def co_varnames(self) -> Tuple[str, ...]:
+        return self.get_localsplus_names(CO_FAST_LOCAL)
+
+    @property
+    def co_cellvars(self) -> Tuple[str, ...]:
+        return self.get_localsplus_names(CO_FAST_CELL)
+
+    @property
+    def co_freevars(self) -> Tuple[str, ...]:
+        return self.get_localsplus_names(CO_FAST_FREE)
+
+    @property
+    def co_nlocals(self) -> int:
+        return len(self.co_varnames)
+
+
+class Reader:
+    # A fairly literal translation of the marshal reader.
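+    #
+    # For a quick sense of the wire format: marshal.dumps(None) is the
+    # single byte b'N' (Type.NONE), and container values are read by
+    # recursive calls to r_object() for each element.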
+
+    def __init__(self, data: bytes):
+        self.data: bytes = data
+        self.end: int = len(self.data)
+        self.pos: int = 0
+        self.refs: list[Any] = []
+        self.level: int = 0
+
+    def r_string(self, n: int) -> bytes:
+        assert 0 <= n <= self.end - self.pos
+        buf = self.data[self.pos : self.pos + n]
+        self.pos += n
+        return buf
+
+    def r_byte(self) -> int:
+        buf = self.r_string(1)
+        return buf[0]
+
+    def r_short(self) -> int:
+        buf = self.r_string(2)
+        x = buf[0]
+        x |= buf[1] << 8
+        x |= -(x & (1<<15))  # Sign-extend
+        return x
+
+    def r_long(self) -> int:
+        buf = self.r_string(4)
+        x = buf[0]
+        x |= buf[1] << 8
+        x |= buf[2] << 16
+        x |= buf[3] << 24
+        x |= -(x & (1<<31))  # Sign-extend
+        return x
+
+    def r_long64(self) -> int:
+        buf = self.r_string(8)
+        x = buf[0]
+        x |= buf[1] << 8
+        x |= buf[2] << 16
+        x |= buf[3] << 24
+        x |= buf[4] << 32
+        x |= buf[5] << 40
+        x |= buf[6] << 48
+        x |= buf[7] << 56
+        x |= -(x & (1<<63))  # Sign-extend
+        return x
+
+    def r_PyLong(self) -> int:
+        n = self.r_long()
+        size = abs(n)
+        x = 0
+        # Pray this is right
+        for i in range(size):
+            x |= self.r_short() << i*15
+        if n < 0:
+            x = -x
+        return x
+
+    def r_float_bin(self) -> float:
+        buf = self.r_string(8)
+        import struct  # Lazy import to avoid breaking UNIX build
+        return struct.unpack("d", buf)[0]
+
+    def r_float_str(self) -> float:
+        n = self.r_byte()
+        buf = self.r_string(n)
+        return ast.literal_eval(buf.decode("ascii"))
+
+    def r_ref_reserve(self, flag: int) -> int:
+        if flag:
+            idx = len(self.refs)
+            self.refs.append(None)
+            return idx
+        else:
+            return 0
+
+    def r_ref_insert(self, obj: Any, idx: int, flag: int) -> Any:
+        if flag:
+            self.refs[idx] = obj
+        return obj
+
+    def r_ref(self, obj: Any, flag: int) -> Any:
+        assert flag & FLAG_REF
+        self.refs.append(obj)
+        return obj
+
+    def r_object(self) -> Any:
+        old_level = self.level
+        try:
+            return self._r_object()
+        finally:
+            self.level = old_level
+
+    def _r_object(self) -> Any:
+        code = self.r_byte()
+        flag = code & FLAG_REF
+        type = code & ~FLAG_REF
+        # print(" "*self.level + f"{code} {flag} {type} {chr(type)!r}")
+        self.level += 1
+
+        def R_REF(obj: Any) -> Any:
+            if flag:
+                obj = self.r_ref(obj, flag)
+            return obj
+
+        if type == Type.NULL:
+            return NULL
+        elif type == Type.NONE:
+            return None
+        elif type == Type.ELLIPSIS:
+            return Ellipsis
+        elif type == Type.FALSE:
+            return False
+        elif type == Type.TRUE:
+            return True
+        elif type == Type.INT:
+            return R_REF(self.r_long())
+        elif type == Type.INT64:
+            return R_REF(self.r_long64())
+        elif type == Type.LONG:
+            return R_REF(self.r_PyLong())
+        elif type == Type.FLOAT:
+            return R_REF(self.r_float_str())
+        elif type == Type.BINARY_FLOAT:
+            return R_REF(self.r_float_bin())
+        elif type == Type.COMPLEX:
+            return R_REF(complex(self.r_float_str(),
+                                 self.r_float_str()))
+        elif type == Type.BINARY_COMPLEX:
+            return R_REF(complex(self.r_float_bin(),
+                                 self.r_float_bin()))
+        elif type == Type.STRING:
+            n = self.r_long()
+            return R_REF(self.r_string(n))
+        elif type == Type.ASCII_INTERNED or type == Type.ASCII:
+            n = self.r_long()
+            return R_REF(self.r_string(n).decode("ascii"))
+        elif type == Type.SHORT_ASCII_INTERNED or type == Type.SHORT_ASCII:
+            n = self.r_byte()
+            return R_REF(self.r_string(n).decode("ascii"))
+        elif type == Type.INTERNED or type == Type.UNICODE:
+            n = self.r_long()
+            return R_REF(self.r_string(n).decode("utf8", "surrogatepass"))
+        elif type == Type.SMALL_TUPLE:
+            n = self.r_byte()
+            idx = self.r_ref_reserve(flag)
+            retval: Any = tuple(self.r_object() for _ in range(n))
+            self.r_ref_insert(retval, idx,
flag) + return retval + elif type == Type.TUPLE: + n = self.r_long() + idx = self.r_ref_reserve(flag) + retval = tuple(self.r_object() for _ in range(n)) + self.r_ref_insert(retval, idx, flag) + return retval + elif type == Type.LIST: + n = self.r_long() + retval = R_REF([]) + for _ in range(n): + retval.append(self.r_object()) + return retval + elif type == Type.DICT: + retval = R_REF({}) + while True: + key = self.r_object() + if key == NULL: + break + val = self.r_object() + retval[key] = val + return retval + elif type == Type.SET: + n = self.r_long() + retval = R_REF(set()) + for _ in range(n): + v = self.r_object() + retval.add(v) + return retval + elif type == Type.FROZENSET: + n = self.r_long() + s: set[Any] = set() + idx = self.r_ref_reserve(flag) + for _ in range(n): + v = self.r_object() + s.add(v) + retval = frozenset(s) + self.r_ref_insert(retval, idx, flag) + return retval + elif type == Type.CODE: + retval = R_REF(Code()) + retval.co_argcount = self.r_long() + retval.co_posonlyargcount = self.r_long() + retval.co_kwonlyargcount = self.r_long() + retval.co_stacksize = self.r_long() + retval.co_flags = self.r_long() + retval.co_code = self.r_object() + retval.co_consts = self.r_object() + retval.co_names = self.r_object() + retval.co_localsplusnames = self.r_object() + retval.co_localspluskinds = self.r_object() + retval.co_filename = self.r_object() + retval.co_name = self.r_object() + retval.co_qualname = self.r_object() + retval.co_firstlineno = self.r_long() + retval.co_linetable = self.r_object() + retval.co_exceptiontable = self.r_object() + return retval + elif type == Type.REF: + n = self.r_long() + retval = self.refs[n] + assert retval is not None + return retval + else: + breakpoint() + raise AssertionError(f"Unknown type {type} {chr(type)!r}") + + +def loads(data: bytes) -> Any: + assert isinstance(data, bytes) + r = Reader(data) + return r.r_object() + + +def main(): + # Test + import marshal, pprint + sample = {'foo': {(42, "bar", 3.14)}} + data = marshal.dumps(sample) + retval = loads(data) + assert retval == sample, retval + sample = main.__code__ + data = marshal.dumps(sample) + retval = loads(data) + assert isinstance(retval, Code), retval + pprint.pprint(retval.__dict__) + + +if __name__ == "__main__": + main() diff --git a/Tools/build/update_file.py b/Tools/build/update_file.py new file mode 100644 index 0000000..b4182c1 --- /dev/null +++ b/Tools/build/update_file.py @@ -0,0 +1,92 @@ +""" +A script that replaces an old file with a new one, only if the contents +actually changed. If not, the new file is simply deleted. + +This avoids wholesale rebuilds when a code (re)generation phase does not +actually change the in-tree generated code. +""" + +import contextlib +import os +import os.path +import sys + + +@contextlib.contextmanager +def updating_file_with_tmpfile(filename, tmpfile=None): + """A context manager for updating a file via a temp file. + + The context manager provides two open files: the source file open + for reading, and the temp file, open for writing. + + Upon exiting: both files are closed, and the source file is replaced + with the temp file. + """ + # XXX Optionally use tempfile.TemporaryFile? 
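+    # Typical usage (illustrative; "transform" is a stand-in):
+    #
+    #     with updating_file_with_tmpfile("generated.c") as (infile, outfile):
+    #         for line in infile:
+    #             outfile.write(transform(line))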
+ if not tmpfile: + tmpfile = filename + '.tmp' + elif os.path.isdir(tmpfile): + tmpfile = os.path.join(tmpfile, filename + '.tmp') + + with open(filename, 'rb') as infile: + line = infile.readline() + + if line.endswith(b'\r\n'): + newline = "\r\n" + elif line.endswith(b'\r'): + newline = "\r" + elif line.endswith(b'\n'): + newline = "\n" + else: + raise ValueError(f"unknown end of line: {filename}: {line!a}") + + with open(tmpfile, 'w', newline=newline) as outfile: + with open(filename) as infile: + yield infile, outfile + update_file_with_tmpfile(filename, tmpfile) + + +def update_file_with_tmpfile(filename, tmpfile, *, create=False): + try: + targetfile = open(filename, 'rb') + except FileNotFoundError: + if not create: + raise # re-raise + outcome = 'created' + os.replace(tmpfile, filename) + else: + with targetfile: + old_contents = targetfile.read() + with open(tmpfile, 'rb') as f: + new_contents = f.read() + # Now compare! + if old_contents != new_contents: + outcome = 'updated' + os.replace(tmpfile, filename) + else: + outcome = 'same' + os.unlink(tmpfile) + return outcome + + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--create', action='store_true') + parser.add_argument('--exitcode', action='store_true') + parser.add_argument('filename', help='path to be updated') + parser.add_argument('tmpfile', help='path with new contents') + args = parser.parse_args() + kwargs = vars(args) + setexitcode = kwargs.pop('exitcode') + + outcome = update_file_with_tmpfile(**kwargs) + if setexitcode: + if outcome == 'same': + sys.exit(0) + elif outcome == 'updated': + sys.exit(1) + elif outcome == 'created': + sys.exit(2) + else: + raise NotImplementedError diff --git a/Tools/build/verify_ensurepip_wheels.py b/Tools/build/verify_ensurepip_wheels.py new file mode 100755 index 0000000..044d1fd --- /dev/null +++ b/Tools/build/verify_ensurepip_wheels.py @@ -0,0 +1,98 @@ +#! /usr/bin/env python3 + +""" +Compare checksums for wheels in :mod:`ensurepip` against the Cheeseshop. + +When GitHub Actions executes the script, output is formatted accordingly. 
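+For example, errors are emitted as "::error file=PATH::MESSAGE" workflow
+commands, which GitHub renders as inline annotations; see: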
+https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-notice-message +""" + +import hashlib +import json +import os +import re +from pathlib import Path +from urllib.request import urlopen + +PACKAGE_NAMES = ("pip", "setuptools") +ENSURE_PIP_ROOT = Path(__file__).parent.parent.parent / "Lib/ensurepip" +WHEEL_DIR = ENSURE_PIP_ROOT / "_bundled" +ENSURE_PIP_INIT_PY_TEXT = (ENSURE_PIP_ROOT / "__init__.py").read_text(encoding="utf-8") +GITHUB_ACTIONS = os.getenv("GITHUB_ACTIONS") == "true" + + +def print_notice(file_path: str, message: str) -> None: + if GITHUB_ACTIONS: + message = f"::notice file={file_path}::{message}" + print(message, end="\n\n") + + +def print_error(file_path: str, message: str) -> None: + if GITHUB_ACTIONS: + message = f"::error file={file_path}::{message}" + print(message, end="\n\n") + + +def verify_wheel(package_name: str) -> bool: + # Find the package on disk + package_path = next(WHEEL_DIR.glob(f"{package_name}*.whl"), None) + if not package_path: + print_error("", f"Could not find a {package_name} wheel on disk.") + return False + + print(f"Verifying checksum for {package_path}.") + + # Find the version of the package used by ensurepip + package_version_match = re.search( + f'_{package_name.upper()}_VERSION = "([^"]+)', ENSURE_PIP_INIT_PY_TEXT + ) + if not package_version_match: + print_error( + package_path, + f"No {package_name} version found in Lib/ensurepip/__init__.py.", + ) + return False + package_version = package_version_match[1] + + # Get the SHA 256 digest from the Cheeseshop + try: + raw_text = urlopen(f"https://pypi.org/pypi/{package_name}/json").read() + except (OSError, ValueError): + print_error(package_path, f"Could not fetch JSON metadata for {package_name}.") + return False + + release_files = json.loads(raw_text)["releases"][package_version] + for release_info in release_files: + if package_path.name != release_info["filename"]: + continue + expected_digest = release_info["digests"].get("sha256", "") + break + else: + print_error(package_path, f"No digest for {package_name} found from PyPI.") + return False + + # Compute the SHA 256 digest of the wheel on disk + actual_digest = hashlib.sha256(package_path.read_bytes()).hexdigest() + + print(f"Expected digest: {expected_digest}") + print(f"Actual digest: {actual_digest}") + + if actual_digest != expected_digest: + print_error( + package_path, f"Failed to verify the checksum of the {package_name} wheel." + ) + return False + + print_notice( + package_path, + f"Successfully verified the checksum of the {package_name} wheel.", + ) + return True + + +if __name__ == "__main__": + exit_status = 0 + for package_name in PACKAGE_NAMES: + if not verify_wheel(package_name): + exit_status = 1 + raise SystemExit(exit_status) diff --git a/Tools/scripts/check_extension_modules.py b/Tools/scripts/check_extension_modules.py deleted file mode 100644 index 59239c6..0000000 --- a/Tools/scripts/check_extension_modules.py +++ /dev/null @@ -1,484 +0,0 @@ -"""Check extension modules - -The script checks shared and built-in extension modules. It verifies that the -modules have been built and that they can be imported successfully. Missing -modules and failed imports are reported to the user. Shared extension -files are renamed on failed import. 
- -Module information is parsed from several sources: - -- core modules hard-coded in Modules/config.c.in -- Windows-specific modules that are hard-coded in PC/config.c -- MODULE_{name}_STATE entries in Makefile (provided through sysconfig) -- Various makesetup files: - - $(srcdir)/Modules/Setup - - Modules/Setup.[local|bootstrap|stdlib] files, which are generated - from $(srcdir)/Modules/Setup.*.in files - -See --help for more information -""" -import argparse -import collections -import enum -import logging -import os -import pathlib -import re -import sys -import sysconfig -import warnings - -from importlib._bootstrap import _load as bootstrap_load -from importlib.machinery import BuiltinImporter, ExtensionFileLoader, ModuleSpec -from importlib.util import spec_from_file_location, spec_from_loader -from typing import Iterable - -SRC_DIR = pathlib.Path(__file__).parent.parent.parent - -# core modules, hard-coded in Modules/config.h.in -CORE_MODULES = { - "_ast", - "_imp", - "_string", - "_tokenize", - "_warnings", - "builtins", - "gc", - "marshal", - "sys", -} - -# Windows-only modules -WINDOWS_MODULES = { - "_msi", - "_overlapped", - "_testconsole", - "_winapi", - "msvcrt", - "nt", - "winreg", - "winsound", -} - - -logger = logging.getLogger(__name__) - -parser = argparse.ArgumentParser( - prog="check_extension_modules", - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter, -) - -parser.add_argument( - "--verbose", - action="store_true", - help="Verbose, report builtin, shared, and unavailable modules", -) - -parser.add_argument( - "--debug", - action="store_true", - help="Enable debug logging", -) - -parser.add_argument( - "--strict", - action=argparse.BooleanOptionalAction, - help=( - "Strict check, fail when a module is missing or fails to import" - "(default: no, unless env var PYTHONSTRICTEXTENSIONBUILD is set)" - ), - default=bool(os.environ.get("PYTHONSTRICTEXTENSIONBUILD")), -) - -parser.add_argument( - "--cross-compiling", - action=argparse.BooleanOptionalAction, - help=( - "Use cross-compiling checks " - "(default: no, unless env var _PYTHON_HOST_PLATFORM is set)." 
- ), - default="_PYTHON_HOST_PLATFORM" in os.environ, -) - -parser.add_argument( - "--list-module-names", - action="store_true", - help="Print a list of module names to stdout and exit", -) - - -class ModuleState(enum.Enum): - # Makefile state "yes" - BUILTIN = "builtin" - SHARED = "shared" - - DISABLED = "disabled" - MISSING = "missing" - NA = "n/a" - # disabled by Setup / makesetup rule - DISABLED_SETUP = "disabled_setup" - - def __bool__(self): - return self.value in {"builtin", "shared"} - - -ModuleInfo = collections.namedtuple("ModuleInfo", "name state") - - -class ModuleChecker: - pybuilddir_txt = "pybuilddir.txt" - - setup_files = ( - # see end of configure.ac - "Modules/Setup.local", - "Modules/Setup.stdlib", - "Modules/Setup.bootstrap", - SRC_DIR / "Modules/Setup", - ) - - def __init__(self, cross_compiling: bool = False, strict: bool = False): - self.cross_compiling = cross_compiling - self.strict_extensions_build = strict - self.ext_suffix = sysconfig.get_config_var("EXT_SUFFIX") - self.platform = sysconfig.get_platform() - self.builddir = self.get_builddir() - self.modules = self.get_modules() - - self.builtin_ok = [] - self.shared_ok = [] - self.failed_on_import = [] - self.missing = [] - self.disabled_configure = [] - self.disabled_setup = [] - self.notavailable = [] - - def check(self): - for modinfo in self.modules: - logger.debug("Checking '%s' (%s)", modinfo.name, self.get_location(modinfo)) - if modinfo.state == ModuleState.DISABLED: - self.disabled_configure.append(modinfo) - elif modinfo.state == ModuleState.DISABLED_SETUP: - self.disabled_setup.append(modinfo) - elif modinfo.state == ModuleState.MISSING: - self.missing.append(modinfo) - elif modinfo.state == ModuleState.NA: - self.notavailable.append(modinfo) - else: - try: - if self.cross_compiling: - self.check_module_cross(modinfo) - else: - self.check_module_import(modinfo) - except (ImportError, FileNotFoundError): - self.rename_module(modinfo) - self.failed_on_import.append(modinfo) - else: - if modinfo.state == ModuleState.BUILTIN: - self.builtin_ok.append(modinfo) - else: - assert modinfo.state == ModuleState.SHARED - self.shared_ok.append(modinfo) - - def summary(self, *, verbose: bool = False): - longest = max([len(e.name) for e in self.modules], default=0) - - def print_three_column(modinfos: list[ModuleInfo]): - names = [modinfo.name for modinfo in modinfos] - names.sort(key=str.lower) - # guarantee zip() doesn't drop anything - while len(names) % 3: - names.append("") - for l, m, r in zip(names[::3], names[1::3], names[2::3]): - print("%-*s %-*s %-*s" % (longest, l, longest, m, longest, r)) - - if verbose and self.builtin_ok: - print("The following *built-in* modules have been successfully built:") - print_three_column(self.builtin_ok) - print() - - if verbose and self.shared_ok: - print("The following *shared* modules have been successfully built:") - print_three_column(self.shared_ok) - print() - - if self.disabled_configure: - print("The following modules are *disabled* in configure script:") - print_three_column(self.disabled_configure) - print() - - if self.disabled_setup: - print("The following modules are *disabled* in Modules/Setup files:") - print_three_column(self.disabled_setup) - print() - - if verbose and self.notavailable: - print( - f"The following modules are not available on platform '{self.platform}':" - ) - print_three_column(self.notavailable) - print() - - if self.missing: - print("The necessary bits to build these optional modules were not found:") - print_three_column(self.missing) 
- print("To find the necessary bits, look in configure.ac and config.log.") - print() - - if self.failed_on_import: - print( - "Following modules built successfully " - "but were removed because they could not be imported:" - ) - print_three_column(self.failed_on_import) - print() - - if any( - modinfo.name == "_ssl" for modinfo in self.missing + self.failed_on_import - ): - print("Could not build the ssl module!") - print("Python requires a OpenSSL 1.1.1 or newer") - if sysconfig.get_config_var("OPENSSL_LDFLAGS"): - print("Custom linker flags may require --with-openssl-rpath=auto") - print() - - disabled = len(self.disabled_configure) + len(self.disabled_setup) - print( - f"Checked {len(self.modules)} modules (" - f"{len(self.builtin_ok)} built-in, " - f"{len(self.shared_ok)} shared, " - f"{len(self.notavailable)} n/a on {self.platform}, " - f"{disabled} disabled, " - f"{len(self.missing)} missing, " - f"{len(self.failed_on_import)} failed on import)" - ) - - def check_strict_build(self): - """Fail if modules are missing and it's a strict build""" - if self.strict_extensions_build and (self.failed_on_import or self.missing): - raise RuntimeError("Failed to build some stdlib modules") - - def list_module_names(self, *, all: bool = False) -> set: - names = {modinfo.name for modinfo in self.modules} - if all: - names.update(WINDOWS_MODULES) - return names - - def get_builddir(self) -> pathlib.Path: - try: - with open(self.pybuilddir_txt, encoding="utf-8") as f: - builddir = f.read() - except FileNotFoundError: - logger.error("%s must be run from the top build directory", __file__) - raise - builddir = pathlib.Path(builddir) - logger.debug("%s: %s", self.pybuilddir_txt, builddir) - return builddir - - def get_modules(self) -> list[ModuleInfo]: - """Get module info from sysconfig and Modules/Setup* files""" - seen = set() - modules = [] - # parsing order is important, first entry wins - for modinfo in self.get_core_modules(): - modules.append(modinfo) - seen.add(modinfo.name) - for setup_file in self.setup_files: - for modinfo in self.parse_setup_file(setup_file): - if modinfo.name not in seen: - modules.append(modinfo) - seen.add(modinfo.name) - for modinfo in self.get_sysconfig_modules(): - if modinfo.name not in seen: - modules.append(modinfo) - seen.add(modinfo.name) - logger.debug("Found %i modules in total", len(modules)) - modules.sort() - return modules - - def get_core_modules(self) -> Iterable[ModuleInfo]: - """Get hard-coded core modules""" - for name in CORE_MODULES: - modinfo = ModuleInfo(name, ModuleState.BUILTIN) - logger.debug("Found core module %s", modinfo) - yield modinfo - - def get_sysconfig_modules(self) -> Iterable[ModuleInfo]: - """Get modules defined in Makefile through sysconfig - - MODBUILT_NAMES: modules in *static* block - MODSHARED_NAMES: modules in *shared* block - MODDISABLED_NAMES: modules in *disabled* block - """ - moddisabled = set(sysconfig.get_config_var("MODDISABLED_NAMES").split()) - if self.cross_compiling: - modbuiltin = set(sysconfig.get_config_var("MODBUILT_NAMES").split()) - else: - modbuiltin = set(sys.builtin_module_names) - - for key, value in sysconfig.get_config_vars().items(): - if not key.startswith("MODULE_") or not key.endswith("_STATE"): - continue - if value not in {"yes", "disabled", "missing", "n/a"}: - raise ValueError(f"Unsupported value '{value}' for {key}") - - modname = key[7:-6].lower() - if modname in moddisabled: - # Setup "*disabled*" rule - state = ModuleState.DISABLED_SETUP - elif value in {"disabled", "missing", "n/a"}: - 
state = ModuleState(value) - elif modname in modbuiltin: - assert value == "yes" - state = ModuleState.BUILTIN - else: - assert value == "yes" - state = ModuleState.SHARED - - modinfo = ModuleInfo(modname, state) - logger.debug("Found %s in Makefile", modinfo) - yield modinfo - - def parse_setup_file(self, setup_file: pathlib.Path) -> Iterable[ModuleInfo]: - """Parse a Modules/Setup file""" - assign_var = re.compile(r"^\w+=") # EGG_SPAM=foo - # default to static module - state = ModuleState.BUILTIN - logger.debug("Parsing Setup file %s", setup_file) - with open(setup_file, encoding="utf-8") as f: - for line in f: - line = line.strip() - if not line or line.startswith("#") or assign_var.match(line): - continue - match line.split(): - case ["*shared*"]: - state = ModuleState.SHARED - case ["*static*"]: - state = ModuleState.BUILTIN - case ["*disabled*"]: - state = ModuleState.DISABLED - case ["*noconfig*"]: - state = None - case [*items]: - if state == ModuleState.DISABLED: - # *disabled* can disable multiple modules per line - for item in items: - modinfo = ModuleInfo(item, state) - logger.debug("Found %s in %s", modinfo, setup_file) - yield modinfo - elif state in {ModuleState.SHARED, ModuleState.BUILTIN}: - # *shared* and *static*, first item is the name of the module. - modinfo = ModuleInfo(items[0], state) - logger.debug("Found %s in %s", modinfo, setup_file) - yield modinfo - - def get_spec(self, modinfo: ModuleInfo) -> ModuleSpec: - """Get ModuleSpec for builtin or extension module""" - if modinfo.state == ModuleState.SHARED: - location = os.fspath(self.get_location(modinfo)) - loader = ExtensionFileLoader(modinfo.name, location) - return spec_from_file_location(modinfo.name, location, loader=loader) - elif modinfo.state == ModuleState.BUILTIN: - return spec_from_loader(modinfo.name, loader=BuiltinImporter) - else: - raise ValueError(modinfo) - - def get_location(self, modinfo: ModuleInfo) -> pathlib.Path: - """Get shared library location in build directory""" - if modinfo.state == ModuleState.SHARED: - return self.builddir / f"{modinfo.name}{self.ext_suffix}" - else: - return None - - def _check_file(self, modinfo: ModuleInfo, spec: ModuleSpec): - """Check that the module file is present and not empty""" - if spec.loader is BuiltinImporter: - return - try: - st = os.stat(spec.origin) - except FileNotFoundError: - logger.error("%s (%s) is missing", modinfo.name, spec.origin) - raise - if not st.st_size: - raise ImportError(f"{spec.origin} is an empty file") - - def check_module_import(self, modinfo: ModuleInfo): - """Attempt to import module and report errors""" - spec = self.get_spec(modinfo) - self._check_file(modinfo, spec) - try: - with warnings.catch_warnings(): - # ignore deprecation warning from deprecated modules - warnings.simplefilter("ignore", DeprecationWarning) - bootstrap_load(spec) - except ImportError as e: - logger.error("%s failed to import: %s", modinfo.name, e) - raise - except Exception as e: - logger.exception("Importing extension '%s' failed!", modinfo.name) - raise - - def check_module_cross(self, modinfo: ModuleInfo): - """Sanity check for cross compiling""" - spec = self.get_spec(modinfo) - self._check_file(modinfo, spec) - - def rename_module(self, modinfo: ModuleInfo) -> None: - """Rename module file""" - if modinfo.state == ModuleState.BUILTIN: - logger.error("Cannot mark builtin module '%s' as failed!", modinfo.name) - return - - failed_name = f"{modinfo.name}_failed{self.ext_suffix}" - builddir_path = self.get_location(modinfo) - if 
builddir_path.is_symlink(): - symlink = builddir_path - module_path = builddir_path.resolve().relative_to(os.getcwd()) - failed_path = module_path.parent / failed_name - else: - symlink = None - module_path = builddir_path - failed_path = self.builddir / failed_name - - # remove old failed file - failed_path.unlink(missing_ok=True) - # remove symlink - if symlink is not None: - symlink.unlink(missing_ok=True) - # rename shared extension file - try: - module_path.rename(failed_path) - except FileNotFoundError: - logger.debug("Shared extension file '%s' does not exist.", module_path) - else: - logger.debug("Rename '%s' -> '%s'", module_path, failed_path) - - -def main(): - args = parser.parse_args() - if args.debug: - args.verbose = True - logging.basicConfig( - level=logging.DEBUG if args.debug else logging.INFO, - format="[%(levelname)s] %(message)s", - ) - - checker = ModuleChecker( - cross_compiling=args.cross_compiling, - strict=args.strict, - ) - if args.list_module_names: - names = checker.list_module_names(all=True) - for name in sorted(names): - print(name) - else: - checker.check() - checker.summary(verbose=args.verbose) - try: - checker.check_strict_build() - except RuntimeError as e: - parser.exit(1, f"\nError: {e}\n") - - -if __name__ == "__main__": - main() diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py deleted file mode 100644 index 28ac2b1..0000000 --- a/Tools/scripts/deepfreeze.py +++ /dev/null @@ -1,504 +0,0 @@ -"""Deep freeze - -The script may be executed by _bootstrap_python interpreter. -Shared library extension modules are not available in that case. -On Windows, and in cross-compilation cases, it is executed -by Python 3.10, and 3.11 features are not available. -""" -import argparse -import ast -import builtins -import collections -import contextlib -import os -import re -import time -import types -from typing import Dict, FrozenSet, TextIO, Tuple - -import umarshal -from generate_global_objects import get_identifiers_and_strings - -verbose = False -identifiers, strings = get_identifiers_and_strings() - -# This must be kept in sync with opcode.py -RESUME = 151 - -def isprintable(b: bytes) -> bool: - return all(0x20 <= c < 0x7f for c in b) - - -def make_string_literal(b: bytes) -> str: - res = ['"'] - if isprintable(b): - res.append(b.decode("ascii").replace("\\", "\\\\").replace("\"", "\\\"")) - else: - for i in b: - res.append(f"\\x{i:02x}") - res.append('"') - return "".join(res) - - -CO_FAST_LOCAL = 0x20 -CO_FAST_CELL = 0x40 -CO_FAST_FREE = 0x80 - - -def get_localsplus(code: types.CodeType): - a = collections.defaultdict(int) - for name in code.co_varnames: - a[name] |= CO_FAST_LOCAL - for name in code.co_cellvars: - a[name] |= CO_FAST_CELL - for name in code.co_freevars: - a[name] |= CO_FAST_FREE - return tuple(a.keys()), bytes(a.values()) - - -def get_localsplus_counts(code: types.CodeType, - names: Tuple[str, ...], - kinds: bytes) -> Tuple[int, int, int, int]: - nlocals = 0 - nplaincellvars = 0 - ncellvars = 0 - nfreevars = 0 - assert len(names) == len(kinds) - for name, kind in zip(names, kinds): - if kind & CO_FAST_LOCAL: - nlocals += 1 - if kind & CO_FAST_CELL: - ncellvars += 1 - elif kind & CO_FAST_CELL: - ncellvars += 1 - nplaincellvars += 1 - elif kind & CO_FAST_FREE: - nfreevars += 1 - assert nlocals == len(code.co_varnames) == code.co_nlocals, \ - (nlocals, len(code.co_varnames), code.co_nlocals) - assert ncellvars == len(code.co_cellvars) - assert nfreevars == len(code.co_freevars) - assert len(names) == nlocals + nplaincellvars + 
nfreevars - return nlocals, nplaincellvars, ncellvars, nfreevars - - -PyUnicode_1BYTE_KIND = 1 -PyUnicode_2BYTE_KIND = 2 -PyUnicode_4BYTE_KIND = 4 - - -def analyze_character_width(s: str) -> Tuple[int, bool]: - maxchar = ' ' - for c in s: - maxchar = max(maxchar, c) - ascii = False - if maxchar <= '\xFF': - kind = PyUnicode_1BYTE_KIND - ascii = maxchar <= '\x7F' - elif maxchar <= '\uFFFF': - kind = PyUnicode_2BYTE_KIND - else: - kind = PyUnicode_4BYTE_KIND - return kind, ascii - - -def removesuffix(base: str, suffix: str) -> str: - if base.endswith(suffix): - return base[:len(base) - len(suffix)] - return base - -class Printer: - - def __init__(self, file: TextIO) -> None: - self.level = 0 - self.file = file - self.cache: Dict[tuple[type, object, str], str] = {} - self.hits, self.misses = 0, 0 - self.patchups: list[str] = [] - self.deallocs: list[str] = [] - self.interns: list[str] = [] - self.write('#include "Python.h"') - self.write('#include "internal/pycore_gc.h"') - self.write('#include "internal/pycore_code.h"') - self.write('#include "internal/pycore_frame.h"') - self.write('#include "internal/pycore_long.h"') - self.write("") - - @contextlib.contextmanager - def indent(self) -> None: - save_level = self.level - try: - self.level += 1 - yield - finally: - self.level = save_level - - def write(self, arg: str) -> None: - self.file.writelines((" "*self.level, arg, "\n")) - - @contextlib.contextmanager - def block(self, prefix: str, suffix: str = "") -> None: - self.write(prefix + " {") - with self.indent(): - yield - self.write("}" + suffix) - - def object_head(self, typename: str) -> None: - with self.block(".ob_base =", ","): - self.write(f".ob_refcnt = 999999999,") - self.write(f".ob_type = &{typename},") - - def object_var_head(self, typename: str, size: int) -> None: - with self.block(".ob_base =", ","): - self.object_head(typename) - self.write(f".ob_size = {size},") - - def field(self, obj: object, name: str) -> None: - self.write(f".{name} = {getattr(obj, name)},") - - def generate_bytes(self, name: str, b: bytes) -> str: - if b == b"": - return "(PyObject *)&_Py_SINGLETON(bytes_empty)" - if len(b) == 1: - return f"(PyObject *)&_Py_SINGLETON(bytes_characters[{b[0]}])" - self.write("static") - with self.indent(): - with self.block("struct"): - self.write("PyObject_VAR_HEAD") - self.write("Py_hash_t ob_shash;") - self.write(f"char ob_sval[{len(b) + 1}];") - with self.block(f"{name} =", ";"): - self.object_var_head("PyBytes_Type", len(b)) - self.write(".ob_shash = -1,") - self.write(f".ob_sval = {make_string_literal(b)},") - return f"& {name}.ob_base.ob_base" - - def generate_unicode(self, name: str, s: str) -> str: - if s in strings: - return f"&_Py_STR({strings[s]})" - if s in identifiers: - return f"&_Py_ID({s})" - if re.match(r'\A[A-Za-z0-9_]+\Z', s): - name = f"const_str_{s}" - kind, ascii = analyze_character_width(s) - if kind == PyUnicode_1BYTE_KIND: - datatype = "uint8_t" - elif kind == PyUnicode_2BYTE_KIND: - datatype = "uint16_t" - else: - datatype = "uint32_t" - self.write("static") - with self.indent(): - with self.block("struct"): - if ascii: - self.write("PyASCIIObject _ascii;") - else: - self.write("PyCompactUnicodeObject _compact;") - self.write(f"{datatype} _data[{len(s)+1}];") - with self.block(f"{name} =", ";"): - if ascii: - with self.block("._ascii =", ","): - self.object_head("PyUnicode_Type") - self.write(f".length = {len(s)},") - self.write(".hash = -1,") - with self.block(".state =", ","): - self.write(".kind = 1,") - self.write(".compact = 1,") - 
self.write(".ascii = 1,") - self.write(f"._data = {make_string_literal(s.encode('ascii'))},") - return f"& {name}._ascii.ob_base" - else: - with self.block("._compact =", ","): - with self.block("._base =", ","): - self.object_head("PyUnicode_Type") - self.write(f".length = {len(s)},") - self.write(".hash = -1,") - with self.block(".state =", ","): - self.write(f".kind = {kind},") - self.write(".compact = 1,") - self.write(".ascii = 0,") - utf8 = s.encode('utf-8') - self.write(f'.utf8 = {make_string_literal(utf8)},') - self.write(f'.utf8_length = {len(utf8)},') - with self.block(f"._data =", ","): - for i in range(0, len(s), 16): - data = s[i:i+16] - self.write(", ".join(map(str, map(ord, data))) + ",") - return f"& {name}._compact._base.ob_base" - - - def generate_code(self, name: str, code: types.CodeType) -> str: - # The ordering here matches PyCode_NewWithPosOnlyArgs() - # (but see below). - co_consts = self.generate(name + "_consts", code.co_consts) - co_names = self.generate(name + "_names", code.co_names) - co_filename = self.generate(name + "_filename", code.co_filename) - co_name = self.generate(name + "_name", code.co_name) - co_qualname = self.generate(name + "_qualname", code.co_qualname) - co_linetable = self.generate(name + "_linetable", code.co_linetable) - co_exceptiontable = self.generate(name + "_exceptiontable", code.co_exceptiontable) - # These fields are not directly accessible - localsplusnames, localspluskinds = get_localsplus(code) - co_localsplusnames = self.generate(name + "_localsplusnames", localsplusnames) - co_localspluskinds = self.generate(name + "_localspluskinds", localspluskinds) - # Derived values - nlocals, nplaincellvars, ncellvars, nfreevars = \ - get_localsplus_counts(code, localsplusnames, localspluskinds) - co_code_adaptive = make_string_literal(code.co_code) - self.write("static") - with self.indent(): - self.write(f"struct _PyCode_DEF({len(code.co_code)})") - with self.block(f"{name} =", ";"): - self.object_var_head("PyCode_Type", len(code.co_code) // 2) - # But the ordering here must match that in cpython/code.h - # (which is a pain because we tend to reorder those for perf) - # otherwise MSVC doesn't like it. 
- self.write(f".co_consts = {co_consts},") - self.write(f".co_names = {co_names},") - self.write(f".co_exceptiontable = {co_exceptiontable},") - self.field(code, "co_flags") - self.write(".co_warmup = QUICKENING_INITIAL_WARMUP_VALUE,") - self.write("._co_linearray_entry_size = 0,") - self.field(code, "co_argcount") - self.field(code, "co_posonlyargcount") - self.field(code, "co_kwonlyargcount") - self.write(f".co_framesize = {code.co_stacksize + len(localsplusnames)} + FRAME_SPECIALS_SIZE,") - self.field(code, "co_stacksize") - self.field(code, "co_firstlineno") - self.write(f".co_nlocalsplus = {len(localsplusnames)},") - self.field(code, "co_nlocals") - self.write(f".co_nplaincellvars = {nplaincellvars},") - self.write(f".co_ncellvars = {ncellvars},") - self.write(f".co_nfreevars = {nfreevars},") - self.write(f".co_localsplusnames = {co_localsplusnames},") - self.write(f".co_localspluskinds = {co_localspluskinds},") - self.write(f".co_filename = {co_filename},") - self.write(f".co_name = {co_name},") - self.write(f".co_qualname = {co_qualname},") - self.write(f".co_linetable = {co_linetable},") - self.write(f"._co_cached = NULL,") - self.write("._co_linearray = NULL,") - self.write(f".co_code_adaptive = {co_code_adaptive},") - for i, op in enumerate(code.co_code[::2]): - if op == RESUME: - self.write(f"._co_firsttraceable = {i},") - break - name_as_code = f"(PyCodeObject *)&{name}" - self.deallocs.append(f"_PyStaticCode_Dealloc({name_as_code});") - self.interns.append(f"_PyStaticCode_InternStrings({name_as_code})") - return f"& {name}.ob_base.ob_base" - - def generate_tuple(self, name: str, t: Tuple[object, ...]) -> str: - if len(t) == 0: - return f"(PyObject *)& _Py_SINGLETON(tuple_empty)" - items = [self.generate(f"{name}_{i}", it) for i, it in enumerate(t)] - self.write("static") - with self.indent(): - with self.block("struct"): - self.write("PyGC_Head _gc_head;") - with self.block("struct", "_object;"): - self.write("PyObject_VAR_HEAD") - if t: - self.write(f"PyObject *ob_item[{len(t)}];") - with self.block(f"{name} =", ";"): - with self.block("._object =", ","): - self.object_var_head("PyTuple_Type", len(t)) - if items: - with self.block(f".ob_item =", ","): - for item in items: - self.write(item + ",") - return f"& {name}._object.ob_base.ob_base" - - def _generate_int_for_bits(self, name: str, i: int, digit: int) -> None: - sign = -1 if i < 0 else 0 if i == 0 else +1 - i = abs(i) - digits: list[int] = [] - while i: - i, rem = divmod(i, digit) - digits.append(rem) - self.write("static") - with self.indent(): - with self.block("struct"): - self.write("PyObject_VAR_HEAD") - self.write(f"digit ob_digit[{max(1, len(digits))}];") - with self.block(f"{name} =", ";"): - self.object_var_head("PyLong_Type", sign*len(digits)) - if digits: - ds = ", ".join(map(str, digits)) - self.write(f".ob_digit = {{ {ds} }},") - - def generate_int(self, name: str, i: int) -> str: - if -5 <= i <= 256: - return f"(PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + {i}]" - if i >= 0: - name = f"const_int_{i}" - else: - name = f"const_int_negative_{abs(i)}" - if abs(i) < 2**15: - self._generate_int_for_bits(name, i, 2**15) - else: - connective = "if" - for bits_in_digit in 15, 30: - self.write(f"#{connective} PYLONG_BITS_IN_DIGIT == {bits_in_digit}") - self._generate_int_for_bits(name, i, 2**bits_in_digit) - connective = "elif" - self.write("#else") - self.write('#error "PYLONG_BITS_IN_DIGIT should be 15 or 30"') - self.write("#endif") - # If neither clause applies, it won't compile - return f"& 
{name}.ob_base.ob_base" - - def generate_float(self, name: str, x: float) -> str: - with self.block(f"static PyFloatObject {name} =", ";"): - self.object_head("PyFloat_Type") - self.write(f".ob_fval = {x},") - return f"&{name}.ob_base" - - def generate_complex(self, name: str, z: complex) -> str: - with self.block(f"static PyComplexObject {name} =", ";"): - self.object_head("PyComplex_Type") - self.write(f".cval = {{ {z.real}, {z.imag} }},") - return f"&{name}.ob_base" - - def generate_frozenset(self, name: str, fs: FrozenSet[object]) -> str: - try: - fs = sorted(fs) - except TypeError: - # frozen set with incompatible types, fallback to repr() - fs = sorted(fs, key=repr) - ret = self.generate_tuple(name, tuple(fs)) - self.write("// TODO: The above tuple should be a frozenset") - return ret - - def generate_file(self, module: str, code: object)-> None: - module = module.replace(".", "_") - self.generate(f"{module}_toplevel", code) - with self.block(f"static void {module}_do_patchups(void)"): - for p in self.patchups: - self.write(p) - self.patchups.clear() - self.write(EPILOGUE.replace("%%NAME%%", module)) - - def generate(self, name: str, obj: object) -> str: - # Use repr() in the key to distinguish -0.0 from +0.0 - key = (type(obj), obj, repr(obj)) - if key in self.cache: - self.hits += 1 - # print(f"Cache hit {key!r:.40}: {self.cache[key]!r:.40}") - return self.cache[key] - self.misses += 1 - if isinstance(obj, (types.CodeType, umarshal.Code)) : - val = self.generate_code(name, obj) - elif isinstance(obj, tuple): - val = self.generate_tuple(name, obj) - elif isinstance(obj, str): - val = self.generate_unicode(name, obj) - elif isinstance(obj, bytes): - val = self.generate_bytes(name, obj) - elif obj is True: - return "Py_True" - elif obj is False: - return "Py_False" - elif isinstance(obj, int): - val = self.generate_int(name, obj) - elif isinstance(obj, float): - val = self.generate_float(name, obj) - elif isinstance(obj, complex): - val = self.generate_complex(name, obj) - elif isinstance(obj, frozenset): - val = self.generate_frozenset(name, obj) - elif obj is builtins.Ellipsis: - return "Py_Ellipsis" - elif obj is None: - return "Py_None" - else: - raise TypeError( - f"Cannot generate code for {type(obj).__name__} object") - # print(f"Cache store {key!r:.40}: {val!r:.40}") - self.cache[key] = val - return val - - -EPILOGUE = """ -PyObject * -_Py_get_%%NAME%%_toplevel(void) -{ - %%NAME%%_do_patchups(); - return Py_NewRef((PyObject *) &%%NAME%%_toplevel); -} -""" - -FROZEN_COMMENT_C = "/* Auto-generated by Programs/_freeze_module.c */" -FROZEN_COMMENT_PY = "/* Auto-generated by Programs/_freeze_module.py */" - -FROZEN_DATA_LINE = r"\s*(\d+,\s*)+\s*" - - -def is_frozen_header(source: str) -> bool: - return source.startswith((FROZEN_COMMENT_C, FROZEN_COMMENT_PY)) - - -def decode_frozen_data(source: str) -> types.CodeType: - lines = source.splitlines() - while lines and re.match(FROZEN_DATA_LINE, lines[0]) is None: - del lines[0] - while lines and re.match(FROZEN_DATA_LINE, lines[-1]) is None: - del lines[-1] - values: Tuple[int, ...] 
= ast.literal_eval("".join(lines).strip()) - data = bytes(values) - return umarshal.loads(data) - - -def generate(args: list[str], output: TextIO) -> None: - printer = Printer(output) - for arg in args: - file, modname = arg.rsplit(':', 1) - with open(file, "r", encoding="utf8") as fd: - source = fd.read() - if is_frozen_header(source): - code = decode_frozen_data(source) - else: - code = compile(fd.read(), f"", "exec") - printer.generate_file(modname, code) - with printer.block(f"void\n_Py_Deepfreeze_Fini(void)"): - for p in printer.deallocs: - printer.write(p) - with printer.block(f"int\n_Py_Deepfreeze_Init(void)"): - for p in printer.interns: - with printer.block(f"if ({p} < 0)"): - printer.write("return -1;") - printer.write("return 0;") - if verbose: - print(f"Cache hits: {printer.hits}, misses: {printer.misses}") - - -parser = argparse.ArgumentParser() -parser.add_argument("-o", "--output", help="Defaults to deepfreeze.c", default="deepfreeze.c") -parser.add_argument("-v", "--verbose", action="store_true", help="Print diagnostics") -parser.add_argument('args', nargs="+", help="Input file and module name (required) in file:modname format") - -@contextlib.contextmanager -def report_time(label: str): - t0 = time.time() - try: - yield - finally: - t1 = time.time() - if verbose: - print(f"{label}: {t1-t0:.3f} sec") - - -def main() -> None: - global verbose - args = parser.parse_args() - verbose = args.verbose - output = args.output - with open(output, "w", encoding="utf-8") as file: - with report_time("generate"): - generate(args.args, file) - if verbose: - print(f"Wrote {os.path.getsize(output)} bytes to {output}") - - -if __name__ == "__main__": - main() diff --git a/Tools/scripts/freeze_modules.py b/Tools/scripts/freeze_modules.py deleted file mode 100644 index aa1e4fe..0000000 --- a/Tools/scripts/freeze_modules.py +++ /dev/null @@ -1,733 +0,0 @@ -"""Freeze modules and regen related files (e.g. Python/frozen.c). - -See the notes at the top of Python/frozen.c for more info. -""" - -from collections import namedtuple -import hashlib -import os -import ntpath -import posixpath -import argparse -from update_file import updating_file_with_tmpfile - - -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) -ROOT_DIR = os.path.abspath(ROOT_DIR) -FROZEN_ONLY = os.path.join(ROOT_DIR, 'Tools', 'freeze', 'flag.py') - -STDLIB_DIR = os.path.join(ROOT_DIR, 'Lib') -# If FROZEN_MODULES_DIR or DEEPFROZEN_MODULES_DIR is changed then the -# .gitattributes and .gitignore files needs to be updated. -FROZEN_MODULES_DIR = os.path.join(ROOT_DIR, 'Python', 'frozen_modules') -DEEPFROZEN_MODULES_DIR = os.path.join(ROOT_DIR, 'Python', 'deepfreeze') - -FROZEN_FILE = os.path.join(ROOT_DIR, 'Python', 'frozen.c') -MAKEFILE = os.path.join(ROOT_DIR, 'Makefile.pre.in') -PCBUILD_PROJECT = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj') -PCBUILD_FILTERS = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj.filters') -PCBUILD_PYTHONCORE = os.path.join(ROOT_DIR, 'PCbuild', 'pythoncore.vcxproj') - - -OS_PATH = 'ntpath' if os.name == 'nt' else 'posixpath' - -# These are modules that get frozen. -TESTS_SECTION = 'Test module' -FROZEN = [ - # See parse_frozen_spec() for the format. - # In cases where the frozenid is duplicated, the first one is re-used. - ('import system', [ - # These frozen modules are necessary for bootstrapping - # the import system. 
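The effect of this first section is visible from any built interpreter; a minimal sketch, assuming a 3.11-era CPython (the aliases come from the spec lines just below):

    import _imp, sys
    # importlib._bootstrap is frozen under its alias _frozen_importlib:
    print(_imp.is_frozen("_frozen_importlib"))        # True
    print(sys.modules["_frozen_importlib"].__name__)  # 'importlib._bootstrap'
    print(_imp.is_frozen("zipimport"))                # True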
- 'importlib._bootstrap : _frozen_importlib', - 'importlib._bootstrap_external : _frozen_importlib_external', - # This module is important because some Python builds rely - # on a builtin zip file instead of a filesystem. - 'zipimport', - ]), - ('stdlib - startup, without site (python -S)', [ - 'abc', - 'codecs', - # For now we do not freeze the encodings, due # to the noise all - # those extra modules add to the text printed during the build. - # (See https://github.com/python/cpython/pull/28398#pullrequestreview-756856469.) - #'', - 'io', - ]), - ('stdlib - startup, with site', [ - '_collections_abc', - '_sitebuiltins', - 'genericpath', - 'ntpath', - 'posixpath', - # We must explicitly mark os.path as a frozen module - # even though it will never be imported. - f'{OS_PATH} : os.path', - 'os', - 'site', - 'stat', - ]), - ('runpy - run module with -m', [ - "importlib.util", - "importlib.machinery", - "runpy", - ]), - (TESTS_SECTION, [ - '__hello__', - '__hello__ : __hello_alias__', - '__hello__ : <__phello_alias__>', - '__hello__ : __phello_alias__.spam', - '<__phello__.**.*>', - f'frozen_only : __hello_only__ = {FROZEN_ONLY}', - ]), -] -BOOTSTRAP = { - 'importlib._bootstrap', - 'importlib._bootstrap_external', - 'zipimport', -} - - -####################################### -# platform-specific helpers - -if os.path is posixpath: - relpath_for_posix_display = os.path.relpath - - def relpath_for_windows_display(path, base): - return ntpath.relpath( - ntpath.join(*path.split(os.path.sep)), - ntpath.join(*base.split(os.path.sep)), - ) - -else: - relpath_for_windows_display = ntpath.relpath - - def relpath_for_posix_display(path, base): - return posixpath.relpath( - posixpath.join(*path.split(os.path.sep)), - posixpath.join(*base.split(os.path.sep)), - ) - - -####################################### -# specs - -def parse_frozen_specs(): - seen = {} - for section, specs in FROZEN: - parsed = _parse_specs(specs, section, seen) - for item in parsed: - frozenid, pyfile, modname, ispkg, section = item - try: - source = seen[frozenid] - except KeyError: - source = FrozenSource.from_id(frozenid, pyfile) - seen[frozenid] = source - else: - assert not pyfile or pyfile == source.pyfile, item - yield FrozenModule(modname, ispkg, section, source) - - -def _parse_specs(specs, section, seen): - for spec in specs: - info, subs = _parse_spec(spec, seen, section) - yield info - for info in subs or (): - yield info - - -def _parse_spec(spec, knownids=None, section=None): - """Yield an info tuple for each module corresponding to the given spec. - - The info consists of: (frozenid, pyfile, modname, ispkg, section). - - Supported formats: - - frozenid - frozenid : modname - frozenid : modname = pyfile - - "frozenid" and "modname" must be valid module names (dot-separated - identifiers). If "modname" is not provided then "frozenid" is used. - If "pyfile" is not provided then the filename of the module - corresponding to "frozenid" is used. - - Angle brackets around a frozenid (e.g. '") indicate - it is a package. This also means it must be an actual module - (i.e. "pyfile" cannot have been provided). Such values can have - patterns to expand submodules: - - - also freeze all direct submodules - - also freeze the full submodule tree - - As with "frozenid", angle brackets around "modname" indicate - it is a package. However, in this case "pyfile" should not - have been provided and patterns in "modname" are not supported. 
- Also, if "modname" has brackets then "frozenid" should not, - and "pyfile" should have been provided.. - """ - frozenid, _, remainder = spec.partition(':') - modname, _, pyfile = remainder.partition('=') - frozenid = frozenid.strip() - modname = modname.strip() - pyfile = pyfile.strip() - - submodules = None - if modname.startswith('<') and modname.endswith('>'): - assert check_modname(frozenid), spec - modname = modname[1:-1] - assert check_modname(modname), spec - if frozenid in knownids: - pass - elif pyfile: - assert not os.path.isdir(pyfile), spec - else: - pyfile = _resolve_module(frozenid, ispkg=False) - ispkg = True - elif pyfile: - assert check_modname(frozenid), spec - assert not knownids or frozenid not in knownids, spec - assert check_modname(modname), spec - assert not os.path.isdir(pyfile), spec - ispkg = False - elif knownids and frozenid in knownids: - assert check_modname(frozenid), spec - assert check_modname(modname), spec - ispkg = False - else: - assert not modname or check_modname(modname), spec - resolved = iter(resolve_modules(frozenid)) - frozenid, pyfile, ispkg = next(resolved) - if not modname: - modname = frozenid - if ispkg: - pkgid = frozenid - pkgname = modname - pkgfiles = {pyfile: pkgid} - def iter_subs(): - for frozenid, pyfile, ispkg in resolved: - if pkgname: - modname = frozenid.replace(pkgid, pkgname, 1) - else: - modname = frozenid - if pyfile: - if pyfile in pkgfiles: - frozenid = pkgfiles[pyfile] - pyfile = None - elif ispkg: - pkgfiles[pyfile] = frozenid - yield frozenid, pyfile, modname, ispkg, section - submodules = iter_subs() - - info = (frozenid, pyfile or None, modname, ispkg, section) - return info, submodules - - -####################################### -# frozen source files - -class FrozenSource(namedtuple('FrozenSource', 'id pyfile frozenfile deepfreezefile')): - - @classmethod - def from_id(cls, frozenid, pyfile=None): - if not pyfile: - pyfile = os.path.join(STDLIB_DIR, *frozenid.split('.')) + '.py' - #assert os.path.exists(pyfile), (frozenid, pyfile) - frozenfile = resolve_frozen_file(frozenid, FROZEN_MODULES_DIR) - deepfreezefile = resolve_frozen_file(frozenid, DEEPFROZEN_MODULES_DIR) - return cls(frozenid, pyfile, frozenfile, deepfreezefile) - - @property - def frozenid(self): - return self.id - - @property - def modname(self): - if self.pyfile.startswith(STDLIB_DIR): - return self.id - return None - - @property - def symbol(self): - # This matches what we do in Programs/_freeze_module.c: - name = self.frozenid.replace('.', '_') - return '_Py_M__' + name - - @property - def ispkg(self): - if not self.pyfile: - return False - elif self.frozenid.endswith('.__init__'): - return False - else: - return os.path.basename(self.pyfile) == '__init__.py' - - @property - def isbootstrap(self): - return self.id in BOOTSTRAP - - -def resolve_frozen_file(frozenid, destdir): - """Return the filename corresponding to the given frozen ID. - - For stdlib modules the ID will always be the full name - of the source module. - """ - if not isinstance(frozenid, str): - try: - frozenid = frozenid.frozenid - except AttributeError: - raise ValueError(f'unsupported frozenid {frozenid!r}') - # We use a consistent naming convention for all frozen modules. 
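The convention mentioned above maps one frozen id to one C symbol and one header name; a minimal sketch mirroring FrozenSource.symbol and the line that follows (id illustrative):

    frozenid = "importlib._bootstrap"
    symbol = "_Py_M__" + frozenid.replace(".", "_")  # C array emitted by _freeze_module
    header = f"{frozenid}.h"                         # placed under the destination dir
    print(symbol)  # _Py_M__importlib__bootstrap
    print(header)  # importlib._bootstrap.h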
- frozenfile = f'{frozenid}.h' - if not destdir: - return frozenfile - return os.path.join(destdir, frozenfile) - - -####################################### -# frozen modules - -class FrozenModule(namedtuple('FrozenModule', 'name ispkg section source')): - - def __getattr__(self, name): - return getattr(self.source, name) - - @property - def modname(self): - return self.name - - @property - def orig(self): - return self.source.modname - - @property - def isalias(self): - orig = self.source.modname - if not orig: - return True - return self.name != orig - - def summarize(self): - source = self.source.modname - if source: - source = f'<{source}>' - else: - source = relpath_for_posix_display(self.pyfile, ROOT_DIR) - return { - 'module': self.name, - 'ispkg': self.ispkg, - 'source': source, - 'frozen': os.path.basename(self.frozenfile), - 'checksum': _get_checksum(self.frozenfile), - } - - -def _iter_sources(modules): - seen = set() - for mod in modules: - if mod.source not in seen: - yield mod.source - seen.add(mod.source) - - -####################################### -# generic helpers - -def _get_checksum(filename): - with open(filename, "rb") as infile: - contents = infile.read() - m = hashlib.sha256() - m.update(contents) - return m.hexdigest() - - -def resolve_modules(modname, pyfile=None): - if modname.startswith('<') and modname.endswith('>'): - if pyfile: - assert os.path.isdir(pyfile) or os.path.basename(pyfile) == '__init__.py', pyfile - ispkg = True - modname = modname[1:-1] - rawname = modname - # For now, we only expect match patterns at the end of the name. - _modname, sep, match = modname.rpartition('.') - if sep: - if _modname.endswith('.**'): - modname = _modname[:-3] - match = f'**.{match}' - elif match and not match.isidentifier(): - modname = _modname - # Otherwise it's a plain name so we leave it alone. - else: - match = None - else: - ispkg = False - rawname = modname - match = None - - if not check_modname(modname): - raise ValueError(f'not a valid module name ({rawname})') - - if not pyfile: - pyfile = _resolve_module(modname, ispkg=ispkg) - elif os.path.isdir(pyfile): - pyfile = _resolve_module(modname, pyfile, ispkg) - yield modname, pyfile, ispkg - - if match: - pkgdir = os.path.dirname(pyfile) - yield from iter_submodules(modname, pkgdir, match) - - -def check_modname(modname): - return all(n.isidentifier() for n in modname.split('.')) - - -def iter_submodules(pkgname, pkgdir=None, match='*'): - if not pkgdir: - pkgdir = os.path.join(STDLIB_DIR, *pkgname.split('.')) - if not match: - match = '**.*' - match_modname = _resolve_modname_matcher(match, pkgdir) - - def _iter_submodules(pkgname, pkgdir): - for entry in sorted(os.scandir(pkgdir), key=lambda e: e.name): - matched, recursive = match_modname(entry.name) - if not matched: - continue - modname = f'{pkgname}.{entry.name}' - if modname.endswith('.py'): - yield modname[:-3], entry.path, False - elif entry.is_dir(): - pyfile = os.path.join(entry.path, '__init__.py') - # We ignore namespace packages. 
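check_modname(), defined above, is the validity gate every resolved name passes through; a few hedged examples of what it accepts and rejects:

    def check_modname(modname):  # copy of the helper above
        return all(n.isidentifier() for n in modname.split('.'))

    assert check_modname('importlib._bootstrap')
    assert not check_modname('os.path-tools')  # '-' is not an identifier character
    assert not check_modname('pkg..mod')       # empty segment between the dots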
- if os.path.exists(pyfile): - yield modname, pyfile, True - if recursive: - yield from _iter_submodules(modname, entry.path) - - return _iter_submodules(pkgname, pkgdir) - - -def _resolve_modname_matcher(match, rootdir=None): - if isinstance(match, str): - if match.startswith('**.'): - recursive = True - pat = match[3:] - assert match - else: - recursive = False - pat = match - - if pat == '*': - def match_modname(modname): - return True, recursive - else: - raise NotImplementedError(match) - elif callable(match): - match_modname = match(rootdir) - else: - raise ValueError(f'unsupported matcher {match!r}') - return match_modname - - -def _resolve_module(modname, pathentry=STDLIB_DIR, ispkg=False): - assert pathentry, pathentry - pathentry = os.path.normpath(pathentry) - assert os.path.isabs(pathentry) - if ispkg: - return os.path.join(pathentry, *modname.split('.'), '__init__.py') - return os.path.join(pathentry, *modname.split('.')) + '.py' - - -####################################### -# regenerating dependent files - -def find_marker(lines, marker, file): - for pos, line in enumerate(lines): - if marker in line: - return pos - raise Exception(f"Can't find {marker!r} in file {file}") - - -def replace_block(lines, start_marker, end_marker, replacements, file): - start_pos = find_marker(lines, start_marker, file) - end_pos = find_marker(lines, end_marker, file) - if end_pos <= start_pos: - raise Exception(f"End marker {end_marker!r} " - f"occurs before start marker {start_marker!r} " - f"in file {file}") - replacements = [line.rstrip() + '\n' for line in replacements] - return lines[:start_pos + 1] + replacements + lines[end_pos:] - - -def regen_frozen(modules, frozen_modules: bool): - headerlines = [] - parentdir = os.path.dirname(FROZEN_FILE) - if frozen_modules: - for src in _iter_sources(modules): - # Adding a comment to separate sections here doesn't add much, - # so we don't. - header = relpath_for_posix_display(src.frozenfile, parentdir) - headerlines.append(f'#include "{header}"') - - externlines = [] - bootstraplines = [] - stdliblines = [] - testlines = [] - aliaslines = [] - indent = ' ' - lastsection = None - for mod in modules: - if mod.isbootstrap: - lines = bootstraplines - elif mod.section == TESTS_SECTION: - lines = testlines - else: - lines = stdliblines - if mod.section != lastsection: - if lastsection is not None: - lines.append('') - lines.append(f'/* {mod.section} */') - lastsection = mod.section - - # Also add a extern declaration for the corresponding - # deepfreeze-generated function. - orig_name = mod.source.id - code_name = orig_name.replace(".", "_") - get_code_name = "_Py_get_%s_toplevel" % code_name - externlines.append("extern PyObject *%s(void);" % get_code_name) - - symbol = mod.symbol - pkg = 'true' if mod.ispkg else 'false' - if not frozen_modules: - line = ('{"%s", NULL, 0, %s, GET_CODE(%s)},' - ) % (mod.name, pkg, code_name) - else: - line = ('{"%s", %s, (int)sizeof(%s), %s, GET_CODE(%s)},' - ) % (mod.name, symbol, symbol, pkg, code_name) - lines.append(line) - - if mod.isalias: - if not mod.orig: - entry = '{"%s", NULL},' % (mod.name,) - elif mod.source.ispkg: - entry = '{"%s", "<%s"},' % (mod.name, mod.orig) - else: - entry = '{"%s", "%s"},' % (mod.name, mod.orig) - aliaslines.append(indent + entry) - - for lines in (bootstraplines, stdliblines, testlines): - # TODO: Is this necessary any more? 
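find_marker() and replace_block() above implement the marker-splice idiom every regen_* function relies on; a self-contained sketch with illustrative markers:

    lines = ['keep\n', '/* Start */\n', 'stale\n', '/* End */\n', 'keep\n']
    start = next(i for i, line in enumerate(lines) if '/* Start */' in line)
    end = next(i for i, line in enumerate(lines) if '/* End */' in line)
    # Both marker lines survive; only the span between them is replaced:
    lines = lines[:start + 1] + ['fresh\n'] + lines[end:]
    print(''.join(lines), end='')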
- if not lines[0]: - del lines[0] - for i, line in enumerate(lines): - if line: - lines[i] = indent + line - - print(f'# Updating {os.path.relpath(FROZEN_FILE)}') - with updating_file_with_tmpfile(FROZEN_FILE) as (infile, outfile): - lines = infile.readlines() - # TODO: Use more obvious markers, e.g. - # $START GENERATED FOOBAR$ / $END GENERATED FOOBAR$ - lines = replace_block( - lines, - "/* Includes for frozen modules: */", - "/* End includes */", - headerlines, - FROZEN_FILE, - ) - lines = replace_block( - lines, - "/* Start extern declarations */", - "/* End extern declarations */", - externlines, - FROZEN_FILE, - ) - lines = replace_block( - lines, - "static const struct _frozen bootstrap_modules[] =", - "/* bootstrap sentinel */", - bootstraplines, - FROZEN_FILE, - ) - lines = replace_block( - lines, - "static const struct _frozen stdlib_modules[] =", - "/* stdlib sentinel */", - stdliblines, - FROZEN_FILE, - ) - lines = replace_block( - lines, - "static const struct _frozen test_modules[] =", - "/* test sentinel */", - testlines, - FROZEN_FILE, - ) - lines = replace_block( - lines, - "const struct _module_alias aliases[] =", - "/* aliases sentinel */", - aliaslines, - FROZEN_FILE, - ) - outfile.writelines(lines) - - -def regen_makefile(modules): - pyfiles = [] - frozenfiles = [] - rules = [''] - deepfreezerules = ["Python/deepfreeze/deepfreeze.c: $(DEEPFREEZE_DEPS)", - "\t$(PYTHON_FOR_FREEZE) $(srcdir)/Tools/scripts/deepfreeze.py \\"] - for src in _iter_sources(modules): - frozen_header = relpath_for_posix_display(src.frozenfile, ROOT_DIR) - frozenfiles.append(f'\t\t{frozen_header} \\') - - pyfile = relpath_for_posix_display(src.pyfile, ROOT_DIR) - pyfiles.append(f'\t\t{pyfile} \\') - - if src.isbootstrap: - freezecmd = '$(FREEZE_MODULE_BOOTSTRAP)' - freezedep = '$(FREEZE_MODULE_BOOTSTRAP_DEPS)' - else: - freezecmd = '$(FREEZE_MODULE)' - freezedep = '$(FREEZE_MODULE_DEPS)' - - freeze = (f'{freezecmd} {src.frozenid} ' - f'$(srcdir)/{pyfile} {frozen_header}') - rules.extend([ - f'{frozen_header}: {pyfile} {freezedep}', - f'\t{freeze}', - '', - ]) - deepfreezerules.append(f"\t{frozen_header}:{src.frozenid} \\") - deepfreezerules.append('\t-o Python/deepfreeze/deepfreeze.c') - pyfiles[-1] = pyfiles[-1].rstrip(" \\") - frozenfiles[-1] = frozenfiles[-1].rstrip(" \\") - - print(f'# Updating {os.path.relpath(MAKEFILE)}') - with updating_file_with_tmpfile(MAKEFILE) as (infile, outfile): - lines = infile.readlines() - lines = replace_block( - lines, - "FROZEN_FILES_IN =", - "# End FROZEN_FILES_IN", - pyfiles, - MAKEFILE, - ) - lines = replace_block( - lines, - "FROZEN_FILES_OUT =", - "# End FROZEN_FILES_OUT", - frozenfiles, - MAKEFILE, - ) - lines = replace_block( - lines, - "# BEGIN: freezing modules", - "# END: freezing modules", - rules, - MAKEFILE, - ) - lines = replace_block( - lines, - "# BEGIN: deepfreeze modules", - "# END: deepfreeze modules", - deepfreezerules, - MAKEFILE, - ) - outfile.writelines(lines) - - -def regen_pcbuild(modules): - projlines = [] - filterlines = [] - corelines = [] - deepfreezerules = ['\t') - projlines.append(f' {src.frozenid}') - projlines.append(f' $(IntDir){intfile}') - projlines.append(f' $(PySourcePath){header}') - projlines.append(f' ') - - filterlines.append(f' ') - filterlines.append(' Python Files') - filterlines.append(' ') - deepfreezerules.append(f'\t\t "$(PySourcePath){header}:{src.frozenid}" ^') - deepfreezerules.append('\t\t "-o" "$(PySourcePath)Python\\deepfreeze\\deepfreeze.c"\'/>' ) - - corelines.append(f' ') - - print(f'# Updating 
{os.path.relpath(PCBUILD_PROJECT)}') - with updating_file_with_tmpfile(PCBUILD_PROJECT) as (infile, outfile): - lines = infile.readlines() - lines = replace_block( - lines, - '', - '', - projlines, - PCBUILD_PROJECT, - ) - outfile.writelines(lines) - with updating_file_with_tmpfile(PCBUILD_PROJECT) as (infile, outfile): - lines = infile.readlines() - lines = replace_block( - lines, - '', - '', - deepfreezerules, - PCBUILD_PROJECT, - ) - outfile.writelines(lines) - print(f'# Updating {os.path.relpath(PCBUILD_FILTERS)}') - with updating_file_with_tmpfile(PCBUILD_FILTERS) as (infile, outfile): - lines = infile.readlines() - lines = replace_block( - lines, - '', - '', - filterlines, - PCBUILD_FILTERS, - ) - outfile.writelines(lines) - print(f'# Updating {os.path.relpath(PCBUILD_PYTHONCORE)}') - with updating_file_with_tmpfile(PCBUILD_PYTHONCORE) as (infile, outfile): - lines = infile.readlines() - lines = replace_block( - lines, - '', - '', - corelines, - PCBUILD_FILTERS, - ) - outfile.writelines(lines) - - -####################################### -# the script - -parser = argparse.ArgumentParser() -parser.add_argument("--frozen-modules", action="store_true", - help="Use both frozen and deepfrozen modules. (default: uses only deepfrozen modules)") - -def main(): - args = parser.parse_args() - frozen_modules: bool = args.frozen_modules - # Expand the raw specs, preserving order. - modules = list(parse_frozen_specs()) - - # Regen build-related files. - regen_makefile(modules) - regen_pcbuild(modules) - regen_frozen(modules, frozen_modules) - - -if __name__ == '__main__': - main() diff --git a/Tools/scripts/generate_global_objects.py b/Tools/scripts/generate_global_objects.py deleted file mode 100644 index 0432bf5..0000000 --- a/Tools/scripts/generate_global_objects.py +++ /dev/null @@ -1,381 +0,0 @@ -import contextlib -import io -import os.path -import re - -__file__ = os.path.abspath(__file__) -ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) -INTERNAL = os.path.join(ROOT, 'Include', 'internal') - - -IGNORED = { - 'ACTION', # Python/_warnings.c - 'ATTR', # Python/_warnings.c and Objects/funcobject.c - 'DUNDER', # Objects/typeobject.c - 'RDUNDER', # Objects/typeobject.c - 'SPECIAL', # Objects/weakrefobject.c - 'NAME', # Objects/typeobject.c -} -IDENTIFIERS = [ - # from ADD() Python/_warnings.c - 'default', - 'ignore', - - # from GET_WARNINGS_ATTR() in Python/_warnings.c - 'WarningMessage', - '_showwarnmsg', - '_warn_unawaited_coroutine', - 'defaultaction', - 'filters', - 'onceregistry', - - # from WRAP_METHOD() in Objects/weakrefobject.c - '__bytes__', - '__reversed__', - - # from COPY_ATTR() in Objects/funcobject.c - '__module__', - '__name__', - '__qualname__', - '__doc__', - '__annotations__', - - # from SLOT* in Objects/typeobject.c - '__abs__', - '__add__', - '__aiter__', - '__and__', - '__anext__', - '__await__', - '__bool__', - '__call__', - '__contains__', - '__del__', - '__delattr__', - '__delete__', - '__delitem__', - '__eq__', - '__float__', - '__floordiv__', - '__ge__', - '__get__', - '__getattr__', - '__getattribute__', - '__getitem__', - '__gt__', - '__hash__', - '__iadd__', - '__iand__', - '__ifloordiv__', - '__ilshift__', - '__imatmul__', - '__imod__', - '__imul__', - '__index__', - '__init__', - '__int__', - '__invert__', - '__ior__', - '__ipow__', - '__irshift__', - '__isub__', - '__iter__', - '__itruediv__', - '__ixor__', - '__le__', - '__len__', - '__lshift__', - '__lt__', - '__matmul__', - '__mod__', - '__mul__', - '__ne__', - '__neg__', - '__new__', - 
'__next__', - '__or__', - '__pos__', - '__pow__', - '__radd__', - '__rand__', - '__repr__', - '__rfloordiv__', - '__rlshift__', - '__rmatmul__', - '__rmod__', - '__rmul__', - '__ror__', - '__rpow__', - '__rrshift__', - '__rshift__', - '__rsub__', - '__rtruediv__', - '__rxor__', - '__set__', - '__setattr__', - '__setitem__', - '__str__', - '__sub__', - '__truediv__', - '__xor__', - '__divmod__', - '__rdivmod__', -] - - -####################################### -# helpers - -def iter_files(): - for name in ('Modules', 'Objects', 'Parser', 'PC', 'Programs', 'Python'): - root = os.path.join(ROOT, name) - for dirname, _, files in os.walk(root): - for name in files: - if not name.endswith(('.c', '.h')): - continue - yield os.path.join(dirname, name) - - -def iter_global_strings(): - id_regex = re.compile(r'\b_Py_ID\((\w+)\)') - str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)') - for filename in iter_files(): - try: - infile = open(filename, encoding='utf-8') - except FileNotFoundError: - # The file must have been a temporary file. - continue - with infile: - for lno, line in enumerate(infile, 1): - for m in id_regex.finditer(line): - identifier, = m.groups() - yield identifier, None, filename, lno, line - for m in str_regex.finditer(line): - varname, string = m.groups() - yield varname, string, filename, lno, line - - -def iter_to_marker(lines, marker): - for line in lines: - if line.rstrip() == marker: - break - yield line - - -class Printer: - - def __init__(self, file): - self.level = 0 - self.file = file - self.continuation = [False] - - @contextlib.contextmanager - def indent(self): - save_level = self.level - try: - self.level += 1 - yield - finally: - self.level = save_level - - def write(self, arg): - eol = '\n' - if self.continuation[-1]: - eol = f' \\{eol}' if arg else f'\\{eol}' - self.file.writelines((" "*self.level, arg, eol)) - - @contextlib.contextmanager - def block(self, prefix, suffix="", *, continuation=None): - if continuation is None: - continuation = self.continuation[-1] - self.continuation.append(continuation) - - self.write(prefix + " {") - with self.indent(): - yield - self.continuation.pop() - self.write("}" + suffix) - - -@contextlib.contextmanager -def open_for_changes(filename, orig): - """Like open() but only write to the file if it changed.""" - outfile = io.StringIO() - yield outfile - text = outfile.getvalue() - if text != orig: - with open(filename, 'w', encoding='utf-8') as outfile: - outfile.write(text) - else: - print(f'# not changed: {filename}') - - -####################################### -# the global objects - -START = '/* The following is auto-generated by Tools/scripts/generate_global_objects.py. */' -END = '/* End auto-generated code */' - - -def generate_global_strings(identifiers, strings): - filename = os.path.join(INTERNAL, 'pycore_global_strings.h') - - # Read the non-generated part of the file. - with open(filename) as infile: - orig = infile.read() - lines = iter(orig.rstrip().splitlines()) - before = '\n'.join(iter_to_marker(lines, START)) - for _ in iter_to_marker(lines, END): - pass - after = '\n'.join(lines) - - # Generate the file. 
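open_for_changes() above is the write-only-if-changed idiom used for every generated header; a standalone sketch of the same behavior (function name hypothetical):

    def write_if_changed(filename, text):
        try:
            with open(filename, 'r', encoding='utf-8') as f:
                if f.read() == text:
                    return False  # mtime untouched, so builds see no change
        except OSError:
            pass
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(text)
        return True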
- with open_for_changes(filename, orig) as outfile: - printer = Printer(outfile) - printer.write(before) - printer.write(START) - with printer.block('struct _Py_global_strings', ';'): - with printer.block('struct', ' literals;'): - for literal, name in sorted(strings.items(), key=lambda x: x[1]): - printer.write(f'STRUCT_FOR_STR({name}, "{literal}")') - outfile.write('\n') - with printer.block('struct', ' identifiers;'): - for name in sorted(identifiers): - assert name.isidentifier(), name - printer.write(f'STRUCT_FOR_ID({name})') - with printer.block('struct', ' ascii[128];'): - printer.write("PyASCIIObject _ascii;") - printer.write("uint8_t _data[2];") - with printer.block('struct', ' latin1[128];'): - printer.write("PyCompactUnicodeObject _latin1;") - printer.write("uint8_t _data[2];") - printer.write(END) - printer.write(after) - - -def generate_runtime_init(identifiers, strings): - # First get some info from the declarations. - nsmallposints = None - nsmallnegints = None - with open(os.path.join(INTERNAL, 'pycore_global_objects.h')) as infile: - for line in infile: - if line.startswith('#define _PY_NSMALLPOSINTS'): - nsmallposints = int(line.split()[-1]) - elif line.startswith('#define _PY_NSMALLNEGINTS'): - nsmallnegints = int(line.split()[-1]) - break - else: - raise NotImplementedError - assert nsmallposints and nsmallnegints - - # Then target the runtime initializer. - filename = os.path.join(INTERNAL, 'pycore_runtime_init_generated.h') - - # Read the non-generated part of the file. - with open(filename) as infile: - orig = infile.read() - lines = iter(orig.rstrip().splitlines()) - before = '\n'.join(iter_to_marker(lines, START)) - for _ in iter_to_marker(lines, END): - pass - after = '\n'.join(lines) - - # Generate the file. - with open_for_changes(filename, orig) as outfile: - immortal_objects = [] - printer = Printer(outfile) - printer.write(before) - printer.write(START) - with printer.block('#define _Py_global_objects_INIT', continuation=True): - with printer.block('.singletons =', ','): - # Global int objects. - with printer.block('.small_ints =', ','): - for i in range(-nsmallnegints, nsmallposints): - printer.write(f'_PyLong_DIGIT_INIT({i}),') - immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + {i}]') - printer.write('') - # Global bytes objects. - printer.write('.bytes_empty = _PyBytes_SIMPLE_INIT(0, 0),') - immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(bytes_empty)') - with printer.block('.bytes_characters =', ','): - for i in range(256): - printer.write(f'_PyBytes_CHAR_INIT({i}),') - immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(bytes_characters)[{i}]') - printer.write('') - # Global strings. 
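The singleton tables emitted above are observable from a running CPython; a hedged check (object identity here is an implementation detail, not a language guarantee):

    # Small ints in [-5, 256] are preallocated, so equal values are identical:
    x = 256
    y = 255 + 1
    assert x is y
    # One-byte bytes objects come from the bytes_characters table:
    assert b"spam"[1:2] is b"parrot"[:1]  # both yield the shared b'p'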
- with printer.block('.strings =', ','): - with printer.block('.literals =', ','): - for literal, name in sorted(strings.items(), key=lambda x: x[1]): - printer.write(f'INIT_STR({name}, "{literal}"),') - immortal_objects.append(f'(PyObject *)&_Py_STR({name})') - with printer.block('.identifiers =', ','): - for name in sorted(identifiers): - assert name.isidentifier(), name - printer.write(f'INIT_ID({name}),') - immortal_objects.append(f'(PyObject *)&_Py_ID({name})') - with printer.block('.ascii =', ','): - for i in range(128): - printer.write(f'_PyASCIIObject_INIT("\\x{i:02x}"),') - immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).ascii[{i}]') - with printer.block('.latin1 =', ','): - for i in range(128, 256): - utf8 = ['"'] - for c in chr(i).encode('utf-8'): - utf8.append(f"\\x{c:02x}") - utf8.append('"') - printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}", {"".join(utf8)}),') - immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).latin1[{i} - 128]') - printer.write('') - with printer.block('.tuple_empty =', ','): - printer.write('.ob_base = _PyVarObject_IMMORTAL_INIT(&PyTuple_Type, 0)') - immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(tuple_empty)') - printer.write('') - printer.write("static inline void") - with printer.block("_PyUnicode_InitStaticStrings(void)"): - printer.write(f'PyObject *string;') - for i in sorted(identifiers): - # This use of _Py_ID() is ignored by iter_global_strings() - # since iter_files() ignores .h files. - printer.write(f'string = &_Py_ID({i});') - printer.write(f'PyUnicode_InternInPlace(&string);') - printer.write('') - printer.write('#ifdef Py_DEBUG') - printer.write("static inline void") - with printer.block("_PyStaticObjects_CheckRefcnt(void)"): - for i in immortal_objects: - with printer.block(f'if (Py_REFCNT({i}) < _PyObject_IMMORTAL_REFCNT)', ';'): - printer.write(f'_PyObject_Dump({i});') - printer.write(f'Py_FatalError("immortal object has less refcnt than ' - 'expected _PyObject_IMMORTAL_REFCNT");') - printer.write('#endif') - printer.write(END) - printer.write(after) - - -def get_identifiers_and_strings() -> 'tuple[set[str], dict[str, str]]': - identifiers = set(IDENTIFIERS) - strings = {} - for name, string, *_ in iter_global_strings(): - if string is None: - if name not in IGNORED: - identifiers.add(name) - else: - if string not in strings: - strings[string] = name - elif name != strings[string]: - raise ValueError(f'string mismatch for {name!r} ({string!r} != {strings[name]!r}') - return identifiers, strings - - -####################################### -# the script - -def main() -> None: - identifiers, strings = get_identifiers_and_strings() - - generate_global_strings(identifiers, strings) - generate_runtime_init(identifiers, strings) - - -if __name__ == '__main__': - main() diff --git a/Tools/scripts/generate_levenshtein_examples.py b/Tools/scripts/generate_levenshtein_examples.py deleted file mode 100644 index 5a8360f..0000000 --- a/Tools/scripts/generate_levenshtein_examples.py +++ /dev/null @@ -1,70 +0,0 @@ -"""Generate 10,000 unique examples for the Levenshtein short-circuit tests.""" - -import argparse -from functools import cache -import json -import os.path -from random import choices, randrange - - -# This should be in sync with Lib/traceback.py. It's not importing those values -# because this script is being executed by PYTHON_FOR_REGEN and not by the in-tree -# build of Python. 
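For orientation, the generated file is a JSON list of (a, b, expected) triples under the cost model defined just below; hypothetical entries with their arithmetic:

    examples = [
        ('', 'ab', 4),      # two insertions at _MOVE_COST == 2
        ('aBc', 'abc', 1),  # one case-only substitution at _CASE_COST == 1
        ('abc', 'cba', 4),  # two full substitutions, 2 * _MOVE_COST
    ]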
-_MOVE_COST = 2
-_CASE_COST = 1
-
-
-def _substitution_cost(ch_a, ch_b):
-    if ch_a == ch_b:
-        return 0
-    if ch_a.lower() == ch_b.lower():
-        return _CASE_COST
-    return _MOVE_COST
-
-
-@cache
-def levenshtein(a, b):
-    if not a or not b:
-        return (len(a) + len(b)) * _MOVE_COST
-    option1 = levenshtein(a[:-1], b[:-1]) + _substitution_cost(a[-1], b[-1])
-    option2 = levenshtein(a[:-1], b) + _MOVE_COST
-    option3 = levenshtein(a, b[:-1]) + _MOVE_COST
-    return min(option1, option2, option3)
-
-
-def main():
-    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument('output_path', metavar='FILE', type=str)
-    parser.add_argument('--overwrite', dest='overwrite', action='store_const',
-                        const=True, default=False,
-                        help='overwrite an existing test file')
-
-    args = parser.parse_args()
-    output_path = os.path.realpath(args.output_path)
-    if not args.overwrite and os.path.isfile(output_path):
-        print(f"{output_path} already exists, skipping regeneration.")
-        print(
-            "To force, add --overwrite to the invocation of this tool or"
-            " delete the existing file."
-        )
-        return
-
-    examples = set()
-    # Create a lot of non-empty examples, which should end up with a Gauss-like
-    # distribution for even costs (moves) and odd costs (case substitutions).
-    while len(examples) < 9990:
-        a = ''.join(choices("abcABC", k=randrange(1, 10)))
-        b = ''.join(choices("abcABC", k=randrange(1, 10)))
-        expected = levenshtein(a, b)
-        examples.add((a, b, expected))
-    # Create one empty case each for strings between 0 and 9 in length.
-    for i in range(10):
-        b = ''.join(choices("abcABC", k=i))
-        expected = levenshtein("", b)
-        examples.add(("", b, expected))
-    with open(output_path, "w") as f:
-        json.dump(sorted(examples), f, indent=2)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/Tools/scripts/generate_opcode_h.py b/Tools/scripts/generate_opcode_h.py
deleted file mode 100644
index 9ff264a..0000000
--- a/Tools/scripts/generate_opcode_h.py
+++ /dev/null
@@ -1,199 +0,0 @@
-# This script generates the opcode.h header file.
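The script's input is the opmap dictionary in Lib/opcode.py; a hedged sketch of the #define lines derived from it (opcode numbers vary across versions):

    import opcode

    DEFINE = '#define {:<38} {:>3}\n'  # same template the script uses
    for name in ('POP_TOP', 'RETURN_VALUE'):
        print(DEFINE.format(name, opcode.opmap[name]), end='')
    # e.g. '#define POP_TOP 1' on a 3.11 interpreter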
- -import sys -import tokenize - -SCRIPT_NAME = "Tools/scripts/generate_opcode_h.py" -PYTHON_OPCODE = "Lib/opcode.py" - -header = f""" -// Auto-generated by {SCRIPT_NAME} from {PYTHON_OPCODE} - -#ifndef Py_OPCODE_H -#define Py_OPCODE_H -#ifdef __cplusplus -extern "C" {{ -#endif - - -/* Instruction opcodes for compiled code */ -""".lstrip() - -footer = """ - -#define IS_PSEUDO_OPCODE(op) (((op) >= MIN_PSEUDO_OPCODE) && ((op) <= MAX_PSEUDO_OPCODE)) - -#ifdef __cplusplus -} -#endif -#endif /* !Py_OPCODE_H */ -""" - -internal_header = f""" -// Auto-generated by {SCRIPT_NAME} from {PYTHON_OPCODE} - -#ifndef Py_INTERNAL_OPCODE_H -#define Py_INTERNAL_OPCODE_H -#ifdef __cplusplus -extern "C" {{ -#endif - -#ifndef Py_BUILD_CORE -# error "this header requires Py_BUILD_CORE define" -#endif - -#include "opcode.h" -""".lstrip() - -internal_footer = """ -#ifdef __cplusplus -} -#endif -#endif // !Py_INTERNAL_OPCODE_H -""" - -DEFINE = "#define {:<38} {:>3}\n" - -UINT32_MASK = (1<<32)-1 - -def write_int_array_from_ops(name, ops, out): - bits = 0 - for op in ops: - bits |= 1<>= 32 - assert bits == 0 - out.write(f"}};\n") - -def main(opcode_py, outfile='Include/opcode.h', internaloutfile='Include/internal/pycore_opcode.h'): - opcode = {} - if hasattr(tokenize, 'open'): - fp = tokenize.open(opcode_py) # Python 3.2+ - else: - fp = open(opcode_py) # Python 2.7 - with fp: - code = fp.read() - exec(code, opcode) - opmap = opcode['opmap'] - opname = opcode['opname'] - hasarg = opcode['hasarg'] - hasconst = opcode['hasconst'] - hasjrel = opcode['hasjrel'] - hasjabs = opcode['hasjabs'] - is_pseudo = opcode['is_pseudo'] - _pseudo_ops = opcode['_pseudo_ops'] - - HAVE_ARGUMENT = opcode["HAVE_ARGUMENT"] - MIN_PSEUDO_OPCODE = opcode["MIN_PSEUDO_OPCODE"] - MAX_PSEUDO_OPCODE = opcode["MAX_PSEUDO_OPCODE"] - - NUM_OPCODES = len(opname) - used = [ False ] * len(opname) - next_op = 1 - - for name, op in opmap.items(): - used[op] = True - - specialized_opmap = {} - opname_including_specialized = opname.copy() - for name in opcode['_specialized_instructions']: - while used[next_op]: - next_op += 1 - specialized_opmap[name] = next_op - opname_including_specialized[next_op] = name - used[next_op] = True - specialized_opmap['DO_TRACING'] = 255 - opname_including_specialized[255] = 'DO_TRACING' - used[255] = True - - with (open(outfile, 'w') as fobj, open(internaloutfile, 'w') as iobj): - fobj.write(header) - iobj.write(internal_header) - - for name in opname: - if name in opmap: - op = opmap[name] - if op == HAVE_ARGUMENT: - fobj.write(DEFINE.format("HAVE_ARGUMENT", HAVE_ARGUMENT)) - if op == MIN_PSEUDO_OPCODE: - fobj.write(DEFINE.format("MIN_PSEUDO_OPCODE", MIN_PSEUDO_OPCODE)) - - fobj.write(DEFINE.format(name, op)) - - if op == MAX_PSEUDO_OPCODE: - fobj.write(DEFINE.format("MAX_PSEUDO_OPCODE", MAX_PSEUDO_OPCODE)) - - - for name, op in specialized_opmap.items(): - fobj.write(DEFINE.format(name, op)) - - iobj.write("\nextern const uint8_t _PyOpcode_Caches[256];\n") - iobj.write("\nextern const uint8_t _PyOpcode_Deopt[256];\n") - iobj.write("\n#ifdef NEED_OPCODE_TABLES\n") - write_int_array_from_ops("_PyOpcode_RelativeJump", opcode['hasjrel'], iobj) - write_int_array_from_ops("_PyOpcode_Jump", opcode['hasjrel'] + opcode['hasjabs'], iobj) - - iobj.write("\nconst uint8_t _PyOpcode_Caches[256] = {\n") - for i, entries in enumerate(opcode["_inline_cache_entries"]): - if entries: - iobj.write(f" [{opname[i]}] = {entries},\n") - iobj.write("};\n") - - deoptcodes = {} - for basic, op in opmap.items(): - if not is_pseudo(op): - 
deoptcodes[basic] = basic - for basic, family in opcode["_specializations"].items(): - for specialized in family: - deoptcodes[specialized] = basic - iobj.write("\nconst uint8_t _PyOpcode_Deopt[256] = {\n") - for opt, deopt in sorted(deoptcodes.items()): - iobj.write(f" [{opt}] = {deopt},\n") - iobj.write("};\n") - iobj.write("#endif // NEED_OPCODE_TABLES\n") - - fobj.write("\n") - fobj.write("#define HAS_ARG(op) ((((op) >= HAVE_ARGUMENT) && (!IS_PSEUDO_OPCODE(op)))\\") - for op in _pseudo_ops: - if opmap[op] in hasarg: - fobj.write(f"\n || ((op) == {op}) \\") - fobj.write("\n )\n") - - fobj.write("\n") - fobj.write("#define HAS_CONST(op) (false\\") - for op in hasconst: - fobj.write(f"\n || ((op) == {opname[op]}) \\") - fobj.write("\n )\n") - - fobj.write("\n") - for i, (op, _) in enumerate(opcode["_nb_ops"]): - fobj.write(DEFINE.format(op, i)) - - iobj.write("\n") - iobj.write("#ifdef Py_DEBUG\n") - iobj.write(f"static const char *const _PyOpcode_OpName[{NUM_OPCODES}] = {{\n") - for op, name in enumerate(opname_including_specialized): - if name[0] != "<": - op = name - iobj.write(f''' [{op}] = "{name}",\n''') - iobj.write("};\n") - iobj.write("#endif\n") - - iobj.write("\n") - iobj.write("#define EXTRA_CASES \\\n") - for i, flag in enumerate(used): - if not flag: - iobj.write(f" case {i}: \\\n") - iobj.write(" ;\n") - - fobj.write(footer) - iobj.write(internal_footer) - - - print(f"{outfile} regenerated from {opcode_py}") - - -if __name__ == '__main__': - main(sys.argv[1], sys.argv[2], sys.argv[3]) diff --git a/Tools/scripts/generate_re_casefix.py b/Tools/scripts/generate_re_casefix.py deleted file mode 100755 index 625b065..0000000 --- a/Tools/scripts/generate_re_casefix.py +++ /dev/null @@ -1,94 +0,0 @@ -#! /usr/bin/env python3 -# This script generates Lib/re/_casefix.py. - -import collections -import sys -import unicodedata - -def update_file(file, content): - try: - with open(file, 'r', encoding='utf-8') as fobj: - if fobj.read() == content: - return False - except (OSError, ValueError): - pass - with open(file, 'w', encoding='utf-8') as fobj: - fobj.write(content) - return True - -re_casefix_template = """\ -# Auto-generated by Tools/scripts/generate_re_casefix.py. - -# Maps the code of lowercased character to codes of different lowercased -# characters which have the same uppercase. -_EXTRA_CASES = { -%s -} -""" - -def uname(i): - return unicodedata.name(chr(i), r'U+%04X' % i) - -class hexint(int): - def __repr__(self): - return '%#06x' % self - -def alpha(i): - c = chr(i) - return c if c.isalpha() else ascii(c)[1:-1] - - -def main(outfile='Lib/re/_casefix.py'): - # Find sets of characters which have the same uppercase. - equivalent_chars = collections.defaultdict(str) - for c in map(chr, range(sys.maxunicode + 1)): - equivalent_chars[c.upper()] += c - equivalent_chars = [t for t in equivalent_chars.values() if len(t) > 1] - - # List of codes of lowercased characters which have the same uppercase. 
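The canonical member of such a set is the Kelvin sign; a small, hedged demonstration of why re needs this table (code points are standard Unicode):

    import re
    # LATIN SMALL LETTER K (U+006B) and KELVIN SIGN (U+212A) share a lowercase,
    # so a case-insensitive 'k' must also match the Kelvin sign:
    assert '\u212a'.lower() == 'k'
    assert re.match('k', '\u212a', re.IGNORECASE) is not None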
- equivalent_lower_codes = [sorted(t) - for s in equivalent_chars - for t in [set(ord(c.lower()) for c in s)] - if len(t) > 1] - - bad_codes = [] - for t in equivalent_lower_codes: - for i in t: - if i > 0xffff: - bad_codes.extend(t) - try: - bad_codes.append(ord(chr(i).upper())) - except (ValueError, TypeError): - pass - break - if bad_codes: - print('Case-insensitive matching may not work correctly for character:', - file=sys.stderr) - for i in sorted(bad_codes): - print(" '%s' (U+%04x, %s)" % (alpha(i), i, uname(i)), - file=sys.stderr) - sys.exit(1) - - mapping = {i: tuple(j for j in t if i != j) - for t in equivalent_lower_codes - for i in t} - - items = [] - for i, t in sorted(mapping.items()): - items.append(' # %s: %s' % ( - uname(i), - ', '.join(map(uname, t)), - )) - items.append(" %r: %r, # '%s': '%s'" % ( - hexint(i), - tuple(map(hexint, t)), - alpha(i), - ''.join(map(alpha, t)), - )) - - update_file(outfile, re_casefix_template % '\n'.join(items)) - - -if __name__ == '__main__': - import sys - main(*sys.argv[1:]) diff --git a/Tools/scripts/generate_sre_constants.py b/Tools/scripts/generate_sre_constants.py deleted file mode 100755 index 7271507..0000000 --- a/Tools/scripts/generate_sre_constants.py +++ /dev/null @@ -1,78 +0,0 @@ -#! /usr/bin/env python3 -# This script generates Modules/_sre/sre_constants.h from Lib/re/_constants.py. - - -def update_file(file, content): - try: - with open(file, 'r') as fobj: - if fobj.read() == content: - return False - except (OSError, ValueError): - pass - with open(file, 'w') as fobj: - fobj.write(content) - return True - -sre_constants_header = """\ -/* - * Secret Labs' Regular Expression Engine - * - * regular expression matching engine - * - * Auto-generated by Tools/scripts/generate_sre_constants.py from - * Lib/re/_constants.py. - * - * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. - * - * See the sre.c file for information on usage and redistribution. 
- */ - -""" - -def main( - infile="Lib/re/_constants.py", - outfile_constants="Modules/_sre/sre_constants.h", - outfile_targets="Modules/_sre/sre_targets.h", -): - ns = {} - with open(infile) as fp: - code = fp.read() - exec(code, ns) - - def dump(d, prefix): - items = sorted(d) - for item in items: - yield "#define %s_%s %d\n" % (prefix, item, item) - - def dump2(d, prefix): - items = [(value, name) for name, value in d.items() - if name.startswith(prefix)] - for value, name in sorted(items): - yield "#define %s %d\n" % (name, value) - - def dump_gotos(d, prefix): - for i, item in enumerate(sorted(d)): - assert i == item - yield f" &&{prefix}_{item},\n" - - content = [sre_constants_header] - content.append("#define SRE_MAGIC %d\n" % ns["MAGIC"]) - content.extend(dump(ns["OPCODES"], "SRE_OP")) - content.extend(dump(ns["ATCODES"], "SRE")) - content.extend(dump(ns["CHCODES"], "SRE")) - content.extend(dump2(ns, "SRE_FLAG_")) - content.extend(dump2(ns, "SRE_INFO_")) - - update_file(outfile_constants, ''.join(content)) - - content = [sre_constants_header] - content.append(f"static void *sre_targets[{len(ns['OPCODES'])}] = {{\n") - content.extend(dump_gotos(ns["OPCODES"], "TARGET_SRE_OP")) - content.append("};\n") - - update_file(outfile_targets, ''.join(content)) - - -if __name__ == '__main__': - import sys - main(*sys.argv[1:]) diff --git a/Tools/scripts/generate_stdlib_module_names.py b/Tools/scripts/generate_stdlib_module_names.py deleted file mode 100644 index 92100bd..0000000 --- a/Tools/scripts/generate_stdlib_module_names.py +++ /dev/null @@ -1,137 +0,0 @@ -# This script lists the names of standard library modules -# to update Python/stdlib_mod_names.h -import _imp -import os.path -import re -import subprocess -import sys -import sysconfig - -from check_extension_modules import ModuleChecker - - -SRC_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) -STDLIB_PATH = os.path.join(SRC_DIR, 'Lib') - -IGNORE = { - '__init__', - '__pycache__', - 'site-packages', - - # Test modules and packages - '__hello__', - '__phello__', - '__hello_alias__', - '__phello_alias__', - '__hello_only__', - '_ctypes_test', - '_testbuffer', - '_testcapi', - '_testconsole', - '_testimportmultiple', - '_testinternalcapi', - '_testmultiphase', - '_xxsubinterpreters', - '_xxtestfuzz', - 'distutils.tests', - 'idlelib.idle_test', - 'test', - 'xxlimited', - 'xxlimited_35', - 'xxsubtype', -} - -# Pure Python modules (Lib/*.py) -def list_python_modules(names): - for filename in os.listdir(STDLIB_PATH): - if not filename.endswith(".py"): - continue - name = filename.removesuffix(".py") - names.add(name) - - -# Packages in Lib/ -def list_packages(names): - for name in os.listdir(STDLIB_PATH): - if name in IGNORE: - continue - package_path = os.path.join(STDLIB_PATH, name) - if not os.path.isdir(package_path): - continue - if any(package_file.endswith(".py") - for package_file in os.listdir(package_path)): - names.add(name) - - -# Built-in and extension modules built by Modules/Setup* -# includes Windows and macOS extensions. -def list_modules_setup_extensions(names): - checker = ModuleChecker() - names.update(checker.list_module_names(all=True)) - - -# List frozen modules of the PyImport_FrozenModules list (Python/frozen.c). -# Use the "./Programs/_testembed list_frozen" command. -def list_frozen(names): - submodules = set() - for name in _imp._frozen_module_names(): - # To skip __hello__, __hello_alias__ and etc. - if name.startswith('__'): - continue - if '.' 
in name: - submodules.add(name) - else: - names.add(name) - # Make sure all frozen submodules have a known parent. - for name in list(submodules): - if name.partition('.')[0] in names: - submodules.remove(name) - if submodules: - raise Exception(f'unexpected frozen submodules: {sorted(submodules)}') - - -def list_modules(): - names = set(sys.builtin_module_names) - list_modules_setup_extensions(names) - list_packages(names) - list_python_modules(names) - list_frozen(names) - - # Remove ignored packages and modules - for name in list(names): - package_name = name.split('.')[0] - # package_name can be equal to name - if package_name in IGNORE: - names.discard(name) - - for name in names: - if "." in name: - raise Exception("sub-modules must not be listed") - - return names - - -def write_modules(fp, names): - print("// Auto-generated by Tools/scripts/generate_stdlib_module_names.py.", - file=fp) - print("// List used to create sys.stdlib_module_names.", file=fp) - print(file=fp) - print("static const char* _Py_stdlib_module_names[] = {", file=fp) - for name in sorted(names): - print(f'"{name}",', file=fp) - print("};", file=fp) - - -def main(): - if not sysconfig.is_python_build(): - print(f"ERROR: {sys.executable} is not a Python build", - file=sys.stderr) - sys.exit(1) - - fp = sys.stdout - names = list_modules() - write_modules(fp, names) - - -if __name__ == "__main__": - main() diff --git a/Tools/scripts/generate_token.py b/Tools/scripts/generate_token.py deleted file mode 100755 index d8be8b9..0000000 --- a/Tools/scripts/generate_token.py +++ /dev/null @@ -1,275 +0,0 @@ -#! /usr/bin/env python3 -# This script generates token related files from Grammar/Tokens: -# -# Doc/library/token-list.inc -# Include/token.h -# Parser/token.c -# Lib/token.py - - -NT_OFFSET = 256 - -def load_tokens(path): - tok_names = [] - string_to_tok = {} - ERRORTOKEN = None - with open(path) as fp: - for line in fp: - line = line.strip() - # strip comments - i = line.find('#') - if i >= 0: - line = line[:i].strip() - if not line: - continue - fields = line.split() - name = fields[0] - value = len(tok_names) - if name == 'ERRORTOKEN': - ERRORTOKEN = value - string = fields[1] if len(fields) > 1 else None - if string: - string = eval(string) - string_to_tok[string] = value - tok_names.append(name) - return tok_names, ERRORTOKEN, string_to_tok - - -def update_file(file, content): - try: - with open(file, 'r') as fobj: - if fobj.read() == content: - return False - except (OSError, ValueError): - pass - with open(file, 'w') as fobj: - fobj.write(content) - return True - - -token_h_template = """\ -/* Auto-generated by Tools/scripts/generate_token.py */ - -/* Token types */ -#ifndef Py_INTERNAL_TOKEN_H -#define Py_INTERNAL_TOKEN_H -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef Py_BUILD_CORE -# error "this header requires Py_BUILD_CORE define" -#endif - -#undef TILDE /* Prevent clash of our definition with system macro. 
Ex AIX, ioctl.h */ - -%s\ -#define N_TOKENS %d -#define NT_OFFSET %d - -/* Special definitions for cooperation with parser */ - -#define ISTERMINAL(x) ((x) < NT_OFFSET) -#define ISNONTERMINAL(x) ((x) >= NT_OFFSET) -#define ISEOF(x) ((x) == ENDMARKER) -#define ISWHITESPACE(x) ((x) == ENDMARKER || \\ - (x) == NEWLINE || \\ - (x) == INDENT || \\ - (x) == DEDENT) - - -// Symbols exported for test_peg_generator -PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */ -PyAPI_FUNC(int) _PyToken_OneChar(int); -PyAPI_FUNC(int) _PyToken_TwoChars(int, int); -PyAPI_FUNC(int) _PyToken_ThreeChars(int, int, int); - -#ifdef __cplusplus -} -#endif -#endif // !Py_INTERNAL_TOKEN_H -""" - -def make_h(infile, outfile='Include/internal/pycore_token.h'): - tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile) - - defines = [] - for value, name in enumerate(tok_names[:ERRORTOKEN + 1]): - defines.append("#define %-15s %d\n" % (name, value)) - - if update_file(outfile, token_h_template % ( - ''.join(defines), - len(tok_names), - NT_OFFSET - )): - print("%s regenerated from %s" % (outfile, infile)) - - -token_c_template = """\ -/* Auto-generated by Tools/scripts/generate_token.py */ - -#include "Python.h" -#include "pycore_token.h" - -/* Token names */ - -const char * const _PyParser_TokenNames[] = { -%s\ -}; - -/* Return the token corresponding to a single character */ - -int -_PyToken_OneChar(int c1) -{ -%s\ - return OP; -} - -int -_PyToken_TwoChars(int c1, int c2) -{ -%s\ - return OP; -} - -int -_PyToken_ThreeChars(int c1, int c2, int c3) -{ -%s\ - return OP; -} -""" - -def generate_chars_to_token(mapping, n=1): - result = [] - write = result.append - indent = ' ' * n - write(indent) - write('switch (c%d) {\n' % (n,)) - for c in sorted(mapping): - write(indent) - value = mapping[c] - if isinstance(value, dict): - write("case '%s':\n" % (c,)) - write(generate_chars_to_token(value, n + 1)) - write(indent) - write(' break;\n') - else: - write("case '%s': return %s;\n" % (c, value)) - write(indent) - write('}\n') - return ''.join(result) - -def make_c(infile, outfile='Parser/token.c'): - tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile) - string_to_tok['<>'] = string_to_tok['!='] - chars_to_token = {} - for string, value in string_to_tok.items(): - assert 1 <= len(string) <= 3 - name = tok_names[value] - m = chars_to_token.setdefault(len(string), {}) - for c in string[:-1]: - m = m.setdefault(c, {}) - m[string[-1]] = name - - names = [] - for value, name in enumerate(tok_names): - if value >= ERRORTOKEN: - name = '<%s>' % name - names.append(' "%s",\n' % name) - names.append(' "",\n') - - if update_file(outfile, token_c_template % ( - ''.join(names), - generate_chars_to_token(chars_to_token[1]), - generate_chars_to_token(chars_to_token[2]), - generate_chars_to_token(chars_to_token[3]) - )): - print("%s regenerated from %s" % (outfile, infile)) - - -token_inc_template = """\ -.. Auto-generated by Tools/scripts/generate_token.py -%s -.. data:: N_TOKENS - -.. data:: NT_OFFSET -""" - -def make_rst(infile, outfile='Doc/library/token-list.inc'): - tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile) - tok_to_string = {value: s for s, value in string_to_tok.items()} - - names = [] - for value, name in enumerate(tok_names[:ERRORTOKEN + 1]): - names.append('.. data:: %s' % (name,)) - if value in tok_to_string: - names.append('') - names.append(' Token value for ``"%s"``.' 
% tok_to_string[value]) - names.append('') - - if update_file(outfile, token_inc_template % '\n'.join(names)): - print("%s regenerated from %s" % (outfile, infile)) - - -token_py_template = '''\ -"""Token constants.""" -# Auto-generated by Tools/scripts/generate_token.py - -__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF'] - -%s -N_TOKENS = %d -# Special definitions for cooperation with parser -NT_OFFSET = %d - -tok_name = {value: name - for name, value in globals().items() - if isinstance(value, int) and not name.startswith('_')} -__all__.extend(tok_name.values()) - -EXACT_TOKEN_TYPES = { -%s -} - -def ISTERMINAL(x): - return x < NT_OFFSET - -def ISNONTERMINAL(x): - return x >= NT_OFFSET - -def ISEOF(x): - return x == ENDMARKER -''' - -def make_py(infile, outfile='Lib/token.py'): - tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile) - - constants = [] - for value, name in enumerate(tok_names): - constants.append('%s = %d' % (name, value)) - constants.insert(ERRORTOKEN, - "# These aren't used by the C tokenizer but are needed for tokenize.py") - - token_types = [] - for s, value in sorted(string_to_tok.items()): - token_types.append(' %r: %s,' % (s, tok_names[value])) - - if update_file(outfile, token_py_template % ( - '\n'.join(constants), - len(tok_names), - NT_OFFSET, - '\n'.join(token_types), - )): - print("%s regenerated from %s" % (outfile, infile)) - - -def main(op, infile='Grammar/Tokens', *args): - make = globals()['make_' + op] - make(infile, *args) - - -if __name__ == '__main__': - import sys - main(*sys.argv[1:]) diff --git a/Tools/scripts/parse_html5_entities.py b/Tools/scripts/parse_html5_entities.py deleted file mode 100755 index 1e5bdad..0000000 --- a/Tools/scripts/parse_html5_entities.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env python3 -""" -Utility for parsing HTML5 entity definitions available from: - - https://html.spec.whatwg.org/entities.json - https://html.spec.whatwg.org/multipage/named-characters.html - -The page now contains the following note: - - "This list is static and will not be expanded or changed in the future." - -Written by Ezio Melotti and Iuliia Proskurnia. 
-""" - -import os -import sys -import json -from urllib.request import urlopen -from html.entities import html5 - -PAGE_URL = 'https://html.spec.whatwg.org/multipage/named-characters.html' -ENTITIES_URL = 'https://html.spec.whatwg.org/entities.json' -HTML5_SECTION_START = '# HTML5 named character references' - -def get_json(url): - """Download the json file from the url and returns a decoded object.""" - with urlopen(url) as f: - data = f.read().decode('utf-8') - return json.loads(data) - -def create_dict(entities): - """Create the html5 dict from the decoded json object.""" - new_html5 = {} - for name, value in entities.items(): - new_html5[name.lstrip('&')] = value['characters'] - return new_html5 - -def compare_dicts(old, new): - """Compare the old and new dicts and print the differences.""" - added = new.keys() - old.keys() - if added: - print('{} entitie(s) have been added:'.format(len(added))) - for name in sorted(added): - print(' {!r}: {!r}'.format(name, new[name])) - removed = old.keys() - new.keys() - if removed: - print('{} entitie(s) have been removed:'.format(len(removed))) - for name in sorted(removed): - print(' {!r}: {!r}'.format(name, old[name])) - changed = set() - for name in (old.keys() & new.keys()): - if old[name] != new[name]: - changed.add((name, old[name], new[name])) - if changed: - print('{} entitie(s) have been modified:'.format(len(changed))) - for item in sorted(changed): - print(' {!r}: {!r} -> {!r}'.format(*item)) - -def write_items(entities, file=sys.stdout): - """Write the items of the dictionary in the specified file.""" - # The keys in the generated dictionary should be sorted - # in a case-insensitive way, however, when two keys are equal, - # the uppercase version should come first so that the result - # looks like: ['Aacute', 'aacute', 'Aacute;', 'aacute;', ...] - # To do this we first sort in a case-sensitive way (so all the - # uppercase chars come first) and then sort with key=str.lower. - # Since the sorting is stable the uppercase keys will eventually - # be before their equivalent lowercase version. 
-    keys = sorted(entities.keys())
-    keys = sorted(keys, key=str.lower)
-    print(HTML5_SECTION_START, file=file)
-    print(f'# Generated by {sys.argv[0]!r}\n'
-          f'# from {ENTITIES_URL} and\n'
-          f'# {PAGE_URL}.\n'
-          f'# Map HTML5 named character references to the '
-          f'equivalent Unicode character(s).', file=file)
-    print('html5 = {', file=file)
-    for name in keys:
-        print(f'    {name!r}: {entities[name]!a},', file=file)
-    print('}', file=file)
-
-
-if __name__ == '__main__':
-    # without args print a diff between html.entities.html5 and new_html5
-    # with --create print the new html5 dict
-    # with --patch patch the Lib/html/entities.py file
-    new_html5 = create_dict(get_json(ENTITIES_URL))
-    if '--create' in sys.argv:
-        write_items(new_html5)
-    elif '--patch' in sys.argv:
-        fname = 'Lib/html/entities.py'
-        temp_fname = fname + '.temp'
-        with open(fname) as f1, open(temp_fname, 'w') as f2:
-            skip = False
-            for line in f1:
-                if line.startswith(HTML5_SECTION_START):
-                    write_items(new_html5, file=f2)
-                    skip = True
-                    continue
-                if skip:
-                    # skip the old items until the }
-                    if line.startswith('}'):
-                        skip = False
-                    continue
-                f2.write(line)
-        os.remove(fname)
-        os.rename(temp_fname, fname)
-    else:
-        if html5 == new_html5:
-            print('The current dictionary is up to date.')
-        else:
-            compare_dicts(html5, new_html5)
-            print('Run "./python {0} --patch" to update Lib/html/entities.py '
-                  'or "./python {0} --create" to see the generated '
-                  'dictionary.'.format(__file__))
diff --git a/Tools/scripts/smelly.py b/Tools/scripts/smelly.py
deleted file mode 100755
index 276a5ab..0000000
--- a/Tools/scripts/smelly.py
+++ /dev/null
@@ -1,173 +0,0 @@
-#!/usr/bin/env python
-# Script checking that all symbols exported by libpython start with Py or _Py
-
-import os.path
-import subprocess
-import sys
-import sysconfig
-
-
-ALLOWED_PREFIXES = ('Py', '_Py')
-if sys.platform == 'darwin':
-    ALLOWED_PREFIXES += ('__Py',)
-
-IGNORED_EXTENSION = "_ctypes_test"
-# Ignore constructor and destructor functions
-IGNORED_SYMBOLS = {'_init', '_fini'}
-
-
-def is_local_symbol_type(symtype):
-    # Ignore local symbols.
-
-    # If lowercase, the symbol is usually local; if uppercase, the symbol
-    # is global (external). There are however a few lowercase symbols that
-    # are shown for special global symbols ("u", "v" and "w").
-    if symtype.islower() and symtype not in "uvw":
-        return True
-
-    # Ignore the initialized data section (d and D) and the BSS data
-    # section. For example, ignore "__bss_start (type: B)"
-    # and "_edata (type: D)".
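-    # For illustration, how these rules classify typical nm(1) output
-    # lines (addresses and the helper name below are made up):
-    #
-    #   "0000000000001b80 T PyList_New"    -> uppercase "T": global, checked
-    #   "0000000000002c90 t local_helper"  -> lowercase "t": local, ignored
-    #   "0000000000008a00 B __bss_start"   -> BSS section: ignored
-    #   "0000000000009b10 D _edata"        -> initialized data: ignored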
- if symtype in "bBdD": - return True - - return False - - -def get_exported_symbols(library, dynamic=False): - print(f"Check that {library} only exports symbols starting with Py or _Py") - - # Only look at dynamic symbols - args = ['nm', '--no-sort'] - if dynamic: - args.append('--dynamic') - args.append(library) - print("+ %s" % ' '.join(args)) - proc = subprocess.run(args, stdout=subprocess.PIPE, universal_newlines=True) - if proc.returncode: - sys.stdout.write(proc.stdout) - sys.exit(proc.returncode) - - stdout = proc.stdout.rstrip() - if not stdout: - raise Exception("command output is empty") - return stdout - - -def get_smelly_symbols(stdout): - smelly_symbols = [] - python_symbols = [] - local_symbols = [] - - for line in stdout.splitlines(): - # Split line '0000000000001b80 D PyTextIOWrapper_Type' - if not line: - continue - - parts = line.split(maxsplit=2) - if len(parts) < 3: - continue - - symtype = parts[1].strip() - symbol = parts[-1] - result = '%s (type: %s)' % (symbol, symtype) - - if symbol.startswith(ALLOWED_PREFIXES): - python_symbols.append(result) - continue - - if is_local_symbol_type(symtype): - local_symbols.append(result) - elif symbol in IGNORED_SYMBOLS: - local_symbols.append(result) - else: - smelly_symbols.append(result) - - if local_symbols: - print(f"Ignore {len(local_symbols)} local symbols") - return smelly_symbols, python_symbols - - -def check_library(library, dynamic=False): - nm_output = get_exported_symbols(library, dynamic) - smelly_symbols, python_symbols = get_smelly_symbols(nm_output) - - if not smelly_symbols: - print(f"OK: no smelly symbol found ({len(python_symbols)} Python symbols)") - return 0 - - print() - smelly_symbols.sort() - for symbol in smelly_symbols: - print("Smelly symbol: %s" % symbol) - - print() - print("ERROR: Found %s smelly symbols!" % len(smelly_symbols)) - return len(smelly_symbols) - - -def check_extensions(): - print(__file__) - # This assumes pybuilddir.txt is in same directory as pyconfig.h. - # In the case of out-of-tree builds, we can't assume pybuilddir.txt is - # in the source folder. 
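-    # Hypothetical example: with pyconfig.h in /home/user/cpython,
-    # pybuilddir.txt sits next to it and contains a single relative path
-    # such as "build/lib.linux-x86_64-cpython-312" (value illustrative),
-    # so extension modules are searched in
-    # /home/user/cpython/build/lib.linux-x86_64-cpython-312.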
- config_dir = os.path.dirname(sysconfig.get_config_h_filename()) - filename = os.path.join(config_dir, "pybuilddir.txt") - try: - with open(filename, encoding="utf-8") as fp: - pybuilddir = fp.readline() - except FileNotFoundError: - print(f"Cannot check extensions because {filename} does not exist") - return True - - print(f"Check extension modules from {pybuilddir} directory") - builddir = os.path.join(config_dir, pybuilddir) - nsymbol = 0 - for name in os.listdir(builddir): - if not name.endswith(".so"): - continue - if IGNORED_EXTENSION in name: - print() - print(f"Ignore extension: {name}") - continue - - print() - filename = os.path.join(builddir, name) - nsymbol += check_library(filename, dynamic=True) - - return nsymbol - - -def main(): - nsymbol = 0 - - # static library - LIBRARY = sysconfig.get_config_var('LIBRARY') - if not LIBRARY: - raise Exception("failed to get LIBRARY variable from sysconfig") - if os.path.exists(LIBRARY): - nsymbol += check_library(LIBRARY) - - # dynamic library - LDLIBRARY = sysconfig.get_config_var('LDLIBRARY') - if not LDLIBRARY: - raise Exception("failed to get LDLIBRARY variable from sysconfig") - if LDLIBRARY != LIBRARY: - print() - nsymbol += check_library(LDLIBRARY, dynamic=True) - - # Check extension modules like _ssl.cpython-310d-x86_64-linux-gnu.so - nsymbol += check_extensions() - - if nsymbol: - print() - print(f"ERROR: Found {nsymbol} smelly symbols in total!") - sys.exit(1) - - print() - print(f"OK: all exported symbols of all libraries " - f"are prefixed with {' or '.join(map(repr, ALLOWED_PREFIXES))}") - - -if __name__ == "__main__": - main() diff --git a/Tools/scripts/stable_abi.py b/Tools/scripts/stable_abi.py deleted file mode 100755 index d557e10..0000000 --- a/Tools/scripts/stable_abi.py +++ /dev/null @@ -1,754 +0,0 @@ -"""Check the stable ABI manifest or generate files from it - -By default, the tool only checks existing files/libraries. -Pass --generate to recreate auto-generated files instead. - -For actions that take a FILENAME, the filename can be left out to use a default -(relative to the manifest file, as they appear in the CPython codebase). -""" - -from functools import partial -from pathlib import Path -import dataclasses -import subprocess -import sysconfig -import argparse -import textwrap -import tomllib -import difflib -import pprint -import sys -import os -import os.path -import io -import re -import csv - -MISSING = object() - -EXCLUDED_HEADERS = { - "bytes_methods.h", - "cellobject.h", - "classobject.h", - "code.h", - "compile.h", - "datetime.h", - "dtoa.h", - "frameobject.h", - "genobject.h", - "longintrepr.h", - "parsetok.h", - "pyatomic.h", - "pytime.h", - "token.h", - "ucnhash.h", -} -MACOS = (sys.platform == "darwin") -UNIXY = MACOS or (sys.platform == "linux") # XXX should this be "not Windows"? - - -# The stable ABI manifest (Misc/stable_abi.toml) exists only to fill the -# following dataclasses. -# Feel free to change its syntax (and the `parse_manifest` function) -# to better serve that purpose (while keeping it human-readable). - -class Manifest: - """Collection of `ABIItem`s forming the stable ABI/limited API.""" - def __init__(self): - self.contents = dict() - - def add(self, item): - if item.name in self.contents: - # We assume that stable ABI items do not share names, - # even if they're different kinds (e.g. function vs. macro). 
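-            # For example (hypothetical), a manifest declaring both
-            # [function.Py_Example] and [macro.Py_Example] would trigger
-            # this error on the second add() call.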
- raise ValueError(f'duplicate ABI item {item.name}') - self.contents[item.name] = item - - def select(self, kinds, *, include_abi_only=True, ifdef=None): - """Yield selected items of the manifest - - kinds: set of requested kinds, e.g. {'function', 'macro'} - include_abi_only: if True (default), include all items of the - stable ABI. - If False, include only items from the limited API - (i.e. items people should use today) - ifdef: set of feature macros (e.g. {'HAVE_FORK', 'MS_WINDOWS'}). - If None (default), items are not filtered by this. (This is - different from the empty set, which filters out all such - conditional items.) - """ - for name, item in sorted(self.contents.items()): - if item.kind not in kinds: - continue - if item.abi_only and not include_abi_only: - continue - if (ifdef is not None - and item.ifdef is not None - and item.ifdef not in ifdef): - continue - yield item - - def dump(self): - """Yield lines to recreate the manifest file (sans comments/newlines)""" - for item in self.contents.values(): - fields = dataclasses.fields(item) - yield f"[{item.kind}.{item.name}]" - for field in fields: - if field.name in {'name', 'value', 'kind'}: - continue - value = getattr(item, field.name) - if value == field.default: - pass - elif value is True: - yield f" {field.name} = true" - elif value: - yield f" {field.name} = {value!r}" - - -itemclasses = {} -def itemclass(kind): - """Register the decorated class in `itemclasses`""" - def decorator(cls): - itemclasses[kind] = cls - return cls - return decorator - -@itemclass('function') -@itemclass('macro') -@itemclass('data') -@itemclass('const') -@itemclass('typedef') -@dataclasses.dataclass -class ABIItem: - """Information on one item (function, macro, struct, etc.)""" - - name: str - kind: str - added: str = None - abi_only: bool = False - ifdef: str = None - -@itemclass('feature_macro') -@dataclasses.dataclass(kw_only=True) -class FeatureMacro(ABIItem): - name: str - doc: str - windows: bool = False - abi_only: bool = True - -@itemclass('struct') -@dataclasses.dataclass(kw_only=True) -class Struct(ABIItem): - struct_abi_kind: str - members: list = None - - -def parse_manifest(file): - """Parse the given file (iterable of lines) to a Manifest""" - - manifest = Manifest() - - data = tomllib.load(file) - - for kind, itemclass in itemclasses.items(): - for name, item_data in data[kind].items(): - try: - item = itemclass(name=name, kind=kind, **item_data) - manifest.add(item) - except BaseException as exc: - exc.add_note(f'in {kind} {name}') - raise - - return manifest - -# The tool can run individual "actions". -# Most actions are "generators", which generate a single file from the -# manifest. (Checking works by generating a temp file & comparing.) -# Other actions, like "--unixy-check", don't work on a single file. 
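-# A minimal, self-contained sketch of the "generator" registry pattern
-# used below (the names here are hypothetical, not part of this script):
-#
-#   registry = []
-#
-#   def register(default_path):
-#       def _decorator(func):
-#           func.default_path = default_path
-#           registry.append(func)
-#           return func
-#       return _decorator
-#
-#   @register('PC/example.c')
-#   def gen_example(outfile):
-#       print('/* generated */', file=outfile)
-#
-#   # registry now holds gen_example, with its default output path attached.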
- -generators = [] -def generator(var_name, default_path): - """Decorates a file generator: function that writes to a file""" - def _decorator(func): - func.var_name = var_name - func.arg_name = '--' + var_name.replace('_', '-') - func.default_path = default_path - generators.append(func) - return func - return _decorator - - -@generator("python3dll", 'PC/python3dll.c') -def gen_python3dll(manifest, args, outfile): - """Generate/check the source for the Windows stable ABI library""" - write = partial(print, file=outfile) - write(textwrap.dedent(r""" - /* Re-export stable Python ABI */ - - /* Generated by Tools/scripts/stable_abi.py */ - - #ifdef _M_IX86 - #define DECORATE "_" - #else - #define DECORATE - #endif - - #define EXPORT_FUNC(name) \ - __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name)) - #define EXPORT_DATA(name) \ - __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name ",DATA")) - """)) - - def sort_key(item): - return item.name.lower() - - windows_feature_macros = { - item.name for item in manifest.select({'feature_macro'}) if item.windows - } - for item in sorted( - manifest.select( - {'function'}, - include_abi_only=True, - ifdef=windows_feature_macros), - key=sort_key): - write(f'EXPORT_FUNC({item.name})') - - write() - - for item in sorted( - manifest.select( - {'data'}, - include_abi_only=True, - ifdef=windows_feature_macros), - key=sort_key): - write(f'EXPORT_DATA({item.name})') - -REST_ROLES = { - 'function': 'function', - 'data': 'var', - 'struct': 'type', - 'macro': 'macro', - # 'const': 'const', # all undocumented - 'typedef': 'type', -} - -@generator("doc_list", 'Doc/data/stable_abi.dat') -def gen_doc_annotations(manifest, args, outfile): - """Generate/check the stable ABI list for documentation annotations""" - writer = csv.DictWriter( - outfile, - ['role', 'name', 'added', 'ifdef_note', 'struct_abi_kind'], - lineterminator='\n') - writer.writeheader() - for item in manifest.select(REST_ROLES.keys(), include_abi_only=False): - if item.ifdef: - ifdef_note = manifest.contents[item.ifdef].doc - else: - ifdef_note = None - row = { - 'role': REST_ROLES[item.kind], - 'name': item.name, - 'added': item.added, - 'ifdef_note': ifdef_note} - rows = [row] - if item.kind == 'struct': - row['struct_abi_kind'] = item.struct_abi_kind - for member_name in item.members or (): - rows.append({ - 'role': 'member', - 'name': f'{item.name}.{member_name}', - 'added': item.added}) - writer.writerows(rows) - -@generator("ctypes_test", 'Lib/test/test_stable_abi_ctypes.py') -def gen_ctypes_test(manifest, args, outfile): - """Generate/check the ctypes-based test for exported symbols""" - write = partial(print, file=outfile) - write(textwrap.dedent(''' - # Generated by Tools/scripts/stable_abi.py - - """Test that all symbols of the Stable ABI are accessible using ctypes - """ - - import sys - import unittest - from test.support.import_helper import import_module - from _testcapi import get_feature_macros - - feature_macros = get_feature_macros() - ctypes_test = import_module('ctypes') - - class TestStableABIAvailability(unittest.TestCase): - def test_available_symbols(self): - - for symbol_name in SYMBOL_NAMES: - with self.subTest(symbol_name): - ctypes_test.pythonapi[symbol_name] - - def test_feature_macros(self): - self.assertEqual( - set(get_feature_macros()), EXPECTED_FEATURE_MACROS) - - # The feature macros for Windows are used in creating the DLL - # definition, so they must be known on all platforms. 
- # If we are on Windows, we check that the hardcoded data matches - # the reality. - @unittest.skipIf(sys.platform != "win32", "Windows specific test") - def test_windows_feature_macros(self): - for name, value in WINDOWS_FEATURE_MACROS.items(): - if value != 'maybe': - with self.subTest(name): - self.assertEqual(feature_macros[name], value) - - SYMBOL_NAMES = ( - ''')) - items = manifest.select( - {'function', 'data'}, - include_abi_only=True, - ) - optional_items = {} - for item in items: - if item.name in ( - # Some symbols aren't exported on all platforms. - # This is a bug: https://bugs.python.org/issue44133 - 'PyModule_Create2', 'PyModule_FromDefAndSpec2', - ): - continue - if item.ifdef: - optional_items.setdefault(item.ifdef, []).append(item.name) - else: - write(f' "{item.name}",') - write(")") - for ifdef, names in optional_items.items(): - write(f"if feature_macros[{ifdef!r}]:") - write(f" SYMBOL_NAMES += (") - for name in names: - write(f" {name!r},") - write(" )") - write("") - feature_macros = list(manifest.select({'feature_macro'})) - feature_names = sorted(m.name for m in feature_macros) - write(f"EXPECTED_FEATURE_MACROS = set({pprint.pformat(feature_names)})") - - windows_feature_macros = {m.name: m.windows for m in feature_macros} - write(f"WINDOWS_FEATURE_MACROS = {pprint.pformat(windows_feature_macros)}") - - -@generator("testcapi_feature_macros", 'Modules/_testcapi_feature_macros.inc') -def gen_testcapi_feature_macros(manifest, args, outfile): - """Generate/check the stable ABI list for documentation annotations""" - write = partial(print, file=outfile) - write('// Generated by Tools/scripts/stable_abi.py') - write() - write('// Add an entry in dict `result` for each Stable ABI feature macro.') - write() - for macro in manifest.select({'feature_macro'}): - name = macro.name - write(f'#ifdef {name}') - write(f' res = PyDict_SetItemString(result, "{name}", Py_True);') - write('#else') - write(f' res = PyDict_SetItemString(result, "{name}", Py_False);') - write('#endif') - write('if (res) {') - write(' Py_DECREF(result); return NULL;') - write('}') - write() - - -def generate_or_check(manifest, args, path, func): - """Generate/check a file with a single generator - - Return True if successful; False if a comparison failed. 
- """ - - outfile = io.StringIO() - func(manifest, args, outfile) - generated = outfile.getvalue() - existing = path.read_text() - - if generated != existing: - if args.generate: - path.write_text(generated) - else: - print(f'File {path} differs from expected!') - diff = difflib.unified_diff( - generated.splitlines(), existing.splitlines(), - str(path), '', - lineterm='', - ) - for line in diff: - print(line) - return False - return True - - -def do_unixy_check(manifest, args): - """Check headers & library using "Unixy" tools (GCC/clang, binutils)""" - okay = True - - # Get all macros first: we'll need feature macros like HAVE_FORK and - # MS_WINDOWS for everything else - present_macros = gcc_get_limited_api_macros(['Include/Python.h']) - feature_macros = set(m.name for m in manifest.select({'feature_macro'})) - feature_macros &= present_macros - - # Check that we have all needed macros - expected_macros = set( - item.name for item in manifest.select({'macro'}) - ) - missing_macros = expected_macros - present_macros - okay &= _report_unexpected_items( - missing_macros, - 'Some macros from are not defined from "Include/Python.h"' - + 'with Py_LIMITED_API:') - - expected_symbols = set(item.name for item in manifest.select( - {'function', 'data'}, include_abi_only=True, ifdef=feature_macros, - )) - - # Check the static library (*.a) - LIBRARY = sysconfig.get_config_var("LIBRARY") - if not LIBRARY: - raise Exception("failed to get LIBRARY variable from sysconfig") - if os.path.exists(LIBRARY): - okay &= binutils_check_library( - manifest, LIBRARY, expected_symbols, dynamic=False) - - # Check the dynamic library (*.so) - LDLIBRARY = sysconfig.get_config_var("LDLIBRARY") - if not LDLIBRARY: - raise Exception("failed to get LDLIBRARY variable from sysconfig") - okay &= binutils_check_library( - manifest, LDLIBRARY, expected_symbols, dynamic=False) - - # Check definitions in the header files - expected_defs = set(item.name for item in manifest.select( - {'function', 'data'}, include_abi_only=False, ifdef=feature_macros, - )) - found_defs = gcc_get_limited_api_definitions(['Include/Python.h']) - missing_defs = expected_defs - found_defs - okay &= _report_unexpected_items( - missing_defs, - 'Some expected declarations were not declared in ' - + '"Include/Python.h" with Py_LIMITED_API:') - - # Some Limited API macros are defined in terms of private symbols. - # These are not part of Limited API (even though they're defined with - # Py_LIMITED_API). They must be part of the Stable ABI, though. 
- private_symbols = {n for n in expected_symbols if n.startswith('_')} - extra_defs = found_defs - expected_defs - private_symbols - okay &= _report_unexpected_items( - extra_defs, - 'Some extra declarations were found in "Include/Python.h" ' - + 'with Py_LIMITED_API:') - - return okay - - -def _report_unexpected_items(items, msg): - """If there are any `items`, report them using "msg" and return false""" - if items: - print(msg, file=sys.stderr) - for item in sorted(items): - print(' -', item, file=sys.stderr) - return False - return True - - -def binutils_get_exported_symbols(library, dynamic=False): - """Retrieve exported symbols using the nm(1) tool from binutils""" - # Only look at dynamic symbols - args = ["nm", "--no-sort"] - if dynamic: - args.append("--dynamic") - args.append(library) - proc = subprocess.run(args, stdout=subprocess.PIPE, universal_newlines=True) - if proc.returncode: - sys.stdout.write(proc.stdout) - sys.exit(proc.returncode) - - stdout = proc.stdout.rstrip() - if not stdout: - raise Exception("command output is empty") - - for line in stdout.splitlines(): - # Split line '0000000000001b80 D PyTextIOWrapper_Type' - if not line: - continue - - parts = line.split(maxsplit=2) - if len(parts) < 3: - continue - - symbol = parts[-1] - if MACOS and symbol.startswith("_"): - yield symbol[1:] - else: - yield symbol - - -def binutils_check_library(manifest, library, expected_symbols, dynamic): - """Check that library exports all expected_symbols""" - available_symbols = set(binutils_get_exported_symbols(library, dynamic)) - missing_symbols = expected_symbols - available_symbols - if missing_symbols: - print(textwrap.dedent(f"""\ - Some symbols from the limited API are missing from {library}: - {', '.join(missing_symbols)} - - This error means that there are some missing symbols among the - ones exported in the library. - This normally means that some symbol, function implementation or - a prototype belonging to a symbol in the limited API has been - deleted or is missing. - """), file=sys.stderr) - return False - return True - - -def gcc_get_limited_api_macros(headers): - """Get all limited API macros from headers. - - Runs the preprocessor over all the header files in "Include" setting - "-DPy_LIMITED_API" to the correct value for the running version of the - interpreter and extracting all macro definitions (via adding -dM to the - compiler arguments). - - Requires Python built with a GCC-compatible compiler. (clang might work) - """ - - api_hexversion = sys.version_info.major << 24 | sys.version_info.minor << 16 - - preprocesor_output_with_macros = subprocess.check_output( - sysconfig.get_config_var("CC").split() - + [ - # Prevent the expansion of the exported macros so we can - # capture them later - "-DSIZEOF_WCHAR_T=4", # The actual value is not important - f"-DPy_LIMITED_API={api_hexversion}", - "-I.", - "-I./Include", - "-dM", - "-E", - ] - + [str(file) for file in headers], - text=True, - ) - - return { - target - for target in re.findall( - r"#define (\w+)", preprocesor_output_with_macros - ) - } - - -def gcc_get_limited_api_definitions(headers): - """Get all limited API definitions from headers. - - Run the preprocessor over all the header files in "Include" setting - "-DPy_LIMITED_API" to the correct value for the running version of the - interpreter. - - The limited API symbols will be extracted from the output of this command - as it includes the prototypes and definitions of all the exported symbols - that are in the limited api. 
- - This function does *NOT* extract the macros defined on the limited API - - Requires Python built with a GCC-compatible compiler. (clang might work) - """ - api_hexversion = sys.version_info.major << 24 | sys.version_info.minor << 16 - preprocesor_output = subprocess.check_output( - sysconfig.get_config_var("CC").split() - + [ - # Prevent the expansion of the exported macros so we can capture - # them later - "-DPyAPI_FUNC=__PyAPI_FUNC", - "-DPyAPI_DATA=__PyAPI_DATA", - "-DEXPORT_DATA=__EXPORT_DATA", - "-D_Py_NO_RETURN=", - "-DSIZEOF_WCHAR_T=4", # The actual value is not important - f"-DPy_LIMITED_API={api_hexversion}", - "-I.", - "-I./Include", - "-E", - ] - + [str(file) for file in headers], - text=True, - stderr=subprocess.DEVNULL, - ) - stable_functions = set( - re.findall(r"__PyAPI_FUNC\(.*?\)\s*(.*?)\s*\(", preprocesor_output) - ) - stable_exported_data = set( - re.findall(r"__EXPORT_DATA\((.*?)\)", preprocesor_output) - ) - stable_data = set( - re.findall(r"__PyAPI_DATA\(.*?\)[\s\*\(]*([^);]*)\)?.*;", preprocesor_output) - ) - return stable_data | stable_exported_data | stable_functions - -def check_private_names(manifest): - """Ensure limited API doesn't contain private names - - Names prefixed by an underscore are private by definition. - """ - for name, item in manifest.contents.items(): - if name.startswith('_') and not item.abi_only: - raise ValueError( - f'`{name}` is private (underscore-prefixed) and should be ' - + 'removed from the stable ABI list or or marked `abi_only`') - -def check_dump(manifest, filename): - """Check that manifest.dump() corresponds to the data. - - Mainly useful when debugging this script. - """ - dumped = tomllib.loads('\n'.join(manifest.dump())) - with filename.open('rb') as file: - from_file = tomllib.load(file) - if dumped != from_file: - print(f'Dump differs from loaded data!', file=sys.stderr) - diff = difflib.unified_diff( - pprint.pformat(dumped).splitlines(), - pprint.pformat(from_file).splitlines(), - '', str(filename), - lineterm='', - ) - for line in diff: - print(line, file=sys.stderr) - return False - else: - return True - -def main(): - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - parser.add_argument( - "file", type=Path, metavar='FILE', - help="file with the stable abi manifest", - ) - parser.add_argument( - "--generate", action='store_true', - help="generate file(s), rather than just checking them", - ) - parser.add_argument( - "--generate-all", action='store_true', - help="as --generate, but generate all file(s) using default filenames." 
- + " (unlike --all, does not run any extra checks)", - ) - parser.add_argument( - "-a", "--all", action='store_true', - help="run all available checks using default filenames", - ) - parser.add_argument( - "-l", "--list", action='store_true', - help="list available generators and their default filenames; then exit", - ) - parser.add_argument( - "--dump", action='store_true', - help="dump the manifest contents (used for debugging the parser)", - ) - - actions_group = parser.add_argument_group('actions') - for gen in generators: - actions_group.add_argument( - gen.arg_name, dest=gen.var_name, - type=str, nargs="?", default=MISSING, - metavar='FILENAME', - help=gen.__doc__, - ) - actions_group.add_argument( - '--unixy-check', action='store_true', - help=do_unixy_check.__doc__, - ) - args = parser.parse_args() - - base_path = args.file.parent.parent - - if args.list: - for gen in generators: - print(f'{gen.arg_name}: {base_path / gen.default_path}') - sys.exit(0) - - run_all_generators = args.generate_all - - if args.generate_all: - args.generate = True - - if args.all: - run_all_generators = True - args.unixy_check = True - - try: - file = args.file.open('rb') - except FileNotFoundError as err: - if args.file.suffix == '.txt': - # Provide a better error message - suggestion = args.file.with_suffix('.toml') - raise FileNotFoundError( - f'{args.file} not found. Did you mean {suggestion} ?') from err - raise - with file: - manifest = parse_manifest(file) - - check_private_names(manifest) - - # Remember results of all actions (as booleans). - # At the end we'll check that at least one action was run, - # and also fail if any are false. - results = {} - - if args.dump: - for line in manifest.dump(): - print(line) - results['dump'] = check_dump(manifest, args.file) - - for gen in generators: - filename = getattr(args, gen.var_name) - if filename is None or (run_all_generators and filename is MISSING): - filename = base_path / gen.default_path - elif filename is MISSING: - continue - - results[gen.var_name] = generate_or_check(manifest, args, filename, gen) - - if args.unixy_check: - results['unixy_check'] = do_unixy_check(manifest, args) - - if not results: - if args.generate: - parser.error('No file specified. Use --help for usage.') - parser.error('No check specified. Use --help for usage.') - - failed_results = [name for name, result in results.items() if not result] - - if failed_results: - raise Exception(f""" - These checks related to the stable ABI did not succeed: - {', '.join(failed_results)} - - If you see diffs in the output, files derived from the stable - ABI manifest the were not regenerated. - Run `make regen-limited-abi` to fix this. - - Otherwise, see the error(s) above. - - The stable ABI manifest is at: {args.file} - Note that there is a process to follow when modifying it. 
-
-        You can read more about the limited API and its contracts at:
-
-        https://docs.python.org/3/c-api/stable.html
-
-        And in PEP 384:
-
-        https://peps.python.org/pep-0384/
-        """)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/Tools/scripts/umarshal.py b/Tools/scripts/umarshal.py
deleted file mode 100644
index f61570c..0000000
--- a/Tools/scripts/umarshal.py
+++ /dev/null
@@ -1,325 +0,0 @@
-# Implementation of marshal.loads() in pure Python
-
-import ast
-
-from typing import Any, Tuple
-
-
-class Type:
-    # Adapted from marshal.c
-    NULL                 = ord('0')
-    NONE                 = ord('N')
-    FALSE                = ord('F')
-    TRUE                 = ord('T')
-    STOPITER             = ord('S')
-    ELLIPSIS             = ord('.')
-    INT                  = ord('i')
-    INT64                = ord('I')
-    FLOAT                = ord('f')
-    BINARY_FLOAT         = ord('g')
-    COMPLEX              = ord('x')
-    BINARY_COMPLEX       = ord('y')
-    LONG                 = ord('l')
-    STRING               = ord('s')
-    INTERNED             = ord('t')
-    REF                  = ord('r')
-    TUPLE                = ord('(')
-    LIST                 = ord('[')
-    DICT                 = ord('{')
-    CODE                 = ord('c')
-    UNICODE              = ord('u')
-    UNKNOWN              = ord('?')
-    SET                  = ord('<')
-    FROZENSET            = ord('>')
-    ASCII                = ord('a')
-    ASCII_INTERNED       = ord('A')
-    SMALL_TUPLE          = ord(')')
-    SHORT_ASCII          = ord('z')
-    SHORT_ASCII_INTERNED = ord('Z')
-
-
-FLAG_REF = 0x80  # with a type, add obj to index
-
-NULL = object()  # marker
-
-# Cell kinds
-CO_FAST_LOCAL = 0x20
-CO_FAST_CELL = 0x40
-CO_FAST_FREE = 0x80
-
-
-class Code:
-    def __init__(self, **kwds: Any):
-        self.__dict__.update(kwds)
-
-    def __repr__(self) -> str:
-        return f"Code(**{self.__dict__})"
-
-    co_localsplusnames: Tuple[str]
-    co_localspluskinds: Tuple[int]
-
-    def get_localsplus_names(self, select_kind: int) -> Tuple[str, ...]:
-        varnames: list[str] = []
-        for name, kind in zip(self.co_localsplusnames,
-                              self.co_localspluskinds):
-            if kind & select_kind:
-                varnames.append(name)
-        return tuple(varnames)
-
-    @property
-    def co_varnames(self) -> Tuple[str, ...]:
-        return self.get_localsplus_names(CO_FAST_LOCAL)
-
-    @property
-    def co_cellvars(self) -> Tuple[str, ...]:
-        return self.get_localsplus_names(CO_FAST_CELL)
-
-    @property
-    def co_freevars(self) -> Tuple[str, ...]:
-        return self.get_localsplus_names(CO_FAST_FREE)
-
-    @property
-    def co_nlocals(self) -> int:
-        return len(self.co_varnames)
-
-
-class Reader:
-    # A fairly literal translation of the marshal reader.
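-    # A sketch of the intended round-trip (see also main() at the bottom
-    # of this file):
-    #
-    #   >>> import marshal
-    #   >>> Reader(marshal.dumps((1, "two", 3.0))).r_object()
-    #   (1, 'two', 3.0)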
-
-    def __init__(self, data: bytes):
-        self.data: bytes = data
-        self.end: int = len(self.data)
-        self.pos: int = 0
-        self.refs: list[Any] = []
-        self.level: int = 0
-
-    def r_string(self, n: int) -> bytes:
-        assert 0 <= n <= self.end - self.pos
-        buf = self.data[self.pos : self.pos + n]
-        self.pos += n
-        return buf
-
-    def r_byte(self) -> int:
-        buf = self.r_string(1)
-        return buf[0]
-
-    def r_short(self) -> int:
-        buf = self.r_string(2)
-        x = buf[0]
-        x |= buf[1] << 8
-        x |= -(x & (1<<15))  # Sign-extend
-        return x
-
-    def r_long(self) -> int:
-        buf = self.r_string(4)
-        x = buf[0]
-        x |= buf[1] << 8
-        x |= buf[2] << 16
-        x |= buf[3] << 24
-        x |= -(x & (1<<31))  # Sign-extend
-        return x
-
-    def r_long64(self) -> int:
-        buf = self.r_string(8)
-        x = buf[0]
-        x |= buf[1] << 8
-        x |= buf[2] << 16
-        x |= buf[3] << 24
-        x |= buf[4] << 32
-        x |= buf[5] << 40
-        x |= buf[6] << 48
-        x |= buf[7] << 56
-        x |= -(x & (1<<63))  # Sign-extend
-        return x
-
-    def r_PyLong(self) -> int:
-        n = self.r_long()
-        size = abs(n)
-        x = 0
-        # Pray this is right
-        for i in range(size):
-            x |= self.r_short() << i*15
-        if n < 0:
-            x = -x
-        return x
-
-    def r_float_bin(self) -> float:
-        buf = self.r_string(8)
-        import struct  # Lazy import to avoid breaking UNIX build
-        return struct.unpack("d", buf)[0]
-
-    def r_float_str(self) -> float:
-        n = self.r_byte()
-        buf = self.r_string(n)
-        return ast.literal_eval(buf.decode("ascii"))
-
-    def r_ref_reserve(self, flag: int) -> int:
-        if flag:
-            idx = len(self.refs)
-            self.refs.append(None)
-            return idx
-        else:
-            return 0
-
-    def r_ref_insert(self, obj: Any, idx: int, flag: int) -> Any:
-        if flag:
-            self.refs[idx] = obj
-        return obj
-
-    def r_ref(self, obj: Any, flag: int) -> Any:
-        assert flag & FLAG_REF
-        self.refs.append(obj)
-        return obj
-
-    def r_object(self) -> Any:
-        old_level = self.level
-        try:
-            return self._r_object()
-        finally:
-            self.level = old_level
-
-    def _r_object(self) -> Any:
-        code = self.r_byte()
-        flag = code & FLAG_REF
-        type = code & ~FLAG_REF
-        # print("  "*self.level + f"{code} {flag} {type} {chr(type)!r}")
-        self.level += 1
-
-        def R_REF(obj: Any) -> Any:
-            if flag:
-                obj = self.r_ref(obj, flag)
-            return obj
-
-        if type == Type.NULL:
-            return NULL
-        elif type == Type.NONE:
-            return None
-        elif type == Type.ELLIPSIS:
-            return Ellipsis
-        elif type == Type.FALSE:
-            return False
-        elif type == Type.TRUE:
-            return True
-        elif type == Type.INT:
-            return R_REF(self.r_long())
-        elif type == Type.INT64:
-            return R_REF(self.r_long64())
-        elif type == Type.LONG:
-            return R_REF(self.r_PyLong())
-        elif type == Type.FLOAT:
-            return R_REF(self.r_float_str())
-        elif type == Type.BINARY_FLOAT:
-            return R_REF(self.r_float_bin())
-        elif type == Type.COMPLEX:
-            return R_REF(complex(self.r_float_str(),
-                                 self.r_float_str()))
-        elif type == Type.BINARY_COMPLEX:
-            return R_REF(complex(self.r_float_bin(),
-                                 self.r_float_bin()))
-        elif type == Type.STRING:
-            n = self.r_long()
-            return R_REF(self.r_string(n))
-        elif type == Type.ASCII_INTERNED or type == Type.ASCII:
-            n = self.r_long()
-            return R_REF(self.r_string(n).decode("ascii"))
-        elif type == Type.SHORT_ASCII_INTERNED or type == Type.SHORT_ASCII:
-            n = self.r_byte()
-            return R_REF(self.r_string(n).decode("ascii"))
-        elif type == Type.INTERNED or type == Type.UNICODE:
-            n = self.r_long()
-            return R_REF(self.r_string(n).decode("utf8", "surrogatepass"))
-        elif type == Type.SMALL_TUPLE:
-            n = self.r_byte()
-            idx = self.r_ref_reserve(flag)
-            retval: Any = tuple(self.r_object() for _ in range(n))
-            self.r_ref_insert(retval, idx,
flag) - return retval - elif type == Type.TUPLE: - n = self.r_long() - idx = self.r_ref_reserve(flag) - retval = tuple(self.r_object() for _ in range(n)) - self.r_ref_insert(retval, idx, flag) - return retval - elif type == Type.LIST: - n = self.r_long() - retval = R_REF([]) - for _ in range(n): - retval.append(self.r_object()) - return retval - elif type == Type.DICT: - retval = R_REF({}) - while True: - key = self.r_object() - if key == NULL: - break - val = self.r_object() - retval[key] = val - return retval - elif type == Type.SET: - n = self.r_long() - retval = R_REF(set()) - for _ in range(n): - v = self.r_object() - retval.add(v) - return retval - elif type == Type.FROZENSET: - n = self.r_long() - s: set[Any] = set() - idx = self.r_ref_reserve(flag) - for _ in range(n): - v = self.r_object() - s.add(v) - retval = frozenset(s) - self.r_ref_insert(retval, idx, flag) - return retval - elif type == Type.CODE: - retval = R_REF(Code()) - retval.co_argcount = self.r_long() - retval.co_posonlyargcount = self.r_long() - retval.co_kwonlyargcount = self.r_long() - retval.co_stacksize = self.r_long() - retval.co_flags = self.r_long() - retval.co_code = self.r_object() - retval.co_consts = self.r_object() - retval.co_names = self.r_object() - retval.co_localsplusnames = self.r_object() - retval.co_localspluskinds = self.r_object() - retval.co_filename = self.r_object() - retval.co_name = self.r_object() - retval.co_qualname = self.r_object() - retval.co_firstlineno = self.r_long() - retval.co_linetable = self.r_object() - retval.co_exceptiontable = self.r_object() - return retval - elif type == Type.REF: - n = self.r_long() - retval = self.refs[n] - assert retval is not None - return retval - else: - breakpoint() - raise AssertionError(f"Unknown type {type} {chr(type)!r}") - - -def loads(data: bytes) -> Any: - assert isinstance(data, bytes) - r = Reader(data) - return r.r_object() - - -def main(): - # Test - import marshal, pprint - sample = {'foo': {(42, "bar", 3.14)}} - data = marshal.dumps(sample) - retval = loads(data) - assert retval == sample, retval - sample = main.__code__ - data = marshal.dumps(sample) - retval = loads(data) - assert isinstance(retval, Code), retval - pprint.pprint(retval.__dict__) - - -if __name__ == "__main__": - main() diff --git a/Tools/scripts/update_file.py b/Tools/scripts/update_file.py deleted file mode 100644 index b4182c1..0000000 --- a/Tools/scripts/update_file.py +++ /dev/null @@ -1,92 +0,0 @@ -""" -A script that replaces an old file with a new one, only if the contents -actually changed. If not, the new file is simply deleted. - -This avoids wholesale rebuilds when a code (re)generation phase does not -actually change the in-tree generated code. -""" - -import contextlib -import os -import os.path -import sys - - -@contextlib.contextmanager -def updating_file_with_tmpfile(filename, tmpfile=None): - """A context manager for updating a file via a temp file. - - The context manager provides two open files: the source file open - for reading, and the temp file, open for writing. - - Upon exiting: both files are closed, and the source file is replaced - with the temp file. - """ - # XXX Optionally use tempfile.TemporaryFile? 
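-    # Typical use, as a sketch ('transform' is a hypothetical helper and
-    # the filename is illustrative):
-    #
-    #   with updating_file_with_tmpfile("Python/example.c") as (infile, outfile):
-    #       for line in infile:
-    #           outfile.write(transform(line))
-    #
-    # The target file is replaced only if the regenerated contents differ.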
- if not tmpfile: - tmpfile = filename + '.tmp' - elif os.path.isdir(tmpfile): - tmpfile = os.path.join(tmpfile, filename + '.tmp') - - with open(filename, 'rb') as infile: - line = infile.readline() - - if line.endswith(b'\r\n'): - newline = "\r\n" - elif line.endswith(b'\r'): - newline = "\r" - elif line.endswith(b'\n'): - newline = "\n" - else: - raise ValueError(f"unknown end of line: {filename}: {line!a}") - - with open(tmpfile, 'w', newline=newline) as outfile: - with open(filename) as infile: - yield infile, outfile - update_file_with_tmpfile(filename, tmpfile) - - -def update_file_with_tmpfile(filename, tmpfile, *, create=False): - try: - targetfile = open(filename, 'rb') - except FileNotFoundError: - if not create: - raise # re-raise - outcome = 'created' - os.replace(tmpfile, filename) - else: - with targetfile: - old_contents = targetfile.read() - with open(tmpfile, 'rb') as f: - new_contents = f.read() - # Now compare! - if old_contents != new_contents: - outcome = 'updated' - os.replace(tmpfile, filename) - else: - outcome = 'same' - os.unlink(tmpfile) - return outcome - - -if __name__ == '__main__': - import argparse - parser = argparse.ArgumentParser() - parser.add_argument('--create', action='store_true') - parser.add_argument('--exitcode', action='store_true') - parser.add_argument('filename', help='path to be updated') - parser.add_argument('tmpfile', help='path with new contents') - args = parser.parse_args() - kwargs = vars(args) - setexitcode = kwargs.pop('exitcode') - - outcome = update_file_with_tmpfile(**kwargs) - if setexitcode: - if outcome == 'same': - sys.exit(0) - elif outcome == 'updated': - sys.exit(1) - elif outcome == 'created': - sys.exit(2) - else: - raise NotImplementedError diff --git a/Tools/scripts/verify_ensurepip_wheels.py b/Tools/scripts/verify_ensurepip_wheels.py deleted file mode 100755 index 044d1fd..0000000 --- a/Tools/scripts/verify_ensurepip_wheels.py +++ /dev/null @@ -1,98 +0,0 @@ -#! /usr/bin/env python3 - -""" -Compare checksums for wheels in :mod:`ensurepip` against the Cheeseshop. - -When GitHub Actions executes the script, output is formatted accordingly. 
-https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-notice-message -""" - -import hashlib -import json -import os -import re -from pathlib import Path -from urllib.request import urlopen - -PACKAGE_NAMES = ("pip", "setuptools") -ENSURE_PIP_ROOT = Path(__file__).parent.parent.parent / "Lib/ensurepip" -WHEEL_DIR = ENSURE_PIP_ROOT / "_bundled" -ENSURE_PIP_INIT_PY_TEXT = (ENSURE_PIP_ROOT / "__init__.py").read_text(encoding="utf-8") -GITHUB_ACTIONS = os.getenv("GITHUB_ACTIONS") == "true" - - -def print_notice(file_path: str, message: str) -> None: - if GITHUB_ACTIONS: - message = f"::notice file={file_path}::{message}" - print(message, end="\n\n") - - -def print_error(file_path: str, message: str) -> None: - if GITHUB_ACTIONS: - message = f"::error file={file_path}::{message}" - print(message, end="\n\n") - - -def verify_wheel(package_name: str) -> bool: - # Find the package on disk - package_path = next(WHEEL_DIR.glob(f"{package_name}*.whl"), None) - if not package_path: - print_error("", f"Could not find a {package_name} wheel on disk.") - return False - - print(f"Verifying checksum for {package_path}.") - - # Find the version of the package used by ensurepip - package_version_match = re.search( - f'_{package_name.upper()}_VERSION = "([^"]+)', ENSURE_PIP_INIT_PY_TEXT - ) - if not package_version_match: - print_error( - package_path, - f"No {package_name} version found in Lib/ensurepip/__init__.py.", - ) - return False - package_version = package_version_match[1] - - # Get the SHA 256 digest from the Cheeseshop - try: - raw_text = urlopen(f"https://pypi.org/pypi/{package_name}/json").read() - except (OSError, ValueError): - print_error(package_path, f"Could not fetch JSON metadata for {package_name}.") - return False - - release_files = json.loads(raw_text)["releases"][package_version] - for release_info in release_files: - if package_path.name != release_info["filename"]: - continue - expected_digest = release_info["digests"].get("sha256", "") - break - else: - print_error(package_path, f"No digest for {package_name} found from PyPI.") - return False - - # Compute the SHA 256 digest of the wheel on disk - actual_digest = hashlib.sha256(package_path.read_bytes()).hexdigest() - - print(f"Expected digest: {expected_digest}") - print(f"Actual digest: {actual_digest}") - - if actual_digest != expected_digest: - print_error( - package_path, f"Failed to verify the checksum of the {package_name} wheel." - ) - return False - - print_notice( - package_path, - f"Successfully verified the checksum of the {package_name} wheel.", - ) - return True - - -if __name__ == "__main__": - exit_status = 0 - for package_name in PACKAGE_NAMES: - if not verify_wheel(package_name): - exit_status = 1 - raise SystemExit(exit_status) -- cgit v0.12