diff options
author | Benjamin Peterson <benjamin@python.org> | 2017-12-09 18:26:52 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-12-09 18:26:52 (GMT) |
commit | 42aa93b8ff2f7879282b06efc73a31ec7785e602 (patch) | |
tree | 92ee301e1f487a7f5aa8ec78a36ebc50d21d6ec9 /Modules | |
parent | 28d8d14013ade0657fed4673f5fa3c08eb2b1944 (diff) | |
download | cpython-42aa93b8ff2f7879282b06efc73a31ec7785e602.zip cpython-42aa93b8ff2f7879282b06efc73a31ec7785e602.tar.gz cpython-42aa93b8ff2f7879282b06efc73a31ec7785e602.tar.bz2 |
closes bpo-31650: PEP 552 (Deterministic pycs) implementation (#4575)
Python now supports checking bytecode cache up-to-dateness with a hash of the
source contents rather than volatile source metadata. See the PEP for details.
Although the idea is fairly straightforward, quite a lot of code had to be
modified due to the pervasiveness of pyc implementation details in the codebase.
Changes in this commit include:
- The core changes to importlib to understand how to read, validate, and
regenerate hash-based pycs.
- Support for generating hash-based pycs in py_compile and compileall.
- Modifications to our siphash implementation to support passing a custom
key. The keyed hash is then exposed to importlib through _imp.
- Updates to all places in the interpreter, standard library, and tests that
manually generate or parse pyc files to grok the new format.
- Support in the interpreter command line code for long options like
--check-hash-based-pycs.
- Tests and documentation for all of the above.
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/main.c | 31 | ||||
-rw-r--r-- | Modules/zipimport.c | 16 |
2 files changed, 43 insertions, 4 deletions
diff --git a/Modules/main.c b/Modules/main.c index 4ab1ff2..e536492 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -2,6 +2,7 @@ #include "Python.h" #include "osdefs.h" +#include "internal/import.h" #include "internal/pystate.h" #include <locale.h> @@ -61,6 +62,11 @@ static int orig_argc; #define PROGRAM_OPTS BASE_OPTS +static const _PyOS_LongOption longoptions[] = { + {L"check-hash-based-pycs", 1, 0}, + {NULL, 0, 0}, +}; + /* Short usage message (with %s for argv0) */ static const char usage_line[] = "usage: %ls [option] ... [-c cmd | -m mod | file | -] [arg] ...\n"; @@ -98,6 +104,8 @@ static const char usage_3[] = "\ also PYTHONWARNINGS=arg\n\ -x : skip first line of source, allowing use of non-Unix forms of #!cmd\n\ -X opt : set implementation-specific option\n\ +--check-hash-based-pycs always|default|never:\n\ + control how Python invalidates hash-based .pyc files\n\ "; static const char usage_4[] = "\ file : program read from script file\n\ @@ -393,6 +401,7 @@ typedef struct { int quiet_flag; /* Py_QuietFlag, -q */ int skip_first_line; /* -x option */ _Py_OptList xoptions; /* -X options */ + const char *check_hash_pycs_mode; /* --check-hash-based-pycs */ #ifdef MS_WINDOWS int legacy_windows_fs_encoding; /* Py_LegacyWindowsFSEncodingFlag, PYTHONLEGACYWINDOWSFSENCODING */ @@ -577,7 +586,9 @@ pymain_parse_cmdline_impl(_PyMain *pymain) _PyOS_ResetGetOpt(); do { - int c = _PyOS_GetOpt(pymain->argc, pymain->argv, PROGRAM_OPTS); + int longindex = -1; + int c = _PyOS_GetOpt(pymain->argc, pymain->argv, PROGRAM_OPTS, + longoptions, &longindex); if (c == EOF) { break; } @@ -608,6 +619,22 @@ pymain_parse_cmdline_impl(_PyMain *pymain) } switch (c) { + case 0: + // Handle long option. + assert(longindex == 0); // Only one long option now. 
+ if (!wcscmp(_PyOS_optarg, L"always")) { + cmdline->check_hash_pycs_mode = "always"; + } else if (!wcscmp(_PyOS_optarg, L"never")) { + cmdline->check_hash_pycs_mode = "never"; + } else if (!wcscmp(_PyOS_optarg, L"default")) { + cmdline->check_hash_pycs_mode = "default"; + } else { + fprintf(stderr, "--check-hash-based-pycs must be one of " + "'default', 'always', or 'never'\n"); + return 1; + } + break; + case 'b': cmdline->bytes_warning++; break; @@ -1085,6 +1112,8 @@ pymain_set_global_config(_PyMain *pymain) pymain_set_flag(&Py_UnbufferedStdioFlag, cmdline->use_unbuffered_io); pymain_set_flag(&Py_VerboseFlag, cmdline->verbosity); pymain_set_flag(&Py_QuietFlag, cmdline->quiet_flag); + if (cmdline->check_hash_pycs_mode) + _Py_CheckHashBasedPycsMode = cmdline->check_hash_pycs_mode; #ifdef MS_WINDOWS pymain_set_flag(&Py_LegacyWindowsFSEncodingFlag, cmdline->legacy_windows_fs_encoding); pymain_set_flag(&Py_LegacyWindowsStdioFlag, cmdline->legacy_windows_stdio); diff --git a/Modules/zipimport.c b/Modules/zipimport.c index 009480b..1d0e0ba 100644 --- a/Modules/zipimport.c +++ b/Modules/zipimport.c @@ -1,4 +1,5 @@ #include "Python.h" +#include "internal/import.h" #include "internal/pystate.h" #include "structmember.h" #include "osdefs.h" @@ -1305,7 +1306,7 @@ unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime) unsigned char *buf = (unsigned char *)PyBytes_AsString(data); Py_ssize_t size = PyBytes_Size(data); - if (size < 12) { + if (size < 16) { PyErr_SetString(ZipImportError, "bad pyc data"); return NULL; @@ -1319,7 +1320,16 @@ unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime) Py_RETURN_NONE; /* signal caller to try alternative */ } - if (mtime != 0 && !eq_mtime(get_uint32(buf + 4), mtime)) { + uint32_t flags = get_uint32(buf + 4); + if (flags != 0) { + // Hash-based pyc. We currently refuse to handle checked hash-based + // pycs. 
We could validate hash-based pycs against the source, but it + // seems likely that most people putting hash-based pycs in a zipfile + // will use unchecked ones. + if (strcmp(_Py_CheckHashBasedPycsMode, "never") && + (flags != 0x1 || !strcmp(_Py_CheckHashBasedPycsMode, "always"))) + Py_RETURN_NONE; + } else if ((mtime != 0 && !eq_mtime(get_uint32(buf + 8), mtime))) { if (Py_VerboseFlag) { PySys_FormatStderr("# %R has bad mtime\n", pathname); @@ -1329,7 +1339,7 @@ unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime) /* XXX the pyc's size field is ignored; timestamp collisions are probably unimportant with zip files. */ - code = PyMarshal_ReadObjectFromString((char *)buf + 12, size - 12); + code = PyMarshal_ReadObjectFromString((char *)buf + 16, size - 16); if (code == NULL) { return NULL; } |