summary refs log tree commit diff stats
path: root/Modules
diff options
context:
space:
mode:
author Benjamin Peterson <benjamin@python.org> 2017-12-09 18:26:52 (GMT)
committer GitHub <noreply@github.com> 2017-12-09 18:26:52 (GMT)
commit 42aa93b8ff2f7879282b06efc73a31ec7785e602 (patch)
tree 92ee301e1f487a7f5aa8ec78a36ebc50d21d6ec9 /Modules
parent 28d8d14013ade0657fed4673f5fa3c08eb2b1944 (diff)
download cpython-42aa93b8ff2f7879282b06efc73a31ec7785e602.zip
cpython-42aa93b8ff2f7879282b06efc73a31ec7785e602.tar.gz
cpython-42aa93b8ff2f7879282b06efc73a31ec7785e602.tar.bz2
closes bpo-31650: PEP 552 (Deterministic pycs) implementation (#4575)
Python now supports checking bytecode cache up-to-dateness with a hash of the source contents rather than volatile source metadata. See the PEP for details.

While a fairly straightforward idea, quite a lot of code had to be modified due to the pervasiveness of pyc implementation details in the codebase. Changes in this commit include:

- The core changes to importlib to understand how to read, validate, and regenerate hash-based pycs.
- Support for generating hash-based pycs in py_compile and compileall.
- Modifications to our siphash implementation to support passing a custom key. We then expose it to importlib through _imp.
- Updates to all places in the interpreter, standard library, and tests that manually generate or parse pyc files to grok the new format.
- Support in the interpreter command line code for long options like --check-hash-based-pycs.
- Tests and documentation for all of the above.
Diffstat (limited to 'Modules')
-rw-r--r-- Modules/main.c 31
-rw-r--r-- Modules/zipimport.c 16
2 files changed, 43 insertions, 4 deletions
diff --git a/Modules/main.c b/Modules/main.c
index 4ab1ff2..e536492 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -2,6 +2,7 @@
#include "Python.h"
#include "osdefs.h"
+#include "internal/import.h"
#include "internal/pystate.h"
#include <locale.h>
@@ -61,6 +62,11 @@ static int orig_argc;
#define PROGRAM_OPTS BASE_OPTS
+static const _PyOS_LongOption longoptions[] = {
+ {L"check-hash-based-pycs", 1, 0},
+ {NULL, 0, 0},
+};
+
/* Short usage message (with %s for argv0) */
static const char usage_line[] =
"usage: %ls [option] ... [-c cmd | -m mod | file | -] [arg] ...\n";
@@ -98,6 +104,8 @@ static const char usage_3[] = "\
also PYTHONWARNINGS=arg\n\
-x : skip first line of source, allowing use of non-Unix forms of #!cmd\n\
-X opt : set implementation-specific option\n\
+--check-hash-based-pycs always|default|never:\n\
+ control how Python invalidates hash-based .pyc files\n\
";
static const char usage_4[] = "\
file : program read from script file\n\
@@ -393,6 +401,7 @@ typedef struct {
int quiet_flag; /* Py_QuietFlag, -q */
int skip_first_line; /* -x option */
_Py_OptList xoptions; /* -X options */
+ const char *check_hash_pycs_mode; /* --check-hash-based-pycs */
#ifdef MS_WINDOWS
int legacy_windows_fs_encoding; /* Py_LegacyWindowsFSEncodingFlag,
PYTHONLEGACYWINDOWSFSENCODING */
@@ -577,7 +586,9 @@ pymain_parse_cmdline_impl(_PyMain *pymain)
_PyOS_ResetGetOpt();
do {
- int c = _PyOS_GetOpt(pymain->argc, pymain->argv, PROGRAM_OPTS);
+ int longindex = -1;
+ int c = _PyOS_GetOpt(pymain->argc, pymain->argv, PROGRAM_OPTS,
+ longoptions, &longindex);
if (c == EOF) {
break;
}
@@ -608,6 +619,22 @@ pymain_parse_cmdline_impl(_PyMain *pymain)
}
switch (c) {
+ case 0:
+ // Handle long option.
+ assert(longindex == 0); // Only one long option now.
+ if (!wcscmp(_PyOS_optarg, L"always")) {
+ cmdline->check_hash_pycs_mode = "always";
+ } else if (!wcscmp(_PyOS_optarg, L"never")) {
+ cmdline->check_hash_pycs_mode = "never";
+ } else if (!wcscmp(_PyOS_optarg, L"default")) {
+ cmdline->check_hash_pycs_mode = "default";
+ } else {
+ fprintf(stderr, "--check-hash-based-pycs must be one of "
+ "'default', 'always', or 'never'\n");
+ return 1;
+ }
+ break;
+
case 'b':
cmdline->bytes_warning++;
break;
@@ -1085,6 +1112,8 @@ pymain_set_global_config(_PyMain *pymain)
pymain_set_flag(&Py_UnbufferedStdioFlag, cmdline->use_unbuffered_io);
pymain_set_flag(&Py_VerboseFlag, cmdline->verbosity);
pymain_set_flag(&Py_QuietFlag, cmdline->quiet_flag);
+ if (cmdline->check_hash_pycs_mode)
+ _Py_CheckHashBasedPycsMode = cmdline->check_hash_pycs_mode;
#ifdef MS_WINDOWS
pymain_set_flag(&Py_LegacyWindowsFSEncodingFlag, cmdline->legacy_windows_fs_encoding);
pymain_set_flag(&Py_LegacyWindowsStdioFlag, cmdline->legacy_windows_stdio);
diff --git a/Modules/zipimport.c b/Modules/zipimport.c
index 009480b..1d0e0ba 100644
--- a/Modules/zipimport.c
+++ b/Modules/zipimport.c
@@ -1,4 +1,5 @@
#include "Python.h"
+#include "internal/import.h"
#include "internal/pystate.h"
#include "structmember.h"
#include "osdefs.h"
@@ -1305,7 +1306,7 @@ unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime)
unsigned char *buf = (unsigned char *)PyBytes_AsString(data);
Py_ssize_t size = PyBytes_Size(data);
- if (size < 12) {
+ if (size < 16) {
PyErr_SetString(ZipImportError,
"bad pyc data");
return NULL;
@@ -1319,7 +1320,16 @@ unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime)
Py_RETURN_NONE; /* signal caller to try alternative */
}
- if (mtime != 0 && !eq_mtime(get_uint32(buf + 4), mtime)) {
+ uint32_t flags = get_uint32(buf + 4);
+ if (flags != 0) {
+ // Hash-based pyc. We currently refuse to handle checked hash-based
+ // pycs. We could validate hash-based pycs against the source, but it
+ // seems likely that most people putting hash-based pycs in a zipfile
+ // will use unchecked ones.
+ if (strcmp(_Py_CheckHashBasedPycsMode, "never") &&
+ (flags != 0x1 || !strcmp(_Py_CheckHashBasedPycsMode, "always")))
+ Py_RETURN_NONE;
+ } else if ((mtime != 0 && !eq_mtime(get_uint32(buf + 8), mtime))) {
if (Py_VerboseFlag) {
PySys_FormatStderr("# %R has bad mtime\n",
pathname);
@@ -1329,7 +1339,7 @@ unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime)
/* XXX the pyc's size field is ignored; timestamp collisions are probably
unimportant with zip files. */
- code = PyMarshal_ReadObjectFromString((char *)buf + 12, size - 12);
+ code = PyMarshal_ReadObjectFromString((char *)buf + 16, size - 16);
if (code == NULL) {
return NULL;
}