summaryrefslogtreecommitdiffstats
path: root/Python/import.c
diff options
context:
space:
mode:
authorBenjamin Peterson <benjamin@python.org>2017-12-09 18:26:52 (GMT)
committerGitHub <noreply@github.com>2017-12-09 18:26:52 (GMT)
commit42aa93b8ff2f7879282b06efc73a31ec7785e602 (patch)
tree92ee301e1f487a7f5aa8ec78a36ebc50d21d6ec9 /Python/import.c
parent28d8d14013ade0657fed4673f5fa3c08eb2b1944 (diff)
downloadcpython-42aa93b8ff2f7879282b06efc73a31ec7785e602.zip
cpython-42aa93b8ff2f7879282b06efc73a31ec7785e602.tar.gz
cpython-42aa93b8ff2f7879282b06efc73a31ec7785e602.tar.bz2
closes bpo-31650: PEP 552 (Deterministic pycs) implementation (#4575)
Python now supports checking bytecode cache up-to-dateness with a hash of the source contents rather than volatile source metadata. See the PEP for details. While a fairly straightforward idea, quite a lot of code had to be modified due to the pervasiveness of pyc implementation details in the codebase. Changes in this commit include: - The core changes to importlib to understand how to read, validate, and regenerate hash-based pycs. - Support for generating hash-based pycs in py_compile and compileall. - Modifications to our siphash implementation to support passing a custom key. We then expose it to importlib through _imp. - Updates to all places in the interpreter, standard library, and tests that manually generate or parse pyc files to grok the new format. - Support in the interpreter command line code for long options like --check-hash-based-pycs. - Tests and documentation for all of the above.
Diffstat (limited to 'Python/import.c')
-rw-r--r--Python/import.c42
1 files changed, 42 insertions, 0 deletions
diff --git a/Python/import.c b/Python/import.c
index dc79685..b2d7511 100644
--- a/Python/import.c
+++ b/Python/import.c
@@ -5,6 +5,8 @@
#include "Python-ast.h"
#undef Yield /* undefine macro conflicting with winbase.h */
+#include "internal/hash.h"
+#include "internal/import.h"
#include "internal/pystate.h"
#include "errcode.h"
#include "marshal.h"
@@ -2184,6 +2186,34 @@ _imp_exec_builtin_impl(PyObject *module, PyObject *mod)
return exec_builtin_or_dynamic(mod);
}
+/*[clinic input]
+_imp.source_hash
+
+ key: long
+ source: Py_buffer
+[clinic start generated code]*/
+
+static PyObject *
+_imp_source_hash_impl(PyObject *module, long key, Py_buffer *source)
+/*[clinic end generated code: output=edb292448cf399ea input=9aaad1e590089789]*/
+{
+    /* Hash the contents of `source` with _Py_KeyedHash, using `key` as the
+       hash key, and return the 64-bit result as an 8-byte bytes object.
+       The bytes are always emitted in little-endian order so the value does
+       not depend on the byte order of the host.
+       Returns NULL if PyBytes_FromStringAndSize fails. */
+    union {
+        uint64_t x;
+        unsigned char data[sizeof(uint64_t)];
+    } hash;
+    hash.x = _Py_KeyedHash((uint64_t)key, source->buf, source->len);
+#if !PY_LITTLE_ENDIAN
+    // Force to little-endian. There really ought to be a succinct standard way
+    // to do this.
+    // Swap mirrored byte pairs and stop at the midpoint: walking the whole
+    // array while only assigning one side of each pair would overwrite the
+    // upper half before it is read and yield a palindrome, not a reversal.
+    for (size_t i = 0; i < sizeof(hash.data) / 2; i++) {
+        unsigned char tmp = hash.data[i];
+        hash.data[i] = hash.data[sizeof(hash.data) - i - 1];
+        hash.data[sizeof(hash.data) - i - 1] = tmp;
+    }
+#endif
+    return PyBytes_FromStringAndSize((const char *)hash.data,
+                                     sizeof(hash.data));
+}
+
PyDoc_STRVAR(doc_imp,
"(Extremely) low-level import machinery bits as used by importlib and imp.");
@@ -2203,6 +2233,7 @@ static PyMethodDef imp_methods[] = {
_IMP_EXEC_DYNAMIC_METHODDEF
_IMP_EXEC_BUILTIN_METHODDEF
_IMP__FIX_CO_FILENAME_METHODDEF
+ _IMP_SOURCE_HASH_METHODDEF
{NULL, NULL} /* sentinel */
};
@@ -2219,6 +2250,8 @@ static struct PyModuleDef impmodule = {
NULL
};
+const char *_Py_CheckHashBasedPycsMode = "default"; /* Published by PyInit_imp in the module dict as "check_hash_based_pycs". */
+
PyMODINIT_FUNC
PyInit_imp(void)
{
@@ -2230,6 +2263,15 @@ PyInit_imp(void)
d = PyModule_GetDict(m);
if (d == NULL)
goto failure;
+ PyObject *pyc_mode = PyUnicode_FromString(_Py_CheckHashBasedPycsMode);
+ if (pyc_mode == NULL) {
+ goto failure;
+ }
+ if (PyDict_SetItemString(d, "check_hash_based_pycs", pyc_mode) < 0) {
+ Py_DECREF(pyc_mode);
+ goto failure;
+ }
+ Py_DECREF(pyc_mode);
return m;
failure: