diff options
author | Benjamin Peterson <benjamin@python.org> | 2017-12-09 18:26:52 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-12-09 18:26:52 (GMT) |
commit | 42aa93b8ff2f7879282b06efc73a31ec7785e602 (patch) | |
tree | 92ee301e1f487a7f5aa8ec78a36ebc50d21d6ec9 /Python/import.c | |
parent | 28d8d14013ade0657fed4673f5fa3c08eb2b1944 (diff) | |
download | cpython-42aa93b8ff2f7879282b06efc73a31ec7785e602.zip cpython-42aa93b8ff2f7879282b06efc73a31ec7785e602.tar.gz cpython-42aa93b8ff2f7879282b06efc73a31ec7785e602.tar.bz2 |
closes bpo-31650: PEP 552 (Deterministic pycs) implementation (#4575)
Python now supports checking bytecode cache up-to-dateness with a hash of the
source contents rather than volatile source metadata. See the PEP for details.
While a fairly straightforward idea, quite a lot of code had to be modified due
to the pervasiveness of pyc implementation details in the codebase. Changes in
this commit include:
- The core changes to importlib to understand how to read, validate, and
regenerate hash-based pycs.
- Support for generating hash-based pycs in py_compile and compileall.
- Modifications to our siphash implementation to support passing a custom
key. We then expose it to importlib through _imp.
- Updates to all places in the interpreter, standard library, and tests that
manually generate or parse pyc files to grok the new format.
- Support in the interpreter command line code for long options like
--check-hash-based-pycs.
- Tests and documentation for all of the above.
Diffstat (limited to 'Python/import.c')
-rw-r--r-- | Python/import.c | 42 |
1 files changed, 42 insertions, 0 deletions
diff --git a/Python/import.c b/Python/import.c
index dc79685..b2d7511 100644
--- a/Python/import.c
+++ b/Python/import.c
@@ -5,6 +5,8 @@
 
 #include "Python-ast.h"
 #undef Yield /* undefine macro conflicting with winbase.h */
+#include "internal/hash.h"
+#include "internal/import.h"
 #include "internal/pystate.h"
 #include "errcode.h"
 #include "marshal.h"
@@ -2184,6 +2186,34 @@ _imp_exec_builtin_impl(PyObject *module, PyObject *mod)
     return exec_builtin_or_dynamic(mod);
 }
 
+/*[clinic input]
+_imp.source_hash
+
+    key: long
+    source: Py_buffer
+[clinic start generated code]*/
+
+static PyObject *
+_imp_source_hash_impl(PyObject *module, long key, Py_buffer *source)
+/*[clinic end generated code: output=edb292448cf399ea input=9aaad1e590089789]*/
+{
+    uint64_t hash = _Py_KeyedHash((uint64_t)key, source->buf, source->len);
+#if !PY_LITTLE_ENDIAN
+    // Force to little-endian: store byte i of the value at offset i, so the
+    // least-significant byte comes first regardless of host byte order.
+    union {
+        uint64_t x;
+        unsigned char data[sizeof(uint64_t)];
+    } pun;
+    pun.x = hash;
+    for (size_t i = 0; i < sizeof(pun.data); i++) {
+        pun.data[i] = (unsigned char)(hash >> (8 * i));
+    }
+    hash = pun.x;
+#endif
+    return PyBytes_FromStringAndSize((const char *)&hash, sizeof(hash));
+}
+
 PyDoc_STRVAR(doc_imp,
 "(Extremely) low-level import machinery bits as used by importlib and imp.");
 
@@ -2203,6 +2233,7 @@ static PyMethodDef imp_methods[] = {
     _IMP_EXEC_DYNAMIC_METHODDEF
     _IMP_EXEC_BUILTIN_METHODDEF
     _IMP__FIX_CO_FILENAME_METHODDEF
+    _IMP_SOURCE_HASH_METHODDEF
     {NULL, NULL} /* sentinel */
 };
 
@@ -2219,6 +2250,8 @@ static struct PyModuleDef impmodule = {
     NULL
 };
 
+const char *_Py_CheckHashBasedPycsMode = "default";
+
 PyMODINIT_FUNC
 PyInit_imp(void)
 {
@@ -2230,6 +2263,15 @@ PyInit_imp(void)
     d = PyModule_GetDict(m);
     if (d == NULL)
         goto failure;
+    PyObject *pyc_mode = PyUnicode_FromString(_Py_CheckHashBasedPycsMode);
+    if (pyc_mode == NULL) {
+        goto failure;
+    }
+    if (PyDict_SetItemString(d, "check_hash_based_pycs", pyc_mode) < 0) {
+        Py_DECREF(pyc_mode);
+        goto failure;
+    }
+    Py_DECREF(pyc_mode);
 
     return m;
   failure: