diff options
author | Christian Heimes <christian@python.org> | 2016-09-06 20:03:25 (GMT) |
---|---|---|
committer | Christian Heimes <christian@python.org> | 2016-09-06 20:03:25 (GMT) |
commit | 121b9487d13b7dcd5a864a6067c4a5c88ba15b5d (patch) | |
tree | b2ac9866afda319cb6582de045c47f1a023d1973 /Modules | |
parent | 5d75f441ef0a3cfe7af0d865db2530528e424818 (diff) | |
download | cpython-121b9487d13b7dcd5a864a6067c4a5c88ba15b5d.zip cpython-121b9487d13b7dcd5a864a6067c4a5c88ba15b5d.tar.gz cpython-121b9487d13b7dcd5a864a6067c4a5c88ba15b5d.tar.bz2 |
Issue #26798: Add BLAKE2 (blake2b and blake2s) to hashlib.
Diffstat (limited to 'Modules')
22 files changed, 4650 insertions, 8 deletions
diff --git a/Modules/_blake2/blake2b2s.py b/Modules/_blake2/blake2b2s.py new file mode 100755 index 0000000..01cf265 --- /dev/null +++ b/Modules/_blake2/blake2b2s.py @@ -0,0 +1,49 @@ +#!/usr/bin/python3 + +import os +import re + +HERE = os.path.dirname(os.path.abspath(__file__)) +BLAKE2 = os.path.join(HERE, 'impl') + +PUBLIC_SEARCH = re.compile(r'\ int (blake2[bs]p?[a-z_]*)\(') + + +def getfiles(): + for name in os.listdir(BLAKE2): + name = os.path.join(BLAKE2, name) + if os.path.isfile(name): + yield name + + +def find_public(): + public_funcs = set() + for name in getfiles(): + with open(name) as f: + for line in f: + # find public functions + mo = PUBLIC_SEARCH.search(line) + if mo: + public_funcs.add(mo.group(1)) + + for f in sorted(public_funcs): + print('#define {0:<18} PyBlake2_{0}'.format(f)) + + return public_funcs + + +def main(): + lines = [] + with open(os.path.join(HERE, 'blake2b_impl.c')) as f: + for line in f: + line = line.replace('blake2b', 'blake2s') + line = line.replace('BLAKE2b', 'BLAKE2s') + line = line.replace('BLAKE2B', 'BLAKE2S') + lines.append(line) + with open(os.path.join(HERE, 'blake2s_impl.c'), 'w') as f: + f.write(''.join(lines)) + # find_public() + + +if __name__ == '__main__': + main() diff --git a/Modules/_blake2/blake2b_impl.c b/Modules/_blake2/blake2b_impl.c new file mode 100644 index 0000000..1c67744 --- /dev/null +++ b/Modules/_blake2/blake2b_impl.c @@ -0,0 +1,460 @@ +/* + * Written in 2013 by Dmitry Chestnykh <dmitry@codingrobots.com> + * Modified for CPython by Christian Heimes <christian@python.org> + * + * To the extent possible under law, the author have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. http://creativecommons.org/publicdomain/zero/1.0/ + */ + +/* WARNING: autogenerated file! + * + * The blake2s_impl.c is autogenerated from blake2b_impl.c. + */ + +#include "Python.h" +#include "pystrhex.h" +#ifdef WITH_THREAD +#include "pythread.h" +#endif + +#include "../hashlib.h" +#include "blake2ns.h" + +#define HAVE_BLAKE2B 1 +#define BLAKE2_LOCAL_INLINE(type) Py_LOCAL_INLINE(type) + +#include "impl/blake2.h" +#include "impl/blake2-impl.h" /* for secure_zero_memory() and store48() */ + +#ifdef BLAKE2_USE_SSE +#include "impl/blake2b.c" +#else +#include "impl/blake2b-ref.c" +#endif + + +extern PyTypeObject PyBlake2_BLAKE2bType; + +typedef struct { + PyObject_HEAD + blake2b_param param; + blake2b_state state; +#ifdef WITH_THREAD + PyThread_type_lock lock; +#endif +} BLAKE2bObject; + +#include "clinic/blake2b_impl.c.h" + +/*[clinic input] +module _blake2b +class _blake2b.blake2b "BLAKE2bObject *" "&PyBlake2_BLAKE2bType" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6893358c6622aecf]*/ + + +static BLAKE2bObject * +new_BLAKE2bObject(PyTypeObject *type) +{ + BLAKE2bObject *self; + self = (BLAKE2bObject *)type->tp_alloc(type, 0); +#ifdef WITH_THREAD + if (self != NULL) { + self->lock = NULL; + } +#endif + return self; +} + +/*[clinic input] +@classmethod +_blake2b.blake2b.__new__ as py_blake2b_new + string as data: object = NULL + * + digest_size: int(c_default="BLAKE2B_OUTBYTES") = _blake2b.blake2b.MAX_DIGEST_SIZE + key: Py_buffer = None + salt: Py_buffer = None + person: Py_buffer = None + fanout: int = 1 + depth: int = 1 + leaf_size as leaf_size_obj: object = NULL + node_offset as node_offset_obj: object = NULL + node_depth: int = 0 + inner_size: int = 0 + last_node: bool = False + +Return a new BLAKE2b hash object. +[clinic start generated code]*/ + +static PyObject * +py_blake2b_new_impl(PyTypeObject *type, PyObject *data, int digest_size, + Py_buffer *key, Py_buffer *salt, Py_buffer *person, + int fanout, int depth, PyObject *leaf_size_obj, + PyObject *node_offset_obj, int node_depth, + int inner_size, int last_node) +/*[clinic end generated code: output=7506d8d890e5f13b input=e41548dfa0866031]*/ +{ + BLAKE2bObject *self = NULL; + Py_buffer buf; + + unsigned long leaf_size = 0; + unsigned PY_LONG_LONG node_offset = 0; + + self = new_BLAKE2bObject(type); + if (self == NULL) { + goto error; + } + + /* Zero parameter block. */ + memset(&self->param, 0, sizeof(self->param)); + + /* Set digest size. */ + if (digest_size <= 0 || digest_size > BLAKE2B_OUTBYTES) { + PyErr_Format(PyExc_ValueError, + "digest_size must be between 1 and %d bytes", + BLAKE2B_OUTBYTES); + goto error; + } + self->param.digest_length = digest_size; + + /* Set salt parameter. */ + if ((salt->obj != NULL) && salt->len) { + if (salt->len > BLAKE2B_SALTBYTES) { + PyErr_Format(PyExc_ValueError, + "maximum salt length is %d bytes", + BLAKE2B_SALTBYTES); + goto error; + } + memcpy(self->param.salt, salt->buf, salt->len); + } + + /* Set personalization parameter. */ + if ((person->obj != NULL) && person->len) { + if (person->len > BLAKE2B_PERSONALBYTES) { + PyErr_Format(PyExc_ValueError, + "maximum person length is %d bytes", + BLAKE2B_PERSONALBYTES); + goto error; + } + memcpy(self->param.personal, person->buf, person->len); + } + + /* Set tree parameters. */ + if (fanout < 0 || fanout > 255) { + PyErr_SetString(PyExc_ValueError, + "fanout must be between 0 and 255"); + goto error; + } + self->param.fanout = (uint8_t)fanout; + + if (depth <= 0 || depth > 255) { + PyErr_SetString(PyExc_ValueError, + "depth must be between 1 and 255"); + goto error; + } + self->param.depth = (uint8_t)depth; + + if (leaf_size_obj != NULL) { + leaf_size = PyLong_AsUnsignedLong(leaf_size_obj); + if (leaf_size == (unsigned long) -1 && PyErr_Occurred()) { + goto error; + } + if (leaf_size > 0xFFFFFFFFU) { + PyErr_SetString(PyExc_OverflowError, "leaf_size is too large"); + goto error; + } + } + self->param.leaf_length = (unsigned int)leaf_size; + + if (node_offset_obj != NULL) { + node_offset = PyLong_AsUnsignedLongLong(node_offset_obj); + if (node_offset == (unsigned PY_LONG_LONG) -1 && PyErr_Occurred()) { + goto error; + } + } +#ifdef HAVE_BLAKE2S + if (node_offset > 0xFFFFFFFFFFFFULL) { + /* maximum 2**48 - 1 */ + PyErr_SetString(PyExc_OverflowError, "node_offset is too large"); + goto error; + } + store48(&(self->param.node_offset), node_offset); +#else + self->param.node_offset = node_offset; +#endif + + if (node_depth < 0 || node_depth > 255) { + PyErr_SetString(PyExc_ValueError, + "node_depth must be between 0 and 255"); + goto error; + } + self->param.node_depth = node_depth; + + if (inner_size < 0 || inner_size > BLAKE2B_OUTBYTES) { + PyErr_Format(PyExc_ValueError, + "inner_size must be between 0 and is %d", + BLAKE2B_OUTBYTES); + goto error; + } + self->param.inner_length = inner_size; + + /* Set key length. */ + if ((key->obj != NULL) && key->len) { + if (key->len > BLAKE2B_KEYBYTES) { + PyErr_Format(PyExc_ValueError, + "maximum key length is %d bytes", + BLAKE2B_KEYBYTES); + goto error; + } + self->param.key_length = key->len; + } + + /* Initialize hash state. */ + if (blake2b_init_param(&self->state, &self->param) < 0) { + PyErr_SetString(PyExc_RuntimeError, + "error initializing hash state"); + goto error; + } + + /* Set last node flag (must come after initialization). */ + self->state.last_node = last_node; + + /* Process key block if any. */ + if (self->param.key_length) { + uint8_t block[BLAKE2B_BLOCKBYTES]; + memset(block, 0, sizeof(block)); + memcpy(block, key->buf, key->len); + blake2b_update(&self->state, block, sizeof(block)); + secure_zero_memory(block, sizeof(block)); + } + + /* Process initial data if any. */ + if (data != NULL) { + GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); + + if (buf.len >= HASHLIB_GIL_MINSIZE) { + Py_BEGIN_ALLOW_THREADS + blake2b_update(&self->state, buf.buf, buf.len); + Py_END_ALLOW_THREADS + } else { + blake2b_update(&self->state, buf.buf, buf.len); + } + PyBuffer_Release(&buf); + } + + return (PyObject *)self; + + error: + if (self != NULL) { + Py_DECREF(self); + } + return NULL; +} + +/*[clinic input] +_blake2b.blake2b.copy + +Return a copy of the hash object. +[clinic start generated code]*/ + +static PyObject * +_blake2b_blake2b_copy_impl(BLAKE2bObject *self) +/*[clinic end generated code: output=c89cd33550ab1543 input=4c9c319f18f10747]*/ +{ + BLAKE2bObject *cpy; + + if ((cpy = new_BLAKE2bObject(Py_TYPE(self))) == NULL) + return NULL; + + ENTER_HASHLIB(self); + cpy->param = self->param; + cpy->state = self->state; + LEAVE_HASHLIB(self); + return (PyObject *)cpy; +} + +/*[clinic input] +_blake2b.blake2b.update + + obj: object + / + +Update this hash object's state with the provided string. +[clinic start generated code]*/ + +static PyObject * +_blake2b_blake2b_update(BLAKE2bObject *self, PyObject *obj) +/*[clinic end generated code: output=a888f07c4cddbe94 input=3ecb8c13ee4260f2]*/ +{ + Py_buffer buf; + + GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); + +#ifdef WITH_THREAD + if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) + self->lock = PyThread_allocate_lock(); + + if (self->lock != NULL) { + Py_BEGIN_ALLOW_THREADS + PyThread_acquire_lock(self->lock, 1); + blake2b_update(&self->state, buf.buf, buf.len); + PyThread_release_lock(self->lock); + Py_END_ALLOW_THREADS + } else { + blake2b_update(&self->state, buf.buf, buf.len); + } +#else + blake2b_update(&self->state, buf.buf, buf.len); +#endif /* !WITH_THREAD */ + PyBuffer_Release(&buf); + + Py_INCREF(Py_None); + return Py_None; +} + +/*[clinic input] +_blake2b.blake2b.digest + +Return the digest value as a string of binary data. +[clinic start generated code]*/ + +static PyObject * +_blake2b_blake2b_digest_impl(BLAKE2bObject *self) +/*[clinic end generated code: output=b13a79360d984740 input=ac2fa462ebb1b9c7]*/ +{ + uint8_t digest[BLAKE2B_OUTBYTES]; + blake2b_state state_cpy; + + ENTER_HASHLIB(self); + state_cpy = self->state; + blake2b_final(&state_cpy, digest, self->param.digest_length); + LEAVE_HASHLIB(self); + return PyBytes_FromStringAndSize((const char *)digest, + self->param.digest_length); +} + +/*[clinic input] +_blake2b.blake2b.hexdigest + +Return the digest value as a string of hexadecimal digits. +[clinic start generated code]*/ + +static PyObject * +_blake2b_blake2b_hexdigest_impl(BLAKE2bObject *self) +/*[clinic end generated code: output=6a503611715b24bd input=d58f0b2f37812e33]*/ +{ + uint8_t digest[BLAKE2B_OUTBYTES]; + blake2b_state state_cpy; + + ENTER_HASHLIB(self); + state_cpy = self->state; + blake2b_final(&state_cpy, digest, self->param.digest_length); + LEAVE_HASHLIB(self); + return _Py_strhex((const char *)digest, self->param.digest_length); +} + + +static PyMethodDef py_blake2b_methods[] = { + _BLAKE2B_BLAKE2B_COPY_METHODDEF + _BLAKE2B_BLAKE2B_DIGEST_METHODDEF + _BLAKE2B_BLAKE2B_HEXDIGEST_METHODDEF + _BLAKE2B_BLAKE2B_UPDATE_METHODDEF + {NULL, NULL} +}; + + + +static PyObject * +py_blake2b_get_name(BLAKE2bObject *self, void *closure) +{ + return PyUnicode_FromString("blake2b"); +} + + + +static PyObject * +py_blake2b_get_block_size(BLAKE2bObject *self, void *closure) +{ + return PyLong_FromLong(BLAKE2B_BLOCKBYTES); +} + + + +static PyObject * +py_blake2b_get_digest_size(BLAKE2bObject *self, void *closure) +{ + return PyLong_FromLong(self->param.digest_length); +} + + +static PyGetSetDef py_blake2b_getsetters[] = { + {"name", (getter)py_blake2b_get_name, + NULL, NULL, NULL}, + {"block_size", (getter)py_blake2b_get_block_size, + NULL, NULL, NULL}, + {"digest_size", (getter)py_blake2b_get_digest_size, + NULL, NULL, NULL}, + {NULL} +}; + + +static void +py_blake2b_dealloc(PyObject *self) +{ + BLAKE2bObject *obj = (BLAKE2bObject *)self; + + /* Try not to leave state in memory. */ + secure_zero_memory(&obj->param, sizeof(obj->param)); + secure_zero_memory(&obj->state, sizeof(obj->state)); +#ifdef WITH_THREAD + if (obj->lock) { + PyThread_free_lock(obj->lock); + obj->lock = NULL; + } +#endif + PyObject_Del(self); +} + + +PyTypeObject PyBlake2_BLAKE2bType = { + PyVarObject_HEAD_INIT(NULL, 0) + "_blake2.blake2b", /* tp_name */ + sizeof(BLAKE2bObject), /* tp_size */ + 0, /* tp_itemsize */ + py_blake2b_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + py_blake2b_new__doc__, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + py_blake2b_methods, /* tp_methods */ + 0, /* tp_members */ + py_blake2b_getsetters, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + py_blake2b_new, /* tp_new */ +}; diff --git a/Modules/_blake2/blake2module.c b/Modules/_blake2/blake2module.c new file mode 100644 index 0000000..e2a3d42 --- /dev/null +++ b/Modules/_blake2/blake2module.c @@ -0,0 +1,105 @@ +/* + * Written in 2013 by Dmitry Chestnykh <dmitry@codingrobots.com> + * Modified for CPython by Christian Heimes <christian@python.org> + * + * To the extent possible under law, the author have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. http://creativecommons.org/publicdomain/zero/1.0/ + */ + +#include "Python.h" + +#include "impl/blake2.h" + +extern PyTypeObject PyBlake2_BLAKE2bType; +extern PyTypeObject PyBlake2_BLAKE2sType; + + +PyDoc_STRVAR(blake2mod__doc__, +"_blake2b provides BLAKE2b for hashlib\n" +); + + +static struct PyMethodDef blake2mod_functions[] = { + {NULL, NULL} +}; + +static struct PyModuleDef blake2_module = { + PyModuleDef_HEAD_INIT, + "_blake2", + blake2mod__doc__, + -1, + blake2mod_functions, + NULL, + NULL, + NULL, + NULL +}; + +#define ADD_INT(d, name, value) do { \ + PyObject *x = PyLong_FromLong(value); \ + if (!x) { \ + Py_DECREF(m); \ + return NULL; \ + } \ + if (PyDict_SetItemString(d, name, x) < 0) { \ + Py_DECREF(m); \ + return NULL; \ + } \ + Py_DECREF(x); \ +} while(0) + + +PyMODINIT_FUNC +PyInit__blake2(void) +{ + PyObject *m; + PyObject *d; + + m = PyModule_Create(&blake2_module); + if (m == NULL) + return NULL; + + /* BLAKE2b */ + Py_TYPE(&PyBlake2_BLAKE2bType) = &PyType_Type; + if (PyType_Ready(&PyBlake2_BLAKE2bType) < 0) { + return NULL; + } + + Py_INCREF(&PyBlake2_BLAKE2bType); + PyModule_AddObject(m, "blake2b", (PyObject *)&PyBlake2_BLAKE2bType); + + d = PyBlake2_BLAKE2bType.tp_dict; + ADD_INT(d, "SALT_SIZE", BLAKE2B_SALTBYTES); + ADD_INT(d, "PERSON_SIZE", BLAKE2B_PERSONALBYTES); + ADD_INT(d, "MAX_KEY_SIZE", BLAKE2B_KEYBYTES); + ADD_INT(d, "MAX_DIGEST_SIZE", BLAKE2B_OUTBYTES); + + PyModule_AddIntConstant(m, "BLAKE2B_SALT_SIZE", BLAKE2B_SALTBYTES); + PyModule_AddIntConstant(m, "BLAKE2B_PERSON_SIZE", BLAKE2B_PERSONALBYTES); + PyModule_AddIntConstant(m, "BLAKE2B_MAX_KEY_SIZE", BLAKE2B_KEYBYTES); + PyModule_AddIntConstant(m, "BLAKE2B_MAX_DIGEST_SIZE", BLAKE2B_OUTBYTES); + + /* BLAKE2s */ + Py_TYPE(&PyBlake2_BLAKE2sType) = &PyType_Type; + if (PyType_Ready(&PyBlake2_BLAKE2sType) < 0) { + return NULL; + } + + Py_INCREF(&PyBlake2_BLAKE2sType); + PyModule_AddObject(m, "blake2s", (PyObject *)&PyBlake2_BLAKE2sType); + + d = PyBlake2_BLAKE2sType.tp_dict; + ADD_INT(d, "SALT_SIZE", BLAKE2S_SALTBYTES); + ADD_INT(d, "PERSON_SIZE", BLAKE2S_PERSONALBYTES); + ADD_INT(d, "MAX_KEY_SIZE", BLAKE2S_KEYBYTES); + ADD_INT(d, "MAX_DIGEST_SIZE", BLAKE2S_OUTBYTES); + + PyModule_AddIntConstant(m, "BLAKE2S_SALT_SIZE", BLAKE2S_SALTBYTES); + PyModule_AddIntConstant(m, "BLAKE2S_PERSON_SIZE", BLAKE2S_PERSONALBYTES); + PyModule_AddIntConstant(m, "BLAKE2S_MAX_KEY_SIZE", BLAKE2S_KEYBYTES); + PyModule_AddIntConstant(m, "BLAKE2S_MAX_DIGEST_SIZE", BLAKE2S_OUTBYTES); + + return m; +} diff --git a/Modules/_blake2/blake2ns.h b/Modules/_blake2/blake2ns.h new file mode 100644 index 0000000..53bce8e --- /dev/null +++ b/Modules/_blake2/blake2ns.h @@ -0,0 +1,32 @@ +/* Prefix all public blake2 symbols with PyBlake2_ + */ + +#ifndef Py_BLAKE2_NS +#define Py_BLAKE2_NS + +#define blake2b PyBlake2_blake2b +#define blake2b_compress PyBlake2_blake2b_compress +#define blake2b_final PyBlake2_blake2b_final +#define blake2b_init PyBlake2_blake2b_init +#define blake2b_init_key PyBlake2_blake2b_init_key +#define blake2b_init_param PyBlake2_blake2b_init_param +#define blake2b_update PyBlake2_blake2b_update +#define blake2bp PyBlake2_blake2bp +#define blake2bp_final PyBlake2_blake2bp_final +#define blake2bp_init PyBlake2_blake2bp_init +#define blake2bp_init_key PyBlake2_blake2bp_init_key +#define blake2bp_update PyBlake2_blake2bp_update +#define blake2s PyBlake2_blake2s +#define blake2s_compress PyBlake2_blake2s_compress +#define blake2s_final PyBlake2_blake2s_final +#define blake2s_init PyBlake2_blake2s_init +#define blake2s_init_key PyBlake2_blake2s_init_key +#define blake2s_init_param PyBlake2_blake2s_init_param +#define blake2s_update PyBlake2_blake2s_update +#define blake2sp PyBlake2_blake2sp +#define blake2sp_final PyBlake2_blake2sp_final +#define blake2sp_init PyBlake2_blake2sp_init +#define blake2sp_init_key PyBlake2_blake2sp_init_key +#define blake2sp_update PyBlake2_blake2sp_update + +#endif /* Py_BLAKE2_NS */ diff --git a/Modules/_blake2/blake2s_impl.c b/Modules/_blake2/blake2s_impl.c new file mode 100644 index 0000000..8b38270 --- /dev/null +++ b/Modules/_blake2/blake2s_impl.c @@ -0,0 +1,460 @@ +/* + * Written in 2013 by Dmitry Chestnykh <dmitry@codingrobots.com> + * Modified for CPython by Christian Heimes <christian@python.org> + * + * To the extent possible under law, the author have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. http://creativecommons.org/publicdomain/zero/1.0/ + */ + +/* WARNING: autogenerated file! + * + * The blake2s_impl.c is autogenerated from blake2s_impl.c. + */ + +#include "Python.h" +#include "pystrhex.h" +#ifdef WITH_THREAD +#include "pythread.h" +#endif + +#include "../hashlib.h" +#include "blake2ns.h" + +#define HAVE_BLAKE2S 1 +#define BLAKE2_LOCAL_INLINE(type) Py_LOCAL_INLINE(type) + +#include "impl/blake2.h" +#include "impl/blake2-impl.h" /* for secure_zero_memory() and store48() */ + +#ifdef BLAKE2_USE_SSE +#include "impl/blake2s.c" +#else +#include "impl/blake2s-ref.c" +#endif + + +extern PyTypeObject PyBlake2_BLAKE2sType; + +typedef struct { + PyObject_HEAD + blake2s_param param; + blake2s_state state; +#ifdef WITH_THREAD + PyThread_type_lock lock; +#endif +} BLAKE2sObject; + +#include "clinic/blake2s_impl.c.h" + +/*[clinic input] +module _blake2s +class _blake2s.blake2s "BLAKE2sObject *" "&PyBlake2_BLAKE2sType" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=edbfcf7557a685a7]*/ + + +static BLAKE2sObject * +new_BLAKE2sObject(PyTypeObject *type) +{ + BLAKE2sObject *self; + self = (BLAKE2sObject *)type->tp_alloc(type, 0); +#ifdef WITH_THREAD + if (self != NULL) { + self->lock = NULL; + } +#endif + return self; +} + +/*[clinic input] +@classmethod +_blake2s.blake2s.__new__ as py_blake2s_new + string as data: object = NULL + * + digest_size: int(c_default="BLAKE2S_OUTBYTES") = _blake2s.blake2s.MAX_DIGEST_SIZE + key: Py_buffer = None + salt: Py_buffer = None + person: Py_buffer = None + fanout: int = 1 + depth: int = 1 + leaf_size as leaf_size_obj: object = NULL + node_offset as node_offset_obj: object = NULL + node_depth: int = 0 + inner_size: int = 0 + last_node: bool = False + +Return a new BLAKE2s hash object. +[clinic start generated code]*/ + +static PyObject * +py_blake2s_new_impl(PyTypeObject *type, PyObject *data, int digest_size, + Py_buffer *key, Py_buffer *salt, Py_buffer *person, + int fanout, int depth, PyObject *leaf_size_obj, + PyObject *node_offset_obj, int node_depth, + int inner_size, int last_node) +/*[clinic end generated code: output=fe060b258a8cbfc6 input=458cfdcb3d0d47ff]*/ +{ + BLAKE2sObject *self = NULL; + Py_buffer buf; + + unsigned long leaf_size = 0; + unsigned PY_LONG_LONG node_offset = 0; + + self = new_BLAKE2sObject(type); + if (self == NULL) { + goto error; + } + + /* Zero parameter block. */ + memset(&self->param, 0, sizeof(self->param)); + + /* Set digest size. */ + if (digest_size <= 0 || digest_size > BLAKE2S_OUTBYTES) { + PyErr_Format(PyExc_ValueError, + "digest_size must be between 1 and %d bytes", + BLAKE2S_OUTBYTES); + goto error; + } + self->param.digest_length = digest_size; + + /* Set salt parameter. */ + if ((salt->obj != NULL) && salt->len) { + if (salt->len > BLAKE2S_SALTBYTES) { + PyErr_Format(PyExc_ValueError, + "maximum salt length is %d bytes", + BLAKE2S_SALTBYTES); + goto error; + } + memcpy(self->param.salt, salt->buf, salt->len); + } + + /* Set personalization parameter. */ + if ((person->obj != NULL) && person->len) { + if (person->len > BLAKE2S_PERSONALBYTES) { + PyErr_Format(PyExc_ValueError, + "maximum person length is %d bytes", + BLAKE2S_PERSONALBYTES); + goto error; + } + memcpy(self->param.personal, person->buf, person->len); + } + + /* Set tree parameters. */ + if (fanout < 0 || fanout > 255) { + PyErr_SetString(PyExc_ValueError, + "fanout must be between 0 and 255"); + goto error; + } + self->param.fanout = (uint8_t)fanout; + + if (depth <= 0 || depth > 255) { + PyErr_SetString(PyExc_ValueError, + "depth must be between 1 and 255"); + goto error; + } + self->param.depth = (uint8_t)depth; + + if (leaf_size_obj != NULL) { + leaf_size = PyLong_AsUnsignedLong(leaf_size_obj); + if (leaf_size == (unsigned long) -1 && PyErr_Occurred()) { + goto error; + } + if (leaf_size > 0xFFFFFFFFU) { + PyErr_SetString(PyExc_OverflowError, "leaf_size is too large"); + goto error; + } + } + self->param.leaf_length = (unsigned int)leaf_size; + + if (node_offset_obj != NULL) { + node_offset = PyLong_AsUnsignedLongLong(node_offset_obj); + if (node_offset == (unsigned PY_LONG_LONG) -1 && PyErr_Occurred()) { + goto error; + } + } +#ifdef HAVE_BLAKE2S + if (node_offset > 0xFFFFFFFFFFFFULL) { + /* maximum 2**48 - 1 */ + PyErr_SetString(PyExc_OverflowError, "node_offset is too large"); + goto error; + } + store48(&(self->param.node_offset), node_offset); +#else + self->param.node_offset = node_offset; +#endif + + if (node_depth < 0 || node_depth > 255) { + PyErr_SetString(PyExc_ValueError, + "node_depth must be between 0 and 255"); + goto error; + } + self->param.node_depth = node_depth; + + if (inner_size < 0 || inner_size > BLAKE2S_OUTBYTES) { + PyErr_Format(PyExc_ValueError, + "inner_size must be between 0 and is %d", + BLAKE2S_OUTBYTES); + goto error; + } + self->param.inner_length = inner_size; + + /* Set key length. */ + if ((key->obj != NULL) && key->len) { + if (key->len > BLAKE2S_KEYBYTES) { + PyErr_Format(PyExc_ValueError, + "maximum key length is %d bytes", + BLAKE2S_KEYBYTES); + goto error; + } + self->param.key_length = key->len; + } + + /* Initialize hash state. */ + if (blake2s_init_param(&self->state, &self->param) < 0) { + PyErr_SetString(PyExc_RuntimeError, + "error initializing hash state"); + goto error; + } + + /* Set last node flag (must come after initialization). */ + self->state.last_node = last_node; + + /* Process key block if any. */ + if (self->param.key_length) { + uint8_t block[BLAKE2S_BLOCKBYTES]; + memset(block, 0, sizeof(block)); + memcpy(block, key->buf, key->len); + blake2s_update(&self->state, block, sizeof(block)); + secure_zero_memory(block, sizeof(block)); + } + + /* Process initial data if any. */ + if (data != NULL) { + GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); + + if (buf.len >= HASHLIB_GIL_MINSIZE) { + Py_BEGIN_ALLOW_THREADS + blake2s_update(&self->state, buf.buf, buf.len); + Py_END_ALLOW_THREADS + } else { + blake2s_update(&self->state, buf.buf, buf.len); + } + PyBuffer_Release(&buf); + } + + return (PyObject *)self; + + error: + if (self != NULL) { + Py_DECREF(self); + } + return NULL; +} + +/*[clinic input] +_blake2s.blake2s.copy + +Return a copy of the hash object. +[clinic start generated code]*/ + +static PyObject * +_blake2s_blake2s_copy_impl(BLAKE2sObject *self) +/*[clinic end generated code: output=6c5bada404b7aed7 input=c8858e887ae4a07a]*/ +{ + BLAKE2sObject *cpy; + + if ((cpy = new_BLAKE2sObject(Py_TYPE(self))) == NULL) + return NULL; + + ENTER_HASHLIB(self); + cpy->param = self->param; + cpy->state = self->state; + LEAVE_HASHLIB(self); + return (PyObject *)cpy; +} + +/*[clinic input] +_blake2s.blake2s.update + + obj: object + / + +Update this hash object's state with the provided string. +[clinic start generated code]*/ + +static PyObject * +_blake2s_blake2s_update(BLAKE2sObject *self, PyObject *obj) +/*[clinic end generated code: output=fe8438a1d3cede87 input=47a408b9a3cc05c5]*/ +{ + Py_buffer buf; + + GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); + +#ifdef WITH_THREAD + if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) + self->lock = PyThread_allocate_lock(); + + if (self->lock != NULL) { + Py_BEGIN_ALLOW_THREADS + PyThread_acquire_lock(self->lock, 1); + blake2s_update(&self->state, buf.buf, buf.len); + PyThread_release_lock(self->lock); + Py_END_ALLOW_THREADS + } else { + blake2s_update(&self->state, buf.buf, buf.len); + } +#else + blake2s_update(&self->state, buf.buf, buf.len); +#endif /* !WITH_THREAD */ + PyBuffer_Release(&buf); + + Py_INCREF(Py_None); + return Py_None; +} + +/*[clinic input] +_blake2s.blake2s.digest + +Return the digest value as a string of binary data. +[clinic start generated code]*/ + +static PyObject * +_blake2s_blake2s_digest_impl(BLAKE2sObject *self) +/*[clinic end generated code: output=80e81a48c6f79cf9 input=feb9a220135bdeba]*/ +{ + uint8_t digest[BLAKE2S_OUTBYTES]; + blake2s_state state_cpy; + + ENTER_HASHLIB(self); + state_cpy = self->state; + blake2s_final(&state_cpy, digest, self->param.digest_length); + LEAVE_HASHLIB(self); + return PyBytes_FromStringAndSize((const char *)digest, + self->param.digest_length); +} + +/*[clinic input] +_blake2s.blake2s.hexdigest + +Return the digest value as a string of hexadecimal digits. +[clinic start generated code]*/ + +static PyObject * +_blake2s_blake2s_hexdigest_impl(BLAKE2sObject *self) +/*[clinic end generated code: output=db6c5028c0a3c2e5 input=4e4877b8bd7aea91]*/ +{ + uint8_t digest[BLAKE2S_OUTBYTES]; + blake2s_state state_cpy; + + ENTER_HASHLIB(self); + state_cpy = self->state; + blake2s_final(&state_cpy, digest, self->param.digest_length); + LEAVE_HASHLIB(self); + return _Py_strhex((const char *)digest, self->param.digest_length); +} + + +static PyMethodDef py_blake2s_methods[] = { + _BLAKE2S_BLAKE2S_COPY_METHODDEF + _BLAKE2S_BLAKE2S_DIGEST_METHODDEF + _BLAKE2S_BLAKE2S_HEXDIGEST_METHODDEF + _BLAKE2S_BLAKE2S_UPDATE_METHODDEF + {NULL, NULL} +}; + + + +static PyObject * +py_blake2s_get_name(BLAKE2sObject *self, void *closure) +{ + return PyUnicode_FromString("blake2s"); +} + + + +static PyObject * +py_blake2s_get_block_size(BLAKE2sObject *self, void *closure) +{ + return PyLong_FromLong(BLAKE2S_BLOCKBYTES); +} + + + +static PyObject * +py_blake2s_get_digest_size(BLAKE2sObject *self, void *closure) +{ + return PyLong_FromLong(self->param.digest_length); +} + + +static PyGetSetDef py_blake2s_getsetters[] = { + {"name", (getter)py_blake2s_get_name, + NULL, NULL, NULL}, + {"block_size", (getter)py_blake2s_get_block_size, + NULL, NULL, NULL}, + {"digest_size", (getter)py_blake2s_get_digest_size, + NULL, NULL, NULL}, + {NULL} +}; + + +static void +py_blake2s_dealloc(PyObject *self) +{ + BLAKE2sObject *obj = (BLAKE2sObject *)self; + + /* Try not to leave state in memory. */ + secure_zero_memory(&obj->param, sizeof(obj->param)); + secure_zero_memory(&obj->state, sizeof(obj->state)); +#ifdef WITH_THREAD + if (obj->lock) { + PyThread_free_lock(obj->lock); + obj->lock = NULL; + } +#endif + PyObject_Del(self); +} + + +PyTypeObject PyBlake2_BLAKE2sType = { + PyVarObject_HEAD_INIT(NULL, 0) + "_blake2.blake2s", /* tp_name */ + sizeof(BLAKE2sObject), /* tp_size */ + 0, /* tp_itemsize */ + py_blake2s_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + py_blake2s_new__doc__, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + py_blake2s_methods, /* tp_methods */ + 0, /* tp_members */ + py_blake2s_getsetters, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + py_blake2s_new, /* tp_new */ +}; diff --git a/Modules/_blake2/clinic/blake2b_impl.c.h b/Modules/_blake2/clinic/blake2b_impl.c.h new file mode 100644 index 0000000..71c073a --- /dev/null +++ b/Modules/_blake2/clinic/blake2b_impl.c.h @@ -0,0 +1,125 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +PyDoc_STRVAR(py_blake2b_new__doc__, +"blake2b(string=None, *, digest_size=_blake2b.blake2b.MAX_DIGEST_SIZE,\n" +" key=None, salt=None, person=None, fanout=1, depth=1,\n" +" leaf_size=None, node_offset=None, node_depth=0, inner_size=0,\n" +" last_node=False)\n" +"--\n" +"\n" +"Return a new BLAKE2b hash object."); + +static PyObject * +py_blake2b_new_impl(PyTypeObject *type, PyObject *data, int digest_size, + Py_buffer *key, Py_buffer *salt, Py_buffer *person, + int fanout, int depth, PyObject *leaf_size_obj, + PyObject *node_offset_obj, int node_depth, + int inner_size, int last_node); + +static PyObject * +py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"string", "digest_size", "key", "salt", "person", "fanout", "depth", "leaf_size", "node_offset", "node_depth", "inner_size", "last_node", NULL}; + static _PyArg_Parser _parser = {"|O$iy*y*y*iiOOiip:blake2b", _keywords, 0}; + PyObject *data = NULL; + int digest_size = BLAKE2B_OUTBYTES; + Py_buffer key = {NULL, NULL}; + Py_buffer salt = {NULL, NULL}; + Py_buffer person = {NULL, NULL}; + int fanout = 1; + int depth = 1; + PyObject *leaf_size_obj = NULL; + PyObject *node_offset_obj = NULL; + int node_depth = 0; + int inner_size = 0; + int last_node = 0; + + if (!_PyArg_ParseTupleAndKeywordsFast(args, kwargs, &_parser, + &data, &digest_size, &key, &salt, &person, &fanout, &depth, &leaf_size_obj, &node_offset_obj, &node_depth, &inner_size, &last_node)) { + goto exit; + } + return_value = py_blake2b_new_impl(type, data, digest_size, &key, &salt, &person, fanout, depth, leaf_size_obj, node_offset_obj, node_depth, inner_size, last_node); + +exit: + /* Cleanup for key */ + if (key.obj) { + PyBuffer_Release(&key); + } + /* Cleanup for salt */ + if (salt.obj) { + PyBuffer_Release(&salt); + } + /* Cleanup for person */ + if (person.obj) { + PyBuffer_Release(&person); + } + + return return_value; +} + +PyDoc_STRVAR(_blake2b_blake2b_copy__doc__, +"copy($self, /)\n" +"--\n" +"\n" +"Return a copy of the hash object."); + +#define _BLAKE2B_BLAKE2B_COPY_METHODDEF \ + {"copy", (PyCFunction)_blake2b_blake2b_copy, METH_NOARGS, _blake2b_blake2b_copy__doc__}, + +static PyObject * +_blake2b_blake2b_copy_impl(BLAKE2bObject *self); + +static PyObject * +_blake2b_blake2b_copy(BLAKE2bObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _blake2b_blake2b_copy_impl(self); +} + +PyDoc_STRVAR(_blake2b_blake2b_update__doc__, +"update($self, obj, /)\n" +"--\n" +"\n" +"Update this hash object\'s state with the provided string."); + +#define _BLAKE2B_BLAKE2B_UPDATE_METHODDEF \ + {"update", (PyCFunction)_blake2b_blake2b_update, METH_O, _blake2b_blake2b_update__doc__}, + +PyDoc_STRVAR(_blake2b_blake2b_digest__doc__, +"digest($self, /)\n" +"--\n" +"\n" +"Return the digest value as a string of binary data."); + +#define _BLAKE2B_BLAKE2B_DIGEST_METHODDEF \ + {"digest", (PyCFunction)_blake2b_blake2b_digest, METH_NOARGS, _blake2b_blake2b_digest__doc__}, + +static PyObject * +_blake2b_blake2b_digest_impl(BLAKE2bObject *self); + +static PyObject * +_blake2b_blake2b_digest(BLAKE2bObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _blake2b_blake2b_digest_impl(self); +} + +PyDoc_STRVAR(_blake2b_blake2b_hexdigest__doc__, +"hexdigest($self, /)\n" +"--\n" +"\n" +"Return the digest value as a string of hexadecimal digits."); + +#define _BLAKE2B_BLAKE2B_HEXDIGEST_METHODDEF \ + {"hexdigest", (PyCFunction)_blake2b_blake2b_hexdigest, METH_NOARGS, _blake2b_blake2b_hexdigest__doc__}, + +static PyObject * +_blake2b_blake2b_hexdigest_impl(BLAKE2bObject *self); + +static PyObject * +_blake2b_blake2b_hexdigest(BLAKE2bObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _blake2b_blake2b_hexdigest_impl(self); +} +/*[clinic end generated code: output=535a54852c98e51c input=a9049054013a1b77]*/ diff --git a/Modules/_blake2/clinic/blake2s_impl.c.h b/Modules/_blake2/clinic/blake2s_impl.c.h new file mode 100644 index 0000000..ca908d3 --- /dev/null +++ b/Modules/_blake2/clinic/blake2s_impl.c.h @@ -0,0 +1,125 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +PyDoc_STRVAR(py_blake2s_new__doc__, +"blake2s(string=None, *, digest_size=_blake2s.blake2s.MAX_DIGEST_SIZE,\n" +" key=None, salt=None, person=None, fanout=1, depth=1,\n" +" leaf_size=None, node_offset=None, node_depth=0, inner_size=0,\n" +" last_node=False)\n" +"--\n" +"\n" +"Return a new BLAKE2s hash object."); + +static PyObject * +py_blake2s_new_impl(PyTypeObject *type, PyObject *data, int digest_size, + Py_buffer *key, Py_buffer *salt, Py_buffer *person, + int fanout, int depth, PyObject *leaf_size_obj, + PyObject *node_offset_obj, int node_depth, + int inner_size, int last_node); + +static PyObject * +py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"string", "digest_size", "key", "salt", "person", "fanout", "depth", "leaf_size", "node_offset", "node_depth", "inner_size", "last_node", NULL}; + static _PyArg_Parser _parser = {"|O$iy*y*y*iiOOiip:blake2s", _keywords, 0}; + PyObject *data = NULL; + int digest_size = BLAKE2S_OUTBYTES; + Py_buffer key = {NULL, NULL}; + Py_buffer salt = {NULL, NULL}; + Py_buffer person = {NULL, NULL}; + int fanout = 1; + int depth = 1; + PyObject *leaf_size_obj = NULL; + PyObject *node_offset_obj = NULL; + int node_depth = 0; + int inner_size = 0; + int last_node = 0; + + if (!_PyArg_ParseTupleAndKeywordsFast(args, kwargs, &_parser, + &data, &digest_size, &key, &salt, &person, &fanout, &depth, &leaf_size_obj, &node_offset_obj, &node_depth, &inner_size, &last_node)) { + goto exit; + } + return_value = py_blake2s_new_impl(type, data, digest_size, &key, &salt, &person, fanout, depth, leaf_size_obj, node_offset_obj, node_depth, inner_size, last_node); + +exit: + /* Cleanup for key */ + if (key.obj) { + PyBuffer_Release(&key); + } + /* Cleanup for salt */ + if (salt.obj) { + PyBuffer_Release(&salt); + } + /* Cleanup for person */ + if (person.obj) { + PyBuffer_Release(&person); + } + + return return_value; +} + +PyDoc_STRVAR(_blake2s_blake2s_copy__doc__, +"copy($self, /)\n" +"--\n" +"\n" +"Return a copy of the hash object."); + +#define _BLAKE2S_BLAKE2S_COPY_METHODDEF \ + {"copy", (PyCFunction)_blake2s_blake2s_copy, METH_NOARGS, _blake2s_blake2s_copy__doc__}, + +static PyObject * +_blake2s_blake2s_copy_impl(BLAKE2sObject *self); + +static PyObject * +_blake2s_blake2s_copy(BLAKE2sObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _blake2s_blake2s_copy_impl(self); +} + +PyDoc_STRVAR(_blake2s_blake2s_update__doc__, +"update($self, obj, /)\n" +"--\n" +"\n" +"Update this hash object\'s state with the provided string."); + +#define _BLAKE2S_BLAKE2S_UPDATE_METHODDEF \ + {"update", (PyCFunction)_blake2s_blake2s_update, METH_O, _blake2s_blake2s_update__doc__}, + +PyDoc_STRVAR(_blake2s_blake2s_digest__doc__, +"digest($self, /)\n" +"--\n" +"\n" +"Return the digest value as a string of binary data."); + +#define _BLAKE2S_BLAKE2S_DIGEST_METHODDEF \ + {"digest", (PyCFunction)_blake2s_blake2s_digest, METH_NOARGS, _blake2s_blake2s_digest__doc__}, + +static PyObject * +_blake2s_blake2s_digest_impl(BLAKE2sObject *self); + +static PyObject * +_blake2s_blake2s_digest(BLAKE2sObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _blake2s_blake2s_digest_impl(self); +} + +PyDoc_STRVAR(_blake2s_blake2s_hexdigest__doc__, +"hexdigest($self, /)\n" +"--\n" +"\n" +"Return the digest value as a string of hexadecimal digits."); + +#define _BLAKE2S_BLAKE2S_HEXDIGEST_METHODDEF \ + {"hexdigest", (PyCFunction)_blake2s_blake2s_hexdigest, METH_NOARGS, _blake2s_blake2s_hexdigest__doc__}, + +static PyObject * +_blake2s_blake2s_hexdigest_impl(BLAKE2sObject *self); + +static PyObject * +_blake2s_blake2s_hexdigest(BLAKE2sObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _blake2s_blake2s_hexdigest_impl(self); +} +/*[clinic end generated code: output=535ea7903f9ccf76 input=a9049054013a1b77]*/ diff --git a/Modules/_blake2/impl/blake2-config.h b/Modules/_blake2/impl/blake2-config.h new file mode 100644 index 0000000..40455b1 --- /dev/null +++ b/Modules/_blake2/impl/blake2-config.h @@ -0,0 +1,74 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#pragma once +#ifndef __BLAKE2_CONFIG_H__ +#define __BLAKE2_CONFIG_H__ + +/* These don't work everywhere */ +#if defined(__SSE2__) || defined(__x86_64__) || defined(__amd64__) +#define HAVE_SSE2 +#endif + +#if defined(__SSSE3__) +#define HAVE_SSSE3 +#endif + +#if defined(__SSE4_1__) +#define HAVE_SSE41 +#endif + +#if defined(__AVX__) +#define HAVE_AVX +#endif + +#if defined(__XOP__) +#define HAVE_XOP +#endif + + +#ifdef HAVE_AVX2 +#ifndef HAVE_AVX +#define HAVE_AVX +#endif +#endif + +#ifdef HAVE_XOP +#ifndef HAVE_AVX +#define HAVE_AVX +#endif +#endif + +#ifdef HAVE_AVX +#ifndef HAVE_SSE41 +#define HAVE_SSE41 +#endif +#endif + +#ifdef HAVE_SSE41 +#ifndef HAVE_SSSE3 +#define HAVE_SSSE3 +#endif +#endif + +#ifdef HAVE_SSSE3 +#define HAVE_SSE2 +#endif + +#if !defined(HAVE_SSE2) +#error "This code requires at least SSE2." +#endif + +#endif + diff --git a/Modules/_blake2/impl/blake2-impl.h b/Modules/_blake2/impl/blake2-impl.h new file mode 100644 index 0000000..bbe3c0f --- /dev/null +++ b/Modules/_blake2/impl/blake2-impl.h @@ -0,0 +1,139 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#pragma once +#ifndef __BLAKE2_IMPL_H__ +#define __BLAKE2_IMPL_H__ + +#include <stdint.h> +#include <string.h> + +BLAKE2_LOCAL_INLINE(uint32_t) load32( const void *src ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + uint32_t w; + memcpy(&w, src, sizeof w); + return w; +#else + const uint8_t *p = ( const uint8_t * )src; + uint32_t w = *p++; + w |= ( uint32_t )( *p++ ) << 8; + w |= ( uint32_t )( *p++ ) << 16; + w |= ( uint32_t )( *p++ ) << 24; + return w; +#endif +} + +BLAKE2_LOCAL_INLINE(uint64_t) load64( const void *src ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + uint64_t w; + memcpy(&w, src, sizeof w); + return w; +#else + const uint8_t *p = ( const uint8_t * )src; + uint64_t w = *p++; + w |= ( uint64_t )( *p++ ) << 8; + w |= ( uint64_t )( *p++ ) << 16; + w |= ( uint64_t )( *p++ ) << 24; + w |= ( uint64_t )( *p++ ) << 32; + w |= ( uint64_t )( *p++ ) << 40; + w |= ( uint64_t )( *p++ ) << 48; + w |= ( uint64_t )( *p++ ) << 56; + return w; +#endif +} + +BLAKE2_LOCAL_INLINE(void) store32( void *dst, uint32_t w ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = ( uint8_t * )dst; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; +#endif +} + +BLAKE2_LOCAL_INLINE(void) store64( void *dst, uint64_t w ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = ( uint8_t * )dst; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; +#endif +} + +BLAKE2_LOCAL_INLINE(uint64_t) load48( const void *src ) +{ + const uint8_t *p = ( const uint8_t * )src; + uint64_t w = *p++; + w |= ( uint64_t )( *p++ ) << 8; + w |= ( uint64_t )( *p++ ) << 16; + w |= ( uint64_t )( *p++ ) << 24; + w |= ( uint64_t )( *p++ ) << 32; + w |= ( uint64_t )( *p++ ) << 40; + return w; +} + +BLAKE2_LOCAL_INLINE(void) store48( void *dst, uint64_t w ) +{ + uint8_t *p = ( uint8_t * )dst; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; +} + +BLAKE2_LOCAL_INLINE(uint32_t) rotl32( const uint32_t w, const unsigned c ) +{ + return ( w << c ) | ( w >> ( 32 - c ) ); +} + +BLAKE2_LOCAL_INLINE(uint64_t) rotl64( const uint64_t w, const unsigned c ) +{ + return ( w << c ) | ( w >> ( 64 - c ) ); +} + +BLAKE2_LOCAL_INLINE(uint32_t) rotr32( const uint32_t w, const unsigned c ) +{ + return ( w >> c ) | ( w << ( 32 - c ) ); +} + +BLAKE2_LOCAL_INLINE(uint64_t) rotr64( const uint64_t w, const unsigned c ) +{ + return ( w >> c ) | ( w << ( 64 - c ) ); +} + +/* prevents compiler optimizing out memset() */ +BLAKE2_LOCAL_INLINE(void) secure_zero_memory(void *v, size_t n) +{ + static void *(*const volatile memset_v)(void *, int, size_t) = &memset; + memset_v(v, 0, n); +} + +#endif + diff --git a/Modules/_blake2/impl/blake2.h b/Modules/_blake2/impl/blake2.h new file mode 100644 index 0000000..1a9fdf4 --- /dev/null +++ b/Modules/_blake2/impl/blake2.h @@ -0,0 +1,161 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#pragma once +#ifndef __BLAKE2_H__ +#define __BLAKE2_H__ + +#include <stddef.h> +#include <stdint.h> + +#ifdef BLAKE2_NO_INLINE +#define BLAKE2_LOCAL_INLINE(type) static type +#endif + +#ifndef BLAKE2_LOCAL_INLINE +#define BLAKE2_LOCAL_INLINE(type) static inline type +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + + enum blake2s_constant + { + BLAKE2S_BLOCKBYTES = 64, + BLAKE2S_OUTBYTES = 32, + BLAKE2S_KEYBYTES = 32, + BLAKE2S_SALTBYTES = 8, + BLAKE2S_PERSONALBYTES = 8 + }; + + enum blake2b_constant + { + BLAKE2B_BLOCKBYTES = 128, + BLAKE2B_OUTBYTES = 64, + BLAKE2B_KEYBYTES = 64, + BLAKE2B_SALTBYTES = 16, + BLAKE2B_PERSONALBYTES = 16 + }; + + typedef struct __blake2s_state + { + uint32_t h[8]; + uint32_t t[2]; + uint32_t f[2]; + uint8_t buf[2 * BLAKE2S_BLOCKBYTES]; + size_t buflen; + uint8_t last_node; + } blake2s_state; + + typedef struct __blake2b_state + { + uint64_t h[8]; + uint64_t t[2]; + uint64_t f[2]; + uint8_t buf[2 * BLAKE2B_BLOCKBYTES]; + size_t buflen; + uint8_t last_node; + } blake2b_state; + + typedef struct __blake2sp_state + { + blake2s_state S[8][1]; + blake2s_state R[1]; + uint8_t buf[8 * BLAKE2S_BLOCKBYTES]; + size_t buflen; + } blake2sp_state; + + typedef struct __blake2bp_state + { + blake2b_state S[4][1]; + blake2b_state R[1]; + uint8_t buf[4 * BLAKE2B_BLOCKBYTES]; + size_t buflen; + } blake2bp_state; + + +#pragma pack(push, 1) + typedef struct __blake2s_param + { + uint8_t digest_length; /* 1 */ + uint8_t key_length; /* 2 */ + uint8_t fanout; /* 3 */ + uint8_t depth; /* 4 */ + uint32_t leaf_length; /* 8 */ + uint8_t node_offset[6];// 14 + uint8_t node_depth; /* 15 */ + uint8_t inner_length; /* 16 */ + /* uint8_t reserved[0]; */ + uint8_t salt[BLAKE2S_SALTBYTES]; /* 24 */ + uint8_t personal[BLAKE2S_PERSONALBYTES]; /* 32 */ + } blake2s_param; + + typedef struct __blake2b_param + { + uint8_t digest_length; /* 1 */ + uint8_t key_length; /* 2 */ + uint8_t fanout; /* 3 */ + uint8_t depth; /* 4 */ + uint32_t leaf_length; /* 8 */ + uint64_t node_offset; /* 16 */ + uint8_t node_depth; /* 17 */ + uint8_t inner_length; /* 18 */ + uint8_t reserved[14]; /* 32 */ + uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */ + uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */ + } blake2b_param; +#pragma pack(pop) + + /* Streaming API */ + int blake2s_init( blake2s_state *S, const uint8_t outlen ); + int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); + int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); + int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen ); + int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen ); + + int blake2b_init( blake2b_state *S, const uint8_t outlen ); + int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); + int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); + int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen ); + int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t outlen ); + + int blake2sp_init( blake2sp_state *S, const uint8_t outlen ); + int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); + int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen ); + int blake2sp_final( blake2sp_state *S, uint8_t *out, uint8_t outlen ); + + int blake2bp_init( blake2bp_state *S, const uint8_t outlen ); + int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); + int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen ); + int blake2bp_final( blake2bp_state *S, uint8_t *out, uint8_t outlen ); + + /* Simple API */ + int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); + int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); + + int blake2sp( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); + int blake2bp( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); + + static inline int blake2( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ) + { + return blake2b( out, in, key, outlen, inlen, keylen ); + } + +#if defined(__cplusplus) +} +#endif + +#endif + diff --git a/Modules/_blake2/impl/blake2b-load-sse2.h b/Modules/_blake2/impl/blake2b-load-sse2.h new file mode 100644 index 0000000..0004a98 --- /dev/null +++ b/Modules/_blake2/impl/blake2b-load-sse2.h @@ -0,0 +1,70 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#pragma once +#ifndef __BLAKE2B_LOAD_SSE2_H__ +#define __BLAKE2B_LOAD_SSE2_H__ + +#define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4) +#define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5) +#define LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13) +#define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9) +#define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15) +#define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11) +#define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7) +#define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5) +#define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2) +#define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7) +#define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1) +#define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13) +#define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4) +#define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0) +#define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2) +#define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4) +#define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6) +#define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8) +#define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0) +#define LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, m12); b1 = _mm_set_epi64x(m3, m11) +#define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15) +#define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14) +#define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1, m12); b1 = _mm_set_epi64x(m4, m14) +#define LOAD_MSG_6_2(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m10, m13) +#define LOAD_MSG_6_3(b0, b1) b0 = _mm_set_epi64x(m6, m0); b1 = _mm_set_epi64x(m8, m9) +#define LOAD_MSG_6_4(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m2) +#define LOAD_MSG_7_1(b0, b1) b0 = _mm_set_epi64x(m7, m13); b1 = _mm_set_epi64x(m3, m12) +#define LOAD_MSG_7_2(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1) +#define LOAD_MSG_7_3(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m2, m8) +#define LOAD_MSG_7_4(b0, b1) b0 = _mm_set_epi64x(m4, m0); b1 = _mm_set_epi64x(m10, m6) +#define LOAD_MSG_8_1(b0, b1) b0 = _mm_set_epi64x(m14, m6); b1 = _mm_set_epi64x(m0, m11) +#define LOAD_MSG_8_2(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m8, m3) +#define LOAD_MSG_8_3(b0, b1) b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1) +#define LOAD_MSG_8_4(b0, b1) b0 = _mm_set_epi64x(m7, m2); b1 = _mm_set_epi64x(m5, m4) +#define LOAD_MSG_9_1(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m1, m7) +#define LOAD_MSG_9_2(b0, b1) b0 = _mm_set_epi64x(m4, m2); b1 = _mm_set_epi64x(m5, m6) +#define LOAD_MSG_9_3(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m13, m3) +#define LOAD_MSG_9_4(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12) +#define LOAD_MSG_10_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4) +#define LOAD_MSG_10_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5) +#define LOAD_MSG_10_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_10_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13) +#define LOAD_MSG_11_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9) +#define LOAD_MSG_11_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15) +#define LOAD_MSG_11_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11) +#define LOAD_MSG_11_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7) + + +#endif + diff --git a/Modules/_blake2/impl/blake2b-load-sse41.h b/Modules/_blake2/impl/blake2b-load-sse41.h new file mode 100644 index 0000000..42a1349 --- /dev/null +++ b/Modules/_blake2/impl/blake2b-load-sse41.h @@ -0,0 +1,404 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#pragma once +#ifndef __BLAKE2B_LOAD_SSE41_H__ +#define __BLAKE2B_LOAD_SSE41_H__ + +#define LOAD_MSG_0_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m1); \ +b1 = _mm_unpacklo_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_0_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m0, m1); \ +b1 = _mm_unpackhi_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_0_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m4, m5); \ +b1 = _mm_unpacklo_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_0_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m5); \ +b1 = _mm_unpackhi_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_1_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m7, m2); \ +b1 = _mm_unpackhi_epi64(m4, m6); \ +} while(0) + + +#define LOAD_MSG_1_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m5, m4); \ +b1 = _mm_alignr_epi8(m3, m7, 8); \ +} while(0) + + +#define LOAD_MSG_1_3(b0, b1) \ +do \ +{ \ +b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \ +b1 = _mm_unpackhi_epi64(m5, m2); \ +} while(0) + + +#define LOAD_MSG_1_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m6, m1); \ +b1 = _mm_unpackhi_epi64(m3, m1); \ +} while(0) + + +#define LOAD_MSG_2_1(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m6, m5, 8); \ +b1 = _mm_unpackhi_epi64(m2, m7); \ +} while(0) + + +#define LOAD_MSG_2_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m4, m0); \ +b1 = _mm_blend_epi16(m1, m6, 0xF0); \ +} while(0) + + +#define LOAD_MSG_2_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m5, m1, 0xF0); \ +b1 = _mm_unpackhi_epi64(m3, m4); \ +} while(0) + + +#define LOAD_MSG_2_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m7, m3); \ +b1 = _mm_alignr_epi8(m2, m0, 8); \ +} while(0) + + +#define LOAD_MSG_3_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m3, m1); \ +b1 = _mm_unpackhi_epi64(m6, m5); \ +} while(0) + + +#define LOAD_MSG_3_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m0); \ +b1 = _mm_unpacklo_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_3_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m1, m2, 0xF0); \ +b1 = _mm_blend_epi16(m2, m7, 0xF0); \ +} while(0) + + +#define LOAD_MSG_3_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m3, m5); \ +b1 = _mm_unpacklo_epi64(m0, m4); \ +} while(0) + + +#define LOAD_MSG_4_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m2); \ +b1 = _mm_unpacklo_epi64(m1, m5); \ +} while(0) + + +#define LOAD_MSG_4_2(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m0, m3, 0xF0); \ +b1 = _mm_blend_epi16(m2, m7, 0xF0); \ +} while(0) + + +#define LOAD_MSG_4_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m7, m5, 0xF0); \ +b1 = _mm_blend_epi16(m3, m1, 0xF0); \ +} while(0) + + +#define LOAD_MSG_4_4(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m6, m0, 8); \ +b1 = _mm_blend_epi16(m4, m6, 0xF0); \ +} while(0) + + +#define LOAD_MSG_5_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m1, m3); \ +b1 = _mm_unpacklo_epi64(m0, m4); \ +} while(0) + + +#define LOAD_MSG_5_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m6, m5); \ +b1 = _mm_unpackhi_epi64(m5, m1); \ +} while(0) + + +#define LOAD_MSG_5_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m2, m3, 0xF0); \ +b1 = _mm_unpackhi_epi64(m7, m0); \ +} while(0) + + +#define LOAD_MSG_5_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m6, m2); \ +b1 = _mm_blend_epi16(m7, m4, 0xF0); \ +} while(0) + + +#define LOAD_MSG_6_1(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m6, m0, 0xF0); \ +b1 = _mm_unpacklo_epi64(m7, m2); \ +} while(0) + + +#define LOAD_MSG_6_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m2, m7); \ +b1 = _mm_alignr_epi8(m5, m6, 8); \ +} while(0) + + +#define LOAD_MSG_6_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m3); \ +b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \ +} while(0) + + +#define LOAD_MSG_6_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m3, m1); \ +b1 = _mm_blend_epi16(m1, m5, 0xF0); \ +} while(0) + + +#define LOAD_MSG_7_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m6, m3); \ +b1 = _mm_blend_epi16(m6, m1, 0xF0); \ +} while(0) + + +#define LOAD_MSG_7_2(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m7, m5, 8); \ +b1 = _mm_unpackhi_epi64(m0, m4); \ +} while(0) + + +#define LOAD_MSG_7_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m2, m7); \ +b1 = _mm_unpacklo_epi64(m4, m1); \ +} while(0) + + +#define LOAD_MSG_7_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m2); \ +b1 = _mm_unpacklo_epi64(m3, m5); \ +} while(0) + + +#define LOAD_MSG_8_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m3, m7); \ +b1 = _mm_alignr_epi8(m0, m5, 8); \ +} while(0) + + +#define LOAD_MSG_8_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m7, m4); \ +b1 = _mm_alignr_epi8(m4, m1, 8); \ +} while(0) + + +#define LOAD_MSG_8_3(b0, b1) \ +do \ +{ \ +b0 = m6; \ +b1 = _mm_alignr_epi8(m5, m0, 8); \ +} while(0) + + +#define LOAD_MSG_8_4(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m1, m3, 0xF0); \ +b1 = m2; \ +} while(0) + + +#define LOAD_MSG_9_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m5, m4); \ +b1 = _mm_unpackhi_epi64(m3, m0); \ +} while(0) + + +#define LOAD_MSG_9_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m1, m2); \ +b1 = _mm_blend_epi16(m3, m2, 0xF0); \ +} while(0) + + +#define LOAD_MSG_9_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m7, m4); \ +b1 = _mm_unpackhi_epi64(m1, m6); \ +} while(0) + + +#define LOAD_MSG_9_4(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m7, m5, 8); \ +b1 = _mm_unpacklo_epi64(m6, m0); \ +} while(0) + + +#define LOAD_MSG_10_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m1); \ +b1 = _mm_unpacklo_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_10_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m0, m1); \ +b1 = _mm_unpackhi_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_10_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m4, m5); \ +b1 = _mm_unpacklo_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_10_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m5); \ +b1 = _mm_unpackhi_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_11_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m7, m2); \ +b1 = _mm_unpackhi_epi64(m4, m6); \ +} while(0) + + +#define LOAD_MSG_11_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m5, m4); \ +b1 = _mm_alignr_epi8(m3, m7, 8); \ +} while(0) + + +#define LOAD_MSG_11_3(b0, b1) \ +do \ +{ \ +b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \ +b1 = _mm_unpackhi_epi64(m5, m2); \ +} while(0) + + +#define LOAD_MSG_11_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m6, m1); \ +b1 = _mm_unpackhi_epi64(m3, m1); \ +} while(0) + + +#endif + diff --git a/Modules/_blake2/impl/blake2b-ref.c b/Modules/_blake2/impl/blake2b-ref.c new file mode 100644 index 0000000..ac91568 --- /dev/null +++ b/Modules/_blake2/impl/blake2b-ref.c @@ -0,0 +1,416 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +#include <stdint.h> +#include <string.h> +#include <stdio.h> + +#include "blake2.h" +#include "blake2-impl.h" + +static const uint64_t blake2b_IV[8] = +{ + 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL +}; + +static const uint8_t blake2b_sigma[12][16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } +}; + + +BLAKE2_LOCAL_INLINE(int) blake2b_set_lastnode( blake2b_state *S ) +{ + S->f[1] = -1; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_clear_lastnode( blake2b_state *S ) +{ + S->f[1] = 0; + return 0; +} + +/* Some helper functions, not necessarily useful */ +BLAKE2_LOCAL_INLINE(int) blake2b_is_lastblock( const blake2b_state *S ) +{ + return S->f[0] != 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_set_lastblock( blake2b_state *S ) +{ + if( S->last_node ) blake2b_set_lastnode( S ); + + S->f[0] = -1; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_clear_lastblock( blake2b_state *S ) +{ + if( S->last_node ) blake2b_clear_lastnode( S ); + + S->f[0] = 0; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) +{ + S->t[0] += inc; + S->t[1] += ( S->t[0] < inc ); + return 0; +} + + + +/* Parameter-related functions */ +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length ) +{ + P->digest_length = digest_length; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout ) +{ + P->fanout = fanout; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth ) +{ + P->depth = depth; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length ) +{ + store32( &P->leaf_length, leaf_length ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset ) +{ + store64( &P->node_offset, node_offset ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth ) +{ + P->node_depth = node_depth; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length ) +{ + P->inner_length = inner_length; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] ) +{ + memcpy( P->salt, salt, BLAKE2B_SALTBYTES ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] ) +{ + memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_init0( blake2b_state *S ) +{ + memset( S, 0, sizeof( blake2b_state ) ); + + for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; + + return 0; +} + +/* init xors IV with input parameter block */ +int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) +{ + const uint8_t *p = ( const uint8_t * )( P ); + + blake2b_init0( S ); + + /* IV XOR ParamBlock */ + for( size_t i = 0; i < 8; ++i ) + S->h[i] ^= load64( p + sizeof( S->h[i] ) * i ); + + return 0; +} + + + +int blake2b_init( blake2b_state *S, const uint8_t outlen ) +{ + blake2b_param P[1]; + + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; + + P->digest_length = outlen; + P->key_length = 0; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store64( &P->node_offset, 0 ); + P->node_depth = 0; + P->inner_length = 0; + memset( P->reserved, 0, sizeof( P->reserved ) ); + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + return blake2b_init_param( S, P ); +} + + +int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ) +{ + blake2b_param P[1]; + + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; + + if ( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1; + + P->digest_length = outlen; + P->key_length = keylen; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store64( &P->node_offset, 0 ); + P->node_depth = 0; + P->inner_length = 0; + memset( P->reserved, 0, sizeof( P->reserved ) ); + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + if( blake2b_init_param( S, P ) < 0 ) return -1; + + { + uint8_t block[BLAKE2B_BLOCKBYTES]; + memset( block, 0, BLAKE2B_BLOCKBYTES ); + memcpy( block, key, keylen ); + blake2b_update( S, block, BLAKE2B_BLOCKBYTES ); + secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ + } + return 0; +} + +static int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) +{ + uint64_t m[16]; + uint64_t v[16]; + int i; + + for( i = 0; i < 16; ++i ) + m[i] = load64( block + i * sizeof( m[i] ) ); + + for( i = 0; i < 8; ++i ) + v[i] = S->h[i]; + + v[ 8] = blake2b_IV[0]; + v[ 9] = blake2b_IV[1]; + v[10] = blake2b_IV[2]; + v[11] = blake2b_IV[3]; + v[12] = S->t[0] ^ blake2b_IV[4]; + v[13] = S->t[1] ^ blake2b_IV[5]; + v[14] = S->f[0] ^ blake2b_IV[6]; + v[15] = S->f[1] ^ blake2b_IV[7]; +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2b_sigma[r][2*i+0]]; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m[blake2b_sigma[r][2*i+1]]; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ + } while(0) +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + ROUND( 10 ); + ROUND( 11 ); + + for( i = 0; i < 8; ++i ) + S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + +#undef G +#undef ROUND + return 0; +} + +/* inlen now in bytes */ +int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen ) +{ + while( inlen > 0 ) + { + size_t left = S->buflen; + size_t fill = 2 * BLAKE2B_BLOCKBYTES - left; + + if( inlen > fill ) + { + memcpy( S->buf + left, in, fill ); /* Fill buffer */ + S->buflen += fill; + blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); + blake2b_compress( S, S->buf ); /* Compress */ + memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); /* Shift buffer left */ + S->buflen -= BLAKE2B_BLOCKBYTES; + in += fill; + inlen -= fill; + } + else /* inlen <= fill */ + { + memcpy( S->buf + left, in, inlen ); + S->buflen += inlen; /* Be lazy, do not compress */ + in += inlen; + inlen -= inlen; + } + } + + return 0; +} + +/* Is this correct? */ +int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t outlen ) +{ + uint8_t buffer[BLAKE2B_OUTBYTES] = {0}; + + if( out == NULL || outlen == 0 || outlen > BLAKE2B_OUTBYTES ) + return -1; + + if( blake2b_is_lastblock( S ) ) + return -1; + + if( S->buflen > BLAKE2B_BLOCKBYTES ) + { + blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); + blake2b_compress( S, S->buf ); + S->buflen -= BLAKE2B_BLOCKBYTES; + memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen ); + } + + blake2b_increment_counter( S, S->buflen ); + blake2b_set_lastblock( S ); + memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ + blake2b_compress( S, S->buf ); + + for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ + store64( buffer + sizeof( S->h[i] ) * i, S->h[i] ); + + memcpy( out, buffer, outlen ); + return 0; +} + +/* inlen, at least, should be uint64_t. Others can be size_t. */ +int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ) +{ + blake2b_state S[1]; + + /* Verify parameters */ + if ( NULL == in && inlen > 0 ) return -1; + + if ( NULL == out ) return -1; + + if( NULL == key && keylen > 0 ) return -1; + + if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; + + if( keylen > BLAKE2B_KEYBYTES ) return -1; + + if( keylen > 0 ) + { + if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1; + } + else + { + if( blake2b_init( S, outlen ) < 0 ) return -1; + } + + blake2b_update( S, ( const uint8_t * )in, inlen ); + blake2b_final( S, out, outlen ); + return 0; +} + +#if defined(SUPERCOP) +int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) +{ + return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 ); +} +#endif + +#if defined(BLAKE2B_SELFTEST) +#include <string.h> +#include "blake2-kat.h" +int main( int argc, char **argv ) +{ + uint8_t key[BLAKE2B_KEYBYTES]; + uint8_t buf[KAT_LENGTH]; + + for( size_t i = 0; i < BLAKE2B_KEYBYTES; ++i ) + key[i] = ( uint8_t )i; + + for( size_t i = 0; i < KAT_LENGTH; ++i ) + buf[i] = ( uint8_t )i; + + for( size_t i = 0; i < KAT_LENGTH; ++i ) + { + uint8_t hash[BLAKE2B_OUTBYTES]; + blake2b( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ); + + if( 0 != memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) ) + { + puts( "error" ); + return -1; + } + } + + puts( "ok" ); + return 0; +} +#endif + diff --git a/Modules/_blake2/impl/blake2b-round.h b/Modules/_blake2/impl/blake2b-round.h new file mode 100644 index 0000000..4ce2255 --- /dev/null +++ b/Modules/_blake2/impl/blake2b-round.h @@ -0,0 +1,159 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#pragma once +#ifndef __BLAKE2B_ROUND_H__ +#define __BLAKE2B_ROUND_H__ + +#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) ) +#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) + +#define TOF(reg) _mm_castsi128_ps((reg)) +#define TOI(reg) _mm_castps_si128((reg)) + +#define LIKELY(x) __builtin_expect((x),1) + + +/* Microarchitecture-specific macros */ +#ifndef HAVE_XOP +#ifdef HAVE_SSSE3 +#define _mm_roti_epi64(x, c) \ + (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \ + : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \ + : (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \ + : (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \ + : _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c)))) +#else +#define _mm_roti_epi64(r, c) _mm_xor_si128(_mm_srli_epi64( (r), -(c) ),_mm_slli_epi64( (r), 64-(-(c)) )) +#endif +#else +/* ... */ +#endif + + + +#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ + \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ + \ + row4l = _mm_roti_epi64(row4l, -32); \ + row4h = _mm_roti_epi64(row4h, -32); \ + \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ + \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ + \ + row2l = _mm_roti_epi64(row2l, -24); \ + row2h = _mm_roti_epi64(row2h, -24); \ + +#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ + \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ + \ + row4l = _mm_roti_epi64(row4l, -16); \ + row4h = _mm_roti_epi64(row4h, -16); \ + \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ + \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ + \ + row2l = _mm_roti_epi64(row2l, -63); \ + row2h = _mm_roti_epi64(row2h, -63); \ + +#if defined(HAVE_SSSE3) +#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = _mm_alignr_epi8(row2h, row2l, 8); \ + t1 = _mm_alignr_epi8(row2l, row2h, 8); \ + row2l = t0; \ + row2h = t1; \ + \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ + \ + t0 = _mm_alignr_epi8(row4h, row4l, 8); \ + t1 = _mm_alignr_epi8(row4l, row4h, 8); \ + row4l = t1; \ + row4h = t0; + +#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = _mm_alignr_epi8(row2l, row2h, 8); \ + t1 = _mm_alignr_epi8(row2h, row2l, 8); \ + row2l = t0; \ + row2h = t1; \ + \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ + \ + t0 = _mm_alignr_epi8(row4l, row4h, 8); \ + t1 = _mm_alignr_epi8(row4h, row4l, 8); \ + row4l = t1; \ + row4h = t0; +#else + +#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = row4l;\ + t1 = row2l;\ + row4l = row3l;\ + row3l = row3h;\ + row3h = row4l;\ + row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); \ + row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); \ + row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); \ + row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1)) + +#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = row3l;\ + row3l = row3h;\ + row3h = t0;\ + t0 = row2l;\ + t1 = row4l;\ + row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); \ + row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); \ + row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); \ + row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1)) + +#endif + +#if defined(HAVE_SSE41) +#include "blake2b-load-sse41.h" +#else +#include "blake2b-load-sse2.h" +#endif + +#define ROUND(r) \ + LOAD_MSG_ ##r ##_1(b0, b1); \ + G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + LOAD_MSG_ ##r ##_2(b0, b1); \ + G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ + LOAD_MSG_ ##r ##_3(b0, b1); \ + G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + LOAD_MSG_ ##r ##_4(b0, b1); \ + G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); + +#endif + diff --git a/Modules/_blake2/impl/blake2b.c b/Modules/_blake2/impl/blake2b.c new file mode 100644 index 0000000..f9090a1 --- /dev/null +++ b/Modules/_blake2/impl/blake2b.c @@ -0,0 +1,450 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +#include <stdint.h> +#include <string.h> +#include <stdio.h> + +#include "blake2.h" +#include "blake2-impl.h" + +#include "blake2-config.h" + +#ifdef _MSC_VER +#include <intrin.h> /* for _mm_set_epi64x */ +#endif +#include <emmintrin.h> +#if defined(HAVE_SSSE3) +#include <tmmintrin.h> +#endif +#if defined(HAVE_SSE41) +#include <smmintrin.h> +#endif +#if defined(HAVE_AVX) +#include <immintrin.h> +#endif +#if defined(HAVE_XOP) +#include <x86intrin.h> +#endif + +#include "blake2b-round.h" + +static const uint64_t blake2b_IV[8] = +{ + 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL +}; + +static const uint8_t blake2b_sigma[12][16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } +}; + + +/* Some helper functions, not necessarily useful */ +BLAKE2_LOCAL_INLINE(int) blake2b_set_lastnode( blake2b_state *S ) +{ + S->f[1] = -1; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_clear_lastnode( blake2b_state *S ) +{ + S->f[1] = 0; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_is_lastblock( const blake2b_state *S ) +{ + return S->f[0] != 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_set_lastblock( blake2b_state *S ) +{ + if( S->last_node ) blake2b_set_lastnode( S ); + + S->f[0] = -1; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_clear_lastblock( blake2b_state *S ) +{ + if( S->last_node ) blake2b_clear_lastnode( S ); + + S->f[0] = 0; + return 0; +} + + +BLAKE2_LOCAL_INLINE(int) blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) +{ +#if __x86_64__ + /* ADD/ADC chain */ + __uint128_t t = ( ( __uint128_t )S->t[1] << 64 ) | S->t[0]; + t += inc; + S->t[0] = ( uint64_t )( t >> 0 ); + S->t[1] = ( uint64_t )( t >> 64 ); +#else + S->t[0] += inc; + S->t[1] += ( S->t[0] < inc ); +#endif + return 0; +} + + +/* Parameter-related functions */ +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length ) +{ + P->digest_length = digest_length; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout ) +{ + P->fanout = fanout; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth ) +{ + P->depth = depth; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length ) +{ + P->leaf_length = leaf_length; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset ) +{ + P->node_offset = node_offset; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth ) +{ + P->node_depth = node_depth; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length ) +{ + P->inner_length = inner_length; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] ) +{ + memcpy( P->salt, salt, BLAKE2B_SALTBYTES ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] ) +{ + memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_init0( blake2b_state *S ) +{ + memset( S, 0, sizeof( blake2b_state ) ); + + for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; + + return 0; +} + +/* init xors IV with input parameter block */ +int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) +{ + /*blake2b_init0( S ); */ + const uint8_t * v = ( const uint8_t * )( blake2b_IV ); + const uint8_t * p = ( const uint8_t * )( P ); + uint8_t * h = ( uint8_t * )( S->h ); + /* IV XOR ParamBlock */ + memset( S, 0, sizeof( blake2b_state ) ); + + for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; + + return 0; +} + + +/* Some sort of default parameter block initialization, for sequential blake2b */ +int blake2b_init( blake2b_state *S, const uint8_t outlen ) +{ + const blake2b_param P = + { + outlen, + 0, + 1, + 1, + 0, + 0, + 0, + 0, + {0}, + {0}, + {0} + }; + + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; + + return blake2b_init_param( S, &P ); +} + +int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ) +{ + const blake2b_param P = + { + outlen, + keylen, + 1, + 1, + 0, + 0, + 0, + 0, + {0}, + {0}, + {0} + }; + + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; + + if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1; + + if( blake2b_init_param( S, &P ) < 0 ) + return 0; + + { + uint8_t block[BLAKE2B_BLOCKBYTES]; + memset( block, 0, BLAKE2B_BLOCKBYTES ); + memcpy( block, key, keylen ); + blake2b_update( S, block, BLAKE2B_BLOCKBYTES ); + secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ + } + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) +{ + __m128i row1l, row1h; + __m128i row2l, row2h; + __m128i row3l, row3h; + __m128i row4l, row4h; + __m128i b0, b1; + __m128i t0, t1; +#if defined(HAVE_SSSE3) && !defined(HAVE_XOP) + const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 ); + const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 ); +#endif +#if defined(HAVE_SSE41) + const __m128i m0 = LOADU( block + 00 ); + const __m128i m1 = LOADU( block + 16 ); + const __m128i m2 = LOADU( block + 32 ); + const __m128i m3 = LOADU( block + 48 ); + const __m128i m4 = LOADU( block + 64 ); + const __m128i m5 = LOADU( block + 80 ); + const __m128i m6 = LOADU( block + 96 ); + const __m128i m7 = LOADU( block + 112 ); +#else + const uint64_t m0 = ( ( uint64_t * )block )[ 0]; + const uint64_t m1 = ( ( uint64_t * )block )[ 1]; + const uint64_t m2 = ( ( uint64_t * )block )[ 2]; + const uint64_t m3 = ( ( uint64_t * )block )[ 3]; + const uint64_t m4 = ( ( uint64_t * )block )[ 4]; + const uint64_t m5 = ( ( uint64_t * )block )[ 5]; + const uint64_t m6 = ( ( uint64_t * )block )[ 6]; + const uint64_t m7 = ( ( uint64_t * )block )[ 7]; + const uint64_t m8 = ( ( uint64_t * )block )[ 8]; + const uint64_t m9 = ( ( uint64_t * )block )[ 9]; + const uint64_t m10 = ( ( uint64_t * )block )[10]; + const uint64_t m11 = ( ( uint64_t * )block )[11]; + const uint64_t m12 = ( ( uint64_t * )block )[12]; + const uint64_t m13 = ( ( uint64_t * )block )[13]; + const uint64_t m14 = ( ( uint64_t * )block )[14]; + const uint64_t m15 = ( ( uint64_t * )block )[15]; +#endif + row1l = LOADU( &S->h[0] ); + row1h = LOADU( &S->h[2] ); + row2l = LOADU( &S->h[4] ); + row2h = LOADU( &S->h[6] ); + row3l = LOADU( &blake2b_IV[0] ); + row3h = LOADU( &blake2b_IV[2] ); + row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) ); + row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) ); + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + ROUND( 10 ); + ROUND( 11 ); + row1l = _mm_xor_si128( row3l, row1l ); + row1h = _mm_xor_si128( row3h, row1h ); + STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) ); + STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) ); + row2l = _mm_xor_si128( row4l, row2l ); + row2h = _mm_xor_si128( row4h, row2h ); + STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) ); + STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) ); + return 0; +} + + +int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen ) +{ + while( inlen > 0 ) + { + size_t left = S->buflen; + size_t fill = 2 * BLAKE2B_BLOCKBYTES - left; + + if( inlen > fill ) + { + memcpy( S->buf + left, in, fill ); /* Fill buffer */ + S->buflen += fill; + blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); + blake2b_compress( S, S->buf ); /* Compress */ + memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); /* Shift buffer left */ + S->buflen -= BLAKE2B_BLOCKBYTES; + in += fill; + inlen -= fill; + } + else /* inlen <= fill */ + { + memcpy( S->buf + left, in, inlen ); + S->buflen += inlen; /* Be lazy, do not compress */ + in += inlen; + inlen -= inlen; + } + } + + return 0; +} + + +int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t outlen ) +{ + if( outlen > BLAKE2B_OUTBYTES ) + return -1; + + if( blake2b_is_lastblock( S ) ) + return -1; + + if( S->buflen > BLAKE2B_BLOCKBYTES ) + { + blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); + blake2b_compress( S, S->buf ); + S->buflen -= BLAKE2B_BLOCKBYTES; + memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen ); + } + + blake2b_increment_counter( S, S->buflen ); + blake2b_set_lastblock( S ); + memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ + blake2b_compress( S, S->buf ); + memcpy( out, &S->h[0], outlen ); + return 0; +} + + +int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ) +{ + blake2b_state S[1]; + + /* Verify parameters */ + if ( NULL == in && inlen > 0 ) return -1; + + if ( NULL == out ) return -1; + + if( NULL == key && keylen > 0 ) return -1; + + if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; + + if( keylen > BLAKE2B_KEYBYTES ) return -1; + + if( keylen ) + { + if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1; + } + else + { + if( blake2b_init( S, outlen ) < 0 ) return -1; + } + + blake2b_update( S, ( const uint8_t * )in, inlen ); + blake2b_final( S, out, outlen ); + return 0; +} + +#if defined(SUPERCOP) +int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) +{ + return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 ); +} +#endif + +#if defined(BLAKE2B_SELFTEST) +#include <string.h> +#include "blake2-kat.h" +int main( int argc, char **argv ) +{ + uint8_t key[BLAKE2B_KEYBYTES]; + uint8_t buf[KAT_LENGTH]; + + for( size_t i = 0; i < BLAKE2B_KEYBYTES; ++i ) + key[i] = ( uint8_t )i; + + for( size_t i = 0; i < KAT_LENGTH; ++i ) + buf[i] = ( uint8_t )i; + + for( size_t i = 0; i < KAT_LENGTH; ++i ) + { + uint8_t hash[BLAKE2B_OUTBYTES]; + blake2b( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ); + + if( 0 != memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) ) + { + puts( "error" ); + return -1; + } + } + + puts( "ok" ); + return 0; +} +#endif + diff --git a/Modules/_blake2/impl/blake2s-load-sse2.h b/Modules/_blake2/impl/blake2s-load-sse2.h new file mode 100644 index 0000000..eadefa7 --- /dev/null +++ b/Modules/_blake2/impl/blake2s-load-sse2.h @@ -0,0 +1,61 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#pragma once +#ifndef __BLAKE2S_LOAD_SSE2_H__ +#define __BLAKE2S_LOAD_SSE2_H__ + +#define LOAD_MSG_0_1(buf) buf = _mm_set_epi32(m6,m4,m2,m0) +#define LOAD_MSG_0_2(buf) buf = _mm_set_epi32(m7,m5,m3,m1) +#define LOAD_MSG_0_3(buf) buf = _mm_set_epi32(m14,m12,m10,m8) +#define LOAD_MSG_0_4(buf) buf = _mm_set_epi32(m15,m13,m11,m9) +#define LOAD_MSG_1_1(buf) buf = _mm_set_epi32(m13,m9,m4,m14) +#define LOAD_MSG_1_2(buf) buf = _mm_set_epi32(m6,m15,m8,m10) +#define LOAD_MSG_1_3(buf) buf = _mm_set_epi32(m5,m11,m0,m1) +#define LOAD_MSG_1_4(buf) buf = _mm_set_epi32(m3,m7,m2,m12) +#define LOAD_MSG_2_1(buf) buf = _mm_set_epi32(m15,m5,m12,m11) +#define LOAD_MSG_2_2(buf) buf = _mm_set_epi32(m13,m2,m0,m8) +#define LOAD_MSG_2_3(buf) buf = _mm_set_epi32(m9,m7,m3,m10) +#define LOAD_MSG_2_4(buf) buf = _mm_set_epi32(m4,m1,m6,m14) +#define LOAD_MSG_3_1(buf) buf = _mm_set_epi32(m11,m13,m3,m7) +#define LOAD_MSG_3_2(buf) buf = _mm_set_epi32(m14,m12,m1,m9) +#define LOAD_MSG_3_3(buf) buf = _mm_set_epi32(m15,m4,m5,m2) +#define LOAD_MSG_3_4(buf) buf = _mm_set_epi32(m8,m0,m10,m6) +#define LOAD_MSG_4_1(buf) buf = _mm_set_epi32(m10,m2,m5,m9) +#define LOAD_MSG_4_2(buf) buf = _mm_set_epi32(m15,m4,m7,m0) +#define LOAD_MSG_4_3(buf) buf = _mm_set_epi32(m3,m6,m11,m14) +#define LOAD_MSG_4_4(buf) buf = _mm_set_epi32(m13,m8,m12,m1) +#define LOAD_MSG_5_1(buf) buf = _mm_set_epi32(m8,m0,m6,m2) +#define LOAD_MSG_5_2(buf) buf = _mm_set_epi32(m3,m11,m10,m12) +#define LOAD_MSG_5_3(buf) buf = _mm_set_epi32(m1,m15,m7,m4) +#define LOAD_MSG_5_4(buf) buf = _mm_set_epi32(m9,m14,m5,m13) +#define LOAD_MSG_6_1(buf) buf = _mm_set_epi32(m4,m14,m1,m12) +#define LOAD_MSG_6_2(buf) buf = _mm_set_epi32(m10,m13,m15,m5) +#define LOAD_MSG_6_3(buf) buf = _mm_set_epi32(m8,m9,m6,m0) +#define LOAD_MSG_6_4(buf) buf = _mm_set_epi32(m11,m2,m3,m7) +#define LOAD_MSG_7_1(buf) buf = _mm_set_epi32(m3,m12,m7,m13) +#define LOAD_MSG_7_2(buf) buf = _mm_set_epi32(m9,m1,m14,m11) +#define LOAD_MSG_7_3(buf) buf = _mm_set_epi32(m2,m8,m15,m5) +#define LOAD_MSG_7_4(buf) buf = _mm_set_epi32(m10,m6,m4,m0) +#define LOAD_MSG_8_1(buf) buf = _mm_set_epi32(m0,m11,m14,m6) +#define LOAD_MSG_8_2(buf) buf = _mm_set_epi32(m8,m3,m9,m15) +#define LOAD_MSG_8_3(buf) buf = _mm_set_epi32(m10,m1,m13,m12) +#define LOAD_MSG_8_4(buf) buf = _mm_set_epi32(m5,m4,m7,m2) +#define LOAD_MSG_9_1(buf) buf = _mm_set_epi32(m1,m7,m8,m10) +#define LOAD_MSG_9_2(buf) buf = _mm_set_epi32(m5,m6,m4,m2) +#define LOAD_MSG_9_3(buf) buf = _mm_set_epi32(m13,m3,m9,m15) +#define LOAD_MSG_9_4(buf) buf = _mm_set_epi32(m0,m12,m14,m11) + + +#endif diff --git a/Modules/_blake2/impl/blake2s-load-sse41.h b/Modules/_blake2/impl/blake2s-load-sse41.h new file mode 100644 index 0000000..54bf0cd --- /dev/null +++ b/Modules/_blake2/impl/blake2s-load-sse41.h @@ -0,0 +1,231 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#pragma once +#ifndef __BLAKE2S_LOAD_SSE41_H__ +#define __BLAKE2S_LOAD_SSE41_H__ + +#define LOAD_MSG_0_1(buf) \ +buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(2,0,2,0))); + +#define LOAD_MSG_0_2(buf) \ +buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(3,1,3,1))); + +#define LOAD_MSG_0_3(buf) \ +buf = TOI(_mm_shuffle_ps(TOF(m2), TOF(m3), _MM_SHUFFLE(2,0,2,0))); + +#define LOAD_MSG_0_4(buf) \ +buf = TOI(_mm_shuffle_ps(TOF(m2), TOF(m3), _MM_SHUFFLE(3,1,3,1))); + +#define LOAD_MSG_1_1(buf) \ +t0 = _mm_blend_epi16(m1, m2, 0x0C); \ +t1 = _mm_slli_si128(m3, 4); \ +t2 = _mm_blend_epi16(t0, t1, 0xF0); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,0,3)); + +#define LOAD_MSG_1_2(buf) \ +t0 = _mm_shuffle_epi32(m2,_MM_SHUFFLE(0,0,2,0)); \ +t1 = _mm_blend_epi16(m1,m3,0xC0); \ +t2 = _mm_blend_epi16(t0, t1, 0xF0); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1)); + +#define LOAD_MSG_1_3(buf) \ +t0 = _mm_slli_si128(m1, 4); \ +t1 = _mm_blend_epi16(m2, t0, 0x30); \ +t2 = _mm_blend_epi16(m0, t1, 0xF0); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1)); + +#define LOAD_MSG_1_4(buf) \ +t0 = _mm_unpackhi_epi32(m0,m1); \ +t1 = _mm_slli_si128(m3, 4); \ +t2 = _mm_blend_epi16(t0, t1, 0x0C); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1)); + +#define LOAD_MSG_2_1(buf) \ +t0 = _mm_unpackhi_epi32(m2,m3); \ +t1 = _mm_blend_epi16(m3,m1,0x0C); \ +t2 = _mm_blend_epi16(t0, t1, 0x0F); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2)); + +#define LOAD_MSG_2_2(buf) \ +t0 = _mm_unpacklo_epi32(m2,m0); \ +t1 = _mm_blend_epi16(t0, m0, 0xF0); \ +t2 = _mm_slli_si128(m3, 8); \ +buf = _mm_blend_epi16(t1, t2, 0xC0); + +#define LOAD_MSG_2_3(buf) \ +t0 = _mm_blend_epi16(m0, m2, 0x3C); \ +t1 = _mm_srli_si128(m1, 12); \ +t2 = _mm_blend_epi16(t0,t1,0x03); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,3,2)); + +#define LOAD_MSG_2_4(buf) \ +t0 = _mm_slli_si128(m3, 4); \ +t1 = _mm_blend_epi16(m0, m1, 0x33); \ +t2 = _mm_blend_epi16(t1, t0, 0xC0); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(0,1,2,3)); + +#define LOAD_MSG_3_1(buf) \ +t0 = _mm_unpackhi_epi32(m0,m1); \ +t1 = _mm_unpackhi_epi32(t0, m2); \ +t2 = _mm_blend_epi16(t1, m3, 0x0C); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2)); + +#define LOAD_MSG_3_2(buf) \ +t0 = _mm_slli_si128(m2, 8); \ +t1 = _mm_blend_epi16(m3,m0,0x0C); \ +t2 = _mm_blend_epi16(t1, t0, 0xC0); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3)); + +#define LOAD_MSG_3_3(buf) \ +t0 = _mm_blend_epi16(m0,m1,0x0F); \ +t1 = _mm_blend_epi16(t0, m3, 0xC0); \ +buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(3,0,1,2)); + +#define LOAD_MSG_3_4(buf) \ +t0 = _mm_unpacklo_epi32(m0,m2); \ +t1 = _mm_unpackhi_epi32(m1,m2); \ +buf = _mm_unpacklo_epi64(t1,t0); + +#define LOAD_MSG_4_1(buf) \ +t0 = _mm_unpacklo_epi64(m1,m2); \ +t1 = _mm_unpackhi_epi64(m0,m2); \ +t2 = _mm_blend_epi16(t0,t1,0x33); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3)); + +#define LOAD_MSG_4_2(buf) \ +t0 = _mm_unpackhi_epi64(m1,m3); \ +t1 = _mm_unpacklo_epi64(m0,m1); \ +buf = _mm_blend_epi16(t0,t1,0x33); + +#define LOAD_MSG_4_3(buf) \ +t0 = _mm_unpackhi_epi64(m3,m1); \ +t1 = _mm_unpackhi_epi64(m2,m0); \ +buf = _mm_blend_epi16(t1,t0,0x33); + +#define LOAD_MSG_4_4(buf) \ +t0 = _mm_blend_epi16(m0,m2,0x03); \ +t1 = _mm_slli_si128(t0, 8); \ +t2 = _mm_blend_epi16(t1,m3,0x0F); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,0,3)); + +#define LOAD_MSG_5_1(buf) \ +t0 = _mm_unpackhi_epi32(m0,m1); \ +t1 = _mm_unpacklo_epi32(m0,m2); \ +buf = _mm_unpacklo_epi64(t0,t1); + +#define LOAD_MSG_5_2(buf) \ +t0 = _mm_srli_si128(m2, 4); \ +t1 = _mm_blend_epi16(m0,m3,0x03); \ +buf = _mm_blend_epi16(t1,t0,0x3C); + +#define LOAD_MSG_5_3(buf) \ +t0 = _mm_blend_epi16(m1,m0,0x0C); \ +t1 = _mm_srli_si128(m3, 4); \ +t2 = _mm_blend_epi16(t0,t1,0x30); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,3,0)); + +#define LOAD_MSG_5_4(buf) \ +t0 = _mm_unpacklo_epi64(m1,m2); \ +t1= _mm_shuffle_epi32(m3, _MM_SHUFFLE(0,2,0,1)); \ +buf = _mm_blend_epi16(t0,t1,0x33); + +#define LOAD_MSG_6_1(buf) \ +t0 = _mm_slli_si128(m1, 12); \ +t1 = _mm_blend_epi16(m0,m3,0x33); \ +buf = _mm_blend_epi16(t1,t0,0xC0); + +#define LOAD_MSG_6_2(buf) \ +t0 = _mm_blend_epi16(m3,m2,0x30); \ +t1 = _mm_srli_si128(m1, 4); \ +t2 = _mm_blend_epi16(t0,t1,0x03); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,3,0)); + +#define LOAD_MSG_6_3(buf) \ +t0 = _mm_unpacklo_epi64(m0,m2); \ +t1 = _mm_srli_si128(m1, 4); \ +buf = _mm_shuffle_epi32(_mm_blend_epi16(t0,t1,0x0C), _MM_SHUFFLE(2,3,1,0)); + +#define LOAD_MSG_6_4(buf) \ +t0 = _mm_unpackhi_epi32(m1,m2); \ +t1 = _mm_unpackhi_epi64(m0,t0); \ +buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(3,0,1,2)); + +#define LOAD_MSG_7_1(buf) \ +t0 = _mm_unpackhi_epi32(m0,m1); \ +t1 = _mm_blend_epi16(t0,m3,0x0F); \ +buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(2,0,3,1)); + +#define LOAD_MSG_7_2(buf) \ +t0 = _mm_blend_epi16(m2,m3,0x30); \ +t1 = _mm_srli_si128(m0,4); \ +t2 = _mm_blend_epi16(t0,t1,0x03); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,2,3)); + +#define LOAD_MSG_7_3(buf) \ +t0 = _mm_unpackhi_epi64(m0,m3); \ +t1 = _mm_unpacklo_epi64(m1,m2); \ +t2 = _mm_blend_epi16(t0,t1,0x3C); \ +buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,2,3,1)); + +#define LOAD_MSG_7_4(buf) \ +t0 = _mm_unpacklo_epi32(m0,m1); \ +t1 = _mm_unpackhi_epi32(m1,m2); \ +buf = _mm_unpacklo_epi64(t0,t1); + +#define LOAD_MSG_8_1(buf) \ +t0 = _mm_unpackhi_epi32(m1,m3); \ +t1 = _mm_unpacklo_epi64(t0,m0); \ +t2 = _mm_blend_epi16(t1,m2,0xC0); \ +buf = _mm_shufflehi_epi16(t2,_MM_SHUFFLE(1,0,3,2)); + +#define LOAD_MSG_8_2(buf) \ +t0 = _mm_unpackhi_epi32(m0,m3); \ +t1 = _mm_blend_epi16(m2,t0,0xF0); \ +buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(0,2,1,3)); + +#define LOAD_MSG_8_3(buf) \ +t0 = _mm_blend_epi16(m2,m0,0x0C); \ +t1 = _mm_slli_si128(t0,4); \ +buf = _mm_blend_epi16(t1,m3,0x0F); + +#define LOAD_MSG_8_4(buf) \ +t0 = _mm_blend_epi16(m1,m0,0x30); \ +buf = _mm_shuffle_epi32(t0,_MM_SHUFFLE(1,0,3,2)); + +#define LOAD_MSG_9_1(buf) \ +t0 = _mm_blend_epi16(m0,m2,0x03); \ +t1 = _mm_blend_epi16(m1,m2,0x30); \ +t2 = _mm_blend_epi16(t1,t0,0x0F); \ +buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(1,3,0,2)); + +#define LOAD_MSG_9_2(buf) \ +t0 = _mm_slli_si128(m0,4); \ +t1 = _mm_blend_epi16(m1,t0,0xC0); \ +buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(1,2,0,3)); + +#define LOAD_MSG_9_3(buf) \ +t0 = _mm_unpackhi_epi32(m0,m3); \ +t1 = _mm_unpacklo_epi32(m2,m3); \ +t2 = _mm_unpackhi_epi64(t0,t1); \ +buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(3,0,2,1)); + +#define LOAD_MSG_9_4(buf) \ +t0 = _mm_blend_epi16(m3,m2,0xC0); \ +t1 = _mm_unpacklo_epi32(m0,m3); \ +t2 = _mm_blend_epi16(t0,t1,0x0F); \ +buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,1,2,3)); + +#endif + diff --git a/Modules/_blake2/impl/blake2s-load-xop.h b/Modules/_blake2/impl/blake2s-load-xop.h new file mode 100644 index 0000000..a3b5d65 --- /dev/null +++ b/Modules/_blake2/impl/blake2s-load-xop.h @@ -0,0 +1,191 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#pragma once +#ifndef __BLAKE2S_LOAD_XOP_H__ +#define __BLAKE2S_LOAD_XOP_H__ + +#define TOB(x) ((x)*4*0x01010101 + 0x03020100) /* ..or not TOB */ + +/* Basic VPPERM emulation, for testing purposes */ +/*static __m128i _mm_perm_epi8(const __m128i src1, const __m128i src2, const __m128i sel) +{ + const __m128i sixteen = _mm_set1_epi8(16); + const __m128i t0 = _mm_shuffle_epi8(src1, sel); + const __m128i s1 = _mm_shuffle_epi8(src2, _mm_sub_epi8(sel, sixteen)); + const __m128i mask = _mm_or_si128(_mm_cmpeq_epi8(sel, sixteen), + _mm_cmpgt_epi8(sel, sixteen)); /* (>=16) = 0xff : 00 */ + return _mm_blendv_epi8(t0, s1, mask); +}*/ + +#define LOAD_MSG_0_1(buf) \ +buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) ); + +#define LOAD_MSG_0_2(buf) \ +buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(7),TOB(5),TOB(3),TOB(1)) ); + +#define LOAD_MSG_0_3(buf) \ +buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) ); + +#define LOAD_MSG_0_4(buf) \ +buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(7),TOB(5),TOB(3),TOB(1)) ); + +#define LOAD_MSG_1_1(buf) \ +t0 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(0),TOB(5),TOB(0),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(6)) ); + +#define LOAD_MSG_1_2(buf) \ +t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(2),TOB(0),TOB(4),TOB(6)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) ); + +#define LOAD_MSG_1_3(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(0),TOB(0),TOB(1)) ); \ +buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) ); + +#define LOAD_MSG_1_4(buf) \ +t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(7),TOB(2),TOB(0)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(4)) ); + +#define LOAD_MSG_2_1(buf) \ +t0 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(0),TOB(1),TOB(0),TOB(7)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(4),TOB(0)) ); + +#define LOAD_MSG_2_2(buf) \ +t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(2),TOB(0),TOB(4)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(0)) ); + +#define LOAD_MSG_2_3(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(7),TOB(3),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(6)) ); + +#define LOAD_MSG_2_4(buf) \ +t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4),TOB(1),TOB(6),TOB(0)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(6)) ); + +#define LOAD_MSG_3_1(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(3),TOB(7)) ); \ +t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(5),TOB(1),TOB(0)) ); + +#define LOAD_MSG_3_2(buf) \ +t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(1),TOB(5)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(6),TOB(4),TOB(1),TOB(0)) ); + +#define LOAD_MSG_3_3(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(4),TOB(5),TOB(2)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); + +#define LOAD_MSG_3_4(buf) \ +t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(6)) ); \ +buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(4),TOB(2),TOB(6),TOB(0)) ); + +#define LOAD_MSG_4_1(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(2),TOB(5),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(6),TOB(2),TOB(1),TOB(5)) ); + +#define LOAD_MSG_4_2(buf) \ +t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(4),TOB(7),TOB(0)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); + +#define LOAD_MSG_4_3(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(6),TOB(0),TOB(0)) ); \ +t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(7),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(6)) ); + +#define LOAD_MSG_4_4(buf) \ +t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(4),TOB(0),TOB(1)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(4),TOB(0)) ); + +#define LOAD_MSG_5_1(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(6),TOB(2)) ); \ +buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(4),TOB(2),TOB(1),TOB(0)) ); + +#define LOAD_MSG_5_2(buf) \ +t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(6),TOB(0)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(4)) ); + +#define LOAD_MSG_5_3(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(1),TOB(0),TOB(7),TOB(4)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) ); + +#define LOAD_MSG_5_4(buf) \ +t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(5),TOB(0),TOB(1),TOB(0)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(6),TOB(1),TOB(5)) ); + +#define LOAD_MSG_6_1(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4),TOB(0),TOB(1),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(6),TOB(1),TOB(4)) ); + +#define LOAD_MSG_6_2(buf) \ +t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(6),TOB(0),TOB(0),TOB(1)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(5),TOB(7),TOB(0)) ); + +#define LOAD_MSG_6_3(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(6),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(4),TOB(5),TOB(1),TOB(0)) ); + +#define LOAD_MSG_6_4(buf) \ +t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(2),TOB(3),TOB(7)) ); \ +buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); + +#define LOAD_MSG_7_1(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(0),TOB(7),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(1),TOB(5)) ); + +#define LOAD_MSG_7_2(buf) \ +t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(5),TOB(1),TOB(0),TOB(7)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(6),TOB(0)) ); + +#define LOAD_MSG_7_3(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(2),TOB(0),TOB(0),TOB(5)) ); \ +t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(4),TOB(1),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(7),TOB(0)) ); + +#define LOAD_MSG_7_4(buf) \ +t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(6),TOB(4),TOB(0)) ); \ +buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(6),TOB(2),TOB(1),TOB(0)) ); + +#define LOAD_MSG_8_1(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(6)) ); \ +t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(6),TOB(0)) ); + +#define LOAD_MSG_8_2(buf) \ +t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(4),TOB(3),TOB(5),TOB(0)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(7)) ); + +#define LOAD_MSG_8_3(buf) \ +t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(6),TOB(1),TOB(0),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(5),TOB(4)) ); \ + +#define LOAD_MSG_8_4(buf) \ +buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(4),TOB(7),TOB(2)) ); + +#define LOAD_MSG_9_1(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(1),TOB(7),TOB(0),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(4),TOB(6)) ); + +#define LOAD_MSG_9_2(buf) \ +buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(6),TOB(4),TOB(2)) ); + +#define LOAD_MSG_9_3(buf) \ +t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(3),TOB(5),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(7)) ); + +#define LOAD_MSG_9_4(buf) \ +t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(7)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(6),TOB(0)) ); + +#endif + diff --git a/Modules/_blake2/impl/blake2s-ref.c b/Modules/_blake2/impl/blake2s-ref.c new file mode 100644 index 0000000..0e246c3 --- /dev/null +++ b/Modules/_blake2/impl/blake2s-ref.c @@ -0,0 +1,406 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +#include <stdint.h> +#include <string.h> +#include <stdio.h> + +#include "blake2.h" +#include "blake2-impl.h" + +static const uint32_t blake2s_IV[8] = +{ + 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, + 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL +}; + +static const uint8_t blake2s_sigma[10][16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , +}; + +BLAKE2_LOCAL_INLINE(int) blake2s_set_lastnode( blake2s_state *S ) +{ + S->f[1] = -1; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_clear_lastnode( blake2s_state *S ) +{ + S->f[1] = 0; + return 0; +} + +/* Some helper functions, not necessarily useful */ +BLAKE2_LOCAL_INLINE(int) blake2s_is_lastblock( const blake2s_state *S ) +{ + return S->f[0] != 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_set_lastblock( blake2s_state *S ) +{ + if( S->last_node ) blake2s_set_lastnode( S ); + + S->f[0] = -1; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_clear_lastblock( blake2s_state *S ) +{ + if( S->last_node ) blake2s_clear_lastnode( S ); + + S->f[0] = 0; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) +{ + S->t[0] += inc; + S->t[1] += ( S->t[0] < inc ); + return 0; +} + +/* Parameter-related functions */ +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length ) +{ + P->digest_length = digest_length; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout ) +{ + P->fanout = fanout; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth ) +{ + P->depth = depth; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length ) +{ + store32( &P->leaf_length, leaf_length ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset ) +{ + store48( P->node_offset, node_offset ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth ) +{ + P->node_depth = node_depth; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length ) +{ + P->inner_length = inner_length; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] ) +{ + memcpy( P->salt, salt, BLAKE2S_SALTBYTES ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] ) +{ + memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_init0( blake2s_state *S ) +{ + memset( S, 0, sizeof( blake2s_state ) ); + + for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; + + return 0; +} + +/* init2 xors IV with input parameter block */ +int blake2s_init_param( blake2s_state *S, const blake2s_param *P ) +{ + const uint32_t *p = ( const uint32_t * )( P ); + + blake2s_init0( S ); + + /* IV XOR ParamBlock */ + for( size_t i = 0; i < 8; ++i ) + S->h[i] ^= load32( &p[i] ); + + return 0; +} + + +/* Sequential blake2s initialization */ +int blake2s_init( blake2s_state *S, const uint8_t outlen ) +{ + blake2s_param P[1]; + + /* Move interval verification here? */ + if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; + + P->digest_length = outlen; + P->key_length = 0; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store48( &P->node_offset, 0 ); + P->node_depth = 0; + P->inner_length = 0; + /* memset(P->reserved, 0, sizeof(P->reserved) ); */ + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + return blake2s_init_param( S, P ); +} + +int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ) +{ + blake2s_param P[1]; + + if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; + + if ( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1; + + P->digest_length = outlen; + P->key_length = keylen; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store48( &P->node_offset, 0 ); + P->node_depth = 0; + P->inner_length = 0; + /* memset(P->reserved, 0, sizeof(P->reserved) ); */ + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + if( blake2s_init_param( S, P ) < 0 ) return -1; + + { + uint8_t block[BLAKE2S_BLOCKBYTES]; + memset( block, 0, BLAKE2S_BLOCKBYTES ); + memcpy( block, key, keylen ); + blake2s_update( S, block, BLAKE2S_BLOCKBYTES ); + secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ + } + return 0; +} + +static int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] ) +{ + uint32_t m[16]; + uint32_t v[16]; + + for( size_t i = 0; i < 16; ++i ) + m[i] = load32( block + i * sizeof( m[i] ) ); + + for( size_t i = 0; i < 8; ++i ) + v[i] = S->h[i]; + + v[ 8] = blake2s_IV[0]; + v[ 9] = blake2s_IV[1]; + v[10] = blake2s_IV[2]; + v[11] = blake2s_IV[3]; + v[12] = S->t[0] ^ blake2s_IV[4]; + v[13] = S->t[1] ^ blake2s_IV[5]; + v[14] = S->f[0] ^ blake2s_IV[6]; + v[15] = S->f[1] ^ blake2s_IV[7]; +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2s_sigma[r][2*i+0]]; \ + d = rotr32(d ^ a, 16); \ + c = c + d; \ + b = rotr32(b ^ c, 12); \ + a = a + b + m[blake2s_sigma[r][2*i+1]]; \ + d = rotr32(d ^ a, 8); \ + c = c + d; \ + b = rotr32(b ^ c, 7); \ + } while(0) +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + + for( size_t i = 0; i < 8; ++i ) + S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + +#undef G +#undef ROUND + return 0; +} + + +int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen ) +{ + while( inlen > 0 ) + { + size_t left = S->buflen; + size_t fill = 2 * BLAKE2S_BLOCKBYTES - left; + + if( inlen > fill ) + { + memcpy( S->buf + left, in, fill ); /* Fill buffer */ + S->buflen += fill; + blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); + blake2s_compress( S, S->buf ); /* Compress */ + memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); /* Shift buffer left */ + S->buflen -= BLAKE2S_BLOCKBYTES; + in += fill; + inlen -= fill; + } + else /* inlen <= fill */ + { + memcpy( S->buf + left, in, inlen ); + S->buflen += inlen; /* Be lazy, do not compress */ + in += inlen; + inlen -= inlen; + } + } + + return 0; +} + +int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen ) +{ + uint8_t buffer[BLAKE2S_OUTBYTES] = {0}; + + if( out == NULL || outlen == 0 || outlen > BLAKE2S_OUTBYTES ) + return -1; + + if( blake2s_is_lastblock( S ) ) + return -1; + + + if( S->buflen > BLAKE2S_BLOCKBYTES ) + { + blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); + blake2s_compress( S, S->buf ); + S->buflen -= BLAKE2S_BLOCKBYTES; + memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen ); + } + + blake2s_increment_counter( S, ( uint32_t )S->buflen ); + blake2s_set_lastblock( S ); + memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ + blake2s_compress( S, S->buf ); + + for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ + store32( buffer + sizeof( S->h[i] ) * i, S->h[i] ); + + memcpy( out, buffer, outlen ); + return 0; +} + +int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ) +{ + blake2s_state S[1]; + + /* Verify parameters */ + if ( NULL == in && inlen > 0 ) return -1; + + if ( NULL == out ) return -1; + + if ( NULL == key && keylen > 0) return -1; + + if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; + + if( keylen > BLAKE2S_KEYBYTES ) return -1; + + if( keylen > 0 ) + { + if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1; + } + else + { + if( blake2s_init( S, outlen ) < 0 ) return -1; + } + + blake2s_update( S, ( const uint8_t * )in, inlen ); + blake2s_final( S, out, outlen ); + return 0; +} + +#if defined(SUPERCOP) +int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) +{ + return blake2s( out, in, NULL, BLAKE2S_OUTBYTES, inlen, 0 ); +} +#endif + +#if defined(BLAKE2S_SELFTEST) +#include <string.h> +#include "blake2-kat.h" +int main( int argc, char **argv ) +{ + uint8_t key[BLAKE2S_KEYBYTES]; + uint8_t buf[KAT_LENGTH]; + + for( size_t i = 0; i < BLAKE2S_KEYBYTES; ++i ) + key[i] = ( uint8_t )i; + + for( size_t i = 0; i < KAT_LENGTH; ++i ) + buf[i] = ( uint8_t )i; + + for( size_t i = 0; i < KAT_LENGTH; ++i ) + { + uint8_t hash[BLAKE2S_OUTBYTES]; + blake2s( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ); + + if( 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) ) + { + puts( "error" ); + return -1; + } + } + + puts( "ok" ); + return 0; +} +#endif + + diff --git a/Modules/_blake2/impl/blake2s-round.h b/Modules/_blake2/impl/blake2s-round.h new file mode 100644 index 0000000..7470d92 --- /dev/null +++ b/Modules/_blake2/impl/blake2s-round.h @@ -0,0 +1,90 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#pragma once +#ifndef __BLAKE2S_ROUND_H__ +#define __BLAKE2S_ROUND_H__ + +#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) ) +#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) + +#define TOF(reg) _mm_castsi128_ps((reg)) +#define TOI(reg) _mm_castps_si128((reg)) + +#define LIKELY(x) __builtin_expect((x),1) + + +/* Microarchitecture-specific macros */ +#ifndef HAVE_XOP +#ifdef HAVE_SSSE3 +#define _mm_roti_epi32(r, c) ( \ + (8==-(c)) ? _mm_shuffle_epi8(r,r8) \ + : (16==-(c)) ? _mm_shuffle_epi8(r,r16) \ + : _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) )) ) +#else +#define _mm_roti_epi32(r, c) _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) )) +#endif +#else +/* ... */ +#endif + + +#define G1(row1,row2,row3,row4,buf) \ + row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \ + row4 = _mm_xor_si128( row4, row1 ); \ + row4 = _mm_roti_epi32(row4, -16); \ + row3 = _mm_add_epi32( row3, row4 ); \ + row2 = _mm_xor_si128( row2, row3 ); \ + row2 = _mm_roti_epi32(row2, -12); + +#define G2(row1,row2,row3,row4,buf) \ + row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \ + row4 = _mm_xor_si128( row4, row1 ); \ + row4 = _mm_roti_epi32(row4, -8); \ + row3 = _mm_add_epi32( row3, row4 ); \ + row2 = _mm_xor_si128( row2, row3 ); \ + row2 = _mm_roti_epi32(row2, -7); + +#define DIAGONALIZE(row1,row2,row3,row4) \ + row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(2,1,0,3) ); \ + row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(1,0,3,2) ); \ + row2 = _mm_shuffle_epi32( row2, _MM_SHUFFLE(0,3,2,1) ); + +#define UNDIAGONALIZE(row1,row2,row3,row4) \ + row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(0,3,2,1) ); \ + row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(1,0,3,2) ); \ + row2 = _mm_shuffle_epi32( row2, _MM_SHUFFLE(2,1,0,3) ); + +#if defined(HAVE_XOP) +#include "blake2s-load-xop.h" +#elif defined(HAVE_SSE41) +#include "blake2s-load-sse41.h" +#else +#include "blake2s-load-sse2.h" +#endif + +#define ROUND(r) \ + LOAD_MSG_ ##r ##_1(buf1); \ + G1(row1,row2,row3,row4,buf1); \ + LOAD_MSG_ ##r ##_2(buf2); \ + G2(row1,row2,row3,row4,buf2); \ + DIAGONALIZE(row1,row2,row3,row4); \ + LOAD_MSG_ ##r ##_3(buf3); \ + G1(row1,row2,row3,row4,buf3); \ + LOAD_MSG_ ##r ##_4(buf4); \ + G2(row1,row2,row3,row4,buf4); \ + UNDIAGONALIZE(row1,row2,row3,row4); \ + +#endif + diff --git a/Modules/_blake2/impl/blake2s.c b/Modules/_blake2/impl/blake2s.c new file mode 100644 index 0000000..030a85e --- /dev/null +++ b/Modules/_blake2/impl/blake2s.c @@ -0,0 +1,431 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +#include <stdint.h> +#include <string.h> +#include <stdio.h> + +#include "blake2.h" +#include "blake2-impl.h" + +#include "blake2-config.h" + + +#include <emmintrin.h> +#if defined(HAVE_SSSE3) +#include <tmmintrin.h> +#endif +#if defined(HAVE_SSE41) +#include <smmintrin.h> +#endif +#if defined(HAVE_AVX) +#include <immintrin.h> +#endif +#if defined(HAVE_XOP) +#include <x86intrin.h> +#endif + +#include "blake2s-round.h" + +static const uint32_t blake2s_IV[8] = +{ + 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, + 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL +}; + +static const uint8_t blake2s_sigma[10][16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , +}; + + +/* Some helper functions, not necessarily useful */ +BLAKE2_LOCAL_INLINE(int) blake2s_set_lastnode( blake2s_state *S ) +{ + S->f[1] = -1; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_clear_lastnode( blake2s_state *S ) +{ + S->f[1] = 0; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_is_lastblock( const blake2s_state *S ) +{ + return S->f[0] != 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_set_lastblock( blake2s_state *S ) +{ + if( S->last_node ) blake2s_set_lastnode( S ); + + S->f[0] = -1; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_clear_lastblock( blake2s_state *S ) +{ + if( S->last_node ) blake2s_clear_lastnode( S ); + + S->f[0] = 0; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) +{ + uint64_t t = ( ( uint64_t )S->t[1] << 32 ) | S->t[0]; + t += inc; + S->t[0] = ( uint32_t )( t >> 0 ); + S->t[1] = ( uint32_t )( t >> 32 ); + return 0; +} + + +/* Parameter-related functions */ +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length ) +{ + P->digest_length = digest_length; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout ) +{ + P->fanout = fanout; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth ) +{ + P->depth = depth; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length ) +{ + P->leaf_length = leaf_length; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset ) +{ + store48( P->node_offset, node_offset ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth ) +{ + P->node_depth = node_depth; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length ) +{ + P->inner_length = inner_length; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] ) +{ + memcpy( P->salt, salt, BLAKE2S_SALTBYTES ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] ) +{ + memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_init0( blake2s_state *S ) +{ + memset( S, 0, sizeof( blake2s_state ) ); + + for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; + + return 0; +} + +/* init2 xors IV with input parameter block */ +int blake2s_init_param( blake2s_state *S, const blake2s_param *P ) +{ + /*blake2s_init0( S ); */ + const uint8_t * v = ( const uint8_t * )( blake2s_IV ); + const uint8_t * p = ( const uint8_t * )( P ); + uint8_t * h = ( uint8_t * )( S->h ); + /* IV XOR ParamBlock */ + memset( S, 0, sizeof( blake2s_state ) ); + + for( int i = 0; i < BLAKE2S_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; + + return 0; +} + + +/* Some sort of default parameter block initialization, for sequential blake2s */ +int blake2s_init( blake2s_state *S, const uint8_t outlen ) +{ + const blake2s_param P = + { + outlen, + 0, + 1, + 1, + 0, + {0}, + 0, + 0, + {0}, + {0} + }; + /* Move interval verification here? */ + if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; + return blake2s_init_param( S, &P ); +} + + +int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ) +{ + const blake2s_param P = + { + outlen, + keylen, + 1, + 1, + 0, + {0}, + 0, + 0, + {0}, + {0} + }; + + /* Move interval verification here? */ + if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; + + if ( ( !key ) || ( !keylen ) || keylen > BLAKE2S_KEYBYTES ) return -1; + + if( blake2s_init_param( S, &P ) < 0 ) + return -1; + + { + uint8_t block[BLAKE2S_BLOCKBYTES]; + memset( block, 0, BLAKE2S_BLOCKBYTES ); + memcpy( block, key, keylen ); + blake2s_update( S, block, BLAKE2S_BLOCKBYTES ); + secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ + } + return 0; +} + + +BLAKE2_LOCAL_INLINE(int) blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] ) +{ + __m128i row1, row2, row3, row4; + __m128i buf1, buf2, buf3, buf4; +#if defined(HAVE_SSE41) + __m128i t0, t1; +#if !defined(HAVE_XOP) + __m128i t2; +#endif +#endif + __m128i ff0, ff1; +#if defined(HAVE_SSSE3) && !defined(HAVE_XOP) + const __m128i r8 = _mm_set_epi8( 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1 ); + const __m128i r16 = _mm_set_epi8( 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2 ); +#endif +#if defined(HAVE_SSE41) + const __m128i m0 = LOADU( block + 00 ); + const __m128i m1 = LOADU( block + 16 ); + const __m128i m2 = LOADU( block + 32 ); + const __m128i m3 = LOADU( block + 48 ); +#else + const uint32_t m0 = ( ( uint32_t * )block )[ 0]; + const uint32_t m1 = ( ( uint32_t * )block )[ 1]; + const uint32_t m2 = ( ( uint32_t * )block )[ 2]; + const uint32_t m3 = ( ( uint32_t * )block )[ 3]; + const uint32_t m4 = ( ( uint32_t * )block )[ 4]; + const uint32_t m5 = ( ( uint32_t * )block )[ 5]; + const uint32_t m6 = ( ( uint32_t * )block )[ 6]; + const uint32_t m7 = ( ( uint32_t * )block )[ 7]; + const uint32_t m8 = ( ( uint32_t * )block )[ 8]; + const uint32_t m9 = ( ( uint32_t * )block )[ 9]; + const uint32_t m10 = ( ( uint32_t * )block )[10]; + const uint32_t m11 = ( ( uint32_t * )block )[11]; + const uint32_t m12 = ( ( uint32_t * )block )[12]; + const uint32_t m13 = ( ( uint32_t * )block )[13]; + const uint32_t m14 = ( ( uint32_t * )block )[14]; + const uint32_t m15 = ( ( uint32_t * )block )[15]; +#endif + row1 = ff0 = LOADU( &S->h[0] ); + row2 = ff1 = LOADU( &S->h[4] ); + row3 = _mm_setr_epi32( 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A ); + row4 = _mm_xor_si128( _mm_setr_epi32( 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 ), LOADU( &S->t[0] ) ); + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + STOREU( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) ); + STOREU( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) ); + return 0; +} + +/* inlen now in bytes */ +int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen ) +{ + while( inlen > 0 ) + { + size_t left = S->buflen; + size_t fill = 2 * BLAKE2S_BLOCKBYTES - left; + + if( inlen > fill ) + { + memcpy( S->buf + left, in, fill ); /* Fill buffer */ + S->buflen += fill; + blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); + blake2s_compress( S, S->buf ); /* Compress */ + memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); /* Shift buffer left */ + S->buflen -= BLAKE2S_BLOCKBYTES; + in += fill; + inlen -= fill; + } + else /* inlen <= fill */ + { + memcpy( S->buf + left, in, inlen ); + S->buflen += inlen; /* Be lazy, do not compress */ + in += inlen; + inlen -= inlen; + } + } + + return 0; +} + +/* Is this correct? */ +int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen ) +{ + uint8_t buffer[BLAKE2S_OUTBYTES] = {0}; + + if( outlen > BLAKE2S_OUTBYTES ) + return -1; + + if( blake2s_is_lastblock( S ) ) + return -1; + + if( S->buflen > BLAKE2S_BLOCKBYTES ) + { + blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); + blake2s_compress( S, S->buf ); + S->buflen -= BLAKE2S_BLOCKBYTES; + memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen ); + } + + blake2s_increment_counter( S, ( uint32_t )S->buflen ); + blake2s_set_lastblock( S ); + memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ + blake2s_compress( S, S->buf ); + + for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ + store32( buffer + sizeof( S->h[i] ) * i, S->h[i] ); + + memcpy( out, buffer, outlen ); + return 0; +} + +/* inlen, at least, should be uint64_t. Others can be size_t. */ +int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ) +{ + blake2s_state S[1]; + + /* Verify parameters */ + if ( NULL == in && inlen > 0 ) return -1; + + if ( NULL == out ) return -1; + + if ( NULL == key && keylen > 0) return -1; + + if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; + + if( keylen > BLAKE2S_KEYBYTES ) return -1; + + if( keylen > 0 ) + { + if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1; + } + else + { + if( blake2s_init( S, outlen ) < 0 ) return -1; + } + + blake2s_update( S, ( const uint8_t * )in, inlen ); + blake2s_final( S, out, outlen ); + return 0; +} + +#if defined(SUPERCOP) +int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) +{ + return blake2s( out, in, NULL, BLAKE2S_OUTBYTES, inlen, 0 ); +} +#endif + +#if defined(BLAKE2S_SELFTEST) +#include <string.h> +#include "blake2-kat.h" +int main( int argc, char **argv ) +{ + uint8_t key[BLAKE2S_KEYBYTES]; + uint8_t buf[KAT_LENGTH]; + + for( size_t i = 0; i < BLAKE2S_KEYBYTES; ++i ) + key[i] = ( uint8_t )i; + + for( size_t i = 0; i < KAT_LENGTH; ++i ) + buf[i] = ( uint8_t )i; + + for( size_t i = 0; i < KAT_LENGTH; ++i ) + { + uint8_t hash[BLAKE2S_OUTBYTES]; + + if( blake2s( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ) < 0 || + 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) ) + { + puts( "error" ); + return -1; + } + } + + puts( "ok" ); + return 0; +} +#endif + + diff --git a/Modules/hashlib.h b/Modules/hashlib.h index 3580453..530b6b1 100644 --- a/Modules/hashlib.h +++ b/Modules/hashlib.h @@ -2,30 +2,33 @@ /* * Given a PyObject* obj, fill in the Py_buffer* viewp with the result - * of PyObject_GetBuffer. Sets an exception and issues a return NULL - * on any errors. + * of PyObject_GetBuffer. Sets an exception and issues the erraction + * on any errors, e.g. 'return NULL' or 'goto error'. */ -#define GET_BUFFER_VIEW_OR_ERROUT(obj, viewp) do { \ +#define GET_BUFFER_VIEW_OR_ERROR(obj, viewp, erraction) do { \ if (PyUnicode_Check((obj))) { \ PyErr_SetString(PyExc_TypeError, \ "Unicode-objects must be encoded before hashing");\ - return NULL; \ + erraction; \ } \ if (!PyObject_CheckBuffer((obj))) { \ PyErr_SetString(PyExc_TypeError, \ "object supporting the buffer API required"); \ - return NULL; \ + erraction; \ } \ if (PyObject_GetBuffer((obj), (viewp), PyBUF_SIMPLE) == -1) { \ - return NULL; \ + erraction; \ } \ if ((viewp)->ndim > 1) { \ PyErr_SetString(PyExc_BufferError, \ "Buffer must be single dimension"); \ PyBuffer_Release((viewp)); \ - return NULL; \ + erraction; \ } \ - } while(0); + } while(0) + +#define GET_BUFFER_VIEW_OR_ERROUT(obj, viewp) \ + GET_BUFFER_VIEW_OR_ERROR(obj, viewp, return NULL) /* * Helper code to synchronize access to the hash object when the GIL is |