diff options
author | Jonathan Protzenko <protz@microsoft.com> | 2024-10-17 15:08:43 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-10-17 15:08:43 (GMT) |
commit | 528bbab96feadbfabb798547e5bb2ad52070fb73 (patch) | |
tree | 8f63cbdec7d10080edf6936f9527a62401903e44 /Modules | |
parent | 0d88b995a641315306d56fba7d07479b2c5f57ef (diff) | |
download | cpython-528bbab96feadbfabb798547e5bb2ad52070fb73.zip cpython-528bbab96feadbfabb798547e5bb2ad52070fb73.tar.gz cpython-528bbab96feadbfabb798547e5bb2ad52070fb73.tar.bz2 |
GH-99108: Make vectorized versions of Blake2 available on x86, too (#125244)
Accomplished by updating HACL* vendored code from hacl-star/hacl-star@a6a09496d9cff652b567d26f2c3ab012321b632a to hacl-star/hacl-star@315a9e491d2bc347b9dae99e0ea506995ea84d9d
Co-authored-by: Victor Stinner <vstinner@python.org>
Co-authored-by: Zachary Ware <zach@python.org>
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/_hacl/Hacl_Hash_Blake2b.c | 260 | ||||
-rw-r--r-- | Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c | 225 | ||||
-rw-r--r-- | Modules/_hacl/Hacl_Hash_Blake2s.c | 231 | ||||
-rw-r--r-- | Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c | 196 | ||||
-rw-r--r-- | Modules/_hacl/include/krml/FStar_UInt128_Verified.h | 2 | ||||
-rw-r--r-- | Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h | 2 | ||||
-rw-r--r-- | Modules/_hacl/include/krml/fstar_uint128_struct_endianness.h | 2 | ||||
-rw-r--r-- | Modules/_hacl/include/krml/internal/target.h | 6 | ||||
-rw-r--r-- | Modules/_hacl/include/krml/lowstar_endianness.h | 2 | ||||
-rw-r--r-- | Modules/_hacl/libintvector.h | 2 | ||||
-rwxr-xr-x | Modules/_hacl/refresh.sh | 2 |
11 files changed, 595 insertions, 335 deletions
diff --git a/Modules/_hacl/Hacl_Hash_Blake2b.c b/Modules/_hacl/Hacl_Hash_Blake2b.c index e13f16f..cd3b977 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2b.c +++ b/Modules/_hacl/Hacl_Hash_Blake2b.c @@ -575,86 +575,6 @@ void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn) r1[3U] = iv7_; } -static void init_with_params(uint64_t *hash, Hacl_Hash_Blake2b_blake2_params p) -{ - uint64_t tmp[8U] = { 0U }; - uint64_t *r0 = hash; - uint64_t *r1 = hash + 4U; - uint64_t *r2 = hash + 8U; - uint64_t *r3 = hash + 12U; - uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; - uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; - uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; - uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; - uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; - uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; - uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; - uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; - r2[0U] = iv0; - r2[1U] = iv1; - r2[2U] = iv2; - r2[3U] = iv3; - r3[0U] = iv4; - r3[1U] = iv5; - r3[2U] = iv6; - r3[3U] = iv7; - uint8_t kk = p.key_length; - uint8_t nn = p.digest_length; - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint64_t *os = tmp + 4U; - uint8_t *bj = p.salt + i * 8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x;); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint64_t *os = tmp + 6U; - uint8_t *bj = p.personal + i * 8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x;); - tmp[0U] = - (uint64_t)nn - ^ - ((uint64_t)kk - << 8U - ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); - tmp[1U] = p.node_offset; - tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; - tmp[3U] = 0ULL; - uint64_t tmp0 = tmp[0U]; - uint64_t tmp1 = tmp[1U]; - uint64_t tmp2 = tmp[2U]; - uint64_t tmp3 = tmp[3U]; - uint64_t tmp4 = tmp[4U]; - uint64_t tmp5 = tmp[5U]; - uint64_t tmp6 = tmp[6U]; - uint64_t tmp7 = tmp[7U]; - uint64_t iv0_ = iv0 ^ tmp0; - uint64_t iv1_ = iv1 ^ tmp1; - uint64_t iv2_ = iv2 ^ tmp2; - uint64_t iv3_ = iv3 ^ tmp3; - uint64_t iv4_ = iv4 ^ tmp4; - uint64_t iv5_ = iv5 ^ tmp5; - uint64_t iv6_ = iv6 ^ tmp6; - uint64_t iv7_ = iv7 ^ tmp7; - r0[0U] = iv0_; - r0[1U] = iv1_; - r0[2U] = iv2_; - r0[3U] = iv3_; - r1[0U] = iv4_; - r1[1U] = iv5_; - r1[2U] = iv6_; - r1[3U] = iv7_; -} - static void update_key(uint64_t *wv, uint64_t *hash, uint32_t kk, uint8_t *k, uint32_t ll) { FStar_UInt128_uint128 lb = FStar_UInt128_uint64_to_uint128((uint64_t)128U); @@ -811,16 +731,92 @@ static Hacl_Hash_Blake2b_state_t uint8_t nn = p1->digest_length; bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; - uint32_t kk2 = (uint32_t)i.key_length; + uint64_t *h = block_state.f3.snd; + uint32_t kk20 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) + if (!(kk20 == 0U)) { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + uint8_t *sub_b = buf + kk20; + memset(sub_b, 0U, (128U - kk20) * sizeof (uint8_t)); + memcpy(buf, k_1, kk20 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.f3.snd, pv); + uint64_t tmp[8U] = { 0U }; + uint64_t *r0 = h; + uint64_t *r1 = h + 4U; + uint64_t *r2 = h + 8U; + uint64_t *r3 = h + 12U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + uint8_t kk2 = pv.key_length; + uint8_t nn1 = pv.digest_length; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r4 = u; + uint64_t x = r4; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r4 = u; + uint64_t x = r4; + os[i0] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk2 + << 8U + ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + tmp[1U] = pv.node_offset; + tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; return p; } @@ -918,16 +914,92 @@ static void reset_raw(Hacl_Hash_Blake2b_state_t *state, Hacl_Hash_Blake2b_params bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; - uint32_t kk2 = (uint32_t)i1.key_length; + uint64_t *h = block_state.f3.snd; + uint32_t kk20 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) + if (!(kk20 == 0U)) { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + uint8_t *sub_b = buf + kk20; + memset(sub_b, 0U, (128U - kk20) * sizeof (uint8_t)); + memcpy(buf, k_1, kk20 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.f3.snd, pv); + uint64_t tmp[8U] = { 0U }; + uint64_t *r0 = h; + uint64_t *r1 = h + 4U; + uint64_t *r2 = h + 8U; + uint64_t *r3 = h + 12U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + uint8_t kk2 = pv.key_length; + uint8_t nn1 = pv.digest_length; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i0] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk2 + << 8U + ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + tmp[1U] = pv.node_offset; + tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -939,8 +1011,8 @@ static void reset_raw(Hacl_Hash_Blake2b_state_t *state, Hacl_Hash_Blake2b_params ite = 0U; } Hacl_Hash_Blake2b_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; - state[0U] = tmp; + tmp8 = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + state[0U] = tmp8; } /** diff --git a/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c b/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c index 35608ae..92b2e8f 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c +++ b/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c @@ -299,75 +299,6 @@ Hacl_Hash_Blake2b_Simd256_init(Lib_IntVector_Intrinsics_vec256 *hash, uint32_t k } static void -init_with_params(Lib_IntVector_Intrinsics_vec256 *hash, Hacl_Hash_Blake2b_blake2_params p) -{ - uint64_t tmp[8U] = { 0U }; - Lib_IntVector_Intrinsics_vec256 *r0 = hash; - Lib_IntVector_Intrinsics_vec256 *r1 = hash + 1U; - Lib_IntVector_Intrinsics_vec256 *r2 = hash + 2U; - Lib_IntVector_Intrinsics_vec256 *r3 = hash + 3U; - uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; - uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; - uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; - uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; - uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; - uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; - uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; - uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; - r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); - r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); - uint8_t kk = p.key_length; - uint8_t nn = p.digest_length; - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint64_t *os = tmp + 4U; - uint8_t *bj = p.salt + i * 8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x;); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint64_t *os = tmp + 6U; - uint8_t *bj = p.personal + i * 8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x;); - tmp[0U] = - (uint64_t)nn - ^ - ((uint64_t)kk - << 8U - ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); - tmp[1U] = p.node_offset; - tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; - tmp[3U] = 0ULL; - uint64_t tmp0 = tmp[0U]; - uint64_t tmp1 = tmp[1U]; - uint64_t tmp2 = tmp[2U]; - uint64_t tmp3 = tmp[3U]; - uint64_t tmp4 = tmp[4U]; - uint64_t tmp5 = tmp[5U]; - uint64_t tmp6 = tmp[6U]; - uint64_t tmp7 = tmp[7U]; - uint64_t iv0_ = iv0 ^ tmp0; - uint64_t iv1_ = iv1 ^ tmp1; - uint64_t iv2_ = iv2 ^ tmp2; - uint64_t iv3_ = iv3 ^ tmp3; - uint64_t iv4_ = iv4 ^ tmp4; - uint64_t iv5_ = iv5 ^ tmp5; - uint64_t iv6_ = iv6 ^ tmp6; - uint64_t iv7_ = iv7 ^ tmp7; - r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); - r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); -} - -static void update_key( Lib_IntVector_Intrinsics_vec256 *wv, Lib_IntVector_Intrinsics_vec256 *hash, @@ -647,16 +578,80 @@ static Hacl_Hash_Blake2b_Simd256_state_t uint8_t nn = p1->digest_length; bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; - uint32_t kk2 = (uint32_t)i.key_length; + Lib_IntVector_Intrinsics_vec256 *h = block_state.f3.snd; + uint32_t kk20 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) + if (!(kk20 == 0U)) { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + uint8_t *sub_b = buf + kk20; + memset(sub_b, 0U, (128U - kk20) * sizeof (uint8_t)); + memcpy(buf, k_1, kk20 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.f3.snd, pv); + uint64_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec256 *r0 = h; + Lib_IntVector_Intrinsics_vec256 *r1 = h + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = h + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = h + 3U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); + uint8_t kk2 = pv.key_length; + uint8_t nn1 = pv.digest_length; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r4 = u; + uint64_t x = r4; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r4 = u; + uint64_t x = r4; + os[i0] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk2 + << 8U + ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + tmp[1U] = pv.node_offset; + tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); return p; } @@ -757,16 +752,80 @@ reset_raw(Hacl_Hash_Blake2b_Simd256_state_t *state, Hacl_Hash_Blake2b_params_and bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; - uint32_t kk2 = (uint32_t)i1.key_length; + Lib_IntVector_Intrinsics_vec256 *h = block_state.f3.snd; + uint32_t kk20 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) + if (!(kk20 == 0U)) { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + uint8_t *sub_b = buf + kk20; + memset(sub_b, 0U, (128U - kk20) * sizeof (uint8_t)); + memcpy(buf, k_1, kk20 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.f3.snd, pv); + uint64_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec256 *r0 = h; + Lib_IntVector_Intrinsics_vec256 *r1 = h + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = h + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = h + 3U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); + uint8_t kk2 = pv.key_length; + uint8_t nn1 = pv.digest_length; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i0] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk2 + << 8U + ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + tmp[1U] = pv.node_offset; + tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -778,8 +837,8 @@ reset_raw(Hacl_Hash_Blake2b_Simd256_state_t *state, Hacl_Hash_Blake2b_params_and ite = 0U; } Hacl_Hash_Blake2b_Simd256_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; - state[0U] = tmp; + tmp8 = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + state[0U] = tmp8; } /** diff --git a/Modules/_hacl/Hacl_Hash_Blake2s.c b/Modules/_hacl/Hacl_Hash_Blake2s.c index 167f38f..e5e0ecd 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2s.c +++ b/Modules/_hacl/Hacl_Hash_Blake2s.c @@ -573,83 +573,6 @@ void Hacl_Hash_Blake2s_init(uint32_t *hash, uint32_t kk, uint32_t nn) r1[3U] = iv7_; } -static void init_with_params(uint32_t *hash, Hacl_Hash_Blake2b_blake2_params p) -{ - uint32_t tmp[8U] = { 0U }; - uint32_t *r0 = hash; - uint32_t *r1 = hash + 4U; - uint32_t *r2 = hash + 8U; - uint32_t *r3 = hash + 12U; - uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; - uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; - uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; - uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; - uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; - uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; - uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; - uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; - r2[0U] = iv0; - r2[1U] = iv1; - r2[2U] = iv2; - r2[3U] = iv3; - r3[0U] = iv4; - r3[1U] = iv5; - r3[2U] = iv6; - r3[3U] = iv7; - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint32_t *os = tmp + 4U; - uint8_t *bj = p.salt + i * 4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x;); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint32_t *os = tmp + 6U; - uint8_t *bj = p.personal + i * 4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x;); - tmp[0U] = - (uint32_t)p.digest_length - ^ ((uint32_t)p.key_length << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); - tmp[1U] = p.leaf_length; - tmp[2U] = (uint32_t)p.node_offset; - tmp[3U] = - (uint32_t)(p.node_offset >> 32U) - ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); - uint32_t tmp0 = tmp[0U]; - uint32_t tmp1 = tmp[1U]; - uint32_t tmp2 = tmp[2U]; - uint32_t tmp3 = tmp[3U]; - uint32_t tmp4 = tmp[4U]; - uint32_t tmp5 = tmp[5U]; - uint32_t tmp6 = tmp[6U]; - uint32_t tmp7 = tmp[7U]; - uint32_t iv0_ = iv0 ^ tmp0; - uint32_t iv1_ = iv1 ^ tmp1; - uint32_t iv2_ = iv2 ^ tmp2; - uint32_t iv3_ = iv3 ^ tmp3; - uint32_t iv4_ = iv4 ^ tmp4; - uint32_t iv5_ = iv5 ^ tmp5; - uint32_t iv6_ = iv6 ^ tmp6; - uint32_t iv7_ = iv7 ^ tmp7; - r0[0U] = iv0_; - r0[1U] = iv1_; - r0[2U] = iv2_; - r0[3U] = iv3_; - r1[0U] = iv4_; - r1[1U] = iv5_; - r1[2U] = iv6_; - r1[3U] = iv7_; -} - static void update_key(uint32_t *wv, uint32_t *hash, uint32_t kk, uint8_t *k, uint32_t ll) { uint64_t lb = (uint64_t)64U; @@ -796,6 +719,7 @@ static Hacl_Hash_Blake2s_state_t uint8_t nn = p1->digest_length; bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint32_t *h = block_state.f3.snd; uint32_t kk2 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -805,7 +729,79 @@ static Hacl_Hash_Blake2s_state_t memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.f3.snd, pv); + uint32_t tmp[8U] = { 0U }; + uint32_t *r0 = h; + uint32_t *r1 = h + 4U; + uint32_t *r2 = h + 8U; + uint32_t *r3 = h + 12U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r4 = u; + uint32_t x = r4; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r4 = u; + uint32_t x = r4; + os[i0] = x;); + tmp[0U] = + (uint32_t)pv.digest_length + ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + tmp[1U] = pv.leaf_length; + tmp[2U] = (uint32_t)pv.node_offset; + tmp[3U] = + (uint32_t)(pv.node_offset >> 32U) + ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; return p; } @@ -903,6 +899,7 @@ static void reset_raw(Hacl_Hash_Blake2s_state_t *state, Hacl_Hash_Blake2b_params bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint32_t *h = block_state.f3.snd; uint32_t kk2 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -912,7 +909,79 @@ static void reset_raw(Hacl_Hash_Blake2s_state_t *state, Hacl_Hash_Blake2b_params memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.f3.snd, pv); + uint32_t tmp[8U] = { 0U }; + uint32_t *r0 = h; + uint32_t *r1 = h + 4U; + uint32_t *r2 = h + 8U; + uint32_t *r3 = h + 12U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i0] = x;); + tmp[0U] = + (uint32_t)pv.digest_length + ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + tmp[1U] = pv.leaf_length; + tmp[2U] = (uint32_t)pv.node_offset; + tmp[3U] = + (uint32_t)(pv.node_offset >> 32U) + ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -924,8 +993,8 @@ static void reset_raw(Hacl_Hash_Blake2s_state_t *state, Hacl_Hash_Blake2b_params ite = 0U; } Hacl_Hash_Blake2s_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; - state[0U] = tmp; + tmp8 = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + state[0U] = tmp8; } /** diff --git a/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c b/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c index a85b18a..f675a7f 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c +++ b/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c @@ -296,72 +296,6 @@ Hacl_Hash_Blake2s_Simd128_init(Lib_IntVector_Intrinsics_vec128 *hash, uint32_t k } static void -init_with_params(Lib_IntVector_Intrinsics_vec128 *hash, Hacl_Hash_Blake2b_blake2_params p) -{ - uint32_t tmp[8U] = { 0U }; - Lib_IntVector_Intrinsics_vec128 *r0 = hash; - Lib_IntVector_Intrinsics_vec128 *r1 = hash + 1U; - Lib_IntVector_Intrinsics_vec128 *r2 = hash + 2U; - Lib_IntVector_Intrinsics_vec128 *r3 = hash + 3U; - uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; - uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; - uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; - uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; - uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; - uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; - uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; - uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; - r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); - r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint32_t *os = tmp + 4U; - uint8_t *bj = p.salt + i * 4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x;); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint32_t *os = tmp + 6U; - uint8_t *bj = p.personal + i * 4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x;); - tmp[0U] = - (uint32_t)p.digest_length - ^ ((uint32_t)p.key_length << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); - tmp[1U] = p.leaf_length; - tmp[2U] = (uint32_t)p.node_offset; - tmp[3U] = - (uint32_t)(p.node_offset >> 32U) - ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); - uint32_t tmp0 = tmp[0U]; - uint32_t tmp1 = tmp[1U]; - uint32_t tmp2 = tmp[2U]; - uint32_t tmp3 = tmp[3U]; - uint32_t tmp4 = tmp[4U]; - uint32_t tmp5 = tmp[5U]; - uint32_t tmp6 = tmp[6U]; - uint32_t tmp7 = tmp[7U]; - uint32_t iv0_ = iv0 ^ tmp0; - uint32_t iv1_ = iv1 ^ tmp1; - uint32_t iv2_ = iv2 ^ tmp2; - uint32_t iv3_ = iv3 ^ tmp3; - uint32_t iv4_ = iv4 ^ tmp4; - uint32_t iv5_ = iv5 ^ tmp5; - uint32_t iv6_ = iv6 ^ tmp6; - uint32_t iv7_ = iv7 ^ tmp7; - r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); - r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); -} - -static void update_key( Lib_IntVector_Intrinsics_vec128 *wv, Lib_IntVector_Intrinsics_vec128 *hash, @@ -637,6 +571,7 @@ static Hacl_Hash_Blake2s_Simd128_state_t uint8_t nn = p1->digest_length; bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + Lib_IntVector_Intrinsics_vec128 *h = block_state.f3.snd; uint32_t kk2 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -646,7 +581,67 @@ static Hacl_Hash_Blake2s_Simd128_state_t memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.f3.snd, pv); + uint32_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec128 *r0 = h; + Lib_IntVector_Intrinsics_vec128 *r1 = h + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = h + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = h + 3U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r4 = u; + uint32_t x = r4; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r4 = u; + uint32_t x = r4; + os[i0] = x;); + tmp[0U] = + (uint32_t)pv.digest_length + ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + tmp[1U] = pv.leaf_length; + tmp[2U] = (uint32_t)pv.node_offset; + tmp[3U] = + (uint32_t)(pv.node_offset >> 32U) + ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); return p; } @@ -747,6 +742,7 @@ reset_raw(Hacl_Hash_Blake2s_Simd128_state_t *state, Hacl_Hash_Blake2b_params_and bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + Lib_IntVector_Intrinsics_vec128 *h = block_state.f3.snd; uint32_t kk2 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -756,7 +752,67 @@ reset_raw(Hacl_Hash_Blake2s_Simd128_state_t *state, Hacl_Hash_Blake2b_params_and memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.f3.snd, pv); + uint32_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec128 *r0 = h; + Lib_IntVector_Intrinsics_vec128 *r1 = h + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = h + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = h + 3U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i0] = x;); + tmp[0U] = + (uint32_t)pv.digest_length + ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + tmp[1U] = pv.leaf_length; + tmp[2U] = (uint32_t)pv.node_offset; + tmp[3U] = + (uint32_t)(pv.node_offset >> 32U) + ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -768,8 +824,8 @@ reset_raw(Hacl_Hash_Blake2s_Simd128_state_t *state, Hacl_Hash_Blake2b_params_and ite = 0U; } Hacl_Hash_Blake2s_Simd128_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; - state[0U] = tmp; + tmp8 = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + state[0U] = tmp8; } /** diff --git a/Modules/_hacl/include/krml/FStar_UInt128_Verified.h b/Modules/_hacl/include/krml/FStar_UInt128_Verified.h index bdf2589..659745b 100644 --- a/Modules/_hacl/include/krml/FStar_UInt128_Verified.h +++ b/Modules/_hacl/include/krml/FStar_UInt128_Verified.h @@ -1,6 +1,6 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. + Licensed under the Apache 2.0 and MIT Licenses. */ diff --git a/Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h b/Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h index 1bdec97..68bac0b 100644 --- a/Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h +++ b/Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h @@ -1,6 +1,6 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. + Licensed under the Apache 2.0 and MIT Licenses. */ diff --git a/Modules/_hacl/include/krml/fstar_uint128_struct_endianness.h b/Modules/_hacl/include/krml/fstar_uint128_struct_endianness.h index e2b6d62..bb736ad 100644 --- a/Modules/_hacl/include/krml/fstar_uint128_struct_endianness.h +++ b/Modules/_hacl/include/krml/fstar_uint128_struct_endianness.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. */ #ifndef FSTAR_UINT128_STRUCT_ENDIANNESS_H #define FSTAR_UINT128_STRUCT_ENDIANNESS_H diff --git a/Modules/_hacl/include/krml/internal/target.h b/Modules/_hacl/include/krml/internal/target.h index 292adc1..fd74d3d 100644 --- a/Modules/_hacl/include/krml/internal/target.h +++ b/Modules/_hacl/include/krml/internal/target.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. */ #ifndef __KRML_TARGET_H #define __KRML_TARGET_H @@ -82,6 +82,8 @@ # define KRML_NOINLINE __declspec(noinline) # elif defined (__GNUC__) # define KRML_NOINLINE __attribute__((noinline,unused)) +# elif defined (__SUNPRO_C) +# define KRML_NOINLINE __attribute__((noinline)) # else # define KRML_NOINLINE # warning "The KRML_NOINLINE macro is not defined for this toolchain!" @@ -95,6 +97,8 @@ # define KRML_MUSTINLINE inline __forceinline # elif defined (__GNUC__) # define KRML_MUSTINLINE inline __attribute__((always_inline)) +# elif defined (__SUNPRO_C) +# define KRML_MUSTINLINE inline __attribute__((always_inline)) # else # define KRML_MUSTINLINE inline # warning "The KRML_MUSTINLINE macro defaults to plain inline for this toolchain!" diff --git a/Modules/_hacl/include/krml/lowstar_endianness.h b/Modules/_hacl/include/krml/lowstar_endianness.h index 1aa2ccd..af6b882 100644 --- a/Modules/_hacl/include/krml/lowstar_endianness.h +++ b/Modules/_hacl/include/krml/lowstar_endianness.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. */ #ifndef __LOWSTAR_ENDIANNESS_H #define __LOWSTAR_ENDIANNESS_H diff --git a/Modules/_hacl/libintvector.h b/Modules/_hacl/libintvector.h index 99d1133..11e914f 100644 --- a/Modules/_hacl/libintvector.h +++ b/Modules/_hacl/libintvector.h @@ -19,7 +19,7 @@ #define Lib_IntVector_Intrinsics_bit_mask64(x) -((x) & 1) -#if defined(__x86_64__) || defined(_M_X64) +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) #if defined(HACL_CAN_COMPILE_VEC128) diff --git a/Modules/_hacl/refresh.sh b/Modules/_hacl/refresh.sh index 44e18a1..6234fea 100755 --- a/Modules/_hacl/refresh.sh +++ b/Modules/_hacl/refresh.sh @@ -22,7 +22,7 @@ fi # Update this when updating to a new version after verifying that the changes # the update brings in are good. -expected_hacl_star_rev=a6a09496d9cff652b567d26f2c3ab012321b632a +expected_hacl_star_rev=315a9e491d2bc347b9dae99e0ea506995ea84d9d hacl_dir="$(realpath "$1")" cd "$(dirname "$0")" |