From 77d301f602167df1df05a86d7da1c961ff2f4f1a Mon Sep 17 00:00:00 2001
From: Paul Duncan
Date: Mon, 27 May 2024 01:09:02 -0400
Subject: sha3.c: permute_n_avx2(): replace two instances of permute4x64_epi64() with shuffle_epi32()

---
 sha3.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/sha3.c b/sha3.c
index b582cc2..cedd670 100644
--- a/sha3.c
+++ b/sha3.c
@@ -581,7 +581,6 @@ static const __m256i LM0 = { ~0, 0, 0, 0 }, // only lane 0
 #define PI_T3_LO 0x90 // 0b10010000 -> 0x90
 #define PI_T3_HI 0x03
 #define PI_T4_LO 0x0e // 0b00001110 -> 0x0e
-#define PI_T4_HI 0x01
 
 // chi blend mask
 #define CHI_MASK 0xc0 // 0b11000000
@@ -589,14 +588,13 @@ static const __m256i LM0 = { ~0, 0, 0, 0 }, // only lane 0
 // chi permute IDs
 #define CHI_I0_LO 0x39 // 1, 2, 3, 0 -> 0b00111001 -> 0x39
 #define CHI_I1_LO 0x0e // 2, 3, 0, 0 -> 0b00001110 -> 0x0e
-#define CHI_I1_HI 0x01 // 1, 0, 0, 0 -> 0b00000001 -> 0x01
 
 // chi step
 #define CHI(LO, HI) do { \
   const __m256i a_lo = _mm256_blend_epi32(_mm256_permute4x64_epi64(LO, CHI_I0_LO), _mm256_permute4x64_epi64(HI, CHI_I0_LO), CHI_MASK), \
                 a_hi = LO, \
                 b_lo = (_mm256_permute4x64_epi64(LO, CHI_I1_LO) & ~LM2) | (_mm256_permute4x64_epi64(HI, CHI_I1_LO) & ~LM0), \
-                b_hi = _mm256_permute4x64_epi64(LO, CHI_I1_HI); \
+                b_hi = _mm256_shuffle_epi32(LO, 0x0e); \
   \
   LO ^= _mm256_andnot_si256(a_lo, b_lo); HI ^= _mm256_andnot_si256(a_hi, b_hi); \
 } while (0)
@@ -713,7 +711,7 @@ static inline void permute_n_avx2(uint64_t s[static 25], const size_t num_rounds
                           (_mm256_permute4x64_epi64(r1_lo, PI_T4_LO) & LM1) |
                           (_mm256_permute4x64_epi64(r2_hi, PI_T4_LO) & LM2) |
                           (_mm256_permute4x64_epi64(r3_lo, PI_T4_LO) & LM3),
-                  t4_hi = _mm256_permute4x64_epi64(r4_lo, PI_T4_HI);
+                  t4_hi = _mm256_shuffle_epi32(r4_lo, 0x0e);
 
     r0_lo = t0_lo; r0_hi = t0_hi & LM0;
     r1_lo = t1_lo; r1_hi = t1_hi & LM0;
-- 
cgit v1.2.3