diff options
-rw-r--r-- | sha3.c | 6 |
1 file changed, 2 insertions, 4 deletions
@@ -581,7 +581,6 @@ static const __m256i LM0 = { ~0, 0, 0, 0 }, // only lane 0
 #define PI_T3_LO 0x90 // 0b10010000 -> 0x90
 #define PI_T3_HI 0x03
 #define PI_T4_LO 0x0e // 0b00001110 -> 0x0e
-#define PI_T4_HI 0x01
 
 // chi blend mask
 #define CHI_MASK 0xc0 // 0b11000000
@@ -589,14 +588,13 @@ static const __m256i LM0 = { ~0, 0, 0, 0 }, // only lane 0
 // chi permute IDs
 #define CHI_I0_LO 0x39 // 1, 2, 3, 0 -> 0b00111001 -> 0x39
 #define CHI_I1_LO 0x0e // 2, 3, 0, 0 -> 0b00001110 -> 0x0e
-#define CHI_I1_HI 0x01 // 1, 0, 0, 0 -> 0b00000001 -> 0x01
 
 // chi step
 #define CHI(LO, HI) do { \
   const __m256i a_lo = _mm256_blend_epi32(_mm256_permute4x64_epi64(LO, CHI_I0_LO), _mm256_permute4x64_epi64(HI, CHI_I0_LO), CHI_MASK), \
                 a_hi = LO, \
                 b_lo = (_mm256_permute4x64_epi64(LO, CHI_I1_LO) & ~LM2) | (_mm256_permute4x64_epi64(HI, CHI_I1_LO) & ~LM0), \
-                b_hi = _mm256_permute4x64_epi64(LO, CHI_I1_HI); \
+                b_hi = _mm256_shuffle_epi32(LO, 0x0e); \
 \
   LO ^= _mm256_andnot_si256(a_lo, b_lo); HI ^= _mm256_andnot_si256(a_hi, b_hi); \
 } while (0)
@@ -713,7 +711,7 @@ static inline void permute_n_avx2(uint64_t s[static 25], const size_t num_rounds
                           (_mm256_permute4x64_epi64(r1_lo, PI_T4_LO) & LM1) |
                           (_mm256_permute4x64_epi64(r2_hi, PI_T4_LO) & LM2) |
                           (_mm256_permute4x64_epi64(r3_lo, PI_T4_LO) & LM3),
-                  t4_hi = _mm256_permute4x64_epi64(r4_lo, PI_T4_HI);
+                  t4_hi = _mm256_shuffle_epi32(r4_lo, 0x0e);
 
     r0_lo = t0_lo; r0_hi = t0_hi & LM0;
     r1_lo = t1_lo; r1_hi = t1_hi & LM0; |