From 3371742d975779ed342468831606b52047c0dfc6 Mon Sep 17 00:00:00 2001
From: Paul Duncan
Date: Mon, 27 May 2024 00:45:49 -0400
Subject: sha3.c: permute_n_avx2(): rho: use ROLI instead of ROLV for hi

---
 sha3.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/sha3.c b/sha3.c
index c65fe13..b582cc2 100644
--- a/sha3.c
+++ b/sha3.c
@@ -673,19 +673,18 @@ static inline void permute_n_avx2(uint64_t s[static 25], const size_t num_rounds
     // rho
     {
       // rotate values
-      static const __m256i V0_LO = { 0, 1, 62, 28 }, V0_HI = { 27 },
-                           V1_LO = { 36, 44, 6, 55 }, V1_HI = { 20 },
-                           V2_LO = { 3, 10, 43, 25 }, V2_HI = { 39 },
-                           V3_LO = { 41, 45, 15, 21 }, V3_HI = { 8 },
-                           V4_LO = { 18, 2, 61, 56 }, V4_HI = { 14 };
+      static const __m256i V0_LO = { 0, 1, 62, 28 },
+                           V1_LO = { 36, 44, 6, 55 },
+                           V2_LO = { 3, 10, 43, 25 },
+                           V3_LO = { 41, 45, 15, 21 },
+                           V4_LO = { 18, 2, 61, 56 };
 
       // rotate rows
-      // FIXME: could reduce rotates by permuting
-      r0_lo = AVX2_ROLV(r0_lo, V0_LO); r0_hi = AVX2_ROLV(r0_hi, V0_HI);
-      r1_lo = AVX2_ROLV(r1_lo, V1_LO); r1_hi = AVX2_ROLV(r1_hi, V1_HI);
-      r2_lo = AVX2_ROLV(r2_lo, V2_LO); r2_hi = AVX2_ROLV(r2_hi, V2_HI);
-      r3_lo = AVX2_ROLV(r3_lo, V3_LO); r3_hi = AVX2_ROLV(r3_hi, V3_HI);
-      r4_lo = AVX2_ROLV(r4_lo, V4_LO); r4_hi = AVX2_ROLV(r4_hi, V4_HI);
+      r0_lo = AVX2_ROLV(r0_lo, V0_LO); r0_hi = AVX2_ROLI(r0_hi, 27);
+      r1_lo = AVX2_ROLV(r1_lo, V1_LO); r1_hi = AVX2_ROLI(r1_hi, 20);
+      r2_lo = AVX2_ROLV(r2_lo, V2_LO); r2_hi = AVX2_ROLI(r2_hi, 39);
+      r3_lo = AVX2_ROLV(r3_lo, V3_LO); r3_hi = AVX2_ROLI(r3_hi, 8);
+      r4_lo = AVX2_ROLV(r4_lo, V4_LO); r4_hi = AVX2_ROLI(r4_hi, 14);
     }
 
     // pi
-- 
cgit v1.2.3