aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Paul Duncan <pabs@pablotron.org> 2024-04-29 12:06:36 -0400
committer Paul Duncan <pabs@pablotron.org> 2024-04-29 12:06:36 -0400
commit 4f2e3ab022e887e33aff5e2dccb8e6dc7074cbcf (patch)
tree 9b125aec8c31082e4bb5564e15abba05ae1c51b5
parent 6f345c96eead1ccaca7a41ded0da814e88f33a2a (diff)
download sha3-4f2e3ab022e887e33aff5e2dccb8e6dc7074cbcf.tar.bz2
sha3-4f2e3ab022e887e33aff5e2dccb8e6dc7074cbcf.zip
sha3.c: build permute12_scalar() during testing, remove old comment
-rw-r--r-- sha3.c 8
1 file changed, 1 insertion, 7 deletions
diff --git a/sha3.c b/sha3.c
index 3e8a28c..887c5c2 100644
--- a/sha3.c
+++ b/sha3.c
@@ -177,9 +177,7 @@ static inline void chi(uint64_t dst[static 25], const uint64_t src[static 25]) {
static inline void iota(uint64_t a[static 25], const int i) {
a[0] ^= RCS[i];
}
-#endif /* !defined(__AVX512F__) || defined(SHA3_TEST) */
-#ifndef __AVX512F__
// 24-round keccak permutation (scalar implementation)
static inline void permute_scalar(uint64_t a[static 25]) {
uint64_t tmp[25] = { 0 };
@@ -204,17 +202,13 @@ static inline void permute12_scalar(uint64_t a[static 25]) {
iota(a, 12 + i);
}
}
-#endif /* !__AVX512F__ */
+#endif /* !defined(__AVX512F__) || defined(SHA3_TEST) */
#ifdef __AVX512F__
#include <immintrin.h>
// 24 round keccak permutation (avx512 implementation).
//
-// copied from `permute_avx512_fast()` in `tests/permute/permute.c`. all
-// steps are inlined as blocks. ~3x faster than scalar implementation,
-// but could be sped up more.
-//
// how it operates (roughly):
//
// 1. load rows from state `s` into avx512 registers r0-r4, like so: