From 22c3a095a29a860d1672a542919181ea31faf9e6 Mon Sep 17 00:00:00 2001 From: Paul Duncan Date: Fri, 17 May 2024 01:44:41 -0400 Subject: sha3.c: permute_n_scalar(): simplify num_rounds logic bench results (samish across the board) ------------- x86-64-scalar-gcc before: > make clean all BACKEND=1 && perf record ./bench 10000 ... info: cpucycles: version=20240318 implementation=amd64-pmc persecond=4800000000 info: backend=scalar num_trials=10000 src_lens=64,256,1024,4096,16384 dst_lens=32 function,dst_len,64,256,1024,4096,16384 sha3_224,28,19.5,10.0,10.0,9.0,8.8 sha3_256,32,19.5,10.0,9.9,9.5,9.3 sha3_384,48,20.1,14.7,12.3,12.2,12.0 sha3_512,64,19.5,19.5,18.2,17.2,17.1 shake128,32,19.6,10.0,8.7,7.8,7.6 shake256,32,19.6,10.1,10.0,9.6,9.3 x86-64-scalar-clang before: > make clean all CC=clang BACKEND=1 && perf record ./bench 10000 ... info: cpucycles: version=20240318 implementation=amd64-pmc persecond=4800000000 info: backend=scalar num_trials=10000 src_lens=64,256,1024,4096,16384 dst_lens=32 function,dst_len,64,256,1024,4096,16384 sha3_224,28,19.7,9.9,9.7,8.8,8.6 sha3_256,32,19.7,9.8,9.7,9.4,9.1 sha3_384,48,19.7,14.5,12.0,11.9,11.8 sha3_512,64,19.7,19.1,17.7,16.8,16.8 shake128,32,20.4,10.0,8.7,7.7,7.5 shake256,32,20.3,10.0,9.8,9.4,9.1 x86-64-scalar-gcc after: > make clean all BACKEND=1 && perf record ./bench 10000 ... info: cpucycles: version=20240318 implementation=amd64-pmc persecond=4800000000 info: backend=scalar num_trials=10000 src_lens=64,256,1024,4096,16384 dst_lens=32 function,dst_len,64,256,1024,4096,16384 sha3_224,28,19.5,10.0,9.9,9.0,8.8 sha3_256,32,19.5,10.0,9.9,9.5,9.3 sha3_384,48,19.5,14.7,12.3,12.2,12.0 sha3_512,64,19.7,19.5,18.2,17.2,17.1 shake128,32,19.6,10.0,8.7,7.8,7.6 shake256,32,19.6,10.1,10.0,9.6,9.3 x86-64-scalar-clang after: > make clean all CC=clang BACKEND=1 && perf record ./bench 10000 ... 
info: cpucycles: version=20240318 implementation=amd64-pmc persecond=4800000000 info: backend=scalar num_trials=10000 src_lens=64,256,1024,4096,16384 dst_lens=32 function,dst_len,64,256,1024,4096,16384 sha3_224,28,19.7,9.9,9.8,8.8,8.7 sha3_256,32,19.6,9.9,9.8,9.4,9.2 sha3_384,48,19.7,14.6,12.1,12.0,11.9 sha3_512,64,19.7,19.3,17.9,16.9,16.9 shake128,32,19.7,9.9,8.7,7.7,7.5 shake256,32,19.7,9.9,9.8,9.5,9.2 a76-scalar-gcc before: > make clean all BACKEND=1 && ./bench 5000 info: cpucycles: version=20240318 implementation=arm64-vct persecond=2400000000 info: backend=scalar num_trials=5000 src_lens=64,256,1024,4096,16384 dst_lens=32 function,dst_len,64,256,1024,4096,16384 sha3_224,28,20.2,10.1,10.2,9.3,9.2 sha3_256,32,20.2,10.3,10.3,10.0,9.7 sha3_384,48,20.9,15.3,12.8,12.7,12.5 sha3_512,64,20.9,20.3,18.9,18.0,17.9 shake128,32,20.2,10.3,9.0,8.1,7.9 shake256,32,20.2,10.1,10.3,9.9,9.7 a76-scalar-clang before: > make clean all CC=clang BACKEND=1 && ./bench 5000 info: cpucycles: version=20240318 implementation=arm64-vct persecond=2400000000 info: backend=scalar num_trials=5000 src_lens=64,256,1024,4096,16384 dst_lens=32 function,dst_len,64,256,1024,4096,16384 sha3_224,28,18.8,9.6,9.5,8.7,8.5 sha3_256,32,18.8,9.6,9.5,9.2,9.0 sha3_384,48,18.8,14.1,11.8,11.8,11.6 sha3_512,64,18.8,18.6,17.5,16.6,16.6 shake128,32,18.8,9.6,8.4,7.5,7.4 shake256,32,18.8,9.6,9.6,9.2,9.0 a76-scalar-gcc after: > make clean all BACKEND=1 && ./bench 5000 info: cpucycles: version=20240318 implementation=arm64-vct persecond=2400000000 info: backend=scalar num_trials=5000 src_lens=64,256,1024,4096,16384 dst_lens=32 function,dst_len,64,256,1024,4096,16384 sha3_224,28,20.2,10.1,10.3,9.3,9.2 sha3_256,32,20.2,10.3,10.3,10.0,9.7 sha3_384,48,20.9,15.3,12.8,12.7,12.5 sha3_512,64,20.9,20.3,18.9,18.0,17.9 shake128,32,20.2,10.3,9.0,8.1,7.9 shake256,32,20.2,10.3,10.3,9.9,9.7 a76-scalar-clang after: > make clean all CC=clang BACKEND=1 && ./bench 5000 info: cpucycles: version=20240318 implementation=arm64-vct 
persecond=2400000000 info: backend=scalar num_trials=5000 src_lens=64,256,1024,4096,16384 dst_lens=32 function,dst_len,64,256,1024,4096,16384 sha3_224,28,18.8,9.4,9.3,8.5,8.3 sha3_256,32,18.8,9.4,9.3,9.0,8.8 sha3_384,48,18.8,13.9,11.6,11.6,11.4 sha3_512,64,18.8,18.3,17.1,16.3,16.3 shake128,32,18.8,9.4,8.3,7.4,7.2 shake256,32,18.8,9.4,9.4,9.1,8.9 --- sha3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sha3.c b/sha3.c index 14d518e..b846719 100644 --- a/sha3.c +++ b/sha3.c @@ -290,12 +290,12 @@ static inline void iota(uint64_t a[static 25], const int i) { */ static inline void permute_n_scalar(uint64_t a[static 25], const size_t num_rounds) { uint64_t tmp[25] = { 0 }; - for (size_t i = 0; i < num_rounds; i++) { + for (size_t i = (SHA3_NUM_ROUNDS - num_rounds); __builtin_expect(i < SHA3_NUM_ROUNDS, 1); i++) { theta(a); rho(a); pi(tmp, a); chi(a, tmp); - iota(a, (SHA3_NUM_ROUNDS - num_rounds + i)); + iota(a, i); } } #endif /* (BACKEND == BACKEND_SCALAR) || defined(TEST_SHA3) */ -- cgit v1.2.3