diff options
-rw-r--r-- | Makefile | 6 | ||||
-rw-r--r-- | README.md | 14 | ||||
-rw-r--r-- | sha3.c | 96 | ||||
-rw-r--r-- | tests/bench/Makefile | 4 | ||||
-rw-r--r-- | tests/bench/README.md | 12 |
5 files changed, 66 insertions, 66 deletions
@@ -1,8 +1,8 @@ # backend (0 to auto-detect) -SHA3_BACKEND ?= 0 +BACKEND ?= 0 # compiler flags used for sample application and shared library -CFLAGS=-W -Wall -Wextra -Werror -pedantic -std=c11 -fPIC -O3 -march=native -mtune=native -DSHA3_BACKEND=$(SHA3_BACKEND) +CFLAGS=-W -Wall -Wextra -Werror -pedantic -std=c11 -fPIC -O3 -march=native -mtune=native -DBACKEND=$(BACKEND) # sample application APP=./sha3 @@ -13,7 +13,7 @@ LIB=libsha3.so LIB_OBJS=sha3.o # test app (test suite and sanitizers) -TEST_CFLAGS=-g -fsanitize=address,pointer-compare,pointer-subtract,undefined,leak -W -Wall -Wextra -Werror -pedantic -std=c11 -march=native -mtune=native -DSHA3_BACKEND=$(SHA3_BACKEND) +TEST_CFLAGS=-g -fsanitize=address,pointer-compare,pointer-subtract,undefined,leak -W -Wall -Wextra -Werror -pedantic -std=c11 -march=native -mtune=native -DBACKEND=$(BACKEND) TEST_APP=./test-sha3 .PHONY=test all @@ -203,20 +203,20 @@ size. ## Backends This library includes several accelerated backends which are selectable -at compile time via the `SHA3_BACKEND` make argument and define. By -default the fastest backend is selected at compile-time. +at compile time via the `BACKEND` make argument and define. By default +the fastest backend is selected at compile-time. The available backends are: -- Scalar (`SHA3_BACKEND=1`): Default if no faster backend is available. -- [AVX-512][] (`SHA3_BACKEND=2`): [AVX-512][] acceleration. Selected by +- Scalar (`BACKEND=1`): Default if no faster backend is available. +- [AVX-512][] (`BACKEND=2`): [AVX-512][] acceleration. Selected by default if [AVX-512][] is supported. -- [Neon][] (`SHA3_BACKEND=3`): ARM [Neon][] acceleration. Currently - slower than the scalar backend on ARM CPUs and not enabled by default. +- [Neon][] (`BACKEND=3`): ARM [Neon][] acceleration. Currently slower + than the scalar backend on ARM CPUs and not enabled by default. The name of the selected backend is available at run-time via the `sha3_backend()` function. See the `tests/bench/` for examples of the -`SHA3_BACKEND` make argument and the `sha3_backend()` function. +`BACKEND` make argument and the `sha3_backend()` function. ## Benchmarks @@ -33,23 +33,23 @@ #define BACKEND_DIET_NEON 4 // Neon backend which uses fewer registers #define BACKEND_HYBRID_NEON 5 // Hybrid neon backend -// if SHA3_BACKEND is defined and set to 0 (the default), then unset it +// if BACKEND is defined and set to 0 (the default), then unset it // and auto-detect the appropriate backend -#if defined(SHA3_BACKEND) && SHA3_BACKEND == BACKEND_AUTO -#undef SHA3_BACKEND -#endif /* defined(SHA3_BACKEND) && SHA3_BACKEND == 0 */ +#if defined(BACKEND) && BACKEND == BACKEND_AUTO +#undef BACKEND +#endif /* defined(BACKEND) && BACKEND == 0 */ // detect backend -#ifndef SHA3_BACKEND +#ifndef BACKEND #if defined(__AVX512F__) -#define SHA3_BACKEND BACKEND_AVX512 +#define BACKEND BACKEND_AVX512 #elif 0 && defined(__ARM_NEON) -#define SHA3_BACKEND BACKEND_NEON +#define BACKEND BACKEND_NEON #else // no optimized backend detected, fall back to scalar -#define SHA3_BACKEND BACKEND_SCALAR +#define BACKEND BACKEND_SCALAR #endif -#endif /* !SHA3_BACKEND */ +#endif /* !BACKEND */ // 64-bit rotate left #define ROL(v, n) (((v) << (n)) | ((v) >> (64-(n)))) @@ -73,7 +73,7 @@ static const uint64_t RCS[] = { 0x8000000080008081ULL, 0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL, }; -#if (SHA3_BACKEND == BACKEND_SCALAR) || defined(SHA3_TEST) +#if (BACKEND == BACKEND_SCALAR) || defined(SHA3_TEST) // If AVX-512 is supported and we are not building the test suite, // then do not compile the scalar step functions. // @@ -244,9 +244,9 @@ static inline void permute_n_scalar(uint64_t a[static 25], const size_t num_roun iota(a, (SHA3_NUM_ROUNDS - num_rounds + i)); } } -#endif /* (SHA3_BACKEND == BACKEND_SCALAR) || defined(SHA3_TEST) */ +#endif /* (BACKEND == BACKEND_SCALAR) || defined(SHA3_TEST) */ -#if SHA3_BACKEND == BACKEND_AVX512 +#if BACKEND == BACKEND_AVX512 #include <immintrin.h> /** @@ -473,9 +473,9 @@ static inline void permute_n_avx512(uint64_t s[static 25], const size_t num_roun _mm512_mask_storeu_epi64(s + 5 * 3, 0x1f, r3); _mm512_mask_storeu_epi64(s + 5 * 4, 0x1f, r4); } -#endif /* SHA3_BACKEND == BACKEND_AVX512 */ +#endif /* BACKEND == BACKEND_AVX512 */ -#if SHA3_BACKEND == BACKEND_NEON +#if BACKEND == BACKEND_NEON #include <arm_neon.h> // rotate elements in uint64x2_t left by N bits @@ -780,9 +780,9 @@ static inline void permute_n_neon(uint64_t a[static 25], const size_t num_rounds row_store(a + 15, r3); row_store(a + 20, r4); } -#endif /* SHA3_BACKEND == BACKEND_NEON */ +#endif /* BACKEND == BACKEND_NEON */ -#if SHA3_BACKEND == BACKEND_DIET_NEON +#if BACKEND == BACKEND_DIET_NEON #include <arm_neon.h> // rotate element in uint64x1_t left by N bits @@ -1060,9 +1060,9 @@ static inline void permute_n_diet_neon(uint64_t a[static 25], const size_t num_r // store column 4 of r4 vst1_u64(a + 24, row_last(r4)); } -#endif /* SHA3_BACKEND == BACKEND_DIET_NEON */ +#endif /* BACKEND == BACKEND_DIET_NEON */ -#if (SHA3_BACKEND == BACKEND_HYBRID_NEON) +#if (BACKEND == BACKEND_HYBRID_NEON) #include <arm_neon.h> /** @@ -1222,21 +1222,21 @@ static inline void permute_n_hybrid_neon(uint64_t a[static 25], const size_t num a[0] ^= RCS[i]; } } -#endif /* (SHA3_BACKEND == BACKEND_HYBRID_NEON) */ +#endif /* (BACKEND == BACKEND_HYBRID_NEON) */ -#if SHA3_BACKEND == BACKEND_AVX512 +#if BACKEND == BACKEND_AVX512 #define permute_n permute_n_avx512 // use avx512 backend -#elif SHA3_BACKEND == BACKEND_NEON +#elif BACKEND == BACKEND_NEON #define permute_n permute_n_neon // use neon backend -#elif SHA3_BACKEND == BACKEND_DIET_NEON +#elif BACKEND == BACKEND_DIET_NEON #define permute_n permute_n_diet_neon // use diet-neon backend -#elif SHA3_BACKEND == BACKEND_HYBRID_NEON +#elif BACKEND == BACKEND_HYBRID_NEON #define permute_n permute_n_hybrid_neon // use hybrid-neon backend -#elif SHA3_BACKEND == BACKEND_SCALAR +#elif BACKEND == BACKEND_SCALAR #define permute_n permute_n_scalar // use scalar backend #else #error "unknown sha3 backend" -#endif /* SHA3_BACKEND */ +#endif /* BACKEND */ /** * @brief 24 round Keccak permutation. @@ -2723,17 +2723,17 @@ void k12_once(const uint8_t *src, const size_t src_len, uint8_t *dst, const size // Return backend name. const char *sha3_backend(void) { -#if SHA3_BACKEND == BACKEND_AVX512 +#if BACKEND == BACKEND_AVX512 return "avx512"; -#elif SHA3_BACKEND == BACKEND_NEON +#elif BACKEND == BACKEND_NEON return "neon"; -#elif SHA3_BACKEND == BACKEND_DIET_NEON +#elif BACKEND == BACKEND_DIET_NEON return "diet-neon"; -#elif SHA3_BACKEND == BACKEND_HYBRID_NEON +#elif BACKEND == BACKEND_HYBRID_NEON return "hybrid-neon"; -#elif SHA3_BACKEND == BACKEND_SCALAR +#elif BACKEND == BACKEND_SCALAR return "scalar"; -#endif /* SHA3_BACKEND */ +#endif /* BACKEND */ } #ifdef SHA3_TEST @@ -2993,7 +2993,7 @@ static void test_permute_scalar(void) { } static void test_permute_avx512(void) { -#if SHA3_BACKEND == BACKEND_AVX512 +#if BACKEND == BACKEND_AVX512 for (size_t i = 0; i < sizeof(PERMUTE_TESTS) / sizeof(PERMUTE_TESTS[0]); i++) { const size_t exp_len = PERMUTE_TESTS[i].exp_len; @@ -3005,11 +3005,11 @@ static void test_permute_avx512(void) { fail_test(__func__, "", (uint8_t*) got, exp_len, (uint8_t*) PERMUTE_TESTS[i].exp, exp_len); } } -#endif /* SHA3_BACKEND == BACKEND_AVX512 */ +#endif /* BACKEND == BACKEND_AVX512 */ } static void test_permute_neon(void) { -#if SHA3_BACKEND == BACKEND_NEON +#if BACKEND == BACKEND_NEON for (size_t i = 0; i < sizeof(PERMUTE_TESTS) / sizeof(PERMUTE_TESTS[0]); i++) { const size_t exp_len = PERMUTE_TESTS[i].exp_len; @@ -3021,11 +3021,11 @@ static void test_permute_neon(void) { fail_test(__func__, "", (uint8_t*) got, exp_len, (uint8_t*) PERMUTE_TESTS[i].exp, exp_len); } } -#endif /* SHA3_BACKEND == BACKEND_NEON */ +#endif /* BACKEND == BACKEND_NEON */ } static void test_permute_diet_neon(void) { -#if SHA3_BACKEND == BACKEND_DIET_NEON +#if BACKEND == BACKEND_DIET_NEON for (size_t i = 0; i < sizeof(PERMUTE_TESTS) / sizeof(PERMUTE_TESTS[0]); i++) { const size_t exp_len = PERMUTE_TESTS[i].exp_len; @@ -3037,11 +3037,11 @@ static void test_permute_diet_neon(void) { fail_test(__func__, "", (uint8_t*) got, exp_len, (uint8_t*) PERMUTE_TESTS[i].exp, exp_len); } } -#endif /* SHA3_BACKEND == BACKEND_DIET_NEON */ +#endif /* BACKEND == BACKEND_DIET_NEON */ } static void test_permute_hybrid_neon(void) { -#if SHA3_BACKEND == BACKEND_HYBRID_NEON +#if BACKEND == BACKEND_HYBRID_NEON for (size_t i = 0; i < sizeof(PERMUTE_TESTS) / sizeof(PERMUTE_TESTS[0]); i++) { const size_t exp_len = PERMUTE_TESTS[i].exp_len; @@ -3053,7 +3053,7 @@ static void test_permute_hybrid_neon(void) { fail_test(__func__, "", (uint8_t*) got, exp_len, (uint8_t*) PERMUTE_TESTS[i].exp, exp_len); } } -#endif /* SHA3_BACKEND == BACKEND_HYBRID_NEON */ +#endif /* BACKEND == BACKEND_HYBRID_NEON */ } static const struct { @@ -3081,7 +3081,7 @@ static void test_permute12_scalar(void) { } static void test_permute12_avx512(void) { -#if SHA3_BACKEND == BACKEND_AVX512 +#if BACKEND == BACKEND_AVX512 for (size_t i = 0; i < sizeof(PERMUTE12_TESTS) / sizeof(PERMUTE12_TESTS[0]); i++) { const size_t exp_len = PERMUTE12_TESTS[i].exp_len; @@ -3093,11 +3093,11 @@ static void test_permute12_avx512(void) { fail_test(__func__, "", (uint8_t*) got, exp_len, (uint8_t*) PERMUTE12_TESTS[i].exp, exp_len); } } -#endif /* SHA3_BACKEND == BACKEND_AVX512 */ +#endif /* BACKEND == BACKEND_AVX512 */ } static void test_permute12_neon(void) { -#if SHA3_BACKEND == BACKEND_NEON +#if BACKEND == BACKEND_NEON for (size_t i = 0; i < sizeof(PERMUTE12_TESTS) / sizeof(PERMUTE12_TESTS[0]); i++) { const size_t exp_len = PERMUTE12_TESTS[i].exp_len; @@ -3109,11 +3109,11 @@ static void test_permute12_neon(void) { fail_test(__func__, "", (uint8_t*) got, exp_len, (uint8_t*) PERMUTE12_TESTS[i].exp, exp_len); } } -#endif /* SHA3_BACKEND == BACKEND_NEON */ +#endif /* BACKEND == BACKEND_NEON */ } static void test_permute12_diet_neon(void) { -#if SHA3_BACKEND == BACKEND_DIET_NEON +#if BACKEND == BACKEND_DIET_NEON for (size_t i = 0; i < sizeof(PERMUTE12_TESTS) / sizeof(PERMUTE12_TESTS[0]); i++) { const size_t exp_len = PERMUTE12_TESTS[i].exp_len; @@ -3125,11 +3125,11 @@ static void test_permute12_diet_neon(void) { fail_test(__func__, "", (uint8_t*) got, exp_len, (uint8_t*) PERMUTE12_TESTS[i].exp, exp_len); } } -#endif /* SHA3_BACKEND == BACKEND_DIET_NEON */ +#endif /* BACKEND == BACKEND_DIET_NEON */ } static void test_permute12_hybrid_neon(void) { -#if SHA3_BACKEND == BACKEND_HYBRID_NEON +#if BACKEND == BACKEND_HYBRID_NEON for (size_t i = 0; i < sizeof(PERMUTE12_TESTS) / sizeof(PERMUTE12_TESTS[0]); i++) { const size_t exp_len = PERMUTE12_TESTS[i].exp_len; @@ -3141,7 +3141,7 @@ static void test_permute12_hybrid_neon(void) { fail_test(__func__, "", (uint8_t*) got, exp_len, (uint8_t*) PERMUTE12_TESTS[i].exp, exp_len); } } -#endif /* SHA3_BACKEND == BACKEND_HYBRID_NEON */ +#endif /* BACKEND == BACKEND_HYBRID_NEON */ } static void test_sha3_224(void) { @@ -7348,7 +7348,7 @@ int main(void) { test_turboshake256(); test_k12_length_encode(); test_k12(); - printf("ok\n"); + printf("ok (%s)\n", sha3_backend()); } #endif /* SHA3_TEST */ diff --git a/tests/bench/Makefile b/tests/bench/Makefile index deb00b2..7fc1a9b 100644 --- a/tests/bench/Makefile +++ b/tests/bench/Makefile @@ -1,7 +1,7 @@ # get backend from environment, or fall back to 0 if unspecified -SHA3_BACKEND ?= 0 +BACKEND ?= 0 -CFLAGS=-std=c11 -W -Wall -Wextra -Wpedantic -Werror -g -O3 -march=native -mtune=native -DSHA3_BACKEND=$(SHA3_BACKEND) +CFLAGS=-std=c11 -W -Wall -Wextra -Wpedantic -Werror -g -O3 -march=native -mtune=native -DBACKEND=$(BACKEND) APP=./bench OBJS=sha3.o bench.o diff --git a/tests/bench/README.md b/tests/bench/README.md index 66fe8a9..7e69817 100644 --- a/tests/bench/README.md +++ b/tests/bench/README.md @@ -65,7 +65,7 @@ root> echo 2 > /proc/sys/kernel/perf_event_paranoid gcc (Debian 12.2.0-14) 12.2.0 # compile with avx512 backend -> make clean all SHA3_BACKEND=2 +> make clean all BACKEND=2 # benchmark with 2k trials > ./bench @@ -80,7 +80,7 @@ shake128,32,15.5,7.8,6.9,6.2,6.1 shake256,32,15.6,7.8,7.9,7.6,7.4 # compile with scalar backend -> make clean all SHA3_BACKEND=1 +> make clean all BACKEND=1 ... # benchmark scalar backend with 2k trials @@ -106,7 +106,7 @@ Cortex-A76 gcc (Debian 12.2.0-14) 12.2.0 # compile with scalar backend -> make clean all SHA3_BACKEND=1 +> make clean all BACKEND=1 ... # benchmark scalar backend with 2k trials @@ -122,7 +122,7 @@ shake128,32,20.2,10.1,9.0,8.1,7.9 shake256,32,20.2,10.3,10.3,9.9,9.7 # compile with neon backend -> make clean all SHA3_BACKEND=3 +> make clean all BACKEND=3 ... # benchmark neon backend with 2k trials @@ -151,7 +151,7 @@ Cortex-A73 gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 # compile with scalar backend -> make clean all SHA3_BACKEND=1 +> make clean all BACKEND=1 ... # benchmark scalar backend with 2k trials @@ -167,7 +167,7 @@ shake128,32,34.0,16.1,13.6,12.1,11.8 shake256,32,34.0,16.1,15.5,14.8,14.4 # compile with neon backend -> make clean all SHA3_BACKEND=3 +> make clean all BACKEND=3 # benchmark neon backend with 2k trials > ./bench 2000 |