summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Paul Duncan <pabs@pablotron.org> 2024-05-08 17:03:17 -0400
committer Paul Duncan <pabs@pablotron.org> 2024-05-08 17:03:17 -0400
commit b55d14bbe2f5d93b92ab8e37d8a2376ed5cb1f11 (patch)
tree 70b13921ec1e66fe719f923f31eacfb0a9dbd42f
parent 255c5fcfec7b1296c4ba764ce72d14f9dc25dea9 (diff)
download sha3-b55d14bbe2f5d93b92ab8e37d8a2376ed5cb1f11.tar.bz2
sha3-b55d14bbe2f5d93b92ab8e37d8a2376ed5cb1f11.zip
s/SHA3_BACKEND/BACKEND/g
-rw-r--r-- Makefile 6
-rw-r--r-- README.md 14
-rw-r--r-- sha3.c 96
-rw-r--r-- tests/bench/Makefile 4
-rw-r--r-- tests/bench/README.md 12
5 files changed, 66 insertions, 66 deletions
diff --git a/Makefile b/Makefile
index bdaf13e..07030a0 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
# backend (0 to auto-detect)
-SHA3_BACKEND ?= 0
+BACKEND ?= 0
# compiler flags used for sample application and shared library
-CFLAGS=-W -Wall -Wextra -Werror -pedantic -std=c11 -fPIC -O3 -march=native -mtune=native -DSHA3_BACKEND=$(SHA3_BACKEND)
+CFLAGS=-W -Wall -Wextra -Werror -pedantic -std=c11 -fPIC -O3 -march=native -mtune=native -DBACKEND=$(BACKEND)
# sample application
APP=./sha3
@@ -13,7 +13,7 @@ LIB=libsha3.so
LIB_OBJS=sha3.o
# test app (test suite and sanitizers)
-TEST_CFLAGS=-g -fsanitize=address,pointer-compare,pointer-subtract,undefined,leak -W -Wall -Wextra -Werror -pedantic -std=c11 -march=native -mtune=native -DSHA3_BACKEND=$(SHA3_BACKEND)
+TEST_CFLAGS=-g -fsanitize=address,pointer-compare,pointer-subtract,undefined,leak -W -Wall -Wextra -Werror -pedantic -std=c11 -march=native -mtune=native -DBACKEND=$(BACKEND)
TEST_APP=./test-sha3
.PHONY=test all
diff --git a/README.md b/README.md
index 4c5e348..c221e77 100644
--- a/README.md
+++ b/README.md
@@ -203,20 +203,20 @@ size.
## Backends
This library includes several accelerated backends which are selectable
-at compile time via the `SHA3_BACKEND` make argument and define. By
-default the fastest backend is selected at compile-time.
+at compile time via the `BACKEND` make argument and define. By default
+the fastest backend is selected at compile-time.
The available backends are:
-- Scalar (`SHA3_BACKEND=1`): Default if no faster backend is available.
-- [AVX-512][] (`SHA3_BACKEND=2`): [AVX-512][] acceleration. Selected by
+- Scalar (`BACKEND=1`): Default if no faster backend is available.
+- [AVX-512][] (`BACKEND=2`): [AVX-512][] acceleration. Selected by
default if [AVX-512][] is supported.
-- [Neon][] (`SHA3_BACKEND=3`): ARM [Neon][] acceleration. Currently
- slower than the scalar backend on ARM CPUs and not enabled by default.
+- [Neon][] (`BACKEND=3`): ARM [Neon][] acceleration. Currently slower
+ than the scalar backend on ARM CPUs and not enabled by default.
The name of the selected backend is available at run-time via the
`sha3_backend()` function. See the `tests/bench/` for examples of the
-`SHA3_BACKEND` make argument and the `sha3_backend()` function.
+`BACKEND` make argument and the `sha3_backend()` function.
## Benchmarks
diff --git a/sha3.c b/sha3.c
index fbfc3c8..81b229c 100644
--- a/sha3.c
+++ b/sha3.c
@@ -33,23 +33,23 @@
#define BACKEND_DIET_NEON 4 // Neon backend which uses fewer registers
#define BACKEND_HYBRID_NEON 5 // Hybrid neon backend
-// if SHA3_BACKEND is defined and set to 0 (the default), then unset it
+// if BACKEND is defined and set to 0 (the default), then unset it
// and auto-detect the appropriate backend
-#if defined(SHA3_BACKEND) && SHA3_BACKEND == BACKEND_AUTO
-#undef SHA3_BACKEND
-#endif /* defined(SHA3_BACKEND) && SHA3_BACKEND == 0 */
+#if defined(BACKEND) && BACKEND == BACKEND_AUTO
+#undef BACKEND
+#endif /* defined(BACKEND) && BACKEND == 0 */
// detect backend
-#ifndef SHA3_BACKEND
+#ifndef BACKEND
#if defined(__AVX512F__)
-#define SHA3_BACKEND BACKEND_AVX512
+#define BACKEND BACKEND_AVX512
#elif 0 && defined(__ARM_NEON)
-#define SHA3_BACKEND BACKEND_NEON
+#define BACKEND BACKEND_NEON
#else
// no optimized backend detected, fall back to scalar
-#define SHA3_BACKEND BACKEND_SCALAR
+#define BACKEND BACKEND_SCALAR
#endif
-#endif /* !SHA3_BACKEND */
+#endif /* !BACKEND */
// 64-bit rotate left
#define ROL(v, n) (((v) << (n)) | ((v) >> (64-(n))))
@@ -73,7 +73,7 @@ static const uint64_t RCS[] = {
0x8000000080008081ULL, 0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL,
};
-#if (SHA3_BACKEND == BACKEND_SCALAR) || defined(SHA3_TEST)
+#if (BACKEND == BACKEND_SCALAR) || defined(SHA3_TEST)
// If AVX-512 is supported and we are not building the test suite,
// then do not compile the scalar step functions.
//
@@ -244,9 +244,9 @@ static inline void permute_n_scalar(uint64_t a[static 25], const size_t num_roun
iota(a, (SHA3_NUM_ROUNDS - num_rounds + i));
}
}
-#endif /* (SHA3_BACKEND == BACKEND_SCALAR) || defined(SHA3_TEST) */
+#endif /* (BACKEND == BACKEND_SCALAR) || defined(SHA3_TEST) */
-#if SHA3_BACKEND == BACKEND_AVX512
+#if BACKEND == BACKEND_AVX512
#include <immintrin.h>
/**
@@ -473,9 +473,9 @@ static inline void permute_n_avx512(uint64_t s[static 25], const size_t num_roun
_mm512_mask_storeu_epi64(s + 5 * 3, 0x1f, r3);
_mm512_mask_storeu_epi64(s + 5 * 4, 0x1f, r4);
}
-#endif /* SHA3_BACKEND == BACKEND_AVX512 */
+#endif /* BACKEND == BACKEND_AVX512 */
-#if SHA3_BACKEND == BACKEND_NEON
+#if BACKEND == BACKEND_NEON
#include <arm_neon.h>
// rotate elements in uint64x2_t left by N bits
@@ -780,9 +780,9 @@ static inline void permute_n_neon(uint64_t a[static 25], const size_t num_rounds
row_store(a + 15, r3);
row_store(a + 20, r4);
}
-#endif /* SHA3_BACKEND == BACKEND_NEON */
+#endif /* BACKEND == BACKEND_NEON */
-#if SHA3_BACKEND == BACKEND_DIET_NEON
+#if BACKEND == BACKEND_DIET_NEON
#include <arm_neon.h>
// rotate element in uint64x1_t left by N bits
@@ -1060,9 +1060,9 @@ static inline void permute_n_diet_neon(uint64_t a[static 25], const size_t num_r
// store column 4 of r4
vst1_u64(a + 24, row_last(r4));
}
-#endif /* SHA3_BACKEND == BACKEND_DIET_NEON */
+#endif /* BACKEND == BACKEND_DIET_NEON */
-#if (SHA3_BACKEND == BACKEND_HYBRID_NEON)
+#if (BACKEND == BACKEND_HYBRID_NEON)
#include <arm_neon.h>
/**
@@ -1222,21 +1222,21 @@ static inline void permute_n_hybrid_neon(uint64_t a[static 25], const size_t num
a[0] ^= RCS[i];
}
}
-#endif /* (SHA3_BACKEND == BACKEND_HYBRID_NEON) */
+#endif /* (BACKEND == BACKEND_HYBRID_NEON) */
-#if SHA3_BACKEND == BACKEND_AVX512
+#if BACKEND == BACKEND_AVX512
#define permute_n permute_n_avx512 // use avx512 backend
-#elif SHA3_BACKEND == BACKEND_NEON
+#elif BACKEND == BACKEND_NEON
#define permute_n permute_n_neon // use neon backend
-#elif SHA3_BACKEND == BACKEND_DIET_NEON
+#elif BACKEND == BACKEND_DIET_NEON
#define permute_n permute_n_diet_neon // use diet-neon backend
-#elif SHA3_BACKEND == BACKEND_HYBRID_NEON
+#elif BACKEND == BACKEND_HYBRID_NEON
#define permute_n permute_n_hybrid_neon // use hybrid-neon backend
-#elif SHA3_BACKEND == BACKEND_SCALAR
+#elif BACKEND == BACKEND_SCALAR
#define permute_n permute_n_scalar // use scalar backend
#else
#error "unknown sha3 backend"
-#endif /* SHA3_BACKEND */
+#endif /* BACKEND */
/**
* @brief 24 round Keccak permutation.
@@ -2723,17 +2723,17 @@ void k12_once(const uint8_t *src, const size_t src_len, uint8_t *dst, const size
// Return backend name.
const char *sha3_backend(void) {
-#if SHA3_BACKEND == BACKEND_AVX512
+#if BACKEND == BACKEND_AVX512
return "avx512";
-#elif SHA3_BACKEND == BACKEND_NEON
+#elif BACKEND == BACKEND_NEON
return "neon";
-#elif SHA3_BACKEND == BACKEND_DIET_NEON
+#elif BACKEND == BACKEND_DIET_NEON
return "diet-neon";
-#elif SHA3_BACKEND == BACKEND_HYBRID_NEON
+#elif BACKEND == BACKEND_HYBRID_NEON
return "hybrid-neon";
-#elif SHA3_BACKEND == BACKEND_SCALAR
+#elif BACKEND == BACKEND_SCALAR
return "scalar";
-#endif /* SHA3_BACKEND */
+#endif /* BACKEND */
}
#ifdef SHA3_TEST
@@ -2993,7 +2993,7 @@ static void test_permute_scalar(void) {
}
static void test_permute_avx512(void) {
-#if SHA3_BACKEND == BACKEND_AVX512
+#if BACKEND == BACKEND_AVX512
for (size_t i = 0; i < sizeof(PERMUTE_TESTS) / sizeof(PERMUTE_TESTS[0]); i++) {
const size_t exp_len = PERMUTE_TESTS[i].exp_len;
@@ -3005,11 +3005,11 @@ static void test_permute_avx512(void) {
fail_test(__func__, "", (uint8_t*) got, exp_len, (uint8_t*) PERMUTE_TESTS[i].exp, exp_len);
}
}
-#endif /* SHA3_BACKEND == BACKEND_AVX512 */
+#endif /* BACKEND == BACKEND_AVX512 */
}
static void test_permute_neon(void) {
-#if SHA3_BACKEND == BACKEND_NEON
+#if BACKEND == BACKEND_NEON
for (size_t i = 0; i < sizeof(PERMUTE_TESTS) / sizeof(PERMUTE_TESTS[0]); i++) {
const size_t exp_len = PERMUTE_TESTS[i].exp_len;
@@ -3021,11 +3021,11 @@ static void test_permute_neon(void) {
fail_test(__func__, "", (uint8_t*) got, exp_len, (uint8_t*) PERMUTE_TESTS[i].exp, exp_len);
}
}
-#endif /* SHA3_BACKEND == BACKEND_NEON */
+#endif /* BACKEND == BACKEND_NEON */
}
static void test_permute_diet_neon(void) {
-#if SHA3_BACKEND == BACKEND_DIET_NEON
+#if BACKEND == BACKEND_DIET_NEON
for (size_t i = 0; i < sizeof(PERMUTE_TESTS) / sizeof(PERMUTE_TESTS[0]); i++) {
const size_t exp_len = PERMUTE_TESTS[i].exp_len;
@@ -3037,11 +3037,11 @@ static void test_permute_diet_neon(void) {
fail_test(__func__, "", (uint8_t*) got, exp_len, (uint8_t*) PERMUTE_TESTS[i].exp, exp_len);
}
}
-#endif /* SHA3_BACKEND == BACKEND_DIET_NEON */
+#endif /* BACKEND == BACKEND_DIET_NEON */
}
static void test_permute_hybrid_neon(void) {
-#if SHA3_BACKEND == BACKEND_HYBRID_NEON
+#if BACKEND == BACKEND_HYBRID_NEON
for (size_t i = 0; i < sizeof(PERMUTE_TESTS) / sizeof(PERMUTE_TESTS[0]); i++) {
const size_t exp_len = PERMUTE_TESTS[i].exp_len;
@@ -3053,7 +3053,7 @@ static void test_permute_hybrid_neon(void) {
fail_test(__func__, "", (uint8_t*) got, exp_len, (uint8_t*) PERMUTE_TESTS[i].exp, exp_len);
}
}
-#endif /* SHA3_BACKEND == BACKEND_HYBRID_NEON */
+#endif /* BACKEND == BACKEND_HYBRID_NEON */
}
static const struct {
@@ -3081,7 +3081,7 @@ static void test_permute12_scalar(void) {
}
static void test_permute12_avx512(void) {
-#if SHA3_BACKEND == BACKEND_AVX512
+#if BACKEND == BACKEND_AVX512
for (size_t i = 0; i < sizeof(PERMUTE12_TESTS) / sizeof(PERMUTE12_TESTS[0]); i++) {
const size_t exp_len = PERMUTE12_TESTS[i].exp_len;
@@ -3093,11 +3093,11 @@ static void test_permute12_avx512(void) {
fail_test(__func__, "", (uint8_t*) got, exp_len, (uint8_t*) PERMUTE12_TESTS[i].exp, exp_len);
}
}
-#endif /* SHA3_BACKEND == BACKEND_AVX512 */
+#endif /* BACKEND == BACKEND_AVX512 */
}
static void test_permute12_neon(void) {
-#if SHA3_BACKEND == BACKEND_NEON
+#if BACKEND == BACKEND_NEON
for (size_t i = 0; i < sizeof(PERMUTE12_TESTS) / sizeof(PERMUTE12_TESTS[0]); i++) {
const size_t exp_len = PERMUTE12_TESTS[i].exp_len;
@@ -3109,11 +3109,11 @@ static void test_permute12_neon(void) {
fail_test(__func__, "", (uint8_t*) got, exp_len, (uint8_t*) PERMUTE12_TESTS[i].exp, exp_len);
}
}
-#endif /* SHA3_BACKEND == BACKEND_NEON */
+#endif /* BACKEND == BACKEND_NEON */
}
static void test_permute12_diet_neon(void) {
-#if SHA3_BACKEND == BACKEND_DIET_NEON
+#if BACKEND == BACKEND_DIET_NEON
for (size_t i = 0; i < sizeof(PERMUTE12_TESTS) / sizeof(PERMUTE12_TESTS[0]); i++) {
const size_t exp_len = PERMUTE12_TESTS[i].exp_len;
@@ -3125,11 +3125,11 @@ static void test_permute12_diet_neon(void) {
fail_test(__func__, "", (uint8_t*) got, exp_len, (uint8_t*) PERMUTE12_TESTS[i].exp, exp_len);
}
}
-#endif /* SHA3_BACKEND == BACKEND_DIET_NEON */
+#endif /* BACKEND == BACKEND_DIET_NEON */
}
static void test_permute12_hybrid_neon(void) {
-#if SHA3_BACKEND == BACKEND_HYBRID_NEON
+#if BACKEND == BACKEND_HYBRID_NEON
for (size_t i = 0; i < sizeof(PERMUTE12_TESTS) / sizeof(PERMUTE12_TESTS[0]); i++) {
const size_t exp_len = PERMUTE12_TESTS[i].exp_len;
@@ -3141,7 +3141,7 @@ static void test_permute12_hybrid_neon(void) {
fail_test(__func__, "", (uint8_t*) got, exp_len, (uint8_t*) PERMUTE12_TESTS[i].exp, exp_len);
}
}
-#endif /* SHA3_BACKEND == BACKEND_HYBRID_NEON */
+#endif /* BACKEND == BACKEND_HYBRID_NEON */
}
static void test_sha3_224(void) {
@@ -7348,7 +7348,7 @@ int main(void) {
test_turboshake256();
test_k12_length_encode();
test_k12();
- printf("ok\n");
+ printf("ok (%s)\n", sha3_backend());
}
#endif /* SHA3_TEST */
diff --git a/tests/bench/Makefile b/tests/bench/Makefile
index deb00b2..7fc1a9b 100644
--- a/tests/bench/Makefile
+++ b/tests/bench/Makefile
@@ -1,7 +1,7 @@
# get backend from environment, or fall back to 0 if unspecified
-SHA3_BACKEND ?= 0
+BACKEND ?= 0
-CFLAGS=-std=c11 -W -Wall -Wextra -Wpedantic -Werror -g -O3 -march=native -mtune=native -DSHA3_BACKEND=$(SHA3_BACKEND)
+CFLAGS=-std=c11 -W -Wall -Wextra -Wpedantic -Werror -g -O3 -march=native -mtune=native -DBACKEND=$(BACKEND)
APP=./bench
OBJS=sha3.o bench.o
diff --git a/tests/bench/README.md b/tests/bench/README.md
index 66fe8a9..7e69817 100644
--- a/tests/bench/README.md
+++ b/tests/bench/README.md
@@ -65,7 +65,7 @@ root> echo 2 > /proc/sys/kernel/perf_event_paranoid
gcc (Debian 12.2.0-14) 12.2.0
# compile with avx512 backend
-> make clean all SHA3_BACKEND=2
+> make clean all BACKEND=2
# benchmark with 2k trials
> ./bench
@@ -80,7 +80,7 @@ shake128,32,15.5,7.8,6.9,6.2,6.1
shake256,32,15.6,7.8,7.9,7.6,7.4
# compile with scalar backend
-> make clean all SHA3_BACKEND=1
+> make clean all BACKEND=1
...
# benchmark scalar backend with 2k trials
@@ -106,7 +106,7 @@ Cortex-A76
gcc (Debian 12.2.0-14) 12.2.0
# compile with scalar backend
-> make clean all SHA3_BACKEND=1
+> make clean all BACKEND=1
...
# benchmark scalar backend with 2k trials
@@ -122,7 +122,7 @@ shake128,32,20.2,10.1,9.0,8.1,7.9
shake256,32,20.2,10.3,10.3,9.9,9.7
# compile with neon backend
-> make clean all SHA3_BACKEND=3
+> make clean all BACKEND=3
...
# benchmark neon backend with 2k trials
@@ -151,7 +151,7 @@ Cortex-A73
gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
# compile with scalar backend
-> make clean all SHA3_BACKEND=1
+> make clean all BACKEND=1
...
# benchmark scalar backend with 2k trials
@@ -167,7 +167,7 @@ shake128,32,34.0,16.1,13.6,12.1,11.8
shake256,32,34.0,16.1,15.5,14.8,14.4
# compile with neon backend
-> make clean all SHA3_BACKEND=3
+> make clean all BACKEND=3
# benchmark neon backend with 2k trials
> ./bench 2000