aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore2
-rw-r--r--tests/bench/Makefile16
-rw-r--r--tests/bench/README.md111
-rw-r--r--tests/bench/bench.c222
l---------tests/bench/rand-bytes.h1
l---------tests/bench/sha3.c1
l---------tests/bench/sha3.h1
7 files changed, 354 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
index f9e464a..00a213f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,5 @@ libsha3.so
html
tests/cavp-tests/cavp-tests
tests/cavp-tests/zip-cache
+tests/bench/bench
+tests/bench/perf.data*
diff --git a/tests/bench/Makefile b/tests/bench/Makefile
new file mode 100644
index 0000000..3e08426
--- /dev/null
+++ b/tests/bench/Makefile
@@ -0,0 +1,16 @@
+# Build flags: strict C11 with warnings as errors, debug symbols, and
+# optimizations tuned for the build host.
+CFLAGS=-std=c11 -W -Wall -Wextra -Wpedantic -Werror -g -O3 -march=native -mtune=native
+APP=./bench
+OBJS=sha3.o bench.o
+
+# Declare targets that do not name files.  This must be a rule (`:`),
+# not an assignment (`=`), otherwise make only sets a variable named
+# `.PHONY` and the targets are not treated as phony.
+.PHONY: all test clean
+
+all: $(APP)
+
+# Link the benchmark against libcpucycles and libm.
+$(APP): $(OBJS)
+	$(CC) -o $(APP) $(CFLAGS) $(OBJS) -lcpucycles -lm
+
+%.o: %.c
+	$(CC) -c $(CFLAGS) $<
+
+clean:
+	$(RM) -f $(APP) $(OBJS)
diff --git a/tests/bench/README.md b/tests/bench/README.md
new file mode 100644
index 0000000..1758c04
--- /dev/null
+++ b/tests/bench/README.md
@@ -0,0 +1,111 @@
+# bench
+
+Benchmark hash and XOF speed and then print summary statistics to
+standard output in [CSV][] format and metadata to standard error.
+
+Requires [libcpucycles][].
+
+The columns of the [CSV][] printed to standard output are as follows:
+
+* `function`: Function name.
+* `dst`: Output size, in bytes.
+* `src`: Input size, in bytes.
+* `median_cpb`: [Median][] of observed CPU cycles divided by the input size.
+* `mean_cpb`: [Arithmetic mean][mean] of observed CPU cycles divided by the input size.
+* `median_cycles`: [Median][] of observed CPU cycles.
+* `mean_cycles`: [Arithmetic mean][mean] of observed CPU cycles.
+* `stddev_cycles`: [Standard deviation][stddev] of observed CPU cycles.
+* `min_cycles`: Minimum observed CPU cycles.
+* `max_cycles`: Maximum observed CPU cycles.
+
+The metadata printed to standard error is as follows:
+
+* `version`: version of [libcpucycles][] as reported by `cpucycles_version()`
+* `implementation`: [libcpucycles][] backend as reported by `cpucycles_implementation()`
+* `persecond`: CPU cycles per second, as reported by `cpucycles_persecond()`
+* `num_trials`: Number of trials.
+
+## Build
+
+1. Install [libcpucycles][].
+2. Type `make`. Creates an executable named `./bench` in the current
+ directory.
+
+## Run
+
+Type `./bench` to run benchmarks with the default number of trials
+(100,000), or `./bench NUM` to run benchmarks with a custom number of
+trials.
+
+**Note:** You may need to adjust your system configuration or run
+`bench` as root to grant [libcpucycles][] access to the high-resolution
+cycle counter.
+
+See [the libcpucycles security page][libcpucycles-security] for details.
+
+## Examples
+
+Below are example runs of `bench` on a ThinkPad X1 Carbon ([x86-64][],
+[AVX-512][] backend) and on an [Odroid N2L][] ([ARM64][], scalar
+backend).
+
+### Lenovo ThinkPad X1 Carbon, 6th Gen (i7-1185G7)
+
+```
+# enable user-level RDPMC access (run as root)
+root> echo 2 > /proc/sys/kernel/perf_event_paranoid
+
+# print cpu and compiler info
+> lscpu | grep -i '^model name:' | sed 's/.*: *//'
+11th Gen Intel(R) Core(TM) i7-1185G7 @ 3.00GHz
+> gcc --version | head -1
+gcc (Debian 12.2.0-14) 12.2.0
+
+# benchmark with 100k trials
+> ./bench
+TODO...
+
+# benchmark with 1M trials
+> ./bench 1000000
+TODO...
+```
+
+### Odroid N2L (Cortex-A73)
+
+```
+# enable user-level perf_event access (run as root)
+root> echo 2 > /proc/sys/kernel/perf_event_paranoid
+
+# print cpu and compiler info
+> lscpu | grep -i '^model name' | sed 's/.*: *//'
+Cortex-A73
+> gcc --version | head -1
+gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
+
+# benchmark with 100k trials
+> ./bench
+info: cpucycles: version=20240318 implementation=arm64-vct persecond=1800000000
+info: num_trials=100000
+TODO...
+```
+
+[csv]: https://en.wikipedia.org/wiki/Comma-separated_values
+ "Comma-Separated Value (CSV)"
+[libcpucycles]: https://cpucycles.cr.yp.to/
+ "Microlibrary for counting CPU cycles."
+[libcpucycles-security]: https://cpucycles.cr.yp.to/security.html
+ "libcpucycles security documentation"
+[median]: https://en.wikipedia.org/wiki/Median
+ "Median"
+[mean]: https://en.wikipedia.org/wiki/Arithmetic_mean
+ "Arithmetic mean"
+[stddev]: https://en.wikipedia.org/wiki/Standard_deviation
+ "Standard deviation"
+[odroid n2l]: https://en.odroid.se/products/odroid-n2l-4gb
+ "Odroid N2L"
+[x86-64]: https://en.wikipedia.org/wiki/X86-64
+ "64-bit x86 instruction set."
+[arm64]: https://en.wikipedia.org/wiki/AArch64
+ "64-bit extension to the ARM instruction set."
+[avx-512]: https://en.wikipedia.org/wiki/AVX-512
+ "AVX-512: 512-bit extensions to the Advanced Vector Extensions (AVX) instruction set."
diff --git a/tests/bench/bench.c b/tests/bench/bench.c
new file mode 100644
index 0000000..9144e91
--- /dev/null
+++ b/tests/bench/bench.c
@@ -0,0 +1,222 @@
+//
+// Benchmark the SHA-3 hash functions and SHAKE XOFs and print summary
+// statistics to standard output in CSV format.
+//
+// Requires libcpucycles (https://cpucycles.cr.yp.to/).
+//
+// Note: You may need to adjust your system configuration or run `bench`
+// as root to grant libcpucycles access to the high-resolution cycle
+// counter. See the following URL for details:
+//
+// https://cpucycles.cr.yp.to/security.html
+//
+
+#include <errno.h> // errno, ERANGE
+#include <stdlib.h> // exit(), qsort()
+#include <stdio.h> // printf()
+#include <string.h> // memcmp()
+#include <math.h> // sqrt(), pow()
+#include <cpucycles.h> // cpucycles()
+#include "sha3.h" // sha3_*(), shake*()
+#include "rand-bytes.h" // rand_bytes()
+
+// default number of trials
+#define NUM_TRIALS 100000
+
+// Random data used for key generation and encapsulation.
+//
+// NOTE(review): nothing else in this file refers to seeds_t; it looks
+// like a leftover from a different benchmark and is a candidate for
+// removal -- confirm before deleting.
+typedef struct {
+ uint8_t keygen[64], // random data for keygen()
+ encaps[32]; // random data for encaps()
+} seeds_t;
+
+// Summary statistics for one series of observed values, as produced by
+// get_stats().
+typedef struct {
+  long long lo;     // smallest observed value
+  long long hi;     // largest observed value
+  long long median; // middle value of the sorted observations
+  double mean;      // arithmetic mean
+  double stddev;    // standard deviation
+} stats_t;
+
+// Allocate a zero-filled array of `nmemb` elements of `size` bytes each
+// with calloc().
+//
+// If the allocation fails, print an error message prefixed with `name`
+// to standard error and terminate the process with exit(-1), so callers
+// never receive a NULL pointer.  The caller owns the returned memory
+// and must release it with free().
+static void *checked_calloc(const char *name, const size_t nmemb, const size_t size) {
+  void * const ptr = calloc(nmemb, size);
+  if (ptr) {
+    return ptr;
+  }
+
+  // allocation failed: report which caller asked, then bail out
+  fprintf(stderr, "%s: calloc() failed\n", name);
+  exit(-1);
+}
+
+// Callback for `qsort()` to sort observed times in ascending order.
+//
+// Returns a negative value, zero, or a positive value if the `long
+// long` pointed to by `ap` is less than, equal to, or greater than the
+// one pointed to by `bp`.
+//
+// The result is built from two comparisons rather than a subtraction:
+// a `long long` difference does not necessarily fit in the `int` return
+// value, and narrowing it can flip the sign or turn a non-zero
+// difference into zero.
+static int sort_asc_cb(const void *ap, const void *bp) {
+  const long long a = *(const long long *) ap,
+                  b = *(const long long *) bp;
+  return (a > b) - (a < b);
+}
+
+// Get summary statistics of a series of test times.
+//
+// Sorts `vals` in place in ascending order as a side effect.  The
+// reported median is the element at index `num_vals / 2` of the sorted
+// values (the upper of the two middle values when `num_vals` is even),
+// and the standard deviation is computed over all `num_vals` values
+// (divisor `num_vals`, not `num_vals - 1`).
+//
+// Returns all-zero statistics if `num_vals` is zero.
+static stats_t get_stats(long long * const vals, const size_t num_vals) {
+  stats_t stats = { 0 };
+
+  // nothing to summarize; this also avoids reading vals[num_vals - 1]
+  // out of bounds and dividing by zero below
+  if (!num_vals) {
+    return stats;
+  }
+
+  // sort values in ascending order (used for min, max, and median)
+  qsort(vals, num_vals, sizeof(long long), sort_asc_cb);
+
+  // get low, high, and median
+  stats.lo = vals[0];
+  stats.hi = vals[num_vals - 1];
+  stats.median = vals[num_vals / 2];
+
+  // calculate mean
+  for (size_t i = 0; i < num_vals; i++) {
+    stats.mean += vals[i];
+  }
+  stats.mean /= num_vals;
+
+  // calculate standard deviation: square root of the mean squared
+  // distance from the mean
+  for (size_t i = 0; i < num_vals; i++) {
+    const double delta = stats.mean - vals[i];
+    stats.stddev += delta * delta;
+  }
+  stats.stddev = sqrt(stats.stddev / num_vals);
+
+  // return stats
+  return stats;
+}
+
+// Define a XOF benchmark function named `bench_<FN>`.
+//
+// The generated function calls `FN(src, src_len, dst, dst_len)`
+// `num_trials` times, measures each call with cpucycles(), and returns
+// summary statistics of the measured cycle counts.  Each trial reads
+// its own `src_len`-byte slice of one random source buffer and writes
+// its own `dst_len`-byte slice of one destination buffer; both buffers
+// are filled/allocated before the timed loop starts.
+//
+// All three buffers (times, src, and dst) are released before
+// returning.
+#define DEF_BENCH_XOF(FN) \
+  static stats_t bench_ ## FN (const size_t num_trials, const size_t src_len, const size_t dst_len) { \
+    /* allocate times, src, and dst buffers */ \
+    long long *times = checked_calloc(__func__, num_trials, sizeof(long long)); \
+    uint8_t *src = checked_calloc(__func__, num_trials, src_len); \
+    uint8_t *dst = checked_calloc(__func__, num_trials, dst_len); \
+    \
+    /* generate random source data */ \
+    rand_bytes(src, num_trials * src_len); \
+    \
+    /* run trials */ \
+    for (size_t i = 0; i < num_trials; i++) { \
+      /* call function */ \
+      const long long t0 = cpucycles(); \
+      FN (src + (i * src_len), src_len, dst + (i * dst_len), dst_len); \
+      const long long t1 = cpucycles() - t0; \
+      \
+      /* save time */ \
+      times[i] = t1; \
+    } \
+    \
+    /* generate summary stats */ \
+    const stats_t stats = get_stats(times, num_trials); \
+    \
+    /* free buffers (dst included, otherwise it leaks on every call) */ \
+    free(dst); \
+    free(src); \
+    free(times); \
+    \
+    /* return summary stats */ \
+    return stats; \
+  }
+
+// Define a fixed-output hash benchmark function named `bench_<FN>`.
+//
+// The generated function calls `FN(src, src_len, dst)` `num_trials`
+// times, where `dst` is an `OUT_LEN`-byte buffer, measures each call
+// with cpucycles(), and returns summary statistics of the measured
+// cycle counts.  The input buffer is refilled with random bytes before
+// every trial, outside of the timed region.
+#define DEF_BENCH_HASH(FN, OUT_LEN) \
+  static stats_t bench_ ## FN (const size_t num_trials, const size_t src_len) { \
+    /* per-trial cycle counts and one reusable input buffer */ \
+    long long *cycles = checked_calloc(__func__, num_trials, sizeof(long long)); \
+    uint8_t *msg = checked_calloc(__func__, src_len, 1); \
+    \
+    for (size_t trial = 0; trial < num_trials; trial++) { \
+      /* fresh random input and zeroed digest buffer (not timed) */ \
+      rand_bytes(msg, src_len); \
+      uint8_t digest[OUT_LEN] = { 0 }; \
+      \
+      /* time only the hash call and record the elapsed cycles */ \
+      const long long start = cpucycles(); \
+      FN (msg, src_len, digest); \
+      cycles[trial] = cpucycles() - start; \
+    } \
+    \
+    /* summarize before releasing the measurements */ \
+    const stats_t result = get_stats(cycles, num_trials); \
+    \
+    free(msg); \
+    free(cycles); \
+    \
+    return result; \
+  }
+
+// define xof benchmarks: bench_shake128() and bench_shake256()
+DEF_BENCH_XOF(shake128)
+DEF_BENCH_XOF(shake256)
+
+// define hash benchmarks: bench_sha3_224(), bench_sha3_256(),
+// bench_sha3_384(), and bench_sha3_512(); the second macro argument is
+// the size of the output buffer, in bytes
+DEF_BENCH_HASH(sha3_224, 28)
+DEF_BENCH_HASH(sha3_256, 32)
+DEF_BENCH_HASH(sha3_384, 48)
+DEF_BENCH_HASH(sha3_512, 64)
+
+// Write one CSV row for function `name` to standard output.
+//
+// Columns, in order: name, `dst_len`, `src_len`, median cycles per
+// input byte, mean cycles per input byte, median cycles, mean cycles,
+// standard deviation, minimum cycles, maximum cycles.  Note that the
+// output size is printed before the input size even though `src_len`
+// comes first in the parameter list.
+static void print_row(const char *name, const size_t src_len, const size_t dst_len, stats_t fs) {
+  // cycles-per-byte figures are relative to the input size
+  const double num_bytes = (double) src_len;
+  const double median_cpb = (double) fs.median / num_bytes;
+  const double mean_cpb = fs.mean / num_bytes;
+
+  printf(
+    "%s,%zu,%zu,%.0f,%.0f,%lld,%.0f,%.0f,%lld,%lld\n",
+    name, dst_len, src_len,
+    median_cpb, mean_cpb,
+    fs.median, fs.mean, fs.stddev,
+    fs.lo, fs.hi
+  );
+}
+
+// Input sizes to benchmark, in bytes: start at MIN_SRC_LEN and double
+// while the size is less than MAX_SRC_LEN.  The loops below use `<`, so
+// MAX_SRC_LEN itself is not benchmarked.
+#define MIN_SRC_LEN 64
+#define MAX_SRC_LEN 2048
+
+// XOF output sizes to benchmark, in bytes: start at MIN_DST_LEN and
+// double while the size is less than MAX_DST_LEN (also exclusive).
+#define MIN_DST_LEN 32
+#define MAX_DST_LEN 128
+
+// Parse `s` as a non-negative base-10 integer and store it in `*out`.
+//
+// Returns 0 on success.  Returns -1 and leaves `*out` untouched if `s`
+// is empty, has trailing characters, is negative, or does not fit in a
+// `size_t`.
+static int parse_num_trials(const char * const s, size_t * const out) {
+  char *end = NULL;
+
+  errno = 0;
+  const long long val = strtoll(s, &end, 10);
+  if (end == s || *end != '\0' || errno == ERANGE || val < 0) {
+    return -1;
+  }
+
+  // reject values that do not survive the conversion to size_t
+  const size_t num = (size_t) val;
+  if ((long long) num != val) {
+    return -1;
+  }
+
+  *out = num;
+  return 0;
+}
+
+// Run every benchmark and print the results.
+//
+// The optional first command-line argument is the number of trials per
+// benchmark (default: NUM_TRIALS); it must be an integer greater than
+// 1.  Metadata is printed to standard error and the CSV header and rows
+// are printed to standard output.  Returns 0 on success and -1 if the
+// number of trials is invalid.
+int main(int argc, char *argv[]) {
+  // get number of trials from first command-line argument, or fall back
+  // to default if no argument was provided
+  size_t num_trials = NUM_TRIALS;
+  if (argc > 1 && parse_num_trials(argv[1], &num_trials) != 0) {
+    fprintf(stderr, "invalid num_trials: %s\n", argv[1]);
+    return -1;
+  }
+
+  if (num_trials < 2) {
+    fprintf(stderr, "num_trials must be greater than 1\n");
+    return -1;
+  }
+
+  // print metadata to stderr
+  fprintf(stderr,"info: cpucycles: version=%s implementation=%s persecond=%lld\ninfo: num_trials=%zu\n", cpucycles_version(), cpucycles_implementation(), cpucycles_persecond(), num_trials);
+
+  // print column headers to stdout
+  printf("function,dst,src,median_cpb,mean_cpb,median_cycles,mean_cycles,stddev_cycles,min_cycles,max_cycles\n");
+
+  // sha3-224
+  for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) {
+    print_row("sha3_224", src_len, 28, bench_sha3_224(num_trials, src_len));
+  }
+
+  // sha3-256
+  for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) {
+    print_row("sha3_256", src_len, 32, bench_sha3_256(num_trials, src_len));
+  }
+
+  // sha3-384
+  for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) {
+    print_row("sha3_384", src_len, 48, bench_sha3_384(num_trials, src_len));
+  }
+
+  // sha3-512
+  for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) {
+    print_row("sha3_512", src_len, 64, bench_sha3_512(num_trials, src_len));
+  }
+
+  // xofs: every output size is combined with every input size
+  for (size_t dst_len = MIN_DST_LEN; dst_len < MAX_DST_LEN; dst_len <<= 1) {
+    // shake128
+    for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) {
+      print_row("shake128", src_len, dst_len, bench_shake128(num_trials, src_len, dst_len));
+    }
+
+    // shake256
+    for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) {
+      print_row("shake256", src_len, dst_len, bench_shake256(num_trials, src_len, dst_len));
+    }
+  }
+
+  // return success
+  return 0;
+}
diff --git a/tests/bench/rand-bytes.h b/tests/bench/rand-bytes.h
new file mode 120000
index 0000000..421eaa6
--- /dev/null
+++ b/tests/bench/rand-bytes.h
@@ -0,0 +1 @@
+../../rand-bytes.h \ No newline at end of file
diff --git a/tests/bench/sha3.c b/tests/bench/sha3.c
new file mode 120000
index 0000000..4748193
--- /dev/null
+++ b/tests/bench/sha3.c
@@ -0,0 +1 @@
+../../sha3.c \ No newline at end of file
diff --git a/tests/bench/sha3.h b/tests/bench/sha3.h
new file mode 120000
index 0000000..b7c53d4
--- /dev/null
+++ b/tests/bench/sha3.h
@@ -0,0 +1 @@
+../../sha3.h \ No newline at end of file