diff options
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | tests/bench/Makefile | 16 | ||||
-rw-r--r-- | tests/bench/README.md | 111 | ||||
-rw-r--r-- | tests/bench/bench.c | 222 | ||||
l--------- | tests/bench/rand-bytes.h | 1 | ||||
l--------- | tests/bench/sha3.c | 1 | ||||
l--------- | tests/bench/sha3.h | 1 |
7 files changed, 354 insertions, 0 deletions
@@ -5,3 +5,5 @@ libsha3.so html tests/cavp-tests/cavp-tests tests/cavp-tests/zip-cache +tests/bench/bench +tests/bench/perf.data* diff --git a/tests/bench/Makefile b/tests/bench/Makefile new file mode 100644 index 0000000..3e08426 --- /dev/null +++ b/tests/bench/Makefile @@ -0,0 +1,16 @@ +CFLAGS=-std=c11 -W -Wall -Wextra -Wpedantic -Werror -g -O3 -march=native -mtune=native +APP=./bench +OBJS=sha3.o bench.o + +.PHONY=all test clean + +all: $(APP) + +$(APP): $(OBJS) + $(CC) -o $(APP) $(CFLAGS) $(OBJS) -lcpucycles -lm + +%.o: %.c + $(CC) -c $(CFLAGS) $< + +clean: + $(RM) -f $(APP) $(OBJS) diff --git a/tests/bench/README.md b/tests/bench/README.md new file mode 100644 index 0000000..1758c04 --- /dev/null +++ b/tests/bench/README.md @@ -0,0 +1,111 @@ +# bench + +Benchmark hash and XOF speed and then print summary statistics to +standard output in [CSV][] format and metadata to standard error. + +Requires [libcpucycles][]. + +The columns of the [CSV][] printed to standard output are as follows: + +* `function`: Function name. +* `dst`: Output size, in bytes. +* `src`: Input size, in bytes. +* `median_cpb`: [Median][] of observed CPU cycles divided by the input size. +* `mean_cpb`: [Arithmetic mean][mean] of observed CPU cycles divided by the input size. +* `median_cycles`: [Median][] of observed CPU cycles. +* `mean_cycles`: [Arithmetic mean][mean] of observed CPU cycles. +* `stddev_cycles`: [Standard deviation][stddev] of observed CPU cycles. +* `min_cycles`: Minimum observed CPU cycles. +* `max_cycles`: Maximum observed CPU cycles. + +The metadata printed to standard error is as follows: + +* `version`: version of [libcpucycles][] as reported by `cpucycles_version()` +* `implementation`: [libcpucycles][] backend as reported by `cpucycles_implementation()` +* `persecond`: CPU cycles per second, as reported by `cpucycles_persecond()` +* `num_trials`: Number of trials. + +## Build + +1. Install [libcpucycles][]. +2. Type `make`. Creates an executable named `./bench` in the current + directory. + +## Run + +Type `./bench` to run benchmarks with the default number of trials +(100,000), or `./bench NUM` to run benchmarks with a custom number of +trials. + +**Note:** You may need to adjust your system configuration or run +`bench` as root to grant [libcpucycles][] access to the high-resolution +cycle counter. + +See [the libcpucycles security page][libcpucycles-security] for details. + +## Examples + +Below are example runs of `bench` on a ThinkPad X1 Carbon ([x86-64][], +[AVX-512][] backend) and on an [Odroid N2L][] ([ARM64][], scalar +backend). + +### Lenovo ThinkPad X1 Carbon, 6th Gen (i7-1185G7) + +``` +# enable user-level RDPMC access (run as root) +root> echo 2 > /proc/sys/kernel/perf_event_paranoid + +# print cpu and compiler info +> lscpu | grep -i '^model name:' | sed 's/.*: *//' +11th Gen Intel(R) Core(TM) i7-1185G7 @ 3.00GHz +> gcc --version | head -1 +gcc (Debian 12.2.0-14) 12.2.0 + +# benchmark with 100k trials +> ./bench +TODO... + +# benchmark with 1M trials +> ./bench 1000000 +TODO... +``` + +### Odroid N2L (Cortex-A73) + +``` +# enable user-level perf_even access (run as root) +root> echo 2 > /proc/sys/kernel/perf_event_paranoid + +# print cpu and compiler info +> lscpu | grep -i '^model name' | sed 's/.*: *//' +Cortex-A73 +> gcc --version | head -1 +gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 + +# benchmark with 100k trials +> ./bench +info: cpucycles: version=20240318 implementation=arm64-vct persecond=1800000000 +info: num_trials=100000 +TODO... +``` + +[csv]: https://en.wikipedia.org/wiki/Comma-separated_values + "Comma-Separated Value (CSV)" +[libcpucycles]: https://cpucycles.cr.yp.to/ + "Microlibrary for counting CPU cycles." +[libcpucycles-security]: https://cpucycles.cr.yp.to/security.html + "libcpucycles security documentation" +[median]: https://en.wikipedia.org/wiki/Median + "Median" +[mean]: https://en.wikipedia.org/wiki/Arithmetic_mean + "Arithmetic mean" +[stddev]: https://en.wikipedia.org/wiki/Standard_deviation + "Standard deviation" +[odroid n2l]: https://en.odroid.se/products/odroid-n2l-4gb + "Odroid N2L" +[x86-64]: https://en.wikipedia.org/wiki/X86-64 + "64-bit x86 instruction set." +[arm64]: https://en.wikipedia.org/wiki/AArch64 + "64-bit extension to the ARM instruction set." +[avx-512]: https://en.wikipedia.org/wiki/AVX-512 + "AVX-512: 512-bit extensions to the Advanced Vector Extensions (AVX) instruction set." diff --git a/tests/bench/bench.c b/tests/bench/bench.c new file mode 100644 index 0000000..9144e91 --- /dev/null +++ b/tests/bench/bench.c @@ -0,0 +1,222 @@ +// +// Benchmark all three ML-KEM parameter sets and print summary +// statistics to standard output in CSV format. +// +// Requires libcpucycles (https://cpucycles.cr.yp.to/). +// +// Note: You may need to adjust your system configuration or run `bench` +// as root to grant libcpucycles access to the high-resolution cycle +// counter. See the following URL for details: +// +// https://cpucycles.cr.yp.to/security.html +// + +#include <stdlib.h> // exit(), qsort() +#include <stdio.h> // printf() +#include <string.h> // memcmp() +#include <math.h> // sqrt(), pow() +#include <cpucycles.h> // cpucycles() +#include "sha3.h" // sha3_*(), shake*() +#include "rand-bytes.h" // rand_bytes() + +// default number of trials +#define NUM_TRIALS 100000 + +// Random data used for key generation and encapsulation. +typedef struct { + uint8_t keygen[64], // random data for keygen() + encaps[32]; // random data for encaps() +} seeds_t; + +// Aggregate statistics for a series of tests. +typedef struct { + // min/max/median times + long long lo, hi, median; + + // mean/stddev + double mean, stddev; +} stats_t; + +static void *checked_calloc(const char *name, const size_t nmemb, const size_t size) { + // alloc keygen times + void *mem = calloc(nmemb, size); + if (!mem) { + fprintf(stderr, "%s: calloc() failed\n", name); + exit(-1); + } + return mem; +} + +// Callback for `qsort()` to sort observed times in ascending order. +static int sort_asc_cb(const void *ap, const void *bp) { + const long long *a = ap, *b = bp; + return *a - *b; +} + +// Get summary statistics of a series of test times. +static stats_t get_stats(long long * const vals, const size_t num_vals) { + stats_t stats = { 0 }; + + // sort values in ascending order (used for min, max, and median) + qsort(vals, num_vals, sizeof(long long), sort_asc_cb); + + // get low, high, and median + stats.lo = vals[0]; + stats.hi = vals[num_vals - 1]; + stats.median = vals[num_vals / 2]; + + // calculate mean + for (size_t i = 0; i < num_vals; i++) { + stats.mean += vals[i]; + } + stats.mean /= num_vals; + + // calculate standard deviation + for (size_t i = 0; i < num_vals; i++) { + stats.stddev += pow(stats.mean - vals[i], 2); + } + stats.stddev = sqrt(stats.stddev / num_vals); + + // return stats + return stats; +} + +// define xof benchmark function +#define DEF_BENCH_XOF(FN) \ + static stats_t bench_ ## FN (const size_t num_trials, const size_t src_len, const size_t dst_len) { \ + /* allocate times, src, and dst buffers */ \ + long long *times = checked_calloc(__func__, num_trials, sizeof(long long)); \ + uint8_t *src = checked_calloc(__func__, num_trials, src_len); \ + uint8_t *dst = checked_calloc(__func__, num_trials, dst_len); \ + \ + /* generate random source data */ \ + rand_bytes(src, num_trials * src_len); \ + \ + /* run trials */ \ + for (size_t i = 0; i < num_trials; i++) { \ + /* call function */ \ + const long long t0 = cpucycles(); \ + FN (src + (i * src_len), src_len, dst + (i * dst_len), dst_len); \ + const long long t1 = cpucycles() - t0; \ + \ + /* save time */ \ + times[i] = t1; \ + } \ + \ + /* generate summary stats */ \ + const stats_t stats = get_stats(times, num_trials); \ + \ + /* free buffers */ \ + free(src); \ + free(times); \ + \ + /* return summary stats */ \ + return stats; \ + } + +// define hash benchmark function +#define DEF_BENCH_HASH(FN, OUT_LEN) \ + static stats_t bench_ ## FN (const size_t num_trials, const size_t src_len) { \ + /* allocate times and src buffers */ \ + long long *times = checked_calloc(__func__, num_trials, sizeof(long long)); \ + uint8_t *src = checked_calloc(__func__, src_len, 1); \ + \ + /* run trials */ \ + for (size_t i = 0; i < num_trials; i++) { \ + /* generate random source data */ \ + rand_bytes(src, src_len); \ + \ + /* call function */ \ + uint8_t dst[OUT_LEN] = { 0 }; \ + const long long t0 = cpucycles(); \ + FN (src, src_len, dst); \ + const long long t1 = cpucycles() - t0; \ + \ + /* save time */ \ + times[i] = t1; \ + } \ + \ + /* generate summary stats */ \ + const stats_t stats = get_stats(times, num_trials); \ + \ + /* free buffers */ \ + free(src); \ + free(times); \ + \ + /* return summary stats */ \ + return stats; \ + } + +// define xof benchmarks *() +DEF_BENCH_XOF(shake128) +DEF_BENCH_XOF(shake256) + +// define hash benchmarks +DEF_BENCH_HASH(sha3_224, 28) +DEF_BENCH_HASH(sha3_256, 32) +DEF_BENCH_HASH(sha3_384, 48) +DEF_BENCH_HASH(sha3_512, 64) + +// print function stats to standard output as CSV row. +static void print_row(const char *name, const size_t src_len, const size_t dst_len, stats_t fs) { + const double median_cpb = 1.0 * fs.median / src_len, + mean_cpb = 1.0 * fs.mean / src_len; + printf("%s,%zu,%zu,%.0f,%.0f,%lld,%.0f,%.0f,%lld,%lld\n", name, dst_len, src_len, median_cpb, mean_cpb, fs.median, fs.mean, fs.stddev, fs.lo, fs.hi); +} + +#define MIN_SRC_LEN 64 +#define MAX_SRC_LEN 2048 + +#define MIN_DST_LEN 32 +#define MAX_DST_LEN 128 + +int main(int argc, char *argv[]) { + // get number of trials from first command-line argument, or fall back + // to default if no argument was provided + const size_t num_trials = (argc > 1) ? atoi(argv[1]) : NUM_TRIALS; + if (num_trials < 2) { + fprintf(stderr, "num_trials must be greater than 1\n"); + return -1; + } + + // print metadata to stderr + fprintf(stderr,"info: cpucycles: version=%s implementation=%s persecond=%lld\ninfo: num_trials=%zu\n", cpucycles_version(), cpucycles_implementation(), cpucycles_persecond(), num_trials); + + // print column headers to stdout + printf("function,dst,src,median_cpb,mean_cpb,median_cycles,mean_cycles,stddev_cycles,min_cycles,max_cycles\n"); + + // sha3-224 + for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) { + print_row("sha3_224", src_len, 28, bench_sha3_224(num_trials, src_len)); + } + + // sha3-256 + for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) { + print_row("sha3_256", src_len, 32, bench_sha3_256(num_trials, src_len)); + } + + // sha3-384 + for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) { + print_row("sha3_384", src_len, 48, bench_sha3_384(num_trials, src_len)); + } + + // sha3-512 + for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) { + print_row("sha3_512", src_len, 64, bench_sha3_512(num_trials, src_len)); + } + + for (size_t dst_len = MIN_DST_LEN; dst_len < MAX_DST_LEN; dst_len <<= 1) { + // shake128 + for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) { + print_row("shake128", src_len, dst_len, bench_shake128(num_trials, src_len, dst_len)); + } + + // shake256 + for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) { + print_row("shake256", src_len, dst_len, bench_shake256(num_trials, src_len, dst_len)); + } + } + + // return success + return 0; +} diff --git a/tests/bench/rand-bytes.h b/tests/bench/rand-bytes.h new file mode 120000 index 0000000..421eaa6 --- /dev/null +++ b/tests/bench/rand-bytes.h @@ -0,0 +1 @@ +../../rand-bytes.h
\ No newline at end of file diff --git a/tests/bench/sha3.c b/tests/bench/sha3.c new file mode 120000 index 0000000..4748193 --- /dev/null +++ b/tests/bench/sha3.c @@ -0,0 +1 @@ +../../sha3.c
\ No newline at end of file diff --git a/tests/bench/sha3.h b/tests/bench/sha3.h new file mode 120000 index 0000000..b7c53d4 --- /dev/null +++ b/tests/bench/sha3.h @@ -0,0 +1 @@ +../../sha3.h
\ No newline at end of file |