aboutsummaryrefslogtreecommitdiff
path: root/tests/bench/bench.c
diff options
context:
space:
mode:
authorPaul Duncan <pabs@pablotron.org>2024-04-29 16:49:42 -0400
committerPaul Duncan <pabs@pablotron.org>2024-04-29 16:49:42 -0400
commitfc38ab69ff049f0a87d58d185f9931b8d0d6b5e4 (patch)
tree5c186f85ec25d799fba824e4d2e78a2c53d7c1b3 /tests/bench/bench.c
parent6dc9ea81cb7a2ae5117f0a9cb4af7ccf7aaf97d0 (diff)
downloadsha3-fc38ab69ff049f0a87d58d185f9931b8d0d6b5e4.tar.bz2
sha3-fc38ab69ff049f0a87d58d185f9931b8d0d6b5e4.zip
tests/bench: refactor so bench prints a cpb table to stdout
Diffstat (limited to 'tests/bench/bench.c')
-rw-r--r--tests/bench/bench.c181
1 files changed, 103 insertions, 78 deletions
diff --git a/tests/bench/bench.c b/tests/bench/bench.c
index b147ad5..cd03205 100644
--- a/tests/bench/bench.c
+++ b/tests/bench/bench.c
@@ -1,6 +1,8 @@
//
-// Benchmark all three ML-KEM parameter sets and print summary
-// statistics to standard output in CSV format.
+// Benchmark hash functions and extendable output functions (XOFs), then
+// print metadata to standard error and print a table of median cycles
+// per byte (cpb) for each function and input message length to standard
+// output in CSV format.
//
// Requires libcpucycles (https://cpucycles.cr.yp.to/).
//
@@ -22,19 +24,32 @@
// default number of trials
#define NUM_TRIALS 100000
-// Random data used for key generation and encapsulation.
-typedef struct {
- uint8_t keygen[64], // random data for keygen()
- encaps[32]; // random data for encaps()
-} seeds_t;
+// input sizes (used for hashes and xofs)
+static const size_t SRC_LENS[] = { 64, 256, 1024, 4096, 16384 };
+#define NUM_SRC_LENS (sizeof(SRC_LENS)/sizeof(SRC_LENS[0]))
+
+// output sizes (used for xofs)
+static const size_t DST_LENS[] = { 32 };
+#define NUM_DST_LENS (sizeof(DST_LENS)/sizeof(DST_LENS[0]))
+
+// get maximum source length
+static size_t get_max_src_len(void) {
+ size_t r = 0;
+
+ for (size_t i = 0; i < NUM_SRC_LENS; i++) {
+ r = (SRC_LENS[i] > r) ? SRC_LENS[i] : r;
+ }
+
+ return r;
+}
// Aggregate statistics for a series of tests.
typedef struct {
// min/max/median times
long long lo, hi, median;
- // mean/stddev
- double mean, stddev;
+ // mean/stddev, median_cpb
+ double mean, stddev, median_cpb;
} stats_t;
static void *checked_calloc(const char *name, const size_t nmemb, const size_t size) {
@@ -54,7 +69,7 @@ static int sort_asc_cb(const void *ap, const void *bp) {
}
// Get summary statistics of a series of test times.
-static stats_t get_stats(long long * const vals, const size_t num_vals) {
+static stats_t get_stats(long long * const vals, const size_t num_vals, const size_t len) {
stats_t stats = { 0 };
// sort values in ascending order (used for min, max, and median)
@@ -65,6 +80,9 @@ static stats_t get_stats(long long * const vals, const size_t num_vals) {
stats.hi = vals[num_vals - 1];
stats.median = vals[num_vals / 2];
+ // calculate median cpb
+ stats.median_cpb = 1.0 * stats.median / len;
+
// calculate mean
for (size_t i = 0; i < num_vals; i++) {
stats.mean += vals[i];
@@ -83,70 +101,72 @@ static stats_t get_stats(long long * const vals, const size_t num_vals) {
// define xof benchmark function
#define DEF_BENCH_XOF(FN) \
- static stats_t bench_ ## FN (const size_t num_trials, const size_t src_len, const size_t dst_len) { \
+ static void bench_ ## FN (double * const cpbs, const size_t num_trials, const size_t dst_len) { \
/* allocate times, src, and dst buffers */ \
long long *times = checked_calloc(__func__, num_trials, sizeof(long long)); \
- uint8_t *src = checked_calloc(__func__, 1, src_len); \
+ uint8_t *src = checked_calloc(__func__, 1, get_max_src_len()); \
uint8_t *dst = checked_calloc(__func__, num_trials, dst_len); \
\
- /* run trials */ \
- for (size_t i = 0; i < num_trials; i++) { \
- /* generate random source data */ \
- rand_bytes(src, src_len); \
+ for (size_t i = 0; i < NUM_SRC_LENS; i++) { \
+ const size_t src_len = SRC_LENS[i]; /* get source length */ \
\
- /* call function */ \
- const long long t0 = cpucycles(); \
- FN (src, src_len, dst + (i * dst_len), dst_len); \
- const long long t1 = cpucycles() - t0; \
+ /* run trials */ \
+ for (size_t j = 0; j < num_trials; j++) { \
+ /* generate random source data */ \
+ rand_bytes(src, src_len); \
+ \
+ /* call function */ \
+ const long long t0 = cpucycles(); \
+ FN (src, src_len, dst + (j * dst_len), dst_len); \
+ const long long t1 = cpucycles() - t0; \
+ \
+ /* save time */ \
+ times[j] = t1; \
+ } \
\
- /* save time */ \
- times[i] = t1; \
+ /* generate summary stats, save cpb */ \
+ cpbs[i] = 1.0 * get_stats(times, num_trials, src_len).median_cpb; \
} \
\
- /* generate summary stats */ \
- const stats_t stats = get_stats(times, num_trials); \
- \
/* free buffers */ \
free(times); \
free(src); \
free(dst); \
- \
- /* return summary stats */ \
- return stats; \
}
// define hash benchmark function
#define DEF_BENCH_HASH(FN, OUT_LEN) \
- static stats_t bench_ ## FN (const size_t num_trials, const size_t src_len) { \
+ static void bench_ ## FN (double * const cpbs, const size_t num_trials) { \
/* allocate times, src, and dst buffers */ \
long long *times = checked_calloc(__func__, num_trials, sizeof(long long)); \
- uint8_t *src = checked_calloc(__func__, 1, src_len); \
+ uint8_t *src = checked_calloc(__func__, 1, get_max_src_len()); \
uint8_t *dst = checked_calloc(__func__, num_trials, OUT_LEN); \
\
- /* run trials */ \
- for (size_t i = 0; i < num_trials; i++) { \
- /* generate random source data */ \
- rand_bytes(src, src_len); \
+ for (size_t i = 0; i < NUM_SRC_LENS; i++) { \
+ const size_t src_len = SRC_LENS[i]; /* get source length */ \
\
- /* call function */ \
- const long long t0 = cpucycles(); \
- FN (src, src_len, dst + (i * OUT_LEN)); \
- const long long t1 = cpucycles() - t0; \
+ /* run trials */ \
+ for (size_t j = 0; j < num_trials; j++) { \
+ /* generate random source data */ \
+ rand_bytes(src, src_len); \
+ \
+ /* call function */ \
+ const long long t0 = cpucycles(); \
+ FN (src, src_len, dst + (j * OUT_LEN)); \
+ const long long t1 = cpucycles() - t0; \
+ \
+ /* save time */ \
+ times[j] = t1; \
+ } \
\
- /* save time */ \
- times[i] = t1; \
+ /* generate summary stats, save cpb */ \
+ cpbs[i] = 1.0 * get_stats(times, num_trials, src_len).median_cpb; \
} \
\
- /* generate summary stats */ \
- const stats_t stats = get_stats(times, num_trials); \
- \
/* free buffers */ \
free(times); \
free(src); \
free(dst); \
- \
- /* return summary stats */ \
- return stats; \
}
// define xof benchmarks
@@ -160,20 +180,17 @@ DEF_BENCH_HASH(sha3_384, 48)
DEF_BENCH_HASH(sha3_512, 64)
// print function stats to standard output as CSV row.
-static void print_row(const char *name, const size_t src_len, const size_t dst_len, stats_t fs) {
- const double median_cpb = 1.0 * fs.median / src_len;
- printf("%s,%zu,%zu,%.1f,%lld,%.0f,%.0f,%lld,%lld\n", name, dst_len, src_len, median_cpb, fs.median, fs.mean, fs.stddev, fs.lo, fs.hi);
+static void print_row(const char *name, const size_t dst_len, double * const cpbs) {
+ printf("%s,%zu", name, dst_len);
+ for (size_t i = 0; i < NUM_SRC_LENS; i++) {
+ printf(",%.1f", cpbs[i]);
+ }
+ fputs("\n", stdout);
}
-// input sizes (used for hashes and xofs)
-#define MIN_SRC_LEN (1<<6) // minimum source length (inclusive)
-#define MAX_SRC_LEN (1<<14) // maximum source length (exclusive)
-
-// output sizes (used for xofs)
-#define MIN_DST_LEN (1<<5) // minimum source length (inclusive)
-#define MAX_DST_LEN (1<<7) // maximum source length (exclusive)
-
int main(int argc, char *argv[]) {
+ double cpbs[NUM_SRC_LENS];
+
// get number of trials from first command-line argument, or fall back
// to default if no argument was provided
const size_t num_trials = (argc > 1) ? atoi(argv[1]) : NUM_TRIALS;
@@ -183,42 +200,50 @@ int main(int argc, char *argv[]) {
}
// print metadata to stderr
- fprintf(stderr,"info: cpucycles: version=%s implementation=%s persecond=%lld\ninfo: num_trials=%zu\n", cpucycles_version(), cpucycles_implementation(), cpucycles_persecond(), num_trials);
+ fprintf(stderr,"info: cpucycles: version=%s implementation=%s persecond=%lld\ninfo: num_trials=%zu src_lens", cpucycles_version(), cpucycles_implementation(), cpucycles_persecond(), num_trials);
+ for (size_t i = 0; i < NUM_SRC_LENS; i++) {
+ fprintf(stderr, "%s%zu", (i > 0) ? "," : "=", SRC_LENS[i]);
+ }
+ fputs(" dst_lens", stderr);
+ for (size_t i = 0; i < NUM_DST_LENS; i++) {
+ fprintf(stderr, "%s%zu", (i > 0) ? "," : "=", DST_LENS[i]);
+ }
+ fputs("\n", stderr);
// print column headers to stdout
- printf("function,dst,src,median_cpb,median_cycles,mean_cycles,stddev_cycles,min_cycles,max_cycles\n");
+ fputs("function,dst_len", stdout);
+ for (size_t i = 0; i < NUM_SRC_LENS; i++) {
+ printf(",%zu", SRC_LENS[i]);
+ }
+ fputs("\n", stdout);
// sha3-224
- for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) {
- print_row("sha3_224", src_len, 28, bench_sha3_224(num_trials, src_len));
- }
+ bench_sha3_224(cpbs, num_trials);
+ print_row("sha3_224", 28, cpbs);
// sha3-256
- for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) {
- print_row("sha3_256", src_len, 32, bench_sha3_256(num_trials, src_len));
- }
+ bench_sha3_256(cpbs, num_trials);
+ print_row("sha3_256", 32, cpbs);
// sha3-384
- for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) {
- print_row("sha3_384", src_len, 48, bench_sha3_384(num_trials, src_len));
- }
+ bench_sha3_384(cpbs, num_trials);
+ print_row("sha3_384", 48, cpbs);
// sha3-512
- for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) {
- print_row("sha3_512", src_len, 64, bench_sha3_512(num_trials, src_len));
- }
+ bench_sha3_512(cpbs, num_trials);
+ print_row("sha3_512", 64, cpbs);
// test xofs
- for (size_t dst_len = MIN_DST_LEN; dst_len < MAX_DST_LEN; dst_len <<= 1) {
+ for (size_t i = 0; i < NUM_DST_LENS; i++) {
+ const size_t dst_len = DST_LENS[i];
+
// shake128
- for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) {
- print_row("shake128", src_len, dst_len, bench_shake128(num_trials, src_len, dst_len));
- }
+ bench_shake128(cpbs, num_trials, dst_len);
+ print_row("shake128", dst_len, cpbs);
// shake256
- for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) {
- print_row("shake256", src_len, dst_len, bench_shake256(num_trials, src_len, dst_len));
- }
+ bench_shake256(cpbs, num_trials, dst_len);
+ print_row("shake256", dst_len, cpbs);
}
// return success