aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--tests/bench/README.md41
-rw-r--r--tests/bench/bench.c181
2 files changed, 129 insertions, 93 deletions
diff --git a/tests/bench/README.md b/tests/bench/README.md
index 0d792e9..c2c3b73 100644
--- a/tests/bench/README.md
+++ b/tests/bench/README.md
@@ -1,21 +1,21 @@
# bench
-Benchmark hash and XOF speed and then print summary statistics to
-standard output in [CSV][] format and metadata to standard error.
+Benchmark [hash][] functions and [XOFs][xof], then print metadata to
+standard error and print a table of [median][] [cycles per byte (cpb)][cycles per byte]
+for each function and input message length to standard output in [CSV][]
+format.
Requires [libcpucycles][].
The columns of the [CSV][] printed to standard output are as follows:
* `function`: Function name.
-* `dst`: Output length, in bytes.
-* `src`: Input length, in bytes.
-* `median_cpb`: [Median][] [cycles per byte][] (e.g. `median_cycles/src`).
-* `median_cycles`: [Median][] of observed CPU cycles.
-* `mean_cycles`: [Arithmetic mean][mean] of observed CPU cycles.
-* `stddev_cycles`: [Standard deviation][stddev] of observed CPU cycles.
-* `min_cycles`: Minimum observed CPU cycles.
-* `max_cycles`: Maximum observed CPU cycles.
+* `dst_len`: Output digest length, in bytes.
+* `64`: [Median][] [cycles per byte (cpb)][cycles per byte] for a 64 byte input message.
+* `256`: [Median][] [cycles per byte (cpb)][cycles per byte] for a 256 byte input message.
+* `1024`: [Median][] [cycles per byte (cpb)][cycles per byte] for a 1024 byte input message.
+* `4096`: [Median][] [cycles per byte (cpb)][cycles per byte] for a 4096 byte input message.
+* `16384`: [Median][] [cycles per byte (cpb)][cycles per byte] for a 16384 byte input message.
The metadata printed to standard error is as follows:
@@ -23,6 +23,9 @@ The metadata printed to standard error is as follows:
* `implementation`: [libcpucycles][] backend as reported by `cpucycles_implementation()`
* `persecond`: CPU cycles per second, as reported by `cpucycles_persecond()`
* `num_trials`: Number of trials.
+* `src_lens`: Comma-delimited list of input message lengths, in bytes.
+* `dst_lens`: Comma-delimited list of output digest lengths, in bytes
+ (only used for [XOFs][xof]).
## Build
@@ -62,11 +65,15 @@ gcc (Debian 12.2.0-14) 12.2.0
# benchmark with 100k trials
> ./bench
-TODO...
-
-# benchmark with 1M trials
-> ./bench 1000000
-TODO...
+info: cpucycles: version=20240318 implementation=amd64-pmc persecond=4800000000
+info: num_trials=100000 src_lens=64,256,1024,4096,16384 dst_lens=32
+function,dst_len,64,256,1024,4096,16384
+sha3_224,28,15.4,7.8,7.8,7.1,7.0
+sha3_256,32,15.4,7.8,7.8,7.6,7.4
+sha3_384,48,15.5,11.7,9.8,9.8,9.7
+sha3_512,64,15.4,15.5,14.6,13.9,13.9
+shake128,32,15.5,7.8,6.9,6.2,6.1
+shake256,32,15.6,7.8,7.9,7.6,7.4
```
### Odroid N2L (Cortex-A73)
@@ -110,3 +117,7 @@ TODO...
"AVX-512: 512-bit extensions to the Advanced Vector Extensions (AVX) instruction set."
[cycles per byte]: https://en.wikipedia.org/wiki/Encryption_software#Performance
"Observed CPU cycles divided by the number of input bytes."
+[xof]: https://en.wikipedia.org/wiki/Extendable-output_function
+ "Extendable-Output Function (XOF)"
+[hash]: https://en.wikipedia.org/wiki/Cryptographic_hash_function
+ "Cryptographic hash function"
diff --git a/tests/bench/bench.c b/tests/bench/bench.c
index b147ad5..cd03205 100644
--- a/tests/bench/bench.c
+++ b/tests/bench/bench.c
@@ -1,6 +1,8 @@
//
-// Benchmark all three ML-KEM parameter sets and print summary
-// statistics to standard output in CSV format.
+// Benchmark hash functions and extendable output functions (XOFs), then
+// print metadata to standard error and print a table of median cycles
+// per byte (cpb) for each function and input message length to standard
+// output in CSV format.
//
// Requires libcpucycles (https://cpucycles.cr.yp.to/).
//
@@ -22,19 +24,32 @@
// default number of trials
#define NUM_TRIALS 100000
-// Random data used for key generation and encapsulation.
-typedef struct {
- uint8_t keygen[64], // random data for keygen()
- encaps[32]; // random data for encaps()
-} seeds_t;
+// input sizes (used for hashes and xofs)
+static const size_t SRC_LENS[] = { 64, 256, 1024, 4096, 16384 };
+#define NUM_SRC_LENS (sizeof(SRC_LENS)/sizeof(SRC_LENS[0]))
+
+// output sizes (used for xofs)
+static const size_t DST_LENS[] = { 32 };
+#define NUM_DST_LENS (sizeof(DST_LENS)/sizeof(DST_LENS[0]))
+
+// get maximum source length: returns the largest entry in SRC_LENS (0 if none is larger than 0)
+static size_t get_max_src_len(void) {
+ size_t r = 0;
+
+ for (size_t i = 0; i < NUM_SRC_LENS; i++) {
+ r = (SRC_LENS[i] > r) ? SRC_LENS[i] : r;
+ }
+
+ return r;
+}
// Aggregate statistics for a series of tests.
typedef struct {
// min/max/median times
long long lo, hi, median;
- // mean/stddev
- double mean, stddev;
+ // mean/stddev, median_cpb
+ double mean, stddev, median_cpb;
} stats_t;
static void *checked_calloc(const char *name, const size_t nmemb, const size_t size) {
@@ -54,7 +69,7 @@ static int sort_asc_cb(const void *ap, const void *bp) {
}
// Get summary statistics of a series of test times.
-static stats_t get_stats(long long * const vals, const size_t num_vals) {
+static stats_t get_stats(long long * const vals, const size_t num_vals, const size_t len) {
stats_t stats = { 0 };
// sort values in ascending order (used for min, max, and median)
@@ -65,6 +80,9 @@ static stats_t get_stats(long long * const vals, const size_t num_vals) {
stats.hi = vals[num_vals - 1];
stats.median = vals[num_vals / 2];
+ // calculate median cpb
+ stats.median_cpb = 1.0 * stats.median / len;
+
// calculate mean
for (size_t i = 0; i < num_vals; i++) {
stats.mean += vals[i];
@@ -83,70 +101,72 @@ static stats_t get_stats(long long * const vals, const size_t num_vals) {
// define xof benchmark function
#define DEF_BENCH_XOF(FN) \
- static stats_t bench_ ## FN (const size_t num_trials, const size_t src_len, const size_t dst_len) { \
+ static void bench_ ## FN (double * const cpbs, const size_t num_trials, const size_t dst_len) { \
/* allocate times, src, and dst buffers */ \
long long *times = checked_calloc(__func__, num_trials, sizeof(long long)); \
- uint8_t *src = checked_calloc(__func__, 1, src_len); \
+ uint8_t *src = checked_calloc(__func__, 1, get_max_src_len()); \
uint8_t *dst = checked_calloc(__func__, num_trials, dst_len); \
\
- /* run trials */ \
- for (size_t i = 0; i < num_trials; i++) { \
- /* generate random source data */ \
- rand_bytes(src, src_len); \
+ for (size_t i = 0; i < NUM_SRC_LENS; i++) { \
+ const size_t src_len = SRC_LENS[i]; /* get source length */ \
\
- /* call function */ \
- const long long t0 = cpucycles(); \
- FN (src, src_len, dst + (i * dst_len), dst_len); \
- const long long t1 = cpucycles() - t0; \
+ /* run trials */ \
+ for (size_t j = 0; j < num_trials; j++) { \
+ /* generate random source data */ \
+ rand_bytes(src, src_len); \
+ \
+ /* call function */ \
+ const long long t0 = cpucycles(); \
+ FN (src, src_len, dst + (j * dst_len), dst_len); \
+ const long long t1 = cpucycles() - t0; \
+ \
+ /* save time */ \
+ times[j] = t1; \
+ } \
\
- /* save time */ \
- times[i] = t1; \
+ /* generate summary stats, save cpb */ \
+ cpbs[i] = 1.0 * get_stats(times, num_trials, src_len).median_cpb; \
} \
\
- /* generate summary stats */ \
- const stats_t stats = get_stats(times, num_trials); \
- \
/* free buffers */ \
free(times); \
free(src); \
free(dst); \
- \
- /* return summary stats */ \
- return stats; \
}
// define hash benchmark function
#define DEF_BENCH_HASH(FN, OUT_LEN) \
- static stats_t bench_ ## FN (const size_t num_trials, const size_t src_len) { \
+ static void bench_ ## FN (double * const cpbs, const size_t num_trials) { \
/* allocate times, src, and dst buffers */ \
long long *times = checked_calloc(__func__, num_trials, sizeof(long long)); \
- uint8_t *src = checked_calloc(__func__, 1, src_len); \
+ uint8_t *src = checked_calloc(__func__, 1, get_max_src_len()); \
uint8_t *dst = checked_calloc(__func__, num_trials, OUT_LEN); \
\
- /* run trials */ \
- for (size_t i = 0; i < num_trials; i++) { \
- /* generate random source data */ \
- rand_bytes(src, src_len); \
+ for (size_t i = 0; i < NUM_SRC_LENS; i++) { \
+ const size_t src_len = SRC_LENS[i]; /* get source length */ \
\
- /* call function */ \
- const long long t0 = cpucycles(); \
- FN (src, src_len, dst + (i * OUT_LEN)); \
- const long long t1 = cpucycles() - t0; \
+ /* run trials */ \
+ for (size_t j = 0; j < num_trials; j++) { \
+ /* generate random source data */ \
+ rand_bytes(src, src_len); \
+ \
+ /* call function */ \
+ const long long t0 = cpucycles(); \
+ FN (src, src_len, dst + (j * OUT_LEN)); \
+ const long long t1 = cpucycles() - t0; \
+ \
+ /* save time */ \
+ times[j] = t1; \
+ } \
\
- /* save time */ \
- times[i] = t1; \
+ /* generate summary stats, save cpb */ \
+ cpbs[i] = 1.0 * get_stats(times, num_trials, src_len).median_cpb; \
} \
\
- /* generate summary stats */ \
- const stats_t stats = get_stats(times, num_trials); \
- \
/* free buffers */ \
free(times); \
free(src); \
free(dst); \
- \
- /* return summary stats */ \
- return stats; \
}
// define xof benchmarks
@@ -160,20 +180,17 @@ DEF_BENCH_HASH(sha3_384, 48)
DEF_BENCH_HASH(sha3_512, 64)
// print function stats to standard output as CSV row.
-static void print_row(const char *name, const size_t src_len, const size_t dst_len, stats_t fs) {
- const double median_cpb = 1.0 * fs.median / src_len;
- printf("%s,%zu,%zu,%.1f,%lld,%.0f,%.0f,%lld,%lld\n", name, dst_len, src_len, median_cpb, fs.median, fs.mean, fs.stddev, fs.lo, fs.hi);
+static void print_row(const char *name, const size_t dst_len, double * const cpbs) {
+ printf("%s,%zu", name, dst_len);
+ for (size_t i = 0; i < NUM_SRC_LENS; i++) {
+ printf(",%.1f", cpbs[i]);
+ }
+ fputs("\n", stdout);
}
-// input sizes (used for hashes and xofs)
-#define MIN_SRC_LEN (1<<6) // minimum source length (inclusive)
-#define MAX_SRC_LEN (1<<14) // maximum source length (exclusive)
-
-// output sizes (used for xofs)
-#define MIN_DST_LEN (1<<5) // minimum source length (inclusive)
-#define MAX_DST_LEN (1<<7) // maximum source length (exclusive)
-
int main(int argc, char *argv[]) {
+ double cpbs[NUM_SRC_LENS];
+
// get number of trials from first command-line argument, or fall back
// to default if no argument was provided
const size_t num_trials = (argc > 1) ? atoi(argv[1]) : NUM_TRIALS;
@@ -183,42 +200,50 @@ int main(int argc, char *argv[]) {
}
// print metadata to stderr
- fprintf(stderr,"info: cpucycles: version=%s implementation=%s persecond=%lld\ninfo: num_trials=%zu\n", cpucycles_version(), cpucycles_implementation(), cpucycles_persecond(), num_trials);
+ fprintf(stderr,"info: cpucycles: version=%s implementation=%s persecond=%lld\ninfo: num_trials=%zu src_lens", cpucycles_version(), cpucycles_implementation(), cpucycles_persecond(), num_trials);
+ for (size_t i = 0; i < NUM_SRC_LENS; i++) {
+ fprintf(stderr, "%s%zu", (i > 0) ? "," : "=", SRC_LENS[i]);
+ }
+ fputs(" dst_lens", stderr);
+ for (size_t i = 0; i < NUM_DST_LENS; i++) {
+ fprintf(stderr, "%s%zu", (i > 0) ? "," : "=", DST_LENS[i]);
+ }
+ fputs("\n", stderr);
// print column headers to stdout
- printf("function,dst,src,median_cpb,median_cycles,mean_cycles,stddev_cycles,min_cycles,max_cycles\n");
+ fputs("function,dst_len", stdout);
+ for (size_t i = 0; i < NUM_SRC_LENS; i++) {
+ printf(",%zu", SRC_LENS[i]);
+ }
+ fputs("\n", stdout);
// sha3-224
- for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) {
- print_row("sha3_224", src_len, 28, bench_sha3_224(num_trials, src_len));
- }
+ bench_sha3_224(cpbs, num_trials);
+ print_row("sha3_224", 28, cpbs);
// sha3-256
- for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) {
- print_row("sha3_256", src_len, 32, bench_sha3_256(num_trials, src_len));
- }
+ bench_sha3_256(cpbs, num_trials);
+ print_row("sha3_256", 32, cpbs);
// sha3-384
- for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) {
- print_row("sha3_384", src_len, 48, bench_sha3_384(num_trials, src_len));
- }
+ bench_sha3_384(cpbs, num_trials);
+ print_row("sha3_384", 48, cpbs);
// sha3-512
- for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) {
- print_row("sha3_512", src_len, 64, bench_sha3_512(num_trials, src_len));
- }
+ bench_sha3_512(cpbs, num_trials);
+ print_row("sha3_512", 64, cpbs);
// test xofs
- for (size_t dst_len = MIN_DST_LEN; dst_len < MAX_DST_LEN; dst_len <<= 1) {
+ for (size_t i = 0; i < NUM_DST_LENS; i++) {
+ const size_t dst_len = DST_LENS[i];
+
// shake128
- for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) {
- print_row("shake128", src_len, dst_len, bench_shake128(num_trials, src_len, dst_len));
- }
+ bench_shake128(cpbs, num_trials, dst_len);
+ print_row("shake128", dst_len, cpbs);
// shake256
- for (size_t src_len = MIN_SRC_LEN; src_len < MAX_SRC_LEN; src_len <<= 1) {
- print_row("shake256", src_len, dst_len, bench_shake256(num_trials, src_len, dst_len));
- }
+ bench_shake256(cpbs, num_trials, dst_len);
+ print_row("shake256", dst_len, cpbs);
}
// return success