aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Duncan <pabs@pablotron.org>2019-07-16 20:24:47 -0400
committerPaul Duncan <pabs@pablotron.org>2019-07-16 20:24:47 -0400
commitae8484372cbbf0bad041f3bd9b11c18d9cf37be7 (patch)
treebd73b1e0ee6fb9e3b98c59cdd9de1689ffad93b8
parentd245b30a36301ec7344aa193f29d4244e5511452 (diff)
downloadsha2-ae8484372cbbf0bad041f3bd9b11c18d9cf37be7.tar.bz2
sha2-ae8484372cbbf0bad041f3bd9b11c18d9cf37be7.zip
make faster
-rw-r--r--Makefile2
-rw-r--r--main.c30
-rw-r--r--sha256.c169
3 files changed, 149 insertions, 52 deletions
diff --git a/Makefile b/Makefile
index 60f10dc..25e3b41 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-CFLAGS=-W -Wall -Wextra -pedantic -O3 -std=c11
+CFLAGS=-std=c11 -W -Wall -Wextra -pedantic -O3 -march=native
OBJS=sha256.o main.o tests.o
APP=sha256
diff --git a/main.c b/main.c
index 3540a32..7c795ef 100644
--- a/main.c
+++ b/main.c
@@ -3,8 +3,6 @@
#include "sha256.h"
#include "tests.h"
-static uint8_t dst[SHA256_HASH_SIZE];
-
static void print_hash(const uint8_t * const hash) {
for (size_t i = 0; i < SHA256_HASH_SIZE; i++) {
printf("%02x", hash[i]);
@@ -32,16 +30,34 @@ static void on_test_fail(
printf("\n");
}
+static uint8_t dst[SHA256_HASH_SIZE];
+static uint8_t buf[1 << 20];
+
int main(int argc, char *argv[]) {
if (argc > 1) {
- // if command-line parameters are given, then hash and print them
- // instead of running the test vectors
+ // if command-line parameters are given, then treat them as a
+ // list of files: open each file, hash it, and print the
+ // result instead of running the test vectors
for (int i = 1; i < argc; i++) {
- const char * const src = argv[i];
+ sha256_t ctx;
+ sha256_init(&ctx);
+
+ FILE *fh = fopen(argv[i], "rb");
+ if (!fh) {
+ fprintf(stderr, "fopen(\"%s\") failed", argv[i]);
+ return 1;
+ }
+
+ size_t len = 0;
+ while ((len = fread(buf, 1, sizeof(buf), fh)) > 0) {
+ sha256_push(&ctx, buf, len);
+ }
+
+ fclose(fh);
- sha256((const uint8_t *) src, strlen(src), dst);
- print_row(src, dst);
+ sha256_fini(&ctx, dst);
+ print_row(argv[i], dst);
}
} else {
// no command-line parameters given. run internal tests
diff --git a/sha256.c b/sha256.c
index 8249a0c..1391466 100644
--- a/sha256.c
+++ b/sha256.c
@@ -41,22 +41,50 @@ void sha256_init(sha256_t * const ctx) {
memcpy(ctx->h, H, sizeof(H));
}
-// decode buffer data as 32-bit words (used for the first 16 words)
-#define WI(ctx, i) ( \
- (((uint32_t) (ctx)->buf[4 * (i) + 0]) << 24) | \
- (((uint32_t) (ctx)->buf[4 * (i) + 1]) << 16) | \
- (((uint32_t) (ctx)->buf[4 * (i) + 2]) << 8) | \
- ((uint32_t) (ctx)->buf[4 * (i) + 3]) \
+// WI: decode buffer data as 32-bit words (used for the first 16 words)
+#define WI(i) ( \
+ (((uint32_t) ctx->buf[4 * (i) + 0]) << 24) | \
+ (((uint32_t) ctx->buf[4 * (i) + 1]) << 16) | \
+ (((uint32_t) ctx->buf[4 * (i) + 2]) << 8) | \
+ ((uint32_t) ctx->buf[4 * (i) + 3]) \
)
+// WE: expand first 16 buffer words into remaining 48 words
+#define WE(i) do { \
+ const uint32_t w2 = w[(i) - 2], \
+ w7 = w[(i) - 7], \
+ w15 = w[(i) - 15], \
+ w16 = w[(i) - 16], \
+ s0 = rr(w15, 7) ^ rr(w15, 18) ^ (w15 >> 3), \
+ s1 = rr(w2, 17) ^ rr(w2, 19) ^ (w2 >> 10); \
+ w[i] = w16 + s0 + w7 + s1; \
+} while (0)
+
+// WC: compress word
+#define WC(i) do { \
+ const uint32_t s1 = rr(hs[4], 6) ^ rr(hs[4], 11) ^ rr(hs[4], 25), \
+ ch = (hs[4] & hs[5]) ^ ((~(hs[4])) & hs[6]), \
+ t0 = hs[7] + s1 + ch + K[i] + w[i], \
+ s0 = rr(hs[0], 2) ^ rr(hs[0], 13) ^ rr(hs[0], 22), \
+ mj = (hs[0] & hs[1]) ^ (hs[0] & hs[2]) ^ (hs[1] & hs[2]), \
+ t1 = s0 + mj; \
+\
+ hs[7] = hs[6]; \
+ hs[6] = hs[5]; \
+ hs[5] = hs[4]; \
+ hs[4] = hs[3] + t0; \
+ hs[3] = hs[2]; \
+ hs[2] = hs[1]; \
+ hs[1] = hs[0]; \
+ hs[0] = t0 + t1; \
+} while (0)
+
static void
sha256_block(sha256_t * const ctx) {
// init first 16 words from buffer
uint32_t w[64] = {
- WI(ctx, 0), WI(ctx, 1), WI(ctx, 2), WI(ctx, 3),
- WI(ctx, 4), WI(ctx, 5), WI(ctx, 6), WI(ctx, 7),
- WI(ctx, 8), WI(ctx, 9), WI(ctx, 10), WI(ctx, 11),
- WI(ctx, 12), WI(ctx, 13), WI(ctx, 14), WI(ctx, 15),
+ WI(0), WI(1), WI(2), WI(3), WI(4), WI(5), WI(6), WI(7),
+ WI(8), WI(9), WI(10), WI(11), WI(12), WI(13), WI(14), WI(15),
0,
};
@@ -67,14 +95,33 @@ sha256_block(sha256_t * const ctx) {
// s0 := (w[i-15] rr 7) xor (w[i-15] rr 18) xor (w[i-15] rs 3)
// s1 := (w[i- 2] rr 17) xor (w[i- 2] rr 19) xor (w[i- 2] rs 10)
// w[i] := w[i-16] + s0 + w[i-7] + s1
+ //
+ // for (size_t i = 16; i < 64; i++) {
+ // const uint32_t w2 = w[i - 2],
+ // w7 = w[i - 7],
+ // w15 = w[i - 15],
+ // w16 = w[i - 16],
+ // s0 = rr(w15, 7) ^ rr(w15, 18) ^ (w15 >> 3),
+ // s1 = rr(w2, 17) ^ rr(w2, 19) ^ (w2 >> 10);
+ // w[i] = w16 + s0 + w7 + s1;
+ // }
+ //
+ // // fully unrolled version:
+ // WE(24); WE(25); WE(26); WE(27); WE(28); WE(29); WE(30); WE(31);
+ // WE(32); WE(33); WE(34); WE(35); WE(36); WE(37); WE(38); WE(39);
+ // WE(40); WE(41); WE(42); WE(43); WE(44); WE(45); WE(46); WE(47);
+ // WE(48); WE(49); WE(50); WE(51); WE(52); WE(53); WE(54); WE(55);
+ // WE(56); WE(57); WE(58); WE(59); WE(60); WE(61); WE(62); WE(63);
+ //
+ // partially unrolled:
+ // for (size_t we_i = 16; we_i < 64; we_i += 16) {
+ // WE(we_i + 0); WE(we_i + 1); WE(we_i + 2); WE(we_i + 3);
+ // WE(we_i + 4); WE(we_i + 5); WE(we_i + 6); WE(we_i + 7);
+ // WE(we_i + 8); WE(we_i + 9); WE(we_i + 10); WE(we_i + 11);
+ // WE(we_i + 12); WE(we_i + 13); WE(we_i + 14); WE(we_i + 15);
+ // }
for (size_t i = 16; i < 64; i++) {
- const uint32_t w2 = w[i - 2],
- w7 = w[i - 7],
- w15 = w[i - 15],
- w16 = w[i - 16],
- s0 = rr(w15, 7) ^ rr(w15, 18) ^ (w15 >> 3),
- s1 = rr(w2, 17) ^ rr(w2, 19) ^ (w2 >> 10);
- w[i] = w16 + s0 + w7 + s1;
+ WE(i);
}
// Initialize working variables to current hash value
@@ -101,22 +148,41 @@ sha256_block(sha256_t * const ctx) {
// c := b
// b := a
// a := temp1 + temp2
- for (size_t i = 0; i < 64; i++) {
- const uint32_t s1 = rr(hs[4], 6) ^ rr(hs[4], 11) ^ rr(hs[4], 25),
- ch = (hs[4] & hs[5]) ^ ((~(hs[4])) & hs[6]),
- t0 = hs[7] + s1 + ch + K[i] + w[i],
- s0 = rr(hs[0], 2) ^ rr(hs[0], 13) ^ rr(hs[0], 22),
- mj = (hs[0] & hs[1]) ^ (hs[0] & hs[2]) ^ (hs[1] & hs[2]),
- t1 = s0 + mj;
-
- hs[7] = hs[6];
- hs[6] = hs[5];
- hs[5] = hs[4];
- hs[4] = hs[3] + t0;
- hs[3] = hs[2];
- hs[2] = hs[1];
- hs[1] = hs[0];
- hs[0] = t0 + t1;
+ //
+ // for (size_t i = 0; i < 64; i++) {
+ // const uint32_t s1 = rr(hs[4], 6) ^ rr(hs[4], 11) ^ rr(hs[4], 25),
+ // ch = (hs[4] & hs[5]) ^ ((~(hs[4])) & hs[6]),
+ // t0 = hs[7] + s1 + ch + K[i] + w[i],
+ // s0 = rr(hs[0], 2) ^ rr(hs[0], 13) ^ rr(hs[0], 22),
+ // mj = (hs[0] & hs[1]) ^ (hs[0] & hs[2]) ^ (hs[1] & hs[2]),
+ // t1 = s0 + mj;
+
+ // hs[7] = hs[6];
+ // hs[6] = hs[5];
+ // hs[5] = hs[4];
+ // hs[4] = hs[3] + t0;
+ // hs[3] = hs[2];
+ // hs[2] = hs[1];
+ // hs[1] = hs[0];
+ // hs[0] = t0 + t1;
+ // }
+ //
+ // // fully unrolled version:
+ // WC(0); WC(1); WC(2); WC(3); WC(4); WC(5); WC(6); WC(7);
+ // WC(8); WC(9); WC(10); WC(11); WC(12); WC(13); WC(14); WC(15);
+ // WC(16); WC(17); WC(18); WC(19); WC(20); WC(21); WC(22); WC(23);
+ // WC(24); WC(25); WC(26); WC(27); WC(28); WC(29); WC(30); WC(31);
+ // WC(32); WC(33); WC(34); WC(35); WC(36); WC(37); WC(38); WC(39);
+ // WC(40); WC(41); WC(42); WC(43); WC(44); WC(45); WC(46); WC(47);
+ // WC(48); WC(49); WC(50); WC(51); WC(52); WC(53); WC(54); WC(55);
+ // WC(56); WC(57); WC(58); WC(59); WC(60); WC(61); WC(62); WC(63);
+ //
+ // partially unrolled:
+ for (size_t i = 0; i < 64; i += 16) {
+ WC(i + 0); WC(i + 1); WC(i + 2); WC(i + 3);
+ WC(i + 4); WC(i + 5); WC(i + 6); WC(i + 7);
+ WC(i + 8); WC(i + 9); WC(i + 10); WC(i + 11);
+ WC(i + 12); WC(i + 13); WC(i + 14); WC(i + 15);
}
// Add the compressed chunk to the current hash value
@@ -131,22 +197,43 @@ sha256_block(sha256_t * const ctx) {
}
#undef WI
+#undef WE
+#undef WC
+
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
void sha256_push(
sha256_t * const ctx,
const uint8_t * const src,
const size_t src_len
) {
- for (size_t i = 0; i < src_len; i++) {
- ctx->buf[ctx->buf_len] = src[i];
- ctx->buf_len++;
+ const size_t buf_left = 64 - ctx->buf_len;
+
+ if (src_len >= buf_left) {
+ // fill remaining buffer
+ memcpy(ctx->buf + ctx->buf_len, src, buf_left);
+ sha256_block(ctx);
+ ctx->buf_len = 0;
+
+ const size_t new_src_len = src_len - buf_left;
+ const size_t num_blocks = new_src_len / 64;
- if (ctx->buf_len == 64) {
+ // process chunks
+ for (size_t i = 0; i < num_blocks; i++) {
+ memcpy(ctx->buf, src + buf_left + (64 * i), 64);
sha256_block(ctx);
- ctx->buf_len = 0;
}
+
+ // copy remaining bytes to buffer
+ const size_t new_buf_len = (new_src_len - 64 * num_blocks);
+ memcpy(ctx->buf, src + buf_left + (64 * num_blocks), new_buf_len);
+ ctx->buf_len = new_buf_len;
+ } else {
+ memcpy(ctx->buf + ctx->buf_len, src, src_len);
+ ctx->buf_len += src_len;
}
+ // update byte count
ctx->num_bytes += src_len;
}
@@ -191,18 +278,12 @@ void sha256_fini(
const uint64_t num_bytes = ctx->num_bytes;
const size_t pad_len = (65 - ((num_bytes + 1 + 8) % 64));
- // fprintf(stderr, "ctx->num_bytes (before pad) = %lu\n", ctx->num_bytes);
-
// push padding
sha256_push(ctx, PADDING, pad_len);
- // fprintf(stderr, "ctx->num_bytes (before len) = %lu\n", ctx->num_bytes);
-
// push length (in bits)
sha256_push_u64(ctx, num_bytes * 8);
- // fprintf(stderr, "ctx->num_bytes (after len) = %lu\n", ctx->num_bytes);
-
// extract hash
const uint8_t hash[32] = {
WB(ctx, 0), WB(ctx, 1), WB(ctx, 2), WB(ctx, 3),