sha3.c: add xof_absorb_raw_bulk() to absorb data in bulk

author: Paul Duncan <pabs@pablotron.org> 2024-02-24 11:49:07 -0500
committer: Paul Duncan <pabs@pablotron.org> 2024-02-24 11:49:07 -0500
commit: d41372954d22e18a624f9783d90169bc0651b531 (patch)
tree: 6faac4524eac944517a853321f6ace44e85a175d /sha3.c
parent: a239c34fdbaa56b166ad70cbf078fa139b97f64a (diff)
download: sha3-d41372954d22e18a624f9783d90169bc0651b531.tar.bz2
sha3-d41372954d22e18a624f9783d90169bc0651b531.zip
1 files changed, 99 insertions, 6 deletions
diff --git a/sha3.c b/sha3.c
index ee58d50..34c1664 100644
--- a/sha3.c
+++ b/sha3.c
@@ -857,17 +857,110 @@ static inline void xof_init(sha3_xof_t * const xof) {
   memset(xof, 0, sizeof(sha3_xof_t));
 }
 
-// absorb w/o checking state (used by xof_once())
-static inline void xof_absorb_raw(sha3_xof_t * const xof, const size_t rate, const size_t num_rounds, const uint8_t * const m, size_t m_len) {
+// absorb by copying internal state and processing in chunks.  used by
+// xor_absorb_raw() for longer messages.
+static inline void xof_absorb_raw_bulk(sha3_xof_t * const xof, const size_t rate, const size_t num_rounds, const uint8_t *m, size_t m_len) {
+  // load state and byte count from context
+  // TODO: benchmark this to see if it's faster
+  sha3_state_t a = xof->a;
+  size_t num_bytes = xof->num_bytes;
+
+  // absorb chunks
+  if ((num_bytes & 7) == 0) {
+    // absorb 32 byte chunks (4 x uint64)
+    while (num_bytes + 32 <= rate && m_len > 32) {
+      // copy chunk (to avoid `m` alignment issues)
+      uint64_t vals[4];
+      memcpy(vals, m, 4 * sizeof(uint64_t));
+
+      // xor chunk into state
+      // (FIXME: does not vectorize for some reason, even when unrolled)
+      for (size_t i = 0; i < 4; i++) {
+        a.u64[num_bytes/8 + i] ^= vals[i];
+      }
+
+      // update counters
+      num_bytes += 32;
+      m += 32;
+      m_len -= 32;
+
+      if (num_bytes == rate) {
+        // permute state
+        permute(a.u64, num_rounds);
+        num_bytes = 0;
+      }
+    }
+
+    // absorb 8 byte chunks (1 x uint64)
+    while (num_bytes + 8 <= rate && m_len > 8) {
+      // copy chunk (to avoid `m` alignment issues)
+      uint64_t val;
+      memcpy(&val, m, sizeof(uint64_t));
+
+      // xor chunk into state
+      a.u64[num_bytes/8] ^= val;
+
+      // update counters
+      num_bytes += 8;
+      m += 8;
+      m_len -= 8;
+
+      if (num_bytes == rate) {
+        // permute state
+        permute(a.u64, num_rounds);
+        num_bytes = 0;
+      }
+    }
+  }
+
+  // absorb remaining bytes
+  for (size_t i = 0; i < m_len; i++) {
+    // xor byte into state
+    a.u8[num_bytes++] ^= m[i];
+
+    if (num_bytes == rate) {
+      // permute state
+      permute(a.u64, num_rounds);
+      num_bytes = 0;
+    }
+  }
+
+  // save state and byte count to context
+  xof->a = a;
+  xof->num_bytes = num_bytes;
+}
+
+// absorb without copying internal state.  used by xor_absorb_raw() for
+// short messages.
+static inline void xof_absorb_raw_simple(sha3_xof_t * const xof, const size_t rate, const size_t num_rounds, const uint8_t *m, size_t m_len) {
+  // load byte count from context
+  size_t num_bytes = xof->num_bytes;
+
   // absorb bytes
-  // (TODO: absorb larger chunks)
   for (size_t i = 0; i < m_len; i++) {
-    xof->a.u8[xof->num_bytes++] ^= m[i];
-    if (xof->num_bytes == rate) {
+    // xor byte into state
+    xof->a.u8[num_bytes++] ^= m[i];
+
+    if (num_bytes == rate) {
+      // permute state
       permute(xof->a.u64, num_rounds);
-      xof->num_bytes = 0;
+      num_bytes = 0;
     }
   }
+
+  // save byte count to context
+  xof->num_bytes = num_bytes;
+}
+
+// absorb w/o checking state (used by xof_once())
+static inline void xof_absorb_raw(sha3_xof_t * const xof, const size_t rate, const size_t num_rounds, const uint8_t *m, size_t m_len) {
+  if ((xof->num_bytes & 7) == 0 && m_len > 32) {
+    // longer message, absorb in large chunks.
+    xof_absorb_raw_bulk(xof, rate, num_rounds, m, m_len);
+  } else {
+    // shorter message, absorb as bytes
+    xof_absorb_raw_simple(xof, rate, num_rounds, m, m_len);
+  }
 }
 
 // check state, absorb bytes
author	Paul Duncan <pabs@pablotron.org>	2024-02-24 11:49:07 -0500
committer	Paul Duncan <pabs@pablotron.org>	2024-02-24 11:49:07 -0500
commit	d41372954d22e18a624f9783d90169bc0651b531 (patch)
tree	6faac4524eac944517a853321f6ace44e85a175d /sha3.c
parent	a239c34fdbaa56b166ad70cbf078fa139b97f64a (diff)
download	sha3-d41372954d22e18a624f9783d90169bc0651b531.tar.bz2 sha3-d41372954d22e18a624f9783d90169bc0651b531.zip