blake3_portable.c (5889B)
1 #include "blake3_impl.h" 2 #include <string.h> 3 4 INLINE uint32_t rotr32(uint32_t w, uint32_t c) { 5 return (w >> c) | (w << (32 - c)); 6 } 7 8 INLINE void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d, 9 uint32_t x, uint32_t y) { 10 state[a] = state[a] + state[b] + x; 11 state[d] = rotr32(state[d] ^ state[a], 16); 12 state[c] = state[c] + state[d]; 13 state[b] = rotr32(state[b] ^ state[c], 12); 14 state[a] = state[a] + state[b] + y; 15 state[d] = rotr32(state[d] ^ state[a], 8); 16 state[c] = state[c] + state[d]; 17 state[b] = rotr32(state[b] ^ state[c], 7); 18 } 19 20 INLINE void round_fn(uint32_t state[16], const uint32_t *msg, size_t round) { 21 // Select the message schedule based on the round. 22 const uint8_t *schedule = MSG_SCHEDULE[round]; 23 24 // Mix the columns. 25 g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]); 26 g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]); 27 g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]); 28 g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]); 29 30 // Mix the rows. 31 g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]); 32 g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]); 33 g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]); 34 g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]); 35 } 36 37 INLINE void compress_pre(uint32_t state[16], const uint32_t cv[8], 38 const uint8_t block[BLAKE3_BLOCK_LEN], 39 uint8_t block_len, uint64_t counter, uint8_t flags) { 40 uint32_t block_words[16]; 41 block_words[0] = load32(block + 4 * 0); 42 block_words[1] = load32(block + 4 * 1); 43 block_words[2] = load32(block + 4 * 2); 44 block_words[3] = load32(block + 4 * 3); 45 block_words[4] = load32(block + 4 * 4); 46 block_words[5] = load32(block + 4 * 5); 47 block_words[6] = load32(block + 4 * 6); 48 block_words[7] = load32(block + 4 * 7); 49 block_words[8] = load32(block + 4 * 8); 50 block_words[9] = load32(block + 4 * 9); 51 block_words[10] = load32(block + 4 * 10); 52 block_words[11] = load32(block + 4 * 11); 53 block_words[12] = load32(block + 4 * 12); 54 block_words[13] = load32(block + 4 * 13); 55 block_words[14] = load32(block + 4 * 14); 56 block_words[15] = load32(block + 4 * 15); 57 58 state[0] = cv[0]; 59 state[1] = cv[1]; 60 state[2] = cv[2]; 61 state[3] = cv[3]; 62 state[4] = cv[4]; 63 state[5] = cv[5]; 64 state[6] = cv[6]; 65 state[7] = cv[7]; 66 state[8] = IV[0]; 67 state[9] = IV[1]; 68 state[10] = IV[2]; 69 state[11] = IV[3]; 70 state[12] = counter_low(counter); 71 state[13] = counter_high(counter); 72 state[14] = (uint32_t)block_len; 73 state[15] = (uint32_t)flags; 74 75 round_fn(state, &block_words[0], 0); 76 round_fn(state, &block_words[0], 1); 77 round_fn(state, &block_words[0], 2); 78 round_fn(state, &block_words[0], 3); 79 round_fn(state, &block_words[0], 4); 80 round_fn(state, &block_words[0], 5); 81 round_fn(state, &block_words[0], 6); 82 } 83 84 void blake3_compress_in_place_portable(uint32_t cv[8], 85 const uint8_t block[BLAKE3_BLOCK_LEN], 86 uint8_t block_len, uint64_t counter, 87 uint8_t flags) { 88 uint32_t state[16]; 89 compress_pre(state, cv, block, block_len, counter, flags); 90 cv[0] = state[0] ^ state[8]; 91 cv[1] = state[1] ^ state[9]; 92 cv[2] = state[2] ^ state[10]; 93 cv[3] = state[3] ^ state[11]; 94 cv[4] = state[4] ^ state[12]; 95 cv[5] = state[5] ^ state[13]; 96 cv[6] = state[6] ^ state[14]; 97 cv[7] = state[7] ^ state[15]; 98 } 99 100 void blake3_compress_xof_portable(const uint32_t cv[8], 101 const uint8_t block[BLAKE3_BLOCK_LEN], 102 uint8_t block_len, uint64_t counter, 103 uint8_t flags, uint8_t out[64]) { 104 uint32_t state[16]; 105 compress_pre(state, cv, block, block_len, counter, flags); 106 107 store32(&out[0 * 4], state[0] ^ state[8]); 108 store32(&out[1 * 4], state[1] ^ state[9]); 109 store32(&out[2 * 4], state[2] ^ state[10]); 110 store32(&out[3 * 4], state[3] ^ state[11]); 111 store32(&out[4 * 4], state[4] ^ state[12]); 112 store32(&out[5 * 4], state[5] ^ state[13]); 113 store32(&out[6 * 4], state[6] ^ state[14]); 114 store32(&out[7 * 4], state[7] ^ state[15]); 115 store32(&out[8 * 4], state[8] ^ cv[0]); 116 store32(&out[9 * 4], state[9] ^ cv[1]); 117 store32(&out[10 * 4], state[10] ^ cv[2]); 118 store32(&out[11 * 4], state[11] ^ cv[3]); 119 store32(&out[12 * 4], state[12] ^ cv[4]); 120 store32(&out[13 * 4], state[13] ^ cv[5]); 121 store32(&out[14 * 4], state[14] ^ cv[6]); 122 store32(&out[15 * 4], state[15] ^ cv[7]); 123 } 124 125 INLINE void hash_one_portable(const uint8_t *input, size_t blocks, 126 const uint32_t key[8], uint64_t counter, 127 uint8_t flags, uint8_t flags_start, 128 uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) { 129 uint32_t cv[8]; 130 memcpy(cv, key, BLAKE3_KEY_LEN); 131 uint8_t block_flags = flags | flags_start; 132 while (blocks > 0) { 133 if (blocks == 1) { 134 block_flags |= flags_end; 135 } 136 blake3_compress_in_place_portable(cv, input, BLAKE3_BLOCK_LEN, counter, 137 block_flags); 138 input = &input[BLAKE3_BLOCK_LEN]; 139 blocks -= 1; 140 block_flags = flags; 141 } 142 store_cv_words(out, cv); 143 } 144 145 void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs, 146 size_t blocks, const uint32_t key[8], 147 uint64_t counter, bool increment_counter, 148 uint8_t flags, uint8_t flags_start, 149 uint8_t flags_end, uint8_t *out) { 150 while (num_inputs > 0) { 151 hash_one_portable(inputs[0], blocks, key, counter, flags, flags_start, 152 flags_end, out); 153 if (increment_counter) { 154 counter += 1; 155 } 156 inputs += 1; 157 num_inputs -= 1; 158 out = &out[BLAKE3_OUT_LEN]; 159 } 160 }