1/*
2The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
4questions, please refer to our website: http://keccak.noekeon.org/
5
6Implementation by the designers,
7hereby denoted as "the implementer".
8
9To the extent possible under law, the implementer has waived all copyright
10and related or neighboring rights to the source code in this file.
11http://creativecommons.org/publicdomain/zero/1.0/
12*/
13
14#include <string.h>
15#include "brg_endian.h"
16#include "KeccakF-1600-opt64-settings.h"
17#include "KeccakF-1600-interface.h"
18
/* Unsigned integer aliases used throughout this file. */
typedef unsigned long long UINT64;  /* one state lane; assumed to be 64 bits wide */
typedef unsigned char UINT8;        /* one byte of input or output */
21
/*
 * ALIGN: storage attribute requesting 32-byte alignment for the constant
 * tables used by the SIMD back ends (UseSSE / UseXOP). It is only given a
 * compiler-specific definition when one of those back ends is selected and
 * the compiler is GCC-compatible or MSVC; otherwise it expands to nothing.
 */
#if (defined(UseSSE) || defined(UseXOP)) && defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(32)))
#elif (defined(UseSSE) || defined(UseXOP)) && defined(_MSC_VER)
#define ALIGN __declspec(align(32))
#endif

/* Fallback: no alignment attribute available or needed. */
#if !defined(ALIGN)
# define ALIGN
#endif
33
34#if defined(UseSSE)
35 #include <x86intrin.h>
36 typedef __m128i V64;
37 typedef __m128i V128;
38 typedef union {
39 V128 v128;
40 UINT64 v64[2];
41 } V6464;
42
43 #define ANDnu64(a, b) _mm_andnot_si128(a, b)
44 #define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a))
45 #define CONST64(a) _mm_loadl_epi64((const V64 *)&(a))
46 #define ROL64(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
47 #define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b)
48 #define XOR64(a, b) _mm_xor_si128(a, b)
49 #define XOReq64(a, b) a = _mm_xor_si128(a, b)
50 #define SHUFFLEBYTES128(a, b) _mm_shuffle_epi8(a, b)
51
52 #define ANDnu128(a, b) _mm_andnot_si128(a, b)
53 #define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
54 #define CONST128(a) _mm_load_si128((const V128 *)&(a))
55 #define LOAD128(a) _mm_load_si128((const V128 *)&(a))
56 #define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
57 #define ROL64in128(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
58 #define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
59 #define XOR128(a, b) _mm_xor_si128(a, b)
60 #define XOReq128(a, b) a = _mm_xor_si128(a, b)
61 #define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b)
62 #define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b)
63 #define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE)
64 #define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44)
65 #define ZERO128() _mm_setzero_si128()
66
67 #ifdef UseOnlySIMD64
68 #include "KeccakF-1600-simd64.macros"
69 #else
70ALIGN const UINT64 rho8_56[2] = {0x0605040302010007, 0x080F0E0D0C0B0A09};
71 #include "KeccakF-1600-simd128.macros"
72 #endif
73
74 #ifdef UseBebigokimisa
75 #error "UseBebigokimisa cannot be used in combination with UseSSE"
76 #endif
77#elif defined(UseXOP)
78 #include <x86intrin.h>
79 typedef __m128i V64;
80 typedef __m128i V128;
81
82 #define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a))
83 #define CONST64(a) _mm_loadl_epi64((const V64 *)&(a))
84 #define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b)
85 #define XOR64(a, b) _mm_xor_si128(a, b)
86 #define XOReq64(a, b) a = _mm_xor_si128(a, b)
87
88 #define ANDnu128(a, b) _mm_andnot_si128(a, b)
89 #define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
90 #define CONST128(a) _mm_load_si128((const V128 *)&(a))
91 #define LOAD128(a) _mm_load_si128((const V128 *)&(a))
92 #define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
93 #define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
94 #define XOR128(a, b) _mm_xor_si128(a, b)
95 #define XOReq128(a, b) a = _mm_xor_si128(a, b)
96 #define ZERO128() _mm_setzero_si128()
97
98 #define SWAP64(a) _mm_shuffle_epi32(a, 0x4E)
99 #define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b)
100 #define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b)
101 #define GET64LOHI(a, b) ((__m128i)_mm_blend_pd((__m128d)a, (__m128d)b, 2))
102 #define GET64HILO(a, b) SWAP64(GET64LOHI(b, a))
103 #define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE)
104 #define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44)
105
106 #define ROL6464same(a, o) _mm_roti_epi64(a, o)
107 #define ROL6464(a, r1, r2) _mm_rot_epi64(a, CONST128( rot_##r1##_##r2 ))
108ALIGN const UINT64 rot_0_20[2] = { 0, 20};
109ALIGN const UINT64 rot_44_3[2] = {44, 3};
110ALIGN const UINT64 rot_43_45[2] = {43, 45};
111ALIGN const UINT64 rot_21_61[2] = {21, 61};
112ALIGN const UINT64 rot_14_28[2] = {14, 28};
113ALIGN const UINT64 rot_1_36[2] = { 1, 36};
114ALIGN const UINT64 rot_6_10[2] = { 6, 10};
115ALIGN const UINT64 rot_25_15[2] = {25, 15};
116ALIGN const UINT64 rot_8_56[2] = { 8, 56};
117ALIGN const UINT64 rot_18_27[2] = {18, 27};
118ALIGN const UINT64 rot_62_55[2] = {62, 55};
119ALIGN const UINT64 rot_39_41[2] = {39, 41};
120
121#if defined(UseSimulatedXOP)
122 // For debugging purposes, when XOP is not available
123 #undef ROL6464
124 #undef ROL6464same
125 #define ROL6464same(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
126 V128 ROL6464(V128 a, int r0, int r1)
127 {
128 V128 a0 = ROL64(a, r0);
129 V128 a1 = COPY64HI2LO(ROL64(a, r1));
130 return GET64LOLO(a0, a1);
131 }
132#endif
133
134 #include "KeccakF-1600-xop.macros"
135
136 #ifdef UseBebigokimisa
137 #error "UseBebigokimisa cannot be used in combination with UseXOP"
138 #endif
139#elif defined(UseMMX)
140 #include <mmintrin.h>
141 typedef __m64 V64;
142 #define ANDnu64(a, b) _mm_andnot_si64(a, b)
143
144 #if (defined(_MSC_VER) || defined (__INTEL_COMPILER))
145 #define LOAD64(a) *(V64*)&(a)
146 #define CONST64(a) *(V64*)&(a)
147 #define STORE64(a, b) *(V64*)&(a) = b
148 #else
149 #define LOAD64(a) (V64)a
150 #define CONST64(a) (V64)a
151 #define STORE64(a, b) a = (UINT64)b
152 #endif
153 #define ROL64(a, o) _mm_or_si64(_mm_slli_si64(a, o), _mm_srli_si64(a, 64-(o)))
154 #define XOR64(a, b) _mm_xor_si64(a, b)
155 #define XOReq64(a, b) a = _mm_xor_si64(a, b)
156
157 #include "KeccakF-1600-simd64.macros"
158
159 #ifdef UseBebigokimisa
160 #error "UseBebigokimisa cannot be used in combination with UseMMX"
161 #endif
162#else
/*
 * ROL64(a, offset): rotate the 64-bit value a left by offset bits.
 *
 *  - MSVC:     uses the _rotl64 intrinsic.
 *  - UseSHLD:  uses an x86 `shld` instruction via GNU inline assembly; the
 *              offset must be a compile-time constant ("i" constraint).
 *  - default:  portable shift/xor form. Arguments are fully parenthesized so
 *              that expression arguments (e.g. ROL64(x ^ y, n + 1)) expand
 *              correctly. offset must be in 1..63: an offset of 0 would shift
 *              right by 64, which is undefined in C. Note that `a` is
 *              evaluated twice, so it must not have side effects.
 */
#if defined(_MSC_VER)
#define ROL64(a, offset) _rotl64(a, offset)
#elif defined(UseSHLD)
#define ROL64(x,N) ({ \
    register UINT64 __out; \
    register UINT64 __in = (x); \
    __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \
    __out; \
    })
#else
#define ROL64(a, offset) ((((UINT64)(a)) << (offset)) ^ (((UINT64)(a)) >> (64-(offset))))
#endif
175
176 #include "KeccakF-1600-64.macros"
177#endif
178
179#include "KeccakF-1600-unrolling.macros"
180
181static void KeccakPermutationOnWords(UINT64 *state)
182{
183 declareABCDE
184#if (Unrolling != 24)
185 unsigned int i;
186#endif
187
188 copyFromState(A, state)
189 rounds
190#if defined(UseMMX)
191 _mm_empty();
192#endif
193}
194
195static void KeccakPermutationOnWordsAfterXoring(UINT64 *state, const UINT64 *input, unsigned int laneCount)
196{
197 declareABCDE
198#if (Unrolling != 24)
199 unsigned int i;
200#endif
201 unsigned int j;
202
203 for(j=0; j<laneCount; j++)
204 state[j] ^= input[j];
205 copyFromState(A, state)
206 rounds
207#if defined(UseMMX)
208 _mm_empty();
209#endif
210}
211
212#ifdef ProvideFast576
213static void KeccakPermutationOnWordsAfterXoring576bits(UINT64 *state, const UINT64 *input)
214{
215 declareABCDE
216#if (Unrolling != 24)
217 unsigned int i;
218#endif
219
220 copyFromStateAndXor576bits(A, state, input)
221 rounds
222#if defined(UseMMX)
223 _mm_empty();
224#endif
225}
226#endif
227
228#ifdef ProvideFast832
229static void KeccakPermutationOnWordsAfterXoring832bits(UINT64 *state, const UINT64 *input)
230{
231 declareABCDE
232#if (Unrolling != 24)
233 unsigned int i;
234#endif
235
236 copyFromStateAndXor832bits(A, state, input)
237 rounds
238#if defined(UseMMX)
239 _mm_empty();
240#endif
241}
242#endif
243
244#ifdef ProvideFast1024
245static void KeccakPermutationOnWordsAfterXoring1024bits(UINT64 *state, const UINT64 *input)
246{
247 declareABCDE
248#if (Unrolling != 24)
249 unsigned int i;
250#endif
251
252 copyFromStateAndXor1024bits(A, state, input)
253 rounds
254#if defined(UseMMX)
255 _mm_empty();
256#endif
257}
258#endif
259
260#ifdef ProvideFast1088
261static void KeccakPermutationOnWordsAfterXoring1088bits(UINT64 *state, const UINT64 *input)
262{
263 declareABCDE
264#if (Unrolling != 24)
265 unsigned int i;
266#endif
267
268 copyFromStateAndXor1088bits(A, state, input)
269 rounds
270#if defined(UseMMX)
271 _mm_empty();
272#endif
273}
274#endif
275
276#ifdef ProvideFast1152
277static void KeccakPermutationOnWordsAfterXoring1152bits(UINT64 *state, const UINT64 *input)
278{
279 declareABCDE
280#if (Unrolling != 24)
281 unsigned int i;
282#endif
283
284 copyFromStateAndXor1152bits(A, state, input)
285 rounds
286#if defined(UseMMX)
287 _mm_empty();
288#endif
289}
290#endif
291
292#ifdef ProvideFast1344
293static void KeccakPermutationOnWordsAfterXoring1344bits(UINT64 *state, const UINT64 *input)
294{
295 declareABCDE
296#if (Unrolling != 24)
297 unsigned int i;
298#endif
299
300 copyFromStateAndXor1344bits(A, state, input)
301 rounds
302#if defined(UseMMX)
303 _mm_empty();
304#endif
305}
306#endif
307
/*
 * Global initialization hook. This implementation has nothing to set up, so
 * the body is intentionally empty.
 *
 * Declared with (void) so the definition is a proper prototype and calls
 * that pass arguments are diagnosed by the compiler.
 */
static void KeccakInitialize(void)
{
}
311
/*
 * Resets a 200-byte state buffer to its initial value.
 *
 * All 200 bytes are cleared. When UseBebigokimisa is defined, lanes 1, 2, 8,
 * 12, 17 and 20 (8 bytes each) are instead set to all-ones; KeccakExtract
 * complements those same lanes again when producing output.
 *
 * The all-ones lanes are written byte-wise with memset rather than through a
 * UINT64 pointer cast: the stored bytes are identical on any byte order, and
 * no alignment requirement is placed on the caller's buffer by this function.
 *
 * state: buffer of at least 200 bytes.
 */
static void KeccakInitializeState(unsigned char *state)
{
    memset(state, 0, 200);
#ifdef UseBebigokimisa
    {
        static const unsigned char complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
        unsigned int k;

        for (k = 0; k < 6; k++)
            memset(state + 8u * complementedLanes[k], 0xFF, 8);
    }
#endif
}
324
325static void KeccakPermutation(unsigned char *state)
326{
327 // We assume the state is always stored as words
328 KeccakPermutationOnWords((UINT64*)state);
329}
330
331#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
332static void fromBytesToWord(UINT64 *word, const UINT8 *bytes)
333{
334 unsigned int i;
335
336 *word = 0;
337 for(i=0; i<(64/8); i++)
338 *word |= (UINT64)(bytes[i]) << (8*i);
339}
340#endif
341
342#ifdef ProvideFast576
343static void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data)
344{
345#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
346 KeccakPermutationOnWordsAfterXoring576bits((UINT64*)state, (const UINT64*)data);
347#else
348 UINT64 dataAsWords[9];
349 unsigned int i;
350
351 for(i=0; i<9; i++)
352 fromBytesToWord(dataAsWords+i, data+(i*8));
353 KeccakPermutationOnWordsAfterXoring576bits((UINT64*)state, dataAsWords);
354#endif
355}
356#endif
357
358#ifdef ProvideFast832
359static void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data)
360{
361#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
362 KeccakPermutationOnWordsAfterXoring832bits((UINT64*)state, (const UINT64*)data);
363#else
364 UINT64 dataAsWords[13];
365 unsigned int i;
366
367 for(i=0; i<13; i++)
368 fromBytesToWord(dataAsWords+i, data+(i*8));
369 KeccakPermutationOnWordsAfterXoring832bits((UINT64*)state, dataAsWords);
370#endif
371}
372#endif
373
374#ifdef ProvideFast1024
375static void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data)
376{
377#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
378 KeccakPermutationOnWordsAfterXoring1024bits((UINT64*)state, (const UINT64*)data);
379#else
380 UINT64 dataAsWords[16];
381 unsigned int i;
382
383 for(i=0; i<16; i++)
384 fromBytesToWord(dataAsWords+i, data+(i*8));
385 KeccakPermutationOnWordsAfterXoring1024bits((UINT64*)state, dataAsWords);
386#endif
387}
388#endif
389
390#ifdef ProvideFast1088
391static void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data)
392{
393#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
394 KeccakPermutationOnWordsAfterXoring1088bits((UINT64*)state, (const UINT64*)data);
395#else
396 UINT64 dataAsWords[17];
397 unsigned int i;
398
399 for(i=0; i<17; i++)
400 fromBytesToWord(dataAsWords+i, data+(i*8));
401 KeccakPermutationOnWordsAfterXoring1088bits((UINT64*)state, dataAsWords);
402#endif
403}
404#endif
405
406#ifdef ProvideFast1152
407static void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data)
408{
409#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
410 KeccakPermutationOnWordsAfterXoring1152bits((UINT64*)state, (const UINT64*)data);
411#else
412 UINT64 dataAsWords[18];
413 unsigned int i;
414
415 for(i=0; i<18; i++)
416 fromBytesToWord(dataAsWords+i, data+(i*8));
417 KeccakPermutationOnWordsAfterXoring1152bits((UINT64*)state, dataAsWords);
418#endif
419}
420#endif
421
422#ifdef ProvideFast1344
423static void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data)
424{
425#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
426 KeccakPermutationOnWordsAfterXoring1344bits((UINT64*)state, (const UINT64*)data);
427#else
428 UINT64 dataAsWords[21];
429 unsigned int i;
430
431 for(i=0; i<21; i++)
432 fromBytesToWord(dataAsWords+i, data+(i*8));
433 KeccakPermutationOnWordsAfterXoring1344bits((UINT64*)state, dataAsWords);
434#endif
435}
436#endif
437
438static void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount)
439{
440#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
441 KeccakPermutationOnWordsAfterXoring((UINT64*)state, (const UINT64*)data, laneCount);
442#else
443 UINT64 dataAsWords[25];
444 unsigned int i;
445
446 for(i=0; i<laneCount; i++)
447 fromBytesToWord(dataAsWords+i, data+(i*8));
448 KeccakPermutationOnWordsAfterXoring((UINT64*)state, dataAsWords, laneCount);
449#endif
450}
451
452#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
453static void fromWordToBytes(UINT8 *bytes, const UINT64 word)
454{
455 unsigned int i;
456
457 for(i=0; i<(64/8); i++)
458 bytes[i] = (word >> (8*i)) & 0xFF;
459}
460#endif
461
462#ifdef ProvideFast1024
/*
 * Copies the first 1024 bits (128 bytes, 16 lanes) of the state to data.
 *
 * On little-endian builds the bytes are copied as they are; otherwise each
 * lane is serialized least-significant byte first. When UseBebigokimisa is
 * defined, lanes 1, 2, 8 and 12 of the output are bitwise complemented
 * (these are the lanes below 16 that KeccakInitializeState sets to all-ones).
 *
 * The complement is applied byte by byte: this yields the same bytes as
 * complementing a whole 64-bit word, but does not access the caller's byte
 * buffer through a UINT64 pointer, so data needs no particular alignment.
 *
 * state: the state buffer (read as UINT64 lanes on non-little-endian builds).
 * data:  destination of at least 128 bytes.
 */
static void KeccakExtract1024bits(const unsigned char *state, unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(data, state, 128);
#else
    unsigned int i;

    for(i=0; i<16; i++)
        fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
#endif
#ifdef UseBebigokimisa
    {
        static const unsigned char complementedLanes[4] = { 1, 2, 8, 12 };
        unsigned int k, b;

        for (k = 0; k < 4; k++) {
            unsigned char *lane = data + 8u * complementedLanes[k];

            for (b = 0; b < 8; b++)
                lane[b] = (unsigned char)~lane[b];
        }
    }
#endif
}
480#endif
481
/*
 * Copies the first laneCount lanes (laneCount * 8 bytes) of the state to data.
 *
 * On little-endian builds the bytes are copied as they are; otherwise each
 * lane is serialized least-significant byte first. When UseBebigokimisa is
 * defined, every extracted lane among 1, 2, 8, 12, 17 and 20 is bitwise
 * complemented in the output (the lanes KeccakInitializeState sets to
 * all-ones).
 *
 * The complement is applied byte by byte: this yields the same bytes as
 * complementing a whole 64-bit word, but does not access the caller's byte
 * buffer through a UINT64 pointer, so data needs no particular alignment.
 *
 * state:     the state buffer (read as UINT64 lanes on non-little-endian builds).
 * data:      destination of at least laneCount * 8 bytes.
 * laneCount: number of lanes to extract; nothing is written when it is 0.
 */
static void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(data, state, laneCount*8);
#else
    unsigned int i;

    for(i=0; i<laneCount; i++)
        fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
#endif
#ifdef UseBebigokimisa
    {
        /* Sorted ascending, so the loop can stop at the first lane that was
           not extracted. */
        static const unsigned char complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
        unsigned int k, b;

        for (k = 0; k < 6 && complementedLanes[k] < laneCount; k++) {
            unsigned char *lane = data + 8u * complementedLanes[k];

            for (b = 0; b < 8; b++)
                lane[b] = (unsigned char)~lane[b];
        }
    }
#endif
}
513