ghash-clmulni-intel_asm.S source code [linux/arch/x86/crypto/ghash-clmulni-intel_asm.S]

/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Accelerated GHASH implementation using the Intel PCLMULQDQ-NI
 * instructions. This file contains the accelerated part of the GHASH
 * implementation. More information about PCLMULQDQ can be found at:
 *
 * https://www.intel.com/content/dam/develop/external/us/en/documents/clmul-wp-rev-2-02-2014-04-20.pdf
 *
 * Copyright (c) 2009 Intel Corp.
 *   Author: Huang Ying <ying.huang@intel.com>
 *	    Vinodh Gopal
 *	    Erdinc Ozturk
 *	    Deniz Karakoyunlu
 */
15
16	#include <linux/linkage.h>
17	#include <asm/frame.h>
18
/*
 * 16-byte control mask for pshufb. Stored little-endian, its bytes are
 * 0x0f, 0x0e, ..., 0x00, so pshufb with this mask reverses the byte order
 * of a whole XMM register. It lives in a mergeable 16-byte-entry rodata
 * section and is 16-byte aligned, which is what allows the movaps loads
 * used elsewhere in this file.
 */
.section	.rodata.cst16.bswap_mask, "aM", @progbits, 16
.align 16
.Lbswap_mask:
	.octa 0x000102030405060708090a0b0c0d0e0f
23
/*
 * XMM register assignments shared by every routine in this file.
 */
#define DATA	%xmm0	/* value being multiplied; receives the product */
#define SHASH	%xmm1	/* hash key operand, only read by the multiply */
#define T1	%xmm2	/* scratch */
#define T2	%xmm3	/* scratch */
#define T3	%xmm4	/* scratch */
#define BSWAP	%xmm5	/* byte-reversal mask loaded from .Lbswap_mask */
#define IN1	%xmm6	/* input block loaded from src in clmul_ghash_update */

.text
33
/*
 * __clmul_gf128mul_ble:	internal ABI
 *
 * Multiplies two 128-bit operands using three 64x64 carry-less
 * multiplications (a0*b0, a1*b1 and (a1+a0)*(b1+b0), from which the
 * middle term is derived), then folds the 256-bit product back to
 * 128 bits in two reduction phases.
 *
 * Notation in the comments below: a = DATA = <a1:a0>, b = SHASH = <b1:b0>,
 * "+" is XOR, and shifts are per 64-bit lane unless stated otherwise.
 *
 * input:
 *	DATA:	operand1
 *	SHASH:	operand2, hash_key << 1 mod poly
 * output:
 *	DATA:	operand1 * operand2 mod poly
 * changed:
 *	T1
 *	T2
 *	T3
 *
 * SHASH is only ever used as a source operand, so it is preserved.
 * NOTE(review): the shift counts 1, 2 and 7 used in the reduction
 * presumably come from the GHASH polynomial x^128 + x^7 + x^2 + x + 1;
 * confirm against the whitepaper referenced in the file header.
 */
SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
	movaps DATA, T1			# T1 = a, kept for a1 * b1
	pshufd $0b01001110, DATA, T2	# T2 = a with its 64-bit halves swapped
	pshufd $0b01001110, SHASH, T3	# T3 = b with its 64-bit halves swapped
	pxor DATA, T2			# both halves of T2 = a1 + a0
	pxor SHASH, T3			# both halves of T3 = b1 + b0

	pclmulqdq $0x00, SHASH, DATA	# DATA = a0 * b0
	pclmulqdq $0x11, SHASH, T1	# T1 = a1 * b1
	pclmulqdq $0x00, T3, T2		# T2 = (a1 + a0) * (b1 + b0)
	pxor DATA, T2
	pxor T1, T2			# T2 = a0 * b1 + a1 * b0

	movaps T2, T3
	pslldq $8, T3			# low 64 bits of the middle term, moved up
	psrldq $8, T2			# high 64 bits of the middle term, moved down
	pxor T3, DATA
	pxor T2, T1			# <T1:DATA> is result of
					# carry-less multiplication

	# first phase of the reduction
	/* T3 = (DATA << 63) + (DATA << 62) + (DATA << 57), built incrementally */
	movaps DATA, T3
	psllq $1, T3
	pxor DATA, T3
	psllq $5, T3
	pxor DATA, T3
	psllq $57, T3
	movaps T3, T2
	pslldq $8, T2			# low lane of T3 lands in the high lane
	psrldq $8, T3			# high lane of T3 lands in the low lane
	pxor T2, DATA
	pxor T3, T1

	# second phase of the reduction
	/* T2 = (DATA >> 7) + (DATA >> 2) + (DATA >> 1), built incrementally */
	movaps DATA, T2
	psrlq $5, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor T2, T1
	pxor T1, DATA			# DATA = DATA + T1 + T2, the reduced product
	RET
SYM_FUNC_END(__clmul_gf128mul_ble)
90
/*
 * void clmul_ghash_mul(char *dst, const le128 *shash)
 *
 * Multiplies the 16-byte block at dst by the key at shash and stores the
 * product back to dst.
 *
 *	%rdi:	dst, read and then overwritten (unaligned 16-byte accesses)
 *	%rsi:	shash, read only (unaligned 16-byte access)
 *
 * The block is byte-reversed with .Lbswap_mask before the multiplication
 * and again before it is stored; the key is used exactly as loaded.
 * Uses DATA, SHASH, T1-T3 and BSWAP (%xmm0-%xmm5).
 */
SYM_FUNC_START(clmul_ghash_mul)
	FRAME_BEGIN
	movups (%rdi), DATA
	movups (%rsi), SHASH
	movaps .Lbswap_mask(%rip), BSWAP
	pshufb BSWAP, DATA		# byte-reverse the block for the multiply
	call __clmul_gf128mul_ble	# DATA = DATA * SHASH mod poly
	pshufb BSWAP, DATA		# restore the in-memory byte order
	movups DATA, (%rdi)
	FRAME_END
	RET
SYM_FUNC_END(clmul_ghash_mul)
104
/*
 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
 *			   const le128 *shash);
 *
 * For each complete 16-byte block of src, in order:
 *	dst = (dst ^ block) * shash
 *
 *	%rdi:	dst, 16-byte accumulator, read once and written once
 *	%rsi:	src, advanced by 16 for every block consumed
 *	%edx:	srclen in bytes
 *	%rcx:	shash, read only
 *
 * If srclen is below 16 the routine returns without touching dst.
 * A trailing partial block (srclen % 16 bytes) is not consumed.
 * Uses DATA, SHASH, T1-T3, BSWAP and IN1 (%xmm0-%xmm6).
 *
 * srclen is a 32-bit unsigned value, so the calling convention only
 * defines the low half of %rdx. The length is therefore tracked in %edx
 * and both length tests use unsigned conditions.
 */
SYM_FUNC_START(clmul_ghash_update)
	FRAME_BEGIN
	cmp $16, %edx
	jb .Lupdate_just_ret	# check length
	movaps .Lbswap_mask(%rip), BSWAP
	movups (%rdi), DATA
	movups (%rcx), SHASH
	pshufb BSWAP, DATA		# accumulator stays byte-reversed in the loop
.align 4
.Lupdate_loop:
	movups (%rsi), IN1
	pshufb BSWAP, IN1
	pxor IN1, DATA			# fold the next block into the accumulator
	call __clmul_gf128mul_ble	# preserves SHASH, BSWAP, %rsi and %edx
	sub $16, %edx			# cannot wrap: at least 16 bytes remained
	add $16, %rsi
	cmp $16, %edx
	jae .Lupdate_loop		# unsigned, matching the entry check
	pshufb BSWAP, DATA		# restore the in-memory byte order
	movups DATA, (%rdi)
.Lupdate_just_ret:
	FRAME_END
	RET
SYM_FUNC_END(clmul_ghash_update)
133

source code of linux/arch/x86/crypto/ghash-clmulni-intel_asm.S