1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/*
3 * Fast MD5 implementation for PPC
4 *
5 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
6 */
7#include <asm/ppc_asm.h>
8#include <asm/asm-offsets.h>
9#include <asm/asm-compat.h>
10
/*
 * Register allocation.
 *
 * r0 and r3-r12 are used without being saved; r14-r26 are saved and
 * restored by INITIALIZE/FINALIZE.
 */

/* Pointers handed in by the caller */
#define rHP	r3	/* address of the four 32-bit state words	*/
#define rWP	r4	/* address of the input data being consumed	*/

/*
 * Working copy of the state.
 *
 * rH0 is r0, which reads as the constant zero when it is the base
 * operand of addi/addis or of a displacement load/store. It is
 * therefore only ever used as a plain source or target register.
 *
 * rH3 is r5, the register in which the block count arrives; the count
 * is moved to CTR before rH3 is loaded.
 */
#define rH0	r0
#define rH1	r6
#define rH2	r7
#define rH3	r5

/* The sixteen 32-bit words of the current input block */
#define rW00	r8
#define rW01	r9
#define rW02	r10
#define rW03	r11
#define rW04	r12
#define rW05	r14
#define rW06	r15
#define rW07	r16
#define rW08	r17
#define rW09	r18
#define rW10	r19
#define rW11	r20
#define rW12	r21
#define rW13	r22
#define rW14	r23
#define rW15	r24

/* Scratch registers overwritten by every round macro */
#define rT0	r25
#define rT1	r26
38
/*
 * Function prologue: allocate INT_FRAME_SIZE bytes of stack (the old
 * stack pointer is stored at the new stack top) and save r14-r26 in
 * that frame. Must be paired with FINALIZE.
 */
#define INITIALIZE \
	PPC_STLU	r1,-INT_FRAME_SIZE(r1);	/* open the frame	*/ \
	SAVE_GPRS(14, 26, r1)			/* stash r14..r26	*/
42
/*
 * Function epilogue: reload r14-r26 from the frame created by
 * INITIALIZE, then release that frame.
 */
#define FINALIZE \
	REST_GPRS(14, 26, r1);			/* recover r14..r26	*/ \
	addi		r1,r1,INT_FRAME_SIZE	/* close the frame	*/
46
/*
 * Input access helpers.
 *
 * MD5 message words are little-endian. Whichever variant is selected,
 * one block moves rWP forward by 64 bytes in total:
 *
 *  - big endian:    every word is fetched with a byte-reversing load
 *                   straight from rWP (the "off" argument is ignored)
 *                   and INC_PTR steps rWP to the next word afterwards;
 *                   NEXT_BLOCK has nothing left to do.
 *  - little endian: words are fetched at a fixed displacement from
 *                   rWP, INC_PTR is empty and NEXT_BLOCK steps rWP
 *                   over the whole block at once.
 */
#if defined(__BIG_ENDIAN__)
#define LOAD_DATA(reg, off) \
	lwbrx		reg,0,rWP;	/* byte-reversed load at rWP	*/
#define INC_PTR \
	addi		rWP,rWP,4;	/* step to the next word	*/
#define NEXT_BLOCK			/* rWP was advanced per word	*/
#else
#define LOAD_DATA(reg, off) \
	lwz		reg,off(rWP);	/* plain load at rWP + off	*/
#define INC_PTR				/* rWP is fixed within a block	*/
#define NEXT_BLOCK \
	addi		rWP,rWP,64;	/* step over the finished block	*/
#endif /* __BIG_ENDIAN__ */
60
/*
 * R_00_15 - two consecutive steps using F(x, y, z) = (x & y) | (~x & z)
 *
 *   step 1:  a = b + rotr(a + F(b, c, d) + w0 + K0, rot_a)
 *   step 2:  d = a + rotr(d + F(a, b, c) + w1 + K1, rot_d)
 *
 * a, b, c, d     state registers; a and d are updated, b and c only read
 * w0, w1         registers that receive the two input words; each is
 *                left holding "word + K" afterwards
 * rot_a, rot_d   right-rotate counts (a left rotate by 32 - count)
 * off            byte offset of w0 in the block; w1 comes from off + 4
 * k0h/k0l        K0 split for addis/addi: K0 = (k0h << 16) + k0l, with
 *                k0l treated as a signed 16-bit value
 * k1h/k1l        same split for K1
 *
 * Clobbers rT0 and rT1. The two steps are interleaved on purpose; keep
 * the instruction order when editing.
 */
#define R_00_15(a, b, c, d, w0, w1, rot_a, rot_d, off, k0h, k0l, k1h, k1l) \
	LOAD_DATA(w0, off)		/*    fetch first word		*/ \
	and		rT0,b,c;	/* 1: t0 = b & c		*/ \
	INC_PTR				/*    next word (BE only)	*/ \
	andc		rT1,d,b;	/* 1: t1 = d & ~b		*/ \
	LOAD_DATA(w1, off+4)		/*    fetch second word		*/ \
	or		rT0,rT0,rT1;	/* 1: t0 = F(b, c, d)		*/ \
	addi		w0,w0,k0l;	/* 1: w0 += k0l			*/ \
	add		a,a,rT0;	/* 1: a += F			*/ \
	addis		w0,w0,k0h;	/* 1: w0 += k0h << 16		*/ \
	addis		w1,w1,k1h;	/* 2: w1 += k1h << 16		*/ \
	add		a,a,w0;		/* 1: a += w0 + K0		*/ \
	addi		w1,w1,k1l;	/* 2: w1 += k1l			*/ \
	rotrwi		a,a,rot_a;	/* 1: rotate the sum		*/ \
	add		d,d,w1;		/* 2: d += w1 + K1		*/ \
	add		a,a,b;		/* 1: a += b, step 1 done	*/ \
	and		rT0,a,b;	/* 2: t0 = a & b		*/ \
	andc		rT1,c,a;	/* 2: t1 = c & ~a		*/ \
	or		rT0,rT0,rT1;	/* 2: t0 = F(a, b, c)		*/ \
	add		d,d,rT0;	/* 2: d += F			*/ \
	INC_PTR				/*    next word (BE only)	*/ \
	rotrwi		d,d,rot_d;	/* 2: rotate the sum		*/ \
	add		d,d,a;		/* 2: d += a, step 2 done	*/
84
/*
 * R_16_31 - two consecutive steps using G(x, y, z) = (x & z) | (y & ~z)
 *
 *   step 1:  a = b + rotr(a + G(b, c, d) + w0 + k0, rot_a)
 *   step 2:  d = a + rotr(d + G(a, b, c) + w1 + k1, rot_d)
 *
 * a, b, c, d     state registers; a and d are updated, b and c only read
 * w0, w1         word registers; read, increased by k0/k1 and left that
 *                way (nothing is loaded from memory here)
 * rot_a, rot_d   right-rotate counts (a left rotate by 32 - count)
 * k0h/k0l        k0 split for addis/addi: k0 = (k0h << 16) + k0l, with
 *                k0l treated as a signed 16-bit value
 * k1h/k1l        same split for k1
 *
 * Clobbers rT0 and rT1. The two steps are interleaved on purpose; keep
 * the instruction order when editing.
 */
#define R_16_31(a, b, c, d, w0, w1, rot_a, rot_d, k0h, k0l, k1h, k1l) \
	andc		rT0,c,d;	/* 1: t0 = c & ~d		*/ \
	and		rT1,b,d;	/* 1: t1 = b & d		*/ \
	addi		w0,w0,k0l;	/* 1: w0 += k0l			*/ \
	or		rT0,rT0,rT1;	/* 1: t0 = G(b, c, d)		*/ \
	addis		w0,w0,k0h;	/* 1: w0 += k0h << 16		*/ \
	add		a,a,rT0;	/* 1: a += G			*/ \
	addi		w1,w1,k1l;	/* 2: w1 += k1l			*/ \
	add		a,a,w0;		/* 1: a += w0 + k0		*/ \
	addis		w1,w1,k1h;	/* 2: w1 += k1h << 16		*/ \
	andc		rT0,b,c;	/* 2: t0 = b & ~c		*/ \
	rotrwi		a,a,rot_a;	/* 1: rotate the sum		*/ \
	add		a,a,b;		/* 1: a += b, step 1 done	*/ \
	add		d,d,w1;		/* 2: d += w1 + k1		*/ \
	and		rT1,a,c;	/* 2: t1 = a & c		*/ \
	or		rT0,rT0,rT1;	/* 2: t0 = G(a, b, c)		*/ \
	add		d,d,rT0;	/* 2: d += G			*/ \
	rotrwi		d,d,rot_d;	/* 2: rotate the sum		*/ \
	add		d,d,a;		/* 2: d += a, step 2 done	*/
104
/*
 * R_32_47 - two consecutive steps using H(x, y, z) = x ^ y ^ z
 *
 *   step 1:  a = b + rotr(a + H(b, c, d) + w0 + k0, rot_a)
 *   step 2:  d = a + rotr(d + H(a, b, c) + w1 + k1, rot_d)
 *
 * b ^ c is computed once in rT0 and shared by both steps.
 *
 * a, b, c, d     state registers; a and d are updated, b and c only read
 * w0, w1         word registers; read, increased by k0/k1 and left that
 *                way (nothing is loaded from memory here)
 * rot_a, rot_d   right-rotate counts (a left rotate by 32 - count)
 * k0h/k0l        k0 split for addis/addi: k0 = (k0h << 16) + k0l, with
 *                k0l treated as a signed 16-bit value
 * k1h/k1l        same split for k1
 *
 * Clobbers rT0 and rT1. The two steps are interleaved on purpose; keep
 * the instruction order when editing.
 */
#define R_32_47(a, b, c, d, w0, w1, rot_a, rot_d, k0h, k0l, k1h, k1l) \
	xor		rT0,b,c;	/* *: t0 = b ^ c (both steps)	*/ \
	addi		w0,w0,k0l;	/* 1: w0 += k0l			*/ \
	xor		rT1,rT0,d;	/* 1: t1 = H(b, c, d)		*/ \
	addis		w0,w0,k0h;	/* 1: w0 += k0h << 16		*/ \
	add		a,a,rT1;	/* 1: a += H			*/ \
	addi		w1,w1,k1l;	/* 2: w1 += k1l			*/ \
	add		a,a,w0;		/* 1: a += w0 + k0		*/ \
	addis		w1,w1,k1h;	/* 2: w1 += k1h << 16		*/ \
	rotrwi		a,a,rot_a;	/* 1: rotate the sum		*/ \
	add		d,d,w1;		/* 2: d += w1 + k1		*/ \
	add		a,a,b;		/* 1: a += b, step 1 done	*/ \
	xor		rT1,rT0,a;	/* 2: t1 = H(a, b, c)		*/ \
	add		d,d,rT1;	/* 2: d += H			*/ \
	rotrwi		d,d,rot_d;	/* 2: rotate the sum		*/ \
	add		d,d,a;		/* 2: d += a, step 2 done	*/
121
/*
 * R_48_63 - two consecutive steps using I(x, y, z) = y ^ (x | ~z)
 *
 *   step 1:  a = b + rotr(a + I(b, c, d) + w0 + k0, rot_a)
 *   step 2:  d = a + rotr(d + I(a, b, c) + w1 + k1, rot_d)
 *
 * a, b, c, d     state registers; a and d are updated, b and c only read
 * w0, w1         word registers; read, increased by k0/k1 and left that
 *                way (nothing is loaded from memory here)
 * rot_a, rot_d   right-rotate counts (a left rotate by 32 - count)
 * k0h/k0l        k0 split for addis/addi: k0 = (k0h << 16) + k0l, with
 *                k0l treated as a signed 16-bit value
 * k1h/k1l        same split for k1
 *
 * Clobbers rT0 only. The two steps are interleaved on purpose; keep
 * the instruction order when editing.
 */
#define R_48_63(a, b, c, d, w0, w1, rot_a, rot_d, k0h, k0l, k1h, k1l) \
	addi		w0,w0,k0l;	/* 1: w0 += k0l			*/ \
	orc		rT0,b,d;	/* 1: t0 = b | ~d		*/ \
	addis		w0,w0,k0h;	/* 1: w0 += k0h << 16		*/ \
	xor		rT0,rT0,c;	/* 1: t0 = I(b, c, d)		*/ \
	add		a,a,w0;		/* 1: a += w0 + k0		*/ \
	addi		w1,w1,k1l;	/* 2: w1 += k1l			*/ \
	add		a,a,rT0;	/* 1: a += I			*/ \
	addis		w1,w1,k1h;	/* 2: w1 += k1h << 16		*/ \
	rotrwi		a,a,rot_a;	/* 1: rotate the sum		*/ \
	add		a,a,b;		/* 1: a += b, step 1 done	*/ \
	orc		rT0,a,c;	/* 2: t0 = a | ~c		*/ \
	add		d,d,w1;		/* 2: d += w1 + k1		*/ \
	xor		rT0,rT0,b;	/* 2: t0 = I(a, b, c)		*/ \
	add		d,d,rT0;	/* 2: d += I			*/ \
	rotrwi		d,d,rot_d;	/* 2: rotate the sum		*/ \
	add		d,d,a;		/* 2: d += a, step 2 done	*/
139
/*
 * ppc_md5_transform - run the MD5 block function over whole 64-byte blocks
 *
 * In:	r3 (rHP)  address of the four 32-bit state words; they are read
 *		  once on entry and written back after every block
 *	r4 (rWP)  address of the input; 64 bytes are consumed per block
 *	r5	  number of blocks to process
 *
 * Presumably declared by the C glue code as
 *	void ppc_md5_transform(u32 *state, const u8 *src, u32 blocks);
 * -- confirm against the caller.
 *
 * Uses r0, r3-r12, CTR and CR0 without preserving them; r14-r26 are
 * saved and restored through INITIALIZE/FINALIZE.
 *
 * A block count of zero returns at once, leaving state and stack
 * untouched. The count feeds CTR, and bdnz decrements before it tests,
 * so a zero count would otherwise wrap around and keep the loop
 * running far past the end of the input.
 */
_GLOBAL(ppc_md5_transform)
	cmpwi		r5,0		/* nothing to hash?		*/
	beqlr				/* then there is nothing to do	*/

	INITIALIZE

	mtctr		r5		/* CTR = blocks; r5 is now rH3	*/
	lwz		rH0,0(rHP)
	lwz		rH1,4(rHP)
	lwz		rH2,8(rHP)
	lwz		rH3,12(rHP)

ppc_md5_main:
	/*
	 * Steps 0-15. The block is loaded here and every word register
	 * is left holding "word + constant".
	 */
	R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
		0xd76b, -23432, 0xe8c8, -18602)
	R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
		0x2420, 0x70db, 0xc1be, -12562)
	R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
		0xf57c, 0x0faf, 0x4788, -14806)
	R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
		0xa830, 0x4613, 0xfd47, -27391)
	R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
		0x6981, -26408, 0x8b45, -2129)
	R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
		0xffff, 0x5bb1, 0x895d, -10306)
	R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
		0x6b90, 0x1122, 0xfd98, 0x7193)
	R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
		0xa679, 0x438e, 0x49b4, 0x0821)

	/*
	 * Steps 16-31. The word registers are never reloaded, so from
	 * here on each constant is the difference between the MD5
	 * constant of the step and the one already added to that word.
	 */
	R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
		0x0d56, 0x6e0c, 0x1810, 0x6d2d)
	R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
		0x9d02, -32109, 0x124c, 0x2332)
	R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
		0x8ea7, 0x4a33, 0x0245, -18270)
	R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
		0x8eee, -8608, 0xf258, -5095)
	R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
		0x969d, -10697, 0x1cbe, -15288)
	R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
		0x3317, 0x3e99, 0xdbd9, 0x7c15)
	R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
		0xac4b, 0x7772, 0xd8cf, 0x331d)
	R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
		0x6a28, 0x6dd8, 0x219a, 0x3b68)

	/* Steps 32-47 */
	R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
		0x29cb, 0x28e5, 0x4218, -7788)
	R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16, 9,
		0x473f, 0x06d1, 0x3aae, 0x3036)
	R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
		0xaea1, -15134, 0x640b, -11295)
	R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16, 9,
		0x8f4c, 0x4887, 0xbc7c, -22499)
	R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
		0x7eb8, -27199, 0x00ea, 0x6050)
	R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16, 9,
		0xe01a, 0x22fe, 0x4447, 0x69c5)
	R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
		0xb7f3, 0x0253, 0x59b1, 0x4d5b)
	R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16, 9,
		0x4701, -27017, 0xc7bd, -19859)

	/*
	 * Steps 48-63. rW00, rW14, rW12 and rW10 are not needed after
	 * the first four macros below, so they are reused to fetch the
	 * state as it was before this block; the loads are spread
	 * between the remaining steps.
	 */
	R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
		0x0988, -1462, 0x4c70, -19401)
	R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
		0xadaf, -5221, 0xfc99, 0x66f7)
	R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
		0x7e80, -16418, 0xba1e, -25587)
	R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
		0x4130, 0x380d, 0xe0c5, 0x738d)
	lwz		rW00,0(rHP)	/* previous state word 0	*/
	R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
		0xe837, -30770, 0xde8a, 0x69e8)
	lwz		rW14,4(rHP)	/* previous state word 1	*/
	R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
		0x9e79, 0x260f, 0x256d, -27941)
	lwz		rW12,8(rHP)	/* previous state word 2	*/
	R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
		0xab75, -20775, 0x4f9e, -28397)
	lwz		rW10,12(rHP)	/* previous state word 3	*/
	R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
		0x662b, 0x7c56, 0x11b2, 0x0358)

	/*
	 * Add the previous state to the result and write it back. The
	 * sums stay in rH0-rH3 as the starting state of the next block.
	 */
	add		rH0,rH0,rW00
	stw		rH0,0(rHP)
	add		rH1,rH1,rW14
	stw		rH1,4(rHP)
	add		rH2,rH2,rW12
	stw		rH2,8(rHP)
	add		rH3,rH3,rW10
	stw		rH3,12(rHP)
	NEXT_BLOCK

	bdnz		ppc_md5_main	/* loop while blocks remain	*/

	FINALIZE
	blr
236

source code of linux/arch/powerpc/crypto/md5-asm.S