1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * arch/powerpc/math-emu/math_efp.c |
4 | * |
5 | * Copyright (C) 2006-2008, 2010 Freescale Semiconductor, Inc. |
6 | * |
7 | * Author: Ebony Zhu, <ebony.zhu@freescale.com> |
8 | * Yu Liu, <yu.liu@freescale.com> |
9 | * |
10 | * Derived from arch/alpha/math-emu/math.c |
11 | * arch/powerpc/math-emu/math.c |
12 | * |
13 | * Description: |
14 | * This file is the exception handler to make E500 SPE instructions |
15 | * fully comply with IEEE-754 floating point standard. |
16 | */ |
17 | |
18 | #include <linux/types.h> |
19 | #include <linux/prctl.h> |
20 | #include <linux/module.h> |
21 | |
22 | #include <linux/uaccess.h> |
23 | #include <asm/reg.h> |
24 | |
25 | #define FP_EX_BOOKE_E500_SPE |
26 | #include <asm/sfp-machine.h> |
27 | |
28 | #include <math-emu/soft-fp.h> |
29 | #include <math-emu/single.h> |
30 | #include <math-emu/double.h> |
31 | |
/*
 * Value expected in the top six bits (bits 26..31) of an instruction word
 * before the handlers below will try to emulate it.
 */
#define EFAPU 0x4

/*
 * Values of instruction bits 5..7. do_spe_mathemu() and speround_handler()
 * switch on this field to choose between the vector single-precision (VCT),
 * scalar single-precision (SPFP) and scalar double-precision (DPFP) paths.
 */
#define VCT 0x4
#define SPFP 0x6
#define DPFP 0x7

/*
 * Function codes: the low 11 bits of the instruction word (speinsn & 0x7ff).
 * This first group is handled by the scalar single-precision (SPFP) path.
 */
#define EFSADD 0x2c0
#define EFSSUB 0x2c1
#define EFSABS 0x2c4
#define EFSNABS 0x2c5
#define EFSNEG 0x2c6
#define EFSMUL 0x2c8
#define EFSDIV 0x2c9
#define EFSCMPGT 0x2cc
#define EFSCMPLT 0x2cd
#define EFSCMPEQ 0x2ce
#define EFSCFD 0x2cf
#define EFSCFSI 0x2d1
#define EFSCTUI 0x2d4
#define EFSCTSI 0x2d5
#define EFSCTUF 0x2d6
#define EFSCTSF 0x2d7
#define EFSCTUIZ 0x2d8
#define EFSCTSIZ 0x2da

/* Function codes handled by the vector single-precision (VCT) path. */
#define EVFSADD 0x280
#define EVFSSUB 0x281
#define EVFSABS 0x284
#define EVFSNABS 0x285
#define EVFSNEG 0x286
#define EVFSMUL 0x288
#define EVFSDIV 0x289
#define EVFSCMPGT 0x28c
#define EVFSCMPLT 0x28d
#define EVFSCMPEQ 0x28e
#define EVFSCTUI 0x294
#define EVFSCTSI 0x295
#define EVFSCTUF 0x296
#define EVFSCTSF 0x297
#define EVFSCTUIZ 0x298
#define EVFSCTSIZ 0x29a

/* Function codes handled by the scalar double-precision (DPFP) path. */
#define EFDADD 0x2e0
#define EFDSUB 0x2e1
#define EFDABS 0x2e4
#define EFDNABS 0x2e5
#define EFDNEG 0x2e6
#define EFDMUL 0x2e8
#define EFDDIV 0x2e9
#define EFDCTUIDZ 0x2ea
#define EFDCTSIDZ 0x2eb
#define EFDCMPGT 0x2ec
#define EFDCMPLT 0x2ed
#define EFDCMPEQ 0x2ee
#define EFDCFS 0x2ef
#define EFDCTUI 0x2f4
#define EFDCTSI 0x2f5
#define EFDCTUF 0x2f6
#define EFDCTSF 0x2f7
#define EFDCTUIZ 0x2f8
#define EFDCTSIZ 0x2fa

/*
 * Operand classes returned by insn_type():
 *   AB     - operands A and B are both unpacked
 *   XA     - only operand A is unpacked
 *   XB     - only operand B is unpacked
 *   XCR    - compare: A and B are unpacked and the result goes to regs->ccr
 *   NOTYPE - function code not recognised
 */
#define AB 2
#define XA 3
#define XB 4
#define XCR 5
#define NOTYPE 0

/* Top bit of a 32-bit word and of a 64-bit doubleword, used as sign bits. */
#define SIGN_BIT_S (1UL << 31)
#define SIGN_BIT_D (1ULL << 63)
/* Exception bits that do_spe_mathemu() merges from FP_CUR_EXCEPTIONS into SPEFSCR. */
#define FP_EX_MASK (FP_EX_INEXACT | FP_EX_INVALID | FP_EX_DIVZERO | \
		FP_EX_UNDERFLOW | FP_EX_OVERFLOW)

/*
 * Set to 1 by spe_mathemu_init() on affected core revisions. When set,
 * do_spe_mathemu() re-issues an instruction it cannot handle instead of
 * reporting it as unsupported.
 */
static int have_e500_cpu_a005_erratum;
106 | |
/*
 * One 64-bit register image, accessible either as a single doubleword
 * (dp[0]) or as two 32-bit words. The handlers in this file load wp[0]
 * from current->thread.evr[] and wp[1] from regs->gpr[].
 * NOTE(review): treating wp[0] as the upper half of dp[0] assumes a
 * big-endian layout - confirm for the target configuration.
 */
union dw_union {
	u64 dp[1];
	u32 wp[2];
};
111 | |
112 | static unsigned long insn_type(unsigned long speinsn) |
113 | { |
114 | unsigned long ret = NOTYPE; |
115 | |
116 | switch (speinsn & 0x7ff) { |
117 | case EFSABS: ret = XA; break; |
118 | case EFSADD: ret = AB; break; |
119 | case EFSCFD: ret = XB; break; |
120 | case EFSCMPEQ: ret = XCR; break; |
121 | case EFSCMPGT: ret = XCR; break; |
122 | case EFSCMPLT: ret = XCR; break; |
123 | case EFSCTSF: ret = XB; break; |
124 | case EFSCTSI: ret = XB; break; |
125 | case EFSCTSIZ: ret = XB; break; |
126 | case EFSCTUF: ret = XB; break; |
127 | case EFSCTUI: ret = XB; break; |
128 | case EFSCTUIZ: ret = XB; break; |
129 | case EFSDIV: ret = AB; break; |
130 | case EFSMUL: ret = AB; break; |
131 | case EFSNABS: ret = XA; break; |
132 | case EFSNEG: ret = XA; break; |
133 | case EFSSUB: ret = AB; break; |
134 | case EFSCFSI: ret = XB; break; |
135 | |
136 | case EVFSABS: ret = XA; break; |
137 | case EVFSADD: ret = AB; break; |
138 | case EVFSCMPEQ: ret = XCR; break; |
139 | case EVFSCMPGT: ret = XCR; break; |
140 | case EVFSCMPLT: ret = XCR; break; |
141 | case EVFSCTSF: ret = XB; break; |
142 | case EVFSCTSI: ret = XB; break; |
143 | case EVFSCTSIZ: ret = XB; break; |
144 | case EVFSCTUF: ret = XB; break; |
145 | case EVFSCTUI: ret = XB; break; |
146 | case EVFSCTUIZ: ret = XB; break; |
147 | case EVFSDIV: ret = AB; break; |
148 | case EVFSMUL: ret = AB; break; |
149 | case EVFSNABS: ret = XA; break; |
150 | case EVFSNEG: ret = XA; break; |
151 | case EVFSSUB: ret = AB; break; |
152 | |
153 | case EFDABS: ret = XA; break; |
154 | case EFDADD: ret = AB; break; |
155 | case EFDCFS: ret = XB; break; |
156 | case EFDCMPEQ: ret = XCR; break; |
157 | case EFDCMPGT: ret = XCR; break; |
158 | case EFDCMPLT: ret = XCR; break; |
159 | case EFDCTSF: ret = XB; break; |
160 | case EFDCTSI: ret = XB; break; |
161 | case EFDCTSIDZ: ret = XB; break; |
162 | case EFDCTSIZ: ret = XB; break; |
163 | case EFDCTUF: ret = XB; break; |
164 | case EFDCTUI: ret = XB; break; |
165 | case EFDCTUIDZ: ret = XB; break; |
166 | case EFDCTUIZ: ret = XB; break; |
167 | case EFDDIV: ret = AB; break; |
168 | case EFDMUL: ret = AB; break; |
169 | case EFDNABS: ret = XA; break; |
170 | case EFDNEG: ret = XA; break; |
171 | case EFDSUB: ret = AB; break; |
172 | } |
173 | |
174 | return ret; |
175 | } |
176 | |
/*
 * do_spe_mathemu() - emulate the SPE floating-point instruction at regs->nip.
 *
 * The instruction word is fetched from user memory, decoded, and executed
 * with the soft-fp macros. Each operand register n is treated as a 64-bit
 * value whose first word comes from current->thread.evr[n] and whose second
 * word comes from regs->gpr[n]. Arithmetic and conversion results are written
 * back to register fc; compare results are written to a 4-bit field of
 * regs->ccr. Exceptions raised during emulation are merged into SPEFSCR.
 *
 * Return:
 *   -EFAULT  the instruction word could not be read from user memory
 *   -EINVAL  the top six bits are not EFAPU, or bits 5..7 select none of
 *            SPFP, DPFP or VCT
 *   -ENOSYS  the function code is not supported (and the A005 erratum
 *            workaround is not active)
 *   1        PR_FP_EXC_SW_ENABLE is set in the task's fpexc_mode and an
 *            exception raised here matches an enabled PR_FP_EXC_* bit
 *   0        otherwise, including the erratum path that re-issues the
 *            instruction
 */
int do_spe_mathemu(struct pt_regs *regs)
{
	FP_DECL_EX;
	/* IR: compare result / CR field value; cmp: compare outcome being tested */
	int IR, cmp;

	unsigned long type, func, fc, fa, fb, src, speinsn;
	union dw_union vc, va, vb;

	if (get_user(speinsn, (unsigned int __user *) regs->nip))
		return -EFAULT;
	if ((speinsn >> 26) != EFAPU)
		return -EINVAL; /* not an spe instruction */

	type = insn_type(speinsn);
	if (type == NOTYPE)
		goto illegal;

	/* Decode: function code, the three 5-bit register numbers, format. */
	func = speinsn & 0x7ff;
	fc = (speinsn >> 21) & 0x1f;
	fa = (speinsn >> 16) & 0x1f;
	fb = (speinsn >> 11) & 0x1f;
	src = (speinsn >> 5) & 0x7;

	/*
	 * Load all three register images. vc is preloaded with the current
	 * destination contents so that words a case does not write are
	 * stored back unchanged at update_regs.
	 */
	vc.wp[0] = current->thread.evr[fc];
	vc.wp[1] = regs->gpr[fc];
	va.wp[0] = current->thread.evr[fa];
	va.wp[1] = regs->gpr[fa];
	vb.wp[0] = current->thread.evr[fb];
	vb.wp[1] = regs->gpr[fb];

	__FPU_FPSCR = mfspr(SPRN_SPEFSCR);

	pr_debug("speinsn:%08lx spefscr:%08lx\n" , speinsn, __FPU_FPSCR);
	pr_debug("vc: %08x %08x\n" , vc.wp[0], vc.wp[1]);
	pr_debug("va: %08x %08x\n" , va.wp[0], va.wp[1]);
	pr_debug("vb: %08x %08x\n" , vb.wp[0], vb.wp[1]);

	switch (src) {
	/* Scalar single precision: operands live in the second word (wp[1]). */
	case SPFP: {
		FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);

		/* Unpack only the operands this instruction class reads. */
		switch (type) {
		case AB:
		case XCR:
			FP_UNPACK_SP(SA, va.wp + 1);
			fallthrough;
		case XB:
			FP_UNPACK_SP(SB, vb.wp + 1);
			break;
		case XA:
			FP_UNPACK_SP(SA, va.wp + 1);
			break;
		}

		pr_debug("SA: %d %08x %d (%d)\n" , SA_s, SA_f, SA_e, SA_c);
		pr_debug("SB: %d %08x %d (%d)\n" , SB_s, SB_f, SB_e, SB_c);

		switch (func) {
		/* Sign manipulation works directly on the raw bit pattern. */
		case EFSABS:
			vc.wp[1] = va.wp[1] & ~SIGN_BIT_S;
			goto update_regs;

		case EFSNABS:
			vc.wp[1] = va.wp[1] | SIGN_BIT_S;
			goto update_regs;

		case EFSNEG:
			vc.wp[1] = va.wp[1] ^ SIGN_BIT_S;
			goto update_regs;

		case EFSADD:
			FP_ADD_S(SR, SA, SB);
			goto pack_s;

		case EFSSUB:
			FP_SUB_S(SR, SA, SB);
			goto pack_s;

		case EFSMUL:
			FP_MUL_S(SR, SA, SB);
			goto pack_s;

		case EFSDIV:
			FP_DIV_S(SR, SA, SB);
			goto pack_s;

		/* cmp holds the FP_CMP_S outcome that makes the compare true. */
		case EFSCMPEQ:
			cmp = 0;
			goto cmp_s;

		case EFSCMPGT:
			cmp = 1;
			goto cmp_s;

		case EFSCMPLT:
			cmp = -1;
			goto cmp_s;

		/* Convert to signed/unsigned fixed-point fraction. */
		case EFSCTSF:
		case EFSCTUF:
			if (SB_c == FP_CLS_NAN) {
				/* NaN converts to 0 and raises invalid. */
				vc.wp[1] = 0;
				FP_SET_EXCEPTION(FP_EX_INVALID);
			} else {
				/*
				 * Bump the unpacked exponent, i.e. scale by
				 * 2^31 (signed) or 2^32 (unsigned), then
				 * convert as an integer.
				 */
				SB_e += (func == EFSCTSF ? 31 : 32);
				FP_TO_INT_ROUND_S(vc.wp[1], SB, 32,
						(func == EFSCTSF) ? 1 : 0);
			}
			goto update_regs;

		/* Convert double (full vb) to single. */
		case EFSCFD: {
			FP_DECL_D(DB);
			/*
			 * The operand is a double, so the single-precision
			 * unpack of vb above used the wrong format;
			 * presumably this drops whatever it recorded before
			 * unpacking again as a double.
			 */
			FP_CLEAR_EXCEPTIONS;
			FP_UNPACK_DP(DB, vb.dp);

			pr_debug("DB: %d %08x %08x %d (%d)\n" ,
					DB_s, DB_f1, DB_f0, DB_e, DB_c);

			FP_CONV(S, D, 1, 2, SR, DB);
			goto pack_s;
		}

		/* Convert to integer using FP_TO_INT_ROUND_S. */
		case EFSCTSI:
		case EFSCTUI:
			if (SB_c == FP_CLS_NAN) {
				vc.wp[1] = 0;
				FP_SET_EXCEPTION(FP_EX_INVALID);
			} else {
				/* (func & 0x3) is non-zero only for the signed form. */
				FP_TO_INT_ROUND_S(vc.wp[1], SB, 32,
						((func & 0x3) != 0) ? 1 : 0);
			}
			goto update_regs;

		/* Convert to integer using FP_TO_INT_S (the "Z" forms). */
		case EFSCTSIZ:
		case EFSCTUIZ:
			if (SB_c == FP_CLS_NAN) {
				vc.wp[1] = 0;
				FP_SET_EXCEPTION(FP_EX_INVALID);
			} else {
				/* (func & 0x3) is non-zero only for the signed form. */
				FP_TO_INT_S(vc.wp[1], SB, 32,
						((func & 0x3) != 0) ? 1 : 0);
			}
			goto update_regs;

		default:
			goto illegal;
		}
		/* Not reached: every case above leaves via goto. */
		break;

pack_s:
		pr_debug("SR: %d %08x %d (%d)\n" , SR_s, SR_f, SR_e, SR_c);

		FP_PACK_SP(vc.wp + 1, SR);
		goto update_regs;

cmp_s:
		/* 3 is the value FP_CMP_S is asked to produce for unordered. */
		FP_CMP_S(IR, SA, SB, 3);
		/* An unordered compare raises invalid only for a signaling NaN. */
		if (IR == 3 && (FP_ISSIGNAN_S(SA) || FP_ISSIGNAN_S(SB)))
			FP_SET_EXCEPTION(FP_EX_INVALID);
		/* CR field value: 0x4 when the tested relation holds, else 0. */
		if (IR == cmp) {
			IR = 0x4;
		} else {
			IR = 0;
		}
		goto update_ccr;
	}

	/* Scalar double precision: operands are the full 64-bit images. */
	case DPFP: {
		FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);

		/* Unpack only the operands this instruction class reads. */
		switch (type) {
		case AB:
		case XCR:
			FP_UNPACK_DP(DA, va.dp);
			fallthrough;
		case XB:
			FP_UNPACK_DP(DB, vb.dp);
			break;
		case XA:
			FP_UNPACK_DP(DA, va.dp);
			break;
		}

		pr_debug("DA: %d %08x %08x %d (%d)\n" ,
				DA_s, DA_f1, DA_f0, DA_e, DA_c);
		pr_debug("DB: %d %08x %08x %d (%d)\n" ,
				DB_s, DB_f1, DB_f0, DB_e, DB_c);

		switch (func) {
		/* Sign manipulation works directly on the raw bit pattern. */
		case EFDABS:
			vc.dp[0] = va.dp[0] & ~SIGN_BIT_D;
			goto update_regs;

		case EFDNABS:
			vc.dp[0] = va.dp[0] | SIGN_BIT_D;
			goto update_regs;

		case EFDNEG:
			vc.dp[0] = va.dp[0] ^ SIGN_BIT_D;
			goto update_regs;

		case EFDADD:
			FP_ADD_D(DR, DA, DB);
			goto pack_d;

		case EFDSUB:
			FP_SUB_D(DR, DA, DB);
			goto pack_d;

		case EFDMUL:
			FP_MUL_D(DR, DA, DB);
			goto pack_d;

		case EFDDIV:
			FP_DIV_D(DR, DA, DB);
			goto pack_d;

		/* cmp holds the FP_CMP_D outcome that makes the compare true. */
		case EFDCMPEQ:
			cmp = 0;
			goto cmp_d;

		case EFDCMPGT:
			cmp = 1;
			goto cmp_d;

		case EFDCMPLT:
			cmp = -1;
			goto cmp_d;

		/* Convert to signed/unsigned 32-bit fixed-point fraction. */
		case EFDCTSF:
		case EFDCTUF:
			if (DB_c == FP_CLS_NAN) {
				vc.wp[1] = 0;
				FP_SET_EXCEPTION(FP_EX_INVALID);
			} else {
				/* Scale by 2^31 (signed) or 2^32 (unsigned) first. */
				DB_e += (func == EFDCTSF ? 31 : 32);
				FP_TO_INT_ROUND_D(vc.wp[1], DB, 32,
						(func == EFDCTSF) ? 1 : 0);
			}
			goto update_regs;

		/* Convert single (second word of vb) to double. */
		case EFDCFS: {
			FP_DECL_S(SB);
			/*
			 * The operand is a single, so the double-precision
			 * unpack of vb above used the wrong format;
			 * presumably this drops whatever it recorded before
			 * unpacking again as a single.
			 */
			FP_CLEAR_EXCEPTIONS;
			FP_UNPACK_SP(SB, vb.wp + 1);

			pr_debug("SB: %d %08x %d (%d)\n" ,
					SB_s, SB_f, SB_e, SB_c);

			FP_CONV(D, S, 2, 1, DR, SB);
			goto pack_d;
		}

		/* Convert to a 64-bit integer; the whole doubleword is written. */
		case EFDCTUIDZ:
		case EFDCTSIDZ:
			if (DB_c == FP_CLS_NAN) {
				vc.dp[0] = 0;
				FP_SET_EXCEPTION(FP_EX_INVALID);
			} else {
				/*
				 * NOTE(review): the last argument is 1 for
				 * EFDCTUIDZ (0x2ea) and 0 for EFDCTSIDZ
				 * (0x2eb). Every other conversion in this
				 * function passes 1 for the signed form -
				 * confirm this polarity is intended.
				 */
				FP_TO_INT_D(vc.dp[0], DB, 64,
						((func & 0x1) == 0) ? 1 : 0);
			}
			goto update_regs;

		/* Convert to a 32-bit integer using FP_TO_INT_ROUND_D. */
		case EFDCTUI:
		case EFDCTSI:
			if (DB_c == FP_CLS_NAN) {
				vc.wp[1] = 0;
				FP_SET_EXCEPTION(FP_EX_INVALID);
			} else {
				/* (func & 0x3) is non-zero only for the signed form. */
				FP_TO_INT_ROUND_D(vc.wp[1], DB, 32,
						((func & 0x3) != 0) ? 1 : 0);
			}
			goto update_regs;

		/* Convert to a 32-bit integer using FP_TO_INT_D (the "Z" forms). */
		case EFDCTUIZ:
		case EFDCTSIZ:
			if (DB_c == FP_CLS_NAN) {
				vc.wp[1] = 0;
				FP_SET_EXCEPTION(FP_EX_INVALID);
			} else {
				/* (func & 0x3) is non-zero only for the signed form. */
				FP_TO_INT_D(vc.wp[1], DB, 32,
						((func & 0x3) != 0) ? 1 : 0);
			}
			goto update_regs;

		default:
			goto illegal;
		}
		/* Not reached: every case above leaves via goto. */
		break;

pack_d:
		pr_debug("DR: %d %08x %08x %d (%d)\n" ,
				DR_s, DR_f1, DR_f0, DR_e, DR_c);

		FP_PACK_DP(vc.dp, DR);
		goto update_regs;

cmp_d:
		/* 3 is the value FP_CMP_D is asked to produce for unordered. */
		FP_CMP_D(IR, DA, DB, 3);
		/* An unordered compare raises invalid only for a signaling NaN. */
		if (IR == 3 && (FP_ISSIGNAN_D(DA) || FP_ISSIGNAN_D(DB)))
			FP_SET_EXCEPTION(FP_EX_INVALID);
		/* CR field value: 0x4 when the tested relation holds, else 0. */
		if (IR == cmp) {
			IR = 0x4;
		} else {
			IR = 0;
		}
		goto update_ccr;

	}

	/*
	 * Vector single precision: the two words of each register are
	 * independent single-precision elements. Suffix 0 is the first word
	 * (wp[0]), suffix 1 the second word (wp[1]).
	 */
	case VCT: {
		FP_DECL_S(SA0); FP_DECL_S(SB0); FP_DECL_S(SR0);
		FP_DECL_S(SA1); FP_DECL_S(SB1); FP_DECL_S(SR1);
		int IR0, IR1;

		/* Unpack only the operands this instruction class reads. */
		switch (type) {
		case AB:
		case XCR:
			FP_UNPACK_SP(SA0, va.wp);
			FP_UNPACK_SP(SA1, va.wp + 1);
			fallthrough;
		case XB:
			FP_UNPACK_SP(SB0, vb.wp);
			FP_UNPACK_SP(SB1, vb.wp + 1);
			break;
		case XA:
			FP_UNPACK_SP(SA0, va.wp);
			FP_UNPACK_SP(SA1, va.wp + 1);
			break;
		}

		pr_debug("SA0: %d %08x %d (%d)\n" ,
				SA0_s, SA0_f, SA0_e, SA0_c);
		pr_debug("SA1: %d %08x %d (%d)\n" ,
				SA1_s, SA1_f, SA1_e, SA1_c);
		pr_debug("SB0: %d %08x %d (%d)\n" ,
				SB0_s, SB0_f, SB0_e, SB0_c);
		pr_debug("SB1: %d %08x %d (%d)\n" ,
				SB1_s, SB1_f, SB1_e, SB1_c);

		switch (func) {
		/* Sign manipulation works directly on both raw words. */
		case EVFSABS:
			vc.wp[0] = va.wp[0] & ~SIGN_BIT_S;
			vc.wp[1] = va.wp[1] & ~SIGN_BIT_S;
			goto update_regs;

		case EVFSNABS:
			vc.wp[0] = va.wp[0] | SIGN_BIT_S;
			vc.wp[1] = va.wp[1] | SIGN_BIT_S;
			goto update_regs;

		case EVFSNEG:
			vc.wp[0] = va.wp[0] ^ SIGN_BIT_S;
			vc.wp[1] = va.wp[1] ^ SIGN_BIT_S;
			goto update_regs;

		case EVFSADD:
			FP_ADD_S(SR0, SA0, SB0);
			FP_ADD_S(SR1, SA1, SB1);
			goto pack_vs;

		case EVFSSUB:
			FP_SUB_S(SR0, SA0, SB0);
			FP_SUB_S(SR1, SA1, SB1);
			goto pack_vs;

		case EVFSMUL:
			FP_MUL_S(SR0, SA0, SB0);
			FP_MUL_S(SR1, SA1, SB1);
			goto pack_vs;

		case EVFSDIV:
			FP_DIV_S(SR0, SA0, SB0);
			FP_DIV_S(SR1, SA1, SB1);
			goto pack_vs;

		/* cmp holds the FP_CMP_S outcome that makes the compare true. */
		case EVFSCMPEQ:
			cmp = 0;
			goto cmp_vs;

		case EVFSCMPGT:
			cmp = 1;
			goto cmp_vs;

		case EVFSCMPLT:
			cmp = -1;
			goto cmp_vs;

		/* Per-element convert to signed/unsigned fixed-point fraction. */
		case EVFSCTUF:
		case EVFSCTSF:
			if (SB0_c == FP_CLS_NAN) {
				vc.wp[0] = 0;
				FP_SET_EXCEPTION(FP_EX_INVALID);
			} else {
				/* Scale by 2^31 (signed) or 2^32 (unsigned) first. */
				SB0_e += (func == EVFSCTSF ? 31 : 32);
				FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32,
						(func == EVFSCTSF) ? 1 : 0);
			}
			if (SB1_c == FP_CLS_NAN) {
				vc.wp[1] = 0;
				FP_SET_EXCEPTION(FP_EX_INVALID);
			} else {
				SB1_e += (func == EVFSCTSF ? 31 : 32);
				FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32,
						(func == EVFSCTSF) ? 1 : 0);
			}
			goto update_regs;

		/* Per-element convert to integer using FP_TO_INT_ROUND_S. */
		case EVFSCTUI:
		case EVFSCTSI:
			if (SB0_c == FP_CLS_NAN) {
				vc.wp[0] = 0;
				FP_SET_EXCEPTION(FP_EX_INVALID);
			} else {
				/* (func & 0x3) is non-zero only for the signed form. */
				FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32,
						((func & 0x3) != 0) ? 1 : 0);
			}
			if (SB1_c == FP_CLS_NAN) {
				vc.wp[1] = 0;
				FP_SET_EXCEPTION(FP_EX_INVALID);
			} else {
				FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32,
						((func & 0x3) != 0) ? 1 : 0);
			}
			goto update_regs;

		/* Per-element convert to integer using FP_TO_INT_S (the "Z" forms). */
		case EVFSCTUIZ:
		case EVFSCTSIZ:
			if (SB0_c == FP_CLS_NAN) {
				vc.wp[0] = 0;
				FP_SET_EXCEPTION(FP_EX_INVALID);
			} else {
				/* (func & 0x3) is non-zero only for the signed form. */
				FP_TO_INT_S(vc.wp[0], SB0, 32,
						((func & 0x3) != 0) ? 1 : 0);
			}
			if (SB1_c == FP_CLS_NAN) {
				vc.wp[1] = 0;
				FP_SET_EXCEPTION(FP_EX_INVALID);
			} else {
				FP_TO_INT_S(vc.wp[1], SB1, 32,
						((func & 0x3) != 0) ? 1 : 0);
			}
			goto update_regs;

		default:
			goto illegal;
		}
		/* Not reached: every case above leaves via goto. */
		break;

pack_vs:
		pr_debug("SR0: %d %08x %d (%d)\n" ,
				SR0_s, SR0_f, SR0_e, SR0_c);
		pr_debug("SR1: %d %08x %d (%d)\n" ,
				SR1_s, SR1_f, SR1_e, SR1_c);

		FP_PACK_SP(vc.wp, SR0);
		FP_PACK_SP(vc.wp + 1, SR1);
		goto update_regs;

cmp_vs:
		{
			/* ch / cl: compare outcome for element 0 / element 1 */
			int ch, cl;

			FP_CMP_S(IR0, SA0, SB0, 3);
			FP_CMP_S(IR1, SA1, SB1, 3);
			if (IR0 == 3 && (FP_ISSIGNAN_S(SA0) || FP_ISSIGNAN_S(SB0)))
				FP_SET_EXCEPTION(FP_EX_INVALID);
			if (IR1 == 3 && (FP_ISSIGNAN_S(SA1) || FP_ISSIGNAN_S(SB1)))
				FP_SET_EXCEPTION(FP_EX_INVALID);
			ch = (IR0 == cmp) ? 1 : 0;
			cl = (IR1 == cmp) ? 1 : 0;
			/*
			 * CR field layout: bit 3 = element 0 result,
			 * bit 2 = element 1 result, bit 1 = either true,
			 * bit 0 = both true.
			 */
			IR = (ch << 3) | (cl << 2) | ((ch | cl) << 1) |
				((ch & cl) << 0);
			goto update_ccr;
		}
	}
	default:
		return -EINVAL;
	}

update_ccr:
	/*
	 * Replace the 4-bit field of regs->ccr selected by instruction
	 * bits 23..25 (field 0 occupies the most significant nibble) with IR.
	 * Compare paths then fall through to update_regs, where vc still
	 * holds the values loaded from register fc at entry.
	 */
	regs->ccr &= ~(15 << ((7 - ((speinsn >> 23) & 0x7)) << 2));
	regs->ccr |= (IR << ((7 - ((speinsn >> 23) & 0x7)) << 2));

update_regs:
	/*
	 * If the "invalid" exception sticky bit was set by the
	 * processor for non-finite input, but was not set before the
	 * instruction being emulated, clear it. Likewise for the
	 * "underflow" bit, which may have been set by the processor
	 * for exact underflow, not just inexact underflow when the
	 * flag should be set for IEEE 754 semantics. Other sticky
	 * exceptions will only be set by the processor when they are
	 * correct according to IEEE 754 semantics, and we must not
	 * clear sticky bits that were already set before the emulated
	 * instruction as they represent the user-visible sticky
	 * exception status. "inexact" traps to kernel are not
	 * required for IEEE semantics and are not enabled by default,
	 * so the "inexact" sticky bit may have been set by a previous
	 * instruction without the kernel being aware of it.
	 */
	/*
	 * The mask is (~(INVALID | UNDERFLOW)) | spefscr_last: every other
	 * bit is kept, and those two survive only if set in spefscr_last.
	 */
	__FPU_FPSCR
	  &= ~(FP_EX_INVALID | FP_EX_UNDERFLOW) | current->thread.spefscr_last;
	/* Add the exceptions raised by the emulation itself. */
	__FPU_FPSCR |= (FP_CUR_EXCEPTIONS & FP_EX_MASK);
	mtspr(SPRN_SPEFSCR, __FPU_FPSCR);
	current->thread.spefscr_last = __FPU_FPSCR;

	/* Write the (possibly partly unchanged) result image back to fc. */
	current->thread.evr[fc] = vc.wp[0];
	regs->gpr[fc] = vc.wp[1];

	pr_debug("ccr = %08lx\n" , regs->ccr);
	pr_debug("cur exceptions = %08x spefscr = %08lx\n" ,
			FP_CUR_EXCEPTIONS, __FPU_FPSCR);
	pr_debug("vc: %08x %08x\n" , vc.wp[0], vc.wp[1]);
	pr_debug("va: %08x %08x\n" , va.wp[0], va.wp[1]);
	pr_debug("vb: %08x %08x\n" , vb.wp[0], vb.wp[1]);

	/*
	 * Return 1 when the task has software exception handling enabled
	 * and an exception raised here is one whose PR_FP_EXC_* bit is set
	 * in its fpexc_mode.
	 */
	if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) {
		if ((FP_CUR_EXCEPTIONS & FP_EX_DIVZERO)
		 && (current->thread.fpexc_mode & PR_FP_EXC_DIV))
			return 1;
		if ((FP_CUR_EXCEPTIONS & FP_EX_OVERFLOW)
		 && (current->thread.fpexc_mode & PR_FP_EXC_OVF))
			return 1;
		if ((FP_CUR_EXCEPTIONS & FP_EX_UNDERFLOW)
		 && (current->thread.fpexc_mode & PR_FP_EXC_UND))
			return 1;
		if ((FP_CUR_EXCEPTIONS & FP_EX_INEXACT)
		 && (current->thread.fpexc_mode & PR_FP_EXC_RES))
			return 1;
		if ((FP_CUR_EXCEPTIONS & FP_EX_INVALID)
		 && (current->thread.fpexc_mode & PR_FP_EXC_INV))
			return 1;
	}
	return 0;

illegal:
	/* Reached for a function code that no handler above supports. */
	if (have_e500_cpu_a005_erratum) {
		/* according to e500 cpu a005 erratum, reissue efp inst */
		regs_add_return_ip(regs, -4);
		pr_debug("re-issue efp inst: %08lx\n" , speinsn);
		return 0;
	}

	printk(KERN_ERR "\nOoops! IEEE-754 compliance handler encountered un-supported instruction.\ninst code: %08lx\n" , speinsn);
	return -ENOSYS;
}
725 | |
726 | int speround_handler(struct pt_regs *regs) |
727 | { |
728 | union dw_union fgpr; |
729 | int s_lo, s_hi; |
730 | int lo_inexact, hi_inexact; |
731 | int fp_result; |
732 | unsigned long speinsn, type, fb, fc, fptype, func; |
733 | |
734 | if (get_user(speinsn, (unsigned int __user *) regs->nip)) |
735 | return -EFAULT; |
736 | if ((speinsn >> 26) != 4) |
737 | return -EINVAL; /* not an spe instruction */ |
738 | |
739 | func = speinsn & 0x7ff; |
740 | type = insn_type(speinsn: func); |
741 | if (type == XCR) return -ENOSYS; |
742 | |
743 | __FPU_FPSCR = mfspr(SPRN_SPEFSCR); |
744 | pr_debug("speinsn:%08lx spefscr:%08lx\n" , speinsn, __FPU_FPSCR); |
745 | |
746 | fptype = (speinsn >> 5) & 0x7; |
747 | |
748 | /* No need to round if the result is exact */ |
749 | lo_inexact = __FPU_FPSCR & (SPEFSCR_FG | SPEFSCR_FX); |
750 | hi_inexact = __FPU_FPSCR & (SPEFSCR_FGH | SPEFSCR_FXH); |
751 | if (!(lo_inexact || (hi_inexact && fptype == VCT))) |
752 | return 0; |
753 | |
754 | fc = (speinsn >> 21) & 0x1f; |
755 | s_lo = regs->gpr[fc] & SIGN_BIT_S; |
756 | s_hi = current->thread.evr[fc] & SIGN_BIT_S; |
757 | fgpr.wp[0] = current->thread.evr[fc]; |
758 | fgpr.wp[1] = regs->gpr[fc]; |
759 | |
760 | fb = (speinsn >> 11) & 0x1f; |
761 | switch (func) { |
762 | case EFSCTUIZ: |
763 | case EFSCTSIZ: |
764 | case EVFSCTUIZ: |
765 | case EVFSCTSIZ: |
766 | case EFDCTUIDZ: |
767 | case EFDCTSIDZ: |
768 | case EFDCTUIZ: |
769 | case EFDCTSIZ: |
770 | /* |
771 | * These instructions always round to zero, |
772 | * independent of the rounding mode. |
773 | */ |
774 | return 0; |
775 | |
776 | case EFSCTUI: |
777 | case EFSCTUF: |
778 | case EVFSCTUI: |
779 | case EVFSCTUF: |
780 | case EFDCTUI: |
781 | case EFDCTUF: |
782 | fp_result = 0; |
783 | s_lo = 0; |
784 | s_hi = 0; |
785 | break; |
786 | |
787 | case EFSCTSI: |
788 | case EFSCTSF: |
789 | fp_result = 0; |
790 | /* Recover the sign of a zero result if possible. */ |
791 | if (fgpr.wp[1] == 0) |
792 | s_lo = regs->gpr[fb] & SIGN_BIT_S; |
793 | break; |
794 | |
795 | case EVFSCTSI: |
796 | case EVFSCTSF: |
797 | fp_result = 0; |
798 | /* Recover the sign of a zero result if possible. */ |
799 | if (fgpr.wp[1] == 0) |
800 | s_lo = regs->gpr[fb] & SIGN_BIT_S; |
801 | if (fgpr.wp[0] == 0) |
802 | s_hi = current->thread.evr[fb] & SIGN_BIT_S; |
803 | break; |
804 | |
805 | case EFDCTSI: |
806 | case EFDCTSF: |
807 | fp_result = 0; |
808 | s_hi = s_lo; |
809 | /* Recover the sign of a zero result if possible. */ |
810 | if (fgpr.wp[1] == 0) |
811 | s_hi = current->thread.evr[fb] & SIGN_BIT_S; |
812 | break; |
813 | |
814 | default: |
815 | fp_result = 1; |
816 | break; |
817 | } |
818 | |
819 | pr_debug("round fgpr: %08x %08x\n" , fgpr.wp[0], fgpr.wp[1]); |
820 | |
821 | switch (fptype) { |
822 | /* Since SPE instructions on E500 core can handle round to nearest |
823 | * and round toward zero with IEEE-754 complied, we just need |
824 | * to handle round toward +Inf and round toward -Inf by software. |
825 | */ |
826 | case SPFP: |
827 | if ((FP_ROUNDMODE) == FP_RND_PINF) { |
828 | if (!s_lo) fgpr.wp[1]++; /* Z > 0, choose Z1 */ |
829 | } else { /* round to -Inf */ |
830 | if (s_lo) { |
831 | if (fp_result) |
832 | fgpr.wp[1]++; /* Z < 0, choose Z2 */ |
833 | else |
834 | fgpr.wp[1]--; /* Z < 0, choose Z2 */ |
835 | } |
836 | } |
837 | break; |
838 | |
839 | case DPFP: |
840 | if (FP_ROUNDMODE == FP_RND_PINF) { |
841 | if (!s_hi) { |
842 | if (fp_result) |
843 | fgpr.dp[0]++; /* Z > 0, choose Z1 */ |
844 | else |
845 | fgpr.wp[1]++; /* Z > 0, choose Z1 */ |
846 | } |
847 | } else { /* round to -Inf */ |
848 | if (s_hi) { |
849 | if (fp_result) |
850 | fgpr.dp[0]++; /* Z < 0, choose Z2 */ |
851 | else |
852 | fgpr.wp[1]--; /* Z < 0, choose Z2 */ |
853 | } |
854 | } |
855 | break; |
856 | |
857 | case VCT: |
858 | if (FP_ROUNDMODE == FP_RND_PINF) { |
859 | if (lo_inexact && !s_lo) |
860 | fgpr.wp[1]++; /* Z_low > 0, choose Z1 */ |
861 | if (hi_inexact && !s_hi) |
862 | fgpr.wp[0]++; /* Z_high word > 0, choose Z1 */ |
863 | } else { /* round to -Inf */ |
864 | if (lo_inexact && s_lo) { |
865 | if (fp_result) |
866 | fgpr.wp[1]++; /* Z_low < 0, choose Z2 */ |
867 | else |
868 | fgpr.wp[1]--; /* Z_low < 0, choose Z2 */ |
869 | } |
870 | if (hi_inexact && s_hi) { |
871 | if (fp_result) |
872 | fgpr.wp[0]++; /* Z_high < 0, choose Z2 */ |
873 | else |
874 | fgpr.wp[0]--; /* Z_high < 0, choose Z2 */ |
875 | } |
876 | } |
877 | break; |
878 | |
879 | default: |
880 | return -EINVAL; |
881 | } |
882 | |
883 | current->thread.evr[fc] = fgpr.wp[0]; |
884 | regs->gpr[fc] = fgpr.wp[1]; |
885 | |
886 | pr_debug(" to fgpr: %08x %08x\n" , fgpr.wp[0], fgpr.wp[1]); |
887 | |
888 | if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) |
889 | return (current->thread.fpexc_mode & PR_FP_EXC_RES) ? 1 : 0; |
890 | return 0; |
891 | } |
892 | |
893 | static int __init spe_mathemu_init(void) |
894 | { |
895 | u32 pvr, maj, min; |
896 | |
897 | pvr = mfspr(SPRN_PVR); |
898 | |
899 | if ((PVR_VER(pvr) == PVR_VER_E500V1) || |
900 | (PVR_VER(pvr) == PVR_VER_E500V2)) { |
901 | maj = PVR_MAJ(pvr); |
902 | min = PVR_MIN(pvr); |
903 | |
904 | /* |
905 | * E500 revision below 1.1, 2.3, 3.1, 4.1, 5.1 |
906 | * need cpu a005 errata workaround |
907 | */ |
908 | switch (maj) { |
909 | case 1: |
910 | if (min < 1) |
911 | have_e500_cpu_a005_erratum = 1; |
912 | break; |
913 | case 2: |
914 | if (min < 3) |
915 | have_e500_cpu_a005_erratum = 1; |
916 | break; |
917 | case 3: |
918 | case 4: |
919 | case 5: |
920 | if (min < 1) |
921 | have_e500_cpu_a005_erratum = 1; |
922 | break; |
923 | default: |
924 | break; |
925 | } |
926 | } |
927 | |
928 | return 0; |
929 | } |
930 | |
931 | module_init(spe_mathemu_init); |
932 | |