vfpdouble.c source code [linux/arch/arm/vfp/vfpdouble.c]

1	/*
2	* linux/arch/arm/vfp/vfpdouble.c
3	*
4	* This code is derived in part from John R. Hauser's SoftFloat library, which
5	* carries the following notice:
6	*
7	* ===========================================================================
8	* This C source file is part of the SoftFloat IEC/IEEE Floating-point
9	* Arithmetic Package, Release 2.
10	*
11	* Written by John R. Hauser. This work was made possible in part by the
12	* International Computer Science Institute, located at Suite 600, 1947 Center
13	* Street, Berkeley, California 94704. Funding was partially provided by the
14	* National Science Foundation under grant MIP-9311980. The original version
15	* of this code was written as part of a project to build a fixed-point vector
16	* processor in collaboration with the University of California at Berkeley,
17	* overseen by Profs. Nelson Morgan and John Wawrzynek. More information
18	* is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
19	* arithmetic/softfloat.html'.
20	*
21	* THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
22	* has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
23	* TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
24	* PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
25	* AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
26	*
27	* Derivative works are acceptable, even for commercial purposes, so long as
28	* (1) they include prominent notice that the work is derivative, and (2) they
29	* include prominent notice akin to these three paragraphs for those parts of
30	* this code that are retained.
31	* ===========================================================================
32	*/
33	#include <linux/kernel.h>
34	#include <linux/bitops.h>
35
36	#include <asm/div64.h>
37	#include <asm/vfp.h>
38
39	#include "vfpinstr.h"
40	#include "vfp.h"
41
42	static struct vfp_double vfp_double_default_qnan = {
43	.exponent = `2047`,
44	.sign = `0`,
45	.significand = VFP_DOUBLE_SIGNIFICAND_QNAN,
46	};
47
48	static void vfp_double_dump(const char str, struct* vfp_double *d)
49	{
50	pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n",
51	str, d->sign != `0`, d->exponent, d->significand);
52	}
53
54	static void vfp_double_normalise_denormal(struct vfp_double *vd)
55	{
56	int bits = `31` - fls(x: vd->significand >> `32`);
57	if (bits == `31`)
58	bits = `63` - fls(x: vd->significand);
59
60	vfp_double_dump(str: "normalise_denormal: in", d: vd);
61
62	if (bits) {
63	vd->exponent -= bits - `1`;
64	vd->significand <<= bits;
65	}
66
67	vfp_double_dump(str: "normalise_denormal: out", d: vd);
68	}
69
70	u32 vfp_double_normaliseround(int dd, struct vfp_double vd, u32 fpscr, u32 exceptions, const* char *func)
71	{
72	u64 significand, incr;
73	int exponent, shift, underflow;
74	u32 rmode;
75
76	vfp_double_dump(str: "pack: in", d: vd);
77
78	/*
79	* Infinities and NaNs are a special case.
80	*/
81	if (vd->exponent == `2047` && (vd->significand == `0` \|\| exceptions))
82	goto pack;
83
84	/*
85	* Special-case zero.
86	*/
87	if (vd->significand == `0`) {
88	vd->exponent = `0`;
89	goto pack;
90	}
91
92	exponent = vd->exponent;
93	significand = vd->significand;
94
95	shift = `32` - fls(x: significand >> `32`);
96	if (shift == `32`)
97	shift = `64` - fls(x: significand);
98	if (shift) {
99	exponent -= shift;
100	significand <<= shift;
101	}
102
103	#ifdef DEBUG
104	vd->exponent = exponent;
105	vd->significand = significand;
106	vfp_double_dump("pack: normalised", vd);
107	#endif
108
109	/*
110	* Tiny number?
111	*/
112	underflow = exponent < `0`;
113	if (underflow) {
114	significand = vfp_shiftright64jamming(val: significand, shift: -exponent);
115	exponent = `0`;
116	#ifdef DEBUG
117	vd->exponent = exponent;
118	vd->significand = significand;
119	vfp_double_dump("pack: tiny number", vd);
120	#endif
121	if (!(significand & ((`1ULL` << (VFP_DOUBLE_LOW_BITS + `1`)) - `1`)))
122	underflow = `0`;
123	}
124
125	/*
126	* Select rounding increment.
127	*/
128	incr = `0`;
129	rmode = fpscr & FPSCR_RMODE_MASK;
130
131	if (rmode == FPSCR_ROUND_NEAREST) {
132	incr = `1ULL` << VFP_DOUBLE_LOW_BITS;
133	if ((significand & (`1ULL` << (VFP_DOUBLE_LOW_BITS + `1`))) == `0`)
134	incr -= `1`;
135	} else if (rmode == FPSCR_ROUND_TOZERO) {
136	incr = `0`;
137	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != `0`))
138	incr = (`1ULL` << (VFP_DOUBLE_LOW_BITS + `1`)) - `1`;
139
140	pr_debug("VFP: rounding increment = 0x%08llx\n", incr);
141
142	/*
143	* Is our rounding going to overflow?
144	*/
145	if ((significand + incr) < significand) {
146	exponent += `1`;
147	significand = (significand >> `1`) \| (significand & `1`);
148	incr >>= `1`;
149	#ifdef DEBUG
150	vd->exponent = exponent;
151	vd->significand = significand;
152	vfp_double_dump("pack: overflow", vd);
153	#endif
154	}
155
156	/*
157	* If any of the low bits (which will be shifted out of the
158	* number) are non-zero, the result is inexact.
159	*/
160	if (significand & ((`1` << (VFP_DOUBLE_LOW_BITS + `1`)) - `1`))
161	exceptions \|= FPSCR_IXC;
162
163	/*
164	* Do our rounding.
165	*/
166	significand += incr;
167
168	/*
169	* Infinity?
170	*/
171	if (exponent >= `2046`) {
172	exceptions \|= FPSCR_OFC \| FPSCR_IXC;
173	if (incr == `0`) {
174	vd->exponent = `2045`;
175	vd->significand = `0x7fffffffffffffffULL`;
176	} else {
177	vd->exponent = `2047`; / infinity /
178	vd->significand = `0`;
179	}
180	} else {
181	if (significand >> (VFP_DOUBLE_LOW_BITS + `1`) == `0`)
182	exponent = `0`;
183	if (exponent \|\| significand > `0x8000000000000000ULL`)
184	underflow = `0`;
185	if (underflow)
186	exceptions \|= FPSCR_UFC;
187	vd->exponent = exponent;
188	vd->significand = significand >> `1`;
189	}
190
191	pack:
192	vfp_double_dump(str: "pack: final", d: vd);
193	{
194	s64 d = vfp_double_pack(s: vd);
195	pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func,
196	dd, d, exceptions);
197	vfp_put_double(val: d, reg: dd);
198	}
199	return exceptions;
200	}
201
202	/*
203	* Propagate the NaN, setting exceptions if it is signalling.
204	* 'n' is always a NaN. 'm' may be a number, NaN or infinity.
205	*/
206	static u32
207	vfp_propagate_nan(struct vfp_double vdd, struct* vfp_double *vdn,
208	struct vfp_double *vdm, u32 fpscr)
209	{
210	struct vfp_double *nan;
211	int tn, tm = `0`;
212
213	tn = vfp_double_type(s: vdn);
214
215	if (vdm)
216	tm = vfp_double_type(s: vdm);
217
218	if (fpscr & FPSCR_DEFAULT_NAN)
219	/*
220	* Default NaN mode - always returns a quiet NaN
221	*/
222	nan = &vfp_double_default_qnan;
223	else {
224	/*
225	* Contemporary mode - select the first signalling
226	* NAN, or if neither are signalling, the first
227	* quiet NAN.
228	*/
229	if (tn == VFP_SNAN \|\| (tm != VFP_SNAN && tn == VFP_QNAN))
230	nan = vdn;
231	else
232	nan = vdm;
233	/*
234	* Make the NaN quiet.
235	*/
236	nan->significand \|= VFP_DOUBLE_SIGNIFICAND_QNAN;
237	}
238
239	vdd = nan;
240
241	/*
242	* If one was a signalling NAN, raise invalid operation.
243	*/
244	return tn == VFP_SNAN \|\| tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
245	}
246
247	/*
248	* Extended operations
249	*/
250	static u32 vfp_double_fabs(int dd, int unused, int dm, u32 fpscr)
251	{
252	vfp_put_double(vfp_double_packed_abs(vfp_get_double(dm)), reg: dd);
253	return `0`;
254	}
255
256	static u32 vfp_double_fcpy(int dd, int unused, int dm, u32 fpscr)
257	{
258	vfp_put_double(val: vfp_get_double(reg: dm), reg: dd);
259	return `0`;
260	}
261
262	static u32 vfp_double_fneg(int dd, int unused, int dm, u32 fpscr)
263	{
264	vfp_put_double(vfp_double_packed_negate(vfp_get_double(dm)), reg: dd);
265	return `0`;
266	}
267
268	static u32 vfp_double_fsqrt(int dd, int unused, int dm, u32 fpscr)
269	{
270	struct vfp_double vdm, vdd;
271	int ret, tm;
272
273	vfp_double_unpack(s: &vdm, val: vfp_get_double(reg: dm));
274	tm = vfp_double_type(s: &vdm);
275	if (tm & (VFP_NAN\|VFP_INFINITY)) {
276	struct vfp_double *vdp = &vdd;
277
278	if (tm & VFP_NAN)
279	ret = vfp_propagate_nan(vdd: vdp, vdn: &vdm, NULL, fpscr);
280	else if (vdm.sign == `0`) {
281	sqrt_copy:
282	vdp = &vdm;
283	ret = `0`;
284	} else {
285	sqrt_invalid:
286	vdp = &vfp_double_default_qnan;
287	ret = FPSCR_IOC;
288	}
289	vfp_put_double(val: vfp_double_pack(s: vdp), reg: dd);
290	return ret;
291	}
292
293	/*
294	* sqrt(+/- 0) == +/- 0
295	*/
296	if (tm & VFP_ZERO)
297	goto sqrt_copy;
298
299	/*
300	* Normalise a denormalised number
301	*/
302	if (tm & VFP_DENORMAL)
303	vfp_double_normalise_denormal(vd: &vdm);
304
305	/*
306	* sqrt(<0) = invalid
307	*/
308	if (vdm.sign)
309	goto sqrt_invalid;
310
311	vfp_double_dump(str: "sqrt", d: &vdm);
312
313	/*
314	* Estimate the square root.
315	*/
316	vdd.sign = `0`;
317	vdd.exponent = ((vdm.exponent - `1023`) >> `1`) + `1023`;
318	vdd.significand = (u64)vfp_estimate_sqrt_significand(exponent: vdm.exponent, significand: vdm.significand >> `32`) << `31`;
319
320	vfp_double_dump(str: "sqrt estimate1", d: &vdd);
321
322	vdm.significand >>= `1` + (vdm.exponent & `1`);
323	vdd.significand += `2` + vfp_estimate_div128to64(nh: vdm.significand, nl: `0`, m: vdd.significand);
324
325	vfp_double_dump(str: "sqrt estimate2", d: &vdd);
326
327	/*
328	* And now adjust.
329	*/
330	if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= `5`) {
331	if (vdd.significand < `2`) {
332	vdd.significand = ~`0ULL`;
333	} else {
334	u64 termh, terml, remh, reml;
335	vdm.significand <<= `2`;
336	mul64to128(resh: &termh, resl: &terml, n: vdd.significand, m: vdd.significand);
337	sub128(resh: &remh, resl: &reml, nh: vdm.significand, nl: `0`, mh: termh, ml: terml);
338	while ((s64)remh < `0`) {
339	vdd.significand -= `1`;
340	shift64left(resh: &termh, resl: &terml, n: vdd.significand);
341	terml \|= `1`;
342	add128(resh: &remh, resl: &reml, nh: remh, nl: reml, mh: termh, ml: terml);
343	}
344	vdd.significand \|= (remh \| reml) != `0`;
345	}
346	}
347	vdd.significand = vfp_shiftright64jamming(val: vdd.significand, shift: `1`);
348
349	return vfp_double_normaliseround(dd, vd: &vdd, fpscr, exceptions: `0`, func: "fsqrt");
350	}
351
352	/*
353	* Equal := ZC
354	* Less than := N
355	* Greater than := C
356	* Unordered := CV
357	*/
358	static u32 vfp_compare(int dd, int signal_on_qnan, int dm, u32 fpscr)
359	{
360	s64 d, m;
361	u32 ret = `0`;
362
363	m = vfp_get_double(reg: dm);
364	if (vfp_double_packed_exponent(m) == `2047` && vfp_double_packed_mantissa(m)) {
365	ret \|= FPSCR_C \| FPSCR_V;
366	if (signal_on_qnan \|\| !(vfp_double_packed_mantissa(m) & (`1ULL` << (VFP_DOUBLE_MANTISSA_BITS - `1`))))
367	/*
368	* Signalling NaN, or signalling on quiet NaN
369	*/
370	ret \|= FPSCR_IOC;
371	}
372
373	d = vfp_get_double(reg: dd);
374	if (vfp_double_packed_exponent(d) == `2047` && vfp_double_packed_mantissa(d)) {
375	ret \|= FPSCR_C \| FPSCR_V;
376	if (signal_on_qnan \|\| !(vfp_double_packed_mantissa(d) & (`1ULL` << (VFP_DOUBLE_MANTISSA_BITS - `1`))))
377	/*
378	* Signalling NaN, or signalling on quiet NaN
379	*/
380	ret \|= FPSCR_IOC;
381	}
382
383	if (ret == `0`) {
384	if (d == m \|\| vfp_double_packed_abs(d \| m) == `0`) {
385	/*
386	* equal
387	*/
388	ret \|= FPSCR_Z \| FPSCR_C;
389	} else if (vfp_double_packed_sign(d ^ m)) {
390	/*
391	* different signs
392	*/
393	if (vfp_double_packed_sign(d))
394	/*
395	* d is negative, so d < m
396	*/
397	ret \|= FPSCR_N;
398	else
399	/*
400	* d is positive, so d > m
401	*/
402	ret \|= FPSCR_C;
403	} else if ((vfp_double_packed_sign(d) != `0`) ^ (d < m)) {
404	/*
405	* d < m
406	*/
407	ret \|= FPSCR_N;
408	} else if ((vfp_double_packed_sign(d) != `0`) ^ (d > m)) {
409	/*
410	* d > m
411	*/
412	ret \|= FPSCR_C;
413	}
414	}
415
416	return ret;
417	}
418
419	static u32 vfp_double_fcmp(int dd, int unused, int dm, u32 fpscr)
420	{
421	return vfp_compare(dd, signal_on_qnan: `0`, dm, fpscr);
422	}
423
424	static u32 vfp_double_fcmpe(int dd, int unused, int dm, u32 fpscr)
425	{
426	return vfp_compare(dd, signal_on_qnan: `1`, dm, fpscr);
427	}
428
429	static u32 vfp_double_fcmpz(int dd, int unused, int dm, u32 fpscr)
430	{
431	return vfp_compare(dd, signal_on_qnan: `0`, VFP_REG_ZERO, fpscr);
432	}
433
434	static u32 vfp_double_fcmpez(int dd, int unused, int dm, u32 fpscr)
435	{
436	return vfp_compare(dd, signal_on_qnan: `1`, VFP_REG_ZERO, fpscr);
437	}
438
439	static u32 vfp_double_fcvts(int sd, int unused, int dm, u32 fpscr)
440	{
441	struct vfp_double vdm;
442	struct vfp_single vsd;
443	int tm;
444	u32 exceptions = `0`;
445
446	vfp_double_unpack(s: &vdm, val: vfp_get_double(reg: dm));
447
448	tm = vfp_double_type(s: &vdm);
449
450	/*
451	* If we have a signalling NaN, signal invalid operation.
452	*/
453	if (tm == VFP_SNAN)
454	exceptions = FPSCR_IOC;
455
456	if (tm & VFP_DENORMAL)
457	vfp_double_normalise_denormal(vd: &vdm);
458
459	vsd.sign = vdm.sign;
460	vsd.significand = vfp_hi64to32jamming(val: vdm.significand);
461
462	/*
463	* If we have an infinity or a NaN, the exponent must be 255
464	*/
465	if (tm & (VFP_INFINITY\|VFP_NAN)) {
466	vsd.exponent = `255`;
467	if (tm == VFP_QNAN)
468	vsd.significand \|= VFP_SINGLE_SIGNIFICAND_QNAN;
469	goto pack_nan;
470	} else if (tm & VFP_ZERO)
471	vsd.exponent = `0`;
472	else
473	vsd.exponent = vdm.exponent - (`1023` - `127`);
474
475	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fcvts");
476
477	pack_nan:
478	vfp_put_float(val: vfp_single_pack(s: &vsd), reg: sd);
479	return exceptions;
480	}
481
482	static u32 vfp_double_fuito(int dd, int unused, int dm, u32 fpscr)
483	{
484	struct vfp_double vdm;
485	u32 m = vfp_get_float(reg: dm);
486
487	vdm.sign = `0`;
488	vdm.exponent = `1023` + `63` - `1`;
489	vdm.significand = (u64)m;
490
491	return vfp_double_normaliseround(dd, vd: &vdm, fpscr, exceptions: `0`, func: "fuito");
492	}
493
494	static u32 vfp_double_fsito(int dd, int unused, int dm, u32 fpscr)
495	{
496	struct vfp_double vdm;
497	u32 m = vfp_get_float(reg: dm);
498
499	vdm.sign = (m & `0x80000000`) >> `16`;
500	vdm.exponent = `1023` + `63` - `1`;
501	vdm.significand = vdm.sign ? -m : m;
502
503	return vfp_double_normaliseround(dd, vd: &vdm, fpscr, exceptions: `0`, func: "fsito");
504	}
505
506	static u32 vfp_double_ftoui(int sd, int unused, int dm, u32 fpscr)
507	{
508	struct vfp_double vdm;
509	u32 d, exceptions = `0`;
510	int rmode = fpscr & FPSCR_RMODE_MASK;
511	int tm;
512
513	vfp_double_unpack(s: &vdm, val: vfp_get_double(reg: dm));
514
515	/*
516	* Do we have a denormalised number?
517	*/
518	tm = vfp_double_type(s: &vdm);
519	if (tm & VFP_DENORMAL)
520	exceptions \|= FPSCR_IDC;
521
522	if (tm & VFP_NAN)
523	vdm.sign = `0`;
524
525	if (vdm.exponent >= `1023` + `32`) {
526	d = vdm.sign ? `0` : `0xffffffff`;
527	exceptions = FPSCR_IOC;
528	} else if (vdm.exponent >= `1023` - `1`) {
529	int shift = `1023` + `63` - vdm.exponent;
530	u64 rem, incr = `0`;
531
532	/*
533	* 2^0 <= m < 2^32-2^8
534	*/
535	d = (vdm.significand << `1`) >> shift;
536	rem = vdm.significand << (`65` - shift);
537
538	if (rmode == FPSCR_ROUND_NEAREST) {
539	incr = `0x8000000000000000ULL`;
540	if ((d & `1`) == `0`)
541	incr -= `1`;
542	} else if (rmode == FPSCR_ROUND_TOZERO) {
543	incr = `0`;
544	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != `0`)) {
545	incr = ~`0ULL`;
546	}
547
548	if ((rem + incr) < rem) {
549	if (d < `0xffffffff`)
550	d += `1`;
551	else
552	exceptions \|= FPSCR_IOC;
553	}
554
555	if (d && vdm.sign) {
556	d = `0`;
557	exceptions \|= FPSCR_IOC;
558	} else if (rem)
559	exceptions \|= FPSCR_IXC;
560	} else {
561	d = `0`;
562	if (vdm.exponent \| vdm.significand) {
563	exceptions \|= FPSCR_IXC;
564	if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == `0`)
565	d = `1`;
566	else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) {
567	d = `0`;
568	exceptions \|= FPSCR_IOC;
569	}
570	}
571	}
572
573	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
574
575	vfp_put_float(val: d, reg: sd);
576
577	return exceptions;
578	}
579
580	static u32 vfp_double_ftouiz(int sd, int unused, int dm, u32 fpscr)
581	{
582	return vfp_double_ftoui(sd, unused, dm, fpscr: FPSCR_ROUND_TOZERO);
583	}
584
585	static u32 vfp_double_ftosi(int sd, int unused, int dm, u32 fpscr)
586	{
587	struct vfp_double vdm;
588	u32 d, exceptions = `0`;
589	int rmode = fpscr & FPSCR_RMODE_MASK;
590	int tm;
591
592	vfp_double_unpack(s: &vdm, val: vfp_get_double(reg: dm));
593	vfp_double_dump(str: "VDM", d: &vdm);
594
595	/*
596	* Do we have denormalised number?
597	*/
598	tm = vfp_double_type(s: &vdm);
599	if (tm & VFP_DENORMAL)
600	exceptions \|= FPSCR_IDC;
601
602	if (tm & VFP_NAN) {
603	d = `0`;
604	exceptions \|= FPSCR_IOC;
605	} else if (vdm.exponent >= `1023` + `32`) {
606	d = `0x7fffffff`;
607	if (vdm.sign)
608	d = ~d;
609	exceptions \|= FPSCR_IOC;
610	} else if (vdm.exponent >= `1023` - `1`) {
611	int shift = `1023` + `63` - vdm.exponent; / 58 /
612	u64 rem, incr = `0`;
613
614	d = (vdm.significand << `1`) >> shift;
615	rem = vdm.significand << (`65` - shift);
616
617	if (rmode == FPSCR_ROUND_NEAREST) {
618	incr = `0x8000000000000000ULL`;
619	if ((d & `1`) == `0`)
620	incr -= `1`;
621	} else if (rmode == FPSCR_ROUND_TOZERO) {
622	incr = `0`;
623	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != `0`)) {
624	incr = ~`0ULL`;
625	}
626
627	if ((rem + incr) < rem && d < `0xffffffff`)
628	d += `1`;
629	if (d > `0x7fffffff` + (vdm.sign != `0`)) {
630	d = `0x7fffffff` + (vdm.sign != `0`);
631	exceptions \|= FPSCR_IOC;
632	} else if (rem)
633	exceptions \|= FPSCR_IXC;
634
635	if (vdm.sign)
636	d = -d;
637	} else {
638	d = `0`;
639	if (vdm.exponent \| vdm.significand) {
640	exceptions \|= FPSCR_IXC;
641	if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == `0`)
642	d = `1`;
643	else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign)
644	d = -`1`;
645	}
646	}
647
648	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
649
650	vfp_put_float(val: (s32)d, reg: sd);
651
652	return exceptions;
653	}
654
655	static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr)
656	{
657	return vfp_double_ftosi(sd: dd, unused, dm, fpscr: FPSCR_ROUND_TOZERO);
658	}
659
660
661	static struct op fops_ext[`32`] = {
662	[FEXT_TO_IDX(FEXT_FCPY)] = { vfp_double_fcpy, `0` },
663	[FEXT_TO_IDX(FEXT_FABS)] = { vfp_double_fabs, `0` },
664	[FEXT_TO_IDX(FEXT_FNEG)] = { vfp_double_fneg, `0` },
665	[FEXT_TO_IDX(FEXT_FSQRT)] = { vfp_double_fsqrt, `0` },
666	[FEXT_TO_IDX(FEXT_FCMP)] = { vfp_double_fcmp, OP_SCALAR },
667	[FEXT_TO_IDX(FEXT_FCMPE)] = { vfp_double_fcmpe, OP_SCALAR },
668	[FEXT_TO_IDX(FEXT_FCMPZ)] = { vfp_double_fcmpz, OP_SCALAR },
669	[FEXT_TO_IDX(FEXT_FCMPEZ)] = { vfp_double_fcmpez, OP_SCALAR },
670	[FEXT_TO_IDX(FEXT_FCVT)] = { vfp_double_fcvts, OP_SCALAR\|OP_SD },
671	[FEXT_TO_IDX(FEXT_FUITO)] = { vfp_double_fuito, OP_SCALAR\|OP_SM },
672	[FEXT_TO_IDX(FEXT_FSITO)] = { vfp_double_fsito, OP_SCALAR\|OP_SM },
673	[FEXT_TO_IDX(FEXT_FTOUI)] = { vfp_double_ftoui, OP_SCALAR\|OP_SD },
674	[FEXT_TO_IDX(FEXT_FTOUIZ)] = { vfp_double_ftouiz, OP_SCALAR\|OP_SD },
675	[FEXT_TO_IDX(FEXT_FTOSI)] = { vfp_double_ftosi, OP_SCALAR\|OP_SD },
676	[FEXT_TO_IDX(FEXT_FTOSIZ)] = { vfp_double_ftosiz, OP_SCALAR\|OP_SD },
677	};
678
679
680
681
682	static u32
683	vfp_double_fadd_nonnumber(struct vfp_double vdd, struct* vfp_double *vdn,
684	struct vfp_double *vdm, u32 fpscr)
685	{
686	struct vfp_double *vdp;
687	u32 exceptions = `0`;
688	int tn, tm;
689
690	tn = vfp_double_type(s: vdn);
691	tm = vfp_double_type(s: vdm);
692
693	if (tn & tm & VFP_INFINITY) {
694	/*
695	* Two infinities. Are they different signs?
696	*/
697	if (vdn->sign ^ vdm->sign) {
698	/*
699	* different signs -> invalid
700	*/
701	exceptions = FPSCR_IOC;
702	vdp = &vfp_double_default_qnan;
703	} else {
704	/*
705	* same signs -> valid
706	*/
707	vdp = vdn;
708	}
709	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
710	/*
711	* One infinity and one number -> infinity
712	*/
713	vdp = vdn;
714	} else {
715	/*
716	* 'n' is a NaN of some type
717	*/
718	return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
719	}
720	vdd = vdp;
721	return exceptions;
722	}
723
724	static u32
725	vfp_double_add(struct vfp_double vdd, struct* vfp_double *vdn,
726	struct vfp_double *vdm, u32 fpscr)
727	{
728	u32 exp_diff;
729	u64 m_sig;
730
731	if (vdn->significand & (`1ULL` << `63`) \|\|
732	vdm->significand & (`1ULL` << `63`)) {
733	pr_info("VFP: bad FP values in %s\n", __func__);
734	vfp_double_dump(str: "VDN", d: vdn);
735	vfp_double_dump(str: "VDM", d: vdm);
736	}
737
738	/*
739	* Ensure that 'n' is the largest magnitude number. Note that
740	* if 'n' and 'm' have equal exponents, we do not swap them.
741	* This ensures that NaN propagation works correctly.
742	*/
743	if (vdn->exponent < vdm->exponent) {
744	struct vfp_double *t = vdn;
745	vdn = vdm;
746	vdm = t;
747	}
748
749	/*
750	* Is 'n' an infinity or a NaN? Note that 'm' may be a number,
751	* infinity or a NaN here.
752	*/
753	if (vdn->exponent == `2047`)
754	return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr);
755
756	/*
757	* We have two proper numbers, where 'vdn' is the larger magnitude.
758	*
759	* Copy 'n' to 'd' before doing the arithmetic.
760	*/
761	vdd = vdn;
762
763	/*
764	* Align 'm' with the result.
765	*/
766	exp_diff = vdn->exponent - vdm->exponent;
767	m_sig = vfp_shiftright64jamming(val: vdm->significand, shift: exp_diff);
768
769	/*
770	* If the signs are different, we are really subtracting.
771	*/
772	if (vdn->sign ^ vdm->sign) {
773	m_sig = vdn->significand - m_sig;
774	if ((s64)m_sig < `0`) {
775	vdd->sign = vfp_sign_negate(vdd->sign);
776	m_sig = -m_sig;
777	} else if (m_sig == `0`) {
778	vdd->sign = (fpscr & FPSCR_RMODE_MASK) ==
779	FPSCR_ROUND_MINUSINF ? `0x8000` : `0`;
780	}
781	} else {
782	m_sig += vdn->significand;
783	}
784	vdd->significand = m_sig;
785
786	return `0`;
787	}
788
789	static u32
790	vfp_double_multiply(struct vfp_double vdd, struct* vfp_double *vdn,
791	struct vfp_double *vdm, u32 fpscr)
792	{
793	vfp_double_dump(str: "VDN", d: vdn);
794	vfp_double_dump(str: "VDM", d: vdm);
795
796	/*
797	* Ensure that 'n' is the largest magnitude number. Note that
798	* if 'n' and 'm' have equal exponents, we do not swap them.
799	* This ensures that NaN propagation works correctly.
800	*/
801	if (vdn->exponent < vdm->exponent) {
802	struct vfp_double *t = vdn;
803	vdn = vdm;
804	vdm = t;
805	pr_debug("VFP: swapping M <-> N\n");
806	}
807
808	vdd->sign = vdn->sign ^ vdm->sign;
809
810	/*
811	* If 'n' is an infinity or NaN, handle it. 'm' may be anything.
812	*/
813	if (vdn->exponent == `2047`) {
814	if (vdn->significand \|\| (vdm->exponent == `2047` && vdm->significand))
815	return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
816	if ((vdm->exponent \| vdm->significand) == `0`) {
817	*vdd = vfp_double_default_qnan;
818	return FPSCR_IOC;
819	}
820	vdd->exponent = vdn->exponent;
821	vdd->significand = `0`;
822	return `0`;
823	}
824
825	/*
826	* If 'm' is zero, the result is always zero. In this case,
827	* 'n' may be zero or a number, but it doesn't matter which.
828	*/
829	if ((vdm->exponent \| vdm->significand) == `0`) {
830	vdd->exponent = `0`;
831	vdd->significand = `0`;
832	return `0`;
833	}
834
835	/*
836	* We add 2 to the destination exponent for the same reason
837	* as the addition case - though this time we have +1 from
838	* each input operand.
839	*/
840	vdd->exponent = vdn->exponent + vdm->exponent - `1023` + `2`;
841	vdd->significand = vfp_hi64multiply64(n: vdn->significand, m: vdm->significand);
842
843	vfp_double_dump(str: "VDD", d: vdd);
844	return `0`;
845	}
846
847	#define NEG_MULTIPLY (1 << 0)
848	#define NEG_SUBTRACT (1 << 1)
849
850	static u32
851	vfp_double_multiply_accumulate(int dd, int dn, int dm, u32 fpscr, u32 negate, char *func)
852	{
853	struct vfp_double vdd, vdp, vdn, vdm;
854	u32 exceptions;
855
856	vfp_double_unpack(s: &vdn, val: vfp_get_double(reg: dn));
857	if (vdn.exponent == `0` && vdn.significand)
858	vfp_double_normalise_denormal(vd: &vdn);
859
860	vfp_double_unpack(s: &vdm, val: vfp_get_double(reg: dm));
861	if (vdm.exponent == `0` && vdm.significand)
862	vfp_double_normalise_denormal(vd: &vdm);
863
864	exceptions = vfp_double_multiply(vdd: &vdp, vdn: &vdn, vdm: &vdm, fpscr);
865	if (negate & NEG_MULTIPLY)
866	vdp.sign = vfp_sign_negate(vdp.sign);
867
868	vfp_double_unpack(s: &vdn, val: vfp_get_double(reg: dd));
869	if (vdn.exponent == `0` && vdn.significand)
870	vfp_double_normalise_denormal(vd: &vdn);
871	if (negate & NEG_SUBTRACT)
872	vdn.sign = vfp_sign_negate(vdn.sign);
873
874	exceptions \|= vfp_double_add(vdd: &vdd, vdn: &vdn, vdm: &vdp, fpscr);
875
876	return vfp_double_normaliseround(dd, vd: &vdd, fpscr, exceptions, func);
877	}
878
879	/*
880	* Standard operations
881	*/
882
883	/*
884	* sd = sd + (sn * sm)
885	*/
886	static u32 vfp_double_fmac(int dd, int dn, int dm, u32 fpscr)
887	{
888	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, negate: `0`, func: "fmac");
889	}
890
891	/*
892	* sd = sd - (sn * sm)
893	*/
894	static u32 vfp_double_fnmac(int dd, int dn, int dm, u32 fpscr)
895	{
896	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_MULTIPLY, func: "fnmac");
897	}
898
899	/*
900	* sd = -sd + (sn * sm)
901	*/
902	static u32 vfp_double_fmsc(int dd, int dn, int dm, u32 fpscr)
903	{
904	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT, func: "fmsc");
905	}
906
907	/*
908	* sd = -sd - (sn * sm)
909	*/
910	static u32 vfp_double_fnmsc(int dd, int dn, int dm, u32 fpscr)
911	{
912	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT \| NEG_MULTIPLY, func: "fnmsc");
913	}
914
915	/*
916	* sd = sn * sm
917	*/
918	static u32 vfp_double_fmul(int dd, int dn, int dm, u32 fpscr)
919	{
920	struct vfp_double vdd, vdn, vdm;
921	u32 exceptions;
922
923	vfp_double_unpack(s: &vdn, val: vfp_get_double(reg: dn));
924	if (vdn.exponent == `0` && vdn.significand)
925	vfp_double_normalise_denormal(vd: &vdn);
926
927	vfp_double_unpack(s: &vdm, val: vfp_get_double(reg: dm));
928	if (vdm.exponent == `0` && vdm.significand)
929	vfp_double_normalise_denormal(vd: &vdm);
930
931	exceptions = vfp_double_multiply(vdd: &vdd, vdn: &vdn, vdm: &vdm, fpscr);
932	return vfp_double_normaliseround(dd, vd: &vdd, fpscr, exceptions, func: "fmul");
933	}
934
935	/*
936	* sd = -(sn * sm)
937	*/
938	static u32 vfp_double_fnmul(int dd, int dn, int dm, u32 fpscr)
939	{
940	struct vfp_double vdd, vdn, vdm;
941	u32 exceptions;
942
943	vfp_double_unpack(s: &vdn, val: vfp_get_double(reg: dn));
944	if (vdn.exponent == `0` && vdn.significand)
945	vfp_double_normalise_denormal(vd: &vdn);
946
947	vfp_double_unpack(s: &vdm, val: vfp_get_double(reg: dm));
948	if (vdm.exponent == `0` && vdm.significand)
949	vfp_double_normalise_denormal(vd: &vdm);
950
951	exceptions = vfp_double_multiply(vdd: &vdd, vdn: &vdn, vdm: &vdm, fpscr);
952	vdd.sign = vfp_sign_negate(vdd.sign);
953
954	return vfp_double_normaliseround(dd, vd: &vdd, fpscr, exceptions, func: "fnmul");
955	}
956
957	/*
958	* sd = sn + sm
959	*/
960	static u32 vfp_double_fadd(int dd, int dn, int dm, u32 fpscr)
961	{
962	struct vfp_double vdd, vdn, vdm;
963	u32 exceptions;
964
965	vfp_double_unpack(s: &vdn, val: vfp_get_double(reg: dn));
966	if (vdn.exponent == `0` && vdn.significand)
967	vfp_double_normalise_denormal(vd: &vdn);
968
969	vfp_double_unpack(s: &vdm, val: vfp_get_double(reg: dm));
970	if (vdm.exponent == `0` && vdm.significand)
971	vfp_double_normalise_denormal(vd: &vdm);
972
973	exceptions = vfp_double_add(vdd: &vdd, vdn: &vdn, vdm: &vdm, fpscr);
974
975	return vfp_double_normaliseround(dd, vd: &vdd, fpscr, exceptions, func: "fadd");
976	}
977
978	/*
979	* sd = sn - sm
980	*/
981	static u32 vfp_double_fsub(int dd, int dn, int dm, u32 fpscr)
982	{
983	struct vfp_double vdd, vdn, vdm;
984	u32 exceptions;
985
986	vfp_double_unpack(s: &vdn, val: vfp_get_double(reg: dn));
987	if (vdn.exponent == `0` && vdn.significand)
988	vfp_double_normalise_denormal(vd: &vdn);
989
990	vfp_double_unpack(s: &vdm, val: vfp_get_double(reg: dm));
991	if (vdm.exponent == `0` && vdm.significand)
992	vfp_double_normalise_denormal(vd: &vdm);
993
994	/*
995	* Subtraction is like addition, but with a negated operand.
996	*/
997	vdm.sign = vfp_sign_negate(vdm.sign);
998
999	exceptions = vfp_double_add(vdd: &vdd, vdn: &vdn, vdm: &vdm, fpscr);
1000
1001	return vfp_double_normaliseround(dd, vd: &vdd, fpscr, exceptions, func: "fsub");
1002	}
1003
1004	/*
1005	* sd = sn / sm
1006	*/
1007	static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr)
1008	{
1009	struct vfp_double vdd, vdn, vdm;
1010	u32 exceptions = `0`;
1011	int tm, tn;
1012
1013	vfp_double_unpack(s: &vdn, val: vfp_get_double(reg: dn));
1014	vfp_double_unpack(s: &vdm, val: vfp_get_double(reg: dm));
1015
1016	vdd.sign = vdn.sign ^ vdm.sign;
1017
1018	tn = vfp_double_type(s: &vdn);
1019	tm = vfp_double_type(s: &vdm);
1020
1021	/*
1022	* Is n a NAN?
1023	*/
1024	if (tn & VFP_NAN)
1025	goto vdn_nan;
1026
1027	/*
1028	* Is m a NAN?
1029	*/
1030	if (tm & VFP_NAN)
1031	goto vdm_nan;
1032
1033	/*
1034	* If n and m are infinity, the result is invalid
1035	* If n and m are zero, the result is invalid
1036	*/
1037	if (tm & tn & (VFP_INFINITY\|VFP_ZERO))
1038	goto invalid;
1039
1040	/*
1041	* If n is infinity, the result is infinity
1042	*/
1043	if (tn & VFP_INFINITY)
1044	goto infinity;
1045
1046	/*
1047	* If m is zero, raise div0 exceptions
1048	*/
1049	if (tm & VFP_ZERO)
1050	goto divzero;
1051
1052	/*
1053	* If m is infinity, or n is zero, the result is zero
1054	*/
1055	if (tm & VFP_INFINITY \|\| tn & VFP_ZERO)
1056	goto zero;
1057
1058	if (tn & VFP_DENORMAL)
1059	vfp_double_normalise_denormal(vd: &vdn);
1060	if (tm & VFP_DENORMAL)
1061	vfp_double_normalise_denormal(vd: &vdm);
1062
1063	/*
1064	* Ok, we have two numbers, we can perform division.
1065	*/
1066	vdd.exponent = vdn.exponent - vdm.exponent + `1023` - `1`;
1067	vdm.significand <<= `1`;
1068	if (vdm.significand <= (`2` * vdn.significand)) {
1069	vdn.significand >>= `1`;
1070	vdd.exponent++;
1071	}
1072	vdd.significand = vfp_estimate_div128to64(nh: vdn.significand, nl: `0`, m: vdm.significand);
1073	if ((vdd.significand & `0x1ff`) <= `2`) {
1074	u64 termh, terml, remh, reml;
1075	mul64to128(resh: &termh, resl: &terml, n: vdm.significand, m: vdd.significand);
1076	sub128(resh: &remh, resl: &reml, nh: vdn.significand, nl: `0`, mh: termh, ml: terml);
1077	while ((s64)remh < `0`) {
1078	vdd.significand -= `1`;
1079	add128(resh: &remh, resl: &reml, nh: remh, nl: reml, mh: `0`, ml: vdm.significand);
1080	}
1081	vdd.significand \|= (reml != `0`);
1082	}
1083	return vfp_double_normaliseround(dd, vd: &vdd, fpscr, exceptions: `0`, func: "fdiv");
1084
1085	vdn_nan:
1086	exceptions = vfp_propagate_nan(vdd: &vdd, vdn: &vdn, vdm: &vdm, fpscr);
1087	pack:
1088	vfp_put_double(val: vfp_double_pack(s: &vdd), reg: dd);
1089	return exceptions;
1090
1091	vdm_nan:
1092	exceptions = vfp_propagate_nan(vdd: &vdd, vdn: &vdm, vdm: &vdn, fpscr);
1093	goto pack;
1094
1095	zero:
1096	vdd.exponent = `0`;
1097	vdd.significand = `0`;
1098	goto pack;
1099
1100	divzero:
1101	exceptions = FPSCR_DZC;
1102	infinity:
1103	vdd.exponent = `2047`;
1104	vdd.significand = `0`;
1105	goto pack;
1106
1107	invalid:
1108	vfp_put_double(val: vfp_double_pack(s: &vfp_double_default_qnan), reg: dd);
1109	return FPSCR_IOC;
1110	}
1111
1112	static struct op fops[`16`] = {
1113	[FOP_TO_IDX(FOP_FMAC)] = { vfp_double_fmac, `0` },
1114	[FOP_TO_IDX(FOP_FNMAC)] = { vfp_double_fnmac, `0` },
1115	[FOP_TO_IDX(FOP_FMSC)] = { vfp_double_fmsc, `0` },
1116	[FOP_TO_IDX(FOP_FNMSC)] = { vfp_double_fnmsc, `0` },
1117	[FOP_TO_IDX(FOP_FMUL)] = { vfp_double_fmul, `0` },
1118	[FOP_TO_IDX(FOP_FNMUL)] = { vfp_double_fnmul, `0` },
1119	[FOP_TO_IDX(FOP_FADD)] = { vfp_double_fadd, `0` },
1120	[FOP_TO_IDX(FOP_FSUB)] = { vfp_double_fsub, `0` },
1121	[FOP_TO_IDX(FOP_FDIV)] = { vfp_double_fdiv, `0` },
1122	};
1123
1124	#define FREG_BANK(x) ((x) & 0x0c)
1125	#define FREG_IDX(x) ((x) & 3)
1126
1127	u32 vfp_double_cpdo(u32 inst, u32 fpscr)
1128	{
1129	u32 op = inst & FOP_MASK;
1130	u32 exceptions = `0`;
1131	unsigned int dest;
1132	unsigned int dn = vfp_get_dn(inst);
1133	unsigned int dm;
1134	unsigned int vecitr, veclen, vecstride;
1135	struct op *fop;
1136
1137	vecstride = (`1` + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK));
1138
1139	fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];
1140
1141	/*
1142	* fcvtds takes an sN register number as destination, not dN.
1143	* It also always operates on scalars.
1144	*/
1145	if (fop->flags & OP_SD)
1146	dest = vfp_get_sd(inst);
1147	else
1148	dest = vfp_get_dd(inst);
1149
1150	/*
1151	* f[us]ito takes a sN operand, not a dN operand.
1152	*/
1153	if (fop->flags & OP_SM)
1154	dm = vfp_get_sm(inst);
1155	else
1156	dm = vfp_get_dm(inst);
1157
1158	/*
1159	* If destination bank is zero, vector length is always '1'.
1160	* ARM DDI0100F C5.1.3, C5.3.2.
1161	*/
1162	if ((fop->flags & OP_SCALAR) \|\| (FREG_BANK(dest) == `0`))
1163	veclen = `0`;
1164	else
1165	veclen = fpscr & FPSCR_LENGTH_MASK;
1166
1167	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
1168	(veclen >> FPSCR_LENGTH_BIT) + `1`);
1169
1170	if (!fop->fn)
1171	goto invalid;
1172
1173	for (vecitr = `0`; vecitr <= veclen; vecitr += `1` << FPSCR_LENGTH_BIT) {
1174	u32 except;
1175	char type;
1176
1177	type = fop->flags & OP_SD ? `'s'` : `'d'`;
1178	if (op == FOP_EXT)
1179	pr_debug("VFP: itr%d (%c%u) = op[%u] (d%u)\n",
1180	vecitr >> FPSCR_LENGTH_BIT,
1181	type, dest, dn, dm);
1182	else
1183	pr_debug("VFP: itr%d (%c%u) = (d%u) op[%u] (d%u)\n",
1184	vecitr >> FPSCR_LENGTH_BIT,
1185	type, dest, dn, FOP_TO_IDX(op), dm);
1186
1187	except = fop->fn(dest, dn, dm, fpscr);
1188	pr_debug("VFP: itr%d: exceptions=%08x\n",
1189	vecitr >> FPSCR_LENGTH_BIT, except);
1190
1191	exceptions \|= except;
1192
1193	/*
1194	* CHECK: It appears to be undefined whether we stop when
1195	* we encounter an exception. We continue.
1196	*/
1197	dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & `3`);
1198	dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & `3`);
1199	if (FREG_BANK(dm) != `0`)
1200	dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & `3`);
1201	}
1202	return exceptions;
1203
1204	invalid:
1205	return ~`0`;
1206	}
1207

source code of linux/arch/arm/vfp/vfpdouble.c