/*
 *  linux/arch/arm/vfp/vfpsingle.c
 *
 * This code is derived in part from John R. Hauser's SoftFloat library, which
 * carries the following notice:
 *
 * ===========================================================================
 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
 * Arithmetic Package, Release 2.
 *
 * Written by John R. Hauser.  This work was made possible in part by the
 * International Computer Science Institute, located at Suite 600, 1947 Center
 * Street, Berkeley, California 94704.  Funding was partially provided by the
 * National Science Foundation under grant MIP-9311980.  The original version
 * of this code was written as part of a project to build a fixed-point vector
 * processor in collaboration with the University of California at Berkeley,
 * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
 * arithmetic/softfloat.html'.
 *
 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
 * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
 *
 * Derivative works are acceptable, even for commercial purposes, so long as
 * (1) they include prominent notice that the work is derivative, and (2) they
 * include prominent notice akin to these three paragraphs for those parts of
 * this code that are retained.
 * ===========================================================================
 */
#include <linux/kernel.h>
#include <linux/bitops.h>

#include <asm/div64.h>
#include <asm/vfp.h>

#include "vfpinstr.h"
#include "vfp.h"

static struct vfp_single vfp_single_default_qnan = {
	.exponent	= 255,
	.sign		= 0,
	.significand	= VFP_SINGLE_SIGNIFICAND_QNAN,
};
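
/*
 * Exponent 255 with only the quiet bit set in the significand: assuming
 * VFP_SINGLE_SIGNIFICAND_QNAN in vfp.h is the quiet bit of the working
 * significand format, this packs to the usual IEEE 754 single-precision
 * default quiet NaN pattern, 0x7fc00000.
 */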

static void vfp_single_dump(const char *str, struct vfp_single *s)
{
	pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n",
		 str, s->sign != 0, s->exponent, s->significand);
}

static void vfp_single_normalise_denormal(struct vfp_single *vs)
{
	int bits = 31 - fls(vs->significand);

	vfp_single_dump("normalise_denormal: in", vs);

	if (bits) {
		vs->exponent -= bits - 1;
		vs->significand <<= bits;
	}

	vfp_single_dump("normalise_denormal: out", vs);
}
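
/*
 * Worked example: a denormal with significand 0x00000100 has fls() == 9,
 * so bits == 22; the significand becomes 0x00000100 << 22 == 0x40000000
 * (leading bit restored to bit 30 of the working format) and the
 * exponent drops by 21.
 */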
67 | |
68 | #ifndef DEBUG |
69 | #define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except) |
70 | u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions) |
71 | #else |
72 | u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func) |
73 | #endif |
74 | { |
75 | u32 significand, incr, rmode; |
76 | int exponent, shift, underflow; |
77 | |
78 | vfp_single_dump(str: "pack: in" , s: vs); |
79 | |
80 | /* |
81 | * Infinities and NaNs are a special case. |
82 | */ |
83 | if (vs->exponent == 255 && (vs->significand == 0 || exceptions)) |
84 | goto pack; |
85 | |
86 | /* |
87 | * Special-case zero. |
88 | */ |
89 | if (vs->significand == 0) { |
90 | vs->exponent = 0; |
91 | goto pack; |
92 | } |
93 | |
94 | exponent = vs->exponent; |
95 | significand = vs->significand; |
96 | |
97 | /* |
98 | * Normalise first. Note that we shift the significand up to |
99 | * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least |
100 | * significant bit. |
101 | */ |
102 | shift = 32 - fls(x: significand); |
103 | if (shift < 32 && shift) { |
104 | exponent -= shift; |
105 | significand <<= shift; |
106 | } |
107 | |
108 | #ifdef DEBUG |
109 | vs->exponent = exponent; |
110 | vs->significand = significand; |
111 | vfp_single_dump("pack: normalised" , vs); |
112 | #endif |
113 | |
114 | /* |
115 | * Tiny number? |
116 | */ |
117 | underflow = exponent < 0; |
118 | if (underflow) { |
119 | significand = vfp_shiftright32jamming(val: significand, shift: -exponent); |
120 | exponent = 0; |
121 | #ifdef DEBUG |
122 | vs->exponent = exponent; |
123 | vs->significand = significand; |
124 | vfp_single_dump("pack: tiny number" , vs); |
125 | #endif |
126 | if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))) |
127 | underflow = 0; |
128 | } |
129 | |
130 | /* |
131 | * Select rounding increment. |
132 | */ |
133 | incr = 0; |
134 | rmode = fpscr & FPSCR_RMODE_MASK; |
135 | |
136 | if (rmode == FPSCR_ROUND_NEAREST) { |
137 | incr = 1 << VFP_SINGLE_LOW_BITS; |
138 | if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0) |
139 | incr -= 1; |
140 | } else if (rmode == FPSCR_ROUND_TOZERO) { |
141 | incr = 0; |
142 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0)) |
143 | incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1; |
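
	/*
	 * Round-to-nearest-even example, assuming VFP_SINGLE_LOW_BITS
	 * is 7 (its single-precision value): the result's LSB is bit 8
	 * and bits 7..0 will be discarded.  With significand 0x40000180
	 * the low byte 0x80 is exactly half way and bit 8 is set, so
	 * incr stays 0x80 and we round up to 0x40000200.  With
	 * 0x40000080, bit 8 is clear, incr becomes 0x7f, and the tie
	 * rounds down to the even result.
	 */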

	pr_debug("VFP: rounding increment = 0x%08x\n", incr);

	/*
	 * Is our rounding going to overflow?
	 */
	if ((significand + incr) < significand) {
		exponent += 1;
		significand = (significand >> 1) | (significand & 1);
		incr >>= 1;
#ifdef DEBUG
		vs->exponent = exponent;
		vs->significand = significand;
		vfp_single_dump("pack: overflow", vs);
#endif
	}

	/*
	 * If any of the low bits (which will be shifted out of the
	 * number) are non-zero, the result is inexact.
	 */
	if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
		exceptions |= FPSCR_IXC;

	/*
	 * Do our rounding.
	 */
	significand += incr;

	/*
	 * Infinity?
	 */
	if (exponent >= 254) {
		exceptions |= FPSCR_OFC | FPSCR_IXC;
		if (incr == 0) {
			vs->exponent = 253;
			vs->significand = 0x7fffffff;
		} else {
			vs->exponent = 255;		/* infinity */
			vs->significand = 0;
		}
	} else {
		if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
			exponent = 0;
		if (exponent || significand > 0x80000000)
			underflow = 0;
		if (underflow)
			exceptions |= FPSCR_UFC;
		vs->exponent = exponent;
		vs->significand = significand >> 1;
	}

pack:
	vfp_single_dump("pack: final", vs);
	{
		s32 d = vfp_single_pack(vs);
#ifdef DEBUG
		pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func,
			 sd, d, exceptions);
#endif
		vfp_put_float(d, sd);
	}

	return exceptions;
}

/*
 * Propagate the NaN, setting exceptions if it is signalling.
 * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
 */
static u32
vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn,
		  struct vfp_single *vsm, u32 fpscr)
{
	struct vfp_single *nan;
	int tn, tm = 0;

	tn = vfp_single_type(vsn);

	if (vsm)
		tm = vfp_single_type(vsm);

	if (fpscr & FPSCR_DEFAULT_NAN)
		/*
		 * Default NaN mode - always returns a quiet NaN
		 */
		nan = &vfp_single_default_qnan;
	else {
		/*
		 * Contemporary mode - select the first signalling
		 * NAN, or if neither are signalling, the first
		 * quiet NAN.
		 */
		if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
			nan = vsn;
		else
			nan = vsm;
		/*
		 * Make the NaN quiet.
		 */
		nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
	}

	*vsd = *nan;

	/*
	 * If one was a signalling NAN, raise invalid operation.
	 */
	return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
}
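
/*
 * Example: an operation with one signalling NaN operand selects that
 * NaN, quietens it by ORing in VFP_SINGLE_SIGNIFICAND_QNAN, and returns
 * FPSCR_IOC; with two quiet NaNs, 'n' is selected and only VFP_NAN_FLAG
 * is returned (no exception is raised).
 */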


/*
 * Extended operations
 */
static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr)
{
	vfp_put_float(vfp_single_packed_abs(m), sd);
	return 0;
}

static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr)
{
	vfp_put_float(m, sd);
	return 0;
}

static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr)
{
	vfp_put_float(vfp_single_packed_negate(m), sd);
	return 0;
}

static const u16 sqrt_oddadjust[] = {
	0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
	0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
};

static const u16 sqrt_evenadjust[] = {
	0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
	0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
};
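
/*
 * These appear to be the odd/even adjustment tables from SoftFloat's
 * estimateSqrt32().  They are indexed by the four fraction bits
 * immediately below the leading significand bit (index = (a >> 27) & 15,
 * with a = significand << 1).
 */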

u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
{
	int index;
	u32 z, a;

	if ((significand & 0xc0000000) != 0x40000000) {
		pr_warn("VFP: estimate_sqrt: invalid significand\n");
	}

	a = significand << 1;
	index = (a >> 27) & 15;
	if (exponent & 1) {
		z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
		z = ((a / z) << 14) + (z << 15);
		a >>= 1;
	} else {
		z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
		z = a / z + z;
		z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
		if (z <= a)
			return (s32)a >> 1;
	}
	{
		u64 v = (u64)a << 31;
		do_div(v, z);
		return v + (z >> 1);
	}
}
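
/*
 * The table lookup above gives a first approximation z of the square
 * root; the final step is one Newton-Raphson refinement,
 * sqrt(a << 32) ~= ((a << 32) / z + z) / 2, computed in fixed point as
 * ((a << 31) / z) + (z >> 1).
 */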

static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm, vsd;
	int ret, tm;

	vfp_single_unpack(&vsm, m);
	tm = vfp_single_type(&vsm);
	if (tm & (VFP_NAN|VFP_INFINITY)) {
		struct vfp_single *vsp = &vsd;

		if (tm & VFP_NAN)
			ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr);
		else if (vsm.sign == 0) {
sqrt_copy:
			vsp = &vsm;
			ret = 0;
		} else {
sqrt_invalid:
			vsp = &vfp_single_default_qnan;
			ret = FPSCR_IOC;
		}
		vfp_put_float(vfp_single_pack(vsp), sd);
		return ret;
	}

	/*
	 * sqrt(+/- 0) == +/- 0
	 */
	if (tm & VFP_ZERO)
		goto sqrt_copy;

	/*
	 * Normalise a denormalised number
	 */
	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	/*
	 * sqrt(<0) = invalid
	 */
	if (vsm.sign)
		goto sqrt_invalid;

	vfp_single_dump("sqrt", &vsm);

	/*
	 * Estimate the square root.
	 */
	vsd.sign = 0;
	vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
	vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;

	vfp_single_dump("sqrt estimate", &vsd);

	/*
	 * And now adjust.
	 */
	if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
		if (vsd.significand < 2) {
			vsd.significand = 0xffffffff;
		} else {
			u64 term;
			s64 rem;
			vsm.significand <<= !(vsm.exponent & 1);
			term = (u64)vsd.significand * vsd.significand;
			rem = ((u64)vsm.significand << 32) - term;

			pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem);

			while (rem < 0) {
				vsd.significand -= 1;
				rem += ((u64)vsd.significand << 1) | 1;
			}
			vsd.significand |= rem != 0;
		}
	}
	vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);

	return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt");
}

/*
 * Equal	:= ZC
 * Less than	:= N
 * Greater than	:= C
 * Unordered	:= CV
 */
static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr)
{
	s32 d;
	u32 ret = 0;

	d = vfp_get_float(sd);
	if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	if (ret == 0) {
		if (d == m || vfp_single_packed_abs(d | m) == 0) {
			/*
			 * equal
			 */
			ret |= FPSCR_Z | FPSCR_C;
		} else if (vfp_single_packed_sign(d ^ m)) {
			/*
			 * different signs
			 */
			if (vfp_single_packed_sign(d))
				/*
				 * d is negative, so d < m
				 */
				ret |= FPSCR_N;
			else
				/*
				 * d is positive, so d > m
				 */
				ret |= FPSCR_C;
		} else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
			/*
			 * d < m
			 */
			ret |= FPSCR_N;
		} else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
			/*
			 * d > m
			 */
			ret |= FPSCR_C;
		}
	}
	return ret;
}
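
/*
 * The last two tests compare the packed values as signed integers; this
 * orders same-signed operands correctly because XORing with the sign
 * bit inverts the sense of the comparison when both are negative.  For
 * example, with d = 0xc0000000 (-2.0) and m = 0xbf800000 (-1.0), d > m
 * as an s32, and the XOR with the set sign bit yields "d < m" (N).
 */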

static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 0, m, fpscr);
}

static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 1, m, fpscr);
}

static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 0, 0, fpscr);
}

static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 1, 0, fpscr);
}

static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	struct vfp_double vdd;
	int tm;
	u32 exceptions = 0;

	vfp_single_unpack(&vsm, m);

	tm = vfp_single_type(&vsm);

	/*
	 * If we have a signalling NaN, signal invalid operation.
	 */
	if (tm == VFP_SNAN)
		exceptions = FPSCR_IOC;

	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	vdd.sign = vsm.sign;
	vdd.significand = (u64)vsm.significand << 32;

	/*
	 * If we have an infinity or NaN, the exponent must be 2047.
	 */
	if (tm & (VFP_INFINITY|VFP_NAN)) {
		vdd.exponent = 2047;
		if (tm == VFP_QNAN)
			vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
		goto pack_nan;
	} else if (tm & VFP_ZERO)
		vdd.exponent = 0;
	else
		vdd.exponent = vsm.exponent + (1023 - 127);
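
	/*
	 * Rebias the exponent: single precision uses a bias of 127 and
	 * double precision a bias of 1023, so 896 is added to a finite,
	 * non-zero single-precision exponent.
	 */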

	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd");

pack_nan:
	vfp_put_double(vfp_double_pack(&vdd), dd);
	return exceptions;
}

static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vs;

	vs.sign = 0;
	vs.exponent = 127 + 31 - 1;
	vs.significand = (u32)m;

	return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito");
}

static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vs;

	vs.sign = (m & 0x80000000) >> 16;
	vs.exponent = 127 + 31 - 1;
	vs.significand = vs.sign ? -m : m;

	return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito");
}
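
/*
 * Example: fsito(-5) gives sign = 0x8000, significand = 5 and a biased
 * exponent of 157 (127 + 31 - 1), i.e. the value is
 * significand * 2^(exponent - 157); vfp_single_normaliseround() then
 * shifts the leading bit into place and rounds.
 */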

static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;
	int tm;

	vfp_single_unpack(&vsm, m);
	vfp_single_dump("VSM", &vsm);

	/*
	 * Do we have a denormalised number?
	 */
	tm = vfp_single_type(&vsm);
	if (tm & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (tm & VFP_NAN)
		vsm.sign = 0;

	if (vsm.exponent >= 127 + 32) {
		d = vsm.sign ? 0 : 0xffffffff;
		exceptions = FPSCR_IOC;
	} else if (vsm.exponent >= 127 - 1) {
		int shift = 127 + 31 - vsm.exponent;
		u32 rem, incr = 0;

		/*
		 * 2^0 <= m < 2^32-2^8
		 */
		d = (vsm.significand << 1) >> shift;
		rem = vsm.significand << (33 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			incr = 0x80000000;
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
			incr = ~0;
		}

		if ((rem + incr) < rem) {
			if (d < 0xffffffff)
				d += 1;
			else
				exceptions |= FPSCR_IOC;
		}

		if (d && vsm.sign) {
			d = 0;
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;
	} else {
		d = 0;
		if (vsm.exponent | vsm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
				d = 0;
				exceptions |= FPSCR_IOC;
			}
		}
	}
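
	/*
	 * Worked example: converting 2.5 (exponent 128, significand
	 * 0x50000000) gives shift = 30, d = 2 and rem = 0x80000000,
	 * i.e. exactly half.  Under round-to-nearest the increment is
	 * reduced to 0x7fffffff because d is even, so rem + incr does
	 * not carry and the result stays at 2 (ties-to-even), with
	 * FPSCR_IXC raised for the non-zero remainder.
	 */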

	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float(d, sd);

	return exceptions;
}

static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO);
}

static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;
	int tm;

	vfp_single_unpack(&vsm, m);
	vfp_single_dump("VSM", &vsm);

	/*
	 * Do we have a denormalised number?
	 */
	tm = vfp_single_type(&vsm);
	if (vfp_single_type(&vsm) & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (tm & VFP_NAN) {
		d = 0;
		exceptions |= FPSCR_IOC;
	} else if (vsm.exponent >= 127 + 32) {
		/*
		 * m >= 2^31-2^7: invalid
		 */
		d = 0x7fffffff;
		if (vsm.sign)
			d = ~d;
		exceptions |= FPSCR_IOC;
	} else if (vsm.exponent >= 127 - 1) {
		int shift = 127 + 31 - vsm.exponent;
		u32 rem, incr = 0;

		/* 2^0 <= m <= 2^31-2^7 */
		d = (vsm.significand << 1) >> shift;
		rem = vsm.significand << (33 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			incr = 0x80000000;
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
			incr = ~0;
		}

		if ((rem + incr) < rem && d < 0xffffffff)
			d += 1;
		if (d > 0x7fffffff + (vsm.sign != 0)) {
			d = 0x7fffffff + (vsm.sign != 0);
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;

		if (vsm.sign)
			d = -d;
	} else {
		d = 0;
		if (vsm.exponent | vsm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
				d = -1;
		}
	}

	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float((s32)d, sd);

	return exceptions;
}

static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO);
}

static struct op fops_ext[32] = {
	[FEXT_TO_IDX(FEXT_FCPY)]	= { vfp_single_fcpy,   0 },
	[FEXT_TO_IDX(FEXT_FABS)]	= { vfp_single_fabs,   0 },
	[FEXT_TO_IDX(FEXT_FNEG)]	= { vfp_single_fneg,   0 },
	[FEXT_TO_IDX(FEXT_FSQRT)]	= { vfp_single_fsqrt,  0 },
	[FEXT_TO_IDX(FEXT_FCMP)]	= { vfp_single_fcmp,   OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPE)]	= { vfp_single_fcmpe,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPZ)]	= { vfp_single_fcmpz,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPEZ)]	= { vfp_single_fcmpez, OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCVT)]	= { vfp_single_fcvtd,  OP_SCALAR|OP_DD },
	[FEXT_TO_IDX(FEXT_FUITO)]	= { vfp_single_fuito,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FSITO)]	= { vfp_single_fsito,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FTOUI)]	= { vfp_single_ftoui,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FTOUIZ)]	= { vfp_single_ftouiz, OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FTOSI)]	= { vfp_single_ftosi,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FTOSIZ)]	= { vfp_single_ftosiz, OP_SCALAR },
};

static u32
vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn,
			  struct vfp_single *vsm, u32 fpscr)
{
	struct vfp_single *vsp;
	u32 exceptions = 0;
	int tn, tm;

	tn = vfp_single_type(vsn);
	tm = vfp_single_type(vsm);

	if (tn & tm & VFP_INFINITY) {
		/*
		 * Two infinities.  Are they different signs?
		 */
		if (vsn->sign ^ vsm->sign) {
			/*
			 * different signs -> invalid
			 */
			exceptions = FPSCR_IOC;
			vsp = &vfp_single_default_qnan;
		} else {
			/*
			 * same signs -> valid
			 */
			vsp = vsn;
		}
	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
		/*
		 * One infinity and one number -> infinity
		 */
		vsp = vsn;
	} else {
		/*
		 * 'n' is a NaN of some type
		 */
		return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
	}
	*vsd = *vsp;
	return exceptions;
}

static u32
vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
	       struct vfp_single *vsm, u32 fpscr)
{
	u32 exp_diff, m_sig;

	if (vsn->significand & 0x80000000 ||
	    vsm->significand & 0x80000000) {
		pr_info("VFP: bad FP values in %s\n", __func__);
		vfp_single_dump("VSN", vsn);
		vfp_single_dump("VSM", vsm);
	}

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vsn->exponent < vsm->exponent) {
		struct vfp_single *t = vsn;
		vsn = vsm;
		vsm = t;
	}

	/*
	 * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
	 * infinity or a NaN here.
	 */
	if (vsn->exponent == 255)
		return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);

	/*
	 * We have two proper numbers, where 'vsn' is the larger magnitude.
	 *
	 * Copy 'n' to 'd' before doing the arithmetic.
	 */
	*vsd = *vsn;

	/*
	 * Align both numbers.
	 */
	exp_diff = vsn->exponent - vsm->exponent;
	m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);
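
	/*
	 * "Jamming" is SoftFloat's sticky shift: any non-zero bits
	 * shifted out of the smaller significand are ORed back into
	 * bit 0, so the final rounding still sees that the aligned
	 * value was inexact.
	 */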

	/*
	 * If the signs are different, we are really subtracting.
	 */
	if (vsn->sign ^ vsm->sign) {
		m_sig = vsn->significand - m_sig;
		if ((s32)m_sig < 0) {
			vsd->sign = vfp_sign_negate(vsd->sign);
			m_sig = -m_sig;
		} else if (m_sig == 0) {
			vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
				      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
		}
	} else {
		m_sig = vsn->significand + m_sig;
	}
	vsd->significand = m_sig;

	return 0;
}

static u32
vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr)
{
	vfp_single_dump("VSN", vsn);
	vfp_single_dump("VSM", vsm);

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vsn->exponent < vsm->exponent) {
		struct vfp_single *t = vsn;
		vsn = vsm;
		vsm = t;
		pr_debug("VFP: swapping M <-> N\n");
	}

	vsd->sign = vsn->sign ^ vsm->sign;

	/*
	 * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
	 */
	if (vsn->exponent == 255) {
		if (vsn->significand || (vsm->exponent == 255 && vsm->significand))
			return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
		if ((vsm->exponent | vsm->significand) == 0) {
			*vsd = vfp_single_default_qnan;
			return FPSCR_IOC;
		}
		vsd->exponent = vsn->exponent;
		vsd->significand = 0;
		return 0;
	}

	/*
	 * If 'm' is zero, the result is always zero.  In this case,
	 * 'n' may be zero or a number, but it doesn't matter which.
	 */
	if ((vsm->exponent | vsm->significand) == 0) {
		vsd->exponent = 0;
		vsd->significand = 0;
		return 0;
	}

	/*
	 * We add 2 to the destination exponent for the same reason as
	 * the addition case - though this time we have +1 from each
	 * input operand.
	 */
	vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2;
	vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand);

	vfp_single_dump("VSD", vsd);
	return 0;
}

#define NEG_MULTIPLY	(1 << 0)
#define NEG_SUBTRACT	(1 << 1)

static u32
vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func)
{
	struct vfp_single vsd, vsp, vsn, vsm;
	u32 exceptions;
	s32 v;

	v = vfp_get_float(sn);
	pr_debug("VFP: s%u = %08x\n", sn, v);
	vfp_single_unpack(&vsn, v);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

	exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr);
	if (negate & NEG_MULTIPLY)
		vsp.sign = vfp_sign_negate(vsp.sign);

	v = vfp_get_float(sd);
	pr_debug("VFP: s%u = %08x\n", sd, v);
	vfp_single_unpack(&vsn, v);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);
	if (negate & NEG_SUBTRACT)
		vsn.sign = vfp_sign_negate(vsn.sign);

	exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr);

	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func);
}

/*
 * Standard operations
 */

/*
 * sd = sd + (sn * sm)
 */
static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr)
{
	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac");
}

/*
 * sd = sd - (sn * sm)
 */
static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr)
{
	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac");
}

/*
 * sd = -sd + (sn * sm)
 */
static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr)
{
	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc");
}

/*
 * sd = -sd - (sn * sm)
 */
static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr)
{
	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
}

/*
 * sd = sn * sm
 */
static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions;
	s32 n = vfp_get_float(sn);

	pr_debug("VFP: s%u = %08x\n", sn, n);

	vfp_single_unpack(&vsn, n);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

	exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul");
}

/*
 * sd = -(sn * sm)
 */
static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions;
	s32 n = vfp_get_float(sn);

	pr_debug("VFP: s%u = %08x\n", sn, n);

	vfp_single_unpack(&vsn, n);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

	exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
	vsd.sign = vfp_sign_negate(vsd.sign);
	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul");
}

/*
 * sd = sn + sm
 */
static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions;
	s32 n = vfp_get_float(sn);

	pr_debug("VFP: s%u = %08x\n", sn, n);

	/*
	 * Unpack and normalise denormals.
	 */
	vfp_single_unpack(&vsn, n);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

	exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr);

	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd");
}

/*
 * sd = sn - sm
 */
static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr)
{
	/*
	 * Subtraction is addition with one sign inverted.
	 */
	return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr);
}

/*
 * sd = sn / sm
 */
static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions = 0;
	s32 n = vfp_get_float(sn);
	int tm, tn;

	pr_debug("VFP: s%u = %08x\n", sn, n);

	vfp_single_unpack(&vsn, n);
	vfp_single_unpack(&vsm, m);

	vsd.sign = vsn.sign ^ vsm.sign;

	tn = vfp_single_type(&vsn);
	tm = vfp_single_type(&vsm);

	/*
	 * Is n a NAN?
	 */
	if (tn & VFP_NAN)
		goto vsn_nan;

	/*
	 * Is m a NAN?
	 */
	if (tm & VFP_NAN)
		goto vsm_nan;

	/*
	 * If n and m are infinity, the result is invalid
	 * If n and m are zero, the result is invalid
	 */
	if (tm & tn & (VFP_INFINITY|VFP_ZERO))
		goto invalid;

	/*
	 * If n is infinity, the result is infinity
	 */
	if (tn & VFP_INFINITY)
		goto infinity;

	/*
	 * If m is zero, raise div0 exception
	 */
	if (tm & VFP_ZERO)
		goto divzero;

	/*
	 * If m is infinity, or n is zero, the result is zero
	 */
	if (tm & VFP_INFINITY || tn & VFP_ZERO)
		goto zero;

	if (tn & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsn);
	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	/*
	 * Ok, we have two numbers, we can perform division.
	 */
	vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
	vsm.significand <<= 1;
	if (vsm.significand <= (2 * vsn.significand)) {
		vsn.significand >>= 1;
		vsd.exponent++;
	}
	{
		u64 significand = (u64)vsn.significand << 32;
		do_div(significand, vsm.significand);
		vsd.significand = significand;
	}
	if ((vsd.significand & 0x3f) == 0)
		vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);
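
	/*
	 * If the low-order quotient bits look exact, multiply back and
	 * compare with the dividend; any remainder sets the sticky bit
	 * so that rounding sees the division was inexact.
	 */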

	return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv");

vsn_nan:
	exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
pack:
	vfp_put_float(vfp_single_pack(&vsd), sd);
	return exceptions;

vsm_nan:
	exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
	goto pack;

zero:
	vsd.exponent = 0;
	vsd.significand = 0;
	goto pack;

divzero:
	exceptions = FPSCR_DZC;
infinity:
	vsd.exponent = 255;
	vsd.significand = 0;
	goto pack;

invalid:
	vfp_put_float(vfp_single_pack(&vfp_single_default_qnan), sd);
	return FPSCR_IOC;
}

static struct op fops[16] = {
	[FOP_TO_IDX(FOP_FMAC)]	= { vfp_single_fmac,  0 },
	[FOP_TO_IDX(FOP_FNMAC)]	= { vfp_single_fnmac, 0 },
	[FOP_TO_IDX(FOP_FMSC)]	= { vfp_single_fmsc,  0 },
	[FOP_TO_IDX(FOP_FNMSC)]	= { vfp_single_fnmsc, 0 },
	[FOP_TO_IDX(FOP_FMUL)]	= { vfp_single_fmul,  0 },
	[FOP_TO_IDX(FOP_FNMUL)]	= { vfp_single_fnmul, 0 },
	[FOP_TO_IDX(FOP_FADD)]	= { vfp_single_fadd,  0 },
	[FOP_TO_IDX(FOP_FSUB)]	= { vfp_single_fsub,  0 },
	[FOP_TO_IDX(FOP_FDIV)]	= { vfp_single_fdiv,  0 },
};

#define FREG_BANK(x)	((x) & 0x18)
#define FREG_IDX(x)	((x) & 7)
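
/*
 * The 32 single-precision registers are grouped into four banks of
 * eight; vector operands wrap within their bank, e.g. with a stride of
 * one, s15 is followed by s8 (see ARM DDI0100F C5.1).
 */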

u32 vfp_single_cpdo(u32 inst, u32 fpscr)
{
	u32 op = inst & FOP_MASK;
	u32 exceptions = 0;
	unsigned int dest;
	unsigned int sn = vfp_get_sn(inst);
	unsigned int sm = vfp_get_sm(inst);
	unsigned int vecitr, veclen, vecstride;
	struct op *fop;

	vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);

	fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];

	/*
	 * fcvtsd takes a dN register number as destination, not sN.
	 * Technically, if bit 0 of dd is set, this is an invalid
	 * instruction.  However, we ignore this for efficiency.
	 * It also only operates on scalars.
	 */
	if (fop->flags & OP_DD)
		dest = vfp_get_dd(inst);
	else
		dest = vfp_get_sd(inst);

	/*
	 * If destination bank is zero, vector length is always '1'.
	 * ARM DDI0100F C5.1.3, C5.3.2.
	 */
	if ((fop->flags & OP_SCALAR) || FREG_BANK(dest) == 0)
		veclen = 0;
	else
		veclen = fpscr & FPSCR_LENGTH_MASK;

	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
		 (veclen >> FPSCR_LENGTH_BIT) + 1);

	if (!fop->fn)
		goto invalid;

	for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
		s32 m = vfp_get_float(sm);
		u32 except;
		char type;

		type = fop->flags & OP_DD ? 'd' : 's';
		if (op == FOP_EXT)
			pr_debug("VFP: itr%d (%c%u) = op[%u] (s%u=%08x)\n",
				 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
				 sm, m);
		else
			pr_debug("VFP: itr%d (%c%u) = (s%u) op[%u] (s%u=%08x)\n",
				 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
				 FOP_TO_IDX(op), sm, m);

		except = fop->fn(dest, sn, m, fpscr);
		pr_debug("VFP: itr%d: exceptions=%08x\n",
			 vecitr >> FPSCR_LENGTH_BIT, except);

		exceptions |= except;

		/*
		 * CHECK: It appears to be undefined whether we stop when
		 * we encounter an exception.  We continue.
		 */
		dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 7);
		sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
		if (FREG_BANK(sm) != 0)
			sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
	}
	return exceptions;

invalid:
	return (u32)-1;
}