remqu.S source code [glibc/sysdeps/alpha/remqu.S]

1	/ Copyright (C) 2004-2022 Free Software Foundation, Inc.*
2	This file is part of the GNU C Library.
3
4	The GNU C Library is free software; you can redistribute it and/or
5	modify it under the terms of the GNU Lesser General Public
6	License as published by the Free Software Foundation; either
7	version 2.1 of the License, or (at your option) any later version.
8
9	The GNU C Library is distributed in the hope that it will be useful,
10	but WITHOUT ANY WARRANTY; without even the implied warranty of
11	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12	Lesser General Public License for more details.
13
14	You should have received a copy of the GNU Lesser General Public
15	License along with the GNU C Library. If not, see
16	<https://www.gnu.org/licenses/>. /*
17
18	#include "div_libc.h"
19
20
21	/ 64-bit unsigned long remainder. These are not normal C functions. Argument*
22	registers are t10 and t11, the result goes in t12. Only t12 and AT may be
23	clobbered.
24
25	Theory of operation here is that we can use the FPU divider for virtually
26	all operands that we see: all dividend values between -253 and 253-1
27	can be computed directly. Note that divisor values need not be checked
28	against that range because the rounded fp value will be close enough such
29	that the quotient is < 1, which will properly be truncated to zero when we
30	convert back to integer.
31
32	When the dividend is outside the range for which we can compute exact
33	results, we use the fp quotent as an estimate from which we begin refining
34	an exact integral value. This reduces the number of iterations in the
35	shift-and-subtract loop significantly.
36
37	The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE
38	for cvttq/c even without /sui being set. It will not, however, properly
39	raise the exception, so we don't have to worry about FPCR_INED being clear
40	and so dying by SIGFPE. /*
41
42	.text
43	.align `4`
44	.globl __remqu
45	.type __remqu, @funcnoplt
46	.usepv __remqu, no
47
48	cfi_startproc
49	cfi_return_column (RA)
50	__remqu:
51	lda sp, -FRAME(sp)
52	cfi_def_cfa_offset (FRAME)
53	CALL_MCOUNT
54
55	/ Get the fp divide insn issued as quickly as possible. After*
56	that's done, we have at least 22 cycles until its results are
57	ready -- all the time in the world to figure out how we're
58	going to use the results. /*
59	subq Y, `1`, AT
60	and Y, AT, AT
61	beq AT, $powerof2
62
63	stt $f0, `0`(sp)
64	excb
65	stt $f1, `8`(sp)
66	stt $f3, `48`(sp)
67	cfi_rel_offset ($f0, `0`)
68	cfi_rel_offset ($f1, `8`)
69	cfi_rel_offset ($f3, `48`)
70	mf_fpcr $f3
71
72	_ITOFT2 X, $f0, `16`, Y, $f1, `24`
73	cvtqt $f0, $f0
74	cvtqt $f1, $f1
75
76	blt X, $x_is_neg
77	divt/c $f0, $f1, $f0
78
79	/ Check to see if Y was mis-converted as signed value. /
80	ldt $f1, `8`(sp)
81	blt Y, $y_is_neg
82
83	/ Check to see if X fit in the double as an exact value. /
84	srl X, `53`, AT
85	bne AT, $x_big
86
87	/ If we get here, we're expecting exact results from the division.*
88	Do nothing else besides convert, compute remainder, clean up. /*
89	cvttq/c $f0, $f0
90	excb
91	mt_fpcr $f3
92	_FTOIT $f0, AT, `16`
93
94	mulq AT, Y, AT
95	ldt $f0, `0`(sp)
96	ldt $f3, `48`(sp)
97	lda sp, FRAME(sp)
98	cfi_remember_state
99	cfi_restore ($f0)
100	cfi_restore ($f1)
101	cfi_restore ($f3)
102	cfi_def_cfa_offset (`0`)
103
104	.align `4`
105	subq X, AT, RV
106	ret $`31`, (RA), `1`
107
108	.align `4`
109	cfi_restore_state
110	$x_is_neg:
111	/ If we get here, X is so big that bit 63 is set, which made the*
112	conversion come out negative. Fix it up lest we not even get
113	a good estimate. /*
114	ldah AT, `0x5f80` / 2*64 as float. /*
115	stt $f2, `24`(sp)
116	cfi_rel_offset ($f2, `24`)
117	_ITOFS AT, $f2, `16`
118
119	.align `4`
120	addt $f0, $f2, $f0
121	unop
122	divt/c $f0, $f1, $f0
123	unop
124
125	/ Ok, we've now the divide issued. Continue with other checks. /
126	ldt $f1, `8`(sp)
127	unop
128	ldt $f2, `24`(sp)
129	blt Y, $y_is_neg
130	cfi_restore ($f1)
131	cfi_restore ($f2)
132	cfi_remember_state / for y_is_neg /
133
134	.align `4`
135	$x_big:
136	/ If we get here, X is large enough that we don't expect exact*
137	results, and neither X nor Y got mis-translated for the fp
138	division. Our task is to take the fp result, figure out how
139	far it's off from the correct result and compute a fixup. /*
140	stq t0, `16`(sp)
141	stq t1, `24`(sp)
142	stq t2, `32`(sp)
143	stq t3, `40`(sp)
144	cfi_rel_offset (t0, `16`)
145	cfi_rel_offset (t1, `24`)
146	cfi_rel_offset (t2, `32`)
147	cfi_rel_offset (t3, `40`)
148
149	#define Q t0 /* quotient */
150	#define R RV /* remainder */
151	#define SY t1 /* scaled Y */
152	#define S t2 /* scalar */
153	#define QY t3 /* QY /
154
155	cvttq/c $f0, $f0
156	_FTOIT $f0, Q, `8`
157	mulq Q, Y, QY
158
159	.align `4`
160	stq t4, `8`(sp)
161	excb
162	ldt $f0, `0`(sp)
163	mt_fpcr $f3
164	cfi_rel_offset (t4, `8`)
165	cfi_restore ($f0)
166
167	subq QY, X, R
168	mov Y, SY
169	mov `1`, S
170	bgt R, $q_high
171
172	$q_high_ret:
173	subq X, QY, R
174	mov Y, SY
175	mov `1`, S
176	bgt R, $q_low
177
178	$q_low_ret:
179	ldq t4, `8`(sp)
180	ldq t0, `16`(sp)
181	ldq t1, `24`(sp)
182	ldq t2, `32`(sp)
183
184	ldq t3, `40`(sp)
185	ldt $f3, `48`(sp)
186	lda sp, FRAME(sp)
187	cfi_remember_state
188	cfi_restore (t0)
189	cfi_restore (t1)
190	cfi_restore (t2)
191	cfi_restore (t3)
192	cfi_restore (t4)
193	cfi_restore ($f3)
194	cfi_def_cfa_offset (`0`)
195	ret $`31`, (RA), `1`
196
197	.align `4`
198	cfi_restore_state
199	/ The quotient that we computed was too large. We need to reduce*
200	it by S such that YS >= R. Obviously the closer we get to the*
201	correct value the better, but overshooting high is ok, as we'll
202	fix that up later. /*
203	`0`:
204	addq SY, SY, SY
205	addq S, S, S
206	$q_high:
207	cmpult SY, R, AT
208	bne AT, `0b`
209
210	subq Q, S, Q
211	unop
212	subq QY, SY, QY
213	br $q_high_ret
214
215	.align `4`
216	/ The quotient that we computed was too small. Divide Y by the*
217	current remainder (R) and add that to the existing quotient (Q).
218	The expectation, of course, is that R is much smaller than X. /*
219	/ Begin with a shift-up loop. Compute S such that YS >= R. We
220	already have a copy of Y in SY and the value 1 in S. /*
221	`0`:
222	addq SY, SY, SY
223	addq S, S, S
224	$q_low:
225	cmpult SY, R, AT
226	bne AT, `0b`
227
228	/ Shift-down and subtract loop. Each iteration compares our scaled*
229	Y (SY) with the remainder (R); if SY <= R then X is divisible by
230	Y's scalar (S) so add it to the quotient (Q). /*
231	`2`: addq Q, S, t3
232	srl S, `1`, S
233	cmpule SY, R, AT
234	subq R, SY, t4
235
236	cmovne AT, t3, Q
237	cmovne AT, t4, R
238	srl SY, `1`, SY
239	bne S, `2b`
240
241	br $q_low_ret
242
243	.align `4`
244	cfi_restore_state
245	$y_is_neg:
246	/ If we get here, Y is so big that bit 63 is set. The results*
247	from the divide will be completely wrong. Fortunately, the
248	quotient must be either 0 or 1, so the remainder must be X
249	or X-Y, so just compute it directly. /*
250	cmpule Y, X, AT
251	excb
252	mt_fpcr $f3
253	subq X, Y, RV
254	ldt $f0, `0`(sp)
255	ldt $f3, `48`(sp)
256	cmoveq AT, X, RV
257
258	lda sp, FRAME(sp)
259	cfi_restore ($f0)
260	cfi_restore ($f3)
261	cfi_def_cfa_offset (`0`)
262	ret $`31`, (RA), `1`
263
264	.align `4`
265	cfi_def_cfa_offset (FRAME)
266	$powerof2:
267	subq Y, `1`, AT
268	beq Y, DIVBYZERO
269	and X, AT, RV
270	lda sp, FRAME(sp)
271	cfi_def_cfa_offset (`0`)
272	ret $`31`, (RA), `1`
273
274	cfi_endproc
275	.size __remqu, .-__remqu
276
277	DO_DIVBYZERO
278

source code of glibc/sysdeps/alpha/remqu.S