/* From the Intel IA-64 Optimization Guide, choose the minimum latency
   alternative.  */
3
4#include <sysdep.h>
5#undef ret
6
7#include <shlib-compat.h>
8
9#if SHLIB_COMPAT(libc, GLIBC_2_2, GLIBC_2_2_6)
10
/* __divtf3
   Compute an 80-bit IEEE double-extended quotient.
   farg0 holds the dividend (a).  farg1 holds the divisor (b).
   The quotient is returned in fret0.

   p6 is the predicate produced by frcpa and p7 is kept as its
   complement.  With p6 set, f10 holds a reciprocal approximation y0 of
   b, which the predicated fma/fnma chain below refines into the
   quotient.  With p6 clear, the value frcpa left in f10 is returned
   unchanged (per the Itanium architecture, frcpa delivers the quotient
   itself in that case).  Intermediate steps run in status field 1; only
   frcpa and the final fma use status field 0.
   Scratch: f10-f14, p6, p7.  */

ENTRY(___divtf3)
	/* p7 = 1: r0 always compares equal to itself.  */
	cmp.eq p7, p0 = r0, r0
	frcpa.s0 f10, p6 = farg0, farg1
	;;
	/* If refinement is needed (p6), clear p7 so that p7 == !p6.  */
(p6)	cmp.ne p7, p0 = r0, r0
	/* p6 and p7 both guard a write to fret0 in the last group.  */
	.pred.rel.mutex p6, p7
	/* f11 = e0 = 1 - b*y0;  f12 = q0 = a*y0.  */
(p6)	fnma.s1 f11 = farg1, f10, f1
(p6)	fma.s1 f12 = farg0, f10, f0
	;;
	/* f13 = e0^2;  f14 = e0^2 + e0.  */
(p6)	fma.s1 f13 = f11, f11, f0
(p6)	fma.s1 f14 = f11, f11, f11
	;;
	/* f11 = e0^4 + e0;  f13 = y1 = y0 + y0*(e0 + e0^2).  */
(p6)	fma.s1 f11 = f13, f13, f11
(p6)	fma.s1 f13 = f14, f10, f10
	;;
	/* f10 = y2 = y0 + y1*(e0 + e0^4);  f11 = rem0 = a - b*q0.  */
(p6)	fma.s1 f10 = f13, f11, f10
(p6)	fnma.s1 f11 = farg1, f12, farg0
	;;
	/* f11 = q1 = q0 + rem0*y2;  f12 = e2 = 1 - b*y2.  */
(p6)	fma.s1 f11 = f11, f10, f12
(p6)	fnma.s1 f12 = farg1, f10, f1
	;;
	/* f10 = y3 = y2 + e2*y2;  f12 = rem1 = a - b*q1.  */
(p6)	fma.s1 f10 = f12, f10, f10
(p6)	fnma.s1 f12 = farg1, f11, farg0
	;;
	/* Final correction q1 + rem1*y3, computed under status field 0.
	   Exactly one of the two fret0 writes executes.  */
(p6)	fma.s0 fret0 = f12, f10, f11
(p7)	mov fret0 = f10
	br.ret.sptk rp
END(___divtf3)
	.symver	___divtf3, __divtf3@GLIBC_2.2
44
/* __divdf3
   Compute a 64-bit IEEE double quotient.
   farg0 holds the dividend (a).  farg1 holds the divisor (b).
   The quotient is returned in fret0.

   p6 is the predicate produced by frcpa and p7 is kept as its
   complement.  With p6 set, f10 holds a reciprocal approximation y0 of
   b and the predicated chain below refines quotient and reciprocal in
   parallel.  With p6 clear, the value frcpa left in f10 is returned
   unchanged.
   Scratch: f8, f10-f13, p6, p7.  */

ENTRY(___divdf3)
	/* p7 = 1: r0 always compares equal to itself.  */
	cmp.eq p7, p0 = r0, r0
	frcpa.s0 f10, p6 = farg0, farg1
	;;
	/* If refinement is needed (p6), clear p7 so that p7 == !p6.  */
(p6)	cmp.ne p7, p0 = r0, r0
	/* p6 and p7 both guard a write to fret0 in the last group.  */
	.pred.rel.mutex p6, p7
	/* f11 = q0 = a*y0;  f12 = e0 = 1 - b*y0.  */
(p6)	fmpy.s1 f11 = farg0, f10
(p6)	fnma.s1 f12 = farg1, f10, f1
	;;
	/* f11 = q1 = q0 + e0*q0;  f13 = e1 = e0^2.  */
(p6)	fma.s1 f11 = f12, f11, f11
(p6)	fmpy.s1 f13 = f12, f12
	;;
	/* f10 = y1 = y0 + e0*y0;  f11 = q2 = q1 + e1*q1.  */
(p6)	fma.s1 f10 = f12, f10, f10
(p6)	fma.s1 f11 = f13, f11, f11
	;;
	/* f12 = e2 = e1^2;  f10 = y2 = y1 + e1*y1.  */
(p6)	fmpy.s1 f12 = f13, f13
(p6)	fma.s1 f10 = f13, f10, f10
	;;
	/* f11 = q3 = q2 + e2*q2 (double precision);  f10 = y3 = y2 + e2*y2.  */
(p6)	fma.d.s1 f11 = f12, f11, f11
(p6)	fma.s1 f10 = f12, f10, f10
	;;
	/* f8 = rem = a - b*q3.  a (farg0) is not read again after this.
	   NOTE(review): under the standard IA-64 register aliases f8 is the
	   same register as farg0/fret0 -- confirm before reordering.  */
(p6)	fnma.d.s1 f8 = farg1, f11, farg0
	;;
	/* Final correction q3 + rem*y3, rounded to double precision.
	   Exactly one of the two fret0 writes executes.  */
(p6)	fma.d fret0 = f8, f10, f11
(p7)	mov fret0 = f10
	br.ret.sptk rp
	;;
END(___divdf3)
	.symver	___divdf3, __divdf3@GLIBC_2.2
78
/* __divsf3
   Compute a 32-bit IEEE float quotient.
   farg0 holds the dividend (a).  farg1 holds the divisor (b).
   The quotient is returned in fret0.

   p6 is the predicate produced by frcpa and p7 is kept as its
   complement.  With p6 set, f10 holds a reciprocal approximation y0 of
   b and the quotient estimate is refined three times before being
   rounded to single precision.  With p6 clear, the value frcpa left in
   f10 is returned unchanged.
   Scratch: f8-f10, p6, p7.
   NOTE(review): under the standard IA-64 register aliases f8/f9 are
   farg0/farg1, so the inputs are overwritten by the first refinement
   group -- confirm before reordering.  */

ENTRY(___divsf3)
	/* p7 = 1: r0 always compares equal to itself.  */
	cmp.eq p7, p0 = r0, r0
	frcpa.s0 f10, p6 = farg0, farg1
	;;
	/* If refinement is needed (p6), clear p7 so that p7 == !p6.  */
(p6)	cmp.ne p7, p0 = r0, r0
	/* p6 and p7 both guard a write to fret0 in the last group.  */
	.pred.rel.mutex p6, p7
	/* f8 = q0 = a*y0;  f9 = e0 = 1 - b*y0.  */
(p6)	fmpy.s1 f8 = farg0, f10
(p6)	fnma.s1 f9 = farg1, f10, f1
	;;
	/* f8 = q1 = q0 + e0*q0;  f9 = e1 = e0^2.  */
(p6)	fma.s1 f8 = f9, f8, f8
(p6)	fmpy.s1 f9 = f9, f9
	;;
	/* f8 = q2 = q1 + e1*q1;  f9 = e2 = e1^2.  */
(p6)	fma.s1 f8 = f9, f8, f8
(p6)	fmpy.s1 f9 = f9, f9
	;;
	/* f10 = q3 = q2 + e2*q2, held in double precision.  */
(p6)	fma.d.s1 f10 = f9, f8, f8
	;;
	/* Round q3 to single precision under status field 0.
	   Exactly one of the two fret0 writes executes.  */
(p6)	fnorm.s.s0 fret0 = f10
(p7)	mov fret0 = f10
	br.ret.sptk rp
	;;
END(___divsf3)
	.symver	___divsf3, __divsf3@GLIBC_2.2
106
/* __divdi3
   Compute a 64-bit integer quotient.
   in0 holds the dividend (a).  in1 holds the divisor (b).
   The quotient is returned in ret0.

   Both operands are converted to floating point, divided there by
   refining a frcpa reciprocal approximation, and the result is
   truncated back to an integer.  When frcpa clears p6 the refinement
   is skipped and the value frcpa left in f10 is converted directly.
   There is no explicit check for a zero divisor.
   Scratch: f8-f13, p6.  */

ENTRY(___divdi3)
	/* Two input registers; no locals, outputs or rotating registers.  */
	.regstk 2,0,0,0
	/* Transfer inputs to FP registers.  */
	setf.sig f8 = in0
	setf.sig f9 = in1
	;;
	/* Convert the inputs to FP, so that they won't be treated as
	   unsigned.  */
	fcvt.xf f8 = f8
	fcvt.xf f9 = f9
	;;
	/* Compute the reciprocal approximation: f10 = y0.  */
	frcpa.s1 f10, p6 = f8, f9
	;;
	/* 3 Newton-Raphson iterations.  */
	/* f11 = e0 = 1 - b*y0;  f12 = q0 = a*y0.  */
(p6)	fnma.s1 f11 = f9, f10, f1
(p6)	fmpy.s1 f12 = f8, f10
	;;
	/* f13 = e1 = e0^2;  f12 = q1 = q0 + e0*q0.  */
(p6)	fmpy.s1 f13 = f11, f11
(p6)	fma.s1 f12 = f11, f12, f12
	;;
	/* f10 = y1 = y0 + e0*y0;  f11 = q2 = q1 + e1*q1.  */
(p6)	fma.s1 f10 = f11, f10, f10
(p6)	fma.s1 f11 = f13, f12, f12
	;;
	/* f10 = y2 = y1 + e1*y1;  f12 = rem = a - b*q2.  */
(p6)	fma.s1 f10 = f13, f10, f10
(p6)	fnma.s1 f12 = f9, f11, f8
	;;
	/* f10 = q3 = q2 + rem*y2.  */
(p6)	fma.s1 f10 = f12, f10, f11
	;;
	/* Truncate the quotient to a signed integer.  */
	fcvt.fx.trunc.s1 f10 = f10
	;;
	/* Transfer result to GP registers.  */
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
END(___divdi3)
	.symver	___divdi3, __divdi3@GLIBC_2.2
149
/* __moddi3
   Compute a 64-bit integer modulus.
   in0 holds the dividend (a).  in1 holds the divisor (b).
   The remainder is returned in ret0.

   The truncated quotient q is computed in floating point by refining
   a frcpa reciprocal approximation; the remainder is then formed in
   integer arithmetic as a + q * (-b).  f14 keeps the untouched integer
   image of a for that last step.  When frcpa clears p6 the refinement
   is skipped and the value frcpa left in f10 is used as the quotient.
   There is no explicit check for a zero divisor.
   in1 is overwritten with -b.
   Scratch: f8-f14, p6.  */

ENTRY(___moddi3)
	/* Two input registers; no locals, outputs or rotating registers.  */
	.regstk 2,0,0,0
	/* Transfer inputs to FP registers.  */
	setf.sig f14 = in0
	setf.sig f9 = in1
	;;
	/* Convert the inputs to FP, so that they won't be treated as
	   unsigned.  */
	fcvt.xf f8 = f14
	fcvt.xf f9 = f9
	;;
	/* Compute the reciprocal approximation: f10 = y0.  */
	frcpa.s1 f10, p6 = f8, f9
	;;
	/* 3 Newton-Raphson iterations.  */
	/* f12 = q0 = a*y0;  f11 = e0 = 1 - b*y0.  */
(p6)	fmpy.s1 f12 = f8, f10
(p6)	fnma.s1 f11 = f9, f10, f1
	;;
	/* f12 = q1 = q0 + e0*q0;  f13 = e1 = e0^2.  */
(p6)	fma.s1 f12 = f11, f12, f12
(p6)	fmpy.s1 f13 = f11, f11
	;;
	/* f10 = y1 = y0 + e0*y0;  f11 = q2 = q1 + e1*q1.  */
(p6)	fma.s1 f10 = f11, f10, f10
(p6)	fma.s1 f11 = f13, f12, f12
	;;
	/* in1 = -b, for the integer multiply-add below.
	   f10 = y2 = y1 + e1*y1;  f12 = rem = a - b*q2.  */
	sub in1 = r0, in1
(p6)	fma.s1 f10 = f13, f10, f10
(p6)	fnma.s1 f12 = f9, f11, f8
	;;
	/* f9 is free now (last read in the previous group): load -b as an
	   integer.  f10 = q3 = q2 + rem*y2.  */
	setf.sig f9 = in1
(p6)	fma.s1 f10 = f12, f10, f11
	;;
	/* Truncate the quotient to a signed integer.  */
	fcvt.fx.trunc.s1 f10 = f10
	;;
	/* r = q * (-b) + a */
	xma.l f10 = f10, f9, f14
	;;
	/* Transfer result to GP registers.  */
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
END(___moddi3)
	.symver	___moddi3, __moddi3@GLIBC_2.2
196
/* __udivdi3
   Compute a 64-bit unsigned integer quotient.
   in0 holds the dividend (a).  in1 holds the divisor (b).
   The quotient is returned in ret0.

   Both operands are converted to floating point as unsigned values,
   divided there by refining a frcpa reciprocal approximation, and the
   result is truncated back to an unsigned integer.  When frcpa clears
   p6 the refinement is skipped and the value frcpa left in f10 is
   converted directly.  There is no explicit check for a zero divisor.
   Scratch: f8-f13, p6.  */

ENTRY(___udivdi3)
	/* Two input registers; no locals, outputs or rotating registers.  */
	.regstk 2,0,0,0
	/* Transfer inputs to FP registers.  */
	setf.sig f8 = in0
	setf.sig f9 = in1
	;;
	/* Convert the inputs to FP, to avoid FP software-assist faults.  */
	fcvt.xuf.s1 f8 = f8
	fcvt.xuf.s1 f9 = f9
	;;
	/* Compute the reciprocal approximation: f10 = y0.  */
	frcpa.s1 f10, p6 = f8, f9
	;;
	/* 3 Newton-Raphson iterations.  */
	/* f11 = e0 = 1 - b*y0;  f12 = q0 = a*y0.  */
(p6)	fnma.s1 f11 = f9, f10, f1
(p6)	fmpy.s1 f12 = f8, f10
	;;
	/* f13 = e1 = e0^2;  f12 = q1 = q0 + e0*q0.  */
(p6)	fmpy.s1 f13 = f11, f11
(p6)	fma.s1 f12 = f11, f12, f12
	;;
	/* f10 = y1 = y0 + e0*y0;  f11 = q2 = q1 + e1*q1.  */
(p6)	fma.s1 f10 = f11, f10, f10
(p6)	fma.s1 f11 = f13, f12, f12
	;;
	/* f10 = y2 = y1 + e1*y1;  f12 = rem = a - b*q2.  */
(p6)	fma.s1 f10 = f13, f10, f10
(p6)	fnma.s1 f12 = f9, f11, f8
	;;
	/* f10 = q3 = q2 + rem*y2.  */
(p6)	fma.s1 f10 = f12, f10, f11
	;;
	/* Truncate the quotient to an unsigned integer.  */
	fcvt.fxu.trunc.s1 f10 = f10
	;;
	/* Transfer result to GP registers.  */
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
END(___udivdi3)
	.symver	___udivdi3, __udivdi3@GLIBC_2.2
238
/* __umoddi3
   Compute a 64-bit unsigned integer modulus.
   in0 holds the dividend (a).  in1 holds the divisor (b).
   The remainder is returned in ret0.

   The truncated quotient q is computed in floating point by refining
   a frcpa reciprocal approximation; the remainder is then formed in
   integer arithmetic as a + q * (-b).  f14 keeps the untouched integer
   image of a for that last step.  When frcpa clears p6 the refinement
   is skipped and the value frcpa left in f10 is used as the quotient.
   There is no explicit check for a zero divisor.
   in1 is overwritten with -b.
   Scratch: f8-f14, p6.  */

ENTRY(___umoddi3)
	/* Two input registers; no locals, outputs or rotating registers.  */
	.regstk 2,0,0,0
	/* Transfer inputs to FP registers.  */
	setf.sig f14 = in0
	setf.sig f9 = in1
	;;
	/* Convert the inputs to FP, to avoid FP software assist faults.  */
	fcvt.xuf.s1 f8 = f14
	fcvt.xuf.s1 f9 = f9
	;;
	/* Compute the reciprocal approximation: f10 = y0.  */
	frcpa.s1 f10, p6 = f8, f9
	;;
	/* 3 Newton-Raphson iterations.  */
	/* f12 = q0 = a*y0;  f11 = e0 = 1 - b*y0.  */
(p6)	fmpy.s1 f12 = f8, f10
(p6)	fnma.s1 f11 = f9, f10, f1
	;;
	/* f12 = q1 = q0 + e0*q0;  f13 = e1 = e0^2.  */
(p6)	fma.s1 f12 = f11, f12, f12
(p6)	fmpy.s1 f13 = f11, f11
	;;
	/* f10 = y1 = y0 + e0*y0;  f11 = q2 = q1 + e1*q1.  */
(p6)	fma.s1 f10 = f11, f10, f10
(p6)	fma.s1 f11 = f13, f12, f12
	;;
	/* in1 = -b, for the integer multiply-add below.
	   f10 = y2 = y1 + e1*y1;  f12 = rem = a - b*q2.  */
	sub in1 = r0, in1
(p6)	fma.s1 f10 = f13, f10, f10
(p6)	fnma.s1 f12 = f9, f11, f8
	;;
	/* f9 is free now (last read in the previous group): load -b as an
	   integer.  f10 = q3 = q2 + rem*y2.  */
	setf.sig f9 = in1
(p6)	fma.s1 f10 = f12, f10, f11
	;;
	/* Truncate the quotient to an unsigned integer.  */
	fcvt.fxu.trunc.s1 f10 = f10
	;;
	/* r = q * (-b) + a */
	xma.l f10 = f10, f9, f14
	;;
	/* Transfer result to GP registers.  */
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
END(___umoddi3)
	.symver	___umoddi3, __umoddi3@GLIBC_2.2
285
/* __multi3
   Compute a 128-bit multiply of 128-bit multiplicands.
   in0/in1 hold one multiplicand (a), in2/in3 hold the other one (b);
   the code treats in0 and in2 as the low 64-bit halves.  The low half
   of the product is returned in ret0 and the high half in ret1.

   Notation: a0/a1 are the low/high 32 bits of in0, b0/b1 those of in2,
   and mid = a0*b1 + a1*b0 + (a0*b0 >> 32).
     ret0 = (low32 (mid) << 32) + low32 (a0*b0)
     ret1 = a1*b1 + (mid >> 32) + carry (mid) * 2^32
	    + low64 (in0*in3 + in1*in2)
   Only the low 64 bits of each xmpy.l/xma.l result are used, which is
   why in0*in2 is assembled from four 32x32 partial products.
   The instruction order inside each group matters: several groups read
   a register that a later instruction of the same group overwrites.
   Scratch: r14-r22, f6-f11, p6, p7.  */

ENTRY(___multi3)
	/* Four input registers; no locals, outputs or rotating registers.  */
	.regstk 4,0,0,0
	setf.sig f6 = in1
	/* r19 = mask selecting the low 32 bits.  */
	movl r19 = 0xffffffff
	setf.sig f7 = in2
	;;
	/* r14 = a0.  */
	and r14 = r19, in0
	;;
	/* f10 = a0;  r14 = b0;  f9 = low64 (in1*in2).  */
	setf.sig f10 = r14
	and r14 = r19, in2
	xmpy.l f9 = f6, f7
	;;
	/* f6 = b0;  r14 = a1.  */
	setf.sig f6 = r14
	shr.u r14 = in0, 32
	;;
	/* f7 = a1;  r14 = b1.  */
	setf.sig f7 = r14
	shr.u r14 = in2, 32
	;;
	/* f8 = b1;  f11 = a0*b0;  f6 = a1*b0.  */
	setf.sig f8 = r14
	xmpy.l f11 = f10, f6
	xmpy.l f6 = f7, f6
	;;
	/* r16 = a0*b0;  f7 = a1*b1.  */
	getf.sig r16 = f11
	xmpy.l f7 = f7, f8
	;;
	/* r14 = a0*b0 >> 32;  r16 = low32 (a0*b0);  r17 = a1*b0;
	   f6 = in0.  */
	shr.u r14 = r16, 32
	and r16 = r19, r16
	getf.sig r17 = f6
	setf.sig f6 = in0
	;;
	/* f11 = a0*b0 >> 32;  r21 = a1*b1;  f7 = in3.  */
	setf.sig f11 = r14
	getf.sig r21 = f7
	setf.sig f7 = in3
	;;
	/* f11 = a0*b1 + (a0*b0 >> 32);  f6 = in0*in3 + low64 (in1*in2).  */
	xma.l f11 = f10, f8, f11
	xma.l f6 = f6, f7, f9
	;;
	getf.sig r18 = f11
	;;
	/* r18 = mid (modulo 2^64).  */
	add r18 = r18, r17
	;;
	/* r15 = low32 (mid);  p7 = the addition above wrapped around.  */
	and r15 = r19, r18
	cmp.ltu p7, p6 = r18, r17
	;;
	/* r22 = cross terms;  on carry, r14 = 1 + 0xffffffff = 2^32.  */
	getf.sig r22 = f6
(p7)	adds r14 = 1, r19
	;;
	/* Fold the carry into a1*b1 (reads the old r14 before the shift
	   overwrites it);  r14 = mid >> 32;  r15 = low32 (mid) << 32.  */
(p7)	add r21 = r21, r14
	shr.u r14 = r18, 32
	shl r15 = r15, 32
	;;
	/* r20 = high 64 bits of in0*in2.  */
	add r20 = r21, r14
	;;
	/* ret0 = low half;  ret1 = high half including the cross terms.  */
	add ret0 = r15, r16
	add ret1 = r22, r20
	br.ret.sptk rp
	;;
END(___multi3)
	.symver	___multi3, __multi3@GLIBC_2.2
349
350#endif
351

source code of glibc/sysdeps/ia64/ia64libgcc.S