1//===----------------------Hexagon builtin routine ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9// Double Precision Divide
10
// Register, predicate and constant names used by __hexagon_divdf3.
// Several names are #undef'd and re-#define'd further down so that the same
// physical register can be referred to by the role it plays in each phase.

// First operand (the value being divided). The result is also left in r1:0
// before returning, and the pair is renamed REM for the iteration phase.
11#define A r1:0
12#define AH r1
13#define AL r0
14
// Second operand (the divisor); renamed DENOM for the iteration phase.
15#define B r3:2
16#define BH r3
17#define BL r2
18
// 64-bit integer quotient accumulator, converted with convert_d2df at the end.
19#define Q r5:4
20#define QH r5
21#define QL r4
22
// 64-bit scratch pair: products and the per-iteration quotient digit.
23#define PROD r7:6
24#define PRODHI r7
25#define PRODLO r6
26
// Single-precision working values for the reciprocal estimate:
//   SFONE   - loaded with 0x3f800001 by the entry code
//   SFDEN   - SFONE or'ed with the divisor's leading mantissa bits
//   SFERROR - residual used by the refinement steps
//   SFRECIP - reciprocal approximation of SFDEN
27#define SFONE r8
28#define SFDEN r9
29#define SFERROR r10
30#define SFRECIP r11
31
// Exponent pair: EXPB (divisor) in the high word, EXPA (dividend) in the low.
32#define EXPBA r13:12
33#define EXPB r13
34#define EXPA r12
35
// Second partial product subtracted from the remainder in each DIV_ITER1B.
36#define REMSUB2 r15:14
37
38
39
// xor of the operands' high words; bit 31 is the sign of the result.
40#define SIGN r28
41
// Q_POSITIVE - result sign bit is clear
// NORMAL     - both operands are normal numbers
// NO_OVF_UNF - exponent difference is inside the range the fast path handles
// P_TMP      - general scratch predicate
42#define Q_POSITIVE p3
43#define NORMAL p2
44#define NO_OVF_UNF p1
45#define P_TMP p0
46
// Bit offset at which the reciprocal's mantissa is placed inside RECIPEST.
47#define RECIPEST_SHIFT 3
// Constant subtracted from the exponent difference before it is added to the
// exponent of the converted quotient (presumably compensates for the
// fixed-point scaling of Q -- TODO confirm the derivation of 61).
48#define QADJ 61
49
// Class masks for the dfclass instruction. DFCLASS_NUMBER covers every class
// except the one tested with #0x10 in .Ldiv_nan; the two derived masks remove
// the zero / infinite bit from DFCLASS_NUMBER.
50#define DFCLASS_NORMAL 0x02
51#define DFCLASS_NUMBER 0x0F
52#define DFCLASS_INFINITE 0x08
53#define DFCLASS_ZERO 0x01
54#define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)
55#define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)
56
// Field widths of the double (DF) and single (SF) precision formats and the
// double-precision exponent bias.
57#define DF_MANTBITS 52
58#define DF_EXPBITS 11
59#define SF_MANTBITS 23
60#define SF_EXPBITS 8
61#define DF_BIAS 0x3ff
62
// Bit offset of the 2-bit field read from USR in .Ldiv_ovf to decide how an
// overflowing result is rounded.
63#define SR_ROUND_OFF 22
64
// Each *_ALIAS(TAG) macro exports an extra global symbol (__qdsp_TAG,
// __hexagon_fast_TAG, __hexagon_fast2_TAG) set equal to __hexagon_TAG.
// END(TAG) emits the .size directive for TAG.
65#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
66#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
67#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
68#define END(TAG) .size TAG,.-TAG
69
// __hexagon_divdf3: double-precision divide, computing r1:0 / r3:2 into r1:0.
// (Presumably the C-level signature is double __hexagon_divdf3(double, double)
// with the operands passed in r1:0 and r3:2 -- confirm against the Hexagon ABI.)
//
// Fast path for two normal operands:
//   1. Build a single-precision reciprocal of the divisor's leading mantissa
//      bits (sfrecipa followed by two multiply/accumulate refinement steps).
//   2. Run four DIV_ITER1B steps; each multiplies the reciprocal estimate by
//      the high word of the remainder to get a quotient digit, accumulates it
//      into Q and subtracts digit * DENOM from the shifted remainder.
//   3. Apply one final compare/subtract correction, fold a nonzero remainder
//      into the low bit of Q, apply the sign, convert Q with convert_d2df and
//      add the exponent difference into the result's exponent field.
// Anything else branches to .Ldiv_abnormal (an operand is not normal) or
// .Ldiv_ovf_unf (exponent difference outside the fast-path range).
//
// Note on reading the packets: instructions grouped in one { } packet all read
// the register values from before the packet, and two compares writing the
// same predicate in one packet yield the AND of their results.
70 .text
71 .global __hexagon_divdf3
72 .type __hexagon_divdf3,@function
73 Q6_ALIAS(divdf3)
74 FAST_ALIAS(divdf3)
75 FAST2_ALIAS(divdf3)
76 .p2align 5
77__hexagon_divdf3:
78 {
 // NORMAL is written twice, so it is true only if A and B are both normal.
79 NORMAL = dfclass(A,#DFCLASS_NORMAL)
80 NORMAL = dfclass(B,#DFCLASS_NORMAL)
 // Keep copies of both high words (sign + exponent) and the xor of the signs.
81 EXPBA = combine(BH,AH)
82 SIGN = xor(AH,BH)
83 }
// From here on r1:0 is the running remainder and r3:2 the denominator.
84#undef A
85#undef AH
86#undef AL
87#undef B
88#undef BH
89#undef BL
90#define REM r1:0
91#define REMHI r1
92#define REMLO r0
93#define DENOM r3:2
94#define DENOMHI r3
95#define DENOMLO r2
96 {
97 if (!NORMAL) jump .Ldiv_abnormal
 // Top SF_MANTBITS bits of the divisor's mantissa, right-aligned in PROD.
98 PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
 // Bit pattern of single-precision 1.0 with the lowest mantissa bit also set;
 // and(SFONE,#-2) below recovers the exact 1.0 pattern.
99 SFONE = ##0x3f800001
100 }
101 {
 // Single-precision value whose mantissa is the divisor's leading bits.
102 SFDEN = or(SFONE,PRODLO)
 // Isolate the 11-bit exponent fields from the saved high words.
103 EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
104 EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
 // True when bit 31 of SIGN is clear, i.e. the operands have equal signs.
105 Q_POSITIVE = cmp.gt(SIGN,#-1)
106 }
// r28 is free again after Q_POSITIVE has been computed; reuse it for the
// constant 1.
107#undef SIGN
108#define ONE r28
// .Ldiv_abnormal jumps back in here once it has normalised denormal operands
// and recreated SFDEN, EXPA, EXPB and Q_POSITIVE.
109.Ldenorm_continue:
110 {
 // Initial reciprocal approximation of SFDEN.
111 SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)
 // SFONE with its low bit cleared: the 1.0 pattern used as the starting
 // value of the residual.
112 SFERROR = and(SFONE,#-2)
113 ONE = #1
 // Exponent of the quotient before normalisation: difference of exponents.
114 EXPA = sub(EXPA,EXPB)
115 }
// EXPB is no longer needed; r13 now holds the integer reciprocal estimate.
116#undef EXPB
117#define RECIPEST r13
118 {
 // Residual: 1.0 - SFRECIP * SFDEN.
119 SFERROR -= sfmpy(SFRECIP,SFDEN):lib
 // Overwrite sign + exponent (the top 12 bits) of the dividend with the value
 // 1, leaving the mantissa with an explicit leading one at bit 52.
120 REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
 // Leading-one position for the SF_MANTBITS-bit mantissa inserted below.
121 RECIPEST = ##0x00800000 << RECIPEST_SHIFT
122 }
123 {
 // First refinement: SFRECIP += SFRECIP * residual.
124 SFRECIP += sfmpy(SFRECIP,SFERROR):lib
 // Same sign/exponent replacement for the divisor.
125 DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
 // Reset the residual accumulator to 1.0 for the second step.
126 SFERROR = and(SFONE,#-2)
127 }
128 {
129 SFERROR -= sfmpy(SFRECIP,SFDEN):lib
 // Bounds for the exponent difference that the fast path may handle.
130 QH = #-DF_BIAS+1
131 QL = #DF_BIAS-1
132 }
133 {
 // Second refinement step.
134 SFRECIP += sfmpy(SFRECIP,SFERROR):lib
 // NO_OVF_UNF = (EXPA > -DF_BIAS+1) and not (EXPA > DF_BIAS-1).
135 NO_OVF_UNF = cmp.gt(EXPA,QH)
136 NO_OVF_UNF = !cmp.gt(EXPA,QL)
137 }
138 {
 // Place the reciprocal's mantissa bits under the leading one set up earlier.
139 RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT)
140 Q = #0
141 EXPA = add(EXPA,#-QADJ)
142 }
// The single-precision temporaries are dead; r10/r11 become general scratch.
143#undef SFERROR
144#undef SFRECIP
145#define TMP r10
146#define TMP1 r11
147 {
 // Lower the estimate by 3 units at RECIPEST_SHIFT (presumably so the digit
 // estimate never exceeds the true quotient digit -- TODO confirm).
148 RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT))
149 }
150
// DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA): one quotient-digit step.
//   packet 1: PROD = RECIPEST * REMHI (unsigned 32x32->64; PRODHI is the
//             digit) while REM is shifted left by REMSHIFT. Both use the
//             pre-packet REM.
//   packet 2: clear PRODLO, subtract digit * DENOMLO from REM and compute
//             digit * DENOMHI into REMSUB2.
//   packet 3: add the digit, shifted by QSHIFTINSN/QSHIFT, into Q and subtract
//             REMSUB2 << 32 from REM; EXTRA is appended to this packet.
// No comments are placed inside the macro body because of the line
// continuations.
151#define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
152 { \
153 PROD = mpyu(RECIPEST,REMHI); \
154 REM = asl(REM,# ## ( REMSHIFT )); \
155 }; \
156 { \
157 PRODLO = # ## 0; \
158 REM -= mpyu(PRODHI,DENOMLO); \
159 REMSUB2 = mpyu(PRODHI,DENOMHI); \
160 }; \
161 { \
162 Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
163 REM -= asl(REMSUB2, # ## 32); \
164 EXTRA \
165 }
166
167
// Four steps. The remainder is shifted left by 15 each time and the digit
// (held in bits 32 and up of PROD) is placed 15 bits lower in Q than the
// previous one: left 14, then right 1, 16 and 31. The last step also zeroes
// PROD for the correction below.
168 DIV_ITER1B(ASL,14,15,)
169 DIV_ITER1B(ASR,1,15,)
170 DIV_ITER1B(ASR,16,15,)
171 DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)
172
173#undef REMSUB2
174#define TMPPAIR r15:14
175#define TMPPAIRHI r15
176#define TMPPAIRLO r14
177#undef RECIPEST
178#define EXPB r13
179 {
180 // compare or sub with carry
181 TMPPAIR = sub(REM,DENOM)
 // P_TMP is true when REM < DENOM (unsigned), i.e. no correction is needed.
182 P_TMP = cmp.gtu(DENOM,REM)
183 // set up amt to add to q
 // PROD was zeroed by the last DIV_ITER1B, so PROD ends up as 0 or 2.
184 if (!P_TMP.new) PRODLO = #2
185 }
186 {
187 Q = add(Q,PROD)
 // Keep the reduced remainder when the subtraction was valid.
188 if (!P_TMP) REM = TMPPAIR
 // Zero to compare the remainder against in the next packet.
189 TMPPAIR = #0
190 }
191 {
 // A nonzero remainder sets the low bit of Q (sticky bit).
192 P_TMP = cmp.eq(REM,TMPPAIR)
193 if (!P_TMP.new) QL = or(QL,ONE)
194 }
195 {
196 PROD = neg(Q)
197 }
198 {
 // Use the negated quotient when the result sign is negative.
199 if (!Q_POSITIVE) Q = PROD
200 }
// Back to operand-style names: r1:0 receives the floating-point result.
201#undef REM
202#undef REMHI
203#undef REMLO
204#undef DENOM
205#undef DENOMLO
206#undef DENOMHI
207#define A r1:0
208#define AH r1
209#define AL r0
210#define B r3:2
211#define BH r3
212#define BL r2
213 {
 // Convert the signed 64-bit quotient to double. This is in the same packet
 // as the jump, so A is written on both paths.
214 A = convert_d2df(Q)
215 if (!NO_OVF_UNF) jump .Ldiv_ovf_unf
216 }
217 {
 // Add the adjusted exponent difference into the exponent field and return.
218 AH += asl(EXPA,#DF_MANTBITS-32)
219 jumpr r31
220 }
221
// Reached when the exponent difference failed the NO_OVF_UNF range test.
// On entry A holds convert_d2df(Q), Q the signed integer quotient and EXPA the
// exponent adjustment that has not been applied yet. Decides between overflow
// (.Ldiv_ovf), a result that may have rounded up to the smallest normal
// (.Lpossible_unf), and underflow, which is handled in line below.
222.Ldiv_ovf_unf:
223 {
 // Apply the exponent adjustment as the fast path would have done.
224 AH += asl(EXPA,#DF_MANTBITS-32)
 // Reads AH as it was before this packet: the exponent field produced by
 // convert_d2df.
225 EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
226 }
227 {
228 PROD = abs(Q)
 // Exponent the result would have if the exponent range were unlimited.
229 EXPA = add(EXPA,EXPB)
230 }
231 {
232 P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS) // overflow
233 if (P_TMP.new) jump:nt .Ldiv_ovf
234 }
235 {
 // A positive exponent is still representable as a normal number.
236 P_TMP = cmp.gt(EXPA,#0)
237 if (P_TMP.new) jump:nt .Lpossible_unf // round up to normal possible...
238 }
239 // Underflow
240 // We know what the infinite range exponent should be (EXPA)
241 // Q is 2's complement, PROD is abs(Q)
242 // Normalize Q, shift right, add a high bit, convert, change exponent
243
244#define FUDGE1 7 // how much to shift right
245#define FUDGE2 4 // how many guard/round to keep at lsbs
246
247 {
 // Left-shift count that normalises PROD while keeping its top bit clear.
248 EXPB = add(clb(PROD),#-1) // doesn't need to be added in since
 // Right-shift count applied after normalising.
249 EXPA = sub(#FUDGE1,EXPA) // we extract post-converted exponent
250 TMP = USR
 // Upper limit for the right-shift count.
251 TMP1 = #63
252 }
253 {
 // All four instructions read the values from before this packet: the shift
 // count is clamped to 63, TMP1 becomes the USR copy with 0x30 set (used
 // below when the result is inexact), PROD is normalised by the old EXPB,
 // and EXPA is cleared so that EXPBA describes "EXPB bits at offset 0".
254 EXPB = min(EXPA,TMP1)
255 TMP1 = or(TMP,#0x030)
256 PROD = asl(PROD,EXPB)
257 EXPA = #0
258 }
259 {
260 TMPPAIR = extractu(PROD,EXPBA) // bits that will get shifted out
261 PROD = lsr(PROD,EXPB) // shift out bits
262 B = #1
263 }
264 {
 // P_TMP is true when no nonzero bits were shifted out (1 > TMPPAIR,
 // unsigned); otherwise bit 0 of PROD is set as a sticky bit.
265 P_TMP = cmp.gtu(B,TMPPAIR)
266 if (!P_TMP.new) PRODLO = or(BL,PRODLO)
 // The "high bit" mentioned in the plan above.
267 PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2)
268 }
269 {
270 Q = neg(PROD)
 // If any of the low FUDGE2 bits is set, switch to the USR copy with 0x30
 // or'ed in (.Lpossible_unf describes that value as Unf+Inexact).
271 P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1)
272 if (!P_TMP.new) TMP = TMP1
273 }
274 {
275 USR = TMP
 // Q holds the negated value unless the result is positive.
276 if (Q_POSITIVE) Q = PROD
 // Exponent adjustment applied after the conversion below.
277 TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2)
278 }
279 {
280 A = convert_d2df(Q)
281 }
282 {
283 AH += asl(TMP,#DF_MANTBITS-32)
284 jumpr r31
285 }
286
287
// Reached from .Ldiv_ovf_unf when the unlimited-range exponent is positive.
// A already holds the final result and PROD holds abs(Q); only the status
// flags may still need updating.
288.Lpossible_unf:
289 // If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal
290 // The answer is correct, but we need to raise Underflow
291 {
 // B = A with the sign bit dropped (low 63 bits of A).
292 B = extractu(A,#63,#0)
293 TMPPAIR = combine(##0x00100000,#0) // min normal
 // Bit mask tested against PRODHI in the next packet.
294 TMP = #0x7FFF
295 }
296 {
 // Both conditions write P_TMP, so both must hold.
297 P_TMP = dfcmp.eq(TMPPAIR,B) // Is everything zero in the rounded value...
298 P_TMP = bitsset(PRODHI,TMP) // but a bunch of bits set in the unrounded abs(quotient)?
299 }
300
// For __HEXAGON_ARCH__ == 60 the USR read and the conditional return are
// written as two stand-alone instructions instead of one packet (the reason is
// not stated here -- NOTE(review): confirm against the V60 packet rules).
301#if (__HEXAGON_ARCH__ == 60)
302 TMP = USR // If not, just return
303 if (!P_TMP) jumpr r31 // Else, we want to set Unf+Inexact
304 // Note that inexact is already set...
305#else
306 {
307 if (!P_TMP) jumpr r31 // If not, just return
308 TMP = USR // Else, we want to set Unf+Inexact
309 } // Note that inexact is already set...
310#endif
311 {
312 TMP = or(TMP,#0x30)
313 }
314 {
315 USR = TMP
316 }
317 {
 // The predicate result is not used afterwards; presumably the compare is
 // issued for its exception side effect, like the "get exceptions" compare
 // in .Ldiv_ovf -- TODO confirm.
318 p0 = dfcmp.eq(A,A)
319 jumpr r31
320 }
321
// Reached from .Ldiv_ovf_unf when the unlimited-range exponent exceeds
// DF_BIAS+DF_BIAS. Sets status bits in USR and returns either infinity or the
// largest finite magnitude, with the sign taken from Q_POSITIVE.
322.Ldiv_ovf:
323
324 // Raise Overflow, and choose the correct overflow value (saturated normal or infinity)
325
326 {
327 TMP = USR
 // Largest finite double magnitude.
328 B = combine(##0x7fefffff,#-1)
 // AH = 0 for a positive result, all ones for a negative one; only its top
 // bit survives the final insert.
329 AH = mux(Q_POSITIVE,#0,#-1)
330 }
331 {
 // Infinity.
332 PROD = combine(##0x7ff00000,#0)
 // Two-bit field of USR at SR_ROUND_OFF (the rounding setting tested below).
333 QH = extractu(TMP,#2,#SR_ROUND_OFF)
 // Status bits to raise (presumably overflow + inexact -- TODO confirm the
 // USR bit assignment for 0x28).
334 TMP = or(TMP,#0x28)
335 }
336 {
337 USR = TMP
 // QH becomes the rounding field xor'ed with the result's sign bit, while QL
 // receives the field as it was before this packet.
338 QH ^= lsr(AH,#31)
339 QL = QH
340 }
341 {
 // All three writes to p0 are combined; the conditional move uses the new
 // value.
342 p0 = !cmp.eq(QL,#1) // if not round-to-zero
343 p0 = !cmp.eq(QH,#2) // and not rounding the other way
344 if (p0.new) B = PROD // go to inf
345 p0 = dfcmp.eq(B,B) // get exceptions
346 }
347 {
 // Copy the low 63 bits of B into A, keeping the sign bit set up in AH.
348 A = insert(B,#63,#0)
349 jumpr r31
350 }
351
// Slow path taken from the entry code when at least one operand is not a
// normal number. A and B still hold the original operands and SIGN (r28) the
// xor of their high words. Dispatches NaN, invalid, zero and infinite results
// to their handlers; what remains is a division involving a denormal, which
// is normalised here and handed back to .Ldenorm_continue.
352#undef ONE
353#define SIGN r28
354#undef NORMAL
355#undef NO_OVF_UNF
356#define P_INF p1
357#define P_ZERO p2
358.Ldiv_abnormal:
359 {
 // P_TMP: both operands are numbers (neither is NaN).
360 P_TMP = dfclass(A,#DFCLASS_NUMBER)
361 P_TMP = dfclass(B,#DFCLASS_NUMBER)
 // The fast path's computation of Q_POSITIVE was skipped, so redo it here.
362 Q_POSITIVE = cmp.gt(SIGN,#-1)
363 }
364 {
 // P_INF: both operands are infinite.
365 P_INF = dfclass(A,#DFCLASS_INFINITE)
366 P_INF = dfclass(B,#DFCLASS_INFINITE)
367 }
368 {
 // P_ZERO: both operands are zero.
369 P_ZERO = dfclass(A,#DFCLASS_ZERO)
370 P_ZERO = dfclass(B,#DFCLASS_ZERO)
371 }
372 {
 // Any NaN operand, or infinity / infinity.
373 if (!P_TMP) jump .Ldiv_nan
374 if (P_INF) jump .Ldiv_invalid
375 }
376 {
 // zero / zero.
377 if (P_ZERO) jump .Ldiv_invalid
378 }
379 {
 // P_ZERO is reused: true unless A is zero or B is infinite.
380 P_ZERO = dfclass(A,#DFCLASS_NONZERO) // nonzero
381 P_ZERO = dfclass(B,#DFCLASS_NONINFINITE) // non-infinite
382 }
383 {
 // P_INF is reused: true unless A is infinite or B is zero.
384 P_INF = dfclass(A,#DFCLASS_NONINFINITE) // non-infinite
385 P_INF = dfclass(B,#DFCLASS_NONZERO) // nonzero
386 }
387 {
388 if (!P_ZERO) jump .Ldiv_zero_result
389 if (!P_INF) jump .Ldiv_inf_result
390 }
391 // Now we've narrowed it down to (de)normal / (de)normal
392 // Set up A/EXPA B/EXPB and go back
393#undef P_ZERO
394#undef P_INF
395#define P_TMP2 p1
396 {
 // Remember which operands are normal; the others are denormal here.
397 P_TMP = dfclass(A,#DFCLASS_NORMAL)
398 P_TMP2 = dfclass(B,#DFCLASS_NORMAL)
 // Hidden-bit position within a high word (bit 20).
399 TMP = ##0x00100000
400 }
401 {
 // Save the original high words for the exponent extraction below.
402 EXPBA = combine(BH,AH)
 // The low 12 bits of TMP are zero, so these inserts zero bits 20..31.
403 AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit
404 BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit
405 }
406 {
407 if (P_TMP) AH = or(AH,TMP) // if normal, add back in hidden bit
408 if (P_TMP2) BH = or(BH,TMP) // if normal, add back in hidden bit
409 }
410 {
 // Left-shift counts that bring each mantissa's leading one up to bit 52
 // (zero for an operand that already has its hidden bit set).
411 QH = add(clb(A),#-DF_EXPBITS)
412 QL = add(clb(B),#-DF_EXPBITS)
413 TMP = #1
414 }
415 {
416 EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
417 EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
418 }
419 {
420 A = asl(A,QH)
421 B = asl(B,QL)
 // A denormal operand gets the effective exponent 1 - shift count.
422 if (!P_TMP) EXPA = sub(TMP,QH)
423 if (!P_TMP2) EXPB = sub(TMP,QL)
424 } // recreate values needed by resume code
425 {
 // Same extraction as the fast path, now from the normalised divisor.
426 PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
427 }
428 {
 // NOTE(review): SFONE (r8) is not loaded in this path; this relies on the
 // load in the entry packet that also held the jump here -- confirm that the
 // whole packet executes when the jump is taken.
429 SFDEN = or(SFONE,PRODLO)
430 jump .Ldenorm_continue
431 }
432
// Reached from .Ldiv_abnormal when A is zero or B is infinite (the 0/0 and
// inf/inf cases have already been routed to .Ldiv_invalid). Returns a zero
// carrying the xor of the operand signs.
433.Ldiv_zero_result:
434 {
 // Bit 31 of AH becomes the result sign; B is cleared to supply zero bits.
435 AH = xor(AH,BH)
436 B = #0
437 }
438 {
 // Replace the low 63 bits of A with zero, keeping the sign bit.
439 A = insert(B,#63,#0)
440 jumpr r31
441 }
// Reached from .Ldiv_abnormal when A is infinite or B is zero. Returns an
// infinity carrying the xor of the operand signs; when a non-infinite A is
// divided by zero, the DBZ bit is also set in USR.
442.Ldiv_inf_result:
443 {
 // p2: B is zero and A is not infinite.
444 p2 = dfclass(B,#DFCLASS_ZERO)
445 p2 = dfclass(A,#DFCLASS_NONINFINITE)
446 }
447 {
448 TMP = USR
 // Skip the flag update when this is not a divide-by-zero.
449 if (!p2) jump 1f
 // Result sign; in the same packet as the jump, so it is computed on both
 // paths.
450 AH = xor(AH,BH)
451 }
452 {
453 TMP = or(TMP,#0x04) // DBZ
454 }
455 {
456 USR = TMP
457 }
4581:
459 {
 // Infinity bit pattern.
460 B = combine(##0x7ff00000,#0)
461 p0 = dfcmp.uo(B,B) // take possible exception
462 }
463 {
 // Replace the low 63 bits of A with infinity, keeping the sign bit.
464 A = insert(B,#63,#0)
465 jumpr r31
466 }
// Reached from .Ldiv_abnormal when at least one operand is not a number.
// Returns the all-ones bit pattern in A after converting the NaN operand(s)
// for their exception side effects.
467.Ldiv_nan:
468 {
 // Class 0x10 is the one class bit outside DFCLASS_NUMBER, i.e. NaN.
 // After this packet both A and B hold a NaN: an operand that is not a NaN
 // is replaced by the other operand.
469 p0 = dfclass(A,#0x10)
470 p1 = dfclass(B,#0x10)
471 if (!p0.new) A = B
472 if (!p1.new) B = A
473 }
474 {
 // The converted values in QH/QL are not used afterwards.
475 QH = convert_df2sf(A) // get possible invalid exceptions
476 QL = convert_df2sf(B)
477 }
478 {
 // All bits set: the NaN pattern returned to the caller.
479 A = #-1
480 jumpr r31
481 }
482
// Reached from .Ldiv_abnormal for inf/inf and 0/0. Produces the result by
// converting a single-precision NaN pattern to double.
483.Ldiv_invalid:
484 {
 // Single-precision pattern with an all-ones exponent and only the lowest
 // mantissa bit set.
485 TMP = ##0x7f800001
486 }
487 {
488 A = convert_sf2df(TMP) // get invalid, get DF qNaN
489 jumpr r31
490 }
491END(__hexagon_divdf3)
492

// Source: compiler-rt/lib/builtins/hexagon/dfdiv.S