1//===----------------------Hexagon builtin routine ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9// Double Precision Add and Subtract
10
// A and B name the two 64-bit operands as register pairs, with separate names
// for the high (xH) and low (xL) words. Results are written to A.
11#define A r1:0
12#define AH r1
13#define AL r0
14#define B r3:2
15#define BH r3
16#define BL r2
17
// Exponent scratch registers. EXPB_A names them as one pair so both can be
// swapped with a single combine().
18#define EXPA r4
19#define EXPB r5
20#define EXPB_A r5:4
21
// 64-bit scratch pairs and their halves, used for constants and intermediates.
22#define ZTMP r7:6
23#define ZTMPH r7
24#define ZTMPL r6
25
26#define ATMP r13:12
27#define ATMPH r13
28#define ATMPL r12
29
30#define BTMP r9:8
31#define BTMPH r9
32#define BTMPL r8
33
34#define ATMP2 r11:10
35#define ATMP2H r11
36#define ATMP2L r10
37
// EXPDIFF is the alignment shift applied to the smaller operand. EXTRACTAMT is
// the r15:14 pair passed to the register form of extractu(); it overlays
// EXPDIFF (high word) and EXTRACTOFF (low word).
38#define EXPDIFF r15
39#define EXTRACTOFF r14
40#define EXTRACTAMT r15:14
41
// 32-bit scratch register.
42#define TMP r28
43
// Field widths used when taking apart and rebuilding double-precision values.
44#define MANTBITS 52
45#define HI_MANTBITS 20
46#define EXPBITS 11
// NOTE(review): IEEE-754 binary64 uses a bias of 1023; this file uses 1024 in
// `#-BIAS-60` and `##BIAS+BIAS-2`. Presumably deliberate to match the scaling
// of the fixed-point intermediate - confirm before changing.
47#define BIAS 1024
// The next two macros are not referenced by any instruction in this file.
48#define MANTISSA_TO_INT_BIAS 52
49#define SR_BIT_INEXACT 5
50
// Bit offset of the 2-bit rounding-mode field read from USR. The #ifndef lets
// the build supply a different value.
51#ifndef SR_ROUND_OFF
52#define SR_ROUND_OFF 22
53#endif
54
// Predicate names used by the entry packets of __hexagon_adddf3.
55#define NORMAL p3
56#define BIGB p2
57
// Symbol helpers: each *_ALIAS declares one more global name and binds it to
// __hexagon_<TAG>; END emits the .size directive for a symbol.
58#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
59#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
60#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
61#define END(TAG) .size TAG,.-TAG
62
// Export the add and subtract entry points as function symbols.
63 .text
64 .global __hexagon_adddf3
65 .global __hexagon_subdf3
66 .type __hexagon_adddf3, @function
67 .type __hexagon_subdf3, @function
68
// Each entry point is also reachable under the __qdsp_, __hexagon_fast_ and
// __hexagon_fast2_ prefixes; all of them resolve to the same code.
69Q6_ALIAS(adddf3)
70FAST_ALIAS(adddf3)
71FAST2_ALIAS(adddf3)
72Q6_ALIAS(subdf3)
73FAST_ALIAS(subdf3)
74FAST2_ALIAS(subdf3)
75
76 .p2align 5
// __hexagon_adddf3: double-precision addition of A and B, result left in A,
// return through r31.
// This straight-line path handles two normal operands. Any other operand class
// branches to .Ladd_abnormal, and exponents outside the safe window branch to
// .Ladd_ovf_unf.
77__hexagon_adddf3:
78 {
79 EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS) // exponent field of A
80 EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS) // exponent field of B
81 ATMP = combine(##0x20000000,#0) // bit 61 preset; the mantissa is inserted directly below it
82 }
83 {
 // Both dfclass results target the same predicate in one packet, so the
 // branch below is taken unless A and B are both normal.
84 NORMAL = dfclass(A,#2)
85 NORMAL = dfclass(B,#2)
86 BTMP = ATMP // same preset leading bit for B
87 BIGB = cmp.gtu(EXPB,EXPA) // Does B have the larger exponent field?
88 }
 // Order the operands so that EXPA >= EXPB. The conditional moves are in one
 // packet, so each reads its source before any destination is updated.
89 {
90 if (!NORMAL) jump .Ladd_abnormal // If abnormal, go to special code
91 if (BIGB) A = B // if B >> A, swap A and B
92 if (BIGB) B = A // If B >> A, swap A and B
93 if (BIGB) EXPB_A = combine(EXPA,EXPB) // swap exponents
94 }
95 {
96 ATMP = insert(A,#MANTBITS,#EXPBITS-2) // Q1.62
97 BTMP = insert(B,#MANTBITS,#EXPBITS-2) // Q1.62
98 EXPDIFF = sub(EXPA,EXPB) // right shift needed to align B with A
99 ZTMP = combine(#62,#1) // ZTMPH = shift cap, ZTMPL = sticky bit
100 }
101#undef BIGB
102#undef NORMAL
103#define B_POS p3
104#define A_POS p2
105#define NO_STICKIES p1
// Shared tail, also entered from .Ladd_abnormal. Expects ATMP and BTMP to hold
// the positive fixed-point magnitudes, EXPDIFF the alignment shift, ZTMP the
// (62, 1) pair, and A/B to still carry the operand signs in AH/BH.
106.Ladd_continue:
107 {
108 EXPDIFF = min(EXPDIFF,ZTMPH) // If exponent difference >= ~60,
109 // will collapse to sticky bit
110 ATMP2 = neg(ATMP)
111 A_POS = cmp.gt(AH,#-1) // true when A's sign bit is clear
112 EXTRACTOFF = #0
113 }
114 {
115 if (!A_POS) ATMP = ATMP2 // A negative: use the negated magnitude
116 ATMP2 = extractu(BTMP,EXTRACTAMT) // bit field of BTMP selected by EXPDIFF/EXTRACTOFF; tested below for lost bits
117 BTMP = ASR(BTMP,EXPDIFF) // align B to A's exponent
118#undef EXTRACTAMT
119#undef EXPDIFF
120#undef EXTRACTOFF
121#define ZERO r15:14
 // r15:14 is reused here: the instructions above read its previous contents
 // in this same packet, and it is cleared for the compare in the next one.
122 ZERO = #0
123 }
124 {
125 NO_STICKIES = cmp.eq(ATMP2,ZERO)
126 if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL) // some extracted bit was set: OR in the sticky bit
127 EXPB = add(EXPA,#-BIAS-60) // exponent adjustment added to the converted result later
128 B_POS = cmp.gt(BH,#-1) // true when B's sign bit is clear
129 }
130 {
131 ATMP = add(ATMP,BTMP) // ADD!!!
132 ATMP2 = sub(ATMP,BTMP) // Negate and ADD --> SUB!!!
133 ZTMP = combine(#54,##2045) // ZTMPH/ZTMPL = bounds of the exponent window tested next
134 }
135 {
 // p0 holds only when 54 < EXPA <= 2045; otherwise take the slow path.
136 p0 = cmp.gtu(EXPA,ZTMPH) // must be pretty high in case of large cancellation
137 p0 = !cmp.gtu(EXPA,ZTMPL)
138 if (!p0.new) jump:nt .Ladd_ovf_unf
139 if (!B_POS) ATMP = ATMP2 // if B neg, pick difference
140 }
141 {
142 A = convert_d2df(ATMP) // Convert to Double Precision, taking care of flags, etc. So nice!
 // Both halves of ATMP zero means the operands cancelled exactly.
143 p0 = cmp.eq(ATMPH,#0)
144 p0 = cmp.eq(ATMPL,#0)
145 if (p0.new) jump:nt .Ladd_zero // or maybe conversion handles zero case correctly?
146 }
147 {
148 AH += asl(EXPB,#HI_MANTBITS) // apply the exponent adjustment directly to the exponent field
149 jumpr r31
150 }
151 .falign
// __hexagon_subdf3: computes A - B as A + (-B) by flipping B's sign bit and
// continuing in the add routine through its __qdsp_ alias.
152__hexagon_subdf3:
153 {
154 BH = togglebit(BH,#31) // negate B
155 jump __qdsp_adddf3
156 }
157
158
159 .falign
// .Ladd_zero: the fixed-point sum was exactly zero. Returns +0, or -0 when the
// rounding-mode field read from USR equals 2.
160.Ladd_zero:
161 // True zero, full cancellation
162 // +0 unless round towards negative infinity
163 {
164 TMP = USR
165 A = #0
166 BH = #1
167 }
168 {
 // Use SR_ROUND_OFF rather than a literal offset so this path reads the same
 // USR field as .Ladd_ovf and .Lzero_plus_zero, even when the build
 // overrides SR_ROUND_OFF.
169 TMP = extractu(TMP,#2,#SR_ROUND_OFF) // rounding bits
170 BH = asl(BH,#31) // BH = sign-bit mask
171 }
172 {
173 p0 = cmp.eq(TMP,#2)
174 if (p0.new) AH = xor(AH,BH) // set the sign bit: result becomes -0
175 jumpr r31
176 }
177 .falign
178.Ladd_ovf_unf:
179 // Overflow or Denormal is possible
180 // Good news: Underflow flag is not possible!
181
182 // ATMP has 2's complement value
183 //
184 // EXPA has A's exponent, EXPB has EXPA-BIAS-60
185 //
186 // Convert, extract exponent, add adjustment.
187 // If > 2046, overflow
188 // If <= 0, denormal
189 //
190 // Note that we've not done our zero check yet, so do that too
191
192 {
193 A = convert_d2df(ATMP)
194 p0 = cmp.eq(ATMPH,#0)
195 p0 = cmp.eq(ATMPL,#0)
196 if (p0.new) jump:nt .Ladd_zero // both halves zero: exact cancellation
197 }
198 {
199 TMP = extractu(AH,#EXPBITS,#HI_MANTBITS) // exponent field produced by the conversion
200 AH += asl(EXPB,#HI_MANTBITS) // speculatively apply the adjustment; only kept on the in-range return below
201 }
202 {
203 EXPB = add(EXPB,TMP) // exponent the final result would need
204 B = combine(##0x00100000,#0) // bit 52 set: explicit leading one for the denormal path
205 }
206 {
207 p0 = cmp.gt(EXPB,##BIAS+BIAS-2)
208 if (p0.new) jump:nt .Ladd_ovf
209 }
210 {
211 p0 = cmp.gt(EXPB,#0)
212 if (p0.new) jumpr:t r31 // exponent in range: A is already the answer
213 TMP = sub(#1,EXPB) // right-shift amount for the denormal result
214 }
 // Denormal result: take the mantissa with its leading one, shift it right,
 // and place it under the sign taken from the two's complement sum.
215 {
216 B = insert(A,#MANTBITS,#0) // mantissa bits of A below the preset bit 52
217 A = ATMP // top bit of ATMP supplies the sign
218 }
219 {
220 B = lsr(B,TMP)
221 }
222 {
223 A = insert(B,#63,#0) // overwrite the low 63 bits, leaving bit 63 as is
224 jumpr r31
225 }
226 .falign
// .Ladd_ovf: result exponent too large. Sets two flag bits in USR and returns
// either infinity or the largest finite value, chosen from the rounding mode
// and the sign of the sum.
227.Ladd_ovf:
228 // We get either max finite value or infinity. Either way, overflow+inexact
229 {
230 A = ATMP // 2's complement value
231 TMP = USR
232 ATMP = combine(##0x7fefffff,#-1) // positive max finite
233 }
234 {
235 EXPB = extractu(TMP,#2,#SR_ROUND_OFF) // rounding bits
236 TMP = or(TMP,#0x28) // inexact + overflow
237 BTMP = combine(##0x7ff00000,#0) // positive infinity
238 }
 // The three instructions below read their sources before any destination in
 // the packet is updated: USR receives the flagged value from TMP, TMP
 // receives the raw rounding mode, and EXPB receives mode XOR sign.
239 {
240 USR = TMP
241 EXPB ^= lsr(AH,#31) // Does sign match rounding?
242 TMP = EXPB // unmodified rounding mode
243 }
244 {
245 p0 = !cmp.eq(TMP,#1) // If not round-to-zero and
246 p0 = !cmp.eq(EXPB,#2) // Not rounding the other way,
247 if (p0.new) ATMP = BTMP // we should get infinity
248 }
249 {
250 A = insert(ATMP,#63,#0) // insert inf/maxfinite, leave sign
251 }
252 {
 // NOTE(review): p0 is not read after this compare in this file; its purpose
 // is not evident from the code - confirm before removing.
253 p0 = dfcmp.eq(A,A)
254 jumpr r31
255 }
256
// .Ladd_abnormal: reached when A and B are not both normal. Sorts the operands
// by magnitude, then dispatches on NaN, infinity, zero and subnormal cases.
// The remaining normal + subnormal case rejoins .Ladd_continue.
257.Ladd_abnormal:
258 {
259 ATMP = extractu(A,#63,#0) // strip off sign
260 BTMP = extractu(B,#63,#0) // strip off sign
261 }
262 {
 // Compare the sign-stripped bit patterns as unsigned integers; A keeps the
 // larger one. On a tie the operands are still exchanged.
263 p3 = cmp.gtu(ATMP,BTMP)
264 if (!p3.new) A = B // sort values
265 if (!p3.new) B = A // sort values
266 }
267 {
268 // Any NaN --> NaN, possibly raise invalid if sNaN
269 p0 = dfclass(A,#0x0f) // A not NaN?
270 if (!p0.new) jump:nt .Linvalid_nan_add
 // Keep the stripped magnitudes in the same order as A and B.
271 if (!p3) ATMP = BTMP
272 if (!p3) BTMP = ATMP
273 }
274 {
275 // Infinity + non-infinity number is infinity
276 // Infinity + infinity --> inf or nan
277 p1 = dfclass(A,#0x08) // A is infinity
278 if (p1.new) jump:nt .Linf_add
279 }
280 {
281 p2 = dfclass(B,#0x01) // B is zero
282 if (p2.new) jump:nt .LB_zero // so return A or special 0+0
283 ATMP = #0 // .LB_zero compares A against this zero
284 }
285 // We are left with adding one or more subnormals
286 {
 // NOTE(review): class mask #4 presumably selects subnormal, going by the
 // target label - confirm against the dfclass definition.
287 p0 = dfclass(A,#4)
288 if (p0.new) jump:nt .Ladd_two_subnormal
289 ATMP = combine(##0x20000000,#0) // same preset leading bit as the fast path
290 }
 // Here A did not match class #4 and B is the smaller, nonzero operand.
291 {
292 EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
293 EXPB = #1 // exponent used for B on this path
294 // BTMP already ABS(B)
295 BTMP = asl(BTMP,#EXPBITS-2) // same alignment as the fast path's insert; no leading bit is preset
296 }
297#undef ZERO
298#define EXTRACTOFF r14
299#define EXPDIFF r15
300 {
301 ATMP = insert(A,#MANTBITS,#EXPBITS-2)
302 EXPDIFF = sub(EXPA,EXPB)
303 ZTMP = combine(#62,#1) // same (shift cap, sticky bit) pair the fast path sets up
304 jump .Ladd_continue
305 }
306
// .Ladd_two_subnormal: adds the operands as signed 64-bit integers built from
// their sign-stripped bit patterns, then converts the sum back to
// sign-and-magnitude form in A.
307.Ladd_two_subnormal:
308 {
309 ATMP = extractu(A,#63,#0) // |A| as an integer
310 BTMP = extractu(B,#63,#0) // |B| as an integer
311 }
312 {
313 ATMP = neg(ATMP)
314 BTMP = neg(BTMP)
315 p0 = cmp.gt(AH,#-1) // A's sign bit clear?
316 p1 = cmp.gt(BH,#-1) // B's sign bit clear?
317 }
318 {
 // A non-negative operand is used as is; a negative one keeps the negated
 // magnitude computed above.
319 if (p0) ATMP = A
320 if (p1) BTMP = B
321 }
322 {
323 ATMP = add(ATMP,BTMP) // signed sum
324 }
325 {
326 BTMP = neg(ATMP)
327 p0 = cmp.gt(ATMPH,#-1) // sum non-negative?
328 B = #0
329 }
330 {
331 if (!p0) A = BTMP // negative sum: take its magnitude
332 if (p0) A = ATMP
333 BH = ##0x80000000 // B now holds only the sign bit
334 }
335 {
336 if (!p0) AH = or(AH,BH) // restore the sign on a negative result
 // This compare shares the packet with the OR above, so it sees A before the
 // sign bit is set; the branch is taken when that value equals B.
337 p0 = dfcmp.eq(A,B)
338 if (p0.new) jump:nt .Lzero_plus_zero
339 }
340 {
341 jumpr r31
342 }
343
// .Linvalid_nan_add: at least one operand is NaN. Both operands are passed
// through a conversion for its flag side effect (the converted values are not
// used), and A is returned with every bit set.
344.Linvalid_nan_add:
345 {
346 TMP = convert_df2sf(A) // will generate invalid if sNaN
347 p0 = dfclass(B,#0x0f) // if B is not NaN
348 if (p0.new) B = A // make it whatever A is
349 }
350 {
351 BL = convert_df2sf(B) // will generate invalid if sNaN
352 A = #-1 // all-ones bit pattern is the returned NaN
353 jumpr r31
354 }
355 .falign
// .LB_zero: B is zero. Entered from .Ladd_abnormal, which sets ATMP to 0 in the
// same packet as the branch here.
356.LB_zero:
357 {
358 p0 = dfcmp.eq(ATMP,A) // is A also zero?
359 if (!p0.new) jumpr:t r31 // If not, just return A
360 }
361 // 0 + 0 is special
362 // if equal integral values, they have the same sign, which is fine for all rounding
363 // modes.
364 // If unequal in sign, we get +0 for all rounding modes except round down
// .Lzero_plus_zero: also entered from .Ladd_two_subnormal.
365.Lzero_plus_zero:
366 {
367 p0 = cmp.eq(A,B) // 64-bit integer compare: identical bit patterns?
368 if (p0.new) jumpr:t r31
369 }
370 {
371 TMP = USR
372 }
373 {
374 TMP = extractu(TMP,#2,#SR_ROUND_OFF) // rounding-mode field
375 A = #0
376 }
377 {
378 p0 = cmp.eq(TMP,#2)
379 if (p0.new) AH = ##0x80000000 // rounding mode 2: return -0 instead of +0
380 jumpr r31
381 }
// .Linf_add: A is infinite. Returns A unchanged unless B is also infinite and
// the high words of A and B differ; in that case a NaN is produced.
382.Linf_add:
383 // adding infinities is only OK if they are equal
384 {
 // Both predicate writes are in one packet, so p0 holds only when the high
 // words differ and B is infinite.
385 p0 = !cmp.eq(AH,BH) // Do they have different signs
386 p0 = dfclass(B,#8) // And is B also infinite?
387 if (!p0.new) jumpr:t r31 // If not, just a normal inf
388 }
389 {
390 BL = ##0x7f800001 // sNAN
391 }
392 {
393 A = convert_sf2df(BL) // trigger invalid, set NaN
394 jumpr r31
395 }
// The .size of __hexagon_adddf3 spans everything from its label to here,
// including __hexagon_subdf3 and all the local slow paths.
396END(__hexagon_adddf3)
397

source code of compiler-rt/lib/builtins/hexagon/dfaddsub.S