1 | /* Copyright (C) 1996-2022 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | |
4 | The GNU C Library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either |
7 | version 2.1 of the License, or (at your option) any later version. |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with the GNU C Library. If not, see |
16 | <https://www.gnu.org/licenses/>. */ |
17 | |
18 | #include "div_libc.h" |
19 | |
/* ldiv_t ldiv (long x, long y)
   (also exported as lldiv and imaxdiv through the weak aliases at the
   end of this file).

   In:   $16      address of the result object
         $17 (X)  numerator
         $18 (Y)  denominator
   Out:  quotient stored at 0($16), remainder stored at 8($16);
         $0 holds the address that was passed in $16.

   The quotient comes from a chopped floating-point division.  When X
   does not survive a round trip through 53 significant bits, the fp
   quotient is only an estimate and is corrected by an unsigned
   shift-and-subtract fixup.  The remainder is always derived as
   X - Q*Y at $egress.

   A zero denominator raises PAL_gentrap with GEN_INTDIV; if the trap
   returns, both result fields are set to zero.  */

/* FRAME is the number of stack bytes reserved by this function.  The
   slot offsets 0 and 8 handed to _ITOFT2 and _FTOIT below lie inside
   it.  NOTE(review): presumably those macros (div_libc.h) bounce values
   through the stack slots only when __alpha_fix__ is not defined, which
   is why no frame is needed in that case -- confirm in div_libc.h.  */
#undef FRAME
#ifdef __alpha_fix__
#define FRAME 0
#else
#define FRAME 16
#endif

#undef X
#undef Y
#define X $17
#define Y $18

/* Scratch copies of the caller's X and Y, used only on the $x_big path.
   NOTE(review): t6/t7 are taken to be call-clobbered temporaries from
   the same register-name header that provides t0-t5 -- confirm.  */
#define X_ORIG	t6
#define Y_ORIG	t7

	.set noat

	.align 4
	.globl ldiv
	.ent ldiv
ldiv:
	.frame sp, FRAME, ra
#if FRAME > 0
	lda	sp, -FRAME(sp)
#endif
#ifdef PROF
	.set	macro
	ldgp	gp, 0(pv)
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
	.set	nomacro
	.prologue 1
#else
	.prologue 0
#endif

	/* Reject Y == 0 before touching any floating-point state.  */
	beq	Y, $divbyzero

	/* Save the FPCR in $f10; every exit below that follows this point
	   writes it back with mt_fpcr.  */
	excb
	mf_fpcr	$f10

	_ITOFT2	X, $f0, 0, Y, $f1, 8

	.align	4
	cvtqt	$f0, $f0
	cvtqt	$f1, $f1
	divt/c	$f0, $f1, $f0		/* chopped fp quotient estimate */
	unop

	/* Check to see if X fits in the double as an exact value: shift the
	   top 11 bits out and sign-extend back, then compare.  */
	sll	X, (64-53), AT
	sra	AT, (64-53), AT
	cmpeq	X, AT, AT
	beq	AT, $x_big

	/* If we get here, we're expecting exact results from the division.
	   Do nothing else besides convert and clean up.  */
	cvttq/c	$f0, $f0
	excb
	mt_fpcr	$f10
	_FTOIT	$f0, $0, 0

	/* Common exit.  Expects the signed quotient in $0 and the caller's
	   original X and Y in their registers.  */
$egress:
	mulq	$0, Y, $1
	subq	X, $1, $1		/* remainder = X - Q*Y */

	stq	$0, 0($16)		/* result: quotient */
	stq	$1, 8($16)		/* result: remainder */
	mov	$16, $0			/* return the result address */

#if FRAME > 0
	lda	sp, FRAME(sp)
#endif
	ret

	.align	4
$x_big:
	/* If we get here, X is large enough that we don't expect exact
	   results, and neither X nor Y got mis-translated for the fp
	   division.  Our task is to take the fp result, figure out how
	   far it's off from the correct result and compute a fixup.  */

#define Q	v0		/* quotient */
#define R	t0		/* remainder */
#define SY	t1		/* scaled Y */
#define S	t2		/* scalar */
#define QY	t3		/* Q*Y */

	/* Keep the caller's operands.  $fix_sign_in replaces X and Y with
	   their magnitudes, but $egress has to compute the remainder from
	   the signed values so that it carries the sign of X.  */
	mov	X, X_ORIG
	mov	Y, Y_ORIG

	/* The fixup code below can only handle unsigned values.  */
	or	X, Y, AT
	mov	$31, t5			/* t5 = 0: do not negate the result */
	blt	AT, $fix_sign_in
$fix_sign_in_ret1:
	cvttq/c	$f0, $f0

	_FTOIT	$f0, Q, 8
$fix_sign_in_ret2:
	/* From here on Q, X and Y are all non-negative magnitudes.  */
	mulq	Q, Y, QY
	excb
	mt_fpcr	$f10

	.align	4
	subq	QY, X, R		/* R > 0 means Q*Y overshoots X */
	mov	Y, SY
	mov	1, S
	bgt	R, $q_high

$q_high_ret:
	subq	X, QY, R		/* R > 0 means Q may still be too small */
	mov	Y, SY
	mov	1, S
	bgt	R, $q_low

$q_low_ret:
	/* Give the quotient its sign back (low bit of t5 set means the
	   operands had opposite signs), then restore the signed operands
	   for the remainder computation at $egress.  */
	negq	Q, t4
	cmovlbs	t5, t4, Q
	mov	X_ORIG, X
	mov	Y_ORIG, Y
	br	$egress

	.align	4
	/* The quotient that we computed was too large.  We need to reduce
	   it by S such that Y*S >= R.  Obviously the closer we get to the
	   correct value the better, but reducing by too much is ok, as the
	   too-small case ($q_low) fixes that up afterwards.  */
0:
	addq	SY, SY, SY
	addq	S, S, S
$q_high:
	cmpult	SY, R, AT
	bne	AT, 0b

	subq	Q, S, Q
	unop
	subq	QY, SY, QY		/* keep QY equal to Q*Y */
	br	$q_high_ret

	.align	4
	/* The quotient that we computed was too small.  Divide the current
	   remainder (R) by Y and add that to the existing quotient (Q).
	   The expectation, of course, is that R is much smaller than X.  */
	/* Begin with a shift-up loop.  Compute S such that Y*S >= R.  We
	   already have a copy of Y in SY and the value 1 in S.  */
0:
	addq	SY, SY, SY
	addq	S, S, S
$q_low:
	cmpult	SY, R, AT
	bne	AT, 0b

	/* Shift-down and subtract loop.  Each iteration compares our scaled
	   Y (SY = Y*S) with the remainder (R); if SY <= R then Y goes into
	   R at least S more times, so add S to the quotient (Q) and take
	   SY off R.  t3 (QY) is free to reuse as a temporary here: QY is
	   not read again, and $egress recomputes the remainder from Q.  */
2:	addq	Q, S, t3
	srl	S, 1, S
	cmpule	SY, R, AT
	subq	R, SY, t4

	cmovne	AT, t3, Q
	cmovne	AT, t4, R
	srl	SY, 1, SY
	bne	S, 2b

	br	$q_low_ret

	.align	4
$fix_sign_in:
	/* If we got here, then X|Y is negative.  Need to adjust everything
	   such that we're doing unsigned division in the fixup loop.  The
	   signed operands were saved at $x_big and are restored at
	   $q_low_ret.  */
	/* T5 is true if result should be negative.  */
	xor	X, Y, AT
	cmplt	AT, 0, t5
	cmplt	X, 0, AT
	negq	X, t0

	cmovne	AT, t0, X		/* X = |X| */
	cmplt	Y, 0, AT
	negq	Y, t0

	cmovne	AT, t0, Y		/* Y = |Y| */
	blbc	t5, $fix_sign_in_ret1

	/* Signs differ, so the fp quotient is negative; convert it and flip
	   it to a magnitude before entering the unsigned fixup.  */
	cvttq/c	$f0, $f0
	_FTOIT	$f0, Q, 8
	.align	3
	negq	Q, Q
	br	$fix_sign_in_ret2

$divbyzero:
	/* Keep the result address in v0 (it is also the return value),
	   because a0 is needed for the trap code.  The FPCR has not been
	   read yet on this path, so there is nothing to restore.  */
	mov	a0, v0
	lda	a0, GEN_INTDIV
	call_pal PAL_gentrap
	/* If the trap returns, report a zero quotient and remainder.  */
	stq	zero, 0(v0)
	stq	zero, 8(v0)

#if FRAME > 0
	lda	sp, FRAME(sp)
#endif
	ret

	.end	ldiv

weak_alias (ldiv, lldiv)
weak_alias (ldiv, imaxdiv)
218 | |