1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | .file "div_Xsig.S" |
3 | /*---------------------------------------------------------------------------+ |
4 | | div_Xsig.S | |
5 | | | |
6 | | Division subroutine for 96 bit quantities | |
7 | | | |
8 | | Copyright (C) 1994,1995 | |
9 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
10 | | Australia. E-mail billm@jacobi.maths.monash.edu.au | |
11 | | | |
12 | | | |
13 | +---------------------------------------------------------------------------*/ |
14 | |
15 | /*---------------------------------------------------------------------------+ |
16 | | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and | |
17 | | put the 96 bit result at the location d. | |
18 | | | |
19 | | The result may not be accurate to 96 bits. It is intended for use where | |
20 | | a result better than 64 bits is required. The result should usually be | |
21 | | good to at least 94 bits. | |
22 | | The returned result is actually divided by one half. This is done to | |
23 | | prevent overflow. | |
24 | | | |
25 | | .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb -> .dddddddddddd | |
26 | | | |
27 | | void div_Xsig(Xsig *a, Xsig *b, Xsig *dest) | |
28 | | | |
29 | +---------------------------------------------------------------------------*/ |
30 | |
31 | #include "exception.h" |
32 | #include "fpu_emu.h" |
33 | |
34 | |
35 | #define XsigLL(x) (x) |
36 | #define XsigL(x) 4(x) |
37 | #define XsigH(x) 8(x) |
38 | |
39 | |
40 | #ifndef NON_REENTRANT_FPU |
41 | /* |
42 | Local storage on the stack: |
43 | Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 |
44 | */ |
45 | #define FPU_accum_3 -4(%ebp) |
46 | #define FPU_accum_2 -8(%ebp) |
47 | #define FPU_accum_1 -12(%ebp) |
48 | #define FPU_accum_0 -16(%ebp) |
49 | #define FPU_result_3 -20(%ebp) |
50 | #define FPU_result_2 -24(%ebp) |
51 | #define FPU_result_1 -28(%ebp) |
52 | |
53 | #else |
54 | .data |
55 | /* |
56 | Local storage in a static area: |
57 | Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 |
58 | */ |
59 | .align 4,0 |
60 | FPU_accum_3: |
61 | .long 0 |
62 | FPU_accum_2: |
63 | .long 0 |
64 | FPU_accum_1: |
65 | .long 0 |
66 | FPU_accum_0: |
67 | .long 0 |
68 | FPU_result_3: |
69 | .long 0 |
70 | FPU_result_2: |
71 | .long 0 |
72 | FPU_result_1: |
73 | .long 0 |
74 | #endif /* NON_REENTRANT_FPU */ |
75 | |
76 | |
77 | .text |
78 | SYM_FUNC_START(div_Xsig) |
79 | pushl %ebp |
80 | movl %esp,%ebp |
81 | #ifndef NON_REENTRANT_FPU |
82 | subl $28,%esp |
83 | #endif /* NON_REENTRANT_FPU */ |
84 | |
85 | pushl %esi |
86 | pushl %edi |
87 | pushl %ebx |
88 | |
89 | movl PARAM1,%esi /* pointer to num */ |
90 | movl PARAM2,%ebx /* pointer to denom */ |
91 | |
92 | #ifdef PARANOID |
93 | testl $0x80000000, XsigH(%ebx) /* Divisor */ |
94 | je L_bugged |
95 | #endif /* PARANOID */ |
96 | |
97 | |
98 | /*---------------------------------------------------------------------------+ |
99 | | Divide: Return arg1/arg2 to arg3. | |
100 | | | |
101 | | The maximum returned value is (ignoring exponents) | |
102 | | .ffffffff ffffffff | |
103 | | ------------------ = 1.ffffffff fffffffe | |
104 | | .80000000 00000000 | |
105 | | and the minimum is | |
106 | | .80000000 00000000 | |
107 | | ------------------ = .80000000 00000001 (rounded) | |
108 | | .ffffffff ffffffff | |
109 | | | |
110 | +---------------------------------------------------------------------------*/ |
111 | |
112 | /* Save extended dividend in local register */ |
113 | |
114 | /* Divide by 2 to prevent overflow */ |
115 | clc |
116 | movl XsigH(%esi),%eax |
117 | rcrl %eax |
118 | movl %eax,FPU_accum_3 |
119 | movl XsigL(%esi),%eax |
120 | rcrl %eax |
121 | movl %eax,FPU_accum_2 |
122 | movl XsigLL(%esi),%eax |
123 | rcrl %eax |
124 | movl %eax,FPU_accum_1 |
125 | movl $0,%eax |
126 | rcrl %eax |
127 | movl %eax,FPU_accum_0 |
128 | |
129 | movl FPU_accum_2,%eax /* Get the current num */ |
130 | movl FPU_accum_3,%edx |
131 | |
132 | /*----------------------------------------------------------------------*/ |
133 | /* Initialization done. |
134 | Do the first 32 bits. */ |
135 | |
136 | /* We will divide by a number which is too large */ |
137 | movl XsigH(%ebx),%ecx |
138 | addl $1,%ecx |
139 | jnc LFirst_div_not_1 |
140 | |
141 | /* here we need to divide by 100000000h, |
142 | i.e., no division at all.. */ |
143 | mov %edx,%eax |
144 | jmp LFirst_div_done |
145 | |
146 | LFirst_div_not_1: |
147 | divl %ecx /* Divide the numerator by the augmented |
148 | denom ms dw */ |
149 | |
150 | LFirst_div_done: |
151 | movl %eax,FPU_result_3 /* Put the result in the answer */ |
152 | |
153 | mull XsigH(%ebx) /* mul by the ms dw of the denom */ |
154 | |
155 | subl %eax,FPU_accum_2 /* Subtract from the num local reg */ |
156 | sbbl %edx,FPU_accum_3 |
157 | |
158 | movl FPU_result_3,%eax /* Get the result back */ |
159 | mull XsigL(%ebx) /* now mul the ls dw of the denom */ |
160 | |
161 | subl %eax,FPU_accum_1 /* Subtract from the num local reg */ |
162 | sbbl %edx,FPU_accum_2 |
163 | sbbl $0,FPU_accum_3 |
164 | je LDo_2nd_32_bits /* Must check for non-zero result here */ |
165 | |
166 | #ifdef PARANOID |
167 | jb L_bugged_1 |
168 | #endif /* PARANOID */ |
169 | |
170 | /* need to subtract another once of the denom */ |
171 | incl FPU_result_3 /* Correct the answer */ |
172 | |
173 | movl XsigL(%ebx),%eax |
174 | movl XsigH(%ebx),%edx |
175 | subl %eax,FPU_accum_1 /* Subtract from the num local reg */ |
176 | sbbl %edx,FPU_accum_2 |
177 | |
178 | #ifdef PARANOID |
179 | sbbl $0,FPU_accum_3 |
180 | jne L_bugged_1 /* Must check for non-zero result here */ |
181 | #endif /* PARANOID */ |
182 | |
183 | /*----------------------------------------------------------------------*/ |
184 | /* Half of the main problem is done, there is just a reduced numerator |
185 | to handle now. |
186 | Work with the second 32 bits, FPU_accum_0 not used from now on */ |
187 | LDo_2nd_32_bits: |
188 | movl FPU_accum_2,%edx /* get the reduced num */ |
189 | movl FPU_accum_1,%eax |
190 | |
191 | /* need to check for possible subsequent overflow */ |
192 | cmpl XsigH(%ebx),%edx |
193 | jb LDo_2nd_div |
194 | ja LPrevent_2nd_overflow |
195 | |
196 | cmpl XsigL(%ebx),%eax |
197 | jb LDo_2nd_div |
198 | |
199 | LPrevent_2nd_overflow: |
200 | /* The numerator is greater or equal, would cause overflow */ |
201 | /* prevent overflow */ |
202 | subl XsigL(%ebx),%eax |
203 | sbbl XsigH(%ebx),%edx |
204 | movl %edx,FPU_accum_2 |
205 | movl %eax,FPU_accum_1 |
206 | |
207 | incl FPU_result_3 /* Reflect the subtraction in the answer */ |
208 | |
209 | #ifdef PARANOID |
210 | je L_bugged_2 /* Can't bump the result to 1.0 */ |
211 | #endif /* PARANOID */ |
212 | |
213 | LDo_2nd_div: |
214 | cmpl $0,%ecx /* augmented denom msw */ |
215 | jnz LSecond_div_not_1 |
216 | |
217 | /* %ecx == 0, we are dividing by 1.0 */ |
218 | mov %edx,%eax |
219 | jmp LSecond_div_done |
220 | |
221 | LSecond_div_not_1: |
222 | divl %ecx /* Divide the numerator by the denom ms dw */ |
223 | |
224 | LSecond_div_done: |
225 | movl %eax,FPU_result_2 /* Put the result in the answer */ |
226 | |
227 | mull XsigH(%ebx) /* mul by the ms dw of the denom */ |
228 | |
229 | subl %eax,FPU_accum_1 /* Subtract from the num local reg */ |
230 | sbbl %edx,FPU_accum_2 |
231 | |
232 | #ifdef PARANOID |
233 | jc L_bugged_2 |
234 | #endif /* PARANOID */ |
235 | |
236 | movl FPU_result_2,%eax /* Get the result back */ |
237 | mull XsigL(%ebx) /* now mul the ls dw of the denom */ |
238 | |
239 | subl %eax,FPU_accum_0 /* Subtract from the num local reg */ |
240 | sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */ |
241 | sbbl $0,FPU_accum_2 |
242 | |
243 | #ifdef PARANOID |
244 | jc L_bugged_2 |
245 | #endif /* PARANOID */ |
246 | |
247 | jz LDo_3rd_32_bits |
248 | |
249 | #ifdef PARANOID |
250 | cmpl $1,FPU_accum_2 |
251 | jne L_bugged_2 |
252 | #endif /* PARANOID */ |
253 | |
254 | /* need to subtract another once of the denom */ |
255 | movl XsigL(%ebx),%eax |
256 | movl XsigH(%ebx),%edx |
257 | subl %eax,FPU_accum_0 /* Subtract from the num local reg */ |
258 | sbbl %edx,FPU_accum_1 |
259 | sbbl $0,FPU_accum_2 |
260 | |
261 | #ifdef PARANOID |
262 | jc L_bugged_2 |
263 | jne L_bugged_2 |
264 | #endif /* PARANOID */ |
265 | |
266 | addl $1,FPU_result_2 /* Correct the answer */ |
267 | adcl $0,FPU_result_3 |
268 | |
269 | #ifdef PARANOID |
270 | jc L_bugged_2 /* Must check for non-zero result here */ |
271 | #endif /* PARANOID */ |
272 | |
273 | /*----------------------------------------------------------------------*/ |
274 | /* The division is essentially finished here, we just need to perform |
275 | tidying operations. |
276 | Deal with the 3rd 32 bits */ |
277 | LDo_3rd_32_bits: |
278 | /* We use an approximation for the third 32 bits. |
279 | To take account of the 3rd 32 bits of the divisor |
280 | (call them del), we subtract del * (a/b) */ |
281 | |
282 | movl FPU_result_3,%eax /* a/b */ |
283 | mull XsigLL(%ebx) /* del */ |
284 | |
285 | subl %edx,FPU_accum_1 |
286 | |
287 | /* A borrow indicates that the result is negative */ |
288 | jnb LTest_over |
289 | |
290 | movl XsigH(%ebx),%edx |
291 | addl %edx,FPU_accum_1 |
292 | |
293 | subl $1,FPU_result_2 /* Adjust the answer */ |
294 | sbbl $0,FPU_result_3 |
295 | |
296 | /* The above addition might not have been enough, check again. */ |
297 | movl FPU_accum_1,%edx /* get the reduced num */ |
298 | cmpl XsigH(%ebx),%edx /* denom */ |
299 | jb LDo_3rd_div |
300 | |
301 | movl XsigH(%ebx),%edx |
302 | addl %edx,FPU_accum_1 |
303 | |
304 | subl $1,FPU_result_2 /* Adjust the answer */ |
305 | sbbl $0,FPU_result_3 |
306 | jmp LDo_3rd_div |
307 | |
308 | LTest_over: |
309 | movl FPU_accum_1,%edx /* get the reduced num */ |
310 | |
311 | /* need to check for possible subsequent overflow */ |
312 | cmpl XsigH(%ebx),%edx /* denom */ |
313 | jb LDo_3rd_div |
314 | |
315 | /* prevent overflow */ |
316 | subl XsigH(%ebx),%edx |
317 | movl %edx,FPU_accum_1 |
318 | |
319 | addl $1,FPU_result_2 /* Reflect the subtraction in the answer */ |
320 | adcl $0,FPU_result_3 |
321 | |
322 | LDo_3rd_div: |
323 | movl FPU_accum_0,%eax |
324 | movl FPU_accum_1,%edx |
325 | divl XsigH(%ebx) |
326 | |
327 | movl %eax,FPU_result_1 /* Rough estimate of third word */ |
328 | |
329 | movl PARAM3,%esi /* pointer to answer */ |
330 | |
331 | movl FPU_result_1,%eax |
332 | movl %eax,XsigLL(%esi) |
333 | movl FPU_result_2,%eax |
334 | movl %eax,XsigL(%esi) |
335 | movl FPU_result_3,%eax |
336 | movl %eax,XsigH(%esi) |
337 | |
338 | L_exit: |
339 | popl %ebx |
340 | popl %edi |
341 | popl %esi |
342 | |
343 | leave |
344 | RET |
345 | |
346 | |
347 | #ifdef PARANOID |
348 | /* The logic is wrong if we got here */ |
349 | L_bugged: |
350 | pushl EX_INTERNAL|0x240 |
351 | call EXCEPTION |
352 | pop %ebx |
353 | jmp L_exit |
354 | |
355 | L_bugged_1: |
356 | pushl EX_INTERNAL|0x241 |
357 | call EXCEPTION |
358 | pop %ebx |
359 | jmp L_exit |
360 | |
361 | L_bugged_2: |
362 | pushl EX_INTERNAL|0x242 |
363 | call EXCEPTION |
364 | pop %ebx |
365 | jmp L_exit |
366 | #endif /* PARANOID */ |
367 | SYM_FUNC_END(div_Xsig) |
368 | |