/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm/lib/div64.S
 *
 * Optimized computation of 64-bit dividend / 32-bit divisor
 *
 * Author: Nicolas Pitre
 * Created: Oct 5, 2003
 * Copyright: Monta Vista Software, Inc.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

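/*
 * A 64-bit value is passed in a register pair with its most significant
 * word in the lower-numbered register on big-endian, hence the swapped
 * xh/xl and yh/yl assignments below.
 */
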
#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif

/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: The calling convention is totally non-standard for optimal code.
 * This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 *	xh-xl	= dividend (clobbered)
 *	r4	= divisor (preserved)
 *
 * Output values:
 *	yh-yl	= result
 *	xh	= remainder
 *
 * Clobbered regs: xl, ip
 */

ENTRY(__do_div64)
UNWIND(.fnstart)

	@ Test for easy paths first.
	subs	ip, r4, #1
	bls	9f			@ divisor is 0 or 1
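	@ For a divisor >= 2, ((divisor - 1) & divisor) is zero exactly when
	@ the divisor is a power of two (e.g. 8: 7 & 8 == 0, but 6: 5 & 6 == 4).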
	tst	ip, r4
	beq	8f			@ divisor is power of 2

	@ See if we need to handle upper 32-bit result.
	cmp	xh, r4
	mov	yh, #0
	blo	3f

	@ Align divisor with upper part of dividend.
	@ The aligned divisor is stored in yl, preserving the original in r4.
	@ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5
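	@ clz is available from ARMv5: the shift needed to line the divisor up
	@ with xh is simply clz(divisor) - clz(xh).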

	clz	yl, r4
	clz	ip, xh
	sub	yl, yl, ip
	mov	ip, #1
	mov	ip, ip, lsl yl
	mov	yl, r4, lsl yl

#else

	mov	yl, r4
	mov	ip, #1
1:	cmp	yl, #0x80000000
	cmpcc	yl, xh
	movcc	yl, yl, lsl #1
	movcc	ip, ip, lsl #1
	bcc	1b

#endif

	@ The division loop for the needed upper bit positions.
	@ Break out early if dividend reaches 0.
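	@ Classic restoring shift-and-subtract: whenever the aligned divisor
	@ still fits into xh, subtract it and record the quotient bit held in
	@ ip into yh, then shift both divisor and bit one position right.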
2:	cmp	xh, yl
	orrcs	yh, yh, ip
	subscs	xh, xh, yl
	movsne	ip, ip, lsr #1
	mov	yl, yl, lsr #1
	bne	2b

	@ See if we need to handle lower 32-bit result.
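	@ If xh is already zero and xl is below the divisor, we are done:
	@ xl is the remainder, so move it into xh and return.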
3:	cmp	xh, #0
	mov	yl, #0
	cmpeq	xl, r4
	movlo	xh, xl
	retlo	lr

	@ The division loop for lower bit positions.
	@ Here we shift remainder bits leftwards rather than moving the
	@ divisor for comparisons, taking the carry-out bit into account as well.
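	@ ip walks from bit 31 down to bit 0 of yl: each pass shifts the
	@ remainder xh:xl left by one and, if the 33-bit result (carry:xh) is
	@ at least the divisor, subtracts it and sets the current quotient bit.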
	mov	ip, #0x80000000
4:	movs	xl, xl, lsl #1
	adcs	xh, xh, xh
	beq	6f
	cmpcc	xh, r4
5:	orrcs	yl, yl, ip
	subcs	xh, xh, r4
	movs	ip, ip, lsr #1
	bne	4b
	ret	lr

	@ The top part of the remainder became zero.  If the carry is set
	@ (the 33rd bit) this is a false positive, so resume the loop.
	@ Otherwise, if the lower part is also zero then we are done.
6:	bcs	5b
	cmp	xl, #0
	reteq	lr

	@ We still have remainder bits in the low part.  Bring them up.
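	@ Every quotient bit skipped here is zero, since only zero bits are
	@ shifted in from xl until its leading 1 comes up, at which point the
	@ remainder is exactly 1.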

#if __LINUX_ARM_ARCH__ >= 5

	clz	xh, xl			@ we know xh is zero here so...
	add	xh, xh, #1
	mov	xl, xl, lsl xh
	mov	ip, ip, lsr xh

#else

7:	movs	xl, xl, lsl #1
	mov	ip, ip, lsr #1
	bcc	7b

#endif

	@ Current remainder is now 1.  It is pointless to compare it with the
	@ divisor at this point since the divisor cannot be smaller than 3 here.
	@ If possible, branch for another shift in the division loop.
	@ If no bit position is left then we are done.
	movs	ip, ip, lsr #1
	mov	xh, #1
	bne	4b
	ret	lr

8:	@ Division by a power of 2: determine the divisor's order (log2),
	@ then simply shift values around.

#if __LINUX_ARM_ARCH__ >= 5
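	@ For a power-of-2 divisor, log2(divisor) is 31 - clz(divisor).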

	clz	ip, r4
	rsb	ip, ip, #31

#else
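	@ Without clz, compute log2(divisor) by binary search, testing 16, 8
	@ and 4 bit chunks and handling the remaining 1, 2, 4 or 8 directly,
	@ while accumulating the result in ip.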

	mov	yl, r4
	cmp	r4, #(1 << 16)
	mov	ip, #0
	movhs	yl, yl, lsr #16
	movhs	ip, #16

	cmp	yl, #(1 << 8)
	movhs	yl, yl, lsr #8
	addhs	ip, ip, #8

	cmp	yl, #(1 << 4)
	movhs	yl, yl, lsr #4
	addhs	ip, ip, #4

	cmp	yl, #(1 << 2)
	addhi	ip, ip, #3
	addls	ip, ip, yl, lsr #1

#endif

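	@ ip now holds log2(divisor): the quotient is the dividend shifted
	@ right by ip, and the remainder is the low ip bits of xl.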
	mov	yh, xh, lsr ip
	mov	yl, xl, lsr ip
	rsb	ip, ip, #32
 ARM(	orr	yl, yl, xh, lsl ip	)
 THUMB(	lsl	xh, xh, ip		)
 THUMB(	orr	yl, yl, xh		)
	mov	xh, xl, lsl ip
	mov	xh, xh, lsr ip
	ret	lr

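	@ The flags set by the subs at function entry are still valid here:
	@ eq means the divisor is 1; otherwise it is 0 and execution falls
	@ through to the division-by-0 handler below.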
	@ eq -> division by 1: obvious enough...
9:	moveq	yl, xl
	moveq	yh, xh
	moveq	xh, #0
	reteq	lr
UNWIND(.fnend)

UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
	@ Division by 0:
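	@ sp is dropped by 8 rather than 4 to keep the stack 8-byte aligned
	@ across the call to __div0; only lr is actually stored, hence the
	@ .pad #4 in the unwind annotations above.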
	str	lr, [sp, #-8]!
	bl	__div0

	@ as wrong as it could be...
	mov	yl, #0
	mov	yh, #0
	mov	xh, #0
	ldr	pc, [sp], #8

UNWIND(.fnend)
ENDPROC(__do_div64)