/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm/lib/div64.S
 *
 * Optimized computation of 64-bit dividend / 32-bit divisor
 *
 * Author: Nicolas Pitre
 * Created: Oct 5, 2003
 * Copyright: Monta Vista Software, Inc.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

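/*
 * A 64-bit value is passed in a register pair with its most significant
 * word in the lower-numbered register on big-endian, hence the swapped
 * xh/xl and yh/yl assignments below.
 */
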
#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif

/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: The calling convention is totally non-standard for optimal code.
 * This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 *	xh-xl	= dividend (clobbered)
 *	r4	= divisor (preserved)
 *
 * Output values:
 *	yh-yl	= result
 *	xh	= remainder
 *
 * Clobbered regs: xl, ip
 */

ENTRY(__do_div64)
UNWIND(.fnstart)

	@ Test for easy paths first.
	subs	ip, r4, #1
	bls	9f			@ divisor is 0 or 1
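	@ For a divisor >= 2, ((divisor - 1) & divisor) is zero exactly when
	@ the divisor is a power of two (e.g. 8: 7 & 8 == 0, but 6: 5 & 6 == 4).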
	tst	ip, r4
	beq	8f			@ divisor is power of 2

	@ See if we need to handle upper 32-bit result.
	cmp	xh, r4
	mov	yh, #0
	blo	3f

	@ Align divisor with upper part of dividend.
	@ The aligned divisor is stored in yl, preserving the original in r4.
	@ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5
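	@ clz is available from ARMv5: the shift needed to line the divisor up
	@ with xh is simply clz(divisor) - clz(xh).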

	clz	yl, r4
	clz	ip, xh
	sub	yl, yl, ip
	mov	ip, #1
	mov	ip, ip, lsl yl
	mov	yl, r4, lsl yl

#else

	mov	yl, r4
	mov	ip, #1
1:	cmp	yl, #0x80000000
	cmpcc	yl, xh
	movcc	yl, yl, lsl #1
	movcc	ip, ip, lsl #1
	bcc	1b

#endif

	@ The division loop for the needed upper bit positions.
	@ Break out early if dividend reaches 0.
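	@ Classic restoring shift-and-subtract: whenever the aligned divisor
	@ still fits into xh, subtract it and record the quotient bit held in
	@ ip into yh, then shift both divisor and bit one position right.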
2:	cmp	xh, yl
	orrcs	yh, yh, ip
	subscs	xh, xh, yl
	movsne	ip, ip, lsr #1
	mov	yl, yl, lsr #1
	bne	2b

	@ See if we need to handle lower 32-bit result.
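	@ If xh is already zero and xl is below the divisor, we are done:
	@ xl is the remainder, so move it into xh and return.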
3:	cmp	xh, #0
	mov	yl, #0
	cmpeq	xl, r4
	movlo	xh, xl
	retlo	lr

	@ The division loop for lower bit positions.
	@ Here we shift remainder bits leftwards rather than moving the
	@ divisor for comparisons, taking the carry-out bit into account as well.
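	@ ip walks from bit 31 down to bit 0 of yl: each pass shifts the
	@ remainder xh:xl left by one and, if the 33-bit result (carry:xh) is
	@ at least the divisor, subtracts it and sets the current quotient bit.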
	mov	ip, #0x80000000
4:	movs	xl, xl, lsl #1
	adcs	xh, xh, xh
	beq	6f
	cmpcc	xh, r4
5:	orrcs	yl, yl, ip
	subcs	xh, xh, r4
	movs	ip, ip, lsr #1
	bne	4b
	ret	lr

	@ The top part of the remainder became zero.  If the carry is set
	@ (the 33rd bit) this is a false positive, so resume the loop.
	@ Otherwise, if the lower part is also zero then we are done.
6:	bcs	5b
	cmp	xl, #0
	reteq	lr

	@ We still have remainder bits in the low part.  Bring them up.
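	@ Every quotient bit skipped here is zero, since only zero bits are
	@ shifted in from xl until its leading 1 comes up, at which point the
	@ remainder is exactly 1.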

#if __LINUX_ARM_ARCH__ >= 5

	clz	xh, xl			@ we know xh is zero here so...
	add	xh, xh, #1
	mov	xl, xl, lsl xh
	mov	ip, ip, lsr xh

#else

7:	movs	xl, xl, lsl #1
	mov	ip, ip, lsr #1
	bcc	7b

#endif

	@ Current remainder is now 1.  It is pointless to compare it with the
	@ divisor at this point since the divisor cannot be smaller than 3 here.
	@ If possible, branch for another shift in the division loop.
	@ If no bit position is left then we are done.
	movs	ip, ip, lsr #1
	mov	xh, #1
	bne	4b
	ret	lr

8:	@ Division by a power of 2: determine the divisor's order (log2),
	@ then simply shift values around.

#if __LINUX_ARM_ARCH__ >= 5
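	@ For a power-of-2 divisor, log2(divisor) is 31 - clz(divisor).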

	clz	ip, r4
	rsb	ip, ip, #31

#else
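	@ Without clz, compute log2(divisor) by binary search, testing 16, 8
	@ and 4 bit chunks and handling the remaining 1, 2, 4 or 8 directly,
	@ while accumulating the result in ip.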

	mov	yl, r4
	cmp	r4, #(1 << 16)
	mov	ip, #0
	movhs	yl, yl, lsr #16
	movhs	ip, #16

	cmp	yl, #(1 << 8)
	movhs	yl, yl, lsr #8
	addhs	ip, ip, #8

	cmp	yl, #(1 << 4)
	movhs	yl, yl, lsr #4
	addhs	ip, ip, #4

	cmp	yl, #(1 << 2)
	addhi	ip, ip, #3
	addls	ip, ip, yl, lsr #1

#endif

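	@ ip now holds log2(divisor): the quotient is the dividend shifted
	@ right by ip, and the remainder is the low ip bits of xl.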
	mov	yh, xh, lsr ip
	mov	yl, xl, lsr ip
	rsb	ip, ip, #32
 ARM(	orr	yl, yl, xh, lsl ip	)
 THUMB(	lsl	xh, xh, ip		)
 THUMB(	orr	yl, yl, xh		)
	mov	xh, xl, lsl ip
	mov	xh, xh, lsr ip
	ret	lr

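	@ The flags set by the subs at function entry are still valid here:
	@ eq means the divisor is 1; otherwise it is 0 and execution falls
	@ through to the division-by-0 handler below.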
	@ eq -> division by 1: obvious enough...
9:	moveq	yl, xl
	moveq	yh, xh
	moveq	xh, #0
	reteq	lr
UNWIND(.fnend)

UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
	@ Division by 0:
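	@ sp is dropped by 8 rather than 4 to keep the stack 8-byte aligned
	@ across the call to __div0; only lr is actually stored, hence the
	@ .pad #4 in the unwind annotations above.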
	str	lr, [sp, #-8]!
	bl	__div0

	@ as wrong as it could be...
	mov	yl, #0
	mov	yh, #0
	mov	xh, #0
	ldr	pc, [sp], #8

UNWIND(.fnend)
ENDPROC(__do_div64)