1! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
2! store difference in a third limb vector.
3!
4! Copyright (C) 1995-2024 Free Software Foundation, Inc.
5!
6! This file is part of the GNU MP Library.
7!
8! The GNU MP Library is free software; you can redistribute it and/or modify
9! it under the terms of the GNU Lesser General Public License as published by
10! the Free Software Foundation; either version 2.1 of the License, or (at your
11! option) any later version.
12!
13! The GNU MP Library is distributed in the hope that it will be useful, but
14! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16! License for more details.
17!
18! You should have received a copy of the GNU Lesser General Public License
19! along with the GNU MP Library; see the file COPYING.LIB. If not,
20! see <https://www.gnu.org/licenses/>.
21
22
23! INPUT PARAMETERS
/* Register aliases for the four arguments, as the code below uses them:
     RES_PTR  address the difference limbs are stored to
     S1_PTR   address of the limbs that are subtracted from (minuend)
     S2_PTR   address of the limbs that are subtracted (subtrahend)
     SIZE     number of limbs still to process; counted down as the
              routine proceeds (the file header requires length > 0)  */
24#define RES_PTR %o0
25#define S1_PTR %o1
26#define S2_PTR %o2
27#define SIZE %o3
28
29#include <sysdep.h>
30
/* __mpn_sub_n -- subtract the SIZE-limb vector at S2_PTR from the SIZE-limb
   vector at S1_PTR and store the SIZE-limb difference at RES_PTR.

   In:   RES_PTR, S1_PTR, S2_PTR, SIZE (register aliases #defined earlier
         in this file).  A limb is one 32-bit word; limbs are processed
         from the lowest address upwards and the borrow travels from limb
         to limb in the carry flag.
   Out:  %o0 = borrow out of the last (most significant) limb, 0 or 1.
   Uses: %g1-%g4, %o4 and %o5 as scratch, plus the integer condition
         codes.  The four argument registers are modified as well.
         The routine returns with retl and executes no save/restore.

   Strategy: ldd/std move two limbs at once through an even/odd register
   pair, but they need a doubleword-aligned address (a SPARC architecture
   rule, not something this file shows).  Bit 2 of the three pointers is
   compared to pick one of three variants:
     V1a  RES_PTR and S2_PTR agree: S2 via ldd, result via std, S1 via ld.
     V1b  RES_PTR and S1_PTR agree: S1 via ldd, result via std, S2 via ld.
     V2   otherwise S1_PTR and S2_PTR agree: both via ldd, result via st.
   Each variant first handles one limb on its own when the paired pointers
   have bit 2 set, then runs an 8-limb unrolled loop, a 2-limb loop and a
   final single-limb step.

   Borrow bookkeeping: the addcc/cmp/andcc instructions used for counting
   overwrite the carry flag, so the borrow is parked with
   "addx %g0,%g0,%o4" (%o4 = carry) and re-created with
   "subcc %g0,%o4,%g0" (0 - %o4 borrows exactly when %o4 is 1).  The
   re-creating subcc usually sits in the delay slot of the branch that
   follows the counting instruction.  */
31ENTRY(__mpn_sub_n)
/* Bit 2 of (S2_PTR xor RES_PTR) is set when the two pointers differ in
   doubleword alignment; in that case V1a cannot be used.  */
32 xor S2_PTR,RES_PTR,%g1
33 andcc %g1,4,%g0
34 bne LOC(1) ! branch if alignment differs
35 nop
36! ** V1a **
/* Here RES_PTR and S2_PTR have the same bit 2.  */
37 andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0
38 be LOC(v1) ! if no, branch
39 nop
40/* Subtract least significant limb separately to align RES_PTR and S2_PTR */
41 ld [S1_PTR],%g4
42 add S1_PTR,4,S1_PTR
43 ld [S2_PTR],%g2
44 add S2_PTR,4,S2_PTR
45 add SIZE,-1,SIZE
46 subcc %g4,%g2,%o4
47 st %o4,[RES_PTR]
48 add RES_PTR,4,RES_PTR
49LOC(v1):
50 addx %g0,%g0,%o4 ! save cy in register
51 cmp SIZE,2 ! if SIZE < 2 ...
52 bl LOC(end2) ! ... branch to tail code
53 subcc %g0,%o4,%g0 ! restore cy
54
/* Prime the pipeline: %g4,%g1 = first two S1 limbs, %g2,%g3 = first two S2
   limbs.  SIZE is biased by -10 because one trip through loop1 subtracts
   8 limbs and also loads the 2 limbs that follow them.  */
55 ld [S1_PTR+0],%g4
56 addcc SIZE,-10,SIZE
57 ld [S1_PTR+4],%g1
58 ldd [S2_PTR+0],%g2
59 blt LOC(fin1)
60 subcc %g0,%o4,%g0 ! restore cy
61/* Subtract blocks of 8 limbs until less than 8 limbs remain */
/* Each group of six instructions subtracts the limb pair already held in
   %g4,%g1 (S1) and %g2,%g3 (S2), reloads those registers with the next
   pair, and writes the two results in %o4,%o5 with a single std.  */
62LOC(loop1):
63 subxcc %g4,%g2,%o4
64 ld [S1_PTR+8],%g4
65 subxcc %g1,%g3,%o5
66 ld [S1_PTR+12],%g1
67 ldd [S2_PTR+8],%g2
68 std %o4,[RES_PTR+0]
69 subxcc %g4,%g2,%o4
70 ld [S1_PTR+16],%g4
71 subxcc %g1,%g3,%o5
72 ld [S1_PTR+20],%g1
73 ldd [S2_PTR+16],%g2
74 std %o4,[RES_PTR+8]
75 subxcc %g4,%g2,%o4
76 ld [S1_PTR+24],%g4
77 subxcc %g1,%g3,%o5
78 ld [S1_PTR+28],%g1
79 ldd [S2_PTR+24],%g2
80 std %o4,[RES_PTR+16]
81 subxcc %g4,%g2,%o4
82 ld [S1_PTR+32],%g4
83 subxcc %g1,%g3,%o5
84 ld [S1_PTR+36],%g1
85 ldd [S2_PTR+32],%g2
86 std %o4,[RES_PTR+24]
87 addx %g0,%g0,%o4 ! save cy in register
88 addcc SIZE,-8,SIZE
/* Advance all three pointers by 32 bytes (8 limbs).  */
89 add S1_PTR,32,S1_PTR
90 add S2_PTR,32,S2_PTR
91 add RES_PTR,32,RES_PTR
92 bge LOC(loop1)
93 subcc %g0,%o4,%g0 ! restore cy
94
/* Replace the 8-limb loop bias with the 2-limb one: afterwards SIZE equals
   (limbs not yet subtracted) - 4, so SIZE >= 0 means another pair can be
   loaded beyond the two limbs already held in registers.  */
95LOC(fin1):
96 addcc SIZE,8-2,SIZE
97 blt LOC(end1)
98 subcc %g0,%o4,%g0 ! restore cy
99/* Subtract blocks of 2 limbs until less than 2 limbs remain */
100LOC(loope1):
101 subxcc %g4,%g2,%o4
102 ld [S1_PTR+8],%g4
103 subxcc %g1,%g3,%o5
104 ld [S1_PTR+12],%g1
105 ldd [S2_PTR+8],%g2
106 std %o4,[RES_PTR+0]
107 addx %g0,%g0,%o4 ! save cy in register
108 addcc SIZE,-2,SIZE
109 add S1_PTR,8,S1_PTR
110 add S2_PTR,8,S2_PTR
111 add RES_PTR,8,RES_PTR
112 bge LOC(loope1)
113 subcc %g0,%o4,%g0 ! restore cy
/* Drain the pipeline: subtract and store the pair still held in registers.
   The pointers are not advanced past it, hence the +8 offsets below.  */
114LOC(end1):
115 subxcc %g4,%g2,%o4
116 subxcc %g1,%g3,%o5
117 std %o4,[RES_PTR+0]
118 addx %g0,%g0,%o4 ! save cy in register
119
/* An odd SIZE means one limb is left after the pair just stored.  */
120 andcc SIZE,1,%g0
121 be LOC(ret1)
122 subcc %g0,%o4,%g0 ! restore cy
123/* Subtract last limb */
124 ld [S1_PTR+8],%g4
125 ld [S2_PTR+8],%g2
126 subxcc %g4,%g2,%o4
127 st %o4,[RES_PTR+8]
128
/* The addx in the retl delay slot turns the carry flag into the return
   value.  */
129LOC(ret1):
130 retl
131 addx %g0,%g0,%o0 ! return carry-out from most sign. limb
132
/* RES_PTR and S2_PTR differ in bit 2; try pairing RES_PTR with S1_PTR.  */
133LOC(1): xor S1_PTR,RES_PTR,%g1
134 andcc %g1,4,%g0
135 bne LOC(2)
136 nop
137! ** V1b **
/* Same scheme as V1a with the operand roles swapped: S1 is read with ldd
   into %g2,%g3 and S2 with single loads into %g4,%g1.  The subtractions
   below still compute S1 - S2.  */
138 andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0
139 be LOC(v1b) ! if no, branch
140 nop
141/* Subtract least significant limb separately to align RES_PTR and S1_PTR */
142 ld [S2_PTR],%g4
143 add S2_PTR,4,S2_PTR
144 ld [S1_PTR],%g2
145 add S1_PTR,4,S1_PTR
146 add SIZE,-1,SIZE
147 subcc %g2,%g4,%o4
148 st %o4,[RES_PTR]
149 add RES_PTR,4,RES_PTR
150LOC(v1b):
151 addx %g0,%g0,%o4 ! save cy in register
152 cmp SIZE,2 ! if SIZE < 2 ...
153 bl LOC(end2) ! ... branch to tail code
154 subcc %g0,%o4,%g0 ! restore cy
155
/* Prime the pipeline: %g4,%g1 = first two S2 limbs, %g2,%g3 = first two S1
   limbs; SIZE gets the same -10 bias as in V1a.  */
156 ld [S2_PTR+0],%g4
157 addcc SIZE,-10,SIZE
158 ld [S2_PTR+4],%g1
159 ldd [S1_PTR+0],%g2
160 blt LOC(fin1b)
161 subcc %g0,%o4,%g0 ! restore cy
162/* Subtract blocks of 8 limbs until less than 8 limbs remain */
163LOC(loop1b):
164 subxcc %g2,%g4,%o4
165 ld [S2_PTR+8],%g4
166 subxcc %g3,%g1,%o5
167 ld [S2_PTR+12],%g1
168 ldd [S1_PTR+8],%g2
169 std %o4,[RES_PTR+0]
170 subxcc %g2,%g4,%o4
171 ld [S2_PTR+16],%g4
172 subxcc %g3,%g1,%o5
173 ld [S2_PTR+20],%g1
174 ldd [S1_PTR+16],%g2
175 std %o4,[RES_PTR+8]
176 subxcc %g2,%g4,%o4
177 ld [S2_PTR+24],%g4
178 subxcc %g3,%g1,%o5
179 ld [S2_PTR+28],%g1
180 ldd [S1_PTR+24],%g2
181 std %o4,[RES_PTR+16]
182 subxcc %g2,%g4,%o4
183 ld [S2_PTR+32],%g4
184 subxcc %g3,%g1,%o5
185 ld [S2_PTR+36],%g1
186 ldd [S1_PTR+32],%g2
187 std %o4,[RES_PTR+24]
188 addx %g0,%g0,%o4 ! save cy in register
189 addcc SIZE,-8,SIZE
190 add S1_PTR,32,S1_PTR
191 add S2_PTR,32,S2_PTR
192 add RES_PTR,32,RES_PTR
193 bge LOC(loop1b)
194 subcc %g0,%o4,%g0 ! restore cy
195
/* As at fin1: SIZE becomes (limbs not yet subtracted) - 4.  */
196LOC(fin1b):
197 addcc SIZE,8-2,SIZE
198 blt LOC(end1b)
199 subcc %g0,%o4,%g0 ! restore cy
200/* Subtract blocks of 2 limbs until less than 2 limbs remain */
201LOC(loope1b):
202 subxcc %g2,%g4,%o4
203 ld [S2_PTR+8],%g4
204 subxcc %g3,%g1,%o5
205 ld [S2_PTR+12],%g1
206 ldd [S1_PTR+8],%g2
207 std %o4,[RES_PTR+0]
208 addx %g0,%g0,%o4 ! save cy in register
209 addcc SIZE,-2,SIZE
210 add S1_PTR,8,S1_PTR
211 add S2_PTR,8,S2_PTR
212 add RES_PTR,8,RES_PTR
213 bge LOC(loope1b)
214 subcc %g0,%o4,%g0 ! restore cy
/* Drain the pair still held in registers; pointers stay put, so the last
   odd limb (if any) is at offset 8.  */
215LOC(end1b):
216 subxcc %g2,%g4,%o4
217 subxcc %g3,%g1,%o5
218 std %o4,[RES_PTR+0]
219 addx %g0,%g0,%o4 ! save cy in register
220
221 andcc SIZE,1,%g0
222 be LOC(ret1b)
223 subcc %g0,%o4,%g0 ! restore cy
224/* Subtract last limb */
225 ld [S2_PTR+8],%g4
226 ld [S1_PTR+8],%g2
227 subxcc %g2,%g4,%o4
228 st %o4,[RES_PTR+8]
229
230LOC(ret1b):
231 retl
232 addx %g0,%g0,%o0 ! return carry-out from most sign. limb
233
234! ** V2 **
235/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
236 alignment of S2_PTR and RES_PTR differ. Since there are only two ways
237 things can be aligned (that we care about) we now know that the alignment
238 of S1_PTR and S2_PTR are the same. */
239
/* With exactly one limb, go straight to the single-limb tail.  The cmp
   leaves the carry flag clear in that case (SIZE - 1 does not borrow),
   which is the borrow-in the subxcc at jone consumes.  */
240LOC(2): cmp SIZE,1
241 be LOC(jone)
242 nop
243 andcc S1_PTR,4,%g0 ! S1_PTR unaligned? Side effect: cy=0
244 be LOC(v2) ! if no, branch
245 nop
246/* Subtract least significant limb separately to align S1_PTR and S2_PTR */
247 ld [S1_PTR],%g4
248 add S1_PTR,4,S1_PTR
249 ld [S2_PTR],%g2
250 add S2_PTR,4,S2_PTR
251 add SIZE,-1,SIZE
252 subcc %g4,%g2,%o4
253 st %o4,[RES_PTR]
254 add RES_PTR,4,RES_PTR
255
/* No preloading in this variant, so the loop bias is just the block size
   (8): the loop is entered only when at least 8 limbs remain.  */
256LOC(v2):
257 addx %g0,%g0,%o4 ! save cy in register
258 addcc SIZE,-8,SIZE
259 blt LOC(fin2)
260 subcc %g0,%o4,%g0 ! restore cy
261/* Subtract blocks of 8 limbs until less than 8 limbs remain */
/* Both sources are read with ldd (S1 into %g2,%g3, S2 into %o4,%o5); the
   differences overwrite %g2,%g3 and are stored one word at a time.  */
262LOC(loop2):
263 ldd [S1_PTR+0],%g2
264 ldd [S2_PTR+0],%o4
265 subxcc %g2,%o4,%g2
266 st %g2,[RES_PTR+0]
267 subxcc %g3,%o5,%g3
268 st %g3,[RES_PTR+4]
269 ldd [S1_PTR+8],%g2
270 ldd [S2_PTR+8],%o4
271 subxcc %g2,%o4,%g2
272 st %g2,[RES_PTR+8]
273 subxcc %g3,%o5,%g3
274 st %g3,[RES_PTR+12]
275 ldd [S1_PTR+16],%g2
276 ldd [S2_PTR+16],%o4
277 subxcc %g2,%o4,%g2
278 st %g2,[RES_PTR+16]
279 subxcc %g3,%o5,%g3
280 st %g3,[RES_PTR+20]
281 ldd [S1_PTR+24],%g2
282 ldd [S2_PTR+24],%o4
283 subxcc %g2,%o4,%g2
284 st %g2,[RES_PTR+24]
285 subxcc %g3,%o5,%g3
286 st %g3,[RES_PTR+28]
287 addx %g0,%g0,%o4 ! save cy in register
288 addcc SIZE,-8,SIZE
289 add S1_PTR,32,S1_PTR
290 add S2_PTR,32,S2_PTR
291 add RES_PTR,32,RES_PTR
292 bge LOC(loop2)
293 subcc %g0,%o4,%g0 ! restore cy
294
/* Switch to the 2-limb bias: SIZE becomes (limbs not yet subtracted) - 2.  */
295LOC(fin2):
296 addcc SIZE,8-2,SIZE
297 blt LOC(end2)
298 subcc %g0,%o4,%g0 ! restore cy
299LOC(loope2):
300 ldd [S1_PTR+0],%g2
301 ldd [S2_PTR+0],%o4
302 subxcc %g2,%o4,%g2
303 st %g2,[RES_PTR+0]
304 subxcc %g3,%o5,%g3
305 st %g3,[RES_PTR+4]
306 addx %g0,%g0,%o4 ! save cy in register
307 addcc SIZE,-2,SIZE
308 add S1_PTR,8,S1_PTR
309 add S2_PTR,8,S2_PTR
310 add RES_PTR,8,RES_PTR
311 bge LOC(loope2)
312 subcc %g0,%o4,%g0 ! restore cy
/* Tail shared with V1a and V1b (reached from v1 and v1b when fewer than 2
   limbs remain).  The borrow is held in %o4 on arrival, and any remaining
   limb is at offset 0 of the three pointers.  */
313LOC(end2):
314 andcc SIZE,1,%g0
315 be LOC(ret2)
316 subcc %g0,%o4,%g0 ! restore cy
317/* Subtract last limb */
318LOC(jone):
319 ld [S1_PTR],%g4
320 ld [S2_PTR],%g2
321 subxcc %g4,%g2,%o4
322 st %o4,[RES_PTR]
323
324LOC(ret2):
325 retl
326 addx %g0,%g0,%o0 ! return carry-out from most sign. limb
327
328END(__mpn_sub_n)
329

! Source: glibc/sysdeps/sparc/sparc32/sub_n.S