1! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store
2! sum in a third limb vector.
3!
4! Copyright (C) 1995-2024 Free Software Foundation, Inc.
5!
6! This file is part of the GNU MP Library.
7!
8! The GNU MP Library is free software; you can redistribute it and/or modify
9! it under the terms of the GNU Lesser General Public License as published by
10! the Free Software Foundation; either version 2.1 of the License, or (at your
11! option) any later version.
12!
13! The GNU MP Library is distributed in the hope that it will be useful, but
14! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16! License for more details.
17!
18! You should have received a copy of the GNU Lesser General Public License
19! along with the GNU MP Library; see the file COPYING.LIB. If not,
20! see <https://www.gnu.org/licenses/>.
21
22
23! INPUT PARAMETERS
/* Symbolic names for the four argument registers of __mpn_add_n.  All
   four registers are modified while the routine runs.  */
/* RES_PTR: address the sum limbs are stored to.  */
24#define RES_PTR %o0
/* S1_PTR: address the first addend's limbs are loaded from.  */
25#define S1_PTR %o1
/* S2_PTR: address the second addend's limbs are loaded from.  */
26#define S2_PTR %o2
/* SIZE: limb count; the code rebases and decrements it as it goes.  */
27#define SIZE %o3
28
29#include <sysdep.h>
30
/* __mpn_add_n (RES_PTR, S1_PTR, S2_PTR, SIZE)

   Adds the SIZE-limb vectors at S1_PTR and S2_PTR one 32-bit word at a
   time, lowest address first, chaining the carry through addxcc, and
   stores each sum word at RES_PTR.  The carry out of the last word is
   returned in %o0.

   Besides the four argument registers, the code writes %g1-%g4, %o4 and
   %o5.  It returns with retl and contains no save/restore and no stack
   access.

   Bit 2 of the pointers selects one of three code paths, so that
   doubleword accesses (ldd/std) are used where two pointers share that
   bit:
     V1a  S2_PTR and RES_PTR agree: ldd from S2_PTR, std to RES_PTR,
          single-word ld from S1_PTR.
     V1b  S1_PTR and RES_PTR agree: the two source pointers are swapped
          and the V1a code is used.
     V2   only S1_PTR and S2_PTR agree: ldd from both sources,
          single-word st to RES_PTR.
   NOTE(review): this presumably relies on all three pointers being
   4-byte aligned, so that bit 2 alone decides 8-byte alignment --
   confirm against the callers.

   Conventions used throughout:
   - The instruction written after a branch sits in its delay slot and
     runs whether or not the branch is taken (no branch here is
     annulled).
   - addcc/andcc/cmp on SIZE overwrite the carry flag, so the running
     carry is parked in %o4 with addx %g0,%g0,%o4 and put back with
     subcc %g0,%o4,%g0, usually from a branch delay slot.  */
31ENTRY(__mpn_add_n)
/* Variant selection: compare bit 2 of S2_PTR with bit 2 of RES_PTR.  */
32 xor S2_PTR,RES_PTR,%g1
33 andcc %g1,4,%g0
34 bne LOC(1) ! branch if alignment differs
35 nop
36! ** V1a **
37LOC(0): andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0
38 be LOC(v1) ! if no, branch
39 nop
40/* Add least significant limb separately to align RES_PTR and S2_PTR */
/* Plain addcc is used because this is the first limb and there is no
   carry-in.  Its carry-out is captured by the addx at LOC(v1).  */
41 ld [S1_PTR],%g4
42 add S1_PTR,4,S1_PTR
43 ld [S2_PTR],%g2
44 add S2_PTR,4,S2_PTR
45 add SIZE,-1,SIZE
46 addcc %g4,%g2,%o4
47 st %o4,[RES_PTR]
48 add RES_PTR,4,RES_PTR
49LOC(v1):
/* %o4 = carry so far.  It is 0 when the alignment step was skipped,
   because the andcc at LOC(0) cleared the carry flag.  With fewer than
   2 limbs left, the tail code at LOC(end2) in the V2 section is
   reused.  */
50 addx %g0,%g0,%o4 ! save cy in register
51 cmp SIZE,2 ! if SIZE < 2 ...
52 bl LOC(end2) ! ... branch to tail code
53 subcc %g0,%o4,%g0 ! restore cy
54
/* Software-pipelined start: preload the first limb pair of each source
   (S1 into %g4/%g1, S2 into %g2/%g3).  SIZE is rebased by -10 because
   one pass of loop1 adds 8 limbs and also preloads the pair after them;
   the loop is skipped when the rebased SIZE is negative.  The blt tests
   the flags set by the addcc; the delay slot then restores the
   carry.  */
55 ld [S1_PTR+0],%g4
56 addcc SIZE,-10,SIZE
57 ld [S1_PTR+4],%g1
58 ldd [S2_PTR+0],%g2
59 blt LOC(fin1)
60 subcc %g0,%o4,%g0 ! restore cy
61/* Add blocks of 8 limbs until less than 8 limbs remain */
62LOC(loop1):
/* Four groups of six instructions.  Each group adds the pair already
   held in registers, loads the next pair, and writes both sum words
   with a single std from %o4/%o5.  The last group preloads the pair at
   offsets 32/36, which is the first pair of the next pass.  */
63 addxcc %g4,%g2,%o4
64 ld [S1_PTR+8],%g4
65 addxcc %g1,%g3,%o5
66 ld [S1_PTR+12],%g1
67 ldd [S2_PTR+8],%g2
68 std %o4,[RES_PTR+0]
69 addxcc %g4,%g2,%o4
70 ld [S1_PTR+16],%g4
71 addxcc %g1,%g3,%o5
72 ld [S1_PTR+20],%g1
73 ldd [S2_PTR+16],%g2
74 std %o4,[RES_PTR+8]
75 addxcc %g4,%g2,%o4
76 ld [S1_PTR+24],%g4
77 addxcc %g1,%g3,%o5
78 ld [S1_PTR+28],%g1
79 ldd [S2_PTR+24],%g2
80 std %o4,[RES_PTR+16]
81 addxcc %g4,%g2,%o4
82 ld [S1_PTR+32],%g4
83 addxcc %g1,%g3,%o5
84 ld [S1_PTR+36],%g1
85 ldd [S2_PTR+32],%g2
86 std %o4,[RES_PTR+24]
/* Park the carry, count down 8 limbs, advance all pointers by 32 bytes
   and restore the carry in the delay slot of the loop branch.  */
87 addx %g0,%g0,%o4 ! save cy in register
88 addcc SIZE,-8,SIZE
89 add S1_PTR,32,S1_PTR
90 add S2_PTR,32,S2_PTR
91 add RES_PTR,32,RES_PTR
92 bge LOC(loop1)
93 subcc %g0,%o4,%g0 ! restore cy
94
95LOC(fin1):
/* Rebase SIZE by +6.  The 2-limb loop below is entered only when this
   leaves SIZE non-negative, because each pass adds the pending pair and
   preloads one more pair.  */
96 addcc SIZE,8-2,SIZE
97 blt LOC(end1)
98 subcc %g0,%o4,%g0 ! restore cy
99/* Add blocks of 2 limbs until less than 2 limbs remain */
100LOC(loope1):
101 addxcc %g4,%g2,%o4
102 ld [S1_PTR+8],%g4
103 addxcc %g1,%g3,%o5
104 ld [S1_PTR+12],%g1
105 ldd [S2_PTR+8],%g2
106 std %o4,[RES_PTR+0]
107 addx %g0,%g0,%o4 ! save cy in register
108 addcc SIZE,-2,SIZE
109 add S1_PTR,8,S1_PTR
110 add S2_PTR,8,S2_PTR
111 add RES_PTR,8,RES_PTR
112 bge LOC(loope1)
113 subcc %g0,%o4,%g0 ! restore cy
114LOC(end1):
/* Add the pair that is still pending in %g4/%g2 and %g1/%g3.  The
   pointers are not advanced past it, so a final odd limb is addressed
   at offset 8.  Bit 0 of SIZE is tested to decide whether such a limb
   exists.  */
115 addxcc %g4,%g2,%o4
116 addxcc %g1,%g3,%o5
117 std %o4,[RES_PTR+0]
118 addx %g0,%g0,%o4 ! save cy in register
119
120 andcc SIZE,1,%g0
121 be LOC(ret1)
122 subcc %g0,%o4,%g0 ! restore cy
123/* Add last limb */
124 ld [S1_PTR+8],%g4
125 ld [S2_PTR+8],%g2
126 addxcc %g4,%g2,%o4
127 st %o4,[RES_PTR+8]
128
129LOC(ret1):
/* The addx in the delay slot of retl turns the carry flag into the
   return value 0 or 1 in %o0.  */
130 retl
131 addx %g0,%g0,%o0 ! return carry-out from most sign. limb
132
/* Reached when bit 2 of S2_PTR and RES_PTR differ.  Now compare bit 2
   of S1_PTR with RES_PTR.  */
133LOC(1): xor S1_PTR,RES_PTR,%g1
134 andcc %g1,4,%g0
135 bne LOC(2)
136 nop
137! ** V1b **
/* Swap S1_PTR and S2_PTR through %g1, then run the V1a code.  The last
   move of the swap sits in the delay slot of the branch.  */
138 mov S2_PTR,%g1
139 mov S1_PTR,S2_PTR
140 b LOC(0)
141 mov %g1,S1_PTR
142
143! ** V2 **
144/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
145 alignment of S2_PTR and RES_PTR differ. Since there are only two ways
146 things can be aligned (that we care about) we now know that the alignment
147 of S1_PTR and S2_PTR are the same. */
148
/* A single limb goes straight to LOC(jone).  That code adds with
   addxcc; the carry is clear on this path because the cmp subtracts
   equal values and so produces no borrow.  */
149LOC(2): cmp SIZE,1
150 be LOC(jone)
151 nop
152 andcc S1_PTR,4,%g0 ! S1_PTR unaligned? Side effect: cy=0
153 be LOC(v2) ! if no, branch
154 nop
155/* Add least significant limb separately to align S1_PTR and S2_PTR */
156 ld [S1_PTR],%g4
157 add S1_PTR,4,S1_PTR
158 ld [S2_PTR],%g2
159 add S2_PTR,4,S2_PTR
160 add SIZE,-1,SIZE
161 addcc %g4,%g2,%o4
162 st %o4,[RES_PTR]
163 add RES_PTR,4,RES_PTR
164
165LOC(v2):
/* %o4 = carry so far.  SIZE is rebased by -8: this variant does not
   preload, so one pass of loop2 needs exactly 8 limbs.  */
166 addx %g0,%g0,%o4 ! save cy in register
167 addcc SIZE,-8,SIZE
168 blt LOC(fin2)
169 subcc %g0,%o4,%g0 ! restore cy
170/* Add blocks of 8 limbs until less than 8 limbs remain */
171LOC(loop2):
/* Four groups of six instructions.  Each group loads a pair from both
   sources with ldd, adds them with carry into %g2/%g3, and writes the
   two sum words with separate single-word st instructions.  */
172 ldd [S1_PTR+0],%g2
173 ldd [S2_PTR+0],%o4
174 addxcc %g2,%o4,%g2
175 st %g2,[RES_PTR+0]
176 addxcc %g3,%o5,%g3
177 st %g3,[RES_PTR+4]
178 ldd [S1_PTR+8],%g2
179 ldd [S2_PTR+8],%o4
180 addxcc %g2,%o4,%g2
181 st %g2,[RES_PTR+8]
182 addxcc %g3,%o5,%g3
183 st %g3,[RES_PTR+12]
184 ldd [S1_PTR+16],%g2
185 ldd [S2_PTR+16],%o4
186 addxcc %g2,%o4,%g2
187 st %g2,[RES_PTR+16]
188 addxcc %g3,%o5,%g3
189 st %g3,[RES_PTR+20]
190 ldd [S1_PTR+24],%g2
191 ldd [S2_PTR+24],%o4
192 addxcc %g2,%o4,%g2
193 st %g2,[RES_PTR+24]
194 addxcc %g3,%o5,%g3
195 st %g3,[RES_PTR+28]
/* %o4 was overwritten by the ldd above, so it is free to hold the
   parked carry again.  */
196 addx %g0,%g0,%o4 ! save cy in register
197 addcc SIZE,-8,SIZE
198 add S1_PTR,32,S1_PTR
199 add S2_PTR,32,S2_PTR
200 add RES_PTR,32,RES_PTR
201 bge LOC(loop2)
202 subcc %g0,%o4,%g0 ! restore cy
203
204LOC(fin2):
/* Rebase SIZE by +6; the 2-limb loop below is entered only when this
   leaves SIZE non-negative.  */
205 addcc SIZE,8-2,SIZE
206 blt LOC(end2)
207 subcc %g0,%o4,%g0 ! restore cy
/* Add blocks of 2 limbs, same pattern as one group of loop2.  */
208LOC(loope2):
209 ldd [S1_PTR+0],%g2
210 ldd [S2_PTR+0],%o4
211 addxcc %g2,%o4,%g2
212 st %g2,[RES_PTR+0]
213 addxcc %g3,%o5,%g3
214 st %g3,[RES_PTR+4]
215 addx %g0,%g0,%o4 ! save cy in register
216 addcc SIZE,-2,SIZE
217 add S1_PTR,8,S1_PTR
218 add S2_PTR,8,S2_PTR
219 add RES_PTR,8,RES_PTR
220 bge LOC(loope2)
221 subcc %g0,%o4,%g0 ! restore cy
222LOC(end2):
/* Shared tail, also reached from LOC(v1).  Bit 0 of SIZE is tested to
   decide whether one limb is left; the parked carry in %o4 is restored
   in the delay slot on both outcomes.  */
223 andcc SIZE,1,%g0
224 be LOC(ret2)
225 subcc %g0,%o4,%g0 ! restore cy
226/* Add last limb */
227LOC(jone):
228 ld [S1_PTR],%g4
229 ld [S2_PTR],%g2
230 addxcc %g4,%g2,%o4
231 st %o4,[RES_PTR]
232
233LOC(ret2):
/* The addx in the delay slot of retl turns the carry flag into the
   return value 0 or 1 in %o0.  */
234 retl
235 addx %g0,%g0,%o0 ! return carry-out from most sign. limb
236
237END(__mpn_add_n)
238

/* Source: glibc/sysdeps/sparc/sparc32/add_n.S */