1/* PowerPC64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
2 the result to a second limb vector.
3 Copyright (C) 1999-2024 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
19
20#include <sysdep.h>
21
/* Register aliases for the four arguments.  */
#define RP	r3	/* Vector that is read, updated and written back.  */
#define UP	r4	/* Vector whose limbs are multiplied by VL.  */
#define N	r5	/* Number of limbs.  */
#define VL	r6	/* Multiplier limb.  */

/* Offsets from r1 at which r31 down to r27 are saved on entry.  */
#define R31SAVE	(-8)
#define R30SAVE	(-16)
#define R29SAVE	(-24)
#define R28SAVE	(-32)
#define R27SAVE	(-40)

/* This file is built as the add-multiply routine by default and as the
   subtract-multiply routine when USE_AS_SUBMUL is defined.  ADDSUB is
   the instruction that starts a carry chain combining products with the
   RP limbs; ADDSUBC continues that chain.  */
#ifndef USE_AS_SUBMUL
# define FUNC		__mpn_addmul_1
# define ADDSUB		addc
# define ADDSUBC	adde
#else
# define FUNC		__mpn_submul_1
# define ADDSUB		subfc
# define ADDSUBC	subfe
#endif
42
/* FUNC (RP, UP, N, VL)

   For each of the N 64-bit limbs, multiply UP[i] by VL and add the
   product to RP[i] (or, when USE_AS_SUBMUL is defined, subtract it from
   RP[i]), storing the result back to RP[i].  The limb carried out of
   the most significant position is returned in r3.

   Register use:
     r3  RP	updated vector; holds the return value on exit
     r4  UP	source vector
     r5  N	limb count on entry; scratch once the count is in CTR
     r6  VL	multiplier limb
     r0, r7-r12	scratch
     r27-r31	scratch; saved below r1 on entry, reloaded at L(end)

   Structure: L(top) handles four limbs per iteration and L(end) handles
   the last two.  The entry code dispatches on N mod 4 to a feed-in block
   that consumes enough limbs to leave a count congruent to 2 mod 4:
     N mod 4 == 0  L(b00)  two limbs
     N mod 4 == 1  L(b01)  one limb and return if N == 1, else
			   L(gt1) three limbs
     N mod 4 == 2  L(b10)  none
     N mod 4 == 3  L(b11)  one limb
   On arrival at L(top) and L(end), r9 and r27 hold the next two UP
   limbs, r12 holds the high product limb not yet added, and CA holds
   the carry not yet added.

   N = 0 is not handled: it dispatches to L(b00), which processes limbs
   unconditionally.

   NOTE(review): the r27-r31 save area lies below r1 and r1 is never
   adjusted; this presumably relies on the ABI reserving that area for
   leaf functions -- confirm against the ABI document.  */
ENTRY_TOCLESS (FUNC, 5)
	/* Register saves are interleaved with the dispatch computation.  */
	std	r31, R31SAVE(r1)
	rldicl.	r0, N, 0, 62		/* r0 = N mod 4, also sets cr0.  */
	std	r30, R30SAVE(r1)
	cmpdi	cr6, r0, 2		/* cr6 = N mod 4 compared with 2.  */
	std	r29, R29SAVE(r1)
	addi	N, N, 3
	std	r28, R28SAVE(r1)
	srdi	N, N, 2			/* N = (N + 3) / 4.  */
	std	r27, R27SAVE(r1)
	cfi_offset(r31, R31SAVE)
	cfi_offset(r30, R30SAVE)
	cfi_offset(r29, R29SAVE)
	cfi_offset(r28, R28SAVE)
	cfi_offset(r27, R27SAVE)
	mtctr	N			/* From here on N (r5) is scratch.  */
	beq	cr0, L(b00)		/* N mod 4 == 0.  */
	blt	cr6, L(b01)		/* N mod 4 == 1.  */
	beq	cr6, L(b10)		/* N mod 4 == 2.  */

	/* N mod 4 == 3: handle one limb, then preload the next two.  */
L(b11):	ld	r9, 0(UP)
	ld	r28, 0(RP)
	mulld	r0, r9, VL
	mulhdu	r12, r9, VL		/* Pending high limb.  */
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	addi	RP, RP, 8
	ld	r9, 8(UP)
	ld	r27, 16(UP)
	addi	UP, UP, 24
#ifdef USE_AS_SUBMUL
	/* r11 = CA - 1; the addic at L(bot) then leaves CA inverted, so a
	   borrow from the subtract becomes a carry for the next add.  */
	subfe	r11, r11, r11
#endif
	b	L(bot)

	.align	4
	/* N mod 4 == 0: handle two limbs, then preload the next two.  */
L(b00):	ld	r9, 0(UP)
	ld	r27, 8(UP)
	ld	r28, 0(RP)
	ld	r29, 8(RP)
	mulld	r0, r9, VL
	mulhdu	N, r9, VL
	mulld	r7, r27, VL
	mulhdu	r8, r27, VL
	addc	r7, r7, N		/* Chain high(0) into low(1).  */
	addze	r12, r8			/* Pending high limb.  */
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	ADDSUBC	r7, r7, r29
	std	r7, 8(RP)
	addi	RP, RP, 16
	ld	r9, 16(UP)
	ld	r27, 24(UP)
	addi	UP, UP, 32
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* See L(b11).  */
#endif
	b	L(bot)

	.align	4
	/* N mod 4 == 1.  CTR reaches zero here only when N == 1; that case
	   is finished inline and returns without touching r27-r31, so
	   nothing needs to be reloaded.  */
L(b01):	bdnz	L(gt1)
	ld	r9, 0(UP)
	ld	r11, 0(RP)
	mulld	r0, r9, VL
	mulhdu	r8, r9, VL
	ADDSUB	r0, r0, r11
	std	r0, 0(RP)
#ifdef USE_AS_SUBMUL
	/* Invert CA: r11 = CA - 1, then adding 1 carries out exactly when
	   the subtract borrowed.  */
	subfe	r11, r11, r11
	addic	r11, r11, 1
#endif
	addze	RP, r8			/* Return high limb plus carry.  */
	blr

	/* N mod 4 == 1 and N > 1: handle three limbs, then preload the
	   next two.  */
L(gt1):	ld	r9, 0(UP)
	ld	r27, 8(UP)
	mulld	r0, r9, VL
	mulhdu	N, r9, VL
	mulld	r7, r27, VL
	mulhdu	r8, r27, VL
	ld	r9, 16(UP)
	ld	r28, 0(RP)
	ld	r29, 8(RP)
	ld	r30, 16(RP)
	mulld	r11, r9, VL
	mulhdu	r10, r9, VL
	addc	r7, r7, N
	adde	r11, r11, r8
	addze	r12, r10		/* Pending high limb.  */
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	ADDSUBC	r7, r7, r29
	std	r7, 8(RP)
	ADDSUBC	r11, r11, r30
	std	r11, 16(RP)
	addi	RP, RP, 24
	ld	r9, 24(UP)
	ld	r27, 32(UP)
	addi	UP, UP, 40
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* See L(b11).  */
#endif
	b	L(bot)

	/* N mod 4 == 2: nothing to feed in.  Adding the immediate 0 to r0
	   (which holds 2 here) cannot carry, so it just clears CA.  */
L(b10):	addic	r0, r0, 0
	li	r12, 0			/* No pending high limb.  */
	ld	r9, 0(UP)
	ld	r27, 8(UP)
	bdz	L(end)			/* N == 2: only the tail remains.  */
	addi	UP, UP, 16

	.align	4
	/* Main loop: four limbs per iteration.  UP already points past the
	   two preloaded limbs in r9 and r27.  */
L(top):	mulld	r0, r9, VL
	mulhdu	N, r9, VL
	mulld	r7, r27, VL
	mulhdu	r8, r27, VL
	ld	r9, 0(UP)
	ld	r28, 0(RP)
	ld	r27, 8(UP)
	ld	r29, 8(RP)
	/* One carry chain adds the pending high limb and carry, and links
	   each product's high limb to the next product's low limb.  */
	adde	r0, r0, r12
	adde	r7, r7, N
	mulld	N, r9, VL
	mulhdu	r10, r9, VL
	mulld	r11, r27, VL
	mulhdu	r12, r27, VL
	ld	r9, 16(UP)		/* Preload the next pair.  */
	ld	r30, 16(RP)
	ld	r27, 24(UP)
	ld	r31, 24(RP)
	adde	N, N, r8
	adde	r11, r11, r10
	addze	r12, r12		/* New pending high limb.  */
	/* A second chain combines the four product limbs with RP.  */
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	ADDSUBC	r7, r7, r29
	std	r7, 8(RP)
	ADDSUBC	N, N, r30
	std	N, 16(RP)
	ADDSUBC	r11, r11, r31
	std	r11, 24(RP)
	addi	UP, UP, 32
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* r11 = CA - 1.  */
#endif
	addi	RP, RP, 32
L(bot):
#ifdef USE_AS_SUBMUL
	addic	r11, r11, 1		/* CA = 1 iff the subtract borrowed.  */
#endif
	bdnz	L(top)

	/* Tail: the last two limbs, already loaded into r9 and r27.  */
L(end):	mulld	r0, r9, VL
	mulhdu	N, r9, VL
	mulld	r7, r27, VL
	mulhdu	r8, r27, VL
	ld	r28, 0(RP)
	ld	r29, 8(RP)
	adde	r0, r0, r12
	adde	r7, r7, N
	addze	r8, r8
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	ADDSUBC	r7, r7, r29
	std	r7, 8(RP)
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* Invert CA, as in L(b01).  */
	addic	r11, r11, 1
#endif
	addze	RP, r8			/* Return high limb plus carry.  */
	ld	r31, R31SAVE(r1)
	ld	r30, R30SAVE(r1)
	ld	r29, R29SAVE(r1)
	ld	r28, R28SAVE(r1)
	ld	r27, R27SAVE(r1)
	blr
END(FUNC)
220

/* Source: glibc/sysdeps/powerpc/powerpc64/addmul_1.S */