1 | /* PowerPC64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add |
2 | the result to a second limb vector. |
3 | Copyright (C) 1999-2024 Free Software Foundation, Inc. |
4 | This file is part of the GNU C Library. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <https://www.gnu.org/licenses/>. */ |
19 | |
#include <sysdep.h>

/* One source builds both entry points: with USE_AS_SUBMUL defined the
   limb products are subtracted from the result vector (subfc/subfe
   borrow chain); otherwise they are added (addc/adde carry chain).  */
#ifdef USE_AS_SUBMUL
# define FUNC __mpn_submul_1
# define ADDSUBC subfe
# define ADDSUB subfc
#else
# define FUNC __mpn_addmul_1
# define ADDSUBC adde
# define ADDSUB addc
#endif

/* Argument registers per the PowerPC64 calling convention.  */
#define RP r3		/* result limb-vector pointer */
#define UP r4		/* source limb-vector pointer */
#define N r5		/* limb count; later reused as scratch */
#define VL r6		/* multiplier limb */

/* Save slots for callee-saved r27..r31, at negative offsets from r1,
   i.e. in the protected zone below the stack pointer, so the function
   needs no stack frame.  */
#define R27SAVE (-40)
#define R28SAVE (-32)
#define R29SAVE (-24)
#define R30SAVE (-16)
#define R31SAVE (-8)
42 | |
/* mp_limb_t FUNC (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)

   Multiply the n-limb vector at UP by the single limb VL and add the
   product to (or, when built as __mpn_submul_1, subtract it from) the
   n-limb vector at RP.  The carry-out (resp. borrow-out) limb is
   returned in r3.  Assumes n >= 1.

   The main loop handles four limbs per iteration; the entry code
   dispatches on n mod 4 to one of four lead-ins (b00/b01/b10/b11) so
   the loop always operates on whole groups of four.  */
ENTRY_TOCLESS (FUNC, 5)
	/* Save r27..r31 below r1 (protected zone): no frame needed.  */
	std	r31, R31SAVE(r1)
	rldicl.	r0, N, 0, 62		/* r0 = n mod 4; sets cr0.  */
	std	r30, R30SAVE(r1)
	cmpdi	VL, r0, 2		/* VL expands to 6: compares in cr6.  */
	std	r29, R29SAVE(r1)
	addi	N, N, 3
	std	r28, R28SAVE(r1)
	srdi	N, N, 2			/* N = ceil(n/4) = iteration count.  */
	std	r27, R27SAVE(r1)
	cfi_offset(r31, R31SAVE)
	cfi_offset(r30, R30SAVE)
	cfi_offset(r29, R29SAVE)
	cfi_offset(r28, R28SAVE)
	cfi_offset(r27, R27SAVE)
	mtctr	N			/* N (r5) is free scratch from here.  */
	beq	cr0, L(b00)		/* n mod 4 == 0.  */
	blt	cr6, L(b01)		/* n mod 4 == 1.  */
	beq	cr6, L(b10)		/* n mod 4 == 2.  */

	/* n mod 4 == 3: process one limb, leaving r12 = high-limb carry
	   and CA = carry/borrow from the result update.  */
L(b11):	ld	r9, 0(UP)
	ld	r28, 0(RP)
	mulld	r0, r9, VL
	mulhdu	r12, r9, VL
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	addi	RP, RP, 8
	ld	r9, 8(UP)
	ld	r27, 16(UP)
	addi	UP, UP, 24
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* save borrow: r11 = CA - 1.  */
#endif
	b	L(bot)

	.align	4
	/* n mod 4 == 0: process two limbs before entering the loop.  */
L(b00):	ld	r9, 0(UP)
	ld	r27, 8(UP)
	ld	r28, 0(RP)
	ld	r29, 8(RP)
	mulld	r0, r9, VL
	mulhdu	N, r9, VL
	mulld	r7, r27, VL
	mulhdu	r8, r27, VL
	addc	r7, r7, N		/* chain low/high product limbs.  */
	addze	r12, r8			/* r12 = carry into the loop.  */
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	ADDSUBC	r7, r7, r29
	std	r7, 8(RP)
	addi	RP, RP, 16
	ld	r9, 16(UP)
	ld	r27, 24(UP)
	addi	UP, UP, 32
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* save borrow: r11 = CA - 1.  */
#endif
	b	L(bot)

	.align	4
	/* n mod 4 == 1.  */
L(b01):	bdnz	L(gt1)			/* more than one limb?  */
	/* n == 1: single multiply-accumulate, no loop.  r27..r31 were
	   stored but never modified on this path, so just return.  */
	ld	r9, 0(UP)
	ld	r11, 0(RP)
	mulld	r0, r9, VL
	mulhdu	r8, r9, VL
	ADDSUB	r0, r0, r11
	std	r0, 0(RP)
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11
	addic	r11, r11, 1		/* CA = 1 iff a borrow occurred.  */
#endif
	addze	RP, r8			/* return carry/borrow limb in r3.  */
	blr

	/* n mod 4 == 1, n > 1: process three limbs before the loop.  */
L(gt1):	ld	r9, 0(UP)
	ld	r27, 8(UP)
	mulld	r0, r9, VL
	mulhdu	N, r9, VL
	mulld	r7, r27, VL
	mulhdu	r8, r27, VL
	ld	r9, 16(UP)
	ld	r28, 0(RP)
	ld	r29, 8(RP)
	ld	r30, 16(RP)
	mulld	r11, r9, VL
	mulhdu	r10, r9, VL
	addc	r7, r7, N
	adde	r11, r11, r8
	addze	r12, r10		/* r12 = carry into the loop.  */
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	ADDSUBC	r7, r7, r29
	std	r7, 8(RP)
	ADDSUBC	r11, r11, r30
	std	r11, 16(RP)
	addi	RP, RP, 24
	ld	r9, 24(UP)
	ld	r27, 32(UP)
	addi	UP, UP, 40
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* save borrow: r11 = CA - 1.  */
#endif
	b	L(bot)

	/* n mod 4 == 2: nothing to do before the loop; clear CA and the
	   incoming high-limb carry, then fall through into L(top).
	   NOTE: this was "addic r0, r0, r0", which only assembled as
	   intended because the r0 macro expands to the literal 0 --
	   addic takes an immediate in its third operand.  */
L(b10):	addic	r0, r0, 0		/* clear CA.  */
	li	r12, 0
	ld	r9, 0(UP)
	ld	r27, 8(UP)
	bdz	L(end)			/* n == 2: skip the loop body.  */
	addi	UP, UP, 16

	.align	4
	/* Main loop, four limbs per iteration.  On entry r9/r27 hold the
	   next two source limbs, r12 the high-limb carry from the last
	   group, and CA the carry (addmul) or re-materialized borrow
	   (submul) of the result update.  */
L(top):	mulld	r0, r9, VL
	mulhdu	N, r9, VL
	mulld	r7, r27, VL
	mulhdu	r8, r27, VL
	ld	r9, 0(UP)
	ld	r28, 0(RP)
	ld	r27, 8(UP)
	ld	r29, 8(RP)
	adde	r0, r0, r12		/* fold in carry from last group.  */
	adde	r7, r7, N
	mulld	N, r9, VL
	mulhdu	r10, r9, VL
	mulld	r11, r27, VL
	mulhdu	r12, r27, VL
	ld	r9, 16(UP)
	ld	r30, 16(RP)
	ld	r27, 24(UP)
	ld	r31, 24(RP)
	adde	N, N, r8		/* continue the product chain.  */
	adde	r11, r11, r10
	addze	r12, r12		/* r12 = carry into next group.  */
	ADDSUB	r0, r0, r28		/* update four result limbs.  */
	std	r0, 0(RP)
	ADDSUBC	r7, r7, r29
	std	r7, 8(RP)
	ADDSUBC	N, N, r30
	std	N, 16(RP)
	ADDSUBC	r11, r11, r31
	std	r11, 24(RP)
	addi	UP, UP, 32
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* save borrow across the addic.  */
#endif
	addi	RP, RP, 32
L(bot):
#ifdef USE_AS_SUBMUL
	addic	r11, r11, 1		/* restore borrow into CA.  */
#endif
	bdnz	L(top)

	/* Wind-down: the final two limbs of the last group.  */
L(end):	mulld	r0, r9, VL
	mulhdu	N, r9, VL
	mulld	r7, r27, VL
	mulhdu	r8, r27, VL
	ld	r28, 0(RP)
	ld	r29, 8(RP)
	adde	r0, r0, r12
	adde	r7, r7, N
	addze	r8, r8
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	ADDSUBC	r7, r7, r29
	std	r7, 8(RP)
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11
	addic	r11, r11, 1		/* CA = 1 iff a borrow occurred.  */
#endif
	addze	RP, r8			/* return carry/borrow limb in r3.  */
	ld	r31, R31SAVE(r1)
	ld	r30, R30SAVE(r1)
	ld	r29, R29SAVE(r1)
	ld	r28, R28SAVE(r1)
	ld	r27, R27SAVE(r1)
	blr
END(FUNC)
220 | |