! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and
! add the result to a second limb vector.

! Copyright (C) 1992-2024 Free Software Foundation, Inc.

! This file is part of the GNU MP Library.

! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Lesser General Public License as published by
! the Free Software Foundation; either version 2.1 of the License, or (at your
! option) any later version.

! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
! License for more details.

! You should have received a copy of the GNU Lesser General Public License
! along with the GNU MP Library; see the file COPYING.LIB.  If not,
! see <https://www.gnu.org/licenses/>.

! INPUT PARAMETERS
! res_ptr	o0	pointer to the limb vector the products are added into
! s1_ptr	o1	pointer to the limb vector that is multiplied by s2_limb
! size		o2	number of limbs in each vector (must be at least 1)
! s2_limb	o3	the multiplier limb
!
! The carry out of the most significant result limb is returned in o0.

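! In C terms the function computes {res_ptr, size} += {s1_ptr, size} * s2_limb
! and returns the final carry limb.  The sketch below is only a reading aid
! for the assembly; the 32-bit limb width matches SPARC v8, but the
! standalone name addmul_1 and the exact types are illustrative assumptions,
! not something this file defines:
!
!	#include <stdint.h>
!
!	uint32_t
!	addmul_1 (uint32_t *res_ptr, const uint32_t *s1_ptr,
!		  long size, uint32_t s2_limb)
!	{
!	  uint32_t cy = 0;			/* running carry limb (%g2 below) */
!	  for (long i = 0; i < size; i++)
!	    {
!	      /* 32x32->64 multiply; the hardware splits this into
!		 umul (low word) and rd %y (high word).  */
!	      uint64_t p = (uint64_t) s1_ptr[i] * s2_limb + cy;
!	      uint32_t low = (uint32_t) p;
!	      cy = (uint32_t) (p >> 32);
!	      res_ptr[i] += low;		/* add into the result vector */
!	      cy += res_ptr[i] < low;		/* carry out of that addition */
!	    }
!	  return cy;				/* final carry, returned in %o0 */
!	}
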
#include <sysdep.h>

ENTRY(__mpn_addmul_1)
	ld	[%o1+0],%o4	! 1
	sll	%o2,4,%g1		! size * 16
	orcc	%g0,%g0,%g2		! clear carry limb %g2 and the carry flag
	mov	%o7,%g4		! Save return address register
	and	%g1,(4-1)<<4,%g1	! (size mod 4) * 16 = offset into branch table
1:	call	2f			! PC-relative trick: %o7 <- address of 1b
	add	%o7,3f-1b,%g3		! %g3 <- address of branch table at 3f
2:	jmp	%g3+%g1			! enter the unrolled code for size mod 4
	mov	%g4,%o7		! Restore return address register

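! Branch table.  Each entry is four instructions (16 bytes) and adjusts
! res_ptr/s1_ptr so that the four-way unrolled loop below can be entered
! at the point matching size mod 4.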
	.align 4
3:
LOC(00):
	add	%o0,-4,%o0
	b	LOC(loop00)	/* 4, 8, 12, ... */
	add	%o1,-4,%o1
	nop
LOC(01):
	b	LOC(loop01)	/* 1, 5, 9, ... */
	nop
	nop
	nop
LOC(10):
	add	%o0,-12,%o0	/* 2, 6, 10, ... */
	b	LOC(loop10)
	add	%o1,4,%o1
	nop
LOC(11):
	add	%o0,-8,%o0	/* 3, 7, 11, ... */
	b	LOC(loop11)
	add	%o1,-8,%o1
	nop

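! Main loop, four limbs per pass.  The "! n" comments tag which of the
! four unrolled iterations an instruction belongs to.  For each limb,
! umul leaves the low product in %g3 and the high product in %y; the low
! product is combined with the running carry limb %g2, rd %y and addx
! rebuild %g2 for the next limb, and the corresponding res_ptr limb is
! loaded, added in, and stored back.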
LOC(loop):
	addcc	%g3,%g2,%g3	! 1	low product + carry limb
	ld	[%o1+4],%o4	! 2
	rd	%y,%g2		! 1	high product
	addx	%g0,%g2,%g2		! fold carry into new carry limb
	ld	[%o0+0],%g1	! 1
	addcc	%g1,%g3,%g3		! add into res_ptr limb
	st	%g3,[%o0+0]	! 1
LOC(loop00):
	umul	%o4,%o3,%g3	! 2
	ld	[%o0+4],%g1	! 2
	addxcc	%g3,%g2,%g3	! 2	low product + carry limb + pending carry
	ld	[%o1+8],%o4	! 3
	rd	%y,%g2		! 2	high product
	addx	%g0,%g2,%g2		! fold carry into new carry limb
	nop
	addcc	%g1,%g3,%g3		! add into res_ptr limb
	st	%g3,[%o0+4]	! 2
LOC(loop11):
	umul	%o4,%o3,%g3	! 3
	addxcc	%g3,%g2,%g3	! 3
	ld	[%o1+12],%o4	! 4
	rd	%y,%g2		! 3
	add	%o1,16,%o1		! advance s1_ptr
	addx	%g0,%g2,%g2
	ld	[%o0+8],%g1	! 3
	addcc	%g1,%g3,%g3
	st	%g3,[%o0+8]	! 3
LOC(loop10):
	umul	%o4,%o3,%g3	! 4
	addxcc	%g3,%g2,%g3	! 4
	ld	[%o1+0],%o4	! 1
	rd	%y,%g2		! 4
	addx	%g0,%g2,%g2
	ld	[%o0+12],%g1	! 4
	addcc	%g1,%g3,%g3
	st	%g3,[%o0+12]	! 4
	add	%o0,16,%o0		! advance res_ptr
	addx	%g0,%g2,%g2		! fold res carry before flags are clobbered
LOC(loop01):
	addcc	%o2,-4,%o2		! four fewer limbs to do
	bg	LOC(loop)
	umul	%o4,%o3,%g3	! 1

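! Wind down: finish the limb whose umul was issued in the delay slot
! above, then return the final carry limb in %o0.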
	addcc	%g3,%g2,%g3	! 4
	rd	%y,%g2		! 4
	addx	%g0,%g2,%g2
	ld	[%o0+0],%g1	! 4
	addcc	%g1,%g3,%g3
	st	%g3,[%o0+0]	! 4
	retl
	addx	%g0,%g2,%o0		! return the carry limb

END(__mpn_addmul_1)