1 | ! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and |
2 | ! store the product in a second limb vector. |
3 | |
4 | ! Copyright (C) 1992-2024 Free Software Foundation, Inc. |
5 | |
6 | ! This file is part of the GNU MP Library. |
7 | |
8 | ! The GNU MP Library is free software; you can redistribute it and/or modify |
9 | ! it under the terms of the GNU Lesser General Public License as published by |
10 | ! the Free Software Foundation; either version 2.1 of the License, or (at your |
11 | ! option) any later version. |
12 | |
13 | ! The GNU MP Library is distributed in the hope that it will be useful, but |
14 | ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
15 | ! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
16 | ! License for more details. |
17 | |
18 | ! You should have received a copy of the GNU Lesser General Public License |
19 | ! along with the GNU MP Library; see the file COPYING.LIB. If not, |
20 | ! see <https://www.gnu.org/licenses/>. |
21 | |
22 | |
23 | ! INPUT PARAMETERS |
24 | ! res_ptr o0 |
25 | ! s1_ptr o1 |
26 | ! size o2 |
27 | ! s2_limb o3 |
28 | |
29 | #include <sysdep.h> |
30 | |
/* __mpn_mul_1: multiply the size-limb vector at s1_ptr by s2_limb, store */
/* the low word of each product plus carry-in at res_ptr, and return the */
/* final carry limb in %o0. */
/* Arguments: %o0 = res_ptr, %o1 = s1_ptr, %o2 = size, %o3 = s2_limb. */
/* Working registers: %o4 = source limb being multiplied, %g3 = low word */
/* of the current product (also the dispatch table base during entry), */
/* %g2 = carry limb handed to the next product, %g1 = dispatch offset, */
/* %g4 = saved copy of %o7. */
/* The loop handles four limbs per pass and is entered at a point */
/* selected by size mod 4. */
/* NOTE(review): every path loads [%o1+0] and stores at least one limb, so */
/* size >= 1 appears to be assumed - confirm against callers. */
31 | ENTRY(__mpn_mul_1) |
/* %g1 = (size mod 4) * 16, the byte offset of one 16-byte (four */
/* instruction) dispatch table entry. */
32 | sll %o2,4,%g1 |
33 | mov %o7,%g4 ! Save return address register |
34 | and %g1,(4-1)<<4,%g1 |
/* call writes its own address (label 1) into %o7, and the delay-slot add */
/* turns that into the address of label 3, the dispatch table base. The */
/* table address is thus obtained PC-relatively, which is why %o7 had to */
/* be saved in %g4 above. */
35 | 1: call 2f |
36 | add %o7,3f-1b,%g3 |
37 | 2: mov %g4,%o7 ! Restore return address register |
/* Jump into the dispatch table. The delay slot preloads source limb 0 */
/* into %o4 before any entry adjusts s1_ptr. */
38 | jmp %g3+%g1 |
39 | ld [%o1+0],%o4 ! 1 |
40 | |
/* Dispatch table, indexed by size mod 4 in 16-byte steps. Each entry */
/* biases res_ptr and s1_ptr so that the fixed load and store offsets used */
/* at its loop entry point line up with the start of the two vectors, then */
/* branches to that entry point. The delay-slot orcc zeroes the carry limb */
/* %g2 and clears the carry flag. */
/* NOTE(review): the LOC(10) entry is five instructions long, so LOC(11) */
/* sits at table offset 52 while the computed jump for size mod 4 == 3 */
/* lands on offset 48, the trailing nop of LOC(10), and falls through into */
/* LOC(11). This assumes LOC() emits nothing but a label - confirm before */
/* adding or removing instructions in this table. */
41 | .align 4 |
42 | 3: |
43 | LOC(00): |
44 | add %o0,-4,%o0 |
45 | add %o1,-4,%o1 |
46 | b LOC(loop00) /* 4, 8, 12, ... */ |
47 | orcc %g0,%g0,%g2 |
/* size mod 4 == 1 needs no pointer bias. The two nops pad the entry to */
/* 16 bytes. */
48 | LOC(01): |
49 | b LOC(loop01) /* 1, 5, 9, ... */ |
50 | orcc %g0,%g0,%g2 |
51 | nop |
52 | nop |
53 | LOC(10): |
54 | add %o0,-12,%o0 /* 2, 6, 10, ... */ |
55 | add %o1,4,%o1 |
56 | b LOC(loop10) |
57 | orcc %g0,%g0,%g2 |
58 | nop |
59 | LOC(11): |
60 | add %o0,-8,%o0 /* 3, 7, 11, ... */ |
61 | add %o1,-8,%o1 |
62 | b LOC(loop11) |
63 | orcc %g0,%g0,%g2 |
64 | |
/* Main loop, four limbs per pass. Each stage follows the same pattern: */
/* umul leaves the low word of limb * s2_limb in %g3 and the high word in */
/* %y, the add folds in the previous high word %g2 plus the carry flag, */
/* the next source limb is loaded into %o4, the low word is stored, and */
/* %y is read into %g2 for the following stage. The carry flag is chained */
/* from stage to stage by addxcc, since none of the instructions placed */
/* in between modify the condition codes. */
/* The numeric tags in the trailing comments appear to mark which of the */
/* four stages an instruction belongs to. */
/* Stage 1 completion: the product was started by the umul in the delay */
/* slot of the loop branch. A plain addcc is used because the previous */
/* carry has already been folded into %g2. */
65 | LOC(loop): |
66 | addcc %g3,%g2,%g3 ! 1 |
67 | ld [%o1+4],%o4 ! 2 |
68 | st %g3,[%o0+0] ! 1 |
69 | rd %y,%g2 ! 1 |
70 | LOC(loop00): |
71 | umul %o4,%o3,%g3 ! 2 |
72 | addxcc %g3,%g2,%g3 ! 2 |
73 | ld [%o1+8],%o4 ! 3 |
74 | st %g3,[%o0+4] ! 2 |
75 | rd %y,%g2 ! 2 |
/* s1_ptr advances by four limbs in the middle of this stage, so the */
/* load in the next stage uses offset 0. */
76 | LOC(loop11): |
77 | umul %o4,%o3,%g3 ! 3 |
78 | addxcc %g3,%g2,%g3 ! 3 |
79 | ld [%o1+12],%o4 ! 4 |
80 | add %o1,16,%o1 |
81 | st %g3,[%o0+8] ! 3 |
82 | rd %y,%g2 ! 3 |
/* res_ptr advances by four limbs after the store of this stage. */
83 | LOC(loop10): |
84 | umul %o4,%o3,%g3 ! 4 |
85 | addxcc %g3,%g2,%g3 ! 4 |
86 | ld [%o1+0],%o4 ! 1 |
87 | st %g3,[%o0+12] ! 4 |
88 | add %o0,16,%o0 |
89 | rd %y,%g2 ! 4 |
/* Fold the pending carry into %g2 now, because the counter update below */
/* overwrites the condition codes. The high word of a 32x32 product is at */
/* most 0xFFFFFFFE, so adding the carry cannot wrap. */
90 | addx %g0,%g2,%g2 |
/* Loop control: size -= 4, repeat while the result is positive. The */
/* delay-slot umul runs on both outcomes and starts the product of the */
/* limb most recently loaded into %o4. */
91 | LOC(loop01): |
92 | addcc %o2,-4,%o2 |
93 | bg LOC(loop) |
94 | umul %o4,%o3,%g3 ! 1 |
95 | |
/* Last limb: add the carry limb to the low word, store it, and return */
/* the high word plus carry in %o0 from the delay slot of retl. */
96 | addcc %g3,%g2,%g3 ! 4 |
97 | st %g3,[%o0+0] ! 4 |
98 | rd %y,%g2 ! 4 |
99 | retl |
100 | addx %g0,%g2,%o0 |
101 | |
102 | END(__mpn_mul_1) |
103 | |