! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and
! add the result to a second limb vector.

! Copyright (C) 1992-2024 Free Software Foundation, Inc.

! This file is part of the GNU MP Library.

! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Lesser General Public License as published by
! the Free Software Foundation; either version 2.1 of the License, or (at your
! option) any later version.

! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
! License for more details.

! You should have received a copy of the GNU Lesser General Public License
! along with the GNU MP Library; see the file COPYING.LIB.  If not,
! see <https://www.gnu.org/licenses/>.

! INPUT PARAMETERS
! res_ptr	o0	pointer to the limb vector the products are added into
! s1_ptr	o1	pointer to the limb vector that is multiplied by s2_limb
! size		o2	number of limbs in each vector (must be at least 1)
! s2_limb	o3	the multiplier limb
!
! The carry out of the most significant result limb is returned in o0.

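! In C terms the function computes {res_ptr, size} += {s1_ptr, size} * s2_limb
! and returns the final carry limb.  The sketch below is only a reading aid
! for the assembly; the 32-bit limb width matches SPARC v8, but the
! standalone name addmul_1 and the exact types are illustrative assumptions,
! not something this file defines:
!
!	#include <stdint.h>
!
!	uint32_t
!	addmul_1 (uint32_t *res_ptr, const uint32_t *s1_ptr,
!		  long size, uint32_t s2_limb)
!	{
!	  uint32_t cy = 0;			/* running carry limb (%g2 below) */
!	  for (long i = 0; i < size; i++)
!	    {
!	      /* 32x32->64 multiply; the hardware splits this into
!		 umul (low word) and rd %y (high word).  */
!	      uint64_t p = (uint64_t) s1_ptr[i] * s2_limb + cy;
!	      uint32_t low = (uint32_t) p;
!	      cy = (uint32_t) (p >> 32);
!	      res_ptr[i] += low;		/* add into the result vector */
!	      cy += res_ptr[i] < low;		/* carry out of that addition */
!	    }
!	  return cy;				/* final carry, returned in %o0 */
!	}
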
#include <sysdep.h>

ENTRY(__mpn_addmul_1)
	ld	[%o1+0],%o4	! 1
	sll	%o2,4,%g1		! size * 16
	orcc	%g0,%g0,%g2		! clear carry limb %g2 and the carry flag
	mov	%o7,%g4		! Save return address register
	and	%g1,(4-1)<<4,%g1	! (size mod 4) * 16 = offset into branch table
1:	call	2f			! PC-relative trick: %o7 <- address of 1b
	add	%o7,3f-1b,%g3		! %g3 <- address of branch table at 3f
2:	jmp	%g3+%g1			! enter the unrolled code for size mod 4
	mov	%g4,%o7		! Restore return address register

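! Branch table.  Each entry is four instructions (16 bytes) and adjusts
! res_ptr/s1_ptr so that the four-way unrolled loop below can be entered
! at the point matching size mod 4.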
	.align 4
3:
LOC(00):
	add	%o0,-4,%o0
	b	LOC(loop00)	/* 4, 8, 12, ... */
	add	%o1,-4,%o1
	nop
LOC(01):
	b	LOC(loop01)	/* 1, 5, 9, ... */
	nop
	nop
	nop
LOC(10):
	add	%o0,-12,%o0	/* 2, 6, 10, ... */
	b	LOC(loop10)
	add	%o1,4,%o1
	nop
LOC(11):
	add	%o0,-8,%o0	/* 3, 7, 11, ... */
	b	LOC(loop11)
	add	%o1,-8,%o1
	nop

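! Main loop, four limbs per pass.  The "! n" comments tag which of the
! four unrolled iterations an instruction belongs to.  For each limb,
! umul leaves the low product in %g3 and the high product in %y; the low
! product is combined with the running carry limb %g2, rd %y and addx
! rebuild %g2 for the next limb, and the corresponding res_ptr limb is
! loaded, added in, and stored back.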
LOC(loop):
	addcc	%g3,%g2,%g3	! 1	low product + carry limb
	ld	[%o1+4],%o4	! 2
	rd	%y,%g2		! 1	high product
	addx	%g0,%g2,%g2		! fold carry into new carry limb
	ld	[%o0+0],%g1	! 1
	addcc	%g1,%g3,%g3		! add into res_ptr limb
	st	%g3,[%o0+0]	! 1
LOC(loop00):
	umul	%o4,%o3,%g3	! 2
	ld	[%o0+4],%g1	! 2
	addxcc	%g3,%g2,%g3	! 2	low product + carry limb + pending carry
	ld	[%o1+8],%o4	! 3
	rd	%y,%g2		! 2	high product
	addx	%g0,%g2,%g2		! fold carry into new carry limb
	nop
	addcc	%g1,%g3,%g3		! add into res_ptr limb
	st	%g3,[%o0+4]	! 2
LOC(loop11):
	umul	%o4,%o3,%g3	! 3
	addxcc	%g3,%g2,%g3	! 3
	ld	[%o1+12],%o4	! 4
	rd	%y,%g2		! 3
	add	%o1,16,%o1		! advance s1_ptr
	addx	%g0,%g2,%g2
	ld	[%o0+8],%g1	! 3
	addcc	%g1,%g3,%g3
	st	%g3,[%o0+8]	! 3
LOC(loop10):
	umul	%o4,%o3,%g3	! 4
	addxcc	%g3,%g2,%g3	! 4
	ld	[%o1+0],%o4	! 1
	rd	%y,%g2		! 4
	addx	%g0,%g2,%g2
	ld	[%o0+12],%g1	! 4
	addcc	%g1,%g3,%g3
	st	%g3,[%o0+12]	! 4
	add	%o0,16,%o0		! advance res_ptr
	addx	%g0,%g2,%g2		! fold res carry before flags are clobbered
LOC(loop01):
	addcc	%o2,-4,%o2		! four fewer limbs to do
	bg	LOC(loop)
	umul	%o4,%o3,%g3	! 1

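! Wind down: finish the limb whose umul was issued in the delay slot
! above, then return the final carry limb in %o0.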
	addcc	%g3,%g2,%g3	! 4
	rd	%y,%g2		! 4
	addx	%g0,%g2,%g2
	ld	[%o0+0],%g1	! 4
	addcc	%g1,%g3,%g3
	st	%g3,[%o0+0]	! 4
	retl
	addx	%g0,%g2,%o0		! return the carry limb

END(__mpn_addmul_1)