/* Pentium __mpn_add_n -- Add two limb vectors of the same length > 0 and store
   sum in a third limb vector.
   Copyright (C) 1992-2024 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
   see <https://www.gnu.org/licenses/>.  */

#include "sysdep.h"
#include "asm-syntax.h"

/* Offsets of the stack arguments from %esp, valid once the prologue has
   pushed its four registers: the 4-byte return address plus 16 bytes of
   saved registers lie below the first argument.  The arguments are
   consecutive 4-byte slots.  */
#define PARMS	4+16		/* space for 4 saved regs */
#define RES	PARMS		/* destination vector pointer */
#define S1	RES+4		/* first source vector pointer */
#define S2	S1+4		/* second source vector pointer */
#define SIZE	S2+4		/* number of limbs in each vector */

/* __mpn_add_n -- add the limb vectors at S1 and S2, each SIZE limbs of
   4 bytes (SIZE > 0), store the SIZE-limb sum at RES and return the
   carry out of the last limb (0 or 1) in %eax.

   All four arguments are read from the stack.  Register roles:
     %edi  destination pointer (RES)
     %esi  first source pointer (S1)
     %ebx  second source pointer (S2)
     %ebp  S2 limb, always loaded one step ahead of the adcl using it
     %ecx  count of 8-limb blocks, (SIZE - 1) / 8
     %edx  count of single limbs, (SIZE - 1) % 8; kept on the stack
	   while the unrolled loop uses %edx as scratch
     %eax  scratch; carry result on return

   SIZE - 1 limbs are handled by the two loops and the final limb by the
   code at L(end2).  The carry flag chains every adcl from first limb to
   last, so only instructions that leave CF untouched (movl, leal, decl,
   incl, pushl, popl, conditional jumps) are placed between them.

   The cfi_* annotations are macros from the included headers; they
   presumably expand to the matching .cfi_* unwind directives.  */

	.text
ENTRY (__mpn_add_n)

	/* Save every register that is overwritten below.  */
	pushl	%edi
	cfi_adjust_cfa_offset (4)
	pushl	%esi
	cfi_adjust_cfa_offset (4)
	pushl	%ebp
	cfi_adjust_cfa_offset (4)
	pushl	%ebx
	cfi_adjust_cfa_offset (4)

	/* Load the arguments.  Each cfi_rel_offset gives the %esp-relative
	   slot the register was pushed into: %ebx at 0, %ebp at 4, %esi at 8,
	   %edi at 12.  */
	movl	RES(%esp),%edi
	cfi_rel_offset (edi, 12)
	movl	S1(%esp),%esi
	cfi_rel_offset (esi, 8)
	movl	S2(%esp),%ebx
	cfi_rel_offset (ebx, 0)
	movl	SIZE(%esp),%ecx
	movl	(%ebx),%ebp		/* preload first limb of S2 */
	cfi_rel_offset (ebp, 4)

	/* Split SIZE - 1 into %ecx blocks of eight limbs and %edx single
	   limbs.  */
	decl	%ecx
	movl	%ecx,%edx
	shrl	$3,%ecx
	andl	$7,%edx
	testl	%ecx,%ecx		/* zero carry flag */
	jz	L(end)			/* no complete 8-limb block */
	pushl	%edx			/* %edx is scratch inside the loop */
	cfi_adjust_cfa_offset (4)

	/* Unrolled loop: eight limbs per iteration as four pairs, L(1) to
	   L(4).  In each pair %eax and %edx receive two S1 limbs, the S2
	   limbs are added from %ebp, and %ebp is reloaded with the S2 limb
	   needed next.  %edi is advanced at the top, hence the negative
	   store offsets.  */
	ALIGN (3)
L(oop):	movl	28(%edi),%eax		/* fetch destination cache line */
	leal	32(%edi),%edi		/* (loaded value is overwritten below) */

L(1):	movl	(%esi),%eax
	movl	4(%esi),%edx
	adcl	%ebp,%eax
	movl	4(%ebx),%ebp
	adcl	%ebp,%edx
	movl	8(%ebx),%ebp
	movl	%eax,-32(%edi)
	movl	%edx,-28(%edi)

L(2):	movl	8(%esi),%eax
	movl	12(%esi),%edx
	adcl	%ebp,%eax
	movl	12(%ebx),%ebp
	adcl	%ebp,%edx
	movl	16(%ebx),%ebp
	movl	%eax,-24(%edi)
	movl	%edx,-20(%edi)

L(3):	movl	16(%esi),%eax
	movl	20(%esi),%edx
	adcl	%ebp,%eax
	movl	20(%ebx),%ebp
	adcl	%ebp,%edx
	movl	24(%ebx),%ebp
	movl	%eax,-16(%edi)
	movl	%edx,-12(%edi)

L(4):	movl	24(%esi),%eax
	movl	28(%esi),%edx
	adcl	%ebp,%eax
	movl	28(%ebx),%ebp
	adcl	%ebp,%edx
	movl	32(%ebx),%ebp		/* first S2 limb of the next step */
	movl	%eax,-8(%edi)
	movl	%edx,-4(%edi)

	/* Advance the sources and count down; leal and decl keep CF.  */
	leal	32(%esi),%esi
	leal	32(%ebx),%ebx
	decl	%ecx
	jnz	L(oop)

	popl	%edx			/* recover (SIZE - 1) % 8 */
	cfi_adjust_cfa_offset (-4)
L(end):
	decl	%edx			/* test %edx w/o clobbering carry */
	js	L(end2)			/* %edx was zero: no single limbs */
	incl	%edx			/* undo the decrement */

	/* One limb per iteration, same look-ahead scheme for %ebp.  */
L(oop2):
	leal	4(%edi),%edi
	movl	(%esi),%eax
	adcl	%ebp,%eax
	movl	4(%ebx),%ebp		/* S2 limb for the next step */
	movl	%eax,-4(%edi)
	leal	4(%esi),%esi
	leal	4(%ebx),%ebx
	decl	%edx
	jnz	L(oop2)

	/* Final limb; %ebp already holds its S2 operand.  */
L(end2):
	movl	(%esi),%eax
	adcl	%ebp,%eax
	movl	%eax,(%edi)

	/* Turn the last carry into the return value: sbbl yields 0 or -1,
	   negl makes that 0 or 1.  */
	sbbl	%eax,%eax
	negl	%eax

	/* Restore the saved registers in reverse push order.  */
	popl	%ebx
	cfi_adjust_cfa_offset (-4)
	cfi_restore (ebx)
	popl	%ebp
	cfi_adjust_cfa_offset (-4)
	cfi_restore (ebp)
	popl	%esi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (esi)
	popl	%edi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (edi)

	ret
END (__mpn_add_n)

/* Source: glibc/sysdeps/i386/i586/add_n.S */