/* x86-64 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
   sum in a third limb vector.
   Copyright (C) 2006-2022 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB. If not,
   see <https://www.gnu.org/licenses/>. */

#include "sysdep.h"
#include "asm-syntax.h"

/* Symbolic names for the registers the routine below works on.  */
#define rp	%rdi		/* destination limb vector */
#define up	%rsi		/* first source limb vector */
#define vp	%rdx		/* second source limb vector */
#define n	%rcx		/* limb count; later reused as the loop index */
#define cy	%r8		/* carry register; the code below spells it %r8 */

/* Defaults, applied only when func has not been defined before this point:
   the entry symbol is __mpn_add_n and the per-limb operation is adc.
   NOTE(review): presumably a sibling file predefines func and ADCSBB and
   then includes this one to build another variant -- confirm.  Both macros
   have to be predefined together, because ADCSBB receives no default when
   func is already set.  */
#ifndef func
# define func __mpn_add_n
# define ADCSBB adc
#endif

/* func (default __mpn_add_n): combine the n-limb vectors at up and vp with
   ADCSBB (default adc), limb by limb starting at the lowest address, store
   each result limb at rp, and return the final carry flag.

   In:   rp (%rdi) = destination, up (%rsi) and vp (%rdx) = sources,
	 n (%rcx) = limb count.  The file header requires n > 0; n == 0 is
	 not handled.
   Out:  %eax = CF left by the last ADCSBB (0 or 1).
   Uses: %rax, %r8-%r11 and the flags; rp, up, vp and n are overwritten.

   The main loop processes four limbs per pass and is entered at one of four
   points selected by n mod 4.  Loads run one limb ahead of the matching
   ADCSBB/store pair and alternate between the register pairs %r10/%r11 and
   %r8/%r9.  Between two ADCSBB instructions only mov, lea, jmp and jrcxz
   execute; none of them writes CF, so the carry chain stays intact.  */
	.text
ENTRY (func)
	xor	%r8, %r8		/* r8 = 0; shifted below to initialise CF */
	mov	(up), %r10		/* preload the first limb of each source */
	mov	(vp), %r11

	/* Rebase the pointers to the high end of the vectors so that n can be
	   turned into a negative index that counts up towards zero.  */
	lea	-8(up,n,8), up		/* up = &up[n-1] */
	lea	-8(vp,n,8), vp		/* vp = &vp[n-1] */
	lea	-16(rp,n,8), rp		/* rp = &rp[n-2] */
	mov	%ecx, %eax
	neg	n
	and	$3, %eax		/* eax = n mod 4 */
	je	L(b00)
	add	%rax, n			/* clear low rcx bits for jrcxz: n is now
					   minus a multiple of 4, so +4 steps
					   land exactly on 0 */
	cmp	$2, %eax
	jl	L(b01)			/* n mod 4 == 1 */
	je	L(b10)			/* n mod 4 == 2 */
					/* otherwise n mod 4 == 3: fall through */

L(b11):	shr	%r8			/* set cy: r8 is 0, so CF becomes 0 */
	jmp	L(e11)

L(b00):	shr	%r8			/* set cy: r8 is 0, so CF becomes 0 */
	mov	%r10, %r8		/* L(e00) consumes the pair r8/r9 */
	mov	%r11, %r9
	lea	4(n), n			/* n += 4 without touching the flags */
	jmp	L(e00)

L(b01):	shr	%r8			/* set cy: r8 is 0, so CF becomes 0 */
	jmp	L(e01)

L(b10):	shr	%r8			/* set cy: r8 is 0, so CF becomes 0 */
	mov	%r10, %r8		/* L(e10) consumes the pair r8/r9 */
	mov	%r11, %r9
	jmp	L(e10)

	/* Exit path: the last limb pair is still pending in r10/r11.  */
L(end):	ADCSBB	%r11, %r10
	mov	%r10, 8(rp)		/* 8(rp) is the top result limb */
	mov	%ecx, %eax		/* clear eax, ecx contains 0 */
	adc	%eax, %eax		/* eax = 0 + 0 + CF = final carry */
	ret

	.p2align 4
L(top):
	mov	-24(up,n,8), %r8	/* load ahead into r8/r9 */
	mov	-24(vp,n,8), %r9
	ADCSBB	%r11, %r10		/* finish the limb held in r10/r11 */
	mov	%r10, -24(rp,n,8)
L(e00):					/* entry for n mod 4 == 0 */
	mov	-16(up,n,8), %r10	/* load ahead into r10/r11 */
	mov	-16(vp,n,8), %r11
	ADCSBB	%r9, %r8		/* finish the limb held in r8/r9 */
	mov	%r8, -16(rp,n,8)
L(e11):					/* entry for n mod 4 == 3 */
	mov	-8(up,n,8), %r8
	mov	-8(vp,n,8), %r9
	ADCSBB	%r11, %r10
	mov	%r10, -8(rp,n,8)
L(e10):					/* entry for n mod 4 == 2 */
	mov	(up,n,8), %r10
	mov	(vp,n,8), %r11
	ADCSBB	%r9, %r8
	mov	%r8, (rp,n,8)
L(e01):					/* entry for n mod 4 == 1 */
	jrcxz	L(end)			/* index reached 0: one limb left; jrcxz
					   tests rcx without altering CF */
	lea	4(n), n			/* advance four limbs, CF preserved */
	jmp	L(top)
END (func)


/* Source: glibc/sysdeps/x86_64/add_n.S */