1 | /* x86-64 __mpn_add_n -- Add two limb vectors of the same length > 0 and store |
---|---|

2 | sum in a third limb vector. |

3 | Copyright (C) 2006-2019 Free Software Foundation, Inc. |

4 | This file is part of the GNU MP Library. |

5 | |

6 | The GNU MP Library is free software; you can redistribute it and/or modify |

7 | it under the terms of the GNU Lesser General Public License as published by |

8 | the Free Software Foundation; either version 2.1 of the License, or (at your |

9 | option) any later version. |

10 | |

11 | The GNU MP Library is distributed in the hope that it will be useful, but |

12 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |

13 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |

14 | License for more details. |

15 | |

16 | You should have received a copy of the GNU Lesser General Public License |

17 | along with the GNU MP Library; see the file COPYING.LIB. If not, |

18 | see <http://www.gnu.org/licenses/>. */ |

19 | |

20 | #include "sysdep.h" |

21 | #include "asm-syntax.h" |

22 | |

/* Operand registers used by the routine in this file: rp is the pointer
   the result limbs are stored through, up and vp are the two pointers the
   source limbs are loaded from, and n is the limb count (it is scaled by 8
   in every address computation).
   NOTE(review): these are the first four integer argument registers of
   the System V AMD64 calling convention -- confirm that is the intended
   ABI.  */
23 | #define rp %rdi |

24 | #define up %rsi |

25 | #define vp %rdx |

26 | #define n %rcx |

/* Alias for the carry register.  The code in this file writes %r8
   directly and only mentions cy in comments.  */
27 | #define cy %r8 |

28 | |

/* Defaults, used only when func has not been defined before this point:
   the routine is named __mpn_add_n and ADCSBB expands to adc.  ADCSBB is
   defined inside the same guard, so anything that pre-defines func must
   also supply ADCSBB itself.
   NOTE(review): presumably a sibling file pre-defines both to reuse this
   body for another operation -- not visible from here.  */
29 | #ifndef func |

30 | # define func __mpn_add_n |

31 | # define ADCSBB adc |

32 | #endif |

33 | |

34 | .text |

/* func (__mpn_add_n by default)

   For i = 0 .. n-1 computes rp[i] = up[i] ADCSBB vp[i] on 64-bit limbs,
   chaining the carry flag from each limb to the next, and returns the
   final carry flag (0 or 1) in %eax.

   In:   rp, up, vp, n as named by the register defines earlier in this
         file.  n must be > 0: limb 0 of both sources is loaded before n
         is examined.
   Out:  %eax = carry flag left by the last ADCSBB.
   Uses: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11 and the flags; no stack.

   Structure: a 4-way unrolled, software-pipelined loop.  Two register
   pairs, %r10/%r11 and %r8/%r9, alternate as "pending (up limb, vp limb)":
   each step loads the next pair into one of them, then combines and stores
   the other.  n is negated and rounded so that it counts upwards to exactly
   0 in steps of 4; n mod 4 selects where the loop body is entered.  The
   carry lives in CF for the whole loop, so only instructions that leave CF
   untouched (mov, lea, jmp, jrcxz) appear between consecutive ADCSBBs.  */
35 | ENTRY (func) |

/* %r8 = 0.  Each L(bXX) entry stub below shifts this into CF.  */
36 | xor %r8, %r8 |

/* Preload limb 0 of both sources as the first pending pair.  */
37 | mov (up), %r10 |

38 | mov (vp), %r11 |

39 | |

/* Bias the pointers for negative indexing: up = &up[n-1], vp = &vp[n-1],
   rp = &rp[n-2].  lea is used so that no flags are involved.  */
40 | lea -8(up,n,8), up |

41 | lea -8(vp,n,8), vp |

42 | lea -16(rp,n,8), rp |

/* %eax = n mod 4 (only the low two bits are needed, so the 32-bit copy is
   enough); n itself becomes -n.  The and sets ZF for the je that follows.  */
43 | mov %ecx, %eax |

44 | neg n |

45 | and $3, %eax |

/* n mod 4 == 0.  */
46 | je L(b00) |

/* n = -(n rounded down to a multiple of 4).  */
47 | add %rax, n /* clear low rcx bits so that stepping by 4 reaches exactly 0 for jrcxz */ |

/* Dispatch on n mod 4: 1 -> L(b01), 2 -> L(b10), 3 falls through.  */
48 | cmp $2, %eax |

49 | jl L(b01) |

50 | je L(b10) |

51 | |

/* Entry stubs.  The dispatch instructions above also write CF, so every
   stub reloads CF from %r8 with a one-bit shr before joining the loop.  */

/* n mod 4 == 3: the pending pair is already in %r10/%r11, as L(e11)
   expects.  */
52 | L(b11): shr %r8 /* set cy: CF = bit 0 of %r8, which is 0 */ |

53 | jmp L(e11) |

54 | |

/* n mod 4 == 0: L(e00) combines %r8/%r9, so move the preloaded pair
   there.  n is advanced by 4 here, as the loop bottom does before L(top),
   because L(e00) uses the offsets of an iteration entered through L(top).
   lea leaves CF intact.  */
55 | L(b00): shr %r8 /* set cy: CF = bit 0 of %r8, which is 0 */ |

56 | mov %r10, %r8 |

57 | mov %r11, %r9 |

58 | lea 4(n), n |

59 | jmp L(e00) |

60 | |

/* n mod 4 == 1: go straight to the loop bottom with limb 0 pending in
   %r10/%r11.  For n == 1, n is already 0 there and L(end) finishes.  */
61 | L(b01): shr %r8 /* set cy: CF = bit 0 of %r8, which is 0 */ |

62 | jmp L(e01) |

63 | |

/* n mod 4 == 2: L(e10) combines %r8/%r9, so move the preloaded pair
   there.  */
64 | L(b10): shr %r8 /* set cy: CF = bit 0 of %r8, which is 0 */ |

65 | mov %r10, %r8 |

66 | mov %r11, %r9 |

67 | jmp L(e10) |

68 | |

/* Exit, reached only from the jrcxz at L(e01) with n == 0.  %r10/%r11
   hold the last limb pair: combine it, store it to rp[n-1] (8 bytes past
   the biased rp), and return the resulting CF in %eax.  */
69 | L(end): ADCSBB %r11, %r10 |

70 | mov %r10, 8(rp) |

71 | mov %ecx, %eax /* clear eax: ecx is 0 here, and mov leaves CF untouched */ |

/* %eax = 0 + 0 + CF.  */
72 | adc %eax, %eax |

73 | ret |

74 | |

/* Align the loop head to a 16-byte boundary.  */
75 | .p2align 4 |

/* Main loop.  On arrival at L(top), %r10/%r11 hold a loaded but not yet
   combined limb pair and CF holds the carry out of the previous limb.  */
76 | L(top): |

/* Step 1: load the next pair into %r8/%r9, then combine and store the
   pending %r10/%r11.  */
77 | mov -24(up,n,8), %r8 |

78 | mov -24(vp,n,8), %r9 |

79 | ADCSBB %r11, %r10 |

80 | mov %r10, -24(rp,n,8) |

/* Step 2 (entry for n mod 4 == 0): load into %r10/%r11, combine and store
   %r8/%r9.  */
81 | L(e00): |

82 | mov -16(up,n,8), %r10 |

83 | mov -16(vp,n,8), %r11 |

84 | ADCSBB %r9, %r8 |

85 | mov %r8, -16(rp,n,8) |

/* Step 3 (entry for n mod 4 == 3): load into %r8/%r9, combine and store
   %r10/%r11.  */
86 | L(e11): |

87 | mov -8(up,n,8), %r8 |

88 | mov -8(vp,n,8), %r9 |

89 | ADCSBB %r11, %r10 |

90 | mov %r10, -8(rp,n,8) |

/* Step 4 (entry for n mod 4 == 2): load into %r10/%r11, combine and store
   %r8/%r9.  This leaves the pair L(top) or L(end) will consume.  */
91 | L(e10): |

92 | mov (up,n,8), %r10 |

93 | mov (vp,n,8), %r11 |

94 | ADCSBB %r9, %r8 |

95 | mov %r8, (rp,n,8) |

/* Loop bottom (entry for n mod 4 == 1).  jrcxz tests %rcx itself, and lea
   and jmp do not write flags, so CF survives into L(end) or L(top).  */
96 | L(e01): |

/* n == 0: only the pending pair is left.  */
97 | jrcxz L(end) |

/* Otherwise advance by one unrolled iteration (4 limbs).  */
98 | lea 4(n), n |

99 | jmp L(top) |

100 | END (func) |

101 |