/* Optimized memset for Huawei Kunpeng processor.
   Copyright (C) 2012-2024 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <sysdeps/aarch64/memset-reg.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses
 *
 */

/* __memset_kunpeng: memset variant tuned for Huawei Kunpeng.

   Fills the COUNT bytes starting at DSTIN with the byte held in VALW.
   The register aliases used below (dstin, valw, count, dst, dstend)
   are not defined in this file; presumably they come from
   <sysdeps/aarch64/memset-reg.h> -- confirm there.  No instruction in
   this routine names dstin as a destination register.

   Every byte receives the same value, so stores are allowed to
   overlap.  Each size class writes a fixed pattern anchored at the
   start (dstin) and at the end (dstend) of the buffer:

     0..15    one or two stores of 8, 4, 2 or 1 bytes	(L(less16))
     16..63   16-byte stores from both ends
     64..127  64 bytes from the start, 64 from the end	(L(set127))
     >= 128   one unaligned 16-byte head store, a loop of 16-byte
	      aligned 64-byte steps, then 64 bytes from the end
	      (L(set_long)).  */

ENTRY (__memset_kunpeng)

	PTR_ARG (0)
	SIZE_ARG (2)

	dup	v0.16B, valw		/* Fill byte in all 16 lanes of v0.  */
	add	dstend, dstin, count	/* One past the last byte to set.  */

	cmp	count, 128
	b.hs	L(set_long)

	cmp	count, 16
	b.lo	L(less16)

	/* Set 16..127 bytes.  */
	str	q0, [dstin]
	tbnz	count, 6, L(set127)	/* Bit 6 set: 64..127 bytes.  */
	str	q0, [dstend, -16]
	tbz	count, 5, 1f		/* Bit 5 clear: 16..31 bytes, done.  */
	/* 32..63 bytes: 32 from the start plus 32 from the end.  */
	str	q0, [dstin, 16]
	str	q0, [dstend, -32]
1:	ret

	.p2align 4
	/* Set 64..127 bytes.  Write 64 bytes from the start and
	   64 bytes from the end.  The first 16 bytes were already
	   stored before branching here.  */
L(set127):
	stp	q0, q0, [dstin, 16]
	str	q0, [dstin, 48]
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

	.p2align 4
	/* Set 0..15 bytes.  */
L(less16):
	tbz	count, 3, L(less8)
	/* 8..15 bytes: two possibly overlapping 8-byte stores.  */
	str	d0, [dstin]
	str	d0, [dstend, -8]
	ret
L(less8):
	tbz	count, 2, 2f
	/* 4..7 bytes: two possibly overlapping 4-byte stores.  */
	str	s0, [dstin]
	str	s0, [dstend, -4]
	ret
	/* 0..3 bytes: nothing for 0; otherwise the first byte, plus the
	   last two bytes when bit 1 is set (count is 2 or 3).  */
2:	cbz	count, 3f
	str	b0, [dstin]
	tbz	count, 1, 3f
	str	h0, [dstend, -2]
3:	ret

	.p2align 4
	/* Set 128 or more bytes.  */
L(set_long):
	bic	dst, dstin, 15		/* Round dstin down to 16 bytes.  */
	str	q0, [dstin]		/* Unaligned head store.  */
	sub	count, dstend, dst	/* Count is 16 too large.  */
	sub	dst, dst, 16		/* Dst is biased by -32.  */
	/* Adjust count and bias for loop: 64 for the tail stored after
	   the loop, 16 for the excess noted above, and 1 so that the
	   loop also exits when exactly 64 bytes remain.  */
	sub	count, count, 64 + 16 + 1

	/* Loop body unrolled four times.  Each copy stores 64 bytes at
	   dst + 32 .. dst + 95 and advances dst by 64 through the
	   pre-index writeback of the second stp.  */
1:	stp	q0, q0, [dst, 32]
	stp	q0, q0, [dst, 64]!
	subs	count, count, 64
	b.lo	1f
	stp	q0, q0, [dst, 32]
	stp	q0, q0, [dst, 64]!
	subs	count, count, 64
	b.lo	1f
	stp	q0, q0, [dst, 32]
	stp	q0, q0, [dst, 64]!
	subs	count, count, 64
	b.lo	1f
	stp	q0, q0, [dst, 32]
	stp	q0, q0, [dst, 64]!
	subs	count, count, 64
	b.hs	1b

	/* At most 64 bytes remain unset; store the final 64 bytes
	   relative to dstend, overlapping the loop's output.  */
1:	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

END (__memset_kunpeng)

/* Source: glibc/sysdeps/aarch64/multiarch/memset_kunpeng.S  */