1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
2 | /* |
3 | * linux/arch/arm/lib/memset.S |
4 | * |
5 | * Copyright (C) 1995-2000 Russell King |
6 | * |
7 | * ASM optimised string functions |
8 | */ |
9 | #include <linux/linkage.h> |
10 | #include <asm/assembler.h> |
11 | #include <asm/unwind.h> |
12 | |
13 | .text |
14 | .align 5 |
15 | |
/*
 * __memset / mmioset / memset: store the low byte of r1 into r2
 * consecutive bytes starting at r0.
 *
 * Register use, as established by the code below:
 *   r0 - destination pointer; never written, so it still holds the
 *        original pointer when the routine returns
 *   r1 - fill value; masked to 8 bits, then replicated into all four
 *        bytes of the word
 *   r2 - number of bytes still to be stored
 *   r3 - (r0 & 3) on entry, later a second copy of the fill word
 *   ip - running store pointer
 *
 * All three entry labels share this single body; memset is declared
 * through WEAK().
 */
16 | ENTRY(__memset) |
17 | ENTRY(mmioset) |
18 | WEAK(memset) |
19 | UNWIND( .fnstart ) |
20 | and r1, r1, #255 @ cast to unsigned char |
21 | ands r3, r0, #3 @ 1 unaligned? |
22 | mov ip, r0 @ preserve r0 as return value |
23 | bne 6f @ 1 |
24 | /* |
25 | * we know that the pointer in ip is aligned to a word boundary. |
26 | */ |
/*
 * Replicate the fill byte into all four bytes of r1 and keep a second
 * copy in r3, so that multi-register stores can use both.
 */
27 | 1: orr r1, r1, r1, lsl #8 |
28 | orr r1, r1, r1, lsl #16 |
29 | mov r3, r1 |
/*
 * Label 7 is also branched to from __memset64 later in this file
 * ("b 7b").  With fewer than 16 bytes to go, skip the bulk loops and
 * go straight to the tail at label 4.
 */
30 | 7: cmp r2, #16 |
31 | blt 4f |
32 | UNWIND( .fnend ) |
33 | |
/*
 * NOTE(review): CALGN() is not defined in this file (presumably it
 * comes from <asm/assembler.h>).  The simple loop below is assembled
 * when "CALGN(1)+0" evaluates to zero, the cache-line aligning variant
 * otherwise - confirm against the macro definition.
 */
34 | #if ! CALGN(1)+0 |
35 | |
36 | /* |
37 | * We need 2 extra registers for this loop - use r8 and the LR |
38 | */ |
39 | UNWIND( .fnstart ) |
40 | UNWIND( .save {r8, lr} ) |
41 | stmfd sp!, {r8, lr} |
42 | mov r8, r1 |
43 | mov lr, r3 |
44 | |
/*
 * Main loop.  r8 mirrors r1 and lr mirrors r3, so every four-register
 * store writes 16 bytes.  The conditional stores and the branch all
 * act on the flags produced by "subs", so the stores are skipped as
 * soon as fewer than 64 bytes remain.  When the count reaches exactly
 * zero, r8 is restored and the saved lr is loaded into pc to return.
 */
45 | 2: subs r2, r2, #64 |
46 | stmiage ip!, {r1, r3, r8, lr} @ 64 bytes at a time. |
47 | stmiage ip!, {r1, r3, r8, lr} |
48 | stmiage ip!, {r1, r3, r8, lr} |
49 | stmiage ip!, {r1, r3, r8, lr} |
50 | bgt 2b |
51 | ldmfdeq sp!, {r8, pc} @ Now <64 bytes to go. |
52 | /* |
53 | * No need to correct the count; we're only testing bits from now on |
54 | */ |
/*
 * r2 is negative here, but only multiples of 64 were subtracted, so
 * its low six bits still equal the number of bytes left: bit 5 selects
 * a 32-byte store, bit 4 a 16-byte store.
 */
55 | tst r2, #32 |
56 | stmiane ip!, {r1, r3, r8, lr} |
57 | stmiane ip!, {r1, r3, r8, lr} |
58 | tst r2, #16 |
59 | stmiane ip!, {r1, r3, r8, lr} |
/*
 * Restore r8 and lr, then fall through to the sub-16-byte tail at
 * label 4.
 */
60 | ldmfd sp!, {r8, lr} |
61 | UNWIND( .fnend ) |
62 | |
63 | #else |
64 | |
65 | /* |
66 | * This version aligns the destination pointer in order to write |
67 | * whole cache lines at once. |
68 | */ |
69 | |
70 | UNWIND( .fnstart ) |
71 | UNWIND( .save {r4-r8, lr} ) |
72 | stmfd sp!, {r4-r8, lr} |
73 | mov r4, r1 |
74 | mov r5, r3 |
75 | mov r6, r1 |
76 | mov r7, r3 |
77 | mov r8, r1 |
78 | mov lr, r3 |
79 | |
/*
 * Only align when more than 96 bytes remain and ip is not already a
 * multiple of 32; otherwise go directly to the main loop at label 3.
 */
80 | cmp r2, #96 |
81 | tstgt ip, #31 |
82 | ble 3f |
83 | |
/*
 * r8 = number of bytes up to the next 32-byte boundary; remove them
 * from the count.  Shifting r8 left by 28 moves bit 4 into the carry
 * flag (16-byte store), bit 3 into the sign flag (8-byte store) and
 * bit 2 to bit 30, which is then tested for the final 4-byte store.
 * r8 is reloaded with the fill word between that test and the store;
 * the plain "mov" leaves the flags from "tst" untouched.
 */
84 | and r8, ip, #31 |
85 | rsb r8, r8, #32 |
86 | sub r2, r2, r8 |
87 | movs r8, r8, lsl #(32 - 4) |
88 | stmiacs ip!, {r4, r5, r6, r7} |
89 | stmiami ip!, {r4, r5} |
90 | tst r8, #(1 << 30) |
91 | mov r8, r1 |
92 | strne r1, [ip], #4 |
93 | |
/*
 * Main loop: each eight-register store writes 32 bytes, 64 bytes per
 * iteration.  When the count reaches exactly zero, the saved registers
 * are restored and the saved lr is loaded into pc to return.
 */
94 | 3: subs r2, r2, #64 |
95 | stmiage ip!, {r1, r3-r8, lr} |
96 | stmiage ip!, {r1, r3-r8, lr} |
97 | bgt 3b |
98 | ldmfdeq sp!, {r4-r8, pc} |
99 | |
/*
 * Remaining 32-byte and 16-byte pieces, selected by bits 5 and 4 of
 * r2.
 */
100 | tst r2, #32 |
101 | stmiane ip!, {r1, r3-r8, lr} |
102 | tst r2, #16 |
103 | stmiane ip!, {r4-r7} |
/*
 * Restore r4-r8 and lr, then fall through to the sub-16-byte tail at
 * label 4.
 */
104 | ldmfd sp!, {r4-r8, lr} |
105 | UNWIND( .fnend ) |
106 | |
107 | #endif |
108 | |
109 | UNWIND( .fnstart ) |
/*
 * Fewer than 16 bytes left on a word-aligned pointer: bit 3 of r2
 * selects an 8-byte store, bit 2 a 4-byte store.
 */
110 | 4: tst r2, #8 |
111 | stmiane ip!, {r1, r3} |
112 | tst r2, #4 |
113 | strne r1, [ip], #4 |
114 | /* |
115 | * When we get here, we've got less than 4 bytes to set. We |
116 | * may have an unaligned pointer as well. |
117 | */ |
118 | 5: tst r2, #2 |
119 | strbne r1, [ip], #1 |
120 | strbne r1, [ip], #1 |
121 | tst r2, #1 |
122 | strbne r1, [ip], #1 |
123 | ret lr |
124 | |
/*
 * Unaligned destination: r3 = r0 & 3, i.e. 1, 2 or 3.  If fewer than
 * 4 bytes were requested, finish in the byte tail at label 5 (the
 * subtraction of 4 does not change the two low bits of r2 tested
 * there).  Otherwise store 4 - r3 single bytes to reach a word
 * boundary, correct the count and rejoin the aligned path at label 1.
 */
125 | 6: subs r2, r2, #4 @ 1 do we have enough |
126 | blt 5b @ 1 bytes to align with? |
127 | cmp r3, #2 @ 1 |
128 | strblt r1, [ip], #1 @ 1 |
129 | strble r1, [ip], #1 @ 1 |
130 | strb r1, [ip], #1 @ 1 |
131 | add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) |
132 | b 1b |
133 | UNWIND( .fnend ) |
134 | ENDPROC(memset) |
135 | ENDPROC(mmioset) |
136 | ENDPROC(__memset) |
137 | |
/*
 * __memset32: duplicate the value in r1 into r3, then fall through
 * into __memset64, which follows immediately.  There is no return or
 * branch of its own here; the layout of the two entry points must
 * therefore stay as it is.
 */
138 | ENTRY(__memset32) |
139 | UNWIND( .fnstart ) |
140 | mov r3, r1 @ copy r1 to r3 and fall into memset64 |
141 | UNWIND( .fnend ) |
142 | ENDPROC(__memset32) |
/*
 * __memset64: copy r0 into ip as the running store pointer (r0 itself
 * is left untouched as the return value) and branch to label 7 inside
 * memset.  Entering at label 7 bypasses memset's byte masking, byte
 * replication and destination alignment handling, so r1 and r3 are
 * stored exactly as passed in and r2 is consumed by memset's loops as
 * the remaining byte count.
 *
 * NOTE(review): nothing on this path checks that r0 is word aligned -
 * presumably callers guarantee it; confirm.
 */
143 | ENTRY(__memset64) |
144 | UNWIND( .fnstart ) |
145 | mov ip, r0 @ preserve r0 as return value |
146 | b 7b @ jump into the middle of memset |
147 | UNWIND( .fnend ) |
148 | ENDPROC(__memset64) |
149 | |