1 | /* memset/bzero -- set memory area to CH/0 |
2 | Highly optimized version for ix86, x>=5. |
3 | Copyright (C) 1996-2024 Free Software Foundation, Inc. |
4 | This file is part of the GNU C Library. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <https://www.gnu.org/licenses/>. */ |
19 | |
20 | #include <sysdep.h> |
21 | #include "asm-syntax.h" |
22 | |
/* Argument offsets relative to %esp inside the memset body, i.e. after
   the "pushl %edi" in the prologue: 4 bytes of return address plus
   4 bytes for the one saved register.  */
#define PARMS 4+4 /* space for 1 saved reg */
#define RTN PARMS
#define DEST RTN     /* void *s: start of the area to fill.  */
#define CHR DEST+4   /* int c: fill value; only the low byte is used.  */
#define LEN CHR+4    /* size_t n: number of bytes to fill.  */
28 | |
	.text
#if defined SHARED && IS_IN (libc)
/* void *__memset_chk (void *s, int c, size_t n, size_t destlen);
   _FORTIFY_SOURCE entry point: verify that the known object size
   DESTLEN is at least N, then fall through into memset below.
   Nothing has been pushed yet at this point, so the arguments sit at
   4(%esp) = s, 8(%esp) = c, 12(%esp) = n, 16(%esp) = destlen.  */
ENTRY (__memset_chk)
	movl	12(%esp), %eax		/* %eax = n.  */
	cmpl	%eax, 16(%esp)		/* destlen < n?  */
	jb	HIDDEN_JUMPTARGET (__chk_fail)	/* Buffer overflow: abort.  */
	/* Size check passed: fall through into memset.  */
END (__memset_chk)
libc_hidden_builtin_def (__memset_chk)
#endif
/* void *memset (void *s, int c, size_t n)
   Fill the first N bytes of S with the byte C and return S.
   i586 strategy: replicate C into all four bytes of %eax, align the
   destination to a 4-byte boundary, then store 32 bytes per iteration
   with plain movl's -- touching one word of the destination line first
   so the cache line is fetched before the stores -- and finish the
   tail with rep stosl / rep stosb.  */
ENTRY (memset)

	pushl %edi			/* %edi is callee-saved; it becomes the
					   running destination pointer.  */
	cfi_adjust_cfa_offset (4)

	movl DEST(%esp), %edi
	cfi_rel_offset (edi, 0)
	movl LEN(%esp), %edx		/* %edx = byte count.  */
	movb CHR(%esp), %al		/* Replicate the fill byte into all */
	movb %al, %ah			/* four bytes of %eax: low byte to  */
	movl %eax, %ecx			/* bits 8-15, then copy the low     */
	shll $16, %eax			/* word into the high word.         */
	movw %cx, %ax
	cld				/* String stores count upward.  */

/* If less than 36 bytes to write, skip tricky code (it wouldn't work).  */
	cmpl $36, %edx
	movl %edx, %ecx			/* needed when branch is taken!  On the
					   short path L(2) expects %ecx = n.  */
	jl L(2)

/* First write 0-3 bytes to make the pointer 32-bit aligned.  */
	movl %edi, %ecx			/* Copy ptr to ecx...  */
	negl %ecx			/* ...and negate that and...  */
	andl $3, %ecx			/* ...mask to get byte count (-s) & 3.  */
	subl %ecx, %edx			/* adjust global byte count  */
	rep
	stosb

	subl $32, %edx			/* Bias the count so the loop can test
					   the sign flag: iterate while another
					   full 32 bytes remain.  */
	movl (%edi), %ecx		/* Fetch destination cache line; the
					   loaded value itself is unused.  */

	.align 2, 0x90			/* supply 0x90 for broken assemblers  */
L(1):	movl 28(%edi), %ecx		/* allocate cache line for destination  */
	subl $32, %edx			/* decr loop count  */
	movl %eax, 0(%edi)		/* store words pairwise  */
	movl %eax, 4(%edi)
	movl %eax, 8(%edi)
	movl %eax, 12(%edi)
	movl %eax, 16(%edi)
	movl %eax, 20(%edi)
	movl %eax, 24(%edi)
	movl %eax, 28(%edi)
	leal 32(%edi), %edi		/* update destination pointer; leal does
					   not touch the flags, so...  */
	jge L(1)			/* ...this still tests the subl above.  */

	leal 32(%edx), %ecx		/* reset offset count: %edx is now in
					   [-32,-1], so %ecx = remaining bytes
					   in 0..31.  */

/* Write last 0-7 full 32-bit words (up to 8 words if loop was skipped).  */
L(2):	shrl $2, %ecx			/* convert byte count to longword count  */
	rep
	stosl

/* Finally write the last 0-3 bytes.  %edx & 3 is the correct tail count on
   both paths: on the aligned path only the alignment bytes and multiples of
   32 were subtracted from it, and on the short path it is still n.  */
	movl %edx, %ecx
	andl $3, %ecx
	rep
	stosb

/* Load result (only if used as memset).  */
	movl DEST(%esp), %eax		/* start address of destination is result  */
	popl %edi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (edi)

	ret
END (memset)
libc_hidden_builtin_def (memset)
105 | |