1 | /* Highly optimized version for i586. |
2 | Copyright (C) 1997-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <sysdep.h> |
20 | #include "asm-syntax.h" |
21 | |
22 | #define PARMS 4+8 /* offset of the first argument from %esp inside memcpy: 4 for the return address + 8 space for 2 saved regs */ |
23 | #define RTN PARMS /* alias for PARMS; in this file it only serves as the base for DEST */ |
24 | #define DEST RTN /* first argument, destination pointer: 12(%esp) once both registers are pushed */ |
25 | #define SRC DEST+4 /* second argument, source pointer: 16(%esp) */ |
26 | #define LEN SRC+4 /* third argument, byte count: 20(%esp) */ |
27 | /* The macros above expand unparenthesized; the code uses them only as plain displacements, e.g. DEST(%esp). */ |
28 | .text |
29 | #if defined PIC && IS_IN (libc) /* checked entry point is assembled only when PIC is defined and IS_IN (libc) holds */ |
30 | ENTRY (__memcpy_chk) /* Length check placed in front of memcpy. Nothing is pushed yet, so 12(%esp) is the slot memcpy later reads as LEN; 16(%esp) is the argument after it (presumably the destination object size -- confirm against the C prototype). */ |
31 | movl 12(%esp), %eax /* %eax = byte count */ |
32 | cmpl %eax, 16(%esp) /* set flags from 16(%esp) - %eax */ |
33 | jb HIDDEN_JUMPTARGET (__chk_fail) /* 16(%esp) below the byte count (unsigned): go to __chk_fail */ |
34 | END (__memcpy_chk) /* no ret here: when the check passes, execution continues into the memcpy entry that follows */ |
35 | libc_hidden_builtin_def (__memcpy_chk) |
36 | #endif /* PIC && IS_IN (libc) */ |
37 | ENTRY (memcpy) |
38 | /* Copy LEN bytes from SRC to DEST (stack arguments). Returns DEST in %eax, or with USE_AS_MEMPCPY the address just past the last byte written. Uses %eax, %ecx, %edx and the flags; %edi and %esi are saved and restored. */ |
39 | pushl %edi /* save %edi; popped again before ret */ |
40 | cfi_adjust_cfa_offset (4) |
41 | pushl %esi /* save %esi; popped again before ret */ |
42 | cfi_adjust_cfa_offset (4) |
43 | /* With both registers pushed, the arguments are reachable as DEST/SRC/LEN(%esp). */ |
44 | movl DEST(%esp), %edi /* %edi = destination pointer */ |
45 | cfi_rel_offset (edi, 4) /* saved %edi lives at 4(%esp) */ |
46 | movl SRC(%esp), %esi /* %esi = source pointer */ |
47 | cfi_rel_offset (esi, 0) /* saved %esi lives at 0(%esp) */ |
48 | movl LEN(%esp), %ecx /* %ecx = byte count */ |
49 | movl %edi, %eax /* %eax = dest: input to the alignment computation below, and already the memcpy return value on the short path */ |
50 | |
51 | /* We need this in any case: every path ends in the rep movsb at L(1), and a clear direction flag makes it copy towards higher addresses. */ |
52 | cld |
53 | |
54 | /* Cutoff for the big loop is a size of 32 bytes since otherwise |
55 | the loop will never be entered. */ |
56 | cmpl $32, %ecx |
57 | jbe L(1) /* len <= 32 (unsigned): go straight to the byte copy */ |
58 | /* Byte-copy the 0..3 leading bytes needed to bring %edi up to a 4-byte boundary. */ |
59 | negl %eax /* %eax = -dest */ |
60 | andl $3, %eax /* %eax = (-dest) & 3 = distance to the next 4-byte boundary */ |
61 | subl %eax, %ecx /* %ecx = len minus the leading bytes */ |
62 | xchgl %eax, %ecx /* %ecx = leading byte count, %eax = bytes left after them */ |
63 | |
64 | rep; movsb /* copy the leading bytes; %edi is 4-byte aligned afterwards */ |
65 | |
66 | movl %eax, %ecx /* %ecx = bytes still to copy */ |
67 | subl $32, %ecx /* bias the counter by one 32-byte block */ |
68 | js L(2) /* result negative: skip the block loop; L(2) removes the bias again */ |
69 | |
70 | /* Read ahead to make sure we write in the cache since the stupid |
71 | i586 designers haven't implemented read-on-write-miss. */ |
72 | movl (%edi), %eax /* load from the destination only to touch it; the value is overwritten by the first load of the loop body */ |
73 | L(3): movl 28(%edi), %edx /* likewise touch the last dword of the 32-byte destination block about to be written */ |
74 | |
75 | /* Now correct the loop counter. Please note that in the following |
76 | code the flags are not changed anymore. */ |
77 | subl $32, %ecx /* the sign flag set here is what jns L(3) tests at the bottom; only mov and lea come in between */ |
78 | /* Copy one 32-byte block as eight dwords, two at a time through %eax and %edx. */ |
79 | movl (%esi), %eax |
80 | movl 4(%esi), %edx |
81 | movl %eax, (%edi) |
82 | movl %edx, 4(%edi) |
83 | movl 8(%esi), %eax |
84 | movl 12(%esi), %edx |
85 | movl %eax, 8(%edi) |
86 | movl %edx, 12(%edi) |
87 | movl 16(%esi), %eax |
88 | movl 20(%esi), %edx |
89 | movl %eax, 16(%edi) |
90 | movl %edx, 20(%edi) |
91 | movl 24(%esi), %eax |
92 | movl 28(%esi), %edx |
93 | movl %eax, 24(%edi) |
94 | movl %edx, 28(%edi) |
95 | |
96 | leal 32(%esi), %esi /* advance the source by one block; lea rather than add so the flags survive */ |
97 | leal 32(%edi), %edi /* advance the destination the same way */ |
98 | |
99 | jns L(3) /* counter still non-negative: at least one more full block remains */ |
100 | |
101 | /* Correct extra loop counter modification. */ |
102 | L(2): addl $32, %ecx /* remove the bias: %ecx = number of tail bytes */ |
103 | #ifndef USE_AS_MEMPCPY |
104 | movl DEST(%esp), %eax /* %eax was used as scratch above; reload dest as the return value */ |
105 | #endif |
106 | |
107 | L(1): rep; movsb /* copy the remaining %ecx bytes (the whole buffer when reached from the len <= 32 test) */ |
108 | |
109 | #ifdef USE_AS_MEMPCPY |
110 | movl %edi, %eax /* mempcpy variant: return the advanced %edi, i.e. the address just past the last byte stored */ |
111 | #endif |
112 | /* Restore the saved registers in reverse push order, keeping the CFI in step. */ |
113 | popl %esi |
114 | cfi_adjust_cfa_offset (-4) |
115 | cfi_restore (esi) |
116 | popl %edi |
117 | cfi_adjust_cfa_offset (-4) |
118 | cfi_restore (edi) |
119 | |
120 | ret |
121 | END (memcpy) |
122 | #ifndef USE_AS_MEMPCPY |
123 | libc_hidden_builtin_def (memcpy) /* emitted only when this file is not built with USE_AS_MEMPCPY */ |
124 | #endif |
125 | |