1 | /* |
2 | * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu> |
3 | * Copyright (C) 2008-2009 PetaLogix |
4 | * Copyright (C) 2007 John Williams |
5 | * |
 * Reasonably optimised generic C-code for memmove on Microblaze
7 | * This is generic C code to do efficient, alignment-aware memmove. |
8 | * |
9 | * It is based on demo code originally Copyright 2001 by Intel Corp, taken from |
10 | * http://www.embedded.com/showArticle.jhtml?articleID=19205567 |
11 | * |
12 | * Attempts were made, unsuccessfully, to contact the original |
13 | * author of this code (Michael Morrow, Intel). Below is the original |
14 | * copyright notice. |
15 | * |
16 | * This software has been developed by Intel Corporation. |
17 | * Intel specifically disclaims all warranties, express or |
18 | * implied, and all liability, including consequential and |
19 | * other indirect damages, for the use of this program, including |
20 | * liability for infringement of any proprietary rights, |
21 | * and including the warranties of merchantability and fitness |
22 | * for a particular purpose. Intel does not assume any |
23 | * responsibility for and errors which may appear in this program |
24 | * not any responsibility to update it. |
25 | */ |
26 | |
27 | #include <linux/export.h> |
28 | #include <linux/types.h> |
29 | #include <linux/stddef.h> |
30 | #include <linux/compiler.h> |
31 | #include <linux/string.h> |
32 | |
33 | #ifdef CONFIG_OPT_LIB_FUNCTION |
/**
 * memmove - copy a memory area that may overlap its destination
 * @v_dst: start of the destination area
 * @v_src: start of the source area
 * @c: number of bytes to copy
 *
 * If the destination does not lie above the source the copy is handed to
 * memcpy(); this relies on memcpy() copying in ascending address order,
 * which is not visible from this file.
 *
 * Otherwise the copy runs downwards from the end of both areas:
 *  1. single bytes are copied until the destination is word aligned,
 *  2. whole 32-bit words are stored; when the source is not word aligned
 *     at that point each stored word is assembled from two neighbouring
 *     aligned source words with shifts,
 *  3. the remaining 0-3 bytes are copied one at a time.
 *
 * In step 2 the source is only ever read through aligned word loads, so a
 * load may touch up to three bytes outside [@v_src, @v_src + @c) that share
 * an aligned word with the first or last source byte.
 *
 * The shifting variant might perform badly on MicroBlaze systems without a
 * barrel shifter.
 *
 * Return: @v_dst
 */
void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *src = v_src;
	char *dst = v_dst;
	const uint32_t *i_src;
	uint32_t *i_dst;

	if (!c)
		return v_dst;

	/* Use memcpy when source is higher than (or equal to) dest */
	if (v_dst <= v_src)
		return memcpy(v_dst, v_src, c);

	/* FIXME this part needs more test */
	/* Do a descending copy: both cursors start one past the last byte */
	dst += c;
	src += c;

	if (c >= 4) {
		uint32_t value, buf_hold;

		/*
		 * Align the destination to a word boundary by copying the
		 * 0-3 bytes above it. Byte copies are endian independent.
		 * c >= 4 guarantees at least one byte is left afterwards.
		 */
		switch ((unsigned long)dst & 3) {
		case 3:
			*--dst = *--src;
			--c;
			fallthrough;
		case 2:
			*--dst = *--src;
			--c;
			fallthrough;
		case 1:
			*--dst = *--src;
			--c;
		}

		i_dst = (void *)dst;

		/*
		 * Choose a copy scheme based on the source alignment relative
		 * to the (now aligned) destination.
		 *
		 * In the unaligned cases i_src is rounded up to the word
		 * boundary above src. buf_hold carries the bytes of the most
		 * recently loaded source word that lie below the source
		 * cursor and are still to be stored; value is the next lower
		 * source word. Each stored word combines the upper-address
		 * bytes from buf_hold with the lower-address bytes taken from
		 * value:
		 *  - big endian: lower addresses are more significant, so
		 *    value is shifted up and buf_hold fills the low end;
		 *  - little endian: lower addresses are less significant, so
		 *    value is shifted down and buf_hold fills the high end.
		 */
		switch ((unsigned long)src & 3) {
		case 0x0:	/* Both byte offsets are aligned */
			i_src = (const void *)src;

			for (; c >= 4; c -= 4)
				*--i_dst = *--i_src;

			src = (const void *)i_src;
			break;
		case 0x1:	/* Unaligned - Off by 1 */
			/* Word align the source */
			i_src = (const void *)
				(((unsigned long)src + 4) & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer: 1 pending byte */
			buf_hold = *--i_src >> 24;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = value << 8 | buf_hold;
				buf_hold = value >> 24;
			}
#else
			/* Load the holding buffer: 1 pending byte */
			buf_hold = (*--i_src & 0xFF) << 24;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold |
						((value & 0xFFFFFF00) >> 8);
				buf_hold = (value & 0xFF) << 24;
			}
#endif
			/*
			 * Realign the source: i_src is the last word loaded,
			 * of which the lowest-addressed byte is still pending.
			 */
			src = (const void *)i_src;
			src += 1;
			break;
		case 0x2:	/* Unaligned - Off by 2 */
			/* Word align the source */
			i_src = (const void *)
				(((unsigned long)src + 4) & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer: 2 pending bytes */
			buf_hold = *--i_src >> 16;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = value << 16 | buf_hold;
				buf_hold = value >> 16;
			}
#else
			/* Load the holding buffer: 2 pending bytes */
			buf_hold = (*--i_src & 0xFFFF) << 16;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold |
						((value & 0xFFFF0000) >> 16);
				buf_hold = (value & 0xFFFF) << 16;
			}
#endif
			/* Realign the source: 2 bytes of *i_src are pending */
			src = (const void *)i_src;
			src += 2;
			break;
		case 0x3:	/* Unaligned - Off by 3 */
			/* Word align the source */
			i_src = (const void *)
				(((unsigned long)src + 4) & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer: 3 pending bytes */
			buf_hold = *--i_src >> 8;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = value << 24 | buf_hold;
				buf_hold = value >> 8;
			}
#else
			/* Load the holding buffer: 3 pending bytes */
			buf_hold = (*--i_src & 0xFFFFFF) << 8;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold |
						((value & 0xFF000000) >> 24);
				buf_hold = (value & 0xFFFFFF) << 8;
			}
#endif
			/* Realign the source: 3 bytes of *i_src are pending */
			src = (const void *)i_src;
			src += 3;
			break;
		}
		dst = (void *)i_dst;
	}

	/*
	 * Finish off any remaining bytes. c is at most 3 here: it was either
	 * below 4 on entry, or the word loop above ran until it dropped
	 * below 4.
	 */
	switch (c) {
	case 3:
		*--dst = *--src;
		fallthrough;
	case 2:
		*--dst = *--src;
		fallthrough;
	case 1:
		*--dst = *--src;
	}
	return v_dst;
}
194 | EXPORT_SYMBOL(memmove); |
195 | #endif /* CONFIG_OPT_LIB_FUNCTION */ |
196 | |