1 | /* |
2 | * Copyright © 2016 Intel Corporation |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice (including the next |
12 | * paragraph) shall be included in all copies or substantial portions of the |
13 | * Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
21 | * IN THE SOFTWARE. |
22 | * |
23 | */ |
24 | |
25 | #include <linux/kernel.h> |
26 | #include <linux/string.h> |
27 | #include <linux/cpufeature.h> |
28 | #include <linux/bug.h> |
29 | #include <linux/build_bug.h> |
30 | #include <asm/fpu/api.h> |
31 | |
32 | #include "i915_memcpy.h" |
33 | |
34 | #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) |
35 | #define CI_BUG_ON(expr) BUG_ON(expr) |
36 | #else |
37 | #define CI_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) |
38 | #endif |
39 | |
40 | static DEFINE_STATIC_KEY_FALSE(has_movntdqa); |
41 | |
42 | static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len) |
43 | { |
44 | kernel_fpu_begin(); |
45 | |
46 | while (len >= 4) { |
47 | asm("movntdqa (%0), %%xmm0\n" |
48 | "movntdqa 16(%0), %%xmm1\n" |
49 | "movntdqa 32(%0), %%xmm2\n" |
50 | "movntdqa 48(%0), %%xmm3\n" |
51 | "movaps %%xmm0, (%1)\n" |
52 | "movaps %%xmm1, 16(%1)\n" |
53 | "movaps %%xmm2, 32(%1)\n" |
54 | "movaps %%xmm3, 48(%1)\n" |
55 | :: "r" (src), "r" (dst) : "memory" ); |
56 | src += 64; |
57 | dst += 64; |
58 | len -= 4; |
59 | } |
60 | while (len--) { |
61 | asm("movntdqa (%0), %%xmm0\n" |
62 | "movaps %%xmm0, (%1)\n" |
63 | :: "r" (src), "r" (dst) : "memory" ); |
64 | src += 16; |
65 | dst += 16; |
66 | } |
67 | |
68 | kernel_fpu_end(); |
69 | } |
70 | |
71 | static void __memcpy_ntdqu(void *dst, const void *src, unsigned long len) |
72 | { |
73 | kernel_fpu_begin(); |
74 | |
75 | while (len >= 4) { |
76 | asm("movntdqa (%0), %%xmm0\n" |
77 | "movntdqa 16(%0), %%xmm1\n" |
78 | "movntdqa 32(%0), %%xmm2\n" |
79 | "movntdqa 48(%0), %%xmm3\n" |
80 | "movups %%xmm0, (%1)\n" |
81 | "movups %%xmm1, 16(%1)\n" |
82 | "movups %%xmm2, 32(%1)\n" |
83 | "movups %%xmm3, 48(%1)\n" |
84 | :: "r" (src), "r" (dst) : "memory" ); |
85 | src += 64; |
86 | dst += 64; |
87 | len -= 4; |
88 | } |
89 | while (len--) { |
90 | asm("movntdqa (%0), %%xmm0\n" |
91 | "movups %%xmm0, (%1)\n" |
92 | :: "r" (src), "r" (dst) : "memory" ); |
93 | src += 16; |
94 | dst += 16; |
95 | } |
96 | |
97 | kernel_fpu_end(); |
98 | } |
99 | |
100 | /** |
101 | * i915_memcpy_from_wc: perform an accelerated *aligned* read from WC |
102 | * @dst: destination pointer |
103 | * @src: source pointer |
104 | * @len: how many bytes to copy |
105 | * |
106 | * i915_memcpy_from_wc copies @len bytes from @src to @dst using |
107 | * non-temporal instructions where available. Note that all arguments |
108 | * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple |
109 | * of 16. |
110 | * |
111 | * To test whether accelerated reads from WC are supported, use |
112 | * i915_memcpy_from_wc(NULL, NULL, 0); |
113 | * |
114 | * Returns true if the copy was successful, false if the preconditions |
115 | * are not met. |
116 | */ |
117 | bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len) |
118 | { |
119 | if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15)) |
120 | return false; |
121 | |
122 | if (static_branch_likely(&has_movntdqa)) { |
123 | if (likely(len)) |
124 | __memcpy_ntdqa(dst, src, len: len >> 4); |
125 | return true; |
126 | } |
127 | |
128 | return false; |
129 | } |
130 | |
/**
 * i915_unaligned_memcpy_from_wc: perform a mostly accelerated read from WC
 * @dst: destination pointer
 * @src: source pointer
 * @len: how many bytes to copy
 *
 * Like i915_memcpy_from_wc(), the unaligned variant copies @len bytes from
 * @src to @dst using non-temporal instructions where available, but
 * accepts that its arguments may not be aligned.
 *
 * Any bytes in front of the first 16-byte boundary of @src are copied with
 * a plain memcpy(). The remainder is rounded up to whole 16-byte chunks, so
 * the last chunk may read up to 15 bytes beyond @src + @len and store the
 * same number of bytes beyond @dst + @len; both buffers must be valid for
 * that overrun.
 */
void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len)
{
	const unsigned long start = (unsigned long)src;
	unsigned long head;

	CI_BUG_ON(!i915_has_memcpy_from_wc());

	/* Distance from @src to the next 16-byte boundary; zero if aligned. */
	head = ALIGN(start, 16) - start;
	if (head) {
		head = min(head, len);

		memcpy(dst, src, head);

		dst += head;
		src += head;
		len -= head;
	}

	if (unlikely(!len))
		return;

	/* The helper takes a count of 16-byte chunks, rounded up here. */
	__memcpy_ntdqu(dst, src, DIV_ROUND_UP(len, 16));
}
162 | |
163 | void i915_memcpy_init_early(struct drm_i915_private *dev_priv) |
164 | { |
165 | /* |
166 | * Some hypervisors (e.g. KVM) don't support VEX-prefix instructions |
167 | * emulation. So don't enable movntdqa in hypervisor guest. |
168 | */ |
169 | if (static_cpu_has(X86_FEATURE_XMM4_1) && |
170 | !boot_cpu_has(X86_FEATURE_HYPERVISOR)) |
171 | static_branch_enable(&has_movntdqa); |
172 | } |
173 | |