1 | // SPDX-License-Identifier: GPL-2.0 AND MIT |
2 | /* |
3 | * Copyright © 2022 Intel Corporation |
4 | */ |
5 | |
6 | #include <kunit/test.h> |
7 | #include <kunit/visibility.h> |
8 | |
9 | #include "tests/xe_bo_test.h" |
10 | #include "tests/xe_pci_test.h" |
11 | #include "tests/xe_test.h" |
12 | |
13 | #include "xe_bo_evict.h" |
14 | #include "xe_pci.h" |
15 | #include "xe_pm.h" |
16 | |
17 | static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, |
18 | bool clear, u64 get_val, u64 assign_val, |
19 | struct kunit *test) |
20 | { |
21 | struct dma_fence *fence; |
22 | struct ttm_tt *ttm; |
23 | struct page *page; |
24 | pgoff_t ccs_page; |
25 | long timeout; |
26 | u64 *cpu_map; |
27 | int ret; |
28 | u32 offset; |
29 | |
30 | /* Move bo to VRAM if not already there. */ |
31 | ret = xe_bo_validate(bo, NULL, false); |
32 | if (ret) { |
33 | KUNIT_FAIL(test, "Failed to validate bo.\n" ); |
34 | return ret; |
35 | } |
36 | |
37 | /* Optionally clear bo *and* CCS data in VRAM. */ |
38 | if (clear) { |
39 | fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource); |
40 | if (IS_ERR(ptr: fence)) { |
41 | KUNIT_FAIL(test, "Failed to submit bo clear.\n" ); |
42 | return PTR_ERR(ptr: fence); |
43 | } |
44 | dma_fence_put(fence); |
45 | } |
46 | |
47 | /* Evict to system. CCS data should be copied. */ |
48 | ret = xe_bo_evict(bo, true); |
49 | if (ret) { |
50 | KUNIT_FAIL(test, "Failed to evict bo.\n" ); |
51 | return ret; |
52 | } |
53 | |
54 | /* Sync all migration blits */ |
55 | timeout = dma_resv_wait_timeout(bo->ttm.base.resv, |
56 | DMA_RESV_USAGE_KERNEL, |
57 | true, |
58 | 5 * HZ); |
59 | if (timeout <= 0) { |
60 | KUNIT_FAIL(test, "Failed to sync bo eviction.\n" ); |
61 | return -ETIME; |
62 | } |
63 | |
64 | /* |
65 | * Bo with CCS data is now in system memory. Verify backing store |
66 | * and data integrity. Then assign for the next testing round while |
67 | * we still have a CPU map. |
68 | */ |
69 | ttm = bo->ttm.ttm; |
70 | if (!ttm || !ttm_tt_is_populated(ttm)) { |
71 | KUNIT_FAIL(test, "Bo was not in expected placement.\n" ); |
72 | return -EINVAL; |
73 | } |
74 | |
75 | ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT; |
76 | if (ccs_page >= ttm->num_pages) { |
77 | KUNIT_FAIL(test, "No TTM CCS pages present.\n" ); |
78 | return -EINVAL; |
79 | } |
80 | |
81 | page = ttm->pages[ccs_page]; |
82 | cpu_map = kmap_local_page(page); |
83 | |
84 | /* Check first CCS value */ |
85 | if (cpu_map[0] != get_val) { |
86 | KUNIT_FAIL(test, |
87 | "Expected CCS readout 0x%016llx, got 0x%016llx.\n" , |
88 | (unsigned long long)get_val, |
89 | (unsigned long long)cpu_map[0]); |
90 | ret = -EINVAL; |
91 | } |
92 | |
93 | /* Check last CCS value, or at least last value in page. */ |
94 | offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size); |
95 | offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1; |
96 | if (cpu_map[offset] != get_val) { |
97 | KUNIT_FAIL(test, |
98 | "Expected CCS readout 0x%016llx, got 0x%016llx.\n" , |
99 | (unsigned long long)get_val, |
100 | (unsigned long long)cpu_map[offset]); |
101 | ret = -EINVAL; |
102 | } |
103 | |
104 | cpu_map[0] = assign_val; |
105 | cpu_map[offset] = assign_val; |
106 | kunmap_local(cpu_map); |
107 | |
108 | return ret; |
109 | } |
110 | |
111 | static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, |
112 | struct kunit *test) |
113 | { |
114 | struct xe_bo *bo; |
115 | |
116 | int ret; |
117 | |
118 | /* TODO: Sanity check */ |
119 | unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile); |
120 | |
121 | if (IS_DGFX(xe)) |
122 | kunit_info(test, "Testing vram id %u\n" , tile->id); |
123 | else |
124 | kunit_info(test, "Testing system memory\n" ); |
125 | |
126 | bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, |
127 | ttm_bo_type_device, bo_flags); |
128 | if (IS_ERR(ptr: bo)) { |
129 | KUNIT_FAIL(test, "Failed to create bo.\n" ); |
130 | return; |
131 | } |
132 | |
133 | xe_bo_lock(bo, false); |
134 | |
135 | kunit_info(test, "Verifying that CCS data is cleared on creation.\n" ); |
136 | ret = ccs_test_migrate(tile, bo, clear: false, get_val: 0ULL, assign_val: 0xdeadbeefdeadbeefULL, |
137 | test); |
138 | if (ret) |
139 | goto out_unlock; |
140 | |
141 | kunit_info(test, "Verifying that CCS data survives migration.\n" ); |
142 | ret = ccs_test_migrate(tile, bo, clear: false, get_val: 0xdeadbeefdeadbeefULL, |
143 | assign_val: 0xdeadbeefdeadbeefULL, test); |
144 | if (ret) |
145 | goto out_unlock; |
146 | |
147 | kunit_info(test, "Verifying that CCS data can be properly cleared.\n" ); |
148 | ret = ccs_test_migrate(tile, bo, clear: true, get_val: 0ULL, assign_val: 0ULL, test); |
149 | |
150 | out_unlock: |
151 | xe_bo_unlock(bo); |
152 | xe_bo_put(bo); |
153 | } |
154 | |
155 | static int ccs_test_run_device(struct xe_device *xe) |
156 | { |
157 | struct kunit *test = xe_cur_kunit(); |
158 | struct xe_tile *tile; |
159 | int id; |
160 | |
161 | if (!xe_device_has_flat_ccs(xe)) { |
162 | kunit_info(test, "Skipping non-flat-ccs device.\n" ); |
163 | return 0; |
164 | } |
165 | |
166 | xe_device_mem_access_get(xe); |
167 | |
168 | for_each_tile(tile, xe, id) { |
169 | /* For igfx run only for primary tile */ |
170 | if (!IS_DGFX(xe) && id > 0) |
171 | continue; |
172 | ccs_test_run_tile(xe, tile, test); |
173 | } |
174 | |
175 | xe_device_mem_access_put(xe); |
176 | |
177 | return 0; |
178 | } |
179 | |
180 | void xe_ccs_migrate_kunit(struct kunit *test) |
181 | { |
182 | xe_call_for_each_device(xe_fn: ccs_test_run_device); |
183 | } |
184 | EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit); |
185 | |
186 | static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test) |
187 | { |
188 | struct xe_bo *bo, *external; |
189 | unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile); |
190 | struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate); |
191 | struct xe_gt *__gt; |
192 | int err, i, id; |
193 | |
194 | kunit_info(test, "Testing device %s vram id %u\n" , |
195 | dev_name(xe->drm.dev), tile->id); |
196 | |
197 | for (i = 0; i < 2; ++i) { |
198 | xe_vm_lock(vm, false); |
199 | bo = xe_bo_create_user(xe, NULL, vm, 0x10000, |
200 | DRM_XE_GEM_CPU_CACHING_WC, |
201 | ttm_bo_type_device, |
202 | bo_flags); |
203 | xe_vm_unlock(vm); |
204 | if (IS_ERR(ptr: bo)) { |
205 | KUNIT_FAIL(test, "bo create err=%pe\n" , bo); |
206 | break; |
207 | } |
208 | |
209 | external = xe_bo_create_user(xe, NULL, NULL, 0x10000, |
210 | DRM_XE_GEM_CPU_CACHING_WC, |
211 | ttm_bo_type_device, bo_flags); |
212 | if (IS_ERR(ptr: external)) { |
213 | KUNIT_FAIL(test, "external bo create err=%pe\n" , external); |
214 | goto cleanup_bo; |
215 | } |
216 | |
217 | xe_bo_lock(external, false); |
218 | err = xe_bo_pin_external(external); |
219 | xe_bo_unlock(external); |
220 | if (err) { |
221 | KUNIT_FAIL(test, "external bo pin err=%pe\n" , |
222 | ERR_PTR(err)); |
223 | goto cleanup_external; |
224 | } |
225 | |
226 | err = xe_bo_evict_all(xe); |
227 | if (err) { |
228 | KUNIT_FAIL(test, "evict err=%pe\n" , ERR_PTR(err)); |
229 | goto cleanup_all; |
230 | } |
231 | |
232 | for_each_gt(__gt, xe, id) |
233 | xe_gt_sanitize(__gt); |
234 | err = xe_bo_restore_kernel(xe); |
235 | /* |
236 | * Snapshotting the CTB and copying back a potentially old |
237 | * version seems risky, depending on what might have been |
238 | * inflight. Also it seems snapshotting the ADS object and |
239 | * copying back results in serious breakage. Normally when |
240 | * calling xe_bo_restore_kernel() we always fully restart the |
241 | * GT, which re-intializes such things. We could potentially |
242 | * skip saving and restoring such objects in xe_bo_evict_all() |
243 | * however seems quite fragile not to also restart the GT. Try |
244 | * to do that here by triggering a GT reset. |
245 | */ |
246 | for_each_gt(__gt, xe, id) { |
247 | xe_gt_reset_async(__gt); |
248 | flush_work(work: &__gt->reset.worker); |
249 | } |
250 | if (err) { |
251 | KUNIT_FAIL(test, "restore kernel err=%pe\n" , |
252 | ERR_PTR(err)); |
253 | goto cleanup_all; |
254 | } |
255 | |
256 | err = xe_bo_restore_user(xe); |
257 | if (err) { |
258 | KUNIT_FAIL(test, "restore user err=%pe\n" , ERR_PTR(err)); |
259 | goto cleanup_all; |
260 | } |
261 | |
262 | if (!xe_bo_is_vram(external)) { |
263 | KUNIT_FAIL(test, "external bo is not vram\n" ); |
264 | err = -EPROTO; |
265 | goto cleanup_all; |
266 | } |
267 | |
268 | if (xe_bo_is_vram(bo)) { |
269 | KUNIT_FAIL(test, "bo is vram\n" ); |
270 | err = -EPROTO; |
271 | goto cleanup_all; |
272 | } |
273 | |
274 | if (i) { |
275 | down_read(sem: &vm->lock); |
276 | xe_vm_lock(vm, false); |
277 | err = xe_bo_validate(bo, bo->vm, false); |
278 | xe_vm_unlock(vm); |
279 | up_read(sem: &vm->lock); |
280 | if (err) { |
281 | KUNIT_FAIL(test, "bo valid err=%pe\n" , |
282 | ERR_PTR(err)); |
283 | goto cleanup_all; |
284 | } |
285 | xe_bo_lock(external, false); |
286 | err = xe_bo_validate(external, NULL, false); |
287 | xe_bo_unlock(external); |
288 | if (err) { |
289 | KUNIT_FAIL(test, "external bo valid err=%pe\n" , |
290 | ERR_PTR(err)); |
291 | goto cleanup_all; |
292 | } |
293 | } |
294 | |
295 | xe_bo_lock(external, false); |
296 | xe_bo_unpin_external(external); |
297 | xe_bo_unlock(external); |
298 | |
299 | xe_bo_put(external); |
300 | |
301 | xe_bo_lock(bo, false); |
302 | __xe_bo_unset_bulk_move(bo); |
303 | xe_bo_unlock(bo); |
304 | xe_bo_put(bo); |
305 | continue; |
306 | |
307 | cleanup_all: |
308 | xe_bo_lock(external, false); |
309 | xe_bo_unpin_external(external); |
310 | xe_bo_unlock(external); |
311 | cleanup_external: |
312 | xe_bo_put(external); |
313 | cleanup_bo: |
314 | xe_bo_lock(bo, false); |
315 | __xe_bo_unset_bulk_move(bo); |
316 | xe_bo_unlock(bo); |
317 | xe_bo_put(bo); |
318 | break; |
319 | } |
320 | |
321 | xe_vm_put(vm); |
322 | |
323 | return 0; |
324 | } |
325 | |
326 | static int evict_test_run_device(struct xe_device *xe) |
327 | { |
328 | struct kunit *test = xe_cur_kunit(); |
329 | struct xe_tile *tile; |
330 | int id; |
331 | |
332 | if (!IS_DGFX(xe)) { |
333 | kunit_info(test, "Skipping non-discrete device %s.\n" , |
334 | dev_name(xe->drm.dev)); |
335 | return 0; |
336 | } |
337 | |
338 | xe_device_mem_access_get(xe); |
339 | |
340 | for_each_tile(tile, xe, id) |
341 | evict_test_run_tile(xe, tile, test); |
342 | |
343 | xe_device_mem_access_put(xe); |
344 | |
345 | return 0; |
346 | } |
347 | |
348 | void xe_bo_evict_kunit(struct kunit *test) |
349 | { |
350 | xe_call_for_each_device(xe_fn: evict_test_run_device); |
351 | } |
352 | EXPORT_SYMBOL_IF_KUNIT(xe_bo_evict_kunit); |
353 | |