// SPDX-License-Identifier: GPL-2.0 AND MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <kunit/test.h>
#include <kunit/visibility.h>

#include "tests/xe_bo_test.h"
#include "tests/xe_pci_test.h"
#include "tests/xe_test.h"

#include "xe_bo_evict.h"
#include "xe_pci.h"
#include "xe_pm.h"

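/*
 * ccs_test_migrate() - Migrate a bo to VRAM, evict it back to system
 * memory and verify its CCS (compression control surface) contents.
 *
 * Validates the bo into VRAM, optionally clears it (which must also
 * clear the CCS metadata), evicts it to system memory and then checks
 * the first and last CCS values against @get_val. Finally writes
 * @assign_val into those slots for the next round while the CPU
 * mapping is still available.
 */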
static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
			    bool clear, u64 get_val, u64 assign_val,
			    struct kunit *test)
{
	struct dma_fence *fence;
	struct ttm_tt *ttm;
	struct page *page;
	pgoff_t ccs_page;
	long timeout;
	u64 *cpu_map;
	int ret;
	u32 offset;

	/* Move bo to VRAM if not already there. */
	ret = xe_bo_validate(bo, NULL, false);
	if (ret) {
		KUNIT_FAIL(test, "Failed to validate bo.\n");
		return ret;
	}

	/* Optionally clear bo *and* CCS data in VRAM. */
	if (clear) {
		fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource);
		if (IS_ERR(fence)) {
			KUNIT_FAIL(test, "Failed to submit bo clear.\n");
			return PTR_ERR(fence);
		}
		dma_fence_put(fence);
	}

	/* Evict to system. CCS data should be copied. */
	ret = xe_bo_evict(bo, true);
	if (ret) {
		KUNIT_FAIL(test, "Failed to evict bo.\n");
		return ret;
	}

	/* Sync all migration blits. */
	timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
					DMA_RESV_USAGE_KERNEL,
					true,
					5 * HZ);
	if (timeout <= 0) {
		KUNIT_FAIL(test, "Failed to sync bo eviction.\n");
		return -ETIME;
	}

	/*
	 * Bo with CCS data is now in system memory. Verify backing store
	 * and data integrity. Then assign for the next testing round while
	 * we still have a CPU map.
	 */
	ttm = bo->ttm.ttm;
	if (!ttm || !ttm_tt_is_populated(ttm)) {
		KUNIT_FAIL(test, "Bo was not in expected placement.\n");
		return -EINVAL;
	}

	ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT;
	if (ccs_page >= ttm->num_pages) {
		KUNIT_FAIL(test, "No TTM CCS pages present.\n");
		return -EINVAL;
	}

	page = ttm->pages[ccs_page];
	cpu_map = kmap_local_page(page);

	/* Check first CCS value. */
	if (cpu_map[0] != get_val) {
		KUNIT_FAIL(test,
			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
			   (unsigned long long)get_val,
			   (unsigned long long)cpu_map[0]);
		ret = -EINVAL;
	}

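	/*
	 * With flat CCS, each CCS byte covers several hundred bytes of
	 * main surface data (typically 256), so the CCS area is far
	 * smaller than the bo itself. Clamp to the first CCS page and
	 * index its last u64 below.
	 */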
	/* Check last CCS value, or at least the last value in the page. */
	offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size);
	offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1;
	if (cpu_map[offset] != get_val) {
		KUNIT_FAIL(test,
			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
			   (unsigned long long)get_val,
			   (unsigned long long)cpu_map[offset]);
		ret = -EINVAL;
	}

	cpu_map[0] = assign_val;
	cpu_map[offset] = assign_val;
	kunmap_local(cpu_map);

	return ret;
}

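/*
 * ccs_test_run_tile() - Run three CCS migration rounds against a 1 MiB bo:
 * verify that CCS data is zeroed on creation, that written CCS data
 * survives a VRAM <-> system round trip, and that a migrate-clear zeroes
 * it again.
 */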
static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
			      struct kunit *test)
{
	struct xe_bo *bo;
	int ret;

	/* TODO: Sanity check */
	unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile);

	if (IS_DGFX(xe))
		kunit_info(test, "Testing vram id %u\n", tile->id);
	else
		kunit_info(test, "Testing system memory\n");

	bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
			       ttm_bo_type_device, bo_flags);
	if (IS_ERR(bo)) {
		KUNIT_FAIL(test, "Failed to create bo.\n");
		return;
	}

	xe_bo_lock(bo, false);

	kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
	ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
			       test);
	if (ret)
		goto out_unlock;

	kunit_info(test, "Verifying that CCS data survives migration.\n");
	ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL,
			       0xdeadbeefdeadbeefULL, test);
	if (ret)
		goto out_unlock;

	kunit_info(test, "Verifying that CCS data can be properly cleared.\n");
	ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test);

out_unlock:
	xe_bo_unlock(bo);
	xe_bo_put(bo);
}

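/*
 * ccs_test_run_device() - Run the CCS migration test on every tile of a
 * device with flat CCS; on integrated graphics only the primary tile is
 * exercised.
 */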
static int ccs_test_run_device(struct xe_device *xe)
{
	struct kunit *test = xe_cur_kunit();
	struct xe_tile *tile;
	int id;

	if (!xe_device_has_flat_ccs(xe)) {
		kunit_info(test, "Skipping non-flat-ccs device.\n");
		return 0;
	}

	xe_device_mem_access_get(xe);

	for_each_tile(tile, xe, id) {
		/* For igfx run only for the primary tile. */
		if (!IS_DGFX(xe) && id > 0)
			continue;
		ccs_test_run_tile(xe, tile, test);
	}

	xe_device_mem_access_put(xe);

	return 0;
}

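/* KUnit entry point: run the CCS migration test on each bound xe device. */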
void xe_ccs_migrate_kunit(struct kunit *test)
{
	xe_call_for_each_device(ccs_test_run_device);
}
EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit);

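/*
 * evict_test_run_tile() - Create a vm-private bo and a pinned external bo,
 * evict all VRAM, sanitize and reset the GTs, restore kernel and user bos,
 * then verify that the pinned external bo ends up back in VRAM while the
 * unpinned one stays in system memory.
 */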
static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
{
	struct xe_bo *bo, *external;
	unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile);
	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
	struct xe_gt *__gt;
	int err, i, id;

	kunit_info(test, "Testing device %s vram id %u\n",
		   dev_name(xe->drm.dev), tile->id);

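	/*
	 * Two rounds: the first only checks placements after evict/restore,
	 * the second additionally revalidates both bos afterwards (see the
	 * "if (i)" block below).
	 */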
	for (i = 0; i < 2; ++i) {
		xe_vm_lock(vm, false);
		bo = xe_bo_create_user(xe, NULL, vm, 0x10000,
				       DRM_XE_GEM_CPU_CACHING_WC,
				       ttm_bo_type_device,
				       bo_flags);
		xe_vm_unlock(vm);
		if (IS_ERR(bo)) {
			KUNIT_FAIL(test, "bo create err=%pe\n", bo);
			break;
		}

		external = xe_bo_create_user(xe, NULL, NULL, 0x10000,
					     DRM_XE_GEM_CPU_CACHING_WC,
					     ttm_bo_type_device, bo_flags);
		if (IS_ERR(external)) {
			KUNIT_FAIL(test, "external bo create err=%pe\n", external);
			goto cleanup_bo;
		}

		xe_bo_lock(external, false);
		err = xe_bo_pin_external(external);
		xe_bo_unlock(external);
		if (err) {
			KUNIT_FAIL(test, "external bo pin err=%pe\n",
				   ERR_PTR(err));
			goto cleanup_external;
		}

		err = xe_bo_evict_all(xe);
		if (err) {
			KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err));
			goto cleanup_all;
		}

		for_each_gt(__gt, xe, id)
			xe_gt_sanitize(__gt);
		err = xe_bo_restore_kernel(xe);
		/*
		 * Snapshotting the CTB and copying back a potentially old
		 * version seems risky, depending on what might have been
		 * in flight. Also, snapshotting the ADS object and copying
		 * it back results in serious breakage. Normally when
		 * calling xe_bo_restore_kernel() we always fully restart
		 * the GT, which re-initializes such things. We could
		 * potentially skip saving and restoring such objects in
		 * xe_bo_evict_all(), but it seems quite fragile not to
		 * also restart the GT. Try to do that here by triggering
		 * a GT reset.
		 */
		for_each_gt(__gt, xe, id) {
			xe_gt_reset_async(__gt);
			flush_work(&__gt->reset.worker);
		}
		if (err) {
			KUNIT_FAIL(test, "restore kernel err=%pe\n",
				   ERR_PTR(err));
			goto cleanup_all;
		}

		err = xe_bo_restore_user(xe);
		if (err) {
			KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err));
			goto cleanup_all;
		}

		if (!xe_bo_is_vram(external)) {
			KUNIT_FAIL(test, "external bo is not vram\n");
			err = -EPROTO;
			goto cleanup_all;
		}

		if (xe_bo_is_vram(bo)) {
			KUNIT_FAIL(test, "bo is vram\n");
			err = -EPROTO;
			goto cleanup_all;
		}

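		/* Second round: verify both bos can be validated again after restore. */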
		if (i) {
			down_read(&vm->lock);
			xe_vm_lock(vm, false);
			err = xe_bo_validate(bo, bo->vm, false);
			xe_vm_unlock(vm);
			up_read(&vm->lock);
			if (err) {
				KUNIT_FAIL(test, "bo valid err=%pe\n",
					   ERR_PTR(err));
				goto cleanup_all;
			}
			xe_bo_lock(external, false);
			err = xe_bo_validate(external, NULL, false);
			xe_bo_unlock(external);
			if (err) {
				KUNIT_FAIL(test, "external bo valid err=%pe\n",
					   ERR_PTR(err));
				goto cleanup_all;
			}
		}

		xe_bo_lock(external, false);
		xe_bo_unpin_external(external);
		xe_bo_unlock(external);

		xe_bo_put(external);

		xe_bo_lock(bo, false);
		__xe_bo_unset_bulk_move(bo);
		xe_bo_unlock(bo);
		xe_bo_put(bo);
		continue;

cleanup_all:
		xe_bo_lock(external, false);
		xe_bo_unpin_external(external);
		xe_bo_unlock(external);
cleanup_external:
		xe_bo_put(external);
cleanup_bo:
		xe_bo_lock(bo, false);
		__xe_bo_unset_bulk_move(bo);
		xe_bo_unlock(bo);
		xe_bo_put(bo);
		break;
	}

	xe_vm_put(vm);

	return 0;
}

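/*
 * evict_test_run_device() - Run the eviction test on every VRAM tile of a
 * discrete device; integrated devices have no VRAM to evict and are skipped.
 */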
static int evict_test_run_device(struct xe_device *xe)
{
	struct kunit *test = xe_cur_kunit();
	struct xe_tile *tile;
	int id;

	if (!IS_DGFX(xe)) {
		kunit_info(test, "Skipping non-discrete device %s.\n",
			   dev_name(xe->drm.dev));
		return 0;
	}

	xe_device_mem_access_get(xe);

	for_each_tile(tile, xe, id)
		evict_test_run_tile(xe, tile, test);

	xe_device_mem_access_put(xe);

	return 0;
}

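/* KUnit entry point: run the eviction test on each bound xe device. */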
void xe_bo_evict_kunit(struct kunit *test)
{
	xe_call_for_each_device(evict_test_run_device);
}
EXPORT_SYMBOL_IF_KUNIT(xe_bo_evict_kunit);


Source: linux/drivers/gpu/drm/xe/tests/xe_bo.c
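
For reference, a minimal sketch of how the two entry points exported above are typically registered as a KUnit suite, modeled on the companion file tests/xe_bo_test.c; the exact suite name and file layout here are assumptions:

// SPDX-License-Identifier: GPL-2.0
/* Hypothetical companion file, e.g. tests/xe_bo_test.c */
#include <kunit/test.h>

#include "xe_bo_test.h"

static struct kunit_case xe_bo_tests[] = {
	KUNIT_CASE(xe_ccs_migrate_kunit),	/* exported via EXPORT_SYMBOL_IF_KUNIT */
	KUNIT_CASE(xe_bo_evict_kunit),
	{}	/* sentinel */
};

static struct kunit_suite xe_bo_test_suite = {
	.name = "xe_bo",	/* assumed suite name */
	.test_cases = xe_bo_tests,
};

kunit_test_suite(xe_bo_test_suite);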