amdgpu_vm.c source code [linux/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c]

1	/*
2	* Copyright 2008 Advanced Micro Devices, Inc.
3	* Copyright 2008 Red Hat Inc.
4	* Copyright 2009 Jerome Glisse.
5	*
6	* Permission is hereby granted, free of charge, to any person obtaining a
7	* copy of this software and associated documentation files (the "Software"),
8	* to deal in the Software without restriction, including without limitation
9	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
10	* and/or sell copies of the Software, and to permit persons to whom the
11	* Software is furnished to do so, subject to the following conditions:
12	*
13	* The above copyright notice and this permission notice shall be included in
14	* all copies or substantial portions of the Software.
15	*
16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19	* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20	* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21	* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22	* OTHER DEALINGS IN THE SOFTWARE.
23	*
24	* Authors: Dave Airlie
25	* Alex Deucher
26	* Jerome Glisse
27	*/
28
29	#include <linux/dma-fence-array.h>
30	#include <linux/interval_tree_generic.h>
31	#include <linux/idr.h>
32	#include <linux/dma-buf.h>
33
34	#include <drm/amdgpu_drm.h>
35	#include <drm/drm_drv.h>
36	#include <drm/ttm/ttm_tt.h>
37	#include <drm/drm_exec.h>
38	#include "amdgpu.h"
39	#include "amdgpu_trace.h"
40	#include "amdgpu_amdkfd.h"
41	#include "amdgpu_gmc.h"
42	#include "amdgpu_xgmi.h"
43	#include "amdgpu_dma_buf.h"
44	#include "amdgpu_res_cursor.h"
45	#include "kfd_svm.h"
46
47	/**
48	* DOC: GPUVM
49	*
50	* GPUVM is the MMU functionality provided on the GPU.
51	* GPUVM is similar to the legacy GART on older asics, however
52	* rather than there being a single global GART table
53	* for the entire GPU, there can be multiple GPUVM page tables active
54	* at any given time. The GPUVM page tables can contain a mix of
55	* VRAM pages and system pages (both memory and MMIO) and system pages
56	* can be mapped as snooped (cached system pages) or unsnooped
57	* (uncached system pages).
58	*
59	* Each active GPUVM has an ID associated with it and there is a page table
60	* linked with each VMID. When executing a command buffer,
61	* the kernel tells the engine what VMID to use for that command
62	* buffer. VMIDs are allocated dynamically as commands are submitted.
63	* The userspace drivers maintain their own address space and the kernel
64	* sets up their page tables accordingly when they submit their
65	* command buffers and a VMID is assigned.
66	* The hardware supports up to 16 active GPUVMs at any given time.
67	*
68	* Each GPUVM is represented by a 1-2 or 1-5 level page table, depending
69	* on the ASIC family. GPUVM supports RWX attributes on each page as well
70	* as other features such as encryption and caching attributes.
71	*
72	* VMID 0 is special. It is the GPUVM used for the kernel driver. In
73	* addition to an aperture managed by a page table, VMID 0 also has
74	* several other apertures. There is an aperture for direct access to VRAM
75	* and there is a legacy AGP aperture which just forwards accesses directly
76	* to the matching system physical addresses (or IOVAs when an IOMMU is
77	* present). These apertures provide direct access to these memories without
78	* incurring the overhead of a page table. VMID 0 is used by the kernel
79	* driver for tasks like memory management.
80	*
81	* GPU clients (i.e., engines on the GPU) use GPUVM VMIDs to access memory.
82	* For user applications, each application can have their own unique GPUVM
83	* address space. The application manages the address space and the kernel
84	* driver manages the GPUVM page tables for each process. If a GPU client
85	* accesses an invalid page, it will generate a GPU page fault, similar to
86	* accessing an invalid page on a CPU.
87	*/
88
89	#define START(node) ((node)->start)
90	#define LAST(node) ((node)->last)
91
92	INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
93	START, LAST, static, amdgpu_vm_it)
94
95	#undef START
96	#undef LAST
97
98	/**
99	* struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback
100	*/
101	struct amdgpu_prt_cb {
102
103	/**
104	* @adev: amdgpu device
105	*/
106	struct amdgpu_device *adev;
107
108	/**
109	* @cb: callback
110	*/
111	struct dma_fence_cb cb;
112	};
113
114	/**
115	* struct amdgpu_vm_tlb_seq_struct - Helper to increment the TLB flush sequence
116	*/
117	struct amdgpu_vm_tlb_seq_struct {
118	/**
119	* @vm: pointer to the amdgpu_vm structure to set the fence sequence on
120	*/
121	struct amdgpu_vm *vm;
122
123	/**
124	* @cb: callback
125	*/
126	struct dma_fence_cb cb;
127	};
128
129	/**
130	* amdgpu_vm_set_pasid - manage pasid and vm ptr mapping
131	*
132	* @adev: amdgpu_device pointer
133	* @vm: amdgpu_vm pointer
134	* @pasid: the pasid the VM is using on this GPU
135	*
136	* Set the pasid this VM is using on this GPU, can also be used to remove the
137	* pasid by passing in zero.
138	*
139	*/
140	int amdgpu_vm_set_pasid(struct amdgpu_device adev, struct* amdgpu_vm *vm,
141	u32 pasid)
142	{
143	int r;
144
145	if (vm->pasid == pasid)
146	return `0`;
147
148	if (vm->pasid) {
149	r = xa_err(entry: xa_erase_irq(xa: &adev->vm_manager.pasids, index: vm->pasid));
150	if (r < `0`)
151	return r;
152
153	vm->pasid = `0`;
154	}
155
156	if (pasid) {
157	r = xa_err(entry: xa_store_irq(xa: &adev->vm_manager.pasids, index: pasid, entry: vm,
158	GFP_KERNEL));
159	if (r < `0`)
160	return r;
161
162	vm->pasid = pasid;
163	}
164
165
166	return `0`;
167	}
168
169	/**
170	* amdgpu_vm_bo_evicted - vm_bo is evicted
171	*
172	* @vm_bo: vm_bo which is evicted
173	*
174	* State for PDs/PTs and per VM BOs which are not at the location they should
175	* be.
176	*/
177	static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
178	{
179	struct amdgpu_vm *vm = vm_bo->vm;
180	struct amdgpu_bo *bo = vm_bo->bo;
181
182	vm_bo->moved = true;
183	spin_lock(lock: &vm_bo->vm->status_lock);
184	if (bo->tbo.type == ttm_bo_type_kernel)
185	list_move(list: &vm_bo->vm_status, head: &vm->evicted);
186	else
187	list_move_tail(list: &vm_bo->vm_status, head: &vm->evicted);
188	spin_unlock(lock: &vm_bo->vm->status_lock);
189	}
190	/**
191	* amdgpu_vm_bo_moved - vm_bo is moved
192	*
193	* @vm_bo: vm_bo which is moved
194	*
195	* State for per VM BOs which are moved, but that change is not yet reflected
196	* in the page tables.
197	*/
198	static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
199	{
200	spin_lock(lock: &vm_bo->vm->status_lock);
201	list_move(list: &vm_bo->vm_status, head: &vm_bo->vm->moved);
202	spin_unlock(lock: &vm_bo->vm->status_lock);
203	}
204
205	/**
206	* amdgpu_vm_bo_idle - vm_bo is idle
207	*
208	* @vm_bo: vm_bo which is now idle
209	*
210	* State for PDs/PTs and per VM BOs which have gone through the state machine
211	* and are now idle.
212	*/
213	static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
214	{
215	spin_lock(lock: &vm_bo->vm->status_lock);
216	list_move(list: &vm_bo->vm_status, head: &vm_bo->vm->idle);
217	spin_unlock(lock: &vm_bo->vm->status_lock);
218	vm_bo->moved = false;
219	}
220
221	/**
222	* amdgpu_vm_bo_invalidated - vm_bo is invalidated
223	*
224	* @vm_bo: vm_bo which is now invalidated
225	*
226	* State for normal BOs which are invalidated and that change not yet reflected
227	* in the PTs.
228	*/
229	static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
230	{
231	spin_lock(lock: &vm_bo->vm->status_lock);
232	list_move(list: &vm_bo->vm_status, head: &vm_bo->vm->invalidated);
233	spin_unlock(lock: &vm_bo->vm->status_lock);
234	}
235
236	/**
237	* amdgpu_vm_bo_relocated - vm_bo is reloacted
238	*
239	* @vm_bo: vm_bo which is relocated
240	*
241	* State for PDs/PTs which needs to update their parent PD.
242	* For the root PD, just move to idle state.
243	*/
244	static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
245	{
246	if (vm_bo->bo->parent) {
247	spin_lock(lock: &vm_bo->vm->status_lock);
248	list_move(list: &vm_bo->vm_status, head: &vm_bo->vm->relocated);
249	spin_unlock(lock: &vm_bo->vm->status_lock);
250	} else {
251	amdgpu_vm_bo_idle(vm_bo);
252	}
253	}
254
255	/**
256	* amdgpu_vm_bo_done - vm_bo is done
257	*
258	* @vm_bo: vm_bo which is now done
259	*
260	* State for normal BOs which are invalidated and that change has been updated
261	* in the PTs.
262	*/
263	static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
264	{
265	spin_lock(lock: &vm_bo->vm->status_lock);
266	list_move(list: &vm_bo->vm_status, head: &vm_bo->vm->done);
267	spin_unlock(lock: &vm_bo->vm->status_lock);
268	}
269
270	/**
271	* amdgpu_vm_bo_reset_state_machine - reset the vm_bo state machine
272	* @vm: the VM which state machine to reset
273	*
274	* Move all vm_bo object in the VM into a state where they will be updated
275	* again during validation.
276	*/
277	static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
278	{
279	struct amdgpu_vm_bo_base vm_bo, tmp;
280
281	spin_lock(lock: &vm->status_lock);
282	list_splice_init(list: &vm->done, head: &vm->invalidated);
283	list_for_each_entry(vm_bo, &vm->invalidated, vm_status)
284	vm_bo->moved = true;
285	list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) {
286	struct amdgpu_bo *bo = vm_bo->bo;
287
288	if (!bo \|\| bo->tbo.type != ttm_bo_type_kernel)
289	list_move(list: &vm_bo->vm_status, head: &vm_bo->vm->moved);
290	else if (bo->parent)
291	list_move(list: &vm_bo->vm_status, head: &vm_bo->vm->relocated);
292	}
293	spin_unlock(lock: &vm->status_lock);
294	}
295
296	/**
297	* amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
298	*
299	* @base: base structure for tracking BO usage in a VM
300	* @vm: vm to which bo is to be added
301	* @bo: amdgpu buffer object
302	*
303	* Initialize a bo_va_base structure and add it to the appropriate lists
304	*
305	*/
306	void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
307	struct amdgpu_vm vm, struct* amdgpu_bo *bo)
308	{
309	base->vm = vm;
310	base->bo = bo;
311	base->next = NULL;
312	INIT_LIST_HEAD(list: &base->vm_status);
313
314	if (!bo)
315	return;
316	base->next = bo->vm_bo;
317	bo->vm_bo = base;
318
319	if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv)
320	return;
321
322	dma_resv_assert_held(vm->root.bo->tbo.base.resv);
323
324	ttm_bo_set_bulk_move(bo: &bo->tbo, bulk: &vm->lru_bulk_move);
325	if (bo->tbo.type == ttm_bo_type_kernel && bo->parent)
326	amdgpu_vm_bo_relocated(vm_bo: base);
327	else
328	amdgpu_vm_bo_idle(vm_bo: base);
329
330	if (bo->preferred_domains &
331	amdgpu_mem_type_to_domain(mem_type: bo->tbo.resource->mem_type))
332	return;
333
334	/*
335	* we checked all the prerequisites, but it looks like this per vm bo
336	* is currently evicted. add the bo to the evicted list to make sure it
337	* is validated on next vm use to avoid fault.
338	* */
339	amdgpu_vm_bo_evicted(vm_bo: base);
340	}
341
342	/**
343	* amdgpu_vm_lock_pd - lock PD in drm_exec
344	*
345	* @vm: vm providing the BOs
346	* @exec: drm execution context
347	* @num_fences: number of extra fences to reserve
348	*
349	* Lock the VM root PD in the DRM execution context.
350	*/
351	int amdgpu_vm_lock_pd(struct amdgpu_vm vm, struct* drm_exec *exec,
352	unsigned int num_fences)
353	{
354	/ We need at least two fences for the VM PD/PT updates /
355	return drm_exec_prepare_obj(exec, obj: &vm->root.bo->tbo.base,
356	num_fences: `2` + num_fences);
357	}
358
359	/**
360	* amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
361	*
362	* @adev: amdgpu device pointer
363	* @vm: vm providing the BOs
364	*
365	* Move all BOs to the end of LRU and remember their positions to put them
366	* together.
367	*/
368	void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
369	struct amdgpu_vm *vm)
370	{
371	spin_lock(lock: &adev->mman.bdev.lru_lock);
372	ttm_lru_bulk_move_tail(bulk: &vm->lru_bulk_move);
373	spin_unlock(lock: &adev->mman.bdev.lru_lock);
374	}
375
376	/ Create scheduler entities for page table updates /
377	static int amdgpu_vm_init_entities(struct amdgpu_device *adev,
378	struct amdgpu_vm *vm)
379	{
380	int r;
381
382	r = drm_sched_entity_init(entity: &vm->immediate, priority: DRM_SCHED_PRIORITY_NORMAL,
383	sched_list: adev->vm_manager.vm_pte_scheds,
384	num_sched_list: adev->vm_manager.vm_pte_num_scheds, NULL);
385	if (r)
386	goto error;
387
388	return drm_sched_entity_init(entity: &vm->delayed, priority: DRM_SCHED_PRIORITY_NORMAL,
389	sched_list: adev->vm_manager.vm_pte_scheds,
390	num_sched_list: adev->vm_manager.vm_pte_num_scheds, NULL);
391
392	error:
393	drm_sched_entity_destroy(entity: &vm->immediate);
394	return r;
395	}
396
397	/ Destroy the entities for page table updates again /
398	static void amdgpu_vm_fini_entities(struct amdgpu_vm *vm)
399	{
400	drm_sched_entity_destroy(entity: &vm->immediate);
401	drm_sched_entity_destroy(entity: &vm->delayed);
402	}
403
404	/**
405	* amdgpu_vm_generation - return the page table re-generation counter
406	* @adev: the amdgpu_device
407	* @vm: optional VM to check, might be NULL
408	*
409	* Returns a page table re-generation token to allow checking if submissions
410	* are still valid to use this VM. The VM parameter might be NULL in which case
411	* just the VRAM lost counter will be used.
412	*/
413	uint64_t amdgpu_vm_generation(struct amdgpu_device adev, struct* amdgpu_vm *vm)
414	{
415	uint64_t result = (u64)atomic_read(v: &adev->vram_lost_counter) << `32`;
416
417	if (!vm)
418	return result;
419
420	result += vm->generation;
421	/ Add one if the page tables will be re-generated on next CS /
422	if (drm_sched_entity_error(entity: &vm->delayed))
423	++result;
424
425	return result;
426	}
427
428	/**
429	* amdgpu_vm_validate_pt_bos - validate the page table BOs
430	*
431	* @adev: amdgpu device pointer
432	* @vm: vm providing the BOs
433	* @validate: callback to do the validation
434	* @param: parameter for the validation callback
435	*
436	* Validate the page table BOs on command submission if neccessary.
437	*
438	* Returns:
439	* Validation result.
440	*/
441	int amdgpu_vm_validate_pt_bos(struct amdgpu_device adev, struct* amdgpu_vm *vm,
442	int (validate)(void* p, struct* amdgpu_bo *bo),
443	void *param)
444	{
445	struct amdgpu_vm_bo_base *bo_base;
446	struct amdgpu_bo *shadow;
447	struct amdgpu_bo *bo;
448	int r;
449
450	if (drm_sched_entity_error(entity: &vm->delayed)) {
451	++vm->generation;
452	amdgpu_vm_bo_reset_state_machine(vm);
453	amdgpu_vm_fini_entities(vm);
454	r = amdgpu_vm_init_entities(adev, vm);
455	if (r)
456	return r;
457	}
458
459	spin_lock(lock: &vm->status_lock);
460	while (!list_empty(head: &vm->evicted)) {
461	bo_base = list_first_entry(&vm->evicted,
462	struct amdgpu_vm_bo_base,
463	vm_status);
464	spin_unlock(lock: &vm->status_lock);
465
466	bo = bo_base->bo;
467	shadow = amdgpu_bo_shadowed(bo);
468
469	r = validate(param, bo);
470	if (r)
471	return r;
472	if (shadow) {
473	r = validate(param, shadow);
474	if (r)
475	return r;
476	}
477
478	if (bo->tbo.type != ttm_bo_type_kernel) {
479	amdgpu_vm_bo_moved(vm_bo: bo_base);
480	} else {
481	vm->update_funcs->map_table(to_amdgpu_bo_vm(bo));
482	amdgpu_vm_bo_relocated(vm_bo: bo_base);
483	}
484	spin_lock(lock: &vm->status_lock);
485	}
486	spin_unlock(lock: &vm->status_lock);
487
488	amdgpu_vm_eviction_lock(vm);
489	vm->evicting = false;
490	amdgpu_vm_eviction_unlock(vm);
491
492	return `0`;
493	}
494
495	/**
496	* amdgpu_vm_ready - check VM is ready for updates
497	*
498	* @vm: VM to check
499	*
500	* Check if all VM PDs/PTs are ready for updates
501	*
502	* Returns:
503	* True if VM is not evicting.
504	*/
505	bool amdgpu_vm_ready(struct amdgpu_vm *vm)
506	{
507	bool empty;
508	bool ret;
509
510	amdgpu_vm_eviction_lock(vm);
511	ret = !vm->evicting;
512	amdgpu_vm_eviction_unlock(vm);
513
514	spin_lock(lock: &vm->status_lock);
515	empty = list_empty(head: &vm->evicted);
516	spin_unlock(lock: &vm->status_lock);
517
518	return ret && empty;
519	}
520
521	/**
522	* amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
523	*
524	* @adev: amdgpu_device pointer
525	*/
526	void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
527	{
528	const struct amdgpu_ip_block *ip_block;
529	bool has_compute_vm_bug;
530	struct amdgpu_ring *ring;
531	int i;
532
533	has_compute_vm_bug = false;
534
535	ip_block = amdgpu_device_ip_get_ip_block(adev, type: AMD_IP_BLOCK_TYPE_GFX);
536	if (ip_block) {
537	/ Compute has a VM bug for GFX version < 7.*
538	Compute has a VM bug for GFX 8 MEC firmware version < 673./*
539	if (ip_block->version->major <= `7`)
540	has_compute_vm_bug = true;
541	else if (ip_block->version->major == `8`)
542	if (adev->gfx.mec_fw_version < `673`)
543	has_compute_vm_bug = true;
544	}
545
546	for (i = `0`; i < adev->num_rings; i++) {
547	ring = adev->rings[i];
548	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
549	/ only compute rings /
550	ring->has_compute_vm_bug = has_compute_vm_bug;
551	else
552	ring->has_compute_vm_bug = false;
553	}
554	}
555
556	/**
557	* amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
558	*
559	* @ring: ring on which the job will be submitted
560	* @job: job to submit
561	*
562	* Returns:
563	* True if sync is needed.
564	*/
565	bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
566	struct amdgpu_job *job)
567	{
568	struct amdgpu_device *adev = ring->adev;
569	unsigned vmhub = ring->vm_hub;
570	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
571
572	if (job->vmid == `0`)
573	return false;
574
575	if (job->vm_needs_flush \|\| ring->has_compute_vm_bug)
576	return true;
577
578	if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
579	return true;
580
581	if (amdgpu_vmid_had_gpu_reset(adev, id: &id_mgr->ids[job->vmid]))
582	return true;
583
584	return false;
585	}
586
587	/**
588	* amdgpu_vm_flush - hardware flush the vm
589	*
590	* @ring: ring to use for flush
591	* @job: related job
592	* @need_pipe_sync: is pipe sync needed
593	*
594	* Emit a VM flush when it is necessary.
595	*
596	* Returns:
597	* 0 on success, errno otherwise.
598	*/
599	int amdgpu_vm_flush(struct amdgpu_ring ring, struct* amdgpu_job *job,
600	bool need_pipe_sync)
601	{
602	struct amdgpu_device *adev = ring->adev;
603	unsigned vmhub = ring->vm_hub;
604	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
605	struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
606	bool spm_update_needed = job->spm_update_needed;
607	bool gds_switch_needed = ring->funcs->emit_gds_switch &&
608	job->gds_switch_needed;
609	bool vm_flush_needed = job->vm_needs_flush;
610	struct dma_fence *fence = NULL;
611	bool pasid_mapping_needed = false;
612	unsigned patch_offset = `0`;
613	int r;
614
615	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
616	gds_switch_needed = true;
617	vm_flush_needed = true;
618	pasid_mapping_needed = true;
619	spm_update_needed = true;
620	}
621
622	mutex_lock(&id_mgr->lock);
623	if (id->pasid != job->pasid \|\| !id->pasid_mapping \|\|
624	!dma_fence_is_signaled(fence: id->pasid_mapping))
625	pasid_mapping_needed = true;
626	mutex_unlock(lock: &id_mgr->lock);
627
628	gds_switch_needed &= !!ring->funcs->emit_gds_switch;
629	vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
630	job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
631	pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
632	ring->funcs->emit_wreg;
633
634	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
635	return `0`;
636
637	amdgpu_ring_ib_begin(ring);
638	if (ring->funcs->init_cond_exec)
639	patch_offset = amdgpu_ring_init_cond_exec(ring);
640
641	if (need_pipe_sync)
642	amdgpu_ring_emit_pipeline_sync(ring);
643
644	if (vm_flush_needed) {
645	trace_amdgpu_vm_flush(ring, vmid: job->vmid, pd_addr: job->vm_pd_addr);
646	amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
647	}
648
649	if (pasid_mapping_needed)
650	amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
651
652	if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
653	adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);
654
655	if (!ring->is_mes_queue && ring->funcs->emit_gds_switch &&
656	gds_switch_needed) {
657	amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
658	job->gds_size, job->gws_base,
659	job->gws_size, job->oa_base,
660	job->oa_size);
661	}
662
663	if (vm_flush_needed \|\| pasid_mapping_needed) {
664	r = amdgpu_fence_emit(ring, fence: &fence, NULL, flags: `0`);
665	if (r)
666	return r;
667	}
668
669	if (vm_flush_needed) {
670	mutex_lock(&id_mgr->lock);
671	dma_fence_put(fence: id->last_flush);
672	id->last_flush = dma_fence_get(fence);
673	id->current_gpu_reset_count =
674	atomic_read(v: &adev->gpu_reset_counter);
675	mutex_unlock(lock: &id_mgr->lock);
676	}
677
678	if (pasid_mapping_needed) {
679	mutex_lock(&id_mgr->lock);
680	id->pasid = job->pasid;
681	dma_fence_put(fence: id->pasid_mapping);
682	id->pasid_mapping = dma_fence_get(fence);
683	mutex_unlock(lock: &id_mgr->lock);
684	}
685	dma_fence_put(fence);
686
687	if (ring->funcs->patch_cond_exec)
688	amdgpu_ring_patch_cond_exec(ring, patch_offset);
689
690	/ the double SWITCH_BUFFER here cannot be skipped by COND_EXEC /
691	if (ring->funcs->emit_switch_buffer) {
692	amdgpu_ring_emit_switch_buffer(ring);
693	amdgpu_ring_emit_switch_buffer(ring);
694	}
695	amdgpu_ring_ib_end(ring);
696	return `0`;
697	}
698
699	/**
700	* amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
701	*
702	* @vm: requested vm
703	* @bo: requested buffer object
704	*
705	* Find @bo inside the requested vm.
706	* Search inside the @bos vm list for the requested vm
707	* Returns the found bo_va or NULL if none is found
708	*
709	* Object has to be reserved!
710	*
711	* Returns:
712	* Found bo_va or NULL.
713	*/
714	struct amdgpu_bo_va amdgpu_vm_bo_find(struct* amdgpu_vm *vm,
715	struct amdgpu_bo *bo)
716	{
717	struct amdgpu_vm_bo_base *base;
718
719	for (base = bo->vm_bo; base; base = base->next) {
720	if (base->vm != vm)
721	continue;
722
723	return container_of(base, struct amdgpu_bo_va, base);
724	}
725	return NULL;
726	}
727
728	/**
729	* amdgpu_vm_map_gart - Resolve gart mapping of addr
730	*
731	* @pages_addr: optional DMA address to use for lookup
732	* @addr: the unmapped addr
733	*
734	* Look up the physical address of the page that the pte resolves
735	* to.
736	*
737	* Returns:
738	* The pointer for the page table entry.
739	*/
740	uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
741	{
742	uint64_t result;
743
744	/ page table offset /
745	result = pages_addr[addr >> PAGE_SHIFT];
746
747	/ in case cpu page size != gpu page size/
748	result \|= addr & (~PAGE_MASK);
749
750	result &= `0xFFFFFFFFFFFFF000ULL`;
751
752	return result;
753	}
754
755	/**
756	* amdgpu_vm_update_pdes - make sure that all directories are valid
757	*
758	* @adev: amdgpu_device pointer
759	* @vm: requested vm
760	* @immediate: submit immediately to the paging queue
761	*
762	* Makes sure all directories are up to date.
763	*
764	* Returns:
765	* 0 for success, error for failure.
766	*/
767	int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
768	struct amdgpu_vm *vm, bool immediate)
769	{
770	struct amdgpu_vm_update_params params;
771	struct amdgpu_vm_bo_base *entry;
772	bool flush_tlb_needed = false;
773	LIST_HEAD(relocated);
774	int r, idx;
775
776	spin_lock(lock: &vm->status_lock);
777	list_splice_init(list: &vm->relocated, head: &relocated);
778	spin_unlock(lock: &vm->status_lock);
779
780	if (list_empty(head: &relocated))
781	return `0`;
782
783	if (!drm_dev_enter(dev: adev_to_drm(adev), idx: &idx))
784	return -ENODEV;
785
786	memset(&params, `0`, sizeof(params));
787	params.adev = adev;
788	params.vm = vm;
789	params.immediate = immediate;
790
791	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
792	if (r)
793	goto error;
794
795	list_for_each_entry(entry, &relocated, vm_status) {
796	/ vm_flush_needed after updating moved PDEs /
797	flush_tlb_needed \|= entry->moved;
798
799	r = amdgpu_vm_pde_update(params: &params, entry);
800	if (r)
801	goto error;
802	}
803
804	r = vm->update_funcs->commit(&params, &vm->last_update);
805	if (r)
806	goto error;
807
808	if (flush_tlb_needed)
809	atomic64_inc(v: &vm->tlb_seq);
810
811	while (!list_empty(head: &relocated)) {
812	entry = list_first_entry(&relocated, struct amdgpu_vm_bo_base,
813	vm_status);
814	amdgpu_vm_bo_idle(vm_bo: entry);
815	}
816
817	error:
818	drm_dev_exit(idx);
819	return r;
820	}
821
822	/**
823	* amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence
824	* @fence: unused
825	* @cb: the callback structure
826	*
827	* Increments the tlb sequence to make sure that future CS execute a VM flush.
828	*/
829	static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
830	struct dma_fence_cb *cb)
831	{
832	struct amdgpu_vm_tlb_seq_struct *tlb_cb;
833
834	tlb_cb = container_of(cb, typeof(*tlb_cb), cb);
835	atomic64_inc(v: &tlb_cb->vm->tlb_seq);
836	kfree(objp: tlb_cb);
837	}
838
839	/**
840	* amdgpu_vm_update_range - update a range in the vm page table
841	*
842	* @adev: amdgpu_device pointer to use for commands
843	* @vm: the VM to update the range
844	* @immediate: immediate submission in a page fault
845	* @unlocked: unlocked invalidation during MM callback
846	* @flush_tlb: trigger tlb invalidation after update completed
847	* @allow_override: change MTYPE for local NUMA nodes
848	* @resv: fences we need to sync to
849	* @start: start of mapped range
850	* @last: last mapped entry
851	* @flags: flags for the entries
852	* @offset: offset into nodes and pages_addr
853	* @vram_base: base for vram mappings
854	* @res: ttm_resource to map
855	* @pages_addr: DMA addresses to use for mapping
856	* @fence: optional resulting fence
857	*
858	* Fill in the page table entries between @start and @last.
859	*
860	* Returns:
861	* 0 for success, negative erro code for failure.
862	*/
863	int amdgpu_vm_update_range(struct amdgpu_device adev, struct* amdgpu_vm *vm,
864	bool immediate, bool unlocked, bool flush_tlb, bool allow_override,
865	struct dma_resv *resv, uint64_t start, uint64_t last,
866	uint64_t flags, uint64_t offset, uint64_t vram_base,
867	struct ttm_resource res, dma_addr_t pages_addr,
868	struct dma_fence **fence)
869	{
870	struct amdgpu_vm_update_params params;
871	struct amdgpu_vm_tlb_seq_struct *tlb_cb;
872	struct amdgpu_res_cursor cursor;
873	enum amdgpu_sync_mode sync_mode;
874	int r, idx;
875
876	if (!drm_dev_enter(dev: adev_to_drm(adev), idx: &idx))
877	return -ENODEV;
878
879	tlb_cb = kmalloc(size: sizeof(*tlb_cb), GFP_KERNEL);
880	if (!tlb_cb) {
881	r = -ENOMEM;
882	goto error_unlock;
883	}
884
885	/ Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache,*
886	* heavy-weight flush TLB unconditionally.
887	*/
888	flush_tlb \|= adev->gmc.xgmi.num_physical_nodes &&
889	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`9`, `4`, `0`);
890
891	/*
892	* On GFX8 and older any 8 PTE block with a valid bit set enters the TLB
893	*/
894	flush_tlb \|= amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) < IP_VERSION(`9`, `0`, `0`);
895
896	memset(&params, `0`, sizeof(params));
897	params.adev = adev;
898	params.vm = vm;
899	params.immediate = immediate;
900	params.pages_addr = pages_addr;
901	params.unlocked = unlocked;
902	params.allow_override = allow_override;
903
904	/ Implicitly sync to command submissions in the same VM before*
905	* unmapping. Sync to moving fences before mapping.
906	*/
907	if (!(flags & AMDGPU_PTE_VALID))
908	sync_mode = AMDGPU_SYNC_EQ_OWNER;
909	else
910	sync_mode = AMDGPU_SYNC_EXPLICIT;
911
912	amdgpu_vm_eviction_lock(vm);
913	if (vm->evicting) {
914	r = -EBUSY;
915	goto error_free;
916	}
917
918	if (!unlocked && !dma_fence_is_signaled(fence: vm->last_unlocked)) {
919	struct dma_fence *tmp = dma_fence_get_stub();
920
921	amdgpu_bo_fence(bo: vm->root.bo, fence: vm->last_unlocked, shared: true);
922	swap(vm->last_unlocked, tmp);
923	dma_fence_put(fence: tmp);
924	}
925
926	r = vm->update_funcs->prepare(&params, resv, sync_mode);
927	if (r)
928	goto error_free;
929
930	amdgpu_res_first(res: pages_addr ? NULL : res, start: offset,
931	size: (last - start + `1`) * AMDGPU_GPU_PAGE_SIZE, cur: &cursor);
932	while (cursor.remaining) {
933	uint64_t tmp, num_entries, addr;
934
935	num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT;
936	if (pages_addr) {
937	bool contiguous = true;
938
939	if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) {
940	uint64_t pfn = cursor.start >> PAGE_SHIFT;
941	uint64_t count;
942
943	contiguous = pages_addr[pfn + `1`] ==
944	pages_addr[pfn] + PAGE_SIZE;
945
946	tmp = num_entries /
947	AMDGPU_GPU_PAGES_IN_CPU_PAGE;
948	for (count = `2`; count < tmp; ++count) {
949	uint64_t idx = pfn + count;
950
951	if (contiguous != (pages_addr[idx] ==
952	pages_addr[idx - `1`] + PAGE_SIZE))
953	break;
954	}
955	if (!contiguous)
956	count--;
957	num_entries = count *
958	AMDGPU_GPU_PAGES_IN_CPU_PAGE;
959	}
960
961	if (!contiguous) {
962	addr = cursor.start;
963	params.pages_addr = pages_addr;
964	} else {
965	addr = pages_addr[cursor.start >> PAGE_SHIFT];
966	params.pages_addr = NULL;
967	}
968
969	} else if (flags & (AMDGPU_PTE_VALID \| AMDGPU_PTE_PRT)) {
970	addr = vram_base + cursor.start;
971	} else {
972	addr = `0`;
973	}
974
975	tmp = start + num_entries;
976	r = amdgpu_vm_ptes_update(params: &params, start, end: tmp, dst: addr, flags);
977	if (r)
978	goto error_free;
979
980	amdgpu_res_next(cur: &cursor, size: num_entries * AMDGPU_GPU_PAGE_SIZE);
981	start = tmp;
982	}
983
984	r = vm->update_funcs->commit(&params, fence);
985
986	if (flush_tlb \|\| params.table_freed) {
987	tlb_cb->vm = vm;
988	if (fence && *fence &&
989	!dma_fence_add_callback(fence: *fence, cb: &tlb_cb->cb,
990	func: amdgpu_vm_tlb_seq_cb)) {
991	dma_fence_put(fence: vm->last_tlb_flush);
992	vm->last_tlb_flush = dma_fence_get(fence: *fence);
993	} else {
994	amdgpu_vm_tlb_seq_cb(NULL, cb: &tlb_cb->cb);
995	}
996	tlb_cb = NULL;
997	}
998
999	error_free:
1000	kfree(objp: tlb_cb);
1001
1002	error_unlock:
1003	amdgpu_vm_eviction_unlock(vm);
1004	drm_dev_exit(idx);
1005	return r;
1006	}
1007
1008	static void amdgpu_vm_bo_get_memory(struct amdgpu_bo_va *bo_va,
1009	struct amdgpu_mem_stats *stats)
1010	{
1011	struct amdgpu_vm *vm = bo_va->base.vm;
1012	struct amdgpu_bo *bo = bo_va->base.bo;
1013
1014	if (!bo)
1015	return;
1016
1017	/*
1018	* For now ignore BOs which are currently locked and potentially
1019	* changing their location.
1020	*/
1021	if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv &&
1022	!dma_resv_trylock(obj: bo->tbo.base.resv))
1023	return;
1024
1025	amdgpu_bo_get_memory(bo, stats);
1026	if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv)
1027	dma_resv_unlock(obj: bo->tbo.base.resv);
1028	}
1029
1030	void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
1031	struct amdgpu_mem_stats *stats)
1032	{
1033	struct amdgpu_bo_va bo_va, tmp;
1034
1035	spin_lock(lock: &vm->status_lock);
1036	list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status)
1037	amdgpu_vm_bo_get_memory(bo_va, stats);
1038
1039	list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status)
1040	amdgpu_vm_bo_get_memory(bo_va, stats);
1041
1042	list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
1043	amdgpu_vm_bo_get_memory(bo_va, stats);
1044
1045	list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status)
1046	amdgpu_vm_bo_get_memory(bo_va, stats);
1047
1048	list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status)
1049	amdgpu_vm_bo_get_memory(bo_va, stats);
1050
1051	list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status)
1052	amdgpu_vm_bo_get_memory(bo_va, stats);
1053	spin_unlock(lock: &vm->status_lock);
1054	}
1055
1056	/**
1057	* amdgpu_vm_bo_update - update all BO mappings in the vm page table
1058	*
1059	* @adev: amdgpu_device pointer
1060	* @bo_va: requested BO and VM object
1061	* @clear: if true clear the entries
1062	*
1063	* Fill in the page table entries for @bo_va.
1064	*
1065	* Returns:
1066	* 0 for success, -EINVAL for failure.
1067	*/
1068	int amdgpu_vm_bo_update(struct amdgpu_device adev, struct* amdgpu_bo_va *bo_va,
1069	bool clear)
1070	{
1071	struct amdgpu_bo *bo = bo_va->base.bo;
1072	struct amdgpu_vm *vm = bo_va->base.vm;
1073	struct amdgpu_bo_va_mapping *mapping;
1074	dma_addr_t *pages_addr = NULL;
1075	struct ttm_resource *mem;
1076	struct dma_fence **last_update;
1077	bool flush_tlb = clear;
1078	bool uncached;
1079	struct dma_resv *resv;
1080	uint64_t vram_base;
1081	uint64_t flags;
1082	int r;
1083
1084	if (clear \|\| !bo) {
1085	mem = NULL;
1086	resv = vm->root.bo->tbo.base.resv;
1087	} else {
1088	struct drm_gem_object *obj = &bo->tbo.base;
1089
1090	resv = bo->tbo.base.resv;
1091	if (obj->import_attach && bo_va->is_xgmi) {
1092	struct dma_buf *dma_buf = obj->import_attach->dmabuf;
1093	struct drm_gem_object *gobj = dma_buf->priv;
1094	struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
1095
1096	if (abo->tbo.resource &&
1097	abo->tbo.resource->mem_type == TTM_PL_VRAM)
1098	bo = gem_to_amdgpu_bo(gobj);
1099	}
1100	mem = bo->tbo.resource;
1101	if (mem->mem_type == TTM_PL_TT \|\|
1102	mem->mem_type == AMDGPU_PL_PREEMPT)
1103	pages_addr = bo->tbo.ttm->dma_address;
1104	}
1105
1106	if (bo) {
1107	struct amdgpu_device *bo_adev;
1108
1109	flags = amdgpu_ttm_tt_pte_flags(adev, ttm: bo->tbo.ttm, mem);
1110
1111	if (amdgpu_bo_encrypted(bo))
1112	flags \|= AMDGPU_PTE_TMZ;
1113
1114	bo_adev = amdgpu_ttm_adev(bdev: bo->tbo.bdev);
1115	vram_base = bo_adev->vm_manager.vram_base_offset;
1116	uncached = (bo->flags & AMDGPU_GEM_CREATE_UNCACHED) != `0`;
1117	} else {
1118	flags = `0x0`;
1119	vram_base = `0`;
1120	uncached = false;
1121	}
1122
1123	if (clear \|\| (bo && bo->tbo.base.resv ==
1124	vm->root.bo->tbo.base.resv))
1125	last_update = &vm->last_update;
1126	else
1127	last_update = &bo_va->last_pt_update;
1128
1129	if (!clear && bo_va->base.moved) {
1130	flush_tlb = true;
1131	list_splice_init(list: &bo_va->valids, head: &bo_va->invalids);
1132
1133	} else if (bo_va->cleared != clear) {
1134	list_splice_init(list: &bo_va->valids, head: &bo_va->invalids);
1135	}
1136
1137	list_for_each_entry(mapping, &bo_va->invalids, list) {
1138	uint64_t update_flags = flags;
1139
1140	/ normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here*
1141	* but in case of something, we filter the flags in first place
1142	*/
1143	if (!(mapping->flags & AMDGPU_PTE_READABLE))
1144	update_flags &= ~AMDGPU_PTE_READABLE;
1145	if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
1146	update_flags &= ~AMDGPU_PTE_WRITEABLE;
1147
1148	/ Apply ASIC specific mapping flags /
1149	amdgpu_gmc_get_vm_pte(adev, mapping, &update_flags);
1150
1151	trace_amdgpu_vm_bo_update(mapping);
1152
1153	r = amdgpu_vm_update_range(adev, vm, immediate: false, unlocked: false, flush_tlb,
1154	allow_override: !uncached, resv, start: mapping->start, last: mapping->last,
1155	flags: update_flags, offset: mapping->offset,
1156	vram_base, res: mem, pages_addr,
1157	fence: last_update);
1158	if (r)
1159	return r;
1160	}
1161
1162	/ If the BO is not in its preferred location add it back to*
1163	* the evicted list so that it gets validated again on the
1164	* next command submission.
1165	*/
1166	if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
1167	uint32_t mem_type = bo->tbo.resource->mem_type;
1168
1169	if (!(bo->preferred_domains &
1170	amdgpu_mem_type_to_domain(mem_type)))
1171	amdgpu_vm_bo_evicted(vm_bo: &bo_va->base);
1172	else
1173	amdgpu_vm_bo_idle(vm_bo: &bo_va->base);
1174	} else {
1175	amdgpu_vm_bo_done(vm_bo: &bo_va->base);
1176	}
1177
1178	list_splice_init(list: &bo_va->invalids, head: &bo_va->valids);
1179	bo_va->cleared = clear;
1180	bo_va->base.moved = false;
1181
1182	if (trace_amdgpu_vm_bo_mapping_enabled()) {
1183	list_for_each_entry(mapping, &bo_va->valids, list)
1184	trace_amdgpu_vm_bo_mapping(mapping);
1185	}
1186
1187	return `0`;
1188	}
1189
1190	/**
1191	* amdgpu_vm_update_prt_state - update the global PRT state
1192	*
1193	* @adev: amdgpu_device pointer
1194	*/
1195	static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
1196	{
1197	unsigned long flags;
1198	bool enable;
1199
1200	spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
1201	enable = !!atomic_read(v: &adev->vm_manager.num_prt_users);
1202	adev->gmc.gmc_funcs->set_prt(adev, enable);
1203	spin_unlock_irqrestore(lock: &adev->vm_manager.prt_lock, flags);
1204	}
1205
1206	/**
1207	* amdgpu_vm_prt_get - add a PRT user
1208	*
1209	* @adev: amdgpu_device pointer
1210	*/
1211	static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
1212	{
1213	if (!adev->gmc.gmc_funcs->set_prt)
1214	return;
1215
1216	if (atomic_inc_return(v: &adev->vm_manager.num_prt_users) == `1`)
1217	amdgpu_vm_update_prt_state(adev);
1218	}
1219
1220	/**
1221	* amdgpu_vm_prt_put - drop a PRT user
1222	*
1223	* @adev: amdgpu_device pointer
1224	*/
1225	static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
1226	{
1227	if (atomic_dec_return(v: &adev->vm_manager.num_prt_users) == `0`)
1228	amdgpu_vm_update_prt_state(adev);
1229	}
1230
1231	/**
1232	* amdgpu_vm_prt_cb - callback for updating the PRT status
1233	*
1234	* @fence: fence for the callback
1235	* @_cb: the callback function
1236	*/
1237	static void amdgpu_vm_prt_cb(struct dma_fence fence, struct* dma_fence_cb *_cb)
1238	{
1239	struct amdgpu_prt_cb cb = container_of(_cb, struct* amdgpu_prt_cb, cb);
1240
1241	amdgpu_vm_prt_put(adev: cb->adev);
1242	kfree(objp: cb);
1243	}
1244
1245	/**
1246	* amdgpu_vm_add_prt_cb - add callback for updating the PRT status
1247	*
1248	* @adev: amdgpu_device pointer
1249	* @fence: fence for the callback
1250	*/
1251	static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
1252	struct dma_fence *fence)
1253	{
1254	struct amdgpu_prt_cb *cb;
1255
1256	if (!adev->gmc.gmc_funcs->set_prt)
1257	return;
1258
1259	cb = kmalloc(size: sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
1260	if (!cb) {
1261	/ Last resort when we are OOM /
1262	if (fence)
1263	dma_fence_wait(fence, intr: false);
1264
1265	amdgpu_vm_prt_put(adev);
1266	} else {
1267	cb->adev = adev;
1268	if (!fence \|\| dma_fence_add_callback(fence, cb: &cb->cb,
1269	func: amdgpu_vm_prt_cb))
1270	amdgpu_vm_prt_cb(fence, cb: &cb->cb);
1271	}
1272	}
1273
1274	/**
1275	* amdgpu_vm_free_mapping - free a mapping
1276	*
1277	* @adev: amdgpu_device pointer
1278	* @vm: requested vm
1279	* @mapping: mapping to be freed
1280	* @fence: fence of the unmap operation
1281	*
1282	* Free a mapping and make sure we decrease the PRT usage count if applicable.
1283	*/
1284	static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
1285	struct amdgpu_vm *vm,
1286	struct amdgpu_bo_va_mapping *mapping,
1287	struct dma_fence *fence)
1288	{
1289	if (mapping->flags & AMDGPU_PTE_PRT)
1290	amdgpu_vm_add_prt_cb(adev, fence);
1291	kfree(objp: mapping);
1292	}
1293
1294	/**
1295	* amdgpu_vm_prt_fini - finish all prt mappings
1296	*
1297	* @adev: amdgpu_device pointer
1298	* @vm: requested vm
1299	*
1300	* Register a cleanup callback to disable PRT support after VM dies.
1301	*/
1302	static void amdgpu_vm_prt_fini(struct amdgpu_device adev, struct* amdgpu_vm *vm)
1303	{
1304	struct dma_resv *resv = vm->root.bo->tbo.base.resv;
1305	struct dma_resv_iter cursor;
1306	struct dma_fence *fence;
1307
1308	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) {
1309	/ Add a callback for each fence in the reservation object /
1310	amdgpu_vm_prt_get(adev);
1311	amdgpu_vm_add_prt_cb(adev, fence);
1312	}
1313	}
1314
1315	/**
1316	* amdgpu_vm_clear_freed - clear freed BOs in the PT
1317	*
1318	* @adev: amdgpu_device pointer
1319	* @vm: requested vm
1320	* @fence: optional resulting fence (unchanged if no work needed to be done
1321	* or if an error occurred)
1322	*
1323	* Make sure all freed BOs are cleared in the PT.
1324	* PTs have to be reserved and mutex must be locked!
1325	*
1326	* Returns:
1327	* 0 for success.
1328	*
1329	*/
1330	int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
1331	struct amdgpu_vm *vm,
1332	struct dma_fence **fence)
1333	{
1334	struct dma_resv *resv = vm->root.bo->tbo.base.resv;
1335	struct amdgpu_bo_va_mapping *mapping;
1336	uint64_t init_pte_value = `0`;
1337	struct dma_fence *f = NULL;
1338	int r;
1339
1340	while (!list_empty(head: &vm->freed)) {
1341	mapping = list_first_entry(&vm->freed,
1342	struct amdgpu_bo_va_mapping, list);
1343	list_del(entry: &mapping->list);
1344
1345	if (vm->pte_support_ats &&
1346	mapping->start < AMDGPU_GMC_HOLE_START)
1347	init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
1348
1349	r = amdgpu_vm_update_range(adev, vm, immediate: false, unlocked: false, flush_tlb: true, allow_override: false,
1350	resv, start: mapping->start, last: mapping->last,
1351	flags: init_pte_value, offset: `0`, vram_base: `0`, NULL, NULL,
1352	fence: &f);
1353	amdgpu_vm_free_mapping(adev, vm, mapping, fence: f);
1354	if (r) {
1355	dma_fence_put(fence: f);
1356	return r;
1357	}
1358	}
1359
1360	if (fence && f) {
1361	dma_fence_put(fence: *fence);
1362	*fence = f;
1363	} else {
1364	dma_fence_put(fence: f);
1365	}
1366
1367	return `0`;
1368
1369	}
1370
1371	/**
1372	* amdgpu_vm_handle_moved - handle moved BOs in the PT
1373	*
1374	* @adev: amdgpu_device pointer
1375	* @vm: requested vm
1376	* @ticket: optional reservation ticket used to reserve the VM
1377	*
1378	* Make sure all BOs which are moved are updated in the PTs.
1379	*
1380	* Returns:
1381	* 0 for success.
1382	*
1383	* PTs have to be reserved!
1384	*/
1385	int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
1386	struct amdgpu_vm *vm,
1387	struct ww_acquire_ctx *ticket)
1388	{
1389	struct amdgpu_bo_va *bo_va;
1390	struct dma_resv *resv;
1391	bool clear, unlock;
1392	int r;
1393
1394	spin_lock(lock: &vm->status_lock);
1395	while (!list_empty(head: &vm->moved)) {
1396	bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va,
1397	base.vm_status);
1398	spin_unlock(lock: &vm->status_lock);
1399
1400	/ Per VM BOs never need to bo cleared in the page tables /
1401	r = amdgpu_vm_bo_update(adev, bo_va, clear: false);
1402	if (r)
1403	return r;
1404	spin_lock(lock: &vm->status_lock);
1405	}
1406
1407	while (!list_empty(head: &vm->invalidated)) {
1408	bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
1409	base.vm_status);
1410	resv = bo_va->base.bo->tbo.base.resv;
1411	spin_unlock(lock: &vm->status_lock);
1412
1413	/ Try to reserve the BO to avoid clearing its ptes /
1414	if (!adev->debug_vm && dma_resv_trylock(obj: resv)) {
1415	clear = false;
1416	unlock = true;
1417	/ The caller is already holding the reservation lock /
1418	} else if (ticket && dma_resv_locking_ctx(obj: resv) == ticket) {
1419	clear = false;
1420	unlock = false;
1421	/ Somebody else is using the BO right now /
1422	} else {
1423	clear = true;
1424	unlock = false;
1425	}
1426
1427	r = amdgpu_vm_bo_update(adev, bo_va, clear);
1428	if (r)
1429	return r;
1430
1431	if (unlock)
1432	dma_resv_unlock(obj: resv);
1433	spin_lock(lock: &vm->status_lock);
1434	}
1435	spin_unlock(lock: &vm->status_lock);
1436
1437	return `0`;
1438	}
1439
1440	/**
1441	* amdgpu_vm_bo_add - add a bo to a specific vm
1442	*
1443	* @adev: amdgpu_device pointer
1444	* @vm: requested vm
1445	* @bo: amdgpu buffer object
1446	*
1447	* Add @bo into the requested vm.
1448	* Add @bo to the list of bos associated with the vm
1449	*
1450	* Returns:
1451	* Newly added bo_va or NULL for failure
1452	*
1453	* Object has to be reserved!
1454	*/
1455	struct amdgpu_bo_va amdgpu_vm_bo_add(struct* amdgpu_device *adev,
1456	struct amdgpu_vm *vm,
1457	struct amdgpu_bo *bo)
1458	{
1459	struct amdgpu_bo_va *bo_va;
1460
1461	bo_va = kzalloc(size: sizeof(struct amdgpu_bo_va), GFP_KERNEL);
1462	if (bo_va == NULL) {
1463	return NULL;
1464	}
1465	amdgpu_vm_bo_base_init(base: &bo_va->base, vm, bo);
1466
1467	bo_va->ref_count = `1`;
1468	bo_va->last_pt_update = dma_fence_get_stub();
1469	INIT_LIST_HEAD(list: &bo_va->valids);
1470	INIT_LIST_HEAD(list: &bo_va->invalids);
1471
1472	if (!bo)
1473	return bo_va;
1474
1475	dma_resv_assert_held(bo->tbo.base.resv);
1476	if (amdgpu_dmabuf_is_xgmi_accessible(adev, bo)) {
1477	bo_va->is_xgmi = true;
1478	/ Power up XGMI if it can be potentially used /
1479	amdgpu_xgmi_set_pstate(adev, pstate: AMDGPU_XGMI_PSTATE_MAX_VEGA20);
1480	}
1481
1482	return bo_va;
1483	}
1484
1485
1486	/**
1487	* amdgpu_vm_bo_insert_map - insert a new mapping
1488	*
1489	* @adev: amdgpu_device pointer
1490	* @bo_va: bo_va to store the address
1491	* @mapping: the mapping to insert
1492	*
1493	* Insert a new mapping into all structures.
1494	*/
1495	static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
1496	struct amdgpu_bo_va *bo_va,
1497	struct amdgpu_bo_va_mapping *mapping)
1498	{
1499	struct amdgpu_vm *vm = bo_va->base.vm;
1500	struct amdgpu_bo *bo = bo_va->base.bo;
1501
1502	mapping->bo_va = bo_va;
1503	list_add(new: &mapping->list, head: &bo_va->invalids);
1504	amdgpu_vm_it_insert(node: mapping, root: &vm->va);
1505
1506	if (mapping->flags & AMDGPU_PTE_PRT)
1507	amdgpu_vm_prt_get(adev);
1508
1509	if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
1510	!bo_va->base.moved) {
1511	amdgpu_vm_bo_moved(vm_bo: &bo_va->base);
1512	}
1513	trace_amdgpu_vm_bo_map(bo_va, mapping);
1514	}
1515
1516	/**
1517	* amdgpu_vm_bo_map - map bo inside a vm
1518	*
1519	* @adev: amdgpu_device pointer
1520	* @bo_va: bo_va to store the address
1521	* @saddr: where to map the BO
1522	* @offset: requested offset in the BO
1523	* @size: BO size in bytes
1524	* @flags: attributes of pages (read/write/valid/etc.)
1525	*
1526	* Add a mapping of the BO at the specefied addr into the VM.
1527	*
1528	* Returns:
1529	* 0 for success, error for failure.
1530	*
1531	* Object has to be reserved and unreserved outside!
1532	*/
1533	int amdgpu_vm_bo_map(struct amdgpu_device *adev,
1534	struct amdgpu_bo_va *bo_va,
1535	uint64_t saddr, uint64_t offset,
1536	uint64_t size, uint64_t flags)
1537	{
1538	struct amdgpu_bo_va_mapping mapping, tmp;
1539	struct amdgpu_bo *bo = bo_va->base.bo;
1540	struct amdgpu_vm *vm = bo_va->base.vm;
1541	uint64_t eaddr;
1542
1543	/ validate the parameters /
1544	if (saddr & ~PAGE_MASK \|\| offset & ~PAGE_MASK \|\| size & ~PAGE_MASK)
1545	return -EINVAL;
1546	if (saddr + size <= saddr \|\| offset + size <= offset)
1547	return -EINVAL;
1548
1549	/ make sure object fit at this offset /
1550	eaddr = saddr + size - `1`;
1551	if ((bo && offset + size > amdgpu_bo_size(bo)) \|\|
1552	(eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
1553	return -EINVAL;
1554
1555	saddr /= AMDGPU_GPU_PAGE_SIZE;
1556	eaddr /= AMDGPU_GPU_PAGE_SIZE;
1557
1558	tmp = amdgpu_vm_it_iter_first(root: &vm->va, start: saddr, last: eaddr);
1559	if (tmp) {
1560	/ bo and tmp overlap, invalid addr /
1561	dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
1562	"0x%010Lx-0x%010Lx\n", bo, saddr, eaddr,
1563	tmp->start, tmp->last + `1`);
1564	return -EINVAL;
1565	}
1566
1567	mapping = kmalloc(size: sizeof(*mapping), GFP_KERNEL);
1568	if (!mapping)
1569	return -ENOMEM;
1570
1571	mapping->start = saddr;
1572	mapping->last = eaddr;
1573	mapping->offset = offset;
1574	mapping->flags = flags;
1575
1576	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
1577
1578	return `0`;
1579	}
1580
1581	/**
1582	* amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings
1583	*
1584	* @adev: amdgpu_device pointer
1585	* @bo_va: bo_va to store the address
1586	* @saddr: where to map the BO
1587	* @offset: requested offset in the BO
1588	* @size: BO size in bytes
1589	* @flags: attributes of pages (read/write/valid/etc.)
1590	*
1591	* Add a mapping of the BO at the specefied addr into the VM. Replace existing
1592	* mappings as we do so.
1593	*
1594	* Returns:
1595	* 0 for success, error for failure.
1596	*
1597	* Object has to be reserved and unreserved outside!
1598	*/
1599	int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
1600	struct amdgpu_bo_va *bo_va,
1601	uint64_t saddr, uint64_t offset,
1602	uint64_t size, uint64_t flags)
1603	{
1604	struct amdgpu_bo_va_mapping *mapping;
1605	struct amdgpu_bo *bo = bo_va->base.bo;
1606	uint64_t eaddr;
1607	int r;
1608
1609	/ validate the parameters /
1610	if (saddr & ~PAGE_MASK \|\| offset & ~PAGE_MASK \|\| size & ~PAGE_MASK)
1611	return -EINVAL;
1612	if (saddr + size <= saddr \|\| offset + size <= offset)
1613	return -EINVAL;
1614
1615	/ make sure object fit at this offset /
1616	eaddr = saddr + size - `1`;
1617	if ((bo && offset + size > amdgpu_bo_size(bo)) \|\|
1618	(eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
1619	return -EINVAL;
1620
1621	/ Allocate all the needed memory /
1622	mapping = kmalloc(size: sizeof(*mapping), GFP_KERNEL);
1623	if (!mapping)
1624	return -ENOMEM;
1625
1626	r = amdgpu_vm_bo_clear_mappings(adev, vm: bo_va->base.vm, saddr, size);
1627	if (r) {
1628	kfree(objp: mapping);
1629	return r;
1630	}
1631
1632	saddr /= AMDGPU_GPU_PAGE_SIZE;
1633	eaddr /= AMDGPU_GPU_PAGE_SIZE;
1634
1635	mapping->start = saddr;
1636	mapping->last = eaddr;
1637	mapping->offset = offset;
1638	mapping->flags = flags;
1639
1640	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
1641
1642	return `0`;
1643	}
1644
1645	/**
1646	* amdgpu_vm_bo_unmap - remove bo mapping from vm
1647	*
1648	* @adev: amdgpu_device pointer
1649	* @bo_va: bo_va to remove the address from
1650	* @saddr: where to the BO is mapped
1651	*
1652	* Remove a mapping of the BO at the specefied addr from the VM.
1653	*
1654	* Returns:
1655	* 0 for success, error for failure.
1656	*
1657	* Object has to be reserved and unreserved outside!
1658	*/
1659	int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
1660	struct amdgpu_bo_va *bo_va,
1661	uint64_t saddr)
1662	{
1663	struct amdgpu_bo_va_mapping *mapping;
1664	struct amdgpu_vm *vm = bo_va->base.vm;
1665	bool valid = true;
1666
1667	saddr /= AMDGPU_GPU_PAGE_SIZE;
1668
1669	list_for_each_entry(mapping, &bo_va->valids, list) {
1670	if (mapping->start == saddr)
1671	break;
1672	}
1673
1674	if (&mapping->list == &bo_va->valids) {
1675	valid = false;
1676
1677	list_for_each_entry(mapping, &bo_va->invalids, list) {
1678	if (mapping->start == saddr)
1679	break;
1680	}
1681
1682	if (&mapping->list == &bo_va->invalids)
1683	return -ENOENT;
1684	}
1685
1686	list_del(entry: &mapping->list);
1687	amdgpu_vm_it_remove(node: mapping, root: &vm->va);
1688	mapping->bo_va = NULL;
1689	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
1690
1691	if (valid)
1692	list_add(new: &mapping->list, head: &vm->freed);
1693	else
1694	amdgpu_vm_free_mapping(adev, vm, mapping,
1695	fence: bo_va->last_pt_update);
1696
1697	return `0`;
1698	}
1699
1700	/**
1701	* amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range
1702	*
1703	* @adev: amdgpu_device pointer
1704	* @vm: VM structure to use
1705	* @saddr: start of the range
1706	* @size: size of the range
1707	*
1708	* Remove all mappings in a range, split them as appropriate.
1709	*
1710	* Returns:
1711	* 0 for success, error for failure.
1712	*/
1713	int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
1714	struct amdgpu_vm *vm,
1715	uint64_t saddr, uint64_t size)
1716	{
1717	struct amdgpu_bo_va_mapping before, after, tmp, next;
1718	LIST_HEAD(removed);
1719	uint64_t eaddr;
1720
1721	eaddr = saddr + size - `1`;
1722	saddr /= AMDGPU_GPU_PAGE_SIZE;
1723	eaddr /= AMDGPU_GPU_PAGE_SIZE;
1724
1725	/ Allocate all the needed memory /
1726	before = kzalloc(size: sizeof(*before), GFP_KERNEL);
1727	if (!before)
1728	return -ENOMEM;
1729	INIT_LIST_HEAD(list: &before->list);
1730
1731	after = kzalloc(size: sizeof(*after), GFP_KERNEL);
1732	if (!after) {
1733	kfree(objp: before);
1734	return -ENOMEM;
1735	}
1736	INIT_LIST_HEAD(list: &after->list);
1737
1738	/ Now gather all removed mappings /
1739	tmp = amdgpu_vm_it_iter_first(root: &vm->va, start: saddr, last: eaddr);
1740	while (tmp) {
1741	/ Remember mapping split at the start /
1742	if (tmp->start < saddr) {
1743	before->start = tmp->start;
1744	before->last = saddr - `1`;
1745	before->offset = tmp->offset;
1746	before->flags = tmp->flags;
1747	before->bo_va = tmp->bo_va;
1748	list_add(new: &before->list, head: &tmp->bo_va->invalids);
1749	}
1750
1751	/ Remember mapping split at the end /
1752	if (tmp->last > eaddr) {
1753	after->start = eaddr + `1`;
1754	after->last = tmp->last;
1755	after->offset = tmp->offset;
1756	after->offset += (after->start - tmp->start) << PAGE_SHIFT;
1757	after->flags = tmp->flags;
1758	after->bo_va = tmp->bo_va;
1759	list_add(new: &after->list, head: &tmp->bo_va->invalids);
1760	}
1761
1762	list_del(entry: &tmp->list);
1763	list_add(new: &tmp->list, head: &removed);
1764
1765	tmp = amdgpu_vm_it_iter_next(node: tmp, start: saddr, last: eaddr);
1766	}
1767
1768	/ And free them up /
1769	list_for_each_entry_safe(tmp, next, &removed, list) {
1770	amdgpu_vm_it_remove(node: tmp, root: &vm->va);
1771	list_del(entry: &tmp->list);
1772
1773	if (tmp->start < saddr)
1774	tmp->start = saddr;
1775	if (tmp->last > eaddr)
1776	tmp->last = eaddr;
1777
1778	tmp->bo_va = NULL;
1779	list_add(new: &tmp->list, head: &vm->freed);
1780	trace_amdgpu_vm_bo_unmap(NULL, mapping: tmp);
1781	}
1782
1783	/ Insert partial mapping before the range /
1784	if (!list_empty(head: &before->list)) {
1785	struct amdgpu_bo *bo = before->bo_va->base.bo;
1786
1787	amdgpu_vm_it_insert(node: before, root: &vm->va);
1788	if (before->flags & AMDGPU_PTE_PRT)
1789	amdgpu_vm_prt_get(adev);
1790
1791	if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
1792	!before->bo_va->base.moved)
1793	amdgpu_vm_bo_moved(vm_bo: &before->bo_va->base);
1794	} else {
1795	kfree(objp: before);
1796	}
1797
1798	/ Insert partial mapping after the range /
1799	if (!list_empty(head: &after->list)) {
1800	struct amdgpu_bo *bo = after->bo_va->base.bo;
1801
1802	amdgpu_vm_it_insert(node: after, root: &vm->va);
1803	if (after->flags & AMDGPU_PTE_PRT)
1804	amdgpu_vm_prt_get(adev);
1805
1806	if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
1807	!after->bo_va->base.moved)
1808	amdgpu_vm_bo_moved(vm_bo: &after->bo_va->base);
1809	} else {
1810	kfree(objp: after);
1811	}
1812
1813	return `0`;
1814	}
1815
1816	/**
1817	* amdgpu_vm_bo_lookup_mapping - find mapping by address
1818	*
1819	* @vm: the requested VM
1820	* @addr: the address
1821	*
1822	* Find a mapping by it's address.
1823	*
1824	* Returns:
1825	* The amdgpu_bo_va_mapping matching for addr or NULL
1826	*
1827	*/
1828	struct amdgpu_bo_va_mapping amdgpu_vm_bo_lookup_mapping(struct* amdgpu_vm *vm,
1829	uint64_t addr)
1830	{
1831	return amdgpu_vm_it_iter_first(root: &vm->va, start: addr, last: addr);
1832	}
1833
1834	/**
1835	* amdgpu_vm_bo_trace_cs - trace all reserved mappings
1836	*
1837	* @vm: the requested vm
1838	* @ticket: CS ticket
1839	*
1840	* Trace all mappings of BOs reserved during a command submission.
1841	*/
1842	void amdgpu_vm_bo_trace_cs(struct amdgpu_vm vm, struct* ww_acquire_ctx *ticket)
1843	{
1844	struct amdgpu_bo_va_mapping *mapping;
1845
1846	if (!trace_amdgpu_vm_bo_cs_enabled())
1847	return;
1848
1849	for (mapping = amdgpu_vm_it_iter_first(root: &vm->va, start: `0`, U64_MAX); mapping;
1850	mapping = amdgpu_vm_it_iter_next(node: mapping, start: `0`, U64_MAX)) {
1851	if (mapping->bo_va && mapping->bo_va->base.bo) {
1852	struct amdgpu_bo *bo;
1853
1854	bo = mapping->bo_va->base.bo;
1855	if (dma_resv_locking_ctx(obj: bo->tbo.base.resv) !=
1856	ticket)
1857	continue;
1858	}
1859
1860	trace_amdgpu_vm_bo_cs(mapping);
1861	}
1862	}
1863
1864	/**
1865	* amdgpu_vm_bo_del - remove a bo from a specific vm
1866	*
1867	* @adev: amdgpu_device pointer
1868	* @bo_va: requested bo_va
1869	*
1870	* Remove @bo_va->bo from the requested vm.
1871	*
1872	* Object have to be reserved!
1873	*/
1874	void amdgpu_vm_bo_del(struct amdgpu_device *adev,
1875	struct amdgpu_bo_va *bo_va)
1876	{
1877	struct amdgpu_bo_va_mapping mapping, next;
1878	struct amdgpu_bo *bo = bo_va->base.bo;
1879	struct amdgpu_vm *vm = bo_va->base.vm;
1880	struct amdgpu_vm_bo_base **base;
1881
1882	dma_resv_assert_held(vm->root.bo->tbo.base.resv);
1883
1884	if (bo) {
1885	dma_resv_assert_held(bo->tbo.base.resv);
1886	if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)
1887	ttm_bo_set_bulk_move(bo: &bo->tbo, NULL);
1888
1889	for (base = &bo_va->base.bo->vm_bo; *base;
1890	base = &(*base)->next) {
1891	if (*base != &bo_va->base)
1892	continue;
1893
1894	*base = bo_va->base.next;
1895	break;
1896	}
1897	}
1898
1899	spin_lock(lock: &vm->status_lock);
1900	list_del(entry: &bo_va->base.vm_status);
1901	spin_unlock(lock: &vm->status_lock);
1902
1903	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
1904	list_del(entry: &mapping->list);
1905	amdgpu_vm_it_remove(node: mapping, root: &vm->va);
1906	mapping->bo_va = NULL;
1907	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
1908	list_add(new: &mapping->list, head: &vm->freed);
1909	}
1910	list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
1911	list_del(entry: &mapping->list);
1912	amdgpu_vm_it_remove(node: mapping, root: &vm->va);
1913	amdgpu_vm_free_mapping(adev, vm, mapping,
1914	fence: bo_va->last_pt_update);
1915	}
1916
1917	dma_fence_put(fence: bo_va->last_pt_update);
1918
1919	if (bo && bo_va->is_xgmi)
1920	amdgpu_xgmi_set_pstate(adev, pstate: AMDGPU_XGMI_PSTATE_MIN);
1921
1922	kfree(objp: bo_va);
1923	}
1924
1925	/**
1926	* amdgpu_vm_evictable - check if we can evict a VM
1927	*
1928	* @bo: A page table of the VM.
1929	*
1930	* Check if it is possible to evict a VM.
1931	*/
1932	bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
1933	{
1934	struct amdgpu_vm_bo_base *bo_base = bo->vm_bo;
1935
1936	/ Page tables of a destroyed VM can go away immediately /
1937	if (!bo_base \|\| !bo_base->vm)
1938	return true;
1939
1940	/ Don't evict VM page tables while they are busy /
1941	if (!dma_resv_test_signaled(obj: bo->tbo.base.resv, usage: DMA_RESV_USAGE_BOOKKEEP))
1942	return false;
1943
1944	/ Try to block ongoing updates /
1945	if (!amdgpu_vm_eviction_trylock(vm: bo_base->vm))
1946	return false;
1947
1948	/ Don't evict VM page tables while they are updated /
1949	if (!dma_fence_is_signaled(fence: bo_base->vm->last_unlocked)) {
1950	amdgpu_vm_eviction_unlock(vm: bo_base->vm);
1951	return false;
1952	}
1953
1954	bo_base->vm->evicting = true;
1955	amdgpu_vm_eviction_unlock(vm: bo_base->vm);
1956	return true;
1957	}
1958
1959	/**
1960	* amdgpu_vm_bo_invalidate - mark the bo as invalid
1961	*
1962	* @adev: amdgpu_device pointer
1963	* @bo: amdgpu buffer object
1964	* @evicted: is the BO evicted
1965	*
1966	* Mark @bo as invalid.
1967	*/
1968	void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
1969	struct amdgpu_bo *bo, bool evicted)
1970	{
1971	struct amdgpu_vm_bo_base *bo_base;
1972
1973	/ shadow bo doesn't have bo base, its validation needs its parent /
1974	if (bo->parent && (amdgpu_bo_shadowed(bo: bo->parent) == bo))
1975	bo = bo->parent;
1976
1977	for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
1978	struct amdgpu_vm *vm = bo_base->vm;
1979
1980	if (evicted && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
1981	amdgpu_vm_bo_evicted(vm_bo: bo_base);
1982	continue;
1983	}
1984
1985	if (bo_base->moved)
1986	continue;
1987	bo_base->moved = true;
1988
1989	if (bo->tbo.type == ttm_bo_type_kernel)
1990	amdgpu_vm_bo_relocated(vm_bo: bo_base);
1991	else if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)
1992	amdgpu_vm_bo_moved(vm_bo: bo_base);
1993	else
1994	amdgpu_vm_bo_invalidated(vm_bo: bo_base);
1995	}
1996	}
1997
/**
 * amdgpu_vm_get_block_size - calculate VM page table size as power of two
 *
 * @vm_size: VM size
 *
 * Returns:
 * VM page table as power of two
 */
static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
{
	/* Total bits covered by PD + PTs */
	unsigned total_bits = ilog2(vm_size) + 18;

	/*
	 * Make sure the PD is 4K in size up to 8GB address space.
	 * Above that split equal between PD and PTs.
	 */
	return vm_size <= 8 ? total_bits - 9 : (total_bits + 3) / 2;
}
2018
2019	/**
2020	* amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
2021	*
2022	* @adev: amdgpu_device pointer
2023	* @min_vm_size: the minimum vm size in GB if it's set auto
2024	* @fragment_size_default: Default PTE fragment size
2025	* @max_level: max VMPT level
2026	* @max_bits: max address space size in bits
2027	*
2028	*/
2029	void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
2030	uint32_t fragment_size_default, unsigned max_level,
2031	unsigned max_bits)
2032	{
2033	unsigned int max_size = `1` << (max_bits - `30`);
2034	unsigned int vm_size;
2035	uint64_t tmp;
2036
2037	/ adjust vm size first /
2038	if (amdgpu_vm_size != -`1`) {
2039	vm_size = amdgpu_vm_size;
2040	if (vm_size > max_size) {
2041	dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",
2042	amdgpu_vm_size, max_size);
2043	vm_size = max_size;
2044	}
2045	} else {
2046	struct sysinfo si;
2047	unsigned int phys_ram_gb;
2048
2049	/ Optimal VM size depends on the amount of physical*
2050	* RAM available. Underlying requirements and
2051	* assumptions:
2052	*
2053	* - Need to map system memory and VRAM from all GPUs
2054	* - VRAM from other GPUs not known here
2055	* - Assume VRAM <= system memory
2056	* - On GFX8 and older, VM space can be segmented for
2057	* different MTYPEs
2058	* - Need to allow room for fragmentation, guard pages etc.
2059	*
2060	* This adds up to a rough guess of system memory x3.
2061	* Round up to power of two to maximize the available
2062	* VM size with the given page table size.
2063	*/
2064	si_meminfo(val: &si);
2065	phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit +
2066	(`1` << `30`) - `1`) >> `30`;
2067	vm_size = roundup_pow_of_two(
2068	min(max(phys_ram_gb * `3`, min_vm_size), max_size));
2069	}
2070
2071	adev->vm_manager.max_pfn = (uint64_t)vm_size << `18`;
2072
2073	tmp = roundup_pow_of_two(adev->vm_manager.max_pfn);
2074	if (amdgpu_vm_block_size != -`1`)
2075	tmp >>= amdgpu_vm_block_size - `9`;
2076	tmp = DIV_ROUND_UP(fls64(tmp) - `1`, `9`) - `1`;
2077	adev->vm_manager.num_level = min_t(unsigned int, max_level, tmp);
2078	switch (adev->vm_manager.num_level) {
2079	case `3`:
2080	adev->vm_manager.root_level = AMDGPU_VM_PDB2;
2081	break;
2082	case `2`:
2083	adev->vm_manager.root_level = AMDGPU_VM_PDB1;
2084	break;
2085	case `1`:
2086	adev->vm_manager.root_level = AMDGPU_VM_PDB0;
2087	break;
2088	default:
2089	dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n");
2090	}
2091	/ block size depends on vm size and hw setup/
2092	if (amdgpu_vm_block_size != -`1`)
2093	adev->vm_manager.block_size =
2094	min((unsigned)amdgpu_vm_block_size, max_bits
2095	- AMDGPU_GPU_PAGE_SHIFT
2096	- `9` * adev->vm_manager.num_level);
2097	else if (adev->vm_manager.num_level > `1`)
2098	adev->vm_manager.block_size = `9`;
2099	else
2100	adev->vm_manager.block_size = amdgpu_vm_get_block_size(vm_size: tmp);
2101
2102	if (amdgpu_vm_fragment_size == -`1`)
2103	adev->vm_manager.fragment_size = fragment_size_default;
2104	else
2105	adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
2106
2107	DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
2108	vm_size, adev->vm_manager.num_level + `1`,
2109	adev->vm_manager.block_size,
2110	adev->vm_manager.fragment_size);
2111	}
2112
2113	/**
2114	* amdgpu_vm_wait_idle - wait for the VM to become idle
2115	*
2116	* @vm: VM object to wait for
2117	* @timeout: timeout to wait for VM to become idle
2118	*/
2119	long amdgpu_vm_wait_idle(struct amdgpu_vm vm, long* timeout)
2120	{
2121	timeout = dma_resv_wait_timeout(obj: vm->root.bo->tbo.base.resv,
2122	usage: DMA_RESV_USAGE_BOOKKEEP,
2123	intr: true, timeout);
2124	if (timeout <= `0`)
2125	return timeout;
2126
2127	return dma_fence_wait_timeout(vm->last_unlocked, intr: true, timeout);
2128	}
2129
2130	/**
2131	* amdgpu_vm_init - initialize a vm instance
2132	*
2133	* @adev: amdgpu_device pointer
2134	* @vm: requested vm
2135	* @xcp_id: GPU partition selection id
2136	*
2137	* Init @vm fields.
2138	*
2139	* Returns:
2140	* 0 for success, error for failure.
2141	*/
2142	int amdgpu_vm_init(struct amdgpu_device adev, struct* amdgpu_vm *vm, int32_t xcp_id)
2143	{
2144	struct amdgpu_bo *root_bo;
2145	struct amdgpu_bo_vm *root;
2146	int r, i;
2147
2148	vm->va = RB_ROOT_CACHED;
2149	for (i = `0`; i < AMDGPU_MAX_VMHUBS; i++)
2150	vm->reserved_vmid[i] = NULL;
2151	INIT_LIST_HEAD(list: &vm->evicted);
2152	INIT_LIST_HEAD(list: &vm->relocated);
2153	INIT_LIST_HEAD(list: &vm->moved);
2154	INIT_LIST_HEAD(list: &vm->idle);
2155	INIT_LIST_HEAD(list: &vm->invalidated);
2156	spin_lock_init(&vm->status_lock);
2157	INIT_LIST_HEAD(list: &vm->freed);
2158	INIT_LIST_HEAD(list: &vm->done);
2159	INIT_LIST_HEAD(list: &vm->pt_freed);
2160	INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
2161
2162	r = amdgpu_vm_init_entities(adev, vm);
2163	if (r)
2164	return r;
2165
2166	vm->pte_support_ats = false;
2167	vm->is_compute_context = false;
2168
2169	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2170	AMDGPU_VM_USE_CPU_FOR_GFX);
2171
2172	DRM_DEBUG_DRIVER("VM update mode is %s\n",
2173	vm->use_cpu_for_update ? "CPU" : "SDMA");
2174	WARN_ONCE((vm->use_cpu_for_update &&
2175	!amdgpu_gmc_vram_full_visible(&adev->gmc)),
2176	"CPU update of VM recommended only for large BAR system\n");
2177
2178	if (vm->use_cpu_for_update)
2179	vm->update_funcs = &amdgpu_vm_cpu_funcs;
2180	else
2181	vm->update_funcs = &amdgpu_vm_sdma_funcs;
2182
2183	vm->last_update = dma_fence_get_stub();
2184	vm->last_unlocked = dma_fence_get_stub();
2185	vm->last_tlb_flush = dma_fence_get_stub();
2186	vm->generation = `0`;
2187
2188	mutex_init(&vm->eviction_lock);
2189	vm->evicting = false;
2190
2191	r = amdgpu_vm_pt_create(adev, vm, level: adev->vm_manager.root_level,
2192	immediate: false, vmbo: &root, xcp_id);
2193	if (r)
2194	goto error_free_delayed;
2195	root_bo = &root->bo;
2196	r = amdgpu_bo_reserve(bo: root_bo, no_intr: true);
2197	if (r)
2198	goto error_free_root;
2199
2200	r = dma_resv_reserve_fences(obj: root_bo->tbo.base.resv, num_fences: `1`);
2201	if (r)
2202	goto error_unreserve;
2203
2204	amdgpu_vm_bo_base_init(base: &vm->root, vm, bo: root_bo);
2205
2206	r = amdgpu_vm_pt_clear(adev, vm, vmbo: root, immediate: false);
2207	if (r)
2208	goto error_unreserve;
2209
2210	amdgpu_bo_unreserve(bo: vm->root.bo);
2211
2212	INIT_KFIFO(vm->faults);
2213
2214	return `0`;
2215
2216	error_unreserve:
2217	amdgpu_bo_unreserve(bo: vm->root.bo);
2218
2219	error_free_root:
2220	amdgpu_bo_unref(bo: &root->shadow);
2221	amdgpu_bo_unref(bo: &root_bo);
2222	vm->root.bo = NULL;
2223
2224	error_free_delayed:
2225	dma_fence_put(fence: vm->last_tlb_flush);
2226	dma_fence_put(fence: vm->last_unlocked);
2227	amdgpu_vm_fini_entities(vm);
2228
2229	return r;
2230	}
2231
2232	/**
2233	* amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
2234	*
2235	* @adev: amdgpu_device pointer
2236	* @vm: requested vm
2237	*
2238	* This only works on GFX VMs that don't have any BOs added and no
2239	* page tables allocated yet.
2240	*
2241	* Changes the following VM parameters:
2242	* - use_cpu_for_update
2243	* - pte_supports_ats
2244	*
2245	* Reinitializes the page directory to reflect the changed ATS
2246	* setting.
2247	*
2248	* Returns:
2249	* 0 for success, -errno for errors.
2250	*/
2251	int amdgpu_vm_make_compute(struct amdgpu_device adev, struct* amdgpu_vm *vm)
2252	{
2253	bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
2254	int r;
2255
2256	r = amdgpu_bo_reserve(bo: vm->root.bo, no_intr: true);
2257	if (r)
2258	return r;
2259
2260	/ Check if PD needs to be reinitialized and do it before*
2261	* changing any other state, in case it fails.
2262	*/
2263	if (pte_support_ats != vm->pte_support_ats) {
2264	/ Sanity checks /
2265	if (!amdgpu_vm_pt_is_root_clean(adev, vm)) {
2266	r = -EINVAL;
2267	goto unreserve_bo;
2268	}
2269
2270	vm->pte_support_ats = pte_support_ats;
2271	r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo),
2272	immediate: false);
2273	if (r)
2274	goto unreserve_bo;
2275	}
2276
2277	/ Update VM state /
2278	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2279	AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2280	DRM_DEBUG_DRIVER("VM update mode is %s\n",
2281	vm->use_cpu_for_update ? "CPU" : "SDMA");
2282	WARN_ONCE((vm->use_cpu_for_update &&
2283	!amdgpu_gmc_vram_full_visible(&adev->gmc)),
2284	"CPU update of VM recommended only for large BAR system\n");
2285
2286	if (vm->use_cpu_for_update) {
2287	/ Sync with last SDMA update/clear before switching to CPU /
2288	r = amdgpu_bo_sync_wait(bo: vm->root.bo,
2289	AMDGPU_FENCE_OWNER_UNDEFINED, intr: true);
2290	if (r)
2291	goto unreserve_bo;
2292
2293	vm->update_funcs = &amdgpu_vm_cpu_funcs;
2294	r = amdgpu_vm_pt_map_tables(adev, vm);
2295	if (r)
2296	goto unreserve_bo;
2297
2298	} else {
2299	vm->update_funcs = &amdgpu_vm_sdma_funcs;
2300	}
2301
2302	dma_fence_put(fence: vm->last_update);
2303	vm->last_update = dma_fence_get_stub();
2304	vm->is_compute_context = true;
2305
2306	/ Free the shadow bo for compute VM /
2307	amdgpu_bo_unref(bo: &to_amdgpu_bo_vm(vm->root.bo)->shadow);
2308
2309	goto unreserve_bo;
2310
2311	unreserve_bo:
2312	amdgpu_bo_unreserve(bo: vm->root.bo);
2313	return r;
2314	}
2315
2316	/**
2317	* amdgpu_vm_release_compute - release a compute vm
2318	* @adev: amdgpu_device pointer
2319	* @vm: a vm turned into compute vm by calling amdgpu_vm_make_compute
2320	*
2321	* This is a correspondant of amdgpu_vm_make_compute. It decouples compute
2322	* pasid from vm. Compute should stop use of vm after this call.
2323	*/
2324	void amdgpu_vm_release_compute(struct amdgpu_device adev, struct* amdgpu_vm *vm)
2325	{
2326	amdgpu_vm_set_pasid(adev, vm, pasid: `0`);
2327	vm->is_compute_context = false;
2328	}
2329
2330	/**
2331	* amdgpu_vm_fini - tear down a vm instance
2332	*
2333	* @adev: amdgpu_device pointer
2334	* @vm: requested vm
2335	*
2336	* Tear down @vm.
2337	* Unbind the VM and remove all bos from the vm bo list
2338	*/
2339	void amdgpu_vm_fini(struct amdgpu_device adev, struct* amdgpu_vm *vm)
2340	{
2341	struct amdgpu_bo_va_mapping mapping, tmp;
2342	bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
2343	struct amdgpu_bo *root;
2344	unsigned long flags;
2345	int i;
2346
2347	amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
2348
2349	flush_work(work: &vm->pt_free_work);
2350
2351	root = amdgpu_bo_ref(bo: vm->root.bo);
2352	amdgpu_bo_reserve(bo: root, no_intr: true);
2353	amdgpu_vm_set_pasid(adev, vm, pasid: `0`);
2354	dma_fence_wait(fence: vm->last_unlocked, intr: false);
2355	dma_fence_put(fence: vm->last_unlocked);
2356	dma_fence_wait(fence: vm->last_tlb_flush, intr: false);
2357	/ Make sure that all fence callbacks have completed /
2358	spin_lock_irqsave(vm->last_tlb_flush->lock, flags);
2359	spin_unlock_irqrestore(lock: vm->last_tlb_flush->lock, flags);
2360	dma_fence_put(fence: vm->last_tlb_flush);
2361
2362	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
2363	if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
2364	amdgpu_vm_prt_fini(adev, vm);
2365	prt_fini_needed = false;
2366	}
2367
2368	list_del(entry: &mapping->list);
2369	amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
2370	}
2371
2372	amdgpu_vm_pt_free_root(adev, vm);
2373	amdgpu_bo_unreserve(bo: root);
2374	amdgpu_bo_unref(bo: &root);
2375	WARN_ON(vm->root.bo);
2376
2377	amdgpu_vm_fini_entities(vm);
2378
2379	if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
2380	dev_err(adev->dev, "still active bo inside vm\n");
2381	}
2382	rbtree_postorder_for_each_entry_safe(mapping, tmp,
2383	&vm->va.rb_root, rb) {
2384	/ Don't remove the mapping here, we don't want to trigger a*
2385	* rebalance and the tree is about to be destroyed anyway.
2386	*/
2387	list_del(entry: &mapping->list);
2388	kfree(objp: mapping);
2389	}
2390
2391	dma_fence_put(fence: vm->last_update);
2392
2393	for (i = `0`; i < AMDGPU_MAX_VMHUBS; i++) {
2394	if (vm->reserved_vmid[i]) {
2395	amdgpu_vmid_free_reserved(adev, vmhub: i);
2396	vm->reserved_vmid[i] = false;
2397	}
2398	}
2399
2400	}
2401
2402	/**
2403	* amdgpu_vm_manager_init - init the VM manager
2404	*
2405	* @adev: amdgpu_device pointer
2406	*
2407	* Initialize the VM manager structures
2408	*/
2409	void amdgpu_vm_manager_init(struct amdgpu_device *adev)
2410	{
2411	unsigned i;
2412
2413	/ Concurrent flushes are only possible starting with Vega10 and*
2414	* are broken on Navi10 and Navi14.
2415	*/
2416	adev->vm_manager.concurrent_flush = !(adev->asic_type < CHIP_VEGA10 \|\|
2417	adev->asic_type == CHIP_NAVI10 \|\|
2418	adev->asic_type == CHIP_NAVI14);
2419	amdgpu_vmid_mgr_init(adev);
2420
2421	adev->vm_manager.fence_context =
2422	dma_fence_context_alloc(AMDGPU_MAX_RINGS);
2423	for (i = `0`; i < AMDGPU_MAX_RINGS; ++i)
2424	adev->vm_manager.seqno[i] = `0`;
2425
2426	spin_lock_init(&adev->vm_manager.prt_lock);
2427	atomic_set(v: &adev->vm_manager.num_prt_users, i: `0`);
2428
2429	/ If not overridden by the user, by default, only in large BAR systems*
2430	* Compute VM tables will be updated by CPU
2431	*/
2432	#ifdef CONFIG_X86_64
2433	if (amdgpu_vm_update_mode == -`1`) {
2434	/ For asic with VF MMIO access protection*
2435	* avoid using CPU for VM table updates
2436	*/
2437	if (amdgpu_gmc_vram_full_visible(gmc: &adev->gmc) &&
2438	!amdgpu_sriov_vf_mmio_access_protection(adev))
2439	adev->vm_manager.vm_update_mode =
2440	AMDGPU_VM_USE_CPU_FOR_COMPUTE;
2441	else
2442	adev->vm_manager.vm_update_mode = `0`;
2443	} else
2444	adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
2445	#else
2446	adev->vm_manager.vm_update_mode = `0`;
2447	#endif
2448
2449	xa_init_flags(xa: &adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
2450	}
2451
2452	/**
2453	* amdgpu_vm_manager_fini - cleanup VM manager
2454	*
2455	* @adev: amdgpu_device pointer
2456	*
2457	* Cleanup the VM manager and free resources.
2458	*/
2459	void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
2460	{
2461	WARN_ON(!xa_empty(&adev->vm_manager.pasids));
2462	xa_destroy(&adev->vm_manager.pasids);
2463
2464	amdgpu_vmid_mgr_fini(adev);
2465	}
2466
2467	/**
2468	* amdgpu_vm_ioctl - Manages VMID reservation for vm hubs.
2469	*
2470	* @dev: drm device pointer
2471	* @data: drm_amdgpu_vm
2472	* @filp: drm file pointer
2473	*
2474	* Returns:
2475	* 0 for success, -errno for errors.
2476	*/
2477	int amdgpu_vm_ioctl(struct drm_device dev, void* data, struct* drm_file *filp)
2478	{
2479	union drm_amdgpu_vm *args = data;
2480	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
2481	struct amdgpu_fpriv *fpriv = filp->driver_priv;
2482
2483	/ No valid flags defined yet /
2484	if (args->in.flags)
2485	return -EINVAL;
2486
2487	switch (args->in.op) {
2488	case AMDGPU_VM_OP_RESERVE_VMID:
2489	/ We only have requirement to reserve vmid from gfxhub /
2490	if (!fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(`0`)]) {
2491	amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(`0`));
2492	fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(`0`)] = true;
2493	}
2494
2495	break;
2496	case AMDGPU_VM_OP_UNRESERVE_VMID:
2497	if (fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(`0`)]) {
2498	amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(`0`));
2499	fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(`0`)] = false;
2500	}
2501	break;
2502	default:
2503	return -EINVAL;
2504	}
2505
2506	return `0`;
2507	}
2508
2509	/**
2510	* amdgpu_vm_get_task_info - Extracts task info for a PASID.
2511	*
2512	* @adev: drm device pointer
2513	* @pasid: PASID identifier for VM
2514	* @task_info: task_info to fill.
2515	*/
2516	void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
2517	struct amdgpu_task_info *task_info)
2518	{
2519	struct amdgpu_vm *vm;
2520	unsigned long flags;
2521
2522	xa_lock_irqsave(&adev->vm_manager.pasids, flags);
2523
2524	vm = xa_load(&adev->vm_manager.pasids, index: pasid);
2525	if (vm)
2526	*task_info = vm->task_info;
2527
2528	xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
2529	}
2530
2531	/**
2532	* amdgpu_vm_set_task_info - Sets VMs task info.
2533	*
2534	* @vm: vm for which to set the info
2535	*/
2536	void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
2537	{
2538	if (vm->task_info.pid)
2539	return;
2540
2541	vm->task_info.pid = current->pid;
2542	get_task_comm(vm->task_info.task_name, current);
2543
2544	if (current->group_leader->mm != current->mm)
2545	return;
2546
2547	vm->task_info.tgid = current->group_leader->pid;
2548	get_task_comm(vm->task_info.process_name, current->group_leader);
2549	}
2550
2551	/**
2552	* amdgpu_vm_handle_fault - graceful handling of VM faults.
2553	* @adev: amdgpu device pointer
2554	* @pasid: PASID of the VM
2555	* @vmid: VMID, only used for GFX 9.4.3.
2556	* @node_id: Node_id received in IH cookie. Only applicable for
2557	* GFX 9.4.3.
2558	* @addr: Address of the fault
2559	* @write_fault: true is write fault, false is read fault
2560	*
2561	* Try to gracefully handle a VM fault. Return true if the fault was handled and
2562	* shouldn't be reported any more.
2563	*/
2564	bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
2565	u32 vmid, u32 node_id, uint64_t addr,
2566	bool write_fault)
2567	{
2568	bool is_compute_context = false;
2569	struct amdgpu_bo *root;
2570	unsigned long irqflags;
2571	uint64_t value, flags;
2572	struct amdgpu_vm *vm;
2573	int r;
2574
2575	xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
2576	vm = xa_load(&adev->vm_manager.pasids, index: pasid);
2577	if (vm) {
2578	root = amdgpu_bo_ref(bo: vm->root.bo);
2579	is_compute_context = vm->is_compute_context;
2580	} else {
2581	root = NULL;
2582	}
2583	xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
2584
2585	if (!root)
2586	return false;
2587
2588	addr /= AMDGPU_GPU_PAGE_SIZE;
2589
2590	if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
2591	node_id, addr, write_fault)) {
2592	amdgpu_bo_unref(bo: &root);
2593	return true;
2594	}
2595
2596	r = amdgpu_bo_reserve(bo: root, no_intr: true);
2597	if (r)
2598	goto error_unref;
2599
2600	/ Double check that the VM still exists /
2601	xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
2602	vm = xa_load(&adev->vm_manager.pasids, index: pasid);
2603	if (vm && vm->root.bo != root)
2604	vm = NULL;
2605	xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
2606	if (!vm)
2607	goto error_unlock;
2608
2609	flags = AMDGPU_PTE_VALID \| AMDGPU_PTE_SNOOPED \|
2610	AMDGPU_PTE_SYSTEM;
2611
2612	if (is_compute_context) {
2613	/ Intentionally setting invalid PTE flag*
2614	* combination to force a no-retry-fault
2615	*/
2616	flags = AMDGPU_VM_NORETRY_FLAGS;
2617	value = `0`;
2618	} else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
2619	/ Redirect the access to the dummy page /
2620	value = adev->dummy_page_addr;
2621	flags \|= AMDGPU_PTE_EXECUTABLE \| AMDGPU_PTE_READABLE \|
2622	AMDGPU_PTE_WRITEABLE;
2623
2624	} else {
2625	/ Let the hw retry silently on the PTE /
2626	value = `0`;
2627	}
2628
2629	r = dma_resv_reserve_fences(obj: root->tbo.base.resv, num_fences: `1`);
2630	if (r) {
2631	pr_debug("failed %d to reserve fence slot\n", r);
2632	goto error_unlock;
2633	}
2634
2635	r = amdgpu_vm_update_range(adev, vm, immediate: true, unlocked: false, flush_tlb: false, allow_override: false,
2636	NULL, start: addr, last: addr, flags, offset: value, vram_base: `0`, NULL, NULL, NULL);
2637	if (r)
2638	goto error_unlock;
2639
2640	r = amdgpu_vm_update_pdes(adev, vm, immediate: true);
2641
2642	error_unlock:
2643	amdgpu_bo_unreserve(bo: root);
2644	if (r < `0`)
2645	DRM_ERROR("Can't handle page fault (%d)\n", r);
2646
2647	error_unref:
2648	amdgpu_bo_unref(bo: &root);
2649
2650	return false;
2651	}
2652
#if defined(CONFIG_DEBUG_FS)
/*
 * Print @title followed by one line per BO on the status list @head and
 * return the sum of the values reported by amdgpu_bo_print_info(). The
 * number of printed BOs is stored in @objs; entries without a BO are
 * skipped and not counted. The caller holds the VM's status_lock.
 */
static u64 amdgpu_debugfs_vm_bo_list_info(struct seq_file *m,
					  struct list_head *head,
					  const char *title,
					  unsigned int *objs)
{
	struct amdgpu_bo_va *bo_va, *tmp;
	unsigned int id = 0;
	u64 total = 0;

	seq_puts(m, title);
	list_for_each_entry_safe(bo_va, tmp, head, base.vm_status) {
		if (!bo_va->base.bo)
			continue;
		total += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
	}
	*objs = id;

	return total;
}

/**
 * amdgpu_debugfs_vm_bo_info - print BO info for the VM
 *
 * @vm: Requested VM for printing BO info
 * @m: debugfs file
 *
 * Print BO information in debugfs file for the VM: one section per
 * status list (idle, evicted, relocated, moved, invalidated, done),
 * followed by a per-list summary of total size and object count. All
 * lists are walked under a single hold of @vm->status_lock.
 */
void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
{
	u64 total_idle, total_evicted, total_relocated;
	u64 total_moved, total_invalidated, total_done;
	unsigned int total_idle_objs, total_evicted_objs;
	unsigned int total_relocated_objs, total_moved_objs;
	unsigned int total_invalidated_objs, total_done_objs;

	spin_lock(&vm->status_lock);
	total_idle = amdgpu_debugfs_vm_bo_list_info(m, &vm->idle,
						    "\tIdle BOs:\n",
						    &total_idle_objs);
	total_evicted = amdgpu_debugfs_vm_bo_list_info(m, &vm->evicted,
						       "\tEvicted BOs:\n",
						       &total_evicted_objs);
	total_relocated = amdgpu_debugfs_vm_bo_list_info(m, &vm->relocated,
							 "\tRelocated BOs:\n",
							 &total_relocated_objs);
	total_moved = amdgpu_debugfs_vm_bo_list_info(m, &vm->moved,
						     "\tMoved BOs:\n",
						     &total_moved_objs);
	total_invalidated = amdgpu_debugfs_vm_bo_list_info(m, &vm->invalidated,
							   "\tInvalidated BOs:\n",
							   &total_invalidated_objs);
	total_done = amdgpu_debugfs_vm_bo_list_info(m, &vm->done,
						    "\tDone BOs:\n",
						    &total_done_objs);
	spin_unlock(&vm->status_lock);

	/* Sizes are u64 and counts unsigned int, hence %llu and %u */
	seq_printf(m, "\tTotal idle size: %12llu\tobjs:\t%u\n", total_idle,
		   total_idle_objs);
	seq_printf(m, "\tTotal evicted size: %12llu\tobjs:\t%u\n", total_evicted,
		   total_evicted_objs);
	seq_printf(m, "\tTotal relocated size: %12llu\tobjs:\t%u\n", total_relocated,
		   total_relocated_objs);
	seq_printf(m, "\tTotal moved size: %12llu\tobjs:\t%u\n", total_moved,
		   total_moved_objs);
	seq_printf(m, "\tTotal invalidated size: %12llu\tobjs:\t%u\n", total_invalidated,
		   total_invalidated_objs);
	seq_printf(m, "\tTotal done size: %12llu\tobjs:\t%u\n", total_done,
		   total_done_objs);
}
#endif
2748
2749	/**
2750	* amdgpu_vm_update_fault_cache - update cached fault into.
2751	* @adev: amdgpu device pointer
2752	* @pasid: PASID of the VM
2753	* @addr: Address of the fault
2754	* @status: GPUVM fault status register
2755	* @vmhub: which vmhub got the fault
2756	*
2757	* Cache the fault info for later use by userspace in debugging.
2758	*/
2759	void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
2760	unsigned int pasid,
2761	uint64_t addr,
2762	uint32_t status,
2763	unsigned int vmhub)
2764	{
2765	struct amdgpu_vm *vm;
2766	unsigned long flags;
2767
2768	xa_lock_irqsave(&adev->vm_manager.pasids, flags);
2769
2770	vm = xa_load(&adev->vm_manager.pasids, index: pasid);
2771	/ Don't update the fault cache if status is 0. In the multiple*
2772	* fault case, subsequent faults will return a 0 status which is
2773	* useless for userspace and replaces the useful fault status, so
2774	* only update if status is non-0.
2775	*/
2776	if (vm && status) {
2777	vm->fault_info.addr = addr;
2778	vm->fault_info.status = status;
2779	if (AMDGPU_IS_GFXHUB(vmhub)) {
2780	vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX;
2781	vm->fault_info.vmhub \|=
2782	(vmhub - AMDGPU_GFXHUB_START) << AMDGPU_VMHUB_IDX_SHIFT;
2783	} else if (AMDGPU_IS_MMHUB0(vmhub)) {
2784	vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM0;
2785	vm->fault_info.vmhub \|=
2786	(vmhub - AMDGPU_MMHUB0_START) << AMDGPU_VMHUB_IDX_SHIFT;
2787	} else if (AMDGPU_IS_MMHUB1(vmhub)) {
2788	vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM1;
2789	vm->fault_info.vmhub \|=
2790	(vmhub - AMDGPU_MMHUB1_START) << AMDGPU_VMHUB_IDX_SHIFT;
2791	} else {
2792	WARN_ONCE(`1`, "Invalid vmhub %u\n", vmhub);
2793	}
2794	}
2795	xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
2796	}
2797
2798

source code of linux/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c