amdgpu_ctx.c source code [linux/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c]

1	/*
2	* Copyright 2015 Advanced Micro Devices, Inc.
3	*
4	* Permission is hereby granted, free of charge, to any person obtaining a
5	* copy of this software and associated documentation files (the "Software"),
6	* to deal in the Software without restriction, including without limitation
7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8	* and/or sell copies of the Software, and to permit persons to whom the
9	* Software is furnished to do so, subject to the following conditions:
10	*
11	* The above copyright notice and this permission notice shall be included in
12	* all copies or substantial portions of the Software.
13	*
14	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17	* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18	* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19	* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20	* OTHER DEALINGS IN THE SOFTWARE.
21	*
22	* Authors: monk liu <monk.liu@amd.com>
23	*/
24
25	#include <drm/drm_auth.h>
26	#include <drm/drm_drv.h>
27	#include "amdgpu.h"
28	#include "amdgpu_sched.h"
29	#include "amdgpu_ras.h"
30	#include <linux/nospec.h>
31
32	#define to_amdgpu_ctx_entity(e) \
33	container_of((e), struct amdgpu_ctx_entity, entity)
34
35	const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
36	[AMDGPU_HW_IP_GFX] = `1`,
37	[AMDGPU_HW_IP_COMPUTE] = `4`,
38	[AMDGPU_HW_IP_DMA] = `2`,
39	[AMDGPU_HW_IP_UVD] = `1`,
40	[AMDGPU_HW_IP_VCE] = `1`,
41	[AMDGPU_HW_IP_UVD_ENC] = `1`,
42	[AMDGPU_HW_IP_VCN_DEC] = `1`,
43	[AMDGPU_HW_IP_VCN_ENC] = `1`,
44	[AMDGPU_HW_IP_VCN_JPEG] = `1`,
45	[AMDGPU_HW_IP_VPE] = `1`,
46	};
47
48	bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
49	{
50	switch (ctx_prio) {
51	case AMDGPU_CTX_PRIORITY_VERY_LOW:
52	case AMDGPU_CTX_PRIORITY_LOW:
53	case AMDGPU_CTX_PRIORITY_NORMAL:
54	case AMDGPU_CTX_PRIORITY_HIGH:
55	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
56	return true;
57	default:
58	case AMDGPU_CTX_PRIORITY_UNSET:
59	/ UNSET priority is not valid and we don't carry that*
60	* around, but set it to NORMAL in the only place this
61	* function is called, amdgpu_ctx_ioctl().
62	*/
63	return false;
64	}
65	}
66
67	static enum drm_sched_priority
68	amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
69	{
70	switch (ctx_prio) {
71	case AMDGPU_CTX_PRIORITY_UNSET:
72	pr_warn_once("AMD-->DRM context priority value UNSET-->NORMAL");
73	return DRM_SCHED_PRIORITY_NORMAL;
74
75	case AMDGPU_CTX_PRIORITY_VERY_LOW:
76	return DRM_SCHED_PRIORITY_MIN;
77
78	case AMDGPU_CTX_PRIORITY_LOW:
79	return DRM_SCHED_PRIORITY_MIN;
80
81	case AMDGPU_CTX_PRIORITY_NORMAL:
82	return DRM_SCHED_PRIORITY_NORMAL;
83
84	case AMDGPU_CTX_PRIORITY_HIGH:
85	return DRM_SCHED_PRIORITY_HIGH;
86
87	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
88	return DRM_SCHED_PRIORITY_HIGH;
89
90	/ This should not happen as we sanitized userspace provided priority*
91	* already, WARN if this happens.
92	*/
93	default:
94	WARN(`1`, "Invalid context priority %d\n", ctx_prio);
95	return DRM_SCHED_PRIORITY_NORMAL;
96	}
97
98	}
99
100	static int amdgpu_ctx_priority_permit(struct drm_file *filp,
101	int32_t priority)
102	{
103	/ NORMAL and below are accessible by everyone /
104	if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
105	return `0`;
106
107	if (capable(CAP_SYS_NICE))
108	return `0`;
109
110	if (drm_is_current_master(fpriv: filp))
111	return `0`;
112
113	return -EACCES;
114	}
115
116	static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio)
117	{
118	switch (prio) {
119	case AMDGPU_CTX_PRIORITY_HIGH:
120	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
121	return AMDGPU_GFX_PIPE_PRIO_HIGH;
122	default:
123	return AMDGPU_GFX_PIPE_PRIO_NORMAL;
124	}
125	}
126
127	static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_t prio)
128	{
129	switch (prio) {
130	case AMDGPU_CTX_PRIORITY_HIGH:
131	return AMDGPU_RING_PRIO_1;
132	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
133	return AMDGPU_RING_PRIO_2;
134	default:
135	return AMDGPU_RING_PRIO_0;
136	}
137	}
138
139	static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
140	{
141	struct amdgpu_device *adev = ctx->mgr->adev;
142	unsigned int hw_prio;
143	int32_t ctx_prio;
144
145	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
146	ctx->init_priority : ctx->override_priority;
147
148	switch (hw_ip) {
149	case AMDGPU_HW_IP_GFX:
150	case AMDGPU_HW_IP_COMPUTE:
151	hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(prio: ctx_prio);
152	break;
153	case AMDGPU_HW_IP_VCE:
154	case AMDGPU_HW_IP_VCN_ENC:
155	hw_prio = amdgpu_ctx_sched_prio_to_ring_prio(prio: ctx_prio);
156	break;
157	default:
158	hw_prio = AMDGPU_RING_PRIO_DEFAULT;
159	break;
160	}
161
162	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
163	if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == `0`)
164	hw_prio = AMDGPU_RING_PRIO_DEFAULT;
165
166	return hw_prio;
167	}
168
169	/ Calculate the time spend on the hw /
170	static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
171	{
172	struct drm_sched_fence *s_fence;
173
174	if (!fence)
175	return ns_to_ktime(ns: `0`);
176
177	/ When the fence is not even scheduled it can't have spend time /
178	s_fence = to_drm_sched_fence(f: fence);
179	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
180	return ns_to_ktime(ns: `0`);
181
182	/ When it is still running account how much already spend /
183	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
184	return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);
185
186	return ktime_sub(s_fence->finished.timestamp,
187	s_fence->scheduled.timestamp);
188	}
189
190	static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
191	struct amdgpu_ctx_entity *centity)
192	{
193	ktime_t res = ns_to_ktime(ns: `0`);
194	uint32_t i;
195
196	spin_lock(lock: &ctx->ring_lock);
197	for (i = `0`; i < amdgpu_sched_jobs; i++) {
198	res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
199	}
200	spin_unlock(lock: &ctx->ring_lock);
201	return res;
202	}
203
204	static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
205	const u32 ring)
206	{
207	struct drm_gpu_scheduler *scheds = NULL, sched = NULL;
208	struct amdgpu_device *adev = ctx->mgr->adev;
209	struct amdgpu_ctx_entity *entity;
210	enum drm_sched_priority drm_prio;
211	unsigned int hw_prio, num_scheds;
212	int32_t ctx_prio;
213	int r;
214
215	entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
216	GFP_KERNEL);
217	if (!entity)
218	return -ENOMEM;
219
220	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
221	ctx->init_priority : ctx->override_priority;
222	entity->hw_ip = hw_ip;
223	entity->sequence = `1`;
224	hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
225	drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
226
227	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
228
229	if (!(adev)->xcp_mgr) {
230	scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
231	num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
232	} else {
233	struct amdgpu_fpriv *fpriv;
234
235	fpriv = container_of(ctx->ctx_mgr, struct amdgpu_fpriv, ctx_mgr);
236	r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv,
237	&num_scheds, &scheds);
238	if (r)
239	goto cleanup_entity;
240	}
241
242	/ disable load balance if the hw engine retains context among dependent jobs /
243	if (hw_ip == AMDGPU_HW_IP_VCN_ENC \|\|
244	hw_ip == AMDGPU_HW_IP_VCN_DEC \|\|
245	hw_ip == AMDGPU_HW_IP_UVD_ENC \|\|
246	hw_ip == AMDGPU_HW_IP_UVD) {
247	sched = drm_sched_pick_best(sched_list: scheds, num_sched_list: num_scheds);
248	scheds = &sched;
249	num_scheds = `1`;
250	}
251
252	r = drm_sched_entity_init(entity: &entity->entity, priority: drm_prio, sched_list: scheds, num_sched_list: num_scheds,
253	guilty: &ctx->guilty);
254	if (r)
255	goto error_free_entity;
256
257	/ It's not an error if we fail to install the new entity /
258	if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
259	goto cleanup_entity;
260
261	return `0`;
262
263	cleanup_entity:
264	drm_sched_entity_fini(entity: &entity->entity);
265
266	error_free_entity:
267	kfree(objp: entity);
268
269	return r;
270	}
271
272	static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_device *adev,
273	struct amdgpu_ctx_entity *entity)
274	{
275	ktime_t res = ns_to_ktime(ns: `0`);
276	int i;
277
278	if (!entity)
279	return res;
280
281	for (i = `0`; i < amdgpu_sched_jobs; ++i) {
282	res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
283	dma_fence_put(fence: entity->fences[i]);
284	}
285
286	amdgpu_xcp_release_sched(adev, entity);
287
288	kfree(objp: entity);
289	return res;
290	}
291
292	static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
293	u32 *stable_pstate)
294	{
295	struct amdgpu_device *adev = ctx->mgr->adev;
296	enum amd_dpm_forced_level current_level;
297
298	current_level = amdgpu_dpm_get_performance_level(adev);
299
300	switch (current_level) {
301	case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
302	*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD;
303	break;
304	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
305	*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK;
306	break;
307	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
308	*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK;
309	break;
310	case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
311	*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK;
312	break;
313	default:
314	*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
315	break;
316	}
317	return `0`;
318	}
319
320	static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
321	struct drm_file filp, struct* amdgpu_ctx *ctx)
322	{
323	struct amdgpu_fpriv *fpriv = filp->driver_priv;
324	u32 current_stable_pstate;
325	int r;
326
327	r = amdgpu_ctx_priority_permit(filp, priority);
328	if (r)
329	return r;
330
331	memset(ctx, `0`, sizeof(*ctx));
332
333	kref_init(kref: &ctx->refcount);
334	ctx->mgr = mgr;
335	spin_lock_init(&ctx->ring_lock);
336
337	ctx->reset_counter = atomic_read(v: &mgr->adev->gpu_reset_counter);
338	ctx->reset_counter_query = ctx->reset_counter;
339	ctx->generation = amdgpu_vm_generation(adev: mgr->adev, vm: &fpriv->vm);
340	ctx->init_priority = priority;
341	ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
342
343	r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate: &current_stable_pstate);
344	if (r)
345	return r;
346
347	if (mgr->adev->pm.stable_pstate_ctx)
348	ctx->stable_pstate = mgr->adev->pm.stable_pstate_ctx->stable_pstate;
349	else
350	ctx->stable_pstate = current_stable_pstate;
351
352	ctx->ctx_mgr = &(fpriv->ctx_mgr);
353	return `0`;
354	}
355
356	static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
357	u32 stable_pstate)
358	{
359	struct amdgpu_device *adev = ctx->mgr->adev;
360	enum amd_dpm_forced_level level;
361	u32 current_stable_pstate;
362	int r;
363
364	mutex_lock(&adev->pm.stable_pstate_ctx_lock);
365	if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
366	r = -EBUSY;
367	goto done;
368	}
369
370	r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate: &current_stable_pstate);
371	if (r \|\| (stable_pstate == current_stable_pstate))
372	goto done;
373
374	switch (stable_pstate) {
375	case AMDGPU_CTX_STABLE_PSTATE_NONE:
376	level = AMD_DPM_FORCED_LEVEL_AUTO;
377	break;
378	case AMDGPU_CTX_STABLE_PSTATE_STANDARD:
379	level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD;
380	break;
381	case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK:
382	level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK;
383	break;
384	case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK:
385	level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK;
386	break;
387	case AMDGPU_CTX_STABLE_PSTATE_PEAK:
388	level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
389	break;
390	default:
391	r = -EINVAL;
392	goto done;
393	}
394
395	r = amdgpu_dpm_force_performance_level(adev, level);
396
397	if (level == AMD_DPM_FORCED_LEVEL_AUTO)
398	adev->pm.stable_pstate_ctx = NULL;
399	else
400	adev->pm.stable_pstate_ctx = ctx;
401	done:
402	mutex_unlock(lock: &adev->pm.stable_pstate_ctx_lock);
403
404	return r;
405	}
406
407	static void amdgpu_ctx_fini(struct kref *ref)
408	{
409	struct amdgpu_ctx ctx = container_of(ref, struct* amdgpu_ctx, refcount);
410	struct amdgpu_ctx_mgr *mgr = ctx->mgr;
411	struct amdgpu_device *adev = mgr->adev;
412	unsigned i, j, idx;
413
414	if (!adev)
415	return;
416
417	for (i = `0`; i < AMDGPU_HW_IP_NUM; ++i) {
418	for (j = `0`; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
419	ktime_t spend;
420
421	spend = amdgpu_ctx_fini_entity(adev, entity: ctx->entities[i][j]);
422	atomic64_add(i: ktime_to_ns(kt: spend), v: &mgr->time_spend[i]);
423	}
424	}
425
426	if (drm_dev_enter(dev: adev_to_drm(adev), idx: &idx)) {
427	amdgpu_ctx_set_stable_pstate(ctx, stable_pstate: ctx->stable_pstate);
428	drm_dev_exit(idx);
429	}
430
431	kfree(objp: ctx);
432	}
433
434	int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
435	u32 ring, struct drm_sched_entity **entity)
436	{
437	int r;
438	struct drm_sched_entity *ctx_entity;
439
440	if (hw_ip >= AMDGPU_HW_IP_NUM) {
441	DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
442	return -EINVAL;
443	}
444
445	/ Right now all IPs have only one instance - multiple rings. /
446	if (instance != `0`) {
447	DRM_DEBUG("invalid ip instance: %d\n", instance);
448	return -EINVAL;
449	}
450
451	if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
452	DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
453	return -EINVAL;
454	}
455
456	if (ctx->entities[hw_ip][ring] == NULL) {
457	r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
458	if (r)
459	return r;
460	}
461
462	ctx_entity = &ctx->entities[hw_ip][ring]->entity;
463	r = drm_sched_entity_error(entity: ctx_entity);
464	if (r) {
465	DRM_DEBUG("error entity %p\n", ctx_entity);
466	return r;
467	}
468
469	*entity = ctx_entity;
470	return `0`;
471	}
472
473	static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
474	struct amdgpu_fpriv *fpriv,
475	struct drm_file *filp,
476	int32_t priority,
477	uint32_t *id)
478	{
479	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
480	struct amdgpu_ctx *ctx;
481	int r;
482
483	ctx = kmalloc(size: sizeof(*ctx), GFP_KERNEL);
484	if (!ctx)
485	return -ENOMEM;
486
487	mutex_lock(&mgr->lock);
488	r = idr_alloc(&mgr->ctx_handles, ptr: ctx, start: `1`, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
489	if (r < `0`) {
490	mutex_unlock(lock: &mgr->lock);
491	kfree(objp: ctx);
492	return r;
493	}
494
495	*id = (uint32_t)r;
496	r = amdgpu_ctx_init(mgr, priority, filp, ctx);
497	if (r) {
498	idr_remove(&mgr->ctx_handles, id: *id);
499	*id = `0`;
500	kfree(objp: ctx);
501	}
502	mutex_unlock(lock: &mgr->lock);
503	return r;
504	}
505
506	static void amdgpu_ctx_do_release(struct kref *ref)
507	{
508	struct amdgpu_ctx *ctx;
509	u32 i, j;
510
511	ctx = container_of(ref, struct amdgpu_ctx, refcount);
512	for (i = `0`; i < AMDGPU_HW_IP_NUM; ++i) {
513	for (j = `0`; j < amdgpu_ctx_num_entities[i]; ++j) {
514	if (!ctx->entities[i][j])
515	continue;
516
517	drm_sched_entity_destroy(entity: &ctx->entities[i][j]->entity);
518	}
519	}
520
521	amdgpu_ctx_fini(ref);
522	}
523
524	static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
525	{
526	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
527	struct amdgpu_ctx *ctx;
528
529	mutex_lock(&mgr->lock);
530	ctx = idr_remove(&mgr->ctx_handles, id);
531	if (ctx)
532	kref_put(kref: &ctx->refcount, release: amdgpu_ctx_do_release);
533	mutex_unlock(lock: &mgr->lock);
534	return ctx ? `0` : -EINVAL;
535	}
536
537	static int amdgpu_ctx_query(struct amdgpu_device *adev,
538	struct amdgpu_fpriv *fpriv, uint32_t id,
539	union drm_amdgpu_ctx_out *out)
540	{
541	struct amdgpu_ctx *ctx;
542	struct amdgpu_ctx_mgr *mgr;
543	unsigned reset_counter;
544
545	if (!fpriv)
546	return -EINVAL;
547
548	mgr = &fpriv->ctx_mgr;
549	mutex_lock(&mgr->lock);
550	ctx = idr_find(&mgr->ctx_handles, id);
551	if (!ctx) {
552	mutex_unlock(lock: &mgr->lock);
553	return -EINVAL;
554	}
555
556	/ TODO: these two are always zero /
557	out->state.flags = `0x0`;
558	out->state.hangs = `0x0`;
559
560	/ determine if a GPU reset has occured since the last call /
561	reset_counter = atomic_read(v: &adev->gpu_reset_counter);
562	/ TODO: this should ideally return NO, GUILTY, or INNOCENT. /
563	if (ctx->reset_counter_query == reset_counter)
564	out->state.reset_status = AMDGPU_CTX_NO_RESET;
565	else
566	out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
567	ctx->reset_counter_query = reset_counter;
568
569	mutex_unlock(lock: &mgr->lock);
570	return `0`;
571	}
572
573	#define AMDGPU_RAS_COUNTE_DELAY_MS 3000
574
575	static int amdgpu_ctx_query2(struct amdgpu_device *adev,
576	struct amdgpu_fpriv *fpriv, uint32_t id,
577	union drm_amdgpu_ctx_out *out)
578	{
579	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
580	struct amdgpu_ctx *ctx;
581	struct amdgpu_ctx_mgr *mgr;
582
583	if (!fpriv)
584	return -EINVAL;
585
586	mgr = &fpriv->ctx_mgr;
587	mutex_lock(&mgr->lock);
588	ctx = idr_find(&mgr->ctx_handles, id);
589	if (!ctx) {
590	mutex_unlock(lock: &mgr->lock);
591	return -EINVAL;
592	}
593
594	out->state.flags = `0x0`;
595	out->state.hangs = `0x0`;
596
597	if (ctx->reset_counter != atomic_read(v: &adev->gpu_reset_counter))
598	out->state.flags \|= AMDGPU_CTX_QUERY2_FLAGS_RESET;
599
600	if (ctx->generation != amdgpu_vm_generation(adev, vm: &fpriv->vm))
601	out->state.flags \|= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;
602
603	if (atomic_read(v: &ctx->guilty))
604	out->state.flags \|= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
605
606	if (amdgpu_in_reset(adev))
607	out->state.flags \|= AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS;
608
609	if (adev->ras_enabled && con) {
610	/ Return the cached values in O(1),*
611	* and schedule delayed work to cache
612	* new vaues.
613	*/
614	int ce_count, ue_count;
615
616	ce_count = atomic_read(v: &con->ras_ce_count);
617	ue_count = atomic_read(v: &con->ras_ue_count);
618
619	if (ce_count != ctx->ras_counter_ce) {
620	ctx->ras_counter_ce = ce_count;
621	out->state.flags \|= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
622	}
623
624	if (ue_count != ctx->ras_counter_ue) {
625	ctx->ras_counter_ue = ue_count;
626	out->state.flags \|= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
627	}
628
629	schedule_delayed_work(dwork: &con->ras_counte_delay_work,
630	delay: msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
631	}
632
633	mutex_unlock(lock: &mgr->lock);
634	return `0`;
635	}
636
637	static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
638	struct amdgpu_fpriv *fpriv, uint32_t id,
639	bool set, u32 *stable_pstate)
640	{
641	struct amdgpu_ctx *ctx;
642	struct amdgpu_ctx_mgr *mgr;
643	int r;
644
645	if (!fpriv)
646	return -EINVAL;
647
648	mgr = &fpriv->ctx_mgr;
649	mutex_lock(&mgr->lock);
650	ctx = idr_find(&mgr->ctx_handles, id);
651	if (!ctx) {
652	mutex_unlock(lock: &mgr->lock);
653	return -EINVAL;
654	}
655
656	if (set)
657	r = amdgpu_ctx_set_stable_pstate(ctx, stable_pstate: *stable_pstate);
658	else
659	r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate);
660
661	mutex_unlock(lock: &mgr->lock);
662	return r;
663	}
664
665	int amdgpu_ctx_ioctl(struct drm_device dev, void* *data,
666	struct drm_file *filp)
667	{
668	int r;
669	uint32_t id, stable_pstate;
670	int32_t priority;
671
672	union drm_amdgpu_ctx *args = data;
673	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
674	struct amdgpu_fpriv *fpriv = filp->driver_priv;
675
676	id = args->in.ctx_id;
677	priority = args->in.priority;
678
679	/ For backwards compatibility, we need to accept ioctls with garbage*
680	* in the priority field. Garbage values in the priority field, result
681	* in the priority being set to NORMAL.
682	*/
683	if (!amdgpu_ctx_priority_is_valid(ctx_prio: priority))
684	priority = AMDGPU_CTX_PRIORITY_NORMAL;
685
686	switch (args->in.op) {
687	case AMDGPU_CTX_OP_ALLOC_CTX:
688	r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, id: &id);
689	args->out.alloc.ctx_id = id;
690	break;
691	case AMDGPU_CTX_OP_FREE_CTX:
692	r = amdgpu_ctx_free(fpriv, id);
693	break;
694	case AMDGPU_CTX_OP_QUERY_STATE:
695	r = amdgpu_ctx_query(adev, fpriv, id, out: &args->out);
696	break;
697	case AMDGPU_CTX_OP_QUERY_STATE2:
698	r = amdgpu_ctx_query2(adev, fpriv, id, out: &args->out);
699	break;
700	case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
701	if (args->in.flags)
702	return -EINVAL;
703	r = amdgpu_ctx_stable_pstate(adev, fpriv, id, set: false, stable_pstate: &stable_pstate);
704	if (!r)
705	args->out.pstate.flags = stable_pstate;
706	break;
707	case AMDGPU_CTX_OP_SET_STABLE_PSTATE:
708	if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK)
709	return -EINVAL;
710	stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK;
711	if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK)
712	return -EINVAL;
713	r = amdgpu_ctx_stable_pstate(adev, fpriv, id, set: true, stable_pstate: &stable_pstate);
714	break;
715	default:
716	return -EINVAL;
717	}
718
719	return r;
720	}
721
722	struct amdgpu_ctx amdgpu_ctx_get(struct* amdgpu_fpriv *fpriv, uint32_t id)
723	{
724	struct amdgpu_ctx *ctx;
725	struct amdgpu_ctx_mgr *mgr;
726
727	if (!fpriv)
728	return NULL;
729
730	mgr = &fpriv->ctx_mgr;
731
732	mutex_lock(&mgr->lock);
733	ctx = idr_find(&mgr->ctx_handles, id);
734	if (ctx)
735	kref_get(kref: &ctx->refcount);
736	mutex_unlock(lock: &mgr->lock);
737	return ctx;
738	}
739
740	int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
741	{
742	if (ctx == NULL)
743	return -EINVAL;
744
745	kref_put(kref: &ctx->refcount, release: amdgpu_ctx_do_release);
746	return `0`;
747	}
748
749	uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
750	struct drm_sched_entity *entity,
751	struct dma_fence *fence)
752	{
753	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
754	uint64_t seq = centity->sequence;
755	struct dma_fence *other = NULL;
756	unsigned idx = `0`;
757
758	idx = seq & (amdgpu_sched_jobs - `1`);
759	other = centity->fences[idx];
760	WARN_ON(other && !dma_fence_is_signaled(other));
761
762	dma_fence_get(fence);
763
764	spin_lock(lock: &ctx->ring_lock);
765	centity->fences[idx] = fence;
766	centity->sequence++;
767	spin_unlock(lock: &ctx->ring_lock);
768
769	atomic64_add(i: ktime_to_ns(kt: amdgpu_ctx_fence_time(fence: other)),
770	v: &ctx->mgr->time_spend[centity->hw_ip]);
771
772	dma_fence_put(fence: other);
773	return seq;
774	}
775
776	struct dma_fence amdgpu_ctx_get_fence(struct* amdgpu_ctx *ctx,
777	struct drm_sched_entity *entity,
778	uint64_t seq)
779	{
780	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
781	struct dma_fence *fence;
782
783	spin_lock(lock: &ctx->ring_lock);
784
785	if (seq == ~`0ull`)
786	seq = centity->sequence - `1`;
787
788	if (seq >= centity->sequence) {
789	spin_unlock(lock: &ctx->ring_lock);
790	return ERR_PTR(error: -EINVAL);
791	}
792
793
794	if (seq + amdgpu_sched_jobs < centity->sequence) {
795	spin_unlock(lock: &ctx->ring_lock);
796	return NULL;
797	}
798
799	fence = dma_fence_get(fence: centity->fences[seq & (amdgpu_sched_jobs - `1`)]);
800	spin_unlock(lock: &ctx->ring_lock);
801
802	return fence;
803	}
804
805	static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
806	struct amdgpu_ctx_entity *aentity,
807	int hw_ip,
808	int32_t priority)
809	{
810	struct amdgpu_device *adev = ctx->mgr->adev;
811	unsigned int hw_prio;
812	struct drm_gpu_scheduler **scheds = NULL;
813	unsigned num_scheds;
814
815	/ set sw priority /
816	drm_sched_entity_set_priority(entity: &aentity->entity,
817	priority: amdgpu_ctx_to_drm_sched_prio(ctx_prio: priority));
818
819	/ set hw priority /
820	if (hw_ip == AMDGPU_HW_IP_COMPUTE \|\| hw_ip == AMDGPU_HW_IP_GFX) {
821	hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
822	hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
823	scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
824	num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
825	drm_sched_entity_modify_sched(entity: &aentity->entity, sched_list: scheds,
826	num_sched_list: num_scheds);
827	}
828	}
829
830	void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
831	int32_t priority)
832	{
833	int32_t ctx_prio;
834	unsigned i, j;
835
836	ctx->override_priority = priority;
837
838	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
839	ctx->init_priority : ctx->override_priority;
840	for (i = `0`; i < AMDGPU_HW_IP_NUM; ++i) {
841	for (j = `0`; j < amdgpu_ctx_num_entities[i]; ++j) {
842	if (!ctx->entities[i][j])
843	continue;
844
845	amdgpu_ctx_set_entity_priority(ctx, aentity: ctx->entities[i][j],
846	hw_ip: i, priority: ctx_prio);
847	}
848	}
849	}
850
851	int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
852	struct drm_sched_entity *entity)
853	{
854	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
855	struct dma_fence *other;
856	unsigned idx;
857	long r;
858
859	spin_lock(lock: &ctx->ring_lock);
860	idx = centity->sequence & (amdgpu_sched_jobs - `1`);
861	other = dma_fence_get(fence: centity->fences[idx]);
862	spin_unlock(lock: &ctx->ring_lock);
863
864	if (!other)
865	return `0`;
866
867	r = dma_fence_wait(fence: other, intr: true);
868	if (r < `0` && r != -ERESTARTSYS)
869	DRM_ERROR("Error (%ld) waiting for fence!\n", r);
870
871	dma_fence_put(fence: other);
872	return r;
873	}
874
875	void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
876	struct amdgpu_device *adev)
877	{
878	unsigned int i;
879
880	mgr->adev = adev;
881	mutex_init(&mgr->lock);
882	idr_init_base(idr: &mgr->ctx_handles, base: `1`);
883
884	for (i = `0`; i < AMDGPU_HW_IP_NUM; ++i)
885	atomic64_set(v: &mgr->time_spend[i], i: `0`);
886	}
887
888	long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr mgr, long* timeout)
889	{
890	struct amdgpu_ctx *ctx;
891	struct idr *idp;
892	uint32_t id, i, j;
893
894	idp = &mgr->ctx_handles;
895
896	mutex_lock(&mgr->lock);
897	idr_for_each_entry(idp, ctx, id) {
898	for (i = `0`; i < AMDGPU_HW_IP_NUM; ++i) {
899	for (j = `0`; j < amdgpu_ctx_num_entities[i]; ++j) {
900	struct drm_sched_entity *entity;
901
902	if (!ctx->entities[i][j])
903	continue;
904
905	entity = &ctx->entities[i][j]->entity;
906	timeout = drm_sched_entity_flush(entity, timeout);
907	}
908	}
909	}
910	mutex_unlock(lock: &mgr->lock);
911	return timeout;
912	}
913
914	void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
915	{
916	struct amdgpu_ctx *ctx;
917	struct idr *idp;
918	uint32_t id, i, j;
919
920	idp = &mgr->ctx_handles;
921
922	idr_for_each_entry(idp, ctx, id) {
923	if (kref_read(kref: &ctx->refcount) != `1`) {
924	DRM_ERROR("ctx %p is still alive\n", ctx);
925	continue;
926	}
927
928	for (i = `0`; i < AMDGPU_HW_IP_NUM; ++i) {
929	for (j = `0`; j < amdgpu_ctx_num_entities[i]; ++j) {
930	struct drm_sched_entity *entity;
931
932	if (!ctx->entities[i][j])
933	continue;
934
935	entity = &ctx->entities[i][j]->entity;
936	drm_sched_entity_fini(entity);
937	}
938	}
939	}
940	}
941
942	void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
943	{
944	struct amdgpu_ctx *ctx;
945	struct idr *idp;
946	uint32_t id;
947
948	amdgpu_ctx_mgr_entity_fini(mgr);
949
950	idp = &mgr->ctx_handles;
951
952	idr_for_each_entry(idp, ctx, id) {
953	if (kref_put(kref: &ctx->refcount, release: amdgpu_ctx_fini) != `1`)
954	DRM_ERROR("ctx %p is still alive\n", ctx);
955	}
956
957	idr_destroy(&mgr->ctx_handles);
958	mutex_destroy(lock: &mgr->lock);
959	}
960
961	void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
962	ktime_t usage[AMDGPU_HW_IP_NUM])
963	{
964	struct amdgpu_ctx *ctx;
965	unsigned int hw_ip, i;
966	uint32_t id;
967
968	/*
969	* This is a little bit racy because it can be that a ctx or a fence are
970	* destroyed just in the moment we try to account them. But that is ok
971	* since exactly that case is explicitely allowed by the interface.
972	*/
973	mutex_lock(&mgr->lock);
974	for (hw_ip = `0`; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
975	uint64_t ns = atomic64_read(v: &mgr->time_spend[hw_ip]);
976
977	usage[hw_ip] = ns_to_ktime(ns);
978	}
979
980	idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
981	for (hw_ip = `0`; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
982	for (i = `0`; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
983	struct amdgpu_ctx_entity *centity;
984	ktime_t spend;
985
986	centity = ctx->entities[hw_ip][i];
987	if (!centity)
988	continue;
989	spend = amdgpu_ctx_entity_time(ctx, centity);
990	usage[hw_ip] = ktime_add(usage[hw_ip], spend);
991	}
992	}
993	}
994	mutex_unlock(lock: &mgr->lock);
995	}
996

source code of linux/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c