kfd_events.c source code [linux/drivers/gpu/drm/amd/amdkfd/kfd_events.c]

1	// SPDX-License-Identifier: GPL-2.0 OR MIT
2	/*
3	* Copyright 2014-2022 Advanced Micro Devices, Inc.
4	*
5	* Permission is hereby granted, free of charge, to any person obtaining a
6	* copy of this software and associated documentation files (the "Software"),
7	* to deal in the Software without restriction, including without limitation
8	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
9	* and/or sell copies of the Software, and to permit persons to whom the
10	* Software is furnished to do so, subject to the following conditions:
11	*
12	* The above copyright notice and this permission notice shall be included in
13	* all copies or substantial portions of the Software.
14	*
15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18	* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19	* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20	* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21	* OTHER DEALINGS IN THE SOFTWARE.
22	*/
23
24	#include <linux/mm_types.h>
25	#include <linux/slab.h>
26	#include <linux/types.h>
27	#include <linux/sched/signal.h>
28	#include <linux/sched/mm.h>
29	#include <linux/uaccess.h>
30	#include <linux/mman.h>
31	#include <linux/memory.h>
32	#include "kfd_priv.h"
33	#include "kfd_events.h"
34	#include <linux/device.h>
35
36	/*
37	* Wrapper around wait_queue_entry_t
38	*/
39	struct kfd_event_waiter {
40	wait_queue_entry_t wait;
41	struct kfd_event event; /* Event to wait for /
42	bool activated; / Becomes true when event is signaled /
43	bool event_age_enabled; / set to true when last_event_age is non-zero /
44	};
45
46	/*
47	* Each signal event needs a 64-bit signal slot where the signaler will write
48	* a 1 before sending an interrupt. (This is needed because some interrupts
49	* do not contain enough spare data bits to identify an event.)
50	* We get whole pages and map them to the process VA.
51	* Individual signal events use their event_id as slot index.
52	*/
53	struct kfd_signal_page {
54	uint64_t *kernel_address;
55	uint64_t __user *user_address;
56	bool need_to_free_pages;
57	};
58
59	static uint64_t page_slots(struct* kfd_signal_page *page)
60	{
61	return page->kernel_address;
62	}
63
64	static struct kfd_signal_page allocate_signal_page(struct* kfd_process *p)
65	{
66	void *backing_store;
67	struct kfd_signal_page *page;
68
69	page = kzalloc(size: sizeof(*page), GFP_KERNEL);
70	if (!page)
71	return NULL;
72
73	backing_store = (void *) __get_free_pages(GFP_KERNEL,
74	order: get_order(KFD_SIGNAL_EVENT_LIMIT * `8`));
75	if (!backing_store)
76	goto fail_alloc_signal_store;
77
78	/ Initialize all events to unsignaled /
79	memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT,
80	KFD_SIGNAL_EVENT_LIMIT * `8`);
81
82	page->kernel_address = backing_store;
83	page->need_to_free_pages = true;
84	pr_debug("Allocated new event signal page at %p, for process %p\n",
85	page, p);
86
87	return page;
88
89	fail_alloc_signal_store:
90	kfree(objp: page);
91	return NULL;
92	}
93
94	static int allocate_event_notification_slot(struct kfd_process *p,
95	struct kfd_event *ev,
96	const int *restore_id)
97	{
98	int id;
99
100	if (!p->signal_page) {
101	p->signal_page = allocate_signal_page(p);
102	if (!p->signal_page)
103	return -ENOMEM;
104	/ Oldest user mode expects 256 event slots /
105	p->signal_mapped_size = `256`*`8`;
106	}
107
108	if (restore_id) {
109	id = idr_alloc(&p->event_idr, ptr: ev, start: restore_id, end: restore_id + `1`,
110	GFP_KERNEL);
111	} else {
112	/*
113	* Compatibility with old user mode: Only use signal slots
114	* user mode has mapped, may be less than
115	* KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
116	* of the event limit without breaking user mode.
117	*/
118	id = idr_alloc(&p->event_idr, ptr: ev, start: `0`, end: p->signal_mapped_size / `8`,
119	GFP_KERNEL);
120	}
121	if (id < `0`)
122	return id;
123
124	ev->event_id = id;
125	page_slots(page: p->signal_page)[id] = UNSIGNALED_EVENT_SLOT;
126
127	return `0`;
128	}
129
130	/*
131	* Assumes that p->event_mutex or rcu_readlock is held and of course that p is
132	* not going away.
133	*/
134	static struct kfd_event lookup_event_by_id(struct* kfd_process *p, uint32_t id)
135	{
136	return idr_find(&p->event_idr, id);
137	}
138
139	/**
140	* lookup_signaled_event_by_partial_id - Lookup signaled event from partial ID
141	* @p: Pointer to struct kfd_process
142	* @id: ID to look up
143	* @bits: Number of valid bits in @id
144	*
145	* Finds the first signaled event with a matching partial ID. If no
146	* matching signaled event is found, returns NULL. In that case the
147	* caller should assume that the partial ID is invalid and do an
148	* exhaustive search of all siglaned events.
149	*
150	* If multiple events with the same partial ID signal at the same
151	* time, they will be found one interrupt at a time, not necessarily
152	* in the same order the interrupts occurred. As long as the number of
153	* interrupts is correct, all signaled events will be seen by the
154	* driver.
155	*/
156	static struct kfd_event *lookup_signaled_event_by_partial_id(
157	struct kfd_process *p, uint32_t id, uint32_t bits)
158	{
159	struct kfd_event *ev;
160
161	if (!p->signal_page \|\| id >= KFD_SIGNAL_EVENT_LIMIT)
162	return NULL;
163
164	/ Fast path for the common case that @id is not a partial ID*
165	* and we only need a single lookup.
166	*/
167	if (bits > `31` \|\| (`1U` << bits) >= KFD_SIGNAL_EVENT_LIMIT) {
168	if (page_slots(page: p->signal_page)[id] == UNSIGNALED_EVENT_SLOT)
169	return NULL;
170
171	return idr_find(&p->event_idr, id);
172	}
173
174	/ General case for partial IDs: Iterate over all matching IDs*
175	* and find the first one that has signaled.
176	*/
177	for (ev = NULL; id < KFD_SIGNAL_EVENT_LIMIT && !ev; id += `1U` << bits) {
178	if (page_slots(page: p->signal_page)[id] == UNSIGNALED_EVENT_SLOT)
179	continue;
180
181	ev = idr_find(&p->event_idr, id);
182	}
183
184	return ev;
185	}
186
187	static int create_signal_event(struct file devkfd, struct* kfd_process *p,
188	struct kfd_event ev, const* int *restore_id)
189	{
190	int ret;
191
192	if (p->signal_mapped_size &&
193	p->signal_event_count == p->signal_mapped_size / `8`) {
194	if (!p->signal_event_limit_reached) {
195	pr_debug("Signal event wasn't created because limit was reached\n");
196	p->signal_event_limit_reached = true;
197	}
198	return -ENOSPC;
199	}
200
201	ret = allocate_event_notification_slot(p, ev, restore_id);
202	if (ret) {
203	pr_warn("Signal event wasn't created because out of kernel memory\n");
204	return ret;
205	}
206
207	p->signal_event_count++;
208
209	ev->user_signal_address = &p->signal_page->user_address[ev->event_id];
210	pr_debug("Signal event number %zu created with id %d, address %p\n",
211	p->signal_event_count, ev->event_id,
212	ev->user_signal_address);
213
214	return `0`;
215	}
216
217	static int create_other_event(struct kfd_process p, struct* kfd_event ev, const* int *restore_id)
218	{
219	int id;
220
221	if (restore_id)
222	id = idr_alloc(&p->event_idr, ptr: ev, start: restore_id, end: restore_id + `1`,
223	GFP_KERNEL);
224	else
225	/ Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an*
226	* intentional integer overflow to -1 without a compiler
227	* warning. idr_alloc treats a negative value as "maximum
228	* signed integer".
229	*/
230	id = idr_alloc(&p->event_idr, ptr: ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
231	end: (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + `1`,
232	GFP_KERNEL);
233
234	if (id < `0`)
235	return id;
236	ev->event_id = id;
237
238	return `0`;
239	}
240
241	int kfd_event_init_process(struct kfd_process *p)
242	{
243	int id;
244
245	mutex_init(&p->event_mutex);
246	idr_init(idr: &p->event_idr);
247	p->signal_page = NULL;
248	p->signal_event_count = `1`;
249	/ Allocate event ID 0. It is used for a fast path to ignore bogus events*
250	* that are sent by the CP without a context ID
251	*/
252	id = idr_alloc(&p->event_idr, NULL, start: `0`, end: `1`, GFP_KERNEL);
253	if (id < `0`) {
254	idr_destroy(&p->event_idr);
255	mutex_destroy(lock: &p->event_mutex);
256	return id;
257	}
258	return `0`;
259	}
260
261	static void destroy_event(struct kfd_process p, struct* kfd_event *ev)
262	{
263	struct kfd_event_waiter *waiter;
264
265	/ Wake up pending waiters. They will return failure /
266	spin_lock(lock: &ev->lock);
267	list_for_each_entry(waiter, &ev->wq.head, wait.entry)
268	WRITE_ONCE(waiter->event, NULL);
269	wake_up_all(&ev->wq);
270	spin_unlock(lock: &ev->lock);
271
272	if (ev->type == KFD_EVENT_TYPE_SIGNAL \|\|
273	ev->type == KFD_EVENT_TYPE_DEBUG)
274	p->signal_event_count--;
275
276	idr_remove(&p->event_idr, id: ev->event_id);
277	kfree_rcu(ev, rcu);
278	}
279
280	static void destroy_events(struct kfd_process *p)
281	{
282	struct kfd_event *ev;
283	uint32_t id;
284
285	idr_for_each_entry(&p->event_idr, ev, id)
286	if (ev)
287	destroy_event(p, ev);
288	idr_destroy(&p->event_idr);
289	mutex_destroy(lock: &p->event_mutex);
290	}
291
292	/*
293	* We assume that the process is being destroyed and there is no need to
294	* unmap the pages or keep bookkeeping data in order.
295	*/
296	static void shutdown_signal_page(struct kfd_process *p)
297	{
298	struct kfd_signal_page *page = p->signal_page;
299
300	if (page) {
301	if (page->need_to_free_pages)
302	free_pages(addr: (unsigned long)page->kernel_address,
303	order: get_order(KFD_SIGNAL_EVENT_LIMIT * `8`));
304	kfree(objp: page);
305	}
306	}
307
/* Free all event state of @p: first the events, then the signal page. */
void kfd_event_free_process(struct kfd_process *p)
{
	destroy_events(p);
	shutdown_signal_page(p);
}
313
314	static bool event_can_be_gpu_signaled(const struct kfd_event *ev)
315	{
316	return ev->type == KFD_EVENT_TYPE_SIGNAL \|\|
317	ev->type == KFD_EVENT_TYPE_DEBUG;
318	}
319
320	static bool event_can_be_cpu_signaled(const struct kfd_event *ev)
321	{
322	return ev->type == KFD_EVENT_TYPE_SIGNAL;
323	}
324
325	static int kfd_event_page_set(struct kfd_process p, void* *kernel_address,
326	uint64_t size, uint64_t user_handle)
327	{
328	struct kfd_signal_page *page;
329
330	if (p->signal_page)
331	return -EBUSY;
332
333	page = kzalloc(size: sizeof(*page), GFP_KERNEL);
334	if (!page)
335	return -ENOMEM;
336
337	/ Initialize all events to unsignaled /
338	memset(kernel_address, (uint8_t) UNSIGNALED_EVENT_SLOT,
339	KFD_SIGNAL_EVENT_LIMIT * `8`);
340
341	page->kernel_address = kernel_address;
342
343	p->signal_page = page;
344	p->signal_mapped_size = size;
345	p->signal_handle = user_handle;
346	return `0`;
347	}
348
349	int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
350	{
351	struct kfd_node *kfd;
352	struct kfd_process_device *pdd;
353	void mem, kern_addr;
354	uint64_t size;
355	int err = `0`;
356
357	if (p->signal_page) {
358	pr_err("Event page is already set\n");
359	return -EINVAL;
360	}
361
362	pdd = kfd_process_device_data_by_id(process: p, GET_GPU_ID(event_page_offset));
363	if (!pdd) {
364	pr_err("Getting device by id failed in %s\n", __func__);
365	return -EINVAL;
366	}
367	kfd = pdd->dev;
368
369	pdd = kfd_bind_process_to_device(dev: kfd, p);
370	if (IS_ERR(ptr: pdd))
371	return PTR_ERR(ptr: pdd);
372
373	mem = kfd_process_device_translate_handle(p: pdd,
374	GET_IDR_HANDLE(event_page_offset));
375	if (!mem) {
376	pr_err("Can't find BO, offset is 0x%llx\n", event_page_offset);
377	return -EINVAL;
378	}
379
380	err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(mem, kptr: &kern_addr, size: &size);
381	if (err) {
382	pr_err("Failed to map event page to kernel\n");
383	return err;
384	}
385
386	err = kfd_event_page_set(p, kernel_address: kern_addr, size, user_handle: event_page_offset);
387	if (err) {
388	pr_err("Failed to set event page\n");
389	amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
390	return err;
391	}
392	return err;
393	}
394
395	int kfd_event_create(struct file devkfd, struct* kfd_process *p,
396	uint32_t event_type, bool auto_reset, uint32_t node_id,
397	uint32_t event_id, uint32_t event_trigger_data,
398	uint64_t event_page_offset, uint32_t event_slot_index)
399	{
400	int ret = `0`;
401	struct kfd_event ev = kzalloc(size: sizeof(ev), GFP_KERNEL);
402
403	if (!ev)
404	return -ENOMEM;
405
406	ev->type = event_type;
407	ev->auto_reset = auto_reset;
408	ev->signaled = false;
409
410	spin_lock_init(&ev->lock);
411	init_waitqueue_head(&ev->wq);
412
413	*event_page_offset = `0`;
414
415	mutex_lock(&p->event_mutex);
416
417	switch (event_type) {
418	case KFD_EVENT_TYPE_SIGNAL:
419	case KFD_EVENT_TYPE_DEBUG:
420	ret = create_signal_event(devkfd, p, ev, NULL);
421	if (!ret) {
422	*event_page_offset = KFD_MMAP_TYPE_EVENTS;
423	*event_slot_index = ev->event_id;
424	}
425	break;
426	default:
427	ret = create_other_event(p, ev, NULL);
428	break;
429	}
430
431	if (!ret) {
432	*event_id = ev->event_id;
433	*event_trigger_data = ev->event_id;
434	ev->event_age = `1`;
435	} else {
436	kfree(objp: ev);
437	}
438
439	mutex_unlock(lock: &p->event_mutex);
440
441	return ret;
442	}
443
444	int kfd_criu_restore_event(struct file *devkfd,
445	struct kfd_process *p,
446	uint8_t __user *user_priv_ptr,
447	uint64_t *priv_data_offset,
448	uint64_t max_priv_data_size)
449	{
450	struct kfd_criu_event_priv_data *ev_priv;
451	struct kfd_event *ev = NULL;
452	int ret = `0`;
453
454	ev_priv = kmalloc(size: sizeof(*ev_priv), GFP_KERNEL);
455	if (!ev_priv)
456	return -ENOMEM;
457
458	ev = kzalloc(size: sizeof(*ev), GFP_KERNEL);
459	if (!ev) {
460	ret = -ENOMEM;
461	goto exit;
462	}
463
464	if (priv_data_offset + sizeof(ev_priv) > max_priv_data_size) {
465	ret = -EINVAL;
466	goto exit;
467	}
468
469	ret = copy_from_user(to: ev_priv, from: user_priv_ptr + priv_data_offset, n: sizeof(ev_priv));
470	if (ret) {
471	ret = -EFAULT;
472	goto exit;
473	}
474	priv_data_offset += sizeof(ev_priv);
475
476	if (ev_priv->user_handle) {
477	ret = kfd_kmap_event_page(p, event_page_offset: ev_priv->user_handle);
478	if (ret)
479	goto exit;
480	}
481
482	ev->type = ev_priv->type;
483	ev->auto_reset = ev_priv->auto_reset;
484	ev->signaled = ev_priv->signaled;
485
486	spin_lock_init(&ev->lock);
487	init_waitqueue_head(&ev->wq);
488
489	mutex_lock(&p->event_mutex);
490	switch (ev->type) {
491	case KFD_EVENT_TYPE_SIGNAL:
492	case KFD_EVENT_TYPE_DEBUG:
493	ret = create_signal_event(devkfd, p, ev, restore_id: &ev_priv->event_id);
494	break;
495	case KFD_EVENT_TYPE_MEMORY:
496	memcpy(&ev->memory_exception_data,
497	&ev_priv->memory_exception_data,
498	sizeof(struct kfd_hsa_memory_exception_data));
499
500	ret = create_other_event(p, ev, restore_id: &ev_priv->event_id);
501	break;
502	case KFD_EVENT_TYPE_HW_EXCEPTION:
503	memcpy(&ev->hw_exception_data,
504	&ev_priv->hw_exception_data,
505	sizeof(struct kfd_hsa_hw_exception_data));
506
507	ret = create_other_event(p, ev, restore_id: &ev_priv->event_id);
508	break;
509	}
510	mutex_unlock(lock: &p->event_mutex);
511
512	exit:
513	if (ret)
514	kfree(objp: ev);
515
516	kfree(objp: ev_priv);
517
518	return ret;
519	}
520
521	int kfd_criu_checkpoint_events(struct kfd_process *p,
522	uint8_t __user *user_priv_data,
523	uint64_t *priv_data_offset)
524	{
525	struct kfd_criu_event_priv_data *ev_privs;
526	int i = `0`;
527	int ret = `0`;
528	struct kfd_event *ev;
529	uint32_t ev_id;
530
531	uint32_t num_events = kfd_get_num_events(p);
532
533	if (!num_events)
534	return `0`;
535
536	ev_privs = kvzalloc(size: num_events * sizeof(*ev_privs), GFP_KERNEL);
537	if (!ev_privs)
538	return -ENOMEM;
539
540
541	idr_for_each_entry(&p->event_idr, ev, ev_id) {
542	struct kfd_criu_event_priv_data *ev_priv;
543
544	/*
545	* Currently, all events have same size of private_data, but the current ioctl's
546	* and CRIU plugin supports private_data of variable sizes
547	*/
548	ev_priv = &ev_privs[i];
549
550	ev_priv->object_type = KFD_CRIU_OBJECT_TYPE_EVENT;
551
552	/ We store the user_handle with the first event /
553	if (i == `0` && p->signal_page)
554	ev_priv->user_handle = p->signal_handle;
555
556	ev_priv->event_id = ev->event_id;
557	ev_priv->auto_reset = ev->auto_reset;
558	ev_priv->type = ev->type;
559	ev_priv->signaled = ev->signaled;
560
561	if (ev_priv->type == KFD_EVENT_TYPE_MEMORY)
562	memcpy(&ev_priv->memory_exception_data,
563	&ev->memory_exception_data,
564	sizeof(struct kfd_hsa_memory_exception_data));
565	else if (ev_priv->type == KFD_EVENT_TYPE_HW_EXCEPTION)
566	memcpy(&ev_priv->hw_exception_data,
567	&ev->hw_exception_data,
568	sizeof(struct kfd_hsa_hw_exception_data));
569
570	pr_debug("Checkpointed event[%d] id = 0x%08x auto_reset = %x type = %x signaled = %x\n",
571	i,
572	ev_priv->event_id,
573	ev_priv->auto_reset,
574	ev_priv->type,
575	ev_priv->signaled);
576	i++;
577	}
578
579	ret = copy_to_user(to: user_priv_data + *priv_data_offset,
580	from: ev_privs, n: num_events * sizeof(*ev_privs));
581	if (ret) {
582	pr_err("Failed to copy events priv to user\n");
583	ret = -EFAULT;
584	}
585
586	priv_data_offset += num_events sizeof(*ev_privs);
587
588	kvfree(addr: ev_privs);
589	return ret;
590	}
591
592	int kfd_get_num_events(struct kfd_process *p)
593	{
594	struct kfd_event *ev;
595	uint32_t id;
596	u32 num_events = `0`;
597
598	idr_for_each_entry(&p->event_idr, ev, id)
599	num_events++;
600
601	return num_events;
602	}
603
604	/ Assumes that p is current. /
605	int kfd_event_destroy(struct kfd_process *p, uint32_t event_id)
606	{
607	struct kfd_event *ev;
608	int ret = `0`;
609
610	mutex_lock(&p->event_mutex);
611
612	ev = lookup_event_by_id(p, id: event_id);
613
614	if (ev)
615	destroy_event(p, ev);
616	else
617	ret = -EINVAL;
618
619	mutex_unlock(lock: &p->event_mutex);
620	return ret;
621	}
622
623	static void set_event(struct kfd_event *ev)
624	{
625	struct kfd_event_waiter *waiter;
626
627	/ Auto reset if the list is non-empty and we're waking*
628	* someone. waitqueue_active is safe here because we're
629	* protected by the ev->lock, which is also held when
630	* updating the wait queues in kfd_wait_on_events.
631	*/
632	ev->signaled = !ev->auto_reset \|\| !waitqueue_active(wq_head: &ev->wq);
633	if (!(++ev->event_age)) {
634	/ Never wrap back to reserved/default event age 0/1 /
635	ev->event_age = `2`;
636	WARN_ONCE(`1`, "event_age wrap back!");
637	}
638
639	list_for_each_entry(waiter, &ev->wq.head, wait.entry)
640	WRITE_ONCE(waiter->activated, true);
641
642	wake_up_all(&ev->wq);
643	}
644
645	/ Assumes that p is current. /
646	int kfd_set_event(struct kfd_process *p, uint32_t event_id)
647	{
648	int ret = `0`;
649	struct kfd_event *ev;
650
651	rcu_read_lock();
652
653	ev = lookup_event_by_id(p, id: event_id);
654	if (!ev) {
655	ret = -EINVAL;
656	goto unlock_rcu;
657	}
658	spin_lock(lock: &ev->lock);
659
660	if (event_can_be_cpu_signaled(ev))
661	set_event(ev);
662	else
663	ret = -EINVAL;
664
665	spin_unlock(lock: &ev->lock);
666	unlock_rcu:
667	rcu_read_unlock();
668	return ret;
669	}
670
671	static void reset_event(struct kfd_event *ev)
672	{
673	ev->signaled = false;
674	}
675
676	/ Assumes that p is current. /
677	int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
678	{
679	int ret = `0`;
680	struct kfd_event *ev;
681
682	rcu_read_lock();
683
684	ev = lookup_event_by_id(p, id: event_id);
685	if (!ev) {
686	ret = -EINVAL;
687	goto unlock_rcu;
688	}
689	spin_lock(lock: &ev->lock);
690
691	if (event_can_be_cpu_signaled(ev))
692	reset_event(ev);
693	else
694	ret = -EINVAL;
695
696	spin_unlock(lock: &ev->lock);
697	unlock_rcu:
698	rcu_read_unlock();
699	return ret;
700
701	}
702
703	static void acknowledge_signal(struct kfd_process p, struct* kfd_event *ev)
704	{
705	WRITE_ONCE(page_slots(p->signal_page)[ev->event_id], UNSIGNALED_EVENT_SLOT);
706	}
707
708	static void set_event_from_interrupt(struct kfd_process *p,
709	struct kfd_event *ev)
710	{
711	if (ev && event_can_be_gpu_signaled(ev)) {
712	acknowledge_signal(p, ev);
713	spin_lock(lock: &ev->lock);
714	set_event(ev);
715	spin_unlock(lock: &ev->lock);
716	}
717	}
718
719	void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
720	uint32_t valid_id_bits)
721	{
722	struct kfd_event *ev = NULL;
723
724	/*
725	* Because we are called from arbitrary context (workqueue) as opposed
726	* to process context, kfd_process could attempt to exit while we are
727	* running so the lookup function increments the process ref count.
728	*/
729	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
730
731	if (!p)
732	return; / Presumably process exited. /
733
734	rcu_read_lock();
735
736	if (valid_id_bits)
737	ev = lookup_signaled_event_by_partial_id(p, id: partial_id,
738	bits: valid_id_bits);
739	if (ev) {
740	set_event_from_interrupt(p, ev);
741	} else if (p->signal_page) {
742	/*
743	* Partial ID lookup failed. Assume that the event ID
744	* in the interrupt payload was invalid and do an
745	* exhaustive search of signaled events.
746	*/
747	uint64_t *slots = page_slots(page: p->signal_page);
748	uint32_t id;
749
750	if (valid_id_bits)
751	pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n",
752	partial_id, valid_id_bits);
753
754	if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT / `64`) {
755	/ With relatively few events, it's faster to*
756	* iterate over the event IDR
757	*/
758	idr_for_each_entry(&p->event_idr, ev, id) {
759	if (id >= KFD_SIGNAL_EVENT_LIMIT)
760	break;
761
762	if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT)
763	set_event_from_interrupt(p, ev);
764	}
765	} else {
766	/ With relatively many events, it's faster to*
767	* iterate over the signal slots and lookup
768	* only signaled events from the IDR.
769	*/
770	for (id = `1`; id < KFD_SIGNAL_EVENT_LIMIT; id++)
771	if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT) {
772	ev = lookup_event_by_id(p, id);
773	set_event_from_interrupt(p, ev);
774	}
775	}
776	}
777
778	rcu_read_unlock();
779	kfd_unref_process(p);
780	}
781
782	static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
783	{
784	struct kfd_event_waiter *event_waiters;
785	uint32_t i;
786
787	event_waiters = kcalloc(n: num_events, size: sizeof(struct kfd_event_waiter),
788	GFP_KERNEL);
789	if (!event_waiters)
790	return NULL;
791
792	for (i = `0`; i < num_events; i++)
793	init_wait(&event_waiters[i].wait);
794
795	return event_waiters;
796	}
797
798	static int init_event_waiter(struct kfd_process *p,
799	struct kfd_event_waiter *waiter,
800	struct kfd_event_data *event_data)
801	{
802	struct kfd_event *ev = lookup_event_by_id(p, id: event_data->event_id);
803
804	if (!ev)
805	return -EINVAL;
806
807	spin_lock(lock: &ev->lock);
808	waiter->event = ev;
809	waiter->activated = ev->signaled;
810	ev->signaled = ev->signaled && !ev->auto_reset;
811
812	/ last_event_age = 0 reserved for backward compatible /
813	if (waiter->event->type == KFD_EVENT_TYPE_SIGNAL &&
814	event_data->signal_event_data.last_event_age) {
815	waiter->event_age_enabled = true;
816	if (ev->event_age != event_data->signal_event_data.last_event_age)
817	waiter->activated = true;
818	}
819
820	if (!waiter->activated)
821	add_wait_queue(wq_head: &ev->wq, wq_entry: &waiter->wait);
822	spin_unlock(lock: &ev->lock);
823
824	return `0`;
825	}
826
827	/ test_event_condition - Test condition of events being waited for*
828	* @all: Return completion only if all events have signaled
829	* @num_events: Number of events to wait for
830	* @event_waiters: Array of event waiters, one per event
831	*
832	* Returns KFD_IOC_WAIT_RESULT_COMPLETE if all (or one) event(s) have
833	* signaled. Returns KFD_IOC_WAIT_RESULT_TIMEOUT if no (or not all)
834	* events have signaled. Returns KFD_IOC_WAIT_RESULT_FAIL if any of
835	* the events have been destroyed.
836	*/
837	static uint32_t test_event_condition(bool all, uint32_t num_events,
838	struct kfd_event_waiter *event_waiters)
839	{
840	uint32_t i;
841	uint32_t activated_count = `0`;
842
843	for (i = `0`; i < num_events; i++) {
844	if (!READ_ONCE(event_waiters[i].event))
845	return KFD_IOC_WAIT_RESULT_FAIL;
846
847	if (READ_ONCE(event_waiters[i].activated)) {
848	if (!all)
849	return KFD_IOC_WAIT_RESULT_COMPLETE;
850
851	activated_count++;
852	}
853	}
854
855	return activated_count == num_events ?
856	KFD_IOC_WAIT_RESULT_COMPLETE : KFD_IOC_WAIT_RESULT_TIMEOUT;
857	}
858
859	/*
860	* Copy event specific data, if defined.
861	* Currently only memory exception events have additional data to copy to user
862	*/
863	static int copy_signaled_event_data(uint32_t num_events,
864	struct kfd_event_waiter *event_waiters,
865	struct kfd_event_data __user *data)
866	{
867	void *src;
868	void __user *dst;
869	struct kfd_event_waiter *waiter;
870	struct kfd_event *event;
871	uint32_t i, size = `0`;
872
873	for (i = `0`; i < num_events; i++) {
874	waiter = &event_waiters[i];
875	event = waiter->event;
876	if (!event)
877	return -EINVAL; / event was destroyed /
878	if (waiter->activated) {
879	if (event->type == KFD_EVENT_TYPE_MEMORY) {
880	dst = &data[i].memory_exception_data;
881	src = &event->memory_exception_data;
882	size = sizeof(struct kfd_hsa_memory_exception_data);
883	} else if (event->type == KFD_EVENT_TYPE_SIGNAL &&
884	waiter->event_age_enabled) {
885	dst = &data[i].signal_event_data.last_event_age;
886	src = &event->event_age;
887	size = sizeof(u64);
888	}
889	if (size && copy_to_user(to: dst, from: src, n: size))
890	return -EFAULT;
891	}
892	}
893
894	return `0`;
895	}
896
897	static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
898	{
899	if (user_timeout_ms == KFD_EVENT_TIMEOUT_IMMEDIATE)
900	return `0`;
901
902	if (user_timeout_ms == KFD_EVENT_TIMEOUT_INFINITE)
903	return MAX_SCHEDULE_TIMEOUT;
904
905	/*
906	* msecs_to_jiffies interprets all values above 2^31-1 as infinite,
907	* but we consider them finite.
908	* This hack is wrong, but nobody is likely to notice.
909	*/
910	user_timeout_ms = min_t(uint32_t, user_timeout_ms, `0x7FFFFFFF`);
911
912	return msecs_to_jiffies(m: user_timeout_ms) + `1`;
913	}
914
915	static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters,
916	bool undo_auto_reset)
917	{
918	uint32_t i;
919
920	for (i = `0`; i < num_events; i++)
921	if (waiters[i].event) {
922	spin_lock(lock: &waiters[i].event->lock);
923	remove_wait_queue(wq_head: &waiters[i].event->wq,
924	wq_entry: &waiters[i].wait);
925	if (undo_auto_reset && waiters[i].activated &&
926	waiters[i].event && waiters[i].event->auto_reset)
927	set_event(waiters[i].event);
928	spin_unlock(lock: &waiters[i].event->lock);
929	}
930
931	kfree(objp: waiters);
932	}
933
934	int kfd_wait_on_events(struct kfd_process *p,
935	uint32_t num_events, void __user *data,
936	bool all, uint32_t *user_timeout_ms,
937	uint32_t *wait_result)
938	{
939	struct kfd_event_data __user *events =
940	(struct kfd_event_data __user *) data;
941	uint32_t i;
942	int ret = `0`;
943
944	struct kfd_event_waiter *event_waiters = NULL;
945	long timeout = user_timeout_to_jiffies(user_timeout_ms: *user_timeout_ms);
946
947	event_waiters = alloc_event_waiters(num_events);
948	if (!event_waiters) {
949	ret = -ENOMEM;
950	goto out;
951	}
952
953	/ Use p->event_mutex here to protect against concurrent creation and*
954	* destruction of events while we initialize event_waiters.
955	*/
956	mutex_lock(&p->event_mutex);
957
958	for (i = `0`; i < num_events; i++) {
959	struct kfd_event_data event_data;
960
961	if (copy_from_user(to: &event_data, from: &events[i],
962	n: sizeof(struct kfd_event_data))) {
963	ret = -EFAULT;
964	goto out_unlock;
965	}
966
967	ret = init_event_waiter(p, waiter: &event_waiters[i], event_data: &event_data);
968	if (ret)
969	goto out_unlock;
970	}
971
972	/ Check condition once. /
973	*wait_result = test_event_condition(all, num_events, event_waiters);
974	if (*wait_result == KFD_IOC_WAIT_RESULT_COMPLETE) {
975	ret = copy_signaled_event_data(num_events,
976	event_waiters, data: events);
977	goto out_unlock;
978	} else if (WARN_ON(*wait_result == KFD_IOC_WAIT_RESULT_FAIL)) {
979	/ This should not happen. Events shouldn't be*
980	* destroyed while we're holding the event_mutex
981	*/
982	goto out_unlock;
983	}
984
985	mutex_unlock(lock: &p->event_mutex);
986
987	while (true) {
988	if (fatal_signal_pending(current)) {
989	ret = -EINTR;
990	break;
991	}
992
993	if (signal_pending(current)) {
994	ret = -ERESTARTSYS;
995	if (*user_timeout_ms != KFD_EVENT_TIMEOUT_IMMEDIATE &&
996	*user_timeout_ms != KFD_EVENT_TIMEOUT_INFINITE)
997	*user_timeout_ms = jiffies_to_msecs(
998	max(`0l`, timeout-`1`));
999	break;
1000	}
1001
1002	/ Set task state to interruptible sleep before*
1003	* checking wake-up conditions. A concurrent wake-up
1004	* will put the task back into runnable state. In that
1005	* case schedule_timeout will not put the task to
1006	* sleep and we'll get a chance to re-check the
1007	* updated conditions almost immediately. Otherwise,
1008	* this race condition would lead to a soft hang or a
1009	* very long sleep.
1010	*/
1011	set_current_state(TASK_INTERRUPTIBLE);
1012
1013	*wait_result = test_event_condition(all, num_events,
1014	event_waiters);
1015	if (*wait_result != KFD_IOC_WAIT_RESULT_TIMEOUT)
1016	break;
1017
1018	if (timeout <= `0`)
1019	break;
1020
1021	timeout = schedule_timeout(timeout);
1022	}
1023	__set_current_state(TASK_RUNNING);
1024
1025	mutex_lock(&p->event_mutex);
1026	/ copy_signaled_event_data may sleep. So this has to happen*
1027	* after the task state is set back to RUNNING.
1028	*
1029	* The event may also have been destroyed after signaling. So
1030	* copy_signaled_event_data also must confirm that the event
1031	* still exists. Therefore this must be under the p->event_mutex
1032	* which is also held when events are destroyed.
1033	*/
1034	if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE)
1035	ret = copy_signaled_event_data(num_events,
1036	event_waiters, data: events);
1037
1038	out_unlock:
1039	free_waiters(num_events, waiters: event_waiters, undo_auto_reset: ret == -ERESTARTSYS);
1040	mutex_unlock(lock: &p->event_mutex);
1041	out:
1042	if (ret)
1043	*wait_result = KFD_IOC_WAIT_RESULT_FAIL;
1044	else if (*wait_result == KFD_IOC_WAIT_RESULT_FAIL)
1045	ret = -EIO;
1046
1047	return ret;
1048	}
1049
1050	int kfd_event_mmap(struct kfd_process p, struct* vm_area_struct *vma)
1051	{
1052	unsigned long pfn;
1053	struct kfd_signal_page *page;
1054	int ret;
1055
1056	/ check required size doesn't exceed the allocated size /
1057	if (get_order(KFD_SIGNAL_EVENT_LIMIT * `8`) <
1058	get_order(size: vma->vm_end - vma->vm_start)) {
1059	pr_err("Event page mmap requested illegal size\n");
1060	return -EINVAL;
1061	}
1062
1063	page = p->signal_page;
1064	if (!page) {
1065	/ Probably KFD bug, but mmap is user-accessible. /
1066	pr_debug("Signal page could not be found\n");
1067	return -EINVAL;
1068	}
1069
1070	pfn = __pa(page->kernel_address);
1071	pfn >>= PAGE_SHIFT;
1072
1073	vm_flags_set(vma, VM_IO \| VM_DONTCOPY \| VM_DONTEXPAND \| VM_NORESERVE
1074	\| VM_DONTDUMP \| VM_PFNMAP);
1075
1076	pr_debug("Mapping signal page\n");
1077	pr_debug(" start user address == 0x%08lx\n", vma->vm_start);
1078	pr_debug(" end user address == 0x%08lx\n", vma->vm_end);
1079	pr_debug(" pfn == 0x%016lX\n", pfn);
1080	pr_debug(" vm_flags == 0x%08lX\n", vma->vm_flags);
1081	pr_debug(" size == 0x%08lX\n",
1082	vma->vm_end - vma->vm_start);
1083
1084	page->user_address = (uint64_t __user *)vma->vm_start;
1085
1086	/ mapping the page to user process /
1087	ret = remap_pfn_range(vma, addr: vma->vm_start, pfn,
1088	size: vma->vm_end - vma->vm_start, vma->vm_page_prot);
1089	if (!ret)
1090	p->signal_mapped_size = vma->vm_end - vma->vm_start;
1091
1092	return ret;
1093	}
1094
1095	/*
1096	* Assumes that p is not going away.
1097	*/
1098	static void lookup_events_by_type_and_signal(struct kfd_process *p,
1099	int type, void *event_data)
1100	{
1101	struct kfd_hsa_memory_exception_data *ev_data;
1102	struct kfd_event *ev;
1103	uint32_t id;
1104	bool send_signal = true;
1105
1106	ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
1107
1108	rcu_read_lock();
1109
1110	id = KFD_FIRST_NONSIGNAL_EVENT_ID;
1111	idr_for_each_entry_continue(&p->event_idr, ev, id)
1112	if (ev->type == type) {
1113	send_signal = false;
1114	dev_dbg(kfd_device,
1115	"Event found: id %X type %d",
1116	ev->event_id, ev->type);
1117	spin_lock(lock: &ev->lock);
1118	set_event(ev);
1119	if (ev->type == KFD_EVENT_TYPE_MEMORY && ev_data)
1120	ev->memory_exception_data = *ev_data;
1121	spin_unlock(lock: &ev->lock);
1122	}
1123
1124	if (type == KFD_EVENT_TYPE_MEMORY) {
1125	dev_warn(kfd_device,
1126	"Sending SIGSEGV to process %d (pasid 0x%x)",
1127	p->lead_thread->pid, p->pasid);
1128	send_sig(SIGSEGV, p->lead_thread, `0`);
1129	}
1130
1131	/ Send SIGTERM no event of type "type" has been found/
1132	if (send_signal) {
1133	if (send_sigterm) {
1134	dev_warn(kfd_device,
1135	"Sending SIGTERM to process %d (pasid 0x%x)",
1136	p->lead_thread->pid, p->pasid);
1137	send_sig(SIGTERM, p->lead_thread, `0`);
1138	} else {
1139	dev_err(kfd_device,
1140	"Process %d (pasid 0x%x) got unhandled exception",
1141	p->lead_thread->pid, p->pasid);
1142	}
1143	}
1144
1145	rcu_read_unlock();
1146	}
1147
1148	void kfd_signal_hw_exception_event(u32 pasid)
1149	{
1150	/*
1151	* Because we are called from arbitrary context (workqueue) as opposed
1152	* to process context, kfd_process could attempt to exit while we are
1153	* running so the lookup function increments the process ref count.
1154	*/
1155	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
1156
1157	if (!p)
1158	return; / Presumably process exited. /
1159
1160	lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL);
1161	kfd_unref_process(p);
1162	}
1163
1164	void kfd_signal_vm_fault_event(struct kfd_node *dev, u32 pasid,
1165	struct kfd_vm_fault_info *info,
1166	struct kfd_hsa_memory_exception_data *data)
1167	{
1168	struct kfd_event *ev;
1169	uint32_t id;
1170	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
1171	struct kfd_hsa_memory_exception_data memory_exception_data;
1172	int user_gpu_id;
1173
1174	if (!p)
1175	return; / Presumably process exited. /
1176
1177	user_gpu_id = kfd_process_get_user_gpu_id(p, actual_gpu_id: dev->id);
1178	if (unlikely(user_gpu_id == -EINVAL)) {
1179	WARN_ONCE(`1`, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
1180	return;
1181	}
1182
1183	/ SoC15 chips and onwards will pass in data from now on. /
1184	if (!data) {
1185	memset(&memory_exception_data, `0`, sizeof(memory_exception_data));
1186	memory_exception_data.gpu_id = user_gpu_id;
1187	memory_exception_data.failure.imprecise = true;
1188
1189	/ Set failure reason /
1190	if (info) {
1191	memory_exception_data.va = (info->page_addr) <<
1192	PAGE_SHIFT;
1193	memory_exception_data.failure.NotPresent =
1194	info->prot_valid ? `1` : `0`;
1195	memory_exception_data.failure.NoExecute =
1196	info->prot_exec ? `1` : `0`;
1197	memory_exception_data.failure.ReadOnly =
1198	info->prot_write ? `1` : `0`;
1199	memory_exception_data.failure.imprecise = `0`;
1200	}
1201	}
1202
1203	rcu_read_lock();
1204
1205	id = KFD_FIRST_NONSIGNAL_EVENT_ID;
1206	idr_for_each_entry_continue(&p->event_idr, ev, id)
1207	if (ev->type == KFD_EVENT_TYPE_MEMORY) {
1208	spin_lock(lock: &ev->lock);
1209	ev->memory_exception_data = data ? *data :
1210	memory_exception_data;
1211	set_event(ev);
1212	spin_unlock(lock: &ev->lock);
1213	}
1214
1215	rcu_read_unlock();
1216	kfd_unref_process(p);
1217	}
1218
1219	void kfd_signal_reset_event(struct kfd_node *dev)
1220	{
1221	struct kfd_hsa_hw_exception_data hw_exception_data;
1222	struct kfd_hsa_memory_exception_data memory_exception_data;
1223	struct kfd_process *p;
1224	struct kfd_event *ev;
1225	unsigned int temp;
1226	uint32_t id, idx;
1227	int reset_cause = atomic_read(v: &dev->sram_ecc_flag) ?
1228	KFD_HW_EXCEPTION_ECC :
1229	KFD_HW_EXCEPTION_GPU_HANG;
1230
1231	/ Whole gpu reset caused by GPU hang and memory is lost /
1232	memset(&hw_exception_data, `0`, sizeof(hw_exception_data));
1233	hw_exception_data.memory_lost = `1`;
1234	hw_exception_data.reset_cause = reset_cause;
1235
1236	memset(&memory_exception_data, `0`, sizeof(memory_exception_data));
1237	memory_exception_data.ErrorType = KFD_MEM_ERR_SRAM_ECC;
1238	memory_exception_data.failure.imprecise = true;
1239
1240	idx = srcu_read_lock(ssp: &kfd_processes_srcu);
1241	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1242	int user_gpu_id = kfd_process_get_user_gpu_id(p, actual_gpu_id: dev->id);
1243
1244	if (unlikely(user_gpu_id == -EINVAL)) {
1245	WARN_ONCE(`1`, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
1246	continue;
1247	}
1248
1249	rcu_read_lock();
1250
1251	id = KFD_FIRST_NONSIGNAL_EVENT_ID;
1252	idr_for_each_entry_continue(&p->event_idr, ev, id) {
1253	if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
1254	spin_lock(lock: &ev->lock);
1255	ev->hw_exception_data = hw_exception_data;
1256	ev->hw_exception_data.gpu_id = user_gpu_id;
1257	set_event(ev);
1258	spin_unlock(lock: &ev->lock);
1259	}
1260	if (ev->type == KFD_EVENT_TYPE_MEMORY &&
1261	reset_cause == KFD_HW_EXCEPTION_ECC) {
1262	spin_lock(lock: &ev->lock);
1263	ev->memory_exception_data = memory_exception_data;
1264	ev->memory_exception_data.gpu_id = user_gpu_id;
1265	set_event(ev);
1266	spin_unlock(lock: &ev->lock);
1267	}
1268	}
1269
1270	rcu_read_unlock();
1271	}
1272	srcu_read_unlock(ssp: &kfd_processes_srcu, idx);
1273	}
1274
1275	void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
1276	{
1277	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
1278	struct kfd_hsa_memory_exception_data memory_exception_data;
1279	struct kfd_hsa_hw_exception_data hw_exception_data;
1280	struct kfd_event *ev;
1281	uint32_t id = KFD_FIRST_NONSIGNAL_EVENT_ID;
1282	int user_gpu_id;
1283
1284	if (!p)
1285	return; / Presumably process exited. /
1286
1287	user_gpu_id = kfd_process_get_user_gpu_id(p, actual_gpu_id: dev->id);
1288	if (unlikely(user_gpu_id == -EINVAL)) {
1289	WARN_ONCE(`1`, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
1290	return;
1291	}
1292
1293	memset(&hw_exception_data, `0`, sizeof(hw_exception_data));
1294	hw_exception_data.gpu_id = user_gpu_id;
1295	hw_exception_data.memory_lost = `1`;
1296	hw_exception_data.reset_cause = KFD_HW_EXCEPTION_ECC;
1297
1298	memset(&memory_exception_data, `0`, sizeof(memory_exception_data));
1299	memory_exception_data.ErrorType = KFD_MEM_ERR_POISON_CONSUMED;
1300	memory_exception_data.gpu_id = user_gpu_id;
1301	memory_exception_data.failure.imprecise = true;
1302
1303	rcu_read_lock();
1304
1305	idr_for_each_entry_continue(&p->event_idr, ev, id) {
1306	if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
1307	spin_lock(lock: &ev->lock);
1308	ev->hw_exception_data = hw_exception_data;
1309	set_event(ev);
1310	spin_unlock(lock: &ev->lock);
1311	}
1312
1313	if (ev->type == KFD_EVENT_TYPE_MEMORY) {
1314	spin_lock(lock: &ev->lock);
1315	ev->memory_exception_data = memory_exception_data;
1316	set_event(ev);
1317	spin_unlock(lock: &ev->lock);
1318	}
1319	}
1320
1321	rcu_read_unlock();
1322
1323	/ user application will handle SIGBUS signal /
1324	send_sig(SIGBUS, p->lead_thread, `0`);
1325
1326	kfd_unref_process(p);
1327	}
1328

source code of linux/drivers/gpu/drm/amd/amdkfd/kfd_events.c