1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * SN Platform GRU Driver |
4 | * |
5 | * KERNEL SERVICES THAT USE THE GRU |
6 | * |
7 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. |
8 | */ |
9 | |
10 | #include <linux/kernel.h> |
11 | #include <linux/errno.h> |
12 | #include <linux/slab.h> |
13 | #include <linux/mm.h> |
14 | #include <linux/spinlock.h> |
15 | #include <linux/device.h> |
16 | #include <linux/miscdevice.h> |
17 | #include <linux/proc_fs.h> |
18 | #include <linux/interrupt.h> |
19 | #include <linux/sync_core.h> |
20 | #include <linux/uaccess.h> |
21 | #include <linux/delay.h> |
22 | #include <linux/export.h> |
23 | #include <asm/io_apic.h> |
24 | #include "gru.h" |
25 | #include "grulib.h" |
26 | #include "grutables.h" |
27 | #include "grukservices.h" |
28 | #include "gru_instructions.h" |
29 | #include <asm/uv/uv_hub.h> |
30 | |
31 | /* |
32 | * Kernel GRU Usage |
33 | * |
34 | * The following is an interim algorithm for management of kernel GRU |
35 | * resources. This will likely be replaced when we better understand the |
36 | * kernel/user requirements. |
37 | * |
38 | * Blade percpu resources reserved for kernel use. These resources are |
39 | * reserved whenever the kernel context for the blade is loaded. Note |
40 | * that the kernel context is not guaranteed to be always available. It is |
41 | * loaded on demand & can be stolen by a user if the user demand exceeds the |
42 | * kernel demand. The kernel can always reload the kernel context but |
43 | * a SLEEP may be required!!!. |
44 | * |
45 | * Async Overview: |
46 | * |
47 | * Each blade has one "kernel context" that owns GRU kernel resources |
48 | * located on the blade. Kernel drivers use GRU resources in this context |
49 | * for sending messages, zeroing memory, etc. |
50 | * |
51 | * The kernel context is dynamically loaded on demand. If it is not in |
52 | * use by the kernel, the kernel context can be unloaded & given to a user. |
53 | * The kernel context will be reloaded when needed. This may require that |
54 | * a context be stolen from a user. |
55 | * NOTE: frequent unloading/reloading of the kernel context is |
56 | * expensive. We are depending on batch schedulers, cpusets, sane |
57 | * drivers or some other mechanism to prevent the need for frequent |
58 | * stealing/reloading. |
59 | * |
60 | * The kernel context consists of two parts: |
61 | * - 1 CB & a few DSRs that are reserved for each cpu on the blade. |
 *	  Each cpu has its own private resources & does not share them
 *	  with other cpus. These resources are used serially, i.e.,
64 | * locked, used & unlocked on each call to a function in |
65 | * grukservices. |
66 | * (Now that we have dynamic loading of kernel contexts, I |
67 | * may rethink this & allow sharing between cpus....) |
68 | * |
69 | * - Additional resources can be reserved long term & used directly |
70 | * by UV drivers located in the kernel. Drivers using these GRU |
71 | * resources can use asynchronous GRU instructions that send |
72 | * interrupts on completion. |
73 | * - these resources must be explicitly locked/unlocked |
74 | * - locked resources prevent (obviously) the kernel |
75 | * context from being unloaded. |
 *		- drivers using these resources directly issue their own
77 | * GRU instruction and must wait/check completion. |
78 | * |
79 | * When these resources are reserved, the caller can optionally |
80 | * associate a wait_queue with the resources and use asynchronous |
81 | * GRU instructions. When an async GRU instruction completes, the |
82 | * driver will do a wakeup on the event. |
83 | * |
84 | */ |
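
/*
 * Illustrative sketch (not part of the driver, not compiled): how a kernel
 * driver might use the long-term async resources described above. This
 * roughly follows quicktest2() below; "my_cmp" and the instruction issued
 * are hypothetical, the gru_* calls are the ones defined in this file.
 *
 *	static DECLARE_COMPLETION(my_cmp);
 *	unsigned long han;
 *	void *cb;
 *
 *	han = gru_reserve_async_resources(blade_id, 1, 0, &my_cmp);
 *	if (!han)
 *		return -EBUSY;		// already reserved on this blade
 *	gru_lock_async_resource(han, &cb, NULL);
 *	// ... issue asynchronous GRU instruction(s) on cb with IMA_INTERRUPT
 *	// so that completion wakes my_cmp ...
 *	gru_wait_async_cbr(han);
 *	gru_unlock_async_resource(han);
 *	gru_release_async_resources(han);
 */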
85 | |
86 | |
87 | #define ASYNC_HAN_TO_BID(h) ((h) - 1) |
88 | #define ASYNC_BID_TO_HAN(b) ((b) + 1) |
89 | #define ASYNC_HAN_TO_BS(h) gru_base[ASYNC_HAN_TO_BID(h)] |
90 | |
91 | #define GRU_NUM_KERNEL_CBR 1 |
92 | #define GRU_NUM_KERNEL_DSR_BYTES 256 |
93 | #define GRU_NUM_KERNEL_DSR_CL (GRU_NUM_KERNEL_DSR_BYTES / \ |
94 | GRU_CACHE_LINE_BYTES) |
95 | |
96 | /* GRU instruction attributes for all instructions */ |
97 | #define IMA IMA_CB_DELAY |
98 | |
99 | /* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */ |
100 | #define __gru_cacheline_aligned__ \ |
101 | __attribute__((__aligned__(GRU_CACHE_LINE_BYTES))) |
102 | |
103 | #define MAGIC 0x1234567887654321UL |
104 | |
105 | /* Default retry count for GRU errors on kernel instructions */ |
106 | #define EXCEPTION_RETRY_LIMIT 3 |
107 | |
108 | /* Status of message queue sections */ |
109 | #define MQS_EMPTY 0 |
110 | #define MQS_FULL 1 |
111 | #define MQS_NOOP 2 |
112 | |
113 | /*----------------- RESOURCE MANAGEMENT -------------------------------------*/ |
114 | /* optimized for x86_64 */ |
115 | struct message_queue { |
116 | union gru_mesqhead head __gru_cacheline_aligned__; /* CL 0 */ |
117 | int qlines; /* DW 1 */ |
118 | long hstatus[2]; |
119 | void *next __gru_cacheline_aligned__;/* CL 1 */ |
120 | void *limit; |
121 | void *start; |
122 | void *start2; |
123 | char data ____cacheline_aligned; /* CL 2 */ |
124 | }; |
125 | |
126 | /* First word in every message - used by mesq interface */ |
/* First word in every message - used by mesq interface */
struct message_header {
	char	present;
	char	present2;
	char	lines;
	char	fill;
};
133 | |
134 | #define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h])) |
135 | |
136 | /* |
137 | * Reload the blade's kernel context into a GRU chiplet. Called holding |
138 | * the bs_kgts_sema for READ. Will steal user contexts if necessary. |
139 | */ |
140 | static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id) |
141 | { |
142 | struct gru_state *gru; |
143 | struct gru_thread_state *kgts; |
144 | void *vaddr; |
145 | int ctxnum, ncpus; |
146 | |
	up_read(&bs->bs_kgts_sema);
	down_write(&bs->bs_kgts_sema);

	if (!bs->bs_kgts) {
		do {
			bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
			if (!IS_ERR(bs->bs_kgts))
				break;
			msleep(1);
		} while (true);
		bs->bs_kgts->ts_user_blade_id = blade_id;
	}
	kgts = bs->bs_kgts;

	if (!kgts->ts_gru) {
		STAT(load_kernel_context);
		ncpus = uv_blade_nr_possible_cpus(blade_id);
		kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU(
			GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs);
		kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU(
			GRU_NUM_KERNEL_DSR_BYTES * ncpus +
				bs->bs_async_dsr_bytes);
		while (!gru_assign_gru_context(kgts)) {
			msleep(1);
			gru_steal_context(kgts);
		}
		gru_load_context(kgts);
		gru = bs->bs_kgts->ts_gru;
		vaddr = gru->gs_gru_base_vaddr;
		ctxnum = kgts->ts_ctxnum;
		bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0);
		bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0);
	}
	downgrade_write(&bs->bs_kgts_sema);
181 | } |
182 | |
183 | /* |
184 | * Free all kernel contexts that are not currently in use. |
 * Returns 0 if all freed, else number of in-use contexts.
186 | */ |
187 | static int gru_free_kernel_contexts(void) |
188 | { |
189 | struct gru_blade_state *bs; |
190 | struct gru_thread_state *kgts; |
191 | int bid, ret = 0; |
192 | |
193 | for (bid = 0; bid < GRU_MAX_BLADES; bid++) { |
194 | bs = gru_base[bid]; |
195 | if (!bs) |
196 | continue; |
197 | |
198 | /* Ignore busy contexts. Don't want to block here. */ |
		if (down_write_trylock(&bs->bs_kgts_sema)) {
			kgts = bs->bs_kgts;
			if (kgts && kgts->ts_gru)
				gru_unload_context(kgts, 0);
			bs->bs_kgts = NULL;
			up_write(&bs->bs_kgts_sema);
			kfree(kgts);
206 | } else { |
207 | ret++; |
208 | } |
209 | } |
210 | return ret; |
211 | } |
212 | |
213 | /* |
214 | * Lock & load the kernel context for the specified blade. |
215 | */ |
216 | static struct gru_blade_state *gru_lock_kernel_context(int blade_id) |
217 | { |
218 | struct gru_blade_state *bs; |
219 | int bid; |
220 | |
221 | STAT(lock_kernel_context); |
222 | again: |
223 | bid = blade_id < 0 ? uv_numa_blade_id() : blade_id; |
224 | bs = gru_base[bid]; |
225 | |
226 | /* Handle the case where migration occurred while waiting for the sema */ |
	down_read(&bs->bs_kgts_sema);
	if (blade_id < 0 && bid != uv_numa_blade_id()) {
		up_read(&bs->bs_kgts_sema);
		goto again;
	}
	if (!bs->bs_kgts || !bs->bs_kgts->ts_gru)
		gru_load_kernel_context(bs, bid);
	return bs;
}
237 | |
238 | /* |
239 | * Unlock the kernel context for the specified blade. Context is not |
240 | * unloaded but may be stolen before next use. |
241 | */ |
242 | static void gru_unlock_kernel_context(int blade_id) |
243 | { |
244 | struct gru_blade_state *bs; |
245 | |
246 | bs = gru_base[blade_id]; |
	up_read(&bs->bs_kgts_sema);
248 | STAT(unlock_kernel_context); |
249 | } |
250 | |
251 | /* |
252 | * Reserve & get pointers to the DSR/CBRs reserved for the current cpu. |
253 | * - returns with preemption disabled |
254 | */ |
255 | static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr) |
256 | { |
257 | struct gru_blade_state *bs; |
258 | int lcpu; |
259 | |
260 | BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES); |
261 | preempt_disable(); |
	bs = gru_lock_kernel_context(-1);
263 | lcpu = uv_blade_processor_id(); |
264 | *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE; |
265 | *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES; |
266 | return 0; |
267 | } |
268 | |
269 | /* |
270 | * Free the current cpus reserved DSR/CBR resources. |
271 | */ |
272 | static void gru_free_cpu_resources(void *cb, void *dsr) |
273 | { |
	gru_unlock_kernel_context(uv_numa_blade_id());
275 | preempt_enable(); |
276 | } |
277 | |
278 | /* |
279 | * Reserve GRU resources to be used asynchronously. |
280 | * Note: currently supports only 1 reservation per blade. |
281 | * |
282 | * input: |
283 | * blade_id - blade on which resources should be reserved |
284 | * cbrs - number of CBRs |
285 | * dsr_bytes - number of DSR bytes needed |
286 | * output: |
287 | * handle to identify resource |
288 | * (0 = async resources already reserved) |
289 | */ |
290 | unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes, |
291 | struct completion *cmp) |
292 | { |
293 | struct gru_blade_state *bs; |
294 | struct gru_thread_state *kgts; |
295 | int ret = 0; |
296 | |
297 | bs = gru_base[blade_id]; |
298 | |
	down_write(&bs->bs_kgts_sema);
300 | |
301 | /* Verify no resources already reserved */ |
302 | if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs) |
303 | goto done; |
304 | bs->bs_async_dsr_bytes = dsr_bytes; |
305 | bs->bs_async_cbrs = cbrs; |
306 | bs->bs_async_wq = cmp; |
307 | kgts = bs->bs_kgts; |
308 | |
309 | /* Resources changed. Unload context if already loaded */ |
310 | if (kgts && kgts->ts_gru) |
		gru_unload_context(kgts, 0);
312 | ret = ASYNC_BID_TO_HAN(blade_id); |
313 | |
314 | done: |
	up_write(&bs->bs_kgts_sema);
316 | return ret; |
317 | } |
318 | |
319 | /* |
320 | * Release async resources previously reserved. |
321 | * |
322 | * input: |
323 | * han - handle to identify resources |
324 | */ |
325 | void gru_release_async_resources(unsigned long han) |
326 | { |
327 | struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); |
328 | |
	down_write(&bs->bs_kgts_sema);
	bs->bs_async_dsr_bytes = 0;
	bs->bs_async_cbrs = 0;
	bs->bs_async_wq = NULL;
	up_write(&bs->bs_kgts_sema);
334 | } |
335 | |
336 | /* |
337 | * Wait for async GRU instructions to complete. |
338 | * |
339 | * input: |
340 | * han - handle to identify resources |
341 | */ |
342 | void gru_wait_async_cbr(unsigned long han) |
343 | { |
344 | struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); |
345 | |
346 | wait_for_completion(bs->bs_async_wq); |
347 | mb(); |
348 | } |
349 | |
350 | /* |
 * Lock previously reserved async GRU resources
352 | * |
353 | * input: |
354 | * han - handle to identify resources |
355 | * output: |
356 | * cb - pointer to first CBR |
357 | * dsr - pointer to first DSR |
358 | */ |
359 | void gru_lock_async_resource(unsigned long han, void **cb, void **dsr) |
360 | { |
361 | struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); |
362 | int blade_id = ASYNC_HAN_TO_BID(han); |
363 | int ncpus; |
364 | |
365 | gru_lock_kernel_context(blade_id); |
	ncpus = uv_blade_nr_possible_cpus(blade_id);
367 | if (cb) |
368 | *cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE; |
369 | if (dsr) |
370 | *dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES; |
371 | } |
372 | |
373 | /* |
 * Unlock previously reserved async GRU resources
375 | * |
376 | * input: |
377 | * han - handle to identify resources |
378 | */ |
379 | void gru_unlock_async_resource(unsigned long han) |
380 | { |
381 | int blade_id = ASYNC_HAN_TO_BID(han); |
382 | |
383 | gru_unlock_kernel_context(blade_id); |
384 | } |
385 | |
386 | /*----------------------------------------------------------------------*/ |
387 | int gru_get_cb_exception_detail(void *cb, |
388 | struct control_block_extended_exc_detail *excdet) |
389 | { |
390 | struct gru_control_block_extended *cbe; |
391 | struct gru_thread_state *kgts = NULL; |
392 | unsigned long off; |
393 | int cbrnum, bid; |
394 | |
395 | /* |
396 | * Locate kgts for cb. This algorithm is SLOW but |
	 * this function is rarely called (i.e., almost never).
398 | * Performance does not matter. |
399 | */ |
400 | for_each_possible_blade(bid) { |
401 | if (!gru_base[bid]) |
402 | break; |
403 | kgts = gru_base[bid]->bs_kgts; |
404 | if (!kgts || !kgts->ts_gru) |
405 | continue; |
406 | off = cb - kgts->ts_gru->gs_gru_base_vaddr; |
407 | if (off < GRU_SIZE) |
408 | break; |
409 | kgts = NULL; |
410 | } |
411 | BUG_ON(!kgts); |
412 | cbrnum = thread_cbr_number(kgts, get_cb_number(cb)); |
	cbe = get_cbe(GRUBASE(cb), cbrnum);
	gru_flush_cache(cbe);	/* CBE not coherent */
415 | sync_core(); |
416 | excdet->opc = cbe->opccpy; |
417 | excdet->exopc = cbe->exopccpy; |
418 | excdet->ecause = cbe->ecause; |
419 | excdet->exceptdet0 = cbe->idef1upd; |
420 | excdet->exceptdet1 = cbe->idef3upd; |
	gru_flush_cache(cbe);
422 | return 0; |
423 | } |
424 | |
425 | static char *gru_get_cb_exception_detail_str(int ret, void *cb, |
426 | char *buf, int size) |
427 | { |
428 | struct gru_control_block_status *gen = cb; |
429 | struct control_block_extended_exc_detail excdet; |
430 | |
431 | if (ret > 0 && gen->istatus == CBS_EXCEPTION) { |
		gru_get_cb_exception_detail(cb, &excdet);
		snprintf(buf, size,
			"GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x,"
			"excdet0 0x%lx, excdet1 0x%x", smp_processor_id(),
			gen, excdet.opc, excdet.exopc, excdet.ecause,
			excdet.exceptdet0, excdet.exceptdet1);
	} else {
		snprintf(buf, size, "No exception");
440 | } |
441 | return buf; |
442 | } |
443 | |
444 | static int gru_wait_idle_or_exception(struct gru_control_block_status *gen) |
445 | { |
446 | while (gen->istatus >= CBS_ACTIVE) { |
447 | cpu_relax(); |
448 | barrier(); |
449 | } |
450 | return gen->istatus; |
451 | } |
452 | |
453 | static int gru_retry_exception(void *cb) |
454 | { |
455 | struct gru_control_block_status *gen = cb; |
456 | struct control_block_extended_exc_detail excdet; |
457 | int retry = EXCEPTION_RETRY_LIMIT; |
458 | |
459 | while (1) { |
460 | if (gru_wait_idle_or_exception(gen) == CBS_IDLE) |
461 | return CBS_IDLE; |
462 | if (gru_get_cb_message_queue_substatus(cb)) |
463 | return CBS_EXCEPTION; |
		gru_get_cb_exception_detail(cb, &excdet);
465 | if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) || |
466 | (excdet.cbrexecstatus & CBR_EXS_ABORT_OCC)) |
467 | break; |
468 | if (retry-- == 0) |
469 | break; |
470 | gen->icmd = 1; |
		gru_flush_cache(gen);
472 | } |
473 | return CBS_EXCEPTION; |
474 | } |
475 | |
476 | int gru_check_status_proc(void *cb) |
477 | { |
478 | struct gru_control_block_status *gen = cb; |
479 | int ret; |
480 | |
481 | ret = gen->istatus; |
482 | if (ret == CBS_EXCEPTION) |
483 | ret = gru_retry_exception(cb); |
484 | rmb(); |
485 | return ret; |
486 | |
487 | } |
488 | |
489 | int gru_wait_proc(void *cb) |
490 | { |
491 | struct gru_control_block_status *gen = cb; |
492 | int ret; |
493 | |
494 | ret = gru_wait_idle_or_exception(gen); |
495 | if (ret == CBS_EXCEPTION) |
496 | ret = gru_retry_exception(cb); |
497 | rmb(); |
498 | return ret; |
499 | } |
500 | |
501 | static void gru_abort(int ret, void *cb, char *str) |
502 | { |
503 | char buf[GRU_EXC_STR_SIZE]; |
504 | |
	panic("GRU FATAL ERROR: %s - %s\n", str,
	      gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf)));
507 | } |
508 | |
509 | void gru_wait_abort_proc(void *cb) |
510 | { |
511 | int ret; |
512 | |
513 | ret = gru_wait_proc(cb); |
514 | if (ret) |
		gru_abort(ret, cb, "gru_wait_abort");
516 | } |
517 | |
518 | |
519 | /*------------------------------ MESSAGE QUEUES -----------------------------*/ |
520 | |
/* Internal status. These are NOT returned to the user. */
522 | #define MQIE_AGAIN -1 /* try again */ |
523 | |
524 | |
525 | /* |
526 | * Save/restore the "present" flag that is in the second line of 2-line |
527 | * messages |
528 | */ |
529 | static inline int get_present2(void *p) |
530 | { |
531 | struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; |
532 | return mhdr->present; |
533 | } |
534 | |
535 | static inline void restore_present2(void *p, int val) |
536 | { |
537 | struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; |
538 | mhdr->present = val; |
539 | } |
540 | |
541 | /* |
542 | * Create a message queue. |
543 | * qlines - message queue size in cache lines. Includes 2-line header. |
544 | */ |
545 | int gru_create_message_queue(struct gru_message_queue_desc *mqd, |
546 | void *p, unsigned int bytes, int nasid, int vector, int apicid) |
547 | { |
548 | struct message_queue *mq = p; |
549 | unsigned int qlines; |
550 | |
551 | qlines = bytes / GRU_CACHE_LINE_BYTES - 2; |
552 | memset(mq, 0, bytes); |
553 | mq->start = &mq->data; |
554 | mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES; |
555 | mq->next = &mq->data; |
556 | mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES; |
557 | mq->qlines = qlines; |
558 | mq->hstatus[0] = 0; |
559 | mq->hstatus[1] = 1; |
	mq->head = gru_mesq_head(2, qlines / 2 + 1);
	mqd->mq = mq;
	mqd->mq_gpa = uv_gpa(mq);
563 | mqd->qlines = qlines; |
564 | mqd->interrupt_pnode = nasid >> 1; |
565 | mqd->interrupt_vector = vector; |
566 | mqd->interrupt_apicid = apicid; |
567 | return 0; |
568 | } |
569 | EXPORT_SYMBOL_GPL(gru_create_message_queue); |
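
/*
 * Illustrative sketch (not compiled): creating a small message queue in
 * kernel memory, as quicktest1() below does. "my_buf" is a hypothetical
 * cacheline-aligned buffer whose size is a multiple of GRU_CACHE_LINE_BYTES;
 * nasid/vector/apicid are 0 when no interrupt delivery is wanted.
 *
 *	struct gru_message_queue_desc mqd;
 *
 *	gru_create_message_queue(&mqd, my_buf, 8 * GRU_CACHE_LINE_BYTES,
 *				 0, 0, 0);
 */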
570 | |
571 | /* |
572 | * Send a NOOP message to a message queue |
573 | * Returns: |
574 | * 0 - if queue is full after the send. This is the normal case |
575 | * but various races can change this. |
576 | * -1 - if mesq sent successfully but queue not full |
577 | * >0 - unexpected error. MQE_xxx returned |
578 | */ |
579 | static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd, |
580 | void *mesg) |
581 | { |
	const struct message_header noop_header = {
					.present = MQS_NOOP, .lines = 1};
584 | unsigned long m; |
585 | int substatus, ret; |
586 | struct message_header save_mhdr, *mhdr = mesg; |
587 | |
588 | STAT(mesq_noop); |
589 | save_mhdr = *mhdr; |
590 | *mhdr = noop_header; |
	gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA);
592 | ret = gru_wait(cb); |
593 | |
594 | if (ret) { |
595 | substatus = gru_get_cb_message_queue_substatus(cb); |
596 | switch (substatus) { |
597 | case CBSS_NO_ERROR: |
598 | STAT(mesq_noop_unexpected_error); |
599 | ret = MQE_UNEXPECTED_CB_ERR; |
600 | break; |
601 | case CBSS_LB_OVERFLOWED: |
602 | STAT(mesq_noop_lb_overflow); |
603 | ret = MQE_CONGESTION; |
604 | break; |
605 | case CBSS_QLIMIT_REACHED: |
606 | STAT(mesq_noop_qlimit_reached); |
607 | ret = 0; |
608 | break; |
609 | case CBSS_AMO_NACKED: |
610 | STAT(mesq_noop_amo_nacked); |
611 | ret = MQE_CONGESTION; |
612 | break; |
613 | case CBSS_PUT_NACKED: |
614 | STAT(mesq_noop_put_nacked); |
615 | m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); |
			gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
						IMA);
618 | if (gru_wait(cb) == CBS_IDLE) |
619 | ret = MQIE_AGAIN; |
620 | else |
621 | ret = MQE_UNEXPECTED_CB_ERR; |
622 | break; |
623 | case CBSS_PAGE_OVERFLOW: |
624 | STAT(mesq_noop_page_overflow); |
625 | fallthrough; |
626 | default: |
627 | BUG(); |
628 | } |
629 | } |
630 | *mhdr = save_mhdr; |
631 | return ret; |
632 | } |
633 | |
634 | /* |
635 | * Handle a gru_mesq full. |
636 | */ |
637 | static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd, |
638 | void *mesg, int lines) |
639 | { |
640 | union gru_mesqhead mqh; |
641 | unsigned int limit, head; |
642 | unsigned long avalue; |
643 | int half, qlines; |
644 | |
645 | /* Determine if switching to first/second half of q */ |
646 | avalue = gru_get_amo_value(cb); |
647 | head = gru_get_amo_value_head(cb); |
648 | limit = gru_get_amo_value_limit(cb); |
649 | |
650 | qlines = mqd->qlines; |
651 | half = (limit != qlines); |
652 | |
	if (half)
		mqh = gru_mesq_head(qlines / 2 + 1, qlines);
	else
		mqh = gru_mesq_head(2, qlines / 2 + 1);
657 | |
658 | /* Try to get lock for switching head pointer */ |
659 | gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA); |
660 | if (gru_wait(cb) != CBS_IDLE) |
661 | goto cberr; |
662 | if (!gru_get_amo_value(cb)) { |
663 | STAT(mesq_qf_locked); |
664 | return MQE_QUEUE_FULL; |
665 | } |
666 | |
	/* Got the lock. Send optional NOP if queue not full. */
668 | if (head != limit) { |
669 | if (send_noop_message(cb, mqd, mesg)) { |
670 | gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), |
671 | XTYPE_DW, IMA); |
672 | if (gru_wait(cb) != CBS_IDLE) |
673 | goto cberr; |
674 | STAT(mesq_qf_noop_not_full); |
675 | return MQIE_AGAIN; |
676 | } |
677 | avalue++; |
678 | } |
679 | |
680 | /* Then flip queuehead to other half of queue. */ |
	gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue,
			IMA);
683 | if (gru_wait(cb) != CBS_IDLE) |
684 | goto cberr; |
685 | |
	/* If swapping the queue head was not successful, clear the hstatus lock */
687 | if (gru_get_amo_value(cb) != avalue) { |
688 | STAT(mesq_qf_switch_head_failed); |
689 | gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, |
690 | IMA); |
691 | if (gru_wait(cb) != CBS_IDLE) |
692 | goto cberr; |
693 | } |
694 | return MQIE_AGAIN; |
695 | cberr: |
696 | STAT(mesq_qf_unexpected_error); |
697 | return MQE_UNEXPECTED_CB_ERR; |
698 | } |
699 | |
700 | /* |
701 | * Handle a PUT failure. Note: if message was a 2-line message, one of the |
 * lines might have been successfully written. Before sending the
703 | * message, "present" must be cleared in BOTH lines to prevent the receiver |
704 | * from prematurely seeing the full message. |
705 | */ |
706 | static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd, |
707 | void *mesg, int lines) |
708 | { |
709 | unsigned long m; |
710 | int ret, loops = 200; /* experimentally determined */ |
711 | |
712 | m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); |
	if (lines == 2) {
		gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA);
		if (gru_wait(cb) != CBS_IDLE)
			return MQE_UNEXPECTED_CB_ERR;
	}
	gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
719 | if (gru_wait(cb) != CBS_IDLE) |
720 | return MQE_UNEXPECTED_CB_ERR; |
721 | |
722 | if (!mqd->interrupt_vector) |
723 | return MQE_OK; |
724 | |
725 | /* |
726 | * Send a noop message in order to deliver a cross-partition interrupt |
727 | * to the SSI that contains the target message queue. Normally, the |
728 | * interrupt is automatically delivered by hardware following mesq |
729 | * operations, but some error conditions require explicit delivery. |
730 | * The noop message will trigger delivery. Otherwise partition failures |
731 | * could cause unrecovered errors. |
732 | */ |
733 | do { |
734 | ret = send_noop_message(cb, mqd, mesg); |
735 | } while ((ret == MQIE_AGAIN || ret == MQE_CONGESTION) && (loops-- > 0)); |
736 | |
737 | if (ret == MQIE_AGAIN || ret == MQE_CONGESTION) { |
738 | /* |
739 | * Don't indicate to the app to resend the message, as it's |
740 | * already been successfully sent. We simply send an OK |
741 | * (rather than fail the send with MQE_UNEXPECTED_CB_ERR), |
742 | * assuming that the other side is receiving enough |
743 | * interrupts to get this message processed anyway. |
744 | */ |
745 | ret = MQE_OK; |
746 | } |
747 | return ret; |
748 | } |
749 | |
750 | /* |
751 | * Handle a gru_mesq failure. Some of these failures are software recoverable |
752 | * or retryable. |
753 | */ |
754 | static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd, |
755 | void *mesg, int lines) |
756 | { |
757 | int substatus, ret = 0; |
758 | |
759 | substatus = gru_get_cb_message_queue_substatus(cb); |
760 | switch (substatus) { |
761 | case CBSS_NO_ERROR: |
762 | STAT(mesq_send_unexpected_error); |
763 | ret = MQE_UNEXPECTED_CB_ERR; |
764 | break; |
765 | case CBSS_LB_OVERFLOWED: |
766 | STAT(mesq_send_lb_overflow); |
767 | ret = MQE_CONGESTION; |
768 | break; |
769 | case CBSS_QLIMIT_REACHED: |
770 | STAT(mesq_send_qlimit_reached); |
771 | ret = send_message_queue_full(cb, mqd, mesg, lines); |
772 | break; |
773 | case CBSS_AMO_NACKED: |
774 | STAT(mesq_send_amo_nacked); |
775 | ret = MQE_CONGESTION; |
776 | break; |
777 | case CBSS_PUT_NACKED: |
778 | STAT(mesq_send_put_nacked); |
779 | ret = send_message_put_nacked(cb, mqd, mesg, lines); |
780 | break; |
781 | case CBSS_PAGE_OVERFLOW: |
782 | STAT(mesq_page_overflow); |
783 | fallthrough; |
784 | default: |
785 | BUG(); |
786 | } |
787 | return ret; |
788 | } |
789 | |
790 | /* |
791 | * Send a message to a message queue |
792 | * mqd message queue descriptor |
 *	mesg	message. Must be vaddr within a GSEG
794 | * bytes message size (<= 2 CL) |
795 | */ |
796 | int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg, |
797 | unsigned int bytes) |
798 | { |
799 | struct message_header *mhdr; |
800 | void *cb; |
801 | void *dsr; |
802 | int istatus, clines, ret; |
803 | |
804 | STAT(mesq_send); |
805 | BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES); |
806 | |
807 | clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES); |
	if (gru_get_cpu_resources(bytes, &cb, &dsr))
809 | return MQE_BUG_NO_RESOURCES; |
810 | memcpy(dsr, mesg, bytes); |
811 | mhdr = dsr; |
812 | mhdr->present = MQS_FULL; |
813 | mhdr->lines = clines; |
814 | if (clines == 2) { |
		mhdr->present2 = get_present2(mhdr);
		restore_present2(mhdr, MQS_FULL);
817 | } |
818 | |
819 | do { |
820 | ret = MQE_OK; |
		gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA);
822 | istatus = gru_wait(cb); |
823 | if (istatus != CBS_IDLE) |
			ret = send_message_failure(cb, mqd, dsr, clines);
825 | } while (ret == MQIE_AGAIN); |
826 | gru_free_cpu_resources(cb, dsr); |
827 | |
828 | if (ret) |
829 | STAT(mesq_send_failed); |
830 | return ret; |
831 | } |
832 | EXPORT_SYMBOL_GPL(gru_send_message_gpa); |
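
/*
 * Illustrative send sketch (not compiled), following quicktest1(): a message
 * is at most 2 cache lines and the send is typically retried while the
 * queue is congested. "mes" is a hypothetical payload buffer.
 *
 *	char mes[GRU_CACHE_LINE_BYTES];
 *	int ret;
 *
 *	do {
 *		ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
 *	} while (ret == MQE_CONGESTION);
 *	// ret is MQE_OK on success, MQE_QUEUE_FULL etc. otherwise
 */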
833 | |
834 | /* |
835 | * Advance the receive pointer for the queue to the next message. |
836 | */ |
837 | void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg) |
838 | { |
839 | struct message_queue *mq = mqd->mq; |
840 | struct message_header *mhdr = mq->next; |
841 | void *next, *pnext; |
842 | int half = -1; |
843 | int lines = mhdr->lines; |
844 | |
845 | if (lines == 2) |
		restore_present2(mhdr, MQS_EMPTY);
847 | mhdr->present = MQS_EMPTY; |
848 | |
849 | pnext = mq->next; |
850 | next = pnext + GRU_CACHE_LINE_BYTES * lines; |
851 | if (next == mq->limit) { |
852 | next = mq->start; |
853 | half = 1; |
854 | } else if (pnext < mq->start2 && next >= mq->start2) { |
855 | half = 0; |
856 | } |
857 | |
858 | if (half >= 0) |
859 | mq->hstatus[half] = 1; |
860 | mq->next = next; |
861 | } |
862 | EXPORT_SYMBOL_GPL(gru_free_message); |
863 | |
864 | /* |
865 | * Get next message from message queue. Return NULL if no message |
 * present. User must call gru_free_message() to move to next message.
 *	mqd	message queue descriptor
868 | */ |
869 | void *gru_get_next_message(struct gru_message_queue_desc *mqd) |
870 | { |
871 | struct message_queue *mq = mqd->mq; |
872 | struct message_header *mhdr = mq->next; |
873 | int present = mhdr->present; |
874 | |
875 | /* skip NOOP messages */ |
876 | while (present == MQS_NOOP) { |
877 | gru_free_message(mqd, mhdr); |
878 | mhdr = mq->next; |
879 | present = mhdr->present; |
880 | } |
881 | |
882 | /* Wait for both halves of 2 line messages */ |
883 | if (present == MQS_FULL && mhdr->lines == 2 && |
	    get_present2(mhdr) == MQS_EMPTY)
885 | present = MQS_EMPTY; |
886 | |
887 | if (!present) { |
888 | STAT(mesq_receive_none); |
889 | return NULL; |
890 | } |
891 | |
892 | if (mhdr->lines == 2) |
		restore_present2(mhdr, mhdr->present2);
894 | |
895 | STAT(mesq_receive); |
896 | return mhdr; |
897 | } |
898 | EXPORT_SYMBOL_GPL(gru_get_next_message); |
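
/*
 * Illustrative receive sketch (not compiled): poll the queue and free each
 * message after it has been consumed, as quicktest1() does.
 *
 *	void *m;
 *
 *	while ((m = gru_get_next_message(&mqd)) != NULL) {
 *		// ... consume the message at m (first word is the header) ...
 *		gru_free_message(&mqd, m);
 *	}
 */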
899 | |
900 | /* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/ |
901 | |
902 | /* |
903 | * Load a DW from a global GPA. The GPA can be a memory or MMR address. |
904 | */ |
905 | int gru_read_gpa(unsigned long *value, unsigned long gpa) |
906 | { |
907 | void *cb; |
908 | void *dsr; |
909 | int ret, iaa; |
910 | |
911 | STAT(read_gpa); |
	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
913 | return MQE_BUG_NO_RESOURCES; |
914 | iaa = gpa >> 62; |
	gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA);
916 | ret = gru_wait(cb); |
917 | if (ret == CBS_IDLE) |
918 | *value = *(unsigned long *)dsr; |
919 | gru_free_cpu_resources(cb, dsr); |
920 | return ret; |
921 | } |
922 | EXPORT_SYMBOL_GPL(gru_read_gpa); |
923 | |
924 | |
925 | /* |
926 | * Copy a block of data using the GRU resources |
927 | */ |
928 | int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, |
929 | unsigned int bytes) |
930 | { |
931 | void *cb; |
932 | void *dsr; |
933 | int ret; |
934 | |
935 | STAT(copy_gpa); |
	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
		  XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA);
940 | ret = gru_wait(cb); |
941 | gru_free_cpu_resources(cb, dsr); |
942 | return ret; |
943 | } |
944 | EXPORT_SYMBOL_GPL(gru_copy_gpa); |
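
/*
 * Illustrative sketch (not compiled), following quicktest3(): gru_read_gpa()
 * and gru_copy_gpa() take global physical addresses, typically obtained
 * with uv_gpa(). "src" and "dst" are hypothetical buffers.
 *
 *	char src[200], dst[200];
 *
 *	ret = gru_copy_gpa(uv_gpa(dst), uv_gpa(src), sizeof(src));
 *	// ret is the gru_wait() status (CBS_IDLE on success) or
 *	// MQE_BUG_NO_RESOURCES if no kernel GRU resources are available
 */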
945 | |
946 | /* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/ |
947 | /* Temp - will delete after we gain confidence in the GRU */ |
948 | |
949 | static int quicktest0(unsigned long arg) |
950 | { |
951 | unsigned long word0; |
952 | unsigned long word1; |
953 | void *cb; |
954 | void *dsr; |
955 | unsigned long *p; |
956 | int ret = -EIO; |
957 | |
	if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	p = dsr;
	word0 = MAGIC;
	word1 = 0;

	gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE) {
		printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id());
		goto done;
	}

	if (*p != MAGIC) {
		printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p);
		goto done;
	}
	gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE) {
		printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id());
		goto done;
	}

	if (word0 != word1 || word1 != MAGIC) {
		printk(KERN_DEBUG
		       "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n",
		       smp_processor_id(), word1, MAGIC);
		goto done;
	}
986 | ret = 0; |
987 | |
988 | done: |
989 | gru_free_cpu_resources(cb, dsr); |
990 | return ret; |
991 | } |
992 | |
993 | #define ALIGNUP(p, q) ((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1))) |
994 | |
995 | static int quicktest1(unsigned long arg) |
996 | { |
997 | struct gru_message_queue_desc mqd; |
998 | void *p, *mq; |
999 | int i, ret = -EIO; |
1000 | char mes[GRU_CACHE_LINE_BYTES], *m; |
1001 | |
1002 | /* Need 1K cacheline aligned that does not cross page boundary */ |
	p = kmalloc(4096, 0);
1004 | if (p == NULL) |
1005 | return -ENOMEM; |
1006 | mq = ALIGNUP(p, 1024); |
1007 | memset(mes, 0xee, sizeof(mes)); |
1008 | |
1009 | gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0); |
1010 | for (i = 0; i < 6; i++) { |
1011 | mes[8] = i; |
1012 | do { |
1013 | ret = gru_send_message_gpa(&mqd, mes, sizeof(mes)); |
1014 | } while (ret == MQE_CONGESTION); |
1015 | if (ret) |
1016 | break; |
1017 | } |
1018 | if (ret != MQE_QUEUE_FULL || i != 4) { |
		printk(KERN_DEBUG "GRU:%d quicktest1: unexpected status %d, i %d\n",
		       smp_processor_id(), ret, i);
1021 | goto done; |
1022 | } |
1023 | |
1024 | for (i = 0; i < 6; i++) { |
1025 | m = gru_get_next_message(&mqd); |
1026 | if (!m || m[8] != i) |
1027 | break; |
1028 | gru_free_message(&mqd, m); |
1029 | } |
1030 | if (i != 4) { |
		printk(KERN_DEBUG "GRU:%d quicktest1: bad message, i %d, m %p, m8 %d\n",
		       smp_processor_id(), i, m, m ? m[8] : -1);
1033 | goto done; |
1034 | } |
1035 | ret = 0; |
1036 | |
1037 | done: |
	kfree(p);
1039 | return ret; |
1040 | } |
1041 | |
1042 | static int quicktest2(unsigned long arg) |
1043 | { |
1044 | static DECLARE_COMPLETION(cmp); |
1045 | unsigned long han; |
1046 | int blade_id = 0; |
1047 | int numcb = 4; |
1048 | int ret = 0; |
1049 | unsigned long *buf; |
1050 | void *cb0, *cb; |
1051 | struct gru_control_block_status *gen; |
1052 | int i, k, istatus, bytes; |
1053 | |
1054 | bytes = numcb * 4 * 8; |
	buf = kmalloc(bytes, GFP_KERNEL);
1056 | if (!buf) |
1057 | return -ENOMEM; |
1058 | |
1059 | ret = -EBUSY; |
	han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp);
1061 | if (!han) |
1062 | goto done; |
1063 | |
	gru_lock_async_resource(han, &cb0, NULL);
	memset(buf, 0xee, bytes);
	for (i = 0; i < numcb; i++)
		gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0,
				XTYPE_DW, 4, 1, IMA_INTERRUPT);
1069 | |
1070 | ret = 0; |
1071 | k = numcb; |
1072 | do { |
1073 | gru_wait_async_cbr(han); |
1074 | for (i = 0; i < numcb; i++) { |
1075 | cb = cb0 + i * GRU_HANDLE_STRIDE; |
1076 | istatus = gru_check_status(cb); |
1077 | if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS) |
1078 | break; |
1079 | } |
1080 | if (i == numcb) |
1081 | continue; |
1082 | if (istatus != CBS_IDLE) { |
			printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i);
1084 | ret = -EFAULT; |
1085 | } else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] || |
1086 | buf[4 * i + 3]) { |
			printk(KERN_DEBUG "GRU:%d quicktest2:cb %d, buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n",
			       smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]);
1089 | ret = -EIO; |
1090 | } |
1091 | k--; |
1092 | gen = cb; |
1093 | gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */ |
1094 | } while (k); |
1095 | BUG_ON(cmp.done); |
1096 | |
1097 | gru_unlock_async_resource(han); |
1098 | gru_release_async_resources(han); |
1099 | done: |
	kfree(buf);
1101 | return ret; |
1102 | } |
1103 | |
1104 | #define BUFSIZE 200 |
1105 | static int quicktest3(unsigned long arg) |
1106 | { |
1107 | char buf1[BUFSIZE], buf2[BUFSIZE]; |
1108 | int ret = 0; |
1109 | |
1110 | memset(buf2, 0, sizeof(buf2)); |
1111 | memset(buf1, get_cycles() & 255, sizeof(buf1)); |
	gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE);
	if (memcmp(buf1, buf2, BUFSIZE)) {
		printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id());
1115 | ret = -EIO; |
1116 | } |
1117 | return ret; |
1118 | } |
1119 | |
1120 | /* |
1121 | * Debugging only. User hook for various kernel tests |
1122 | * of driver & gru. |
1123 | */ |
1124 | int gru_ktest(unsigned long arg) |
1125 | { |
1126 | int ret = -EINVAL; |
1127 | |
1128 | switch (arg & 0xff) { |
1129 | case 0: |
1130 | ret = quicktest0(arg); |
1131 | break; |
1132 | case 1: |
1133 | ret = quicktest1(arg); |
1134 | break; |
1135 | case 2: |
1136 | ret = quicktest2(arg); |
1137 | break; |
1138 | case 3: |
1139 | ret = quicktest3(arg); |
1140 | break; |
1141 | case 99: |
1142 | ret = gru_free_kernel_contexts(); |
1143 | break; |
1144 | } |
1145 | return ret; |
1146 | |
1147 | } |
1148 | |
1149 | int gru_kservices_init(void) |
1150 | { |
1151 | return 0; |
1152 | } |
1153 | |
1154 | void gru_kservices_exit(void) |
1155 | { |
1156 | if (gru_free_kernel_contexts()) |
1157 | BUG(); |
1158 | } |
1159 | |
1160 | |