1 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause |
2 | /* |
3 | * Copyright(c) 2020 - 2023 Cornelis Networks, Inc. |
4 | * Copyright(c) 2015 - 2018 Intel Corporation. |
5 | */ |
6 | |
7 | #include <linux/mm.h> |
8 | #include <linux/types.h> |
9 | #include <linux/device.h> |
10 | #include <linux/dmapool.h> |
11 | #include <linux/slab.h> |
12 | #include <linux/list.h> |
13 | #include <linux/highmem.h> |
14 | #include <linux/io.h> |
15 | #include <linux/uio.h> |
16 | #include <linux/rbtree.h> |
17 | #include <linux/spinlock.h> |
18 | #include <linux/delay.h> |
19 | #include <linux/kthread.h> |
20 | #include <linux/mmu_context.h> |
21 | #include <linux/module.h> |
22 | #include <linux/vmalloc.h> |
23 | #include <linux/string.h> |
24 | |
25 | #include "hfi.h" |
26 | #include "sdma.h" |
27 | #include "user_sdma.h" |
28 | #include "verbs.h" /* for the headers */ |
29 | #include "common.h" /* for struct hfi1_tid_info */ |
30 | #include "trace.h" |
31 | |
32 | static uint hfi1_sdma_comp_ring_size = 128; |
33 | module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO); |
MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 128");
35 | |
36 | static unsigned initial_pkt_count = 8; |
37 | |
38 | static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts); |
39 | static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status); |
40 | static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq); |
41 | static void user_sdma_free_request(struct user_sdma_request *req); |
42 | static int check_header_template(struct user_sdma_request *req, |
43 | struct hfi1_pkt_header *hdr, u32 lrhlen, |
44 | u32 datalen); |
45 | static int set_txreq_header(struct user_sdma_request *req, |
46 | struct user_sdma_txreq *tx, u32 datalen); |
47 | static int set_txreq_header_ahg(struct user_sdma_request *req, |
48 | struct user_sdma_txreq *tx, u32 len); |
49 | static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq, |
50 | struct hfi1_user_sdma_comp_q *cq, |
51 | u16 idx, enum hfi1_sdma_comp_state state, |
52 | int ret); |
53 | static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags); |
54 | static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len); |
55 | |
56 | static int defer_packet_queue( |
57 | struct sdma_engine *sde, |
58 | struct iowait_work *wait, |
59 | struct sdma_txreq *txreq, |
60 | uint seq, |
61 | bool pkts_sent); |
62 | static void activate_packet_queue(struct iowait *wait, int reason); |
63 | |
64 | static int defer_packet_queue( |
65 | struct sdma_engine *sde, |
66 | struct iowait_work *wait, |
67 | struct sdma_txreq *txreq, |
68 | uint seq, |
69 | bool pkts_sent) |
70 | { |
71 | struct hfi1_user_sdma_pkt_q *pq = |
72 | container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy); |
73 | |
	write_seqlock(&sde->waitlock);
	trace_hfi1_usdma_defer(pq, sde, &pq->busy);
	if (sdma_progress(sde, seq, txreq))
77 | goto eagain; |
78 | /* |
79 | * We are assuming that if the list is enqueued somewhere, it |
80 | * is to the dmawait list since that is the only place where |
81 | * it is supposed to be enqueued. |
82 | */ |
83 | xchg(&pq->state, SDMA_PKT_Q_DEFERRED); |
	if (list_empty(&pq->busy.list)) {
		pq->busy.lock = &sde->waitlock;
		iowait_get_priority(&pq->busy);
		iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
	}
	write_sequnlock(&sde->waitlock);
90 | return -EBUSY; |
91 | eagain: |
	write_sequnlock(&sde->waitlock);
93 | return -EAGAIN; |
94 | } |
95 | |
96 | static void activate_packet_queue(struct iowait *wait, int reason) |
97 | { |
98 | struct hfi1_user_sdma_pkt_q *pq = |
99 | container_of(wait, struct hfi1_user_sdma_pkt_q, busy); |
100 | |
101 | trace_hfi1_usdma_activate(pq, wait, reason); |
102 | xchg(&pq->state, SDMA_PKT_Q_ACTIVE); |
103 | wake_up(&wait->wait_dma); |
104 | }; |
105 | |
106 | int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, |
107 | struct hfi1_filedata *fd) |
108 | { |
109 | int ret = -ENOMEM; |
110 | char buf[64]; |
111 | struct hfi1_devdata *dd; |
112 | struct hfi1_user_sdma_comp_q *cq; |
113 | struct hfi1_user_sdma_pkt_q *pq; |
114 | |
115 | if (!uctxt || !fd) |
116 | return -EBADF; |
117 | |
118 | if (!hfi1_sdma_comp_ring_size) |
119 | return -EINVAL; |
120 | |
121 | dd = uctxt->dd; |
122 | |
	pq = kzalloc(sizeof(*pq), GFP_KERNEL);
124 | if (!pq) |
125 | return -ENOMEM; |
126 | pq->dd = dd; |
127 | pq->ctxt = uctxt->ctxt; |
128 | pq->subctxt = fd->subctxt; |
129 | pq->n_max_reqs = hfi1_sdma_comp_ring_size; |
	atomic_set(&pq->n_reqs, 0);
	init_waitqueue_head(&pq->wait);
	atomic_set(&pq->n_locked, 0);

	iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
		    activate_packet_queue, NULL, NULL);
136 | pq->reqidx = 0; |
137 | |
	pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
			   sizeof(*pq->reqs),
			   GFP_KERNEL);
141 | if (!pq->reqs) |
142 | goto pq_reqs_nomem; |
143 | |
	pq->req_in_use = bitmap_zalloc(hfi1_sdma_comp_ring_size, GFP_KERNEL);
145 | if (!pq->req_in_use) |
146 | goto pq_reqs_no_in_use; |
147 | |
	snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
		 fd->subctxt);
	pq->txreq_cache = kmem_cache_create(buf,
					    sizeof(struct user_sdma_txreq),
					    L1_CACHE_BYTES,
					    SLAB_HWCACHE_ALIGN,
					    NULL);
	if (!pq->txreq_cache) {
		dd_dev_err(dd, "[%u] Failed to allocate TxReq cache\n",
157 | uctxt->ctxt); |
158 | goto pq_txreq_nomem; |
159 | } |
160 | |
	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
162 | if (!cq) |
163 | goto cq_nomem; |
164 | |
165 | cq->comps = vmalloc_user(PAGE_ALIGN(sizeof(*cq->comps) |
166 | * hfi1_sdma_comp_ring_size)); |
167 | if (!cq->comps) |
168 | goto cq_comps_nomem; |
169 | |
170 | cq->nentries = hfi1_sdma_comp_ring_size; |
171 | |
172 | ret = hfi1_init_system_pinning(pq); |
173 | if (ret) |
174 | goto pq_mmu_fail; |
175 | |
176 | rcu_assign_pointer(fd->pq, pq); |
177 | fd->cq = cq; |
178 | |
179 | return 0; |
180 | |
181 | pq_mmu_fail: |
	vfree(cq->comps);
cq_comps_nomem:
	kfree(cq);
cq_nomem:
	kmem_cache_destroy(pq->txreq_cache);
pq_txreq_nomem:
	bitmap_free(pq->req_in_use);
pq_reqs_no_in_use:
	kfree(pq->reqs);
pq_reqs_nomem:
	kfree(pq);
193 | |
194 | return ret; |
195 | } |
196 | |
197 | static void flush_pq_iowait(struct hfi1_user_sdma_pkt_q *pq) |
198 | { |
199 | unsigned long flags; |
200 | seqlock_t *lock = pq->busy.lock; |
201 | |
202 | if (!lock) |
203 | return; |
204 | write_seqlock_irqsave(lock, flags); |
	if (!list_empty(&pq->busy.list)) {
		list_del_init(&pq->busy.list);
		pq->busy.lock = NULL;
	}
	write_sequnlock_irqrestore(lock, flags);
210 | } |
211 | |
212 | int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, |
213 | struct hfi1_ctxtdata *uctxt) |
214 | { |
215 | struct hfi1_user_sdma_pkt_q *pq; |
216 | |
	trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt);
218 | |
	spin_lock(&fd->pq_rcu_lock);
220 | pq = srcu_dereference_check(fd->pq, &fd->pq_srcu, |
221 | lockdep_is_held(&fd->pq_rcu_lock)); |
222 | if (pq) { |
223 | rcu_assign_pointer(fd->pq, NULL); |
		spin_unlock(&fd->pq_rcu_lock);
		synchronize_srcu(&fd->pq_srcu);
		/* at this point there can be no more new requests */
		iowait_sdma_drain(&pq->busy);
228 | /* Wait until all requests have been freed. */ |
229 | wait_event_interruptible( |
230 | pq->wait, |
231 | !atomic_read(&pq->n_reqs)); |
		kfree(pq->reqs);
		hfi1_free_system_pinning(pq);
		bitmap_free(pq->req_in_use);
		kmem_cache_destroy(pq->txreq_cache);
		flush_pq_iowait(pq);
		kfree(pq);
	} else {
		spin_unlock(&fd->pq_rcu_lock);
240 | } |
241 | if (fd->cq) { |
		vfree(fd->cq->comps);
		kfree(fd->cq);
244 | fd->cq = NULL; |
245 | } |
246 | return 0; |
247 | } |
248 | |
249 | static u8 dlid_to_selector(u16 dlid) |
250 | { |
251 | static u8 mapping[256]; |
252 | static int initialized; |
253 | static u8 next; |
254 | int hash; |
255 | |
256 | if (!initialized) { |
257 | memset(mapping, 0xFF, 256); |
258 | initialized = 1; |
259 | } |
260 | |
261 | hash = ((dlid >> 8) ^ dlid) & 0xFF; |
262 | if (mapping[hash] == 0xFF) { |
263 | mapping[hash] = next; |
264 | next = (next + 1) & 0x7F; |
265 | } |
266 | |
267 | return mapping[hash]; |
268 | } |
269 | |
270 | /** |
271 | * hfi1_user_sdma_process_request() - Process and start a user sdma request |
272 | * @fd: valid file descriptor |
273 | * @iovec: array of io vectors to process |
274 | * @dim: overall iovec array size |
275 | * @count: number of io vector array entries processed |
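
 *
 * The first io vector must begin with a struct sdma_req_info immediately
 * followed by the packet header template; EXPECTED requests additionally
 * carry a trailing vector holding the TID array (see the copies below).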
276 | */ |
277 | int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, |
278 | struct iovec *iovec, unsigned long dim, |
279 | unsigned long *count) |
280 | { |
281 | int ret = 0, i; |
282 | struct hfi1_ctxtdata *uctxt = fd->uctxt; |
283 | struct hfi1_user_sdma_pkt_q *pq = |
284 | srcu_dereference(fd->pq, &fd->pq_srcu); |
285 | struct hfi1_user_sdma_comp_q *cq = fd->cq; |
286 | struct hfi1_devdata *dd = pq->dd; |
287 | unsigned long idx = 0; |
288 | u8 pcount = initial_pkt_count; |
289 | struct sdma_req_info info; |
290 | struct user_sdma_request *req; |
291 | u8 opcode, sc, vl; |
292 | u16 pkey; |
293 | u32 slid; |
294 | u16 dlid; |
295 | u32 selector; |
296 | |
297 | if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) { |
298 | hfi1_cdbg( |
299 | SDMA, |
300 | "[%u:%u:%u] First vector not big enough for header %lu/%lu" , |
301 | dd->unit, uctxt->ctxt, fd->subctxt, |
302 | iovec[idx].iov_len, sizeof(info) + sizeof(req->hdr)); |
303 | return -EINVAL; |
304 | } |
	ret = copy_from_user(&info, iovec[idx].iov_base, sizeof(info));
306 | if (ret) { |
307 | hfi1_cdbg(SDMA, "[%u:%u:%u] Failed to copy info QW (%d)" , |
308 | dd->unit, uctxt->ctxt, fd->subctxt, ret); |
309 | return -EFAULT; |
310 | } |
311 | |
	trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt,
				     (u16 *)&info);
314 | if (info.comp_idx >= hfi1_sdma_comp_ring_size) { |
315 | hfi1_cdbg(SDMA, |
316 | "[%u:%u:%u:%u] Invalid comp index" , |
317 | dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx); |
318 | return -EINVAL; |
319 | } |
320 | |
321 | /* |
322 | * Sanity check the header io vector count. Need at least 1 vector |
323 | * (header) and cannot be larger than the actual io vector count. |
324 | */ |
325 | if (req_iovcnt(info.ctrl) < 1 || req_iovcnt(info.ctrl) > dim) { |
326 | hfi1_cdbg(SDMA, |
327 | "[%u:%u:%u:%u] Invalid iov count %d, dim %ld" , |
328 | dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx, |
329 | req_iovcnt(info.ctrl), dim); |
330 | return -EINVAL; |
331 | } |
332 | |
333 | if (!info.fragsize) { |
334 | hfi1_cdbg(SDMA, |
335 | "[%u:%u:%u:%u] Request does not specify fragsize" , |
336 | dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx); |
337 | return -EINVAL; |
338 | } |
339 | |
340 | /* Try to claim the request. */ |
	if (test_and_set_bit(info.comp_idx, pq->req_in_use)) {
		hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in use",
343 | dd->unit, uctxt->ctxt, fd->subctxt, |
344 | info.comp_idx); |
345 | return -EBADSLT; |
346 | } |
347 | /* |
348 | * All safety checks have been done and this request has been claimed. |
349 | */ |
	trace_hfi1_sdma_user_process_request(dd, uctxt->ctxt, fd->subctxt,
					     info.comp_idx);
352 | req = pq->reqs + info.comp_idx; |
353 | req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */ |
354 | req->data_len = 0; |
355 | req->pq = pq; |
356 | req->cq = cq; |
357 | req->ahg_idx = -1; |
358 | req->iov_idx = 0; |
359 | req->sent = 0; |
360 | req->seqnum = 0; |
361 | req->seqcomp = 0; |
362 | req->seqsubmitted = 0; |
363 | req->tids = NULL; |
364 | req->has_error = 0; |
	INIT_LIST_HEAD(&req->txps);
366 | |
367 | memcpy(&req->info, &info, sizeof(info)); |
368 | |
369 | /* The request is initialized, count it */ |
	atomic_inc(&pq->n_reqs);
371 | |
372 | if (req_opcode(info.ctrl) == EXPECTED) { |
373 | /* expected must have a TID info and at least one data vector */ |
374 | if (req->data_iovs < 2) { |
			SDMA_DBG(req,
				 "Not enough vectors for expected request");
377 | ret = -EINVAL; |
378 | goto free_req; |
379 | } |
380 | req->data_iovs--; |
381 | } |
382 | |
383 | if (!info.npkts || req->data_iovs > MAX_VECTORS_PER_REQ) { |
384 | SDMA_DBG(req, "Too many vectors (%u/%u)" , req->data_iovs, |
385 | MAX_VECTORS_PER_REQ); |
386 | ret = -EINVAL; |
387 | goto free_req; |
388 | } |
389 | |
390 | /* Copy the header from the user buffer */ |
	ret = copy_from_user(&req->hdr, iovec[idx].iov_base + sizeof(info),
			     sizeof(req->hdr));
393 | if (ret) { |
394 | SDMA_DBG(req, "Failed to copy header template (%d)" , ret); |
395 | ret = -EFAULT; |
396 | goto free_req; |
397 | } |
398 | |
399 | /* If Static rate control is not enabled, sanitize the header. */ |
400 | if (!HFI1_CAP_IS_USET(STATIC_RATE_CTRL)) |
401 | req->hdr.pbc[2] = 0; |
402 | |
403 | /* Validate the opcode. Do not trust packets from user space blindly. */ |
404 | opcode = (be32_to_cpu(req->hdr.bth[0]) >> 24) & 0xff; |
405 | if ((opcode & USER_OPCODE_CHECK_MASK) != |
406 | USER_OPCODE_CHECK_VAL) { |
407 | SDMA_DBG(req, "Invalid opcode (%d)" , opcode); |
408 | ret = -EINVAL; |
409 | goto free_req; |
410 | } |
411 | /* |
412 | * Validate the vl. Do not trust packets from user space blindly. |
413 | * VL comes from PBC, SC comes from LRH, and the VL needs to |
414 | * match the SC look up. |
415 | */ |
416 | vl = (le16_to_cpu(req->hdr.pbc[0]) >> 12) & 0xF; |
417 | sc = (((be16_to_cpu(req->hdr.lrh[0]) >> 12) & 0xF) | |
418 | (((le16_to_cpu(req->hdr.pbc[1]) >> 14) & 0x1) << 4)); |
419 | if (vl >= dd->pport->vls_operational || |
	    vl != sc_to_vlt(dd, sc)) {
		SDMA_DBG(req, "Invalid SC(%u)/VL(%u)", sc, vl);
422 | ret = -EINVAL; |
423 | goto free_req; |
424 | } |
425 | |
426 | /* Checking P_KEY for requests from user-space */ |
427 | pkey = (u16)be32_to_cpu(req->hdr.bth[0]); |
428 | slid = be16_to_cpu(req->hdr.lrh[3]); |
	if (egress_pkey_check(dd->pport, slid, pkey, sc, PKEY_CHECK_INVALID)) {
430 | ret = -EINVAL; |
431 | goto free_req; |
432 | } |
433 | |
434 | /* |
435 | * Also should check the BTH.lnh. If it says the next header is GRH then |
436 | * the RXE parsing will be off and will land in the middle of the KDETH |
437 | * or miss it entirely. |
438 | */ |
439 | if ((be16_to_cpu(req->hdr.lrh[0]) & 0x3) == HFI1_LRH_GRH) { |
440 | SDMA_DBG(req, "User tried to pass in a GRH" ); |
441 | ret = -EINVAL; |
442 | goto free_req; |
443 | } |
444 | |
445 | req->koffset = le32_to_cpu(req->hdr.kdeth.swdata[6]); |
446 | /* |
447 | * Calculate the initial TID offset based on the values of |
448 | * KDETH.OFFSET and KDETH.OM that are passed in. |
449 | */ |
450 | req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) * |
451 | (KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ? |
452 | KDETH_OM_LARGE : KDETH_OM_SMALL); |
	trace_hfi1_sdma_user_initial_tidoffset(dd, uctxt->ctxt, fd->subctxt,
					       info.comp_idx, req->tidoffset);
455 | idx++; |
456 | |
457 | /* Save all the IO vector structures */ |
458 | for (i = 0; i < req->data_iovs; i++) { |
459 | req->iovs[i].offset = 0; |
		INIT_LIST_HEAD(&req->iovs[i].list);
461 | memcpy(&req->iovs[i].iov, |
462 | iovec + idx++, |
463 | sizeof(req->iovs[i].iov)); |
464 | if (req->iovs[i].iov.iov_len == 0) { |
465 | ret = -EINVAL; |
466 | goto free_req; |
467 | } |
468 | req->data_len += req->iovs[i].iov.iov_len; |
469 | } |
	trace_hfi1_sdma_user_data_length(dd, uctxt->ctxt, fd->subctxt,
					 info.comp_idx, req->data_len);
472 | if (pcount > req->info.npkts) |
473 | pcount = req->info.npkts; |
474 | /* |
475 | * Copy any TID info |
476 | * User space will provide the TID info only when the |
477 | * request type is EXPECTED. This is true even if there is |
478 | * only one packet in the request and the header is already |
479 | * setup. The reason for the singular TID case is that the |
480 | * driver needs to perform safety checks. |
481 | */ |
482 | if (req_opcode(req->info.ctrl) == EXPECTED) { |
483 | u16 ntids = iovec[idx].iov_len / sizeof(*req->tids); |
484 | u32 *tmp; |
485 | |
486 | if (!ntids || ntids > MAX_TID_PAIR_ENTRIES) { |
487 | ret = -EINVAL; |
488 | goto free_req; |
489 | } |
490 | |
491 | /* |
492 | * We have to copy all of the tids because they may vary |
493 | * in size and, therefore, the TID count might not be |
494 | * equal to the pkt count. However, there is no way to |
495 | * tell at this point. |
496 | */ |
		tmp = memdup_array_user(iovec[idx].iov_base,
					ntids, sizeof(*req->tids));
		if (IS_ERR(tmp)) {
			ret = PTR_ERR(tmp);
			SDMA_DBG(req, "Failed to copy %d TIDs (%d)",
502 | ntids, ret); |
503 | goto free_req; |
504 | } |
505 | req->tids = tmp; |
506 | req->n_tids = ntids; |
507 | req->tididx = 0; |
508 | idx++; |
509 | } |
510 | |
511 | dlid = be16_to_cpu(req->hdr.lrh[1]); |
512 | selector = dlid_to_selector(dlid); |
513 | selector += uctxt->ctxt + fd->subctxt; |
514 | req->sde = sdma_select_user_engine(dd, selector, vl); |
515 | |
	if (!req->sde || !sdma_running(req->sde)) {
517 | ret = -ECOMM; |
518 | goto free_req; |
519 | } |
520 | |
521 | /* We don't need an AHG entry if the request contains only one packet */ |
522 | if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG)) |
		req->ahg_idx = sdma_ahg_alloc(req->sde);
524 | |
	set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
526 | pq->state = SDMA_PKT_Q_ACTIVE; |
527 | |
528 | /* |
529 | * This is a somewhat blocking send implementation. |
530 | * The driver will block the caller until all packets of the |
531 | * request have been submitted to the SDMA engine. However, it |
532 | * will not wait for send completions. |
533 | */ |
534 | while (req->seqsubmitted != req->info.npkts) { |
		ret = user_sdma_send_pkts(req, pcount);
536 | if (ret < 0) { |
537 | int we_ret; |
538 | |
539 | if (ret != -EBUSY) |
540 | goto free_req; |
541 | we_ret = wait_event_interruptible_timeout( |
542 | pq->busy.wait_dma, |
543 | pq->state == SDMA_PKT_Q_ACTIVE, |
544 | msecs_to_jiffies( |
545 | SDMA_IOWAIT_TIMEOUT)); |
546 | trace_hfi1_usdma_we(pq, we_ret); |
547 | if (we_ret <= 0) |
548 | flush_pq_iowait(pq); |
549 | } |
550 | } |
551 | *count += idx; |
552 | return 0; |
553 | free_req: |
554 | /* |
	 * If seqsubmitted == npkts, the completion routine controls the
	 * final state. If seqsubmitted < npkts, wait for any
557 | * outstanding packets to finish before cleaning up. |
558 | */ |
559 | if (req->seqsubmitted < req->info.npkts) { |
560 | if (req->seqsubmitted) |
561 | wait_event(pq->busy.wait_dma, |
562 | (req->seqcomp == req->seqsubmitted - 1)); |
563 | user_sdma_free_request(req); |
564 | pq_update(pq); |
		set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
566 | } |
567 | return ret; |
568 | } |
569 | |
570 | static inline u32 compute_data_length(struct user_sdma_request *req, |
571 | struct user_sdma_txreq *tx) |
572 | { |
573 | /* |
574 | * Determine the proper size of the packet data. |
575 | * The size of the data of the first packet is in the header |
576 | * template. However, it includes the header and ICRC, which need |
577 | * to be subtracted. |
578 | * The minimum representable packet data length in a header is 4 bytes, |
579 | * therefore, when the data length request is less than 4 bytes, there's |
580 | * only one packet, and the packet data length is equal to that of the |
581 | * request data length. |
582 | * The size of the remaining packets is the minimum of the frag |
583 | * size (MTU) or remaining data in the request. |
584 | */ |
585 | u32 len; |
586 | |
587 | if (!req->seqnum) { |
588 | if (req->data_len < sizeof(u32)) |
589 | len = req->data_len; |
590 | else |
591 | len = ((be16_to_cpu(req->hdr.lrh[2]) << 2) - |
592 | (sizeof(tx->hdr) - 4)); |
593 | } else if (req_opcode(req->info.ctrl) == EXPECTED) { |
594 | u32 tidlen = EXP_TID_GET(req->tids[req->tididx], LEN) * |
595 | PAGE_SIZE; |
596 | /* |
597 | * Get the data length based on the remaining space in the |
598 | * TID pair. |
599 | */ |
600 | len = min(tidlen - req->tidoffset, (u32)req->info.fragsize); |
601 | /* If we've filled up the TID pair, move to the next one. */ |
602 | if (unlikely(!len) && ++req->tididx < req->n_tids && |
603 | req->tids[req->tididx]) { |
604 | tidlen = EXP_TID_GET(req->tids[req->tididx], |
605 | LEN) * PAGE_SIZE; |
606 | req->tidoffset = 0; |
607 | len = min_t(u32, tidlen, req->info.fragsize); |
608 | } |
609 | /* |
610 | * Since the TID pairs map entire pages, make sure that we |
		 * are not going to try to send more data than we have
612 | * remaining. |
613 | */ |
614 | len = min(len, req->data_len - req->sent); |
615 | } else { |
616 | len = min(req->data_len - req->sent, (u32)req->info.fragsize); |
617 | } |
	trace_hfi1_sdma_user_compute_length(req->pq->dd,
					    req->pq->ctxt,
					    req->pq->subctxt,
					    req->info.comp_idx,
					    len);
623 | return len; |
624 | } |
625 | |
626 | static inline u32 pad_len(u32 len) |
627 | { |
628 | if (len & (sizeof(u32) - 1)) |
629 | len += sizeof(u32) - (len & (sizeof(u32) - 1)); |
630 | return len; |
631 | } |
632 | |
633 | static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len) |
634 | { |
635 | /* (Size of complete header - size of PBC) + 4B ICRC + data length */ |
636 | return ((sizeof(hdr) - sizeof(hdr.pbc)) + 4 + len); |
637 | } |
638 | |
639 | static int user_sdma_txadd_ahg(struct user_sdma_request *req, |
640 | struct user_sdma_txreq *tx, |
641 | u32 datalen) |
642 | { |
643 | int ret; |
644 | u16 pbclen = le16_to_cpu(req->hdr.pbc[0]); |
	u32 lrhlen = get_lrh_len(req->hdr, pad_len(datalen));
646 | struct hfi1_user_sdma_pkt_q *pq = req->pq; |
647 | |
648 | /* |
649 | * Copy the request header into the tx header |
650 | * because the HW needs a cacheline-aligned |
651 | * address. |
652 | * This copy can be optimized out if the hdr |
653 | * member of user_sdma_request were also |
654 | * cacheline aligned. |
655 | */ |
656 | memcpy(&tx->hdr, &req->hdr, sizeof(tx->hdr)); |
657 | if (PBC2LRH(pbclen) != lrhlen) { |
658 | pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen); |
659 | tx->hdr.pbc[0] = cpu_to_le16(pbclen); |
660 | } |
	ret = check_header_template(req, &tx->hdr, lrhlen, datalen);
662 | if (ret) |
663 | return ret; |
	ret = sdma_txinit_ahg(&tx->txreq, SDMA_TXREQ_F_AHG_COPY,
			      sizeof(tx->hdr) + datalen, req->ahg_idx,
			      0, NULL, 0, user_sdma_txreq_cb);
667 | if (ret) |
668 | return ret; |
	ret = sdma_txadd_kvaddr(pq->dd, &tx->txreq, &tx->hdr, sizeof(tx->hdr));
	if (ret)
		sdma_txclean(pq->dd, &tx->txreq);
672 | return ret; |
673 | } |
674 | |
675 | static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts) |
676 | { |
677 | int ret = 0; |
678 | u16 count; |
679 | unsigned npkts = 0; |
680 | struct user_sdma_txreq *tx = NULL; |
681 | struct hfi1_user_sdma_pkt_q *pq = NULL; |
682 | struct user_sdma_iovec *iovec = NULL; |
683 | |
684 | if (!req->pq) |
685 | return -EINVAL; |
686 | |
687 | pq = req->pq; |
688 | |
689 | /* If tx completion has reported an error, we are done. */ |
690 | if (READ_ONCE(req->has_error)) |
691 | return -EFAULT; |
692 | |
693 | /* |
694 | * Check if we might have sent the entire request already |
695 | */ |
696 | if (unlikely(req->seqnum == req->info.npkts)) { |
		if (!list_empty(&req->txps))
698 | goto dosend; |
699 | return ret; |
700 | } |
701 | |
702 | if (!maxpkts || maxpkts > req->info.npkts - req->seqnum) |
703 | maxpkts = req->info.npkts - req->seqnum; |
704 | |
705 | while (npkts < maxpkts) { |
706 | u32 datalen = 0; |
707 | |
708 | /* |
709 | * Check whether any of the completions have come back |
710 | * with errors. If so, we are not going to process any |
711 | * more packets from this request. |
712 | */ |
713 | if (READ_ONCE(req->has_error)) |
714 | return -EFAULT; |
715 | |
		tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
717 | if (!tx) |
718 | return -ENOMEM; |
719 | |
720 | tx->flags = 0; |
721 | tx->req = req; |
		INIT_LIST_HEAD(&tx->list);
723 | |
724 | /* |
725 | * For the last packet set the ACK request |
726 | * and disable header suppression. |
727 | */ |
728 | if (req->seqnum == req->info.npkts - 1) |
729 | tx->flags |= (TXREQ_FLAGS_REQ_ACK | |
730 | TXREQ_FLAGS_REQ_DISABLE_SH); |
731 | |
732 | /* |
733 | * Calculate the payload size - this is min of the fragment |
734 | * (MTU) size or the remaining bytes in the request but only |
735 | * if we have payload data. |
736 | */ |
737 | if (req->data_len) { |
738 | iovec = &req->iovs[req->iov_idx]; |
739 | if (READ_ONCE(iovec->offset) == iovec->iov.iov_len) { |
740 | if (++req->iov_idx == req->data_iovs) { |
741 | ret = -EFAULT; |
742 | goto free_tx; |
743 | } |
744 | iovec = &req->iovs[req->iov_idx]; |
745 | WARN_ON(iovec->offset); |
746 | } |
747 | |
748 | datalen = compute_data_length(req, tx); |
749 | |
750 | /* |
751 | * Disable header suppression for the payload <= 8DWS. |
752 | * If there is an uncorrectable error in the receive |
753 | * data FIFO when the received payload size is less than |
754 | * or equal to 8DWS then the RxDmaDataFifoRdUncErr is |
		 * not reported. Instead, RHF.EccErr is set if the header
		 * is not suppressed.
757 | */ |
758 | if (!datalen) { |
				SDMA_DBG(req,
					 "Request has data but pkt len is 0");
761 | ret = -EFAULT; |
762 | goto free_tx; |
763 | } else if (datalen <= 32) { |
764 | tx->flags |= TXREQ_FLAGS_REQ_DISABLE_SH; |
765 | } |
766 | } |
767 | |
768 | if (req->ahg_idx >= 0) { |
769 | if (!req->seqnum) { |
770 | ret = user_sdma_txadd_ahg(req, tx, datalen); |
771 | if (ret) |
772 | goto free_tx; |
773 | } else { |
774 | int changes; |
775 | |
				changes = set_txreq_header_ahg(req, tx,
							       datalen);
778 | if (changes < 0) { |
779 | ret = changes; |
780 | goto free_tx; |
781 | } |
782 | } |
783 | } else { |
			ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
					  datalen, user_sdma_txreq_cb);
786 | if (ret) |
787 | goto free_tx; |
788 | /* |
789 | * Modify the header for this packet. This only needs |
790 | * to be done if we are not going to use AHG. Otherwise, |
791 | * the HW will do it based on the changes we gave it |
792 | * during sdma_txinit_ahg(). |
793 | */ |
794 | ret = set_txreq_header(req, tx, datalen); |
795 | if (ret) |
796 | goto free_txreq; |
797 | } |
798 | |
799 | req->koffset += datalen; |
800 | if (req_opcode(req->info.ctrl) == EXPECTED) |
801 | req->tidoffset += datalen; |
802 | req->sent += datalen; |
803 | while (datalen) { |
			ret = hfi1_add_pages_to_sdma_packet(req, tx, iovec,
							    &datalen);
806 | if (ret) |
807 | goto free_txreq; |
808 | iovec = &req->iovs[req->iov_idx]; |
809 | } |
		list_add_tail(&tx->txreq.list, &req->txps);
811 | /* |
812 | * It is important to increment this here as it is used to |
813 | * generate the BTH.PSN and, therefore, can't be bulk-updated |
814 | * outside of the loop. |
815 | */ |
816 | tx->seqnum = req->seqnum++; |
817 | npkts++; |
818 | } |
819 | dosend: |
	ret = sdma_send_txlist(req->sde,
			       iowait_get_ib_work(&pq->busy),
			       &req->txps, &count);
823 | req->seqsubmitted += count; |
824 | if (req->seqsubmitted == req->info.npkts) { |
825 | /* |
826 | * The txreq has already been submitted to the HW queue |
827 | * so we can free the AHG entry now. Corruption will not |
828 | * happen due to the sequential manner in which |
829 | * descriptors are processed. |
830 | */ |
831 | if (req->ahg_idx >= 0) |
			sdma_ahg_free(req->sde, req->ahg_idx);
833 | } |
834 | return ret; |
835 | |
836 | free_txreq: |
	sdma_txclean(pq->dd, &tx->txreq);
free_tx:
	kmem_cache_free(pq->txreq_cache, tx);
840 | return ret; |
841 | } |
842 | |
static int check_header_template(struct user_sdma_request *req,
844 | struct hfi1_pkt_header *hdr, u32 lrhlen, |
845 | u32 datalen) |
846 | { |
847 | /* |
848 | * Perform safety checks for any type of packet: |
	 * - transfer size is multiple of 64 bytes
850 | * - packet length is multiple of 4 bytes |
851 | * - packet length is not larger than MTU size |
852 | * |
853 | * These checks are only done for the first packet of the |
854 | * transfer since the header is "given" to us by user space. |
855 | * For the remainder of the packets we compute the values. |
856 | */ |
	if (req->info.fragsize % PIO_BLOCK_SIZE || lrhlen & 0x3 ||
	    lrhlen > get_lrh_len(*hdr, req->info.fragsize))
859 | return -EINVAL; |
860 | |
861 | if (req_opcode(req->info.ctrl) == EXPECTED) { |
862 | /* |
863 | * The header is checked only on the first packet. Furthermore, |
864 | * we ensure that at least one TID entry is copied when the |
865 | * request is submitted. Therefore, we don't have to verify that |
866 | * tididx points to something sane. |
867 | */ |
868 | u32 tidval = req->tids[req->tididx], |
869 | tidlen = EXP_TID_GET(tidval, LEN) * PAGE_SIZE, |
870 | tididx = EXP_TID_GET(tidval, IDX), |
871 | tidctrl = EXP_TID_GET(tidval, CTRL), |
872 | tidoff; |
873 | __le32 kval = hdr->kdeth.ver_tid_offset; |
874 | |
875 | tidoff = KDETH_GET(kval, OFFSET) * |
876 | (KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ? |
877 | KDETH_OM_LARGE : KDETH_OM_SMALL); |
878 | /* |
879 | * Expected receive packets have the following |
880 | * additional checks: |
881 | * - offset is not larger than the TID size |
882 | * - TIDCtrl values match between header and TID array |
883 | * - TID indexes match between header and TID array |
884 | */ |
885 | if ((tidoff + datalen > tidlen) || |
886 | KDETH_GET(kval, TIDCTRL) != tidctrl || |
887 | KDETH_GET(kval, TID) != tididx) |
888 | return -EINVAL; |
889 | } |
890 | return 0; |
891 | } |
892 | |
893 | /* |
894 | * Correctly set the BTH.PSN field based on type of |
895 | * transfer - eager packets can just increment the PSN but |
896 | * expected packets encode generation and sequence in the |
897 | * BTH.PSN field so just incrementing will result in errors. |
898 | */ |
899 | static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags) |
900 | { |
901 | u32 val = be32_to_cpu(bthpsn), |
902 | mask = (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffffull : |
903 | 0xffffffull), |
904 | psn = val & mask; |
905 | if (expct) |
906 | psn = (psn & ~HFI1_KDETH_BTH_SEQ_MASK) | |
907 | ((psn + frags) & HFI1_KDETH_BTH_SEQ_MASK); |
908 | else |
909 | psn = psn + frags; |
910 | return psn & mask; |
911 | } |
912 | |
static int set_txreq_header(struct user_sdma_request *req,
914 | struct user_sdma_txreq *tx, u32 datalen) |
915 | { |
916 | struct hfi1_user_sdma_pkt_q *pq = req->pq; |
917 | struct hfi1_pkt_header *hdr = &tx->hdr; |
918 | u8 omfactor; /* KDETH.OM */ |
919 | u16 pbclen; |
920 | int ret; |
	u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
922 | |
923 | /* Copy the header template to the request before modification */ |
924 | memcpy(hdr, &req->hdr, sizeof(*hdr)); |
925 | |
926 | /* |
927 | * Check if the PBC and LRH length are mismatched. If so |
928 | * adjust both in the header. |
929 | */ |
930 | pbclen = le16_to_cpu(hdr->pbc[0]); |
931 | if (PBC2LRH(pbclen) != lrhlen) { |
932 | pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen); |
933 | hdr->pbc[0] = cpu_to_le16(pbclen); |
934 | hdr->lrh[2] = cpu_to_be16(lrhlen >> 2); |
935 | /* |
936 | * Third packet |
937 | * This is the first packet in the sequence that has |
938 | * a "static" size that can be used for the rest of |
939 | * the packets (besides the last one). |
940 | */ |
941 | if (unlikely(req->seqnum == 2)) { |
942 | /* |
943 | * From this point on the lengths in both the |
944 | * PBC and LRH are the same until the last |
945 | * packet. |
946 | * Adjust the template so we don't have to update |
947 | * every packet |
948 | */ |
949 | req->hdr.pbc[0] = hdr->pbc[0]; |
950 | req->hdr.lrh[2] = hdr->lrh[2]; |
951 | } |
952 | } |
953 | /* |
954 | * We only have to modify the header if this is not the |
955 | * first packet in the request. Otherwise, we use the |
956 | * header given to us. |
957 | */ |
958 | if (unlikely(!req->seqnum)) { |
959 | ret = check_header_template(req, hdr, lrhlen, datalen); |
960 | if (ret) |
961 | return ret; |
962 | goto done; |
963 | } |
964 | |
965 | hdr->bth[2] = cpu_to_be32( |
966 | set_pkt_bth_psn(hdr->bth[2], |
967 | (req_opcode(req->info.ctrl) == EXPECTED), |
968 | req->seqnum)); |
969 | |
970 | /* Set ACK request on last packet */ |
971 | if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK)) |
972 | hdr->bth[2] |= cpu_to_be32(1UL << 31); |
973 | |
974 | /* Set the new offset */ |
975 | hdr->kdeth.swdata[6] = cpu_to_le32(req->koffset); |
976 | /* Expected packets have to fill in the new TID information */ |
977 | if (req_opcode(req->info.ctrl) == EXPECTED) { |
978 | tidval = req->tids[req->tididx]; |
979 | /* |
980 | * If the offset puts us at the end of the current TID, |
981 | * advance everything. |
982 | */ |
983 | if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) * |
984 | PAGE_SIZE)) { |
985 | req->tidoffset = 0; |
986 | /* |
987 | * Since we don't copy all the TIDs, all at once, |
988 | * we have to check again. |
989 | */ |
990 | if (++req->tididx > req->n_tids - 1 || |
991 | !req->tids[req->tididx]) { |
992 | return -EINVAL; |
993 | } |
994 | tidval = req->tids[req->tididx]; |
995 | } |
996 | omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >= |
997 | KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE_SHIFT : |
998 | KDETH_OM_SMALL_SHIFT; |
999 | /* Set KDETH.TIDCtrl based on value for this TID. */ |
1000 | KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL, |
1001 | EXP_TID_GET(tidval, CTRL)); |
1002 | /* Set KDETH.TID based on value for this TID */ |
1003 | KDETH_SET(hdr->kdeth.ver_tid_offset, TID, |
1004 | EXP_TID_GET(tidval, IDX)); |
1005 | /* Clear KDETH.SH when DISABLE_SH flag is set */ |
1006 | if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) |
1007 | KDETH_SET(hdr->kdeth.ver_tid_offset, SH, 0); |
1008 | /* |
1009 | * Set the KDETH.OFFSET and KDETH.OM based on size of |
1010 | * transfer. |
1011 | */ |
		trace_hfi1_sdma_user_tid_info(
			pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx,
			req->tidoffset, req->tidoffset >> omfactor,
			omfactor != KDETH_OM_SMALL_SHIFT);
1016 | KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET, |
1017 | req->tidoffset >> omfactor); |
1018 | KDETH_SET(hdr->kdeth.ver_tid_offset, OM, |
1019 | omfactor != KDETH_OM_SMALL_SHIFT); |
1020 | } |
1021 | done: |
	trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
				    req->info.comp_idx, hdr, tidval);
	return sdma_txadd_kvaddr(pq->dd, &tx->txreq, hdr, sizeof(*hdr));
1025 | } |
1026 | |
static int set_txreq_header_ahg(struct user_sdma_request *req,
1028 | struct user_sdma_txreq *tx, u32 datalen) |
1029 | { |
1030 | u32 ahg[AHG_KDETH_ARRAY_SIZE]; |
1031 | int idx = 0; |
1032 | u8 omfactor; /* KDETH.OM */ |
1033 | struct hfi1_user_sdma_pkt_q *pq = req->pq; |
1034 | struct hfi1_pkt_header *hdr = &req->hdr; |
1035 | u16 pbclen = le16_to_cpu(hdr->pbc[0]); |
	u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
1037 | size_t array_size = ARRAY_SIZE(ahg); |
1038 | |
1039 | if (PBC2LRH(pbclen) != lrhlen) { |
1040 | /* PBC.PbcLengthDWs */ |
		idx = ahg_header_set(ahg, idx, array_size, 0, 0, 12,
				     (__force u16)cpu_to_le16(LRH2PBC(lrhlen)));
1043 | if (idx < 0) |
1044 | return idx; |
1045 | /* LRH.PktLen (we need the full 16 bits due to byte swap) */ |
		idx = ahg_header_set(ahg, idx, array_size, 3, 0, 16,
				     (__force u16)cpu_to_be16(lrhlen >> 2));
1048 | if (idx < 0) |
1049 | return idx; |
1050 | } |
1051 | |
1052 | /* |
1053 | * Do the common updates |
1054 | */ |
1055 | /* BTH.PSN and BTH.A */ |
1056 | val32 = (be32_to_cpu(hdr->bth[2]) + req->seqnum) & |
1057 | (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff); |
1058 | if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK)) |
1059 | val32 |= 1UL << 31; |
	idx = ahg_header_set(ahg, idx, array_size, 6, 0, 16,
			     (__force u16)cpu_to_be16(val32 >> 16));
1062 | if (idx < 0) |
1063 | return idx; |
	idx = ahg_header_set(ahg, idx, array_size, 6, 16, 16,
			     (__force u16)cpu_to_be16(val32 & 0xffff));
1066 | if (idx < 0) |
1067 | return idx; |
1068 | /* KDETH.Offset */ |
	idx = ahg_header_set(ahg, idx, array_size, 15, 0, 16,
			     (__force u16)cpu_to_le16(req->koffset & 0xffff));
	if (idx < 0)
		return idx;
	idx = ahg_header_set(ahg, idx, array_size, 15, 16, 16,
			     (__force u16)cpu_to_le16(req->koffset >> 16));
1075 | if (idx < 0) |
1076 | return idx; |
1077 | if (req_opcode(req->info.ctrl) == EXPECTED) { |
1078 | __le16 val; |
1079 | |
1080 | tidval = req->tids[req->tididx]; |
1081 | |
1082 | /* |
1083 | * If the offset puts us at the end of the current TID, |
1084 | * advance everything. |
1085 | */ |
1086 | if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) * |
1087 | PAGE_SIZE)) { |
1088 | req->tidoffset = 0; |
1089 | /* |
1090 | * Since we don't copy all the TIDs, all at once, |
1091 | * we have to check again. |
1092 | */ |
1093 | if (++req->tididx > req->n_tids - 1 || |
1094 | !req->tids[req->tididx]) |
1095 | return -EINVAL; |
1096 | tidval = req->tids[req->tididx]; |
1097 | } |
1098 | omfactor = ((EXP_TID_GET(tidval, LEN) * |
1099 | PAGE_SIZE) >= |
1100 | KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT : |
1101 | KDETH_OM_SMALL_SHIFT; |
1102 | /* KDETH.OM and KDETH.OFFSET (TID) */ |
		idx = ahg_header_set(
				ahg, idx, array_size, 7, 0, 16,
				((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
				 ((req->tidoffset >> omfactor)
				  & 0x7fff)));
1108 | if (idx < 0) |
1109 | return idx; |
1110 | /* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */ |
1111 | val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) | |
1112 | (EXP_TID_GET(tidval, IDX) & 0x3ff)); |
1113 | |
1114 | if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) { |
1115 | val |= cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset, |
1116 | INTR) << |
1117 | AHG_KDETH_INTR_SHIFT)); |
1118 | } else { |
1119 | val |= KDETH_GET(hdr->kdeth.ver_tid_offset, SH) ? |
1120 | cpu_to_le16(0x1 << AHG_KDETH_SH_SHIFT) : |
1121 | cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset, |
1122 | INTR) << |
1123 | AHG_KDETH_INTR_SHIFT)); |
1124 | } |
1125 | |
		idx = ahg_header_set(ahg, idx, array_size,
				     7, 16, 14, (__force u16)val);
1128 | if (idx < 0) |
1129 | return idx; |
1130 | } |
1131 | |
	trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
					req->info.comp_idx, req->sde->this_idx,
					req->ahg_idx, ahg, idx, tidval);
	sdma_txinit_ahg(&tx->txreq,
			SDMA_TXREQ_F_USE_AHG,
			datalen, req->ahg_idx, idx,
			ahg, sizeof(req->hdr),
			user_sdma_txreq_cb);
1140 | |
1141 | return idx; |
1142 | } |
1143 | |
1144 | /** |
1145 | * user_sdma_txreq_cb() - SDMA tx request completion callback. |
1146 | * @txreq: valid sdma tx request |
1147 | * @status: success/failure of request |
1148 | * |
1149 | * Called when the SDMA progress state machine gets notification that |
1150 | * the SDMA descriptors for this tx request have been processed by the |
1151 | * DMA engine. Called in interrupt context. |
1152 | * Only do work on completed sequences. |
1153 | */ |
1154 | static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) |
1155 | { |
1156 | struct user_sdma_txreq *tx = |
1157 | container_of(txreq, struct user_sdma_txreq, txreq); |
1158 | struct user_sdma_request *req; |
1159 | struct hfi1_user_sdma_pkt_q *pq; |
1160 | struct hfi1_user_sdma_comp_q *cq; |
1161 | enum hfi1_sdma_comp_state state = COMPLETE; |
1162 | |
1163 | if (!tx->req) |
1164 | return; |
1165 | |
1166 | req = tx->req; |
1167 | pq = req->pq; |
1168 | cq = req->cq; |
1169 | |
1170 | if (status != SDMA_TXREQ_S_OK) { |
1171 | SDMA_DBG(req, "SDMA completion with error %d" , |
1172 | status); |
1173 | WRITE_ONCE(req->has_error, 1); |
1174 | state = ERROR; |
1175 | } |
1176 | |
1177 | req->seqcomp = tx->seqnum; |
	kmem_cache_free(pq->txreq_cache, tx);
1179 | |
1180 | /* sequence isn't complete? We are done */ |
1181 | if (req->seqcomp != req->info.npkts - 1) |
1182 | return; |
1183 | |
1184 | user_sdma_free_request(req); |
	set_comp_state(pq, cq, req->info.comp_idx, state, status);
1186 | pq_update(pq); |
1187 | } |
1188 | |
1189 | static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq) |
1190 | { |
	if (atomic_dec_and_test(&pq->n_reqs))
1192 | wake_up(&pq->wait); |
1193 | } |
1194 | |
1195 | static void user_sdma_free_request(struct user_sdma_request *req) |
1196 | { |
	if (!list_empty(&req->txps)) {
1198 | struct sdma_txreq *t, *p; |
1199 | |
1200 | list_for_each_entry_safe(t, p, &req->txps, list) { |
1201 | struct user_sdma_txreq *tx = |
1202 | container_of(t, struct user_sdma_txreq, txreq); |
			list_del_init(&t->list);
			sdma_txclean(req->pq->dd, t);
			kmem_cache_free(req->pq->txreq_cache, tx);
1206 | } |
1207 | } |
1208 | |
	kfree(req->tids);
	clear_bit(req->info.comp_idx, req->pq->req_in_use);
1211 | } |
1212 | |
1213 | static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq, |
1214 | struct hfi1_user_sdma_comp_q *cq, |
1215 | u16 idx, enum hfi1_sdma_comp_state state, |
1216 | int ret) |
1217 | { |
1218 | if (state == ERROR) |
1219 | cq->comps[idx].errcode = -ret; |
1220 | smp_wmb(); /* make sure errcode is visible first */ |
1221 | cq->comps[idx].status = state; |
	trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
					idx, state, ret);
1224 | } |
1225 | |