1 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause |
2 | /* |
3 | * Copyright(c) 2020 - 2023 Cornelis Networks, Inc. |
4 | * Copyright(c) 2015 - 2018 Intel Corporation. |
5 | */ |
6 | |
7 | #include <linux/mm.h> |
8 | #include <linux/types.h> |
9 | #include <linux/device.h> |
10 | #include <linux/dmapool.h> |
11 | #include <linux/slab.h> |
12 | #include <linux/list.h> |
13 | #include <linux/highmem.h> |
14 | #include <linux/io.h> |
15 | #include <linux/uio.h> |
16 | #include <linux/rbtree.h> |
17 | #include <linux/spinlock.h> |
18 | #include <linux/delay.h> |
19 | #include <linux/kthread.h> |
20 | #include <linux/mmu_context.h> |
21 | #include <linux/module.h> |
22 | #include <linux/vmalloc.h> |
23 | #include <linux/string.h> |
24 | |
25 | #include "hfi.h" |
26 | #include "sdma.h" |
27 | #include "user_sdma.h" |
28 | #include "verbs.h" /* for the headers */ |
29 | #include "common.h" /* for struct hfi1_tid_info */ |
30 | #include "trace.h" |
31 | |
32 | static uint hfi1_sdma_comp_ring_size = 128; |
33 | module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO); |
MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 128");
35 | |
36 | static unsigned initial_pkt_count = 8; |
37 | |
38 | static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts); |
39 | static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status); |
40 | static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq); |
41 | static void user_sdma_free_request(struct user_sdma_request *req); |
42 | static int check_header_template(struct user_sdma_request *req, |
43 | struct hfi1_pkt_header *hdr, u32 lrhlen, |
44 | u32 datalen); |
45 | static int set_txreq_header(struct user_sdma_request *req, |
46 | struct user_sdma_txreq *tx, u32 datalen); |
47 | static int set_txreq_header_ahg(struct user_sdma_request *req, |
48 | struct user_sdma_txreq *tx, u32 len); |
49 | static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq, |
50 | struct hfi1_user_sdma_comp_q *cq, |
51 | u16 idx, enum hfi1_sdma_comp_state state, |
52 | int ret); |
53 | static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags); |
54 | static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len); |
55 | |
56 | static int defer_packet_queue( |
57 | struct sdma_engine *sde, |
58 | struct iowait_work *wait, |
59 | struct sdma_txreq *txreq, |
60 | uint seq, |
61 | bool pkts_sent); |
62 | static void activate_packet_queue(struct iowait *wait, int reason); |
63 | |
64 | static int defer_packet_queue( |
65 | struct sdma_engine *sde, |
66 | struct iowait_work *wait, |
67 | struct sdma_txreq *txreq, |
68 | uint seq, |
69 | bool pkts_sent) |
70 | { |
71 | struct hfi1_user_sdma_pkt_q *pq = |
72 | container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy); |
73 | |
	write_seqlock(&sde->waitlock);
	trace_hfi1_usdma_defer(pq, sde, &pq->busy);
	if (sdma_progress(sde, seq, txreq))
77 | goto eagain; |
78 | /* |
79 | * We are assuming that if the list is enqueued somewhere, it |
80 | * is to the dmawait list since that is the only place where |
81 | * it is supposed to be enqueued. |
82 | */ |
83 | xchg(&pq->state, SDMA_PKT_Q_DEFERRED); |
	if (list_empty(&pq->busy.list)) {
		pq->busy.lock = &sde->waitlock;
		iowait_get_priority(&pq->busy);
		iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
	}
	write_sequnlock(&sde->waitlock);
90 | return -EBUSY; |
91 | eagain: |
	write_sequnlock(&sde->waitlock);
93 | return -EAGAIN; |
94 | } |
95 | |
96 | static void activate_packet_queue(struct iowait *wait, int reason) |
97 | { |
98 | struct hfi1_user_sdma_pkt_q *pq = |
99 | container_of(wait, struct hfi1_user_sdma_pkt_q, busy); |
100 | |
101 | trace_hfi1_usdma_activate(pq, wait, reason); |
102 | xchg(&pq->state, SDMA_PKT_Q_ACTIVE); |
103 | wake_up(&wait->wait_dma); |
104 | }; |
105 | |
106 | int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, |
107 | struct hfi1_filedata *fd) |
108 | { |
109 | int ret = -ENOMEM; |
110 | char buf[64]; |
111 | struct hfi1_devdata *dd; |
112 | struct hfi1_user_sdma_comp_q *cq; |
113 | struct hfi1_user_sdma_pkt_q *pq; |
114 | |
115 | if (!uctxt || !fd) |
116 | return -EBADF; |
117 | |
118 | if (!hfi1_sdma_comp_ring_size) |
119 | return -EINVAL; |
120 | |
121 | dd = uctxt->dd; |
122 | |
	pq = kzalloc(sizeof(*pq), GFP_KERNEL);
124 | if (!pq) |
125 | return -ENOMEM; |
126 | pq->dd = dd; |
127 | pq->ctxt = uctxt->ctxt; |
128 | pq->subctxt = fd->subctxt; |
129 | pq->n_max_reqs = hfi1_sdma_comp_ring_size; |
	atomic_set(&pq->n_reqs, 0);
	init_waitqueue_head(&pq->wait);
	atomic_set(&pq->n_locked, 0);

	iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
		    activate_packet_queue, NULL, NULL);
136 | pq->reqidx = 0; |
137 | |
	pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
			   sizeof(*pq->reqs),
			   GFP_KERNEL);
141 | if (!pq->reqs) |
142 | goto pq_reqs_nomem; |
143 | |
	pq->req_in_use = bitmap_zalloc(hfi1_sdma_comp_ring_size, GFP_KERNEL);
145 | if (!pq->req_in_use) |
146 | goto pq_reqs_no_in_use; |
147 | |
	snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
		 fd->subctxt);
	pq->txreq_cache = kmem_cache_create(buf,
					    sizeof(struct user_sdma_txreq),
					    L1_CACHE_BYTES,
					    SLAB_HWCACHE_ALIGN,
					    NULL);
	if (!pq->txreq_cache) {
		dd_dev_err(dd, "[%u] Failed to allocate TxReq cache\n",
157 | uctxt->ctxt); |
158 | goto pq_txreq_nomem; |
159 | } |
160 | |
	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
162 | if (!cq) |
163 | goto cq_nomem; |
164 | |
165 | cq->comps = vmalloc_user(PAGE_ALIGN(sizeof(*cq->comps) |
166 | * hfi1_sdma_comp_ring_size)); |
167 | if (!cq->comps) |
168 | goto cq_comps_nomem; |
169 | |
170 | cq->nentries = hfi1_sdma_comp_ring_size; |
171 | |
172 | ret = hfi1_init_system_pinning(pq); |
173 | if (ret) |
174 | goto pq_mmu_fail; |
175 | |
176 | rcu_assign_pointer(fd->pq, pq); |
177 | fd->cq = cq; |
178 | |
179 | return 0; |
180 | |
181 | pq_mmu_fail: |
	vfree(cq->comps);
cq_comps_nomem:
	kfree(cq);
cq_nomem:
	kmem_cache_destroy(pq->txreq_cache);
pq_txreq_nomem:
	bitmap_free(pq->req_in_use);
pq_reqs_no_in_use:
	kfree(pq->reqs);
pq_reqs_nomem:
	kfree(pq);
193 | |
194 | return ret; |
195 | } |
196 | |
197 | static void flush_pq_iowait(struct hfi1_user_sdma_pkt_q *pq) |
198 | { |
199 | unsigned long flags; |
200 | seqlock_t *lock = pq->busy.lock; |
201 | |
202 | if (!lock) |
203 | return; |
204 | write_seqlock_irqsave(lock, flags); |
	if (!list_empty(&pq->busy.list)) {
		list_del_init(&pq->busy.list);
		pq->busy.lock = NULL;
	}
	write_sequnlock_irqrestore(lock, flags);
210 | } |
211 | |
212 | int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, |
213 | struct hfi1_ctxtdata *uctxt) |
214 | { |
215 | struct hfi1_user_sdma_pkt_q *pq; |
216 | |
	trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt);
218 | |
	spin_lock(&fd->pq_rcu_lock);
220 | pq = srcu_dereference_check(fd->pq, &fd->pq_srcu, |
221 | lockdep_is_held(&fd->pq_rcu_lock)); |
222 | if (pq) { |
223 | rcu_assign_pointer(fd->pq, NULL); |
		spin_unlock(&fd->pq_rcu_lock);
		synchronize_srcu(&fd->pq_srcu);
		/* at this point there can be no more new requests */
		iowait_sdma_drain(&pq->busy);
228 | /* Wait until all requests have been freed. */ |
229 | wait_event_interruptible( |
230 | pq->wait, |
231 | !atomic_read(&pq->n_reqs)); |
		kfree(pq->reqs);
		hfi1_free_system_pinning(pq);
		bitmap_free(pq->req_in_use);
		kmem_cache_destroy(pq->txreq_cache);
		flush_pq_iowait(pq);
		kfree(pq);
	} else {
		spin_unlock(&fd->pq_rcu_lock);
240 | } |
241 | if (fd->cq) { |
		vfree(fd->cq->comps);
		kfree(fd->cq);
244 | fd->cq = NULL; |
245 | } |
246 | return 0; |
247 | } |
248 | |
249 | static u8 dlid_to_selector(u16 dlid) |
250 | { |
251 | static u8 mapping[256]; |
252 | static int initialized; |
253 | static u8 next; |
254 | int hash; |
255 | |
256 | if (!initialized) { |
257 | memset(mapping, 0xFF, 256); |
258 | initialized = 1; |
259 | } |
260 | |
261 | hash = ((dlid >> 8) ^ dlid) & 0xFF; |
262 | if (mapping[hash] == 0xFF) { |
263 | mapping[hash] = next; |
264 | next = (next + 1) & 0x7F; |
265 | } |
266 | |
267 | return mapping[hash]; |
268 | } |
269 | |
270 | /** |
271 | * hfi1_user_sdma_process_request() - Process and start a user sdma request |
272 | * @fd: valid file descriptor |
273 | * @iovec: array of io vectors to process |
274 | * @dim: overall iovec array size |
275 | * @count: number of io vector array entries processed |
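
 *
 * The first io vector must begin with a struct sdma_req_info immediately
 * followed by the packet header template; EXPECTED requests additionally
 * carry a trailing vector holding the TID array (see the copies below).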
276 | */ |
277 | int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, |
278 | struct iovec *iovec, unsigned long dim, |
279 | unsigned long *count) |
280 | { |
281 | int ret = 0, i; |
282 | struct hfi1_ctxtdata *uctxt = fd->uctxt; |
283 | struct hfi1_user_sdma_pkt_q *pq = |
284 | srcu_dereference(fd->pq, &fd->pq_srcu); |
285 | struct hfi1_user_sdma_comp_q *cq = fd->cq; |
286 | struct hfi1_devdata *dd = pq->dd; |
287 | unsigned long idx = 0; |
288 | u8 pcount = initial_pkt_count; |
289 | struct sdma_req_info info; |
290 | struct user_sdma_request *req; |
291 | u8 opcode, sc, vl; |
292 | u16 pkey; |
293 | u32 slid; |
294 | u16 dlid; |
295 | u32 selector; |
296 | |
297 | if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) { |
298 | hfi1_cdbg( |
299 | SDMA, |
300 | "[%u:%u:%u] First vector not big enough for header %lu/%lu" , |
301 | dd->unit, uctxt->ctxt, fd->subctxt, |
302 | iovec[idx].iov_len, sizeof(info) + sizeof(req->hdr)); |
303 | return -EINVAL; |
304 | } |
	ret = copy_from_user(&info, iovec[idx].iov_base, sizeof(info));
306 | if (ret) { |
307 | hfi1_cdbg(SDMA, "[%u:%u:%u] Failed to copy info QW (%d)" , |
308 | dd->unit, uctxt->ctxt, fd->subctxt, ret); |
309 | return -EFAULT; |
310 | } |
311 | |
	trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt,
				     (u16 *)&info);
314 | if (info.comp_idx >= hfi1_sdma_comp_ring_size) { |
315 | hfi1_cdbg(SDMA, |
316 | "[%u:%u:%u:%u] Invalid comp index" , |
317 | dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx); |
318 | return -EINVAL; |
319 | } |
320 | |
321 | /* |
322 | * Sanity check the header io vector count. Need at least 1 vector |
323 | * (header) and cannot be larger than the actual io vector count. |
324 | */ |
325 | if (req_iovcnt(info.ctrl) < 1 || req_iovcnt(info.ctrl) > dim) { |
326 | hfi1_cdbg(SDMA, |
327 | "[%u:%u:%u:%u] Invalid iov count %d, dim %ld" , |
328 | dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx, |
329 | req_iovcnt(info.ctrl), dim); |
330 | return -EINVAL; |
331 | } |
332 | |
333 | if (!info.fragsize) { |
334 | hfi1_cdbg(SDMA, |
335 | "[%u:%u:%u:%u] Request does not specify fragsize" , |
336 | dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx); |
337 | return -EINVAL; |
338 | } |
339 | |
340 | /* Try to claim the request. */ |
	if (test_and_set_bit(info.comp_idx, pq->req_in_use)) {
		hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in use",
343 | dd->unit, uctxt->ctxt, fd->subctxt, |
344 | info.comp_idx); |
345 | return -EBADSLT; |
346 | } |
347 | /* |
348 | * All safety checks have been done and this request has been claimed. |
349 | */ |
	trace_hfi1_sdma_user_process_request(dd, uctxt->ctxt, fd->subctxt,
					     info.comp_idx);
352 | req = pq->reqs + info.comp_idx; |
353 | req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */ |
354 | req->data_len = 0; |
355 | req->pq = pq; |
356 | req->cq = cq; |
357 | req->ahg_idx = -1; |
358 | req->iov_idx = 0; |
359 | req->sent = 0; |
360 | req->seqnum = 0; |
361 | req->seqcomp = 0; |
362 | req->seqsubmitted = 0; |
363 | req->tids = NULL; |
364 | req->has_error = 0; |
	INIT_LIST_HEAD(&req->txps);
366 | |
367 | memcpy(&req->info, &info, sizeof(info)); |
368 | |
369 | /* The request is initialized, count it */ |
	atomic_inc(&pq->n_reqs);
371 | |
372 | if (req_opcode(info.ctrl) == EXPECTED) { |
373 | /* expected must have a TID info and at least one data vector */ |
374 | if (req->data_iovs < 2) { |
			SDMA_DBG(req,
				 "Not enough vectors for expected request");
377 | ret = -EINVAL; |
378 | goto free_req; |
379 | } |
380 | req->data_iovs--; |
381 | } |
382 | |
383 | if (!info.npkts || req->data_iovs > MAX_VECTORS_PER_REQ) { |
384 | SDMA_DBG(req, "Too many vectors (%u/%u)" , req->data_iovs, |
385 | MAX_VECTORS_PER_REQ); |
386 | ret = -EINVAL; |
387 | goto free_req; |
388 | } |
389 | |
390 | /* Copy the header from the user buffer */ |
	ret = copy_from_user(&req->hdr, iovec[idx].iov_base + sizeof(info),
			     sizeof(req->hdr));
393 | if (ret) { |
394 | SDMA_DBG(req, "Failed to copy header template (%d)" , ret); |
395 | ret = -EFAULT; |
396 | goto free_req; |
397 | } |
398 | |
399 | /* If Static rate control is not enabled, sanitize the header. */ |
400 | if (!HFI1_CAP_IS_USET(STATIC_RATE_CTRL)) |
401 | req->hdr.pbc[2] = 0; |
402 | |
403 | /* Validate the opcode. Do not trust packets from user space blindly. */ |
404 | opcode = (be32_to_cpu(req->hdr.bth[0]) >> 24) & 0xff; |
405 | if ((opcode & USER_OPCODE_CHECK_MASK) != |
406 | USER_OPCODE_CHECK_VAL) { |
407 | SDMA_DBG(req, "Invalid opcode (%d)" , opcode); |
408 | ret = -EINVAL; |
409 | goto free_req; |
410 | } |
411 | /* |
412 | * Validate the vl. Do not trust packets from user space blindly. |
413 | * VL comes from PBC, SC comes from LRH, and the VL needs to |
414 | * match the SC look up. |
415 | */ |
416 | vl = (le16_to_cpu(req->hdr.pbc[0]) >> 12) & 0xF; |
417 | sc = (((be16_to_cpu(req->hdr.lrh[0]) >> 12) & 0xF) | |
418 | (((le16_to_cpu(req->hdr.pbc[1]) >> 14) & 0x1) << 4)); |
419 | if (vl >= dd->pport->vls_operational || |
	    vl != sc_to_vlt(dd, sc)) {
		SDMA_DBG(req, "Invalid SC(%u)/VL(%u)", sc, vl);
422 | ret = -EINVAL; |
423 | goto free_req; |
424 | } |
425 | |
426 | /* Checking P_KEY for requests from user-space */ |
427 | pkey = (u16)be32_to_cpu(req->hdr.bth[0]); |
428 | slid = be16_to_cpu(req->hdr.lrh[3]); |
	if (egress_pkey_check(dd->pport, slid, pkey, sc, PKEY_CHECK_INVALID)) {
430 | ret = -EINVAL; |
431 | goto free_req; |
432 | } |
433 | |
434 | /* |
435 | * Also should check the BTH.lnh. If it says the next header is GRH then |
436 | * the RXE parsing will be off and will land in the middle of the KDETH |
437 | * or miss it entirely. |
438 | */ |
439 | if ((be16_to_cpu(req->hdr.lrh[0]) & 0x3) == HFI1_LRH_GRH) { |
440 | SDMA_DBG(req, "User tried to pass in a GRH" ); |
441 | ret = -EINVAL; |
442 | goto free_req; |
443 | } |
444 | |
445 | req->koffset = le32_to_cpu(req->hdr.kdeth.swdata[6]); |
446 | /* |
447 | * Calculate the initial TID offset based on the values of |
448 | * KDETH.OFFSET and KDETH.OM that are passed in. |
449 | */ |
450 | req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) * |
451 | (KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ? |
452 | KDETH_OM_LARGE : KDETH_OM_SMALL); |
	trace_hfi1_sdma_user_initial_tidoffset(dd, uctxt->ctxt, fd->subctxt,
					       info.comp_idx, req->tidoffset);
455 | idx++; |
456 | |
457 | /* Save all the IO vector structures */ |
458 | for (i = 0; i < req->data_iovs; i++) { |
459 | req->iovs[i].offset = 0; |
		INIT_LIST_HEAD(&req->iovs[i].list);
461 | memcpy(&req->iovs[i].iov, |
462 | iovec + idx++, |
463 | sizeof(req->iovs[i].iov)); |
464 | if (req->iovs[i].iov.iov_len == 0) { |
465 | ret = -EINVAL; |
466 | goto free_req; |
467 | } |
468 | req->data_len += req->iovs[i].iov.iov_len; |
469 | } |
	trace_hfi1_sdma_user_data_length(dd, uctxt->ctxt, fd->subctxt,
					 info.comp_idx, req->data_len);
472 | if (pcount > req->info.npkts) |
473 | pcount = req->info.npkts; |
474 | /* |
475 | * Copy any TID info |
476 | * User space will provide the TID info only when the |
477 | * request type is EXPECTED. This is true even if there is |
478 | * only one packet in the request and the header is already |
479 | * setup. The reason for the singular TID case is that the |
480 | * driver needs to perform safety checks. |
481 | */ |
482 | if (req_opcode(req->info.ctrl) == EXPECTED) { |
483 | u16 ntids = iovec[idx].iov_len / sizeof(*req->tids); |
484 | u32 *tmp; |
485 | |
486 | if (!ntids || ntids > MAX_TID_PAIR_ENTRIES) { |
487 | ret = -EINVAL; |
488 | goto free_req; |
489 | } |
490 | |
491 | /* |
492 | * We have to copy all of the tids because they may vary |
493 | * in size and, therefore, the TID count might not be |
494 | * equal to the pkt count. However, there is no way to |
495 | * tell at this point. |
496 | */ |
		tmp = memdup_array_user(iovec[idx].iov_base,
					ntids, sizeof(*req->tids));
		if (IS_ERR(tmp)) {
			ret = PTR_ERR(tmp);
			SDMA_DBG(req, "Failed to copy %d TIDs (%d)",
502 | ntids, ret); |
503 | goto free_req; |
504 | } |
505 | req->tids = tmp; |
506 | req->n_tids = ntids; |
507 | req->tididx = 0; |
508 | idx++; |
509 | } |
510 | |
511 | dlid = be16_to_cpu(req->hdr.lrh[1]); |
512 | selector = dlid_to_selector(dlid); |
513 | selector += uctxt->ctxt + fd->subctxt; |
514 | req->sde = sdma_select_user_engine(dd, selector, vl); |
515 | |
	if (!req->sde || !sdma_running(req->sde)) {
517 | ret = -ECOMM; |
518 | goto free_req; |
519 | } |
520 | |
521 | /* We don't need an AHG entry if the request contains only one packet */ |
522 | if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG)) |
		req->ahg_idx = sdma_ahg_alloc(req->sde);
524 | |
	set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
526 | pq->state = SDMA_PKT_Q_ACTIVE; |
527 | |
528 | /* |
529 | * This is a somewhat blocking send implementation. |
530 | * The driver will block the caller until all packets of the |
531 | * request have been submitted to the SDMA engine. However, it |
532 | * will not wait for send completions. |
533 | */ |
534 | while (req->seqsubmitted != req->info.npkts) { |
		ret = user_sdma_send_pkts(req, pcount);
536 | if (ret < 0) { |
537 | int we_ret; |
538 | |
539 | if (ret != -EBUSY) |
540 | goto free_req; |
541 | we_ret = wait_event_interruptible_timeout( |
542 | pq->busy.wait_dma, |
543 | pq->state == SDMA_PKT_Q_ACTIVE, |
544 | msecs_to_jiffies( |
545 | SDMA_IOWAIT_TIMEOUT)); |
546 | trace_hfi1_usdma_we(pq, we_ret); |
547 | if (we_ret <= 0) |
548 | flush_pq_iowait(pq); |
549 | } |
550 | } |
551 | *count += idx; |
552 | return 0; |
553 | free_req: |
554 | /* |
	 * If seqsubmitted == npkts, the completion routine controls the
	 * final state. If seqsubmitted < npkts, wait for any
557 | * outstanding packets to finish before cleaning up. |
558 | */ |
559 | if (req->seqsubmitted < req->info.npkts) { |
560 | if (req->seqsubmitted) |
561 | wait_event(pq->busy.wait_dma, |
562 | (req->seqcomp == req->seqsubmitted - 1)); |
563 | user_sdma_free_request(req); |
564 | pq_update(pq); |
		set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
566 | } |
567 | return ret; |
568 | } |
569 | |
570 | static inline u32 compute_data_length(struct user_sdma_request *req, |
571 | struct user_sdma_txreq *tx) |
572 | { |
573 | /* |
574 | * Determine the proper size of the packet data. |
575 | * The size of the data of the first packet is in the header |
576 | * template. However, it includes the header and ICRC, which need |
577 | * to be subtracted. |
578 | * The minimum representable packet data length in a header is 4 bytes, |
579 | * therefore, when the data length request is less than 4 bytes, there's |
580 | * only one packet, and the packet data length is equal to that of the |
581 | * request data length. |
582 | * The size of the remaining packets is the minimum of the frag |
583 | * size (MTU) or remaining data in the request. |
584 | */ |
585 | u32 len; |
586 | |
587 | if (!req->seqnum) { |
588 | if (req->data_len < sizeof(u32)) |
589 | len = req->data_len; |
590 | else |
591 | len = ((be16_to_cpu(req->hdr.lrh[2]) << 2) - |
592 | (sizeof(tx->hdr) - 4)); |
593 | } else if (req_opcode(req->info.ctrl) == EXPECTED) { |
594 | u32 tidlen = EXP_TID_GET(req->tids[req->tididx], LEN) * |
595 | PAGE_SIZE; |
596 | /* |
597 | * Get the data length based on the remaining space in the |
598 | * TID pair. |
599 | */ |
600 | len = min(tidlen - req->tidoffset, (u32)req->info.fragsize); |
601 | /* If we've filled up the TID pair, move to the next one. */ |
602 | if (unlikely(!len) && ++req->tididx < req->n_tids && |
603 | req->tids[req->tididx]) { |
604 | tidlen = EXP_TID_GET(req->tids[req->tididx], |
605 | LEN) * PAGE_SIZE; |
606 | req->tidoffset = 0; |
607 | len = min_t(u32, tidlen, req->info.fragsize); |
608 | } |
609 | /* |
610 | * Since the TID pairs map entire pages, make sure that we |
		 * are not going to try to send more data than we have
612 | * remaining. |
613 | */ |
614 | len = min(len, req->data_len - req->sent); |
615 | } else { |
616 | len = min(req->data_len - req->sent, (u32)req->info.fragsize); |
617 | } |
	trace_hfi1_sdma_user_compute_length(req->pq->dd,
					    req->pq->ctxt,
					    req->pq->subctxt,
					    req->info.comp_idx,
					    len);
623 | return len; |
624 | } |
625 | |
626 | static inline u32 pad_len(u32 len) |
627 | { |
628 | if (len & (sizeof(u32) - 1)) |
629 | len += sizeof(u32) - (len & (sizeof(u32) - 1)); |
630 | return len; |
631 | } |
632 | |
633 | static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len) |
634 | { |
635 | /* (Size of complete header - size of PBC) + 4B ICRC + data length */ |
636 | return ((sizeof(hdr) - sizeof(hdr.pbc)) + 4 + len); |
637 | } |
638 | |
639 | static int user_sdma_txadd_ahg(struct user_sdma_request *req, |
640 | struct user_sdma_txreq *tx, |
641 | u32 datalen) |
642 | { |
643 | int ret; |
644 | u16 pbclen = le16_to_cpu(req->hdr.pbc[0]); |
	u32 lrhlen = get_lrh_len(req->hdr, pad_len(datalen));
646 | struct hfi1_user_sdma_pkt_q *pq = req->pq; |
647 | |
648 | /* |
649 | * Copy the request header into the tx header |
650 | * because the HW needs a cacheline-aligned |
651 | * address. |
652 | * This copy can be optimized out if the hdr |
653 | * member of user_sdma_request were also |
654 | * cacheline aligned. |
655 | */ |
656 | memcpy(&tx->hdr, &req->hdr, sizeof(tx->hdr)); |
657 | if (PBC2LRH(pbclen) != lrhlen) { |
658 | pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen); |
659 | tx->hdr.pbc[0] = cpu_to_le16(pbclen); |
660 | } |
	ret = check_header_template(req, &tx->hdr, lrhlen, datalen);
662 | if (ret) |
663 | return ret; |
	ret = sdma_txinit_ahg(&tx->txreq, SDMA_TXREQ_F_AHG_COPY,
			      sizeof(tx->hdr) + datalen, req->ahg_idx,
			      0, NULL, 0, user_sdma_txreq_cb);
667 | if (ret) |
668 | return ret; |
	ret = sdma_txadd_kvaddr(pq->dd, &tx->txreq, &tx->hdr, sizeof(tx->hdr));
	if (ret)
		sdma_txclean(pq->dd, &tx->txreq);
672 | return ret; |
673 | } |
674 | |
675 | static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts) |
676 | { |
677 | int ret = 0; |
678 | u16 count; |
679 | unsigned npkts = 0; |
680 | struct user_sdma_txreq *tx = NULL; |
681 | struct hfi1_user_sdma_pkt_q *pq = NULL; |
682 | struct user_sdma_iovec *iovec = NULL; |
683 | |
684 | if (!req->pq) |
685 | return -EINVAL; |
686 | |
687 | pq = req->pq; |
688 | |
689 | /* If tx completion has reported an error, we are done. */ |
690 | if (READ_ONCE(req->has_error)) |
691 | return -EFAULT; |
692 | |
693 | /* |
694 | * Check if we might have sent the entire request already |
695 | */ |
696 | if (unlikely(req->seqnum == req->info.npkts)) { |
		if (!list_empty(&req->txps))
698 | goto dosend; |
699 | return ret; |
700 | } |
701 | |
702 | if (!maxpkts || maxpkts > req->info.npkts - req->seqnum) |
703 | maxpkts = req->info.npkts - req->seqnum; |
704 | |
705 | while (npkts < maxpkts) { |
706 | u32 datalen = 0; |
707 | |
708 | /* |
709 | * Check whether any of the completions have come back |
710 | * with errors. If so, we are not going to process any |
711 | * more packets from this request. |
712 | */ |
713 | if (READ_ONCE(req->has_error)) |
714 | return -EFAULT; |
715 | |
		tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
717 | if (!tx) |
718 | return -ENOMEM; |
719 | |
720 | tx->flags = 0; |
721 | tx->req = req; |
		INIT_LIST_HEAD(&tx->list);
723 | |
724 | /* |
725 | * For the last packet set the ACK request |
726 | * and disable header suppression. |
727 | */ |
728 | if (req->seqnum == req->info.npkts - 1) |
729 | tx->flags |= (TXREQ_FLAGS_REQ_ACK | |
730 | TXREQ_FLAGS_REQ_DISABLE_SH); |
731 | |
732 | /* |
733 | * Calculate the payload size - this is min of the fragment |
734 | * (MTU) size or the remaining bytes in the request but only |
735 | * if we have payload data. |
736 | */ |
737 | if (req->data_len) { |
738 | iovec = &req->iovs[req->iov_idx]; |
739 | if (READ_ONCE(iovec->offset) == iovec->iov.iov_len) { |
740 | if (++req->iov_idx == req->data_iovs) { |
741 | ret = -EFAULT; |
742 | goto free_tx; |
743 | } |
744 | iovec = &req->iovs[req->iov_idx]; |
745 | WARN_ON(iovec->offset); |
746 | } |
747 | |
748 | datalen = compute_data_length(req, tx); |
749 | |
750 | /* |
751 | * Disable header suppression for the payload <= 8DWS. |
752 | * If there is an uncorrectable error in the receive |
753 | * data FIFO when the received payload size is less than |
754 | * or equal to 8DWS then the RxDmaDataFifoRdUncErr is |
		 * not reported. Instead, RHF.EccErr is set if the header
		 * is not suppressed.
757 | */ |
758 | if (!datalen) { |
				SDMA_DBG(req,
					 "Request has data but pkt len is 0");
761 | ret = -EFAULT; |
762 | goto free_tx; |
763 | } else if (datalen <= 32) { |
764 | tx->flags |= TXREQ_FLAGS_REQ_DISABLE_SH; |
765 | } |
766 | } |
767 | |
768 | if (req->ahg_idx >= 0) { |
769 | if (!req->seqnum) { |
770 | ret = user_sdma_txadd_ahg(req, tx, datalen); |
771 | if (ret) |
772 | goto free_tx; |
773 | } else { |
774 | int changes; |
775 | |
				changes = set_txreq_header_ahg(req, tx,
							       datalen);
778 | if (changes < 0) { |
779 | ret = changes; |
780 | goto free_tx; |
781 | } |
782 | } |
783 | } else { |
			ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
					  datalen, user_sdma_txreq_cb);
786 | if (ret) |
787 | goto free_tx; |
788 | /* |
789 | * Modify the header for this packet. This only needs |
790 | * to be done if we are not going to use AHG. Otherwise, |
791 | * the HW will do it based on the changes we gave it |
792 | * during sdma_txinit_ahg(). |
793 | */ |
794 | ret = set_txreq_header(req, tx, datalen); |
795 | if (ret) |
796 | goto free_txreq; |
797 | } |
798 | |
799 | req->koffset += datalen; |
800 | if (req_opcode(req->info.ctrl) == EXPECTED) |
801 | req->tidoffset += datalen; |
802 | req->sent += datalen; |
803 | while (datalen) { |
			ret = hfi1_add_pages_to_sdma_packet(req, tx, iovec,
							    &datalen);
806 | if (ret) |
807 | goto free_txreq; |
808 | iovec = &req->iovs[req->iov_idx]; |
809 | } |
		list_add_tail(&tx->txreq.list, &req->txps);
811 | /* |
812 | * It is important to increment this here as it is used to |
813 | * generate the BTH.PSN and, therefore, can't be bulk-updated |
814 | * outside of the loop. |
815 | */ |
816 | tx->seqnum = req->seqnum++; |
817 | npkts++; |
818 | } |
819 | dosend: |
	ret = sdma_send_txlist(req->sde,
			       iowait_get_ib_work(&pq->busy),
			       &req->txps, &count);
823 | req->seqsubmitted += count; |
824 | if (req->seqsubmitted == req->info.npkts) { |
825 | /* |
826 | * The txreq has already been submitted to the HW queue |
827 | * so we can free the AHG entry now. Corruption will not |
828 | * happen due to the sequential manner in which |
829 | * descriptors are processed. |
830 | */ |
831 | if (req->ahg_idx >= 0) |
			sdma_ahg_free(req->sde, req->ahg_idx);
833 | } |
834 | return ret; |
835 | |
836 | free_txreq: |
	sdma_txclean(pq->dd, &tx->txreq);
free_tx:
	kmem_cache_free(pq->txreq_cache, tx);
840 | return ret; |
841 | } |
842 | |
static int check_header_template(struct user_sdma_request *req,
844 | struct hfi1_pkt_header *hdr, u32 lrhlen, |
845 | u32 datalen) |
846 | { |
847 | /* |
848 | * Perform safety checks for any type of packet: |
	 * - transfer size is multiple of 64 bytes
850 | * - packet length is multiple of 4 bytes |
851 | * - packet length is not larger than MTU size |
852 | * |
853 | * These checks are only done for the first packet of the |
854 | * transfer since the header is "given" to us by user space. |
855 | * For the remainder of the packets we compute the values. |
856 | */ |
	if (req->info.fragsize % PIO_BLOCK_SIZE || lrhlen & 0x3 ||
	    lrhlen > get_lrh_len(*hdr, req->info.fragsize))
859 | return -EINVAL; |
860 | |
861 | if (req_opcode(req->info.ctrl) == EXPECTED) { |
862 | /* |
863 | * The header is checked only on the first packet. Furthermore, |
864 | * we ensure that at least one TID entry is copied when the |
865 | * request is submitted. Therefore, we don't have to verify that |
866 | * tididx points to something sane. |
867 | */ |
868 | u32 tidval = req->tids[req->tididx], |
869 | tidlen = EXP_TID_GET(tidval, LEN) * PAGE_SIZE, |
870 | tididx = EXP_TID_GET(tidval, IDX), |
871 | tidctrl = EXP_TID_GET(tidval, CTRL), |
872 | tidoff; |
873 | __le32 kval = hdr->kdeth.ver_tid_offset; |
874 | |
875 | tidoff = KDETH_GET(kval, OFFSET) * |
876 | (KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ? |
877 | KDETH_OM_LARGE : KDETH_OM_SMALL); |
878 | /* |
879 | * Expected receive packets have the following |
880 | * additional checks: |
881 | * - offset is not larger than the TID size |
882 | * - TIDCtrl values match between header and TID array |
883 | * - TID indexes match between header and TID array |
884 | */ |
885 | if ((tidoff + datalen > tidlen) || |
886 | KDETH_GET(kval, TIDCTRL) != tidctrl || |
887 | KDETH_GET(kval, TID) != tididx) |
888 | return -EINVAL; |
889 | } |
890 | return 0; |
891 | } |
892 | |
893 | /* |
894 | * Correctly set the BTH.PSN field based on type of |
895 | * transfer - eager packets can just increment the PSN but |
896 | * expected packets encode generation and sequence in the |
897 | * BTH.PSN field so just incrementing will result in errors. |
898 | */ |
899 | static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags) |
900 | { |
901 | u32 val = be32_to_cpu(bthpsn), |
902 | mask = (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffffull : |
903 | 0xffffffull), |
904 | psn = val & mask; |
905 | if (expct) |
906 | psn = (psn & ~HFI1_KDETH_BTH_SEQ_MASK) | |
907 | ((psn + frags) & HFI1_KDETH_BTH_SEQ_MASK); |
908 | else |
909 | psn = psn + frags; |
910 | return psn & mask; |
911 | } |
912 | |
static int set_txreq_header(struct user_sdma_request *req,
914 | struct user_sdma_txreq *tx, u32 datalen) |
915 | { |
916 | struct hfi1_user_sdma_pkt_q *pq = req->pq; |
917 | struct hfi1_pkt_header *hdr = &tx->hdr; |
918 | u8 omfactor; /* KDETH.OM */ |
919 | u16 pbclen; |
920 | int ret; |
	u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
922 | |
923 | /* Copy the header template to the request before modification */ |
924 | memcpy(hdr, &req->hdr, sizeof(*hdr)); |
925 | |
926 | /* |
927 | * Check if the PBC and LRH length are mismatched. If so |
928 | * adjust both in the header. |
929 | */ |
930 | pbclen = le16_to_cpu(hdr->pbc[0]); |
931 | if (PBC2LRH(pbclen) != lrhlen) { |
932 | pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen); |
933 | hdr->pbc[0] = cpu_to_le16(pbclen); |
934 | hdr->lrh[2] = cpu_to_be16(lrhlen >> 2); |
935 | /* |
936 | * Third packet |
937 | * This is the first packet in the sequence that has |
938 | * a "static" size that can be used for the rest of |
939 | * the packets (besides the last one). |
940 | */ |
941 | if (unlikely(req->seqnum == 2)) { |
942 | /* |
943 | * From this point on the lengths in both the |
944 | * PBC and LRH are the same until the last |
945 | * packet. |
946 | * Adjust the template so we don't have to update |
947 | * every packet |
948 | */ |
949 | req->hdr.pbc[0] = hdr->pbc[0]; |
950 | req->hdr.lrh[2] = hdr->lrh[2]; |
951 | } |
952 | } |
953 | /* |
954 | * We only have to modify the header if this is not the |
955 | * first packet in the request. Otherwise, we use the |
956 | * header given to us. |
957 | */ |
958 | if (unlikely(!req->seqnum)) { |
959 | ret = check_header_template(req, hdr, lrhlen, datalen); |
960 | if (ret) |
961 | return ret; |
962 | goto done; |
963 | } |
964 | |
965 | hdr->bth[2] = cpu_to_be32( |
966 | set_pkt_bth_psn(hdr->bth[2], |
967 | (req_opcode(req->info.ctrl) == EXPECTED), |
968 | req->seqnum)); |
969 | |
970 | /* Set ACK request on last packet */ |
971 | if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK)) |
972 | hdr->bth[2] |= cpu_to_be32(1UL << 31); |
973 | |
974 | /* Set the new offset */ |
975 | hdr->kdeth.swdata[6] = cpu_to_le32(req->koffset); |
976 | /* Expected packets have to fill in the new TID information */ |
977 | if (req_opcode(req->info.ctrl) == EXPECTED) { |
978 | tidval = req->tids[req->tididx]; |
979 | /* |
980 | * If the offset puts us at the end of the current TID, |
981 | * advance everything. |
982 | */ |
983 | if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) * |
984 | PAGE_SIZE)) { |
985 | req->tidoffset = 0; |
986 | /* |
987 | * Since we don't copy all the TIDs, all at once, |
988 | * we have to check again. |
989 | */ |
990 | if (++req->tididx > req->n_tids - 1 || |
991 | !req->tids[req->tididx]) { |
992 | return -EINVAL; |
993 | } |
994 | tidval = req->tids[req->tididx]; |
995 | } |
996 | omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >= |
997 | KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE_SHIFT : |
998 | KDETH_OM_SMALL_SHIFT; |
999 | /* Set KDETH.TIDCtrl based on value for this TID. */ |
1000 | KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL, |
1001 | EXP_TID_GET(tidval, CTRL)); |
1002 | /* Set KDETH.TID based on value for this TID */ |
1003 | KDETH_SET(hdr->kdeth.ver_tid_offset, TID, |
1004 | EXP_TID_GET(tidval, IDX)); |
1005 | /* Clear KDETH.SH when DISABLE_SH flag is set */ |
1006 | if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) |
1007 | KDETH_SET(hdr->kdeth.ver_tid_offset, SH, 0); |
1008 | /* |
1009 | * Set the KDETH.OFFSET and KDETH.OM based on size of |
1010 | * transfer. |
1011 | */ |
		trace_hfi1_sdma_user_tid_info(
			pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx,
			req->tidoffset, req->tidoffset >> omfactor,
			omfactor != KDETH_OM_SMALL_SHIFT);
1016 | KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET, |
1017 | req->tidoffset >> omfactor); |
1018 | KDETH_SET(hdr->kdeth.ver_tid_offset, OM, |
1019 | omfactor != KDETH_OM_SMALL_SHIFT); |
1020 | } |
1021 | done: |
	trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
				    req->info.comp_idx, hdr, tidval);
	return sdma_txadd_kvaddr(pq->dd, &tx->txreq, hdr, sizeof(*hdr));
1025 | } |
1026 | |
static int set_txreq_header_ahg(struct user_sdma_request *req,
1028 | struct user_sdma_txreq *tx, u32 datalen) |
1029 | { |
1030 | u32 ahg[AHG_KDETH_ARRAY_SIZE]; |
1031 | int idx = 0; |
1032 | u8 omfactor; /* KDETH.OM */ |
1033 | struct hfi1_user_sdma_pkt_q *pq = req->pq; |
1034 | struct hfi1_pkt_header *hdr = &req->hdr; |
1035 | u16 pbclen = le16_to_cpu(hdr->pbc[0]); |
	u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
1037 | size_t array_size = ARRAY_SIZE(ahg); |
1038 | |
1039 | if (PBC2LRH(pbclen) != lrhlen) { |
1040 | /* PBC.PbcLengthDWs */ |
		idx = ahg_header_set(ahg, idx, array_size, 0, 0, 12,
				     (__force u16)cpu_to_le16(LRH2PBC(lrhlen)));
1043 | if (idx < 0) |
1044 | return idx; |
1045 | /* LRH.PktLen (we need the full 16 bits due to byte swap) */ |
		idx = ahg_header_set(ahg, idx, array_size, 3, 0, 16,
				     (__force u16)cpu_to_be16(lrhlen >> 2));
1048 | if (idx < 0) |
1049 | return idx; |
1050 | } |
1051 | |
1052 | /* |
1053 | * Do the common updates |
1054 | */ |
1055 | /* BTH.PSN and BTH.A */ |
1056 | val32 = (be32_to_cpu(hdr->bth[2]) + req->seqnum) & |
1057 | (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff); |
1058 | if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK)) |
1059 | val32 |= 1UL << 31; |
	idx = ahg_header_set(ahg, idx, array_size, 6, 0, 16,
			     (__force u16)cpu_to_be16(val32 >> 16));
1062 | if (idx < 0) |
1063 | return idx; |
	idx = ahg_header_set(ahg, idx, array_size, 6, 16, 16,
			     (__force u16)cpu_to_be16(val32 & 0xffff));
1066 | if (idx < 0) |
1067 | return idx; |
1068 | /* KDETH.Offset */ |
	idx = ahg_header_set(ahg, idx, array_size, 15, 0, 16,
			     (__force u16)cpu_to_le16(req->koffset & 0xffff));
	if (idx < 0)
		return idx;
	idx = ahg_header_set(ahg, idx, array_size, 15, 16, 16,
			     (__force u16)cpu_to_le16(req->koffset >> 16));
1075 | if (idx < 0) |
1076 | return idx; |
1077 | if (req_opcode(req->info.ctrl) == EXPECTED) { |
1078 | __le16 val; |
1079 | |
1080 | tidval = req->tids[req->tididx]; |
1081 | |
1082 | /* |
1083 | * If the offset puts us at the end of the current TID, |
1084 | * advance everything. |
1085 | */ |
1086 | if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) * |
1087 | PAGE_SIZE)) { |
1088 | req->tidoffset = 0; |
1089 | /* |
1090 | * Since we don't copy all the TIDs, all at once, |
1091 | * we have to check again. |
1092 | */ |
1093 | if (++req->tididx > req->n_tids - 1 || |
1094 | !req->tids[req->tididx]) |
1095 | return -EINVAL; |
1096 | tidval = req->tids[req->tididx]; |
1097 | } |
1098 | omfactor = ((EXP_TID_GET(tidval, LEN) * |
1099 | PAGE_SIZE) >= |
1100 | KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT : |
1101 | KDETH_OM_SMALL_SHIFT; |
1102 | /* KDETH.OM and KDETH.OFFSET (TID) */ |
		idx = ahg_header_set(
				ahg, idx, array_size, 7, 0, 16,
				((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
				 ((req->tidoffset >> omfactor)
				  & 0x7fff)));
1108 | if (idx < 0) |
1109 | return idx; |
1110 | /* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */ |
1111 | val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) | |
1112 | (EXP_TID_GET(tidval, IDX) & 0x3ff)); |
1113 | |
1114 | if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) { |
1115 | val |= cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset, |
1116 | INTR) << |
1117 | AHG_KDETH_INTR_SHIFT)); |
1118 | } else { |
1119 | val |= KDETH_GET(hdr->kdeth.ver_tid_offset, SH) ? |
1120 | cpu_to_le16(0x1 << AHG_KDETH_SH_SHIFT) : |
1121 | cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset, |
1122 | INTR) << |
1123 | AHG_KDETH_INTR_SHIFT)); |
1124 | } |
1125 | |
		idx = ahg_header_set(ahg, idx, array_size,
				     7, 16, 14, (__force u16)val);
1128 | if (idx < 0) |
1129 | return idx; |
1130 | } |
1131 | |
	trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
					req->info.comp_idx, req->sde->this_idx,
					req->ahg_idx, ahg, idx, tidval);
	sdma_txinit_ahg(&tx->txreq,
			SDMA_TXREQ_F_USE_AHG,
			datalen, req->ahg_idx, idx,
			ahg, sizeof(req->hdr),
			user_sdma_txreq_cb);
1140 | |
1141 | return idx; |
1142 | } |
1143 | |
1144 | /** |
1145 | * user_sdma_txreq_cb() - SDMA tx request completion callback. |
1146 | * @txreq: valid sdma tx request |
1147 | * @status: success/failure of request |
1148 | * |
1149 | * Called when the SDMA progress state machine gets notification that |
1150 | * the SDMA descriptors for this tx request have been processed by the |
1151 | * DMA engine. Called in interrupt context. |
1152 | * Only do work on completed sequences. |
1153 | */ |
1154 | static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) |
1155 | { |
1156 | struct user_sdma_txreq *tx = |
1157 | container_of(txreq, struct user_sdma_txreq, txreq); |
1158 | struct user_sdma_request *req; |
1159 | struct hfi1_user_sdma_pkt_q *pq; |
1160 | struct hfi1_user_sdma_comp_q *cq; |
1161 | enum hfi1_sdma_comp_state state = COMPLETE; |
1162 | |
1163 | if (!tx->req) |
1164 | return; |
1165 | |
1166 | req = tx->req; |
1167 | pq = req->pq; |
1168 | cq = req->cq; |
1169 | |
1170 | if (status != SDMA_TXREQ_S_OK) { |
1171 | SDMA_DBG(req, "SDMA completion with error %d" , |
1172 | status); |
1173 | WRITE_ONCE(req->has_error, 1); |
1174 | state = ERROR; |
1175 | } |
1176 | |
1177 | req->seqcomp = tx->seqnum; |
	kmem_cache_free(pq->txreq_cache, tx);
1179 | |
1180 | /* sequence isn't complete? We are done */ |
1181 | if (req->seqcomp != req->info.npkts - 1) |
1182 | return; |
1183 | |
1184 | user_sdma_free_request(req); |
	set_comp_state(pq, cq, req->info.comp_idx, state, status);
1186 | pq_update(pq); |
1187 | } |
1188 | |
1189 | static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq) |
1190 | { |
	if (atomic_dec_and_test(&pq->n_reqs))
1192 | wake_up(&pq->wait); |
1193 | } |
1194 | |
1195 | static void user_sdma_free_request(struct user_sdma_request *req) |
1196 | { |
	if (!list_empty(&req->txps)) {
1198 | struct sdma_txreq *t, *p; |
1199 | |
1200 | list_for_each_entry_safe(t, p, &req->txps, list) { |
1201 | struct user_sdma_txreq *tx = |
1202 | container_of(t, struct user_sdma_txreq, txreq); |
			list_del_init(&t->list);
			sdma_txclean(req->pq->dd, t);
			kmem_cache_free(req->pq->txreq_cache, tx);
1206 | } |
1207 | } |
1208 | |
	kfree(req->tids);
	clear_bit(req->info.comp_idx, req->pq->req_in_use);
1211 | } |
1212 | |
1213 | static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq, |
1214 | struct hfi1_user_sdma_comp_q *cq, |
1215 | u16 idx, enum hfi1_sdma_comp_state state, |
1216 | int ret) |
1217 | { |
1218 | if (state == ERROR) |
1219 | cq->comps[idx].errcode = -ret; |
1220 | smp_wmb(); /* make sure errcode is visible first */ |
1221 | cq->comps[idx].status = state; |
	trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
					idx, state, ret);
1224 | } |
1225 | |