1 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause |
2 | |
3 | /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ |
4 | /* Copyright (c) 2008-2019, IBM Corporation */ |
5 | |
6 | #include <linux/errno.h> |
7 | #include <linux/types.h> |
8 | #include <linux/net.h> |
9 | #include <linux/scatterlist.h> |
10 | #include <linux/highmem.h> |
11 | |
12 | #include <rdma/iw_cm.h> |
13 | #include <rdma/ib_verbs.h> |
14 | |
15 | #include "siw.h" |
16 | #include "siw_verbs.h" |
17 | #include "siw_mem.h" |
18 | |
19 | /* |
20 | * siw_rx_umem() |
21 | * |
22 | * Receive data of @len into target referenced by @dest_addr. |
23 | * |
24 | * @srx: Receive Context |
25 | * @umem: siw representation of target memory |
26 | * @dest_addr: user virtual address |
27 | * @len: number of bytes to place |
28 | */ |
29 | static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem, |
30 | u64 dest_addr, int len) |
31 | { |
32 | int copied = 0; |
33 | |
34 | while (len) { |
35 | struct page *p; |
36 | int pg_off, bytes, rv; |
37 | void *dest; |
38 | |
39 | p = siw_get_upage(umem, dest_addr); |
40 | if (unlikely(!p)) { |
41 | pr_warn("siw: %s: [QP %u]: bogus addr: %pK, %pK\n", |
42 | __func__, qp_id(rx_qp(srx)), |
43 | (void *)(uintptr_t)dest_addr, |
44 | (void *)(uintptr_t)umem->fp_addr); |
45 | /* siw internal error */ |
46 | srx->skb_copied += copied; |
47 | srx->skb_new -= copied; |
48 | |
49 | return -EFAULT; |
50 | } |
51 | pg_off = dest_addr & ~PAGE_MASK; |
52 | bytes = min(len, (int)PAGE_SIZE - pg_off); |
53 | |
54 | siw_dbg_qp(rx_qp(srx), "page %pK, bytes=%u\n", p, bytes); |
55 | |
56 | dest = kmap_atomic(p); |
57 | rv = skb_copy_bits(srx->skb, srx->skb_offset, dest + pg_off, |
58 | bytes); |
59 | |
60 | if (unlikely(rv)) { |
61 | kunmap_atomic(dest); |
62 | srx->skb_copied += copied; |
63 | srx->skb_new -= copied; |
64 | |
65 | pr_warn("siw: [QP %u]: %s, len %d, page %p, rv %d\n", |
66 | qp_id(rx_qp(srx)), __func__, len, p, rv); |
67 | |
68 | return -EFAULT; |
69 | } |
70 | if (srx->mpa_crc_hd) { |
71 | if (rdma_is_kernel_res(&rx_qp(srx)->base_qp.res)) { |
72 | crypto_shash_update(srx->mpa_crc_hd, |
73 | (u8 *)(dest + pg_off), bytes); |
74 | kunmap_atomic(dest); |
75 | } else { |
76 | kunmap_atomic(dest); |
77 | /* |
78 | * Do CRC on original, not target buffer. |
79 | * Some user land applications may |
80 | * concurrently write the target buffer, |
81 | * which would yield a broken CRC. |
82 | * Walking the skb twice is very inefficient. |
83 | * Folding the CRC into skb_copy_bits() |
84 | * would be much better, but is currently |
85 | * not supported. |
86 | */ |
87 | siw_crc_skb(srx, bytes); |
88 | } |
89 | } else { |
90 | kunmap_atomic(dest); |
91 | } |
92 | srx->skb_offset += bytes; |
93 | copied += bytes; |
94 | len -= bytes; |
95 | dest_addr += bytes; |
96 | pg_off = 0; |
97 | } |
98 | srx->skb_copied += copied; |
99 | srx->skb_new -= copied; |
100 | |
101 | return copied; |
102 | } |
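/*
 * A minimal illustration of the per-page chunking done in siw_rx_umem()
 * above (hypothetical numbers, assuming PAGE_SIZE == 4096): a receive of
 * len = 32 at dest_addr = 0x1ff8 is placed in two chunks:
 *
 *   pg_off = 0x1ff8 & ~PAGE_MASK = 4088
 *   bytes  = min(32, 4096 - 4088) = 8      (tail of the first page)
 *   dest_addr += 8; len -= 8;
 *   pg_off = 0; bytes = min(24, 4096) = 24 (start of the next page)
 */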
103 | |
104 | static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len) |
105 | { |
106 | int rv; |
107 | |
108 | siw_dbg_qp(rx_qp(srx), "kva: 0x%pK, len: %u\n", kva, len); |
109 | |
110 | rv = skb_copy_bits(srx->skb, srx->skb_offset, kva, len); |
111 | if (unlikely(rv)) { |
112 | pr_warn("siw: [QP %u]: %s, len %d, kva 0x%pK, rv %d\n", |
113 | qp_id(rx_qp(srx)), __func__, len, kva, rv); |
114 | |
115 | return rv; |
116 | } |
117 | if (srx->mpa_crc_hd) |
118 | crypto_shash_update(srx->mpa_crc_hd, (u8 *)kva, len); |
119 | |
120 | srx->skb_offset += len; |
121 | srx->skb_copied += len; |
122 | srx->skb_new -= len; |
123 | |
124 | return len; |
125 | } |
126 | |
127 | static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx, |
128 | struct siw_mem *mem, u64 addr, int len) |
129 | { |
130 | struct siw_pbl *pbl = mem->pbl; |
131 | u64 offset = addr - mem->va; |
132 | int copied = 0; |
133 | |
134 | while (len) { |
135 | int bytes; |
136 | dma_addr_t buf_addr = |
137 | siw_pbl_get_buffer(pbl, offset, &bytes, pbl_idx); |
138 | if (!buf_addr) |
139 | break; |
140 | |
141 | bytes = min(bytes, len); |
142 | if (siw_rx_kva(srx, ib_virt_dma_to_ptr(buf_addr), bytes) == |
143 | bytes) { |
144 | copied += bytes; |
145 | offset += bytes; |
146 | len -= bytes; |
147 | } else { |
148 | break; |
149 | } |
150 | } |
151 | return copied; |
152 | } |
153 | |
154 | /* |
155 | * siw_rresp_check_ntoh() |
156 | * |
157 | * Check incoming RRESP fragment header against expected |
158 | * header values and update expected values for potential next |
159 | * fragment. |
160 | * |
161 | * NOTE: This function must be called only if a RRESP DDP segment |
162 | * starts but not for fragmented consecutive pieces of an |
163 | * already started DDP segment. |
164 | */ |
165 | static int siw_rresp_check_ntoh(struct siw_rx_stream *srx, |
166 | struct siw_rx_fpdu *frx) |
167 | { |
168 | struct iwarp_rdma_rresp *rresp = &srx->hdr.rresp; |
169 | struct siw_wqe *wqe = &frx->wqe_active; |
170 | enum ddp_ecode ecode; |
171 | |
172 | u32 sink_stag = be32_to_cpu(rresp->sink_stag); |
173 | u64 sink_to = be64_to_cpu(rresp->sink_to); |
174 | |
175 | if (frx->first_ddp_seg) { |
176 | srx->ddp_stag = wqe->sqe.sge[0].lkey; |
177 | srx->ddp_to = wqe->sqe.sge[0].laddr; |
178 | frx->pbl_idx = 0; |
179 | } |
180 | /* Below checks extend beyond the semantics of DDP, and |
181 | * into RDMAP: |
182 | * We check if the read response matches exactly the |
183 | * read request which was sent to the remote peer to |
184 | * trigger this read response. RFC5040/5041 do not |
185 | * always have a proper error code for the detected |
186 | * error cases. We choose 'base or bounds error' for |
187 | * cases where the inbound STag is valid, but offset |
188 | * or length do not match our response receive state. |
189 | */ |
190 | if (unlikely(srx->ddp_stag != sink_stag)) { |
191 | pr_warn("siw: [QP %u]: rresp stag: %08x != %08x\n", |
192 | qp_id(rx_qp(srx)), sink_stag, srx->ddp_stag); |
193 | ecode = DDP_ECODE_T_INVALID_STAG; |
194 | goto error; |
195 | } |
196 | if (unlikely(srx->ddp_to != sink_to)) { |
197 | pr_warn("siw: [QP %u]: rresp off: %016llx != %016llx\n", |
198 | qp_id(rx_qp(srx)), (unsigned long long)sink_to, |
199 | (unsigned long long)srx->ddp_to); |
200 | ecode = DDP_ECODE_T_BASE_BOUNDS; |
201 | goto error; |
202 | } |
203 | if (unlikely(!frx->more_ddp_segs && |
204 | (wqe->processed + srx->fpdu_part_rem != wqe->bytes))) { |
205 | pr_warn("siw: [QP %u]: rresp len: %d != %d\n", |
206 | qp_id(rx_qp(srx)), |
207 | wqe->processed + srx->fpdu_part_rem, wqe->bytes); |
208 | ecode = DDP_ECODE_T_BASE_BOUNDS; |
209 | goto error; |
210 | } |
211 | return 0; |
212 | error: |
213 | siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP, |
214 | DDP_ETYPE_TAGGED_BUF, ecode, 0); |
215 | return -EINVAL; |
216 | } |
217 | |
218 | /* |
219 | * siw_write_check_ntoh() |
220 | * |
221 | * Check incoming WRITE fragment header against expected |
222 | * header values and update expected values for potential next |
223 | * fragment |
224 | * |
225 | * NOTE: This function must be called only if a WRITE DDP segment |
226 | * starts but not for fragmented consecutive pieces of an |
227 | * already started DDP segment. |
228 | */ |
229 | static int siw_write_check_ntoh(struct siw_rx_stream *srx, |
230 | struct siw_rx_fpdu *frx) |
231 | { |
232 | struct iwarp_rdma_write *write = &srx->hdr.rwrite; |
233 | enum ddp_ecode ecode; |
234 | |
235 | u32 sink_stag = be32_to_cpu(write->sink_stag); |
236 | u64 sink_to = be64_to_cpu(write->sink_to); |
237 | |
238 | if (frx->first_ddp_seg) { |
239 | srx->ddp_stag = sink_stag; |
240 | srx->ddp_to = sink_to; |
241 | frx->pbl_idx = 0; |
242 | } else { |
243 | if (unlikely(srx->ddp_stag != sink_stag)) { |
244 | pr_warn("siw: [QP %u]: write stag: %08x != %08x\n", |
245 | qp_id(rx_qp(srx)), sink_stag, |
246 | srx->ddp_stag); |
247 | ecode = DDP_ECODE_T_INVALID_STAG; |
248 | goto error; |
249 | } |
250 | if (unlikely(srx->ddp_to != sink_to)) { |
251 | pr_warn("siw: [QP %u]: write off: %016llx != %016llx\n", |
252 | qp_id(rx_qp(srx)), |
253 | (unsigned long long)sink_to, |
254 | (unsigned long long)srx->ddp_to); |
255 | ecode = DDP_ECODE_T_BASE_BOUNDS; |
256 | goto error; |
257 | } |
258 | } |
259 | return 0; |
260 | error: |
261 | siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP, |
262 | DDP_ETYPE_TAGGED_BUF, ecode, 0); |
263 | return -EINVAL; |
264 | } |
265 | |
266 | /* |
267 | * siw_send_check_ntoh() |
268 | * |
269 | * Check incoming SEND fragment header against expected |
270 | * header values and update expected MSN if no next |
271 | * fragment expected |
272 | * |
273 | * NOTE: This function must be called only if a SEND DDP segment |
274 | * starts but not for fragmented consecutive pieces of an |
275 | * already started DDP segment. |
276 | */ |
277 | static int siw_send_check_ntoh(struct siw_rx_stream *srx, |
278 | struct siw_rx_fpdu *frx) |
279 | { |
280 | struct iwarp_send_inv *send = &srx->hdr.send_inv; |
281 | struct siw_wqe *wqe = &frx->wqe_active; |
282 | enum ddp_ecode ecode; |
283 | |
284 | u32 ddp_msn = be32_to_cpu(send->ddp_msn); |
285 | u32 ddp_mo = be32_to_cpu(send->ddp_mo); |
286 | u32 ddp_qn = be32_to_cpu(send->ddp_qn); |
287 | |
288 | if (unlikely(ddp_qn != RDMAP_UNTAGGED_QN_SEND)) { |
289 | pr_warn("siw: [QP %u]: invalid ddp qn %d for send\n", |
290 | qp_id(rx_qp(srx)), ddp_qn); |
291 | ecode = DDP_ECODE_UT_INVALID_QN; |
292 | goto error; |
293 | } |
294 | if (unlikely(ddp_msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND])) { |
295 | pr_warn("siw: [QP %u]: send msn: %u != %u\n", |
296 | qp_id(rx_qp(srx)), ddp_msn, |
297 | srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]); |
298 | ecode = DDP_ECODE_UT_INVALID_MSN_RANGE; |
299 | goto error; |
300 | } |
301 | if (unlikely(ddp_mo != wqe->processed)) { |
302 | pr_warn("siw: [QP %u], send mo: %u != %u\n", |
303 | qp_id(rx_qp(srx)), ddp_mo, wqe->processed); |
304 | ecode = DDP_ECODE_UT_INVALID_MO; |
305 | goto error; |
306 | } |
307 | if (frx->first_ddp_seg) { |
308 | /* initialize user memory write position */ |
309 | frx->sge_idx = 0; |
310 | frx->sge_off = 0; |
311 | frx->pbl_idx = 0; |
312 | |
313 | /* only valid for SEND_INV and SEND_SE_INV operations */ |
314 | srx->inval_stag = be32_to_cpu(send->inval_stag); |
315 | } |
316 | if (unlikely(wqe->bytes < wqe->processed + srx->fpdu_part_rem)) { |
317 | siw_dbg_qp(rx_qp(srx), "receive space short: %d - %d < %d\n", |
318 | wqe->bytes, wqe->processed, srx->fpdu_part_rem); |
319 | wqe->wc_status = SIW_WC_LOC_LEN_ERR; |
320 | ecode = DDP_ECODE_UT_INVALID_MSN_NOBUF; |
321 | goto error; |
322 | } |
323 | return 0; |
324 | error: |
325 | siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP, |
326 | DDP_ETYPE_UNTAGGED_BUF, ecode, 0); |
327 | return -EINVAL; |
328 | } |
329 | |
330 | static struct siw_wqe *siw_rqe_get(struct siw_qp *qp) |
331 | { |
332 | struct siw_rqe *rqe; |
333 | struct siw_srq *srq; |
334 | struct siw_wqe *wqe = NULL; |
335 | bool srq_event = false; |
336 | unsigned long flags; |
337 | |
338 | srq = qp->srq; |
339 | if (srq) { |
340 | spin_lock_irqsave(&srq->lock, flags); |
341 | if (unlikely(!srq->num_rqe)) |
342 | goto out; |
343 | |
344 | rqe = &srq->recvq[srq->rq_get % srq->num_rqe]; |
345 | } else { |
346 | if (unlikely(!qp->recvq)) |
347 | goto out; |
348 | |
349 | rqe = &qp->recvq[qp->rq_get % qp->attrs.rq_size]; |
350 | } |
351 | if (likely(rqe->flags == SIW_WQE_VALID)) { |
352 | int num_sge = rqe->num_sge; |
353 | |
354 | if (likely(num_sge <= SIW_MAX_SGE)) { |
355 | int i = 0; |
356 | |
357 | wqe = rx_wqe(&qp->rx_untagged); |
358 | rx_type(wqe) = SIW_OP_RECEIVE; |
359 | wqe->wr_status = SIW_WR_INPROGRESS; |
360 | wqe->bytes = 0; |
361 | wqe->processed = 0; |
362 | |
363 | wqe->rqe.id = rqe->id; |
364 | wqe->rqe.num_sge = num_sge; |
365 | |
366 | while (i < num_sge) { |
367 | wqe->rqe.sge[i].laddr = rqe->sge[i].laddr; |
368 | wqe->rqe.sge[i].lkey = rqe->sge[i].lkey; |
369 | wqe->rqe.sge[i].length = rqe->sge[i].length; |
370 | wqe->bytes += wqe->rqe.sge[i].length; |
371 | wqe->mem[i] = NULL; |
372 | i++; |
373 | } |
374 | /* can be re-used by appl */ |
375 | smp_store_mb(rqe->flags, 0); |
376 | } else { |
377 | siw_dbg_qp(qp, "too many sge's: %d\n", rqe->num_sge); |
378 | if (srq) |
379 | spin_unlock_irqrestore(&srq->lock, flags); |
380 | return NULL; |
381 | } |
382 | if (!srq) { |
383 | qp->rq_get++; |
384 | } else { |
385 | if (srq->armed) { |
386 | /* Test SRQ limit */ |
387 | u32 off = (srq->rq_get + srq->limit) % |
388 | srq->num_rqe; |
389 | struct siw_rqe *rqe2 = &srq->recvq[off]; |
390 | |
391 | if (!(rqe2->flags & SIW_WQE_VALID)) { |
392 | srq->armed = false; |
393 | srq_event = true; |
394 | } |
395 | } |
396 | srq->rq_get++; |
397 | } |
398 | } |
399 | out: |
400 | if (srq) { |
401 | spin_unlock_irqrestore(&srq->lock, flags); |
402 | if (srq_event) |
403 | siw_srq_event(srq, IB_EVENT_SRQ_LIMIT_REACHED); |
404 | } |
405 | return wqe; |
406 | } |
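/*
 * Example of the SRQ limit test in siw_rqe_get() above (hypothetical
 * numbers, assuming num_rqe == 8, rq_get == 5 and an armed limit of 2):
 * the slot checked is (5 + 2) % 8 == 7. If recvq[7] does not carry
 * SIW_WQE_VALID, fewer than 'limit' receives remain posted, so the SRQ
 * gets disarmed and IB_EVENT_SRQ_LIMIT_REACHED is reported.
 */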
407 | |
408 | static int siw_rx_data(struct siw_mem *mem_p, struct siw_rx_stream *srx, |
409 | unsigned int *pbl_idx, u64 addr, int bytes) |
410 | { |
411 | int rv; |
412 | |
413 | if (mem_p->mem_obj == NULL) |
414 | rv = siw_rx_kva(srx, ib_virt_dma_to_ptr(addr), bytes); |
415 | else if (!mem_p->is_pbl) |
416 | rv = siw_rx_umem(srx, mem_p->umem, addr, bytes); |
417 | else |
418 | rv = siw_rx_pbl(srx, pbl_idx, mem_p, addr, bytes); |
419 | return rv; |
420 | } |
421 | |
422 | /* |
423 | * siw_proc_send: |
424 | * |
425 | * Process one incoming SEND and place data into memory referenced by |
426 | * receive wqe. |
427 | * |
428 | * Function supports partially received sends (suspending/resuming |
429 | * current receive wqe processing) |
430 | * |
431 | * return value: |
432 | * 0: reached the end of a DDP segment |
433 | * -EAGAIN: to be called again to finish the DDP segment |
434 | */ |
435 | int siw_proc_send(struct siw_qp *qp) |
436 | { |
437 | struct siw_rx_stream *srx = &qp->rx_stream; |
438 | struct siw_rx_fpdu *frx = &qp->rx_untagged; |
439 | struct siw_wqe *wqe; |
440 | u32 data_bytes; /* all data bytes available */ |
441 | u32 rcvd_bytes; /* sum of data bytes rcvd */ |
442 | int rv = 0; |
443 | |
444 | if (frx->first_ddp_seg) { |
445 | wqe = siw_rqe_get(qp); |
446 | if (unlikely(!wqe)) { |
447 | siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, |
448 | DDP_ETYPE_UNTAGGED_BUF, |
449 | DDP_ECODE_UT_INVALID_MSN_NOBUF, 0); |
450 | return -ENOENT; |
451 | } |
452 | } else { |
453 | wqe = rx_wqe(frx); |
454 | } |
455 | if (srx->state == SIW_GET_DATA_START) { |
456 | rv = siw_send_check_ntoh(srx, frx); |
457 | if (unlikely(rv)) { |
458 | siw_qp_event(qp, IB_EVENT_QP_FATAL); |
459 | return rv; |
460 | } |
461 | if (!srx->fpdu_part_rem) /* zero length SEND */ |
462 | return 0; |
463 | } |
464 | data_bytes = min(srx->fpdu_part_rem, srx->skb_new); |
465 | rcvd_bytes = 0; |
466 | |
467 | /* A zero length SEND will skip below loop */ |
468 | while (data_bytes) { |
469 | struct ib_pd *pd; |
470 | struct siw_mem **mem, *mem_p; |
471 | struct siw_sge *sge; |
472 | u32 sge_bytes; /* data bytes avail for SGE */ |
473 | |
474 | sge = &wqe->rqe.sge[frx->sge_idx]; |
475 | |
476 | if (!sge->length) { |
477 | /* just skip empty sge's */ |
478 | frx->sge_idx++; |
479 | frx->sge_off = 0; |
480 | frx->pbl_idx = 0; |
481 | continue; |
482 | } |
483 | sge_bytes = min(data_bytes, sge->length - frx->sge_off); |
484 | mem = &wqe->mem[frx->sge_idx]; |
485 | |
486 | /* |
487 | * check with QP's PD if no SRQ present, SRQ's PD otherwise |
488 | */ |
489 | pd = qp->srq == NULL ? qp->pd : qp->srq->base_srq.pd; |
490 | |
491 | rv = siw_check_sge(pd, sge, mem, IB_ACCESS_LOCAL_WRITE, |
492 | frx->sge_off, sge_bytes); |
493 | if (unlikely(rv)) { |
494 | siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, |
495 | DDP_ETYPE_CATASTROPHIC, |
496 | DDP_ECODE_CATASTROPHIC, 0); |
497 | |
498 | siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR); |
499 | break; |
500 | } |
501 | mem_p = *mem; |
502 | rv = siw_rx_data(mem_p, srx, &frx->pbl_idx, |
503 | sge->laddr + frx->sge_off, sge_bytes); |
504 | if (unlikely(rv != sge_bytes)) { |
505 | wqe->processed += rcvd_bytes; |
506 | |
507 | siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, |
508 | DDP_ETYPE_CATASTROPHIC, |
509 | DDP_ECODE_CATASTROPHIC, 0); |
510 | return -EINVAL; |
511 | } |
512 | frx->sge_off += rv; |
513 | |
514 | if (frx->sge_off == sge->length) { |
515 | frx->sge_idx++; |
516 | frx->sge_off = 0; |
517 | frx->pbl_idx = 0; |
518 | } |
519 | data_bytes -= rv; |
520 | rcvd_bytes += rv; |
521 | |
522 | srx->fpdu_part_rem -= rv; |
523 | srx->fpdu_part_rcvd += rv; |
524 | } |
525 | wqe->processed += rcvd_bytes; |
526 | |
527 | if (!srx->fpdu_part_rem) |
528 | return 0; |
529 | |
530 | return (rv < 0) ? rv : -EAGAIN; |
531 | } |
532 | |
533 | /* |
534 | * siw_proc_write: |
535 | * |
536 | * Place incoming WRITE after referencing and checking target buffer |
537 | * |
538 | * Function supports partially received WRITEs (suspending/resuming |
539 | * current receive processing) |
540 | * |
541 | * return value: |
542 | * 0: reached the end of a DDP segment |
543 | * -EAGAIN: to be called again to finish the DDP segment |
544 | */ |
545 | int siw_proc_write(struct siw_qp *qp) |
546 | { |
547 | struct siw_rx_stream *srx = &qp->rx_stream; |
548 | struct siw_rx_fpdu *frx = &qp->rx_tagged; |
549 | struct siw_mem *mem; |
550 | int bytes, rv; |
551 | |
552 | if (srx->state == SIW_GET_DATA_START) { |
553 | if (!srx->fpdu_part_rem) /* zero length WRITE */ |
554 | return 0; |
555 | |
556 | rv = siw_write_check_ntoh(srx, frx); |
557 | if (unlikely(rv)) { |
558 | siw_qp_event(qp, IB_EVENT_QP_FATAL); |
559 | return rv; |
560 | } |
561 | } |
562 | bytes = min(srx->fpdu_part_rem, srx->skb_new); |
563 | |
564 | if (frx->first_ddp_seg) { |
565 | struct siw_wqe *wqe = rx_wqe(frx); |
566 | |
567 | rx_mem(frx) = siw_mem_id2obj(qp->sdev, srx->ddp_stag >> 8); |
568 | if (unlikely(!rx_mem(frx))) { |
569 | siw_dbg_qp(qp, |
570 | "sink stag not found/invalid, stag 0x%08x\n" , |
571 | srx->ddp_stag); |
572 | |
573 | siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, |
574 | DDP_ETYPE_TAGGED_BUF, |
575 | DDP_ECODE_T_INVALID_STAG, 0); |
576 | return -EINVAL; |
577 | } |
578 | wqe->rqe.num_sge = 1; |
579 | rx_type(wqe) = SIW_OP_WRITE; |
580 | wqe->wr_status = SIW_WR_INPROGRESS; |
581 | } |
582 | mem = rx_mem(frx); |
583 | |
584 | /* |
585 | * Check if application re-registered memory with different |
586 | * key field of STag. |
587 | */ |
588 | if (unlikely(mem->stag != srx->ddp_stag)) { |
589 | siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, |
590 | DDP_ETYPE_TAGGED_BUF, |
591 | DDP_ECODE_T_INVALID_STAG, 0); |
592 | return -EINVAL; |
593 | } |
594 | rv = siw_check_mem(qp->pd, mem, srx->ddp_to + srx->fpdu_part_rcvd, |
595 | IB_ACCESS_REMOTE_WRITE, bytes); |
596 | if (unlikely(rv)) { |
597 | siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, |
598 | DDP_ETYPE_TAGGED_BUF, siw_tagged_error(-rv), |
599 | 0); |
600 | |
601 | siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR); |
602 | |
603 | return -EINVAL; |
604 | } |
605 | |
606 | rv = siw_rx_data(mem, srx, &frx->pbl_idx, |
607 | srx->ddp_to + srx->fpdu_part_rcvd, bytes); |
608 | if (unlikely(rv != bytes)) { |
609 | siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, |
610 | DDP_ETYPE_CATASTROPHIC, |
611 | DDP_ECODE_CATASTROPHIC, 0); |
612 | return -EINVAL; |
613 | } |
614 | srx->fpdu_part_rem -= rv; |
615 | srx->fpdu_part_rcvd += rv; |
616 | |
617 | if (!srx->fpdu_part_rem) { |
618 | srx->ddp_to += srx->fpdu_part_rcvd; |
619 | return 0; |
620 | } |
621 | return -EAGAIN; |
622 | } |
623 | |
624 | /* |
625 | * Inbound RREQ's cannot carry user data. |
626 | */ |
627 | int siw_proc_rreq(struct siw_qp *qp) |
628 | { |
629 | struct siw_rx_stream *srx = &qp->rx_stream; |
630 | |
631 | if (!srx->fpdu_part_rem) |
632 | return 0; |
633 | |
634 | pr_warn("siw: [QP %u]: rreq with mpa len %d\n", qp_id(qp), |
635 | be16_to_cpu(srx->hdr.ctrl.mpa_len)); |
636 | |
637 | return -EPROTO; |
638 | } |
639 | |
640 | /* |
641 | * siw_init_rresp: |
642 | * |
643 | * Process inbound RDMA READ REQ. Produce a pseudo READ RESPONSE WQE. |
644 | * Put it at the tail of the IRQ, if there is another WQE currently in |
645 | * transmit processing. If not, make it the current WQE to be processed |
646 | * and schedule transmit processing. |
647 | * |
648 | * Can be called from softirq context and from process |
649 | * context (RREAD socket loopback case!) |
650 | * |
651 | * return value: |
652 | * 0: success, |
653 | * failure code otherwise |
654 | */ |
655 | |
656 | static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx) |
657 | { |
658 | struct siw_wqe *tx_work = tx_wqe(qp); |
659 | struct siw_sqe *resp; |
660 | |
661 | uint64_t raddr = be64_to_cpu(srx->hdr.rreq.sink_to), |
662 | laddr = be64_to_cpu(srx->hdr.rreq.source_to); |
663 | uint32_t length = be32_to_cpu(srx->hdr.rreq.read_size), |
664 | lkey = be32_to_cpu(srx->hdr.rreq.source_stag), |
665 | rkey = be32_to_cpu(srx->hdr.rreq.sink_stag), |
666 | msn = be32_to_cpu(srx->hdr.rreq.ddp_msn); |
667 | |
668 | int run_sq = 1, rv = 0; |
669 | unsigned long flags; |
670 | |
671 | if (unlikely(msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ])) { |
672 | siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, |
673 | DDP_ETYPE_UNTAGGED_BUF, |
674 | DDP_ECODE_UT_INVALID_MSN_RANGE, 0); |
675 | return -EPROTO; |
676 | } |
677 | spin_lock_irqsave(&qp->sq_lock, flags); |
678 | |
679 | if (unlikely(!qp->attrs.irq_size)) { |
680 | run_sq = 0; |
681 | goto error_irq; |
682 | } |
683 | if (tx_work->wr_status == SIW_WR_IDLE) { |
684 | /* |
685 | * immediately schedule READ response w/o |
686 | * consuming IRQ entry: IRQ must be empty. |
687 | */ |
688 | tx_work->processed = 0; |
689 | tx_work->mem[0] = NULL; |
690 | tx_work->wr_status = SIW_WR_QUEUED; |
691 | resp = &tx_work->sqe; |
692 | } else { |
693 | resp = irq_alloc_free(qp); |
694 | run_sq = 0; |
695 | } |
696 | if (likely(resp)) { |
697 | resp->opcode = SIW_OP_READ_RESPONSE; |
698 | |
699 | resp->sge[0].length = length; |
700 | resp->sge[0].laddr = laddr; |
701 | resp->sge[0].lkey = lkey; |
702 | |
703 | /* Keep aside message sequence number for potential |
704 | * error reporting during Read Response generation. |
705 | */ |
706 | resp->sge[1].length = msn; |
707 | |
708 | resp->raddr = raddr; |
709 | resp->rkey = rkey; |
710 | resp->num_sge = length ? 1 : 0; |
711 | |
712 | /* RRESP now valid as current TX wqe or placed into IRQ */ |
713 | smp_store_mb(resp->flags, SIW_WQE_VALID); |
714 | } else { |
715 | error_irq: |
716 | pr_warn("siw: [QP %u]: IRQ exceeded or null, size %d\n", |
717 | qp_id(qp), qp->attrs.irq_size); |
718 | |
719 | siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP, |
720 | RDMAP_ETYPE_REMOTE_OPERATION, |
721 | RDMAP_ECODE_CATASTROPHIC_STREAM, 0); |
722 | rv = -EPROTO; |
723 | } |
724 | |
725 | spin_unlock_irqrestore(&qp->sq_lock, flags); |
726 | |
727 | if (run_sq) |
728 | rv = siw_sq_start(qp); |
729 | |
730 | return rv; |
731 | } |
732 | |
733 | /* |
734 | * Only called at start of Read.Response processing. |
735 | * Transfer pending Read from tip of ORQ into current rx wqe, |
736 | * but keep ORQ entry valid until Read.Response processing done. |
737 | * No Queue locking needed. |
738 | */ |
739 | static int siw_orqe_start_rx(struct siw_qp *qp) |
740 | { |
741 | struct siw_sqe *orqe; |
742 | struct siw_wqe *wqe = NULL; |
743 | |
744 | if (unlikely(!qp->attrs.orq_size)) |
745 | return -EPROTO; |
746 | |
747 | /* make sure ORQ indices are current */ |
748 | smp_mb(); |
749 | |
750 | orqe = orq_get_current(qp); |
751 | if (READ_ONCE(orqe->flags) & SIW_WQE_VALID) { |
752 | /* RRESP is a TAGGED RDMAP operation */ |
753 | wqe = rx_wqe(&qp->rx_tagged); |
754 | wqe->sqe.id = orqe->id; |
755 | wqe->sqe.opcode = orqe->opcode; |
756 | wqe->sqe.sge[0].laddr = orqe->sge[0].laddr; |
757 | wqe->sqe.sge[0].lkey = orqe->sge[0].lkey; |
758 | wqe->sqe.sge[0].length = orqe->sge[0].length; |
759 | wqe->sqe.flags = orqe->flags; |
760 | wqe->sqe.num_sge = 1; |
761 | wqe->bytes = orqe->sge[0].length; |
762 | wqe->processed = 0; |
763 | wqe->mem[0] = NULL; |
764 | /* make sure WQE is completely written before valid */ |
765 | smp_wmb(); |
766 | wqe->wr_status = SIW_WR_INPROGRESS; |
767 | |
768 | return 0; |
769 | } |
770 | return -EPROTO; |
771 | } |
772 | |
773 | /* |
774 | * siw_proc_rresp: |
775 | * |
776 | * Place incoming RRESP data into memory referenced by RREQ WQE |
777 | * which is at the tip of the ORQ |
778 | * |
779 | * Function supports partially received RRESP's (suspending/resuming |
780 | * current receive processing) |
781 | */ |
782 | int siw_proc_rresp(struct siw_qp *qp) |
783 | { |
784 | struct siw_rx_stream *srx = &qp->rx_stream; |
785 | struct siw_rx_fpdu *frx = &qp->rx_tagged; |
786 | struct siw_wqe *wqe = rx_wqe(frx); |
787 | struct siw_mem **mem, *mem_p; |
788 | struct siw_sge *sge; |
789 | int bytes, rv; |
790 | |
791 | if (frx->first_ddp_seg) { |
792 | if (unlikely(wqe->wr_status != SIW_WR_IDLE)) { |
793 | pr_warn("siw: [QP %u]: proc RRESP: status %d, op %d\n", |
794 | qp_id(qp), wqe->wr_status, wqe->sqe.opcode); |
795 | rv = -EPROTO; |
796 | goto error_term; |
797 | } |
798 | /* |
799 | * fetch pending RREQ from orq |
800 | */ |
801 | rv = siw_orqe_start_rx(qp); |
802 | if (rv) { |
803 | pr_warn("siw: [QP %u]: ORQ empty, size %d\n", |
804 | qp_id(qp), qp->attrs.orq_size); |
805 | goto error_term; |
806 | } |
807 | rv = siw_rresp_check_ntoh(srx, frx); |
808 | if (unlikely(rv)) { |
809 | siw_qp_event(qp, IB_EVENT_QP_FATAL); |
810 | return rv; |
811 | } |
812 | } else { |
813 | if (unlikely(wqe->wr_status != SIW_WR_INPROGRESS)) { |
814 | pr_warn("siw: [QP %u]: resume RRESP: status %d\n", |
815 | qp_id(qp), wqe->wr_status); |
816 | rv = -EPROTO; |
817 | goto error_term; |
818 | } |
819 | } |
820 | if (!srx->fpdu_part_rem) /* zero length RRESPONSE */ |
821 | return 0; |
822 | |
823 | sge = wqe->sqe.sge; /* there is only one */ |
824 | mem = &wqe->mem[0]; |
825 | |
826 | if (!(*mem)) { |
827 | /* |
828 | * check target memory which resolves memory on first fragment |
829 | */ |
830 | rv = siw_check_sge(qp->pd, sge, mem, IB_ACCESS_LOCAL_WRITE, 0, |
831 | wqe->bytes); |
832 | if (unlikely(rv)) { |
833 | siw_dbg_qp(qp, "target mem check: %d\n", rv); |
834 | wqe->wc_status = SIW_WC_LOC_PROT_ERR; |
835 | |
836 | siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, |
837 | DDP_ETYPE_TAGGED_BUF, |
838 | siw_tagged_error(-rv), 0); |
839 | |
840 | siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR); |
841 | |
842 | return -EINVAL; |
843 | } |
844 | } |
845 | mem_p = *mem; |
846 | |
847 | bytes = min(srx->fpdu_part_rem, srx->skb_new); |
848 | rv = siw_rx_data(mem_p, srx, &frx->pbl_idx, |
849 | sge->laddr + wqe->processed, bytes); |
850 | if (rv != bytes) { |
851 | wqe->wc_status = SIW_WC_GENERAL_ERR; |
852 | rv = -EINVAL; |
853 | goto error_term; |
854 | } |
855 | srx->fpdu_part_rem -= rv; |
856 | srx->fpdu_part_rcvd += rv; |
857 | wqe->processed += rv; |
858 | |
859 | if (!srx->fpdu_part_rem) { |
860 | srx->ddp_to += srx->fpdu_part_rcvd; |
861 | return 0; |
862 | } |
863 | return -EAGAIN; |
864 | |
865 | error_term: |
866 | siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, DDP_ETYPE_CATASTROPHIC, |
867 | DDP_ECODE_CATASTROPHIC, 0); |
868 | return rv; |
869 | } |
870 | |
871 | static void siw_update_skb_rcvd(struct siw_rx_stream *srx, u16 length) |
872 | { |
873 | srx->skb_offset += length; |
874 | srx->skb_new -= length; |
875 | srx->skb_copied += length; |
876 | } |
877 | |
878 | int siw_proc_terminate(struct siw_qp *qp) |
879 | { |
880 | struct siw_rx_stream *srx = &qp->rx_stream; |
881 | struct sk_buff *skb = srx->skb; |
882 | struct iwarp_terminate *term = &srx->hdr.terminate; |
883 | union iwarp_hdr term_info; |
884 | u8 *infop = (u8 *)&term_info; |
885 | enum rdma_opcode op; |
886 | u16 to_copy = sizeof(struct iwarp_ctrl); |
887 | |
888 | pr_warn("siw: got TERMINATE. layer %d, type %d, code %d\n", |
889 | __rdmap_term_layer(term), __rdmap_term_etype(term), |
890 | __rdmap_term_ecode(term)); |
891 | |
892 | if (be32_to_cpu(term->ddp_qn) != RDMAP_UNTAGGED_QN_TERMINATE || |
893 | be32_to_cpu(term->ddp_msn) != |
894 | qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] || |
895 | be32_to_cpu(term->ddp_mo) != 0) { |
896 | pr_warn("siw: rx bogus TERM [QN x%08x, MSN x%08x, MO x%08x]\n", |
897 | be32_to_cpu(term->ddp_qn), be32_to_cpu(term->ddp_msn), |
898 | be32_to_cpu(term->ddp_mo)); |
899 | return -ECONNRESET; |
900 | } |
901 | /* |
902 | * Receive remaining pieces of TERM if indicated |
903 | */ |
904 | if (!term->flag_m) |
905 | return -ECONNRESET; |
906 | |
907 | /* Do not take the effort to reassemble a network fragmented |
908 | * TERM message |
909 | */ |
910 | if (srx->skb_new < sizeof(struct iwarp_ctrl_tagged)) |
911 | return -ECONNRESET; |
912 | |
913 | memset(infop, 0, sizeof(term_info)); |
914 | |
915 | skb_copy_bits(skb, srx->skb_offset, infop, to_copy); |
916 | |
917 | op = __rdmap_get_opcode(&term_info.ctrl); |
918 | if (op >= RDMAP_TERMINATE) |
919 | goto out; |
920 | |
921 | infop += to_copy; |
922 | siw_update_skb_rcvd(srx, to_copy); |
923 | srx->fpdu_part_rcvd += to_copy; |
924 | srx->fpdu_part_rem -= to_copy; |
925 | |
926 | to_copy = iwarp_pktinfo[op].hdr_len - to_copy; |
927 | |
928 | /* Again, no network fragmented TERM's */ |
929 | if (to_copy + MPA_CRC_SIZE > srx->skb_new) |
930 | return -ECONNRESET; |
931 | |
932 | skb_copy_bits(skb, srx->skb_offset, infop, to_copy); |
933 | |
934 | if (term->flag_r) { |
935 | siw_dbg_qp(qp, "TERM reports RDMAP hdr type %u, len %u (%s)\n", |
936 | op, be16_to_cpu(term_info.ctrl.mpa_len), |
937 | term->flag_m ? "valid" : "invalid"); |
938 | } else if (term->flag_d) { |
939 | siw_dbg_qp(qp, "TERM reports DDP hdr type %u, len %u (%s)\n", |
940 | op, be16_to_cpu(term_info.ctrl.mpa_len), |
941 | term->flag_m ? "valid" : "invalid"); |
942 | } |
943 | out: |
944 | siw_update_skb_rcvd(srx, to_copy); |
945 | srx->fpdu_part_rcvd += to_copy; |
946 | srx->fpdu_part_rem -= to_copy; |
947 | |
948 | return -ECONNRESET; |
949 | } |
950 | |
951 | static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx) |
952 | { |
953 | struct sk_buff *skb = srx->skb; |
954 | int avail = min(srx->skb_new, srx->fpdu_part_rem); |
955 | u8 *tbuf = (u8 *)&srx->trailer.crc - srx->pad; |
956 | __wsum crc_in, crc_own = 0; |
957 | |
958 | siw_dbg_qp(qp, "expected %d, available %d, pad %u\n", |
959 | srx->fpdu_part_rem, srx->skb_new, srx->pad); |
960 | |
961 | skb_copy_bits(skb, srx->skb_offset, tbuf, avail); |
962 | |
963 | siw_update_skb_rcvd(srx, avail); |
964 | srx->fpdu_part_rem -= avail; |
965 | |
966 | if (srx->fpdu_part_rem) |
967 | return -EAGAIN; |
968 | |
969 | if (!srx->mpa_crc_hd) |
970 | return 0; |
971 | |
972 | if (srx->pad) |
973 | crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad); |
974 | /* |
975 | * CRC32 is computed, transmitted and received directly in NBO, |
976 | * so there's never a reason to convert byte order. |
977 | */ |
978 | crypto_shash_final(srx->mpa_crc_hd, (u8 *)&crc_own); |
979 | crc_in = (__force __wsum)srx->trailer.crc; |
980 | |
981 | if (unlikely(crc_in != crc_own)) { |
982 | pr_warn("siw: crc error. in: %08x, own %08x, op %u\n", |
983 | crc_in, crc_own, qp->rx_stream.rdmap_op); |
984 | |
985 | siw_init_terminate(qp, TERM_ERROR_LAYER_LLP, |
986 | LLP_ETYPE_MPA, |
987 | LLP_ECODE_RECEIVED_CRC, 0); |
988 | return -EINVAL; |
989 | } |
990 | return 0; |
991 | } |
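/*
 * Worked example for the trailer handled above (hypothetical numbers,
 * assuming the 2-byte MPA length header and 4-byte CRC): with an MPA
 * length field of 21, header plus payload occupy 21 + MPA_HDR_SIZE = 23
 * bytes on the wire, so the pad is -23 & 0x3 = 1 byte and the trailer
 * consumed here is 1 pad byte plus MPA_CRC_SIZE = 5 bytes in total.
 * If CRC is enabled, the pad bytes are folded into the running digest
 * before the final compare against the received CRC.
 */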
992 | |
993 | #define MIN_DDP_HDR sizeof(struct iwarp_ctrl_tagged) |
994 | |
995 | static int siw_get_hdr(struct siw_rx_stream *srx) |
996 | { |
997 | struct sk_buff *skb = srx->skb; |
998 | struct siw_qp *qp = rx_qp(srx); |
999 | struct iwarp_ctrl *c_hdr = &srx->hdr.ctrl; |
1000 | struct siw_rx_fpdu *frx; |
1001 | u8 opcode; |
1002 | int bytes; |
1003 | |
1004 | if (srx->fpdu_part_rcvd < MIN_DDP_HDR) { |
1005 | /* |
1006 | * copy a minimum sized (tagged) DDP frame control part |
1007 | */ |
1008 | bytes = min_t(int, srx->skb_new, |
1009 | MIN_DDP_HDR - srx->fpdu_part_rcvd); |
1010 | |
1011 | skb_copy_bits(skb, srx->skb_offset, |
1012 | (char *)c_hdr + srx->fpdu_part_rcvd, bytes); |
1013 | |
1014 | siw_update_skb_rcvd(srx, bytes); |
1015 | srx->fpdu_part_rcvd += bytes; |
1016 | if (srx->fpdu_part_rcvd < MIN_DDP_HDR) |
1017 | return -EAGAIN; |
1018 | |
1019 | if (unlikely(__ddp_get_version(c_hdr) != DDP_VERSION)) { |
1020 | enum ddp_etype etype; |
1021 | enum ddp_ecode ecode; |
1022 | |
1023 | pr_warn("siw: received ddp version unsupported %d\n", |
1024 | __ddp_get_version(c_hdr)); |
1025 | |
1026 | if (c_hdr->ddp_rdmap_ctrl & DDP_FLAG_TAGGED) { |
1027 | etype = DDP_ETYPE_TAGGED_BUF; |
1028 | ecode = DDP_ECODE_T_VERSION; |
1029 | } else { |
1030 | etype = DDP_ETYPE_UNTAGGED_BUF; |
1031 | ecode = DDP_ECODE_UT_VERSION; |
1032 | } |
1033 | siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP, |
1034 | etype, ecode, 0); |
1035 | return -EINVAL; |
1036 | } |
1037 | if (unlikely(__rdmap_get_version(c_hdr) != RDMAP_VERSION)) { |
1038 | pr_warn("siw: received rdmap version unsupported %d\n", |
1039 | __rdmap_get_version(c_hdr)); |
1040 | |
1041 | siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP, |
1042 | RDMAP_ETYPE_REMOTE_OPERATION, |
1043 | RDMAP_ECODE_VERSION, 0); |
1044 | return -EINVAL; |
1045 | } |
1046 | opcode = __rdmap_get_opcode(c_hdr); |
1047 | |
1048 | if (opcode > RDMAP_TERMINATE) { |
1049 | pr_warn("siw: received unknown packet type %u\n", |
1050 | opcode); |
1051 | |
1052 | siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP, |
1053 | RDMAP_ETYPE_REMOTE_OPERATION, |
1054 | RDMAP_ECODE_OPCODE, 0); |
1055 | return -EINVAL; |
1056 | } |
1057 | siw_dbg_qp(rx_qp(srx), "new header, opcode %u\n", opcode); |
1058 | } else { |
1059 | opcode = __rdmap_get_opcode(c_hdr); |
1060 | } |
1061 | set_rx_fpdu_context(qp, opcode); |
1062 | frx = qp->rx_fpdu; |
1063 | |
1064 | /* |
1065 | * Figure out len of current hdr: variable length of |
1066 | * iwarp hdr may force us to copy hdr information in |
1067 | * two steps. Only tagged DDP messages are already |
1068 | * completely received. |
1069 | */ |
1070 | if (iwarp_pktinfo[opcode].hdr_len > sizeof(struct iwarp_ctrl_tagged)) { |
1071 | int hdrlen = iwarp_pktinfo[opcode].hdr_len; |
1072 | |
1073 | bytes = min_t(int, hdrlen - MIN_DDP_HDR, srx->skb_new); |
1074 | |
1075 | skb_copy_bits(skb, srx->skb_offset, |
1076 | (char *)c_hdr + srx->fpdu_part_rcvd, bytes); |
1077 | |
1078 | siw_update_skb_rcvd(srx, bytes); |
1079 | srx->fpdu_part_rcvd += bytes; |
1080 | if (srx->fpdu_part_rcvd < hdrlen) |
1081 | return -EAGAIN; |
1082 | } |
1083 | |
1084 | /* |
1085 | * DDP/RDMAP header receive completed. Check if the current |
1086 | * DDP segment starts a new RDMAP message or continues a previously |
1087 | * started RDMAP message. |
1088 | * |
1089 | * Alternating reception of DDP segments (or FPDUs) from incomplete |
1090 | * tagged and untagged RDMAP messages is supported, as long as |
1091 | * the current tagged or untagged message gets eventually completed |
1092 | * w/o intersection from another message of the same type |
1093 | * (tagged/untagged). E.g., a WRITE can get intersected by a SEND, |
1094 | * but not by a READ RESPONSE etc. |
1095 | */ |
1096 | if (srx->mpa_crc_hd) { |
1097 | /* |
1098 | * Restart CRC computation |
1099 | */ |
1100 | crypto_shash_init(srx->mpa_crc_hd); |
1101 | crypto_shash_update(srx->mpa_crc_hd, (u8 *)c_hdr, |
1102 | srx->fpdu_part_rcvd); |
1103 | } |
1104 | if (frx->more_ddp_segs) { |
1105 | frx->first_ddp_seg = 0; |
1106 | if (frx->prev_rdmap_op != opcode) { |
1107 | pr_warn("siw: packet intersection: %u : %u\n", |
1108 | frx->prev_rdmap_op, opcode); |
1109 | /* |
1110 | * The last inbound RDMA operation of same type |
1111 | * (tagged or untagged) is left unfinished. |
1112 | * To complete it in error, make it the current |
1113 | * operation again, even with the header already |
1114 | * overwritten. For error handling, only the opcode |
1115 | * and current rx context are relevant. |
1116 | */ |
1117 | set_rx_fpdu_context(qp, frx->prev_rdmap_op); |
1118 | __rdmap_set_opcode(c_hdr, frx->prev_rdmap_op); |
1119 | return -EPROTO; |
1120 | } |
1121 | } else { |
1122 | frx->prev_rdmap_op = opcode; |
1123 | frx->first_ddp_seg = 1; |
1124 | } |
1125 | frx->more_ddp_segs = c_hdr->ddp_rdmap_ctrl & DDP_FLAG_LAST ? 0 : 1; |
1126 | |
1127 | return 0; |
1128 | } |
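/*
 * Sketch of the two-step header reception in siw_get_hdr() above (exact
 * sizes come from iwarp_pktinfo[] and depend on the opcode): the first
 * copy always fetches MIN_DDP_HDR bytes, which already covers a complete
 * tagged header (e.g. RDMA WRITE). Untagged headers such as SEND and the
 * READ REQUEST header are longer, so a second skb_copy_bits() fetches
 * hdr_len - MIN_DDP_HDR additional bytes; either step may return -EAGAIN
 * until enough skb data has arrived.
 */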
1129 | |
1130 | static int siw_check_tx_fence(struct siw_qp *qp) |
1131 | { |
1132 | struct siw_wqe *tx_waiting = tx_wqe(qp); |
1133 | struct siw_sqe *rreq; |
1134 | int resume_tx = 0, rv = 0; |
1135 | unsigned long flags; |
1136 | |
1137 | spin_lock_irqsave(&qp->orq_lock, flags); |
1138 | |
1139 | /* free current orq entry */ |
1140 | rreq = orq_get_current(qp); |
1141 | WRITE_ONCE(rreq->flags, 0); |
1142 | |
1143 | qp->orq_get++; |
1144 | |
1145 | if (qp->tx_ctx.orq_fence) { |
1146 | if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) { |
1147 | pr_warn("siw: [QP %u]: fence resume: bad status %d\n", |
1148 | qp_id(qp), tx_waiting->wr_status); |
1149 | rv = -EPROTO; |
1150 | goto out; |
1151 | } |
1152 | /* resume SQ processing, if possible */ |
1153 | if (tx_waiting->sqe.opcode == SIW_OP_READ || |
1154 | tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) { |
1155 | |
1156 | /* SQ processing was stopped because of a full ORQ */ |
1157 | rreq = orq_get_free(qp); |
1158 | if (unlikely(!rreq)) { |
1159 | pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp)); |
1160 | rv = -EPROTO; |
1161 | goto out; |
1162 | } |
1163 | siw_read_to_orq(rreq, &tx_waiting->sqe); |
1164 | |
1165 | qp->orq_put++; |
1166 | qp->tx_ctx.orq_fence = 0; |
1167 | resume_tx = 1; |
1168 | |
1169 | } else if (siw_orq_empty(qp)) { |
1170 | /* |
1171 | * SQ processing was stopped by fenced work request. |
1172 | * Resume since all previous Read's are now completed. |
1173 | */ |
1174 | qp->tx_ctx.orq_fence = 0; |
1175 | resume_tx = 1; |
1176 | } |
1177 | } |
1178 | out: |
1179 | spin_unlock_irqrestore(&qp->orq_lock, flags); |
1180 | |
1181 | if (resume_tx) |
1182 | rv = siw_sq_start(qp); |
1183 | |
1184 | return rv; |
1185 | } |
1186 | |
1187 | /* |
1188 | * siw_rdmap_complete() |
1189 | * |
1190 | * Complete processing of an RDMA message after receiving all |
1191 | * DDP segments, or abort processing after encountering an error case. |
1192 | * |
1193 | * o SENDs + RRESPs need work completion generation, |
1194 | * o RREQs need READ RESPONSE initialization, |
1195 | * o WRITEs need memory dereferencing. |
1196 | * |
1197 | * TODO: Failed WRITEs need local error to be surfaced. |
1198 | */ |
1199 | static int siw_rdmap_complete(struct siw_qp *qp, int error) |
1200 | { |
1201 | struct siw_rx_stream *srx = &qp->rx_stream; |
1202 | struct siw_wqe *wqe = rx_wqe(qp->rx_fpdu); |
1203 | enum siw_wc_status wc_status = wqe->wc_status; |
1204 | u8 opcode = __rdmap_get_opcode(&srx->hdr.ctrl); |
1205 | int rv = 0; |
1206 | |
1207 | switch (opcode) { |
1208 | case RDMAP_SEND_SE: |
1209 | case RDMAP_SEND_SE_INVAL: |
1210 | wqe->rqe.flags |= SIW_WQE_SOLICITED; |
1211 | fallthrough; |
1212 | |
1213 | case RDMAP_SEND: |
1214 | case RDMAP_SEND_INVAL: |
1215 | if (wqe->wr_status == SIW_WR_IDLE) |
1216 | break; |
1217 | |
1218 | srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]++; |
1219 | |
1220 | if (error != 0 && wc_status == SIW_WC_SUCCESS) |
1221 | wc_status = SIW_WC_GENERAL_ERR; |
1222 | /* |
1223 | * Handle STag invalidation request |
1224 | */ |
1225 | if (wc_status == SIW_WC_SUCCESS && |
1226 | (opcode == RDMAP_SEND_INVAL || |
1227 | opcode == RDMAP_SEND_SE_INVAL)) { |
1228 | rv = siw_invalidate_stag(qp->pd, srx->inval_stag); |
1229 | if (rv) { |
1230 | siw_init_terminate( |
1231 | qp, TERM_ERROR_LAYER_RDMAP, |
1232 | rv == -EACCES ? |
1233 | RDMAP_ETYPE_REMOTE_PROTECTION : |
1234 | RDMAP_ETYPE_REMOTE_OPERATION, |
1235 | RDMAP_ECODE_CANNOT_INVALIDATE, 0); |
1236 | |
1237 | wc_status = SIW_WC_REM_INV_REQ_ERR; |
1238 | } |
1239 | rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed, |
1240 | rv ? 0 : srx->inval_stag, |
1241 | wc_status); |
1242 | } else { |
1243 | rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed, |
1244 | 0, wc_status); |
1245 | } |
1246 | siw_wqe_put_mem(wqe, SIW_OP_RECEIVE); |
1247 | break; |
1248 | |
1249 | case RDMAP_RDMA_READ_RESP: |
1250 | if (wqe->wr_status == SIW_WR_IDLE) |
1251 | break; |
1252 | |
1253 | if (error != 0) { |
1254 | if ((srx->state == SIW_GET_HDR && |
1255 | qp->rx_fpdu->first_ddp_seg) || error == -ENODATA) |
1256 | /* possible RREQ in ORQ left untouched */ |
1257 | break; |
1258 | |
1259 | if (wc_status == SIW_WC_SUCCESS) |
1260 | wc_status = SIW_WC_GENERAL_ERR; |
1261 | } else if (rdma_is_kernel_res(&qp->base_qp.res) && |
1262 | rx_type(wqe) == SIW_OP_READ_LOCAL_INV) { |
1263 | /* |
1264 | * Handle any STag invalidation request |
1265 | */ |
1266 | rv = siw_invalidate_stag(qp->pd, wqe->sqe.sge[0].lkey); |
1267 | if (rv) { |
1268 | siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP, |
1269 | RDMAP_ETYPE_CATASTROPHIC, |
1270 | RDMAP_ECODE_UNSPECIFIED, 0); |
1271 | |
1272 | if (wc_status == SIW_WC_SUCCESS) { |
1273 | wc_status = SIW_WC_GENERAL_ERR; |
1274 | error = rv; |
1275 | } |
1276 | } |
1277 | } |
1278 | /* |
1279 | * All errors turn the wqe into signalled. |
1280 | */ |
1281 | if ((wqe->sqe.flags & SIW_WQE_SIGNALLED) || error != 0) |
1282 | rv = siw_sqe_complete(qp, &wqe->sqe, wqe->processed, |
1283 | wc_status); |
1284 | siw_wqe_put_mem(wqe, SIW_OP_READ); |
1285 | |
1286 | if (!error) { |
1287 | rv = siw_check_tx_fence(qp); |
1288 | } else { |
1289 | /* Disable current ORQ element */ |
1290 | if (qp->attrs.orq_size) |
1291 | WRITE_ONCE(orq_get_current(qp)->flags, 0); |
1292 | } |
1293 | break; |
1294 | |
1295 | case RDMAP_RDMA_READ_REQ: |
1296 | if (!error) { |
1297 | rv = siw_init_rresp(qp, srx); |
1298 | srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]++; |
1299 | } |
1300 | break; |
1301 | |
1302 | case RDMAP_RDMA_WRITE: |
1303 | if (wqe->wr_status == SIW_WR_IDLE) |
1304 | break; |
1305 | |
1306 | /* |
1307 | * Free References from memory object if |
1308 | * attached to receive context (inbound WRITE). |
1309 | * While a zero-length WRITE is allowed, |
1310 | * no memory reference got created. |
1311 | */ |
1312 | if (rx_mem(&qp->rx_tagged)) { |
1313 | siw_mem_put(rx_mem(&qp->rx_tagged)); |
1314 | rx_mem(&qp->rx_tagged) = NULL; |
1315 | } |
1316 | break; |
1317 | |
1318 | default: |
1319 | break; |
1320 | } |
1321 | wqe->wr_status = SIW_WR_IDLE; |
1322 | |
1323 | return rv; |
1324 | } |
1325 | |
1326 | /* |
1327 | * siw_tcp_rx_data() |
1328 | * |
1329 | * Main routine to consume inbound TCP payload |
1330 | * |
1331 | * @rd_desc: read descriptor |
1332 | * @skb: socket buffer |
1333 | * @off: offset in skb |
1334 | * @len: skb->len - offset : payload in skb |
1335 | */ |
1336 | int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb, |
1337 | unsigned int off, size_t len) |
1338 | { |
1339 | struct siw_qp *qp = rd_desc->arg.data; |
1340 | struct siw_rx_stream *srx = &qp->rx_stream; |
1341 | int rv; |
1342 | |
1343 | srx->skb = skb; |
1344 | srx->skb_new = skb->len - off; |
1345 | srx->skb_offset = off; |
1346 | srx->skb_copied = 0; |
1347 | |
1348 | siw_dbg_qp(qp, "new data, len %d\n", srx->skb_new); |
1349 | |
1350 | while (srx->skb_new) { |
1351 | int run_completion = 1; |
1352 | |
1353 | if (unlikely(srx->rx_suspend)) { |
1354 | /* Do not process any more data */ |
1355 | srx->skb_copied += srx->skb_new; |
1356 | break; |
1357 | } |
1358 | switch (srx->state) { |
1359 | case SIW_GET_HDR: |
1360 | rv = siw_get_hdr(srx); |
1361 | if (!rv) { |
1362 | srx->fpdu_part_rem = |
1363 | be16_to_cpu(srx->hdr.ctrl.mpa_len) - |
1364 | srx->fpdu_part_rcvd + MPA_HDR_SIZE; |
1365 | |
1366 | if (srx->fpdu_part_rem) |
1367 | srx->pad = -srx->fpdu_part_rem & 0x3; |
1368 | else |
1369 | srx->pad = 0; |
1370 | |
1371 | srx->state = SIW_GET_DATA_START; |
1372 | srx->fpdu_part_rcvd = 0; |
1373 | } |
1374 | break; |
1375 | |
1376 | case SIW_GET_DATA_MORE: |
1377 | /* |
1378 | * Another data fragment of the same DDP segment. |
1379 | * Setting first_ddp_seg = 0 avoids repeating |
1380 | * initializations that shall occur only once per |
1381 | * DDP segment. |
1382 | */ |
1383 | qp->rx_fpdu->first_ddp_seg = 0; |
1384 | fallthrough; |
1385 | |
1386 | case SIW_GET_DATA_START: |
1387 | /* |
1388 | * Headers will be checked by the opcode-specific |
1389 | * data receive function below. |
1390 | */ |
1391 | rv = iwarp_pktinfo[qp->rx_stream.rdmap_op].rx_data(qp); |
1392 | if (!rv) { |
1393 | int mpa_len = |
1394 | be16_to_cpu(srx->hdr.ctrl.mpa_len) |
1395 | + MPA_HDR_SIZE; |
1396 | |
1397 | srx->fpdu_part_rem = (-mpa_len & 0x3) |
1398 | + MPA_CRC_SIZE; |
1399 | srx->fpdu_part_rcvd = 0; |
1400 | srx->state = SIW_GET_TRAILER; |
1401 | } else { |
1402 | if (unlikely(rv == -ECONNRESET)) |
1403 | run_completion = 0; |
1404 | else |
1405 | srx->state = SIW_GET_DATA_MORE; |
1406 | } |
1407 | break; |
1408 | |
1409 | case SIW_GET_TRAILER: |
1410 | /* |
1411 | * read CRC + any padding |
1412 | */ |
1413 | rv = siw_get_trailer(qp, srx); |
1414 | if (likely(!rv)) { |
1415 | /* |
1416 | * FPDU completed. |
1417 | * complete RDMAP message if last fragment |
1418 | */ |
1419 | srx->state = SIW_GET_HDR; |
1420 | srx->fpdu_part_rcvd = 0; |
1421 | |
1422 | if (!(srx->hdr.ctrl.ddp_rdmap_ctrl & |
1423 | DDP_FLAG_LAST)) |
1424 | /* more frags */ |
1425 | break; |
1426 | |
1427 | rv = siw_rdmap_complete(qp, 0); |
1428 | run_completion = 0; |
1429 | } |
1430 | break; |
1431 | |
1432 | default: |
1433 | pr_warn("QP[%u]: RX out of state\n", qp_id(qp)); |
1434 | rv = -EPROTO; |
1435 | run_completion = 0; |
1436 | } |
1437 | if (unlikely(rv != 0 && rv != -EAGAIN)) { |
1438 | if ((srx->state > SIW_GET_HDR || |
1439 | qp->rx_fpdu->more_ddp_segs) && run_completion) |
1440 | siw_rdmap_complete(qp, rv); |
1441 | |
1442 | siw_dbg_qp(qp, "rx error %d, rx state %d\n", rv, |
1443 | srx->state); |
1444 | |
1445 | siw_qp_cm_drop(qp, 1); |
1446 | |
1447 | break; |
1448 | } |
1449 | if (rv) { |
1450 | siw_dbg_qp(qp, "fpdu fragment, state %d, missing %d\n", |
1451 | srx->state, srx->fpdu_part_rem); |
1452 | break; |
1453 | } |
1454 | } |
1455 | return srx->skb_copied; |
1456 | } |
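/*
 * Summary of the receive state machine driven by siw_tcp_rx_data()
 * above (informational only, derived from the code):
 *
 *   SIW_GET_HDR ------------ header complete -------> SIW_GET_DATA_START
 *   SIW_GET_DATA_START/_MORE -- payload complete ---> SIW_GET_TRAILER
 *   SIW_GET_DATA_START/_MORE -- skb exhausted ------> SIW_GET_DATA_MORE
 *   SIW_GET_TRAILER --------- pad + CRC consumed ---> SIW_GET_HDR
 *
 * When the trailer of a DDP segment carrying DDP_FLAG_LAST has been
 * consumed, siw_rdmap_complete() finalizes the RDMAP message before the
 * next FPDU header is parsed.
 */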
1457 | |