// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include <linux/skbuff.h>

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
#include "rxe_task.h"

enum comp_state {
        COMPST_GET_ACK,
        COMPST_GET_WQE,
        COMPST_COMP_WQE,
        COMPST_COMP_ACK,
        COMPST_CHECK_PSN,
        COMPST_CHECK_ACK,
        COMPST_READ,
        COMPST_ATOMIC,
        COMPST_WRITE_SEND,
        COMPST_UPDATE_COMP,
        COMPST_ERROR_RETRY,
        COMPST_RNR_RETRY,
        COMPST_ERROR,
        COMPST_EXIT, /* We have an issue, and we want to rerun the completer */
        COMPST_DONE, /* The completer finished successfully */
};

static char *comp_state_name[] = {
        [COMPST_GET_ACK]     = "GET ACK",
        [COMPST_GET_WQE]     = "GET WQE",
        [COMPST_COMP_WQE]    = "COMP WQE",
        [COMPST_COMP_ACK]    = "COMP ACK",
        [COMPST_CHECK_PSN]   = "CHECK PSN",
        [COMPST_CHECK_ACK]   = "CHECK ACK",
        [COMPST_READ]        = "READ",
        [COMPST_ATOMIC]      = "ATOMIC",
        [COMPST_WRITE_SEND]  = "WRITE/SEND",
        [COMPST_UPDATE_COMP] = "UPDATE COMP",
        [COMPST_ERROR_RETRY] = "ERROR RETRY",
        [COMPST_RNR_RETRY]   = "RNR RETRY",
        [COMPST_ERROR]       = "ERROR",
        [COMPST_EXIT]        = "EXIT",
        [COMPST_DONE]        = "DONE",
};

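/* RNR NAK timer delays in microseconds, indexed by the 5-bit timer
 * field of the AETH syndrome. Per the IBTA RNR NAK timer encoding,
 * the value 0 selects the longest delay (655.36 ms), not the shortest.
 */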
static unsigned long rnrnak_usec[32] = {
        [IB_RNR_TIMER_655_36] = 655360,
        [IB_RNR_TIMER_000_01] = 10,
        [IB_RNR_TIMER_000_02] = 20,
        [IB_RNR_TIMER_000_03] = 30,
        [IB_RNR_TIMER_000_04] = 40,
        [IB_RNR_TIMER_000_06] = 60,
        [IB_RNR_TIMER_000_08] = 80,
        [IB_RNR_TIMER_000_12] = 120,
        [IB_RNR_TIMER_000_16] = 160,
        [IB_RNR_TIMER_000_24] = 240,
        [IB_RNR_TIMER_000_32] = 320,
        [IB_RNR_TIMER_000_48] = 480,
        [IB_RNR_TIMER_000_64] = 640,
        [IB_RNR_TIMER_000_96] = 960,
        [IB_RNR_TIMER_001_28] = 1280,
        [IB_RNR_TIMER_001_92] = 1920,
        [IB_RNR_TIMER_002_56] = 2560,
        [IB_RNR_TIMER_003_84] = 3840,
        [IB_RNR_TIMER_005_12] = 5120,
        [IB_RNR_TIMER_007_68] = 7680,
        [IB_RNR_TIMER_010_24] = 10240,
        [IB_RNR_TIMER_015_36] = 15360,
        [IB_RNR_TIMER_020_48] = 20480,
        [IB_RNR_TIMER_030_72] = 30720,
        [IB_RNR_TIMER_040_96] = 40960,
        [IB_RNR_TIMER_061_44] = 61440,
        [IB_RNR_TIMER_081_92] = 81920,
        [IB_RNR_TIMER_122_88] = 122880,
        [IB_RNR_TIMER_163_84] = 163840,
        [IB_RNR_TIMER_245_76] = 245760,
        [IB_RNR_TIMER_327_68] = 327680,
        [IB_RNR_TIMER_491_52] = 491520,
};

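/* convert an RNR NAK timer encoding to jiffies, rounding up to at
 * least one jiffy so that a timer is always actually armed
 */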
static inline unsigned long rnrnak_jiffies(u8 timeout)
{
        return max_t(unsigned long,
                     usecs_to_jiffies(rnrnak_usec[timeout]), 1);
}

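/* map a send WR opcode to the opcode reported in its work completion;
 * opcodes with no defined mapping return 0xff
 */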
static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
{
        switch (opcode) {
        case IB_WR_RDMA_WRITE:                  return IB_WC_RDMA_WRITE;
        case IB_WR_RDMA_WRITE_WITH_IMM:         return IB_WC_RDMA_WRITE;
        case IB_WR_SEND:                        return IB_WC_SEND;
        case IB_WR_SEND_WITH_IMM:               return IB_WC_SEND;
        case IB_WR_RDMA_READ:                   return IB_WC_RDMA_READ;
        case IB_WR_ATOMIC_CMP_AND_SWP:          return IB_WC_COMP_SWAP;
        case IB_WR_ATOMIC_FETCH_AND_ADD:        return IB_WC_FETCH_ADD;
        case IB_WR_LSO:                         return IB_WC_LSO;
        case IB_WR_SEND_WITH_INV:               return IB_WC_SEND;
        case IB_WR_RDMA_READ_WITH_INV:          return IB_WC_RDMA_READ;
        case IB_WR_LOCAL_INV:                   return IB_WC_LOCAL_INV;
        case IB_WR_REG_MR:                      return IB_WC_REG_MR;
        case IB_WR_BIND_MW:                     return IB_WC_BIND_MW;
        case IB_WR_ATOMIC_WRITE:                return IB_WC_ATOMIC_WRITE;
        case IB_WR_FLUSH:                       return IB_WC_FLUSH;

        default:
                return 0xff;
        }
}

void retransmit_timer(struct timer_list *t)
{
        struct rxe_qp *qp = from_timer(qp, t, retrans_timer);
        unsigned long flags;

        rxe_dbg_qp(qp, "retransmit timer fired\n");

        spin_lock_irqsave(&qp->state_lock, flags);
        if (qp->valid) {
                qp->comp.timeout = 1;
                rxe_sched_task(&qp->comp.task);
        }
        spin_unlock_irqrestore(&qp->state_lock, flags);
}

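/* queue a response packet for the completer: if other packets are
 * already pending, defer to the completer task; otherwise run the
 * completer inline on this context to keep latency low
 */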
void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
{
        int must_sched;

        skb_queue_tail(&qp->resp_pkts, skb);

        must_sched = skb_queue_len(&qp->resp_pkts) > 1;
        if (must_sched != 0)
                rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED);

        if (must_sched)
                rxe_sched_task(&qp->comp.task);
        else
                rxe_run_task(&qp->comp.task);
}

static inline enum comp_state get_wqe(struct rxe_qp *qp,
                                      struct rxe_pkt_info *pkt,
                                      struct rxe_send_wqe **wqe_p)
{
        struct rxe_send_wqe *wqe;

        /* we come here whether or not we found a response packet to see if
         * there are any posted WQEs
         */
        wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT);
        *wqe_p = wqe;

        /* no WQE or requester has not started it yet */
        if (!wqe || wqe->state == wqe_state_posted)
                return pkt ? COMPST_DONE : COMPST_EXIT;

        /* WQE does not require an ack */
        if (wqe->state == wqe_state_done)
                return COMPST_COMP_WQE;

        /* WQE caused an error */
        if (wqe->state == wqe_state_error)
                return COMPST_ERROR;

        /* we have a WQE, if we also have an ack check its PSN */
        return pkt ? COMPST_CHECK_PSN : COMPST_EXIT;
}

static inline void reset_retry_counters(struct rxe_qp *qp)
{
        qp->comp.retry_cnt = qp->attr.retry_cnt;
        qp->comp.rnr_retry = qp->attr.rnr_retry;
        qp->comp.started_retry = 0;
}

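/* decide how to treat a response packet by comparing its PSN first
 * against the PSN range of the oldest WQE and then against the PSN
 * the completer expects next
 */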
static inline enum comp_state check_psn(struct rxe_qp *qp,
                                        struct rxe_pkt_info *pkt,
                                        struct rxe_send_wqe *wqe)
{
        s32 diff;

        /* check to see if response is past the oldest WQE. if it is, complete
         * send/write or error read/atomic
         */
        diff = psn_compare(pkt->psn, wqe->last_psn);
        if (diff > 0) {
                if (wqe->state == wqe_state_pending) {
                        if (wqe->mask & WR_ATOMIC_OR_READ_MASK)
                                return COMPST_ERROR_RETRY;

                        reset_retry_counters(qp);
                        return COMPST_COMP_WQE;
                } else {
                        return COMPST_DONE;
                }
        }

        /* compare response packet to expected response */
        diff = psn_compare(pkt->psn, qp->comp.psn);
        if (diff < 0) {
                /* response is most likely a retried packet; if it matches an
                 * uncompleted WQE go complete it, else ignore it
                 */
                if (pkt->psn == wqe->last_psn)
                        return COMPST_COMP_ACK;
                else if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE &&
                         (qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST ||
                          qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE))
                        return COMPST_CHECK_ACK;
                else
                        return COMPST_DONE;
        } else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) {
                return COMPST_DONE;
        } else {
                return COMPST_CHECK_ACK;
        }
}

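/* validate a response packet against the in-progress WQE: first check
 * that the packet fits the expected response sequence, then check that
 * the operation and its AETH syndrome, if any, are valid for the WQE
 */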
static inline enum comp_state check_ack(struct rxe_qp *qp,
                                        struct rxe_pkt_info *pkt,
                                        struct rxe_send_wqe *wqe)
{
        unsigned int mask = pkt->mask;
        u8 syn;
        struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

        /* Check the sequence only */
        switch (qp->comp.opcode) {
        case -1:
                /* Will catch all *_ONLY cases. */
                if (!(mask & RXE_START_MASK))
                        return COMPST_ERROR;

                break;

        case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:
        case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
                /* Check NAK code to handle a remote error */
                if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE)
                        break;

                if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE &&
                    pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) {
                        /* read retries of partial data may restart from
                         * read response first or response only.
                         */
                        if ((pkt->psn == wqe->first_psn &&
                             pkt->opcode ==
                             IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) ||
                            (wqe->first_psn == wqe->last_psn &&
                             pkt->opcode ==
                             IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY))
                                break;

                        return COMPST_ERROR;
                }
                break;
        default:
                WARN_ON_ONCE(1);
        }

        /* Check operation validity. */
        switch (pkt->opcode) {
        case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:
        case IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST:
        case IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY:
                syn = aeth_syn(pkt);

                if ((syn & AETH_TYPE_MASK) != AETH_ACK)
                        return COMPST_ERROR;

                if (wqe->wr.opcode == IB_WR_ATOMIC_WRITE)
                        return COMPST_WRITE_SEND;

                fallthrough;
                /* (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE doesn't have an
                 * AETH)
                 */
        case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
                if (wqe->wr.opcode != IB_WR_RDMA_READ &&
                    wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV &&
                    wqe->wr.opcode != IB_WR_FLUSH) {
                        wqe->status = IB_WC_FATAL_ERR;
                        return COMPST_ERROR;
                }
                reset_retry_counters(qp);
                return COMPST_READ;

        case IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE:
                syn = aeth_syn(pkt);

                if ((syn & AETH_TYPE_MASK) != AETH_ACK)
                        return COMPST_ERROR;

                if (wqe->wr.opcode != IB_WR_ATOMIC_CMP_AND_SWP &&
                    wqe->wr.opcode != IB_WR_ATOMIC_FETCH_AND_ADD)
                        return COMPST_ERROR;
                reset_retry_counters(qp);
                return COMPST_ATOMIC;

        case IB_OPCODE_RC_ACKNOWLEDGE:
                syn = aeth_syn(pkt);
                switch (syn & AETH_TYPE_MASK) {
                case AETH_ACK:
                        reset_retry_counters(qp);
                        return COMPST_WRITE_SEND;

                case AETH_RNR_NAK:
                        rxe_counter_inc(rxe, RXE_CNT_RCV_RNR);
                        return COMPST_RNR_RETRY;

                case AETH_NAK:
                        switch (syn) {
                        case AETH_NAK_PSN_SEQ_ERROR:
                                /* a nak implicitly acks all packets with psns
                                 * before it
                                 */
                                if (psn_compare(pkt->psn, qp->comp.psn) > 0) {
                                        rxe_counter_inc(rxe,
                                                        RXE_CNT_RCV_SEQ_ERR);
                                        qp->comp.psn = pkt->psn;
                                        if (qp->req.wait_psn) {
                                                qp->req.wait_psn = 0;
                                                rxe_sched_task(&qp->req.task);
                                        }
                                }
                                return COMPST_ERROR_RETRY;

                        case AETH_NAK_INVALID_REQ:
                                wqe->status = IB_WC_REM_INV_REQ_ERR;
                                return COMPST_ERROR;

                        case AETH_NAK_REM_ACC_ERR:
                                wqe->status = IB_WC_REM_ACCESS_ERR;
                                return COMPST_ERROR;

                        case AETH_NAK_REM_OP_ERR:
                                wqe->status = IB_WC_REM_OP_ERR;
                                return COMPST_ERROR;

                        default:
                                rxe_dbg_qp(qp, "unexpected nak %x\n", syn);
                                wqe->status = IB_WC_REM_OP_ERR;
                                return COMPST_ERROR;
                        }

                default:
                        return COMPST_ERROR;
                }
                break;

        default:
                rxe_dbg_qp(qp, "unexpected opcode\n");
        }

        return COMPST_ERROR;
}

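/* copy the payload of a read response packet into the local buffers
 * described by the WQE's DMA state; complete the WQE only once the
 * last response packet has arrived and no residual bytes remain
 */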
static inline enum comp_state do_read(struct rxe_qp *qp,
                                      struct rxe_pkt_info *pkt,
                                      struct rxe_send_wqe *wqe)
{
        int ret;

        ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
                        &wqe->dma, payload_addr(pkt),
                        payload_size(pkt), RXE_TO_MR_OBJ);
        if (ret) {
                wqe->status = IB_WC_LOC_PROT_ERR;
                return COMPST_ERROR;
        }

        if (wqe->dma.resid == 0 && (pkt->mask & RXE_END_MASK))
                return COMPST_COMP_ACK;

        return COMPST_UPDATE_COMP;
}

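/* copy the 64-bit original value carried in an atomic ack into the
 * local buffer described by the WQE's DMA state
 */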
static inline enum comp_state do_atomic(struct rxe_qp *qp,
                                        struct rxe_pkt_info *pkt,
                                        struct rxe_send_wqe *wqe)
{
        int ret;

        u64 atomic_orig = atmack_orig(pkt);

        ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
                        &wqe->dma, &atomic_orig,
                        sizeof(u64), RXE_TO_MR_OBJ);
        if (ret) {
                wqe->status = IB_WC_LOC_PROT_ERR;
                return COMPST_ERROR;
        }

        return COMPST_COMP_ACK;
}

static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
                          struct rxe_cqe *cqe)
{
        struct ib_wc *wc = &cqe->ibwc;
        struct ib_uverbs_wc *uwc = &cqe->uibwc;

        memset(cqe, 0, sizeof(*cqe));

        if (!qp->is_user) {
                wc->wr_id = wqe->wr.wr_id;
                wc->status = wqe->status;
                wc->qp = &qp->ibqp;
        } else {
                uwc->wr_id = wqe->wr.wr_id;
                uwc->status = wqe->status;
                uwc->qp_num = qp->ibqp.qp_num;
        }

        if (wqe->status == IB_WC_SUCCESS) {
                if (!qp->is_user) {
                        wc->opcode = wr_to_wc_opcode(wqe->wr.opcode);
                        if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
                            wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
                                wc->wc_flags = IB_WC_WITH_IMM;
                        wc->byte_len = wqe->dma.length;
                } else {
                        uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode);
                        if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
                            wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
                                uwc->wc_flags = IB_WC_WITH_IMM;
                        uwc->byte_len = wqe->dma.length;
                }
        } else {
                if (wqe->status != IB_WC_WR_FLUSH_ERR)
                        rxe_err_qp(qp, "non-flush error status = %d\n",
                                   wqe->status);
        }
}

/*
 * IBA Spec. Section 10.7.3.1 SIGNALED COMPLETIONS
 * ---------8<---------8<-------------
 * ...Note that if a completion error occurs, a Work Completion
 * will always be generated, even if the signaling
 * indicator requests an Unsignaled Completion.
 * ---------8<---------8<-------------
 */
static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
        struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
        struct rxe_cqe cqe;
        bool post;

        /* do we need to post a completion */
        post = ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) ||
                (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
                wqe->status != IB_WC_SUCCESS);

        if (post)
                make_send_cqe(qp, wqe, &cqe);

        queue_advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT);

        if (post)
                rxe_cq_post(qp->scq, &cqe, 0);

        if (wqe->wr.opcode == IB_WR_SEND ||
            wqe->wr.opcode == IB_WR_SEND_WITH_IMM ||
            wqe->wr.opcode == IB_WR_SEND_WITH_INV)
                rxe_counter_inc(rxe, RXE_CNT_RDMA_SEND);

        /*
         * we completed something so let req run again
         * if it is trying to fence
         */
        if (qp->req.wait_fence) {
                qp->req.wait_fence = 0;
                rxe_sched_task(&qp->req.task);
        }
}

static void comp_check_sq_drain_done(struct rxe_qp *qp)
{
        unsigned long flags;

        spin_lock_irqsave(&qp->state_lock, flags);
        if (unlikely(qp_state(qp) == IB_QPS_SQD)) {
                if (qp->attr.sq_draining && qp->comp.psn == qp->req.psn) {
                        qp->attr.sq_draining = 0;
                        spin_unlock_irqrestore(&qp->state_lock, flags);

                        if (qp->ibqp.event_handler) {
                                struct ib_event ev;

                                ev.device = qp->ibqp.device;
                                ev.element.qp = &qp->ibqp;
                                ev.event = IB_EVENT_SQ_DRAINED;
                                qp->ibqp.event_handler(&ev,
                                        qp->ibqp.qp_context);
                        }
                        return;
                }
        }
        spin_unlock_irqrestore(&qp->state_lock, flags);
}

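/* finish an acked WQE: return the rd_atomic credit it held, if any,
 * check whether an SQ drain has just completed, and post the
 * completion
 */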
static inline enum comp_state complete_ack(struct rxe_qp *qp,
                                           struct rxe_pkt_info *pkt,
                                           struct rxe_send_wqe *wqe)
{
        if (wqe->has_rd_atomic) {
                wqe->has_rd_atomic = 0;
                atomic_inc(&qp->req.rd_atomic);
                if (qp->req.need_rd_atomic) {
                        qp->comp.timeout_retry = 0;
                        qp->req.need_rd_atomic = 0;
                        rxe_sched_task(&qp->req.task);
                }
        }

        comp_check_sq_drain_done(qp);

        do_complete(qp, wqe);

        if (psn_compare(pkt->psn, qp->comp.psn) >= 0)
                return COMPST_UPDATE_COMP;
        else
                return COMPST_DONE;
}

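/* complete a WQE without a matching ack (its state is done, or a later
 * response implicitly acked it) and, if it was still pending, advance
 * the completion PSN past its last PSN
 */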
static inline enum comp_state complete_wqe(struct rxe_qp *qp,
                                           struct rxe_pkt_info *pkt,
                                           struct rxe_send_wqe *wqe)
{
        if (pkt && wqe->state == wqe_state_pending) {
                if (psn_compare(wqe->last_psn, qp->comp.psn) >= 0) {
                        qp->comp.psn = (wqe->last_psn + 1) & BTH_PSN_MASK;
                        qp->comp.opcode = -1;
                }

                if (qp->req.wait_psn) {
                        qp->req.wait_psn = 0;
                        rxe_sched_task(&qp->req.task);
                }
        }

        do_complete(qp, wqe);

        return COMPST_GET_WQE;
}

/* drain incoming response packet queue */
static void drain_resp_pkts(struct rxe_qp *qp)
{
        struct sk_buff *skb;

        while ((skb = skb_dequeue(&qp->resp_pkts))) {
                rxe_put(qp);
                kfree_skb(skb);
                ib_device_put(qp->ibqp.device);
        }
}

/* complete send wqe with flush error */
static int flush_send_wqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
        struct rxe_cqe cqe = {};
        struct ib_wc *wc = &cqe.ibwc;
        struct ib_uverbs_wc *uwc = &cqe.uibwc;
        int err;

        if (qp->is_user) {
                uwc->wr_id = wqe->wr.wr_id;
                uwc->status = IB_WC_WR_FLUSH_ERR;
                uwc->qp_num = qp->ibqp.qp_num;
        } else {
                wc->wr_id = wqe->wr.wr_id;
                wc->status = IB_WC_WR_FLUSH_ERR;
                wc->qp = &qp->ibqp;
        }

        err = rxe_cq_post(qp->scq, &cqe, 0);
        if (err)
                rxe_dbg_cq(qp->scq, "post cq failed, err = %d\n", err);

        return err;
}

/* drain and optionally complete the send queue
 * if unable to complete a wqe, i.e. cq is full, stop
 * completing and flush the remaining wqes
 */
static void flush_send_queue(struct rxe_qp *qp, bool notify)
{
        struct rxe_send_wqe *wqe;
        struct rxe_queue *q = qp->sq.queue;
        int err;

        /* send queue never got created. nothing to do. */
        if (!qp->sq.queue)
                return;

        while ((wqe = queue_head(q, q->type))) {
                if (notify) {
                        err = flush_send_wqe(qp, wqe);
                        if (err)
                                notify = 0;
                }
                queue_advance_consumer(q, q->type);
        }
}

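/* drop the references taken when the response packet was queued:
 * the skb itself, the QP, and the ib_device
 */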
static void free_pkt(struct rxe_pkt_info *pkt)
{
        struct sk_buff *skb = PKT_TO_SKB(pkt);
        struct rxe_qp *qp = pkt->qp;
        struct ib_device *dev = qp->ibqp.device;

        kfree_skb(skb);
        rxe_put(qp);
        ib_device_put(dev);
}

/* reset the retry timer if
 * - QP is type RC
 * - there is a packet sent by the requester that
 *   might be acked (we still might get spurious
 *   timeouts but try to keep them as few as possible)
 * - the timeout parameter is set
 * - the QP is alive
 */
static void reset_retry_timer(struct rxe_qp *qp)
{
        unsigned long flags;

        if (qp_type(qp) == IB_QPT_RC && qp->qp_timeout_jiffies) {
                spin_lock_irqsave(&qp->state_lock, flags);
                if (qp_state(qp) >= IB_QPS_RTS &&
                    psn_compare(qp->req.psn, qp->comp.psn) > 0)
                        mod_timer(&qp->retrans_timer,
                                  jiffies + qp->qp_timeout_jiffies);
                spin_unlock_irqrestore(&qp->state_lock, flags);
        }
}

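/* run the completer state machine. Each pass dequeues at most one
 * response packet and drives it (or a retransmit timeout) through the
 * states in comp_state_name until reaching DONE or EXIT. Returns 0 to
 * be called again or -EAGAIN to stop until new work arrives.
 */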
int rxe_completer(struct rxe_qp *qp)
{
        struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
        struct rxe_send_wqe *wqe = NULL;
        struct sk_buff *skb = NULL;
        struct rxe_pkt_info *pkt = NULL;
        enum comp_state state;
        int ret;
        unsigned long flags;

        spin_lock_irqsave(&qp->state_lock, flags);
        if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
            qp_state(qp) == IB_QPS_RESET) {
                bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);

                drain_resp_pkts(qp);
                flush_send_queue(qp, notify);
                spin_unlock_irqrestore(&qp->state_lock, flags);
                goto exit;
        }
        spin_unlock_irqrestore(&qp->state_lock, flags);

        if (qp->comp.timeout) {
                qp->comp.timeout_retry = 1;
                qp->comp.timeout = 0;
        } else {
                qp->comp.timeout_retry = 0;
        }

        if (qp->req.need_retry)
                goto exit;

        state = COMPST_GET_ACK;

        while (1) {
                rxe_dbg_qp(qp, "state = %s\n", comp_state_name[state]);
                switch (state) {
                case COMPST_GET_ACK:
                        skb = skb_dequeue(&qp->resp_pkts);
                        if (skb) {
                                pkt = SKB_TO_PKT(skb);
                                qp->comp.timeout_retry = 0;
                        }
                        state = COMPST_GET_WQE;
                        break;

                case COMPST_GET_WQE:
                        state = get_wqe(qp, pkt, &wqe);
                        break;

                case COMPST_CHECK_PSN:
                        state = check_psn(qp, pkt, wqe);
                        break;

                case COMPST_CHECK_ACK:
                        state = check_ack(qp, pkt, wqe);
                        break;

                case COMPST_READ:
                        state = do_read(qp, pkt, wqe);
                        break;

                case COMPST_ATOMIC:
                        state = do_atomic(qp, pkt, wqe);
                        break;

                case COMPST_WRITE_SEND:
                        if (wqe->state == wqe_state_pending &&
                            wqe->last_psn == pkt->psn)
                                state = COMPST_COMP_ACK;
                        else
                                state = COMPST_UPDATE_COMP;
                        break;

                case COMPST_COMP_ACK:
                        state = complete_ack(qp, pkt, wqe);
                        break;

                case COMPST_COMP_WQE:
                        state = complete_wqe(qp, pkt, wqe);
                        break;

                case COMPST_UPDATE_COMP:
                        if (pkt->mask & RXE_END_MASK)
                                qp->comp.opcode = -1;
                        else
                                qp->comp.opcode = pkt->opcode;

                        if (psn_compare(pkt->psn, qp->comp.psn) >= 0)
                                qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK;

                        if (qp->req.wait_psn) {
                                qp->req.wait_psn = 0;
                                rxe_sched_task(&qp->req.task);
                        }

                        state = COMPST_DONE;
                        break;

                case COMPST_DONE:
                        goto done;

                case COMPST_EXIT:
                        if (qp->comp.timeout_retry && wqe) {
                                state = COMPST_ERROR_RETRY;
                                break;
                        }

                        reset_retry_timer(qp);
                        goto exit;

                case COMPST_ERROR_RETRY:
                        /* we come here if the retry timer fired and we did
                         * not receive a response packet. try to retry the send
                         * queue if that makes sense and the limits have not
                         * been exceeded. remember that some timeouts are
                         * spurious since we do not reset the timer but kick
                         * it down the road or let it expire
                         */

                        /* there is nothing to retry in this case */
                        if (!wqe || (wqe->state == wqe_state_posted))
                                goto exit;

                        /* if we've started a retry, don't start another
                         * retry sequence, unless this is a timeout.
                         */
                        if (qp->comp.started_retry &&
                            !qp->comp.timeout_retry)
                                goto done;

                        if (qp->comp.retry_cnt > 0) {
                                if (qp->comp.retry_cnt != 7)
                                        qp->comp.retry_cnt--;

                                /* no point in retrying if we have already
                                 * seen the last ack that the requester could
                                 * have caused
                                 */
                                if (psn_compare(qp->req.psn,
                                                qp->comp.psn) > 0) {
                                        /* tell the requester to retry the
                                         * send queue next time around
                                         */
                                        rxe_counter_inc(rxe,
                                                        RXE_CNT_COMP_RETRY);
                                        qp->req.need_retry = 1;
                                        qp->comp.started_retry = 1;
                                        rxe_sched_task(&qp->req.task);
                                }
                                goto done;

                        } else {
                                rxe_counter_inc(rxe, RXE_CNT_RETRY_EXCEEDED);
                                wqe->status = IB_WC_RETRY_EXC_ERR;
                                state = COMPST_ERROR;
                        }
                        break;

                case COMPST_RNR_RETRY:
                        /* we come here if we received an RNR NAK */
                        if (qp->comp.rnr_retry > 0) {
                                /* an rnr_retry count of 7 means retry forever */
                                if (qp->comp.rnr_retry != 7)
                                        qp->comp.rnr_retry--;

                                /* don't start a retry flow until the
                                 * rnr timer has fired
                                 */
                                qp->req.wait_for_rnr_timer = 1;
                                rxe_dbg_qp(qp, "set rnr nak timer\n");
                                // TODO who protects from destroy_qp??
                                mod_timer(&qp->rnr_nak_timer,
                                          jiffies + rnrnak_jiffies(aeth_syn(pkt)
                                                & ~AETH_TYPE_MASK));
                                goto exit;
                        } else {
                                rxe_counter_inc(rxe,
                                                RXE_CNT_RNR_RETRY_EXCEEDED);
                                wqe->status = IB_WC_RNR_RETRY_EXC_ERR;
                                state = COMPST_ERROR;
                        }
                        break;

                case COMPST_ERROR:
                        WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS);
                        do_complete(qp, wqe);
                        rxe_qp_error(qp);
                        goto exit;
                }
        }

        /* A non-zero return value will cause rxe_do_task to
         * exit its loop and end the work item. A zero return
         * will continue looping and return to rxe_completer
         */
done:
        ret = 0;
        goto out;
exit:
        ret = -EAGAIN;
out:
        if (pkt)
                free_pkt(pkt);
        return ret;
}