// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include <linux/skbuff.h>
#include <crypto/hash.h>

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"

static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
		       u32 opcode);

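/* advance the dma state of a partially completed send or write wqe by
 * npsn packets so that a retry resumes at the first unacknowledged packet
 */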
static inline void retry_first_write_send(struct rxe_qp *qp,
					  struct rxe_send_wqe *wqe, int npsn)
{
	int i;

	for (i = 0; i < npsn; i++) {
		int to_send = (wqe->dma.resid > qp->mtu) ?
				qp->mtu : wqe->dma.resid;

		qp->req.opcode = next_opcode(qp, wqe,
					     wqe->wr.opcode);

		if (wqe->wr.send_flags & IB_SEND_INLINE) {
			wqe->dma.resid -= to_send;
			wqe->dma.sge_offset += to_send;
		} else {
			advance_dma_data(&wqe->dma, to_send);
		}
	}
}

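/* rewind the send queue to the oldest uncompleted wqe and reset the
 * requester psn and per-wqe dma state so that lost packets can be resent
 */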
static void req_retry(struct rxe_qp *qp)
{
	struct rxe_send_wqe *wqe;
	unsigned int wqe_index;
	unsigned int mask;
	int npsn;
	int first = 1;
	struct rxe_queue *q = qp->sq.queue;
	unsigned int cons;
	unsigned int prod;

	cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT);
	prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT);

	qp->req.wqe_index = cons;
	qp->req.psn = qp->comp.psn;
	qp->req.opcode = -1;

	for (wqe_index = cons; wqe_index != prod;
			wqe_index = queue_next_index(q, wqe_index)) {
		wqe = queue_addr_from_index(qp->sq.queue, wqe_index);
		mask = wr_opcode_mask(wqe->wr.opcode, qp);

		if (wqe->state == wqe_state_posted)
			break;

		if (wqe->state == wqe_state_done)
			continue;

		wqe->iova = (mask & WR_ATOMIC_MASK) ?
			     wqe->wr.wr.atomic.remote_addr :
			     (mask & WR_READ_OR_WRITE_MASK) ?
			     wqe->wr.wr.rdma.remote_addr :
			     0;

		if (!first || (mask & WR_READ_MASK) == 0) {
			wqe->dma.resid = wqe->dma.length;
			wqe->dma.cur_sge = 0;
			wqe->dma.sge_offset = 0;
		}

		if (first) {
			first = 0;

			if (mask & WR_WRITE_OR_SEND_MASK) {
				npsn = (qp->comp.psn - wqe->first_psn) &
					BTH_PSN_MASK;
				retry_first_write_send(qp, wqe, npsn);
			}

			if (mask & WR_READ_MASK) {
				npsn = (wqe->dma.length - wqe->dma.resid) /
					qp->mtu;
				wqe->iova += npsn * qp->mtu;
			}
		}

		wqe->state = wqe_state_posted;
	}
}

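/* the RNR NAK timer has expired; request a send queue retry and
 * reschedule the requester task
 */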
void rnr_nak_timer(struct timer_list *t)
{
	struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer);
	unsigned long flags;

	rxe_dbg_qp(qp, "nak timer fired\n");

	spin_lock_irqsave(&qp->state_lock, flags);
	if (qp->valid) {
		/* request a send queue retry */
		qp->req.need_retry = 1;
		qp->req.wait_for_rnr_timer = 0;
		rxe_sched_task(&qp->req.task);
	}
	spin_unlock_irqrestore(&qp->state_lock, flags);
}

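/* if the qp is in the SQD state and the completer has retired all
 * outstanding wqes, clear sq_draining and report IB_EVENT_SQ_DRAINED
 * to the consumer
 */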
static void req_check_sq_drain_done(struct rxe_qp *qp)
{
	struct rxe_queue *q;
	unsigned int index;
	unsigned int cons;
	struct rxe_send_wqe *wqe;
	unsigned long flags;

	spin_lock_irqsave(&qp->state_lock, flags);
	if (qp_state(qp) == IB_QPS_SQD) {
		q = qp->sq.queue;
		index = qp->req.wqe_index;
		cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT);
		wqe = queue_addr_from_index(q, cons);

		/* check to see if we are drained;
		 * state_lock used by requester and completer
		 */
		do {
			if (!qp->attr.sq_draining)
				/* comp just finished */
				break;

			if (wqe && ((index != cons) ||
				(wqe->state != wqe_state_posted)))
				/* comp not done yet */
				break;

			qp->attr.sq_draining = 0;
			spin_unlock_irqrestore(&qp->state_lock, flags);

			if (qp->ibqp.event_handler) {
				struct ib_event ev;

				ev.device = qp->ibqp.device;
				ev.element.qp = &qp->ibqp;
				ev.event = IB_EVENT_SQ_DRAINED;
				qp->ibqp.event_handler(&ev,
					qp->ibqp.qp_context);
			}
			return;
		} while (0);
	}
	spin_unlock_irqrestore(&qp->state_lock, flags);
}

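/* get the next wqe to process or NULL if the send queue is empty */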
static struct rxe_send_wqe *__req_next_wqe(struct rxe_qp *qp)
{
	struct rxe_queue *q = qp->sq.queue;
	unsigned int index = qp->req.wqe_index;
	unsigned int prod;

	prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT);
	if (index == prod)
		return NULL;
	else
		return queue_addr_from_index(q, index);
}

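/* get the next wqe to process and compute its opcode mask; returns
 * NULL if the queue is empty or if the qp is draining and the wqe has
 * not started processing yet
 */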
static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
{
	struct rxe_send_wqe *wqe;
	unsigned long flags;

	req_check_sq_drain_done(qp);

	wqe = __req_next_wqe(qp);
	if (wqe == NULL)
		return NULL;

	spin_lock_irqsave(&qp->state_lock, flags);
	if (unlikely((qp_state(qp) == IB_QPS_SQD) &&
		     (wqe->state != wqe_state_processing))) {
		spin_unlock_irqrestore(&qp->state_lock, flags);
		return NULL;
	}
	spin_unlock_irqrestore(&qp->state_lock, flags);

	wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp);
	return wqe;
}

/**
 * rxe_wqe_is_fenced - check if next wqe is fenced
 * @qp: the queue pair
 * @wqe: the next wqe
 *
 * Returns: 1 if wqe needs to wait
 *	    0 if wqe is ready to go
 */
static int rxe_wqe_is_fenced(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	/* Local invalidate fence (LIF) see IBA 10.6.5.1
	 * Requires ALL previous operations on the send queue
	 * are complete. Make mandatory for the rxe driver.
	 */
	if (wqe->wr.opcode == IB_WR_LOCAL_INV)
		return qp->req.wqe_index != queue_get_consumer(qp->sq.queue,
						QUEUE_TYPE_FROM_CLIENT);

	/* Fence see IBA 10.8.3.3
	 * Requires that all previous read and atomic operations
	 * are complete.
	 */
	return (wqe->wr.send_flags & IB_SEND_FENCE) &&
		atomic_read(&qp->req.rd_atomic) != qp->attr.max_rd_atomic;
}

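/* choose the RC opcode for the next packet of a wqe based on the
 * previous packet sent and whether the remaining payload fits in a
 * single packet
 */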
static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits)
{
	switch (opcode) {
	case IB_WR_RDMA_WRITE:
		if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_LAST :
				IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_ONLY :
				IB_OPCODE_RC_RDMA_WRITE_FIRST;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
				IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_RC_RDMA_WRITE_FIRST;

	case IB_WR_SEND:
		if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_RC_SEND_LAST :
				IB_OPCODE_RC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_SEND_ONLY :
				IB_OPCODE_RC_SEND_FIRST;

	case IB_WR_SEND_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE :
				IB_OPCODE_RC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_RC_SEND_FIRST;

	case IB_WR_FLUSH:
		return IB_OPCODE_RC_FLUSH;

	case IB_WR_RDMA_READ:
		return IB_OPCODE_RC_RDMA_READ_REQUEST;

	case IB_WR_ATOMIC_CMP_AND_SWP:
		return IB_OPCODE_RC_COMPARE_SWAP;

	case IB_WR_ATOMIC_FETCH_AND_ADD:
		return IB_OPCODE_RC_FETCH_ADD;

	case IB_WR_SEND_WITH_INV:
		if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
			return fits ? IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE :
				IB_OPCODE_RC_SEND_MIDDLE;
		else
			return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE :
				IB_OPCODE_RC_SEND_FIRST;

	case IB_WR_ATOMIC_WRITE:
		return IB_OPCODE_RC_ATOMIC_WRITE;

	case IB_WR_REG_MR:
	case IB_WR_LOCAL_INV:
		return opcode;
	}

	return -EINVAL;
}

static int next_opcode_uc(struct rxe_qp *qp, u32 opcode, int fits)
{
	switch (opcode) {
	case IB_WR_RDMA_WRITE:
		if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_LAST :
				IB_OPCODE_UC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_ONLY :
				IB_OPCODE_UC_RDMA_WRITE_FIRST;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
				IB_OPCODE_UC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_UC_RDMA_WRITE_FIRST;

	case IB_WR_SEND:
		if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_UC_SEND_LAST :
				IB_OPCODE_UC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_SEND_ONLY :
				IB_OPCODE_UC_SEND_FIRST;

	case IB_WR_SEND_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE :
				IB_OPCODE_UC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_UC_SEND_FIRST;
	}

	return -EINVAL;
}

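/* choose the opcode for the next packet of a wqe according to the qp
 * type; returns -EINVAL if the work request opcode is not supported
 * for this qp type
 */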
static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
		       u32 opcode)
{
	int fits = (wqe->dma.resid <= qp->mtu);

	switch (qp_type(qp)) {
	case IB_QPT_RC:
		return next_opcode_rc(qp, opcode, fits);

	case IB_QPT_UC:
		return next_opcode_uc(qp, opcode, fits);

	case IB_QPT_UD:
	case IB_QPT_GSI:
		switch (opcode) {
		case IB_WR_SEND:
			return IB_OPCODE_UD_SEND_ONLY;

		case IB_WR_SEND_WITH_IMM:
			return IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
		}
		break;

	default:
		break;
	}

	return -EINVAL;
}

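/* reserve an rd_atomic slot for a new read or atomic request;
 * returns -EAGAIN if the initiator depth is currently exhausted
 */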
static inline int check_init_depth(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	int depth;

	if (wqe->has_rd_atomic)
		return 0;

	qp->req.need_rd_atomic = 1;
	depth = atomic_dec_return(&qp->req.rd_atomic);

	if (depth >= 0) {
		qp->req.need_rd_atomic = 0;
		wqe->has_rd_atomic = 1;
		return 0;
	}

	atomic_inc(&qp->req.rd_atomic);
	return -EAGAIN;
}

static inline int get_mtu(struct rxe_qp *qp)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

	if ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC))
		return qp->mtu;

	return rxe->port.mtu_cap;
}

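/* allocate an skb for the next request packet and fill in the base
 * transport header and any optional headers required by the opcode
 */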
static struct sk_buff *init_req_packet(struct rxe_qp *qp,
				       struct rxe_av *av,
				       struct rxe_send_wqe *wqe,
				       int opcode, u32 payload,
				       struct rxe_pkt_info *pkt)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct sk_buff *skb;
	struct rxe_send_wr *ibwr = &wqe->wr;
	int pad = (-payload) & 0x3;
	int paylen;
	int solicited;
	u32 qp_num;
	int ack_req;

	/* length from start of bth to end of icrc */
	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
	pkt->paylen = paylen;

	/* init skb */
	skb = rxe_init_packet(rxe, av, paylen, pkt);
	if (unlikely(!skb))
		return NULL;

	/* init bth */
	solicited = (ibwr->send_flags & IB_SEND_SOLICITED) &&
			(pkt->mask & RXE_END_MASK) &&
			((pkt->mask & (RXE_SEND_MASK)) ||
			(pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) ==
			(RXE_WRITE_MASK | RXE_IMMDT_MASK));

	qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn :
					 qp->attr.dest_qp_num;

	ack_req = ((pkt->mask & RXE_END_MASK) ||
		(qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK));
	if (ack_req)
		qp->req.noack_pkts = 0;

	bth_init(pkt, pkt->opcode, solicited, 0, pad, IB_DEFAULT_PKEY_FULL,
		 qp_num, ack_req, pkt->psn);

	/* init optional headers */
	if (pkt->mask & RXE_RETH_MASK) {
		if (pkt->mask & RXE_FETH_MASK)
			reth_set_rkey(pkt, ibwr->wr.flush.rkey);
		else
			reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
		reth_set_va(pkt, wqe->iova);
		reth_set_len(pkt, wqe->dma.resid);
	}

	/* Fill Flush Extension Transport Header */
	if (pkt->mask & RXE_FETH_MASK)
		feth_init(pkt, ibwr->wr.flush.type, ibwr->wr.flush.level);

	if (pkt->mask & RXE_IMMDT_MASK)
		immdt_set_imm(pkt, ibwr->ex.imm_data);

	if (pkt->mask & RXE_IETH_MASK)
		ieth_set_rkey(pkt, ibwr->ex.invalidate_rkey);

	if (pkt->mask & RXE_ATMETH_MASK) {
		atmeth_set_va(pkt, wqe->iova);
		if (opcode == IB_OPCODE_RC_COMPARE_SWAP) {
			atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap);
			atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add);
		} else {
			atmeth_set_swap_add(pkt, ibwr->wr.atomic.compare_add);
		}
		atmeth_set_rkey(pkt, ibwr->wr.atomic.rkey);
	}

	if (pkt->mask & RXE_DETH_MASK) {
		if (qp->ibqp.qp_num == 1)
			deth_set_qkey(pkt, GSI_QKEY);
		else
			deth_set_qkey(pkt, ibwr->wr.ud.remote_qkey);
		deth_set_sqp(pkt, qp->ibqp.qp_num);
	}

	return skb;
}

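/* prepare the packet for transmission and copy the payload from the
 * wqe (inline data, sg list or atomic write data) into the skb
 */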
static int finish_packet(struct rxe_qp *qp, struct rxe_av *av,
			 struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt,
			 struct sk_buff *skb, u32 payload)
{
	int err;

	err = rxe_prepare(av, pkt, skb);
	if (err)
		return err;

	if (pkt->mask & RXE_WRITE_OR_SEND_MASK) {
		if (wqe->wr.send_flags & IB_SEND_INLINE) {
			u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset];

			memcpy(payload_addr(pkt), tmp, payload);

			wqe->dma.resid -= payload;
			wqe->dma.sge_offset += payload;
		} else {
			err = copy_data(qp->pd, 0, &wqe->dma,
					payload_addr(pkt), payload,
					RXE_FROM_MR_OBJ);
			if (err)
				return err;
		}
		if (bth_pad(pkt)) {
			u8 *pad = payload_addr(pkt) + payload;

			memset(pad, 0, bth_pad(pkt));
		}
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		/* oA19-2: shall have no payload. */
		wqe->dma.resid = 0;
	}

	if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
		memcpy(payload_addr(pkt), wqe->dma.atomic_wr, payload);
		wqe->dma.resid -= payload;
	}

	return 0;
}

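/* update the wqe state after a packet for it has been built; the last
 * packet of an RC wqe leaves the wqe pending an acknowledgement while
 * intermediate packets leave it in the processing state
 */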
static void update_wqe_state(struct rxe_qp *qp,
		struct rxe_send_wqe *wqe,
		struct rxe_pkt_info *pkt)
{
	if (pkt->mask & RXE_END_MASK) {
		if (qp_type(qp) == IB_QPT_RC)
			wqe->state = wqe_state_pending;
	} else {
		wqe->state = wqe_state_processing;
	}
}

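/* record the first and last psn of the wqe when its first packet is
 * built and advance the requester psn for the next packet
 */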
static void update_wqe_psn(struct rxe_qp *qp,
			   struct rxe_send_wqe *wqe,
			   struct rxe_pkt_info *pkt,
			   u32 payload)
{
	/* number of packets left to send including current one */
	int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu;

	/* handle zero length packet case */
	if (num_pkt == 0)
		num_pkt = 1;

	if (pkt->mask & RXE_START_MASK) {
		wqe->first_psn = qp->req.psn;
		wqe->last_psn = (qp->req.psn + num_pkt - 1) & BTH_PSN_MASK;
	}

	if (pkt->mask & RXE_READ_MASK)
		qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK;
	else
		qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
}

static void save_state(struct rxe_send_wqe *wqe,
		       struct rxe_qp *qp,
		       struct rxe_send_wqe *rollback_wqe,
		       u32 *rollback_psn)
{
	rollback_wqe->state = wqe->state;
	rollback_wqe->first_psn = wqe->first_psn;
	rollback_wqe->last_psn = wqe->last_psn;
	rollback_wqe->dma = wqe->dma;
	*rollback_psn = qp->req.psn;
}

static void rollback_state(struct rxe_send_wqe *wqe,
			   struct rxe_qp *qp,
			   struct rxe_send_wqe *rollback_wqe,
			   u32 rollback_psn)
{
	wqe->state = rollback_wqe->state;
	wqe->first_psn = rollback_wqe->first_psn;
	wqe->last_psn = rollback_wqe->last_psn;
	wqe->dma = rollback_wqe->dma;
	qp->req.psn = rollback_psn;
}

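/* record the opcode just sent, advance the send queue index at the end
 * of a wqe and arm the retransmit timer if it is not already running
 */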
static void update_state(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	qp->req.opcode = pkt->opcode;

	if (pkt->mask & RXE_END_MASK)
		qp->req.wqe_index = queue_next_index(qp->sq.queue,
						     qp->req.wqe_index);

	qp->need_req_skb = 0;

	if (qp->qp_timeout_jiffies && !timer_pending(&qp->retrans_timer))
		mod_timer(&qp->retrans_timer,
			  jiffies + qp->qp_timeout_jiffies);
}

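/* execute a local work request (invalidate, fast register or bind mw)
 * directly and schedule the completer since no ack will arrive for it
 */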
static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	u8 opcode = wqe->wr.opcode;
	u32 rkey;
	int ret;

	switch (opcode) {
	case IB_WR_LOCAL_INV:
		rkey = wqe->wr.ex.invalidate_rkey;
		if (rkey_is_mw(rkey))
			ret = rxe_invalidate_mw(qp, rkey);
		else
			ret = rxe_invalidate_mr(qp, rkey);

		if (unlikely(ret)) {
			wqe->status = IB_WC_LOC_QP_OP_ERR;
			return ret;
		}
		break;
	case IB_WR_REG_MR:
		ret = rxe_reg_fast_mr(qp, wqe);
		if (unlikely(ret)) {
			wqe->status = IB_WC_LOC_QP_OP_ERR;
			return ret;
		}
		break;
	case IB_WR_BIND_MW:
		ret = rxe_bind_mw(qp, wqe);
		if (unlikely(ret)) {
			wqe->status = IB_WC_MW_BIND_ERR;
			return ret;
		}
		break;
	default:
		rxe_dbg_qp(qp, "Unexpected send wqe opcode %d\n", opcode);
		wqe->status = IB_WC_LOC_QP_OP_ERR;
		return -EINVAL;
	}

	wqe->state = wqe_state_done;
	wqe->status = IB_WC_SUCCESS;
	qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);

	/* There is no ack coming for local work requests
	 * which can lead to a deadlock. So go ahead and complete
	 * it now.
	 */
	rxe_sched_task(&qp->comp.task);

	return 0;
}

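/* rxe_requester - build and send one request packet for the next wqe
 * on the send queue. Returns 0 if a packet was sent or a local
 * operation completed, otherwise -EAGAIN.
 */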
int rxe_requester(struct rxe_qp *qp)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_pkt_info pkt;
	struct sk_buff *skb;
	struct rxe_send_wqe *wqe;
	enum rxe_hdr_mask mask;
	u32 payload;
	int mtu;
	int opcode;
	int err;
	int ret;
	struct rxe_send_wqe rollback_wqe;
	u32 rollback_psn;
	struct rxe_queue *q = qp->sq.queue;
	struct rxe_ah *ah;
	struct rxe_av *av;
	unsigned long flags;

	spin_lock_irqsave(&qp->state_lock, flags);
	if (unlikely(!qp->valid)) {
		spin_unlock_irqrestore(&qp->state_lock, flags);
		goto exit;
	}

	if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
		wqe = __req_next_wqe(qp);
		spin_unlock_irqrestore(&qp->state_lock, flags);
		if (wqe)
			goto err;
		else
			goto exit;
	}

	if (unlikely(qp_state(qp) == IB_QPS_RESET)) {
		qp->req.wqe_index = queue_get_consumer(q,
						QUEUE_TYPE_FROM_CLIENT);
		qp->req.opcode = -1;
		qp->req.need_rd_atomic = 0;
		qp->req.wait_psn = 0;
		qp->req.need_retry = 0;
		qp->req.wait_for_rnr_timer = 0;
		spin_unlock_irqrestore(&qp->state_lock, flags);
		goto exit;
	}
	spin_unlock_irqrestore(&qp->state_lock, flags);

	/* we come here if the retransmit timer has fired
	 * or if the rnr timer has fired. If the retransmit
	 * timer fires while we are processing an RNR NAK wait
	 * until the rnr timer has fired before starting the
	 * retry flow
	 */
	if (unlikely(qp->req.need_retry && !qp->req.wait_for_rnr_timer)) {
		req_retry(qp);
		qp->req.need_retry = 0;
	}

	wqe = req_next_wqe(qp);
	if (unlikely(!wqe))
		goto exit;

	if (rxe_wqe_is_fenced(qp, wqe)) {
		qp->req.wait_fence = 1;
		goto exit;
	}

	if (wqe->mask & WR_LOCAL_OP_MASK) {
		err = rxe_do_local_ops(qp, wqe);
		if (unlikely(err))
			goto err;
		else
			goto done;
	}

	if (unlikely(qp_type(qp) == IB_QPT_RC &&
		psn_compare(qp->req.psn, (qp->comp.psn +
				RXE_MAX_UNACKED_PSNS)) > 0)) {
		qp->req.wait_psn = 1;
		goto exit;
	}

	/* Limit the number of inflight SKBs per QP */
	if (unlikely(atomic_read(&qp->skb_out) >
		     RXE_INFLIGHT_SKBS_PER_QP_HIGH)) {
		qp->need_req_skb = 1;
		goto exit;
	}

	opcode = next_opcode(qp, wqe, wqe->wr.opcode);
	if (unlikely(opcode < 0)) {
		wqe->status = IB_WC_LOC_QP_OP_ERR;
		goto err;
	}

	mask = rxe_opcode[opcode].mask;
	if (unlikely(mask & (RXE_READ_OR_ATOMIC_MASK |
			RXE_ATOMIC_WRITE_MASK))) {
		if (check_init_depth(qp, wqe))
			goto exit;
	}

	mtu = get_mtu(qp);
	payload = (mask & (RXE_WRITE_OR_SEND_MASK | RXE_ATOMIC_WRITE_MASK)) ?
			wqe->dma.resid : 0;
	if (payload > mtu) {
		if (qp_type(qp) == IB_QPT_UD) {
			/* C10-93.1.1: If the total sum of all the buffer lengths specified for a
			 * UD message exceeds the MTU of the port as returned by QueryHCA, the CI
			 * shall not emit any packets for this message. Further, the CI shall not
			 * generate an error due to this condition.
			 */

			/* fake a successful UD send */
			wqe->first_psn = qp->req.psn;
			wqe->last_psn = qp->req.psn;
			qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
			qp->req.opcode = IB_OPCODE_UD_SEND_ONLY;
			qp->req.wqe_index = queue_next_index(qp->sq.queue,
						       qp->req.wqe_index);
			wqe->state = wqe_state_done;
			wqe->status = IB_WC_SUCCESS;
			rxe_sched_task(&qp->comp.task);
			goto done;
		}
		payload = mtu;
	}

	pkt.rxe = rxe;
	pkt.opcode = opcode;
	pkt.qp = qp;
	pkt.psn = qp->req.psn;
	pkt.mask = rxe_opcode[opcode].mask;
	pkt.wqe = wqe;

	/* save wqe state before we build and send packet */
	save_state(wqe, qp, &rollback_wqe, &rollback_psn);

	av = rxe_get_av(&pkt, &ah);
	if (unlikely(!av)) {
		rxe_dbg_qp(qp, "Failed no address vector\n");
		wqe->status = IB_WC_LOC_QP_OP_ERR;
		goto err;
	}

	skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt);
	if (unlikely(!skb)) {
		rxe_dbg_qp(qp, "Failed allocating skb\n");
		wqe->status = IB_WC_LOC_QP_OP_ERR;
		if (ah)
			rxe_put(ah);
		goto err;
	}

	err = finish_packet(qp, av, wqe, &pkt, skb, payload);
	if (unlikely(err)) {
		rxe_dbg_qp(qp, "Error during finish packet\n");
		if (err == -EFAULT)
			wqe->status = IB_WC_LOC_PROT_ERR;
		else
			wqe->status = IB_WC_LOC_QP_OP_ERR;
		kfree_skb(skb);
		if (ah)
			rxe_put(ah);
		goto err;
	}

	if (ah)
		rxe_put(ah);

	/* update wqe state as though we had sent it */
	update_wqe_state(qp, wqe, &pkt);
	update_wqe_psn(qp, wqe, &pkt, payload);

	err = rxe_xmit_packet(qp, &pkt, skb);
	if (err) {
		if (err != -EAGAIN) {
			wqe->status = IB_WC_LOC_QP_OP_ERR;
			goto err;
		}

		/* the packet was dropped so reset wqe to the state
		 * before we sent it so we can try to resend
		 */
		rollback_state(wqe, qp, &rollback_wqe, rollback_psn);

		/* force a delay until the dropped packet is freed and
		 * the send queue is drained below the low water mark
		 */
		qp->need_req_skb = 1;

		rxe_sched_task(&qp->req.task);
		goto exit;
	}

	update_state(qp, &pkt);

	/* A non-zero return value will cause rxe_do_task to
	 * exit its loop and end the work item. A zero return
	 * will continue looping and return to rxe_requester
	 */
done:
	ret = 0;
	goto out;
err:
	/* update wqe_index for each wqe completion */
	qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);
	wqe->state = wqe_state_error;
	rxe_qp_error(qp);
exit:
	ret = -EAGAIN;
out:
	return ret;
}