// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright(c) 2015 - 2020 Intel Corporation.
 */

#include <linux/err.h>
#include <linux/vmalloc.h>
#include <linux/hash.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>
#include <rdma/ib_verbs.h>

#include "hfi.h"
#include "qp.h"
#include "trace.h"
#include "verbs_txreq.h"

unsigned int hfi1_qp_table_size = 256;
module_param_named(qp_table_size, hfi1_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");

static void flush_tx_list(struct rvt_qp *qp);
static int iowait_sleep(
	struct sdma_engine *sde,
	struct iowait_work *wait,
	struct sdma_txreq *stx,
	unsigned int seq,
	bool pkts_sent);
static void iowait_wakeup(struct iowait *wait, int reason);
static void iowait_sdma_drained(struct iowait *wait);
static void qp_pio_drain(struct rvt_qp *qp);

const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
[IB_WR_RDMA_WRITE] = {
	.length = sizeof(struct ib_rdma_wr),
	.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_RDMA_READ] = {
	.length = sizeof(struct ib_rdma_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_ATOMIC,
},

[IB_WR_ATOMIC_CMP_AND_SWP] = {
	.length = sizeof(struct ib_atomic_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
},

[IB_WR_ATOMIC_FETCH_AND_ADD] = {
	.length = sizeof(struct ib_atomic_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
},

[IB_WR_RDMA_WRITE_WITH_IMM] = {
	.length = sizeof(struct ib_rdma_wr),
	.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_SEND] = {
	.length = sizeof(struct ib_send_wr),
	.qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
		       BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_SEND_WITH_IMM] = {
	.length = sizeof(struct ib_send_wr),
	.qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
		       BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_REG_MR] = {
	.length = sizeof(struct ib_reg_wr),
	.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_LOCAL,
},

[IB_WR_LOCAL_INV] = {
	.length = sizeof(struct ib_send_wr),
	.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_LOCAL,
},

[IB_WR_SEND_WITH_INV] = {
	.length = sizeof(struct ib_send_wr),
	.qpt_support = BIT(IB_QPT_RC),
},

[IB_WR_OPFN] = {
	.length = sizeof(struct ib_atomic_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_USE_RESERVE,
},

[IB_WR_TID_RDMA_WRITE] = {
	.length = sizeof(struct ib_rdma_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_IGN_RNR_CNT,
},

};
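
/*
 * Illustrative sketch only, not part of this driver: rdmavt consults a
 * table of this shape when a work request is posted.  The checks look
 * roughly like the fragment below; "parms" and "wr_size" are placeholder
 * names, and the exact logic lives in rdmavt's post-send path.
 *
 *	const struct rvt_operation_params *parms =
 *		&hfi1_post_parms[wr->opcode];
 *
 *	if (!(parms->qpt_support & BIT(qp->ibqp.qp_type)))
 *		return -EINVAL;		// opcode not valid on this QP type
 *	if (wr_size < parms->length)
 *		return -EINVAL;		// caller passed a truncated WR
 */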

static void flush_list_head(struct list_head *l)
{
	while (!list_empty(l)) {
		struct sdma_txreq *tx;

		tx = list_first_entry(
			l,
			struct sdma_txreq,
			list);
		list_del_init(&tx->list);
		hfi1_put_txreq(
			container_of(tx, struct verbs_txreq, txreq));
	}
}

static void flush_tx_list(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	flush_list_head(&iowait_get_ib_work(&priv->s_iowait)->tx_head);
	flush_list_head(&iowait_get_tid_work(&priv->s_iowait)->tx_head);
}

static void flush_iowait(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	unsigned long flags;
	seqlock_t *lock = priv->s_iowait.lock;

	if (!lock)
		return;
	write_seqlock_irqsave(lock, flags);
	if (!list_empty(&priv->s_iowait.list)) {
		list_del_init(&priv->s_iowait.list);
		priv->s_iowait.lock = NULL;
		rvt_put_qp(qp);
	}
	write_sequnlock_irqrestore(lock, flags);
}

/*
 * This function is what we would push to the core layer if we wanted to be a
 * "first class citizen". Instead we hide this here and rely on Verbs ULPs
 * to blindly pass the MTU enum value from the PathRecord to us.
 */
static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
{
	/* Constraining 10KB packets to 8KB packets */
	if (mtu == (enum ib_mtu)OPA_MTU_10240)
		mtu = (enum ib_mtu)OPA_MTU_8192;
	return opa_mtu_enum_to_int((enum opa_mtu)mtu);
}
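
/*
 * Worked example of the clamp above, assuming the standard OPA/IB MTU
 * enum-to-byte mapping: a PathRecord carrying OPA_MTU_10240 (10240 bytes)
 * is treated as OPA_MTU_8192 and yields an 8192-byte path MTU, while
 * values up to IB_MTU_4096 pass through unchanged.
 */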

int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
			 int attr_mask, struct ib_udata *udata)
{
	struct ib_qp *ibqp = &qp->ibqp;
	struct hfi1_ibdev *dev = to_idev(ibqp->device);
	struct hfi1_devdata *dd = dd_from_dev(dev);
	u8 sc;

	if (attr_mask & IB_QP_AV) {
		sc = ah_to_sc(ibqp->device, &attr->ah_attr);
		if (sc == 0xf)
			return -EINVAL;

		if (!qp_to_sdma_engine(qp, sc) &&
		    dd->flags & HFI1_HAS_SEND_DMA)
			return -EINVAL;

		if (!qp_to_send_context(qp, sc))
			return -EINVAL;
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		sc = ah_to_sc(ibqp->device, &attr->alt_ah_attr);
		if (sc == 0xf)
			return -EINVAL;

		if (!qp_to_sdma_engine(qp, sc) &&
		    dd->flags & HFI1_HAS_SEND_DMA)
			return -EINVAL;

		if (!qp_to_send_context(qp, sc))
			return -EINVAL;
	}

	return 0;
}

/*
 * qp_set_16b - Set the hdr_type based on whether the slid or the
 * dlid in the connection is extended. Only applicable for RC and UC
 * QPs. UD QPs determine this on the fly from the ah in the wqe.
 */
static inline void qp_set_16b(struct rvt_qp *qp)
{
	struct hfi1_pportdata *ppd;
	struct hfi1_ibport *ibp;
	struct hfi1_qp_priv *priv = qp->priv;

	/* Update ah_attr to account for extended LIDs */
	hfi1_update_ah_attr(qp->ibqp.device, &qp->remote_ah_attr);

	/* Create 32 bit LIDs */
	hfi1_make_opa_lid(&qp->remote_ah_attr);

	if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH))
		return;

	ibp = to_iport(qp->ibqp.device, qp->port_num);
	ppd = ppd_from_ibp(ibp);
	priv->hdr_type = hfi1_get_hdr_type(ppd->lid, &qp->remote_ah_attr);
}

void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
		    int attr_mask, struct ib_udata *udata)
{
	struct ib_qp *ibqp = &qp->ibqp;
	struct hfi1_qp_priv *priv = qp->priv;

	if (attr_mask & IB_QP_AV) {
		priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
		priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
		qp_set_16b(qp);
	}

	if (attr_mask & IB_QP_PATH_MIG_STATE &&
	    attr->path_mig_state == IB_MIG_MIGRATED &&
	    qp->s_mig_state == IB_MIG_ARMED) {
		qp->s_flags |= HFI1_S_AHG_CLEAR;
		priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
		priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
		qp_set_16b(qp);
	}

	opfn_qp_init(qp, attr, attr_mask);
}

/**
 * hfi1_setup_wqe - set up the wqe
 * @qp: The qp
 * @wqe: The built wqe
 * @call_send: Determine if the send should be posted or scheduled.
 *
 * Perform setup of the wqe. This is called prior to inserting the wqe
 * into the ring, but after the wqe has been set up by rdmavt. This
 * gives the driver the opportunity to perform validation and
 * additional setup of the wqe.
 *
 * Returns 0 on success, -EINVAL on failure
 */
int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
{
	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct rvt_ah *ah;
	struct hfi1_pportdata *ppd;
	struct hfi1_devdata *dd;

	switch (qp->ibqp.qp_type) {
	case IB_QPT_RC:
		hfi1_setup_tid_rdma_wqe(qp, wqe);
		fallthrough;
	case IB_QPT_UC:
		if (wqe->length > 0x80000000U)
			return -EINVAL;
		if (wqe->length > qp->pmtu)
			*call_send = false;
		break;
	case IB_QPT_SMI:
		/*
		 * SM packets should exclusively use VL15 and their SL is
		 * ignored (IBTA v1.3, Section 3.5.8.2). Therefore, when ah
		 * is created, SL is 0 in most cases and as a result some
		 * fields (vl and pmtu) in ah may not be set correctly,
		 * depending on the SL2SC and SC2VL tables at the time.
		 */
		ppd = ppd_from_ibp(ibp);
		dd = dd_from_ppd(ppd);
		if (wqe->length > dd->vld[15].mtu)
			return -EINVAL;
		break;
	case IB_QPT_GSI:
	case IB_QPT_UD:
		ah = rvt_get_swqe_ah(wqe);
		if (wqe->length > (1 << ah->log_pmtu))
			return -EINVAL;
		if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf)
			return -EINVAL;
		break;
	default:
		break;
	}

	/*
	 * System latency between send and schedule is large enough that
	 * forcing call_send to true for piothreshold packets is necessary.
	 */
	if (wqe->length <= piothreshold)
		*call_send = true;
	return 0;
}

/**
 * _hfi1_schedule_send - schedule progress
 * @qp: the QP
 *
 * This schedules qp progress w/o regard to the s_flags.
 *
 * It is only used in the post send, which doesn't hold
 * the s_lock.
 */
bool _hfi1_schedule_send(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ibport *ibp =
		to_iport(qp->ibqp.device, qp->port_num);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	struct hfi1_devdata *dd = ppd->dd;

	if (dd->flags & HFI1_SHUTDOWN)
		return true;

	return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
			       priv->s_sde ?
			       priv->s_sde->cpu :
			       cpumask_first(cpumask_of_node(dd->node)));
}

static void qp_pio_drain(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	if (!priv->s_sendcontext)
		return;
	while (iowait_pio_pending(&priv->s_iowait)) {
		write_seqlock_irq(&priv->s_sendcontext->waitlock);
		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
		write_sequnlock_irq(&priv->s_sendcontext->waitlock);
		iowait_pio_drain(&priv->s_iowait);
		write_seqlock_irq(&priv->s_sendcontext->waitlock);
		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
		write_sequnlock_irq(&priv->s_sendcontext->waitlock);
	}
}

/**
 * hfi1_schedule_send - schedule progress
 * @qp: the QP
 *
 * This schedules qp progress; the caller should hold
 * the s_lock.
 *
 * Return: true if the first leg is scheduled;
 * false if the first leg is not scheduled.
 */
bool hfi1_schedule_send(struct rvt_qp *qp)
{
	lockdep_assert_held(&qp->s_lock);
	if (hfi1_send_ok(qp)) {
		_hfi1_schedule_send(qp);
		return true;
	}
	if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
		iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
				IOWAIT_PENDING_IB);
	return false;
}

static void hfi1_qp_schedule(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	bool ret;

	if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB)) {
		ret = hfi1_schedule_send(qp);
		if (ret)
			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
	}
	if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_TID)) {
		ret = hfi1_schedule_tid_send(qp);
		if (ret)
			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
	}
}

void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
{
	unsigned long flags;

	spin_lock_irqsave(&qp->s_lock, flags);
	if (qp->s_flags & flag) {
		qp->s_flags &= ~flag;
		trace_hfi1_qpwakeup(qp, flag);
		hfi1_qp_schedule(qp);
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);
	/* Notify hfi1_destroy_qp() if it is waiting. */
	rvt_put_qp(qp);
}

void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
{
	struct hfi1_qp_priv *priv = qp->priv;

	if (iowait_set_work_flag(wait) == IOWAIT_IB_SE) {
		qp->s_flags &= ~RVT_S_BUSY;
		/*
		 * If we are sending a first-leg packet from the second leg,
		 * we need to clear the busy flag from priv->s_flags to
		 * avoid a race condition when the qp wakes up before
		 * the call to hfi1_verbs_send() returns to the second
		 * leg. In that case, the second leg will terminate without
		 * being re-scheduled, resulting in failure to send TID RDMA
		 * WRITE DATA and TID RDMA ACK packets.
		 */
		if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
			priv->s_flags &= ~(HFI1_S_TID_BUSY_SET |
					   RVT_S_BUSY);
			iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
		}
	} else {
		priv->s_flags &= ~RVT_S_BUSY;
	}
}

static int iowait_sleep(
	struct sdma_engine *sde,
	struct iowait_work *wait,
	struct sdma_txreq *stx,
	uint seq,
	bool pkts_sent)
{
	struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq);
	struct rvt_qp *qp;
	struct hfi1_qp_priv *priv;
	unsigned long flags;
	int ret = 0;

	qp = tx->qp;
	priv = qp->priv;

	spin_lock_irqsave(&qp->s_lock, flags);
	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
		/*
		 * If we couldn't queue the DMA request, save the info
		 * and try again later rather than destroying the
		 * buffer and undoing the side effects of the copy.
		 */
		/* Make a common routine? */
		list_add_tail(&stx->list, &wait->tx_head);
		write_seqlock(&sde->waitlock);
		if (sdma_progress(sde, seq, stx))
			goto eagain;
		if (list_empty(&priv->s_iowait.list)) {
			struct hfi1_ibport *ibp =
				to_iport(qp->ibqp.device, qp->port_num);

			ibp->rvp.n_dmawait++;
			qp->s_flags |= RVT_S_WAIT_DMA_DESC;
			iowait_get_priority(&priv->s_iowait);
			iowait_queue(pkts_sent, &priv->s_iowait,
				     &sde->dmawait);
			priv->s_iowait.lock = &sde->waitlock;
			trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
			rvt_get_qp(qp);
		}
		write_sequnlock(&sde->waitlock);
		hfi1_qp_unbusy(qp, wait);
		spin_unlock_irqrestore(&qp->s_lock, flags);
		ret = -EBUSY;
	} else {
		spin_unlock_irqrestore(&qp->s_lock, flags);
		hfi1_put_txreq(tx);
	}
	return ret;
eagain:
	write_sequnlock(&sde->waitlock);
	spin_unlock_irqrestore(&qp->s_lock, flags);
	list_del_init(&stx->list);
	return -EAGAIN;
}

static void iowait_wakeup(struct iowait *wait, int reason)
{
	struct rvt_qp *qp = iowait_to_qp(wait);

	WARN_ON(reason != SDMA_AVAIL_REASON);
	hfi1_qp_wakeup(qp, RVT_S_WAIT_DMA_DESC);
}

static void iowait_sdma_drained(struct iowait *wait)
{
	struct rvt_qp *qp = iowait_to_qp(wait);
	unsigned long flags;

	/*
	 * This happens when the send engine notes
	 * a QP in the error state and cannot
	 * do the flush work until that QP's
	 * sdma work has finished.
	 */
	spin_lock_irqsave(&qp->s_lock, flags);
	if (qp->s_flags & RVT_S_WAIT_DMA) {
		qp->s_flags &= ~RVT_S_WAIT_DMA;
		hfi1_schedule_send(qp);
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);
}

static void hfi1_init_priority(struct iowait *w)
{
	struct rvt_qp *qp = iowait_to_qp(w);
	struct hfi1_qp_priv *priv = qp->priv;

	if (qp->s_flags & RVT_S_ACK_PENDING)
		w->priority++;
	if (priv->s_flags & RVT_S_ACK_PENDING)
		w->priority++;
}

/**
 * qp_to_sdma_engine - map a qp to a send engine
 * @qp: the QP
 * @sc5: the 5 bit sc
 *
 * Return:
 * A send engine for the qp or NULL for SMI type qp.
 */
struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5)
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	struct sdma_engine *sde;

	if (!(dd->flags & HFI1_HAS_SEND_DMA))
		return NULL;
	switch (qp->ibqp.qp_type) {
	case IB_QPT_SMI:
		return NULL;
	default:
		break;
	}
	sde = sdma_select_engine_sc(dd, qp->ibqp.qp_num >> dd->qos_shift, sc5);
	return sde;
}

/**
 * qp_to_send_context - map a qp to a send context
 * @qp: the QP
 * @sc5: the 5 bit sc
 *
 * Return:
 * A send context for the qp
 */
struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5)
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);

	switch (qp->ibqp.qp_type) {
	case IB_QPT_SMI:
		/* SMA packets to VL15 */
		return dd->vld[15].sc;
	default:
		break;
	}

	return pio_select_send_context_sc(dd, qp->ibqp.qp_num >> dd->qos_shift,
					  sc5);
}

static const char * const qp_type_str[] = {
	"SMI", "GSI", "RC", "UC", "UD",
};

static int qp_idle(struct rvt_qp *qp)
{
	return
		qp->s_last == qp->s_acked &&
		qp->s_acked == qp->s_cur &&
		qp->s_cur == qp->s_tail &&
		qp->s_tail == qp->s_head;
}

/**
 * qp_iter_print - print the qp information to seq_file
 * @s: the seq_file to emit the qp information on
 * @iter: the iterator for the qp hash list
 */
void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter)
{
	struct rvt_swqe *wqe;
	struct rvt_qp *qp = iter->qp;
	struct hfi1_qp_priv *priv = qp->priv;
	struct sdma_engine *sde;
	struct send_context *send_context;
	struct rvt_ack_entry *e = NULL;
	struct rvt_srq *srq = qp->ibqp.srq ?
		ibsrq_to_rvtsrq(qp->ibqp.srq) : NULL;

	sde = qp_to_sdma_engine(qp, priv->s_sc);
	wqe = rvt_get_swqe_ptr(qp, qp->s_last);
	send_context = qp_to_send_context(qp, priv->s_sc);
	if (qp->s_ack_queue)
		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
	seq_printf(s,
		   "N %d %s QP %x R %u %s %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d OS %x %x E %x %x %x RNR %d %s %d\n",
		   iter->n,
		   qp_idle(qp) ? "I" : "B",
		   qp->ibqp.qp_num,
		   atomic_read(&qp->refcount),
		   qp_type_str[qp->ibqp.qp_type],
		   qp->state,
		   wqe ? wqe->wr.opcode : 0,
		   qp->s_flags,
		   iowait_sdma_pending(&priv->s_iowait),
		   iowait_pio_pending(&priv->s_iowait),
		   !list_empty(&priv->s_iowait.list),
		   qp->timeout,
		   wqe ? wqe->ssn : 0,
		   qp->s_lsn,
		   qp->s_last_psn,
		   qp->s_psn, qp->s_next_psn,
		   qp->s_sending_psn, qp->s_sending_hpsn,
		   qp->r_psn,
		   qp->s_last, qp->s_acked, qp->s_cur,
		   qp->s_tail, qp->s_head, qp->s_size,
		   qp->s_avail,
		   /* ack_queue ring pointers, size */
		   qp->s_tail_ack_queue, qp->r_head_ack_queue,
		   rvt_max_atomic(&to_idev(qp->ibqp.device)->rdi),
		   /* remote QP info */
		   qp->remote_qpn,
		   rdma_ah_get_dlid(&qp->remote_ah_attr),
		   rdma_ah_get_sl(&qp->remote_ah_attr),
		   qp->pmtu,
		   qp->s_retry,
		   qp->s_retry_cnt,
		   qp->s_rnr_retry_cnt,
		   qp->s_rnr_retry,
		   sde,
		   sde ? sde->this_idx : 0,
		   send_context,
		   send_context ? send_context->sw_index : 0,
		   ib_cq_head(qp->ibqp.send_cq),
		   ib_cq_tail(qp->ibqp.send_cq),
		   qp->pid,
		   qp->s_state,
		   qp->s_ack_state,
		   /* ack queue information */
		   e ? e->opcode : 0,
		   e ? e->psn : 0,
		   e ? e->lpsn : 0,
		   qp->r_min_rnr_timer,
		   srq ? "SRQ" : "RQ",
		   srq ? srq->rq.size : qp->r_rq.size
		);
}

void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv;

	priv = kzalloc_node(sizeof(*priv), GFP_KERNEL, rdi->dparms.node);
	if (!priv)
		return ERR_PTR(-ENOMEM);

	priv->owner = qp;

	priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), GFP_KERNEL,
				   rdi->dparms.node);
	if (!priv->s_ahg) {
		kfree(priv);
		return ERR_PTR(-ENOMEM);
	}
	iowait_init(
		&priv->s_iowait,
		1,
		_hfi1_do_send,
		_hfi1_do_tid_send,
		iowait_sleep,
		iowait_wakeup,
		iowait_sdma_drained,
		hfi1_init_priority);
	/* Init to a value to start the running average correctly */
	priv->s_running_pkt_size = piothreshold / 2;
	return priv;
}

void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	hfi1_qp_priv_tid_free(rdi, qp);
	kfree(priv->s_ahg);
	kfree(priv);
}

unsigned free_all_qps(struct rvt_dev_info *rdi)
{
	struct hfi1_ibdev *verbs_dev = container_of(rdi,
						    struct hfi1_ibdev,
						    rdi);
	struct hfi1_devdata *dd = container_of(verbs_dev,
					       struct hfi1_devdata,
					       verbs_dev);
	int n;
	unsigned qp_inuse = 0;

	for (n = 0; n < dd->num_pports; n++) {
		struct hfi1_ibport *ibp = &dd->pport[n].ibport_data;

		rcu_read_lock();
		if (rcu_dereference(ibp->rvp.qp[0]))
			qp_inuse++;
		if (rcu_dereference(ibp->rvp.qp[1]))
			qp_inuse++;
		rcu_read_unlock();
	}

	return qp_inuse;
}

void flush_qp_waiters(struct rvt_qp *qp)
{
	lockdep_assert_held(&qp->s_lock);
	flush_iowait(qp);
	hfi1_tid_rdma_flush_wait(qp);
}

void stop_send_queue(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	iowait_cancel_work(&priv->s_iowait);
	if (cancel_work_sync(&priv->tid_rdma.trigger_work))
		rvt_put_qp(qp);
}

void quiesce_qp(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	hfi1_del_tid_reap_timer(qp);
	hfi1_del_tid_retry_timer(qp);
	iowait_sdma_drain(&priv->s_iowait);
	qp_pio_drain(qp);
	flush_tx_list(qp);
}

void notify_qp_reset(struct rvt_qp *qp)
{
	hfi1_qp_kern_exp_rcv_clear_all(qp);
	qp->r_adefered = 0;
	clear_ahg(qp);

	/* Clear any OPFN state */
	if (qp->ibqp.qp_type == IB_QPT_RC)
		opfn_conn_error(qp);
}

/*
 * Switch to alternate path.
 * The QP s_lock should be held and interrupts disabled.
 */
void hfi1_migrate_qp(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct ib_event ev;

	qp->s_mig_state = IB_MIG_MIGRATED;
	qp->remote_ah_attr = qp->alt_ah_attr;
	qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
	qp->s_pkey_index = qp->s_alt_pkey_index;
	qp->s_flags |= HFI1_S_AHG_CLEAR;
	priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
	priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
	qp_set_16b(qp);

	ev.device = qp->ibqp.device;
	ev.element.qp = &qp->ibqp;
	ev.event = IB_EVENT_PATH_MIG;
	qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
}

int mtu_to_path_mtu(u32 mtu)
{
	return mtu_to_enum(mtu, OPA_MTU_8192);
}

u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
{
	u32 mtu;
	struct hfi1_ibdev *verbs_dev = container_of(rdi,
						    struct hfi1_ibdev,
						    rdi);
	struct hfi1_devdata *dd = container_of(verbs_dev,
					       struct hfi1_devdata,
					       verbs_dev);
	struct hfi1_ibport *ibp;
	u8 sc, vl;

	ibp = &dd->pport[qp->port_num - 1].ibport_data;
	sc = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
	vl = sc_to_vlt(dd, sc);

	mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu);
	if (vl < PER_VL_SEND_CONTEXTS)
		mtu = min_t(u32, mtu, dd->vld[vl].mtu);
	return mtu;
}

int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
		       struct ib_qp_attr *attr)
{
	int mtu, pidx = qp->port_num - 1;
	struct hfi1_ibdev *verbs_dev = container_of(rdi,
						    struct hfi1_ibdev,
						    rdi);
	struct hfi1_devdata *dd = container_of(verbs_dev,
					       struct hfi1_devdata,
					       verbs_dev);
	mtu = verbs_mtu_enum_to_int(qp->ibqp.device, attr->path_mtu);
	if (mtu == -1)
		return -1; /* values less than 0 are error */

	if (mtu > dd->pport[pidx].ibmtu)
		return mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);
	else
		return attr->path_mtu;
}

void notify_error_qp(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	seqlock_t *lock = priv->s_iowait.lock;

	if (lock) {
		write_seqlock(lock);
		if (!list_empty(&priv->s_iowait.list) &&
		    !(qp->s_flags & RVT_S_BUSY) &&
		    !(priv->s_flags & RVT_S_BUSY)) {
			qp->s_flags &= ~HFI1_S_ANY_WAIT_IO;
			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
			list_del_init(&priv->s_iowait.list);
			priv->s_iowait.lock = NULL;
			rvt_put_qp(qp);
		}
		write_sequnlock(lock);
	}

	if (!(qp->s_flags & RVT_S_BUSY) && !(priv->s_flags & RVT_S_BUSY)) {
		qp->s_hdrwords = 0;
		if (qp->s_rdma_mr) {
			rvt_put_mr(qp->s_rdma_mr);
			qp->s_rdma_mr = NULL;
		}
		flush_tx_list(qp);
	}
}

/**
 * hfi1_qp_iter_cb - callback for iterator
 * @qp: the qp
 * @v: the sl in low bits of v
 *
 * This is called from the iterator callback to work
 * on an individual qp.
 */
static void hfi1_qp_iter_cb(struct rvt_qp *qp, u64 v)
{
	int lastwqe;
	struct ib_event ev;
	struct hfi1_ibport *ibp =
		to_iport(qp->ibqp.device, qp->port_num);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	u8 sl = (u8)v;

	if (qp->port_num != ppd->port ||
	    (qp->ibqp.qp_type != IB_QPT_UC &&
	     qp->ibqp.qp_type != IB_QPT_RC) ||
	    rdma_ah_get_sl(&qp->remote_ah_attr) != sl ||
	    !(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))
		return;

	spin_lock_irq(&qp->r_lock);
	spin_lock(&qp->s_hlock);
	spin_lock(&qp->s_lock);
	lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
	spin_unlock(&qp->s_lock);
	spin_unlock(&qp->s_hlock);
	spin_unlock_irq(&qp->r_lock);
	if (lastwqe) {
		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
}

/**
 * hfi1_error_port_qps - put a port's RC/UC qps into error state
 * @ibp: the ibport.
 * @sl: the service level.
 *
 * This function places all RC/UC qps with a given service level into error
 * state. It is generally called to force upper layer apps to abandon stale
 * qps after an sl->sc mapping change.
 */
void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl)
{
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	struct hfi1_ibdev *dev = &ppd->dd->verbs_dev;

	rvt_qp_iter(&dev->rdi, sl, hfi1_qp_iter_cb);
}