1 | // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) |
2 | /* |
3 | * Copyright(c) 2018 Intel Corporation. |
4 | * |
5 | */ |
6 | #include "hfi.h" |
7 | #include "trace.h" |
8 | #include "qp.h" |
9 | #include "opfn.h" |
10 | |
/* Single-bit mask at position IB_BTHE_E_SHIFT; tested against BTH dword 1. */
#define IB_BTHE_E BIT(IB_BTHE_E_SHIFT)

/*
 * Capability code N is tracked by bit N - 1 of the opfn.requested and
 * opfn.completed bitmaps.
 */
#define OPFN_CODE(code) BIT((code) - 1)
/* Bitmap bit for the capability whose code is STL_VERBS_EXTD_<code>. */
#define OPFN_MASK(code) OPFN_CODE(STL_VERBS_EXTD_##code)
15 | |
16 | struct hfi1_opfn_type { |
17 | bool (*request)(struct rvt_qp *qp, u64 *data); |
18 | bool (*response)(struct rvt_qp *qp, u64 *data); |
19 | bool (*reply)(struct rvt_qp *qp, u64 data); |
20 | void (*error)(struct rvt_qp *qp); |
21 | }; |
22 | |
23 | static struct hfi1_opfn_type hfi1_opfn_handlers[STL_VERBS_EXTD_MAX] = { |
24 | [STL_VERBS_EXTD_TID_RDMA] = { |
25 | .request = tid_rdma_conn_req, |
26 | .response = tid_rdma_conn_resp, |
27 | .reply = tid_rdma_conn_reply, |
28 | .error = tid_rdma_conn_error, |
29 | }, |
30 | }; |
31 | |
/* Workqueue that runs deferred OPFN connection requests. */
static struct workqueue_struct *opfn_wq;

/* Defined further down; needed by opfn_conn_request()'s retry path. */
static void opfn_schedule_conn_request(struct rvt_qp *qp);
35 | |
36 | static bool hfi1_opfn_extended(u32 bth1) |
37 | { |
38 | return !!(bth1 & IB_BTHE_E); |
39 | } |
40 | |
41 | static void opfn_conn_request(struct rvt_qp *qp) |
42 | { |
43 | struct hfi1_qp_priv *priv = qp->priv; |
44 | struct ib_atomic_wr wr; |
45 | u16 mask, capcode; |
46 | struct hfi1_opfn_type *extd; |
47 | u64 data; |
48 | unsigned long flags; |
49 | int ret = 0; |
50 | |
51 | trace_hfi1_opfn_state_conn_request(qp); |
52 | spin_lock_irqsave(&priv->opfn.lock, flags); |
53 | /* |
54 | * Exit if the extended bit is not set, or if nothing is requested, or |
55 | * if we have completed all requests, or if a previous request is in |
56 | * progress |
57 | */ |
58 | if (!priv->opfn.extended || !priv->opfn.requested || |
59 | priv->opfn.requested == priv->opfn.completed || priv->opfn.curr) |
60 | goto done; |
61 | |
62 | mask = priv->opfn.requested & ~priv->opfn.completed; |
63 | capcode = ilog2(mask & ~(mask - 1)) + 1; |
64 | if (capcode >= STL_VERBS_EXTD_MAX) { |
65 | priv->opfn.completed |= OPFN_CODE(capcode); |
66 | goto done; |
67 | } |
68 | |
69 | extd = &hfi1_opfn_handlers[capcode]; |
70 | if (!extd || !extd->request || !extd->request(qp, &data)) { |
71 | /* |
72 | * Either there is no handler for this capability or the request |
73 | * packet could not be generated. Either way, mark it as done so |
74 | * we don't keep attempting to complete it. |
75 | */ |
76 | priv->opfn.completed |= OPFN_CODE(capcode); |
77 | goto done; |
78 | } |
79 | |
80 | trace_hfi1_opfn_data_conn_request(qp, capcode, data); |
81 | data = (data & ~0xf) | capcode; |
82 | |
83 | memset(&wr, 0, sizeof(wr)); |
84 | wr.wr.opcode = IB_WR_OPFN; |
85 | wr.remote_addr = HFI1_VERBS_E_ATOMIC_VADDR; |
86 | wr.compare_add = data; |
87 | |
88 | priv->opfn.curr = capcode; /* A new request is now in progress */ |
89 | /* Drop opfn.lock before calling ib_post_send() */ |
90 | spin_unlock_irqrestore(lock: &priv->opfn.lock, flags); |
91 | |
92 | ret = ib_post_send(qp: &qp->ibqp, send_wr: &wr.wr, NULL); |
93 | if (ret) |
94 | goto err; |
95 | trace_hfi1_opfn_state_conn_request(qp); |
96 | return; |
97 | err: |
98 | trace_hfi1_msg_opfn_conn_request(qp, msg: "ib_ost_send failed: ret = " , |
99 | more: (u64)ret); |
100 | spin_lock_irqsave(&priv->opfn.lock, flags); |
101 | /* |
102 | * In case of an unexpected error return from ib_post_send |
103 | * clear opfn.curr and reschedule to try again |
104 | */ |
105 | priv->opfn.curr = STL_VERBS_EXTD_NONE; |
106 | opfn_schedule_conn_request(qp); |
107 | done: |
108 | spin_unlock_irqrestore(lock: &priv->opfn.lock, flags); |
109 | } |
110 | |
111 | void opfn_send_conn_request(struct work_struct *work) |
112 | { |
113 | struct hfi1_opfn_data *od; |
114 | struct hfi1_qp_priv *qpriv; |
115 | |
116 | od = container_of(work, struct hfi1_opfn_data, opfn_work); |
117 | qpriv = container_of(od, struct hfi1_qp_priv, opfn); |
118 | |
119 | opfn_conn_request(qp: qpriv->owner); |
120 | } |
121 | |
122 | /* |
123 | * When QP s_lock is held in the caller, the OPFN request must be scheduled |
124 | * to a different workqueue to avoid double locking QP s_lock in call to |
125 | * ib_post_send in opfn_conn_request |
126 | */ |
127 | static void opfn_schedule_conn_request(struct rvt_qp *qp) |
128 | { |
129 | struct hfi1_qp_priv *priv = qp->priv; |
130 | |
131 | trace_hfi1_opfn_state_sched_conn_request(qp); |
132 | queue_work(wq: opfn_wq, work: &priv->opfn.opfn_work); |
133 | } |
134 | |
135 | void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e, |
136 | struct ib_atomic_eth *ateth) |
137 | { |
138 | struct hfi1_qp_priv *priv = qp->priv; |
139 | u64 data = be64_to_cpu(ateth->compare_data); |
140 | struct hfi1_opfn_type *extd; |
141 | u8 capcode; |
142 | unsigned long flags; |
143 | |
144 | trace_hfi1_opfn_state_conn_response(qp); |
145 | capcode = data & 0xf; |
146 | trace_hfi1_opfn_data_conn_response(qp, capcode, data); |
147 | if (!capcode || capcode >= STL_VERBS_EXTD_MAX) |
148 | return; |
149 | |
150 | extd = &hfi1_opfn_handlers[capcode]; |
151 | |
152 | if (!extd || !extd->response) { |
153 | e->atomic_data = capcode; |
154 | return; |
155 | } |
156 | |
157 | spin_lock_irqsave(&priv->opfn.lock, flags); |
158 | if (priv->opfn.completed & OPFN_CODE(capcode)) { |
159 | /* |
160 | * We are receiving a request for a feature that has already |
161 | * been negotiated. This may mean that the other side has reset |
162 | */ |
163 | priv->opfn.completed &= ~OPFN_CODE(capcode); |
164 | if (extd->error) |
165 | extd->error(qp); |
166 | } |
167 | |
168 | if (extd->response(qp, &data)) |
169 | priv->opfn.completed |= OPFN_CODE(capcode); |
170 | e->atomic_data = (data & ~0xf) | capcode; |
171 | trace_hfi1_opfn_state_conn_response(qp); |
172 | spin_unlock_irqrestore(lock: &priv->opfn.lock, flags); |
173 | } |
174 | |
175 | void opfn_conn_reply(struct rvt_qp *qp, u64 data) |
176 | { |
177 | struct hfi1_qp_priv *priv = qp->priv; |
178 | struct hfi1_opfn_type *extd; |
179 | u8 capcode; |
180 | unsigned long flags; |
181 | |
182 | trace_hfi1_opfn_state_conn_reply(qp); |
183 | capcode = data & 0xf; |
184 | trace_hfi1_opfn_data_conn_reply(qp, capcode, data); |
185 | if (!capcode || capcode >= STL_VERBS_EXTD_MAX) |
186 | return; |
187 | |
188 | spin_lock_irqsave(&priv->opfn.lock, flags); |
189 | /* |
190 | * Either there is no previous request or the reply is not for the |
191 | * current request |
192 | */ |
193 | if (!priv->opfn.curr || capcode != priv->opfn.curr) |
194 | goto done; |
195 | |
196 | extd = &hfi1_opfn_handlers[capcode]; |
197 | |
198 | if (!extd || !extd->reply) |
199 | goto clear; |
200 | |
201 | if (extd->reply(qp, data)) |
202 | priv->opfn.completed |= OPFN_CODE(capcode); |
203 | clear: |
204 | /* |
205 | * Clear opfn.curr to indicate that the previous request is no longer in |
206 | * progress |
207 | */ |
208 | priv->opfn.curr = STL_VERBS_EXTD_NONE; |
209 | trace_hfi1_opfn_state_conn_reply(qp); |
210 | done: |
211 | spin_unlock_irqrestore(lock: &priv->opfn.lock, flags); |
212 | } |
213 | |
214 | void opfn_conn_error(struct rvt_qp *qp) |
215 | { |
216 | struct hfi1_qp_priv *priv = qp->priv; |
217 | struct hfi1_opfn_type *extd = NULL; |
218 | unsigned long flags; |
219 | u16 capcode; |
220 | |
221 | trace_hfi1_opfn_state_conn_error(qp); |
222 | trace_hfi1_msg_opfn_conn_error(qp, msg: "error. qp state " , more: (u64)qp->state); |
223 | /* |
224 | * The QP has gone into the Error state. We have to invalidate all |
225 | * negotiated feature, including the one in progress (if any). The RC |
226 | * QP handling will clean the WQE for the connection request. |
227 | */ |
228 | spin_lock_irqsave(&priv->opfn.lock, flags); |
229 | while (priv->opfn.completed) { |
230 | capcode = priv->opfn.completed & ~(priv->opfn.completed - 1); |
231 | extd = &hfi1_opfn_handlers[ilog2(capcode) + 1]; |
232 | if (extd->error) |
233 | extd->error(qp); |
234 | priv->opfn.completed &= ~OPFN_CODE(capcode); |
235 | } |
236 | priv->opfn.extended = 0; |
237 | priv->opfn.requested = 0; |
238 | priv->opfn.curr = STL_VERBS_EXTD_NONE; |
239 | spin_unlock_irqrestore(lock: &priv->opfn.lock, flags); |
240 | } |
241 | |
242 | void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask) |
243 | { |
244 | struct ib_qp *ibqp = &qp->ibqp; |
245 | struct hfi1_qp_priv *priv = qp->priv; |
246 | unsigned long flags; |
247 | |
248 | if (attr_mask & IB_QP_RETRY_CNT) |
249 | priv->s_retry = attr->retry_cnt; |
250 | |
251 | spin_lock_irqsave(&priv->opfn.lock, flags); |
252 | if (ibqp->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) { |
253 | struct tid_rdma_params *local = &priv->tid_rdma.local; |
254 | |
255 | if (attr_mask & IB_QP_TIMEOUT) |
256 | priv->tid_retry_timeout_jiffies = qp->timeout_jiffies; |
257 | if (qp->pmtu == enum_to_mtu(OPA_MTU_4096) || |
258 | qp->pmtu == enum_to_mtu(mtu: OPA_MTU_8192)) { |
259 | tid_rdma_opfn_init(qp, p: local); |
260 | /* |
261 | * We only want to set the OPFN requested bit when the |
262 | * QP transitions to RTS. |
263 | */ |
264 | if (attr_mask & IB_QP_STATE && |
265 | attr->qp_state == IB_QPS_RTS) { |
266 | priv->opfn.requested |= OPFN_MASK(TID_RDMA); |
267 | /* |
268 | * If the QP is transitioning to RTS and the |
269 | * opfn.completed for TID RDMA has already been |
270 | * set, the QP is being moved *back* into RTS. |
271 | * We can now renegotiate the TID RDMA |
272 | * parameters. |
273 | */ |
274 | if (priv->opfn.completed & |
275 | OPFN_MASK(TID_RDMA)) { |
276 | priv->opfn.completed &= |
277 | ~OPFN_MASK(TID_RDMA); |
278 | /* |
279 | * Since the opfn.completed bit was |
280 | * already set, it is safe to assume |
281 | * that the opfn.extended is also set. |
282 | */ |
283 | opfn_schedule_conn_request(qp); |
284 | } |
285 | } |
286 | } else { |
287 | memset(local, 0, sizeof(*local)); |
288 | } |
289 | } |
290 | spin_unlock_irqrestore(lock: &priv->opfn.lock, flags); |
291 | } |
292 | |
293 | void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1) |
294 | { |
295 | struct hfi1_qp_priv *priv = qp->priv; |
296 | |
297 | if (!priv->opfn.extended && hfi1_opfn_extended(bth1) && |
298 | HFI1_CAP_IS_KSET(OPFN)) { |
299 | priv->opfn.extended = 1; |
300 | if (qp->state == IB_QPS_RTS) |
301 | opfn_conn_request(qp); |
302 | } |
303 | } |
304 | |
305 | int opfn_init(void) |
306 | { |
307 | opfn_wq = alloc_workqueue(fmt: "hfi_opfn" , |
308 | flags: WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE | |
309 | WQ_MEM_RECLAIM, |
310 | HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES); |
311 | if (!opfn_wq) |
312 | return -ENOMEM; |
313 | |
314 | return 0; |
315 | } |
316 | |
317 | void opfn_exit(void) |
318 | { |
319 | if (opfn_wq) { |
320 | destroy_workqueue(wq: opfn_wq); |
321 | opfn_wq = NULL; |
322 | } |
323 | } |
324 | |