1 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause |
2 | |
3 | /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ |
4 | /* Copyright (c) 2008-2019, IBM Corporation */ |
5 | |
6 | #include <linux/errno.h> |
7 | #include <linux/types.h> |
8 | #include <linux/uaccess.h> |
9 | #include <linux/vmalloc.h> |
10 | #include <linux/xarray.h> |
11 | #include <net/addrconf.h> |
12 | |
13 | #include <rdma/iw_cm.h> |
14 | #include <rdma/ib_verbs.h> |
15 | #include <rdma/ib_user_verbs.h> |
16 | #include <rdma/uverbs_ioctl.h> |
17 | |
18 | #include "siw.h" |
19 | #include "siw_verbs.h" |
20 | #include "siw_mem.h" |
21 | |
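/*
 * Mappings between siw-private QP states and IB verbs QP states,
 * plus printable names used for debug output.
 */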
22 | static int siw_qp_state_to_ib_qp_state[SIW_QP_STATE_COUNT] = { |
23 | [SIW_QP_STATE_IDLE] = IB_QPS_INIT, |
24 | [SIW_QP_STATE_RTR] = IB_QPS_RTR, |
25 | [SIW_QP_STATE_RTS] = IB_QPS_RTS, |
26 | [SIW_QP_STATE_CLOSING] = IB_QPS_SQD, |
27 | [SIW_QP_STATE_TERMINATE] = IB_QPS_SQE, |
28 | [SIW_QP_STATE_ERROR] = IB_QPS_ERR |
29 | }; |
30 | |
31 | static int ib_qp_state_to_siw_qp_state[IB_QPS_ERR + 1] = { |
32 | [IB_QPS_RESET] = SIW_QP_STATE_IDLE, |
33 | [IB_QPS_INIT] = SIW_QP_STATE_IDLE, |
34 | [IB_QPS_RTR] = SIW_QP_STATE_RTR, |
35 | [IB_QPS_RTS] = SIW_QP_STATE_RTS, |
36 | [IB_QPS_SQD] = SIW_QP_STATE_CLOSING, |
37 | [IB_QPS_SQE] = SIW_QP_STATE_TERMINATE, |
38 | [IB_QPS_ERR] = SIW_QP_STATE_ERROR |
39 | }; |
40 | |
static char ib_qp_state_to_string[IB_QPS_ERR + 1][sizeof("RESET")] = {
	[IB_QPS_RESET] = "RESET", [IB_QPS_INIT] = "INIT", [IB_QPS_RTR] = "RTR",
	[IB_QPS_RTS] = "RTS", [IB_QPS_SQD] = "SQD", [IB_QPS_SQE] = "SQE",
	[IB_QPS_ERR] = "ERR"
45 | }; |
46 | |
47 | void siw_mmap_free(struct rdma_user_mmap_entry *rdma_entry) |
48 | { |
	struct siw_user_mmap_entry *entry = to_siw_mmap_entry(rdma_entry);

	kfree(entry);
52 | } |
53 | |
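/*
 * siw_mmap()
 *
 * Map a queue (SQ, RQ, SRQ or CQ) previously announced to user space
 * into the caller's address space. The vma offset is resolved via the
 * mmap entry database maintained by the RDMA core.
 */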
54 | int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma) |
55 | { |
	struct siw_ucontext *uctx = to_siw_ctx(ctx);
57 | size_t size = vma->vm_end - vma->vm_start; |
58 | struct rdma_user_mmap_entry *rdma_entry; |
59 | struct siw_user_mmap_entry *entry; |
60 | int rv = -EINVAL; |
61 | |
62 | /* |
63 | * Must be page aligned |
64 | */ |
65 | if (vma->vm_start & (PAGE_SIZE - 1)) { |
		pr_warn("siw: mmap not page aligned\n");
67 | return -EINVAL; |
68 | } |
	rdma_entry = rdma_user_mmap_entry_get(&uctx->base_ucontext, vma);
	if (!rdma_entry) {
		siw_dbg(&uctx->sdev->base_dev, "mmap lookup failed: %lu, %#zx\n",
			vma->vm_pgoff, size);
		return -EINVAL;
	}
	entry = to_siw_mmap_entry(rdma_entry);

	rv = remap_vmalloc_range(vma, entry->address, 0);
	if (rv)
		pr_warn("remap_vmalloc_range failed: %lu, %zu\n", vma->vm_pgoff,
			size);
	rdma_user_mmap_entry_put(rdma_entry);
82 | |
83 | return rv; |
84 | } |
85 | |
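/*
 * siw_alloc_ucontext()
 *
 * Allocate a new user context, accounted against the per-device
 * context limit, and report the device id back via @udata.
 */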
86 | int siw_alloc_ucontext(struct ib_ucontext *base_ctx, struct ib_udata *udata) |
87 | { |
	struct siw_device *sdev = to_siw_dev(base_ctx->device);
89 | struct siw_ucontext *ctx = to_siw_ctx(base_ctx); |
90 | struct siw_uresp_alloc_ctx uresp = {}; |
91 | int rv; |
92 | |
	if (atomic_inc_return(&sdev->num_ctx) > SIW_MAX_CONTEXT) {
94 | rv = -ENOMEM; |
95 | goto err_out; |
96 | } |
97 | ctx->sdev = sdev; |
98 | |
99 | uresp.dev_id = sdev->vendor_part_id; |
100 | |
101 | if (udata->outlen < sizeof(uresp)) { |
102 | rv = -EINVAL; |
103 | goto err_out; |
104 | } |
	rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
106 | if (rv) |
107 | goto err_out; |
108 | |
	siw_dbg(base_ctx->device, "success. now %d context(s)\n",
110 | atomic_read(&sdev->num_ctx)); |
111 | |
112 | return 0; |
113 | |
114 | err_out: |
	atomic_dec(&sdev->num_ctx);
	siw_dbg(base_ctx->device, "failure %d. now %d context(s)\n", rv,
117 | atomic_read(&sdev->num_ctx)); |
118 | |
119 | return rv; |
120 | } |
121 | |
122 | void siw_dealloc_ucontext(struct ib_ucontext *base_ctx) |
123 | { |
124 | struct siw_ucontext *uctx = to_siw_ctx(base_ctx); |
125 | |
	atomic_dec(&uctx->sdev->num_ctx);
127 | } |
128 | |
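/*
 * siw_query_device()
 *
 * Report device attributes and limits as configured for this
 * software iWARP device.
 */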
129 | int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr, |
130 | struct ib_udata *udata) |
131 | { |
132 | struct siw_device *sdev = to_siw_dev(base_dev); |
133 | |
134 | if (udata->inlen || udata->outlen) |
135 | return -EINVAL; |
136 | |
137 | memset(attr, 0, sizeof(*attr)); |
138 | |
139 | /* Revisit atomic caps if RFC 7306 gets supported */ |
140 | attr->atomic_cap = 0; |
141 | attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS; |
142 | attr->kernel_cap_flags = IBK_ALLOW_USER_UNREG; |
143 | attr->max_cq = sdev->attrs.max_cq; |
144 | attr->max_cqe = sdev->attrs.max_cqe; |
145 | attr->max_fast_reg_page_list_len = SIW_MAX_SGE_PBL; |
146 | attr->max_mr = sdev->attrs.max_mr; |
147 | attr->max_mw = sdev->attrs.max_mw; |
148 | attr->max_mr_size = ~0ull; |
149 | attr->max_pd = sdev->attrs.max_pd; |
150 | attr->max_qp = sdev->attrs.max_qp; |
151 | attr->max_qp_init_rd_atom = sdev->attrs.max_ird; |
152 | attr->max_qp_rd_atom = sdev->attrs.max_ord; |
153 | attr->max_qp_wr = sdev->attrs.max_qp_wr; |
154 | attr->max_recv_sge = sdev->attrs.max_sge; |
155 | attr->max_res_rd_atom = sdev->attrs.max_qp * sdev->attrs.max_ird; |
156 | attr->max_send_sge = sdev->attrs.max_sge; |
157 | attr->max_sge_rd = sdev->attrs.max_sge_rd; |
158 | attr->max_srq = sdev->attrs.max_srq; |
159 | attr->max_srq_sge = sdev->attrs.max_srq_sge; |
160 | attr->max_srq_wr = sdev->attrs.max_srq_wr; |
161 | attr->page_size_cap = PAGE_SIZE; |
162 | attr->vendor_id = SIW_VENDOR_ID; |
163 | attr->vendor_part_id = sdev->vendor_part_id; |
164 | |
	addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
			    sdev->raw_gid);
167 | |
168 | return 0; |
169 | } |
170 | |
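/*
 * siw_query_port()
 *
 * Report port attributes. Speed and width are derived from the
 * attached netdev, MTU and state reflect its current settings.
 */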
171 | int siw_query_port(struct ib_device *base_dev, u32 port, |
172 | struct ib_port_attr *attr) |
173 | { |
174 | struct siw_device *sdev = to_siw_dev(base_dev); |
175 | int rv; |
176 | |
177 | memset(attr, 0, sizeof(*attr)); |
178 | |
	rv = ib_get_eth_speed(base_dev, port, &attr->active_speed,
			      &attr->active_width);
181 | attr->gid_tbl_len = 1; |
182 | attr->max_msg_sz = -1; |
	attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
	attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
185 | attr->phys_state = sdev->state == IB_PORT_ACTIVE ? |
186 | IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED; |
187 | attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; |
188 | attr->state = sdev->state; |
189 | /* |
190 | * All zero |
191 | * |
192 | * attr->lid = 0; |
193 | * attr->bad_pkey_cntr = 0; |
194 | * attr->qkey_viol_cntr = 0; |
195 | * attr->sm_lid = 0; |
196 | * attr->lmc = 0; |
197 | * attr->max_vl_num = 0; |
198 | * attr->sm_sl = 0; |
199 | * attr->subnet_timeout = 0; |
	 * attr->init_type_reply = 0;
201 | */ |
202 | return rv; |
203 | } |
204 | |
205 | int siw_get_port_immutable(struct ib_device *base_dev, u32 port, |
206 | struct ib_port_immutable *port_immutable) |
207 | { |
208 | struct ib_port_attr attr; |
	int rv = siw_query_port(base_dev, port, &attr);
210 | |
211 | if (rv) |
212 | return rv; |
213 | |
214 | port_immutable->gid_tbl_len = attr.gid_tbl_len; |
215 | port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; |
216 | |
217 | return 0; |
218 | } |
219 | |
220 | int siw_query_gid(struct ib_device *base_dev, u32 port, int idx, |
221 | union ib_gid *gid) |
222 | { |
223 | struct siw_device *sdev = to_siw_dev(base_dev); |
224 | |
225 | /* subnet_prefix == interface_id == 0; */ |
226 | memset(gid, 0, sizeof(*gid)); |
227 | memcpy(gid->raw, sdev->raw_gid, ETH_ALEN); |
228 | |
229 | return 0; |
230 | } |
231 | |
232 | int siw_alloc_pd(struct ib_pd *pd, struct ib_udata *udata) |
233 | { |
	struct siw_device *sdev = to_siw_dev(pd->device);

	if (atomic_inc_return(&sdev->num_pd) > SIW_MAX_PD) {
		atomic_dec(&sdev->num_pd);
		return -ENOMEM;
	}
	siw_dbg_pd(pd, "now %d PD(s)\n", atomic_read(&sdev->num_pd));
241 | |
242 | return 0; |
243 | } |
244 | |
245 | int siw_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) |
246 | { |
	struct siw_device *sdev = to_siw_dev(pd->device);

	siw_dbg_pd(pd, "free PD\n");
	atomic_dec(&sdev->num_pd);
251 | return 0; |
252 | } |
253 | |
254 | void siw_qp_get_ref(struct ib_qp *base_qp) |
255 | { |
	siw_qp_get(to_siw_qp(base_qp));
257 | } |
258 | |
259 | void siw_qp_put_ref(struct ib_qp *base_qp) |
260 | { |
	siw_qp_put(to_siw_qp(base_qp));
262 | } |
263 | |
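/*
 * siw_mmap_entry_insert()
 *
 * Register a kernel virtual address range for later mmap'ing by
 * user space and return the corresponding mmap offset via @offset.
 */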
264 | static struct rdma_user_mmap_entry * |
265 | siw_mmap_entry_insert(struct siw_ucontext *uctx, |
266 | void *address, size_t length, |
267 | u64 *offset) |
268 | { |
	struct siw_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
270 | int rv; |
271 | |
272 | *offset = SIW_INVAL_UOBJ_KEY; |
273 | if (!entry) |
274 | return NULL; |
275 | |
276 | entry->address = address; |
277 | |
	rv = rdma_user_mmap_entry_insert(&uctx->base_ucontext,
					 &entry->rdma_entry,
					 length);
	if (rv) {
		kfree(entry);
		return NULL;
	}

	*offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
287 | |
288 | return &entry->rdma_entry; |
289 | } |
290 | |
291 | /* |
292 | * siw_create_qp() |
293 | * |
294 | * Create QP of requested size on given device. |
295 | * |
 * @qp: Queue pair
297 | * @attrs: Initial QP attributes. |
298 | * @udata: used to provide QP ID, SQ and RQ size back to user. |
299 | */ |
300 | |
301 | int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, |
302 | struct ib_udata *udata) |
303 | { |
304 | struct ib_pd *pd = ibqp->pd; |
	struct siw_qp *qp = to_siw_qp(ibqp);
306 | struct ib_device *base_dev = pd->device; |
307 | struct siw_device *sdev = to_siw_dev(base_dev); |
308 | struct siw_ucontext *uctx = |
309 | rdma_udata_to_drv_context(udata, struct siw_ucontext, |
310 | base_ucontext); |
311 | unsigned long flags; |
312 | int num_sqe, num_rqe, rv = 0; |
313 | size_t length; |
314 | |
	siw_dbg(base_dev, "create new QP\n");

	if (attrs->create_flags)
		return -EOPNOTSUPP;

	if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) {
		siw_dbg(base_dev, "too many QP's\n");
322 | rv = -ENOMEM; |
323 | goto err_atomic; |
324 | } |
325 | if (attrs->qp_type != IB_QPT_RC) { |
		siw_dbg(base_dev, "only RC QP's supported\n");
327 | rv = -EOPNOTSUPP; |
328 | goto err_atomic; |
329 | } |
330 | if ((attrs->cap.max_send_wr > SIW_MAX_QP_WR) || |
331 | (attrs->cap.max_recv_wr > SIW_MAX_QP_WR) || |
332 | (attrs->cap.max_send_sge > SIW_MAX_SGE) || |
333 | (attrs->cap.max_recv_sge > SIW_MAX_SGE)) { |
		siw_dbg(base_dev, "QP size error\n");
		rv = -EINVAL;
		goto err_atomic;
	}
	if (attrs->cap.max_inline_data > SIW_MAX_INLINE) {
		siw_dbg(base_dev, "max inline send: %d > %d\n",
340 | attrs->cap.max_inline_data, (int)SIW_MAX_INLINE); |
341 | rv = -EINVAL; |
342 | goto err_atomic; |
343 | } |
344 | /* |
345 | * NOTE: we don't allow for a QP unable to hold any SQ WQE |
346 | */ |
347 | if (attrs->cap.max_send_wr == 0) { |
		siw_dbg(base_dev, "QP must have send queue\n");
349 | rv = -EINVAL; |
350 | goto err_atomic; |
351 | } |
352 | |
353 | if (!attrs->send_cq || (!attrs->recv_cq && !attrs->srq)) { |
		siw_dbg(base_dev, "send CQ or receive CQ invalid\n");
355 | rv = -EINVAL; |
356 | goto err_atomic; |
357 | } |
358 | |
359 | init_rwsem(&qp->state_lock); |
360 | spin_lock_init(&qp->sq_lock); |
361 | spin_lock_init(&qp->rq_lock); |
362 | spin_lock_init(&qp->orq_lock); |
363 | |
364 | rv = siw_qp_add(sdev, qp); |
365 | if (rv) |
366 | goto err_atomic; |
367 | |
368 | |
369 | /* All queue indices are derived from modulo operations |
370 | * on a free running 'get' (consumer) and 'put' (producer) |
371 | * unsigned counter. Having queue sizes at power of two |
372 | * avoids handling counter wrap around. |
373 | */ |
374 | num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr); |
375 | num_rqe = attrs->cap.max_recv_wr; |
376 | if (num_rqe) |
377 | num_rqe = roundup_pow_of_two(num_rqe); |
378 | |
379 | if (udata) |
		qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe));
	else
		qp->sendq = vcalloc(num_sqe, sizeof(struct siw_sqe));
383 | |
384 | if (qp->sendq == NULL) { |
385 | rv = -ENOMEM; |
386 | goto err_out_xa; |
387 | } |
388 | if (attrs->sq_sig_type != IB_SIGNAL_REQ_WR) { |
389 | if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) |
390 | qp->attrs.flags |= SIW_SIGNAL_ALL_WR; |
391 | else { |
392 | rv = -EINVAL; |
393 | goto err_out_xa; |
394 | } |
395 | } |
396 | qp->pd = pd; |
	qp->scq = to_siw_cq(attrs->send_cq);
	qp->rcq = to_siw_cq(attrs->recv_cq);
399 | |
400 | if (attrs->srq) { |
401 | /* |
402 | * SRQ support. |
403 | * Verbs 6.3.7: ignore RQ size, if SRQ present |
404 | * Verbs 6.3.5: do not check PD of SRQ against PD of QP |
405 | */ |
		qp->srq = to_siw_srq(attrs->srq);
		qp->attrs.rq_size = 0;
		siw_dbg(base_dev, "QP [%u]: SRQ attached\n",
			qp->base_qp.qp_num);
	} else if (num_rqe) {
		if (udata)
			qp->recvq =
				vmalloc_user(num_rqe * sizeof(struct siw_rqe));
		else
			qp->recvq = vcalloc(num_rqe, sizeof(struct siw_rqe));
416 | |
417 | if (qp->recvq == NULL) { |
418 | rv = -ENOMEM; |
419 | goto err_out_xa; |
420 | } |
421 | qp->attrs.rq_size = num_rqe; |
422 | } |
423 | qp->attrs.sq_size = num_sqe; |
424 | qp->attrs.sq_max_sges = attrs->cap.max_send_sge; |
425 | qp->attrs.rq_max_sges = attrs->cap.max_recv_sge; |
426 | |
427 | /* Make those two tunables fixed for now. */ |
428 | qp->tx_ctx.gso_seg_limit = 1; |
429 | qp->tx_ctx.zcopy_tx = zcopy_tx; |
430 | |
431 | qp->attrs.state = SIW_QP_STATE_IDLE; |
432 | |
433 | if (udata) { |
434 | struct siw_uresp_create_qp uresp = {}; |
435 | |
436 | uresp.num_sqe = num_sqe; |
437 | uresp.num_rqe = num_rqe; |
438 | uresp.qp_id = qp_id(qp); |
439 | |
440 | if (qp->sendq) { |
441 | length = num_sqe * sizeof(struct siw_sqe); |
442 | qp->sq_entry = |
				siw_mmap_entry_insert(uctx, qp->sendq,
						      length, &uresp.sq_key);
445 | if (!qp->sq_entry) { |
446 | rv = -ENOMEM; |
447 | goto err_out_xa; |
448 | } |
449 | } |
450 | |
451 | if (qp->recvq) { |
452 | length = num_rqe * sizeof(struct siw_rqe); |
453 | qp->rq_entry = |
				siw_mmap_entry_insert(uctx, qp->recvq,
						      length, &uresp.rq_key);
456 | if (!qp->rq_entry) { |
457 | uresp.sq_key = SIW_INVAL_UOBJ_KEY; |
458 | rv = -ENOMEM; |
459 | goto err_out_xa; |
460 | } |
461 | } |
462 | |
463 | if (udata->outlen < sizeof(uresp)) { |
464 | rv = -EINVAL; |
465 | goto err_out_xa; |
466 | } |
		rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
468 | if (rv) |
469 | goto err_out_xa; |
470 | } |
471 | qp->tx_cpu = siw_get_tx_cpu(sdev); |
472 | if (qp->tx_cpu < 0) { |
473 | rv = -EINVAL; |
474 | goto err_out_xa; |
475 | } |
	INIT_LIST_HEAD(&qp->devq);
	spin_lock_irqsave(&sdev->lock, flags);
	list_add_tail(&qp->devq, &sdev->qp_list);
	spin_unlock_irqrestore(&sdev->lock, flags);

	init_completion(&qp->qp_free);
482 | |
483 | return 0; |
484 | |
485 | err_out_xa: |
	xa_erase(&sdev->qp_xa, qp_id(qp));
	if (uctx) {
		rdma_user_mmap_entry_remove(qp->sq_entry);
		rdma_user_mmap_entry_remove(qp->rq_entry);
	}
	vfree(qp->sendq);
	vfree(qp->recvq);

err_atomic:
	atomic_dec(&sdev->num_qp);
496 | return rv; |
497 | } |
498 | |
499 | /* |
500 | * Minimum siw_query_qp() verb interface. |
501 | * |
502 | * @qp_attr_mask is not used but all available information is provided |
503 | */ |
504 | int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr, |
505 | int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) |
506 | { |
507 | struct siw_qp *qp; |
508 | struct siw_device *sdev; |
509 | |
510 | if (base_qp && qp_attr && qp_init_attr) { |
511 | qp = to_siw_qp(base_qp); |
		sdev = to_siw_dev(base_qp->device);
513 | } else { |
514 | return -EINVAL; |
515 | } |
516 | qp_attr->qp_state = siw_qp_state_to_ib_qp_state[qp->attrs.state]; |
517 | qp_attr->cap.max_inline_data = SIW_MAX_INLINE; |
518 | qp_attr->cap.max_send_wr = qp->attrs.sq_size; |
519 | qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges; |
520 | qp_attr->cap.max_recv_wr = qp->attrs.rq_size; |
521 | qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges; |
	qp_attr->path_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
523 | qp_attr->max_rd_atomic = qp->attrs.irq_size; |
524 | qp_attr->max_dest_rd_atomic = qp->attrs.orq_size; |
525 | |
526 | qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | |
527 | IB_ACCESS_REMOTE_WRITE | |
528 | IB_ACCESS_REMOTE_READ; |
529 | |
530 | qp_init_attr->qp_type = base_qp->qp_type; |
531 | qp_init_attr->send_cq = base_qp->send_cq; |
532 | qp_init_attr->recv_cq = base_qp->recv_cq; |
533 | qp_init_attr->srq = base_qp->srq; |
534 | |
535 | qp_init_attr->cap = qp_attr->cap; |
536 | |
537 | return 0; |
538 | } |
539 | |
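/*
 * siw_verbs_modify_qp()
 *
 * Translate IB verbs QP attributes and state into siw-private
 * attributes and apply them via siw_qp_modify().
 */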
540 | int siw_verbs_modify_qp(struct ib_qp *base_qp, struct ib_qp_attr *attr, |
541 | int attr_mask, struct ib_udata *udata) |
542 | { |
543 | struct siw_qp_attrs new_attrs; |
544 | enum siw_qp_attr_mask siw_attr_mask = 0; |
545 | struct siw_qp *qp = to_siw_qp(base_qp); |
546 | int rv = 0; |
547 | |
548 | if (!attr_mask) |
549 | return 0; |
550 | |
551 | if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) |
552 | return -EOPNOTSUPP; |
553 | |
554 | memset(&new_attrs, 0, sizeof(new_attrs)); |
555 | |
556 | if (attr_mask & IB_QP_ACCESS_FLAGS) { |
557 | siw_attr_mask = SIW_QP_ATTR_ACCESS_FLAGS; |
558 | |
559 | if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) |
560 | new_attrs.flags |= SIW_RDMA_READ_ENABLED; |
561 | if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) |
562 | new_attrs.flags |= SIW_RDMA_WRITE_ENABLED; |
563 | if (attr->qp_access_flags & IB_ACCESS_MW_BIND) |
564 | new_attrs.flags |= SIW_RDMA_BIND_ENABLED; |
565 | } |
566 | if (attr_mask & IB_QP_STATE) { |
		siw_dbg_qp(qp, "desired IB QP state: %s\n",
568 | ib_qp_state_to_string[attr->qp_state]); |
569 | |
570 | new_attrs.state = ib_qp_state_to_siw_qp_state[attr->qp_state]; |
571 | |
572 | if (new_attrs.state > SIW_QP_STATE_RTS) |
573 | qp->tx_ctx.tx_suspend = 1; |
574 | |
575 | siw_attr_mask |= SIW_QP_ATTR_STATE; |
576 | } |
577 | if (!siw_attr_mask) |
578 | goto out; |
579 | |
	down_write(&qp->state_lock);

	rv = siw_qp_modify(qp, &new_attrs, siw_attr_mask);

	up_write(&qp->state_lock);
585 | out: |
586 | return rv; |
587 | } |
588 | |
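/*
 * siw_destroy_qp()
 *
 * Mark the QP as being destroyed, remove user mappings, move the QP
 * into ERROR state, drop its connection endpoint and wait until the
 * last reference is gone.
 */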
589 | int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata) |
590 | { |
591 | struct siw_qp *qp = to_siw_qp(base_qp); |
592 | struct siw_ucontext *uctx = |
593 | rdma_udata_to_drv_context(udata, struct siw_ucontext, |
594 | base_ucontext); |
595 | struct siw_qp_attrs qp_attrs; |
596 | |
	siw_dbg_qp(qp, "state %d\n", qp->attrs.state);
598 | |
599 | /* |
600 | * Mark QP as in process of destruction to prevent from |
601 | * any async callbacks to RDMA core |
602 | */ |
603 | qp->attrs.flags |= SIW_QP_IN_DESTROY; |
604 | qp->rx_stream.rx_suspend = 1; |
605 | |
606 | if (uctx) { |
		rdma_user_mmap_entry_remove(qp->sq_entry);
		rdma_user_mmap_entry_remove(qp->rq_entry);
609 | } |
610 | |
	down_write(&qp->state_lock);

	qp_attrs.state = SIW_QP_STATE_ERROR;
	siw_qp_modify(qp, &qp_attrs, SIW_QP_ATTR_STATE);

	if (qp->cep) {
		siw_cep_put(qp->cep);
		qp->cep = NULL;
	}
	up_write(&qp->state_lock);

	kfree(qp->tx_ctx.mpa_crc_hd);
	kfree(qp->rx_stream.mpa_crc_hd);
624 | |
625 | qp->scq = qp->rcq = NULL; |
626 | |
627 | siw_qp_put(qp); |
628 | wait_for_completion(&qp->qp_free); |
629 | |
630 | return 0; |
631 | } |
632 | |
633 | /* |
634 | * siw_copy_inline_sgl() |
635 | * |
636 | * Prepare sgl of inlined data for sending. For userland callers |
637 | * function checks if given buffer addresses and len's are within |
638 | * process context bounds. |
639 | * Data from all provided sge's are copied together into the wqe, |
640 | * referenced by a single sge. |
641 | */ |
642 | static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr, |
643 | struct siw_sqe *sqe) |
644 | { |
645 | struct ib_sge *core_sge = core_wr->sg_list; |
646 | void *kbuf = &sqe->sge[1]; |
647 | int num_sge = core_wr->num_sge, bytes = 0; |
648 | |
649 | sqe->sge[0].laddr = (uintptr_t)kbuf; |
650 | sqe->sge[0].lkey = 0; |
651 | |
652 | while (num_sge--) { |
653 | if (!core_sge->length) { |
654 | core_sge++; |
655 | continue; |
656 | } |
657 | bytes += core_sge->length; |
658 | if (bytes > SIW_MAX_INLINE) { |
659 | bytes = -EINVAL; |
660 | break; |
661 | } |
662 | memcpy(kbuf, ib_virt_dma_to_ptr(core_sge->addr), |
663 | core_sge->length); |
664 | |
665 | kbuf += core_sge->length; |
666 | core_sge++; |
667 | } |
668 | sqe->sge[0].length = max(bytes, 0); |
669 | sqe->num_sge = bytes > 0 ? 1 : 0; |
670 | |
671 | return bytes; |
672 | } |
673 | |
674 | /* Complete SQ WR's without processing */ |
675 | static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr, |
676 | const struct ib_send_wr **bad_wr) |
677 | { |
678 | int rv = 0; |
679 | |
680 | while (wr) { |
681 | struct siw_sqe sqe = {}; |
682 | |
683 | switch (wr->opcode) { |
684 | case IB_WR_RDMA_WRITE: |
685 | sqe.opcode = SIW_OP_WRITE; |
686 | break; |
687 | case IB_WR_RDMA_READ: |
688 | sqe.opcode = SIW_OP_READ; |
689 | break; |
690 | case IB_WR_RDMA_READ_WITH_INV: |
691 | sqe.opcode = SIW_OP_READ_LOCAL_INV; |
692 | break; |
693 | case IB_WR_SEND: |
694 | sqe.opcode = SIW_OP_SEND; |
695 | break; |
696 | case IB_WR_SEND_WITH_IMM: |
697 | sqe.opcode = SIW_OP_SEND_WITH_IMM; |
698 | break; |
699 | case IB_WR_SEND_WITH_INV: |
700 | sqe.opcode = SIW_OP_SEND_REMOTE_INV; |
701 | break; |
702 | case IB_WR_LOCAL_INV: |
703 | sqe.opcode = SIW_OP_INVAL_STAG; |
704 | break; |
705 | case IB_WR_REG_MR: |
706 | sqe.opcode = SIW_OP_REG_MR; |
707 | break; |
708 | default: |
709 | rv = -EINVAL; |
710 | break; |
711 | } |
712 | if (!rv) { |
713 | sqe.id = wr->wr_id; |
			rv = siw_sqe_complete(qp, &sqe, 0,
					      SIW_WC_WR_FLUSH_ERR);
716 | } |
717 | if (rv) { |
718 | if (bad_wr) |
719 | *bad_wr = wr; |
720 | break; |
721 | } |
722 | wr = wr->next; |
723 | } |
724 | return rv; |
725 | } |
726 | |
727 | /* Complete RQ WR's without processing */ |
728 | static int siw_rq_flush_wr(struct siw_qp *qp, const struct ib_recv_wr *wr, |
729 | const struct ib_recv_wr **bad_wr) |
730 | { |
731 | struct siw_rqe rqe = {}; |
732 | int rv = 0; |
733 | |
734 | while (wr) { |
735 | rqe.id = wr->wr_id; |
		rv = siw_rqe_complete(qp, &rqe, 0, 0, SIW_WC_WR_FLUSH_ERR);
737 | if (rv) { |
738 | if (bad_wr) |
739 | *bad_wr = wr; |
740 | break; |
741 | } |
742 | wr = wr->next; |
743 | } |
744 | return rv; |
745 | } |
746 | |
747 | /* |
748 | * siw_post_send() |
749 | * |
750 | * Post a list of S-WR's to a SQ. |
751 | * |
752 | * @base_qp: Base QP contained in siw QP |
753 | * @wr: Null terminated list of user WR's |
754 | * @bad_wr: Points to failing WR in case of synchronous failure. |
755 | */ |
756 | int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, |
757 | const struct ib_send_wr **bad_wr) |
758 | { |
759 | struct siw_qp *qp = to_siw_qp(base_qp); |
760 | struct siw_wqe *wqe = tx_wqe(qp); |
761 | |
762 | unsigned long flags; |
763 | int rv = 0; |
764 | |
	if (wr && !rdma_is_kernel_res(&qp->base_qp.res)) {
		siw_dbg_qp(qp, "wr must be empty for user mapped sq\n");
767 | *bad_wr = wr; |
768 | return -EINVAL; |
769 | } |
770 | |
	/*
	 * Try to acquire QP state lock. Must be non-blocking
	 * to accommodate kernel clients' needs.
	 */
	if (!down_read_trylock(&qp->state_lock)) {
776 | if (qp->attrs.state == SIW_QP_STATE_ERROR) { |
777 | /* |
778 | * ERROR state is final, so we can be sure |
779 | * this state will not change as long as the QP |
780 | * exists. |
781 | * |
782 | * This handles an ib_drain_sq() call with |
783 | * a concurrent request to set the QP state |
784 | * to ERROR. |
785 | */ |
786 | rv = siw_sq_flush_wr(qp, wr, bad_wr); |
787 | } else { |
			siw_dbg_qp(qp, "QP locked, state %d\n",
789 | qp->attrs.state); |
790 | *bad_wr = wr; |
791 | rv = -ENOTCONN; |
792 | } |
793 | return rv; |
794 | } |
795 | if (unlikely(qp->attrs.state != SIW_QP_STATE_RTS)) { |
796 | if (qp->attrs.state == SIW_QP_STATE_ERROR) { |
797 | /* |
798 | * Immediately flush this WR to CQ, if QP |
799 | * is in ERROR state. SQ is guaranteed to |
			 * be empty, so WR completes in-order.
801 | * |
802 | * Typically triggered by ib_drain_sq(). |
803 | */ |
804 | rv = siw_sq_flush_wr(qp, wr, bad_wr); |
805 | } else { |
			siw_dbg_qp(qp, "QP out of state %d\n",
807 | qp->attrs.state); |
808 | *bad_wr = wr; |
809 | rv = -ENOTCONN; |
810 | } |
		up_read(&qp->state_lock);
812 | return rv; |
813 | } |
814 | spin_lock_irqsave(&qp->sq_lock, flags); |
815 | |
816 | while (wr) { |
817 | u32 idx = qp->sq_put % qp->attrs.sq_size; |
818 | struct siw_sqe *sqe = &qp->sendq[idx]; |
819 | |
820 | if (sqe->flags) { |
			siw_dbg_qp(qp, "sq full\n");
			rv = -ENOMEM;
			break;
		}
		if (wr->num_sge > qp->attrs.sq_max_sges) {
			siw_dbg_qp(qp, "too many sge's: %d\n", wr->num_sge);
827 | rv = -EINVAL; |
828 | break; |
829 | } |
830 | sqe->id = wr->wr_id; |
831 | |
832 | if ((wr->send_flags & IB_SEND_SIGNALED) || |
833 | (qp->attrs.flags & SIW_SIGNAL_ALL_WR)) |
834 | sqe->flags |= SIW_WQE_SIGNALLED; |
835 | |
836 | if (wr->send_flags & IB_SEND_FENCE) |
837 | sqe->flags |= SIW_WQE_READ_FENCE; |
838 | |
839 | switch (wr->opcode) { |
840 | case IB_WR_SEND: |
841 | case IB_WR_SEND_WITH_INV: |
842 | if (wr->send_flags & IB_SEND_SOLICITED) |
843 | sqe->flags |= SIW_WQE_SOLICITED; |
844 | |
845 | if (!(wr->send_flags & IB_SEND_INLINE)) { |
				siw_copy_sgl(wr->sg_list, sqe->sge,
					     wr->num_sge);
				sqe->num_sge = wr->num_sge;
			} else {
				rv = siw_copy_inline_sgl(wr, sqe);
851 | if (rv <= 0) { |
852 | rv = -EINVAL; |
853 | break; |
854 | } |
855 | sqe->flags |= SIW_WQE_INLINE; |
856 | sqe->num_sge = 1; |
857 | } |
858 | if (wr->opcode == IB_WR_SEND) |
859 | sqe->opcode = SIW_OP_SEND; |
860 | else { |
861 | sqe->opcode = SIW_OP_SEND_REMOTE_INV; |
862 | sqe->rkey = wr->ex.invalidate_rkey; |
863 | } |
864 | break; |
865 | |
866 | case IB_WR_RDMA_READ_WITH_INV: |
867 | case IB_WR_RDMA_READ: |
868 | /* |
869 | * iWarp restricts RREAD sink to SGL containing |
870 | * 1 SGE only. we could relax to SGL with multiple |
871 | * elements referring the SAME ltag or even sending |
872 | * a private per-rreq tag referring to a checked |
873 | * local sgl with MULTIPLE ltag's. |
874 | */ |
875 | if (unlikely(wr->num_sge != 1)) { |
876 | rv = -EINVAL; |
877 | break; |
878 | } |
			siw_copy_sgl(wr->sg_list, &sqe->sge[0], 1);
880 | /* |
881 | * NOTE: zero length RREAD is allowed! |
882 | */ |
883 | sqe->raddr = rdma_wr(wr)->remote_addr; |
884 | sqe->rkey = rdma_wr(wr)->rkey; |
885 | sqe->num_sge = 1; |
886 | |
887 | if (wr->opcode == IB_WR_RDMA_READ) |
888 | sqe->opcode = SIW_OP_READ; |
889 | else |
890 | sqe->opcode = SIW_OP_READ_LOCAL_INV; |
891 | break; |
892 | |
893 | case IB_WR_RDMA_WRITE: |
894 | if (!(wr->send_flags & IB_SEND_INLINE)) { |
				siw_copy_sgl(wr->sg_list, &sqe->sge[0],
					     wr->num_sge);
				sqe->num_sge = wr->num_sge;
			} else {
				rv = siw_copy_inline_sgl(wr, sqe);
900 | if (unlikely(rv < 0)) { |
901 | rv = -EINVAL; |
902 | break; |
903 | } |
904 | sqe->flags |= SIW_WQE_INLINE; |
905 | sqe->num_sge = 1; |
906 | } |
907 | sqe->raddr = rdma_wr(wr)->remote_addr; |
908 | sqe->rkey = rdma_wr(wr)->rkey; |
909 | sqe->opcode = SIW_OP_WRITE; |
910 | break; |
911 | |
912 | case IB_WR_REG_MR: |
913 | sqe->base_mr = (uintptr_t)reg_wr(wr)->mr; |
914 | sqe->rkey = reg_wr(wr)->key; |
915 | sqe->access = reg_wr(wr)->access & IWARP_ACCESS_MASK; |
916 | sqe->opcode = SIW_OP_REG_MR; |
917 | break; |
918 | |
919 | case IB_WR_LOCAL_INV: |
920 | sqe->rkey = wr->ex.invalidate_rkey; |
921 | sqe->opcode = SIW_OP_INVAL_STAG; |
922 | break; |
923 | |
924 | default: |
			siw_dbg_qp(qp, "ib wr type %d unsupported\n",
926 | wr->opcode); |
927 | rv = -EINVAL; |
928 | break; |
929 | } |
		siw_dbg_qp(qp, "opcode %d, flags 0x%x, wr_id 0x%pK\n",
931 | sqe->opcode, sqe->flags, |
932 | (void *)(uintptr_t)sqe->id); |
933 | |
934 | if (unlikely(rv < 0)) |
935 | break; |
936 | |
937 | /* make SQE only valid after completely written */ |
938 | smp_wmb(); |
939 | sqe->flags |= SIW_WQE_VALID; |
940 | |
941 | qp->sq_put++; |
942 | wr = wr->next; |
943 | } |
944 | |
945 | /* |
946 | * Send directly if SQ processing is not in progress. |
947 | * Eventual immediate errors (rv < 0) do not affect the involved |
948 | * RI resources (Verbs, 8.3.1) and thus do not prevent from SQ |
949 | * processing, if new work is already pending. But rv must be passed |
950 | * to caller. |
951 | */ |
952 | if (wqe->wr_status != SIW_WR_IDLE) { |
		spin_unlock_irqrestore(&qp->sq_lock, flags);
		goto skip_direct_sending;
	}
	rv = siw_activate_tx(qp);
	spin_unlock_irqrestore(&qp->sq_lock, flags);
958 | |
959 | if (rv <= 0) |
960 | goto skip_direct_sending; |
961 | |
	if (rdma_is_kernel_res(&qp->base_qp.res)) {
		rv = siw_sq_start(qp);
	} else {
		qp->tx_ctx.in_syscall = 1;

		if (siw_qp_sq_process(qp) != 0 && !(qp->tx_ctx.tx_suspend))
			siw_qp_cm_drop(qp, 0);
969 | |
970 | qp->tx_ctx.in_syscall = 0; |
971 | } |
972 | skip_direct_sending: |
973 | |
	up_read(&qp->state_lock);
975 | |
976 | if (rv >= 0) |
977 | return 0; |
978 | /* |
979 | * Immediate error |
980 | */ |
	siw_dbg_qp(qp, "error %d\n", rv);
982 | |
983 | *bad_wr = wr; |
984 | return rv; |
985 | } |
986 | |
987 | /* |
988 | * siw_post_receive() |
989 | * |
990 | * Post a list of R-WR's to a RQ. |
991 | * |
992 | * @base_qp: Base QP contained in siw QP |
993 | * @wr: Null terminated list of user WR's |
994 | * @bad_wr: Points to failing WR in case of synchronous failure. |
995 | */ |
996 | int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr, |
997 | const struct ib_recv_wr **bad_wr) |
998 | { |
999 | struct siw_qp *qp = to_siw_qp(base_qp); |
1000 | unsigned long flags; |
1001 | int rv = 0; |
1002 | |
1003 | if (qp->srq || qp->attrs.rq_size == 0) { |
1004 | *bad_wr = wr; |
1005 | return -EINVAL; |
1006 | } |
	if (!rdma_is_kernel_res(&qp->base_qp.res)) {
		siw_dbg_qp(qp, "no kernel post_recv for user mapped rq\n");
1009 | *bad_wr = wr; |
1010 | return -EINVAL; |
1011 | } |
1012 | |
	/*
	 * Try to acquire QP state lock. Must be non-blocking
	 * to accommodate kernel clients' needs.
	 */
	if (!down_read_trylock(&qp->state_lock)) {
1018 | if (qp->attrs.state == SIW_QP_STATE_ERROR) { |
1019 | /* |
1020 | * ERROR state is final, so we can be sure |
1021 | * this state will not change as long as the QP |
1022 | * exists. |
1023 | * |
1024 | * This handles an ib_drain_rq() call with |
1025 | * a concurrent request to set the QP state |
1026 | * to ERROR. |
1027 | */ |
1028 | rv = siw_rq_flush_wr(qp, wr, bad_wr); |
1029 | } else { |
			siw_dbg_qp(qp, "QP locked, state %d\n",
1031 | qp->attrs.state); |
1032 | *bad_wr = wr; |
1033 | rv = -ENOTCONN; |
1034 | } |
1035 | return rv; |
1036 | } |
1037 | if (qp->attrs.state > SIW_QP_STATE_RTS) { |
1038 | if (qp->attrs.state == SIW_QP_STATE_ERROR) { |
1039 | /* |
1040 | * Immediately flush this WR to CQ, if QP |
1041 | * is in ERROR state. RQ is guaranteed to |
			 * be empty, so WR completes in-order.
1043 | * |
1044 | * Typically triggered by ib_drain_rq(). |
1045 | */ |
1046 | rv = siw_rq_flush_wr(qp, wr, bad_wr); |
1047 | } else { |
			siw_dbg_qp(qp, "QP out of state %d\n",
1049 | qp->attrs.state); |
1050 | *bad_wr = wr; |
1051 | rv = -ENOTCONN; |
1052 | } |
		up_read(&qp->state_lock);
1054 | return rv; |
1055 | } |
1056 | /* |
1057 | * Serialize potentially multiple producers. |
1058 | * Not needed for single threaded consumer side. |
1059 | */ |
1060 | spin_lock_irqsave(&qp->rq_lock, flags); |
1061 | |
1062 | while (wr) { |
1063 | u32 idx = qp->rq_put % qp->attrs.rq_size; |
1064 | struct siw_rqe *rqe = &qp->recvq[idx]; |
1065 | |
1066 | if (rqe->flags) { |
			siw_dbg_qp(qp, "RQ full\n");
			rv = -ENOMEM;
			break;
		}
		if (wr->num_sge > qp->attrs.rq_max_sges) {
			siw_dbg_qp(qp, "too many sge's: %d\n", wr->num_sge);
1073 | rv = -EINVAL; |
1074 | break; |
1075 | } |
1076 | rqe->id = wr->wr_id; |
1077 | rqe->num_sge = wr->num_sge; |
		siw_copy_sgl(wr->sg_list, rqe->sge, wr->num_sge);
1079 | |
1080 | /* make sure RQE is completely written before valid */ |
1081 | smp_wmb(); |
1082 | |
1083 | rqe->flags = SIW_WQE_VALID; |
1084 | |
1085 | qp->rq_put++; |
1086 | wr = wr->next; |
1087 | } |
	spin_unlock_irqrestore(&qp->rq_lock, flags);

	up_read(&qp->state_lock);

	if (rv < 0) {
		siw_dbg_qp(qp, "error %d\n", rv);
1094 | *bad_wr = wr; |
1095 | } |
1096 | return rv > 0 ? 0 : rv; |
1097 | } |
1098 | |
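/*
 * siw_destroy_cq()
 *
 * Flush pending completions, remove the user mapping if present
 * and release all CQ resources.
 */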
1099 | int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata) |
1100 | { |
1101 | struct siw_cq *cq = to_siw_cq(base_cq); |
	struct siw_device *sdev = to_siw_dev(base_cq->device);
1103 | struct siw_ucontext *ctx = |
1104 | rdma_udata_to_drv_context(udata, struct siw_ucontext, |
1105 | base_ucontext); |
1106 | |
	siw_dbg_cq(cq, "free CQ resources\n");

	siw_cq_flush(cq);

	if (ctx)
		rdma_user_mmap_entry_remove(cq->cq_entry);

	atomic_dec(&sdev->num_cq);

	vfree(cq->queue);
1117 | return 0; |
1118 | } |
1119 | |
1120 | /* |
1121 | * siw_create_cq() |
1122 | * |
1123 | * Populate CQ of requested size |
1124 | * |
1125 | * @base_cq: CQ as allocated by RDMA midlayer |
1126 | * @attr: Initial CQ attributes |
1127 | * @udata: relates to user context |
1128 | */ |
1129 | |
1130 | int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, |
1131 | struct ib_udata *udata) |
1132 | { |
	struct siw_device *sdev = to_siw_dev(base_cq->device);
1134 | struct siw_cq *cq = to_siw_cq(base_cq); |
1135 | int rv, size = attr->cqe; |
1136 | |
1137 | if (attr->flags) |
1138 | return -EOPNOTSUPP; |
1139 | |
	if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) {
		siw_dbg(base_cq->device, "too many CQ's\n");
		rv = -ENOMEM;
		goto err_out;
	}
	if (size < 1 || size > sdev->attrs.max_cqe) {
		siw_dbg(base_cq->device, "CQ size error: %d\n", size);
1147 | rv = -EINVAL; |
1148 | goto err_out; |
1149 | } |
1150 | size = roundup_pow_of_two(size); |
1151 | cq->base_cq.cqe = size; |
1152 | cq->num_cqe = size; |
1153 | |
1154 | if (udata) |
		cq->queue = vmalloc_user(size * sizeof(struct siw_cqe) +
					 sizeof(struct siw_cq_ctrl));
	else
		cq->queue = vzalloc(size * sizeof(struct siw_cqe) +
				    sizeof(struct siw_cq_ctrl));
1160 | |
1161 | if (cq->queue == NULL) { |
1162 | rv = -ENOMEM; |
1163 | goto err_out; |
1164 | } |
	get_random_bytes(&cq->id, 4);
	siw_dbg(base_cq->device, "new CQ [%u]\n", cq->id);
1167 | |
1168 | spin_lock_init(&cq->lock); |
1169 | |
1170 | cq->notify = (struct siw_cq_ctrl *)&cq->queue[size]; |
1171 | |
1172 | if (udata) { |
1173 | struct siw_uresp_create_cq uresp = {}; |
1174 | struct siw_ucontext *ctx = |
1175 | rdma_udata_to_drv_context(udata, struct siw_ucontext, |
1176 | base_ucontext); |
1177 | size_t length = size * sizeof(struct siw_cqe) + |
1178 | sizeof(struct siw_cq_ctrl); |
1179 | |
1180 | cq->cq_entry = |
			siw_mmap_entry_insert(ctx, cq->queue,
					      length, &uresp.cq_key);
1183 | if (!cq->cq_entry) { |
1184 | rv = -ENOMEM; |
1185 | goto err_out; |
1186 | } |
1187 | |
1188 | uresp.cq_id = cq->id; |
1189 | uresp.num_cqe = size; |
1190 | |
1191 | if (udata->outlen < sizeof(uresp)) { |
1192 | rv = -EINVAL; |
1193 | goto err_out; |
1194 | } |
		rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1196 | if (rv) |
1197 | goto err_out; |
1198 | } |
1199 | return 0; |
1200 | |
1201 | err_out: |
	siw_dbg(base_cq->device, "CQ creation failed: %d", rv);
1203 | |
1204 | if (cq->queue) { |
1205 | struct siw_ucontext *ctx = |
1206 | rdma_udata_to_drv_context(udata, struct siw_ucontext, |
1207 | base_ucontext); |
1208 | if (ctx) |
			rdma_user_mmap_entry_remove(cq->cq_entry);
		vfree(cq->queue);
	}
	atomic_dec(&sdev->num_cq);
1213 | |
1214 | return rv; |
1215 | } |
1216 | |
1217 | /* |
1218 | * siw_poll_cq() |
1219 | * |
1220 | * Reap CQ entries if available and copy work completion status into |
1221 | * array of WC's provided by caller. Returns number of reaped CQE's. |
1222 | * |
1223 | * @base_cq: Base CQ contained in siw CQ. |
1224 | * @num_cqe: Maximum number of CQE's to reap. |
1225 | * @wc: Array of work completions to be filled by siw. |
1226 | */ |
1227 | int siw_poll_cq(struct ib_cq *base_cq, int num_cqe, struct ib_wc *wc) |
1228 | { |
1229 | struct siw_cq *cq = to_siw_cq(base_cq); |
1230 | int i; |
1231 | |
1232 | for (i = 0; i < num_cqe; i++) { |
1233 | if (!siw_reap_cqe(cq, wc)) |
1234 | break; |
1235 | wc++; |
1236 | } |
1237 | return i; |
1238 | } |
1239 | |
1240 | /* |
1241 | * siw_req_notify_cq() |
1242 | * |
1243 | * Request notification for new CQE's added to that CQ. |
1244 | * Defined flags: |
1245 | * o SIW_CQ_NOTIFY_SOLICITED lets siw trigger a notification |
1246 | * event if a WQE with notification flag set enters the CQ |
1247 | * o SIW_CQ_NOTIFY_NEXT_COMP lets siw trigger a notification |
1248 | * event if a WQE enters the CQ. |
1249 | * o IB_CQ_REPORT_MISSED_EVENTS: return value will provide the |
1250 | * number of not reaped CQE's regardless of its notification |
1251 | * type and current or new CQ notification settings. |
1252 | * |
1253 | * @base_cq: Base CQ contained in siw CQ. |
1254 | * @flags: Requested notification flags. |
1255 | */ |
1256 | int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags) |
1257 | { |
1258 | struct siw_cq *cq = to_siw_cq(base_cq); |
1259 | |
	siw_dbg_cq(cq, "flags: 0x%02x\n", flags);
1261 | |
1262 | if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) |
1263 | /* |
1264 | * Enable CQ event for next solicited completion. |
1265 | * and make it visible to all associated producers. |
1266 | */ |
1267 | smp_store_mb(cq->notify->flags, SIW_NOTIFY_SOLICITED); |
1268 | else |
1269 | /* |
1270 | * Enable CQ event for any signalled completion. |
1271 | * and make it visible to all associated producers. |
1272 | */ |
1273 | smp_store_mb(cq->notify->flags, SIW_NOTIFY_ALL); |
1274 | |
1275 | if (flags & IB_CQ_REPORT_MISSED_EVENTS) |
1276 | return cq->cq_put - cq->cq_get; |
1277 | |
1278 | return 0; |
1279 | } |
1280 | |
1281 | /* |
1282 | * siw_dereg_mr() |
1283 | * |
1284 | * Release Memory Region. |
1285 | * |
1286 | * @base_mr: Base MR contained in siw MR. |
1287 | * @udata: points to user context, unused. |
1288 | */ |
1289 | int siw_dereg_mr(struct ib_mr *base_mr, struct ib_udata *udata) |
1290 | { |
1291 | struct siw_mr *mr = to_siw_mr(base_mr); |
	struct siw_device *sdev = to_siw_dev(base_mr->device);

	siw_dbg_mem(mr->mem, "deregister MR\n");

	atomic_dec(&sdev->num_mr);
1297 | |
1298 | siw_mr_drop_mem(mr); |
1299 | kfree_rcu(mr, rcu); |
1300 | |
1301 | return 0; |
1302 | } |
1303 | |
1304 | /* |
1305 | * siw_reg_user_mr() |
1306 | * |
1307 | * Register Memory Region. |
1308 | * |
1309 | * @pd: Protection Domain |
1310 | * @start: starting address of MR (virtual address) |
1311 | * @len: len of MR |
1312 | * @rnic_va: not used by siw |
1313 | * @rights: MR access rights |
1314 | * @udata: user buffer to communicate STag and Key. |
1315 | */ |
1316 | struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, |
1317 | u64 rnic_va, int rights, struct ib_udata *udata) |
1318 | { |
1319 | struct siw_mr *mr = NULL; |
1320 | struct siw_umem *umem = NULL; |
1321 | struct siw_ureq_reg_mr ureq; |
	struct siw_device *sdev = to_siw_dev(pd->device);
	int rv;

	siw_dbg_pd(pd, "start: 0x%pK, va: 0x%pK, len: %llu\n",
		   (void *)(uintptr_t)start, (void *)(uintptr_t)rnic_va,
		   (unsigned long long)len);

	if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) {
		siw_dbg_pd(pd, "too many mr's\n");
1331 | rv = -ENOMEM; |
1332 | goto err_out; |
1333 | } |
1334 | if (!len) { |
1335 | rv = -EINVAL; |
1336 | goto err_out; |
1337 | } |
	umem = siw_umem_get(pd->device, start, len, rights);
	if (IS_ERR(umem)) {
		rv = PTR_ERR(umem);
		siw_dbg_pd(pd, "getting user memory failed: %d\n", rv);
		umem = NULL;
		goto err_out;
	}
	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1346 | if (!mr) { |
1347 | rv = -ENOMEM; |
1348 | goto err_out; |
1349 | } |
	rv = siw_mr_add_mem(mr, pd, umem, start, len, rights);
1351 | if (rv) |
1352 | goto err_out; |
1353 | |
1354 | if (udata) { |
1355 | struct siw_uresp_reg_mr uresp = {}; |
1356 | struct siw_mem *mem = mr->mem; |
1357 | |
1358 | if (udata->inlen < sizeof(ureq)) { |
1359 | rv = -EINVAL; |
1360 | goto err_out; |
1361 | } |
		rv = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
1363 | if (rv) |
1364 | goto err_out; |
1365 | |
1366 | mr->base_mr.lkey |= ureq.stag_key; |
1367 | mr->base_mr.rkey |= ureq.stag_key; |
1368 | mem->stag |= ureq.stag_key; |
1369 | uresp.stag = mem->stag; |
1370 | |
1371 | if (udata->outlen < sizeof(uresp)) { |
1372 | rv = -EINVAL; |
1373 | goto err_out; |
1374 | } |
		rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1376 | if (rv) |
1377 | goto err_out; |
1378 | } |
1379 | mr->mem->stag_valid = 1; |
1380 | |
1381 | return &mr->base_mr; |
1382 | |
1383 | err_out: |
	atomic_dec(&sdev->num_mr);
1385 | if (mr) { |
1386 | if (mr->mem) |
1387 | siw_mr_drop_mem(mr); |
1388 | kfree_rcu(mr, rcu); |
1389 | } else { |
1390 | if (umem) |
1391 | siw_umem_release(umem); |
1392 | } |
	return ERR_PTR(rv);
1394 | } |
1395 | |
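/*
 * siw_alloc_mr()
 *
 * Allocate a memory region for fast registration work requests,
 * backed by a freshly allocated physical buffer list (PBL).
 */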
1396 | struct ib_mr *siw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, |
1397 | u32 max_sge) |
1398 | { |
	struct siw_device *sdev = to_siw_dev(pd->device);
1400 | struct siw_mr *mr = NULL; |
1401 | struct siw_pbl *pbl = NULL; |
1402 | int rv; |
1403 | |
	if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) {
		siw_dbg_pd(pd, "too many mr's\n");
1406 | rv = -ENOMEM; |
1407 | goto err_out; |
1408 | } |
1409 | if (mr_type != IB_MR_TYPE_MEM_REG) { |
		siw_dbg_pd(pd, "mr type %d unsupported\n", mr_type);
1411 | rv = -EOPNOTSUPP; |
1412 | goto err_out; |
1413 | } |
1414 | if (max_sge > SIW_MAX_SGE_PBL) { |
		siw_dbg_pd(pd, "too many sge's: %d\n", max_sge);
1416 | rv = -ENOMEM; |
1417 | goto err_out; |
1418 | } |
	pbl = siw_pbl_alloc(max_sge);
	if (IS_ERR(pbl)) {
		rv = PTR_ERR(pbl);
		siw_dbg_pd(pd, "pbl allocation failed: %d\n", rv);
		pbl = NULL;
		goto err_out;
	}
	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1427 | if (!mr) { |
1428 | rv = -ENOMEM; |
1429 | goto err_out; |
1430 | } |
	rv = siw_mr_add_mem(mr, pd, pbl, 0, max_sge * PAGE_SIZE, 0);
1432 | if (rv) |
1433 | goto err_out; |
1434 | |
1435 | mr->mem->is_pbl = 1; |
1436 | |
	siw_dbg_pd(pd, "[MEM %u]: success\n", mr->mem->stag);
1438 | |
1439 | return &mr->base_mr; |
1440 | |
1441 | err_out: |
	atomic_dec(&sdev->num_mr);

	if (!mr) {
		kfree(pbl);
1446 | } else { |
1447 | if (mr->mem) |
1448 | siw_mr_drop_mem(mr); |
1449 | kfree_rcu(mr, rcu); |
1450 | } |
	siw_dbg_pd(pd, "failed: %d\n", rv);

	return ERR_PTR(rv);
1454 | } |
1455 | |
1456 | /* Just used to count number of pages being mapped */ |
1457 | static int siw_set_pbl_page(struct ib_mr *base_mr, u64 buf_addr) |
1458 | { |
1459 | return 0; |
1460 | } |
1461 | |
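/*
 * siw_map_mr_sg()
 *
 * Map a scatterlist into the PBL of a fast registration MR,
 * merging physically adjacent entries where possible.
 */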
1462 | int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle, |
1463 | unsigned int *sg_off) |
1464 | { |
1465 | struct scatterlist *slp; |
1466 | struct siw_mr *mr = to_siw_mr(base_mr); |
1467 | struct siw_mem *mem = mr->mem; |
1468 | struct siw_pbl *pbl = mem->pbl; |
1469 | struct siw_pble *pble; |
1470 | unsigned long pbl_size; |
1471 | int i, rv; |
1472 | |
1473 | if (!pbl) { |
		siw_dbg_mem(mem, "no PBL allocated\n");
1475 | return -EINVAL; |
1476 | } |
1477 | pble = pbl->pbe; |
1478 | |
1479 | if (pbl->max_buf < num_sle) { |
		siw_dbg_mem(mem, "too many SGE's: %d > %d\n",
1481 | num_sle, pbl->max_buf); |
1482 | return -ENOMEM; |
1483 | } |
1484 | for_each_sg(sl, slp, num_sle, i) { |
1485 | if (sg_dma_len(slp) == 0) { |
			siw_dbg_mem(mem, "empty SGE\n");
1487 | return -EINVAL; |
1488 | } |
1489 | if (i == 0) { |
1490 | pble->addr = sg_dma_address(slp); |
1491 | pble->size = sg_dma_len(slp); |
1492 | pble->pbl_off = 0; |
1493 | pbl_size = pble->size; |
1494 | pbl->num_buf = 1; |
1495 | } else { |
1496 | /* Merge PBL entries if adjacent */ |
1497 | if (pble->addr + pble->size == sg_dma_address(slp)) { |
1498 | pble->size += sg_dma_len(slp); |
1499 | } else { |
1500 | pble++; |
1501 | pbl->num_buf++; |
1502 | pble->addr = sg_dma_address(slp); |
1503 | pble->size = sg_dma_len(slp); |
1504 | pble->pbl_off = pbl_size; |
1505 | } |
1506 | pbl_size += sg_dma_len(slp); |
1507 | } |
		siw_dbg_mem(mem,
			    "sge[%d], size %u, addr 0x%p, total %lu\n",
			    i, pble->size, ib_virt_dma_to_ptr(pble->addr),
			    pbl_size);
	}
	rv = ib_sg_to_pages(base_mr, sl, num_sle, sg_off, siw_set_pbl_page);
1514 | if (rv > 0) { |
1515 | mem->len = base_mr->length; |
1516 | mem->va = base_mr->iova; |
1517 | siw_dbg_mem(mem, |
			    "%llu bytes, start 0x%pK, %u SLE to %u entries\n",
1519 | mem->len, (void *)(uintptr_t)mem->va, num_sle, |
1520 | pbl->num_buf); |
1521 | } |
1522 | return rv; |
1523 | } |
1524 | |
1525 | /* |
1526 | * siw_get_dma_mr() |
1527 | * |
 * Create an (empty) DMA memory region, where no umem is attached.
1529 | */ |
1530 | struct ib_mr *siw_get_dma_mr(struct ib_pd *pd, int rights) |
1531 | { |
	struct siw_device *sdev = to_siw_dev(pd->device);
1533 | struct siw_mr *mr = NULL; |
1534 | int rv; |
1535 | |
	if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) {
		siw_dbg_pd(pd, "too many mr's\n");
1538 | rv = -ENOMEM; |
1539 | goto err_out; |
1540 | } |
	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1542 | if (!mr) { |
1543 | rv = -ENOMEM; |
1544 | goto err_out; |
1545 | } |
	rv = siw_mr_add_mem(mr, pd, NULL, 0, ULONG_MAX, rights);
1547 | if (rv) |
1548 | goto err_out; |
1549 | |
1550 | mr->mem->stag_valid = 1; |
1551 | |
	siw_dbg_pd(pd, "[MEM %u]: success\n", mr->mem->stag);
1553 | |
1554 | return &mr->base_mr; |
1555 | |
1556 | err_out: |
1557 | if (rv) |
		kfree(mr);

	atomic_dec(&sdev->num_mr);

	return ERR_PTR(rv);
1563 | } |
1564 | |
1565 | /* |
1566 | * siw_create_srq() |
1567 | * |
1568 | * Create Shared Receive Queue of attributes @init_attrs |
1569 | * within protection domain given by @pd. |
1570 | * |
1571 | * @base_srq: Base SRQ contained in siw SRQ. |
1572 | * @init_attrs: SRQ init attributes. |
1573 | * @udata: points to user context |
1574 | */ |
1575 | int siw_create_srq(struct ib_srq *base_srq, |
1576 | struct ib_srq_init_attr *init_attrs, struct ib_udata *udata) |
1577 | { |
1578 | struct siw_srq *srq = to_siw_srq(base_srq); |
1579 | struct ib_srq_attr *attrs = &init_attrs->attr; |
	struct siw_device *sdev = to_siw_dev(base_srq->device);
1581 | struct siw_ucontext *ctx = |
1582 | rdma_udata_to_drv_context(udata, struct siw_ucontext, |
1583 | base_ucontext); |
1584 | int rv; |
1585 | |
1586 | if (init_attrs->srq_type != IB_SRQT_BASIC) |
1587 | return -EOPNOTSUPP; |
1588 | |
	if (atomic_inc_return(&sdev->num_srq) > SIW_MAX_SRQ) {
		siw_dbg_pd(base_srq->pd, "too many SRQ's\n");
1591 | rv = -ENOMEM; |
1592 | goto err_out; |
1593 | } |
1594 | if (attrs->max_wr == 0 || attrs->max_wr > SIW_MAX_SRQ_WR || |
1595 | attrs->max_sge > SIW_MAX_SGE || attrs->srq_limit > attrs->max_wr) { |
1596 | rv = -EINVAL; |
1597 | goto err_out; |
1598 | } |
1599 | srq->max_sge = attrs->max_sge; |
1600 | srq->num_rqe = roundup_pow_of_two(attrs->max_wr); |
1601 | srq->limit = attrs->srq_limit; |
1602 | if (srq->limit) |
1603 | srq->armed = true; |
1604 | |
1605 | srq->is_kernel_res = !udata; |
1606 | |
1607 | if (udata) |
		srq->recvq =
			vmalloc_user(srq->num_rqe * sizeof(struct siw_rqe));
	else
		srq->recvq = vcalloc(srq->num_rqe, sizeof(struct siw_rqe));
1612 | |
1613 | if (srq->recvq == NULL) { |
1614 | rv = -ENOMEM; |
1615 | goto err_out; |
1616 | } |
1617 | if (udata) { |
1618 | struct siw_uresp_create_srq uresp = {}; |
1619 | size_t length = srq->num_rqe * sizeof(struct siw_rqe); |
1620 | |
1621 | srq->srq_entry = |
			siw_mmap_entry_insert(ctx, srq->recvq,
					      length, &uresp.srq_key);
1624 | if (!srq->srq_entry) { |
1625 | rv = -ENOMEM; |
1626 | goto err_out; |
1627 | } |
1628 | |
1629 | uresp.num_rqe = srq->num_rqe; |
1630 | |
1631 | if (udata->outlen < sizeof(uresp)) { |
1632 | rv = -EINVAL; |
1633 | goto err_out; |
1634 | } |
		rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1636 | if (rv) |
1637 | goto err_out; |
1638 | } |
1639 | spin_lock_init(&srq->lock); |
1640 | |
	siw_dbg_pd(base_srq->pd, "[SRQ]: success\n");
1642 | |
1643 | return 0; |
1644 | |
1645 | err_out: |
1646 | if (srq->recvq) { |
1647 | if (ctx) |
			rdma_user_mmap_entry_remove(srq->srq_entry);
		vfree(srq->recvq);
	}
	atomic_dec(&sdev->num_srq);
1652 | |
1653 | return rv; |
1654 | } |
1655 | |
1656 | /* |
1657 | * siw_modify_srq() |
1658 | * |
1659 | * Modify SRQ. The caller may resize SRQ and/or set/reset notification |
1660 | * limit and (re)arm IB_EVENT_SRQ_LIMIT_REACHED notification. |
1661 | * |
1662 | * NOTE: it is unclear if RDMA core allows for changing the MAX_SGE |
1663 | * parameter. siw_modify_srq() does not check the attrs->max_sge param. |
1664 | */ |
1665 | int siw_modify_srq(struct ib_srq *base_srq, struct ib_srq_attr *attrs, |
1666 | enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) |
1667 | { |
1668 | struct siw_srq *srq = to_siw_srq(base_srq); |
1669 | unsigned long flags; |
1670 | int rv = 0; |
1671 | |
1672 | spin_lock_irqsave(&srq->lock, flags); |
1673 | |
1674 | if (attr_mask & IB_SRQ_MAX_WR) { |
1675 | /* resize request not yet supported */ |
1676 | rv = -EOPNOTSUPP; |
1677 | goto out; |
1678 | } |
1679 | if (attr_mask & IB_SRQ_LIMIT) { |
1680 | if (attrs->srq_limit) { |
1681 | if (unlikely(attrs->srq_limit > srq->num_rqe)) { |
1682 | rv = -EINVAL; |
1683 | goto out; |
1684 | } |
1685 | srq->armed = true; |
1686 | } else { |
1687 | srq->armed = false; |
1688 | } |
1689 | srq->limit = attrs->srq_limit; |
1690 | } |
1691 | out: |
	spin_unlock_irqrestore(&srq->lock, flags);
1693 | |
1694 | return rv; |
1695 | } |
1696 | |
1697 | /* |
1698 | * siw_query_srq() |
1699 | * |
1700 | * Query SRQ attributes. |
1701 | */ |
1702 | int siw_query_srq(struct ib_srq *base_srq, struct ib_srq_attr *attrs) |
1703 | { |
1704 | struct siw_srq *srq = to_siw_srq(base_srq); |
1705 | unsigned long flags; |
1706 | |
1707 | spin_lock_irqsave(&srq->lock, flags); |
1708 | |
1709 | attrs->max_wr = srq->num_rqe; |
1710 | attrs->max_sge = srq->max_sge; |
1711 | attrs->srq_limit = srq->limit; |
1712 | |
	spin_unlock_irqrestore(&srq->lock, flags);
1714 | |
1715 | return 0; |
1716 | } |
1717 | |
1718 | /* |
1719 | * siw_destroy_srq() |
1720 | * |
1721 | * Destroy SRQ. |
1722 | * It is assumed that the SRQ is not referenced by any |
1723 | * QP anymore - the code trusts the RDMA core environment to keep track |
1724 | * of QP references. |
1725 | */ |
1726 | int siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata) |
1727 | { |
1728 | struct siw_srq *srq = to_siw_srq(base_srq); |
	struct siw_device *sdev = to_siw_dev(base_srq->device);
1730 | struct siw_ucontext *ctx = |
1731 | rdma_udata_to_drv_context(udata, struct siw_ucontext, |
1732 | base_ucontext); |
1733 | |
1734 | if (ctx) |
		rdma_user_mmap_entry_remove(srq->srq_entry);
	vfree(srq->recvq);
	atomic_dec(&sdev->num_srq);
1738 | return 0; |
1739 | } |
1740 | |
1741 | /* |
1742 | * siw_post_srq_recv() |
1743 | * |
1744 | * Post a list of receive queue elements to SRQ. |
1745 | * NOTE: The function does not check or lock a certain SRQ state |
1746 | * during the post operation. The code simply trusts the |
1747 | * RDMA core environment. |
1748 | * |
1749 | * @base_srq: Base SRQ contained in siw SRQ |
1750 | * @wr: List of R-WR's |
1751 | * @bad_wr: Updated to failing WR if posting fails. |
1752 | */ |
1753 | int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr, |
1754 | const struct ib_recv_wr **bad_wr) |
1755 | { |
1756 | struct siw_srq *srq = to_siw_srq(base_srq); |
1757 | unsigned long flags; |
1758 | int rv = 0; |
1759 | |
1760 | if (unlikely(!srq->is_kernel_res)) { |
1761 | siw_dbg_pd(base_srq->pd, |
			   "[SRQ]: no kernel post_recv for mapped srq\n");
1763 | rv = -EINVAL; |
1764 | goto out; |
1765 | } |
1766 | /* |
1767 | * Serialize potentially multiple producers. |
1768 | * Also needed to serialize potentially multiple |
1769 | * consumers. |
1770 | */ |
1771 | spin_lock_irqsave(&srq->lock, flags); |
1772 | |
1773 | while (wr) { |
1774 | u32 idx = srq->rq_put % srq->num_rqe; |
1775 | struct siw_rqe *rqe = &srq->recvq[idx]; |
1776 | |
1777 | if (rqe->flags) { |
			siw_dbg_pd(base_srq->pd, "SRQ full\n");
1779 | rv = -ENOMEM; |
1780 | break; |
1781 | } |
1782 | if (unlikely(wr->num_sge > srq->max_sge)) { |
1783 | siw_dbg_pd(base_srq->pd, |
				   "[SRQ]: too many sge's: %d\n", wr->num_sge);
1785 | rv = -EINVAL; |
1786 | break; |
1787 | } |
1788 | rqe->id = wr->wr_id; |
1789 | rqe->num_sge = wr->num_sge; |
		siw_copy_sgl(wr->sg_list, rqe->sge, wr->num_sge);
1791 | |
1792 | /* Make sure S-RQE is completely written before valid */ |
1793 | smp_wmb(); |
1794 | |
1795 | rqe->flags = SIW_WQE_VALID; |
1796 | |
1797 | srq->rq_put++; |
1798 | wr = wr->next; |
1799 | } |
	spin_unlock_irqrestore(&srq->lock, flags);
out:
	if (unlikely(rv < 0)) {
		siw_dbg_pd(base_srq->pd, "[SRQ]: error %d\n", rv);
1804 | *bad_wr = wr; |
1805 | } |
1806 | return rv; |
1807 | } |
1808 | |
1809 | void siw_qp_event(struct siw_qp *qp, enum ib_event_type etype) |
1810 | { |
1811 | struct ib_event event; |
1812 | struct ib_qp *base_qp = &qp->base_qp; |
1813 | |
1814 | /* |
1815 | * Do not report asynchronous errors on QP which gets |
1816 | * destroyed via verbs interface (siw_destroy_qp()) |
1817 | */ |
1818 | if (qp->attrs.flags & SIW_QP_IN_DESTROY) |
1819 | return; |
1820 | |
1821 | event.event = etype; |
1822 | event.device = base_qp->device; |
1823 | event.element.qp = base_qp; |
1824 | |
1825 | if (base_qp->event_handler) { |
		siw_dbg_qp(qp, "reporting event %d\n", etype);
1827 | base_qp->event_handler(&event, base_qp->qp_context); |
1828 | } |
1829 | } |
1830 | |
1831 | void siw_cq_event(struct siw_cq *cq, enum ib_event_type etype) |
1832 | { |
1833 | struct ib_event event; |
1834 | struct ib_cq *base_cq = &cq->base_cq; |
1835 | |
1836 | event.event = etype; |
1837 | event.device = base_cq->device; |
1838 | event.element.cq = base_cq; |
1839 | |
1840 | if (base_cq->event_handler) { |
		siw_dbg_cq(cq, "reporting CQ event %d\n", etype);
1842 | base_cq->event_handler(&event, base_cq->cq_context); |
1843 | } |
1844 | } |
1845 | |
1846 | void siw_srq_event(struct siw_srq *srq, enum ib_event_type etype) |
1847 | { |
1848 | struct ib_event event; |
1849 | struct ib_srq *base_srq = &srq->base_srq; |
1850 | |
1851 | event.event = etype; |
1852 | event.device = base_srq->device; |
1853 | event.element.srq = base_srq; |
1854 | |
1855 | if (base_srq->event_handler) { |
1856 | siw_dbg_pd(srq->base_srq.pd, |
			   "reporting SRQ event %d\n", etype);
1858 | base_srq->event_handler(&event, base_srq->srq_context); |
1859 | } |
1860 | } |
1861 | |
1862 | void siw_port_event(struct siw_device *sdev, u32 port, enum ib_event_type etype) |
1863 | { |
1864 | struct ib_event event; |
1865 | |
1866 | event.event = etype; |
1867 | event.device = &sdev->base_dev; |
1868 | event.element.port_num = port; |
1869 | |
	siw_dbg(&sdev->base_dev, "reporting port event %d\n", etype);

	ib_dispatch_event(&event);
1873 | } |
1874 | |