1 | // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB |
2 | /* |
3 | * Copyright (c) 2022 Hewlett Packard Enterprise, Inc. All rights reserved. |
4 | * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. |
5 | * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. |
6 | */ |
7 | |
8 | /* |
 * rxe_mcast.c implements driver support for multicast transport.
 * It is based on two data structures: struct rxe_mcg ('mcg') and
 * struct rxe_mca ('mca'). An mcg is allocated the first time a qp
 * is attached to a new mgid. The mcgs are indexed by a red-black
 * tree keyed on the mgid. This tree is searched for the mcg when a
 * multicast packet is received and when another qp is attached to
 * the same mgid. The mcg is cleaned up when the last qp is detached
 * from it. Each time a qp is attached to an mcg an mca is created.
 * It holds a pointer to the qp and is added to the list of qps
 * attached to the mcg. This qp_list is used to replicate mcast
 * packets in the rxe receive path.
20 | */ |
21 | |
22 | #include "rxe.h" |
23 | |
24 | /** |
25 | * rxe_mcast_add - add multicast address to rxe device |
26 | * @rxe: rxe device object |
27 | * @mgid: multicast address as a gid |
28 | * |
29 | * Returns 0 on success else an error |
30 | */ |
31 | static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) |
32 | { |
33 | unsigned char ll_addr[ETH_ALEN]; |
34 | |
	ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);

	return dev_mc_add(rxe->ndev, ll_addr);
38 | } |
39 | |
40 | /** |
41 | * rxe_mcast_del - delete multicast address from rxe device |
42 | * @rxe: rxe device object |
43 | * @mgid: multicast address as a gid |
44 | * |
45 | * Returns 0 on success else an error |
46 | */ |
47 | static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid) |
48 | { |
49 | unsigned char ll_addr[ETH_ALEN]; |
50 | |
	ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);

	return dev_mc_del(rxe->ndev, ll_addr);
54 | } |
55 | |
56 | /** |
57 | * __rxe_insert_mcg - insert an mcg into red-black tree (rxe->mcg_tree) |
58 | * @mcg: mcg object with an embedded red-black tree node |
59 | * |
 * Context: caller must hold a reference to mcg and rxe->mcg_lock, and
 * is responsible for not adding the same mcg to the tree twice.
62 | */ |
63 | static void __rxe_insert_mcg(struct rxe_mcg *mcg) |
64 | { |
65 | struct rb_root *tree = &mcg->rxe->mcg_tree; |
66 | struct rb_node **link = &tree->rb_node; |
67 | struct rb_node *node = NULL; |
68 | struct rxe_mcg *tmp; |
69 | int cmp; |
70 | |
71 | while (*link) { |
72 | node = *link; |
73 | tmp = rb_entry(node, struct rxe_mcg, node); |
74 | |
		cmp = memcmp(&tmp->mgid, &mcg->mgid, sizeof(mcg->mgid));
76 | if (cmp > 0) |
77 | link = &(*link)->rb_left; |
78 | else |
79 | link = &(*link)->rb_right; |
80 | } |
81 | |
	rb_link_node(&mcg->node, node, link);
83 | rb_insert_color(&mcg->node, tree); |
84 | } |
85 | |
86 | /** |
87 | * __rxe_remove_mcg - remove an mcg from red-black tree holding lock |
88 | * @mcg: mcast group object with an embedded red-black tree node |
89 | * |
90 | * Context: caller must hold a reference to mcg and rxe->mcg_lock |
91 | */ |
92 | static void __rxe_remove_mcg(struct rxe_mcg *mcg) |
93 | { |
94 | rb_erase(&mcg->node, &mcg->rxe->mcg_tree); |
95 | } |
96 | |
97 | /** |
98 | * __rxe_lookup_mcg - lookup mcg in rxe->mcg_tree while holding lock |
99 | * @rxe: rxe device object |
100 | * @mgid: multicast IP address |
101 | * |
102 | * Context: caller must hold rxe->mcg_lock |
103 | * Returns: mcg on success and takes a ref to mcg else NULL |
104 | */ |
105 | static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe, |
106 | union ib_gid *mgid) |
107 | { |
108 | struct rb_root *tree = &rxe->mcg_tree; |
109 | struct rxe_mcg *mcg; |
110 | struct rb_node *node; |
111 | int cmp; |
112 | |
113 | node = tree->rb_node; |
114 | |
115 | while (node) { |
116 | mcg = rb_entry(node, struct rxe_mcg, node); |
117 | |
		cmp = memcmp(&mcg->mgid, mgid, sizeof(*mgid));
119 | |
120 | if (cmp > 0) |
121 | node = node->rb_left; |
122 | else if (cmp < 0) |
123 | node = node->rb_right; |
124 | else |
125 | break; |
126 | } |
127 | |
128 | if (node) { |
		kref_get(&mcg->ref_cnt);
130 | return mcg; |
131 | } |
132 | |
133 | return NULL; |
134 | } |
135 | |
136 | /** |
 * rxe_lookup_mcg - look up mcg in red-black tree
138 | * @rxe: rxe device object |
139 | * @mgid: multicast IP address |
140 | * |
141 | * Returns: mcg if found else NULL |
142 | */ |
143 | struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid) |
144 | { |
145 | struct rxe_mcg *mcg; |
146 | |
	spin_lock_bh(&rxe->mcg_lock);
	mcg = __rxe_lookup_mcg(rxe, mgid);
	spin_unlock_bh(&rxe->mcg_lock);
150 | |
151 | return mcg; |
152 | } |
153 | |
154 | /** |
155 | * __rxe_init_mcg - initialize a new mcg |
156 | * @rxe: rxe device |
157 | * @mgid: multicast address as a gid |
158 | * @mcg: new mcg object |
159 | * |
 * Context: caller should hold rxe->mcg_lock
161 | */ |
162 | static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid, |
163 | struct rxe_mcg *mcg) |
164 | { |
	kref_init(&mcg->ref_cnt);
	memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
	INIT_LIST_HEAD(&mcg->qp_list);
168 | mcg->rxe = rxe; |
169 | |
170 | /* caller holds a ref on mcg but that will be |
171 | * dropped when mcg goes out of scope. We need to take a ref |
172 | * on the pointer that will be saved in the red-black tree |
173 | * by __rxe_insert_mcg and used to lookup mcg from mgid later. |
174 | * Inserting mcg makes it visible to outside so this should |
175 | * be done last after the object is ready. |
176 | */ |
	kref_get(&mcg->ref_cnt);
178 | __rxe_insert_mcg(mcg); |
179 | } |
180 | |
181 | /** |
182 | * rxe_get_mcg - lookup or allocate a mcg |
183 | * @rxe: rxe device object |
184 | * @mgid: multicast IP address as a gid |
185 | * |
186 | * Returns: mcg on success else ERR_PTR(error) |
187 | */ |
188 | static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) |
189 | { |
190 | struct rxe_mcg *mcg, *tmp; |
191 | int err; |
192 | |
193 | if (rxe->attr.max_mcast_grp == 0) |
		return ERR_PTR(-EINVAL);
195 | |
196 | /* check to see if mcg already exists */ |
197 | mcg = rxe_lookup_mcg(rxe, mgid); |
198 | if (mcg) |
199 | return mcg; |
200 | |
201 | /* check to see if we have reached limit */ |
	if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) {
203 | err = -ENOMEM; |
204 | goto err_dec; |
205 | } |
206 | |
207 | /* speculative alloc of new mcg */ |
	mcg = kzalloc(sizeof(*mcg), GFP_KERNEL);
209 | if (!mcg) { |
210 | err = -ENOMEM; |
211 | goto err_dec; |
212 | } |
213 | |
	spin_lock_bh(&rxe->mcg_lock);
215 | /* re-check to see if someone else just added it */ |
216 | tmp = __rxe_lookup_mcg(rxe, mgid); |
217 | if (tmp) { |
		spin_unlock_bh(&rxe->mcg_lock);
		atomic_dec(&rxe->mcg_num);
		kfree(mcg);
221 | return tmp; |
222 | } |
223 | |
224 | __rxe_init_mcg(rxe, mgid, mcg); |
	spin_unlock_bh(&rxe->mcg_lock);
226 | |
227 | /* add mcast address outside of lock */ |
	err = rxe_mcast_add(rxe, mgid);
	if (!err)
		return mcg;

	/* mcg was already inserted into the tree above; take it back
	 * out before freeing so that no dangling tree node is left
	 * behind.
	 */
	spin_lock_bh(&rxe->mcg_lock);
	__rxe_remove_mcg(mcg);
	spin_unlock_bh(&rxe->mcg_lock);

	kfree(mcg);
err_dec:
	atomic_dec(&rxe->mcg_num);
	return ERR_PTR(err);
236 | } |
237 | |
238 | /** |
239 | * rxe_cleanup_mcg - cleanup mcg for kref_put |
 * @kref: struct kref embedded in mcg
241 | */ |
242 | void rxe_cleanup_mcg(struct kref *kref) |
243 | { |
244 | struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt); |
245 | |
	kfree(mcg);
247 | } |
248 | |
249 | /** |
250 | * __rxe_destroy_mcg - destroy mcg object holding rxe->mcg_lock |
251 | * @mcg: the mcg object |
252 | * |
253 | * Context: caller is holding rxe->mcg_lock |
254 | * no qp's are attached to mcg |
255 | */ |
256 | static void __rxe_destroy_mcg(struct rxe_mcg *mcg) |
257 | { |
258 | struct rxe_dev *rxe = mcg->rxe; |
259 | |
260 | /* remove mcg from red-black tree then drop ref */ |
261 | __rxe_remove_mcg(mcg); |
	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);

	atomic_dec(&rxe->mcg_num);
265 | } |
266 | |
267 | /** |
268 | * rxe_destroy_mcg - destroy mcg object |
269 | * @mcg: the mcg object |
270 | * |
271 | * Context: no qp's are attached to mcg |
272 | */ |
273 | static void rxe_destroy_mcg(struct rxe_mcg *mcg) |
274 | { |
275 | /* delete mcast address outside of lock */ |
	rxe_mcast_del(mcg->rxe, &mcg->mgid);

	spin_lock_bh(&mcg->rxe->mcg_lock);
	__rxe_destroy_mcg(mcg);
	spin_unlock_bh(&mcg->rxe->mcg_lock);
281 | } |
282 | |
283 | /** |
284 | * __rxe_init_mca - initialize a new mca holding lock |
285 | * @qp: qp object |
286 | * @mcg: mcg object |
287 | * @mca: empty space for new mca |
288 | * |
289 | * Context: caller must hold references on qp and mcg, rxe->mcg_lock |
290 | * and pass memory for new mca |
291 | * |
292 | * Returns: 0 on success else an error |
293 | */ |
294 | static int __rxe_init_mca(struct rxe_qp *qp, struct rxe_mcg *mcg, |
295 | struct rxe_mca *mca) |
296 | { |
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
298 | int n; |
299 | |
	n = atomic_inc_return(&rxe->mcg_attach);
	if (n > rxe->attr.max_total_mcast_qp_attach) {
		atomic_dec(&rxe->mcg_attach);
303 | return -ENOMEM; |
304 | } |
305 | |
	n = atomic_inc_return(&mcg->qp_num);
	if (n > rxe->attr.max_mcast_qp_attach) {
		atomic_dec(&mcg->qp_num);
		atomic_dec(&rxe->mcg_attach);
310 | return -ENOMEM; |
311 | } |
312 | |
	atomic_inc(&qp->mcg_num);
314 | |
315 | rxe_get(qp); |
316 | mca->qp = qp; |
317 | |
	list_add_tail(&mca->qp_list, &mcg->qp_list);
319 | |
320 | return 0; |
321 | } |
322 | |
323 | /** |
324 | * rxe_attach_mcg - attach qp to mcg if not already attached |
 * @mcg: mcg object
 * @qp: qp object
327 | * |
328 | * Context: caller must hold reference on qp and mcg. |
329 | * Returns: 0 on success else an error |
330 | */ |
331 | static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) |
332 | { |
333 | struct rxe_dev *rxe = mcg->rxe; |
334 | struct rxe_mca *mca, *tmp; |
335 | int err; |
336 | |
337 | /* check to see if the qp is already a member of the group */ |
	spin_lock_bh(&rxe->mcg_lock);
	list_for_each_entry(mca, &mcg->qp_list, qp_list) {
		if (mca->qp == qp) {
			spin_unlock_bh(&rxe->mcg_lock);
			return 0;
		}
	}
	spin_unlock_bh(&rxe->mcg_lock);
346 | |
347 | /* speculative alloc new mca without using GFP_ATOMIC */ |
	mca = kzalloc(sizeof(*mca), GFP_KERNEL);
349 | if (!mca) |
350 | return -ENOMEM; |
351 | |
	spin_lock_bh(&rxe->mcg_lock);
353 | /* re-check to see if someone else just attached qp */ |
354 | list_for_each_entry(tmp, &mcg->qp_list, qp_list) { |
355 | if (tmp->qp == qp) { |
			kfree(mca);
357 | err = 0; |
358 | goto out; |
359 | } |
360 | } |
361 | |
362 | err = __rxe_init_mca(qp, mcg, mca); |
363 | if (err) |
		kfree(mca);
out:
	spin_unlock_bh(&rxe->mcg_lock);
367 | return err; |
368 | } |
369 | |
370 | /** |
371 | * __rxe_cleanup_mca - cleanup mca object holding lock |
372 | * @mca: mca object |
373 | * @mcg: mcg object |
374 | * |
375 | * Context: caller must hold a reference to mcg and rxe->mcg_lock |
376 | */ |
377 | static void __rxe_cleanup_mca(struct rxe_mca *mca, struct rxe_mcg *mcg) |
378 | { |
	list_del(&mca->qp_list);

	atomic_dec(&mcg->qp_num);
	atomic_dec(&mcg->rxe->mcg_attach);
	atomic_dec(&mca->qp->mcg_num);
	rxe_put(mca->qp);

	kfree(mca);
387 | } |
388 | |
389 | /** |
390 | * rxe_detach_mcg - detach qp from mcg |
391 | * @mcg: mcg object |
392 | * @qp: qp object |
393 | * |
394 | * Returns: 0 on success else an error if qp is not attached. |
395 | */ |
396 | static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) |
397 | { |
398 | struct rxe_dev *rxe = mcg->rxe; |
399 | struct rxe_mca *mca, *tmp; |
400 | |
	spin_lock_bh(&rxe->mcg_lock);
402 | list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) { |
403 | if (mca->qp == qp) { |
404 | __rxe_cleanup_mca(mca, mcg); |
405 | |
406 | /* if the number of qp's attached to the |
407 | * mcast group falls to zero go ahead and |
408 | * tear it down. This will not free the |
409 | * object since we are still holding a ref |
410 | * from the caller |
411 | */ |
			if (atomic_read(&mcg->qp_num) <= 0)
413 | __rxe_destroy_mcg(mcg); |
414 | |
			spin_unlock_bh(&rxe->mcg_lock);
416 | return 0; |
417 | } |
418 | } |
419 | |
420 | /* we didn't find the qp on the list */ |
	spin_unlock_bh(&rxe->mcg_lock);
422 | return -EINVAL; |
423 | } |
424 | |
425 | /** |
426 | * rxe_attach_mcast - attach qp to multicast group (see IBA-11.3.1) |
427 | * @ibqp: (IB) qp object |
428 | * @mgid: multicast IP address |
429 | * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6) |
430 | * |
431 | * Returns: 0 on success else an errno |
432 | */ |
433 | int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) |
434 | { |
435 | int err; |
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);
438 | struct rxe_mcg *mcg; |
439 | |
440 | /* takes a ref on mcg if successful */ |
441 | mcg = rxe_get_mcg(rxe, mgid); |
	if (IS_ERR(mcg))
		return PTR_ERR(mcg);
444 | |
445 | err = rxe_attach_mcg(mcg, qp); |
446 | |
447 | /* if we failed to attach the first qp to mcg tear it down */ |
	if (atomic_read(&mcg->qp_num) == 0)
449 | rxe_destroy_mcg(mcg); |
450 | |
	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
452 | |
453 | return err; |
454 | } |
455 | |
456 | /** |
457 | * rxe_detach_mcast - detach qp from multicast group (see IBA-11.3.2) |
458 | * @ibqp: address of (IB) qp object |
459 | * @mgid: multicast IP address |
460 | * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6) |
461 | * |
462 | * Returns: 0 on success else an errno |
463 | */ |
464 | int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) |
465 | { |
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);
468 | struct rxe_mcg *mcg; |
469 | int err; |
470 | |
471 | mcg = rxe_lookup_mcg(rxe, mgid); |
472 | if (!mcg) |
473 | return -EINVAL; |
474 | |
475 | err = rxe_detach_mcg(mcg, qp); |
	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
477 | |
478 | return err; |
479 | } |
480 | |