1 | /* |
2 | * Copyright (c) 2015, Mellanox Technologies inc. All rights reserved. |
3 | * |
4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU |
6 | * General Public License (GPL) Version 2, available from the file |
7 | * COPYING in the main directory of this source tree, or the |
8 | * OpenIB.org BSD license below: |
9 | * |
10 | * Redistribution and use in source and binary forms, with or |
11 | * without modification, are permitted provided that the following |
12 | * conditions are met: |
13 | * |
14 | * - Redistributions of source code must retain the above |
15 | * copyright notice, this list of conditions and the following |
16 | * disclaimer. |
17 | * |
18 | * - Redistributions in binary form must reproduce the above |
19 | * copyright notice, this list of conditions and the following |
20 | * disclaimer in the documentation and/or other materials |
21 | * provided with the distribution. |
22 | * |
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
30 | * SOFTWARE. |
31 | */ |
32 | |
33 | #include "core_priv.h" |
34 | |
35 | #include <linux/in.h> |
36 | #include <linux/in6.h> |
37 | |
38 | /* For in6_dev_get/in6_dev_put */ |
39 | #include <net/addrconf.h> |
40 | #include <net/bonding.h> |
41 | |
42 | #include <rdma/ib_cache.h> |
43 | #include <rdma/ib_addr.h> |
44 | |
/* Workqueue on which the netdevice and address event work items are queued. */
static struct workqueue_struct *gid_cache_wq;
46 | |
/* Operation that update_gid() applies to a GID. */
enum gid_op_type {
	GID_DEL = 0,	/* delete the GID from the cache */
	GID_ADD = 1,	/* add the GID to the cache */
};
51 | |
52 | struct update_gid_event_work { |
53 | struct work_struct work; |
54 | union ib_gid gid; |
55 | struct ib_gid_attr gid_attr; |
56 | enum gid_op_type gid_op; |
57 | }; |
58 | |
59 | #define ROCE_NETDEV_CALLBACK_SZ 3 |
60 | struct netdev_event_work_cmd { |
61 | roce_netdev_callback cb; |
62 | roce_netdev_filter filter; |
63 | struct net_device *ndev; |
64 | struct net_device *filter_ndev; |
65 | }; |
66 | |
67 | struct netdev_event_work { |
68 | struct work_struct work; |
69 | struct netdev_event_work_cmd cmds[ROCE_NETDEV_CALLBACK_SZ]; |
70 | }; |
71 | |
72 | static const struct { |
73 | bool (*is_supported)(const struct ib_device *device, u32 port_num); |
74 | enum ib_gid_type gid_type; |
75 | } PORT_CAP_TO_GID_TYPE[] = { |
76 | {rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE}, |
77 | {rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP}, |
78 | }; |
79 | |
80 | #define CAP_TO_GID_TABLE_SIZE ARRAY_SIZE(PORT_CAP_TO_GID_TYPE) |
81 | |
82 | unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u32 port) |
83 | { |
84 | int i; |
85 | unsigned int ret_flags = 0; |
86 | |
87 | if (!rdma_protocol_roce(device: ib_dev, port_num: port)) |
88 | return 1UL << IB_GID_TYPE_IB; |
89 | |
90 | for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++) |
91 | if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port)) |
92 | ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type; |
93 | |
94 | return ret_flags; |
95 | } |
96 | EXPORT_SYMBOL(roce_gid_type_mask_support); |
97 | |
98 | static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev, |
99 | u32 port, union ib_gid *gid, |
100 | struct ib_gid_attr *gid_attr) |
101 | { |
102 | int i; |
103 | unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port); |
104 | |
105 | for (i = 0; i < IB_GID_TYPE_SIZE; i++) { |
106 | if ((1UL << i) & gid_type_mask) { |
107 | gid_attr->gid_type = i; |
108 | switch (gid_op) { |
109 | case GID_ADD: |
110 | ib_cache_gid_add(ib_dev, port, |
111 | gid, attr: gid_attr); |
112 | break; |
113 | case GID_DEL: |
114 | ib_cache_gid_del(ib_dev, port, |
115 | gid, attr: gid_attr); |
116 | break; |
117 | } |
118 | } |
119 | } |
120 | } |
121 | |
/*
 * Result of is_eth_active_slave_of_bonding_rcu().  Each state is a distinct
 * bit so that several states can be tested with one mask.
 */
enum bonding_slave_state {
	/* The device is the bond's active slave */
	BONDING_SLAVE_STATE_ACTIVE	= 1UL << 0,
	/* The bond has an active slave and it is another device */
	BONDING_SLAVE_STATE_INACTIVE	= 1UL << 1,
	/* No primary slave or the device isn't a slave in bonding */
	BONDING_SLAVE_STATE_NA		= 1UL << 2,
};
128 | |
129 | static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_device *dev, |
130 | struct net_device *upper) |
131 | { |
132 | if (upper && netif_is_bond_master(dev: upper)) { |
133 | struct net_device *pdev = |
134 | bond_option_active_slave_get_rcu(bond: netdev_priv(dev: upper)); |
135 | |
136 | if (pdev) |
137 | return dev == pdev ? BONDING_SLAVE_STATE_ACTIVE : |
138 | BONDING_SLAVE_STATE_INACTIVE; |
139 | } |
140 | |
141 | return BONDING_SLAVE_STATE_NA; |
142 | } |
143 | |
144 | #define REQUIRED_BOND_STATES (BONDING_SLAVE_STATE_ACTIVE | \ |
145 | BONDING_SLAVE_STATE_NA) |
146 | static bool |
147 | is_eth_port_of_netdev_filter(struct ib_device *ib_dev, u32 port, |
148 | struct net_device *rdma_ndev, void *cookie) |
149 | { |
150 | struct net_device *real_dev; |
151 | bool res; |
152 | |
153 | if (!rdma_ndev) |
154 | return false; |
155 | |
156 | rcu_read_lock(); |
157 | real_dev = rdma_vlan_dev_real_dev(dev: cookie); |
158 | if (!real_dev) |
159 | real_dev = cookie; |
160 | |
161 | res = ((rdma_is_upper_dev_rcu(dev: rdma_ndev, upper: cookie) && |
162 | (is_eth_active_slave_of_bonding_rcu(dev: rdma_ndev, upper: real_dev) & |
163 | REQUIRED_BOND_STATES)) || |
164 | real_dev == rdma_ndev); |
165 | |
166 | rcu_read_unlock(); |
167 | return res; |
168 | } |
169 | |
170 | static bool |
171 | is_eth_port_inactive_slave_filter(struct ib_device *ib_dev, u32 port, |
172 | struct net_device *rdma_ndev, void *cookie) |
173 | { |
174 | struct net_device *master_dev; |
175 | bool res; |
176 | |
177 | if (!rdma_ndev) |
178 | return false; |
179 | |
180 | rcu_read_lock(); |
181 | master_dev = netdev_master_upper_dev_get_rcu(dev: rdma_ndev); |
182 | res = is_eth_active_slave_of_bonding_rcu(dev: rdma_ndev, upper: master_dev) == |
183 | BONDING_SLAVE_STATE_INACTIVE; |
184 | rcu_read_unlock(); |
185 | |
186 | return res; |
187 | } |
188 | |
189 | /** |
190 | * is_ndev_for_default_gid_filter - Check if a given netdevice |
191 | * can be considered for default GIDs or not. |
192 | * @ib_dev: IB device to check |
193 | * @port: Port to consider for adding default GID |
194 | * @rdma_ndev: rdma netdevice pointer |
195 | * @cookie: Netdevice to consider to form a default GID |
196 | * |
197 | * is_ndev_for_default_gid_filter() returns true if a given netdevice can be |
198 | * considered for deriving default RoCE GID, returns false otherwise. |
199 | */ |
200 | static bool |
201 | is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u32 port, |
202 | struct net_device *rdma_ndev, void *cookie) |
203 | { |
204 | struct net_device *cookie_ndev = cookie; |
205 | bool res; |
206 | |
207 | if (!rdma_ndev) |
208 | return false; |
209 | |
210 | rcu_read_lock(); |
211 | |
212 | /* |
213 | * When rdma netdevice is used in bonding, bonding master netdevice |
214 | * should be considered for default GIDs. Therefore, ignore slave rdma |
215 | * netdevices when bonding is considered. |
216 | * Additionally when event(cookie) netdevice is bond master device, |
217 | * make sure that it the upper netdevice of rdma netdevice. |
218 | */ |
219 | res = ((cookie_ndev == rdma_ndev && !netif_is_bond_slave(dev: rdma_ndev)) || |
220 | (netif_is_bond_master(dev: cookie_ndev) && |
221 | rdma_is_upper_dev_rcu(dev: rdma_ndev, upper: cookie_ndev))); |
222 | |
223 | rcu_read_unlock(); |
224 | return res; |
225 | } |
226 | |
227 | static bool pass_all_filter(struct ib_device *ib_dev, u32 port, |
228 | struct net_device *rdma_ndev, void *cookie) |
229 | { |
230 | return true; |
231 | } |
232 | |
233 | static bool upper_device_filter(struct ib_device *ib_dev, u32 port, |
234 | struct net_device *rdma_ndev, void *cookie) |
235 | { |
236 | bool res; |
237 | |
238 | if (!rdma_ndev) |
239 | return false; |
240 | |
241 | if (rdma_ndev == cookie) |
242 | return true; |
243 | |
244 | rcu_read_lock(); |
245 | res = rdma_is_upper_dev_rcu(dev: rdma_ndev, upper: cookie); |
246 | rcu_read_unlock(); |
247 | |
248 | return res; |
249 | } |
250 | |
251 | /** |
252 | * is_upper_ndev_bond_master_filter - Check if a given netdevice |
253 | * is bond master device of netdevice of the RDMA device of port. |
254 | * @ib_dev: IB device to check |
255 | * @port: Port to consider for adding default GID |
256 | * @rdma_ndev: Pointer to rdma netdevice |
257 | * @cookie: Netdevice to consider to form a default GID |
258 | * |
259 | * is_upper_ndev_bond_master_filter() returns true if a cookie_netdev |
260 | * is bond master device and rdma_ndev is its lower netdevice. It might |
261 | * not have been established as slave device yet. |
262 | */ |
263 | static bool |
264 | is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u32 port, |
265 | struct net_device *rdma_ndev, |
266 | void *cookie) |
267 | { |
268 | struct net_device *cookie_ndev = cookie; |
269 | bool match = false; |
270 | |
271 | if (!rdma_ndev) |
272 | return false; |
273 | |
274 | rcu_read_lock(); |
275 | if (netif_is_bond_master(dev: cookie_ndev) && |
276 | rdma_is_upper_dev_rcu(dev: rdma_ndev, upper: cookie_ndev)) |
277 | match = true; |
278 | rcu_read_unlock(); |
279 | return match; |
280 | } |
281 | |
282 | static void update_gid_ip(enum gid_op_type gid_op, |
283 | struct ib_device *ib_dev, |
284 | u32 port, struct net_device *ndev, |
285 | struct sockaddr *addr) |
286 | { |
287 | union ib_gid gid; |
288 | struct ib_gid_attr gid_attr; |
289 | |
290 | rdma_ip2gid(addr, gid: &gid); |
291 | memset(&gid_attr, 0, sizeof(gid_attr)); |
292 | gid_attr.ndev = ndev; |
293 | |
294 | update_gid(gid_op, ib_dev, port, gid: &gid, gid_attr: &gid_attr); |
295 | } |
296 | |
297 | static void bond_delete_netdev_default_gids(struct ib_device *ib_dev, |
298 | u32 port, |
299 | struct net_device *rdma_ndev, |
300 | struct net_device *event_ndev) |
301 | { |
302 | struct net_device *real_dev = rdma_vlan_dev_real_dev(dev: event_ndev); |
303 | unsigned long gid_type_mask; |
304 | |
305 | if (!rdma_ndev) |
306 | return; |
307 | |
308 | if (!real_dev) |
309 | real_dev = event_ndev; |
310 | |
311 | rcu_read_lock(); |
312 | |
313 | if (((rdma_ndev != event_ndev && |
314 | !rdma_is_upper_dev_rcu(dev: rdma_ndev, upper: event_ndev)) || |
315 | is_eth_active_slave_of_bonding_rcu(dev: rdma_ndev, upper: real_dev) |
316 | == |
317 | BONDING_SLAVE_STATE_INACTIVE)) { |
318 | rcu_read_unlock(); |
319 | return; |
320 | } |
321 | |
322 | rcu_read_unlock(); |
323 | |
324 | gid_type_mask = roce_gid_type_mask_support(ib_dev, port); |
325 | |
326 | ib_cache_gid_set_default_gid(ib_dev, port, ndev: rdma_ndev, |
327 | gid_type_mask, |
328 | mode: IB_CACHE_GID_DEFAULT_MODE_DELETE); |
329 | } |
330 | |
331 | static void enum_netdev_ipv4_ips(struct ib_device *ib_dev, |
332 | u32 port, struct net_device *ndev) |
333 | { |
334 | const struct in_ifaddr *ifa; |
335 | struct in_device *in_dev; |
336 | struct sin_list { |
337 | struct list_head list; |
338 | struct sockaddr_in ip; |
339 | }; |
340 | struct sin_list *sin_iter; |
341 | struct sin_list *sin_temp; |
342 | |
343 | LIST_HEAD(sin_list); |
344 | if (ndev->reg_state >= NETREG_UNREGISTERING) |
345 | return; |
346 | |
347 | rcu_read_lock(); |
348 | in_dev = __in_dev_get_rcu(dev: ndev); |
349 | if (!in_dev) { |
350 | rcu_read_unlock(); |
351 | return; |
352 | } |
353 | |
354 | in_dev_for_each_ifa_rcu(ifa, in_dev) { |
355 | struct sin_list *entry = kzalloc(size: sizeof(*entry), GFP_ATOMIC); |
356 | |
357 | if (!entry) |
358 | continue; |
359 | |
360 | entry->ip.sin_family = AF_INET; |
361 | entry->ip.sin_addr.s_addr = ifa->ifa_address; |
362 | list_add_tail(new: &entry->list, head: &sin_list); |
363 | } |
364 | |
365 | rcu_read_unlock(); |
366 | |
367 | list_for_each_entry_safe(sin_iter, sin_temp, &sin_list, list) { |
368 | update_gid_ip(gid_op: GID_ADD, ib_dev, port, ndev, |
369 | addr: (struct sockaddr *)&sin_iter->ip); |
370 | list_del(entry: &sin_iter->list); |
371 | kfree(objp: sin_iter); |
372 | } |
373 | } |
374 | |
375 | static void enum_netdev_ipv6_ips(struct ib_device *ib_dev, |
376 | u32 port, struct net_device *ndev) |
377 | { |
378 | struct inet6_ifaddr *ifp; |
379 | struct inet6_dev *in6_dev; |
380 | struct sin6_list { |
381 | struct list_head list; |
382 | struct sockaddr_in6 sin6; |
383 | }; |
384 | struct sin6_list *sin6_iter; |
385 | struct sin6_list *sin6_temp; |
386 | struct ib_gid_attr gid_attr = {.ndev = ndev}; |
387 | LIST_HEAD(sin6_list); |
388 | |
389 | if (ndev->reg_state >= NETREG_UNREGISTERING) |
390 | return; |
391 | |
392 | in6_dev = in6_dev_get(dev: ndev); |
393 | if (!in6_dev) |
394 | return; |
395 | |
396 | read_lock_bh(&in6_dev->lock); |
397 | list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { |
398 | struct sin6_list *entry = kzalloc(size: sizeof(*entry), GFP_ATOMIC); |
399 | |
400 | if (!entry) |
401 | continue; |
402 | |
403 | entry->sin6.sin6_family = AF_INET6; |
404 | entry->sin6.sin6_addr = ifp->addr; |
405 | list_add_tail(new: &entry->list, head: &sin6_list); |
406 | } |
407 | read_unlock_bh(&in6_dev->lock); |
408 | |
409 | in6_dev_put(idev: in6_dev); |
410 | |
411 | list_for_each_entry_safe(sin6_iter, sin6_temp, &sin6_list, list) { |
412 | union ib_gid gid; |
413 | |
414 | rdma_ip2gid(addr: (struct sockaddr *)&sin6_iter->sin6, gid: &gid); |
415 | update_gid(gid_op: GID_ADD, ib_dev, port, gid: &gid, gid_attr: &gid_attr); |
416 | list_del(entry: &sin6_iter->list); |
417 | kfree(objp: sin6_iter); |
418 | } |
419 | } |
420 | |
421 | static void _add_netdev_ips(struct ib_device *ib_dev, u32 port, |
422 | struct net_device *ndev) |
423 | { |
424 | enum_netdev_ipv4_ips(ib_dev, port, ndev); |
425 | if (IS_ENABLED(CONFIG_IPV6)) |
426 | enum_netdev_ipv6_ips(ib_dev, port, ndev); |
427 | } |
428 | |
429 | static void add_netdev_ips(struct ib_device *ib_dev, u32 port, |
430 | struct net_device *rdma_ndev, void *cookie) |
431 | { |
432 | _add_netdev_ips(ib_dev, port, ndev: cookie); |
433 | } |
434 | |
435 | static void del_netdev_ips(struct ib_device *ib_dev, u32 port, |
436 | struct net_device *rdma_ndev, void *cookie) |
437 | { |
438 | ib_cache_gid_del_all_netdev_gids(ib_dev, port, ndev: cookie); |
439 | } |
440 | |
441 | /** |
442 | * del_default_gids - Delete default GIDs of the event/cookie netdevice |
443 | * @ib_dev: RDMA device pointer |
444 | * @port: Port of the RDMA device whose GID table to consider |
445 | * @rdma_ndev: Unused rdma netdevice |
446 | * @cookie: Pointer to event netdevice |
447 | * |
448 | * del_default_gids() deletes the default GIDs of the event/cookie netdevice. |
449 | */ |
450 | static void del_default_gids(struct ib_device *ib_dev, u32 port, |
451 | struct net_device *rdma_ndev, void *cookie) |
452 | { |
453 | struct net_device *cookie_ndev = cookie; |
454 | unsigned long gid_type_mask; |
455 | |
456 | gid_type_mask = roce_gid_type_mask_support(ib_dev, port); |
457 | |
458 | ib_cache_gid_set_default_gid(ib_dev, port, ndev: cookie_ndev, gid_type_mask, |
459 | mode: IB_CACHE_GID_DEFAULT_MODE_DELETE); |
460 | } |
461 | |
462 | static void add_default_gids(struct ib_device *ib_dev, u32 port, |
463 | struct net_device *rdma_ndev, void *cookie) |
464 | { |
465 | struct net_device *event_ndev = cookie; |
466 | unsigned long gid_type_mask; |
467 | |
468 | gid_type_mask = roce_gid_type_mask_support(ib_dev, port); |
469 | ib_cache_gid_set_default_gid(ib_dev, port, ndev: event_ndev, gid_type_mask, |
470 | mode: IB_CACHE_GID_DEFAULT_MODE_SET); |
471 | } |
472 | |
473 | static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, |
474 | u32 port, |
475 | struct net_device *rdma_ndev, |
476 | void *cookie) |
477 | { |
478 | struct net *net; |
479 | struct net_device *ndev; |
480 | |
481 | /* Lock the rtnl to make sure the netdevs does not move under |
482 | * our feet |
483 | */ |
484 | rtnl_lock(); |
485 | down_read(sem: &net_rwsem); |
486 | for_each_net(net) |
487 | for_each_netdev(net, ndev) { |
488 | /* |
489 | * Filter and add default GIDs of the primary netdevice |
490 | * when not in bonding mode, or add default GIDs |
491 | * of bond master device, when in bonding mode. |
492 | */ |
493 | if (is_ndev_for_default_gid_filter(ib_dev, port, |
494 | rdma_ndev, cookie: ndev)) |
495 | add_default_gids(ib_dev, port, rdma_ndev, cookie: ndev); |
496 | |
497 | if (is_eth_port_of_netdev_filter(ib_dev, port, |
498 | rdma_ndev, cookie: ndev)) |
499 | _add_netdev_ips(ib_dev, port, ndev); |
500 | } |
501 | up_read(sem: &net_rwsem); |
502 | rtnl_unlock(); |
503 | } |
504 | |
505 | /** |
506 | * rdma_roce_rescan_device - Rescan all of the network devices in the system |
507 | * and add their gids, as needed, to the relevant RoCE devices. |
508 | * |
509 | * @ib_dev: the rdma device |
510 | */ |
511 | void rdma_roce_rescan_device(struct ib_device *ib_dev) |
512 | { |
513 | ib_enum_roce_netdev(ib_dev, filter: pass_all_filter, NULL, |
514 | cb: enum_all_gids_of_dev_cb, NULL); |
515 | } |
516 | EXPORT_SYMBOL(rdma_roce_rescan_device); |
517 | |
518 | static void callback_for_addr_gid_device_scan(struct ib_device *device, |
519 | u32 port, |
520 | struct net_device *rdma_ndev, |
521 | void *cookie) |
522 | { |
523 | struct update_gid_event_work *parsed = cookie; |
524 | |
525 | return update_gid(gid_op: parsed->gid_op, ib_dev: device, |
526 | port, gid: &parsed->gid, |
527 | gid_attr: &parsed->gid_attr); |
528 | } |
529 | |
530 | struct upper_list { |
531 | struct list_head list; |
532 | struct net_device *upper; |
533 | }; |
534 | |
535 | static int netdev_upper_walk(struct net_device *upper, |
536 | struct netdev_nested_priv *priv) |
537 | { |
538 | struct upper_list *entry = kmalloc(size: sizeof(*entry), GFP_ATOMIC); |
539 | struct list_head *upper_list = (struct list_head *)priv->data; |
540 | |
541 | if (!entry) |
542 | return 0; |
543 | |
544 | list_add_tail(new: &entry->list, head: upper_list); |
545 | dev_hold(dev: upper); |
546 | entry->upper = upper; |
547 | |
548 | return 0; |
549 | } |
550 | |
551 | static void handle_netdev_upper(struct ib_device *ib_dev, u32 port, |
552 | void *cookie, |
553 | void (*handle_netdev)(struct ib_device *ib_dev, |
554 | u32 port, |
555 | struct net_device *ndev)) |
556 | { |
557 | struct net_device *ndev = cookie; |
558 | struct netdev_nested_priv priv; |
559 | struct upper_list *upper_iter; |
560 | struct upper_list *upper_temp; |
561 | LIST_HEAD(upper_list); |
562 | |
563 | priv.data = &upper_list; |
564 | rcu_read_lock(); |
565 | netdev_walk_all_upper_dev_rcu(dev: ndev, fn: netdev_upper_walk, priv: &priv); |
566 | rcu_read_unlock(); |
567 | |
568 | handle_netdev(ib_dev, port, ndev); |
569 | list_for_each_entry_safe(upper_iter, upper_temp, &upper_list, |
570 | list) { |
571 | handle_netdev(ib_dev, port, upper_iter->upper); |
572 | dev_put(dev: upper_iter->upper); |
573 | list_del(entry: &upper_iter->list); |
574 | kfree(objp: upper_iter); |
575 | } |
576 | } |
577 | |
578 | static void _roce_del_all_netdev_gids(struct ib_device *ib_dev, u32 port, |
579 | struct net_device *event_ndev) |
580 | { |
581 | ib_cache_gid_del_all_netdev_gids(ib_dev, port, ndev: event_ndev); |
582 | } |
583 | |
584 | static void del_netdev_upper_ips(struct ib_device *ib_dev, u32 port, |
585 | struct net_device *rdma_ndev, void *cookie) |
586 | { |
587 | handle_netdev_upper(ib_dev, port, cookie, handle_netdev: _roce_del_all_netdev_gids); |
588 | } |
589 | |
590 | static void add_netdev_upper_ips(struct ib_device *ib_dev, u32 port, |
591 | struct net_device *rdma_ndev, void *cookie) |
592 | { |
593 | handle_netdev_upper(ib_dev, port, cookie, handle_netdev: _add_netdev_ips); |
594 | } |
595 | |
596 | static void del_netdev_default_ips_join(struct ib_device *ib_dev, u32 port, |
597 | struct net_device *rdma_ndev, |
598 | void *cookie) |
599 | { |
600 | struct net_device *master_ndev; |
601 | |
602 | rcu_read_lock(); |
603 | master_ndev = netdev_master_upper_dev_get_rcu(dev: rdma_ndev); |
604 | if (master_ndev) |
605 | dev_hold(dev: master_ndev); |
606 | rcu_read_unlock(); |
607 | |
608 | if (master_ndev) { |
609 | bond_delete_netdev_default_gids(ib_dev, port, rdma_ndev, |
610 | event_ndev: master_ndev); |
611 | dev_put(dev: master_ndev); |
612 | } |
613 | } |
614 | |
/*
 * The following functions operate on all IB devices. netdevice_event and
 * addr_event run ib_enum_all_roce_netdevs from a queued work item, and
 * ib_enum_all_roce_netdevs iterates through all IB devices.
 */
619 | |
620 | static void netdevice_event_work_handler(struct work_struct *_work) |
621 | { |
622 | struct netdev_event_work *work = |
623 | container_of(_work, struct netdev_event_work, work); |
624 | unsigned int i; |
625 | |
626 | for (i = 0; i < ARRAY_SIZE(work->cmds) && work->cmds[i].cb; i++) { |
627 | ib_enum_all_roce_netdevs(filter: work->cmds[i].filter, |
628 | filter_cookie: work->cmds[i].filter_ndev, |
629 | cb: work->cmds[i].cb, |
630 | cookie: work->cmds[i].ndev); |
631 | dev_put(dev: work->cmds[i].ndev); |
632 | dev_put(dev: work->cmds[i].filter_ndev); |
633 | } |
634 | |
635 | kfree(objp: work); |
636 | } |
637 | |
638 | static int netdevice_queue_work(struct netdev_event_work_cmd *cmds, |
639 | struct net_device *ndev) |
640 | { |
641 | unsigned int i; |
642 | struct netdev_event_work *ndev_work = |
643 | kmalloc(size: sizeof(*ndev_work), GFP_KERNEL); |
644 | |
645 | if (!ndev_work) |
646 | return NOTIFY_DONE; |
647 | |
648 | memcpy(ndev_work->cmds, cmds, sizeof(ndev_work->cmds)); |
649 | for (i = 0; i < ARRAY_SIZE(ndev_work->cmds) && ndev_work->cmds[i].cb; i++) { |
650 | if (!ndev_work->cmds[i].ndev) |
651 | ndev_work->cmds[i].ndev = ndev; |
652 | if (!ndev_work->cmds[i].filter_ndev) |
653 | ndev_work->cmds[i].filter_ndev = ndev; |
654 | dev_hold(dev: ndev_work->cmds[i].ndev); |
655 | dev_hold(dev: ndev_work->cmds[i].filter_ndev); |
656 | } |
657 | INIT_WORK(&ndev_work->work, netdevice_event_work_handler); |
658 | |
659 | queue_work(wq: gid_cache_wq, work: &ndev_work->work); |
660 | |
661 | return NOTIFY_DONE; |
662 | } |
663 | |
664 | static const struct netdev_event_work_cmd add_cmd = { |
665 | .cb = add_netdev_ips, |
666 | .filter = is_eth_port_of_netdev_filter |
667 | }; |
668 | |
669 | static const struct netdev_event_work_cmd add_cmd_upper_ips = { |
670 | .cb = add_netdev_upper_ips, |
671 | .filter = is_eth_port_of_netdev_filter |
672 | }; |
673 | |
674 | static void |
675 | ndev_event_unlink(struct netdev_notifier_changeupper_info *changeupper_info, |
676 | struct netdev_event_work_cmd *cmds) |
677 | { |
678 | static const struct netdev_event_work_cmd |
679 | upper_ips_del_cmd = { |
680 | .cb = del_netdev_upper_ips, |
681 | .filter = upper_device_filter |
682 | }; |
683 | |
684 | cmds[0] = upper_ips_del_cmd; |
685 | cmds[0].ndev = changeupper_info->upper_dev; |
686 | cmds[1] = add_cmd; |
687 | } |
688 | |
689 | static const struct netdev_event_work_cmd bonding_default_add_cmd = { |
690 | .cb = add_default_gids, |
691 | .filter = is_upper_ndev_bond_master_filter |
692 | }; |
693 | |
694 | static void |
695 | ndev_event_link(struct net_device *event_ndev, |
696 | struct netdev_notifier_changeupper_info *changeupper_info, |
697 | struct netdev_event_work_cmd *cmds) |
698 | { |
699 | static const struct netdev_event_work_cmd |
700 | bonding_default_del_cmd = { |
701 | .cb = del_default_gids, |
702 | .filter = is_upper_ndev_bond_master_filter |
703 | }; |
704 | /* |
705 | * When a lower netdev is linked to its upper bonding |
706 | * netdev, delete lower slave netdev's default GIDs. |
707 | */ |
708 | cmds[0] = bonding_default_del_cmd; |
709 | cmds[0].ndev = event_ndev; |
710 | cmds[0].filter_ndev = changeupper_info->upper_dev; |
711 | |
712 | /* Now add bonding upper device default GIDs */ |
713 | cmds[1] = bonding_default_add_cmd; |
714 | cmds[1].ndev = changeupper_info->upper_dev; |
715 | cmds[1].filter_ndev = changeupper_info->upper_dev; |
716 | |
717 | /* Now add bonding upper device IP based GIDs */ |
718 | cmds[2] = add_cmd_upper_ips; |
719 | cmds[2].ndev = changeupper_info->upper_dev; |
720 | cmds[2].filter_ndev = changeupper_info->upper_dev; |
721 | } |
722 | |
723 | static void netdevice_event_changeupper(struct net_device *event_ndev, |
724 | struct netdev_notifier_changeupper_info *changeupper_info, |
725 | struct netdev_event_work_cmd *cmds) |
726 | { |
727 | if (changeupper_info->linking) |
728 | ndev_event_link(event_ndev, changeupper_info, cmds); |
729 | else |
730 | ndev_event_unlink(changeupper_info, cmds); |
731 | } |
732 | |
733 | static const struct netdev_event_work_cmd add_default_gid_cmd = { |
734 | .cb = add_default_gids, |
735 | .filter = is_ndev_for_default_gid_filter, |
736 | }; |
737 | |
738 | static int netdevice_event(struct notifier_block *this, unsigned long event, |
739 | void *ptr) |
740 | { |
741 | static const struct netdev_event_work_cmd del_cmd = { |
742 | .cb = del_netdev_ips, .filter = pass_all_filter}; |
743 | static const struct netdev_event_work_cmd |
744 | bonding_default_del_cmd_join = { |
745 | .cb = del_netdev_default_ips_join, |
746 | .filter = is_eth_port_inactive_slave_filter |
747 | }; |
748 | static const struct netdev_event_work_cmd |
749 | netdev_del_cmd = { |
750 | .cb = del_netdev_ips, |
751 | .filter = is_eth_port_of_netdev_filter |
752 | }; |
753 | static const struct netdev_event_work_cmd bonding_event_ips_del_cmd = { |
754 | .cb = del_netdev_upper_ips, .filter = upper_device_filter}; |
755 | struct net_device *ndev = netdev_notifier_info_to_dev(info: ptr); |
756 | struct netdev_event_work_cmd cmds[ROCE_NETDEV_CALLBACK_SZ] = { {NULL} }; |
757 | |
758 | if (ndev->type != ARPHRD_ETHER) |
759 | return NOTIFY_DONE; |
760 | |
761 | switch (event) { |
762 | case NETDEV_REGISTER: |
763 | case NETDEV_UP: |
764 | cmds[0] = bonding_default_del_cmd_join; |
765 | cmds[1] = add_default_gid_cmd; |
766 | cmds[2] = add_cmd; |
767 | break; |
768 | |
769 | case NETDEV_UNREGISTER: |
770 | if (ndev->reg_state < NETREG_UNREGISTERED) |
771 | cmds[0] = del_cmd; |
772 | else |
773 | return NOTIFY_DONE; |
774 | break; |
775 | |
776 | case NETDEV_CHANGEADDR: |
777 | cmds[0] = netdev_del_cmd; |
778 | if (ndev->reg_state == NETREG_REGISTERED) { |
779 | cmds[1] = add_default_gid_cmd; |
780 | cmds[2] = add_cmd; |
781 | } |
782 | break; |
783 | |
784 | case NETDEV_CHANGEUPPER: |
785 | netdevice_event_changeupper(event_ndev: ndev, |
786 | container_of(ptr, struct netdev_notifier_changeupper_info, info), |
787 | cmds); |
788 | break; |
789 | |
790 | case NETDEV_BONDING_FAILOVER: |
791 | cmds[0] = bonding_event_ips_del_cmd; |
792 | /* Add default GIDs of the bond device */ |
793 | cmds[1] = bonding_default_add_cmd; |
794 | /* Add IP based GIDs of the bond device */ |
795 | cmds[2] = add_cmd_upper_ips; |
796 | break; |
797 | |
798 | default: |
799 | return NOTIFY_DONE; |
800 | } |
801 | |
802 | return netdevice_queue_work(cmds, ndev); |
803 | } |
804 | |
805 | static void update_gid_event_work_handler(struct work_struct *_work) |
806 | { |
807 | struct update_gid_event_work *work = |
808 | container_of(_work, struct update_gid_event_work, work); |
809 | |
810 | ib_enum_all_roce_netdevs(filter: is_eth_port_of_netdev_filter, |
811 | filter_cookie: work->gid_attr.ndev, |
812 | cb: callback_for_addr_gid_device_scan, cookie: work); |
813 | |
814 | dev_put(dev: work->gid_attr.ndev); |
815 | kfree(objp: work); |
816 | } |
817 | |
818 | static int addr_event(struct notifier_block *this, unsigned long event, |
819 | struct sockaddr *sa, struct net_device *ndev) |
820 | { |
821 | struct update_gid_event_work *work; |
822 | enum gid_op_type gid_op; |
823 | |
824 | if (ndev->type != ARPHRD_ETHER) |
825 | return NOTIFY_DONE; |
826 | |
827 | switch (event) { |
828 | case NETDEV_UP: |
829 | gid_op = GID_ADD; |
830 | break; |
831 | |
832 | case NETDEV_DOWN: |
833 | gid_op = GID_DEL; |
834 | break; |
835 | |
836 | default: |
837 | return NOTIFY_DONE; |
838 | } |
839 | |
840 | work = kmalloc(size: sizeof(*work), GFP_ATOMIC); |
841 | if (!work) |
842 | return NOTIFY_DONE; |
843 | |
844 | INIT_WORK(&work->work, update_gid_event_work_handler); |
845 | |
846 | rdma_ip2gid(addr: sa, gid: &work->gid); |
847 | work->gid_op = gid_op; |
848 | |
849 | memset(&work->gid_attr, 0, sizeof(work->gid_attr)); |
850 | dev_hold(dev: ndev); |
851 | work->gid_attr.ndev = ndev; |
852 | |
853 | queue_work(wq: gid_cache_wq, work: &work->work); |
854 | |
855 | return NOTIFY_DONE; |
856 | } |
857 | |
858 | static int inetaddr_event(struct notifier_block *this, unsigned long event, |
859 | void *ptr) |
860 | { |
861 | struct sockaddr_in in; |
862 | struct net_device *ndev; |
863 | struct in_ifaddr *ifa = ptr; |
864 | |
865 | in.sin_family = AF_INET; |
866 | in.sin_addr.s_addr = ifa->ifa_address; |
867 | ndev = ifa->ifa_dev->dev; |
868 | |
869 | return addr_event(this, event, sa: (struct sockaddr *)&in, ndev); |
870 | } |
871 | |
872 | static int inet6addr_event(struct notifier_block *this, unsigned long event, |
873 | void *ptr) |
874 | { |
875 | struct sockaddr_in6 in6; |
876 | struct net_device *ndev; |
877 | struct inet6_ifaddr *ifa6 = ptr; |
878 | |
879 | in6.sin6_family = AF_INET6; |
880 | in6.sin6_addr = ifa6->addr; |
881 | ndev = ifa6->idev->dev; |
882 | |
883 | return addr_event(this, event, sa: (struct sockaddr *)&in6, ndev); |
884 | } |
885 | |
886 | static struct notifier_block nb_netdevice = { |
887 | .notifier_call = netdevice_event |
888 | }; |
889 | |
890 | static struct notifier_block nb_inetaddr = { |
891 | .notifier_call = inetaddr_event |
892 | }; |
893 | |
894 | static struct notifier_block nb_inet6addr = { |
895 | .notifier_call = inet6addr_event |
896 | }; |
897 | |
898 | int __init roce_gid_mgmt_init(void) |
899 | { |
900 | gid_cache_wq = alloc_ordered_workqueue("gid-cache-wq" , 0); |
901 | if (!gid_cache_wq) |
902 | return -ENOMEM; |
903 | |
904 | register_inetaddr_notifier(nb: &nb_inetaddr); |
905 | if (IS_ENABLED(CONFIG_IPV6)) |
906 | register_inet6addr_notifier(nb: &nb_inet6addr); |
907 | /* We relay on the netdevice notifier to enumerate all |
908 | * existing devices in the system. Register to this notifier |
909 | * last to make sure we will not miss any IP add/del |
910 | * callbacks. |
911 | */ |
912 | register_netdevice_notifier(nb: &nb_netdevice); |
913 | |
914 | return 0; |
915 | } |
916 | |
917 | void __exit roce_gid_mgmt_cleanup(void) |
918 | { |
919 | if (IS_ENABLED(CONFIG_IPV6)) |
920 | unregister_inet6addr_notifier(nb: &nb_inet6addr); |
921 | unregister_inetaddr_notifier(nb: &nb_inetaddr); |
922 | unregister_netdevice_notifier(nb: &nb_netdevice); |
923 | /* Ensure all gid deletion tasks complete before we go down, |
924 | * to avoid any reference to free'd memory. By the time |
925 | * ib-core is removed, all physical devices have been removed, |
926 | * so no issue with remaining hardware contexts. |
927 | */ |
928 | destroy_workqueue(wq: gid_cache_wq); |
929 | } |
930 | |