1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * Generic nexthop implementation |
4 | * |
5 | * Copyright (c) 2017-19 Cumulus Networks |
6 | * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com> |
7 | */ |
8 | |
9 | #ifndef __LINUX_NEXTHOP_H |
10 | #define __LINUX_NEXTHOP_H |
11 | |
12 | #include <linux/netdevice.h> |
13 | #include <linux/notifier.h> |
14 | #include <linux/route.h> |
15 | #include <linux/types.h> |
16 | #include <net/ip_fib.h> |
17 | #include <net/ip6_fib.h> |
18 | #include <net/netlink.h> |
19 | |
/* Mask of RTNH_F_* flag bits accepted as nexthop flags; only RTNH_F_ONLINK.
 * NOTE(review): the name suggests this validates user-supplied flags, but no
 * user of the macro is visible in this header - confirm at the call site.
 */
#define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK

/* Forward declaration; the full definition follows later in this header. */
struct nexthop;
23 | |
/* Configuration record describing a nexthop.
 *
 * NOTE(review): nothing in this header reads or writes these fields.  Judging
 * by the nlattr and nl_info members this presumably carries a parsed netlink
 * request - confirm against the code that fills it in.
 */
struct nh_config {
	u32		nh_id;

	u8		nh_family;
	u8		nh_protocol;
	u8		nh_blackhole;
	u8		nh_fdb;
	u32		nh_flags;

	int		nh_ifindex;
	struct net_device	*dev;

	/* Gateway address; one member per address family. */
	union {
		__be32		ipv4;
		struct in6_addr	ipv6;
	} gw;

	/* Group-related settings.  Each nh_grp_res_has_* flag is paired by
	 * name with the nh_grp_res_* value above it.
	 * NOTE(review): presumably "value was supplied" markers - confirm.
	 */
	struct nlattr	*nh_grp;
	u16		nh_grp_type;
	u16		nh_grp_res_num_buckets;
	unsigned long	nh_grp_res_idle_timer;
	unsigned long	nh_grp_res_unbalanced_timer;
	bool		nh_grp_res_has_num_buckets;
	bool		nh_grp_res_has_idle_timer;
	bool		nh_grp_res_has_unbalanced_timer;

	bool		nh_hw_stats;

	struct nlattr	*nh_encap;
	u16		nh_encap_type;

	u32		nlflags;
	struct nl_info	nlinfo;
};
58 | |
/* Data of a single (non-group) nexthop, reached through nexthop::nh_info. */
struct nh_info {
	struct hlist_node	dev_hash;    /* entry on netns devhash */
	struct nexthop		*nh_parent;

	u8			family;		/* nexthop_fib6_nh() tests this against AF_INET6 */
	bool			reject_nh;	/* value returned by nexthop_is_blackhole() */
	bool			fdb_nh;		/* value returned by nexthop_is_fdb() */

	/* nexthop_fib_nhc() build-asserts that nh_common sits at offset 0 of
	 * both struct fib_nh and struct fib6_nh, so fib_nhc overlays the
	 * common part of whichever variant is stored here.
	 */
	union {
		struct fib_nh_common	fib_nhc;
		struct fib_nh		fib_nh;
		struct fib6_nh		fib6_nh;
	};
};
73 | |
/* One element of nh_res_table::nh_buckets. */
struct nh_res_bucket {
	struct nh_grp_entry __rcu *nh_entry;	/* RCU-managed pointer to a group entry */
	atomic_long_t		used_time;
	unsigned long		migrated_time;
	bool			occupied;
	u8			nh_flags;
};
81 | |
/* Bucket table of a nexthop group, referenced from nh_group::res_table.
 * The buckets live in a trailing flexible array of num_nh_buckets elements.
 */
struct nh_res_table {
	struct net		*net;
	u32			nhg_id;
	struct delayed_work	upkeep_dw;

	/* List of NHGEs that have too few buckets ("uw" for underweight).
	 * Reclaimed buckets will be given to entries in this list.
	 */
	struct list_head	uw_nh_entries;
	unsigned long		unbalanced_since;

	u32			idle_timer;
	u32			unbalanced_timer;

	u16			num_nh_buckets;	/* element count of nh_buckets[] */
	struct nh_res_bucket	nh_buckets[] __counted_by(num_nh_buckets);
};
99 | |
/* Packet counter of one group entry.  nh_grp_entry::stats points at this type
 * through a __percpu pointer; syncp is the u64_stats sync object stored
 * alongside the counter.
 */
struct nh_grp_entry_stats {
	u64_stats_t	packets;
	struct u64_stats_sync syncp;
};
104 | |
/* One member of a nexthop group (element of nh_group::nh_entries). */
struct nh_grp_entry {
	struct nexthop	*nh;		/* the member nexthop */
	struct nh_grp_entry_stats __percpu	*stats;
	u8		weight;		/* passed to fib_add_nexthop() by nexthop_mpath_fill_node() */

	/* NOTE(review): presumably hthr is used by hash-threshold groups and
	 * res by resilient groups (cf. nh_group::hash_threshold / ::resilient)
	 * - confirm against the group implementation.
	 */
	union {
		struct {
			atomic_t	upper_bound;
		} hthr;
		struct {
			/* Member on uw_nh_entries. */
			struct list_head	uw_nh_entry;

			u16			count_buckets;
			u16			wants_buckets;
		} res;
	};

	struct list_head nh_list;
	struct nexthop	*nh_parent;  /* nexthop of group with this entry */
	u64		packets_hw;
};
127 | |
/* Nexthop group, reached through nexthop::nh_grp when nexthop::is_group is
 * set.  The members are stored in a trailing flexible array of num_nh entries.
 */
struct nh_group {
	struct nh_group		*spare; /* spare group for removals */
	u16			num_nh;		/* element count of nh_entries[] */
	bool			is_multipath;	/* nexthop_num_path() reports num_nh only when set */
	bool			hash_threshold;
	bool			resilient;
	bool			fdb_nh;		/* value returned by nexthop_is_fdb() */
	bool			has_v4;		/* value returned by nexthop_has_v4() */
	bool			hw_stats;

	struct nh_res_table __rcu *res_table;
	struct nh_grp_entry	nh_entries[] __counted_by(num_nh);
};
141 | |
/* A nexthop object: either a single nexthop or a group of nexthops. */
struct nexthop {
	struct rb_node		rb_node;    /* entry on netns rbtree */
	struct list_head	fi_list;    /* v4 entries using nh */
	struct list_head	f6i_list;   /* v6 entries using nh */
	struct list_head        fdb_list;   /* fdb entries using this nh */
	struct list_head	grp_list;   /* nh group entries using this nh */
	struct net		*net;

	u32			id;

	u8			protocol;   /* app managing this nh */
	u8			nh_flags;
	bool			is_group;   /* selects the union member below */

	refcount_t		refcnt;	    /* managed by nexthop_get()/nexthop_put() */
	struct rcu_head		rcu;	    /* handed to call_rcu() by nexthop_put() */

	/* The accessors in this header read nh_grp when is_group is set and
	 * nh_info otherwise.
	 */
	union {
		struct nh_info	__rcu *nh_info;
		struct nh_group	__rcu *nh_grp;
	};
};
164 | |
/* Nexthop event codes.
 * NOTE(review): presumably the event argument delivered to notifier blocks
 * registered with register_nexthop_notifier() - confirm at the call sites.
 */
enum nexthop_event_type {
	NEXTHOP_EVENT_DEL,
	NEXTHOP_EVENT_REPLACE,
	NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
	NEXTHOP_EVENT_BUCKET_REPLACE,
	NEXTHOP_EVENT_HW_STATS_REPORT_DELTA,
};
172 | |
/* Type tag stored in nh_notifier_info::type.
 * NOTE(review): presumably each value selects the like-named member of the
 * union in struct nh_notifier_info - confirm with the notifier producers.
 */
enum nh_notifier_info_type {
	NH_NOTIFIER_INFO_TYPE_SINGLE,
	NH_NOTIFIER_INFO_TYPE_GRP,
	NH_NOTIFIER_INFO_TYPE_RES_TABLE,
	NH_NOTIFIER_INFO_TYPE_RES_BUCKET,
	NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS,
};
180 | |
181 | struct nh_notifier_single_info { |
182 | struct net_device *dev; |
183 | u8 gw_family; |
184 | union { |
185 | __be32 ipv4; |
186 | struct in6_addr ipv6; |
187 | }; |
188 | u32 id; |
189 | u8 is_reject:1, |
190 | is_fdb:1, |
191 | has_encap:1; |
192 | }; |
193 | |
/* One element of nh_notifier_grp_info::nh_entries: a nexthop description
 * together with a weight.
 */
struct nh_notifier_grp_entry_info {
	u8 weight;
	struct nh_notifier_single_info nh;
};
198 | |
/* Notifier description of a group; pointed to by nh_notifier_info::nh_grp.
 * Carries num_nh entries in a trailing flexible array.
 */
struct nh_notifier_grp_info {
	u16 num_nh;	/* element count of nh_entries[] */
	bool is_fdb;
	bool hw_stats;
	struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh);
};
205 | |
/* Notifier description of a single bucket; pointed to by
 * nh_notifier_info::nh_res_bucket.  Carries two nexthop descriptions,
 * old_nh and new_nh.
 * NOTE(review): presumably the bucket's nexthop before and after a
 * replacement - confirm with the code that fills this in.
 */
struct nh_notifier_res_bucket_info {
	u16 bucket_index;
	unsigned int idle_timer_ms;
	bool force;
	struct nh_notifier_single_info old_nh;
	struct nh_notifier_single_info new_nh;
};
213 | |
/* Notifier description of a whole bucket table; pointed to by
 * nh_notifier_info::nh_res_table.  Holds one nexthop description per bucket
 * in a trailing flexible array of num_nh_buckets elements.
 */
struct nh_notifier_res_table_info {
	u16 num_nh_buckets;	/* element count of nhs[] */
	bool hw_stats;
	struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets);
};
219 | |
/* One element of nh_notifier_grp_hw_stats_info::stats: an id paired with a
 * packet count.
 */
struct nh_notifier_grp_hw_stats_entry_info {
	u32 id;
	u64 packets;
};
224 | |
/* Notifier payload for group hardware statistics; pointed to by
 * nh_notifier_info::nh_grp_hw_stats and taken by
 * nh_grp_hw_stats_report_delta().  Holds num_nh entries in a trailing
 * flexible array.
 */
struct nh_notifier_grp_hw_stats_info {
	u16 num_nh;	/* element count of stats[] */
	bool hw_stats_used;
	struct nh_notifier_grp_hw_stats_entry_info stats[] __counted_by(num_nh);
};
230 | |
/* Top-level nexthop notifier payload: common fields followed by a pointer to
 * one of the type-specific descriptions.
 */
struct nh_notifier_info {
	struct net *net;
	struct netlink_ext_ack *extack;
	u32 id;
	enum nh_notifier_info_type type;
	/* NOTE(review): presumably 'type' tells which pointer is valid -
	 * confirm with the notifier producers.
	 */
	union {
		struct nh_notifier_single_info *nh;
		struct nh_notifier_grp_info *nh_grp;
		struct nh_notifier_res_table_info *nh_res_table;
		struct nh_notifier_res_bucket_info *nh_res_bucket;
		struct nh_notifier_grp_hw_stats_info *nh_grp_hw_stats;
	};
};
244 | |
/* Notifier registration and hardware-feedback entry points.  Only the
 * prototypes are visible in this header; the implementations live elsewhere.
 * NOTE(review): locking requirements and return-value conventions are not
 * visible here - check the definitions before relying on them.
 */
int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
			      struct netlink_ext_ack *extack);
int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
				 bool offload, bool trap);
void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
				     unsigned long *activity);
void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info,
				  unsigned int nh_idx,
				  u64 delta_packets);
257 | |
/* caller is holding rcu or rtnl; no reference taken to nexthop */
struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
/* RCU callback that nexthop_put() hands to call_rcu() once the last
 * reference is dropped.
 */
void nexthop_free_rcu(struct rcu_head *head);
261 | |
262 | static inline bool nexthop_get(struct nexthop *nh) |
263 | { |
264 | return refcount_inc_not_zero(r: &nh->refcnt); |
265 | } |
266 | |
267 | static inline void nexthop_put(struct nexthop *nh) |
268 | { |
269 | if (refcount_dec_and_test(r: &nh->refcnt)) |
270 | call_rcu(head: &nh->rcu, func: nexthop_free_rcu); |
271 | } |
272 | |
/* Two nexthops compare equal only when they are the very same object;
 * the contents are never inspected.
 */
static inline bool nexthop_cmp(const struct nexthop *nh1,
			       const struct nexthop *nh2)
{
	const bool same_object = (nh1 == nh2);

	return same_object;
}
278 | |
279 | static inline bool nexthop_is_fdb(const struct nexthop *nh) |
280 | { |
281 | if (nh->is_group) { |
282 | const struct nh_group *nh_grp; |
283 | |
284 | nh_grp = rcu_dereference_rtnl(nh->nh_grp); |
285 | return nh_grp->fdb_nh; |
286 | } else { |
287 | const struct nh_info *nhi; |
288 | |
289 | nhi = rcu_dereference_rtnl(nh->nh_info); |
290 | return nhi->fdb_nh; |
291 | } |
292 | } |
293 | |
294 | static inline bool nexthop_has_v4(const struct nexthop *nh) |
295 | { |
296 | if (nh->is_group) { |
297 | struct nh_group *nh_grp; |
298 | |
299 | nh_grp = rcu_dereference_rtnl(nh->nh_grp); |
300 | return nh_grp->has_v4; |
301 | } |
302 | return false; |
303 | } |
304 | |
305 | static inline bool nexthop_is_multipath(const struct nexthop *nh) |
306 | { |
307 | if (nh->is_group) { |
308 | struct nh_group *nh_grp; |
309 | |
310 | nh_grp = rcu_dereference_rtnl(nh->nh_grp); |
311 | return nh_grp->is_multipath; |
312 | } |
313 | return false; |
314 | } |
315 | |
/* Select a nexthop from @nh for the given @hash; defined outside this header.
 * The inline callers below read nh_info from the result, i.e. they expect a
 * non-group nexthop back.  nexthop_path_fdb_result() additionally copes with
 * a NULL return.
 */
struct nexthop *nexthop_select_path(struct nexthop *nh, int hash);
317 | |
318 | static inline unsigned int nexthop_num_path(const struct nexthop *nh) |
319 | { |
320 | unsigned int rc = 1; |
321 | |
322 | if (nh->is_group) { |
323 | struct nh_group *nh_grp; |
324 | |
325 | nh_grp = rcu_dereference_rtnl(nh->nh_grp); |
326 | if (nh_grp->is_multipath) |
327 | rc = nh_grp->num_nh; |
328 | } |
329 | |
330 | return rc; |
331 | } |
332 | |
333 | static inline |
334 | struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel) |
335 | { |
336 | /* for_nexthops macros in fib_semantics.c grabs a pointer to |
337 | * the nexthop before checking nhsel |
338 | */ |
339 | if (nhsel >= nhg->num_nh) |
340 | return NULL; |
341 | |
342 | return nhg->nh_entries[nhsel].nh; |
343 | } |
344 | |
345 | static inline |
346 | int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, |
347 | u8 rt_family) |
348 | { |
349 | struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp); |
350 | int i; |
351 | |
352 | for (i = 0; i < nhg->num_nh; i++) { |
353 | struct nexthop *nhe = nhg->nh_entries[i].nh; |
354 | struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info); |
355 | struct fib_nh_common *nhc = &nhi->fib_nhc; |
356 | int weight = nhg->nh_entries[i].weight; |
357 | |
358 | if (fib_add_nexthop(skb, nh: nhc, nh_weight: weight, rt_family, nh_tclassid: 0) < 0) |
359 | return -EMSGSIZE; |
360 | } |
361 | |
362 | return 0; |
363 | } |
364 | |
365 | /* called with rcu lock */ |
366 | static inline bool nexthop_is_blackhole(const struct nexthop *nh) |
367 | { |
368 | const struct nh_info *nhi; |
369 | |
370 | if (nh->is_group) { |
371 | struct nh_group *nh_grp; |
372 | |
373 | nh_grp = rcu_dereference_rtnl(nh->nh_grp); |
374 | if (nh_grp->num_nh > 1) |
375 | return false; |
376 | |
377 | nh = nh_grp->nh_entries[0].nh; |
378 | } |
379 | |
380 | nhi = rcu_dereference_rtnl(nh->nh_info); |
381 | return nhi->reject_nh; |
382 | } |
383 | |
384 | static inline void nexthop_path_fib_result(struct fib_result *res, int hash) |
385 | { |
386 | struct nh_info *nhi; |
387 | struct nexthop *nh; |
388 | |
389 | nh = nexthop_select_path(nh: res->fi->nh, hash); |
390 | nhi = rcu_dereference(nh->nh_info); |
391 | res->nhc = &nhi->fib_nhc; |
392 | } |
393 | |
394 | /* called with rcu read lock or rtnl held */ |
395 | static inline |
396 | struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel) |
397 | { |
398 | struct nh_info *nhi; |
399 | |
400 | BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0); |
401 | BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0); |
402 | |
403 | if (nh->is_group) { |
404 | struct nh_group *nh_grp; |
405 | |
406 | nh_grp = rcu_dereference_rtnl(nh->nh_grp); |
407 | if (nh_grp->is_multipath) { |
408 | nh = nexthop_mpath_select(nhg: nh_grp, nhsel); |
409 | if (!nh) |
410 | return NULL; |
411 | } |
412 | } |
413 | |
414 | nhi = rcu_dereference_rtnl(nh->nh_info); |
415 | return &nhi->fib_nhc; |
416 | } |
417 | |
418 | /* called from fib_table_lookup with rcu_lock */ |
419 | static inline |
420 | struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh, |
421 | int fib_flags, |
422 | const struct flowi4 *flp, |
423 | int *nhsel) |
424 | { |
425 | struct nh_info *nhi; |
426 | |
427 | if (nh->is_group) { |
428 | struct nh_group *nhg = rcu_dereference(nh->nh_grp); |
429 | int i; |
430 | |
431 | for (i = 0; i < nhg->num_nh; i++) { |
432 | struct nexthop *nhe = nhg->nh_entries[i].nh; |
433 | |
434 | nhi = rcu_dereference(nhe->nh_info); |
435 | if (fib_lookup_good_nhc(nhc: &nhi->fib_nhc, fib_flags, flp)) { |
436 | *nhsel = i; |
437 | return &nhi->fib_nhc; |
438 | } |
439 | } |
440 | } else { |
441 | nhi = rcu_dereference(nh->nh_info); |
442 | if (fib_lookup_good_nhc(nhc: &nhi->fib_nhc, fib_flags, flp)) { |
443 | *nhsel = 0; |
444 | return &nhi->fib_nhc; |
445 | } |
446 | } |
447 | |
448 | return NULL; |
449 | } |
450 | |
451 | static inline bool nexthop_uses_dev(const struct nexthop *nh, |
452 | const struct net_device *dev) |
453 | { |
454 | struct nh_info *nhi; |
455 | |
456 | if (nh->is_group) { |
457 | struct nh_group *nhg = rcu_dereference(nh->nh_grp); |
458 | int i; |
459 | |
460 | for (i = 0; i < nhg->num_nh; i++) { |
461 | struct nexthop *nhe = nhg->nh_entries[i].nh; |
462 | |
463 | nhi = rcu_dereference(nhe->nh_info); |
464 | if (nhc_l3mdev_matches_dev(nhc: &nhi->fib_nhc, dev)) |
465 | return true; |
466 | } |
467 | } else { |
468 | nhi = rcu_dereference(nh->nh_info); |
469 | if (nhc_l3mdev_matches_dev(nhc: &nhi->fib_nhc, dev)) |
470 | return true; |
471 | } |
472 | |
473 | return false; |
474 | } |
475 | |
476 | static inline unsigned int fib_info_num_path(const struct fib_info *fi) |
477 | { |
478 | if (unlikely(fi->nh)) |
479 | return nexthop_num_path(nh: fi->nh); |
480 | |
481 | return fi->fib_nhs; |
482 | } |
483 | |
/* Defined outside this header.
 * NOTE(review): presumably validates @nh for use by an IPv4 route of the
 * given @scope and reports problems through @extack - confirm at the
 * definition.
 */
int fib_check_nexthop(struct nexthop *nh, u8 scope,
		      struct netlink_ext_ack *extack);
486 | |
487 | static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) |
488 | { |
489 | if (unlikely(fi->nh)) |
490 | return nexthop_fib_nhc(nh: fi->nh, nhsel); |
491 | |
492 | return &fi->fib_nh[nhsel].nh_common; |
493 | } |
494 | |
495 | /* only used when fib_nh is built into fib_info */ |
496 | static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel) |
497 | { |
498 | WARN_ON(fi->nh); |
499 | |
500 | return &fi->fib_nh[nhsel]; |
501 | } |
502 | |
/*
 * IPv6 variants
 */

/* Defined outside this header.
 * NOTE(review): presumably validates @nh for use by the IPv6 route described
 * by @cfg and reports problems through @extack - confirm at the definition.
 */
int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
		       struct netlink_ext_ack *extack);
508 | |
509 | /* Caller should either hold rcu_read_lock(), or RTNL. */ |
510 | static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) |
511 | { |
512 | struct nh_info *nhi; |
513 | |
514 | if (nh->is_group) { |
515 | struct nh_group *nh_grp; |
516 | |
517 | nh_grp = rcu_dereference_rtnl(nh->nh_grp); |
518 | nh = nexthop_mpath_select(nhg: nh_grp, nhsel: 0); |
519 | if (!nh) |
520 | return NULL; |
521 | } |
522 | |
523 | nhi = rcu_dereference_rtnl(nh->nh_info); |
524 | if (nhi->family == AF_INET6) |
525 | return &nhi->fib6_nh; |
526 | |
527 | return NULL; |
528 | } |
529 | |
530 | static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i) |
531 | { |
532 | struct fib6_nh *fib6_nh; |
533 | |
534 | fib6_nh = f6i->nh ? nexthop_fib6_nh(nh: f6i->nh) : f6i->fib6_nh; |
535 | return fib6_nh->fib_nh_dev; |
536 | } |
537 | |
538 | static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash) |
539 | { |
540 | struct nexthop *nh = res->f6i->nh; |
541 | struct nh_info *nhi; |
542 | |
543 | nh = nexthop_select_path(nh, hash); |
544 | |
545 | nhi = rcu_dereference_rtnl(nh->nh_info); |
546 | if (nhi->reject_nh) { |
547 | res->fib6_type = RTN_BLACKHOLE; |
548 | res->fib6_flags |= RTF_REJECT; |
549 | res->nh = nexthop_fib6_nh(nh); |
550 | } else { |
551 | res->nh = &nhi->fib6_nh; |
552 | } |
553 | } |
554 | |
/* Defined outside this header.  Takes a callback @cb that receives a
 * struct fib6_nh and the opaque @arg.
 * NOTE(review): iteration order, early-stop behaviour and the meaning of the
 * return value are not visible here - confirm at the definition.
 */
int nexthop_for_each_fib6_nh(struct nexthop *nh,
			     int (*cb)(struct fib6_nh *nh, void *arg),
			     void *arg);
558 | |
559 | static inline int nexthop_get_family(struct nexthop *nh) |
560 | { |
561 | struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); |
562 | |
563 | return nhi->family; |
564 | } |
565 | |
566 | static inline |
567 | struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh) |
568 | { |
569 | struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); |
570 | |
571 | return &nhi->fib_nhc; |
572 | } |
573 | |
574 | static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh, |
575 | int hash) |
576 | { |
577 | struct nh_info *nhi; |
578 | struct nexthop *nhp; |
579 | |
580 | nhp = nexthop_select_path(nh, hash); |
581 | if (unlikely(!nhp)) |
582 | return NULL; |
583 | nhi = rcu_dereference(nhp->nh_info); |
584 | return &nhi->fib_nhc; |
585 | } |
586 | #endif |
587 | |