/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Generic nexthop implementation
 *
 * Copyright (c) 2017-19 Cumulus Networks
 * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
 */

9#ifndef __LINUX_NEXTHOP_H
10#define __LINUX_NEXTHOP_H
11
12#include <linux/netdevice.h>
13#include <linux/notifier.h>
14#include <linux/route.h>
15#include <linux/types.h>
16#include <net/ip_fib.h>
17#include <net/ip6_fib.h>
18#include <net/netlink.h>
19
20#define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK
21
22struct nexthop;
23
24struct nh_config {
25 u32 nh_id;
26
27 u8 nh_family;
28 u8 nh_protocol;
29 u8 nh_blackhole;
30 u8 nh_fdb;
31 u32 nh_flags;
32
33 int nh_ifindex;
34 struct net_device *dev;
35
36 union {
37 __be32 ipv4;
38 struct in6_addr ipv6;
39 } gw;
40
41 struct nlattr *nh_grp;
42 u16 nh_grp_type;
43 u16 nh_grp_res_num_buckets;
44 unsigned long nh_grp_res_idle_timer;
45 unsigned long nh_grp_res_unbalanced_timer;
46 bool nh_grp_res_has_num_buckets;
47 bool nh_grp_res_has_idle_timer;
48 bool nh_grp_res_has_unbalanced_timer;
49
50 bool nh_hw_stats;
51
52 struct nlattr *nh_encap;
53 u16 nh_encap_type;
54
55 u32 nlflags;
56 struct nl_info nlinfo;
57};
58
59struct nh_info {
60 struct hlist_node dev_hash; /* entry on netns devhash */
61 struct nexthop *nh_parent;
62
63 u8 family;
64 bool reject_nh;
65 bool fdb_nh;
66
67 union {
68 struct fib_nh_common fib_nhc;
69 struct fib_nh fib_nh;
70 struct fib6_nh fib6_nh;
71 };
72};
73
74struct nh_res_bucket {
75 struct nh_grp_entry __rcu *nh_entry;
76 atomic_long_t used_time;
77 unsigned long migrated_time;
78 bool occupied;
79 u8 nh_flags;
80};
81
82struct nh_res_table {
83 struct net *net;
84 u32 nhg_id;
85 struct delayed_work upkeep_dw;
86
87 /* List of NHGEs that have too few buckets ("uw" for underweight).
88 * Reclaimed buckets will be given to entries in this list.
89 */
90 struct list_head uw_nh_entries;
91 unsigned long unbalanced_since;
92
93 u32 idle_timer;
94 u32 unbalanced_timer;
95
96 u16 num_nh_buckets;
97 struct nh_res_bucket nh_buckets[] __counted_by(num_nh_buckets);
98};
99
100struct nh_grp_entry_stats {
101 u64_stats_t packets;
102 struct u64_stats_sync syncp;
103};
104
105struct nh_grp_entry {
106 struct nexthop *nh;
107 struct nh_grp_entry_stats __percpu *stats;
108 u8 weight;
109
110 union {
111 struct {
112 atomic_t upper_bound;
113 } hthr;
114 struct {
115 /* Member on uw_nh_entries. */
116 struct list_head uw_nh_entry;
117
118 u16 count_buckets;
119 u16 wants_buckets;
120 } res;
121 };
122
123 struct list_head nh_list;
124 struct nexthop *nh_parent; /* nexthop of group with this entry */
125 u64 packets_hw;
126};
127
128struct nh_group {
129 struct nh_group *spare; /* spare group for removals */
130 u16 num_nh;
131 bool is_multipath;
132 bool hash_threshold;
133 bool resilient;
134 bool fdb_nh;
135 bool has_v4;
136 bool hw_stats;
137
138 struct nh_res_table __rcu *res_table;
139 struct nh_grp_entry nh_entries[] __counted_by(num_nh);
140};
141
142struct nexthop {
143 struct rb_node rb_node; /* entry on netns rbtree */
144 struct list_head fi_list; /* v4 entries using nh */
145 struct list_head f6i_list; /* v6 entries using nh */
146 struct list_head fdb_list; /* fdb entries using this nh */
147 struct list_head grp_list; /* nh group entries using this nh */
148 struct net *net;
149
150 u32 id;
151
152 u8 protocol; /* app managing this nh */
153 u8 nh_flags;
154 bool is_group;
155
156 refcount_t refcnt;
157 struct rcu_head rcu;
158
159 union {
160 struct nh_info __rcu *nh_info;
161 struct nh_group __rcu *nh_grp;
162 };
163};
164
/*
 * Nexthop event kinds.
 * NOTE(review): presumably the event value passed to notifier blocks
 * registered with register_nexthop_notifier() -- confirm in nexthop.c.
 */
enum nexthop_event_type {
	NEXTHOP_EVENT_DEL,
	NEXTHOP_EVENT_REPLACE,
	NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
	NEXTHOP_EVENT_BUCKET_REPLACE,
	NEXTHOP_EVENT_HW_STATS_REPORT_DELTA,
};

/*
 * Discriminator stored in nh_notifier_info::type.
 * NOTE(review): each value presumably selects the like-named member of the
 * union in struct nh_notifier_info -- confirm in nexthop.c.
 */
enum nh_notifier_info_type {
	NH_NOTIFIER_INFO_TYPE_SINGLE,
	NH_NOTIFIER_INFO_TYPE_GRP,
	NH_NOTIFIER_INFO_TYPE_RES_TABLE,
	NH_NOTIFIER_INFO_TYPE_RES_BUCKET,
	NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS,
};

181struct nh_notifier_single_info {
182 struct net_device *dev;
183 u8 gw_family;
184 union {
185 __be32 ipv4;
186 struct in6_addr ipv6;
187 };
188 u32 id;
189 u8 is_reject:1,
190 is_fdb:1,
191 has_encap:1;
192};
193
194struct nh_notifier_grp_entry_info {
195 u8 weight;
196 struct nh_notifier_single_info nh;
197};
198
199struct nh_notifier_grp_info {
200 u16 num_nh;
201 bool is_fdb;
202 bool hw_stats;
203 struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh);
204};
205
206struct nh_notifier_res_bucket_info {
207 u16 bucket_index;
208 unsigned int idle_timer_ms;
209 bool force;
210 struct nh_notifier_single_info old_nh;
211 struct nh_notifier_single_info new_nh;
212};
213
214struct nh_notifier_res_table_info {
215 u16 num_nh_buckets;
216 bool hw_stats;
217 struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets);
218};
219
220struct nh_notifier_grp_hw_stats_entry_info {
221 u32 id;
222 u64 packets;
223};
224
225struct nh_notifier_grp_hw_stats_info {
226 u16 num_nh;
227 bool hw_stats_used;
228 struct nh_notifier_grp_hw_stats_entry_info stats[] __counted_by(num_nh);
229};
230
231struct nh_notifier_info {
232 struct net *net;
233 struct netlink_ext_ack *extack;
234 u32 id;
235 enum nh_notifier_info_type type;
236 union {
237 struct nh_notifier_single_info *nh;
238 struct nh_notifier_grp_info *nh_grp;
239 struct nh_notifier_res_table_info *nh_res_table;
240 struct nh_notifier_res_bucket_info *nh_res_bucket;
241 struct nh_notifier_grp_hw_stats_info *nh_grp_hw_stats;
242 };
243};
244
245int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
246 struct netlink_ext_ack *extack);
247int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
248int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
249void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
250void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
251 bool offload, bool trap);
252void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
253 unsigned long *activity);
254void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info,
255 unsigned int nh_idx,
256 u64 delta_packets);
257
258/* caller is holding rcu or rtnl; no reference taken to nexthop */
259struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
260void nexthop_free_rcu(struct rcu_head *head);
261
262static inline bool nexthop_get(struct nexthop *nh)
263{
264 return refcount_inc_not_zero(r: &nh->refcnt);
265}
266
267static inline void nexthop_put(struct nexthop *nh)
268{
269 if (refcount_dec_and_test(r: &nh->refcnt))
270 call_rcu(head: &nh->rcu, func: nexthop_free_rcu);
271}
272
/*
 * Compare two nexthops by identity.
 *
 * Returns true only when @nh1 and @nh2 are the same pointer; the contents
 * of the objects are never examined.
 */
static inline bool nexthop_cmp(const struct nexthop *nh1,
			       const struct nexthop *nh2)
{
	return nh1 == nh2;
}

279static inline bool nexthop_is_fdb(const struct nexthop *nh)
280{
281 if (nh->is_group) {
282 const struct nh_group *nh_grp;
283
284 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
285 return nh_grp->fdb_nh;
286 } else {
287 const struct nh_info *nhi;
288
289 nhi = rcu_dereference_rtnl(nh->nh_info);
290 return nhi->fdb_nh;
291 }
292}
293
294static inline bool nexthop_has_v4(const struct nexthop *nh)
295{
296 if (nh->is_group) {
297 struct nh_group *nh_grp;
298
299 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
300 return nh_grp->has_v4;
301 }
302 return false;
303}
304
305static inline bool nexthop_is_multipath(const struct nexthop *nh)
306{
307 if (nh->is_group) {
308 struct nh_group *nh_grp;
309
310 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
311 return nh_grp->is_multipath;
312 }
313 return false;
314}
315
/*
 * Pick a nexthop for @hash. Defined outside this header;
 * nexthop_path_fdb_result() checks the result for NULL before using it.
 */
struct nexthop *nexthop_select_path(struct nexthop *nh, int hash);

318static inline unsigned int nexthop_num_path(const struct nexthop *nh)
319{
320 unsigned int rc = 1;
321
322 if (nh->is_group) {
323 struct nh_group *nh_grp;
324
325 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
326 if (nh_grp->is_multipath)
327 rc = nh_grp->num_nh;
328 }
329
330 return rc;
331}
332
333static inline
334struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel)
335{
336 /* for_nexthops macros in fib_semantics.c grabs a pointer to
337 * the nexthop before checking nhsel
338 */
339 if (nhsel >= nhg->num_nh)
340 return NULL;
341
342 return nhg->nh_entries[nhsel].nh;
343}
344
345static inline
346int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh,
347 u8 rt_family)
348{
349 struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
350 int i;
351
352 for (i = 0; i < nhg->num_nh; i++) {
353 struct nexthop *nhe = nhg->nh_entries[i].nh;
354 struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info);
355 struct fib_nh_common *nhc = &nhi->fib_nhc;
356 int weight = nhg->nh_entries[i].weight;
357
358 if (fib_add_nexthop(skb, nh: nhc, nh_weight: weight, rt_family, nh_tclassid: 0) < 0)
359 return -EMSGSIZE;
360 }
361
362 return 0;
363}
364
365/* called with rcu lock */
366static inline bool nexthop_is_blackhole(const struct nexthop *nh)
367{
368 const struct nh_info *nhi;
369
370 if (nh->is_group) {
371 struct nh_group *nh_grp;
372
373 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
374 if (nh_grp->num_nh > 1)
375 return false;
376
377 nh = nh_grp->nh_entries[0].nh;
378 }
379
380 nhi = rcu_dereference_rtnl(nh->nh_info);
381 return nhi->reject_nh;
382}
383
384static inline void nexthop_path_fib_result(struct fib_result *res, int hash)
385{
386 struct nh_info *nhi;
387 struct nexthop *nh;
388
389 nh = nexthop_select_path(nh: res->fi->nh, hash);
390 nhi = rcu_dereference(nh->nh_info);
391 res->nhc = &nhi->fib_nhc;
392}
393
394/* called with rcu read lock or rtnl held */
395static inline
396struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel)
397{
398 struct nh_info *nhi;
399
400 BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0);
401 BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0);
402
403 if (nh->is_group) {
404 struct nh_group *nh_grp;
405
406 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
407 if (nh_grp->is_multipath) {
408 nh = nexthop_mpath_select(nhg: nh_grp, nhsel);
409 if (!nh)
410 return NULL;
411 }
412 }
413
414 nhi = rcu_dereference_rtnl(nh->nh_info);
415 return &nhi->fib_nhc;
416}
417
418/* called from fib_table_lookup with rcu_lock */
419static inline
420struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh,
421 int fib_flags,
422 const struct flowi4 *flp,
423 int *nhsel)
424{
425 struct nh_info *nhi;
426
427 if (nh->is_group) {
428 struct nh_group *nhg = rcu_dereference(nh->nh_grp);
429 int i;
430
431 for (i = 0; i < nhg->num_nh; i++) {
432 struct nexthop *nhe = nhg->nh_entries[i].nh;
433
434 nhi = rcu_dereference(nhe->nh_info);
435 if (fib_lookup_good_nhc(nhc: &nhi->fib_nhc, fib_flags, flp)) {
436 *nhsel = i;
437 return &nhi->fib_nhc;
438 }
439 }
440 } else {
441 nhi = rcu_dereference(nh->nh_info);
442 if (fib_lookup_good_nhc(nhc: &nhi->fib_nhc, fib_flags, flp)) {
443 *nhsel = 0;
444 return &nhi->fib_nhc;
445 }
446 }
447
448 return NULL;
449}
450
451static inline bool nexthop_uses_dev(const struct nexthop *nh,
452 const struct net_device *dev)
453{
454 struct nh_info *nhi;
455
456 if (nh->is_group) {
457 struct nh_group *nhg = rcu_dereference(nh->nh_grp);
458 int i;
459
460 for (i = 0; i < nhg->num_nh; i++) {
461 struct nexthop *nhe = nhg->nh_entries[i].nh;
462
463 nhi = rcu_dereference(nhe->nh_info);
464 if (nhc_l3mdev_matches_dev(nhc: &nhi->fib_nhc, dev))
465 return true;
466 }
467 } else {
468 nhi = rcu_dereference(nh->nh_info);
469 if (nhc_l3mdev_matches_dev(nhc: &nhi->fib_nhc, dev))
470 return true;
471 }
472
473 return false;
474}
475
476static inline unsigned int fib_info_num_path(const struct fib_info *fi)
477{
478 if (unlikely(fi->nh))
479 return nexthop_num_path(nh: fi->nh);
480
481 return fi->fib_nhs;
482}
483
484int fib_check_nexthop(struct nexthop *nh, u8 scope,
485 struct netlink_ext_ack *extack);
486
487static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel)
488{
489 if (unlikely(fi->nh))
490 return nexthop_fib_nhc(nh: fi->nh, nhsel);
491
492 return &fi->fib_nh[nhsel].nh_common;
493}
494
495/* only used when fib_nh is built into fib_info */
496static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel)
497{
498 WARN_ON(fi->nh);
499
500 return &fi->fib_nh[nhsel];
501}
502
/*
 * IPv6 variants
 */
int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
		       struct netlink_ext_ack *extack);

509/* Caller should either hold rcu_read_lock(), or RTNL. */
510static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
511{
512 struct nh_info *nhi;
513
514 if (nh->is_group) {
515 struct nh_group *nh_grp;
516
517 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
518 nh = nexthop_mpath_select(nhg: nh_grp, nhsel: 0);
519 if (!nh)
520 return NULL;
521 }
522
523 nhi = rcu_dereference_rtnl(nh->nh_info);
524 if (nhi->family == AF_INET6)
525 return &nhi->fib6_nh;
526
527 return NULL;
528}
529
530static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
531{
532 struct fib6_nh *fib6_nh;
533
534 fib6_nh = f6i->nh ? nexthop_fib6_nh(nh: f6i->nh) : f6i->fib6_nh;
535 return fib6_nh->fib_nh_dev;
536}
537
538static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
539{
540 struct nexthop *nh = res->f6i->nh;
541 struct nh_info *nhi;
542
543 nh = nexthop_select_path(nh, hash);
544
545 nhi = rcu_dereference_rtnl(nh->nh_info);
546 if (nhi->reject_nh) {
547 res->fib6_type = RTN_BLACKHOLE;
548 res->fib6_flags |= RTF_REJECT;
549 res->nh = nexthop_fib6_nh(nh);
550 } else {
551 res->nh = &nhi->fib6_nh;
552 }
553}
554
/*
 * Takes a callback @cb and an opaque @arg; defined outside this header.
 * NOTE(review): presumably invokes @cb for each fib6_nh behind @nh.
 */
int nexthop_for_each_fib6_nh(struct nexthop *nh,
			     int (*cb)(struct fib6_nh *nh, void *arg),
			     void *arg);

559static inline int nexthop_get_family(struct nexthop *nh)
560{
561 struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
562
563 return nhi->family;
564}
565
566static inline
567struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh)
568{
569 struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
570
571 return &nhi->fib_nhc;
572}
573
574static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh,
575 int hash)
576{
577 struct nh_info *nhi;
578 struct nexthop *nhp;
579
580 nhp = nexthop_select_path(nh, hash);
581 if (unlikely(!nhp))
582 return NULL;
583 nhi = rcu_dereference(nhp->nh_info);
584 return &nhi->fib_nhc;
585}
586#endif

/* Source: linux/include/net/nexthop.h */