1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
4 | * operating system. INET is implemented using the BSD Socket |
5 | * interface as the means of communication with the user level. |
6 | * |
7 | * ROUTE - implementation of the IP router. |
8 | * |
9 | * Authors: Ross Biro |
10 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> |
11 | * Alan Cox, <gw4pts@gw4pts.ampr.org> |
12 | * Linus Torvalds, <Linus.Torvalds@helsinki.fi> |
13 | * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> |
14 | * |
15 | * Fixes: |
16 | * Alan Cox : Verify area fixes. |
17 | * Alan Cox : cli() protects routing changes |
18 | * Rui Oliveira : ICMP routing table updates |
19 | * (rco@di.uminho.pt) Routing table insertion and update |
20 | * Linus Torvalds : Rewrote bits to be sensible |
21 | * Alan Cox : Added BSD route gw semantics |
22 | * Alan Cox : Super /proc >4K |
23 | * Alan Cox : MTU in route table |
24 | * Alan Cox : MSS actually. Also added the window |
25 | * clamper. |
26 | * Sam Lantinga : Fixed route matching in rt_del() |
27 | * Alan Cox : Routing cache support. |
28 | * Alan Cox : Removed compatibility cruft. |
29 | * Alan Cox : RTF_REJECT support. |
30 | * Alan Cox : TCP irtt support. |
31 | * Jonathan Naylor : Added Metric support. |
32 | * Miquel van Smoorenburg : BSD API fixes. |
33 | * Miquel van Smoorenburg : Metrics. |
34 | * Alan Cox : Use __u32 properly |
35 | * Alan Cox : Aligned routing errors more closely with BSD |
36 | * our system is still very different. |
37 | * Alan Cox : Faster /proc handling |
38 | * Alexey Kuznetsov : Massive rework to support tree based routing, |
39 | * routing caches and better behaviour. |
40 | * |
41 | * Olaf Erb : irtt wasn't being copied right. |
42 | * Bjorn Ekwall : Kerneld route support. |
43 | * Alan Cox : Multicast fixed (I hope) |
44 | * Pavel Krauz : Limited broadcast fixed |
45 | * Mike McLagan : Routing by source |
46 | * Alexey Kuznetsov : End of old history. Split to fib.c and |
47 | * route.c and rewritten from scratch. |
48 | * Andi Kleen : Load-limit warning messages. |
49 | * Vitaly E. Lavrov : Transparent proxy revived after year coma. |
50 | * Vitaly E. Lavrov : Race condition in ip_route_input_slow. |
51 | * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow. |
52 | * Vladimir V. Ivanov : IP rule info (flowid) is really useful. |
53 | * Marc Boucher : routing by fwmark |
54 | * Robert Olsson : Added rt_cache statistics |
55 | * Arnaldo C. Melo : Convert proc stuff to seq_file |
56 | * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes. |
57 | * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect |
58 | * Ilia Sotnikov : Removed TOS from hash calculations |
59 | */ |
60 | |
61 | #define pr_fmt(fmt) "IPv4: " fmt |
62 | |
63 | #include <linux/module.h> |
64 | #include <linux/bitops.h> |
65 | #include <linux/kernel.h> |
66 | #include <linux/mm.h> |
67 | #include <linux/memblock.h> |
68 | #include <linux/socket.h> |
69 | #include <linux/errno.h> |
70 | #include <linux/in.h> |
71 | #include <linux/inet.h> |
72 | #include <linux/netdevice.h> |
73 | #include <linux/proc_fs.h> |
74 | #include <linux/init.h> |
75 | #include <linux/skbuff.h> |
76 | #include <linux/inetdevice.h> |
77 | #include <linux/igmp.h> |
78 | #include <linux/pkt_sched.h> |
79 | #include <linux/mroute.h> |
80 | #include <linux/netfilter_ipv4.h> |
81 | #include <linux/random.h> |
82 | #include <linux/rcupdate.h> |
83 | #include <linux/slab.h> |
84 | #include <linux/jhash.h> |
85 | #include <net/dst.h> |
86 | #include <net/dst_metadata.h> |
87 | #include <net/inet_dscp.h> |
88 | #include <net/net_namespace.h> |
89 | #include <net/ip.h> |
90 | #include <net/route.h> |
91 | #include <net/inetpeer.h> |
92 | #include <net/sock.h> |
93 | #include <net/ip_fib.h> |
94 | #include <net/nexthop.h> |
95 | #include <net/tcp.h> |
96 | #include <net/icmp.h> |
97 | #include <net/xfrm.h> |
98 | #include <net/lwtunnel.h> |
99 | #include <net/netevent.h> |
100 | #include <net/rtnetlink.h> |
101 | #ifdef CONFIG_SYSCTL |
102 | #include <linux/sysctl.h> |
103 | #endif |
104 | #include <net/secure_seq.h> |
105 | #include <net/ip_tunnels.h> |
106 | |
107 | #include "fib_lookup.h" |
108 | |
109 | #define RT_FL_TOS(oldflp4) \ |
110 | ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) |
111 | |
112 | #define RT_GC_TIMEOUT (300*HZ) |
113 | |
114 | #define DEFAULT_MIN_PMTU (512 + 20 + 20) |
115 | #define DEFAULT_MTU_EXPIRES (10 * 60 * HZ) |
116 | #define DEFAULT_MIN_ADVMSS 256 |
117 | static int ip_rt_max_size; |
118 | static int ip_rt_redirect_number __read_mostly = 9; |
119 | static int ip_rt_redirect_load __read_mostly = HZ / 50; |
120 | static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1)); |
121 | static int ip_rt_error_cost __read_mostly = HZ; |
122 | static int ip_rt_error_burst __read_mostly = 5 * HZ; |
123 | |
124 | static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; |
125 | |
126 | /* |
127 | * Interface to generic destination cache. |
128 | */ |
129 | |
130 | INDIRECT_CALLABLE_SCOPE |
131 | struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); |
132 | static unsigned int ipv4_default_advmss(const struct dst_entry *dst); |
133 | INDIRECT_CALLABLE_SCOPE |
134 | unsigned int ipv4_mtu(const struct dst_entry *dst); |
135 | static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); |
136 | static void ipv4_link_failure(struct sk_buff *skb); |
137 | static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, |
138 | struct sk_buff *skb, u32 mtu, |
139 | bool confirm_neigh); |
140 | static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, |
141 | struct sk_buff *skb); |
142 | static void ipv4_dst_destroy(struct dst_entry *dst); |
143 | |
144 | static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) |
145 | { |
146 | WARN_ON(1); |
147 | return NULL; |
148 | } |
149 | |
/* Neighbour hooks of ipv4_dst_ops; both are defined later in this file. */
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr);
static void ipv4_confirm_neigh(const struct dst_entry *dst,
			       const void *daddr);
154 | |
155 | static struct dst_ops ipv4_dst_ops = { |
156 | .family = AF_INET, |
157 | .check = ipv4_dst_check, |
158 | .default_advmss = ipv4_default_advmss, |
159 | .mtu = ipv4_mtu, |
160 | .cow_metrics = ipv4_cow_metrics, |
161 | .destroy = ipv4_dst_destroy, |
162 | .negative_advice = ipv4_negative_advice, |
163 | .link_failure = ipv4_link_failure, |
164 | .update_pmtu = ip_rt_update_pmtu, |
165 | .redirect = ip_do_redirect, |
166 | .local_out = __ip_local_out, |
167 | .neigh_lookup = ipv4_neigh_lookup, |
168 | .confirm_neigh = ipv4_confirm_neigh, |
169 | }; |
170 | |
171 | #define ECN_OR_COST(class) TC_PRIO_##class |
172 | |
173 | const __u8 ip_tos2prio[16] = { |
174 | TC_PRIO_BESTEFFORT, |
175 | ECN_OR_COST(BESTEFFORT), |
176 | TC_PRIO_BESTEFFORT, |
177 | ECN_OR_COST(BESTEFFORT), |
178 | TC_PRIO_BULK, |
179 | ECN_OR_COST(BULK), |
180 | TC_PRIO_BULK, |
181 | ECN_OR_COST(BULK), |
182 | TC_PRIO_INTERACTIVE, |
183 | ECN_OR_COST(INTERACTIVE), |
184 | TC_PRIO_INTERACTIVE, |
185 | ECN_OR_COST(INTERACTIVE), |
186 | TC_PRIO_INTERACTIVE_BULK, |
187 | ECN_OR_COST(INTERACTIVE_BULK), |
188 | TC_PRIO_INTERACTIVE_BULK, |
189 | ECN_OR_COST(INTERACTIVE_BULK) |
190 | }; |
191 | EXPORT_SYMBOL(ip_tos2prio); |
192 | |
193 | static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); |
194 | #define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field) |
195 | |
196 | #ifdef CONFIG_PROC_FS |
197 | static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) |
198 | { |
199 | if (*pos) |
200 | return NULL; |
201 | return SEQ_START_TOKEN; |
202 | } |
203 | |
204 | static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
205 | { |
206 | ++*pos; |
207 | return NULL; |
208 | } |
209 | |
/* seq_file ->stop: ->start acquires nothing, so there is nothing to undo. */
static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}
213 | |
214 | static int rt_cache_seq_show(struct seq_file *seq, void *v) |
215 | { |
216 | if (v == SEQ_START_TOKEN) |
217 | seq_printf(m: seq, fmt: "%-127s\n" , |
218 | "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t" |
219 | "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t" |
220 | "HHUptod\tSpecDst" ); |
221 | return 0; |
222 | } |
223 | |
224 | static const struct seq_operations rt_cache_seq_ops = { |
225 | .start = rt_cache_seq_start, |
226 | .next = rt_cache_seq_next, |
227 | .stop = rt_cache_seq_stop, |
228 | .show = rt_cache_seq_show, |
229 | }; |
230 | |
231 | static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos) |
232 | { |
233 | int cpu; |
234 | |
235 | if (*pos == 0) |
236 | return SEQ_START_TOKEN; |
237 | |
238 | for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { |
239 | if (!cpu_possible(cpu)) |
240 | continue; |
241 | *pos = cpu+1; |
242 | return &per_cpu(rt_cache_stat, cpu); |
243 | } |
244 | return NULL; |
245 | } |
246 | |
247 | static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
248 | { |
249 | int cpu; |
250 | |
251 | for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { |
252 | if (!cpu_possible(cpu)) |
253 | continue; |
254 | *pos = cpu+1; |
255 | return &per_cpu(rt_cache_stat, cpu); |
256 | } |
257 | (*pos)++; |
258 | return NULL; |
259 | |
260 | } |
261 | |
/* seq_file ->stop: the iterator holds no locks or references to drop. */
static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{
}
266 | |
267 | static int rt_cpu_seq_show(struct seq_file *seq, void *v) |
268 | { |
269 | struct rt_cache_stat *st = v; |
270 | |
271 | if (v == SEQ_START_TOKEN) { |
272 | seq_puts(m: seq, s: "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n" ); |
273 | return 0; |
274 | } |
275 | |
276 | seq_printf(m: seq, fmt: "%08x %08x %08x %08x %08x %08x %08x " |
277 | "%08x %08x %08x %08x %08x %08x " |
278 | "%08x %08x %08x %08x\n" , |
279 | dst_entries_get_slow(dst: &ipv4_dst_ops), |
280 | 0, /* st->in_hit */ |
281 | st->in_slow_tot, |
282 | st->in_slow_mc, |
283 | st->in_no_route, |
284 | st->in_brd, |
285 | st->in_martian_dst, |
286 | st->in_martian_src, |
287 | |
288 | 0, /* st->out_hit */ |
289 | st->out_slow_tot, |
290 | st->out_slow_mc, |
291 | |
292 | 0, /* st->gc_total */ |
293 | 0, /* st->gc_ignored */ |
294 | 0, /* st->gc_goal_miss */ |
295 | 0, /* st->gc_dst_overflow */ |
296 | 0, /* st->in_hlist_search */ |
297 | 0 /* st->out_hlist_search */ |
298 | ); |
299 | return 0; |
300 | } |
301 | |
302 | static const struct seq_operations rt_cpu_seq_ops = { |
303 | .start = rt_cpu_seq_start, |
304 | .next = rt_cpu_seq_next, |
305 | .stop = rt_cpu_seq_stop, |
306 | .show = rt_cpu_seq_show, |
307 | }; |
308 | |
309 | #ifdef CONFIG_IP_ROUTE_CLASSID |
310 | static int rt_acct_proc_show(struct seq_file *m, void *v) |
311 | { |
312 | struct ip_rt_acct *dst, *src; |
313 | unsigned int i, j; |
314 | |
315 | dst = kcalloc(n: 256, size: sizeof(struct ip_rt_acct), GFP_KERNEL); |
316 | if (!dst) |
317 | return -ENOMEM; |
318 | |
319 | for_each_possible_cpu(i) { |
320 | src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i); |
321 | for (j = 0; j < 256; j++) { |
322 | dst[j].o_bytes += src[j].o_bytes; |
323 | dst[j].o_packets += src[j].o_packets; |
324 | dst[j].i_bytes += src[j].i_bytes; |
325 | dst[j].i_packets += src[j].i_packets; |
326 | } |
327 | } |
328 | |
329 | seq_write(seq: m, data: dst, len: 256 * sizeof(struct ip_rt_acct)); |
330 | kfree(objp: dst); |
331 | return 0; |
332 | } |
333 | #endif |
334 | |
335 | static int __net_init ip_rt_do_proc_init(struct net *net) |
336 | { |
337 | struct proc_dir_entry *pde; |
338 | |
339 | pde = proc_create_seq("rt_cache" , 0444, net->proc_net, |
340 | &rt_cache_seq_ops); |
341 | if (!pde) |
342 | goto err1; |
343 | |
344 | pde = proc_create_seq("rt_cache" , 0444, net->proc_net_stat, |
345 | &rt_cpu_seq_ops); |
346 | if (!pde) |
347 | goto err2; |
348 | |
349 | #ifdef CONFIG_IP_ROUTE_CLASSID |
350 | pde = proc_create_single("rt_acct" , 0, net->proc_net, |
351 | rt_acct_proc_show); |
352 | if (!pde) |
353 | goto err3; |
354 | #endif |
355 | return 0; |
356 | |
357 | #ifdef CONFIG_IP_ROUTE_CLASSID |
358 | err3: |
359 | remove_proc_entry("rt_cache" , net->proc_net_stat); |
360 | #endif |
361 | err2: |
362 | remove_proc_entry("rt_cache" , net->proc_net); |
363 | err1: |
364 | return -ENOMEM; |
365 | } |
366 | |
367 | static void __net_exit ip_rt_do_proc_exit(struct net *net) |
368 | { |
369 | remove_proc_entry("rt_cache" , net->proc_net_stat); |
370 | remove_proc_entry("rt_cache" , net->proc_net); |
371 | #ifdef CONFIG_IP_ROUTE_CLASSID |
372 | remove_proc_entry("rt_acct" , net->proc_net); |
373 | #endif |
374 | } |
375 | |
376 | static struct pernet_operations ip_rt_proc_ops __net_initdata = { |
377 | .init = ip_rt_do_proc_init, |
378 | .exit = ip_rt_do_proc_exit, |
379 | }; |
380 | |
381 | static int __init ip_rt_proc_init(void) |
382 | { |
383 | return register_pernet_subsys(&ip_rt_proc_ops); |
384 | } |
385 | |
386 | #else |
/* Without CONFIG_PROC_FS there is nothing to register; report success. */
static inline int ip_rt_proc_init(void)
{
	return 0;
}
391 | #endif /* CONFIG_PROC_FS */ |
392 | |
393 | static inline bool rt_is_expired(const struct rtable *rth) |
394 | { |
395 | return rth->rt_genid != rt_genid_ipv4(net: dev_net(dev: rth->dst.dev)); |
396 | } |
397 | |
/*
 * Bump the IPv4 route generation id of @net.  Routes stamped with the
 * previous id are then reported as expired by rt_is_expired().
 */
void rt_cache_flush(struct net *net)
{
	rt_genid_bump_ipv4(net);
}
402 | |
403 | static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, |
404 | struct sk_buff *skb, |
405 | const void *daddr) |
406 | { |
407 | const struct rtable *rt = container_of(dst, struct rtable, dst); |
408 | struct net_device *dev = dst->dev; |
409 | struct neighbour *n; |
410 | |
411 | rcu_read_lock(); |
412 | |
413 | if (likely(rt->rt_gw_family == AF_INET)) { |
414 | n = ip_neigh_gw4(dev, daddr: rt->rt_gw4); |
415 | } else if (rt->rt_gw_family == AF_INET6) { |
416 | n = ip_neigh_gw6(dev, addr: &rt->rt_gw6); |
417 | } else { |
418 | __be32 pkey; |
419 | |
420 | pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr); |
421 | n = ip_neigh_gw4(dev, daddr: pkey); |
422 | } |
423 | |
424 | if (!IS_ERR(ptr: n) && !refcount_inc_not_zero(r: &n->refcnt)) |
425 | n = NULL; |
426 | |
427 | rcu_read_unlock(); |
428 | |
429 | return n; |
430 | } |
431 | |
432 | static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr) |
433 | { |
434 | const struct rtable *rt = container_of(dst, struct rtable, dst); |
435 | struct net_device *dev = dst->dev; |
436 | const __be32 *pkey = daddr; |
437 | |
438 | if (rt->rt_gw_family == AF_INET) { |
439 | pkey = (const __be32 *)&rt->rt_gw4; |
440 | } else if (rt->rt_gw_family == AF_INET6) { |
441 | return __ipv6_confirm_neigh_stub(dev, pkey: &rt->rt_gw6); |
442 | } else if (!daddr || |
443 | (rt->rt_flags & |
444 | (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) { |
445 | return; |
446 | } |
447 | __ipv4_confirm_neigh(dev, key: *(__force u32 *)pkey); |
448 | } |
449 | |
450 | /* Hash tables of size 2048..262144 depending on RAM size. |
451 | * Each bucket uses 8 bytes. |
452 | */ |
453 | static u32 ip_idents_mask __read_mostly; |
454 | static atomic_t *ip_idents __read_mostly; |
455 | static u32 *ip_tstamps __read_mostly; |
456 | |
457 | /* In order to protect privacy, we add a perturbation to identifiers |
458 | * if one generator is seldom used. This makes hard for an attacker |
459 | * to infer how many packets were sent between two points in time. |
460 | */ |
461 | static u32 ip_idents_reserve(u32 hash, int segs) |
462 | { |
463 | u32 bucket, old, now = (u32)jiffies; |
464 | atomic_t *p_id; |
465 | u32 *p_tstamp; |
466 | u32 delta = 0; |
467 | |
468 | bucket = hash & ip_idents_mask; |
469 | p_tstamp = ip_tstamps + bucket; |
470 | p_id = ip_idents + bucket; |
471 | old = READ_ONCE(*p_tstamp); |
472 | |
473 | if (old != now && cmpxchg(p_tstamp, old, now) == old) |
474 | delta = get_random_u32_below(ceil: now - old); |
475 | |
476 | /* If UBSAN reports an error there, please make sure your compiler |
477 | * supports -fno-strict-overflow before reporting it that was a bug |
478 | * in UBSAN, and it has been fixed in GCC-8. |
479 | */ |
480 | return atomic_add_return(i: segs + delta, v: p_id) - segs; |
481 | } |
482 | |
483 | void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) |
484 | { |
485 | u32 hash, id; |
486 | |
487 | /* Note the following code is not safe, but this is okay. */ |
488 | if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key))) |
489 | get_random_bytes(buf: &net->ipv4.ip_id_key, |
490 | len: sizeof(net->ipv4.ip_id_key)); |
491 | |
492 | hash = siphash_3u32(a: (__force u32)iph->daddr, |
493 | b: (__force u32)iph->saddr, |
494 | c: iph->protocol, |
495 | key: &net->ipv4.ip_id_key); |
496 | id = ip_idents_reserve(hash, segs); |
497 | iph->id = htons(id); |
498 | } |
499 | EXPORT_SYMBOL(__ip_select_ident); |
500 | |
501 | static void ip_rt_fix_tos(struct flowi4 *fl4) |
502 | { |
503 | __u8 tos = RT_FL_TOS(fl4); |
504 | |
505 | fl4->flowi4_tos = tos & IPTOS_RT_MASK; |
506 | if (tos & RTO_ONLINK) |
507 | fl4->flowi4_scope = RT_SCOPE_LINK; |
508 | } |
509 | |
510 | static void __build_flow_key(const struct net *net, struct flowi4 *fl4, |
511 | const struct sock *sk, const struct iphdr *iph, |
512 | int oif, __u8 tos, u8 prot, u32 mark, |
513 | int flow_flags) |
514 | { |
515 | __u8 scope = RT_SCOPE_UNIVERSE; |
516 | |
517 | if (sk) { |
518 | oif = sk->sk_bound_dev_if; |
519 | mark = READ_ONCE(sk->sk_mark); |
520 | tos = ip_sock_rt_tos(sk); |
521 | scope = ip_sock_rt_scope(sk); |
522 | prot = inet_test_bit(HDRINCL, sk) ? IPPROTO_RAW : |
523 | sk->sk_protocol; |
524 | } |
525 | |
526 | flowi4_init_output(fl4, oif, mark, tos: tos & IPTOS_RT_MASK, scope, |
527 | proto: prot, flags: flow_flags, daddr: iph->daddr, saddr: iph->saddr, dport: 0, sport: 0, |
528 | uid: sock_net_uid(net, sk)); |
529 | } |
530 | |
531 | static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, |
532 | const struct sock *sk) |
533 | { |
534 | const struct net *net = dev_net(dev: skb->dev); |
535 | const struct iphdr *iph = ip_hdr(skb); |
536 | int oif = skb->dev->ifindex; |
537 | u8 prot = iph->protocol; |
538 | u32 mark = skb->mark; |
539 | __u8 tos = iph->tos; |
540 | |
541 | __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, flow_flags: 0); |
542 | } |
543 | |
544 | static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) |
545 | { |
546 | const struct inet_sock *inet = inet_sk(sk); |
547 | const struct ip_options_rcu *inet_opt; |
548 | __be32 daddr = inet->inet_daddr; |
549 | |
550 | rcu_read_lock(); |
551 | inet_opt = rcu_dereference(inet->inet_opt); |
552 | if (inet_opt && inet_opt->opt.srr) |
553 | daddr = inet_opt->opt.faddr; |
554 | flowi4_init_output(fl4, oif: sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark), |
555 | tos: ip_sock_rt_tos(sk) & IPTOS_RT_MASK, |
556 | scope: ip_sock_rt_scope(sk), |
557 | inet_test_bit(HDRINCL, sk) ? |
558 | IPPROTO_RAW : sk->sk_protocol, |
559 | flags: inet_sk_flowi_flags(sk), |
560 | daddr, saddr: inet->inet_saddr, dport: 0, sport: 0, uid: sk->sk_uid); |
561 | rcu_read_unlock(); |
562 | } |
563 | |
/*
 * Build a flow key from the packet when one is available, and from the
 * socket alone otherwise.
 */
static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (!skb) {
		build_sk_flow_key(fl4, sk);
		return;
	}

	build_skb_flow_key(fl4, skb, sk);
}
572 | |
/*
 * Taken with bottom halves disabled by update_or_create_fnhe() while it
 * walks and modifies nexthop exception chains.
 */
static DEFINE_SPINLOCK(fnhe_lock);
574 | |
575 | static void fnhe_flush_routes(struct fib_nh_exception *fnhe) |
576 | { |
577 | struct rtable *rt; |
578 | |
579 | rt = rcu_dereference(fnhe->fnhe_rth_input); |
580 | if (rt) { |
581 | RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL); |
582 | dst_dev_put(dst: &rt->dst); |
583 | dst_release(dst: &rt->dst); |
584 | } |
585 | rt = rcu_dereference(fnhe->fnhe_rth_output); |
586 | if (rt) { |
587 | RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL); |
588 | dst_dev_put(dst: &rt->dst); |
589 | dst_release(dst: &rt->dst); |
590 | } |
591 | } |
592 | |
593 | static void fnhe_remove_oldest(struct fnhe_hash_bucket *hash) |
594 | { |
595 | struct fib_nh_exception __rcu **fnhe_p, **oldest_p; |
596 | struct fib_nh_exception *fnhe, *oldest = NULL; |
597 | |
598 | for (fnhe_p = &hash->chain; ; fnhe_p = &fnhe->fnhe_next) { |
599 | fnhe = rcu_dereference_protected(*fnhe_p, |
600 | lockdep_is_held(&fnhe_lock)); |
601 | if (!fnhe) |
602 | break; |
603 | if (!oldest || |
604 | time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) { |
605 | oldest = fnhe; |
606 | oldest_p = fnhe_p; |
607 | } |
608 | } |
609 | fnhe_flush_routes(fnhe: oldest); |
610 | *oldest_p = oldest->fnhe_next; |
611 | kfree_rcu(oldest, rcu); |
612 | } |
613 | |
614 | static u32 fnhe_hashfun(__be32 daddr) |
615 | { |
616 | static siphash_aligned_key_t fnhe_hash_key; |
617 | u64 hval; |
618 | |
619 | net_get_random_once(&fnhe_hash_key, sizeof(fnhe_hash_key)); |
620 | hval = siphash_1u32(a: (__force u32)daddr, key: &fnhe_hash_key); |
621 | return hash_64(val: hval, FNHE_HASH_SHIFT); |
622 | } |
623 | |
624 | static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) |
625 | { |
626 | rt->rt_pmtu = fnhe->fnhe_pmtu; |
627 | rt->rt_mtu_locked = fnhe->fnhe_mtu_locked; |
628 | rt->dst.expires = fnhe->fnhe_expires; |
629 | |
630 | if (fnhe->fnhe_gw) { |
631 | rt->rt_flags |= RTCF_REDIRECTED; |
632 | rt->rt_uses_gateway = 1; |
633 | rt->rt_gw_family = AF_INET; |
634 | rt->rt_gw4 = fnhe->fnhe_gw; |
635 | } |
636 | } |
637 | |
638 | static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr, |
639 | __be32 gw, u32 pmtu, bool lock, |
640 | unsigned long expires) |
641 | { |
642 | struct fnhe_hash_bucket *hash; |
643 | struct fib_nh_exception *fnhe; |
644 | struct rtable *rt; |
645 | u32 genid, hval; |
646 | unsigned int i; |
647 | int depth; |
648 | |
649 | genid = fnhe_genid(net: dev_net(dev: nhc->nhc_dev)); |
650 | hval = fnhe_hashfun(daddr); |
651 | |
652 | spin_lock_bh(lock: &fnhe_lock); |
653 | |
654 | hash = rcu_dereference(nhc->nhc_exceptions); |
655 | if (!hash) { |
656 | hash = kcalloc(FNHE_HASH_SIZE, size: sizeof(*hash), GFP_ATOMIC); |
657 | if (!hash) |
658 | goto out_unlock; |
659 | rcu_assign_pointer(nhc->nhc_exceptions, hash); |
660 | } |
661 | |
662 | hash += hval; |
663 | |
664 | depth = 0; |
665 | for (fnhe = rcu_dereference(hash->chain); fnhe; |
666 | fnhe = rcu_dereference(fnhe->fnhe_next)) { |
667 | if (fnhe->fnhe_daddr == daddr) |
668 | break; |
669 | depth++; |
670 | } |
671 | |
672 | if (fnhe) { |
673 | if (fnhe->fnhe_genid != genid) |
674 | fnhe->fnhe_genid = genid; |
675 | if (gw) |
676 | fnhe->fnhe_gw = gw; |
677 | if (pmtu) { |
678 | fnhe->fnhe_pmtu = pmtu; |
679 | fnhe->fnhe_mtu_locked = lock; |
680 | } |
681 | fnhe->fnhe_expires = max(1UL, expires); |
682 | /* Update all cached dsts too */ |
683 | rt = rcu_dereference(fnhe->fnhe_rth_input); |
684 | if (rt) |
685 | fill_route_from_fnhe(rt, fnhe); |
686 | rt = rcu_dereference(fnhe->fnhe_rth_output); |
687 | if (rt) |
688 | fill_route_from_fnhe(rt, fnhe); |
689 | } else { |
690 | /* Randomize max depth to avoid some side channels attacks. */ |
691 | int max_depth = FNHE_RECLAIM_DEPTH + |
692 | get_random_u32_below(FNHE_RECLAIM_DEPTH); |
693 | |
694 | while (depth > max_depth) { |
695 | fnhe_remove_oldest(hash); |
696 | depth--; |
697 | } |
698 | |
699 | fnhe = kzalloc(size: sizeof(*fnhe), GFP_ATOMIC); |
700 | if (!fnhe) |
701 | goto out_unlock; |
702 | |
703 | fnhe->fnhe_next = hash->chain; |
704 | |
705 | fnhe->fnhe_genid = genid; |
706 | fnhe->fnhe_daddr = daddr; |
707 | fnhe->fnhe_gw = gw; |
708 | fnhe->fnhe_pmtu = pmtu; |
709 | fnhe->fnhe_mtu_locked = lock; |
710 | fnhe->fnhe_expires = max(1UL, expires); |
711 | |
712 | rcu_assign_pointer(hash->chain, fnhe); |
713 | |
714 | /* Exception created; mark the cached routes for the nexthop |
715 | * stale, so anyone caching it rechecks if this exception |
716 | * applies to them. |
717 | */ |
718 | rt = rcu_dereference(nhc->nhc_rth_input); |
719 | if (rt) |
720 | rt->dst.obsolete = DST_OBSOLETE_KILL; |
721 | |
722 | for_each_possible_cpu(i) { |
723 | struct rtable __rcu **prt; |
724 | |
725 | prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i); |
726 | rt = rcu_dereference(*prt); |
727 | if (rt) |
728 | rt->dst.obsolete = DST_OBSOLETE_KILL; |
729 | } |
730 | } |
731 | |
732 | fnhe->fnhe_stamp = jiffies; |
733 | |
734 | out_unlock: |
735 | spin_unlock_bh(lock: &fnhe_lock); |
736 | } |
737 | |
738 | static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4, |
739 | bool kill_route) |
740 | { |
741 | __be32 new_gw = icmp_hdr(skb)->un.gateway; |
742 | __be32 old_gw = ip_hdr(skb)->saddr; |
743 | struct net_device *dev = skb->dev; |
744 | struct in_device *in_dev; |
745 | struct fib_result res; |
746 | struct neighbour *n; |
747 | struct net *net; |
748 | |
749 | switch (icmp_hdr(skb)->code & 7) { |
750 | case ICMP_REDIR_NET: |
751 | case ICMP_REDIR_NETTOS: |
752 | case ICMP_REDIR_HOST: |
753 | case ICMP_REDIR_HOSTTOS: |
754 | break; |
755 | |
756 | default: |
757 | return; |
758 | } |
759 | |
760 | if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw) |
761 | return; |
762 | |
763 | in_dev = __in_dev_get_rcu(dev); |
764 | if (!in_dev) |
765 | return; |
766 | |
767 | net = dev_net(dev); |
768 | if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) || |
769 | ipv4_is_multicast(addr: new_gw) || ipv4_is_lbcast(addr: new_gw) || |
770 | ipv4_is_zeronet(addr: new_gw)) |
771 | goto reject_redirect; |
772 | |
773 | if (!IN_DEV_SHARED_MEDIA(in_dev)) { |
774 | if (!inet_addr_onlink(in_dev, a: new_gw, b: old_gw)) |
775 | goto reject_redirect; |
776 | if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(gw: new_gw, dev)) |
777 | goto reject_redirect; |
778 | } else { |
779 | if (inet_addr_type(net, addr: new_gw) != RTN_UNICAST) |
780 | goto reject_redirect; |
781 | } |
782 | |
783 | n = __ipv4_neigh_lookup(dev: rt->dst.dev, key: new_gw); |
784 | if (!n) |
785 | n = neigh_create(tbl: &arp_tbl, pkey: &new_gw, dev: rt->dst.dev); |
786 | if (!IS_ERR(ptr: n)) { |
787 | if (!(READ_ONCE(n->nud_state) & NUD_VALID)) { |
788 | neigh_event_send(neigh: n, NULL); |
789 | } else { |
790 | if (fib_lookup(net, flp: fl4, res: &res, flags: 0) == 0) { |
791 | struct fib_nh_common *nhc; |
792 | |
793 | fib_select_path(net, res: &res, fl4, skb); |
794 | nhc = FIB_RES_NHC(res); |
795 | update_or_create_fnhe(nhc, daddr: fl4->daddr, gw: new_gw, |
796 | pmtu: 0, lock: false, |
797 | expires: jiffies + ip_rt_gc_timeout); |
798 | } |
799 | if (kill_route) |
800 | rt->dst.obsolete = DST_OBSOLETE_KILL; |
801 | call_netevent_notifiers(val: NETEVENT_NEIGH_UPDATE, v: n); |
802 | } |
803 | neigh_release(neigh: n); |
804 | } |
805 | return; |
806 | |
807 | reject_redirect: |
808 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
809 | if (IN_DEV_LOG_MARTIANS(in_dev)) { |
810 | const struct iphdr *iph = (const struct iphdr *) skb->data; |
811 | __be32 daddr = iph->daddr; |
812 | __be32 saddr = iph->saddr; |
813 | |
814 | net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n" |
815 | " Advised path = %pI4 -> %pI4\n" , |
816 | &old_gw, dev->name, &new_gw, |
817 | &saddr, &daddr); |
818 | } |
819 | #endif |
820 | ; |
821 | } |
822 | |
823 | static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) |
824 | { |
825 | struct rtable *rt; |
826 | struct flowi4 fl4; |
827 | const struct iphdr *iph = (const struct iphdr *) skb->data; |
828 | struct net *net = dev_net(dev: skb->dev); |
829 | int oif = skb->dev->ifindex; |
830 | u8 prot = iph->protocol; |
831 | u32 mark = skb->mark; |
832 | __u8 tos = iph->tos; |
833 | |
834 | rt = (struct rtable *) dst; |
835 | |
836 | __build_flow_key(net, fl4: &fl4, sk, iph, oif, tos, prot, mark, flow_flags: 0); |
837 | __ip_do_redirect(rt, skb, fl4: &fl4, kill_route: true); |
838 | } |
839 | |
840 | static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) |
841 | { |
842 | struct rtable *rt = (struct rtable *)dst; |
843 | struct dst_entry *ret = dst; |
844 | |
845 | if (rt) { |
846 | if (dst->obsolete > 0) { |
847 | ip_rt_put(rt); |
848 | ret = NULL; |
849 | } else if ((rt->rt_flags & RTCF_REDIRECTED) || |
850 | rt->dst.expires) { |
851 | ip_rt_put(rt); |
852 | ret = NULL; |
853 | } |
854 | } |
855 | return ret; |
856 | } |
857 | |
858 | /* |
859 | * Algorithm: |
860 | * 1. The first ip_rt_redirect_number redirects are sent |
861 | * with exponential backoff, then we stop sending them at all, |
862 | * assuming that the host ignores our redirects. |
863 | * 2. If we did not see packets requiring redirects |
864 | * during ip_rt_redirect_silence, we assume that the host |
865 | * forgot redirected route and start to send redirects again. |
866 | * |
867 | * This algorithm is much cheaper and more intelligent than dumb load limiting |
868 | * in icmp.c. |
869 | * |
870 | * NOTE. Do not forget to inhibit load limiting for redirects (redundant) |
871 | * and "frag. need" (breaks PMTU discovery) in icmp.c. |
872 | */ |
873 | |
874 | void ip_rt_send_redirect(struct sk_buff *skb) |
875 | { |
876 | struct rtable *rt = skb_rtable(skb); |
877 | struct in_device *in_dev; |
878 | struct inet_peer *peer; |
879 | struct net *net; |
880 | int log_martians; |
881 | int vif; |
882 | |
883 | rcu_read_lock(); |
884 | in_dev = __in_dev_get_rcu(dev: rt->dst.dev); |
885 | if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { |
886 | rcu_read_unlock(); |
887 | return; |
888 | } |
889 | log_martians = IN_DEV_LOG_MARTIANS(in_dev); |
890 | vif = l3mdev_master_ifindex_rcu(dev: rt->dst.dev); |
891 | rcu_read_unlock(); |
892 | |
893 | net = dev_net(dev: rt->dst.dev); |
894 | peer = inet_getpeer_v4(base: net->ipv4.peers, v4daddr: ip_hdr(skb)->saddr, vif, create: 1); |
895 | if (!peer) { |
896 | icmp_send(skb_in: skb, ICMP_REDIRECT, ICMP_REDIR_HOST, |
897 | info: rt_nexthop(rt, daddr: ip_hdr(skb)->daddr)); |
898 | return; |
899 | } |
900 | |
901 | /* No redirected packets during ip_rt_redirect_silence; |
902 | * reset the algorithm. |
903 | */ |
904 | if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) { |
905 | peer->rate_tokens = 0; |
906 | peer->n_redirects = 0; |
907 | } |
908 | |
909 | /* Too many ignored redirects; do not send anything |
910 | * set dst.rate_last to the last seen redirected packet. |
911 | */ |
912 | if (peer->n_redirects >= ip_rt_redirect_number) { |
913 | peer->rate_last = jiffies; |
914 | goto out_put_peer; |
915 | } |
916 | |
917 | /* Check for load limit; set rate_last to the latest sent |
918 | * redirect. |
919 | */ |
920 | if (peer->n_redirects == 0 || |
921 | time_after(jiffies, |
922 | (peer->rate_last + |
923 | (ip_rt_redirect_load << peer->n_redirects)))) { |
924 | __be32 gw = rt_nexthop(rt, daddr: ip_hdr(skb)->daddr); |
925 | |
926 | icmp_send(skb_in: skb, ICMP_REDIRECT, ICMP_REDIR_HOST, info: gw); |
927 | peer->rate_last = jiffies; |
928 | ++peer->n_redirects; |
929 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
930 | if (log_martians && |
931 | peer->n_redirects == ip_rt_redirect_number) |
932 | net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n" , |
933 | &ip_hdr(skb)->saddr, inet_iif(skb), |
934 | &ip_hdr(skb)->daddr, &gw); |
935 | #endif |
936 | } |
937 | out_put_peer: |
938 | inet_putpeer(p: peer); |
939 | } |
940 | |
941 | static int ip_error(struct sk_buff *skb) |
942 | { |
943 | struct rtable *rt = skb_rtable(skb); |
944 | struct net_device *dev = skb->dev; |
945 | struct in_device *in_dev; |
946 | struct inet_peer *peer; |
947 | unsigned long now; |
948 | struct net *net; |
949 | SKB_DR(reason); |
950 | bool send; |
951 | int code; |
952 | |
953 | if (netif_is_l3_master(dev: skb->dev)) { |
954 | dev = __dev_get_by_index(net: dev_net(dev: skb->dev), IPCB(skb)->iif); |
955 | if (!dev) |
956 | goto out; |
957 | } |
958 | |
959 | in_dev = __in_dev_get_rcu(dev); |
960 | |
961 | /* IP on this device is disabled. */ |
962 | if (!in_dev) |
963 | goto out; |
964 | |
965 | net = dev_net(dev: rt->dst.dev); |
966 | if (!IN_DEV_FORWARD(in_dev)) { |
967 | switch (rt->dst.error) { |
968 | case EHOSTUNREACH: |
969 | SKB_DR_SET(reason, IP_INADDRERRORS); |
970 | __IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS); |
971 | break; |
972 | |
973 | case ENETUNREACH: |
974 | SKB_DR_SET(reason, IP_INNOROUTES); |
975 | __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); |
976 | break; |
977 | } |
978 | goto out; |
979 | } |
980 | |
981 | switch (rt->dst.error) { |
982 | case EINVAL: |
983 | default: |
984 | goto out; |
985 | case EHOSTUNREACH: |
986 | code = ICMP_HOST_UNREACH; |
987 | break; |
988 | case ENETUNREACH: |
989 | code = ICMP_NET_UNREACH; |
990 | SKB_DR_SET(reason, IP_INNOROUTES); |
991 | __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); |
992 | break; |
993 | case EACCES: |
994 | code = ICMP_PKT_FILTERED; |
995 | break; |
996 | } |
997 | |
998 | peer = inet_getpeer_v4(base: net->ipv4.peers, v4daddr: ip_hdr(skb)->saddr, |
999 | vif: l3mdev_master_ifindex(dev: skb->dev), create: 1); |
1000 | |
1001 | send = true; |
1002 | if (peer) { |
1003 | now = jiffies; |
1004 | peer->rate_tokens += now - peer->rate_last; |
1005 | if (peer->rate_tokens > ip_rt_error_burst) |
1006 | peer->rate_tokens = ip_rt_error_burst; |
1007 | peer->rate_last = now; |
1008 | if (peer->rate_tokens >= ip_rt_error_cost) |
1009 | peer->rate_tokens -= ip_rt_error_cost; |
1010 | else |
1011 | send = false; |
1012 | inet_putpeer(p: peer); |
1013 | } |
1014 | if (send) |
1015 | icmp_send(skb_in: skb, ICMP_DEST_UNREACH, code, info: 0); |
1016 | |
1017 | out: kfree_skb_reason(skb, reason); |
1018 | return 0; |
1019 | } |
1020 | |
1021 | static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) |
1022 | { |
1023 | struct dst_entry *dst = &rt->dst; |
1024 | struct net *net = dev_net(dev: dst->dev); |
1025 | struct fib_result res; |
1026 | bool lock = false; |
1027 | u32 old_mtu; |
1028 | |
1029 | if (ip_mtu_locked(dst)) |
1030 | return; |
1031 | |
1032 | old_mtu = ipv4_mtu(dst); |
1033 | if (old_mtu < mtu) |
1034 | return; |
1035 | |
1036 | if (mtu < net->ipv4.ip_rt_min_pmtu) { |
1037 | lock = true; |
1038 | mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu); |
1039 | } |
1040 | |
1041 | if (rt->rt_pmtu == mtu && !lock && |
1042 | time_before(jiffies, dst->expires - net->ipv4.ip_rt_mtu_expires / 2)) |
1043 | return; |
1044 | |
1045 | rcu_read_lock(); |
1046 | if (fib_lookup(net, flp: fl4, res: &res, flags: 0) == 0) { |
1047 | struct fib_nh_common *nhc; |
1048 | |
1049 | fib_select_path(net, res: &res, fl4, NULL); |
1050 | nhc = FIB_RES_NHC(res); |
1051 | update_or_create_fnhe(nhc, daddr: fl4->daddr, gw: 0, pmtu: mtu, lock, |
1052 | expires: jiffies + net->ipv4.ip_rt_mtu_expires); |
1053 | } |
1054 | rcu_read_unlock(); |
1055 | } |
1056 | |
1057 | static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, |
1058 | struct sk_buff *skb, u32 mtu, |
1059 | bool confirm_neigh) |
1060 | { |
1061 | struct rtable *rt = (struct rtable *) dst; |
1062 | struct flowi4 fl4; |
1063 | |
1064 | ip_rt_build_flow_key(fl4: &fl4, sk, skb); |
1065 | |
1066 | /* Don't make lookup fail for bridged encapsulations */ |
1067 | if (skb && netif_is_any_bridge_port(dev: skb->dev)) |
1068 | fl4.flowi4_oif = 0; |
1069 | |
1070 | __ip_rt_update_pmtu(rt, fl4: &fl4, mtu); |
1071 | } |
1072 | |
1073 | void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, |
1074 | int oif, u8 protocol) |
1075 | { |
1076 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
1077 | struct flowi4 fl4; |
1078 | struct rtable *rt; |
1079 | u32 mark = IP4_REPLY_MARK(net, skb->mark); |
1080 | |
1081 | __build_flow_key(net, fl4: &fl4, NULL, iph, oif, tos: iph->tos, prot: protocol, mark, |
1082 | flow_flags: 0); |
1083 | rt = __ip_route_output_key(net, flp: &fl4); |
1084 | if (!IS_ERR(ptr: rt)) { |
1085 | __ip_rt_update_pmtu(rt, fl4: &fl4, mtu); |
1086 | ip_rt_put(rt); |
1087 | } |
1088 | } |
1089 | EXPORT_SYMBOL_GPL(ipv4_update_pmtu); |
1090 | |
1091 | static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) |
1092 | { |
1093 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
1094 | struct flowi4 fl4; |
1095 | struct rtable *rt; |
1096 | |
1097 | __build_flow_key(net: sock_net(sk), fl4: &fl4, sk, iph, oif: 0, tos: 0, prot: 0, mark: 0, flow_flags: 0); |
1098 | |
1099 | if (!fl4.flowi4_mark) |
1100 | fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark); |
1101 | |
1102 | rt = __ip_route_output_key(net: sock_net(sk), flp: &fl4); |
1103 | if (!IS_ERR(ptr: rt)) { |
1104 | __ip_rt_update_pmtu(rt, fl4: &fl4, mtu); |
1105 | ip_rt_put(rt); |
1106 | } |
1107 | } |
1108 | |
1109 | void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) |
1110 | { |
1111 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
1112 | struct flowi4 fl4; |
1113 | struct rtable *rt; |
1114 | struct dst_entry *odst = NULL; |
1115 | bool new = false; |
1116 | struct net *net = sock_net(sk); |
1117 | |
1118 | bh_lock_sock(sk); |
1119 | |
1120 | if (!ip_sk_accept_pmtu(sk)) |
1121 | goto out; |
1122 | |
1123 | odst = sk_dst_get(sk); |
1124 | |
1125 | if (sock_owned_by_user(sk) || !odst) { |
1126 | __ipv4_sk_update_pmtu(skb, sk, mtu); |
1127 | goto out; |
1128 | } |
1129 | |
1130 | __build_flow_key(net, fl4: &fl4, sk, iph, oif: 0, tos: 0, prot: 0, mark: 0, flow_flags: 0); |
1131 | |
1132 | rt = (struct rtable *)odst; |
1133 | if (odst->obsolete && !odst->ops->check(odst, 0)) { |
1134 | rt = ip_route_output_flow(sock_net(sk), flp: &fl4, sk); |
1135 | if (IS_ERR(ptr: rt)) |
1136 | goto out; |
1137 | |
1138 | new = true; |
1139 | } |
1140 | |
1141 | __ip_rt_update_pmtu(rt: (struct rtable *)xfrm_dst_path(dst: &rt->dst), fl4: &fl4, mtu); |
1142 | |
1143 | if (!dst_check(dst: &rt->dst, cookie: 0)) { |
1144 | if (new) |
1145 | dst_release(dst: &rt->dst); |
1146 | |
1147 | rt = ip_route_output_flow(sock_net(sk), flp: &fl4, sk); |
1148 | if (IS_ERR(ptr: rt)) |
1149 | goto out; |
1150 | |
1151 | new = true; |
1152 | } |
1153 | |
1154 | if (new) |
1155 | sk_dst_set(sk, dst: &rt->dst); |
1156 | |
1157 | out: |
1158 | bh_unlock_sock(sk); |
1159 | dst_release(dst: odst); |
1160 | } |
1161 | EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); |
1162 | |
1163 | void ipv4_redirect(struct sk_buff *skb, struct net *net, |
1164 | int oif, u8 protocol) |
1165 | { |
1166 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
1167 | struct flowi4 fl4; |
1168 | struct rtable *rt; |
1169 | |
1170 | __build_flow_key(net, fl4: &fl4, NULL, iph, oif, tos: iph->tos, prot: protocol, mark: 0, flow_flags: 0); |
1171 | rt = __ip_route_output_key(net, flp: &fl4); |
1172 | if (!IS_ERR(ptr: rt)) { |
1173 | __ip_do_redirect(rt, skb, fl4: &fl4, kill_route: false); |
1174 | ip_rt_put(rt); |
1175 | } |
1176 | } |
1177 | EXPORT_SYMBOL_GPL(ipv4_redirect); |
1178 | |
1179 | void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) |
1180 | { |
1181 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
1182 | struct flowi4 fl4; |
1183 | struct rtable *rt; |
1184 | struct net *net = sock_net(sk); |
1185 | |
1186 | __build_flow_key(net, fl4: &fl4, sk, iph, oif: 0, tos: 0, prot: 0, mark: 0, flow_flags: 0); |
1187 | rt = __ip_route_output_key(net, flp: &fl4); |
1188 | if (!IS_ERR(ptr: rt)) { |
1189 | __ip_do_redirect(rt, skb, fl4: &fl4, kill_route: false); |
1190 | ip_rt_put(rt); |
1191 | } |
1192 | } |
1193 | EXPORT_SYMBOL_GPL(ipv4_sk_redirect); |
1194 | |
1195 | INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst, |
1196 | u32 cookie) |
1197 | { |
1198 | struct rtable *rt = (struct rtable *) dst; |
1199 | |
1200 | /* All IPV4 dsts are created with ->obsolete set to the value |
1201 | * DST_OBSOLETE_FORCE_CHK which forces validation calls down |
1202 | * into this function always. |
1203 | * |
1204 | * When a PMTU/redirect information update invalidates a route, |
1205 | * this is indicated by setting obsolete to DST_OBSOLETE_KILL or |
1206 | * DST_OBSOLETE_DEAD. |
1207 | */ |
1208 | if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rth: rt)) |
1209 | return NULL; |
1210 | return dst; |
1211 | } |
1212 | EXPORT_INDIRECT_CALLABLE(ipv4_dst_check); |
1213 | |
1214 | static void ipv4_send_dest_unreach(struct sk_buff *skb) |
1215 | { |
1216 | struct net_device *dev; |
1217 | struct ip_options opt; |
1218 | int res; |
1219 | |
1220 | /* Recompile ip options since IPCB may not be valid anymore. |
1221 | * Also check we have a reasonable ipv4 header. |
1222 | */ |
1223 | if (!pskb_network_may_pull(skb, len: sizeof(struct iphdr)) || |
1224 | ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5) |
1225 | return; |
1226 | |
1227 | memset(&opt, 0, sizeof(opt)); |
1228 | if (ip_hdr(skb)->ihl > 5) { |
1229 | if (!pskb_network_may_pull(skb, len: ip_hdr(skb)->ihl * 4)) |
1230 | return; |
1231 | opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); |
1232 | |
1233 | rcu_read_lock(); |
1234 | dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev; |
1235 | res = __ip_options_compile(net: dev_net(dev), opt: &opt, skb, NULL); |
1236 | rcu_read_unlock(); |
1237 | |
1238 | if (res) |
1239 | return; |
1240 | } |
1241 | __icmp_send(skb_in: skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, info: 0, opt: &opt); |
1242 | } |
1243 | |
1244 | static void ipv4_link_failure(struct sk_buff *skb) |
1245 | { |
1246 | struct rtable *rt; |
1247 | |
1248 | ipv4_send_dest_unreach(skb); |
1249 | |
1250 | rt = skb_rtable(skb); |
1251 | if (rt) |
1252 | dst_set_expires(dst: &rt->dst, timeout: 0); |
1253 | } |
1254 | |
1255 | static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb) |
1256 | { |
1257 | pr_debug("%s: %pI4 -> %pI4, %s\n" , |
1258 | __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, |
1259 | skb->dev ? skb->dev->name : "?" ); |
1260 | kfree_skb(skb); |
1261 | WARN_ON(1); |
1262 | return 0; |
1263 | } |
1264 | |
1265 | /* |
1266 | * We do not cache source address of outgoing interface, |
1267 | * because it is used only by IP RR, TS and SRR options, |
1268 | * so that it out of fast path. |
1269 | * |
1270 | * BTW remember: "addr" is allowed to be not aligned |
1271 | * in IP options! |
1272 | */ |
1273 | |
1274 | void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) |
1275 | { |
1276 | __be32 src; |
1277 | |
1278 | if (rt_is_output_route(rt)) |
1279 | src = ip_hdr(skb)->saddr; |
1280 | else { |
1281 | struct fib_result res; |
1282 | struct iphdr *iph = ip_hdr(skb); |
1283 | struct flowi4 fl4 = { |
1284 | .daddr = iph->daddr, |
1285 | .saddr = iph->saddr, |
1286 | .flowi4_tos = RT_TOS(iph->tos), |
1287 | .flowi4_oif = rt->dst.dev->ifindex, |
1288 | .flowi4_iif = skb->dev->ifindex, |
1289 | .flowi4_mark = skb->mark, |
1290 | }; |
1291 | |
1292 | rcu_read_lock(); |
1293 | if (fib_lookup(net: dev_net(dev: rt->dst.dev), flp: &fl4, res: &res, flags: 0) == 0) |
1294 | src = fib_result_prefsrc(net: dev_net(dev: rt->dst.dev), res: &res); |
1295 | else |
1296 | src = inet_select_addr(dev: rt->dst.dev, |
1297 | dst: rt_nexthop(rt, daddr: iph->daddr), |
1298 | scope: RT_SCOPE_UNIVERSE); |
1299 | rcu_read_unlock(); |
1300 | } |
1301 | memcpy(addr, &src, 4); |
1302 | } |
1303 | |
#ifdef CONFIG_IP_ROUTE_CLASSID
/*
 * Merge @tag into the route's tclassid. The low and the high 16-bit halves
 * are handled independently, and a half is only filled in when it is
 * currently zero, so values that are already set are never overwritten.
 */
static void set_class_tag(struct rtable *rt, u32 tag)
{
	const u32 low_half = 0xFFFF;
	const u32 high_half = 0xFFFF0000;

	if ((rt->dst.tclassid & low_half) == 0)
		rt->dst.tclassid |= tag & low_half;

	if ((rt->dst.tclassid & high_half) == 0)
		rt->dst.tclassid |= tag & high_half;
}
#endif
1313 | |
1314 | static unsigned int ipv4_default_advmss(const struct dst_entry *dst) |
1315 | { |
1316 | struct net *net = dev_net(dev: dst->dev); |
1317 | unsigned int = sizeof(struct tcphdr) + sizeof(struct iphdr); |
1318 | unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, |
1319 | net->ipv4.ip_rt_min_advmss); |
1320 | |
1321 | return min(advmss, IPV4_MAX_PMTU - header_size); |
1322 | } |
1323 | |
1324 | INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst) |
1325 | { |
1326 | return ip_dst_mtu_maybe_forward(dst, forwarding: false); |
1327 | } |
1328 | EXPORT_INDIRECT_CALLABLE(ipv4_mtu); |
1329 | |
1330 | static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr) |
1331 | { |
1332 | struct fnhe_hash_bucket *hash; |
1333 | struct fib_nh_exception *fnhe, __rcu **fnhe_p; |
1334 | u32 hval = fnhe_hashfun(daddr); |
1335 | |
1336 | spin_lock_bh(lock: &fnhe_lock); |
1337 | |
1338 | hash = rcu_dereference_protected(nhc->nhc_exceptions, |
1339 | lockdep_is_held(&fnhe_lock)); |
1340 | hash += hval; |
1341 | |
1342 | fnhe_p = &hash->chain; |
1343 | fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); |
1344 | while (fnhe) { |
1345 | if (fnhe->fnhe_daddr == daddr) { |
1346 | rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( |
1347 | fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); |
1348 | /* set fnhe_daddr to 0 to ensure it won't bind with |
1349 | * new dsts in rt_bind_exception(). |
1350 | */ |
1351 | fnhe->fnhe_daddr = 0; |
1352 | fnhe_flush_routes(fnhe); |
1353 | kfree_rcu(fnhe, rcu); |
1354 | break; |
1355 | } |
1356 | fnhe_p = &fnhe->fnhe_next; |
1357 | fnhe = rcu_dereference_protected(fnhe->fnhe_next, |
1358 | lockdep_is_held(&fnhe_lock)); |
1359 | } |
1360 | |
1361 | spin_unlock_bh(lock: &fnhe_lock); |
1362 | } |
1363 | |
1364 | static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc, |
1365 | __be32 daddr) |
1366 | { |
1367 | struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions); |
1368 | struct fib_nh_exception *fnhe; |
1369 | u32 hval; |
1370 | |
1371 | if (!hash) |
1372 | return NULL; |
1373 | |
1374 | hval = fnhe_hashfun(daddr); |
1375 | |
1376 | for (fnhe = rcu_dereference(hash[hval].chain); fnhe; |
1377 | fnhe = rcu_dereference(fnhe->fnhe_next)) { |
1378 | if (fnhe->fnhe_daddr == daddr) { |
1379 | if (fnhe->fnhe_expires && |
1380 | time_after(jiffies, fnhe->fnhe_expires)) { |
1381 | ip_del_fnhe(nhc, daddr); |
1382 | break; |
1383 | } |
1384 | return fnhe; |
1385 | } |
1386 | } |
1387 | return NULL; |
1388 | } |
1389 | |
1390 | /* MTU selection: |
1391 | * 1. mtu on route is locked - use it |
1392 | * 2. mtu from nexthop exception |
1393 | * 3. mtu from egress device |
1394 | */ |
1395 | |
1396 | u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) |
1397 | { |
1398 | struct fib_nh_common *nhc = res->nhc; |
1399 | struct net_device *dev = nhc->nhc_dev; |
1400 | struct fib_info *fi = res->fi; |
1401 | u32 mtu = 0; |
1402 | |
1403 | if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) || |
1404 | fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU)) |
1405 | mtu = fi->fib_mtu; |
1406 | |
1407 | if (likely(!mtu)) { |
1408 | struct fib_nh_exception *fnhe; |
1409 | |
1410 | fnhe = find_exception(nhc, daddr); |
1411 | if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires)) |
1412 | mtu = fnhe->fnhe_pmtu; |
1413 | } |
1414 | |
1415 | if (likely(!mtu)) |
1416 | mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU); |
1417 | |
1418 | return mtu - lwtunnel_headroom(lwtstate: nhc->nhc_lwtstate, mtu); |
1419 | } |
1420 | |
1421 | static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, |
1422 | __be32 daddr, const bool do_cache) |
1423 | { |
1424 | bool ret = false; |
1425 | |
1426 | spin_lock_bh(lock: &fnhe_lock); |
1427 | |
1428 | if (daddr == fnhe->fnhe_daddr) { |
1429 | struct rtable __rcu **porig; |
1430 | struct rtable *orig; |
1431 | int genid = fnhe_genid(net: dev_net(dev: rt->dst.dev)); |
1432 | |
1433 | if (rt_is_input_route(rt)) |
1434 | porig = &fnhe->fnhe_rth_input; |
1435 | else |
1436 | porig = &fnhe->fnhe_rth_output; |
1437 | orig = rcu_dereference(*porig); |
1438 | |
1439 | if (fnhe->fnhe_genid != genid) { |
1440 | fnhe->fnhe_genid = genid; |
1441 | fnhe->fnhe_gw = 0; |
1442 | fnhe->fnhe_pmtu = 0; |
1443 | fnhe->fnhe_expires = 0; |
1444 | fnhe->fnhe_mtu_locked = false; |
1445 | fnhe_flush_routes(fnhe); |
1446 | orig = NULL; |
1447 | } |
1448 | fill_route_from_fnhe(rt, fnhe); |
1449 | if (!rt->rt_gw4) { |
1450 | rt->rt_gw4 = daddr; |
1451 | rt->rt_gw_family = AF_INET; |
1452 | } |
1453 | |
1454 | if (do_cache) { |
1455 | dst_hold(dst: &rt->dst); |
1456 | rcu_assign_pointer(*porig, rt); |
1457 | if (orig) { |
1458 | dst_dev_put(dst: &orig->dst); |
1459 | dst_release(dst: &orig->dst); |
1460 | } |
1461 | ret = true; |
1462 | } |
1463 | |
1464 | fnhe->fnhe_stamp = jiffies; |
1465 | } |
1466 | spin_unlock_bh(lock: &fnhe_lock); |
1467 | |
1468 | return ret; |
1469 | } |
1470 | |
1471 | static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt) |
1472 | { |
1473 | struct rtable *orig, *prev, **p; |
1474 | bool ret = true; |
1475 | |
1476 | if (rt_is_input_route(rt)) { |
1477 | p = (struct rtable **)&nhc->nhc_rth_input; |
1478 | } else { |
1479 | p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output); |
1480 | } |
1481 | orig = *p; |
1482 | |
1483 | /* hold dst before doing cmpxchg() to avoid race condition |
1484 | * on this dst |
1485 | */ |
1486 | dst_hold(dst: &rt->dst); |
1487 | prev = cmpxchg(p, orig, rt); |
1488 | if (prev == orig) { |
1489 | if (orig) { |
1490 | rt_add_uncached_list(rt: orig); |
1491 | dst_release(dst: &orig->dst); |
1492 | } |
1493 | } else { |
1494 | dst_release(dst: &rt->dst); |
1495 | ret = false; |
1496 | } |
1497 | |
1498 | return ret; |
1499 | } |
1500 | |
/*
 * Per-CPU bookkeeping for routes that are not held in a nexthop or
 * exception cache slot.
 */
struct uncached_list {
	/* Taken (with BHs disabled) around every list operation below. */
	spinlock_t		lock;
	/* Routes added by rt_add_uncached_list(). */
	struct list_head	head;
	/* Routes moved here by rt_flush_dev() after their device was
	 * replaced with blackhole_netdev.
	 */
	struct list_head	quarantine;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
1508 | |
1509 | void rt_add_uncached_list(struct rtable *rt) |
1510 | { |
1511 | struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list); |
1512 | |
1513 | rt->dst.rt_uncached_list = ul; |
1514 | |
1515 | spin_lock_bh(lock: &ul->lock); |
1516 | list_add_tail(new: &rt->dst.rt_uncached, head: &ul->head); |
1517 | spin_unlock_bh(lock: &ul->lock); |
1518 | } |
1519 | |
1520 | void rt_del_uncached_list(struct rtable *rt) |
1521 | { |
1522 | if (!list_empty(head: &rt->dst.rt_uncached)) { |
1523 | struct uncached_list *ul = rt->dst.rt_uncached_list; |
1524 | |
1525 | spin_lock_bh(lock: &ul->lock); |
1526 | list_del_init(entry: &rt->dst.rt_uncached); |
1527 | spin_unlock_bh(lock: &ul->lock); |
1528 | } |
1529 | } |
1530 | |
/*
 * Tear down an IPv4 dst: drop its metrics reference and take the route
 * off the uncached list if it is still linked there.
 */
static void ipv4_dst_destroy(struct dst_entry *dst)
{
	ip_dst_metrics_put(dst);
	rt_del_uncached_list((struct rtable *)dst);
}
1538 | |
1539 | void rt_flush_dev(struct net_device *dev) |
1540 | { |
1541 | struct rtable *rt, *safe; |
1542 | int cpu; |
1543 | |
1544 | for_each_possible_cpu(cpu) { |
1545 | struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); |
1546 | |
1547 | if (list_empty(head: &ul->head)) |
1548 | continue; |
1549 | |
1550 | spin_lock_bh(lock: &ul->lock); |
1551 | list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) { |
1552 | if (rt->dst.dev != dev) |
1553 | continue; |
1554 | rt->dst.dev = blackhole_netdev; |
1555 | netdev_ref_replace(odev: dev, ndev: blackhole_netdev, |
1556 | tracker: &rt->dst.dev_tracker, GFP_ATOMIC); |
1557 | list_move(list: &rt->dst.rt_uncached, head: &ul->quarantine); |
1558 | } |
1559 | spin_unlock_bh(lock: &ul->lock); |
1560 | } |
1561 | } |
1562 | |
1563 | static bool rt_cache_valid(const struct rtable *rt) |
1564 | { |
1565 | return rt && |
1566 | rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && |
1567 | !rt_is_expired(rth: rt); |
1568 | } |
1569 | |
1570 | static void rt_set_nexthop(struct rtable *rt, __be32 daddr, |
1571 | const struct fib_result *res, |
1572 | struct fib_nh_exception *fnhe, |
1573 | struct fib_info *fi, u16 type, u32 itag, |
1574 | const bool do_cache) |
1575 | { |
1576 | bool cached = false; |
1577 | |
1578 | if (fi) { |
1579 | struct fib_nh_common *nhc = FIB_RES_NHC(*res); |
1580 | |
1581 | if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) { |
1582 | rt->rt_uses_gateway = 1; |
1583 | rt->rt_gw_family = nhc->nhc_gw_family; |
1584 | /* only INET and INET6 are supported */ |
1585 | if (likely(nhc->nhc_gw_family == AF_INET)) |
1586 | rt->rt_gw4 = nhc->nhc_gw.ipv4; |
1587 | else |
1588 | rt->rt_gw6 = nhc->nhc_gw.ipv6; |
1589 | } |
1590 | |
1591 | ip_dst_init_metrics(dst: &rt->dst, fib_metrics: fi->fib_metrics); |
1592 | |
1593 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1594 | if (nhc->nhc_family == AF_INET) { |
1595 | struct fib_nh *nh; |
1596 | |
1597 | nh = container_of(nhc, struct fib_nh, nh_common); |
1598 | rt->dst.tclassid = nh->nh_tclassid; |
1599 | } |
1600 | #endif |
1601 | rt->dst.lwtstate = lwtstate_get(lws: nhc->nhc_lwtstate); |
1602 | if (unlikely(fnhe)) |
1603 | cached = rt_bind_exception(rt, fnhe, daddr, do_cache); |
1604 | else if (do_cache) |
1605 | cached = rt_cache_route(nhc, rt); |
1606 | if (unlikely(!cached)) { |
1607 | /* Routes we intend to cache in nexthop exception or |
1608 | * FIB nexthop have the DST_NOCACHE bit clear. |
1609 | * However, if we are unsuccessful at storing this |
1610 | * route into the cache we really need to set it. |
1611 | */ |
1612 | if (!rt->rt_gw4) { |
1613 | rt->rt_gw_family = AF_INET; |
1614 | rt->rt_gw4 = daddr; |
1615 | } |
1616 | rt_add_uncached_list(rt); |
1617 | } |
1618 | } else |
1619 | rt_add_uncached_list(rt); |
1620 | |
1621 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1622 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
1623 | set_class_tag(rt, tag: res->tclassid); |
1624 | #endif |
1625 | set_class_tag(rt, tag: itag); |
1626 | #endif |
1627 | } |
1628 | |
1629 | struct rtable *rt_dst_alloc(struct net_device *dev, |
1630 | unsigned int flags, u16 type, |
1631 | bool noxfrm) |
1632 | { |
1633 | struct rtable *rt; |
1634 | |
1635 | rt = dst_alloc(ops: &ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK, |
1636 | flags: (noxfrm ? DST_NOXFRM : 0)); |
1637 | |
1638 | if (rt) { |
1639 | rt->rt_genid = rt_genid_ipv4(net: dev_net(dev)); |
1640 | rt->rt_flags = flags; |
1641 | rt->rt_type = type; |
1642 | rt->rt_is_input = 0; |
1643 | rt->rt_iif = 0; |
1644 | rt->rt_pmtu = 0; |
1645 | rt->rt_mtu_locked = 0; |
1646 | rt->rt_uses_gateway = 0; |
1647 | rt->rt_gw_family = 0; |
1648 | rt->rt_gw4 = 0; |
1649 | |
1650 | rt->dst.output = ip_output; |
1651 | if (flags & RTCF_LOCAL) |
1652 | rt->dst.input = ip_local_deliver; |
1653 | } |
1654 | |
1655 | return rt; |
1656 | } |
1657 | EXPORT_SYMBOL(rt_dst_alloc); |
1658 | |
1659 | struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) |
1660 | { |
1661 | struct rtable *new_rt; |
1662 | |
1663 | new_rt = dst_alloc(ops: &ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK, |
1664 | flags: rt->dst.flags); |
1665 | |
1666 | if (new_rt) { |
1667 | new_rt->rt_genid = rt_genid_ipv4(net: dev_net(dev)); |
1668 | new_rt->rt_flags = rt->rt_flags; |
1669 | new_rt->rt_type = rt->rt_type; |
1670 | new_rt->rt_is_input = rt->rt_is_input; |
1671 | new_rt->rt_iif = rt->rt_iif; |
1672 | new_rt->rt_pmtu = rt->rt_pmtu; |
1673 | new_rt->rt_mtu_locked = rt->rt_mtu_locked; |
1674 | new_rt->rt_gw_family = rt->rt_gw_family; |
1675 | if (rt->rt_gw_family == AF_INET) |
1676 | new_rt->rt_gw4 = rt->rt_gw4; |
1677 | else if (rt->rt_gw_family == AF_INET6) |
1678 | new_rt->rt_gw6 = rt->rt_gw6; |
1679 | |
1680 | new_rt->dst.input = rt->dst.input; |
1681 | new_rt->dst.output = rt->dst.output; |
1682 | new_rt->dst.error = rt->dst.error; |
1683 | new_rt->dst.lastuse = jiffies; |
1684 | new_rt->dst.lwtstate = lwtstate_get(lws: rt->dst.lwtstate); |
1685 | } |
1686 | return new_rt; |
1687 | } |
1688 | EXPORT_SYMBOL(rt_dst_clone); |
1689 | |
1690 | /* called in rcu_read_lock() section */ |
1691 | int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
1692 | u8 tos, struct net_device *dev, |
1693 | struct in_device *in_dev, u32 *itag) |
1694 | { |
1695 | int err; |
1696 | |
1697 | /* Primary sanity checks. */ |
1698 | if (!in_dev) |
1699 | return -EINVAL; |
1700 | |
1701 | if (ipv4_is_multicast(addr: saddr) || ipv4_is_lbcast(addr: saddr) || |
1702 | skb->protocol != htons(ETH_P_IP)) |
1703 | return -EINVAL; |
1704 | |
1705 | if (ipv4_is_loopback(addr: saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev)) |
1706 | return -EINVAL; |
1707 | |
1708 | if (ipv4_is_zeronet(addr: saddr)) { |
1709 | if (!ipv4_is_local_multicast(addr: daddr) && |
1710 | ip_hdr(skb)->protocol != IPPROTO_IGMP) |
1711 | return -EINVAL; |
1712 | } else { |
1713 | err = fib_validate_source(skb, src: saddr, dst: 0, tos, oif: 0, dev, |
1714 | idev: in_dev, itag); |
1715 | if (err < 0) |
1716 | return err; |
1717 | } |
1718 | return 0; |
1719 | } |
1720 | |
1721 | /* called in rcu_read_lock() section */ |
1722 | static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
1723 | u8 tos, struct net_device *dev, int our) |
1724 | { |
1725 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
1726 | unsigned int flags = RTCF_MULTICAST; |
1727 | struct rtable *rth; |
1728 | u32 itag = 0; |
1729 | int err; |
1730 | |
1731 | err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, itag: &itag); |
1732 | if (err) |
1733 | return err; |
1734 | |
1735 | if (our) |
1736 | flags |= RTCF_LOCAL; |
1737 | |
1738 | if (IN_DEV_ORCONF(in_dev, NOPOLICY)) |
1739 | IPCB(skb)->flags |= IPSKB_NOPOLICY; |
1740 | |
1741 | rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, |
1742 | false); |
1743 | if (!rth) |
1744 | return -ENOBUFS; |
1745 | |
1746 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1747 | rth->dst.tclassid = itag; |
1748 | #endif |
1749 | rth->dst.output = ip_rt_bug; |
1750 | rth->rt_is_input= 1; |
1751 | |
1752 | #ifdef CONFIG_IP_MROUTE |
1753 | if (!ipv4_is_local_multicast(addr: daddr) && IN_DEV_MFORWARD(in_dev)) |
1754 | rth->dst.input = ip_mr_input; |
1755 | #endif |
1756 | RT_CACHE_STAT_INC(in_slow_mc); |
1757 | |
1758 | skb_dst_drop(skb); |
1759 | skb_dst_set(skb, dst: &rth->dst); |
1760 | return 0; |
1761 | } |
1762 | |
1763 | |
1764 | static void ip_handle_martian_source(struct net_device *dev, |
1765 | struct in_device *in_dev, |
1766 | struct sk_buff *skb, |
1767 | __be32 daddr, |
1768 | __be32 saddr) |
1769 | { |
1770 | RT_CACHE_STAT_INC(in_martian_src); |
1771 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
1772 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) { |
1773 | /* |
1774 | * RFC1812 recommendation, if source is martian, |
1775 | * the only hint is MAC header. |
1776 | */ |
1777 | pr_warn("martian source %pI4 from %pI4, on dev %s\n" , |
1778 | &daddr, &saddr, dev->name); |
1779 | if (dev->hard_header_len && skb_mac_header_was_set(skb)) { |
1780 | print_hex_dump(KERN_WARNING, prefix_str: "ll header: " , |
1781 | prefix_type: DUMP_PREFIX_OFFSET, rowsize: 16, groupsize: 1, |
1782 | buf: skb_mac_header(skb), |
1783 | len: dev->hard_header_len, ascii: false); |
1784 | } |
1785 | } |
1786 | #endif |
1787 | } |
1788 | |
1789 | /* called in rcu_read_lock() section */ |
1790 | static int __mkroute_input(struct sk_buff *skb, |
1791 | const struct fib_result *res, |
1792 | struct in_device *in_dev, |
1793 | __be32 daddr, __be32 saddr, u32 tos) |
1794 | { |
1795 | struct fib_nh_common *nhc = FIB_RES_NHC(*res); |
1796 | struct net_device *dev = nhc->nhc_dev; |
1797 | struct fib_nh_exception *fnhe; |
1798 | struct rtable *rth; |
1799 | int err; |
1800 | struct in_device *out_dev; |
1801 | bool do_cache; |
1802 | u32 itag = 0; |
1803 | |
1804 | /* get a working reference to the output device */ |
1805 | out_dev = __in_dev_get_rcu(dev); |
1806 | if (!out_dev) { |
1807 | net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n" ); |
1808 | return -EINVAL; |
1809 | } |
1810 | |
1811 | err = fib_validate_source(skb, src: saddr, dst: daddr, tos, FIB_RES_OIF(*res), |
1812 | dev: in_dev->dev, idev: in_dev, itag: &itag); |
1813 | if (err < 0) { |
1814 | ip_handle_martian_source(dev: in_dev->dev, in_dev, skb, daddr, |
1815 | saddr); |
1816 | |
1817 | goto cleanup; |
1818 | } |
1819 | |
1820 | do_cache = res->fi && !itag; |
1821 | if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && |
1822 | skb->protocol == htons(ETH_P_IP)) { |
1823 | __be32 gw; |
1824 | |
1825 | gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0; |
1826 | if (IN_DEV_SHARED_MEDIA(out_dev) || |
1827 | inet_addr_onlink(in_dev: out_dev, a: saddr, b: gw)) |
1828 | IPCB(skb)->flags |= IPSKB_DOREDIRECT; |
1829 | } |
1830 | |
1831 | if (skb->protocol != htons(ETH_P_IP)) { |
1832 | /* Not IP (i.e. ARP). Do not create route, if it is |
1833 | * invalid for proxy arp. DNAT routes are always valid. |
1834 | * |
1835 | * Proxy arp feature have been extended to allow, ARP |
1836 | * replies back to the same interface, to support |
1837 | * Private VLAN switch technologies. See arp.c. |
1838 | */ |
1839 | if (out_dev == in_dev && |
1840 | IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) { |
1841 | err = -EINVAL; |
1842 | goto cleanup; |
1843 | } |
1844 | } |
1845 | |
1846 | if (IN_DEV_ORCONF(in_dev, NOPOLICY)) |
1847 | IPCB(skb)->flags |= IPSKB_NOPOLICY; |
1848 | |
1849 | fnhe = find_exception(nhc, daddr); |
1850 | if (do_cache) { |
1851 | if (fnhe) |
1852 | rth = rcu_dereference(fnhe->fnhe_rth_input); |
1853 | else |
1854 | rth = rcu_dereference(nhc->nhc_rth_input); |
1855 | if (rt_cache_valid(rt: rth)) { |
1856 | skb_dst_set_noref(skb, dst: &rth->dst); |
1857 | goto out; |
1858 | } |
1859 | } |
1860 | |
1861 | rth = rt_dst_alloc(out_dev->dev, 0, res->type, |
1862 | IN_DEV_ORCONF(out_dev, NOXFRM)); |
1863 | if (!rth) { |
1864 | err = -ENOBUFS; |
1865 | goto cleanup; |
1866 | } |
1867 | |
1868 | rth->rt_is_input = 1; |
1869 | RT_CACHE_STAT_INC(in_slow_tot); |
1870 | |
1871 | rth->dst.input = ip_forward; |
1872 | |
1873 | rt_set_nexthop(rt: rth, daddr, res, fnhe, fi: res->fi, type: res->type, itag, |
1874 | do_cache); |
1875 | lwtunnel_set_redirect(dst: &rth->dst); |
1876 | skb_dst_set(skb, dst: &rth->dst); |
1877 | out: |
1878 | err = 0; |
1879 | cleanup: |
1880 | return err; |
1881 | } |
1882 | |
1883 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
1884 | /* To make ICMP packets follow the right flow, the multipath hash is |
1885 | * calculated from the inner IP addresses. |
1886 | */ |
1887 | static void ip_multipath_l3_keys(const struct sk_buff *skb, |
1888 | struct flow_keys *hash_keys) |
1889 | { |
1890 | const struct iphdr *outer_iph = ip_hdr(skb); |
1891 | const struct iphdr *key_iph = outer_iph; |
1892 | const struct iphdr *inner_iph; |
1893 | const struct icmphdr *icmph; |
1894 | struct iphdr _inner_iph; |
1895 | struct icmphdr _icmph; |
1896 | |
1897 | if (likely(outer_iph->protocol != IPPROTO_ICMP)) |
1898 | goto out; |
1899 | |
1900 | if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) |
1901 | goto out; |
1902 | |
1903 | icmph = skb_header_pointer(skb, offset: outer_iph->ihl * 4, len: sizeof(_icmph), |
1904 | buffer: &_icmph); |
1905 | if (!icmph) |
1906 | goto out; |
1907 | |
1908 | if (!icmp_is_err(type: icmph->type)) |
1909 | goto out; |
1910 | |
1911 | inner_iph = skb_header_pointer(skb, |
1912 | offset: outer_iph->ihl * 4 + sizeof(_icmph), |
1913 | len: sizeof(_inner_iph), buffer: &_inner_iph); |
1914 | if (!inner_iph) |
1915 | goto out; |
1916 | |
1917 | key_iph = inner_iph; |
1918 | out: |
1919 | hash_keys->addrs.v4addrs.src = key_iph->saddr; |
1920 | hash_keys->addrs.v4addrs.dst = key_iph->daddr; |
1921 | } |
1922 | |
1923 | static u32 fib_multipath_custom_hash_outer(const struct net *net, |
1924 | const struct sk_buff *skb, |
1925 | bool *p_has_inner) |
1926 | { |
1927 | u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); |
1928 | struct flow_keys keys, hash_keys; |
1929 | |
1930 | if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) |
1931 | return 0; |
1932 | |
1933 | memset(&hash_keys, 0, sizeof(hash_keys)); |
1934 | skb_flow_dissect_flow_keys(skb, flow: &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP); |
1935 | |
1936 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; |
1937 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) |
1938 | hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; |
1939 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) |
1940 | hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; |
1941 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) |
1942 | hash_keys.basic.ip_proto = keys.basic.ip_proto; |
1943 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) |
1944 | hash_keys.ports.src = keys.ports.src; |
1945 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) |
1946 | hash_keys.ports.dst = keys.ports.dst; |
1947 | |
1948 | *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); |
1949 | return flow_hash_from_keys(keys: &hash_keys); |
1950 | } |
1951 | |
1952 | static u32 fib_multipath_custom_hash_inner(const struct net *net, |
1953 | const struct sk_buff *skb, |
1954 | bool has_inner) |
1955 | { |
1956 | u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); |
1957 | struct flow_keys keys, hash_keys; |
1958 | |
1959 | /* We assume the packet carries an encapsulation, but if none was |
1960 | * encountered during dissection of the outer flow, then there is no |
1961 | * point in calling the flow dissector again. |
1962 | */ |
1963 | if (!has_inner) |
1964 | return 0; |
1965 | |
1966 | if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)) |
1967 | return 0; |
1968 | |
1969 | memset(&hash_keys, 0, sizeof(hash_keys)); |
1970 | skb_flow_dissect_flow_keys(skb, flow: &keys, flags: 0); |
1971 | |
1972 | if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION)) |
1973 | return 0; |
1974 | |
1975 | if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { |
1976 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; |
1977 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) |
1978 | hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; |
1979 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) |
1980 | hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; |
1981 | } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { |
1982 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; |
1983 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) |
1984 | hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; |
1985 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) |
1986 | hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; |
1987 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL) |
1988 | hash_keys.tags.flow_label = keys.tags.flow_label; |
1989 | } |
1990 | |
1991 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) |
1992 | hash_keys.basic.ip_proto = keys.basic.ip_proto; |
1993 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT) |
1994 | hash_keys.ports.src = keys.ports.src; |
1995 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) |
1996 | hash_keys.ports.dst = keys.ports.dst; |
1997 | |
1998 | return flow_hash_from_keys(keys: &hash_keys); |
1999 | } |
2000 | |
2001 | static u32 fib_multipath_custom_hash_skb(const struct net *net, |
2002 | const struct sk_buff *skb) |
2003 | { |
2004 | u32 mhash, mhash_inner; |
2005 | bool has_inner = true; |
2006 | |
2007 | mhash = fib_multipath_custom_hash_outer(net, skb, p_has_inner: &has_inner); |
2008 | mhash_inner = fib_multipath_custom_hash_inner(net, skb, has_inner); |
2009 | |
2010 | return jhash_2words(a: mhash, b: mhash_inner, initval: 0); |
2011 | } |
2012 | |
2013 | static u32 fib_multipath_custom_hash_fl4(const struct net *net, |
2014 | const struct flowi4 *fl4) |
2015 | { |
2016 | u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); |
2017 | struct flow_keys hash_keys; |
2018 | |
2019 | if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) |
2020 | return 0; |
2021 | |
2022 | memset(&hash_keys, 0, sizeof(hash_keys)); |
2023 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; |
2024 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) |
2025 | hash_keys.addrs.v4addrs.src = fl4->saddr; |
2026 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) |
2027 | hash_keys.addrs.v4addrs.dst = fl4->daddr; |
2028 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) |
2029 | hash_keys.basic.ip_proto = fl4->flowi4_proto; |
2030 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) |
2031 | hash_keys.ports.src = fl4->fl4_sport; |
2032 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) |
2033 | hash_keys.ports.dst = fl4->fl4_dport; |
2034 | |
2035 | return flow_hash_from_keys(keys: &hash_keys); |
2036 | } |
2037 | |
2038 | /* if skb is set it will be used and fl4 can be NULL */ |
2039 | int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, |
2040 | const struct sk_buff *skb, struct flow_keys *flkeys) |
2041 | { |
2042 | u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0; |
2043 | struct flow_keys hash_keys; |
2044 | u32 mhash = 0; |
2045 | |
2046 | switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) { |
2047 | case 0: |
2048 | memset(&hash_keys, 0, sizeof(hash_keys)); |
2049 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; |
2050 | if (skb) { |
2051 | ip_multipath_l3_keys(skb, hash_keys: &hash_keys); |
2052 | } else { |
2053 | hash_keys.addrs.v4addrs.src = fl4->saddr; |
2054 | hash_keys.addrs.v4addrs.dst = fl4->daddr; |
2055 | } |
2056 | mhash = flow_hash_from_keys(keys: &hash_keys); |
2057 | break; |
2058 | case 1: |
2059 | /* skb is currently provided only when forwarding */ |
2060 | if (skb) { |
2061 | unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; |
2062 | struct flow_keys keys; |
2063 | |
2064 | /* short-circuit if we already have L4 hash present */ |
2065 | if (skb->l4_hash) |
2066 | return skb_get_hash_raw(skb) >> 1; |
2067 | |
2068 | memset(&hash_keys, 0, sizeof(hash_keys)); |
2069 | |
2070 | if (!flkeys) { |
2071 | skb_flow_dissect_flow_keys(skb, flow: &keys, flags: flag); |
2072 | flkeys = &keys; |
2073 | } |
2074 | |
2075 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; |
2076 | hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src; |
2077 | hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst; |
2078 | hash_keys.ports.src = flkeys->ports.src; |
2079 | hash_keys.ports.dst = flkeys->ports.dst; |
2080 | hash_keys.basic.ip_proto = flkeys->basic.ip_proto; |
2081 | } else { |
2082 | memset(&hash_keys, 0, sizeof(hash_keys)); |
2083 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; |
2084 | hash_keys.addrs.v4addrs.src = fl4->saddr; |
2085 | hash_keys.addrs.v4addrs.dst = fl4->daddr; |
2086 | hash_keys.ports.src = fl4->fl4_sport; |
2087 | hash_keys.ports.dst = fl4->fl4_dport; |
2088 | hash_keys.basic.ip_proto = fl4->flowi4_proto; |
2089 | } |
2090 | mhash = flow_hash_from_keys(keys: &hash_keys); |
2091 | break; |
2092 | case 2: |
2093 | memset(&hash_keys, 0, sizeof(hash_keys)); |
2094 | /* skb is currently provided only when forwarding */ |
2095 | if (skb) { |
2096 | struct flow_keys keys; |
2097 | |
2098 | skb_flow_dissect_flow_keys(skb, flow: &keys, flags: 0); |
2099 | /* Inner can be v4 or v6 */ |
2100 | if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { |
2101 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; |
2102 | hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; |
2103 | hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; |
2104 | } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { |
2105 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; |
2106 | hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; |
2107 | hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; |
2108 | hash_keys.tags.flow_label = keys.tags.flow_label; |
2109 | hash_keys.basic.ip_proto = keys.basic.ip_proto; |
2110 | } else { |
2111 | /* Same as case 0 */ |
2112 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; |
2113 | ip_multipath_l3_keys(skb, hash_keys: &hash_keys); |
2114 | } |
2115 | } else { |
2116 | /* Same as case 0 */ |
2117 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; |
2118 | hash_keys.addrs.v4addrs.src = fl4->saddr; |
2119 | hash_keys.addrs.v4addrs.dst = fl4->daddr; |
2120 | } |
2121 | mhash = flow_hash_from_keys(keys: &hash_keys); |
2122 | break; |
2123 | case 3: |
2124 | if (skb) |
2125 | mhash = fib_multipath_custom_hash_skb(net, skb); |
2126 | else |
2127 | mhash = fib_multipath_custom_hash_fl4(net, fl4); |
2128 | break; |
2129 | } |
2130 | |
2131 | if (multipath_hash) |
2132 | mhash = jhash_2words(a: mhash, b: multipath_hash, initval: 0); |
2133 | |
2134 | return mhash >> 1; |
2135 | } |
2136 | #endif /* CONFIG_IP_ROUTE_MULTIPATH */ |
2137 | |
2138 | static int ip_mkroute_input(struct sk_buff *skb, |
2139 | struct fib_result *res, |
2140 | struct in_device *in_dev, |
2141 | __be32 daddr, __be32 saddr, u32 tos, |
2142 | struct flow_keys *hkeys) |
2143 | { |
2144 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
2145 | if (res->fi && fib_info_num_path(fi: res->fi) > 1) { |
2146 | int h = fib_multipath_hash(net: res->fi->fib_net, NULL, skb, flkeys: hkeys); |
2147 | |
2148 | fib_select_multipath(res, hash: h); |
2149 | IPCB(skb)->flags |= IPSKB_MULTIPATH; |
2150 | } |
2151 | #endif |
2152 | |
2153 | /* create a routing cache entry */ |
2154 | return __mkroute_input(skb, res, in_dev, daddr, saddr, tos); |
2155 | } |
2156 | |
2157 | /* Implements all the saddr-related checks as ip_route_input_slow(), |
2158 | * assuming daddr is valid and the destination is not a local broadcast one. |
2159 | * Uses the provided hint instead of performing a route lookup. |
2160 | */ |
2161 | int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
2162 | u8 tos, struct net_device *dev, |
2163 | const struct sk_buff *hint) |
2164 | { |
2165 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
2166 | struct rtable *rt = skb_rtable(skb: hint); |
2167 | struct net *net = dev_net(dev); |
2168 | int err = -EINVAL; |
2169 | u32 tag = 0; |
2170 | |
2171 | if (ipv4_is_multicast(addr: saddr) || ipv4_is_lbcast(addr: saddr)) |
2172 | goto martian_source; |
2173 | |
2174 | if (ipv4_is_zeronet(addr: saddr)) |
2175 | goto martian_source; |
2176 | |
2177 | if (ipv4_is_loopback(addr: saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) |
2178 | goto martian_source; |
2179 | |
2180 | if (rt->rt_type != RTN_LOCAL) |
2181 | goto skip_validate_source; |
2182 | |
2183 | tos &= IPTOS_RT_MASK; |
2184 | err = fib_validate_source(skb, src: saddr, dst: daddr, tos, oif: 0, dev, idev: in_dev, itag: &tag); |
2185 | if (err < 0) |
2186 | goto martian_source; |
2187 | |
2188 | skip_validate_source: |
2189 | skb_dst_copy(nskb: skb, oskb: hint); |
2190 | return 0; |
2191 | |
2192 | martian_source: |
2193 | ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); |
2194 | return err; |
2195 | } |
2196 | |
2197 | /* get device for dst_alloc with local routes */ |
2198 | static struct net_device *ip_rt_get_dev(struct net *net, |
2199 | const struct fib_result *res) |
2200 | { |
2201 | struct fib_nh_common *nhc = res->fi ? res->nhc : NULL; |
2202 | struct net_device *dev = NULL; |
2203 | |
2204 | if (nhc) |
2205 | dev = l3mdev_master_dev_rcu(dev: nhc->nhc_dev); |
2206 | |
2207 | return dev ? : net->loopback_dev; |
2208 | } |
2209 | |
/*
 *	NOTE. We drop all packets that have local source
 *	addresses, because every properly looped back packet
 *	must have the correct destination already attached by the output
 *	routine. Changes in the enforced policies must also be applied to
 *	ip_route_use_hint().
 *
 *	This approach solves two big problems:
 *	1. Non-simplex devices are handled properly.
 *	2. IP spoofing attempts are filtered with 100% guarantee.
 *
 *	Called with rcu_read_lock().
 *
 *	Returns 0 once a dst has been attached to @skb; this includes the
 *	no_route case, where the attached route has ip_error as its input
 *	handler. Returns -EINVAL when IP is disabled on @dev, for martian
 *	destinations and for the martian sources rejected before any lookup;
 *	the negative fib_validate_source() error when source validation
 *	fails; the result of ip_mkroute_input() for forwarded routes; and
 *	-ENOBUFS when rt_dst_alloc() fails.
 */

static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev,
			       struct fib_result *res)
{
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct flow_keys *flkeys = NULL, _flkeys;
	struct net    *net = dev_net(dev);
	struct ip_tunnel_info *tun_info;
	int		err = -EINVAL;
	unsigned int	flags = 0;
	u32		itag = 0;
	struct rtable	*rth;
	struct flowi4	fl4;
	bool do_cache = true;

	/* IP on this device is disabled. */

	if (!in_dev)
		goto out;

	/* Check for the weirdest martians, which cannot be detected
	 * by fib_lookup.
	 */

	/* Use the tunnel id as a lookup key only when the tunnel info is not
	 * marked IP_TUNNEL_INFO_TX; otherwise the key is 0.
	 */
	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
	else
		fl4.flowi4_tun_key.tun_id = 0;
	skb_dst_drop(skb);

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
		goto martian_source;

	res->fi = NULL;
	res->table = NULL;
	/* Limited broadcast, or both addresses zero: handle as broadcast
	 * input without a FIB lookup.
	 */
	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Accept zero addresses only to limited broadcast;
	 * I do not even know whether to fix it or not. Waiting for complaints :-)
	 */
	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_zeronet(daddr))
		goto martian_destination;

	/* The following code tries to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
	 * and calls it once if daddr and/or saddr are loopback addresses
	 */
	if (ipv4_is_loopback(daddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_destination;
	} else if (ipv4_is_loopback(saddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_source;
	}

	/*
	 *	Now we are ready to route packet.
	 */
	fl4.flowi4_l3mdev = 0;
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = dev->ifindex;
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.flowi4_flags = 0;
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	fl4.flowi4_uid = sock_net_uid(net, NULL);
	fl4.flowi4_multipath_hash = 0;

	/* When early dissection ran, keep its keys so they can be passed on
	 * to ip_mkroute_input(); otherwise zero the protocol/port fields.
	 */
	if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) {
		flkeys = &_flkeys;
	} else {
		fl4.flowi4_proto = 0;
		fl4.fl4_sport = 0;
		fl4.fl4_dport = 0;
	}

	err = fib_lookup(net, &fl4, res, 0);
	if (err != 0) {
		/* With forwarding disabled the lookup error is replaced */
		if (!IN_DEV_FORWARD(in_dev))
			err = -EHOSTUNREACH;
		goto no_route;
	}

	if (res->type == RTN_BROADCAST) {
		if (IN_DEV_BFORWARD(in_dev))
			goto make_route;
		/* do not cache if bc_forwarding is enabled */
		if (IPV4_DEVCONF_ALL(net, BC_FORWARDING))
			do_cache = false;
		goto brd_input;
	}

	if (res->type == RTN_LOCAL) {
		err = fib_validate_source(skb, saddr, daddr, tos,
					  0, dev, in_dev, &itag);
		if (err < 0)
			goto martian_source;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev)) {
		err = -EHOSTUNREACH;
		goto no_route;
	}
	if (res->type != RTN_UNICAST)
		goto martian_destination;

make_route:
	err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
out:	return err;

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	/* A zero source is accepted here without validation */
	if (!ipv4_is_zeronet(saddr)) {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto martian_source;
	}
	flags |= RTCF_BROADCAST;
	res->type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

	/* brd_input falls through into local delivery */
local_input:
	if (IN_DEV_ORCONF(in_dev, NOPOLICY))
		IPCB(skb)->flags |= IPSKB_NOPOLICY;

	/* The per-nexthop input cache is only used when the result has a
	 * fib_info and source validation produced no tag.
	 */
	do_cache &= res->fi && !itag;
	if (do_cache) {
		struct fib_nh_common *nhc = FIB_RES_NHC(*res);

		rth = rcu_dereference(nhc->nhc_rth_input);
		if (rt_cache_valid(rth)) {
			skb_dst_set_noref(skb, &rth->dst);
			err = 0;
			goto out;
		}
	}

	rth = rt_dst_alloc(ip_rt_get_dev(net, res),
			   flags | RTCF_LOCAL, res->type, false);
	if (!rth)
		goto e_nobufs;

	/* Input routes get ip_rt_bug as their output handler */
	rth->dst.output= ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	rth->rt_is_input = 1;

	RT_CACHE_STAT_INC(in_slow_tot);
	if (res->type == RTN_UNREACHABLE) {
		/* Reached via no_route: err still holds the negative error
		 * chosen there; it is stored negated in the dst.
		 */
		rth->dst.input= ip_error;
		rth->dst.error= -err;
		rth->rt_flags	&= ~RTCF_LOCAL;
	}

	if (do_cache) {
		struct fib_nh_common *nhc = FIB_RES_NHC(*res);

		rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
		if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
			WARN_ON(rth->dst.input == lwtunnel_input);
			/* Save the original handler before redirecting input */
			rth->dst.lwtstate->orig_input = rth->dst.input;
			rth->dst.input = lwtunnel_input;
		}

		/* If the route could not be cached on the nexthop, put it on
		 * the uncached list instead.
		 */
		if (unlikely(!rt_cache_route(nhc, rth)))
			rt_add_uncached_list(rth);
	}
	skb_dst_set(skb, &rth->dst);
	err = 0;
	goto out;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	res->type = RTN_UNREACHABLE;
	res->fi = NULL;
	res->table = NULL;
	goto local_input;

	/*
	 *	Do not cache martian addresses: they should be logged (RFC1812)
	 */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev))
		net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
				     &daddr, &saddr, dev->name);
#endif

	/* martian_destination falls through to the -EINVAL exit */
e_inval:
	err = -EINVAL;
	goto out;

e_nobufs:
	err = -ENOBUFS;
	goto out;

martian_source:
	/* err is returned unchanged: the initial -EINVAL, or the error from
	 * fib_validate_source()
	 */
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto out;
}
2435 | |
2436 | /* called with rcu_read_lock held */ |
2437 | static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
2438 | u8 tos, struct net_device *dev, struct fib_result *res) |
2439 | { |
2440 | /* Multicast recognition logic is moved from route cache to here. |
2441 | * The problem was that too many Ethernet cards have broken/missing |
2442 | * hardware multicast filters :-( As result the host on multicasting |
2443 | * network acquires a lot of useless route cache entries, sort of |
2444 | * SDR messages from all the world. Now we try to get rid of them. |
2445 | * Really, provided software IP multicast filter is organized |
2446 | * reasonably (at least, hashed), it does not result in a slowdown |
2447 | * comparing with route cache reject entries. |
2448 | * Note, that multicast routers are not affected, because |
2449 | * route cache entry is created eventually. |
2450 | */ |
2451 | if (ipv4_is_multicast(addr: daddr)) { |
2452 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
2453 | int our = 0; |
2454 | int err = -EINVAL; |
2455 | |
2456 | if (!in_dev) |
2457 | return err; |
2458 | our = ip_check_mc_rcu(dev: in_dev, mc_addr: daddr, src_addr: saddr, |
2459 | proto: ip_hdr(skb)->protocol); |
2460 | |
2461 | /* check l3 master if no match yet */ |
2462 | if (!our && netif_is_l3_slave(dev)) { |
2463 | struct in_device *l3_in_dev; |
2464 | |
2465 | l3_in_dev = __in_dev_get_rcu(dev: skb->dev); |
2466 | if (l3_in_dev) |
2467 | our = ip_check_mc_rcu(dev: l3_in_dev, mc_addr: daddr, src_addr: saddr, |
2468 | proto: ip_hdr(skb)->protocol); |
2469 | } |
2470 | |
2471 | if (our |
2472 | #ifdef CONFIG_IP_MROUTE |
2473 | || |
2474 | (!ipv4_is_local_multicast(addr: daddr) && |
2475 | IN_DEV_MFORWARD(in_dev)) |
2476 | #endif |
2477 | ) { |
2478 | err = ip_route_input_mc(skb, daddr, saddr, |
2479 | tos, dev, our); |
2480 | } |
2481 | return err; |
2482 | } |
2483 | |
2484 | return ip_route_input_slow(skb, daddr, saddr, tos, dev, res); |
2485 | } |
2486 | |
2487 | int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
2488 | u8 tos, struct net_device *dev) |
2489 | { |
2490 | struct fib_result res; |
2491 | int err; |
2492 | |
2493 | tos &= IPTOS_RT_MASK; |
2494 | rcu_read_lock(); |
2495 | err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, res: &res); |
2496 | rcu_read_unlock(); |
2497 | |
2498 | return err; |
2499 | } |
2500 | EXPORT_SYMBOL(ip_route_input_noref); |
2501 | |
/* called with rcu_read_lock()
 *
 * Build (or reuse from the nexthop / exception cache) the output route for
 * @fl4 through @dev_out.
 *
 * Returns the rtable on success, or an ERR_PTR: -EINVAL when @dev_out has no
 * in_device, when a loopback source is used on a device that is neither
 * loopback nor an L3 master while route_localnet is off, or when the
 * destination is in the zero network; -ENOBUFS when rt_dst_alloc() fails.
 */
static struct rtable *__mkroute_output(const struct fib_result *res,
				       const struct flowi4 *fl4, int orig_oif,
				       struct net_device *dev_out,
				       unsigned int flags)
{
	struct fib_info *fi = res->fi;
	struct fib_nh_exception *fnhe;
	struct in_device *in_dev;
	u16 type = res->type;
	struct rtable *rth;
	bool do_cache;

	in_dev = __in_dev_get_rcu(dev_out);
	if (!in_dev)
		return ERR_PTR(-EINVAL);

	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(fl4->saddr) &&
		    !(dev_out->flags & IFF_LOOPBACK) &&
		    !netif_is_l3_master(dev_out))
			return ERR_PTR(-EINVAL);

	/* The destination address class overrides the looked-up route type */
	if (ipv4_is_lbcast(fl4->daddr))
		type = RTN_BROADCAST;
	else if (ipv4_is_multicast(fl4->daddr))
		type = RTN_MULTICAST;
	else if (ipv4_is_zeronet(fl4->daddr))
		return ERR_PTR(-EINVAL);

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	do_cache = true;
	if (type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		fi = NULL;
	} else if (type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST | RTCF_LOCAL;
		/* RTCF_LOCAL is kept only when ip_check_mc_rcu() matches, and
		 * in that case the route is not cached.
		 */
		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
				     fl4->flowi4_proto))
			flags &= ~RTCF_LOCAL;
		else
			do_cache = false;
		/* If multicast route do not exist use
		 * default one, but do not gateway in this case.
		 * Yes, it is hack.
		 */
		if (fi && res->prefixlen < 4)
			fi = NULL;
	} else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
		   (orig_oif != dev_out->ifindex)) {
		/* For local routes that require a particular output interface
		 * we do not want to cache the result.  Caching the result
		 * causes incorrect behaviour when there are multiple source
		 * addresses on the interface, the end result being that if the
		 * intended recipient is waiting on that interface for the
		 * packet he won't receive it because it will be delivered on
		 * the loopback interface and the IP_PKTINFO ipi_ifindex will
		 * be set to the loopback interface as well.
		 */
		do_cache = false;
	}

	fnhe = NULL;
	/* Without a fib_info there is no nexthop to cache on */
	do_cache &= fi != NULL;
	if (fi) {
		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
		struct rtable __rcu **prth;

		/* The exception is looked up even when caching is off: it is
		 * passed to rt_set_nexthop() below.
		 */
		fnhe = find_exception(nhc, fl4->daddr);
		if (!do_cache)
			goto add;
		if (fnhe) {
			prth = &fnhe->fnhe_rth_output;
		} else {
			/* FLOWI_FLAG_KNOWN_NH: skip the per-CPU cache unless
			 * the nexthop has a gateway with link scope.
			 */
			if (unlikely(fl4->flowi4_flags &
				     FLOWI_FLAG_KNOWN_NH &&
				     !(nhc->nhc_gw_family &&
				       nhc->nhc_scope == RT_SCOPE_LINK))) {
				do_cache = false;
				goto add;
			}
			prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
		}
		/* Reuse the cached route if it is valid and a reference can
		 * be taken.
		 */
		rth = rcu_dereference(*prth);
		if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
			return rth;
	}

add:
	rth = rt_dst_alloc(dev_out, flags, type,
			   IN_DEV_ORCONF(in_dev, NOXFRM));
	if (!rth)
		return ERR_PTR(-ENOBUFS);

	rth->rt_iif = orig_oif;

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		/* Local broadcast/multicast leaving through a non-loopback
		 * device uses ip_mc_output as its output handler.
		 */
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			rth->dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !ipv4_is_local_multicast(fl4->daddr)) {
				rth->dst.input = ip_mr_input;
				rth->dst.output = ip_mc_output;
			}
		}
#endif
	}

	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
	lwtunnel_set_redirect(&rth->dst);

	return rth;
}
2624 | |
2625 | /* |
2626 | * Major route resolver routine. |
2627 | */ |
2628 | |
2629 | struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, |
2630 | const struct sk_buff *skb) |
2631 | { |
2632 | struct fib_result res = { |
2633 | .type = RTN_UNSPEC, |
2634 | .fi = NULL, |
2635 | .table = NULL, |
2636 | .tclassid = 0, |
2637 | }; |
2638 | struct rtable *rth; |
2639 | |
2640 | fl4->flowi4_iif = LOOPBACK_IFINDEX; |
2641 | ip_rt_fix_tos(fl4); |
2642 | |
2643 | rcu_read_lock(); |
2644 | rth = ip_route_output_key_hash_rcu(net, flp: fl4, res: &res, skb); |
2645 | rcu_read_unlock(); |
2646 | |
2647 | return rth; |
2648 | } |
2649 | EXPORT_SYMBOL_GPL(ip_route_output_key_hash); |
2650 | |
/* Resolve the output route for @fl4, filling in missing source address,
 * destination address and output interface in @fl4 along the way.
 *
 * Returns the route built by __mkroute_output(), or an ERR_PTR:
 * -EINVAL for a multicast, limited-broadcast or zeronet source address;
 * -ENETUNREACH when the source address is not found by __ip_dev_find() or
 * the requested output device is down or has no in_device; -ENODEV when the
 * requested output interface does not exist; or the fib_lookup() error.
 */
struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
					    struct fib_result *res,
					    const struct sk_buff *skb)
{
	struct net_device *dev_out = NULL;
	int orig_oif = fl4->flowi4_oif;
	unsigned int flags = 0;
	struct rtable *rth;
	int err;

	if (fl4->saddr) {
		if (ipv4_is_multicast(fl4->saddr) ||
		    ipv4_is_lbcast(fl4->saddr) ||
		    ipv4_is_zeronet(fl4->saddr)) {
			rth = ERR_PTR(-EINVAL);
			goto out;
		}

		/* Default error for the source-address checks below */
		rth = ERR_PTR(-ENETUNREACH);

		/* I removed check for oif == dev_out->oif here.
		 * It was wrong for two reasons:
		 * 1. ip_dev_find(net, saddr) can return wrong iface, if saddr
		 *    is assigned to multiple interfaces.
		 * 2. Moreover, we are allowed to send packets with saddr
		 *    of another iface. --ANK
		 */

		if (fl4->flowi4_oif == 0 &&
		    (ipv4_is_multicast(fl4->daddr) ||
		     ipv4_is_lbcast(fl4->daddr))) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			dev_out = __ip_dev_find(net, fl4->saddr, false);
			if (!dev_out)
				goto out;

			/* Special hack: user can direct multicasts
			 * and limited broadcast via necessary interface
			 * without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
			 * This hack is not just for fun, it allows
			 * vic,vat and friends to work.
			 * They bind socket to loopback, set ttl to zero
			 * and expect that it will work.
			 * From the viewpoint of routing cache they are broken,
			 * because we are not allowed to build multicast path
			 * with loopback source addr (look, routing cache
			 * cannot know, that ttl is zero, so that packet
			 * will not leave this host and route is valid).
			 * Luckily, this hack is good workaround.
			 */

			fl4->flowi4_oif = dev_out->ifindex;
			goto make_route;
		}

		if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			if (!__ip_dev_find(net, fl4->saddr, false))
				goto out;
		}
	}


	if (fl4->flowi4_oif) {
		dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
		rth = ERR_PTR(-ENODEV);
		if (!dev_out)
			goto out;

		/* RACE: Check return value of inet_select_addr instead. */
		if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
			rth = ERR_PTR(-ENETUNREACH);
			goto out;
		}
		/* Link-local multicast, limited broadcast and IGMP skip the
		 * FIB lookup and go out of the requested device.
		 */
		if (ipv4_is_local_multicast(fl4->daddr) ||
		    ipv4_is_lbcast(fl4->daddr) ||
		    fl4->flowi4_proto == IPPROTO_IGMP) {
			if (!fl4->saddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			goto make_route;
		}
		if (!fl4->saddr) {
			if (ipv4_is_multicast(fl4->daddr))
				fl4->saddr = inet_select_addr(dev_out, 0,
							      fl4->flowi4_scope);
			else if (!fl4->daddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_HOST);
		}
	}

	/* No destination: route to ourselves through the loopback device,
	 * using the source address (or INADDR_LOOPBACK if that is unset too).
	 */
	if (!fl4->daddr) {
		fl4->daddr = fl4->saddr;
		if (!fl4->daddr)
			fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
		dev_out = net->loopback_dev;
		fl4->flowi4_oif = LOOPBACK_IFINDEX;
		res->type = RTN_LOCAL;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	err = fib_lookup(net, fl4, res, 0);
	if (err) {
		res->fi = NULL;
		res->table = NULL;
		if (fl4->flowi4_oif &&
		    (ipv4_is_multicast(fl4->daddr) || !fl4->flowi4_l3mdev)) {
			/* Apparently, routing tables are wrong. Assume,
			 * that the destination is on link.
			 *
			 * WHY? DW.
			 * Because we are allowed to send to iface
			 * even if it has NO routes and NO assigned
			 * addresses. When oif is specified, routing
			 * tables are looked up with only one purpose:
			 * to catch if destination is gatewayed, rather than
			 * direct. Moreover, if MSG_DONTROUTE is set,
			 * we send packet, ignoring both routing tables
			 * and ifaddr state. --ANK
			 *
			 *
			 * We could make it even if oif is unknown,
			 * likely IPv6, but we do not.
			 */

			if (fl4->saddr == 0)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			res->type = RTN_UNICAST;
			goto make_route;
		}
		rth = ERR_PTR(err);
		goto out;
	}

	if (res->type == RTN_LOCAL) {
		/* Prefer the route's preferred source; fall back to daddr */
		if (!fl4->saddr) {
			if (res->fi->fib_prefsrc)
				fl4->saddr = res->fi->fib_prefsrc;
			else
				fl4->saddr = fl4->daddr;
		}

		/* L3 master device is the loopback for that domain */
		dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
			net->loopback_dev;

		/* make sure orig_oif points to fib result device even
		 * though packet rx/tx happens over loopback or l3mdev
		 */
		orig_oif = FIB_RES_OIF(*res);

		fl4->flowi4_oif = dev_out->ifindex;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	fib_select_path(net, res, fl4, skb);

	dev_out = FIB_RES_DEV(*res);

make_route:
	rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);

out:
	return rth;
}
2820 | |
2821 | static struct dst_ops ipv4_dst_blackhole_ops = { |
2822 | .family = AF_INET, |
2823 | .default_advmss = ipv4_default_advmss, |
2824 | .neigh_lookup = ipv4_neigh_lookup, |
2825 | .check = dst_blackhole_check, |
2826 | .cow_metrics = dst_blackhole_cow_metrics, |
2827 | .update_pmtu = dst_blackhole_update_pmtu, |
2828 | .redirect = dst_blackhole_redirect, |
2829 | .mtu = dst_blackhole_mtu, |
2830 | }; |
2831 | |
2832 | struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) |
2833 | { |
2834 | struct rtable *ort = (struct rtable *) dst_orig; |
2835 | struct rtable *rt; |
2836 | |
2837 | rt = dst_alloc(ops: &ipv4_dst_blackhole_ops, NULL, DST_OBSOLETE_DEAD, flags: 0); |
2838 | if (rt) { |
2839 | struct dst_entry *new = &rt->dst; |
2840 | |
2841 | new->__use = 1; |
2842 | new->input = dst_discard; |
2843 | new->output = dst_discard_out; |
2844 | |
2845 | new->dev = net->loopback_dev; |
2846 | netdev_hold(dev: new->dev, tracker: &new->dev_tracker, GFP_ATOMIC); |
2847 | |
2848 | rt->rt_is_input = ort->rt_is_input; |
2849 | rt->rt_iif = ort->rt_iif; |
2850 | rt->rt_pmtu = ort->rt_pmtu; |
2851 | rt->rt_mtu_locked = ort->rt_mtu_locked; |
2852 | |
2853 | rt->rt_genid = rt_genid_ipv4(net); |
2854 | rt->rt_flags = ort->rt_flags; |
2855 | rt->rt_type = ort->rt_type; |
2856 | rt->rt_uses_gateway = ort->rt_uses_gateway; |
2857 | rt->rt_gw_family = ort->rt_gw_family; |
2858 | if (rt->rt_gw_family == AF_INET) |
2859 | rt->rt_gw4 = ort->rt_gw4; |
2860 | else if (rt->rt_gw_family == AF_INET6) |
2861 | rt->rt_gw6 = ort->rt_gw6; |
2862 | } |
2863 | |
2864 | dst_release(dst: dst_orig); |
2865 | |
2866 | return rt ? &rt->dst : ERR_PTR(error: -ENOMEM); |
2867 | } |
2868 | |
2869 | struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, |
2870 | const struct sock *sk) |
2871 | { |
2872 | struct rtable *rt = __ip_route_output_key(net, flp: flp4); |
2873 | |
2874 | if (IS_ERR(ptr: rt)) |
2875 | return rt; |
2876 | |
2877 | if (flp4->flowi4_proto) { |
2878 | flp4->flowi4_oif = rt->dst.dev->ifindex; |
2879 | rt = (struct rtable *)xfrm_lookup_route(net, dst_orig: &rt->dst, |
2880 | fl: flowi4_to_flowi(fl4: flp4), |
2881 | sk, flags: 0); |
2882 | } |
2883 | |
2884 | return rt; |
2885 | } |
2886 | EXPORT_SYMBOL_GPL(ip_route_output_flow); |
2887 | |
2888 | /* called with rcu_read_lock held */ |
2889 | static int rt_fill_info(struct net *net, __be32 dst, __be32 src, |
2890 | struct rtable *rt, u32 table_id, struct flowi4 *fl4, |
2891 | struct sk_buff *skb, u32 portid, u32 seq, |
2892 | unsigned int flags) |
2893 | { |
2894 | struct rtmsg *r; |
2895 | struct nlmsghdr *nlh; |
2896 | unsigned long expires = 0; |
2897 | u32 error; |
2898 | u32 metrics[RTAX_MAX]; |
2899 | |
2900 | nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, payload: sizeof(*r), flags); |
2901 | if (!nlh) |
2902 | return -EMSGSIZE; |
2903 | |
2904 | r = nlmsg_data(nlh); |
2905 | r->rtm_family = AF_INET; |
2906 | r->rtm_dst_len = 32; |
2907 | r->rtm_src_len = 0; |
2908 | r->rtm_tos = fl4 ? fl4->flowi4_tos : 0; |
2909 | r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT; |
2910 | if (nla_put_u32(skb, attrtype: RTA_TABLE, value: table_id)) |
2911 | goto nla_put_failure; |
2912 | r->rtm_type = rt->rt_type; |
2913 | r->rtm_scope = RT_SCOPE_UNIVERSE; |
2914 | r->rtm_protocol = RTPROT_UNSPEC; |
2915 | r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; |
2916 | if (rt->rt_flags & RTCF_NOTIFY) |
2917 | r->rtm_flags |= RTM_F_NOTIFY; |
2918 | if (IPCB(skb)->flags & IPSKB_DOREDIRECT) |
2919 | r->rtm_flags |= RTCF_DOREDIRECT; |
2920 | |
2921 | if (nla_put_in_addr(skb, attrtype: RTA_DST, addr: dst)) |
2922 | goto nla_put_failure; |
2923 | if (src) { |
2924 | r->rtm_src_len = 32; |
2925 | if (nla_put_in_addr(skb, attrtype: RTA_SRC, addr: src)) |
2926 | goto nla_put_failure; |
2927 | } |
2928 | if (rt->dst.dev && |
2929 | nla_put_u32(skb, attrtype: RTA_OIF, value: rt->dst.dev->ifindex)) |
2930 | goto nla_put_failure; |
2931 | if (rt->dst.lwtstate && |
2932 | lwtunnel_fill_encap(skb, lwtstate: rt->dst.lwtstate, encap_attr: RTA_ENCAP, encap_type_attr: RTA_ENCAP_TYPE) < 0) |
2933 | goto nla_put_failure; |
2934 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2935 | if (rt->dst.tclassid && |
2936 | nla_put_u32(skb, attrtype: RTA_FLOW, value: rt->dst.tclassid)) |
2937 | goto nla_put_failure; |
2938 | #endif |
2939 | if (fl4 && !rt_is_input_route(rt) && |
2940 | fl4->saddr != src) { |
2941 | if (nla_put_in_addr(skb, attrtype: RTA_PREFSRC, addr: fl4->saddr)) |
2942 | goto nla_put_failure; |
2943 | } |
2944 | if (rt->rt_uses_gateway) { |
2945 | if (rt->rt_gw_family == AF_INET && |
2946 | nla_put_in_addr(skb, attrtype: RTA_GATEWAY, addr: rt->rt_gw4)) { |
2947 | goto nla_put_failure; |
2948 | } else if (rt->rt_gw_family == AF_INET6) { |
2949 | int alen = sizeof(struct in6_addr); |
2950 | struct nlattr *nla; |
2951 | struct rtvia *via; |
2952 | |
2953 | nla = nla_reserve(skb, attrtype: RTA_VIA, attrlen: alen + 2); |
2954 | if (!nla) |
2955 | goto nla_put_failure; |
2956 | |
2957 | via = nla_data(nla); |
2958 | via->rtvia_family = AF_INET6; |
2959 | memcpy(via->rtvia_addr, &rt->rt_gw6, alen); |
2960 | } |
2961 | } |
2962 | |
2963 | expires = rt->dst.expires; |
2964 | if (expires) { |
2965 | unsigned long now = jiffies; |
2966 | |
2967 | if (time_before(now, expires)) |
2968 | expires -= now; |
2969 | else |
2970 | expires = 0; |
2971 | } |
2972 | |
2973 | memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); |
2974 | if (rt->rt_pmtu && expires) |
2975 | metrics[RTAX_MTU - 1] = rt->rt_pmtu; |
2976 | if (rt->rt_mtu_locked && expires) |
2977 | metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU); |
2978 | if (rtnetlink_put_metrics(skb, metrics) < 0) |
2979 | goto nla_put_failure; |
2980 | |
2981 | if (fl4) { |
2982 | if (fl4->flowi4_mark && |
2983 | nla_put_u32(skb, attrtype: RTA_MARK, value: fl4->flowi4_mark)) |
2984 | goto nla_put_failure; |
2985 | |
2986 | if (!uid_eq(left: fl4->flowi4_uid, INVALID_UID) && |
2987 | nla_put_u32(skb, attrtype: RTA_UID, |
2988 | value: from_kuid_munged(current_user_ns(), |
2989 | uid: fl4->flowi4_uid))) |
2990 | goto nla_put_failure; |
2991 | |
2992 | if (rt_is_input_route(rt)) { |
2993 | #ifdef CONFIG_IP_MROUTE |
2994 | if (ipv4_is_multicast(addr: dst) && |
2995 | !ipv4_is_local_multicast(addr: dst) && |
2996 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { |
2997 | int err = ipmr_get_route(net, skb, |
2998 | saddr: fl4->saddr, daddr: fl4->daddr, |
2999 | rtm: r, portid); |
3000 | |
3001 | if (err <= 0) { |
3002 | if (err == 0) |
3003 | return 0; |
3004 | goto nla_put_failure; |
3005 | } |
3006 | } else |
3007 | #endif |
3008 | if (nla_put_u32(skb, attrtype: RTA_IIF, value: fl4->flowi4_iif)) |
3009 | goto nla_put_failure; |
3010 | } |
3011 | } |
3012 | |
3013 | error = rt->dst.error; |
3014 | |
3015 | if (rtnl_put_cacheinfo(skb, dst: &rt->dst, id: 0, expires, error) < 0) |
3016 | goto nla_put_failure; |
3017 | |
3018 | nlmsg_end(skb, nlh); |
3019 | return 0; |
3020 | |
3021 | nla_put_failure: |
3022 | nlmsg_cancel(skb, nlh); |
3023 | return -EMSGSIZE; |
3024 | } |
3025 | |
3026 | static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb, |
3027 | struct netlink_callback *cb, u32 table_id, |
3028 | struct fnhe_hash_bucket *bucket, int genid, |
3029 | int *fa_index, int fa_start, unsigned int flags) |
3030 | { |
3031 | int i; |
3032 | |
3033 | for (i = 0; i < FNHE_HASH_SIZE; i++) { |
3034 | struct fib_nh_exception *fnhe; |
3035 | |
3036 | for (fnhe = rcu_dereference(bucket[i].chain); fnhe; |
3037 | fnhe = rcu_dereference(fnhe->fnhe_next)) { |
3038 | struct rtable *rt; |
3039 | int err; |
3040 | |
3041 | if (*fa_index < fa_start) |
3042 | goto next; |
3043 | |
3044 | if (fnhe->fnhe_genid != genid) |
3045 | goto next; |
3046 | |
3047 | if (fnhe->fnhe_expires && |
3048 | time_after(jiffies, fnhe->fnhe_expires)) |
3049 | goto next; |
3050 | |
3051 | rt = rcu_dereference(fnhe->fnhe_rth_input); |
3052 | if (!rt) |
3053 | rt = rcu_dereference(fnhe->fnhe_rth_output); |
3054 | if (!rt) |
3055 | goto next; |
3056 | |
3057 | err = rt_fill_info(net, dst: fnhe->fnhe_daddr, src: 0, rt, |
3058 | table_id, NULL, skb, |
3059 | NETLINK_CB(cb->skb).portid, |
3060 | seq: cb->nlh->nlmsg_seq, flags); |
3061 | if (err) |
3062 | return err; |
3063 | next: |
3064 | (*fa_index)++; |
3065 | } |
3066 | } |
3067 | |
3068 | return 0; |
3069 | } |
3070 | |
3071 | int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb, |
3072 | u32 table_id, struct fib_info *fi, |
3073 | int *fa_index, int fa_start, unsigned int flags) |
3074 | { |
3075 | struct net *net = sock_net(sk: cb->skb->sk); |
3076 | int nhsel, genid = fnhe_genid(net); |
3077 | |
3078 | for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) { |
3079 | struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel); |
3080 | struct fnhe_hash_bucket *bucket; |
3081 | int err; |
3082 | |
3083 | if (nhc->nhc_flags & RTNH_F_DEAD) |
3084 | continue; |
3085 | |
3086 | rcu_read_lock(); |
3087 | bucket = rcu_dereference(nhc->nhc_exceptions); |
3088 | err = 0; |
3089 | if (bucket) |
3090 | err = fnhe_dump_bucket(net, skb, cb, table_id, bucket, |
3091 | genid, fa_index, fa_start, |
3092 | flags); |
3093 | rcu_read_unlock(); |
3094 | if (err) |
3095 | return err; |
3096 | } |
3097 | |
3098 | return 0; |
3099 | } |
3100 | |
3101 | static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst, |
3102 | u8 ip_proto, __be16 sport, |
3103 | __be16 dport) |
3104 | { |
3105 | struct sk_buff *skb; |
3106 | struct iphdr *iph; |
3107 | |
3108 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); |
3109 | if (!skb) |
3110 | return NULL; |
3111 | |
3112 | /* Reserve room for dummy headers, this skb can pass |
3113 | * through good chunk of routing engine. |
3114 | */ |
3115 | skb_reset_mac_header(skb); |
3116 | skb_reset_network_header(skb); |
3117 | skb->protocol = htons(ETH_P_IP); |
3118 | iph = skb_put(skb, len: sizeof(struct iphdr)); |
3119 | iph->protocol = ip_proto; |
3120 | iph->saddr = src; |
3121 | iph->daddr = dst; |
3122 | iph->version = 0x4; |
3123 | iph->frag_off = 0; |
3124 | iph->ihl = 0x5; |
3125 | skb_set_transport_header(skb, offset: skb->len); |
3126 | |
3127 | switch (iph->protocol) { |
3128 | case IPPROTO_UDP: { |
3129 | struct udphdr *udph; |
3130 | |
3131 | udph = skb_put_zero(skb, len: sizeof(struct udphdr)); |
3132 | udph->source = sport; |
3133 | udph->dest = dport; |
3134 | udph->len = htons(sizeof(struct udphdr)); |
3135 | udph->check = 0; |
3136 | break; |
3137 | } |
3138 | case IPPROTO_TCP: { |
3139 | struct tcphdr *tcph; |
3140 | |
3141 | tcph = skb_put_zero(skb, len: sizeof(struct tcphdr)); |
3142 | tcph->source = sport; |
3143 | tcph->dest = dport; |
3144 | tcph->doff = sizeof(struct tcphdr) / 4; |
3145 | tcph->rst = 1; |
3146 | tcph->check = ~tcp_v4_check(len: sizeof(struct tcphdr), |
3147 | saddr: src, daddr: dst, base: 0); |
3148 | break; |
3149 | } |
3150 | case IPPROTO_ICMP: { |
3151 | struct icmphdr *icmph; |
3152 | |
3153 | icmph = skb_put_zero(skb, len: sizeof(struct icmphdr)); |
3154 | icmph->type = ICMP_ECHO; |
3155 | icmph->code = 0; |
3156 | } |
3157 | } |
3158 | |
3159 | return skb; |
3160 | } |
3161 | |
3162 | static int inet_rtm_valid_getroute_req(struct sk_buff *skb, |
3163 | const struct nlmsghdr *nlh, |
3164 | struct nlattr **tb, |
3165 | struct netlink_ext_ack *extack) |
3166 | { |
3167 | struct rtmsg *rtm; |
3168 | int i, err; |
3169 | |
3170 | if (nlh->nlmsg_len < nlmsg_msg_size(payload: sizeof(*rtm))) { |
3171 | NL_SET_ERR_MSG(extack, |
3172 | "ipv4: Invalid header for route get request" ); |
3173 | return -EINVAL; |
3174 | } |
3175 | |
3176 | if (!netlink_strict_get_check(skb)) |
3177 | return nlmsg_parse_deprecated(nlh, hdrlen: sizeof(*rtm), tb, RTA_MAX, |
3178 | policy: rtm_ipv4_policy, extack); |
3179 | |
3180 | rtm = nlmsg_data(nlh); |
3181 | if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || |
3182 | (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || |
3183 | rtm->rtm_table || rtm->rtm_protocol || |
3184 | rtm->rtm_scope || rtm->rtm_type) { |
3185 | NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request" ); |
3186 | return -EINVAL; |
3187 | } |
3188 | |
3189 | if (rtm->rtm_flags & ~(RTM_F_NOTIFY | |
3190 | RTM_F_LOOKUP_TABLE | |
3191 | RTM_F_FIB_MATCH)) { |
3192 | NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request" ); |
3193 | return -EINVAL; |
3194 | } |
3195 | |
3196 | err = nlmsg_parse_deprecated_strict(nlh, hdrlen: sizeof(*rtm), tb, RTA_MAX, |
3197 | policy: rtm_ipv4_policy, extack); |
3198 | if (err) |
3199 | return err; |
3200 | |
3201 | if ((tb[RTA_SRC] && !rtm->rtm_src_len) || |
3202 | (tb[RTA_DST] && !rtm->rtm_dst_len)) { |
3203 | NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4" ); |
3204 | return -EINVAL; |
3205 | } |
3206 | |
3207 | for (i = 0; i <= RTA_MAX; i++) { |
3208 | if (!tb[i]) |
3209 | continue; |
3210 | |
3211 | switch (i) { |
3212 | case RTA_IIF: |
3213 | case RTA_OIF: |
3214 | case RTA_SRC: |
3215 | case RTA_DST: |
3216 | case RTA_IP_PROTO: |
3217 | case RTA_SPORT: |
3218 | case RTA_DPORT: |
3219 | case RTA_MARK: |
3220 | case RTA_UID: |
3221 | break; |
3222 | default: |
3223 | NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request" ); |
3224 | return -EINVAL; |
3225 | } |
3226 | } |
3227 | |
3228 | return 0; |
3229 | } |
3230 | |
3231 | static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, |
3232 | struct netlink_ext_ack *extack) |
3233 | { |
3234 | struct net *net = sock_net(sk: in_skb->sk); |
3235 | struct nlattr *tb[RTA_MAX+1]; |
3236 | u32 table_id = RT_TABLE_MAIN; |
3237 | __be16 sport = 0, dport = 0; |
3238 | struct fib_result res = {}; |
3239 | u8 ip_proto = IPPROTO_UDP; |
3240 | struct rtable *rt = NULL; |
3241 | struct sk_buff *skb; |
3242 | struct rtmsg *rtm; |
3243 | struct flowi4 fl4 = {}; |
3244 | __be32 dst = 0; |
3245 | __be32 src = 0; |
3246 | kuid_t uid; |
3247 | u32 iif; |
3248 | int err; |
3249 | int mark; |
3250 | |
3251 | err = inet_rtm_valid_getroute_req(skb: in_skb, nlh, tb, extack); |
3252 | if (err < 0) |
3253 | return err; |
3254 | |
3255 | rtm = nlmsg_data(nlh); |
3256 | src = tb[RTA_SRC] ? nla_get_in_addr(nla: tb[RTA_SRC]) : 0; |
3257 | dst = tb[RTA_DST] ? nla_get_in_addr(nla: tb[RTA_DST]) : 0; |
3258 | iif = tb[RTA_IIF] ? nla_get_u32(nla: tb[RTA_IIF]) : 0; |
3259 | mark = tb[RTA_MARK] ? nla_get_u32(nla: tb[RTA_MARK]) : 0; |
3260 | if (tb[RTA_UID]) |
3261 | uid = make_kuid(current_user_ns(), uid: nla_get_u32(nla: tb[RTA_UID])); |
3262 | else |
3263 | uid = (iif ? INVALID_UID : current_uid()); |
3264 | |
3265 | if (tb[RTA_IP_PROTO]) { |
3266 | err = rtm_getroute_parse_ip_proto(attr: tb[RTA_IP_PROTO], |
3267 | ip_proto: &ip_proto, AF_INET, extack); |
3268 | if (err) |
3269 | return err; |
3270 | } |
3271 | |
3272 | if (tb[RTA_SPORT]) |
3273 | sport = nla_get_be16(nla: tb[RTA_SPORT]); |
3274 | |
3275 | if (tb[RTA_DPORT]) |
3276 | dport = nla_get_be16(nla: tb[RTA_DPORT]); |
3277 | |
3278 | skb = inet_rtm_getroute_build_skb(src, dst, ip_proto, sport, dport); |
3279 | if (!skb) |
3280 | return -ENOBUFS; |
3281 | |
3282 | fl4.daddr = dst; |
3283 | fl4.saddr = src; |
3284 | fl4.flowi4_tos = rtm->rtm_tos & IPTOS_RT_MASK; |
3285 | fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(nla: tb[RTA_OIF]) : 0; |
3286 | fl4.flowi4_mark = mark; |
3287 | fl4.flowi4_uid = uid; |
3288 | if (sport) |
3289 | fl4.fl4_sport = sport; |
3290 | if (dport) |
3291 | fl4.fl4_dport = dport; |
3292 | fl4.flowi4_proto = ip_proto; |
3293 | |
3294 | rcu_read_lock(); |
3295 | |
3296 | if (iif) { |
3297 | struct net_device *dev; |
3298 | |
3299 | dev = dev_get_by_index_rcu(net, ifindex: iif); |
3300 | if (!dev) { |
3301 | err = -ENODEV; |
3302 | goto errout_rcu; |
3303 | } |
3304 | |
3305 | fl4.flowi4_iif = iif; /* for rt_fill_info */ |
3306 | skb->dev = dev; |
3307 | skb->mark = mark; |
3308 | err = ip_route_input_rcu(skb, daddr: dst, saddr: src, |
3309 | tos: rtm->rtm_tos & IPTOS_RT_MASK, dev, |
3310 | res: &res); |
3311 | |
3312 | rt = skb_rtable(skb); |
3313 | if (err == 0 && rt->dst.error) |
3314 | err = -rt->dst.error; |
3315 | } else { |
3316 | fl4.flowi4_iif = LOOPBACK_IFINDEX; |
3317 | skb->dev = net->loopback_dev; |
3318 | rt = ip_route_output_key_hash_rcu(net, fl4: &fl4, res: &res, skb); |
3319 | err = 0; |
3320 | if (IS_ERR(ptr: rt)) |
3321 | err = PTR_ERR(ptr: rt); |
3322 | else |
3323 | skb_dst_set(skb, dst: &rt->dst); |
3324 | } |
3325 | |
3326 | if (err) |
3327 | goto errout_rcu; |
3328 | |
3329 | if (rtm->rtm_flags & RTM_F_NOTIFY) |
3330 | rt->rt_flags |= RTCF_NOTIFY; |
3331 | |
3332 | if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE) |
3333 | table_id = res.table ? res.table->tb_id : 0; |
3334 | |
3335 | /* reset skb for netlink reply msg */ |
3336 | skb_trim(skb, len: 0); |
3337 | skb_reset_network_header(skb); |
3338 | skb_reset_transport_header(skb); |
3339 | skb_reset_mac_header(skb); |
3340 | |
3341 | if (rtm->rtm_flags & RTM_F_FIB_MATCH) { |
3342 | struct fib_rt_info fri; |
3343 | |
3344 | if (!res.fi) { |
3345 | err = fib_props[res.type].error; |
3346 | if (!err) |
3347 | err = -EHOSTUNREACH; |
3348 | goto errout_rcu; |
3349 | } |
3350 | fri.fi = res.fi; |
3351 | fri.tb_id = table_id; |
3352 | fri.dst = res.prefix; |
3353 | fri.dst_len = res.prefixlen; |
3354 | fri.dscp = inet_dsfield_to_dscp(dsfield: fl4.flowi4_tos); |
3355 | fri.type = rt->rt_type; |
3356 | fri.offload = 0; |
3357 | fri.trap = 0; |
3358 | fri.offload_failed = 0; |
3359 | if (res.fa_head) { |
3360 | struct fib_alias *fa; |
3361 | |
3362 | hlist_for_each_entry_rcu(fa, res.fa_head, fa_list) { |
3363 | u8 slen = 32 - fri.dst_len; |
3364 | |
3365 | if (fa->fa_slen == slen && |
3366 | fa->tb_id == fri.tb_id && |
3367 | fa->fa_dscp == fri.dscp && |
3368 | fa->fa_info == res.fi && |
3369 | fa->fa_type == fri.type) { |
3370 | fri.offload = READ_ONCE(fa->offload); |
3371 | fri.trap = READ_ONCE(fa->trap); |
3372 | fri.offload_failed = |
3373 | READ_ONCE(fa->offload_failed); |
3374 | break; |
3375 | } |
3376 | } |
3377 | } |
3378 | err = fib_dump_info(skb, NETLINK_CB(in_skb).portid, |
3379 | seq: nlh->nlmsg_seq, RTM_NEWROUTE, fri: &fri, flags: 0); |
3380 | } else { |
3381 | err = rt_fill_info(net, dst, src, rt, table_id, fl4: &fl4, skb, |
3382 | NETLINK_CB(in_skb).portid, |
3383 | seq: nlh->nlmsg_seq, flags: 0); |
3384 | } |
3385 | if (err < 0) |
3386 | goto errout_rcu; |
3387 | |
3388 | rcu_read_unlock(); |
3389 | |
3390 | err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); |
3391 | |
3392 | errout_free: |
3393 | return err; |
3394 | errout_rcu: |
3395 | rcu_read_unlock(); |
3396 | kfree_skb(skb); |
3397 | goto errout_free; |
3398 | } |
3399 | |
3400 | void ip_rt_multicast_event(struct in_device *in_dev) |
3401 | { |
3402 | rt_cache_flush(net: dev_net(dev: in_dev->dev)); |
3403 | } |
3404 | |
3405 | #ifdef CONFIG_SYSCTL |
3406 | static int ip_rt_gc_interval __read_mostly = 60 * HZ; |
3407 | static int ip_rt_gc_min_interval __read_mostly = HZ / 2; |
3408 | static int ip_rt_gc_elasticity __read_mostly = 8; |
3409 | static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU; |
3410 | |
3411 | static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write, |
3412 | void *buffer, size_t *lenp, loff_t *ppos) |
3413 | { |
3414 | struct net *net = (struct net *)__ctl->extra1; |
3415 | |
3416 | if (write) { |
3417 | rt_cache_flush(net); |
3418 | fnhe_genid_bump(net); |
3419 | return 0; |
3420 | } |
3421 | |
3422 | return -EINVAL; |
3423 | } |
3424 | |
3425 | static struct ctl_table ipv4_route_table[] = { |
3426 | { |
3427 | .procname = "gc_thresh" , |
3428 | .data = &ipv4_dst_ops.gc_thresh, |
3429 | .maxlen = sizeof(int), |
3430 | .mode = 0644, |
3431 | .proc_handler = proc_dointvec, |
3432 | }, |
3433 | { |
3434 | .procname = "max_size" , |
3435 | .data = &ip_rt_max_size, |
3436 | .maxlen = sizeof(int), |
3437 | .mode = 0644, |
3438 | .proc_handler = proc_dointvec, |
3439 | }, |
3440 | { |
3441 | /* Deprecated. Use gc_min_interval_ms */ |
3442 | |
3443 | .procname = "gc_min_interval" , |
3444 | .data = &ip_rt_gc_min_interval, |
3445 | .maxlen = sizeof(int), |
3446 | .mode = 0644, |
3447 | .proc_handler = proc_dointvec_jiffies, |
3448 | }, |
3449 | { |
3450 | .procname = "gc_min_interval_ms" , |
3451 | .data = &ip_rt_gc_min_interval, |
3452 | .maxlen = sizeof(int), |
3453 | .mode = 0644, |
3454 | .proc_handler = proc_dointvec_ms_jiffies, |
3455 | }, |
3456 | { |
3457 | .procname = "gc_timeout" , |
3458 | .data = &ip_rt_gc_timeout, |
3459 | .maxlen = sizeof(int), |
3460 | .mode = 0644, |
3461 | .proc_handler = proc_dointvec_jiffies, |
3462 | }, |
3463 | { |
3464 | .procname = "gc_interval" , |
3465 | .data = &ip_rt_gc_interval, |
3466 | .maxlen = sizeof(int), |
3467 | .mode = 0644, |
3468 | .proc_handler = proc_dointvec_jiffies, |
3469 | }, |
3470 | { |
3471 | .procname = "redirect_load" , |
3472 | .data = &ip_rt_redirect_load, |
3473 | .maxlen = sizeof(int), |
3474 | .mode = 0644, |
3475 | .proc_handler = proc_dointvec, |
3476 | }, |
3477 | { |
3478 | .procname = "redirect_number" , |
3479 | .data = &ip_rt_redirect_number, |
3480 | .maxlen = sizeof(int), |
3481 | .mode = 0644, |
3482 | .proc_handler = proc_dointvec, |
3483 | }, |
3484 | { |
3485 | .procname = "redirect_silence" , |
3486 | .data = &ip_rt_redirect_silence, |
3487 | .maxlen = sizeof(int), |
3488 | .mode = 0644, |
3489 | .proc_handler = proc_dointvec, |
3490 | }, |
3491 | { |
3492 | .procname = "error_cost" , |
3493 | .data = &ip_rt_error_cost, |
3494 | .maxlen = sizeof(int), |
3495 | .mode = 0644, |
3496 | .proc_handler = proc_dointvec, |
3497 | }, |
3498 | { |
3499 | .procname = "error_burst" , |
3500 | .data = &ip_rt_error_burst, |
3501 | .maxlen = sizeof(int), |
3502 | .mode = 0644, |
3503 | .proc_handler = proc_dointvec, |
3504 | }, |
3505 | { |
3506 | .procname = "gc_elasticity" , |
3507 | .data = &ip_rt_gc_elasticity, |
3508 | .maxlen = sizeof(int), |
3509 | .mode = 0644, |
3510 | .proc_handler = proc_dointvec, |
3511 | }, |
3512 | { } |
3513 | }; |
3514 | |
3515 | static const char ipv4_route_flush_procname[] = "flush" ; |
3516 | |
3517 | static struct ctl_table ipv4_route_netns_table[] = { |
3518 | { |
3519 | .procname = ipv4_route_flush_procname, |
3520 | .maxlen = sizeof(int), |
3521 | .mode = 0200, |
3522 | .proc_handler = ipv4_sysctl_rtcache_flush, |
3523 | }, |
3524 | { |
3525 | .procname = "min_pmtu" , |
3526 | .data = &init_net.ipv4.ip_rt_min_pmtu, |
3527 | .maxlen = sizeof(int), |
3528 | .mode = 0644, |
3529 | .proc_handler = proc_dointvec_minmax, |
3530 | .extra1 = &ip_min_valid_pmtu, |
3531 | }, |
3532 | { |
3533 | .procname = "mtu_expires" , |
3534 | .data = &init_net.ipv4.ip_rt_mtu_expires, |
3535 | .maxlen = sizeof(int), |
3536 | .mode = 0644, |
3537 | .proc_handler = proc_dointvec_jiffies, |
3538 | }, |
3539 | { |
3540 | .procname = "min_adv_mss" , |
3541 | .data = &init_net.ipv4.ip_rt_min_advmss, |
3542 | .maxlen = sizeof(int), |
3543 | .mode = 0644, |
3544 | .proc_handler = proc_dointvec, |
3545 | }, |
3546 | { }, |
3547 | }; |
3548 | |
3549 | static __net_init int sysctl_route_net_init(struct net *net) |
3550 | { |
3551 | struct ctl_table *tbl; |
3552 | size_t table_size = ARRAY_SIZE(ipv4_route_netns_table); |
3553 | |
3554 | tbl = ipv4_route_netns_table; |
3555 | if (!net_eq(net1: net, net2: &init_net)) { |
3556 | int i; |
3557 | |
3558 | tbl = kmemdup(p: tbl, size: sizeof(ipv4_route_netns_table), GFP_KERNEL); |
3559 | if (!tbl) |
3560 | goto err_dup; |
3561 | |
3562 | /* Don't export non-whitelisted sysctls to unprivileged users */ |
3563 | if (net->user_ns != &init_user_ns) { |
3564 | if (tbl[0].procname != ipv4_route_flush_procname) { |
3565 | tbl[0].procname = NULL; |
3566 | table_size = 0; |
3567 | } |
3568 | } |
3569 | |
3570 | /* Update the variables to point into the current struct net |
3571 | * except for the first element flush |
3572 | */ |
3573 | for (i = 1; i < ARRAY_SIZE(ipv4_route_netns_table) - 1; i++) |
3574 | tbl[i].data += (void *)net - (void *)&init_net; |
3575 | } |
3576 | tbl[0].extra1 = net; |
3577 | |
3578 | net->ipv4.route_hdr = register_net_sysctl_sz(net, path: "net/ipv4/route" , |
3579 | table: tbl, table_size); |
3580 | if (!net->ipv4.route_hdr) |
3581 | goto err_reg; |
3582 | return 0; |
3583 | |
3584 | err_reg: |
3585 | if (tbl != ipv4_route_netns_table) |
3586 | kfree(objp: tbl); |
3587 | err_dup: |
3588 | return -ENOMEM; |
3589 | } |
3590 | |
3591 | static __net_exit void sysctl_route_net_exit(struct net *net) |
3592 | { |
3593 | struct ctl_table *tbl; |
3594 | |
3595 | tbl = net->ipv4.route_hdr->ctl_table_arg; |
3596 | unregister_net_sysctl_table(header: net->ipv4.route_hdr); |
3597 | BUG_ON(tbl == ipv4_route_netns_table); |
3598 | kfree(objp: tbl); |
3599 | } |
3600 | |
3601 | static __net_initdata struct pernet_operations sysctl_route_ops = { |
3602 | .init = sysctl_route_net_init, |
3603 | .exit = sysctl_route_net_exit, |
3604 | }; |
3605 | #endif |
3606 | |
3607 | static __net_init int netns_ip_rt_init(struct net *net) |
3608 | { |
3609 | /* Set default value for namespaceified sysctls */ |
3610 | net->ipv4.ip_rt_min_pmtu = DEFAULT_MIN_PMTU; |
3611 | net->ipv4.ip_rt_mtu_expires = DEFAULT_MTU_EXPIRES; |
3612 | net->ipv4.ip_rt_min_advmss = DEFAULT_MIN_ADVMSS; |
3613 | return 0; |
3614 | } |
3615 | |
3616 | static struct pernet_operations __net_initdata ip_rt_ops = { |
3617 | .init = netns_ip_rt_init, |
3618 | }; |
3619 | |
3620 | static __net_init int rt_genid_init(struct net *net) |
3621 | { |
3622 | atomic_set(v: &net->ipv4.rt_genid, i: 0); |
3623 | atomic_set(v: &net->fnhe_genid, i: 0); |
3624 | atomic_set(v: &net->ipv4.dev_addr_genid, i: get_random_u32()); |
3625 | return 0; |
3626 | } |
3627 | |
3628 | static __net_initdata struct pernet_operations rt_genid_ops = { |
3629 | .init = rt_genid_init, |
3630 | }; |
3631 | |
3632 | static int __net_init ipv4_inetpeer_init(struct net *net) |
3633 | { |
3634 | struct inet_peer_base *bp = kmalloc(size: sizeof(*bp), GFP_KERNEL); |
3635 | |
3636 | if (!bp) |
3637 | return -ENOMEM; |
3638 | inet_peer_base_init(bp); |
3639 | net->ipv4.peers = bp; |
3640 | return 0; |
3641 | } |
3642 | |
3643 | static void __net_exit ipv4_inetpeer_exit(struct net *net) |
3644 | { |
3645 | struct inet_peer_base *bp = net->ipv4.peers; |
3646 | |
3647 | net->ipv4.peers = NULL; |
3648 | inetpeer_invalidate_tree(bp); |
3649 | kfree(objp: bp); |
3650 | } |
3651 | |
3652 | static __net_initdata struct pernet_operations ipv4_inetpeer_ops = { |
3653 | .init = ipv4_inetpeer_init, |
3654 | .exit = ipv4_inetpeer_exit, |
3655 | }; |
3656 | |
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Per-CPU array of 256 struct ip_rt_acct, allocated in ip_rt_init(). */
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
3660 | |
3661 | int __init ip_rt_init(void) |
3662 | { |
3663 | void *idents_hash; |
3664 | int cpu; |
3665 | |
3666 | /* For modern hosts, this will use 2 MB of memory */ |
3667 | idents_hash = alloc_large_system_hash(tablename: "IP idents" , |
3668 | bucketsize: sizeof(*ip_idents) + sizeof(*ip_tstamps), |
3669 | numentries: 0, |
3670 | scale: 16, /* one bucket per 64 KB */ |
3671 | HASH_ZERO, |
3672 | NULL, |
3673 | hash_mask: &ip_idents_mask, |
3674 | low_limit: 2048, |
3675 | high_limit: 256*1024); |
3676 | |
3677 | ip_idents = idents_hash; |
3678 | |
3679 | get_random_bytes(buf: ip_idents, len: (ip_idents_mask + 1) * sizeof(*ip_idents)); |
3680 | |
3681 | ip_tstamps = idents_hash + (ip_idents_mask + 1) * sizeof(*ip_idents); |
3682 | |
3683 | for_each_possible_cpu(cpu) { |
3684 | struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); |
3685 | |
3686 | INIT_LIST_HEAD(list: &ul->head); |
3687 | INIT_LIST_HEAD(list: &ul->quarantine); |
3688 | spin_lock_init(&ul->lock); |
3689 | } |
3690 | #ifdef CONFIG_IP_ROUTE_CLASSID |
3691 | ip_rt_acct = __alloc_percpu(size: 256 * sizeof(struct ip_rt_acct), align: __alignof__(struct ip_rt_acct)); |
3692 | if (!ip_rt_acct) |
3693 | panic(fmt: "IP: failed to allocate ip_rt_acct\n" ); |
3694 | #endif |
3695 | |
3696 | ipv4_dst_ops.kmem_cachep = |
3697 | kmem_cache_create(name: "ip_dst_cache" , size: sizeof(struct rtable), align: 0, |
3698 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); |
3699 | |
3700 | ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep; |
3701 | |
3702 | if (dst_entries_init(dst: &ipv4_dst_ops) < 0) |
3703 | panic(fmt: "IP: failed to allocate ipv4_dst_ops counter\n" ); |
3704 | |
3705 | if (dst_entries_init(dst: &ipv4_dst_blackhole_ops) < 0) |
3706 | panic(fmt: "IP: failed to allocate ipv4_dst_blackhole_ops counter\n" ); |
3707 | |
3708 | ipv4_dst_ops.gc_thresh = ~0; |
3709 | ip_rt_max_size = INT_MAX; |
3710 | |
3711 | devinet_init(); |
3712 | ip_fib_init(); |
3713 | |
3714 | if (ip_rt_proc_init()) |
3715 | pr_err("Unable to create route proc files\n" ); |
3716 | #ifdef CONFIG_XFRM |
3717 | xfrm_init(); |
3718 | xfrm4_init(); |
3719 | #endif |
3720 | rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, |
3721 | flags: RTNL_FLAG_DOIT_UNLOCKED); |
3722 | |
3723 | #ifdef CONFIG_SYSCTL |
3724 | register_pernet_subsys(&sysctl_route_ops); |
3725 | #endif |
3726 | register_pernet_subsys(&ip_rt_ops); |
3727 | register_pernet_subsys(&rt_genid_ops); |
3728 | register_pernet_subsys(&ipv4_inetpeer_ops); |
3729 | return 0; |
3730 | } |
3731 | |
3732 | #ifdef CONFIG_SYSCTL |
3733 | /* |
3734 | * We really need to sanitize the damn ipv4 init order, then all |
3735 | * this nonsense will go away. |
3736 | */ |
3737 | void __init ip_static_sysctl_init(void) |
3738 | { |
3739 | register_net_sysctl(&init_net, "net/ipv4/route" , ipv4_route_table); |
3740 | } |
3741 | #endif |
3742 | |