1 | /* |
2 | * Handle firewalling |
3 | * Linux ethernet bridge |
4 | * |
5 | * Authors: |
6 | * Lennert Buytenhek <buytenh@gnu.org> |
7 | * Bart De Schuymer <bdschuym@pandora.be> |
8 | * |
9 | * This program is free software; you can redistribute it and/or |
10 | * modify it under the terms of the GNU General Public License |
11 | * as published by the Free Software Foundation; either version |
12 | * 2 of the License, or (at your option) any later version. |
13 | * |
14 | * Lennert dedicates this file to Kerstin Wurdinger. |
15 | */ |
16 | |
17 | #include <linux/module.h> |
18 | #include <linux/kernel.h> |
19 | #include <linux/slab.h> |
20 | #include <linux/ip.h> |
21 | #include <linux/netdevice.h> |
22 | #include <linux/skbuff.h> |
23 | #include <linux/if_arp.h> |
24 | #include <linux/if_ether.h> |
25 | #include <linux/if_vlan.h> |
26 | #include <linux/if_pppox.h> |
27 | #include <linux/ppp_defs.h> |
28 | #include <linux/netfilter_bridge.h> |
29 | #include <uapi/linux/netfilter_bridge.h> |
30 | #include <linux/netfilter_ipv4.h> |
31 | #include <linux/netfilter_ipv6.h> |
32 | #include <linux/netfilter_arp.h> |
33 | #include <linux/in_route.h> |
34 | #include <linux/rculist.h> |
35 | #include <linux/inetdevice.h> |
36 | |
37 | #include <net/ip.h> |
38 | #include <net/ipv6.h> |
39 | #include <net/addrconf.h> |
40 | #include <net/route.h> |
41 | #include <net/netfilter/br_netfilter.h> |
42 | #include <net/netns/generic.h> |
43 | |
44 | #include <linux/uaccess.h> |
45 | #include "br_private.h" |
46 | #ifdef CONFIG_SYSCTL |
47 | #include <linux/sysctl.h> |
48 | #endif |
49 | |
50 | static unsigned int brnf_net_id __read_mostly; |
51 | |
52 | struct brnf_net { |
53 | bool enabled; |
54 | }; |
55 | |
#ifdef CONFIG_SYSCTL
/* Handle of this module's sysctl table.
 * NOTE(review): the declarator was missing in this copy; the name follows
 * the upstream kernel file - confirm against the sysctl registration code,
 * which is not visible here.
 */
static struct ctl_table_header *brnf_sysctl_header;

/* When non-zero, bridged IPv4 / IPv6 / ARP traffic is handed to the matching
 * netfilter family even if the per-bridge option is off.
 */
static int brnf_call_iptables __read_mostly = 1;
static int brnf_call_ip6tables __read_mostly = 1;
static int brnf_call_arptables __read_mostly = 1;

/* Gate the IS_VLAN_*() and IS_PPPOE_*() classifiers; zero by default. */
static int brnf_filter_vlan_tagged __read_mostly;
static int brnf_filter_pppoe_tagged __read_mostly;

/* When non-zero, brnf_get_logical_dev() looks for a VLAN device on top of
 * the bridge for VLAN-tagged packets.
 */
static int brnf_pass_vlan_indev __read_mostly;
#else
/* Without sysctl support the knobs collapse to compile-time constants. */
#define brnf_call_iptables 1
#define brnf_call_ip6tables 1
#define brnf_call_arptables 1
#define brnf_filter_vlan_tagged 0
#define brnf_filter_pppoe_tagged 0
#define brnf_pass_vlan_indev 0
#endif
72 | |
/* Classifiers for frames without a hardware-accelerated VLAN tag whose
 * skb->protocol directly names IPv4, IPv6 or ARP.
 */
#define IS_IP(skb) \
	(!skb_vlan_tag_present(skb) && (skb)->protocol == htons(ETH_P_IP))

#define IS_IPV6(skb) \
	(!skb_vlan_tag_present(skb) && (skb)->protocol == htons(ETH_P_IPV6))

#define IS_ARP(skb) \
	(!skb_vlan_tag_present(skb) && (skb)->protocol == htons(ETH_P_ARP))
81 | |
82 | static inline __be16 vlan_proto(const struct sk_buff *skb) |
83 | { |
84 | if (skb_vlan_tag_present(skb)) |
85 | return skb->protocol; |
86 | else if (skb->protocol == htons(ETH_P_8021Q)) |
87 | return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; |
88 | else |
89 | return 0; |
90 | } |
91 | |
/* True when the frame is VLAN tagged, carries @proto and filtering of
 * VLAN-tagged traffic is switched on.
 */
#define BRNF_VLAN_CARRIES(skb, proto) \
	(vlan_proto(skb) == htons(proto) && \
	 brnf_filter_vlan_tagged)

#define IS_VLAN_IP(skb)		BRNF_VLAN_CARRIES(skb, ETH_P_IP)

#define IS_VLAN_IPV6(skb)	BRNF_VLAN_CARRIES(skb, ETH_P_IPV6)

#define IS_VLAN_ARP(skb)	BRNF_VLAN_CARRIES(skb, ETH_P_ARP)
103 | |
104 | static inline __be16 pppoe_proto(const struct sk_buff *skb) |
105 | { |
106 | return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + |
107 | sizeof(struct pppoe_hdr))); |
108 | } |
109 | |
/* True when the frame is a PPPoE session frame carrying @ppp_proto and
 * filtering of PPPoE-encapsulated traffic is switched on.
 */
#define BRNF_PPPOE_CARRIES(skb, ppp_proto) \
	((skb)->protocol == htons(ETH_P_PPP_SES) && \
	 pppoe_proto(skb) == htons(ppp_proto) && \
	 brnf_filter_pppoe_tagged)

#define IS_PPPOE_IP(skb)	BRNF_PPPOE_CARRIES(skb, PPP_IP)

#define IS_PPPOE_IPV6(skb)	BRNF_PPPOE_CARRIES(skb, PPP_IPV6)
119 | |
120 | /* largest possible L2 header, see br_nf_dev_queue_xmit() */ |
121 | #define (PPPOE_SES_HLEN + ETH_HLEN) |
122 | |
123 | struct brnf_frag_data { |
124 | char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; |
125 | u8 encap_size; |
126 | u8 size; |
127 | u16 vlan_tci; |
128 | __be16 vlan_proto; |
129 | }; |
130 | |
131 | static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); |
132 | |
133 | static void nf_bridge_info_free(struct sk_buff *skb) |
134 | { |
135 | skb_ext_del(skb, SKB_EXT_BRIDGE_NF); |
136 | } |
137 | |
138 | static inline struct net_device *bridge_parent(const struct net_device *dev) |
139 | { |
140 | struct net_bridge_port *port; |
141 | |
142 | port = br_port_get_rcu(dev); |
143 | return port ? port->br->dev : NULL; |
144 | } |
145 | |
146 | static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) |
147 | { |
148 | return skb_ext_add(skb, SKB_EXT_BRIDGE_NF); |
149 | } |
150 | |
151 | unsigned int (const struct sk_buff *skb) |
152 | { |
153 | switch (skb->protocol) { |
154 | case __cpu_to_be16(ETH_P_8021Q): |
155 | return VLAN_HLEN; |
156 | case __cpu_to_be16(ETH_P_PPP_SES): |
157 | return PPPOE_SES_HLEN; |
158 | default: |
159 | return 0; |
160 | } |
161 | } |
162 | |
163 | static inline void (struct sk_buff *skb) |
164 | { |
165 | unsigned int len = nf_bridge_encap_header_len(skb); |
166 | |
167 | skb_pull(skb, len); |
168 | skb->network_header += len; |
169 | } |
170 | |
171 | static inline void (struct sk_buff *skb) |
172 | { |
173 | unsigned int len = nf_bridge_encap_header_len(skb); |
174 | |
175 | skb_pull_rcsum(skb, len); |
176 | skb->network_header += len; |
177 | } |
178 | |
179 | /* When handing a packet over to the IP layer |
180 | * check whether we have a skb that is in the |
181 | * expected format |
182 | */ |
183 | |
184 | static int br_validate_ipv4(struct net *net, struct sk_buff *skb) |
185 | { |
186 | const struct iphdr *iph; |
187 | u32 len; |
188 | |
189 | if (!pskb_may_pull(skb, sizeof(struct iphdr))) |
190 | goto inhdr_error; |
191 | |
192 | iph = ip_hdr(skb); |
193 | |
194 | /* Basic sanity checks */ |
195 | if (iph->ihl < 5 || iph->version != 4) |
196 | goto inhdr_error; |
197 | |
198 | if (!pskb_may_pull(skb, iph->ihl*4)) |
199 | goto inhdr_error; |
200 | |
201 | iph = ip_hdr(skb); |
202 | if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) |
203 | goto csum_error; |
204 | |
205 | len = ntohs(iph->tot_len); |
206 | if (skb->len < len) { |
207 | __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); |
208 | goto drop; |
209 | } else if (len < (iph->ihl*4)) |
210 | goto inhdr_error; |
211 | |
212 | if (pskb_trim_rcsum(skb, len)) { |
213 | __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); |
214 | goto drop; |
215 | } |
216 | |
217 | memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); |
218 | /* We should really parse IP options here but until |
219 | * somebody who actually uses IP options complains to |
220 | * us we'll just silently ignore the options because |
221 | * we're lazy! |
222 | */ |
223 | return 0; |
224 | |
225 | csum_error: |
226 | __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); |
227 | inhdr_error: |
228 | __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); |
229 | drop: |
230 | return -1; |
231 | } |
232 | |
233 | void nf_bridge_update_protocol(struct sk_buff *skb) |
234 | { |
235 | const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); |
236 | |
237 | switch (nf_bridge->orig_proto) { |
238 | case BRNF_PROTO_8021Q: |
239 | skb->protocol = htons(ETH_P_8021Q); |
240 | break; |
241 | case BRNF_PROTO_PPPOE: |
242 | skb->protocol = htons(ETH_P_PPP_SES); |
243 | break; |
244 | case BRNF_PROTO_UNCHANGED: |
245 | break; |
246 | } |
247 | } |
248 | |
249 | /* Obtain the correct destination MAC address, while preserving the original |
250 | * source MAC address. If we already know this address, we just copy it. If we |
251 | * don't, we use the neighbour framework to find out. In both cases, we make |
252 | * sure that br_handle_frame_finish() is called afterwards. |
253 | */ |
254 | int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb) |
255 | { |
256 | struct neighbour *neigh; |
257 | struct dst_entry *dst; |
258 | |
259 | skb->dev = bridge_parent(skb->dev); |
260 | if (!skb->dev) |
261 | goto free_skb; |
262 | dst = skb_dst(skb); |
263 | neigh = dst_neigh_lookup_skb(dst, skb); |
264 | if (neigh) { |
265 | struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); |
266 | int ret; |
267 | |
268 | if ((neigh->nud_state & NUD_CONNECTED) && neigh->hh.hh_len) { |
269 | neigh_hh_bridge(&neigh->hh, skb); |
270 | skb->dev = nf_bridge->physindev; |
271 | ret = br_handle_frame_finish(net, sk, skb); |
272 | } else { |
273 | /* the neighbour function below overwrites the complete |
274 | * MAC header, so we save the Ethernet source address and |
275 | * protocol number. |
276 | */ |
277 | skb_copy_from_linear_data_offset(skb, |
278 | -(ETH_HLEN-ETH_ALEN), |
279 | nf_bridge->neigh_header, |
280 | ETH_HLEN-ETH_ALEN); |
281 | /* tell br_dev_xmit to continue with forwarding */ |
282 | nf_bridge->bridged_dnat = 1; |
283 | /* FIXME Need to refragment */ |
284 | ret = neigh->output(neigh, skb); |
285 | } |
286 | neigh_release(neigh); |
287 | return ret; |
288 | } |
289 | free_skb: |
290 | kfree_skb(skb); |
291 | return 0; |
292 | } |
293 | |
294 | static inline bool |
295 | br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb, |
296 | const struct nf_bridge_info *nf_bridge) |
297 | { |
298 | return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; |
299 | } |
300 | |
301 | /* This requires some explaining. If DNAT has taken place, |
302 | * we will need to fix up the destination Ethernet address. |
303 | * This is also true when SNAT takes place (for the reply direction). |
304 | * |
305 | * There are two cases to consider: |
306 | * 1. The packet was DNAT'ed to a device in the same bridge |
307 | * port group as it was received on. We can still bridge |
308 | * the packet. |
309 | * 2. The packet was DNAT'ed to a different device, either |
310 | * a non-bridged device or another bridge port group. |
311 | * The packet will need to be routed. |
312 | * |
313 | * The correct way of distinguishing between these two cases is to |
314 | * call ip_route_input() and to look at skb->dst->dev, which is |
315 | * changed to the destination device if ip_route_input() succeeds. |
316 | * |
317 | * Let's first consider the case that ip_route_input() succeeds: |
318 | * |
319 | * If the output device equals the logical bridge device the packet |
320 | * came in on, we can consider this bridging. The corresponding MAC |
321 | * address will be obtained in br_nf_pre_routing_finish_bridge. |
322 | * Otherwise, the packet is considered to be routed and we just |
323 | * change the destination MAC address so that the packet will |
324 | * later be passed up to the IP stack to be routed. For a redirected |
325 | * packet, ip_route_input() will give back the localhost as output device, |
326 | * which differs from the bridge device. |
327 | * |
328 | * Let's now consider the case that ip_route_input() fails: |
329 | * |
330 | * This can be because the destination address is martian, in which case |
331 | * the packet will be dropped. |
332 | * If IP forwarding is disabled, ip_route_input() will fail, while |
333 | * ip_route_output_key() can return success. The source |
334 | * address for ip_route_output_key() is set to zero, so ip_route_output_key() |
335 | * thinks we're handling a locally generated packet and won't care |
336 | * if IP forwarding is enabled. If the output device equals the logical bridge |
337 | * device, we proceed as if ip_route_input() succeeded. If it differs from the |
338 | * logical bridge port or if ip_route_output_key() fails we drop the packet. |
339 | */ |
340 | static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb) |
341 | { |
342 | struct net_device *dev = skb->dev; |
343 | struct iphdr *iph = ip_hdr(skb); |
344 | struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); |
345 | struct rtable *rt; |
346 | int err; |
347 | |
348 | nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; |
349 | |
350 | if (nf_bridge->pkt_otherhost) { |
351 | skb->pkt_type = PACKET_OTHERHOST; |
352 | nf_bridge->pkt_otherhost = false; |
353 | } |
354 | nf_bridge->in_prerouting = 0; |
355 | if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) { |
356 | if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { |
357 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
358 | |
359 | /* If err equals -EHOSTUNREACH the error is due to a |
360 | * martian destination or due to the fact that |
361 | * forwarding is disabled. For most martian packets, |
362 | * ip_route_output_key() will fail. It won't fail for 2 types of |
363 | * martian destinations: loopback destinations and destination |
364 | * 0.0.0.0. In both cases the packet will be dropped because the |
365 | * destination is the loopback device and not the bridge. */ |
366 | if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) |
367 | goto free_skb; |
368 | |
369 | rt = ip_route_output(net, iph->daddr, 0, |
370 | RT_TOS(iph->tos), 0); |
371 | if (!IS_ERR(rt)) { |
372 | /* - Bridged-and-DNAT'ed traffic doesn't |
373 | * require ip_forwarding. */ |
374 | if (rt->dst.dev == dev) { |
375 | skb_dst_set(skb, &rt->dst); |
376 | goto bridged_dnat; |
377 | } |
378 | ip_rt_put(rt); |
379 | } |
380 | free_skb: |
381 | kfree_skb(skb); |
382 | return 0; |
383 | } else { |
384 | if (skb_dst(skb)->dev == dev) { |
385 | bridged_dnat: |
386 | skb->dev = nf_bridge->physindev; |
387 | nf_bridge_update_protocol(skb); |
388 | nf_bridge_push_encap_header(skb); |
389 | br_nf_hook_thresh(NF_BR_PRE_ROUTING, |
390 | net, sk, skb, skb->dev, |
391 | NULL, |
392 | br_nf_pre_routing_finish_bridge); |
393 | return 0; |
394 | } |
395 | ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); |
396 | skb->pkt_type = PACKET_HOST; |
397 | } |
398 | } else { |
399 | rt = bridge_parent_rtable(nf_bridge->physindev); |
400 | if (!rt) { |
401 | kfree_skb(skb); |
402 | return 0; |
403 | } |
404 | skb_dst_set_noref(skb, &rt->dst); |
405 | } |
406 | |
407 | skb->dev = nf_bridge->physindev; |
408 | nf_bridge_update_protocol(skb); |
409 | nf_bridge_push_encap_header(skb); |
410 | br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, |
411 | br_handle_frame_finish); |
412 | return 0; |
413 | } |
414 | |
415 | static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct net_device *dev) |
416 | { |
417 | struct net_device *vlan, *br; |
418 | |
419 | br = bridge_parent(dev); |
420 | if (brnf_pass_vlan_indev == 0 || !skb_vlan_tag_present(skb)) |
421 | return br; |
422 | |
423 | vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, |
424 | skb_vlan_tag_get(skb) & VLAN_VID_MASK); |
425 | |
426 | return vlan ? vlan : br; |
427 | } |
428 | |
429 | /* Some common code for IPv4/IPv6 */ |
430 | struct net_device *setup_pre_routing(struct sk_buff *skb) |
431 | { |
432 | struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); |
433 | |
434 | if (skb->pkt_type == PACKET_OTHERHOST) { |
435 | skb->pkt_type = PACKET_HOST; |
436 | nf_bridge->pkt_otherhost = true; |
437 | } |
438 | |
439 | nf_bridge->in_prerouting = 1; |
440 | nf_bridge->physindev = skb->dev; |
441 | skb->dev = brnf_get_logical_dev(skb, skb->dev); |
442 | |
443 | if (skb->protocol == htons(ETH_P_8021Q)) |
444 | nf_bridge->orig_proto = BRNF_PROTO_8021Q; |
445 | else if (skb->protocol == htons(ETH_P_PPP_SES)) |
446 | nf_bridge->orig_proto = BRNF_PROTO_PPPOE; |
447 | |
448 | /* Must drop socket now because of tproxy. */ |
449 | skb_orphan(skb); |
450 | return skb->dev; |
451 | } |
452 | |
453 | /* Direct IPv6 traffic to br_nf_pre_routing_ipv6. |
454 | * Replicate the checks that IPv4 does on packet reception. |
455 | * Set skb->dev to the bridge device (i.e. parent of the |
456 | * receiving device) to make netfilter happy, the REDIRECT |
457 | * target in particular. Save the original destination IP |
458 | * address to be able to detect DNAT afterwards. */ |
459 | static unsigned int br_nf_pre_routing(void *priv, |
460 | struct sk_buff *skb, |
461 | const struct nf_hook_state *state) |
462 | { |
463 | struct nf_bridge_info *nf_bridge; |
464 | struct net_bridge_port *p; |
465 | struct net_bridge *br; |
466 | __u32 len = nf_bridge_encap_header_len(skb); |
467 | |
468 | if (unlikely(!pskb_may_pull(skb, len))) |
469 | return NF_DROP; |
470 | |
471 | p = br_port_get_rcu(state->in); |
472 | if (p == NULL) |
473 | return NF_DROP; |
474 | br = p->br; |
475 | |
476 | if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) { |
477 | if (!brnf_call_ip6tables && |
478 | !br_opt_get(br, BROPT_NF_CALL_IP6TABLES)) |
479 | return NF_ACCEPT; |
480 | |
481 | nf_bridge_pull_encap_header_rcsum(skb); |
482 | return br_nf_pre_routing_ipv6(priv, skb, state); |
483 | } |
484 | |
485 | if (!brnf_call_iptables && !br_opt_get(br, BROPT_NF_CALL_IPTABLES)) |
486 | return NF_ACCEPT; |
487 | |
488 | if (!IS_IP(skb) && !IS_VLAN_IP(skb) && !IS_PPPOE_IP(skb)) |
489 | return NF_ACCEPT; |
490 | |
491 | nf_bridge_pull_encap_header_rcsum(skb); |
492 | |
493 | if (br_validate_ipv4(state->net, skb)) |
494 | return NF_DROP; |
495 | |
496 | if (!nf_bridge_alloc(skb)) |
497 | return NF_DROP; |
498 | if (!setup_pre_routing(skb)) |
499 | return NF_DROP; |
500 | |
501 | nf_bridge = nf_bridge_info_get(skb); |
502 | nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; |
503 | |
504 | skb->protocol = htons(ETH_P_IP); |
505 | |
506 | NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb, |
507 | skb->dev, NULL, |
508 | br_nf_pre_routing_finish); |
509 | |
510 | return NF_STOLEN; |
511 | } |
512 | |
513 | |
514 | /* PF_BRIDGE/FORWARD *************************************************/ |
515 | static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) |
516 | { |
517 | struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); |
518 | struct net_device *in; |
519 | |
520 | if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { |
521 | |
522 | if (skb->protocol == htons(ETH_P_IP)) |
523 | nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; |
524 | |
525 | if (skb->protocol == htons(ETH_P_IPV6)) |
526 | nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; |
527 | |
528 | in = nf_bridge->physindev; |
529 | if (nf_bridge->pkt_otherhost) { |
530 | skb->pkt_type = PACKET_OTHERHOST; |
531 | nf_bridge->pkt_otherhost = false; |
532 | } |
533 | nf_bridge_update_protocol(skb); |
534 | } else { |
535 | in = *((struct net_device **)(skb->cb)); |
536 | } |
537 | nf_bridge_push_encap_header(skb); |
538 | |
539 | br_nf_hook_thresh(NF_BR_FORWARD, net, sk, skb, in, skb->dev, |
540 | br_forward_finish); |
541 | return 0; |
542 | } |
543 | |
544 | |
545 | /* This is the 'purely bridged' case. For IP, we pass the packet to |
546 | * netfilter with indev and outdev set to the bridge device, |
547 | * but we are still able to filter on the 'real' indev/outdev |
548 | * because of the physdev module. For ARP, indev and outdev are the |
549 | * bridge ports. */ |
550 | static unsigned int br_nf_forward_ip(void *priv, |
551 | struct sk_buff *skb, |
552 | const struct nf_hook_state *state) |
553 | { |
554 | struct nf_bridge_info *nf_bridge; |
555 | struct net_device *parent; |
556 | u_int8_t pf; |
557 | |
558 | nf_bridge = nf_bridge_info_get(skb); |
559 | if (!nf_bridge) |
560 | return NF_ACCEPT; |
561 | |
562 | /* Need exclusive nf_bridge_info since we might have multiple |
563 | * different physoutdevs. */ |
564 | if (!nf_bridge_unshare(skb)) |
565 | return NF_DROP; |
566 | |
567 | nf_bridge = nf_bridge_info_get(skb); |
568 | if (!nf_bridge) |
569 | return NF_DROP; |
570 | |
571 | parent = bridge_parent(state->out); |
572 | if (!parent) |
573 | return NF_DROP; |
574 | |
575 | if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb)) |
576 | pf = NFPROTO_IPV4; |
577 | else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) |
578 | pf = NFPROTO_IPV6; |
579 | else |
580 | return NF_ACCEPT; |
581 | |
582 | nf_bridge_pull_encap_header(skb); |
583 | |
584 | if (skb->pkt_type == PACKET_OTHERHOST) { |
585 | skb->pkt_type = PACKET_HOST; |
586 | nf_bridge->pkt_otherhost = true; |
587 | } |
588 | |
589 | if (pf == NFPROTO_IPV4) { |
590 | if (br_validate_ipv4(state->net, skb)) |
591 | return NF_DROP; |
592 | IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; |
593 | } |
594 | |
595 | if (pf == NFPROTO_IPV6) { |
596 | if (br_validate_ipv6(state->net, skb)) |
597 | return NF_DROP; |
598 | IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; |
599 | } |
600 | |
601 | nf_bridge->physoutdev = skb->dev; |
602 | if (pf == NFPROTO_IPV4) |
603 | skb->protocol = htons(ETH_P_IP); |
604 | else |
605 | skb->protocol = htons(ETH_P_IPV6); |
606 | |
607 | NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb, |
608 | brnf_get_logical_dev(skb, state->in), |
609 | parent, br_nf_forward_finish); |
610 | |
611 | return NF_STOLEN; |
612 | } |
613 | |
614 | static unsigned int br_nf_forward_arp(void *priv, |
615 | struct sk_buff *skb, |
616 | const struct nf_hook_state *state) |
617 | { |
618 | struct net_bridge_port *p; |
619 | struct net_bridge *br; |
620 | struct net_device **d = (struct net_device **)(skb->cb); |
621 | |
622 | p = br_port_get_rcu(state->out); |
623 | if (p == NULL) |
624 | return NF_ACCEPT; |
625 | br = p->br; |
626 | |
627 | if (!brnf_call_arptables && !br_opt_get(br, BROPT_NF_CALL_ARPTABLES)) |
628 | return NF_ACCEPT; |
629 | |
630 | if (!IS_ARP(skb)) { |
631 | if (!IS_VLAN_ARP(skb)) |
632 | return NF_ACCEPT; |
633 | nf_bridge_pull_encap_header(skb); |
634 | } |
635 | |
636 | if (arp_hdr(skb)->ar_pln != 4) { |
637 | if (IS_VLAN_ARP(skb)) |
638 | nf_bridge_push_encap_header(skb); |
639 | return NF_ACCEPT; |
640 | } |
641 | *d = state->in; |
642 | NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->net, state->sk, skb, |
643 | state->in, state->out, br_nf_forward_finish); |
644 | |
645 | return NF_STOLEN; |
646 | } |
647 | |
648 | static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) |
649 | { |
650 | struct brnf_frag_data *data; |
651 | int err; |
652 | |
653 | data = this_cpu_ptr(&brnf_frag_data_storage); |
654 | err = skb_cow_head(skb, data->size); |
655 | |
656 | if (err) { |
657 | kfree_skb(skb); |
658 | return 0; |
659 | } |
660 | |
661 | if (data->vlan_proto) |
662 | __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci); |
663 | |
664 | skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size); |
665 | __skb_push(skb, data->encap_size); |
666 | |
667 | nf_bridge_info_free(skb); |
668 | return br_dev_queue_push_xmit(net, sk, skb); |
669 | } |
670 | |
671 | static int |
672 | br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, |
673 | int (*output)(struct net *, struct sock *, struct sk_buff *)) |
674 | { |
675 | unsigned int mtu = ip_skb_dst_mtu(sk, skb); |
676 | struct iphdr *iph = ip_hdr(skb); |
677 | |
678 | if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || |
679 | (IPCB(skb)->frag_max_size && |
680 | IPCB(skb)->frag_max_size > mtu))) { |
681 | IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); |
682 | kfree_skb(skb); |
683 | return -EMSGSIZE; |
684 | } |
685 | |
686 | return ip_do_fragment(net, sk, skb, output); |
687 | } |
688 | |
689 | static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) |
690 | { |
691 | const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); |
692 | |
693 | if (nf_bridge->orig_proto == BRNF_PROTO_PPPOE) |
694 | return PPPOE_SES_HLEN; |
695 | return 0; |
696 | } |
697 | |
698 | static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) |
699 | { |
700 | struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); |
701 | unsigned int mtu, mtu_reserved; |
702 | |
703 | mtu_reserved = nf_bridge_mtu_reduction(skb); |
704 | mtu = skb->dev->mtu; |
705 | |
706 | if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) |
707 | mtu = nf_bridge->frag_max_size; |
708 | |
709 | if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) { |
710 | nf_bridge_info_free(skb); |
711 | return br_dev_queue_push_xmit(net, sk, skb); |
712 | } |
713 | |
714 | /* This is wrong! We should preserve the original fragment |
715 | * boundaries by preserving frag_list rather than refragmenting. |
716 | */ |
717 | if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) && |
718 | skb->protocol == htons(ETH_P_IP)) { |
719 | struct brnf_frag_data *data; |
720 | |
721 | if (br_validate_ipv4(net, skb)) |
722 | goto drop; |
723 | |
724 | IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; |
725 | |
726 | nf_bridge_update_protocol(skb); |
727 | |
728 | data = this_cpu_ptr(&brnf_frag_data_storage); |
729 | |
730 | if (skb_vlan_tag_present(skb)) { |
731 | data->vlan_tci = skb->vlan_tci; |
732 | data->vlan_proto = skb->vlan_proto; |
733 | } else { |
734 | data->vlan_proto = 0; |
735 | } |
736 | |
737 | data->encap_size = nf_bridge_encap_header_len(skb); |
738 | data->size = ETH_HLEN + data->encap_size; |
739 | |
740 | skb_copy_from_linear_data_offset(skb, -data->size, data->mac, |
741 | data->size); |
742 | |
743 | return br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit); |
744 | } |
745 | if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && |
746 | skb->protocol == htons(ETH_P_IPV6)) { |
747 | const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); |
748 | struct brnf_frag_data *data; |
749 | |
750 | if (br_validate_ipv6(net, skb)) |
751 | goto drop; |
752 | |
753 | IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; |
754 | |
755 | nf_bridge_update_protocol(skb); |
756 | |
757 | data = this_cpu_ptr(&brnf_frag_data_storage); |
758 | data->encap_size = nf_bridge_encap_header_len(skb); |
759 | data->size = ETH_HLEN + data->encap_size; |
760 | |
761 | skb_copy_from_linear_data_offset(skb, -data->size, data->mac, |
762 | data->size); |
763 | |
764 | if (v6ops) |
765 | return v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit); |
766 | |
767 | kfree_skb(skb); |
768 | return -EMSGSIZE; |
769 | } |
770 | nf_bridge_info_free(skb); |
771 | return br_dev_queue_push_xmit(net, sk, skb); |
772 | drop: |
773 | kfree_skb(skb); |
774 | return 0; |
775 | } |
776 | |
777 | /* PF_BRIDGE/POST_ROUTING ********************************************/ |
778 | static unsigned int br_nf_post_routing(void *priv, |
779 | struct sk_buff *skb, |
780 | const struct nf_hook_state *state) |
781 | { |
782 | struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); |
783 | struct net_device *realoutdev = bridge_parent(skb->dev); |
784 | u_int8_t pf; |
785 | |
786 | /* if nf_bridge is set, but ->physoutdev is NULL, this packet came in |
787 | * on a bridge, but was delivered locally and is now being routed: |
788 | * |
789 | * POST_ROUTING was already invoked from the ip stack. |
790 | */ |
791 | if (!nf_bridge || !nf_bridge->physoutdev) |
792 | return NF_ACCEPT; |
793 | |
794 | if (!realoutdev) |
795 | return NF_DROP; |
796 | |
797 | if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb)) |
798 | pf = NFPROTO_IPV4; |
799 | else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) |
800 | pf = NFPROTO_IPV6; |
801 | else |
802 | return NF_ACCEPT; |
803 | |
804 | /* We assume any code from br_dev_queue_push_xmit onwards doesn't care |
805 | * about the value of skb->pkt_type. */ |
806 | if (skb->pkt_type == PACKET_OTHERHOST) { |
807 | skb->pkt_type = PACKET_HOST; |
808 | nf_bridge->pkt_otherhost = true; |
809 | } |
810 | |
811 | nf_bridge_pull_encap_header(skb); |
812 | if (pf == NFPROTO_IPV4) |
813 | skb->protocol = htons(ETH_P_IP); |
814 | else |
815 | skb->protocol = htons(ETH_P_IPV6); |
816 | |
817 | NF_HOOK(pf, NF_INET_POST_ROUTING, state->net, state->sk, skb, |
818 | NULL, realoutdev, |
819 | br_nf_dev_queue_xmit); |
820 | |
821 | return NF_STOLEN; |
822 | } |
823 | |
824 | /* IP/SABOTAGE *****************************************************/ |
825 | /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING |
826 | * for the second time. */ |
827 | static unsigned int ip_sabotage_in(void *priv, |
828 | struct sk_buff *skb, |
829 | const struct nf_hook_state *state) |
830 | { |
831 | struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); |
832 | |
833 | if (nf_bridge && !nf_bridge->in_prerouting && |
834 | !netif_is_l3_master(skb->dev) && |
835 | !netif_is_l3_slave(skb->dev)) { |
836 | state->okfn(state->net, state->sk, skb); |
837 | return NF_STOLEN; |
838 | } |
839 | |
840 | return NF_ACCEPT; |
841 | } |
842 | |
843 | /* This is called when br_netfilter has called into iptables/netfilter, |
844 | * and DNAT has taken place on a bridge-forwarded packet. |
845 | * |
846 | * neigh->output has created a new MAC header, with local br0 MAC |
847 | * as saddr. |
848 | * |
849 | * This restores the original MAC saddr of the bridged packet |
850 | * before invoking bridge forward logic to transmit the packet. |
851 | */ |
852 | static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) |
853 | { |
854 | struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); |
855 | |
856 | skb_pull(skb, ETH_HLEN); |
857 | nf_bridge->bridged_dnat = 0; |
858 | |
859 | BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN)); |
860 | |
861 | skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN), |
862 | nf_bridge->neigh_header, |
863 | ETH_HLEN - ETH_ALEN); |
864 | skb->dev = nf_bridge->physindev; |
865 | |
866 | nf_bridge->physoutdev = NULL; |
867 | br_handle_frame_finish(dev_net(skb->dev), NULL, skb); |
868 | } |
869 | |
870 | static int br_nf_dev_xmit(struct sk_buff *skb) |
871 | { |
872 | const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); |
873 | |
874 | if (nf_bridge && nf_bridge->bridged_dnat) { |
875 | br_nf_pre_routing_finish_bridge_slow(skb); |
876 | return 1; |
877 | } |
878 | return 0; |
879 | } |
880 | |
881 | static const struct nf_br_ops br_ops = { |
882 | .br_dev_xmit_hook = br_nf_dev_xmit, |
883 | }; |
884 | |
885 | /* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because |
886 | * br_dev_queue_push_xmit is called afterwards */ |
887 | static const struct nf_hook_ops br_nf_ops[] = { |
888 | { |
889 | .hook = br_nf_pre_routing, |
890 | .pf = NFPROTO_BRIDGE, |
891 | .hooknum = NF_BR_PRE_ROUTING, |
892 | .priority = NF_BR_PRI_BRNF, |
893 | }, |
894 | { |
895 | .hook = br_nf_forward_ip, |
896 | .pf = NFPROTO_BRIDGE, |
897 | .hooknum = NF_BR_FORWARD, |
898 | .priority = NF_BR_PRI_BRNF - 1, |
899 | }, |
900 | { |
901 | .hook = br_nf_forward_arp, |
902 | .pf = NFPROTO_BRIDGE, |
903 | .hooknum = NF_BR_FORWARD, |
904 | .priority = NF_BR_PRI_BRNF, |
905 | }, |
906 | { |
907 | .hook = br_nf_post_routing, |
908 | .pf = NFPROTO_BRIDGE, |
909 | .hooknum = NF_BR_POST_ROUTING, |
910 | .priority = NF_BR_PRI_LAST, |
911 | }, |
912 | { |
913 | .hook = ip_sabotage_in, |
914 | .pf = NFPROTO_IPV4, |
915 | .hooknum = NF_INET_PRE_ROUTING, |
916 | .priority = NF_IP_PRI_FIRST, |
917 | }, |
918 | { |
919 | .hook = ip_sabotage_in, |
920 | .pf = NFPROTO_IPV6, |
921 | .hooknum = NF_INET_PRE_ROUTING, |
922 | .priority = NF_IP6_PRI_FIRST, |
923 | }, |
924 | }; |
925 | |
926 | static int brnf_device_event(struct notifier_block *unused, unsigned long event, |
927 | void *ptr) |
928 | { |
929 | struct net_device *dev = netdev_notifier_info_to_dev(ptr); |
930 | struct brnf_net *brnet; |
931 | struct net *net; |
932 | int ret; |
933 | |
934 | if (event != NETDEV_REGISTER || !(dev->priv_flags & IFF_EBRIDGE)) |
935 | return NOTIFY_DONE; |
936 | |
937 | ASSERT_RTNL(); |
938 | |
939 | net = dev_net(dev); |
940 | brnet = net_generic(net, brnf_net_id); |
941 | if (brnet->enabled) |
942 | return NOTIFY_OK; |
943 | |
944 | ret = nf_register_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops)); |
945 | if (ret) |
946 | return NOTIFY_BAD; |
947 | |
948 | brnet->enabled = true; |
949 | return NOTIFY_OK; |
950 | } |
951 | |
952 | static void __net_exit brnf_exit_net(struct net *net) |
953 | { |
954 | struct brnf_net *brnet = net_generic(net, brnf_net_id); |
955 | |
956 | if (!brnet->enabled) |
957 | return; |
958 | |
959 | nf_unregister_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops)); |
960 | brnet->enabled = false; |
961 | } |
962 | |
963 | static struct pernet_operations brnf_net_ops __read_mostly = { |
964 | .exit = brnf_exit_net, |
965 | .id = &brnf_net_id, |
966 | .size = sizeof(struct brnf_net), |
967 | }; |
968 | |
969 | static struct notifier_block brnf_notifier __read_mostly = { |
970 | .notifier_call = brnf_device_event, |
971 | }; |
972 | |
973 | /* recursively invokes nf_hook_slow (again), skipping already-called |
974 | * hooks (< NF_BR_PRI_BRNF). |
975 | * |
976 | * Called with rcu read lock held. |
977 | */ |
978 | int br_nf_hook_thresh(unsigned int hook, struct net *net, |
979 | struct sock *sk, struct sk_buff *skb, |
980 | struct net_device *indev, |
981 | struct net_device *outdev, |
982 | int (*okfn)(struct net *, struct sock *, |
983 | struct sk_buff *)) |
984 | { |
985 | const struct nf_hook_entries *e; |
986 | struct nf_hook_state state; |
987 | struct nf_hook_ops **ops; |
988 | unsigned int i; |
989 | int ret; |
990 | |
991 | e = rcu_dereference(net->nf.hooks_bridge[hook]); |
992 | if (!e) |
993 | return okfn(net, sk, skb); |
994 | |
995 | ops = nf_hook_entries_get_hook_ops(e); |
996 | for (i = 0; i < e->num_hook_entries && |
997 | ops[i]->priority <= NF_BR_PRI_BRNF; i++) |
998 | ; |
999 | |
1000 | nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, |
1001 | sk, net, okfn); |
1002 | |
1003 | ret = nf_hook_slow(skb, &state, e, i); |
1004 | if (ret == 1) |
1005 | ret = okfn(net, sk, skb); |
1006 | |
1007 | return ret; |
1008 | } |
1009 | |
1010 | #ifdef CONFIG_SYSCTL |
1011 | static |
1012 | int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, |
1013 | void __user *buffer, size_t *lenp, loff_t *ppos) |
1014 | { |
1015 | int ret; |
1016 | |
1017 | ret = proc_dointvec(ctl, write, buffer, lenp, ppos); |
1018 | |
1019 | if (write && *(int *)(ctl->data)) |
1020 | *(int *)(ctl->data) = 1; |
1021 | return ret; |
1022 | } |
1023 | |
1024 | static struct ctl_table brnf_table[] = { |
1025 | { |
1026 | .procname = "bridge-nf-call-arptables" , |
1027 | .data = &brnf_call_arptables, |
1028 | .maxlen = sizeof(int), |
1029 | .mode = 0644, |
1030 | .proc_handler = brnf_sysctl_call_tables, |
1031 | }, |
1032 | { |
1033 | .procname = "bridge-nf-call-iptables" , |
1034 | .data = &brnf_call_iptables, |
1035 | .maxlen = sizeof(int), |
1036 | .mode = 0644, |
1037 | .proc_handler = brnf_sysctl_call_tables, |
1038 | }, |
1039 | { |
1040 | .procname = "bridge-nf-call-ip6tables" , |
1041 | .data = &brnf_call_ip6tables, |
1042 | .maxlen = sizeof(int), |
1043 | .mode = 0644, |
1044 | .proc_handler = brnf_sysctl_call_tables, |
1045 | }, |
1046 | { |
1047 | .procname = "bridge-nf-filter-vlan-tagged" , |
1048 | .data = &brnf_filter_vlan_tagged, |
1049 | .maxlen = sizeof(int), |
1050 | .mode = 0644, |
1051 | .proc_handler = brnf_sysctl_call_tables, |
1052 | }, |
1053 | { |
1054 | .procname = "bridge-nf-filter-pppoe-tagged" , |
1055 | .data = &brnf_filter_pppoe_tagged, |
1056 | .maxlen = sizeof(int), |
1057 | .mode = 0644, |
1058 | .proc_handler = brnf_sysctl_call_tables, |
1059 | }, |
1060 | { |
1061 | .procname = "bridge-nf-pass-vlan-input-dev" , |
1062 | .data = &brnf_pass_vlan_indev, |
1063 | .maxlen = sizeof(int), |
1064 | .mode = 0644, |
1065 | .proc_handler = brnf_sysctl_call_tables, |
1066 | }, |
1067 | { } |
1068 | }; |
1069 | #endif |
1070 | |
1071 | static int __init br_netfilter_init(void) |
1072 | { |
1073 | int ret; |
1074 | |
1075 | ret = register_pernet_subsys(&brnf_net_ops); |
1076 | if (ret < 0) |
1077 | return ret; |
1078 | |
1079 | ret = register_netdevice_notifier(&brnf_notifier); |
1080 | if (ret < 0) { |
1081 | unregister_pernet_subsys(&brnf_net_ops); |
1082 | return ret; |
1083 | } |
1084 | |
1085 | #ifdef CONFIG_SYSCTL |
1086 | brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge" , brnf_table); |
1087 | if (brnf_sysctl_header == NULL) { |
1088 | printk(KERN_WARNING |
1089 | "br_netfilter: can't register to sysctl.\n" ); |
1090 | unregister_netdevice_notifier(&brnf_notifier); |
1091 | unregister_pernet_subsys(&brnf_net_ops); |
1092 | return -ENOMEM; |
1093 | } |
1094 | #endif |
1095 | RCU_INIT_POINTER(nf_br_ops, &br_ops); |
1096 | printk(KERN_NOTICE "Bridge firewalling registered\n" ); |
1097 | return 0; |
1098 | } |
1099 | |
1100 | static void __exit br_netfilter_fini(void) |
1101 | { |
1102 | RCU_INIT_POINTER(nf_br_ops, NULL); |
1103 | unregister_netdevice_notifier(&brnf_notifier); |
1104 | unregister_pernet_subsys(&brnf_net_ops); |
1105 | #ifdef CONFIG_SYSCTL |
1106 | unregister_net_sysctl_table(brnf_sysctl_header); |
1107 | #endif |
1108 | } |
1109 | |
1110 | module_init(br_netfilter_init); |
1111 | module_exit(br_netfilter_fini); |
1112 | |
1113 | MODULE_LICENSE("GPL" ); |
1114 | MODULE_AUTHOR("Lennert Buytenhek <buytenh@gnu.org>" ); |
1115 | MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>" ); |
1116 | MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge" ); |
1117 | |