/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <linux/sysctl.h>
#include <net/route.h>
#include <net/ip.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_bridge.h>

#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>

#include "../br_private.h"
23
24/* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff
25 * has been linearized or cloned.
26 */
27static int nf_br_ip_fragment(struct net *net, struct sock *sk,
28 struct sk_buff *skb,
29 struct nf_bridge_frag_data *data,
30 int (*output)(struct net *, struct sock *sk,
31 const struct nf_bridge_frag_data *data,
32 struct sk_buff *))
33{
34 int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
35 bool mono_delivery_time = skb->mono_delivery_time;
36 unsigned int hlen, ll_rs, mtu;
37 ktime_t tstamp = skb->tstamp;
38 struct ip_frag_state state;
39 struct iphdr *iph;
40 int err;
41
42 /* for offloaded checksums cleanup checksum before fragmentation */
43 if (skb->ip_summed == CHECKSUM_PARTIAL &&
44 (err = skb_checksum_help(skb)))
45 goto blackhole;
46
47 iph = ip_hdr(skb);
48
49 /*
50 * Setup starting values
51 */
52
53 hlen = iph->ihl * 4;
54 frag_max_size -= hlen;
55 ll_rs = LL_RESERVED_SPACE(skb->dev);
56 mtu = skb->dev->mtu;
57
58 if (skb_has_frag_list(skb)) {
59 unsigned int first_len = skb_pagelen(skb);
60 struct ip_fraglist_iter iter;
61 struct sk_buff *frag;
62
63 if (first_len - hlen > mtu ||
64 skb_headroom(skb) < ll_rs)
65 goto blackhole;
66
67 if (skb_cloned(skb))
68 goto slow_path;
69
70 skb_walk_frags(skb, frag) {
71 if (frag->len > mtu ||
72 skb_headroom(skb: frag) < hlen + ll_rs)
73 goto blackhole;
74
75 if (skb_shared(skb: frag))
76 goto slow_path;
77 }
78
79 ip_fraglist_init(skb, iph, hlen, iter: &iter);
80
81 for (;;) {
82 if (iter.frag)
83 ip_fraglist_prepare(skb, iter: &iter);
84
85 skb_set_delivery_time(skb, kt: tstamp, mono: mono_delivery_time);
86 err = output(net, sk, data, skb);
87 if (err || !iter.frag)
88 break;
89
90 skb = ip_fraglist_next(iter: &iter);
91 }
92
93 if (!err)
94 return 0;
95
96 kfree_skb_list(segs: iter.frag);
97
98 return err;
99 }
100slow_path:
101 /* This is a linearized skbuff, the original geometry is lost for us.
102 * This may also be a clone skbuff, we could preserve the geometry for
103 * the copies but probably not worth the effort.
104 */
105 ip_frag_init(skb, hlen, ll_rs, mtu: frag_max_size, DF: false, state: &state);
106
107 while (state.left > 0) {
108 struct sk_buff *skb2;
109
110 skb2 = ip_frag_next(skb, state: &state);
111 if (IS_ERR(ptr: skb2)) {
112 err = PTR_ERR(ptr: skb2);
113 goto blackhole;
114 }
115
116 skb_set_delivery_time(skb: skb2, kt: tstamp, mono: mono_delivery_time);
117 err = output(net, sk, data, skb2);
118 if (err)
119 goto blackhole;
120 }
121 consume_skb(skb);
122 return err;
123
124blackhole:
125 kfree_skb(skb);
126 return 0;
127}
128
129/* ip_defrag() expects IPCB() in place. */
130static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb,
131 size_t inet_skb_parm_size)
132{
133 memcpy(cb, skb->cb, sizeof(*cb));
134 memset(skb->cb, 0, inet_skb_parm_size);
135}
136
137static void br_skb_cb_restore(struct sk_buff *skb,
138 const struct br_input_skb_cb *cb,
139 u16 fragsz)
140{
141 memcpy(skb->cb, cb, sizeof(*cb));
142 BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz;
143}
144
145static unsigned int nf_ct_br_defrag4(struct sk_buff *skb,
146 const struct nf_hook_state *state)
147{
148 u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
149 enum ip_conntrack_info ctinfo;
150 struct br_input_skb_cb cb;
151 const struct nf_conn *ct;
152 int err;
153
154 if (!ip_is_fragment(iph: ip_hdr(skb)))
155 return NF_ACCEPT;
156
157 ct = nf_ct_get(skb, ctinfo: &ctinfo);
158 if (ct)
159 zone_id = nf_ct_zone_id(zone: nf_ct_zone(ct), CTINFO2DIR(ctinfo));
160
161 br_skb_cb_save(skb, cb: &cb, inet_skb_parm_size: sizeof(struct inet_skb_parm));
162 local_bh_disable();
163 err = ip_defrag(net: state->net, skb,
164 user: IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
165 local_bh_enable();
166 if (!err) {
167 br_skb_cb_restore(skb, cb: &cb, IPCB(skb)->frag_max_size);
168 skb->ignore_df = 1;
169 return NF_ACCEPT;
170 }
171
172 return NF_STOLEN;
173}
174
175static unsigned int nf_ct_br_defrag6(struct sk_buff *skb,
176 const struct nf_hook_state *state)
177{
178#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
179 u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
180 enum ip_conntrack_info ctinfo;
181 struct br_input_skb_cb cb;
182 const struct nf_conn *ct;
183 int err;
184
185 ct = nf_ct_get(skb, ctinfo: &ctinfo);
186 if (ct)
187 zone_id = nf_ct_zone_id(zone: nf_ct_zone(ct), CTINFO2DIR(ctinfo));
188
189 br_skb_cb_save(skb, cb: &cb, inet_skb_parm_size: sizeof(struct inet6_skb_parm));
190
191 err = nf_ct_frag6_gather(net: state->net, skb,
192 user: IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
193 /* queued */
194 if (err == -EINPROGRESS)
195 return NF_STOLEN;
196
197 br_skb_cb_restore(skb, cb: &cb, IP6CB(skb)->frag_max_size);
198 return err == 0 ? NF_ACCEPT : NF_DROP;
199#else
200 return NF_ACCEPT;
201#endif
202}
203
204static int nf_ct_br_ip_check(const struct sk_buff *skb)
205{
206 const struct iphdr *iph;
207 int nhoff, len;
208
209 nhoff = skb_network_offset(skb);
210 iph = ip_hdr(skb);
211 if (iph->ihl < 5 ||
212 iph->version != 4)
213 return -1;
214
215 len = skb_ip_totlen(skb);
216 if (skb->len < nhoff + len ||
217 len < (iph->ihl * 4))
218 return -1;
219
220 return 0;
221}
222
223static int nf_ct_br_ipv6_check(const struct sk_buff *skb)
224{
225 const struct ipv6hdr *hdr;
226 int nhoff, len;
227
228 nhoff = skb_network_offset(skb);
229 hdr = ipv6_hdr(skb);
230 if (hdr->version != 6)
231 return -1;
232
233 len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff;
234 if (skb->len < len)
235 return -1;
236
237 return 0;
238}
239
240static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
241 const struct nf_hook_state *state)
242{
243 struct nf_hook_state bridge_state = *state;
244 enum ip_conntrack_info ctinfo;
245 struct nf_conn *ct;
246 u32 len;
247 int ret;
248
249 ct = nf_ct_get(skb, ctinfo: &ctinfo);
250 if ((ct && !nf_ct_is_template(ct)) ||
251 ctinfo == IP_CT_UNTRACKED)
252 return NF_ACCEPT;
253
254 switch (skb->protocol) {
255 case htons(ETH_P_IP):
256 if (!pskb_may_pull(skb, len: sizeof(struct iphdr)))
257 return NF_ACCEPT;
258
259 len = skb_ip_totlen(skb);
260 if (pskb_trim_rcsum(skb, len))
261 return NF_ACCEPT;
262
263 if (nf_ct_br_ip_check(skb))
264 return NF_ACCEPT;
265
266 bridge_state.pf = NFPROTO_IPV4;
267 ret = nf_ct_br_defrag4(skb, state: &bridge_state);
268 break;
269 case htons(ETH_P_IPV6):
270 if (!pskb_may_pull(skb, len: sizeof(struct ipv6hdr)))
271 return NF_ACCEPT;
272
273 len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
274 if (pskb_trim_rcsum(skb, len))
275 return NF_ACCEPT;
276
277 if (nf_ct_br_ipv6_check(skb))
278 return NF_ACCEPT;
279
280 bridge_state.pf = NFPROTO_IPV6;
281 ret = nf_ct_br_defrag6(skb, state: &bridge_state);
282 break;
283 default:
284 nf_ct_set(skb, NULL, info: IP_CT_UNTRACKED);
285 return NF_ACCEPT;
286 }
287
288 if (ret != NF_ACCEPT)
289 return ret;
290
291 return nf_conntrack_in(skb, state: &bridge_state);
292}
293
294static void nf_ct_bridge_frag_save(struct sk_buff *skb,
295 struct nf_bridge_frag_data *data)
296{
297 if (skb_vlan_tag_present(skb)) {
298 data->vlan_present = true;
299 data->vlan_tci = skb->vlan_tci;
300 data->vlan_proto = skb->vlan_proto;
301 } else {
302 data->vlan_present = false;
303 }
304 skb_copy_from_linear_data_offset(skb, offset: -ETH_HLEN, to: data->mac, ETH_HLEN);
305}
306
307static unsigned int
308nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state,
309 int (*output)(struct net *, struct sock *sk,
310 const struct nf_bridge_frag_data *data,
311 struct sk_buff *))
312{
313 struct nf_bridge_frag_data data;
314
315 if (!BR_INPUT_SKB_CB(skb)->frag_max_size)
316 return NF_ACCEPT;
317
318 nf_ct_bridge_frag_save(skb, data: &data);
319 switch (skb->protocol) {
320 case htons(ETH_P_IP):
321 nf_br_ip_fragment(net: state->net, sk: state->sk, skb, data: &data, output);
322 break;
323 case htons(ETH_P_IPV6):
324 nf_br_ip6_fragment(net: state->net, sk: state->sk, skb, data: &data, output);
325 break;
326 default:
327 WARN_ON_ONCE(1);
328 return NF_DROP;
329 }
330
331 return NF_STOLEN;
332}
333
334/* Actually only slow path refragmentation needs this. */
335static int nf_ct_bridge_frag_restore(struct sk_buff *skb,
336 const struct nf_bridge_frag_data *data)
337{
338 int err;
339
340 err = skb_cow_head(skb, ETH_HLEN);
341 if (err) {
342 kfree_skb(skb);
343 return -ENOMEM;
344 }
345 if (data->vlan_present)
346 __vlan_hwaccel_put_tag(skb, vlan_proto: data->vlan_proto, vlan_tci: data->vlan_tci);
347 else if (skb_vlan_tag_present(skb))
348 __vlan_hwaccel_clear_tag(skb);
349
350 skb_copy_to_linear_data_offset(skb, offset: -ETH_HLEN, from: data->mac, ETH_HLEN);
351 skb_reset_mac_header(skb);
352
353 return 0;
354}
355
/* Per-fragment output callback used by nf_ct_bridge_post(): restore the saved
 * link-layer state on @skb and pass it to br_dev_queue_push_xmit().
 *
 * Returns the negative error from nf_ct_bridge_frag_restore() if that fails,
 * otherwise the result of br_dev_queue_push_xmit().
 */
static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk,
				    const struct nf_bridge_frag_data *data,
				    struct sk_buff *skb)
{
	int err;

	err = nf_ct_bridge_frag_restore(skb, data);
	if (err < 0)
		return err;

	return br_dev_queue_push_xmit(net, sk, skb);
}
368
369static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb,
370 const struct nf_hook_state *state)
371{
372 int ret;
373
374 ret = nf_confirm(priv, skb, state);
375 if (ret != NF_ACCEPT)
376 return ret;
377
378 return nf_ct_bridge_refrag(skb, state, output: nf_ct_bridge_refrag_post);
379}
380
381static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
382 {
383 .hook = nf_ct_bridge_pre,
384 .pf = NFPROTO_BRIDGE,
385 .hooknum = NF_BR_PRE_ROUTING,
386 .priority = NF_IP_PRI_CONNTRACK,
387 },
388 {
389 .hook = nf_ct_bridge_post,
390 .pf = NFPROTO_BRIDGE,
391 .hooknum = NF_BR_POST_ROUTING,
392 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
393 },
394};
395
396static struct nf_ct_bridge_info bridge_info = {
397 .ops = nf_ct_bridge_hook_ops,
398 .ops_size = ARRAY_SIZE(nf_ct_bridge_hook_ops),
399 .me = THIS_MODULE,
400};
401
402static int __init nf_conntrack_l3proto_bridge_init(void)
403{
404 nf_ct_bridge_register(info: &bridge_info);
405
406 return 0;
407}
408
409static void __exit nf_conntrack_l3proto_bridge_fini(void)
410{
411 nf_ct_bridge_unregister(info: &bridge_info);
412}
413
414module_init(nf_conntrack_l3proto_bridge_init);
415module_exit(nf_conntrack_l3proto_bridge_fini);
416
417MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE));
418MODULE_LICENSE("GPL");
419

/* Source: linux/net/bridge/netfilter/nf_conntrack_bridge.c */