1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | #include <linux/types.h> |
3 | #include <linux/ip.h> |
4 | #include <linux/netfilter.h> |
5 | #include <linux/netfilter_ipv6.h> |
6 | #include <linux/netfilter_bridge.h> |
7 | #include <linux/module.h> |
8 | #include <linux/skbuff.h> |
9 | #include <linux/icmp.h> |
10 | #include <linux/sysctl.h> |
11 | #include <net/route.h> |
12 | #include <net/ip.h> |
13 | |
14 | #include <net/netfilter/nf_conntrack.h> |
15 | #include <net/netfilter/nf_conntrack_core.h> |
16 | #include <net/netfilter/nf_conntrack_helper.h> |
17 | #include <net/netfilter/nf_conntrack_bridge.h> |
18 | |
19 | #include <linux/netfilter/nf_tables.h> |
20 | #include <net/netfilter/nf_tables.h> |
21 | |
22 | #include "../br_private.h" |
23 | |
24 | /* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff |
25 | * has been linearized or cloned. |
26 | */ |
27 | static int nf_br_ip_fragment(struct net *net, struct sock *sk, |
28 | struct sk_buff *skb, |
29 | struct nf_bridge_frag_data *data, |
30 | int (*output)(struct net *, struct sock *sk, |
31 | const struct nf_bridge_frag_data *data, |
32 | struct sk_buff *)) |
33 | { |
34 | int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; |
35 | bool mono_delivery_time = skb->mono_delivery_time; |
36 | unsigned int hlen, ll_rs, mtu; |
37 | ktime_t tstamp = skb->tstamp; |
38 | struct ip_frag_state state; |
39 | struct iphdr *iph; |
40 | int err; |
41 | |
42 | /* for offloaded checksums cleanup checksum before fragmentation */ |
43 | if (skb->ip_summed == CHECKSUM_PARTIAL && |
44 | (err = skb_checksum_help(skb))) |
45 | goto blackhole; |
46 | |
47 | iph = ip_hdr(skb); |
48 | |
49 | /* |
50 | * Setup starting values |
51 | */ |
52 | |
53 | hlen = iph->ihl * 4; |
54 | frag_max_size -= hlen; |
55 | ll_rs = LL_RESERVED_SPACE(skb->dev); |
56 | mtu = skb->dev->mtu; |
57 | |
58 | if (skb_has_frag_list(skb)) { |
59 | unsigned int first_len = skb_pagelen(skb); |
60 | struct ip_fraglist_iter iter; |
61 | struct sk_buff *frag; |
62 | |
63 | if (first_len - hlen > mtu || |
64 | skb_headroom(skb) < ll_rs) |
65 | goto blackhole; |
66 | |
67 | if (skb_cloned(skb)) |
68 | goto slow_path; |
69 | |
70 | skb_walk_frags(skb, frag) { |
71 | if (frag->len > mtu || |
72 | skb_headroom(skb: frag) < hlen + ll_rs) |
73 | goto blackhole; |
74 | |
75 | if (skb_shared(skb: frag)) |
76 | goto slow_path; |
77 | } |
78 | |
79 | ip_fraglist_init(skb, iph, hlen, iter: &iter); |
80 | |
81 | for (;;) { |
82 | if (iter.frag) |
83 | ip_fraglist_prepare(skb, iter: &iter); |
84 | |
85 | skb_set_delivery_time(skb, kt: tstamp, mono: mono_delivery_time); |
86 | err = output(net, sk, data, skb); |
87 | if (err || !iter.frag) |
88 | break; |
89 | |
90 | skb = ip_fraglist_next(iter: &iter); |
91 | } |
92 | |
93 | if (!err) |
94 | return 0; |
95 | |
96 | kfree_skb_list(segs: iter.frag); |
97 | |
98 | return err; |
99 | } |
100 | slow_path: |
101 | /* This is a linearized skbuff, the original geometry is lost for us. |
102 | * This may also be a clone skbuff, we could preserve the geometry for |
103 | * the copies but probably not worth the effort. |
104 | */ |
105 | ip_frag_init(skb, hlen, ll_rs, mtu: frag_max_size, DF: false, state: &state); |
106 | |
107 | while (state.left > 0) { |
108 | struct sk_buff *skb2; |
109 | |
110 | skb2 = ip_frag_next(skb, state: &state); |
111 | if (IS_ERR(ptr: skb2)) { |
112 | err = PTR_ERR(ptr: skb2); |
113 | goto blackhole; |
114 | } |
115 | |
116 | skb_set_delivery_time(skb: skb2, kt: tstamp, mono: mono_delivery_time); |
117 | err = output(net, sk, data, skb2); |
118 | if (err) |
119 | goto blackhole; |
120 | } |
121 | consume_skb(skb); |
122 | return err; |
123 | |
124 | blackhole: |
125 | kfree_skb(skb); |
126 | return 0; |
127 | } |
128 | |
129 | /* ip_defrag() expects IPCB() in place. */ |
130 | static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb, |
131 | size_t inet_skb_parm_size) |
132 | { |
133 | memcpy(cb, skb->cb, sizeof(*cb)); |
134 | memset(skb->cb, 0, inet_skb_parm_size); |
135 | } |
136 | |
137 | static void br_skb_cb_restore(struct sk_buff *skb, |
138 | const struct br_input_skb_cb *cb, |
139 | u16 fragsz) |
140 | { |
141 | memcpy(skb->cb, cb, sizeof(*cb)); |
142 | BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz; |
143 | } |
144 | |
145 | static unsigned int nf_ct_br_defrag4(struct sk_buff *skb, |
146 | const struct nf_hook_state *state) |
147 | { |
148 | u16 zone_id = NF_CT_DEFAULT_ZONE_ID; |
149 | enum ip_conntrack_info ctinfo; |
150 | struct br_input_skb_cb cb; |
151 | const struct nf_conn *ct; |
152 | int err; |
153 | |
154 | if (!ip_is_fragment(iph: ip_hdr(skb))) |
155 | return NF_ACCEPT; |
156 | |
157 | ct = nf_ct_get(skb, ctinfo: &ctinfo); |
158 | if (ct) |
159 | zone_id = nf_ct_zone_id(zone: nf_ct_zone(ct), CTINFO2DIR(ctinfo)); |
160 | |
161 | br_skb_cb_save(skb, cb: &cb, inet_skb_parm_size: sizeof(struct inet_skb_parm)); |
162 | local_bh_disable(); |
163 | err = ip_defrag(net: state->net, skb, |
164 | user: IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); |
165 | local_bh_enable(); |
166 | if (!err) { |
167 | br_skb_cb_restore(skb, cb: &cb, IPCB(skb)->frag_max_size); |
168 | skb->ignore_df = 1; |
169 | return NF_ACCEPT; |
170 | } |
171 | |
172 | return NF_STOLEN; |
173 | } |
174 | |
175 | static unsigned int nf_ct_br_defrag6(struct sk_buff *skb, |
176 | const struct nf_hook_state *state) |
177 | { |
178 | #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) |
179 | u16 zone_id = NF_CT_DEFAULT_ZONE_ID; |
180 | enum ip_conntrack_info ctinfo; |
181 | struct br_input_skb_cb cb; |
182 | const struct nf_conn *ct; |
183 | int err; |
184 | |
185 | ct = nf_ct_get(skb, ctinfo: &ctinfo); |
186 | if (ct) |
187 | zone_id = nf_ct_zone_id(zone: nf_ct_zone(ct), CTINFO2DIR(ctinfo)); |
188 | |
189 | br_skb_cb_save(skb, cb: &cb, inet_skb_parm_size: sizeof(struct inet6_skb_parm)); |
190 | |
191 | err = nf_ct_frag6_gather(net: state->net, skb, |
192 | user: IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); |
193 | /* queued */ |
194 | if (err == -EINPROGRESS) |
195 | return NF_STOLEN; |
196 | |
197 | br_skb_cb_restore(skb, cb: &cb, IP6CB(skb)->frag_max_size); |
198 | return err == 0 ? NF_ACCEPT : NF_DROP; |
199 | #else |
200 | return NF_ACCEPT; |
201 | #endif |
202 | } |
203 | |
204 | static int nf_ct_br_ip_check(const struct sk_buff *skb) |
205 | { |
206 | const struct iphdr *iph; |
207 | int nhoff, len; |
208 | |
209 | nhoff = skb_network_offset(skb); |
210 | iph = ip_hdr(skb); |
211 | if (iph->ihl < 5 || |
212 | iph->version != 4) |
213 | return -1; |
214 | |
215 | len = skb_ip_totlen(skb); |
216 | if (skb->len < nhoff + len || |
217 | len < (iph->ihl * 4)) |
218 | return -1; |
219 | |
220 | return 0; |
221 | } |
222 | |
223 | static int nf_ct_br_ipv6_check(const struct sk_buff *skb) |
224 | { |
225 | const struct ipv6hdr *hdr; |
226 | int nhoff, len; |
227 | |
228 | nhoff = skb_network_offset(skb); |
229 | hdr = ipv6_hdr(skb); |
230 | if (hdr->version != 6) |
231 | return -1; |
232 | |
233 | len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff; |
234 | if (skb->len < len) |
235 | return -1; |
236 | |
237 | return 0; |
238 | } |
239 | |
240 | static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, |
241 | const struct nf_hook_state *state) |
242 | { |
243 | struct nf_hook_state bridge_state = *state; |
244 | enum ip_conntrack_info ctinfo; |
245 | struct nf_conn *ct; |
246 | u32 len; |
247 | int ret; |
248 | |
249 | ct = nf_ct_get(skb, ctinfo: &ctinfo); |
250 | if ((ct && !nf_ct_is_template(ct)) || |
251 | ctinfo == IP_CT_UNTRACKED) |
252 | return NF_ACCEPT; |
253 | |
254 | switch (skb->protocol) { |
255 | case htons(ETH_P_IP): |
256 | if (!pskb_may_pull(skb, len: sizeof(struct iphdr))) |
257 | return NF_ACCEPT; |
258 | |
259 | len = skb_ip_totlen(skb); |
260 | if (pskb_trim_rcsum(skb, len)) |
261 | return NF_ACCEPT; |
262 | |
263 | if (nf_ct_br_ip_check(skb)) |
264 | return NF_ACCEPT; |
265 | |
266 | bridge_state.pf = NFPROTO_IPV4; |
267 | ret = nf_ct_br_defrag4(skb, state: &bridge_state); |
268 | break; |
269 | case htons(ETH_P_IPV6): |
270 | if (!pskb_may_pull(skb, len: sizeof(struct ipv6hdr))) |
271 | return NF_ACCEPT; |
272 | |
273 | len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); |
274 | if (pskb_trim_rcsum(skb, len)) |
275 | return NF_ACCEPT; |
276 | |
277 | if (nf_ct_br_ipv6_check(skb)) |
278 | return NF_ACCEPT; |
279 | |
280 | bridge_state.pf = NFPROTO_IPV6; |
281 | ret = nf_ct_br_defrag6(skb, state: &bridge_state); |
282 | break; |
283 | default: |
284 | nf_ct_set(skb, NULL, info: IP_CT_UNTRACKED); |
285 | return NF_ACCEPT; |
286 | } |
287 | |
288 | if (ret != NF_ACCEPT) |
289 | return ret; |
290 | |
291 | return nf_conntrack_in(skb, state: &bridge_state); |
292 | } |
293 | |
294 | static void nf_ct_bridge_frag_save(struct sk_buff *skb, |
295 | struct nf_bridge_frag_data *data) |
296 | { |
297 | if (skb_vlan_tag_present(skb)) { |
298 | data->vlan_present = true; |
299 | data->vlan_tci = skb->vlan_tci; |
300 | data->vlan_proto = skb->vlan_proto; |
301 | } else { |
302 | data->vlan_present = false; |
303 | } |
304 | skb_copy_from_linear_data_offset(skb, offset: -ETH_HLEN, to: data->mac, ETH_HLEN); |
305 | } |
306 | |
307 | static unsigned int |
308 | nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state, |
309 | int (*output)(struct net *, struct sock *sk, |
310 | const struct nf_bridge_frag_data *data, |
311 | struct sk_buff *)) |
312 | { |
313 | struct nf_bridge_frag_data data; |
314 | |
315 | if (!BR_INPUT_SKB_CB(skb)->frag_max_size) |
316 | return NF_ACCEPT; |
317 | |
318 | nf_ct_bridge_frag_save(skb, data: &data); |
319 | switch (skb->protocol) { |
320 | case htons(ETH_P_IP): |
321 | nf_br_ip_fragment(net: state->net, sk: state->sk, skb, data: &data, output); |
322 | break; |
323 | case htons(ETH_P_IPV6): |
324 | nf_br_ip6_fragment(net: state->net, sk: state->sk, skb, data: &data, output); |
325 | break; |
326 | default: |
327 | WARN_ON_ONCE(1); |
328 | return NF_DROP; |
329 | } |
330 | |
331 | return NF_STOLEN; |
332 | } |
333 | |
334 | /* Actually only slow path refragmentation needs this. */ |
335 | static int nf_ct_bridge_frag_restore(struct sk_buff *skb, |
336 | const struct nf_bridge_frag_data *data) |
337 | { |
338 | int err; |
339 | |
340 | err = skb_cow_head(skb, ETH_HLEN); |
341 | if (err) { |
342 | kfree_skb(skb); |
343 | return -ENOMEM; |
344 | } |
345 | if (data->vlan_present) |
346 | __vlan_hwaccel_put_tag(skb, vlan_proto: data->vlan_proto, vlan_tci: data->vlan_tci); |
347 | else if (skb_vlan_tag_present(skb)) |
348 | __vlan_hwaccel_clear_tag(skb); |
349 | |
350 | skb_copy_to_linear_data_offset(skb, offset: -ETH_HLEN, from: data->mac, ETH_HLEN); |
351 | skb_reset_mac_header(skb); |
352 | |
353 | return 0; |
354 | } |
355 | |
/* Per-fragment output callback used by nf_ct_bridge_post(): restore the
 * link-layer state from @data, then queue @skb for transmission.
 *
 * Returns the negative error from nf_ct_bridge_frag_restore() if that step
 * fails, otherwise the result of br_dev_queue_push_xmit().
 */
static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk,
				    const struct nf_bridge_frag_data *data,
				    struct sk_buff *skb)
{
	int rc = nf_ct_bridge_frag_restore(skb, data);

	return rc < 0 ? rc : br_dev_queue_push_xmit(net, sk, skb);
}
368 | |
369 | static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb, |
370 | const struct nf_hook_state *state) |
371 | { |
372 | int ret; |
373 | |
374 | ret = nf_confirm(priv, skb, state); |
375 | if (ret != NF_ACCEPT) |
376 | return ret; |
377 | |
378 | return nf_ct_bridge_refrag(skb, state, output: nf_ct_bridge_refrag_post); |
379 | } |
380 | |
381 | static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = { |
382 | { |
383 | .hook = nf_ct_bridge_pre, |
384 | .pf = NFPROTO_BRIDGE, |
385 | .hooknum = NF_BR_PRE_ROUTING, |
386 | .priority = NF_IP_PRI_CONNTRACK, |
387 | }, |
388 | { |
389 | .hook = nf_ct_bridge_post, |
390 | .pf = NFPROTO_BRIDGE, |
391 | .hooknum = NF_BR_POST_ROUTING, |
392 | .priority = NF_IP_PRI_CONNTRACK_CONFIRM, |
393 | }, |
394 | }; |
395 | |
396 | static struct nf_ct_bridge_info bridge_info = { |
397 | .ops = nf_ct_bridge_hook_ops, |
398 | .ops_size = ARRAY_SIZE(nf_ct_bridge_hook_ops), |
399 | .me = THIS_MODULE, |
400 | }; |
401 | |
402 | static int __init nf_conntrack_l3proto_bridge_init(void) |
403 | { |
404 | nf_ct_bridge_register(info: &bridge_info); |
405 | |
406 | return 0; |
407 | } |
408 | |
409 | static void __exit nf_conntrack_l3proto_bridge_fini(void) |
410 | { |
411 | nf_ct_bridge_unregister(info: &bridge_info); |
412 | } |
413 | |
414 | module_init(nf_conntrack_l3proto_bridge_init); |
415 | module_exit(nf_conntrack_l3proto_bridge_fini); |
416 | |
417 | MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE)); |
418 | MODULE_LICENSE("GPL" ); |
419 | |