// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 Thomas Graf <tgraf@tgraf.ch>
 */

#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <linux/bpf.h>
#include <net/lwtunnel.h>
#include <net/gre.h>
#include <net/ip6_route.h>
#include <net/ipv6_stubs.h>

struct bpf_lwt_prog {
	struct bpf_prog *prog;
	char *name;
};

struct bpf_lwt {
	struct bpf_lwt_prog in;
	struct bpf_lwt_prog out;
	struct bpf_lwt_prog xmit;
	int family;
};

#define MAX_PROG_NAME 256

static inline struct bpf_lwt *bpf_lwt_lwtunnel(struct lwtunnel_state *lwt)
{
	return (struct bpf_lwt *)lwt->data;
}

#define NO_REDIRECT false
#define CAN_REDIRECT true

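/* Run an lwtunnel BPF program on @skb and act on its verdict. BPF_REDIRECT
 * is only honoured when @can_redirect is true (the xmit hook); BPF_DROP and
 * unknown return codes free the skb.
 */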
static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
		       struct dst_entry *dst, bool can_redirect)
{
	int ret;

	/* Migration disable and BH disable are needed to protect per-cpu
	 * redirect_info between BPF prog and skb_do_redirect().
	 */
	migrate_disable();
	local_bh_disable();
	bpf_compute_data_pointers(skb);
	ret = bpf_prog_run_save_cb(lwt->prog, skb);

	switch (ret) {
	case BPF_OK:
	case BPF_LWT_REROUTE:
		break;

	case BPF_REDIRECT:
		if (unlikely(!can_redirect)) {
			pr_warn_once("Illegal redirect return code in prog %s\n",
				     lwt->name ? : "<unknown>");
			ret = BPF_OK;
		} else {
			skb_reset_mac_header(skb);
			skb_do_redirect(skb);
			ret = BPF_REDIRECT;
		}
		break;

	case BPF_DROP:
		kfree_skb(skb);
		ret = -EPERM;
		break;

	default:
		pr_warn_once("bpf-lwt: Illegal return value %u, expect packet loss\n", ret);
		kfree_skb(skb);
		ret = -EINVAL;
		break;
	}

	local_bh_enable();
	migrate_enable();

	return ret;
}

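/* Handle BPF_LWT_REROUTE from the input hook: drop the old dst, redo the
 * input route lookup for the (possibly modified) packet and hand it to
 * dst_input().
 */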
static int bpf_lwt_input_reroute(struct sk_buff *skb)
{
	int err = -EINVAL;

	if (skb->protocol == htons(ETH_P_IP)) {
		struct net_device *dev = skb_dst(skb)->dev;
		struct iphdr *iph = ip_hdr(skb);

		dev_hold(dev);
		skb_dst_drop(skb);
		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
					   iph->tos, dev);
		dev_put(dev);
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		skb_dst_drop(skb);
		err = ipv6_stub->ipv6_route_input(skb);
	} else {
		err = -EAFNOSUPPORT;
	}

	if (err)
		goto err;
	return dst_input(skb);

err:
	kfree_skb(skb);
	return err;
}

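/* lwtunnel input hook: run the LWT_BPF_IN program (redirects are not
 * permitted at this stage), then pass the skb on to the original input
 * handler, or re-route it if the program returned BPF_LWT_REROUTE.
 */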
static int bpf_input(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct bpf_lwt *bpf;
	int ret;

	bpf = bpf_lwt_lwtunnel(dst->lwtstate);
	if (bpf->in.prog) {
		ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT);
		if (ret < 0)
			return ret;
		if (ret == BPF_LWT_REROUTE)
			return bpf_lwt_input_reroute(skb);
	}

	if (unlikely(!dst->lwtstate->orig_input)) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return dst->lwtstate->orig_input(skb);
}

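/* lwtunnel output hook: run the LWT_BPF_OUT program (redirects are not
 * permitted at this stage), then pass the skb on to the original output
 * handler.
 */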
static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct bpf_lwt *bpf;
	int ret;

	bpf = bpf_lwt_lwtunnel(dst->lwtstate);
	if (bpf->out.prog) {
		ret = run_lwt_bpf(skb, &bpf->out, dst, NO_REDIRECT);
		if (ret < 0)
			return ret;
	}

	if (unlikely(!dst->lwtstate->orig_output)) {
		pr_warn_once("orig_output not set on dst for prog %s\n",
			     bpf->out.name);
		kfree_skb(skb);
		return -EINVAL;
	}

	return dst->lwtstate->orig_output(net, sk, skb);
}

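/* Ensure there is still enough headroom for the L2 header in case the xmit
 * program grew the packet; expand the skb head if necessary.
 */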
static int xmit_check_hhlen(struct sk_buff *skb, int hh_len)
{
	if (skb_headroom(skb) < hh_len) {
		int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));

		if (pskb_expand_head(skb, nhead, 0, GFP_ATOMIC))
			return -ENOMEM;
	}

	return 0;
}

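/* Handle BPF_LWT_REROUTE from the xmit hook: look up a fresh route for the
 * (possibly re-encapsulated) packet and send it out via dst_output().
 */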
static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
{
	struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
	int oif = l3mdev ? l3mdev->ifindex : 0;
	struct dst_entry *dst = NULL;
	int err = -EAFNOSUPPORT;
	struct sock *sk;
	struct net *net;
	bool ipv4;

	if (skb->protocol == htons(ETH_P_IP))
		ipv4 = true;
	else if (skb->protocol == htons(ETH_P_IPV6))
		ipv4 = false;
	else
		goto err;

	sk = sk_to_full_sk(skb->sk);
	if (sk) {
		if (sk->sk_bound_dev_if)
			oif = sk->sk_bound_dev_if;
		net = sock_net(sk);
	} else {
		net = dev_net(skb_dst(skb)->dev);
	}

	if (ipv4) {
		struct iphdr *iph = ip_hdr(skb);
		struct flowi4 fl4 = {};
		struct rtable *rt;

		fl4.flowi4_oif = oif;
		fl4.flowi4_mark = skb->mark;
		fl4.flowi4_uid = sock_net_uid(net, sk);
		fl4.flowi4_tos = RT_TOS(iph->tos);
		fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
		fl4.flowi4_proto = iph->protocol;
		fl4.daddr = iph->daddr;
		fl4.saddr = iph->saddr;

		rt = ip_route_output_key(net, &fl4);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			goto err;
		}
		dst = &rt->dst;
	} else {
		struct ipv6hdr *iph6 = ipv6_hdr(skb);
		struct flowi6 fl6 = {};

		fl6.flowi6_oif = oif;
		fl6.flowi6_mark = skb->mark;
		fl6.flowi6_uid = sock_net_uid(net, sk);
		fl6.flowlabel = ip6_flowinfo(iph6);
		fl6.flowi6_proto = iph6->nexthdr;
		fl6.daddr = iph6->daddr;
		fl6.saddr = iph6->saddr;

		dst = ipv6_stub->ipv6_dst_lookup_flow(net, skb->sk, &fl6, NULL);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err;
		}
	}
	if (unlikely(dst->error)) {
		err = dst->error;
		dst_release(dst);
		goto err;
	}

	/* Although skb header was reserved in bpf_lwt_push_ip_encap(), it
	 * was done for the previous dst, so we are doing it here again, in
	 * case the new dst needs much more space. The call below is a noop
	 * if there is enough header space in skb.
	 */
	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
	if (unlikely(err))
		goto err;

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
	if (unlikely(err))
		return net_xmit_errno(err);

	/* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
	return LWTUNNEL_XMIT_DONE;

err:
	kfree_skb(skb);
	return err;
}

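/* lwtunnel xmit hook: run the LWT_BPF_XMIT program. Redirects are allowed
 * here; BPF_LWT_REROUTE triggers a new route lookup, while BPF_OK continues
 * transmission on the original path.
 */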
static int bpf_xmit(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct bpf_lwt *bpf;

	bpf = bpf_lwt_lwtunnel(dst->lwtstate);
	if (bpf->xmit.prog) {
		int hh_len = dst->dev->hard_header_len;
		__be16 proto = skb->protocol;
		int ret;

		ret = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT);
		switch (ret) {
		case BPF_OK:
			/* If the header changed, e.g. via bpf_lwt_push_encap,
			 * BPF_LWT_REROUTE below should have been used if the
			 * protocol was also changed.
			 */
			if (skb->protocol != proto) {
				kfree_skb(skb);
				return -EINVAL;
			}
			/* If the header was expanded, headroom might be too
			 * small for L2 header to come, expand as needed.
			 */
			ret = xmit_check_hhlen(skb, hh_len);
			if (unlikely(ret))
				return ret;

			return LWTUNNEL_XMIT_CONTINUE;
		case BPF_REDIRECT:
			return LWTUNNEL_XMIT_DONE;
		case BPF_LWT_REROUTE:
			return bpf_lwt_xmit_reroute(skb);
		default:
			return ret;
		}
	}

	return LWTUNNEL_XMIT_CONTINUE;
}

static void bpf_lwt_prog_destroy(struct bpf_lwt_prog *prog)
{
	if (prog->prog)
		bpf_prog_put(prog->prog);

	kfree(prog->name);
}

static void bpf_destroy_state(struct lwtunnel_state *lwt)
{
	struct bpf_lwt *bpf = bpf_lwt_lwtunnel(lwt);

	bpf_lwt_prog_destroy(&bpf->in);
	bpf_lwt_prog_destroy(&bpf->out);
	bpf_lwt_prog_destroy(&bpf->xmit);
}

static const struct nla_policy bpf_prog_policy[LWT_BPF_PROG_MAX + 1] = {
	[LWT_BPF_PROG_FD]   = { .type = NLA_U32, },
	[LWT_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
				.len = MAX_PROG_NAME },
};

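/* Parse one nested LWT_BPF_{IN,OUT,XMIT} attribute: duplicate the program
 * name and take a reference on the BPF program identified by the fd.
 */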
static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog,
			  enum bpf_prog_type type)
{
	struct nlattr *tb[LWT_BPF_PROG_MAX + 1];
	struct bpf_prog *p;
	int ret;
	u32 fd;

	ret = nla_parse_nested_deprecated(tb, LWT_BPF_PROG_MAX, attr,
					  bpf_prog_policy, NULL);
	if (ret < 0)
		return ret;

	if (!tb[LWT_BPF_PROG_FD] || !tb[LWT_BPF_PROG_NAME])
		return -EINVAL;

	prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_ATOMIC);
	if (!prog->name)
		return -ENOMEM;

	fd = nla_get_u32(tb[LWT_BPF_PROG_FD]);
	p = bpf_prog_get_type(fd, type);
	if (IS_ERR(p))
		return PTR_ERR(p);

	prog->prog = p;

	return 0;
}

static const struct nla_policy bpf_nl_policy[LWT_BPF_MAX + 1] = {
	[LWT_BPF_IN]		= { .type = NLA_NESTED, },
	[LWT_BPF_OUT]		= { .type = NLA_NESTED, },
	[LWT_BPF_XMIT]		= { .type = NLA_NESTED, },
	[LWT_BPF_XMIT_HEADROOM]	= { .type = NLA_U32 },
};

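/* Netlink build_state callback: allocate the lwtunnel state and attach the
 * optional in/out/xmit programs and xmit headroom carried in the attributes.
 */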
static int bpf_build_state(struct net *net, struct nlattr *nla,
			   unsigned int family, const void *cfg,
			   struct lwtunnel_state **ts,
			   struct netlink_ext_ack *extack)
{
	struct nlattr *tb[LWT_BPF_MAX + 1];
	struct lwtunnel_state *newts;
	struct bpf_lwt *bpf;
	int ret;

	if (family != AF_INET && family != AF_INET6)
		return -EAFNOSUPPORT;

	ret = nla_parse_nested_deprecated(tb, LWT_BPF_MAX, nla, bpf_nl_policy,
					  extack);
	if (ret < 0)
		return ret;

	if (!tb[LWT_BPF_IN] && !tb[LWT_BPF_OUT] && !tb[LWT_BPF_XMIT])
		return -EINVAL;

	newts = lwtunnel_state_alloc(sizeof(*bpf));
	if (!newts)
		return -ENOMEM;

	newts->type = LWTUNNEL_ENCAP_BPF;
	bpf = bpf_lwt_lwtunnel(newts);

	if (tb[LWT_BPF_IN]) {
		newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
		ret = bpf_parse_prog(tb[LWT_BPF_IN], &bpf->in,
				     BPF_PROG_TYPE_LWT_IN);
		if (ret  < 0)
			goto errout;
	}

	if (tb[LWT_BPF_OUT]) {
		newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
		ret = bpf_parse_prog(tb[LWT_BPF_OUT], &bpf->out,
				     BPF_PROG_TYPE_LWT_OUT);
		if (ret < 0)
			goto errout;
	}

	if (tb[LWT_BPF_XMIT]) {
		newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT;
		ret = bpf_parse_prog(tb[LWT_BPF_XMIT], &bpf->xmit,
				     BPF_PROG_TYPE_LWT_XMIT);
		if (ret < 0)
			goto errout;
	}

	if (tb[LWT_BPF_XMIT_HEADROOM]) {
		u32 headroom = nla_get_u32(tb[LWT_BPF_XMIT_HEADROOM]);

		if (headroom > LWT_BPF_MAX_HEADROOM) {
			ret = -ERANGE;
			goto errout;
		}

		newts->headroom = headroom;
	}

	bpf->family = family;
	*ts = newts;

	return 0;

errout:
	bpf_destroy_state(newts);
	kfree(newts);
	return ret;
}

static int bpf_fill_lwt_prog(struct sk_buff *skb, int attr,
			     struct bpf_lwt_prog *prog)
{
	struct nlattr *nest;

	if (!prog->prog)
		return 0;

	nest = nla_nest_start_noflag(skb, attr);
	if (!nest)
		return -EMSGSIZE;

	if (prog->name &&
	    nla_put_string(skb, LWT_BPF_PROG_NAME, prog->name))
		return -EMSGSIZE;

	return nla_nest_end(skb, nest);
}

static int bpf_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwt)
{
	struct bpf_lwt *bpf = bpf_lwt_lwtunnel(lwt);

	if (bpf_fill_lwt_prog(skb, LWT_BPF_IN, &bpf->in) < 0 ||
	    bpf_fill_lwt_prog(skb, LWT_BPF_OUT, &bpf->out) < 0 ||
	    bpf_fill_lwt_prog(skb, LWT_BPF_XMIT, &bpf->xmit) < 0)
		return -EMSGSIZE;

	return 0;
}

static int bpf_encap_nlsize(struct lwtunnel_state *lwtstate)
{
	int nest_len = nla_total_size(sizeof(struct nlattr)) +
		       nla_total_size(MAX_PROG_NAME) + /* LWT_BPF_PROG_NAME */
		       0;

	return nest_len + /* LWT_BPF_IN */
	       nest_len + /* LWT_BPF_OUT */
	       nest_len + /* LWT_BPF_XMIT */
	       0;
}

static int bpf_lwt_prog_cmp(struct bpf_lwt_prog *a, struct bpf_lwt_prog *b)
{
	/* FIXME:
	 * The LWT state is currently rebuilt for delete requests which
	 * results in a new bpf_prog instance. Comparing names for now.
	 */
	if (!a->name && !b->name)
		return 0;

	if (!a->name || !b->name)
		return 1;

	return strcmp(a->name, b->name);
}

static int bpf_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
	struct bpf_lwt *a_bpf = bpf_lwt_lwtunnel(a);
	struct bpf_lwt *b_bpf = bpf_lwt_lwtunnel(b);

	return bpf_lwt_prog_cmp(&a_bpf->in, &b_bpf->in) ||
	       bpf_lwt_prog_cmp(&a_bpf->out, &b_bpf->out) ||
	       bpf_lwt_prog_cmp(&a_bpf->xmit, &b_bpf->xmit);
}

static const struct lwtunnel_encap_ops bpf_encap_ops = {
	.build_state	= bpf_build_state,
	.destroy_state	= bpf_destroy_state,
	.input		= bpf_input,
	.output		= bpf_output,
	.xmit		= bpf_xmit,
	.fill_encap	= bpf_fill_encap_info,
	.get_encap_size	= bpf_encap_nlsize,
	.cmp_encap	= bpf_encap_cmp,
	.owner		= THIS_MODULE,
};

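/* Adjust GSO metadata after @encap_len bytes of encapsulation were pushed:
 * add the tunnel GSO type (plus SKB_GSO_DODGY) and reduce gso_size so the
 * resulting segments still fit the original segment size.
 */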
static int handle_gso_type(struct sk_buff *skb, unsigned int gso_type,
			   int encap_len)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	gso_type |= SKB_GSO_DODGY;
	shinfo->gso_type |= gso_type;
	skb_decrease_gso_size(shinfo, encap_len);
	shinfo->gso_segs = 0;
	return 0;
}

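/* Select the tunnel GSO type for a GSO skb based on the protocol of the
 * pushed outer header (GRE, UDP or IP-in-IP). Only TCP GSO packets are
 * supported here.
 */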
static int handle_gso_encap(struct sk_buff *skb, bool ipv4, int encap_len)
{
	int next_hdr_offset;
	void *next_hdr;
	__u8 protocol;

	/* SCTP and UDP_L4 gso need more nuanced handling than what
	 * handle_gso_type() does above: skb_decrease_gso_size() is not enough.
	 * So at the moment only TCP GSO packets are let through.
	 */
	if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
		return -ENOTSUPP;

	if (ipv4) {
		protocol = ip_hdr(skb)->protocol;
		next_hdr_offset = sizeof(struct iphdr);
		next_hdr = skb_network_header(skb) + next_hdr_offset;
	} else {
		protocol = ipv6_hdr(skb)->nexthdr;
		next_hdr_offset = sizeof(struct ipv6hdr);
		next_hdr = skb_network_header(skb) + next_hdr_offset;
	}

	switch (protocol) {
	case IPPROTO_GRE:
		next_hdr_offset += sizeof(struct gre_base_hdr);
		if (next_hdr_offset > encap_len)
			return -EINVAL;

		if (((struct gre_base_hdr *)next_hdr)->flags & GRE_CSUM)
			return handle_gso_type(skb, SKB_GSO_GRE_CSUM,
					       encap_len);
		return handle_gso_type(skb, SKB_GSO_GRE, encap_len);

	case IPPROTO_UDP:
		next_hdr_offset += sizeof(struct udphdr);
		if (next_hdr_offset > encap_len)
			return -EINVAL;

		if (((struct udphdr *)next_hdr)->check)
			return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL_CSUM,
					       encap_len);
		return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL, encap_len);

	case IPPROTO_IP:
	case IPPROTO_IPV6:
		if (ipv4)
			return handle_gso_type(skb, SKB_GSO_IPXIP4, encap_len);
		else
			return handle_gso_type(skb, SKB_GSO_IPXIP6, encap_len);

	default:
		return -EPROTONOSUPPORT;
	}
}

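/* Push an IPv4/IPv6 encapsulation header supplied by a BPF program (via the
 * bpf_lwt_push_encap() helper): validate @hdr, make headroom for it, install
 * it as the new network header and fix up protocol, checksum and GSO state.
 */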
int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress)
{
	struct iphdr *iph;
	bool ipv4;
	int err;

	if (unlikely(len < sizeof(struct iphdr) || len > LWT_BPF_MAX_HEADROOM))
		return -EINVAL;

	/* validate protocol and length */
	iph = (struct iphdr *)hdr;
	if (iph->version == 4) {
		ipv4 = true;
		if (unlikely(len < iph->ihl * 4))
			return -EINVAL;
	} else if (iph->version == 6) {
		ipv4 = false;
		if (unlikely(len < sizeof(struct ipv6hdr)))
			return -EINVAL;
	} else {
		return -EINVAL;
	}

	if (ingress)
		err = skb_cow_head(skb, len + skb->mac_len);
	else
		err = skb_cow_head(skb,
				   len + LL_RESERVED_SPACE(skb_dst(skb)->dev));
	if (unlikely(err))
		return err;

	/* push the encap headers and fix pointers */
	skb_reset_inner_headers(skb);
	skb_reset_inner_mac_header(skb);  /* mac header is not yet set */
	skb_set_inner_protocol(skb, skb->protocol);
	skb->encapsulation = 1;
	skb_push(skb, len);
	if (ingress)
		skb_postpush_rcsum(skb, iph, len);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), hdr, len);
	bpf_compute_data_pointers(skb);
	skb_clear_hash(skb);

	if (ipv4) {
		skb->protocol = htons(ETH_P_IP);
		iph = ip_hdr(skb);

		if (!iph->check)
			iph->check = ip_fast_csum((unsigned char *)iph,
						  iph->ihl);
	} else {
		skb->protocol = htons(ETH_P_IPV6);
	}

	if (skb_is_gso(skb))
		return handle_gso_encap(skb, ipv4, len);

	return 0;
}

static int __init bpf_lwt_init(void)
{
	return lwtunnel_encap_add_ops(&bpf_encap_ops, LWTUNNEL_ENCAP_BPF);
}

subsys_initcall(bpf_lwt_init)
658 | |