1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (c) 2022 Pablo Neira Ayuso <pablo@netfilter.org> |
4 | */ |
5 | |
6 | #include <linux/kernel.h> |
7 | #include <linux/if_vlan.h> |
8 | #include <linux/init.h> |
9 | #include <linux/module.h> |
10 | #include <linux/netlink.h> |
11 | #include <linux/netfilter.h> |
12 | #include <linux/netfilter/nf_tables.h> |
13 | #include <net/netfilter/nf_tables_core.h> |
14 | #include <net/netfilter/nf_tables.h> |
15 | #include <net/netfilter/nft_meta.h> |
16 | #include <net/netfilter/nf_tables_offload.h> |
17 | #include <linux/tcp.h> |
18 | #include <linux/udp.h> |
19 | #include <net/gre.h> |
20 | #include <net/geneve.h> |
21 | #include <net/ip.h> |
22 | #include <linux/icmpv6.h> |
23 | #include <linux/ip.h> |
24 | #include <linux/ipv6.h> |
25 | |
/* Per-CPU scratch area caching the parsed inner tunnel header offsets. */
static DEFINE_PER_CPU(struct nft_inner_tun_ctx, nft_pcpu_tun_ctx);
27 | |
/* Same layout as nft_expr but it embeds the private expression data area. */
struct __nft_expr {
	const struct nft_expr_ops *ops;
	/* Inline storage for the wrapped inner expression's private data;
	 * only payload and meta expressions are supported (see nft_inner_init).
	 */
	union {
		struct nft_payload payload;
		struct nft_meta meta;
	} __attribute__((aligned(__alignof__(u64))));
};
36 | |
/* Discriminator for the union in struct __nft_expr (stored in
 * nft_inner::expr_type).
 */
enum {
	NFT_INNER_EXPR_PAYLOAD,
	NFT_INNER_EXPR_META,
};
41 | |
/* Private data of the "inner" expression. */
struct nft_inner {
	u8 flags;	/* NFT_INNER_* flags from NFTA_INNER_FLAGS */
	u8 hdrsize;	/* tunnel header size, used when NFT_INNER_HDRSIZE is set */
	u8 type;	/* tunnel type from NFTA_INNER_TYPE (e.g. NFT_INNER_GENEVE) */
	u8 expr_type;	/* NFT_INNER_EXPR_PAYLOAD or NFT_INNER_EXPR_META */

	struct __nft_expr expr;	/* embedded inner payload/meta expression */
};
50 | |
51 | static int nft_inner_parse_l2l3(const struct nft_inner *priv, |
52 | const struct nft_pktinfo *pkt, |
53 | struct nft_inner_tun_ctx *ctx, u32 off) |
54 | { |
55 | __be16 llproto, outer_llproto; |
56 | u32 nhoff, thoff; |
57 | |
58 | if (priv->flags & NFT_INNER_LL) { |
59 | struct vlan_ethhdr *veth, _veth; |
60 | struct ethhdr *eth, _eth; |
61 | u32 hdrsize; |
62 | |
63 | eth = skb_header_pointer(skb: pkt->skb, offset: off, len: sizeof(_eth), buffer: &_eth); |
64 | if (!eth) |
65 | return -1; |
66 | |
67 | switch (eth->h_proto) { |
68 | case htons(ETH_P_IP): |
69 | case htons(ETH_P_IPV6): |
70 | llproto = eth->h_proto; |
71 | hdrsize = sizeof(_eth); |
72 | break; |
73 | case htons(ETH_P_8021Q): |
74 | veth = skb_header_pointer(skb: pkt->skb, offset: off, len: sizeof(_veth), buffer: &_veth); |
75 | if (!veth) |
76 | return -1; |
77 | |
78 | outer_llproto = veth->h_vlan_encapsulated_proto; |
79 | llproto = veth->h_vlan_proto; |
80 | hdrsize = sizeof(_veth); |
81 | break; |
82 | default: |
83 | return -1; |
84 | } |
85 | |
86 | ctx->inner_lloff = off; |
87 | ctx->flags |= NFT_PAYLOAD_CTX_INNER_LL; |
88 | off += hdrsize; |
89 | } else { |
90 | struct iphdr *iph; |
91 | u32 _version; |
92 | |
93 | iph = skb_header_pointer(skb: pkt->skb, offset: off, len: sizeof(_version), buffer: &_version); |
94 | if (!iph) |
95 | return -1; |
96 | |
97 | switch (iph->version) { |
98 | case 4: |
99 | llproto = htons(ETH_P_IP); |
100 | break; |
101 | case 6: |
102 | llproto = htons(ETH_P_IPV6); |
103 | break; |
104 | default: |
105 | return -1; |
106 | } |
107 | } |
108 | |
109 | ctx->llproto = llproto; |
110 | if (llproto == htons(ETH_P_8021Q)) |
111 | llproto = outer_llproto; |
112 | |
113 | nhoff = off; |
114 | |
115 | switch (llproto) { |
116 | case htons(ETH_P_IP): { |
117 | struct iphdr *iph, _iph; |
118 | |
119 | iph = skb_header_pointer(skb: pkt->skb, offset: nhoff, len: sizeof(_iph), buffer: &_iph); |
120 | if (!iph) |
121 | return -1; |
122 | |
123 | if (iph->ihl < 5 || iph->version != 4) |
124 | return -1; |
125 | |
126 | ctx->inner_nhoff = nhoff; |
127 | ctx->flags |= NFT_PAYLOAD_CTX_INNER_NH; |
128 | |
129 | thoff = nhoff + (iph->ihl * 4); |
130 | if ((ntohs(iph->frag_off) & IP_OFFSET) == 0) { |
131 | ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH; |
132 | ctx->inner_thoff = thoff; |
133 | ctx->l4proto = iph->protocol; |
134 | } |
135 | } |
136 | break; |
137 | case htons(ETH_P_IPV6): { |
138 | struct ipv6hdr *ip6h, _ip6h; |
139 | int fh_flags = IP6_FH_F_AUTH; |
140 | unsigned short fragoff; |
141 | int l4proto; |
142 | |
143 | ip6h = skb_header_pointer(skb: pkt->skb, offset: nhoff, len: sizeof(_ip6h), buffer: &_ip6h); |
144 | if (!ip6h) |
145 | return -1; |
146 | |
147 | if (ip6h->version != 6) |
148 | return -1; |
149 | |
150 | ctx->inner_nhoff = nhoff; |
151 | ctx->flags |= NFT_PAYLOAD_CTX_INNER_NH; |
152 | |
153 | thoff = nhoff; |
154 | l4proto = ipv6_find_hdr(skb: pkt->skb, offset: &thoff, target: -1, fragoff: &fragoff, fragflg: &fh_flags); |
155 | if (l4proto < 0 || thoff > U16_MAX) |
156 | return -1; |
157 | |
158 | if (fragoff == 0) { |
159 | thoff = nhoff + sizeof(_ip6h); |
160 | ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH; |
161 | ctx->inner_thoff = thoff; |
162 | ctx->l4proto = l4proto; |
163 | } |
164 | } |
165 | break; |
166 | default: |
167 | return -1; |
168 | } |
169 | |
170 | return 0; |
171 | } |
172 | |
173 | static int nft_inner_parse_tunhdr(const struct nft_inner *priv, |
174 | const struct nft_pktinfo *pkt, |
175 | struct nft_inner_tun_ctx *ctx, u32 *off) |
176 | { |
177 | if (pkt->tprot == IPPROTO_GRE) { |
178 | ctx->inner_tunoff = pkt->thoff; |
179 | ctx->flags |= NFT_PAYLOAD_CTX_INNER_TUN; |
180 | return 0; |
181 | } |
182 | |
183 | if (pkt->tprot != IPPROTO_UDP) |
184 | return -1; |
185 | |
186 | ctx->inner_tunoff = *off; |
187 | ctx->flags |= NFT_PAYLOAD_CTX_INNER_TUN; |
188 | *off += priv->hdrsize; |
189 | |
190 | switch (priv->type) { |
191 | case NFT_INNER_GENEVE: { |
192 | struct genevehdr *gnvh, _gnvh; |
193 | |
194 | gnvh = skb_header_pointer(skb: pkt->skb, offset: pkt->inneroff, |
195 | len: sizeof(_gnvh), buffer: &_gnvh); |
196 | if (!gnvh) |
197 | return -1; |
198 | |
199 | *off += gnvh->opt_len * 4; |
200 | } |
201 | break; |
202 | default: |
203 | break; |
204 | } |
205 | |
206 | return 0; |
207 | } |
208 | |
209 | static int nft_inner_parse(const struct nft_inner *priv, |
210 | struct nft_pktinfo *pkt, |
211 | struct nft_inner_tun_ctx *tun_ctx) |
212 | { |
213 | struct nft_inner_tun_ctx ctx = {}; |
214 | u32 off = pkt->inneroff; |
215 | |
216 | if (priv->flags & NFT_INNER_HDRSIZE && |
217 | nft_inner_parse_tunhdr(priv, pkt, ctx: &ctx, off: &off) < 0) |
218 | return -1; |
219 | |
220 | if (priv->flags & (NFT_INNER_LL | NFT_INNER_NH)) { |
221 | if (nft_inner_parse_l2l3(priv, pkt, ctx: &ctx, off) < 0) |
222 | return -1; |
223 | } else if (priv->flags & NFT_INNER_TH) { |
224 | ctx.inner_thoff = off; |
225 | ctx.flags |= NFT_PAYLOAD_CTX_INNER_TH; |
226 | } |
227 | |
228 | *tun_ctx = ctx; |
229 | tun_ctx->type = priv->type; |
230 | pkt->flags |= NFT_PKTINFO_INNER_FULL; |
231 | |
232 | return 0; |
233 | } |
234 | |
235 | static bool nft_inner_parse_needed(const struct nft_inner *priv, |
236 | const struct nft_pktinfo *pkt, |
237 | const struct nft_inner_tun_ctx *tun_ctx) |
238 | { |
239 | if (!(pkt->flags & NFT_PKTINFO_INNER_FULL)) |
240 | return true; |
241 | |
242 | if (priv->type != tun_ctx->type) |
243 | return true; |
244 | |
245 | return false; |
246 | } |
247 | |
248 | static void nft_inner_eval(const struct nft_expr *expr, struct nft_regs *regs, |
249 | const struct nft_pktinfo *pkt) |
250 | { |
251 | struct nft_inner_tun_ctx *tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx); |
252 | const struct nft_inner *priv = nft_expr_priv(expr); |
253 | |
254 | if (nft_payload_inner_offset(pkt) < 0) |
255 | goto err; |
256 | |
257 | if (nft_inner_parse_needed(priv, pkt, tun_ctx) && |
258 | nft_inner_parse(priv, pkt: (struct nft_pktinfo *)pkt, tun_ctx) < 0) |
259 | goto err; |
260 | |
261 | switch (priv->expr_type) { |
262 | case NFT_INNER_EXPR_PAYLOAD: |
263 | nft_payload_inner_eval(expr: (struct nft_expr *)&priv->expr, regs, pkt, ctx: tun_ctx); |
264 | break; |
265 | case NFT_INNER_EXPR_META: |
266 | nft_meta_inner_eval(expr: (struct nft_expr *)&priv->expr, regs, pkt, tun_ctx); |
267 | break; |
268 | default: |
269 | WARN_ON_ONCE(1); |
270 | goto err; |
271 | } |
272 | return; |
273 | err: |
274 | regs->verdict.code = NFT_BREAK; |
275 | } |
276 | |
/* Netlink attribute policy for NFTA_INNER_* attributes. */
static const struct nla_policy nft_inner_policy[NFTA_INNER_MAX + 1] = {
	[NFTA_INNER_NUM]	= { .type = NLA_U32 },
	[NFTA_INNER_FLAGS]	= { .type = NLA_U32 },
	[NFTA_INNER_HDRSIZE]	= { .type = NLA_U32 },
	[NFTA_INNER_TYPE]	= { .type = NLA_U32 },
	[NFTA_INNER_EXPR]	= { .type = NLA_NESTED },
};
284 | |
/* Result of parsing a nested expression attribute (see nft_expr_inner_parse):
 * the resolved ops, the originating attribute, and its parsed sub-attributes.
 */
struct nft_expr_info {
	const struct nft_expr_ops	*ops;
	const struct nlattr		*attr;
	struct nlattr			*tb[NFT_EXPR_MAXATTR + 1];
};
290 | |
291 | static int nft_inner_init(const struct nft_ctx *ctx, |
292 | const struct nft_expr *expr, |
293 | const struct nlattr * const tb[]) |
294 | { |
295 | struct nft_inner *priv = nft_expr_priv(expr); |
296 | u32 flags, hdrsize, type, num; |
297 | struct nft_expr_info expr_info; |
298 | int err; |
299 | |
300 | if (!tb[NFTA_INNER_FLAGS] || |
301 | !tb[NFTA_INNER_NUM] || |
302 | !tb[NFTA_INNER_HDRSIZE] || |
303 | !tb[NFTA_INNER_TYPE] || |
304 | !tb[NFTA_INNER_EXPR]) |
305 | return -EINVAL; |
306 | |
307 | flags = ntohl(nla_get_be32(tb[NFTA_INNER_FLAGS])); |
308 | if (flags & ~NFT_INNER_MASK) |
309 | return -EOPNOTSUPP; |
310 | |
311 | num = ntohl(nla_get_be32(tb[NFTA_INNER_NUM])); |
312 | if (num != 0) |
313 | return -EOPNOTSUPP; |
314 | |
315 | hdrsize = ntohl(nla_get_be32(tb[NFTA_INNER_HDRSIZE])); |
316 | type = ntohl(nla_get_be32(tb[NFTA_INNER_TYPE])); |
317 | |
318 | if (type > U8_MAX) |
319 | return -EINVAL; |
320 | |
321 | if (flags & NFT_INNER_HDRSIZE) { |
322 | if (hdrsize == 0 || hdrsize > 64) |
323 | return -EOPNOTSUPP; |
324 | } |
325 | |
326 | priv->flags = flags; |
327 | priv->hdrsize = hdrsize; |
328 | priv->type = type; |
329 | |
330 | err = nft_expr_inner_parse(ctx, nla: tb[NFTA_INNER_EXPR], info: &expr_info); |
331 | if (err < 0) |
332 | return err; |
333 | |
334 | priv->expr.ops = expr_info.ops; |
335 | |
336 | if (!strcmp(expr_info.ops->type->name, "payload" )) |
337 | priv->expr_type = NFT_INNER_EXPR_PAYLOAD; |
338 | else if (!strcmp(expr_info.ops->type->name, "meta" )) |
339 | priv->expr_type = NFT_INNER_EXPR_META; |
340 | else |
341 | return -EINVAL; |
342 | |
343 | err = expr_info.ops->init(ctx, (struct nft_expr *)&priv->expr, |
344 | (const struct nlattr * const*)expr_info.tb); |
345 | if (err < 0) |
346 | return err; |
347 | |
348 | return 0; |
349 | } |
350 | |
351 | static int nft_inner_dump(struct sk_buff *skb, |
352 | const struct nft_expr *expr, bool reset) |
353 | { |
354 | const struct nft_inner *priv = nft_expr_priv(expr); |
355 | |
356 | if (nla_put_be32(skb, attrtype: NFTA_INNER_NUM, htonl(0)) || |
357 | nla_put_be32(skb, attrtype: NFTA_INNER_TYPE, htonl(priv->type)) || |
358 | nla_put_be32(skb, attrtype: NFTA_INNER_FLAGS, htonl(priv->flags)) || |
359 | nla_put_be32(skb, attrtype: NFTA_INNER_HDRSIZE, htonl(priv->hdrsize))) |
360 | goto nla_put_failure; |
361 | |
362 | if (nft_expr_dump(skb, attr: NFTA_INNER_EXPR, |
363 | expr: (struct nft_expr *)&priv->expr, reset) < 0) |
364 | goto nla_put_failure; |
365 | |
366 | return 0; |
367 | |
368 | nla_put_failure: |
369 | return -1; |
370 | } |
371 | |
/* Operations table for the "inner" expression. */
static const struct nft_expr_ops nft_inner_ops = {
	.type		= &nft_inner_type,
	.size		= NFT_EXPR_SIZE(sizeof(struct nft_inner)),
	.eval		= nft_inner_eval,
	.init		= nft_inner_init,
	.dump		= nft_inner_dump,
};
379 | |
/* Expression type registration for "inner". */
struct nft_expr_type nft_inner_type __read_mostly = {
	.name		= "inner",
	.ops		= &nft_inner_ops,
	.policy		= nft_inner_policy,
	.maxattr	= NFTA_INNER_MAX,
	.owner		= THIS_MODULE,
};
387 | |