1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #include <linux/bpf.h> |
3 | #include <linux/filter.h> |
4 | #include <linux/kmod.h> |
5 | #include <linux/module.h> |
6 | #include <linux/netfilter.h> |
7 | |
8 | #include <net/netfilter/nf_bpf_link.h> |
9 | #include <uapi/linux/netfilter_ipv4.h> |
10 | |
11 | static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb, |
12 | const struct nf_hook_state *s) |
13 | { |
14 | const struct bpf_prog *prog = bpf_prog; |
15 | struct bpf_nf_ctx ctx = { |
16 | .state = s, |
17 | .skb = skb, |
18 | }; |
19 | |
20 | return bpf_prog_run(prog, ctx: &ctx); |
21 | } |
22 | |
struct bpf_nf_link {
	/* Base bpf_link; container_of() anchor for the link ops callbacks. */
	struct bpf_link link;
	/* The netfilter hook registered on behalf of this link. */
	struct nf_hook_ops hook_ops;
	/* Network namespace the hook was registered in. */
	struct net *net;
	/* Set once (via cmpxchg) so release/detach unregister only once. */
	u32 dead;
	/* Non-NULL when IP defrag was enabled; module ref is held. */
	const struct nf_defrag_hook *defrag_hook;
};
30 | |
31 | #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) |
32 | static const struct nf_defrag_hook * |
33 | get_proto_defrag_hook(struct bpf_nf_link *link, |
34 | const struct nf_defrag_hook __rcu **ptr_global_hook, |
35 | const char *mod) |
36 | { |
37 | const struct nf_defrag_hook *hook; |
38 | int err; |
39 | |
40 | /* RCU protects us from races against module unloading */ |
41 | rcu_read_lock(); |
42 | hook = rcu_dereference(*ptr_global_hook); |
43 | if (!hook) { |
44 | rcu_read_unlock(); |
45 | err = request_module(mod); |
46 | if (err) |
47 | return ERR_PTR(error: err < 0 ? err : -EINVAL); |
48 | |
49 | rcu_read_lock(); |
50 | hook = rcu_dereference(*ptr_global_hook); |
51 | } |
52 | |
53 | if (hook && try_module_get(module: hook->owner)) { |
54 | /* Once we have a refcnt on the module, we no longer need RCU */ |
55 | hook = rcu_pointer_handoff(hook); |
56 | } else { |
57 | WARN_ONCE(!hook, "%s has bad registration" , mod); |
58 | hook = ERR_PTR(error: -ENOENT); |
59 | } |
60 | rcu_read_unlock(); |
61 | |
62 | if (!IS_ERR(ptr: hook)) { |
63 | err = hook->enable(link->net); |
64 | if (err) { |
65 | module_put(module: hook->owner); |
66 | hook = ERR_PTR(error: err); |
67 | } |
68 | } |
69 | |
70 | return hook; |
71 | } |
72 | #endif |
73 | |
74 | static int bpf_nf_enable_defrag(struct bpf_nf_link *link) |
75 | { |
76 | const struct nf_defrag_hook __maybe_unused *hook; |
77 | |
78 | switch (link->hook_ops.pf) { |
79 | #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) |
80 | case NFPROTO_IPV4: |
81 | hook = get_proto_defrag_hook(link, ptr_global_hook: &nf_defrag_v4_hook, mod: "nf_defrag_ipv4" ); |
82 | if (IS_ERR(ptr: hook)) |
83 | return PTR_ERR(ptr: hook); |
84 | |
85 | link->defrag_hook = hook; |
86 | return 0; |
87 | #endif |
88 | #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) |
89 | case NFPROTO_IPV6: |
90 | hook = get_proto_defrag_hook(link, ptr_global_hook: &nf_defrag_v6_hook, mod: "nf_defrag_ipv6" ); |
91 | if (IS_ERR(ptr: hook)) |
92 | return PTR_ERR(ptr: hook); |
93 | |
94 | link->defrag_hook = hook; |
95 | return 0; |
96 | #endif |
97 | default: |
98 | return -EAFNOSUPPORT; |
99 | } |
100 | } |
101 | |
102 | static void bpf_nf_disable_defrag(struct bpf_nf_link *link) |
103 | { |
104 | const struct nf_defrag_hook *hook = link->defrag_hook; |
105 | |
106 | if (!hook) |
107 | return; |
108 | hook->disable(link->net); |
109 | module_put(module: hook->owner); |
110 | } |
111 | |
112 | static void bpf_nf_link_release(struct bpf_link *link) |
113 | { |
114 | struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link); |
115 | |
116 | if (nf_link->dead) |
117 | return; |
118 | |
119 | /* do not double release in case .detach was already called */ |
120 | if (!cmpxchg(&nf_link->dead, 0, 1)) { |
121 | nf_unregister_net_hook(net: nf_link->net, ops: &nf_link->hook_ops); |
122 | bpf_nf_disable_defrag(link: nf_link); |
123 | } |
124 | } |
125 | |
126 | static void bpf_nf_link_dealloc(struct bpf_link *link) |
127 | { |
128 | struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link); |
129 | |
130 | kfree(objp: nf_link); |
131 | } |
132 | |
/* bpf_link ->detach callback: shares the release path; always succeeds. */
static int bpf_nf_link_detach(struct bpf_link *link)
{
	bpf_nf_link_release(link);
	return 0;
}
138 | |
139 | static void bpf_nf_link_show_info(const struct bpf_link *link, |
140 | struct seq_file *seq) |
141 | { |
142 | struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link); |
143 | |
144 | seq_printf(m: seq, fmt: "pf:\t%u\thooknum:\t%u\tprio:\t%d\n" , |
145 | nf_link->hook_ops.pf, nf_link->hook_ops.hooknum, |
146 | nf_link->hook_ops.priority); |
147 | } |
148 | |
149 | static int bpf_nf_link_fill_link_info(const struct bpf_link *link, |
150 | struct bpf_link_info *info) |
151 | { |
152 | struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link); |
153 | |
154 | info->netfilter.pf = nf_link->hook_ops.pf; |
155 | info->netfilter.hooknum = nf_link->hook_ops.hooknum; |
156 | info->netfilter.priority = nf_link->hook_ops.priority; |
157 | info->netfilter.flags = 0; |
158 | |
159 | return 0; |
160 | } |
161 | |
162 | static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog, |
163 | struct bpf_prog *old_prog) |
164 | { |
165 | return -EOPNOTSUPP; |
166 | } |
167 | |
/* Link operations for BPF_LINK_TYPE_NETFILTER links. */
static const struct bpf_link_ops bpf_nf_link_lops = {
	.release = bpf_nf_link_release,
	.dealloc = bpf_nf_link_dealloc,
	.detach = bpf_nf_link_detach,
	.show_fdinfo = bpf_nf_link_show_info,
	.fill_link_info = bpf_nf_link_fill_link_info,
	.update_prog = bpf_nf_link_update,
};
176 | |
177 | static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr) |
178 | { |
179 | int prio; |
180 | |
181 | switch (attr->link_create.netfilter.pf) { |
182 | case NFPROTO_IPV4: |
183 | case NFPROTO_IPV6: |
184 | if (attr->link_create.netfilter.hooknum >= NF_INET_NUMHOOKS) |
185 | return -EPROTO; |
186 | break; |
187 | default: |
188 | return -EAFNOSUPPORT; |
189 | } |
190 | |
191 | if (attr->link_create.netfilter.flags & ~BPF_F_NETFILTER_IP_DEFRAG) |
192 | return -EOPNOTSUPP; |
193 | |
194 | /* make sure conntrack confirm is always last */ |
195 | prio = attr->link_create.netfilter.priority; |
196 | if (prio == NF_IP_PRI_FIRST) |
197 | return -ERANGE; /* sabotage_in and other warts */ |
198 | else if (prio == NF_IP_PRI_LAST) |
199 | return -ERANGE; /* e.g. conntrack confirm */ |
200 | else if ((attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) && |
201 | prio <= NF_IP_PRI_CONNTRACK_DEFRAG) |
202 | return -ERANGE; /* cannot use defrag if prog runs before nf_defrag */ |
203 | |
204 | return 0; |
205 | } |
206 | |
207 | int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) |
208 | { |
209 | struct net *net = current->nsproxy->net_ns; |
210 | struct bpf_link_primer link_primer; |
211 | struct bpf_nf_link *link; |
212 | int err; |
213 | |
214 | if (attr->link_create.flags) |
215 | return -EINVAL; |
216 | |
217 | err = bpf_nf_check_pf_and_hooks(attr); |
218 | if (err) |
219 | return err; |
220 | |
221 | link = kzalloc(size: sizeof(*link), GFP_USER); |
222 | if (!link) |
223 | return -ENOMEM; |
224 | |
225 | bpf_link_init(link: &link->link, type: BPF_LINK_TYPE_NETFILTER, ops: &bpf_nf_link_lops, prog); |
226 | |
227 | link->hook_ops.hook = nf_hook_run_bpf; |
228 | link->hook_ops.hook_ops_type = NF_HOOK_OP_BPF; |
229 | link->hook_ops.priv = prog; |
230 | |
231 | link->hook_ops.pf = attr->link_create.netfilter.pf; |
232 | link->hook_ops.priority = attr->link_create.netfilter.priority; |
233 | link->hook_ops.hooknum = attr->link_create.netfilter.hooknum; |
234 | |
235 | link->net = net; |
236 | link->dead = false; |
237 | link->defrag_hook = NULL; |
238 | |
239 | err = bpf_link_prime(link: &link->link, primer: &link_primer); |
240 | if (err) { |
241 | kfree(objp: link); |
242 | return err; |
243 | } |
244 | |
245 | if (attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) { |
246 | err = bpf_nf_enable_defrag(link); |
247 | if (err) { |
248 | bpf_link_cleanup(primer: &link_primer); |
249 | return err; |
250 | } |
251 | } |
252 | |
253 | err = nf_register_net_hook(net, ops: &link->hook_ops); |
254 | if (err) { |
255 | bpf_nf_disable_defrag(link); |
256 | bpf_link_cleanup(primer: &link_primer); |
257 | return err; |
258 | } |
259 | |
260 | return bpf_link_settle(primer: &link_primer); |
261 | } |
262 | |
/* Program operations for BPF_PROG_TYPE_NETFILTER (BPF_PROG_TEST_RUN). */
const struct bpf_prog_ops netfilter_prog_ops = {
	.test_run = bpf_prog_test_run_nf,
};
266 | |
267 | static bool nf_ptr_to_btf_id(struct bpf_insn_access_aux *info, const char *name) |
268 | { |
269 | struct btf *btf; |
270 | s32 type_id; |
271 | |
272 | btf = bpf_get_btf_vmlinux(); |
273 | if (IS_ERR_OR_NULL(ptr: btf)) |
274 | return false; |
275 | |
276 | type_id = btf_find_by_name_kind(btf, name, kind: BTF_KIND_STRUCT); |
277 | if (WARN_ON_ONCE(type_id < 0)) |
278 | return false; |
279 | |
280 | info->btf = btf; |
281 | info->btf_id = type_id; |
282 | info->reg_type = PTR_TO_BTF_ID | PTR_TRUSTED; |
283 | return true; |
284 | } |
285 | |
286 | static bool nf_is_valid_access(int off, int size, enum bpf_access_type type, |
287 | const struct bpf_prog *prog, |
288 | struct bpf_insn_access_aux *info) |
289 | { |
290 | if (off < 0 || off >= sizeof(struct bpf_nf_ctx)) |
291 | return false; |
292 | |
293 | if (type == BPF_WRITE) |
294 | return false; |
295 | |
296 | switch (off) { |
297 | case bpf_ctx_range(struct bpf_nf_ctx, skb): |
298 | if (size != sizeof_field(struct bpf_nf_ctx, skb)) |
299 | return false; |
300 | |
301 | return nf_ptr_to_btf_id(info, name: "sk_buff" ); |
302 | case bpf_ctx_range(struct bpf_nf_ctx, state): |
303 | if (size != sizeof_field(struct bpf_nf_ctx, state)) |
304 | return false; |
305 | |
306 | return nf_ptr_to_btf_id(info, name: "nf_hook_state" ); |
307 | default: |
308 | return false; |
309 | } |
310 | |
311 | return false; |
312 | } |
313 | |
314 | static const struct bpf_func_proto * |
315 | bpf_nf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) |
316 | { |
317 | return bpf_base_func_proto(func_id, prog); |
318 | } |
319 | |
/* Verifier operations for BPF_PROG_TYPE_NETFILTER. */
const struct bpf_verifier_ops netfilter_verifier_ops = {
	.is_valid_access = nf_is_valid_access,
	.get_func_proto = bpf_nf_func_proto,
};
324 | |