1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | #include <linux/bpf.h> |
4 | #include <linux/bpf-netns.h> |
5 | #include <linux/filter.h> |
6 | #include <net/net_namespace.h> |
7 | |
8 | /* |
9 | * Functions to manage BPF programs attached to netns |
10 | */ |
11 | |
/* Per-link state for a BPF link attached to a network namespace.
 * Embeds the generic bpf_link and records which netns and which
 * attach point the link is bound to.
 */
struct bpf_netns_link {
	struct bpf_link link;
	enum bpf_attach_type type;		/* UAPI attach type */
	enum netns_bpf_attach_type netns_type;	/* internal attach-point index */

	/* We don't hold a ref to net in order to auto-detach the link
	 * when netns is going away. Instead we rely on pernet
	 * pre_exit callback to clear this pointer. Must be accessed
	 * with netns_bpf_mutex held.
	 */
	struct net *net;
	struct list_head node; /* node in list of links attached to net */
};
25 | |
/* Protects updates to netns_bpf state (run arrays, attached progs and
 * link lists) across all network namespaces.
 */
DEFINE_MUTEX(netns_bpf_mutex);
28 | |
29 | static void netns_bpf_attach_type_unneed(enum netns_bpf_attach_type type) |
30 | { |
31 | switch (type) { |
32 | #ifdef CONFIG_INET |
33 | case NETNS_BPF_SK_LOOKUP: |
34 | static_branch_dec(&bpf_sk_lookup_enabled); |
35 | break; |
36 | #endif |
37 | default: |
38 | break; |
39 | } |
40 | } |
41 | |
42 | static void netns_bpf_attach_type_need(enum netns_bpf_attach_type type) |
43 | { |
44 | switch (type) { |
45 | #ifdef CONFIG_INET |
46 | case NETNS_BPF_SK_LOOKUP: |
47 | static_branch_inc(&bpf_sk_lookup_enabled); |
48 | break; |
49 | #endif |
50 | default: |
51 | break; |
52 | } |
53 | } |
54 | |
55 | /* Must be called with netns_bpf_mutex held. */ |
56 | static void netns_bpf_run_array_detach(struct net *net, |
57 | enum netns_bpf_attach_type type) |
58 | { |
59 | struct bpf_prog_array *run_array; |
60 | |
61 | run_array = rcu_replace_pointer(net->bpf.run_array[type], NULL, |
62 | lockdep_is_held(&netns_bpf_mutex)); |
63 | bpf_prog_array_free(progs: run_array); |
64 | } |
65 | |
66 | static int link_index(struct net *net, enum netns_bpf_attach_type type, |
67 | struct bpf_netns_link *link) |
68 | { |
69 | struct bpf_netns_link *pos; |
70 | int i = 0; |
71 | |
72 | list_for_each_entry(pos, &net->bpf.links[type], node) { |
73 | if (pos == link) |
74 | return i; |
75 | i++; |
76 | } |
77 | return -ENOENT; |
78 | } |
79 | |
80 | static int link_count(struct net *net, enum netns_bpf_attach_type type) |
81 | { |
82 | struct list_head *pos; |
83 | int i = 0; |
84 | |
85 | list_for_each(pos, &net->bpf.links[type]) |
86 | i++; |
87 | return i; |
88 | } |
89 | |
90 | static void fill_prog_array(struct net *net, enum netns_bpf_attach_type type, |
91 | struct bpf_prog_array *prog_array) |
92 | { |
93 | struct bpf_netns_link *pos; |
94 | unsigned int i = 0; |
95 | |
96 | list_for_each_entry(pos, &net->bpf.links[type], node) { |
97 | prog_array->items[i].prog = pos->link.prog; |
98 | i++; |
99 | } |
100 | } |
101 | |
102 | static void bpf_netns_link_release(struct bpf_link *link) |
103 | { |
104 | struct bpf_netns_link *net_link = |
105 | container_of(link, struct bpf_netns_link, link); |
106 | enum netns_bpf_attach_type type = net_link->netns_type; |
107 | struct bpf_prog_array *old_array, *new_array; |
108 | struct net *net; |
109 | int cnt, idx; |
110 | |
111 | mutex_lock(&netns_bpf_mutex); |
112 | |
113 | /* We can race with cleanup_net, but if we see a non-NULL |
114 | * struct net pointer, pre_exit has not run yet and wait for |
115 | * netns_bpf_mutex. |
116 | */ |
117 | net = net_link->net; |
118 | if (!net) |
119 | goto out_unlock; |
120 | |
121 | /* Mark attach point as unused */ |
122 | netns_bpf_attach_type_unneed(type); |
123 | |
124 | /* Remember link position in case of safe delete */ |
125 | idx = link_index(net, type, link: net_link); |
126 | list_del(entry: &net_link->node); |
127 | |
128 | cnt = link_count(net, type); |
129 | if (!cnt) { |
130 | netns_bpf_run_array_detach(net, type); |
131 | goto out_unlock; |
132 | } |
133 | |
134 | old_array = rcu_dereference_protected(net->bpf.run_array[type], |
135 | lockdep_is_held(&netns_bpf_mutex)); |
136 | new_array = bpf_prog_array_alloc(prog_cnt: cnt, GFP_KERNEL); |
137 | if (!new_array) { |
138 | WARN_ON(bpf_prog_array_delete_safe_at(old_array, idx)); |
139 | goto out_unlock; |
140 | } |
141 | fill_prog_array(net, type, prog_array: new_array); |
142 | rcu_assign_pointer(net->bpf.run_array[type], new_array); |
143 | bpf_prog_array_free(progs: old_array); |
144 | |
145 | out_unlock: |
146 | net_link->net = NULL; |
147 | mutex_unlock(lock: &netns_bpf_mutex); |
148 | } |
149 | |
/* Explicit BPF_LINK_DETACH: identical teardown to the final
 * reference drop; always succeeds.
 */
static int bpf_netns_link_detach(struct bpf_link *link)
{
	bpf_netns_link_release(link);
	return 0;
}
155 | |
156 | static void bpf_netns_link_dealloc(struct bpf_link *link) |
157 | { |
158 | struct bpf_netns_link *net_link = |
159 | container_of(link, struct bpf_netns_link, link); |
160 | |
161 | kfree(objp: net_link); |
162 | } |
163 | |
164 | static int bpf_netns_link_update_prog(struct bpf_link *link, |
165 | struct bpf_prog *new_prog, |
166 | struct bpf_prog *old_prog) |
167 | { |
168 | struct bpf_netns_link *net_link = |
169 | container_of(link, struct bpf_netns_link, link); |
170 | enum netns_bpf_attach_type type = net_link->netns_type; |
171 | struct bpf_prog_array *run_array; |
172 | struct net *net; |
173 | int idx, ret; |
174 | |
175 | if (old_prog && old_prog != link->prog) |
176 | return -EPERM; |
177 | if (new_prog->type != link->prog->type) |
178 | return -EINVAL; |
179 | |
180 | mutex_lock(&netns_bpf_mutex); |
181 | |
182 | net = net_link->net; |
183 | if (!net || !check_net(net)) { |
184 | /* Link auto-detached or netns dying */ |
185 | ret = -ENOLINK; |
186 | goto out_unlock; |
187 | } |
188 | |
189 | run_array = rcu_dereference_protected(net->bpf.run_array[type], |
190 | lockdep_is_held(&netns_bpf_mutex)); |
191 | idx = link_index(net, type, link: net_link); |
192 | ret = bpf_prog_array_update_at(array: run_array, index: idx, prog: new_prog); |
193 | if (ret) |
194 | goto out_unlock; |
195 | |
196 | old_prog = xchg(&link->prog, new_prog); |
197 | bpf_prog_put(prog: old_prog); |
198 | |
199 | out_unlock: |
200 | mutex_unlock(lock: &netns_bpf_mutex); |
201 | return ret; |
202 | } |
203 | |
204 | static int bpf_netns_link_fill_info(const struct bpf_link *link, |
205 | struct bpf_link_info *info) |
206 | { |
207 | const struct bpf_netns_link *net_link = |
208 | container_of(link, struct bpf_netns_link, link); |
209 | unsigned int inum = 0; |
210 | struct net *net; |
211 | |
212 | mutex_lock(&netns_bpf_mutex); |
213 | net = net_link->net; |
214 | if (net && check_net(net)) |
215 | inum = net->ns.inum; |
216 | mutex_unlock(lock: &netns_bpf_mutex); |
217 | |
218 | info->netns.netns_ino = inum; |
219 | info->netns.attach_type = net_link->type; |
220 | return 0; |
221 | } |
222 | |
223 | static void bpf_netns_link_show_fdinfo(const struct bpf_link *link, |
224 | struct seq_file *seq) |
225 | { |
226 | struct bpf_link_info info = {}; |
227 | |
228 | bpf_netns_link_fill_info(link, info: &info); |
229 | seq_printf(m: seq, |
230 | fmt: "netns_ino:\t%u\n" |
231 | "attach_type:\t%u\n" , |
232 | info.netns.netns_ino, |
233 | info.netns.attach_type); |
234 | } |
235 | |
/* Operations for BPF_LINK_TYPE_NETNS links. */
static const struct bpf_link_ops bpf_netns_link_ops = {
	.release = bpf_netns_link_release,
	.dealloc = bpf_netns_link_dealloc,
	.detach = bpf_netns_link_detach,
	.update_prog = bpf_netns_link_update_prog,
	.fill_link_info = bpf_netns_link_fill_info,
	.show_fdinfo = bpf_netns_link_show_fdinfo,
};
244 | |
245 | /* Must be called with netns_bpf_mutex held. */ |
246 | static int __netns_bpf_prog_query(const union bpf_attr *attr, |
247 | union bpf_attr __user *uattr, |
248 | struct net *net, |
249 | enum netns_bpf_attach_type type) |
250 | { |
251 | __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); |
252 | struct bpf_prog_array *run_array; |
253 | u32 prog_cnt = 0, flags = 0; |
254 | |
255 | run_array = rcu_dereference_protected(net->bpf.run_array[type], |
256 | lockdep_is_held(&netns_bpf_mutex)); |
257 | if (run_array) |
258 | prog_cnt = bpf_prog_array_length(progs: run_array); |
259 | |
260 | if (copy_to_user(to: &uattr->query.attach_flags, from: &flags, n: sizeof(flags))) |
261 | return -EFAULT; |
262 | if (copy_to_user(to: &uattr->query.prog_cnt, from: &prog_cnt, n: sizeof(prog_cnt))) |
263 | return -EFAULT; |
264 | if (!attr->query.prog_cnt || !prog_ids || !prog_cnt) |
265 | return 0; |
266 | |
267 | return bpf_prog_array_copy_to_user(progs: run_array, prog_ids, |
268 | cnt: attr->query.prog_cnt); |
269 | } |
270 | |
271 | int netns_bpf_prog_query(const union bpf_attr *attr, |
272 | union bpf_attr __user *uattr) |
273 | { |
274 | enum netns_bpf_attach_type type; |
275 | struct net *net; |
276 | int ret; |
277 | |
278 | if (attr->query.query_flags) |
279 | return -EINVAL; |
280 | |
281 | type = to_netns_bpf_attach_type(attach_type: attr->query.attach_type); |
282 | if (type < 0) |
283 | return -EINVAL; |
284 | |
285 | net = get_net_ns_by_fd(fd: attr->query.target_fd); |
286 | if (IS_ERR(ptr: net)) |
287 | return PTR_ERR(ptr: net); |
288 | |
289 | mutex_lock(&netns_bpf_mutex); |
290 | ret = __netns_bpf_prog_query(attr, uattr, net, type); |
291 | mutex_unlock(lock: &netns_bpf_mutex); |
292 | |
293 | put_net(net); |
294 | return ret; |
295 | } |
296 | |
297 | int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) |
298 | { |
299 | struct bpf_prog_array *run_array; |
300 | enum netns_bpf_attach_type type; |
301 | struct bpf_prog *attached; |
302 | struct net *net; |
303 | int ret; |
304 | |
305 | if (attr->target_fd || attr->attach_flags || attr->replace_bpf_fd) |
306 | return -EINVAL; |
307 | |
308 | type = to_netns_bpf_attach_type(attach_type: attr->attach_type); |
309 | if (type < 0) |
310 | return -EINVAL; |
311 | |
312 | net = current->nsproxy->net_ns; |
313 | mutex_lock(&netns_bpf_mutex); |
314 | |
315 | /* Attaching prog directly is not compatible with links */ |
316 | if (!list_empty(head: &net->bpf.links[type])) { |
317 | ret = -EEXIST; |
318 | goto out_unlock; |
319 | } |
320 | |
321 | switch (type) { |
322 | case NETNS_BPF_FLOW_DISSECTOR: |
323 | ret = flow_dissector_bpf_prog_attach_check(net, prog); |
324 | break; |
325 | default: |
326 | ret = -EINVAL; |
327 | break; |
328 | } |
329 | if (ret) |
330 | goto out_unlock; |
331 | |
332 | attached = net->bpf.progs[type]; |
333 | if (attached == prog) { |
334 | /* The same program cannot be attached twice */ |
335 | ret = -EINVAL; |
336 | goto out_unlock; |
337 | } |
338 | |
339 | run_array = rcu_dereference_protected(net->bpf.run_array[type], |
340 | lockdep_is_held(&netns_bpf_mutex)); |
341 | if (run_array) { |
342 | WRITE_ONCE(run_array->items[0].prog, prog); |
343 | } else { |
344 | run_array = bpf_prog_array_alloc(prog_cnt: 1, GFP_KERNEL); |
345 | if (!run_array) { |
346 | ret = -ENOMEM; |
347 | goto out_unlock; |
348 | } |
349 | run_array->items[0].prog = prog; |
350 | rcu_assign_pointer(net->bpf.run_array[type], run_array); |
351 | } |
352 | |
353 | net->bpf.progs[type] = prog; |
354 | if (attached) |
355 | bpf_prog_put(prog: attached); |
356 | |
357 | out_unlock: |
358 | mutex_unlock(lock: &netns_bpf_mutex); |
359 | |
360 | return ret; |
361 | } |
362 | |
363 | /* Must be called with netns_bpf_mutex held. */ |
364 | static int __netns_bpf_prog_detach(struct net *net, |
365 | enum netns_bpf_attach_type type, |
366 | struct bpf_prog *old) |
367 | { |
368 | struct bpf_prog *attached; |
369 | |
370 | /* Progs attached via links cannot be detached */ |
371 | if (!list_empty(head: &net->bpf.links[type])) |
372 | return -EINVAL; |
373 | |
374 | attached = net->bpf.progs[type]; |
375 | if (!attached || attached != old) |
376 | return -ENOENT; |
377 | netns_bpf_run_array_detach(net, type); |
378 | net->bpf.progs[type] = NULL; |
379 | bpf_prog_put(prog: attached); |
380 | return 0; |
381 | } |
382 | |
383 | int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) |
384 | { |
385 | enum netns_bpf_attach_type type; |
386 | struct bpf_prog *prog; |
387 | int ret; |
388 | |
389 | if (attr->target_fd) |
390 | return -EINVAL; |
391 | |
392 | type = to_netns_bpf_attach_type(attach_type: attr->attach_type); |
393 | if (type < 0) |
394 | return -EINVAL; |
395 | |
396 | prog = bpf_prog_get_type(ufd: attr->attach_bpf_fd, type: ptype); |
397 | if (IS_ERR(ptr: prog)) |
398 | return PTR_ERR(ptr: prog); |
399 | |
400 | mutex_lock(&netns_bpf_mutex); |
401 | ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type, old: prog); |
402 | mutex_unlock(lock: &netns_bpf_mutex); |
403 | |
404 | bpf_prog_put(prog); |
405 | |
406 | return ret; |
407 | } |
408 | |
409 | static int netns_bpf_max_progs(enum netns_bpf_attach_type type) |
410 | { |
411 | switch (type) { |
412 | case NETNS_BPF_FLOW_DISSECTOR: |
413 | return 1; |
414 | case NETNS_BPF_SK_LOOKUP: |
415 | return 64; |
416 | default: |
417 | return 0; |
418 | } |
419 | } |
420 | |
421 | static int netns_bpf_link_attach(struct net *net, struct bpf_link *link, |
422 | enum netns_bpf_attach_type type) |
423 | { |
424 | struct bpf_netns_link *net_link = |
425 | container_of(link, struct bpf_netns_link, link); |
426 | struct bpf_prog_array *run_array; |
427 | int cnt, err; |
428 | |
429 | mutex_lock(&netns_bpf_mutex); |
430 | |
431 | cnt = link_count(net, type); |
432 | if (cnt >= netns_bpf_max_progs(type)) { |
433 | err = -E2BIG; |
434 | goto out_unlock; |
435 | } |
436 | /* Links are not compatible with attaching prog directly */ |
437 | if (net->bpf.progs[type]) { |
438 | err = -EEXIST; |
439 | goto out_unlock; |
440 | } |
441 | |
442 | switch (type) { |
443 | case NETNS_BPF_FLOW_DISSECTOR: |
444 | err = flow_dissector_bpf_prog_attach_check(net, prog: link->prog); |
445 | break; |
446 | case NETNS_BPF_SK_LOOKUP: |
447 | err = 0; /* nothing to check */ |
448 | break; |
449 | default: |
450 | err = -EINVAL; |
451 | break; |
452 | } |
453 | if (err) |
454 | goto out_unlock; |
455 | |
456 | run_array = bpf_prog_array_alloc(prog_cnt: cnt + 1, GFP_KERNEL); |
457 | if (!run_array) { |
458 | err = -ENOMEM; |
459 | goto out_unlock; |
460 | } |
461 | |
462 | list_add_tail(new: &net_link->node, head: &net->bpf.links[type]); |
463 | |
464 | fill_prog_array(net, type, prog_array: run_array); |
465 | run_array = rcu_replace_pointer(net->bpf.run_array[type], run_array, |
466 | lockdep_is_held(&netns_bpf_mutex)); |
467 | bpf_prog_array_free(progs: run_array); |
468 | |
469 | /* Mark attach point as used */ |
470 | netns_bpf_attach_type_need(type); |
471 | |
472 | out_unlock: |
473 | mutex_unlock(lock: &netns_bpf_mutex); |
474 | return err; |
475 | } |
476 | |
477 | int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog) |
478 | { |
479 | enum netns_bpf_attach_type netns_type; |
480 | struct bpf_link_primer link_primer; |
481 | struct bpf_netns_link *net_link; |
482 | enum bpf_attach_type type; |
483 | struct net *net; |
484 | int err; |
485 | |
486 | if (attr->link_create.flags) |
487 | return -EINVAL; |
488 | |
489 | type = attr->link_create.attach_type; |
490 | netns_type = to_netns_bpf_attach_type(attach_type: type); |
491 | if (netns_type < 0) |
492 | return -EINVAL; |
493 | |
494 | net = get_net_ns_by_fd(fd: attr->link_create.target_fd); |
495 | if (IS_ERR(ptr: net)) |
496 | return PTR_ERR(ptr: net); |
497 | |
498 | net_link = kzalloc(size: sizeof(*net_link), GFP_USER); |
499 | if (!net_link) { |
500 | err = -ENOMEM; |
501 | goto out_put_net; |
502 | } |
503 | bpf_link_init(link: &net_link->link, type: BPF_LINK_TYPE_NETNS, |
504 | ops: &bpf_netns_link_ops, prog); |
505 | net_link->net = net; |
506 | net_link->type = type; |
507 | net_link->netns_type = netns_type; |
508 | |
509 | err = bpf_link_prime(link: &net_link->link, primer: &link_primer); |
510 | if (err) { |
511 | kfree(objp: net_link); |
512 | goto out_put_net; |
513 | } |
514 | |
515 | err = netns_bpf_link_attach(net, link: &net_link->link, type: netns_type); |
516 | if (err) { |
517 | bpf_link_cleanup(primer: &link_primer); |
518 | goto out_put_net; |
519 | } |
520 | |
521 | put_net(net); |
522 | return bpf_link_settle(primer: &link_primer); |
523 | |
524 | out_put_net: |
525 | put_net(net); |
526 | return err; |
527 | } |
528 | |
529 | static int __net_init netns_bpf_pernet_init(struct net *net) |
530 | { |
531 | int type; |
532 | |
533 | for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) |
534 | INIT_LIST_HEAD(list: &net->bpf.links[type]); |
535 | |
536 | return 0; |
537 | } |
538 | |
539 | static void __net_exit netns_bpf_pernet_pre_exit(struct net *net) |
540 | { |
541 | enum netns_bpf_attach_type type; |
542 | struct bpf_netns_link *net_link; |
543 | |
544 | mutex_lock(&netns_bpf_mutex); |
545 | for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) { |
546 | netns_bpf_run_array_detach(net, type); |
547 | list_for_each_entry(net_link, &net->bpf.links[type], node) { |
548 | net_link->net = NULL; /* auto-detach link */ |
549 | netns_bpf_attach_type_unneed(type); |
550 | } |
551 | if (net->bpf.progs[type]) |
552 | bpf_prog_put(prog: net->bpf.progs[type]); |
553 | } |
554 | mutex_unlock(lock: &netns_bpf_mutex); |
555 | } |
556 | |
/* Pernet callbacks: list init on netns creation, auto-detach on exit. */
static struct pernet_operations netns_bpf_pernet_ops __net_initdata = {
	.init = netns_bpf_pernet_init,
	.pre_exit = netns_bpf_pernet_pre_exit,
};
561 | |
/* Register the pernet operations at subsystem-init time. */
static int __init netns_bpf_init(void)
{
	return register_pernet_subsys(&netns_bpf_pernet_ops);
}

subsys_initcall(netns_bpf_init);
568 | |