1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Minimal file system backend for holding eBPF maps and programs, |
4 | * used by bpf(2) object pinning. |
5 | * |
6 | * Authors: |
7 | * |
8 | * Daniel Borkmann <daniel@iogearbox.net> |
9 | */ |
10 | |
11 | #include <linux/init.h> |
12 | #include <linux/magic.h> |
13 | #include <linux/major.h> |
14 | #include <linux/mount.h> |
15 | #include <linux/namei.h> |
16 | #include <linux/fs.h> |
17 | #include <linux/fs_context.h> |
18 | #include <linux/fs_parser.h> |
19 | #include <linux/kdev_t.h> |
20 | #include <linux/filter.h> |
21 | #include <linux/bpf.h> |
22 | #include <linux/bpf_trace.h> |
23 | #include "preload/bpf_preload.h" |
24 | |
/* Kind of BPF object referenced by a bpffs inode.
 * BPF_TYPE_UNSPEC is what bpf_inode_type() reports for an inode that
 * holds none of the known object kinds.
 */
enum bpf_type {
	BPF_TYPE_UNSPEC	= 0,
	BPF_TYPE_PROG,
	BPF_TYPE_MAP,
	BPF_TYPE_LINK,
};
31 | |
32 | static void *bpf_any_get(void *raw, enum bpf_type type) |
33 | { |
34 | switch (type) { |
35 | case BPF_TYPE_PROG: |
36 | bpf_prog_inc(prog: raw); |
37 | break; |
38 | case BPF_TYPE_MAP: |
39 | bpf_map_inc_with_uref(map: raw); |
40 | break; |
41 | case BPF_TYPE_LINK: |
42 | bpf_link_inc(link: raw); |
43 | break; |
44 | default: |
45 | WARN_ON_ONCE(1); |
46 | break; |
47 | } |
48 | |
49 | return raw; |
50 | } |
51 | |
52 | static void bpf_any_put(void *raw, enum bpf_type type) |
53 | { |
54 | switch (type) { |
55 | case BPF_TYPE_PROG: |
56 | bpf_prog_put(prog: raw); |
57 | break; |
58 | case BPF_TYPE_MAP: |
59 | bpf_map_put_with_uref(map: raw); |
60 | break; |
61 | case BPF_TYPE_LINK: |
62 | bpf_link_put(link: raw); |
63 | break; |
64 | default: |
65 | WARN_ON_ONCE(1); |
66 | break; |
67 | } |
68 | } |
69 | |
70 | static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) |
71 | { |
72 | void *raw; |
73 | |
74 | raw = bpf_map_get_with_uref(ufd); |
75 | if (!IS_ERR(ptr: raw)) { |
76 | *type = BPF_TYPE_MAP; |
77 | return raw; |
78 | } |
79 | |
80 | raw = bpf_prog_get(ufd); |
81 | if (!IS_ERR(ptr: raw)) { |
82 | *type = BPF_TYPE_PROG; |
83 | return raw; |
84 | } |
85 | |
86 | raw = bpf_link_get_from_fd(ufd); |
87 | if (!IS_ERR(ptr: raw)) { |
88 | *type = BPF_TYPE_LINK; |
89 | return raw; |
90 | } |
91 | |
92 | return ERR_PTR(error: -EINVAL); |
93 | } |
94 | |
95 | static const struct inode_operations bpf_dir_iops; |
96 | |
97 | static const struct inode_operations bpf_prog_iops = { }; |
98 | static const struct inode_operations bpf_map_iops = { }; |
99 | static const struct inode_operations bpf_link_iops = { }; |
100 | |
101 | static struct inode *bpf_get_inode(struct super_block *sb, |
102 | const struct inode *dir, |
103 | umode_t mode) |
104 | { |
105 | struct inode *inode; |
106 | |
107 | switch (mode & S_IFMT) { |
108 | case S_IFDIR: |
109 | case S_IFREG: |
110 | case S_IFLNK: |
111 | break; |
112 | default: |
113 | return ERR_PTR(error: -EINVAL); |
114 | } |
115 | |
116 | inode = new_inode(sb); |
117 | if (!inode) |
118 | return ERR_PTR(error: -ENOSPC); |
119 | |
120 | inode->i_ino = get_next_ino(); |
121 | simple_inode_init_ts(inode); |
122 | |
123 | inode_init_owner(idmap: &nop_mnt_idmap, inode, dir, mode); |
124 | |
125 | return inode; |
126 | } |
127 | |
128 | static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) |
129 | { |
130 | *type = BPF_TYPE_UNSPEC; |
131 | if (inode->i_op == &bpf_prog_iops) |
132 | *type = BPF_TYPE_PROG; |
133 | else if (inode->i_op == &bpf_map_iops) |
134 | *type = BPF_TYPE_MAP; |
135 | else if (inode->i_op == &bpf_link_iops) |
136 | *type = BPF_TYPE_LINK; |
137 | else |
138 | return -EACCES; |
139 | |
140 | return 0; |
141 | } |
142 | |
143 | static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode, |
144 | struct inode *dir) |
145 | { |
146 | d_instantiate(dentry, inode); |
147 | dget(dentry); |
148 | |
149 | inode_set_mtime_to_ts(inode: dir, ts: inode_set_ctime_current(inode: dir)); |
150 | } |
151 | |
152 | static int bpf_mkdir(struct mnt_idmap *idmap, struct inode *dir, |
153 | struct dentry *dentry, umode_t mode) |
154 | { |
155 | struct inode *inode; |
156 | |
157 | inode = bpf_get_inode(sb: dir->i_sb, dir, mode: mode | S_IFDIR); |
158 | if (IS_ERR(ptr: inode)) |
159 | return PTR_ERR(ptr: inode); |
160 | |
161 | inode->i_op = &bpf_dir_iops; |
162 | inode->i_fop = &simple_dir_operations; |
163 | |
164 | inc_nlink(inode); |
165 | inc_nlink(inode: dir); |
166 | |
167 | bpf_dentry_finalize(dentry, inode, dir); |
168 | return 0; |
169 | } |
170 | |
171 | struct map_iter { |
172 | void *key; |
173 | bool done; |
174 | }; |
175 | |
176 | static struct map_iter *map_iter(struct seq_file *m) |
177 | { |
178 | return m->private; |
179 | } |
180 | |
181 | static struct bpf_map *seq_file_to_map(struct seq_file *m) |
182 | { |
183 | return file_inode(f: m->file)->i_private; |
184 | } |
185 | |
186 | static void map_iter_free(struct map_iter *iter) |
187 | { |
188 | if (iter) { |
189 | kfree(objp: iter->key); |
190 | kfree(objp: iter); |
191 | } |
192 | } |
193 | |
194 | static struct map_iter *map_iter_alloc(struct bpf_map *map) |
195 | { |
196 | struct map_iter *iter; |
197 | |
198 | iter = kzalloc(size: sizeof(*iter), GFP_KERNEL | __GFP_NOWARN); |
199 | if (!iter) |
200 | goto error; |
201 | |
202 | iter->key = kzalloc(size: map->key_size, GFP_KERNEL | __GFP_NOWARN); |
203 | if (!iter->key) |
204 | goto error; |
205 | |
206 | return iter; |
207 | |
208 | error: |
209 | map_iter_free(iter); |
210 | return NULL; |
211 | } |
212 | |
213 | static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos) |
214 | { |
215 | struct bpf_map *map = seq_file_to_map(m); |
216 | void *key = map_iter(m)->key; |
217 | void *prev_key; |
218 | |
219 | (*pos)++; |
220 | if (map_iter(m)->done) |
221 | return NULL; |
222 | |
223 | if (unlikely(v == SEQ_START_TOKEN)) |
224 | prev_key = NULL; |
225 | else |
226 | prev_key = key; |
227 | |
228 | rcu_read_lock(); |
229 | if (map->ops->map_get_next_key(map, prev_key, key)) { |
230 | map_iter(m)->done = true; |
231 | key = NULL; |
232 | } |
233 | rcu_read_unlock(); |
234 | return key; |
235 | } |
236 | |
237 | static void *map_seq_start(struct seq_file *m, loff_t *pos) |
238 | { |
239 | if (map_iter(m)->done) |
240 | return NULL; |
241 | |
242 | return *pos ? map_iter(m)->key : SEQ_START_TOKEN; |
243 | } |
244 | |
/* seq_file ->stop(): intentionally empty, map_seq_start() acquires nothing
 * that would need to be released here.
 */
static void map_seq_stop(struct seq_file *m, void *v)
{
}
248 | |
249 | static int map_seq_show(struct seq_file *m, void *v) |
250 | { |
251 | struct bpf_map *map = seq_file_to_map(m); |
252 | void *key = map_iter(m)->key; |
253 | |
254 | if (unlikely(v == SEQ_START_TOKEN)) { |
255 | seq_puts(m, s: "# WARNING!! The output is for debug purpose only\n" ); |
256 | seq_puts(m, s: "# WARNING!! The output format will change\n" ); |
257 | } else { |
258 | map->ops->map_seq_show_elem(map, key, m); |
259 | } |
260 | |
261 | return 0; |
262 | } |
263 | |
264 | static const struct seq_operations bpffs_map_seq_ops = { |
265 | .start = map_seq_start, |
266 | .next = map_seq_next, |
267 | .show = map_seq_show, |
268 | .stop = map_seq_stop, |
269 | }; |
270 | |
271 | static int bpffs_map_open(struct inode *inode, struct file *file) |
272 | { |
273 | struct bpf_map *map = inode->i_private; |
274 | struct map_iter *iter; |
275 | struct seq_file *m; |
276 | int err; |
277 | |
278 | iter = map_iter_alloc(map); |
279 | if (!iter) |
280 | return -ENOMEM; |
281 | |
282 | err = seq_open(file, &bpffs_map_seq_ops); |
283 | if (err) { |
284 | map_iter_free(iter); |
285 | return err; |
286 | } |
287 | |
288 | m = file->private_data; |
289 | m->private = iter; |
290 | |
291 | return 0; |
292 | } |
293 | |
294 | static int bpffs_map_release(struct inode *inode, struct file *file) |
295 | { |
296 | struct seq_file *m = file->private_data; |
297 | |
298 | map_iter_free(iter: map_iter(m)); |
299 | |
300 | return seq_release(inode, file); |
301 | } |
302 | |
303 | /* bpffs_map_fops should only implement the basic |
304 | * read operation for a BPF map. The purpose is to |
305 | * provide a simple user intuitive way to do |
306 | * "cat bpffs/pathto/a-pinned-map". |
307 | * |
308 | * Other operations (e.g. write, lookup...) should be realized by |
309 | * the userspace tools (e.g. bpftool) through the |
310 | * BPF_OBJ_GET_INFO_BY_FD and the map's lookup/update |
311 | * interface. |
312 | */ |
313 | static const struct file_operations bpffs_map_fops = { |
314 | .open = bpffs_map_open, |
315 | .read = seq_read, |
316 | .release = bpffs_map_release, |
317 | }; |
318 | |
319 | static int bpffs_obj_open(struct inode *inode, struct file *file) |
320 | { |
321 | return -EIO; |
322 | } |
323 | |
324 | static const struct file_operations bpffs_obj_fops = { |
325 | .open = bpffs_obj_open, |
326 | }; |
327 | |
328 | static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw, |
329 | const struct inode_operations *iops, |
330 | const struct file_operations *fops) |
331 | { |
332 | struct inode *dir = dentry->d_parent->d_inode; |
333 | struct inode *inode = bpf_get_inode(sb: dir->i_sb, dir, mode); |
334 | if (IS_ERR(ptr: inode)) |
335 | return PTR_ERR(ptr: inode); |
336 | |
337 | inode->i_op = iops; |
338 | inode->i_fop = fops; |
339 | inode->i_private = raw; |
340 | |
341 | bpf_dentry_finalize(dentry, inode, dir); |
342 | return 0; |
343 | } |
344 | |
345 | static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg) |
346 | { |
347 | return bpf_mkobj_ops(dentry, mode, raw: arg, iops: &bpf_prog_iops, |
348 | fops: &bpffs_obj_fops); |
349 | } |
350 | |
351 | static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg) |
352 | { |
353 | struct bpf_map *map = arg; |
354 | |
355 | return bpf_mkobj_ops(dentry, mode, raw: arg, iops: &bpf_map_iops, |
356 | fops: bpf_map_support_seq_show(map) ? |
357 | &bpffs_map_fops : &bpffs_obj_fops); |
358 | } |
359 | |
360 | static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg) |
361 | { |
362 | struct bpf_link *link = arg; |
363 | |
364 | return bpf_mkobj_ops(dentry, mode, raw: arg, iops: &bpf_link_iops, |
365 | fops: bpf_link_is_iter(link) ? |
366 | &bpf_iter_fops : &bpffs_obj_fops); |
367 | } |
368 | |
369 | static struct dentry * |
370 | bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) |
371 | { |
372 | /* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future |
373 | * extensions. That allows popoulate_bpffs() create special files. |
374 | */ |
375 | if ((dir->i_mode & S_IALLUGO) && |
376 | strchr(dentry->d_name.name, '.')) |
377 | return ERR_PTR(error: -EPERM); |
378 | |
379 | return simple_lookup(dir, dentry, flags); |
380 | } |
381 | |
382 | static int bpf_symlink(struct mnt_idmap *idmap, struct inode *dir, |
383 | struct dentry *dentry, const char *target) |
384 | { |
385 | char *link = kstrdup(s: target, GFP_USER | __GFP_NOWARN); |
386 | struct inode *inode; |
387 | |
388 | if (!link) |
389 | return -ENOMEM; |
390 | |
391 | inode = bpf_get_inode(sb: dir->i_sb, dir, S_IRWXUGO | S_IFLNK); |
392 | if (IS_ERR(ptr: inode)) { |
393 | kfree(objp: link); |
394 | return PTR_ERR(ptr: inode); |
395 | } |
396 | |
397 | inode->i_op = &simple_symlink_inode_operations; |
398 | inode->i_link = link; |
399 | |
400 | bpf_dentry_finalize(dentry, inode, dir); |
401 | return 0; |
402 | } |
403 | |
404 | static const struct inode_operations bpf_dir_iops = { |
405 | .lookup = bpf_lookup, |
406 | .mkdir = bpf_mkdir, |
407 | .symlink = bpf_symlink, |
408 | .rmdir = simple_rmdir, |
409 | .rename = simple_rename, |
410 | .link = simple_link, |
411 | .unlink = simple_unlink, |
412 | }; |
413 | |
414 | /* pin iterator link into bpffs */ |
415 | static int bpf_iter_link_pin_kernel(struct dentry *parent, |
416 | const char *name, struct bpf_link *link) |
417 | { |
418 | umode_t mode = S_IFREG | S_IRUSR; |
419 | struct dentry *dentry; |
420 | int ret; |
421 | |
422 | inode_lock(inode: parent->d_inode); |
423 | dentry = lookup_one_len(name, parent, strlen(name)); |
424 | if (IS_ERR(ptr: dentry)) { |
425 | inode_unlock(inode: parent->d_inode); |
426 | return PTR_ERR(ptr: dentry); |
427 | } |
428 | ret = bpf_mkobj_ops(dentry, mode, raw: link, iops: &bpf_link_iops, |
429 | fops: &bpf_iter_fops); |
430 | dput(dentry); |
431 | inode_unlock(inode: parent->d_inode); |
432 | return ret; |
433 | } |
434 | |
435 | static int bpf_obj_do_pin(int path_fd, const char __user *pathname, void *raw, |
436 | enum bpf_type type) |
437 | { |
438 | struct dentry *dentry; |
439 | struct inode *dir; |
440 | struct path path; |
441 | umode_t mode; |
442 | int ret; |
443 | |
444 | dentry = user_path_create(path_fd, pathname, &path, 0); |
445 | if (IS_ERR(ptr: dentry)) |
446 | return PTR_ERR(ptr: dentry); |
447 | |
448 | dir = d_inode(dentry: path.dentry); |
449 | if (dir->i_op != &bpf_dir_iops) { |
450 | ret = -EPERM; |
451 | goto out; |
452 | } |
453 | |
454 | mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); |
455 | ret = security_path_mknod(dir: &path, dentry, mode, dev: 0); |
456 | if (ret) |
457 | goto out; |
458 | |
459 | switch (type) { |
460 | case BPF_TYPE_PROG: |
461 | ret = vfs_mkobj(dentry, mode, f: bpf_mkprog, raw); |
462 | break; |
463 | case BPF_TYPE_MAP: |
464 | ret = vfs_mkobj(dentry, mode, f: bpf_mkmap, raw); |
465 | break; |
466 | case BPF_TYPE_LINK: |
467 | ret = vfs_mkobj(dentry, mode, f: bpf_mklink, raw); |
468 | break; |
469 | default: |
470 | ret = -EPERM; |
471 | } |
472 | out: |
473 | done_path_create(&path, dentry); |
474 | return ret; |
475 | } |
476 | |
477 | int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname) |
478 | { |
479 | enum bpf_type type; |
480 | void *raw; |
481 | int ret; |
482 | |
483 | raw = bpf_fd_probe_obj(ufd, type: &type); |
484 | if (IS_ERR(ptr: raw)) |
485 | return PTR_ERR(ptr: raw); |
486 | |
487 | ret = bpf_obj_do_pin(path_fd, pathname, raw, type); |
488 | if (ret != 0) |
489 | bpf_any_put(raw, type); |
490 | |
491 | return ret; |
492 | } |
493 | |
494 | static void *bpf_obj_do_get(int path_fd, const char __user *pathname, |
495 | enum bpf_type *type, int flags) |
496 | { |
497 | struct inode *inode; |
498 | struct path path; |
499 | void *raw; |
500 | int ret; |
501 | |
502 | ret = user_path_at(dfd: path_fd, name: pathname, LOOKUP_FOLLOW, path: &path); |
503 | if (ret) |
504 | return ERR_PTR(error: ret); |
505 | |
506 | inode = d_backing_inode(upper: path.dentry); |
507 | ret = path_permission(path: &path, ACC_MODE(flags)); |
508 | if (ret) |
509 | goto out; |
510 | |
511 | ret = bpf_inode_type(inode, type); |
512 | if (ret) |
513 | goto out; |
514 | |
515 | raw = bpf_any_get(raw: inode->i_private, type: *type); |
516 | if (!IS_ERR(ptr: raw)) |
517 | touch_atime(&path); |
518 | |
519 | path_put(&path); |
520 | return raw; |
521 | out: |
522 | path_put(&path); |
523 | return ERR_PTR(error: ret); |
524 | } |
525 | |
526 | int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags) |
527 | { |
528 | enum bpf_type type = BPF_TYPE_UNSPEC; |
529 | int f_flags; |
530 | void *raw; |
531 | int ret; |
532 | |
533 | f_flags = bpf_get_file_flag(flags); |
534 | if (f_flags < 0) |
535 | return f_flags; |
536 | |
537 | raw = bpf_obj_do_get(path_fd, pathname, type: &type, flags: f_flags); |
538 | if (IS_ERR(ptr: raw)) |
539 | return PTR_ERR(ptr: raw); |
540 | |
541 | if (type == BPF_TYPE_PROG) |
542 | ret = bpf_prog_new_fd(prog: raw); |
543 | else if (type == BPF_TYPE_MAP) |
544 | ret = bpf_map_new_fd(map: raw, flags: f_flags); |
545 | else if (type == BPF_TYPE_LINK) |
546 | ret = (f_flags != O_RDWR) ? -EINVAL : bpf_link_new_fd(link: raw); |
547 | else |
548 | return -ENOENT; |
549 | |
550 | if (ret < 0) |
551 | bpf_any_put(raw, type); |
552 | return ret; |
553 | } |
554 | |
555 | static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type) |
556 | { |
557 | struct bpf_prog *prog; |
558 | int ret = inode_permission(&nop_mnt_idmap, inode, MAY_READ); |
559 | if (ret) |
560 | return ERR_PTR(error: ret); |
561 | |
562 | if (inode->i_op == &bpf_map_iops) |
563 | return ERR_PTR(error: -EINVAL); |
564 | if (inode->i_op == &bpf_link_iops) |
565 | return ERR_PTR(error: -EINVAL); |
566 | if (inode->i_op != &bpf_prog_iops) |
567 | return ERR_PTR(error: -EACCES); |
568 | |
569 | prog = inode->i_private; |
570 | |
571 | ret = security_bpf_prog(prog); |
572 | if (ret < 0) |
573 | return ERR_PTR(error: ret); |
574 | |
575 | if (!bpf_prog_get_ok(prog, &type, false)) |
576 | return ERR_PTR(error: -EINVAL); |
577 | |
578 | bpf_prog_inc(prog); |
579 | return prog; |
580 | } |
581 | |
582 | struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) |
583 | { |
584 | struct bpf_prog *prog; |
585 | struct path path; |
586 | int ret = kern_path(name, LOOKUP_FOLLOW, &path); |
587 | if (ret) |
588 | return ERR_PTR(error: ret); |
589 | prog = __get_prog_inode(inode: d_backing_inode(upper: path.dentry), type); |
590 | if (!IS_ERR(ptr: prog)) |
591 | touch_atime(&path); |
592 | path_put(&path); |
593 | return prog; |
594 | } |
595 | EXPORT_SYMBOL(bpf_prog_get_type_path); |
596 | |
597 | /* |
598 | * Display the mount options in /proc/mounts. |
599 | */ |
600 | static int bpf_show_options(struct seq_file *m, struct dentry *root) |
601 | { |
602 | umode_t mode = d_inode(dentry: root)->i_mode & S_IALLUGO & ~S_ISVTX; |
603 | |
604 | if (mode != S_IRWXUGO) |
605 | seq_printf(m, fmt: ",mode=%o" , mode); |
606 | return 0; |
607 | } |
608 | |
609 | static void bpf_free_inode(struct inode *inode) |
610 | { |
611 | enum bpf_type type; |
612 | |
613 | if (S_ISLNK(inode->i_mode)) |
614 | kfree(objp: inode->i_link); |
615 | if (!bpf_inode_type(inode, type: &type)) |
616 | bpf_any_put(raw: inode->i_private, type); |
617 | free_inode_nonrcu(inode); |
618 | } |
619 | |
620 | static const struct super_operations bpf_super_ops = { |
621 | .statfs = simple_statfs, |
622 | .drop_inode = generic_delete_inode, |
623 | .show_options = bpf_show_options, |
624 | .free_inode = bpf_free_inode, |
625 | }; |
626 | |
/* Identifiers for the mount parameters listed in bpf_fs_parameters. */
enum {
	OPT_MODE,	/* "mode" */
};
630 | |
631 | static const struct fs_parameter_spec bpf_fs_parameters[] = { |
632 | fsparam_u32oct ("mode" , OPT_MODE), |
633 | {} |
634 | }; |
635 | |
636 | struct bpf_mount_opts { |
637 | umode_t mode; |
638 | }; |
639 | |
640 | static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) |
641 | { |
642 | struct bpf_mount_opts *opts = fc->fs_private; |
643 | struct fs_parse_result result; |
644 | int opt; |
645 | |
646 | opt = fs_parse(fc, desc: bpf_fs_parameters, param, result: &result); |
647 | if (opt < 0) { |
648 | /* We might like to report bad mount options here, but |
649 | * traditionally we've ignored all mount options, so we'd |
650 | * better continue to ignore non-existing options for bpf. |
651 | */ |
652 | if (opt == -ENOPARAM) { |
653 | opt = vfs_parse_fs_param_source(fc, param); |
654 | if (opt != -ENOPARAM) |
655 | return opt; |
656 | |
657 | return 0; |
658 | } |
659 | |
660 | if (opt < 0) |
661 | return opt; |
662 | } |
663 | |
664 | switch (opt) { |
665 | case OPT_MODE: |
666 | opts->mode = result.uint_32 & S_IALLUGO; |
667 | break; |
668 | } |
669 | |
670 | return 0; |
671 | } |
672 | |
/* Operations of the bpf_preload module; NULL when none are available. */
struct bpf_preload_ops *bpf_preload_ops;
EXPORT_SYMBOL_GPL(bpf_preload_ops);
675 | |
676 | static bool bpf_preload_mod_get(void) |
677 | { |
678 | /* If bpf_preload.ko wasn't loaded earlier then load it now. |
679 | * When bpf_preload is built into vmlinux the module's __init |
680 | * function will populate it. |
681 | */ |
682 | if (!bpf_preload_ops) { |
683 | request_module("bpf_preload" ); |
684 | if (!bpf_preload_ops) |
685 | return false; |
686 | } |
687 | /* And grab the reference, so the module doesn't disappear while the |
688 | * kernel is interacting with the kernel module and its UMD. |
689 | */ |
690 | if (!try_module_get(module: bpf_preload_ops->owner)) { |
691 | pr_err("bpf_preload module get failed.\n" ); |
692 | return false; |
693 | } |
694 | return true; |
695 | } |
696 | |
697 | static void bpf_preload_mod_put(void) |
698 | { |
699 | if (bpf_preload_ops) |
700 | /* now user can "rmmod bpf_preload" if necessary */ |
701 | module_put(module: bpf_preload_ops->owner); |
702 | } |
703 | |
704 | static DEFINE_MUTEX(bpf_preload_lock); |
705 | |
706 | static int populate_bpffs(struct dentry *parent) |
707 | { |
708 | struct bpf_preload_info objs[BPF_PRELOAD_LINKS] = {}; |
709 | int err = 0, i; |
710 | |
711 | /* grab the mutex to make sure the kernel interactions with bpf_preload |
712 | * are serialized |
713 | */ |
714 | mutex_lock(&bpf_preload_lock); |
715 | |
716 | /* if bpf_preload.ko wasn't built into vmlinux then load it */ |
717 | if (!bpf_preload_mod_get()) |
718 | goto out; |
719 | |
720 | err = bpf_preload_ops->preload(objs); |
721 | if (err) |
722 | goto out_put; |
723 | for (i = 0; i < BPF_PRELOAD_LINKS; i++) { |
724 | bpf_link_inc(link: objs[i].link); |
725 | err = bpf_iter_link_pin_kernel(parent, |
726 | name: objs[i].link_name, link: objs[i].link); |
727 | if (err) { |
728 | bpf_link_put(link: objs[i].link); |
729 | goto out_put; |
730 | } |
731 | } |
732 | out_put: |
733 | bpf_preload_mod_put(); |
734 | out: |
735 | mutex_unlock(lock: &bpf_preload_lock); |
736 | return err; |
737 | } |
738 | |
739 | static int bpf_fill_super(struct super_block *sb, struct fs_context *fc) |
740 | { |
741 | static const struct tree_descr bpf_rfiles[] = { { "" } }; |
742 | struct bpf_mount_opts *opts = fc->fs_private; |
743 | struct inode *inode; |
744 | int ret; |
745 | |
746 | ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); |
747 | if (ret) |
748 | return ret; |
749 | |
750 | sb->s_op = &bpf_super_ops; |
751 | |
752 | inode = sb->s_root->d_inode; |
753 | inode->i_op = &bpf_dir_iops; |
754 | inode->i_mode &= ~S_IALLUGO; |
755 | populate_bpffs(parent: sb->s_root); |
756 | inode->i_mode |= S_ISVTX | opts->mode; |
757 | return 0; |
758 | } |
759 | |
760 | static int bpf_get_tree(struct fs_context *fc) |
761 | { |
762 | return get_tree_nodev(fc, fill_super: bpf_fill_super); |
763 | } |
764 | |
765 | static void bpf_free_fc(struct fs_context *fc) |
766 | { |
767 | kfree(objp: fc->fs_private); |
768 | } |
769 | |
770 | static const struct fs_context_operations bpf_context_ops = { |
771 | .free = bpf_free_fc, |
772 | .parse_param = bpf_parse_param, |
773 | .get_tree = bpf_get_tree, |
774 | }; |
775 | |
776 | /* |
777 | * Set up the filesystem mount context. |
778 | */ |
779 | static int bpf_init_fs_context(struct fs_context *fc) |
780 | { |
781 | struct bpf_mount_opts *opts; |
782 | |
783 | opts = kzalloc(size: sizeof(struct bpf_mount_opts), GFP_KERNEL); |
784 | if (!opts) |
785 | return -ENOMEM; |
786 | |
787 | opts->mode = S_IRWXUGO; |
788 | |
789 | fc->fs_private = opts; |
790 | fc->ops = &bpf_context_ops; |
791 | return 0; |
792 | } |
793 | |
794 | static struct file_system_type bpf_fs_type = { |
795 | .owner = THIS_MODULE, |
796 | .name = "bpf" , |
797 | .init_fs_context = bpf_init_fs_context, |
798 | .parameters = bpf_fs_parameters, |
799 | .kill_sb = kill_litter_super, |
800 | }; |
801 | |
802 | static int __init bpf_init(void) |
803 | { |
804 | int ret; |
805 | |
806 | ret = sysfs_create_mount_point(parent_kobj: fs_kobj, name: "bpf" ); |
807 | if (ret) |
808 | return ret; |
809 | |
810 | ret = register_filesystem(&bpf_fs_type); |
811 | if (ret) |
812 | sysfs_remove_mount_point(parent_kobj: fs_kobj, name: "bpf" ); |
813 | |
814 | return ret; |
815 | } |
816 | fs_initcall(bpf_init); |
817 | |