// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2020 Facebook */

#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/rcupdate_trace.h>

struct bpf_iter_target_info {
	struct list_head list;
	const struct bpf_iter_reg *reg_info;
	u32 btf_id;	/* cached value */
};

struct bpf_iter_link {
	struct bpf_link link;
	struct bpf_iter_aux_info aux;
	struct bpf_iter_target_info *tinfo;
};

struct bpf_iter_priv_data {
	struct bpf_iter_target_info *tinfo;
	const struct bpf_iter_seq_info *seq_info;
	struct bpf_prog *prog;
	u64 session_id;
	u64 seq_num;
	bool done_stop;
	u8 target_private[] __aligned(8);
};

static struct list_head targets = LIST_HEAD_INIT(targets);
static DEFINE_MUTEX(targets_mutex);

/* protect bpf_iter_link changes */
static DEFINE_MUTEX(link_mutex);

/* incremented on every opened seq_file */
static atomic64_t session_id;

static int prepare_seq_file(struct file *file, struct bpf_iter_link *link,
			    const struct bpf_iter_seq_info *seq_info);

static void bpf_iter_inc_seq_num(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	iter_priv->seq_num++;
}

static void bpf_iter_dec_seq_num(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	iter_priv->seq_num--;
}

static void bpf_iter_done_stop(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	iter_priv->done_stop = true;
}

static inline bool bpf_iter_target_support_resched(const struct bpf_iter_target_info *tinfo)
{
	return tinfo->reg_info->feature & BPF_ITER_RESCHED;
}

static bool bpf_iter_support_resched(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	return bpf_iter_target_support_resched(iter_priv->tinfo);
}

/* maximum visited objects before bailing out */
#define MAX_ITER_OBJECTS	1000000
/* bpf_seq_read, a customized and simpler version for bpf iterator.
 * The following are differences from seq_read():
 *  . fixed buffer size (PAGE_SIZE << 3), never resized
 *  . assuming NULL ->llseek()
 *  . stop() may call bpf program, handling potential overflow there
 */
static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
			    loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	size_t n, offs, copied = 0;
	int err = 0, num_objs = 0;
	bool can_resched;
	void *p;

	mutex_lock(&seq->lock);

	if (!seq->buf) {
		seq->size = PAGE_SIZE << 3;
		seq->buf = kvmalloc(seq->size, GFP_KERNEL);
		if (!seq->buf) {
			err = -ENOMEM;
			goto done;
		}
	}

	if (seq->count) {
		n = min(seq->count, size);
		err = copy_to_user(buf, seq->buf + seq->from, n);
		if (err) {
			err = -EFAULT;
			goto done;
		}
		seq->count -= n;
		seq->from += n;
		copied = n;
		goto done;
	}

	seq->from = 0;
	p = seq->op->start(seq, &seq->index);
	if (!p)
		goto stop;
	if (IS_ERR(p)) {
		err = PTR_ERR(p);
		seq->op->stop(seq, p);
		seq->count = 0;
		goto done;
	}

	err = seq->op->show(seq, p);
	if (err > 0) {
		/* object is skipped, decrease seq_num, so next
		 * valid object can reuse the same seq_num.
		 */
		bpf_iter_dec_seq_num(seq);
		seq->count = 0;
	} else if (err < 0 || seq_has_overflowed(seq)) {
		if (!err)
			err = -E2BIG;
		seq->op->stop(seq, p);
		seq->count = 0;
		goto done;
	}

	can_resched = bpf_iter_support_resched(seq);
	while (1) {
		loff_t pos = seq->index;

		num_objs++;
		offs = seq->count;
		p = seq->op->next(seq, p, &seq->index);
		if (pos == seq->index) {
			pr_info_ratelimited("buggy seq_file .next function %ps "
					    "did not update position index\n",
					    seq->op->next);
			seq->index++;
		}
		if (IS_ERR_OR_NULL(p))
			break;

		/* got a valid next object, increase seq_num */
		bpf_iter_inc_seq_num(seq);

		if (seq->count >= size)
			break;

		if (num_objs >= MAX_ITER_OBJECTS) {
			if (offs == 0) {
				err = -EAGAIN;
				seq->op->stop(seq, p);
				goto done;
			}
			break;
		}

		err = seq->op->show(seq, p);
		if (err > 0) {
			bpf_iter_dec_seq_num(seq);
			seq->count = offs;
		} else if (err < 0 || seq_has_overflowed(seq)) {
			seq->count = offs;
			if (offs == 0) {
				if (!err)
					err = -E2BIG;
				seq->op->stop(seq, p);
				goto done;
			}
			break;
		}

		if (can_resched)
			cond_resched();
	}
stop:
	offs = seq->count;
	if (IS_ERR(p)) {
		seq->op->stop(seq, NULL);
		err = PTR_ERR(p);
		goto done;
	}
	/* bpf program called if !p */
	seq->op->stop(seq, p);
	if (!p) {
		if (!seq_has_overflowed(seq)) {
			bpf_iter_done_stop(seq);
		} else {
			seq->count = offs;
			if (offs == 0) {
				err = -E2BIG;
				goto done;
			}
		}
	}

	n = min(seq->count, size);
	err = copy_to_user(buf, seq->buf, n);
	if (err) {
		err = -EFAULT;
		goto done;
	}
	copied = n;
	seq->count -= n;
	seq->from = n;
done:
	if (!copied)
		copied = err;
	else
		*ppos += copied;
	mutex_unlock(&seq->lock);
	return copied;
}

static const struct bpf_iter_seq_info *
__get_seq_info(struct bpf_iter_link *link)
{
	const struct bpf_iter_seq_info *seq_info;

	if (link->aux.map) {
		seq_info = link->aux.map->ops->iter_seq_info;
		if (seq_info)
			return seq_info;
	}

	return link->tinfo->reg_info->seq_info;
}

static int iter_open(struct inode *inode, struct file *file)
{
	struct bpf_iter_link *link = inode->i_private;

	return prepare_seq_file(file, link, __get_seq_info(link));
}

static int iter_release(struct inode *inode, struct file *file)
{
	struct bpf_iter_priv_data *iter_priv;
	struct seq_file *seq;

	seq = file->private_data;
	if (!seq)
		return 0;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);

	if (iter_priv->seq_info->fini_seq_private)
		iter_priv->seq_info->fini_seq_private(seq->private);

	bpf_prog_put(iter_priv->prog);
	seq->private = iter_priv;

	return seq_release_private(inode, file);
}

const struct file_operations bpf_iter_fops = {
	.open		= iter_open,
	.llseek		= no_llseek,
	.read		= bpf_seq_read,
	.release	= iter_release,
};

/* The argument reg_info will be cached in bpf_iter_target_info.
 * The common practice is to declare the target's reg_info as a
 * const static variable and pass it as an argument to
 * bpf_iter_reg_target(), as in the sketch after this function.
 */
int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info)
{
	struct bpf_iter_target_info *tinfo;

	tinfo = kzalloc(sizeof(*tinfo), GFP_KERNEL);
	if (!tinfo)
		return -ENOMEM;

	tinfo->reg_info = reg_info;
	INIT_LIST_HEAD(&tinfo->list);

	mutex_lock(&targets_mutex);
	list_add(&tinfo->list, &targets);
	mutex_unlock(&targets_mutex);

	return 0;
}
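
/* A minimal registration sketch (illustrative only: the "foo" target,
 * its seq_ops and private struct are hypothetical, not defined in this
 * file):
 *
 *	static const struct bpf_iter_seq_info foo_seq_info = {
 *		.seq_ops		= &foo_seq_ops,
 *		.init_seq_private	= foo_init_seq_priv,
 *		.fini_seq_private	= foo_fini_seq_priv,
 *		.seq_priv_size		= sizeof(struct foo_iter_seq_priv),
 *	};
 *
 *	static const struct bpf_iter_reg foo_reg_info = {
 *		.target		= "foo",
 *		.seq_info	= &foo_seq_info,
 *	};
 *
 *	static int __init foo_iter_init(void)
 *	{
 *		return bpf_iter_reg_target(&foo_reg_info);
 *	}
 */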

void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info)
{
	struct bpf_iter_target_info *tinfo;
	bool found = false;

	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
		if (reg_info == tinfo->reg_info) {
			list_del(&tinfo->list);
			kfree(tinfo);
			found = true;
			break;
		}
	}
	mutex_unlock(&targets_mutex);

	WARN_ON(found == false);
}

static void cache_btf_id(struct bpf_iter_target_info *tinfo,
			 struct bpf_prog *prog)
{
	tinfo->btf_id = prog->aux->attach_btf_id;
}

bool bpf_iter_prog_supported(struct bpf_prog *prog)
{
	const char *attach_fname = prog->aux->attach_func_name;
	struct bpf_iter_target_info *tinfo = NULL, *iter;
	u32 prog_btf_id = prog->aux->attach_btf_id;
	const char *prefix = BPF_ITER_FUNC_PREFIX;
	int prefix_len = strlen(prefix);

	if (strncmp(attach_fname, prefix, prefix_len))
		return false;

	mutex_lock(&targets_mutex);
	list_for_each_entry(iter, &targets, list) {
		if (iter->btf_id && iter->btf_id == prog_btf_id) {
			tinfo = iter;
			break;
		}
		if (!strcmp(attach_fname + prefix_len, iter->reg_info->target)) {
			cache_btf_id(iter, prog);
			tinfo = iter;
			break;
		}
	}
	mutex_unlock(&targets_mutex);

	if (tinfo) {
		prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size;
		prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info;
	}

	return tinfo != NULL;
}

const struct bpf_func_proto *
bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	const struct bpf_iter_target_info *tinfo;
	const struct bpf_func_proto *fn = NULL;

	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
		if (tinfo->btf_id == prog->aux->attach_btf_id) {
			const struct bpf_iter_reg *reg_info;

			reg_info = tinfo->reg_info;
			if (reg_info->get_func_proto)
				fn = reg_info->get_func_proto(func_id, prog);
			break;
		}
	}
	mutex_unlock(&targets_mutex);

	return fn;
}

static void bpf_iter_link_release(struct bpf_link *link)
{
	struct bpf_iter_link *iter_link =
		container_of(link, struct bpf_iter_link, link);

	if (iter_link->tinfo->reg_info->detach_target)
		iter_link->tinfo->reg_info->detach_target(&iter_link->aux);
}

static void bpf_iter_link_dealloc(struct bpf_link *link)
{
	struct bpf_iter_link *iter_link =
		container_of(link, struct bpf_iter_link, link);

	kfree(iter_link);
}

static int bpf_iter_link_replace(struct bpf_link *link,
				 struct bpf_prog *new_prog,
				 struct bpf_prog *old_prog)
{
	int ret = 0;

	mutex_lock(&link_mutex);
	if (old_prog && link->prog != old_prog) {
		ret = -EPERM;
		goto out_unlock;
	}

	if (link->prog->type != new_prog->type ||
	    link->prog->expected_attach_type != new_prog->expected_attach_type ||
	    link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) {
		ret = -EINVAL;
		goto out_unlock;
	}

	old_prog = xchg(&link->prog, new_prog);
	bpf_prog_put(old_prog);

out_unlock:
	mutex_unlock(&link_mutex);
	return ret;
}

static void bpf_iter_link_show_fdinfo(const struct bpf_link *link,
				      struct seq_file *seq)
{
	struct bpf_iter_link *iter_link =
		container_of(link, struct bpf_iter_link, link);
	bpf_iter_show_fdinfo_t show_fdinfo;

	seq_printf(seq,
		   "target_name:\t%s\n",
		   iter_link->tinfo->reg_info->target);

	show_fdinfo = iter_link->tinfo->reg_info->show_fdinfo;
	if (show_fdinfo)
		show_fdinfo(&iter_link->aux, seq);
}

static int bpf_iter_link_fill_link_info(const struct bpf_link *link,
					struct bpf_link_info *info)
{
	struct bpf_iter_link *iter_link =
		container_of(link, struct bpf_iter_link, link);
	char __user *ubuf = u64_to_user_ptr(info->iter.target_name);
	bpf_iter_fill_link_info_t fill_link_info;
	u32 ulen = info->iter.target_name_len;
	const char *target_name;
	u32 target_len;

	if (!ulen ^ !ubuf)
		return -EINVAL;

	target_name = iter_link->tinfo->reg_info->target;
	target_len = strlen(target_name);
	info->iter.target_name_len = target_len + 1;

	if (ubuf) {
		if (ulen >= target_len + 1) {
			if (copy_to_user(ubuf, target_name, target_len + 1))
				return -EFAULT;
		} else {
			char zero = '\0';

			if (copy_to_user(ubuf, target_name, ulen - 1))
				return -EFAULT;
			if (put_user(zero, ubuf + ulen - 1))
				return -EFAULT;
			return -ENOSPC;
		}
	}

	fill_link_info = iter_link->tinfo->reg_info->fill_link_info;
	if (fill_link_info)
		return fill_link_info(&iter_link->aux, info);

	return 0;
}

static const struct bpf_link_ops bpf_iter_link_lops = {
	.release = bpf_iter_link_release,
	.dealloc = bpf_iter_link_dealloc,
	.update_prog = bpf_iter_link_replace,
	.show_fdinfo = bpf_iter_link_show_fdinfo,
	.fill_link_info = bpf_iter_link_fill_link_info,
};

bool bpf_link_is_iter(struct bpf_link *link)
{
	return link->ops == &bpf_iter_link_lops;
}

int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
			 struct bpf_prog *prog)
{
	struct bpf_iter_target_info *tinfo = NULL, *iter;
	struct bpf_link_primer link_primer;
	union bpf_iter_link_info linfo;
	struct bpf_iter_link *link;
	u32 prog_btf_id, linfo_len;
	bpfptr_t ulinfo;
	int err;

	if (attr->link_create.target_fd || attr->link_create.flags)
		return -EINVAL;

	memset(&linfo, 0, sizeof(union bpf_iter_link_info));

	ulinfo = make_bpfptr(attr->link_create.iter_info, uattr.is_kernel);
	linfo_len = attr->link_create.iter_info_len;
	if (bpfptr_is_null(ulinfo) ^ !linfo_len)
		return -EINVAL;

	if (!bpfptr_is_null(ulinfo)) {
		err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
					       linfo_len);
		if (err)
			return err;
		linfo_len = min_t(u32, linfo_len, sizeof(linfo));
		if (copy_from_bpfptr(&linfo, ulinfo, linfo_len))
			return -EFAULT;
	}

	prog_btf_id = prog->aux->attach_btf_id;
	mutex_lock(&targets_mutex);
	list_for_each_entry(iter, &targets, list) {
		if (iter->btf_id == prog_btf_id) {
			tinfo = iter;
			break;
		}
	}
	mutex_unlock(&targets_mutex);
	if (!tinfo)
		return -ENOENT;

	/* Only allow a sleepable program for a resched-able iterator */
	if (prog->sleepable && !bpf_iter_target_support_resched(tinfo))
		return -EINVAL;

	link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
	if (!link)
		return -ENOMEM;

	bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog);
	link->tinfo = tinfo;

	err = bpf_link_prime(&link->link, &link_primer);
	if (err) {
		kfree(link);
		return err;
	}

	if (tinfo->reg_info->attach_target) {
		err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux);
		if (err) {
			bpf_link_cleanup(&link_primer);
			return err;
		}
	}

	return bpf_link_settle(&link_primer);
}
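
/* Userspace supplies the optional iter_info at LINK_CREATE time, e.g.
 * to pin a map-element iterator to one map (a sketch using libbpf):
 *
 *	LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 *	union bpf_iter_link_info linfo = {};
 *
 *	linfo.map.map_fd = map_fd;
 *	opts.link_info = &linfo;
 *	opts.link_info_len = sizeof(linfo);
 *	link = bpf_program__attach_iter(prog, &opts);
 */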

static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
			  struct bpf_iter_target_info *tinfo,
			  const struct bpf_iter_seq_info *seq_info,
			  struct bpf_prog *prog)
{
	priv_data->tinfo = tinfo;
	priv_data->seq_info = seq_info;
	priv_data->prog = prog;
	priv_data->session_id = atomic64_inc_return(&session_id);
	priv_data->seq_num = 0;
	priv_data->done_stop = false;
}

static int prepare_seq_file(struct file *file, struct bpf_iter_link *link,
			    const struct bpf_iter_seq_info *seq_info)
{
	struct bpf_iter_priv_data *priv_data;
	struct bpf_iter_target_info *tinfo;
	struct bpf_prog *prog;
	u32 total_priv_dsize;
	struct seq_file *seq;
	int err = 0;

	mutex_lock(&link_mutex);
	prog = link->link.prog;
	bpf_prog_inc(prog);
	mutex_unlock(&link_mutex);

	tinfo = link->tinfo;
	total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) +
			   seq_info->seq_priv_size;
	priv_data = __seq_open_private(file, seq_info->seq_ops,
				       total_priv_dsize);
	if (!priv_data) {
		err = -ENOMEM;
		goto release_prog;
	}

	if (seq_info->init_seq_private) {
		err = seq_info->init_seq_private(priv_data->target_private, &link->aux);
		if (err)
			goto release_seq_file;
	}

	init_seq_meta(priv_data, tinfo, seq_info, prog);
	seq = file->private_data;
	seq->private = priv_data->target_private;

	return 0;

release_seq_file:
	seq_release_private(file->f_inode, file);
	file->private_data = NULL;
release_prog:
	bpf_prog_put(prog);
	return err;
}

int bpf_iter_new_fd(struct bpf_link *link)
{
	struct bpf_iter_link *iter_link;
	struct file *file;
	unsigned int flags;
	int err, fd;

	if (link->ops != &bpf_iter_link_lops)
		return -EINVAL;

	flags = O_RDONLY | O_CLOEXEC;
	fd = get_unused_fd_flags(flags);
	if (fd < 0)
		return fd;

	file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto free_fd;
	}

	iter_link = container_of(link, struct bpf_iter_link, link);
	err = prepare_seq_file(file, iter_link, __get_seq_info(iter_link));
	if (err)
		goto free_file;

	fd_install(fd, file);
	return fd;

free_file:
	fput(file);
free_fd:
	put_unused_fd(fd);
	return err;
}
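
/* Typical userspace consumption of the iterator fd (a sketch using
 * libbpf; error handling omitted):
 *
 *	int iter_fd = bpf_iter_create(bpf_link__fd(link));
 *	char buf[4096];
 *	ssize_t len;
 *
 *	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
 *		write(STDOUT_FILENO, buf, len);
 *	close(iter_fd);
 */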

struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop)
{
	struct bpf_iter_priv_data *iter_priv;
	struct seq_file *seq;
	void *seq_priv;

	seq = meta->seq;
	if (seq->file->f_op != &bpf_iter_fops)
		return NULL;

	seq_priv = seq->private;
	iter_priv = container_of(seq_priv, struct bpf_iter_priv_data,
				 target_private);

	if (in_stop && iter_priv->done_stop)
		return NULL;

	meta->session_id = iter_priv->session_id;
	meta->seq_num = iter_priv->seq_num;

	return iter_priv->prog;
}

int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
{
	struct bpf_run_ctx run_ctx, *old_run_ctx;
	int ret;

	if (prog->sleepable) {
		rcu_read_lock_trace();
		migrate_disable();
		might_fault();
		old_run_ctx = bpf_set_run_ctx(&run_ctx);
		ret = bpf_prog_run(prog, ctx);
		bpf_reset_run_ctx(old_run_ctx);
		migrate_enable();
		rcu_read_unlock_trace();
	} else {
		rcu_read_lock();
		migrate_disable();
		old_run_ctx = bpf_set_run_ctx(&run_ctx);
		ret = bpf_prog_run(prog, ctx);
		bpf_reset_run_ctx(old_run_ctx);
		migrate_enable();
		rcu_read_unlock();
	}

	/* The bpf program can only return 0 or 1:
	 *  0 : okay
	 *  1 : retry the same object
	 * The return value of bpf_iter_run_prog() becomes the
	 * seq_ops->show() return value.
	 */
	return ret == 0 ? 0 : -EAGAIN;
}
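
/* A minimal BPF-side iterator program that this path runs (a sketch;
 * assumes libbpf's vmlinux.h and bpf_helpers.h, and the "task" target):
 *
 *	SEC("iter/task")
 *	int dump_task(struct bpf_iter__task *ctx)
 *	{
 *		struct seq_file *seq = ctx->meta->seq;
 *		struct task_struct *task = ctx->task;
 *
 *		if (task)
 *			BPF_SEQ_PRINTF(seq, "pid: %d\n", task->pid);
 *		return 0;	// 0: okay, 1: retry this object
 *	}
 */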

BPF_CALL_4(bpf_for_each_map_elem, struct bpf_map *, map, void *, callback_fn,
	   void *, callback_ctx, u64, flags)
{
	return map->ops->map_for_each_callback(map, callback_fn, callback_ctx, flags);
}

const struct bpf_func_proto bpf_for_each_map_elem_proto = {
	.func		= bpf_for_each_map_elem,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_FUNC,
	.arg3_type	= ARG_PTR_TO_STACK_OR_NULL,
	.arg4_type	= ARG_ANYTHING,
};
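
/* Typical use from a BPF program (a sketch; the map, callback and
 * context below are illustrative):
 *
 *	static long check_elem(struct bpf_map *map, void *key,
 *			       void *value, void *ctx)
 *	{
 *		// ... inspect one element ...
 *		return 0;	// 0 - continue, 1 - stop iterating
 *	}
 *
 *	...
 *	bpf_for_each_map_elem(&my_hash_map, check_elem, &my_ctx, 0);
 */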

BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx,
	   u64, flags)
{
	bpf_callback_t callback = (bpf_callback_t)callback_fn;
	u64 ret;
	u32 i;

	/* Note: these safety checks are duplicated when bpf_loop is
	 * inlined, so keep this code in sync with
	 * verifier.c:inline_bpf_loop.
	 */
	if (flags)
		return -EINVAL;
	if (nr_loops > BPF_MAX_LOOPS)
		return -E2BIG;

	for (i = 0; i < nr_loops; i++) {
		ret = callback((u64)i, (u64)(long)callback_ctx, 0, 0, 0);
		/* return value: 0 - continue, 1 - stop and return */
		if (ret)
			return i + 1;
	}

	return i;
}

const struct bpf_func_proto bpf_loop_proto = {
	.func		= bpf_loop,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_PTR_TO_FUNC,
	.arg3_type	= ARG_PTR_TO_STACK_OR_NULL,
	.arg4_type	= ARG_ANYTHING,
};
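
/* Typical use from a BPF program (a sketch; the callback and context
 * are illustrative):
 *
 *	static long cb(u64 index, void *ctx)
 *	{
 *		// ... per-iteration work ...
 *		return 0;	// 0 - continue, 1 - stop
 *	}
 *
 *	...
 *	long nr = bpf_loop(100, cb, &my_ctx, 0);	// iterations performed
 */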

struct bpf_iter_num_kern {
	int cur;	/* current value, inclusive */
	int end;	/* final value, exclusive */
} __aligned(8);

__bpf_kfunc_start_defs();

__bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end)
{
	struct bpf_iter_num_kern *s = (void *)it;

	BUILD_BUG_ON(sizeof(struct bpf_iter_num_kern) != sizeof(struct bpf_iter_num));
	BUILD_BUG_ON(__alignof__(struct bpf_iter_num_kern) != __alignof__(struct bpf_iter_num));

	/* start == end is legit, it's an empty range and we'll just get NULL
	 * on first (and any subsequent) bpf_iter_num_next() call
	 */
	if (start > end) {
		s->cur = s->end = 0;
		return -EINVAL;
	}

	/* avoid overflows, e.g., if start == INT_MIN and end == INT_MAX */
	if ((s64)end - (s64)start > BPF_MAX_LOOPS) {
		s->cur = s->end = 0;
		return -E2BIG;
	}

	/* user will call bpf_iter_num_next() first,
	 * which will set s->cur to exactly start value;
	 * underflow shouldn't matter
	 */
	s->cur = start - 1;
	s->end = end;

	return 0;
}

__bpf_kfunc int *bpf_iter_num_next(struct bpf_iter_num *it)
{
	struct bpf_iter_num_kern *s = (void *)it;

	/* check failed initialization or if we are done (same behavior);
	 * need to be careful about overflow, so convert to s64 for checks,
	 * e.g., if s->cur == s->end == INT_MAX, we can't just do
	 * s->cur + 1 >= s->end
	 */
	if ((s64)(s->cur + 1) >= s->end) {
		s->cur = s->end = 0;
		return NULL;
	}

	s->cur++;

	return &s->cur;
}

__bpf_kfunc void bpf_iter_num_destroy(struct bpf_iter_num *it)
{
	struct bpf_iter_num_kern *s = (void *)it;

	s->cur = s->end = 0;
}

__bpf_kfunc_end_defs();
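
/* Typical open-coded use from a BPF program (a sketch; with recent
 * libbpf the bpf_for() macro expands to this pattern):
 *
 *	struct bpf_iter_num it;
 *	int *v, sum = 0;
 *
 *	bpf_iter_num_new(&it, 0, 100);
 *	while ((v = bpf_iter_num_next(&it)))
 *		sum += *v;
 *	bpf_iter_num_destroy(&it);
 */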