// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/rbtree_latch.h>
#include <linux/perf_event.h>
#include <linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/rcupdate_wait.h>
#include <linux/static_call.h>
#include <linux/bpf_verifier.h>
#include <linux/bpf_lsm.h>
#include <linux/delay.h>

/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
};
const struct bpf_prog_ops bpf_extension_prog_ops = {
};

/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
#define TRAMPOLINE_HASH_BITS 10
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)

static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];

/* serializes access to trampoline_table */
static DEFINE_MUTEX(trampoline_mutex);

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);

static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd)
{
	struct bpf_trampoline *tr = ops->private;
	int ret = 0;

	if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
		/* This is called from inside register_ftrace_direct(), so
		 * tr->mutex is already locked.
		 */
		lockdep_assert_held_once(&tr->mutex);

		/* Instead of updating the trampoline here, we propagate
		 * -EAGAIN to register_ftrace_direct(). Then we can
		 * retry register_ftrace_direct() after updating the
		 * trampoline.
		 */
		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) {
			if (WARN_ON_ONCE(tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY))
				return -EBUSY;

			tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
			return -EAGAIN;
		}

		return 0;
	}

	/* The normal locking order is
	 * tr->mutex => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
	 *
	 * The following two commands are called from
	 *
	 * prepare_direct_functions_for_ipmodify
	 * cleanup_direct_functions_after_ipmodify
	 *
	 * In both cases, direct_mutex is already locked. Use
	 * mutex_trylock(&tr->mutex) to avoid deadlock in race condition
	 * (something else is making changes to this same trampoline).
	 */
	if (!mutex_trylock(&tr->mutex)) {
		/* sleep 1 ms to make sure whatever is holding tr->mutex
		 * makes some progress.
		 */
		msleep(1);
		return -EAGAIN;
	}

	switch (cmd) {
	case FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER:
		tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;

		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK))
			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
		break;
	case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER:
		tr->flags &= ~BPF_TRAMP_F_SHARE_IPMODIFY;

		if (tr->flags & BPF_TRAMP_F_ORIG_STACK)
			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	mutex_unlock(&tr->mutex);
	return ret;
}
#endif

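/* Return true if the program type/attach type pair is dispatched through a
 * BPF trampoline (fentry/fexit/fmod_ret tracing and LSM hooks).
 */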
bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
{
	enum bpf_attach_type eatype = prog->expected_attach_type;
	enum bpf_prog_type ptype = prog->type;

	return (ptype == BPF_PROG_TYPE_TRACING &&
		(eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT ||
		 eatype == BPF_MODIFY_RETURN)) ||
		(ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC);
}

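/* Publish (or retract) the trampoline image as a kallsyms symbol and emit the
 * matching PERF_RECORD_KSYMBOL event so profilers can symbolize it.
 */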
void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym)
{
	ksym->start = (unsigned long) data;
	ksym->end = ksym->start + size;
	bpf_ksym_add(ksym);
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
			   PAGE_SIZE, false, ksym->name);
}

void bpf_image_ksym_del(struct bpf_ksym *ksym)
{
	bpf_ksym_del(ksym);
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
			   PAGE_SIZE, true, ksym->name);
}

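/* Find the trampoline for @key in the hash table, allocating a new one on a
 * miss. Returns the trampoline with its refcount elevated (the caller drops
 * it with bpf_trampoline_put()), or NULL on allocation failure.
 */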
static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
	struct bpf_trampoline *tr;
	struct hlist_head *head;
	int i;

	mutex_lock(&trampoline_mutex);
	head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
	hlist_for_each_entry(tr, head, hlist) {
		if (tr->key == key) {
			refcount_inc(&tr->refcnt);
			goto out;
		}
	}
	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
	if (!tr)
		goto out;
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
	tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
	if (!tr->fops) {
		kfree(tr);
		tr = NULL;
		goto out;
	}
	tr->fops->private = tr;
	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
#endif

	tr->key = key;
	INIT_HLIST_NODE(&tr->hlist);
	hlist_add_head(&tr->hlist, head);
	refcount_set(&tr->refcnt, 1);
	mutex_init(&tr->mutex);
	for (i = 0; i < BPF_TRAMP_MAX; i++)
		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
out:
	mutex_unlock(&trampoline_mutex);
	return tr;
}

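/* The trampoline is attached to the target function either through an ftrace
 * direct call (when the target has an ftrace-patchable entry) or by poking
 * the call instruction directly with bpf_arch_text_poke().
 */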
static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed)
		ret = unregister_ftrace_direct(tr->fops, (long)old_addr, false);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);

	return ret;
}

static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr,
			 bool lock_direct_mutex)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed) {
		if (lock_direct_mutex)
			ret = modify_ftrace_direct(tr->fops, (long)new_addr);
		else
			ret = modify_ftrace_direct_nolock(tr->fops, (long)new_addr);
	} else {
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
	}
	return ret;
}

/* first time registering */
static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
{
	void *ip = tr->func.addr;
	unsigned long faddr;
	int ret;

	faddr = ftrace_location((unsigned long)ip);
	if (faddr) {
		if (!tr->fops)
			return -ENOTSUPP;
		tr->func.ftrace_managed = true;
	}

	if (tr->func.ftrace_managed) {
		ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1);
		ret = register_ftrace_direct(tr->fops, (long)new_addr);
	} else {
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
	}

	return ret;
}

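/* Snapshot the links currently attached to the trampoline into a freshly
 * allocated bpf_tramp_links array, one slot per fentry/fmod_ret/fexit kind,
 * for the arch code to consume. *total is the overall number of links and
 * *ip_arg is set if any prog needs the IP argument.
 */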
static struct bpf_tramp_links *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg)
{
	struct bpf_tramp_link *link;
	struct bpf_tramp_links *tlinks;
	struct bpf_tramp_link **links;
	int kind;

	*total = 0;
	tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
	if (!tlinks)
		return ERR_PTR(-ENOMEM);

	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
		tlinks[kind].nr_links = tr->progs_cnt[kind];
		*total += tr->progs_cnt[kind];
		links = tlinks[kind].links;

		hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
			*ip_arg |= link->link.prog->call_get_func_ip;
			*links++ = link;
		}
	}
	return tlinks;
}

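/* Teardown of a trampoline image: release its ksym, free the executable
 * memory, uncharge the JIT memory accounting and free the struct itself once
 * an RCU grace period has passed.
 */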
static void bpf_tramp_image_free(struct bpf_tramp_image *im)
{
	bpf_image_ksym_del(&im->ksym);
	arch_free_bpf_trampoline(im->image, im->size);
	bpf_jit_uncharge_modmem(im->size);
	percpu_ref_exit(&im->pcref);
	kfree_rcu(im, rcu);
}

static void __bpf_tramp_image_put_deferred(struct work_struct *work)
{
	struct bpf_tramp_image *im;

	im = container_of(work, struct bpf_tramp_image, work);
	bpf_tramp_image_free(im);
}

/* callback, fexit step 3 or fentry step 2 */
static void __bpf_tramp_image_put_rcu(struct rcu_head *rcu)
{
	struct bpf_tramp_image *im;

	im = container_of(rcu, struct bpf_tramp_image, rcu);
	INIT_WORK(&im->work, __bpf_tramp_image_put_deferred);
	schedule_work(&im->work);
}

/* callback, fexit step 2. Called after percpu_ref_kill confirms. */
static void __bpf_tramp_image_release(struct percpu_ref *pcref)
{
	struct bpf_tramp_image *im;

	im = container_of(pcref, struct bpf_tramp_image, pcref);
	call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
}

/* callback, fexit or fentry step 1 */
static void __bpf_tramp_image_put_rcu_tasks(struct rcu_head *rcu)
{
	struct bpf_tramp_image *im;

	im = container_of(rcu, struct bpf_tramp_image, rcu);
	if (im->ip_after_call)
		/* the case of fmod_ret/fexit trampoline and CONFIG_PREEMPTION=y */
		percpu_ref_kill(&im->pcref);
	else
		/* the case of fentry trampoline */
		call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
}

static void bpf_tramp_image_put(struct bpf_tramp_image *im)
{
	/* The trampoline image that calls the original function uses:
	 * rcu_read_lock_trace to protect sleepable bpf progs
	 * rcu_read_lock to protect normal bpf progs
	 * percpu_ref to protect the trampoline itself
	 * rcu tasks to protect the trampoline asm not covered by percpu_ref
	 * (the few asm insns before __bpf_tramp_enter and
	 * after __bpf_tramp_exit)
	 *
	 * The trampoline is unreachable before bpf_tramp_image_put().
	 *
	 * First, patch the trampoline to avoid calling into fexit progs.
	 * The progs will be freed even if the original function is still
	 * executing or sleeping.
	 * In case of CONFIG_PREEMPTION=y use call_rcu_tasks() to wait for
	 * the first few asm instructions to execute and call into
	 * __bpf_tramp_enter->percpu_ref_get.
	 * Then use percpu_ref_kill to wait for the trampoline and the original
	 * function to finish.
	 * Then use call_rcu_tasks() to make sure the few asm insns in
	 * the trampoline epilogue are done as well.
	 *
	 * In the !PREEMPTION case a task that got interrupted in the first asm
	 * insns won't go through an RCU quiescent state which
	 * percpu_ref_kill will be waiting for. Hence the first
	 * call_rcu_tasks() is not necessary.
	 */
	if (im->ip_after_call) {
		int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_JUMP,
					     NULL, im->ip_epilogue);
		WARN_ON(err);
		if (IS_ENABLED(CONFIG_PREEMPTION))
			call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
		else
			percpu_ref_kill(&im->pcref);
		return;
	}

	/* The trampoline without fexit and fmod_ret progs doesn't call original
	 * function and doesn't use percpu_ref.
	 * Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
	 * Then use call_rcu_tasks() to wait for the rest of trampoline asm
	 * and normal progs.
	 */
	call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
}

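/* Allocate a trampoline image: charge the JIT memory accounting, allocate
 * executable memory, initialize the percpu_ref used to track in-flight
 * executions and register the image as a ksym.
 */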
static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, int size)
{
	struct bpf_tramp_image *im;
	struct bpf_ksym *ksym;
	void *image;
	int err = -ENOMEM;

	im = kzalloc(sizeof(*im), GFP_KERNEL);
	if (!im)
		goto out;

	err = bpf_jit_charge_modmem(size);
	if (err)
		goto out_free_im;
	im->size = size;

	err = -ENOMEM;
	im->image = image = arch_alloc_bpf_trampoline(size);
	if (!image)
		goto out_uncharge;

	err = percpu_ref_init(&im->pcref, __bpf_tramp_image_release, 0, GFP_KERNEL);
	if (err)
		goto out_free_image;

	ksym = &im->ksym;
	INIT_LIST_HEAD_RCU(&ksym->lnode);
	snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", key);
	bpf_image_ksym_add(image, size, ksym);
	return im;

out_free_image:
	arch_free_bpf_trampoline(im->image, im->size);
out_uncharge:
	bpf_jit_uncharge_modmem(size);
out_free_im:
	kfree(im);
out:
	return ERR_PTR(err);
}

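/* Regenerate the trampoline from the links that are currently attached and
 * switch the target function over to the new image. Called with tr->mutex
 * held. With no links left, the fentry attachment is removed altogether.
 */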
static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex)
{
	struct bpf_tramp_image *im;
	struct bpf_tramp_links *tlinks;
	u32 orig_flags = tr->flags;
	bool ip_arg = false;
	int err, total, size;

	tlinks = bpf_trampoline_get_progs(tr, &total, &ip_arg);
	if (IS_ERR(tlinks))
		return PTR_ERR(tlinks);

	if (total == 0) {
		err = unregister_fentry(tr, tr->cur_image->image);
		bpf_tramp_image_put(tr->cur_image);
		tr->cur_image = NULL;
		goto out;
	}

	/* clear all bits except SHARE_IPMODIFY and TAIL_CALL_CTX */
	tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX);

	if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
	    tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) {
		/* NOTE: BPF_TRAMP_F_RESTORE_REGS and BPF_TRAMP_F_SKIP_FRAME
		 * should not be set together.
		 */
		tr->flags |= BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
	} else {
		tr->flags |= BPF_TRAMP_F_RESTORE_REGS;
	}

	if (ip_arg)
		tr->flags |= BPF_TRAMP_F_IP_ARG;

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
again:
	if ((tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) &&
	    (tr->flags & BPF_TRAMP_F_CALL_ORIG))
		tr->flags |= BPF_TRAMP_F_ORIG_STACK;
#endif

	size = arch_bpf_trampoline_size(&tr->func.model, tr->flags,
					tlinks, tr->func.addr);
	if (size < 0) {
		err = size;
		goto out;
	}

	if (size > PAGE_SIZE) {
		err = -E2BIG;
		goto out;
	}

	im = bpf_tramp_image_alloc(tr->key, size);
	if (IS_ERR(im)) {
		err = PTR_ERR(im);
		goto out;
	}

	err = arch_prepare_bpf_trampoline(im, im->image, im->image + size,
					  &tr->func.model, tr->flags, tlinks,
					  tr->func.addr);
	if (err < 0)
		goto out_free;

	arch_protect_bpf_trampoline(im->image, im->size);

	WARN_ON(tr->cur_image && total == 0);
	if (tr->cur_image)
		/* progs already running at this address */
		err = modify_fentry(tr, tr->cur_image->image, im->image, lock_direct_mutex);
	else
		/* first time registering */
		err = register_fentry(tr, im->image);

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
	if (err == -EAGAIN) {
		/* -EAGAIN from bpf_tramp_ftrace_ops_func. Now
		 * BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the
		 * trampoline again, and retry register.
		 */
		/* reset fops->func and fops->trampoline for re-register */
		tr->fops->func = NULL;
		tr->fops->trampoline = 0;

		/* free im memory and reallocate later */
		bpf_tramp_image_free(im);
		goto again;
	}
#endif
	if (err)
		goto out_free;

	if (tr->cur_image)
		bpf_tramp_image_put(tr->cur_image);
	tr->cur_image = im;
out:
	/* If any error happens, restore previous flags */
	if (err)
		tr->flags = orig_flags;
	kfree(tlinks);
	return err;

out_free:
	bpf_tramp_image_free(im);
	goto out;
}

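/* Map the program's expected attach type to the trampoline slot it occupies.
 * LSM hooks that return void can only run as fexit, since there is no return
 * value to modify; the default case covers extension (freplace) programs,
 * which replace the target entirely.
 */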
static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
{
	switch (prog->expected_attach_type) {
	case BPF_TRACE_FENTRY:
		return BPF_TRAMP_FENTRY;
	case BPF_MODIFY_RETURN:
		return BPF_TRAMP_MODIFY_RETURN;
	case BPF_TRACE_FEXIT:
		return BPF_TRAMP_FEXIT;
	case BPF_LSM_MAC:
		if (!prog->aux->attach_func_proto->type)
			/* The function returns void, we cannot modify its
			 * return value.
			 */
			return BPF_TRAMP_FEXIT;
		else
			return BPF_TRAMP_MODIFY_RETURN;
	default:
		return BPF_TRAMP_REPLACE;
	}
}

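/* Attach a link to the trampoline and regenerate the image. Extension progs
 * are mutually exclusive with fentry/fexit/fmod_ret progs on the same target.
 * The caller must hold tr->mutex.
 */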
static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
{
	enum bpf_tramp_prog_type kind;
	struct bpf_tramp_link *link_exiting;
	int err = 0;
	int cnt = 0, i;

	kind = bpf_attach_type_to_tramp(link->link.prog);
	if (tr->extension_prog)
		/* cannot attach fentry/fexit if extension prog is attached.
		 * cannot overwrite extension prog either.
		 */
		return -EBUSY;

	for (i = 0; i < BPF_TRAMP_MAX; i++)
		cnt += tr->progs_cnt[i];

	if (kind == BPF_TRAMP_REPLACE) {
		/* Cannot attach extension if fentry/fexit are in use. */
		if (cnt)
			return -EBUSY;
		tr->extension_prog = link->link.prog;
		return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
					  link->link.prog->bpf_func);
	}
	if (cnt >= BPF_MAX_TRAMP_LINKS)
		return -E2BIG;
	if (!hlist_unhashed(&link->tramp_hlist))
		/* prog already linked */
		return -EBUSY;
	hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) {
		if (link_exiting->link.prog != link->link.prog)
			continue;
		/* prog already linked */
		return -EBUSY;
	}

	hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
	tr->progs_cnt[kind]++;
	err = bpf_trampoline_update(tr, true /* lock_direct_mutex */);
	if (err) {
		hlist_del_init(&link->tramp_hlist);
		tr->progs_cnt[kind]--;
	}
	return err;
}

int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
{
	int err;

	mutex_lock(&tr->mutex);
	err = __bpf_trampoline_link_prog(link, tr);
	mutex_unlock(&tr->mutex);
	return err;
}

static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
{
	enum bpf_tramp_prog_type kind;
	int err;

	kind = bpf_attach_type_to_tramp(link->link.prog);
	if (kind == BPF_TRAMP_REPLACE) {
		WARN_ON_ONCE(!tr->extension_prog);
		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
					 tr->extension_prog->bpf_func, NULL);
		tr->extension_prog = NULL;
		return err;
	}
	hlist_del_init(&link->tramp_hlist);
	tr->progs_cnt[kind]--;
	return bpf_trampoline_update(tr, true /* lock_direct_mutex */);
}

/* bpf_trampoline_unlink_prog() should never fail. */
int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
{
	int err;

	mutex_lock(&tr->mutex);
	err = __bpf_trampoline_unlink_prog(link, tr);
	mutex_unlock(&tr->mutex);
	return err;
}

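/* BPF_LSM_CGROUP programs are not attached to the trampoline directly.
 * Instead a shim program (found via bpf_lsm_find_cgroup_shim()) is linked
 * into the LSM hook's trampoline on their behalf and dispatches into the
 * cgroup-attached programs.
 */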
#if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
static void bpf_shim_tramp_link_release(struct bpf_link *link)
{
	struct bpf_shim_tramp_link *shim_link =
		container_of(link, struct bpf_shim_tramp_link, link.link);

	/* paired with 'shim_link->trampoline = tr' in bpf_trampoline_link_cgroup_shim */
	if (!shim_link->trampoline)
		return;

	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline));
	bpf_trampoline_put(shim_link->trampoline);
}

static void bpf_shim_tramp_link_dealloc(struct bpf_link *link)
{
	struct bpf_shim_tramp_link *shim_link =
		container_of(link, struct bpf_shim_tramp_link, link.link);

	kfree(shim_link);
}

static const struct bpf_link_ops bpf_shim_tramp_link_lops = {
	.release = bpf_shim_tramp_link_release,
	.dealloc = bpf_shim_tramp_link_dealloc,
};

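/* Allocate a stub bpf_prog around the shim function together with the
 * bpf_shim_tramp_link that will attach it to the LSM hook's trampoline.
 * The stub borrows the attach target information from the original program.
 */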
static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog,
						     bpf_func_t bpf_func,
						     int cgroup_atype)
{
	struct bpf_shim_tramp_link *shim_link = NULL;
	struct bpf_prog *p;

	shim_link = kzalloc(sizeof(*shim_link), GFP_USER);
	if (!shim_link)
		return NULL;

	p = bpf_prog_alloc(1, 0);
	if (!p) {
		kfree(shim_link);
		return NULL;
	}

	p->jited = false;
	p->bpf_func = bpf_func;

	p->aux->cgroup_atype = cgroup_atype;
	p->aux->attach_func_proto = prog->aux->attach_func_proto;
	p->aux->attach_btf_id = prog->aux->attach_btf_id;
	p->aux->attach_btf = prog->aux->attach_btf;
	btf_get(p->aux->attach_btf);
	p->type = BPF_PROG_TYPE_LSM;
	p->expected_attach_type = BPF_LSM_MAC;
	bpf_prog_inc(p);
	bpf_link_init(&shim_link->link.link, BPF_LINK_TYPE_UNSPEC,
		      &bpf_shim_tramp_link_lops, p);
	bpf_cgroup_atype_get(p->aux->attach_btf_id, cgroup_atype);

	return shim_link;
}

static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr,
						    bpf_func_t bpf_func)
{
	struct bpf_tramp_link *link;
	int kind;

	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
		hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
			struct bpf_prog *p = link->link.prog;

			if (p->bpf_func == bpf_func)
				return container_of(link, struct bpf_shim_tramp_link, link);
		}
	}

	return NULL;
}

int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
				    int cgroup_atype)
{
	struct bpf_shim_tramp_link *shim_link = NULL;
	struct bpf_attach_target_info tgt_info = {};
	struct bpf_trampoline *tr;
	bpf_func_t bpf_func;
	u64 key;
	int err;

	err = bpf_check_attach_target(NULL, prog, NULL,
				      prog->aux->attach_btf_id,
				      &tgt_info);
	if (err)
		return err;

	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
					 prog->aux->attach_btf_id);

	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
	tr = bpf_trampoline_get(key, &tgt_info);
	if (!tr)
		return -ENOMEM;

	mutex_lock(&tr->mutex);

	shim_link = cgroup_shim_find(tr, bpf_func);
	if (shim_link) {
		/* Reusing existing shim attached by the other program. */
		bpf_link_inc(&shim_link->link.link);

		mutex_unlock(&tr->mutex);
		bpf_trampoline_put(tr); /* bpf_trampoline_get above */
		return 0;
	}

	/* Allocate and install new shim. */

	shim_link = cgroup_shim_alloc(prog, bpf_func, cgroup_atype);
	if (!shim_link) {
		err = -ENOMEM;
		goto err;
	}

	err = __bpf_trampoline_link_prog(&shim_link->link, tr);
	if (err)
		goto err;

	shim_link->trampoline = tr;
	/* note, we're still holding tr refcnt from above */

	mutex_unlock(&tr->mutex);

	return 0;
err:
	mutex_unlock(&tr->mutex);

	if (shim_link)
		bpf_link_put(&shim_link->link.link);

	/* have to release tr while _not_ holding its mutex */
	bpf_trampoline_put(tr); /* bpf_trampoline_get above */

	return err;
}

void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
{
	struct bpf_shim_tramp_link *shim_link = NULL;
	struct bpf_trampoline *tr;
	bpf_func_t bpf_func;
	u64 key;

	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
					 prog->aux->attach_btf_id);

	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
	tr = bpf_trampoline_lookup(key);
	if (WARN_ON_ONCE(!tr))
		return;

	mutex_lock(&tr->mutex);
	shim_link = cgroup_shim_find(tr, bpf_func);
	mutex_unlock(&tr->mutex);

	if (shim_link)
		bpf_link_put(&shim_link->link.link);

	bpf_trampoline_put(tr); /* bpf_trampoline_lookup above */
}
#endif

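/* Look up (or create) the trampoline for @key and record the target function
 * model and address from @tgt_info on first use. Takes a reference that the
 * caller releases with bpf_trampoline_put().
 */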
struct bpf_trampoline *bpf_trampoline_get(u64 key,
					  struct bpf_attach_target_info *tgt_info)
{
	struct bpf_trampoline *tr;

	tr = bpf_trampoline_lookup(key);
	if (!tr)
		return NULL;

	mutex_lock(&tr->mutex);
	if (tr->func.addr)
		goto out;

	memcpy(&tr->func.model, &tgt_info->fmodel, sizeof(tgt_info->fmodel));
	tr->func.addr = (void *)tgt_info->tgt_addr;
out:
	mutex_unlock(&tr->mutex);
	return tr;
}

void bpf_trampoline_put(struct bpf_trampoline *tr)
{
	int i;

	if (!tr)
		return;
	mutex_lock(&trampoline_mutex);
	if (!refcount_dec_and_test(&tr->refcnt))
		goto out;
	WARN_ON_ONCE(mutex_is_locked(&tr->mutex));

	for (i = 0; i < BPF_TRAMP_MAX; i++)
		if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i])))
			goto out;

	/* This code will be executed even when the last bpf_tramp_image
	 * is alive. All progs are detached from the trampoline and the
	 * trampoline image is patched with jmp into epilogue to skip
	 * fexit progs. The fentry-only trampoline will be freed via
	 * multiple rcu callbacks.
	 */
	hlist_del(&tr->hlist);
	if (tr->fops) {
		ftrace_free_filter(tr->fops);
		kfree(tr->fops);
	}
	kfree(tr);
out:
	mutex_unlock(&trampoline_mutex);
}

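/* NO_START_TIME marks "don't record stats": it is returned when stats are
 * disabled or when sched_clock() yields 0. update_prog_stats() only records
 * samples with start > NO_START_TIME.
 */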
#define NO_START_TIME 1
static __always_inline u64 notrace bpf_prog_start_time(void)
{
	u64 start = NO_START_TIME;

	if (static_branch_unlikely(&bpf_stats_enabled_key)) {
		start = sched_clock();
		if (unlikely(!start))
			start = NO_START_TIME;
	}
	return start;
}

/* The logic is similar to bpf_prog_run(), but with an explicit
 * rcu_read_lock() and migrate_disable() which are required
 * for the trampoline. The sequence emitted by the trampoline is
 * call __bpf_prog_enter
 * call prog->bpf_func
 * call __bpf_prog_exit
 *
 * __bpf_prog_enter returns:
 * 0 - skip execution of the bpf prog
 * 1 - execute bpf prog
 * [2..MAX_U64] - execute bpf prog and record execution time.
 * This is start time.
 */
static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
	__acquires(RCU)
{
	rcu_read_lock();
	migrate_disable();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
		bpf_prog_inc_misses_counter(prog);
		return 0;
	}
	return bpf_prog_start_time();
}

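/* Fold one invocation into the per-CPU run stats (count and nanoseconds) when
 * BPF stats collection is enabled and a valid start timestamp was recorded.
 */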
static void notrace update_prog_stats(struct bpf_prog *prog,
				      u64 start)
{
	struct bpf_prog_stats *stats;

	if (static_branch_unlikely(&bpf_stats_enabled_key) &&
	    /* static_key could be enabled in __bpf_prog_enter*
	     * and disabled in __bpf_prog_exit*.
	     * And vice versa.
	     * Hence check that 'start' is valid.
	     */
	    start > NO_START_TIME) {
		unsigned long flags;

		stats = this_cpu_ptr(prog->stats);
		flags = u64_stats_update_begin_irqsave(&stats->syncp);
		u64_stats_inc(&stats->cnt);
		u64_stats_add(&stats->nsecs, sched_clock() - start);
		u64_stats_update_end_irqrestore(&stats->syncp, flags);
	}
}

static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start,
					  struct bpf_tramp_run_ctx *run_ctx)
	__releases(RCU)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	this_cpu_dec(*(prog->active));
	migrate_enable();
	rcu_read_unlock();
}

static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
					       struct bpf_tramp_run_ctx *run_ctx)
	__acquires(RCU)
{
	/* Runtime stats are exported via actual BPF_LSM_CGROUP
	 * programs, not the shims.
	 */
	rcu_read_lock();
	migrate_disable();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	return NO_START_TIME;
}

static void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
					       struct bpf_tramp_run_ctx *run_ctx)
	__releases(RCU)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	migrate_enable();
	rcu_read_unlock();
}

u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
					     struct bpf_tramp_run_ctx *run_ctx)
{
	rcu_read_lock_trace();
	migrate_disable();
	might_fault();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
		bpf_prog_inc_misses_counter(prog);
		return 0;
	}
	return bpf_prog_start_time();
}

void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
					     struct bpf_tramp_run_ctx *run_ctx)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	this_cpu_dec(*(prog->active));
	migrate_enable();
	rcu_read_unlock_trace();
}

static u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog,
					      struct bpf_tramp_run_ctx *run_ctx)
{
	rcu_read_lock_trace();
	migrate_disable();
	might_fault();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	return bpf_prog_start_time();
}

static void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
					      struct bpf_tramp_run_ctx *run_ctx)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	migrate_enable();
	rcu_read_unlock_trace();
}

static u64 notrace __bpf_prog_enter(struct bpf_prog *prog,
				    struct bpf_tramp_run_ctx *run_ctx)
	__acquires(RCU)
{
	rcu_read_lock();
	migrate_disable();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	return bpf_prog_start_time();
}

static void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start,
				    struct bpf_tramp_run_ctx *run_ctx)
	__releases(RCU)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	migrate_enable();
	rcu_read_unlock();
}

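/* Called from the generated trampoline assembly around the call to the
 * original function to keep the image's percpu_ref held while execution may
 * still return through the trampoline.
 */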
void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr)
{
	percpu_ref_get(&tr->pcref);
}

void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
{
	percpu_ref_put(&tr->pcref);
}

bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog)
{
	bool sleepable = prog->sleepable;

	if (bpf_prog_check_recur(prog))
		return sleepable ? __bpf_prog_enter_sleepable_recur :
			__bpf_prog_enter_recur;

	if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
	    prog->expected_attach_type == BPF_LSM_CGROUP)
		return __bpf_prog_enter_lsm_cgroup;

	return sleepable ? __bpf_prog_enter_sleepable : __bpf_prog_enter;
}

bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog)
{
	bool sleepable = prog->sleepable;

	if (bpf_prog_check_recur(prog))
		return sleepable ? __bpf_prog_exit_sleepable_recur :
			__bpf_prog_exit_recur;

	if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
	    prog->expected_attach_type == BPF_LSM_CGROUP)
		return __bpf_prog_exit_lsm_cgroup;

	return sleepable ? __bpf_prog_exit_sleepable : __bpf_prog_exit;
}

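/* Weak defaults for architectures without trampoline support: the prepare and
 * size hooks report -ENOTSUPP, and image memory falls back to one executable
 * page managed with bpf_jit_alloc_exec()/bpf_jit_free_exec().
 */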
int __weak
arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
			    const struct btf_func_model *m, u32 flags,
			    struct bpf_tramp_links *tlinks,
			    void *func_addr)
{
	return -ENOTSUPP;
}

void * __weak arch_alloc_bpf_trampoline(unsigned int size)
{
	void *image;

	if (WARN_ON_ONCE(size > PAGE_SIZE))
		return NULL;
	image = bpf_jit_alloc_exec(PAGE_SIZE);
	if (image)
		set_vm_flush_reset_perms(image);
	return image;
}

void __weak arch_free_bpf_trampoline(void *image, unsigned int size)
{
	WARN_ON_ONCE(size > PAGE_SIZE);
	/* bpf_jit_free_exec doesn't need "size", but
	 * bpf_prog_pack_free() needs it.
	 */
	bpf_jit_free_exec(image);
}

void __weak arch_protect_bpf_trampoline(void *image, unsigned int size)
{
	WARN_ON_ONCE(size > PAGE_SIZE);
	set_memory_rox((long)image, 1);
}

void __weak arch_unprotect_bpf_trampoline(void *image, unsigned int size)
{
	WARN_ON_ONCE(size > PAGE_SIZE);
	set_memory_nx((long)image, 1);
	set_memory_rw((long)image, 1);
}

int __weak arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
				    struct bpf_tramp_links *tlinks, void *func_addr)
{
	return -ENOTSUPP;
}

static int __init init_trampolines(void)
{
	int i;

	for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&trampoline_table[i]);
	return 0;
}
late_initcall(init_trampolines);