1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com |
3 | */ |
4 | #include <linux/bpf.h> |
5 | #include <linux/btf.h> |
6 | #include <linux/bpf-cgroup.h> |
7 | #include <linux/cgroup.h> |
8 | #include <linux/rcupdate.h> |
9 | #include <linux/random.h> |
10 | #include <linux/smp.h> |
11 | #include <linux/topology.h> |
12 | #include <linux/ktime.h> |
13 | #include <linux/sched.h> |
14 | #include <linux/uidgid.h> |
15 | #include <linux/filter.h> |
16 | #include <linux/ctype.h> |
17 | #include <linux/jiffies.h> |
18 | #include <linux/pid_namespace.h> |
19 | #include <linux/poison.h> |
20 | #include <linux/proc_ns.h> |
21 | #include <linux/sched/task.h> |
22 | #include <linux/security.h> |
23 | #include <linux/btf_ids.h> |
24 | #include <linux/bpf_mem_alloc.h> |
25 | #include <linux/kasan.h> |
26 | |
27 | #include "../../lib/kstrtox.h" |
28 | |
29 | /* If a kernel subsystem allows eBPF programs to call this function, |
30 | * it should return bpf_map_lookup_elem_proto from its own |
31 | * verifier_ops->get_func_proto() callback, so that the verifier can properly check the arguments. |
32 | * |
33 | * Different map implementations rely on RCU in the map methods |
34 | * lookup/update/delete, therefore eBPF programs must run under the RCU read lock |
35 | * if the program is allowed to access maps, so check rcu_read_lock_held() in |
36 | * all three functions. |
37 | */ |
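/* Illustrative sketch (not part of this file): a subsystem's
 * verifier_ops->get_func_proto() callback typically looks like the
 * hypothetical example below, falling back to bpf_base_func_proto()
 * for helpers it does not special-case:
 *
 *   static const struct bpf_func_proto *
 *   my_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 *   {
 *           switch (func_id) {
 *           case BPF_FUNC_map_lookup_elem:
 *                   return &bpf_map_lookup_elem_proto;
 *           default:
 *                   return bpf_base_func_proto(func_id);
 *           }
 *   }
 */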
38 | BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key) |
39 | { |
40 | WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); |
41 | return (unsigned long) map->ops->map_lookup_elem(map, key); |
42 | } |
43 | |
44 | const struct bpf_func_proto bpf_map_lookup_elem_proto = { |
45 | .func = bpf_map_lookup_elem, |
46 | .gpl_only = false, |
47 | .pkt_access = true, |
48 | .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, |
49 | .arg1_type = ARG_CONST_MAP_PTR, |
50 | .arg2_type = ARG_PTR_TO_MAP_KEY, |
51 | }; |
52 | |
53 | BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key, |
54 | void *, value, u64, flags) |
55 | { |
56 | WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); |
57 | return map->ops->map_update_elem(map, key, value, flags); |
58 | } |
59 | |
60 | const struct bpf_func_proto bpf_map_update_elem_proto = { |
61 | .func = bpf_map_update_elem, |
62 | .gpl_only = false, |
63 | .pkt_access = true, |
64 | .ret_type = RET_INTEGER, |
65 | .arg1_type = ARG_CONST_MAP_PTR, |
66 | .arg2_type = ARG_PTR_TO_MAP_KEY, |
67 | .arg3_type = ARG_PTR_TO_MAP_VALUE, |
68 | .arg4_type = ARG_ANYTHING, |
69 | }; |
70 | |
71 | BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key) |
72 | { |
73 | WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); |
74 | return map->ops->map_delete_elem(map, key); |
75 | } |
76 | |
77 | const struct bpf_func_proto bpf_map_delete_elem_proto = { |
78 | .func = bpf_map_delete_elem, |
79 | .gpl_only = false, |
80 | .pkt_access = true, |
81 | .ret_type = RET_INTEGER, |
82 | .arg1_type = ARG_CONST_MAP_PTR, |
83 | .arg2_type = ARG_PTR_TO_MAP_KEY, |
84 | }; |
85 | |
86 | BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags) |
87 | { |
88 | return map->ops->map_push_elem(map, value, flags); |
89 | } |
90 | |
91 | const struct bpf_func_proto bpf_map_push_elem_proto = { |
92 | .func = bpf_map_push_elem, |
93 | .gpl_only = false, |
94 | .pkt_access = true, |
95 | .ret_type = RET_INTEGER, |
96 | .arg1_type = ARG_CONST_MAP_PTR, |
97 | .arg2_type = ARG_PTR_TO_MAP_VALUE, |
98 | .arg3_type = ARG_ANYTHING, |
99 | }; |
100 | |
101 | BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value) |
102 | { |
103 | return map->ops->map_pop_elem(map, value); |
104 | } |
105 | |
106 | const struct bpf_func_proto bpf_map_pop_elem_proto = { |
107 | .func = bpf_map_pop_elem, |
108 | .gpl_only = false, |
109 | .ret_type = RET_INTEGER, |
110 | .arg1_type = ARG_CONST_MAP_PTR, |
111 | .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT, |
112 | }; |
113 | |
114 | BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) |
115 | { |
116 | return map->ops->map_peek_elem(map, value); |
117 | } |
118 | |
119 | const struct bpf_func_proto bpf_map_peek_elem_proto = { |
120 | .func = bpf_map_peek_elem, |
121 | .gpl_only = false, |
122 | .ret_type = RET_INTEGER, |
123 | .arg1_type = ARG_CONST_MAP_PTR, |
124 | .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT, |
125 | }; |
126 | |
127 | BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu) |
128 | { |
129 | WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); |
130 | return (unsigned long) map->ops->map_lookup_percpu_elem(map, key, cpu); |
131 | } |
132 | |
133 | const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto = { |
134 | .func = bpf_map_lookup_percpu_elem, |
135 | .gpl_only = false, |
136 | .pkt_access = true, |
137 | .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, |
138 | .arg1_type = ARG_CONST_MAP_PTR, |
139 | .arg2_type = ARG_PTR_TO_MAP_KEY, |
140 | .arg3_type = ARG_ANYTHING, |
141 | }; |
142 | |
143 | const struct bpf_func_proto bpf_get_prandom_u32_proto = { |
144 | .func = bpf_user_rnd_u32, |
145 | .gpl_only = false, |
146 | .ret_type = RET_INTEGER, |
147 | }; |
148 | |
149 | BPF_CALL_0(bpf_get_smp_processor_id) |
150 | { |
151 | return smp_processor_id(); |
152 | } |
153 | |
154 | const struct bpf_func_proto bpf_get_smp_processor_id_proto = { |
155 | .func = bpf_get_smp_processor_id, |
156 | .gpl_only = false, |
157 | .ret_type = RET_INTEGER, |
158 | }; |
159 | |
160 | BPF_CALL_0(bpf_get_numa_node_id) |
161 | { |
162 | return numa_node_id(); |
163 | } |
164 | |
165 | const struct bpf_func_proto bpf_get_numa_node_id_proto = { |
166 | .func = bpf_get_numa_node_id, |
167 | .gpl_only = false, |
168 | .ret_type = RET_INTEGER, |
169 | }; |
170 | |
171 | BPF_CALL_0(bpf_ktime_get_ns) |
172 | { |
173 | /* NMI safe access to clock monotonic */ |
174 | return ktime_get_mono_fast_ns(); |
175 | } |
176 | |
177 | const struct bpf_func_proto bpf_ktime_get_ns_proto = { |
178 | .func = bpf_ktime_get_ns, |
179 | .gpl_only = false, |
180 | .ret_type = RET_INTEGER, |
181 | }; |
182 | |
183 | BPF_CALL_0(bpf_ktime_get_boot_ns) |
184 | { |
185 | /* NMI safe access to clock boottime */ |
186 | return ktime_get_boot_fast_ns(); |
187 | } |
188 | |
189 | const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = { |
190 | .func = bpf_ktime_get_boot_ns, |
191 | .gpl_only = false, |
192 | .ret_type = RET_INTEGER, |
193 | }; |
194 | |
195 | BPF_CALL_0(bpf_ktime_get_coarse_ns) |
196 | { |
197 | return ktime_get_coarse_ns(); |
198 | } |
199 | |
200 | const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = { |
201 | .func = bpf_ktime_get_coarse_ns, |
202 | .gpl_only = false, |
203 | .ret_type = RET_INTEGER, |
204 | }; |
205 | |
206 | BPF_CALL_0(bpf_ktime_get_tai_ns) |
207 | { |
208 | /* NMI safe access to clock tai */ |
209 | return ktime_get_tai_fast_ns(); |
210 | } |
211 | |
212 | const struct bpf_func_proto bpf_ktime_get_tai_ns_proto = { |
213 | .func = bpf_ktime_get_tai_ns, |
214 | .gpl_only = false, |
215 | .ret_type = RET_INTEGER, |
216 | }; |
217 | |
218 | BPF_CALL_0(bpf_get_current_pid_tgid) |
219 | { |
220 | struct task_struct *task = current; |
221 | |
222 | if (unlikely(!task)) |
223 | return -EINVAL; |
224 | |
225 | return (u64) task->tgid << 32 | task->pid; |
226 | } |
227 | |
228 | const struct bpf_func_proto bpf_get_current_pid_tgid_proto = { |
229 | .func = bpf_get_current_pid_tgid, |
230 | .gpl_only = false, |
231 | .ret_type = RET_INTEGER, |
232 | }; |
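/* The returned u64 packs the thread group id in the upper 32 bits and the
 * thread id in the lower 32 bits. A minimal BPF-program-side sketch
 * (variable names are illustrative only):
 *
 *   u64 id   = bpf_get_current_pid_tgid();
 *   u32 tgid = id >> 32;   // userspace-visible "process" id
 *   u32 pid  = (u32)id;    // id of the current thread
 */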
233 | |
234 | BPF_CALL_0(bpf_get_current_uid_gid) |
235 | { |
236 | struct task_struct *task = current; |
237 | kuid_t uid; |
238 | kgid_t gid; |
239 | |
240 | if (unlikely(!task)) |
241 | return -EINVAL; |
242 | |
243 | current_uid_gid(&uid, &gid); |
244 | return (u64) from_kgid(&init_user_ns, gid) << 32 | |
245 | from_kuid(&init_user_ns, uid); |
246 | } |
247 | |
248 | const struct bpf_func_proto bpf_get_current_uid_gid_proto = { |
249 | .func = bpf_get_current_uid_gid, |
250 | .gpl_only = false, |
251 | .ret_type = RET_INTEGER, |
252 | }; |
253 | |
254 | BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size) |
255 | { |
256 | struct task_struct *task = current; |
257 | |
258 | if (unlikely(!task)) |
259 | goto err_clear; |
260 | |
261 | /* Verifier guarantees that size > 0 */ |
262 | strscpy_pad(buf, task->comm, size); |
263 | return 0; |
264 | err_clear: |
265 | memset(buf, 0, size); |
266 | return -EINVAL; |
267 | } |
268 | |
269 | const struct bpf_func_proto bpf_get_current_comm_proto = { |
270 | .func = bpf_get_current_comm, |
271 | .gpl_only = false, |
272 | .ret_type = RET_INTEGER, |
273 | .arg1_type = ARG_PTR_TO_UNINIT_MEM, |
274 | .arg2_type = ARG_CONST_SIZE, |
275 | }; |
276 | |
277 | #if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK) |
278 | |
279 | static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) |
280 | { |
281 | arch_spinlock_t *l = (void *)lock; |
282 | union { |
283 | __u32 val; |
284 | arch_spinlock_t lock; |
285 | } u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED }; |
286 | |
287 | compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0"); |
288 | BUILD_BUG_ON(sizeof(*l) != sizeof(__u32)); |
289 | BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32)); |
290 | preempt_disable(); |
291 | arch_spin_lock(l); |
292 | } |
293 | |
294 | static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) |
295 | { |
296 | arch_spinlock_t *l = (void *)lock; |
297 | |
298 | arch_spin_unlock(l); |
299 | preempt_enable(); |
300 | } |
301 | |
302 | #else |
303 | |
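/* Fallback for architectures without qspinlocks or an arch-specific BPF
 * spinlock: a minimal test-and-set lock built on an atomic_t. The lock side
 * spins until the word reads zero and then tries to atomically exchange it
 * to one; the unlock side is a release store of zero.
 */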
304 | static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) |
305 | { |
306 | atomic_t *l = (void *)lock; |
307 | |
308 | BUILD_BUG_ON(sizeof(*l) != sizeof(*lock)); |
309 | do { |
310 | atomic_cond_read_relaxed(l, !VAL); |
311 | } while (atomic_xchg(l, 1)); |
312 | } |
313 | |
314 | static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) |
315 | { |
316 | atomic_t *l = (void *)lock; |
317 | |
318 | atomic_set_release(l, 0); |
319 | } |
320 | |
321 | #endif |
322 | |
323 | static DEFINE_PER_CPU(unsigned long, irqsave_flags); |
324 | |
325 | static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock) |
326 | { |
327 | unsigned long flags; |
328 | |
329 | local_irq_save(flags); |
330 | __bpf_spin_lock(lock); |
331 | __this_cpu_write(irqsave_flags, flags); |
332 | } |
333 | |
334 | notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) |
335 | { |
336 | __bpf_spin_lock_irqsave(lock); |
337 | return 0; |
338 | } |
339 | |
340 | const struct bpf_func_proto bpf_spin_lock_proto = { |
341 | .func = bpf_spin_lock, |
342 | .gpl_only = false, |
343 | .ret_type = RET_VOID, |
344 | .arg1_type = ARG_PTR_TO_SPIN_LOCK, |
345 | .arg1_btf_id = BPF_PTR_POISON, |
346 | }; |
347 | |
348 | static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock) |
349 | { |
350 | unsigned long flags; |
351 | |
352 | flags = __this_cpu_read(irqsave_flags); |
353 | __bpf_spin_unlock(lock); |
354 | local_irq_restore(flags); |
355 | } |
356 | |
357 | notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) |
358 | { |
359 | __bpf_spin_unlock_irqrestore(lock); |
360 | return 0; |
361 | } |
362 | |
363 | const struct bpf_func_proto bpf_spin_unlock_proto = { |
364 | .func = bpf_spin_unlock, |
365 | .gpl_only = false, |
366 | .ret_type = RET_VOID, |
367 | .arg1_type = ARG_PTR_TO_SPIN_LOCK, |
368 | .arg1_btf_id = BPF_PTR_POISON, |
369 | }; |
370 | |
371 | void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, |
372 | bool lock_src) |
373 | { |
374 | struct bpf_spin_lock *lock; |
375 | |
376 | if (lock_src) |
377 | lock = src + map->record->spin_lock_off; |
378 | else |
379 | lock = dst + map->record->spin_lock_off; |
380 | preempt_disable(); |
381 | __bpf_spin_lock_irqsave(lock); |
382 | copy_map_value(map, dst, src); |
383 | __bpf_spin_unlock_irqrestore(lock); |
384 | preempt_enable(); |
385 | } |
386 | |
387 | BPF_CALL_0(bpf_jiffies64) |
388 | { |
389 | return get_jiffies_64(); |
390 | } |
391 | |
392 | const struct bpf_func_proto bpf_jiffies64_proto = { |
393 | .func = bpf_jiffies64, |
394 | .gpl_only = false, |
395 | .ret_type = RET_INTEGER, |
396 | }; |
397 | |
398 | #ifdef CONFIG_CGROUPS |
399 | BPF_CALL_0(bpf_get_current_cgroup_id) |
400 | { |
401 | struct cgroup *cgrp; |
402 | u64 cgrp_id; |
403 | |
404 | rcu_read_lock(); |
405 | cgrp = task_dfl_cgroup(current); |
406 | cgrp_id = cgroup_id(cgrp); |
407 | rcu_read_unlock(); |
408 | |
409 | return cgrp_id; |
410 | } |
411 | |
412 | const struct bpf_func_proto bpf_get_current_cgroup_id_proto = { |
413 | .func = bpf_get_current_cgroup_id, |
414 | .gpl_only = false, |
415 | .ret_type = RET_INTEGER, |
416 | }; |
417 | |
418 | BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level) |
419 | { |
420 | struct cgroup *cgrp; |
421 | struct cgroup *ancestor; |
422 | u64 cgrp_id; |
423 | |
424 | rcu_read_lock(); |
425 | cgrp = task_dfl_cgroup(current); |
426 | ancestor = cgroup_ancestor(cgrp, ancestor_level); |
427 | cgrp_id = ancestor ? cgroup_id(ancestor) : 0; |
428 | rcu_read_unlock(); |
429 | |
430 | return cgrp_id; |
431 | } |
432 | |
433 | const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = { |
434 | .func = bpf_get_current_ancestor_cgroup_id, |
435 | .gpl_only = false, |
436 | .ret_type = RET_INTEGER, |
437 | .arg1_type = ARG_ANYTHING, |
438 | }; |
439 | #endif /* CONFIG_CGROUPS */ |
440 | |
441 | #define BPF_STRTOX_BASE_MASK 0x1F |
442 | |
443 | static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags, |
444 | unsigned long long *res, bool *is_negative) |
445 | { |
446 | unsigned int base = flags & BPF_STRTOX_BASE_MASK; |
447 | const char *cur_buf = buf; |
448 | size_t cur_len = buf_len; |
449 | unsigned int consumed; |
450 | size_t val_len; |
451 | char str[64]; |
452 | |
453 | if (!buf || !buf_len || !res || !is_negative) |
454 | return -EINVAL; |
455 | |
456 | if (base != 0 && base != 8 && base != 10 && base != 16) |
457 | return -EINVAL; |
458 | |
459 | if (flags & ~BPF_STRTOX_BASE_MASK) |
460 | return -EINVAL; |
461 | |
462 | while (cur_buf < buf + buf_len && isspace(*cur_buf)) |
463 | ++cur_buf; |
464 | |
465 | *is_negative = (cur_buf < buf + buf_len && *cur_buf == '-'); |
466 | if (*is_negative) |
467 | ++cur_buf; |
468 | |
469 | consumed = cur_buf - buf; |
470 | cur_len -= consumed; |
471 | if (!cur_len) |
472 | return -EINVAL; |
473 | |
474 | cur_len = min(cur_len, sizeof(str) - 1); |
475 | memcpy(str, cur_buf, cur_len); |
476 | str[cur_len] = '\0'; |
477 | cur_buf = str; |
478 | |
479 | cur_buf = _parse_integer_fixup_radix(cur_buf, &base); |
480 | val_len = _parse_integer(cur_buf, base, res); |
481 | |
482 | if (val_len & KSTRTOX_OVERFLOW) |
483 | return -ERANGE; |
484 | |
485 | if (val_len == 0) |
486 | return -EINVAL; |
487 | |
488 | cur_buf += val_len; |
489 | consumed += cur_buf - str; |
490 | |
491 | return consumed; |
492 | } |
493 | |
494 | static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags, |
495 | long long *res) |
496 | { |
497 | unsigned long long _res; |
498 | bool is_negative; |
499 | int err; |
500 | |
501 | err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); |
502 | if (err < 0) |
503 | return err; |
504 | if (is_negative) { |
505 | if ((long long)-_res > 0) |
506 | return -ERANGE; |
507 | *res = -_res; |
508 | } else { |
509 | if ((long long)_res < 0) |
510 | return -ERANGE; |
511 | *res = _res; |
512 | } |
513 | return err; |
514 | } |
515 | |
516 | BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags, |
517 | long *, res) |
518 | { |
519 | long long _res; |
520 | int err; |
521 | |
522 | err = __bpf_strtoll(buf, buf_len, flags, &_res); |
523 | if (err < 0) |
524 | return err; |
525 | if (_res != (long)_res) |
526 | return -ERANGE; |
527 | *res = _res; |
528 | return err; |
529 | } |
530 | |
531 | const struct bpf_func_proto bpf_strtol_proto = { |
532 | .func = bpf_strtol, |
533 | .gpl_only = false, |
534 | .ret_type = RET_INTEGER, |
535 | .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, |
536 | .arg2_type = ARG_CONST_SIZE, |
537 | .arg3_type = ARG_ANYTHING, |
538 | .arg4_type = ARG_PTR_TO_LONG, |
539 | }; |
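/* A minimal BPF-program-side usage sketch (helper signature per
 * bpf-helpers(7)); "buf" is an illustrative byte buffer holding the digits:
 *
 *   long num;
 *   int n = bpf_strtol(buf, sizeof(buf), 0, &num);
 *   // n > 0: number of characters consumed, "num" holds the parsed value
 *   // n < 0: -EINVAL or -ERANGE on malformed input or overflow
 */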
540 | |
541 | BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags, |
542 | unsigned long *, res) |
543 | { |
544 | unsigned long long _res; |
545 | bool is_negative; |
546 | int err; |
547 | |
548 | err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); |
549 | if (err < 0) |
550 | return err; |
551 | if (is_negative) |
552 | return -EINVAL; |
553 | if (_res != (unsigned long)_res) |
554 | return -ERANGE; |
555 | *res = _res; |
556 | return err; |
557 | } |
558 | |
559 | const struct bpf_func_proto bpf_strtoul_proto = { |
560 | .func = bpf_strtoul, |
561 | .gpl_only = false, |
562 | .ret_type = RET_INTEGER, |
563 | .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, |
564 | .arg2_type = ARG_CONST_SIZE, |
565 | .arg3_type = ARG_ANYTHING, |
566 | .arg4_type = ARG_PTR_TO_LONG, |
567 | }; |
568 | |
569 | BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2) |
570 | { |
571 | return strncmp(s1, s2, s1_sz); |
572 | } |
573 | |
574 | static const struct bpf_func_proto bpf_strncmp_proto = { |
575 | .func = bpf_strncmp, |
576 | .gpl_only = false, |
577 | .ret_type = RET_INTEGER, |
578 | .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, |
579 | .arg2_type = ARG_CONST_SIZE, |
580 | .arg3_type = ARG_PTR_TO_CONST_STR, |
581 | }; |
582 | |
583 | BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino, |
584 | struct bpf_pidns_info *, nsdata, u32, size) |
585 | { |
586 | struct task_struct *task = current; |
587 | struct pid_namespace *pidns; |
588 | int err = -EINVAL; |
589 | |
590 | if (unlikely(size != sizeof(struct bpf_pidns_info))) |
591 | goto clear; |
592 | |
593 | if (unlikely((u64)(dev_t)dev != dev)) |
594 | goto clear; |
595 | |
596 | if (unlikely(!task)) |
597 | goto clear; |
598 | |
599 | pidns = task_active_pid_ns(task); |
600 | if (unlikely(!pidns)) { |
601 | err = -ENOENT; |
602 | goto clear; |
603 | } |
604 | |
605 | if (!ns_match(&pidns->ns, (dev_t)dev, ino)) |
606 | goto clear; |
607 | |
608 | nsdata->pid = task_pid_nr_ns(task, pidns); |
609 | nsdata->tgid = task_tgid_nr_ns(task, pidns); |
610 | return 0; |
611 | clear: |
612 | memset((void *)nsdata, 0, (size_t) size); |
613 | return err; |
614 | } |
615 | |
616 | const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = { |
617 | .func = bpf_get_ns_current_pid_tgid, |
618 | .gpl_only = false, |
619 | .ret_type = RET_INTEGER, |
620 | .arg1_type = ARG_ANYTHING, |
621 | .arg2_type = ARG_ANYTHING, |
622 | .arg3_type = ARG_PTR_TO_UNINIT_MEM, |
623 | .arg4_type = ARG_CONST_SIZE, |
624 | }; |
625 | |
626 | static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = { |
627 | .func = bpf_get_raw_cpu_id, |
628 | .gpl_only = false, |
629 | .ret_type = RET_INTEGER, |
630 | }; |
631 | |
632 | BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map, |
633 | u64, flags, void *, data, u64, size) |
634 | { |
635 | if (unlikely(flags & ~(BPF_F_INDEX_MASK))) |
636 | return -EINVAL; |
637 | |
638 | return bpf_event_output(map, flags, data, size, NULL, 0, NULL); |
639 | } |
640 | |
641 | const struct bpf_func_proto bpf_event_output_data_proto = { |
642 | .func = bpf_event_output_data, |
643 | .gpl_only = true, |
644 | .ret_type = RET_INTEGER, |
645 | .arg1_type = ARG_PTR_TO_CTX, |
646 | .arg2_type = ARG_CONST_MAP_PTR, |
647 | .arg3_type = ARG_ANYTHING, |
648 | .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, |
649 | .arg5_type = ARG_CONST_SIZE_OR_ZERO, |
650 | }; |
651 | |
652 | BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size, |
653 | const void __user *, user_ptr) |
654 | { |
655 | int ret = copy_from_user(dst, user_ptr, size); |
656 | |
657 | if (unlikely(ret)) { |
658 | memset(dst, 0, size); |
659 | ret = -EFAULT; |
660 | } |
661 | |
662 | return ret; |
663 | } |
664 | |
665 | const struct bpf_func_proto bpf_copy_from_user_proto = { |
666 | .func = bpf_copy_from_user, |
667 | .gpl_only = false, |
668 | .might_sleep = true, |
669 | .ret_type = RET_INTEGER, |
670 | .arg1_type = ARG_PTR_TO_UNINIT_MEM, |
671 | .arg2_type = ARG_CONST_SIZE_OR_ZERO, |
672 | .arg3_type = ARG_ANYTHING, |
673 | }; |
674 | |
675 | BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size, |
676 | const void __user *, user_ptr, struct task_struct *, tsk, u64, flags) |
677 | { |
678 | int ret; |
679 | |
680 | /* flags is not used yet */ |
681 | if (unlikely(flags)) |
682 | return -EINVAL; |
683 | |
684 | if (unlikely(!size)) |
685 | return 0; |
686 | |
687 | ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0); |
688 | if (ret == size) |
689 | return 0; |
690 | |
691 | memset(dst, 0, size); |
692 | /* Return -EFAULT for partial read */ |
693 | return ret < 0 ? ret : -EFAULT; |
694 | } |
695 | |
696 | const struct bpf_func_proto bpf_copy_from_user_task_proto = { |
697 | .func = bpf_copy_from_user_task, |
698 | .gpl_only = true, |
699 | .might_sleep = true, |
700 | .ret_type = RET_INTEGER, |
701 | .arg1_type = ARG_PTR_TO_UNINIT_MEM, |
702 | .arg2_type = ARG_CONST_SIZE_OR_ZERO, |
703 | .arg3_type = ARG_ANYTHING, |
704 | .arg4_type = ARG_PTR_TO_BTF_ID, |
705 | .arg4_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], |
706 | .arg5_type = ARG_ANYTHING |
707 | }; |
708 | |
709 | BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu) |
710 | { |
711 | if (cpu >= nr_cpu_ids) |
712 | return (unsigned long)NULL; |
713 | |
714 | return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu); |
715 | } |
716 | |
717 | const struct bpf_func_proto bpf_per_cpu_ptr_proto = { |
718 | .func = bpf_per_cpu_ptr, |
719 | .gpl_only = false, |
720 | .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY, |
721 | .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, |
722 | .arg2_type = ARG_ANYTHING, |
723 | }; |
724 | |
725 | BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr) |
726 | { |
727 | return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr); |
728 | } |
729 | |
730 | const struct bpf_func_proto bpf_this_cpu_ptr_proto = { |
731 | .func = bpf_this_cpu_ptr, |
732 | .gpl_only = false, |
733 | .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY, |
734 | .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, |
735 | }; |
736 | |
737 | static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, |
738 | size_t bufsz) |
739 | { |
740 | void __user *user_ptr = (__force void __user *)unsafe_ptr; |
741 | |
742 | buf[0] = 0; |
743 | |
744 | switch (fmt_ptype) { |
745 | case 's': |
746 | #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE |
747 | if ((unsigned long)unsafe_ptr < TASK_SIZE) |
748 | return strncpy_from_user_nofault(buf, user_ptr, bufsz); |
749 | fallthrough; |
750 | #endif |
751 | case 'k': |
752 | return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz); |
753 | case 'u': |
754 | return strncpy_from_user_nofault(buf, user_ptr, bufsz); |
755 | } |
756 | |
757 | return -EINVAL; |
758 | } |
759 | |
760 | /* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary |
761 | * arguments representation. |
762 | */ |
763 | #define MAX_BPRINTF_BIN_ARGS 512 |
764 | |
765 | /* Support executing three nested bprintf helper calls on a given CPU */ |
766 | #define MAX_BPRINTF_NEST_LEVEL 3 |
767 | struct bpf_bprintf_buffers { |
768 | char bin_args[MAX_BPRINTF_BIN_ARGS]; |
769 | char buf[MAX_BPRINTF_BUF]; |
770 | }; |
771 | |
772 | static DEFINE_PER_CPU(struct bpf_bprintf_buffers[MAX_BPRINTF_NEST_LEVEL], bpf_bprintf_bufs); |
773 | static DEFINE_PER_CPU(int, bpf_bprintf_nest_level); |
774 | |
775 | static int try_get_buffers(struct bpf_bprintf_buffers **bufs) |
776 | { |
777 | int nest_level; |
778 | |
779 | preempt_disable(); |
780 | nest_level = this_cpu_inc_return(bpf_bprintf_nest_level); |
781 | if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) { |
782 | this_cpu_dec(bpf_bprintf_nest_level); |
783 | preempt_enable(); |
784 | return -EBUSY; |
785 | } |
786 | *bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level - 1]); |
787 | |
788 | return 0; |
789 | } |
790 | |
791 | void bpf_bprintf_cleanup(struct bpf_bprintf_data *data) |
792 | { |
793 | if (!data->bin_args && !data->buf) |
794 | return; |
795 | if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0)) |
796 | return; |
797 | this_cpu_dec(bpf_bprintf_nest_level); |
798 | preempt_enable(); |
799 | } |
800 | |
801 | /* |
802 | * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers |
803 | * |
804 | * Returns a negative value if fmt is an invalid format string or 0 otherwise. |
805 | * |
806 | * This can be used in two ways: |
807 | * - Format string verification only: when data->get_bin_args is false |
808 | * - Arguments preparation: in addition to the above verification, it writes in |
809 | * data->bin_args a binary representation of arguments usable by bstr_printf |
810 | * where pointers from BPF have been sanitized. |
811 | * |
812 | * In argument preparation mode, if 0 is returned, safe temporary buffers are |
813 | * allocated and bpf_bprintf_cleanup should be called to free them after use. |
814 | */ |
815 | int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, |
816 | u32 num_args, struct bpf_bprintf_data *data) |
817 | { |
818 | bool get_buffers = (data->get_bin_args && num_args) || data->get_buf; |
819 | char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end; |
820 | struct bpf_bprintf_buffers *buffers = NULL; |
821 | size_t sizeof_cur_arg, sizeof_cur_ip; |
822 | int err, i, num_spec = 0; |
823 | u64 cur_arg; |
824 | char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX"; |
825 | |
826 | fmt_end = strnchr(fmt, fmt_size, 0); |
827 | if (!fmt_end) |
828 | return -EINVAL; |
829 | fmt_size = fmt_end - fmt; |
830 | |
831 | if (get_buffers && try_get_buffers(&buffers)) |
832 | return -EBUSY; |
833 | |
834 | if (data->get_bin_args) { |
835 | if (num_args) |
836 | tmp_buf = buffers->bin_args; |
837 | tmp_buf_end = tmp_buf + MAX_BPRINTF_BIN_ARGS; |
838 | data->bin_args = (u32 *)tmp_buf; |
839 | } |
840 | |
841 | if (data->get_buf) |
842 | data->buf = buffers->buf; |
843 | |
844 | for (i = 0; i < fmt_size; i++) { |
845 | if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) { |
846 | err = -EINVAL; |
847 | goto out; |
848 | } |
849 | |
850 | if (fmt[i] != '%') |
851 | continue; |
852 | |
853 | if (fmt[i + 1] == '%') { |
854 | i++; |
855 | continue; |
856 | } |
857 | |
858 | if (num_spec >= num_args) { |
859 | err = -EINVAL; |
860 | goto out; |
861 | } |
862 | |
863 | /* The string is zero-terminated so if fmt[i] != 0, we can |
864 | * always access fmt[i + 1], in the worst case it will be a 0 |
865 | */ |
866 | i++; |
867 | |
868 | /* skip optional "[0 +-][num]" width formatting field */ |
869 | while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' || |
870 | fmt[i] == ' ') |
871 | i++; |
872 | if (fmt[i] >= '1' && fmt[i] <= '9') { |
873 | i++; |
874 | while (fmt[i] >= '0' && fmt[i] <= '9') |
875 | i++; |
876 | } |
877 | |
878 | if (fmt[i] == 'p') { |
879 | sizeof_cur_arg = sizeof(long); |
880 | |
881 | if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') && |
882 | fmt[i + 2] == 's') { |
883 | fmt_ptype = fmt[i + 1]; |
884 | i += 2; |
885 | goto fmt_str; |
886 | } |
887 | |
888 | if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) || |
889 | ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' || |
890 | fmt[i + 1] == 'x' || fmt[i + 1] == 's' || |
891 | fmt[i + 1] == 'S') { |
892 | /* just kernel pointers */ |
893 | if (tmp_buf) |
894 | cur_arg = raw_args[num_spec]; |
895 | i++; |
896 | goto nocopy_fmt; |
897 | } |
898 | |
899 | if (fmt[i + 1] == 'B') { |
900 | if (tmp_buf) { |
901 | err = snprintf(tmp_buf, |
902 | (tmp_buf_end - tmp_buf), |
903 | "%pB", |
904 | (void *)(long)raw_args[num_spec]); |
905 | tmp_buf += (err + 1); |
906 | } |
907 | |
908 | i++; |
909 | num_spec++; |
910 | continue; |
911 | } |
912 | |
913 | /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */ |
914 | if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') || |
915 | (fmt[i + 2] != '4' && fmt[i + 2] != '6')) { |
916 | err = -EINVAL; |
917 | goto out; |
918 | } |
919 | |
920 | i += 2; |
921 | if (!tmp_buf) |
922 | goto nocopy_fmt; |
923 | |
924 | sizeof_cur_ip = (fmt[i] == '4') ? 4 : 16; |
925 | if (tmp_buf_end - tmp_buf < sizeof_cur_ip) { |
926 | err = -ENOSPC; |
927 | goto out; |
928 | } |
929 | |
930 | unsafe_ptr = (char *)(long)raw_args[num_spec]; |
931 | err = copy_from_kernel_nofault(cur_ip, unsafe_ptr, |
932 | sizeof_cur_ip); |
933 | if (err < 0) |
934 | memset(cur_ip, 0, sizeof_cur_ip); |
935 | |
936 | /* hack: bstr_printf expects IP addresses to be |
937 | * pre-formatted as strings, ironically, the easiest way |
938 | * to do that is to call snprintf. |
939 | */ |
940 | ip_spec[2] = fmt[i - 1]; |
941 | ip_spec[3] = fmt[i]; |
942 | err = snprintf(tmp_buf, tmp_buf_end - tmp_buf, |
943 | ip_spec, &cur_ip); |
944 | |
945 | tmp_buf += err + 1; |
946 | num_spec++; |
947 | |
948 | continue; |
949 | } else if (fmt[i] == 's') { |
950 | fmt_ptype = fmt[i]; |
951 | fmt_str: |
952 | if (fmt[i + 1] != 0 && |
953 | !isspace(fmt[i + 1]) && |
954 | !ispunct(fmt[i + 1])) { |
955 | err = -EINVAL; |
956 | goto out; |
957 | } |
958 | |
959 | if (!tmp_buf) |
960 | goto nocopy_fmt; |
961 | |
962 | if (tmp_buf_end == tmp_buf) { |
963 | err = -ENOSPC; |
964 | goto out; |
965 | } |
966 | |
967 | unsafe_ptr = (char *)(long)raw_args[num_spec]; |
968 | err = bpf_trace_copy_string(tmp_buf, unsafe_ptr, |
969 | fmt_ptype, |
970 | tmp_buf_end - tmp_buf); |
971 | if (err < 0) { |
972 | tmp_buf[0] = '\0'; |
973 | err = 1; |
974 | } |
975 | |
976 | tmp_buf += err; |
977 | num_spec++; |
978 | |
979 | continue; |
980 | } else if (fmt[i] == 'c') { |
981 | if (!tmp_buf) |
982 | goto nocopy_fmt; |
983 | |
984 | if (tmp_buf_end == tmp_buf) { |
985 | err = -ENOSPC; |
986 | goto out; |
987 | } |
988 | |
989 | *tmp_buf = raw_args[num_spec]; |
990 | tmp_buf++; |
991 | num_spec++; |
992 | |
993 | continue; |
994 | } |
995 | |
996 | sizeof_cur_arg = sizeof(int); |
997 | |
998 | if (fmt[i] == 'l') { |
999 | sizeof_cur_arg = sizeof(long); |
1000 | i++; |
1001 | } |
1002 | if (fmt[i] == 'l') { |
1003 | sizeof_cur_arg = sizeof(long long); |
1004 | i++; |
1005 | } |
1006 | |
1007 | if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' && |
1008 | fmt[i] != 'x' && fmt[i] != 'X') { |
1009 | err = -EINVAL; |
1010 | goto out; |
1011 | } |
1012 | |
1013 | if (tmp_buf) |
1014 | cur_arg = raw_args[num_spec]; |
1015 | nocopy_fmt: |
1016 | if (tmp_buf) { |
1017 | tmp_buf = PTR_ALIGN(tmp_buf, sizeof(u32)); |
1018 | if (tmp_buf_end - tmp_buf < sizeof_cur_arg) { |
1019 | err = -ENOSPC; |
1020 | goto out; |
1021 | } |
1022 | |
1023 | if (sizeof_cur_arg == 8) { |
1024 | *(u32 *)tmp_buf = *(u32 *)&cur_arg; |
1025 | *(u32 *)(tmp_buf + 4) = *((u32 *)&cur_arg + 1); |
1026 | } else { |
1027 | *(u32 *)tmp_buf = (u32)(long)cur_arg; |
1028 | } |
1029 | tmp_buf += sizeof_cur_arg; |
1030 | } |
1031 | num_spec++; |
1032 | } |
1033 | |
1034 | err = 0; |
1035 | out: |
1036 | if (err) |
1037 | bpf_bprintf_cleanup(data); |
1038 | return err; |
1039 | } |
1040 | |
1041 | BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt, |
1042 | const void *, args, u32, data_len) |
1043 | { |
1044 | struct bpf_bprintf_data data = { |
1045 | .get_bin_args = true, |
1046 | }; |
1047 | int err, num_args; |
1048 | |
1049 | if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 || |
1050 | (data_len && !args)) |
1051 | return -EINVAL; |
1052 | num_args = data_len / 8; |
1053 | |
1054 | /* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we |
1055 | * can safely give an unbounded size. |
1056 | */ |
1057 | err = bpf_bprintf_prepare(fmt, UINT_MAX, args, num_args, &data); |
1058 | if (err < 0) |
1059 | return err; |
1060 | |
1061 | err = bstr_printf(str, str_size, fmt, data.bin_args); |
1062 | |
1063 | bpf_bprintf_cleanup(&data); |
1064 | |
1065 | return err + 1; |
1066 | } |
1067 | |
1068 | const struct bpf_func_proto bpf_snprintf_proto = { |
1069 | .func = bpf_snprintf, |
1070 | .gpl_only = true, |
1071 | .ret_type = RET_INTEGER, |
1072 | .arg1_type = ARG_PTR_TO_MEM_OR_NULL, |
1073 | .arg2_type = ARG_CONST_SIZE_OR_ZERO, |
1074 | .arg3_type = ARG_PTR_TO_CONST_STR, |
1075 | .arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, |
1076 | .arg5_type = ARG_CONST_SIZE_OR_ZERO, |
1077 | }; |
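/* A minimal BPF-program-side usage sketch (per bpf-helpers(7)): variadic
 * arguments are passed as an array of u64 and data_len must be a multiple
 * of 8. The names below are illustrative only:
 *
 *   char out[32];
 *   u64 args[] = { 42, 0xff };
 *   bpf_snprintf(out, sizeof(out), "res %d mask %x", args, sizeof(args));
 *
 * The return value is the length of the formatted string including the
 * trailing NUL, or a negative error.
 */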
1078 | |
1079 | /* BPF map elements can contain 'struct bpf_timer'. |
1080 | * Such map owns all of its BPF timers. |
1081 | * 'struct bpf_timer' is allocated as part of map element allocation |
1082 | * and it's zero initialized. |
1083 | * That space is used to keep 'struct bpf_timer_kern'. |
1084 | * bpf_timer_init() allocates 'struct bpf_hrtimer', inits hrtimer, and |
1085 | * remembers 'struct bpf_map *' pointer it's part of. |
1086 | * bpf_timer_set_callback() increments prog refcnt and assigns bpf callback_fn. |
1087 | * bpf_timer_start() arms the timer. |
1088 | * If user space reference to a map goes to zero at this point |
1089 | * ops->map_release_uref callback is responsible for cancelling the timers, |
1090 | * freeing their memory, and decrementing prog's refcnts. |
1091 | * bpf_timer_cancel() cancels the timer and decrements prog's refcnt. |
1092 | * Inner maps can contain bpf timers as well. ops->map_release_uref frees |
1093 | * the timers when an inner map is replaced or deleted by user space. |
1094 | */ |
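/* A sketch of the corresponding BPF-program-side usage (names are
 * illustrative; helper signatures per bpf-helpers(7)):
 *
 *   struct elem { struct bpf_timer t; };      // map value embedding a timer
 *   static int timer_cb(void *map, int *key, struct elem *val);
 *
 *   bpf_timer_init(&val->t, &timer_map, CLOCK_MONOTONIC);
 *   bpf_timer_set_callback(&val->t, timer_cb);
 *   bpf_timer_start(&val->t, 1000000, 0);     // fire in ~1ms, relative mode
 */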
1095 | struct bpf_hrtimer { |
1096 | struct hrtimer timer; |
1097 | struct bpf_map *map; |
1098 | struct bpf_prog *prog; |
1099 | void __rcu *callback_fn; |
1100 | void *value; |
1101 | }; |
1102 | |
1103 | /* the actual struct hidden inside uapi struct bpf_timer */ |
1104 | struct bpf_timer_kern { |
1105 | struct bpf_hrtimer *timer; |
1106 | /* bpf_spin_lock is used here instead of spinlock_t to make |
1107 | * sure that it always fits into space reserved by struct bpf_timer |
1108 | * regardless of LOCKDEP and spinlock debug flags. |
1109 | */ |
1110 | struct bpf_spin_lock lock; |
1111 | } __attribute__((aligned(8))); |
1112 | |
1113 | static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running); |
1114 | |
1115 | static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer) |
1116 | { |
1117 | struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer); |
1118 | struct bpf_map *map = t->map; |
1119 | void *value = t->value; |
1120 | bpf_callback_t callback_fn; |
1121 | void *key; |
1122 | u32 idx; |
1123 | |
1124 | BTF_TYPE_EMIT(struct bpf_timer); |
1125 | callback_fn = rcu_dereference_check(t->callback_fn, rcu_read_lock_bh_held()); |
1126 | if (!callback_fn) |
1127 | goto out; |
1128 | |
1129 | /* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and |
1130 | * cannot be preempted by another bpf_timer_cb() on the same cpu. |
1131 | * Remember the timer this callback is servicing to prevent |
1132 | * deadlock if callback_fn() calls bpf_timer_cancel() or |
1133 | * bpf_map_delete_elem() on the same timer. |
1134 | */ |
1135 | this_cpu_write(hrtimer_running, t); |
1136 | if (map->map_type == BPF_MAP_TYPE_ARRAY) { |
1137 | struct bpf_array *array = container_of(map, struct bpf_array, map); |
1138 | |
1139 | /* compute the key */ |
1140 | idx = ((char *)value - array->value) / array->elem_size; |
1141 | key = &idx; |
1142 | } else { /* hash or lru */ |
1143 | key = value - round_up(map->key_size, 8); |
1144 | } |
1145 | |
1146 | callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0); |
1147 | /* The verifier checked that return value is zero. */ |
1148 | |
1149 | this_cpu_write(hrtimer_running, NULL); |
1150 | out: |
1151 | return HRTIMER_NORESTART; |
1152 | } |
1153 | |
1154 | BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map, |
1155 | u64, flags) |
1156 | { |
1157 | clockid_t clockid = flags & (MAX_CLOCKS - 1); |
1158 | struct bpf_hrtimer *t; |
1159 | int ret = 0; |
1160 | |
1161 | BUILD_BUG_ON(MAX_CLOCKS != 16); |
1162 | BUILD_BUG_ON(sizeof(struct bpf_timer_kern) > sizeof(struct bpf_timer)); |
1163 | BUILD_BUG_ON(__alignof__(struct bpf_timer_kern) != __alignof__(struct bpf_timer)); |
1164 | |
1165 | if (in_nmi()) |
1166 | return -EOPNOTSUPP; |
1167 | |
1168 | if (flags >= MAX_CLOCKS || |
1169 | /* similar to timerfd except _ALARM variants are not supported */ |
1170 | (clockid != CLOCK_MONOTONIC && |
1171 | clockid != CLOCK_REALTIME && |
1172 | clockid != CLOCK_BOOTTIME)) |
1173 | return -EINVAL; |
1174 | __bpf_spin_lock_irqsave(&timer->lock); |
1175 | t = timer->timer; |
1176 | if (t) { |
1177 | ret = -EBUSY; |
1178 | goto out; |
1179 | } |
1180 | if (!atomic64_read(&map->usercnt)) { |
1181 | /* maps with timers must be either held by user space |
1182 | * or pinned in bpffs. |
1183 | */ |
1184 | ret = -EPERM; |
1185 | goto out; |
1186 | } |
1187 | /* allocate hrtimer via map_kmalloc to use memcg accounting */ |
1188 | t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node); |
1189 | if (!t) { |
1190 | ret = -ENOMEM; |
1191 | goto out; |
1192 | } |
1193 | t->value = (void *)timer - map->record->timer_off; |
1194 | t->map = map; |
1195 | t->prog = NULL; |
1196 | rcu_assign_pointer(t->callback_fn, NULL); |
1197 | hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT); |
1198 | t->timer.function = bpf_timer_cb; |
1199 | timer->timer = t; |
1200 | out: |
1201 | __bpf_spin_unlock_irqrestore(&timer->lock); |
1202 | return ret; |
1203 | } |
1204 | |
1205 | static const struct bpf_func_proto bpf_timer_init_proto = { |
1206 | .func = bpf_timer_init, |
1207 | .gpl_only = true, |
1208 | .ret_type = RET_INTEGER, |
1209 | .arg1_type = ARG_PTR_TO_TIMER, |
1210 | .arg2_type = ARG_CONST_MAP_PTR, |
1211 | .arg3_type = ARG_ANYTHING, |
1212 | }; |
1213 | |
1214 | BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callback_fn, |
1215 | struct bpf_prog_aux *, aux) |
1216 | { |
1217 | struct bpf_prog *prev, *prog = aux->prog; |
1218 | struct bpf_hrtimer *t; |
1219 | int ret = 0; |
1220 | |
1221 | if (in_nmi()) |
1222 | return -EOPNOTSUPP; |
1223 | __bpf_spin_lock_irqsave(&timer->lock); |
1224 | t = timer->timer; |
1225 | if (!t) { |
1226 | ret = -EINVAL; |
1227 | goto out; |
1228 | } |
1229 | if (!atomic64_read(&t->map->usercnt)) { |
1230 | /* maps with timers must be either held by user space |
1231 | * or pinned in bpffs. Otherwise timer might still be |
1232 | * running even when bpf prog is detached and user space |
1233 | * is gone, since map_release_uref won't ever be called. |
1234 | */ |
1235 | ret = -EPERM; |
1236 | goto out; |
1237 | } |
1238 | prev = t->prog; |
1239 | if (prev != prog) { |
1240 | /* Bump prog refcnt once. Every bpf_timer_set_callback() |
1241 | * can pick different callback_fn-s within the same prog. |
1242 | */ |
1243 | prog = bpf_prog_inc_not_zero(prog); |
1244 | if (IS_ERR(prog)) { |
1245 | ret = PTR_ERR(prog); |
1246 | goto out; |
1247 | } |
1248 | if (prev) |
1249 | /* Drop prev prog refcnt when swapping with new prog */ |
1250 | bpf_prog_put(prev); |
1251 | t->prog = prog; |
1252 | } |
1253 | rcu_assign_pointer(t->callback_fn, callback_fn); |
1254 | out: |
1255 | __bpf_spin_unlock_irqrestore(&timer->lock); |
1256 | return ret; |
1257 | } |
1258 | |
1259 | static const struct bpf_func_proto bpf_timer_set_callback_proto = { |
1260 | .func = bpf_timer_set_callback, |
1261 | .gpl_only = true, |
1262 | .ret_type = RET_INTEGER, |
1263 | .arg1_type = ARG_PTR_TO_TIMER, |
1264 | .arg2_type = ARG_PTR_TO_FUNC, |
1265 | }; |
1266 | |
1267 | BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, flags) |
1268 | { |
1269 | struct bpf_hrtimer *t; |
1270 | int ret = 0; |
1271 | enum hrtimer_mode mode; |
1272 | |
1273 | if (in_nmi()) |
1274 | return -EOPNOTSUPP; |
1275 | if (flags & ~(BPF_F_TIMER_ABS | BPF_F_TIMER_CPU_PIN)) |
1276 | return -EINVAL; |
1277 | __bpf_spin_lock_irqsave(&timer->lock); |
1278 | t = timer->timer; |
1279 | if (!t || !t->prog) { |
1280 | ret = -EINVAL; |
1281 | goto out; |
1282 | } |
1283 | |
1284 | if (flags & BPF_F_TIMER_ABS) |
1285 | mode = HRTIMER_MODE_ABS_SOFT; |
1286 | else |
1287 | mode = HRTIMER_MODE_REL_SOFT; |
1288 | |
1289 | if (flags & BPF_F_TIMER_CPU_PIN) |
1290 | mode |= HRTIMER_MODE_PINNED; |
1291 | |
1292 | hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode); |
1293 | out: |
1294 | __bpf_spin_unlock_irqrestore(&timer->lock); |
1295 | return ret; |
1296 | } |
1297 | |
1298 | static const struct bpf_func_proto bpf_timer_start_proto = { |
1299 | .func = bpf_timer_start, |
1300 | .gpl_only = true, |
1301 | .ret_type = RET_INTEGER, |
1302 | .arg1_type = ARG_PTR_TO_TIMER, |
1303 | .arg2_type = ARG_ANYTHING, |
1304 | .arg3_type = ARG_ANYTHING, |
1305 | }; |
1306 | |
1307 | static void drop_prog_refcnt(struct bpf_hrtimer *t) |
1308 | { |
1309 | struct bpf_prog *prog = t->prog; |
1310 | |
1311 | if (prog) { |
1312 | bpf_prog_put(prog); |
1313 | t->prog = NULL; |
1314 | rcu_assign_pointer(t->callback_fn, NULL); |
1315 | } |
1316 | } |
1317 | |
1318 | BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer) |
1319 | { |
1320 | struct bpf_hrtimer *t; |
1321 | int ret = 0; |
1322 | |
1323 | if (in_nmi()) |
1324 | return -EOPNOTSUPP; |
1325 | __bpf_spin_lock_irqsave(&timer->lock); |
1326 | t = timer->timer; |
1327 | if (!t) { |
1328 | ret = -EINVAL; |
1329 | goto out; |
1330 | } |
1331 | if (this_cpu_read(hrtimer_running) == t) { |
1332 | /* If bpf callback_fn is trying to bpf_timer_cancel() |
1333 | * its own timer the hrtimer_cancel() will deadlock |
1334 | * since it waits for callback_fn to finish |
1335 | */ |
1336 | ret = -EDEADLK; |
1337 | goto out; |
1338 | } |
1339 | drop_prog_refcnt(t); |
1340 | out: |
1341 | __bpf_spin_unlock_irqrestore(&timer->lock); |
1342 | /* Cancel the timer and wait for associated callback to finish |
1343 | * if it was running. |
1344 | */ |
1345 | ret = ret ?: hrtimer_cancel(&t->timer); |
1346 | return ret; |
1347 | } |
1348 | |
1349 | static const struct bpf_func_proto bpf_timer_cancel_proto = { |
1350 | .func = bpf_timer_cancel, |
1351 | .gpl_only = true, |
1352 | .ret_type = RET_INTEGER, |
1353 | .arg1_type = ARG_PTR_TO_TIMER, |
1354 | }; |
1355 | |
1356 | /* This function is called by map_delete/update_elem for an individual element and |
1357 | * by ops->map_release_uref when the user space reference to a map reaches zero. |
1358 | */ |
1359 | void bpf_timer_cancel_and_free(void *val) |
1360 | { |
1361 | struct bpf_timer_kern *timer = val; |
1362 | struct bpf_hrtimer *t; |
1363 | |
1364 | /* Performance optimization: read timer->timer without lock first. */ |
1365 | if (!READ_ONCE(timer->timer)) |
1366 | return; |
1367 | |
1368 | __bpf_spin_lock_irqsave(&timer->lock); |
1369 | /* re-read it under lock */ |
1370 | t = timer->timer; |
1371 | if (!t) |
1372 | goto out; |
1373 | drop_prog_refcnt(t); |
1374 | /* The subsequent bpf_timer_start/cancel() helpers won't be able to use |
1375 | * this timer, since it won't be initialized. |
1376 | */ |
1377 | timer->timer = NULL; |
1378 | out: |
1379 | __bpf_spin_unlock_irqrestore(&timer->lock); |
1380 | if (!t) |
1381 | return; |
1382 | /* Cancel the timer and wait for callback to complete if it was running. |
1383 | * If hrtimer_cancel() can be safely called it's safe to call kfree(t) |
1384 | * right after for both preallocated and non-preallocated maps. |
1385 | * The timer->timer = NULL was already done and no code path can |
1386 | * see address 't' anymore. |
1387 | * |
1388 | * Check that bpf_map_delete/update_elem() wasn't called from timer |
1389 | * callback_fn. In such case don't call hrtimer_cancel() (since it will |
1390 | * deadlock) and don't call hrtimer_try_to_cancel() (since it will just |
1391 | * return -1). Though callback_fn is still running on this cpu it's |
1392 | * safe to do kfree(t) because bpf_timer_cb() read everything it needed |
1393 | * from 't'. The bpf subprog callback_fn won't be able to access 't', |
1394 | * since timer->timer = NULL was already done. The timer will be |
1395 | * effectively cancelled because bpf_timer_cb() will return |
1396 | * HRTIMER_NORESTART. |
1397 | */ |
1398 | if (this_cpu_read(hrtimer_running) != t) |
1399 | hrtimer_cancel(&t->timer); |
1400 | kfree(t); |
1401 | } |
1402 | |
1403 | BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) |
1404 | { |
1405 | unsigned long *kptr = map_value; |
1406 | |
1407 | return xchg(kptr, (unsigned long)ptr); |
1408 | } |
1409 | |
1410 | /* Unlike other PTR_TO_BTF_ID helpers the btf_id in bpf_kptr_xchg() |
1411 | * helper is determined dynamically by the verifier. Use BPF_PTR_POISON to |
1412 | * denote type that verifier will determine. |
1413 | */ |
1414 | static const struct bpf_func_proto bpf_kptr_xchg_proto = { |
1415 | .func = bpf_kptr_xchg, |
1416 | .gpl_only = false, |
1417 | .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, |
1418 | .ret_btf_id = BPF_PTR_POISON, |
1419 | .arg1_type = ARG_PTR_TO_KPTR, |
1420 | .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE, |
1421 | .arg2_btf_id = BPF_PTR_POISON, |
1422 | }; |
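/* A sketch of BPF-program-side usage (types and names are illustrative):
 * given a map value declaring a referenced kptr field, e.g.
 *
 *   struct map_val { struct task_struct __kptr *task; };
 *
 * a program moves ownership of "new" into the map and takes back whatever
 * pointer was stored there before:
 *
 *   old = bpf_kptr_xchg(&v->task, new);
 *   if (old)
 *           bpf_task_release(old);   // release the previously stored ref
 */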
1423 | |
1424 | /* Since the upper 8 bits of dynptr->size is reserved, the |
1425 | * maximum supported size is 2^24 - 1. |
1426 | */ |
1427 | #define DYNPTR_MAX_SIZE ((1UL << 24) - 1) |
1428 | #define DYNPTR_TYPE_SHIFT 28 |
1429 | #define DYNPTR_SIZE_MASK 0xFFFFFF |
1430 | #define DYNPTR_RDONLY_BIT BIT(31) |
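/* Resulting layout of dynptr->size: bits 0-23 hold the actual size (hence
 * the 2^24 - 1 limit above), the dynptr type is stored starting at bit 28,
 * and bit 31 marks a read-only dynptr. For example, a read-only dynptr of
 * 100 bytes and type T carries size == 100 | (T << 28) | BIT(31).
 */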
1431 | |
1432 | static bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr) |
1433 | { |
1434 | return ptr->size & DYNPTR_RDONLY_BIT; |
1435 | } |
1436 | |
1437 | void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr) |
1438 | { |
1439 | ptr->size |= DYNPTR_RDONLY_BIT; |
1440 | } |
1441 | |
1442 | static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type) |
1443 | { |
1444 | ptr->size |= type << DYNPTR_TYPE_SHIFT; |
1445 | } |
1446 | |
1447 | static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *ptr) |
1448 | { |
1449 | return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT; |
1450 | } |
1451 | |
1452 | u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr) |
1453 | { |
1454 | return ptr->size & DYNPTR_SIZE_MASK; |
1455 | } |
1456 | |
1457 | static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u32 new_size) |
1458 | { |
1459 | u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK; |
1460 | |
1461 | ptr->size = new_size | metadata; |
1462 | } |
1463 | |
1464 | int bpf_dynptr_check_size(u32 size) |
1465 | { |
1466 | return size > DYNPTR_MAX_SIZE ? -E2BIG : 0; |
1467 | } |
1468 | |
1469 | void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, |
1470 | enum bpf_dynptr_type type, u32 offset, u32 size) |
1471 | { |
1472 | ptr->data = data; |
1473 | ptr->offset = offset; |
1474 | ptr->size = size; |
1475 | bpf_dynptr_set_type(ptr, type); |
1476 | } |
1477 | |
1478 | void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr) |
1479 | { |
1480 | memset(ptr, 0, sizeof(*ptr)); |
1481 | } |
1482 | |
1483 | static int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len) |
1484 | { |
1485 | u32 size = __bpf_dynptr_size(ptr); |
1486 | |
1487 | if (len > size || offset > size - len) |
1488 | return -E2BIG; |
1489 | |
1490 | return 0; |
1491 | } |
1492 | |
1493 | BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr) |
1494 | { |
1495 | int err; |
1496 | |
1497 | BTF_TYPE_EMIT(struct bpf_dynptr); |
1498 | |
1499 | err = bpf_dynptr_check_size(size); |
1500 | if (err) |
1501 | goto error; |
1502 | |
1503 | /* flags is currently unsupported */ |
1504 | if (flags) { |
1505 | err = -EINVAL; |
1506 | goto error; |
1507 | } |
1508 | |
1509 | bpf_dynptr_init(ptr, data, BPF_DYNPTR_TYPE_LOCAL, 0, size); |
1510 | |
1511 | return 0; |
1512 | |
1513 | error: |
1514 | bpf_dynptr_set_null(ptr); |
1515 | return err; |
1516 | } |
1517 | |
1518 | static const struct bpf_func_proto bpf_dynptr_from_mem_proto = { |
1519 | .func = bpf_dynptr_from_mem, |
1520 | .gpl_only = false, |
1521 | .ret_type = RET_INTEGER, |
1522 | .arg1_type = ARG_PTR_TO_UNINIT_MEM, |
1523 | .arg2_type = ARG_CONST_SIZE_OR_ZERO, |
1524 | .arg3_type = ARG_ANYTHING, |
1525 | .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT, |
1526 | }; |
1527 | |
1528 | BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src, |
1529 | u32, offset, u64, flags) |
1530 | { |
1531 | enum bpf_dynptr_type type; |
1532 | int err; |
1533 | |
1534 | if (!src->data || flags) |
1535 | return -EINVAL; |
1536 | |
1537 | err = bpf_dynptr_check_off_len(src, offset, len); |
1538 | if (err) |
1539 | return err; |
1540 | |
1541 | type = bpf_dynptr_get_type(src); |
1542 | |
1543 | switch (type) { |
1544 | case BPF_DYNPTR_TYPE_LOCAL: |
1545 | case BPF_DYNPTR_TYPE_RINGBUF: |
1546 | /* Source and destination may possibly overlap, hence use memmove to |
1547 | * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr |
1548 | * pointing to overlapping PTR_TO_MAP_VALUE regions. |
1549 | */ |
1550 | memmove(dst, src->data + src->offset + offset, len); |
1551 | return 0; |
1552 | case BPF_DYNPTR_TYPE_SKB: |
1553 | return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len); |
1554 | case BPF_DYNPTR_TYPE_XDP: |
1555 | return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len); |
1556 | default: |
1557 | WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type); |
1558 | return -EFAULT; |
1559 | } |
1560 | } |
1561 | |
1562 | static const struct bpf_func_proto bpf_dynptr_read_proto = { |
1563 | .func = bpf_dynptr_read, |
1564 | .gpl_only = false, |
1565 | .ret_type = RET_INTEGER, |
1566 | .arg1_type = ARG_PTR_TO_UNINIT_MEM, |
1567 | .arg2_type = ARG_CONST_SIZE_OR_ZERO, |
1568 | .arg3_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY, |
1569 | .arg4_type = ARG_ANYTHING, |
1570 | .arg5_type = ARG_ANYTHING, |
1571 | }; |
1572 | |
1573 | BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src, |
1574 | u32, len, u64, flags) |
1575 | { |
1576 | enum bpf_dynptr_type type; |
1577 | int err; |
1578 | |
1579 | if (!dst->data || __bpf_dynptr_is_rdonly(dst)) |
1580 | return -EINVAL; |
1581 | |
1582 | err = bpf_dynptr_check_off_len(dst, offset, len); |
1583 | if (err) |
1584 | return err; |
1585 | |
1586 | type = bpf_dynptr_get_type(dst); |
1587 | |
1588 | switch (type) { |
1589 | case BPF_DYNPTR_TYPE_LOCAL: |
1590 | case BPF_DYNPTR_TYPE_RINGBUF: |
1591 | if (flags) |
1592 | return -EINVAL; |
1593 | /* Source and destination may possibly overlap, hence use memmove to |
1594 | * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr |
1595 | * pointing to overlapping PTR_TO_MAP_VALUE regions. |
1596 | */ |
1597 | memmove(dst->data + dst->offset + offset, src, len); |
1598 | return 0; |
1599 | case BPF_DYNPTR_TYPE_SKB: |
1600 | return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len, |
1601 | flags); |
1602 | case BPF_DYNPTR_TYPE_XDP: |
1603 | if (flags) |
1604 | return -EINVAL; |
1605 | return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len); |
1606 | default: |
1607 | WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type); |
1608 | return -EFAULT; |
1609 | } |
1610 | } |
1611 | |
1612 | static const struct bpf_func_proto bpf_dynptr_write_proto = { |
1613 | .func = bpf_dynptr_write, |
1614 | .gpl_only = false, |
1615 | .ret_type = RET_INTEGER, |
1616 | .arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY, |
1617 | .arg2_type = ARG_ANYTHING, |
1618 | .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, |
1619 | .arg4_type = ARG_CONST_SIZE_OR_ZERO, |
1620 | .arg5_type = ARG_ANYTHING, |
1621 | }; |
1622 | |
1623 | BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len) |
1624 | { |
1625 | enum bpf_dynptr_type type; |
1626 | int err; |
1627 | |
1628 | if (!ptr->data) |
1629 | return 0; |
1630 | |
1631 | err = bpf_dynptr_check_off_len(ptr, offset, len); |
1632 | if (err) |
1633 | return 0; |
1634 | |
1635 | if (__bpf_dynptr_is_rdonly(ptr)) |
1636 | return 0; |
1637 | |
1638 | type = bpf_dynptr_get_type(ptr); |
1639 | |
1640 | switch (type) { |
1641 | case BPF_DYNPTR_TYPE_LOCAL: |
1642 | case BPF_DYNPTR_TYPE_RINGBUF: |
1643 | return (unsigned long)(ptr->data + ptr->offset + offset); |
1644 | case BPF_DYNPTR_TYPE_SKB: |
1645 | case BPF_DYNPTR_TYPE_XDP: |
1646 | /* skb and xdp dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */ |
1647 | return 0; |
1648 | default: |
1649 | WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type); |
1650 | return 0; |
1651 | } |
1652 | } |
1653 | |
1654 | static const struct bpf_func_proto bpf_dynptr_data_proto = { |
1655 | .func = bpf_dynptr_data, |
1656 | .gpl_only = false, |
1657 | .ret_type = RET_PTR_TO_DYNPTR_MEM_OR_NULL, |
1658 | .arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY, |
1659 | .arg2_type = ARG_ANYTHING, |
1660 | .arg3_type = ARG_CONST_ALLOC_SIZE_OR_ZERO, |
1661 | }; |
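/* A sketch of BPF-program-side usage of the dynptr helpers above (names are
 * illustrative; signatures per bpf-helpers(7)):
 *
 *   struct bpf_dynptr dptr;
 *   char buf[16];
 *
 *   bpf_dynptr_from_mem(data, sizeof(data), 0, &dptr);
 *   bpf_dynptr_read(buf, sizeof(buf), &dptr, 0, 0);    // copy 16 bytes out
 *   bpf_dynptr_write(&dptr, 0, buf, sizeof(buf), 0);   // and write them back
 *   p = bpf_dynptr_data(&dptr, 0, 4);                  // direct slice or NULL
 */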
1662 | |
1663 | const struct bpf_func_proto bpf_get_current_task_proto __weak; |
1664 | const struct bpf_func_proto bpf_get_current_task_btf_proto __weak; |
1665 | const struct bpf_func_proto bpf_probe_read_user_proto __weak; |
1666 | const struct bpf_func_proto bpf_probe_read_user_str_proto __weak; |
1667 | const struct bpf_func_proto bpf_probe_read_kernel_proto __weak; |
1668 | const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak; |
1669 | const struct bpf_func_proto bpf_task_pt_regs_proto __weak; |
1670 | |
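/* Base set of helpers available to all program types. The switches below are
 * tiered by capability: the first group needs no privilege, the second
 * additionally requires bpf_capable() (CAP_BPF or CAP_SYS_ADMIN), and the
 * last also requires perfmon_capable() (CAP_PERFMON or CAP_SYS_ADMIN), since
 * those helpers can expose kernel addresses and memory contents.
 */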
1671 | const struct bpf_func_proto * |
1672 | bpf_base_func_proto(enum bpf_func_id func_id) |
1673 | { |
1674 | switch (func_id) { |
1675 | case BPF_FUNC_map_lookup_elem: |
1676 | return &bpf_map_lookup_elem_proto; |
1677 | case BPF_FUNC_map_update_elem: |
1678 | return &bpf_map_update_elem_proto; |
1679 | case BPF_FUNC_map_delete_elem: |
1680 | return &bpf_map_delete_elem_proto; |
1681 | case BPF_FUNC_map_push_elem: |
1682 | return &bpf_map_push_elem_proto; |
1683 | case BPF_FUNC_map_pop_elem: |
1684 | return &bpf_map_pop_elem_proto; |
1685 | case BPF_FUNC_map_peek_elem: |
1686 | return &bpf_map_peek_elem_proto; |
1687 | case BPF_FUNC_map_lookup_percpu_elem: |
1688 | return &bpf_map_lookup_percpu_elem_proto; |
1689 | case BPF_FUNC_get_prandom_u32: |
1690 | return &bpf_get_prandom_u32_proto; |
1691 | case BPF_FUNC_get_smp_processor_id: |
1692 | return &bpf_get_raw_smp_processor_id_proto; |
1693 | case BPF_FUNC_get_numa_node_id: |
1694 | return &bpf_get_numa_node_id_proto; |
1695 | case BPF_FUNC_tail_call: |
1696 | return &bpf_tail_call_proto; |
1697 | case BPF_FUNC_ktime_get_ns: |
1698 | return &bpf_ktime_get_ns_proto; |
1699 | case BPF_FUNC_ktime_get_boot_ns: |
1700 | return &bpf_ktime_get_boot_ns_proto; |
1701 | case BPF_FUNC_ktime_get_tai_ns: |
1702 | return &bpf_ktime_get_tai_ns_proto; |
1703 | case BPF_FUNC_ringbuf_output: |
1704 | return &bpf_ringbuf_output_proto; |
1705 | case BPF_FUNC_ringbuf_reserve: |
1706 | return &bpf_ringbuf_reserve_proto; |
1707 | case BPF_FUNC_ringbuf_submit: |
1708 | return &bpf_ringbuf_submit_proto; |
1709 | case BPF_FUNC_ringbuf_discard: |
1710 | return &bpf_ringbuf_discard_proto; |
1711 | case BPF_FUNC_ringbuf_query: |
1712 | return &bpf_ringbuf_query_proto; |
1713 | case BPF_FUNC_strncmp: |
1714 | return &bpf_strncmp_proto; |
1715 | case BPF_FUNC_strtol: |
1716 | return &bpf_strtol_proto; |
1717 | case BPF_FUNC_strtoul: |
1718 | return &bpf_strtoul_proto; |
1719 | default: |
1720 | break; |
1721 | } |
1722 | |
1723 | if (!bpf_capable()) |
1724 | return NULL; |
1725 | |
1726 | switch (func_id) { |
1727 | case BPF_FUNC_spin_lock: |
1728 | return &bpf_spin_lock_proto; |
1729 | case BPF_FUNC_spin_unlock: |
1730 | return &bpf_spin_unlock_proto; |
1731 | case BPF_FUNC_jiffies64: |
1732 | return &bpf_jiffies64_proto; |
1733 | case BPF_FUNC_per_cpu_ptr: |
1734 | return &bpf_per_cpu_ptr_proto; |
1735 | case BPF_FUNC_this_cpu_ptr: |
1736 | return &bpf_this_cpu_ptr_proto; |
1737 | case BPF_FUNC_timer_init: |
1738 | return &bpf_timer_init_proto; |
1739 | case BPF_FUNC_timer_set_callback: |
1740 | return &bpf_timer_set_callback_proto; |
1741 | case BPF_FUNC_timer_start: |
1742 | return &bpf_timer_start_proto; |
1743 | case BPF_FUNC_timer_cancel: |
1744 | return &bpf_timer_cancel_proto; |
1745 | case BPF_FUNC_kptr_xchg: |
1746 | return &bpf_kptr_xchg_proto; |
1747 | case BPF_FUNC_for_each_map_elem: |
1748 | return &bpf_for_each_map_elem_proto; |
1749 | case BPF_FUNC_loop: |
1750 | return &bpf_loop_proto; |
1751 | case BPF_FUNC_user_ringbuf_drain: |
1752 | return &bpf_user_ringbuf_drain_proto; |
1753 | case BPF_FUNC_ringbuf_reserve_dynptr: |
1754 | return &bpf_ringbuf_reserve_dynptr_proto; |
1755 | case BPF_FUNC_ringbuf_submit_dynptr: |
1756 | return &bpf_ringbuf_submit_dynptr_proto; |
1757 | case BPF_FUNC_ringbuf_discard_dynptr: |
1758 | return &bpf_ringbuf_discard_dynptr_proto; |
1759 | case BPF_FUNC_dynptr_from_mem: |
1760 | return &bpf_dynptr_from_mem_proto; |
1761 | case BPF_FUNC_dynptr_read: |
1762 | return &bpf_dynptr_read_proto; |
1763 | case BPF_FUNC_dynptr_write: |
1764 | return &bpf_dynptr_write_proto; |
1765 | case BPF_FUNC_dynptr_data: |
1766 | return &bpf_dynptr_data_proto; |
1767 | #ifdef CONFIG_CGROUPS |
1768 | case BPF_FUNC_cgrp_storage_get: |
1769 | return &bpf_cgrp_storage_get_proto; |
1770 | case BPF_FUNC_cgrp_storage_delete: |
1771 | return &bpf_cgrp_storage_delete_proto; |
1772 | case BPF_FUNC_get_current_cgroup_id: |
1773 | return &bpf_get_current_cgroup_id_proto; |
1774 | case BPF_FUNC_get_current_ancestor_cgroup_id: |
1775 | return &bpf_get_current_ancestor_cgroup_id_proto; |
1776 | #endif |
1777 | default: |
1778 | break; |
1779 | } |
1780 | |
1781 | if (!perfmon_capable()) |
1782 | return NULL; |
1783 | |
1784 | switch (func_id) { |
1785 | case BPF_FUNC_trace_printk: |
1786 | return bpf_get_trace_printk_proto(); |
1787 | case BPF_FUNC_get_current_task: |
1788 | return &bpf_get_current_task_proto; |
1789 | case BPF_FUNC_get_current_task_btf: |
1790 | return &bpf_get_current_task_btf_proto; |
1791 | case BPF_FUNC_probe_read_user: |
1792 | return &bpf_probe_read_user_proto; |
1793 | case BPF_FUNC_probe_read_kernel: |
1794 | return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? |
1795 | NULL : &bpf_probe_read_kernel_proto; |
1796 | case BPF_FUNC_probe_read_user_str: |
1797 | return &bpf_probe_read_user_str_proto; |
1798 | case BPF_FUNC_probe_read_kernel_str: |
1799 | return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? |
1800 | NULL : &bpf_probe_read_kernel_str_proto; |
1801 | case BPF_FUNC_snprintf_btf: |
1802 | return &bpf_snprintf_btf_proto; |
1803 | case BPF_FUNC_snprintf: |
1804 | return &bpf_snprintf_proto; |
1805 | case BPF_FUNC_task_pt_regs: |
1806 | return &bpf_task_pt_regs_proto; |
1807 | case BPF_FUNC_trace_vprintk: |
1808 | return bpf_get_trace_vprintk_proto(); |
1809 | default: |
1810 | return NULL; |
1811 | } |
1812 | } |
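
/* Example: a subsystem's verifier_ops->get_func_proto() callback typically
 * handles its own helpers first and falls back to the common set above. A
 * hedged sketch only; example_get_func_proto, BPF_FUNC_example_helper and
 * bpf_example_helper_proto are hypothetical names, not existing kernel symbols:
 *
 *	static const struct bpf_func_proto *
 *	example_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 *	{
 *		switch (func_id) {
 *		case BPF_FUNC_example_helper:
 *			return &bpf_example_helper_proto;
 *		default:
 *			return bpf_base_func_proto(func_id);
 *		}
 *	}
 */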
1813 | |
1814 | void bpf_list_head_free(const struct btf_field *field, void *list_head, |
1815 | struct bpf_spin_lock *spin_lock) |
1816 | { |
1817 | struct list_head *head = list_head, *orig_head = list_head; |
1818 | |
1819 | BUILD_BUG_ON(sizeof(struct list_head) > sizeof(struct bpf_list_head)); |
1820 | BUILD_BUG_ON(__alignof__(struct list_head) > __alignof__(struct bpf_list_head)); |
1821 | |
1822 | /* Do the actual list draining outside the lock to not hold the lock for |
1823 | * too long, and also prevent deadlocks if tracing programs end up |
1824 | * executing on entry/exit of functions called inside the critical |
1825 | * section, and end up doing map ops that call bpf_list_head_free for |
1826 | * the same map value again. |
1827 | */ |
1828 | __bpf_spin_lock_irqsave(spin_lock); |
1829 | if (!head->next || list_empty(head)) |
1830 | goto unlock; |
1831 | head = head->next; |
1832 | unlock: |
1833 | INIT_LIST_HEAD(orig_head); |
1834 | __bpf_spin_unlock_irqrestore(spin_lock); |
1835 | |
1836 | while (head != orig_head) { |
1837 | void *obj = head; |
1838 | |
1839 | obj -= field->graph_root.node_offset; |
1840 | head = head->next; |
1841 | /* The contained type can also have resources, including a |
1842 | * bpf_list_head which needs to be freed. |
1843 | */ |
1844 | migrate_disable(); |
1845 | __bpf_obj_drop_impl(obj, field->graph_root.value_rec, false); |
1846 | migrate_enable(); |
1847 | } |
1848 | } |
1849 | |
1850 | /* Like rbtree_postorder_for_each_entry_safe, but 'pos' and 'n' are |
1851 | * 'rb_node *', so field name of rb_node within containing struct is not |
1852 | * needed. |
1853 | * |
1854 | * Since bpf_rb_tree's node type has a corresponding struct btf_field with |
1855 | * graph_root.node_offset, it's not necessary to know field name |
1856 | * or type of node struct |
1857 | */ |
1858 | #define bpf_rbtree_postorder_for_each_entry_safe(pos, n, root) \ |
1859 | for (pos = rb_first_postorder(root); \ |
1860 | pos && ({ n = rb_next_postorder(pos); 1; }); \ |
1861 | pos = n) |
1862 | |
1863 | void bpf_rb_root_free(const struct btf_field *field, void *rb_root, |
1864 | struct bpf_spin_lock *spin_lock) |
1865 | { |
1866 | struct rb_root_cached orig_root, *root = rb_root; |
1867 | struct rb_node *pos, *n; |
1868 | void *obj; |
1869 | |
1870 | BUILD_BUG_ON(sizeof(struct rb_root_cached) > sizeof(struct bpf_rb_root)); |
1871 | BUILD_BUG_ON(__alignof__(struct rb_root_cached) > __alignof__(struct bpf_rb_root)); |
1872 | |
1873 | __bpf_spin_lock_irqsave(spin_lock); |
1874 | orig_root = *root; |
1875 | *root = RB_ROOT_CACHED; |
1876 | __bpf_spin_unlock_irqrestore(spin_lock); |
1877 | |
1878 | bpf_rbtree_postorder_for_each_entry_safe(pos, n, &orig_root.rb_root) { |
1879 | obj = pos; |
1880 | obj -= field->graph_root.node_offset; |
1881 | |
1882 | |
1883 | migrate_disable(); |
1884 | __bpf_obj_drop_impl(obj, field->graph_root.value_rec, false); |
1885 | migrate_enable(); |
1886 | } |
1887 | } |
1888 | |
1889 | __diag_push(); |
1890 | __diag_ignore_all("-Wmissing-prototypes", |
1891 | "Global functions as their definitions will be in vmlinux BTF"); |
1892 | |
1893 | __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) |
1894 | { |
1895 | struct btf_struct_meta *meta = meta__ign; |
1896 | u64 size = local_type_id__k; |
1897 | void *p; |
1898 | |
1899 | p = bpf_mem_alloc(&bpf_global_ma, size); |
1900 | if (!p) |
1901 | return NULL; |
1902 | if (meta) |
1903 | bpf_obj_init(meta->record, p); |
1904 | return p; |
1905 | } |
1906 | |
1907 | __bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign) |
1908 | { |
1909 | u64 size = local_type_id__k; |
1910 | |
1911 | /* The verifier has ensured that meta__ign must be NULL */ |
1912 | return bpf_mem_alloc(&bpf_global_percpu_ma, size); |
1913 | } |
1914 | |
1915 | /* Must be called under migrate_disable(), as required by bpf_mem_free */ |
1916 | void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu) |
1917 | { |
1918 | struct bpf_mem_alloc *ma; |
1919 | |
1920 | if (rec && rec->refcount_off >= 0 && |
1921 | !refcount_dec_and_test((refcount_t *)(p + rec->refcount_off))) { |
1922 | /* Object is refcounted and refcount_dec didn't result in 0 |
1923 | * refcount. Return without freeing the object |
1924 | */ |
1925 | return; |
1926 | } |
1927 | |
1928 | if (rec) |
1929 | bpf_obj_free_fields(rec, p); |
1930 | |
1931 | if (percpu) |
1932 | ma = &bpf_global_percpu_ma; |
1933 | else |
1934 | ma = &bpf_global_ma; |
1935 | if (rec && rec->refcount_off >= 0) |
1936 | bpf_mem_free_rcu(ma, p); |
1937 | else |
1938 | bpf_mem_free(ma, p); |
1939 | } |
1940 | |
1941 | __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) |
1942 | { |
1943 | struct btf_struct_meta *meta = meta__ign; |
1944 | void *p = p__alloc; |
1945 | |
1946 | __bpf_obj_drop_impl(p, meta ? meta->record : NULL, false); |
1947 | } |
1948 | |
1949 | __bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign) |
1950 | { |
1951 | /* The verifier has ensured that meta__ign must be NULL */ |
1952 | bpf_mem_free_rcu(&bpf_global_percpu_ma, p__alloc); |
1953 | } |
1954 | |
1955 | __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign) |
1956 | { |
1957 | struct btf_struct_meta *meta = meta__ign; |
1958 | struct bpf_refcount *ref; |
1959 | |
1960 | /* Could just cast directly to refcount_t *, but need some code using |
1961 | * bpf_refcount type so that it is emitted in vmlinux BTF |
1962 | */ |
1963 | ref = (struct bpf_refcount *)(p__refcounted_kptr + meta->record->refcount_off); |
1964 | if (!refcount_inc_not_zero((refcount_t *)ref)) |
1965 | return NULL; |
1966 | |
1967 | /* Verifier strips KF_RET_NULL if input is owned ref, see is_kfunc_ret_null |
1968 | * in verifier.c |
1969 | */ |
1970 | return (void *)p__refcounted_kptr; |
1971 | } |
1972 | |
1973 | static int __bpf_list_add(struct bpf_list_node_kern *node, |
1974 | struct bpf_list_head *head, |
1975 | bool tail, struct btf_record *rec, u64 off) |
1976 | { |
1977 | struct list_head *n = &node->list_head, *h = (void *)head; |
1978 | |
1979 | /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't |
1980 | * called on its fields, so init here |
1981 | */ |
1982 | if (unlikely(!h->next)) |
1983 | INIT_LIST_HEAD(h); |
1984 | |
1985 | /* node->owner != NULL implies !list_empty(n), no need to separately |
1986 | * check the latter |
1987 | */ |
1988 | if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) { |
1989 | /* Only called from BPF prog, no need to migrate_disable */ |
1990 | __bpf_obj_drop_impl((void *)n - off, rec, false); |
1991 | return -EINVAL; |
1992 | } |
1993 | |
1994 | tail ? list_add_tail(n, h) : list_add(n, h); |
1995 | WRITE_ONCE(node->owner, head); |
1996 | |
1997 | return 0; |
1998 | } |
1999 | |
2000 | __bpf_kfunc int bpf_list_push_front_impl(struct bpf_list_head *head, |
2001 | struct bpf_list_node *node, |
2002 | void *meta__ign, u64 off) |
2003 | { |
2004 | struct bpf_list_node_kern *n = (void *)node; |
2005 | struct btf_struct_meta *meta = meta__ign; |
2006 | |
2007 | return __bpf_list_add(n, head, false, meta ? meta->record : NULL, off); |
2008 | } |
2009 | |
2010 | __bpf_kfunc int bpf_list_push_back_impl(struct bpf_list_head *head, |
2011 | struct bpf_list_node *node, |
2012 | void *meta__ign, u64 off) |
2013 | { |
2014 | struct bpf_list_node_kern *n = (void *)node; |
2015 | struct btf_struct_meta *meta = meta__ign; |
2016 | |
2017 | return __bpf_list_add(n, head, true, meta ? meta->record : NULL, off); |
2018 | } |
2019 | |
2020 | static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail) |
2021 | { |
2022 | struct list_head *n, *h = (void *)head; |
2023 | struct bpf_list_node_kern *node; |
2024 | |
2025 | /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't |
2026 | * called on its fields, so init here |
2027 | */ |
2028 | if (unlikely(!h->next)) |
2029 | INIT_LIST_HEAD(h); |
2030 | if (list_empty(h)) |
2031 | return NULL; |
2032 | |
2033 | n = tail ? h->prev : h->next; |
2034 | node = container_of(n, struct bpf_list_node_kern, list_head); |
2035 | if (WARN_ON_ONCE(READ_ONCE(node->owner) != head)) |
2036 | return NULL; |
2037 | |
2038 | list_del_init(n); |
2039 | WRITE_ONCE(node->owner, NULL); |
2040 | return (struct bpf_list_node *)n; |
2041 | } |
2042 | |
2043 | __bpf_kfunc struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) |
2044 | { |
2045 | return __bpf_list_del(head, false); |
2046 | } |
2047 | |
2048 | __bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) |
2049 | { |
2050 | return __bpf_list_del(head, true); |
2051 | } |
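
/* Example usage from a BPF program (a hedged sketch; bpf_obj_new(),
 * bpf_list_push_back() and bpf_obj_drop() refer to the convenience macros in
 * the BPF selftests' bpf_experimental.h, and glock/ghead stand in for a
 * bpf_spin_lock and bpf_list_head declared by the program):
 *
 *	struct elem {
 *		long data;
 *		struct bpf_list_node node;
 *	};
 *
 *	struct elem *e = bpf_obj_new(typeof(*e));
 *
 *	if (!e)
 *		return 0;
 *	e->data = 42;
 *	bpf_spin_lock(&glock);
 *	bpf_list_push_back(&ghead, &e->node);
 *	bpf_spin_unlock(&glock);
 *
 * A node popped back off the list becomes an owned reference again and must be
 * passed to bpf_obj_drop() (or pushed into another collection) before the
 * program returns.
 */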
2052 | |
2053 | __bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root, |
2054 | struct bpf_rb_node *node) |
2055 | { |
2056 | struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node; |
2057 | struct rb_root_cached *r = (struct rb_root_cached *)root; |
2058 | struct rb_node *n = &node_internal->rb_node; |
2059 | |
2060 | /* node_internal->owner != root implies either RB_EMPTY_NODE(n) or |
2061 | * n is owned by some other tree. No need to check RB_EMPTY_NODE(n) |
2062 | */ |
2063 | if (READ_ONCE(node_internal->owner) != root) |
2064 | return NULL; |
2065 | |
2066 | rb_erase_cached(n, r); |
2067 | RB_CLEAR_NODE(n); |
2068 | WRITE_ONCE(node_internal->owner, NULL); |
2069 | return (struct bpf_rb_node *)n; |
2070 | } |
2071 | |
2072 | /* Need to copy rbtree_add_cached's logic here because our 'less' is a BPF |
2073 | * program |
2074 | */ |
2075 | static int __bpf_rbtree_add(struct bpf_rb_root *root, |
2076 | struct bpf_rb_node_kern *node, |
2077 | void *less, struct btf_record *rec, u64 off) |
2078 | { |
2079 | struct rb_node **link = &((struct rb_root_cached *)root)->rb_root.rb_node; |
2080 | struct rb_node *parent = NULL, *n = &node->rb_node; |
2081 | bpf_callback_t cb = (bpf_callback_t)less; |
2082 | bool leftmost = true; |
2083 | |
2084 | /* node->owner != NULL implies !RB_EMPTY_NODE(n), no need to separately |
2085 | * check the latter |
2086 | */ |
2087 | if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) { |
2088 | /* Only called from BPF prog, no need to migrate_disable */ |
2089 | __bpf_obj_drop_impl((void *)n - off, rec, false); |
2090 | return -EINVAL; |
2091 | } |
2092 | |
2093 | while (*link) { |
2094 | parent = *link; |
2095 | if (cb((uintptr_t)node, (uintptr_t)parent, 0, 0, 0)) { |
2096 | link = &parent->rb_left; |
2097 | } else { |
2098 | link = &parent->rb_right; |
2099 | leftmost = false; |
2100 | } |
2101 | } |
2102 | |
2103 | rb_link_node(n, parent, link); |
2104 | rb_insert_color_cached(n, (struct rb_root_cached *)root, leftmost); |
2105 | WRITE_ONCE(node->owner, root); |
2106 | return 0; |
2107 | } |
2108 | |
2109 | __bpf_kfunc int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node, |
2110 | bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b), |
2111 | void *meta__ign, u64 off) |
2112 | { |
2113 | struct btf_struct_meta *meta = meta__ign; |
2114 | struct bpf_rb_node_kern *n = (void *)node; |
2115 | |
2116 | return __bpf_rbtree_add(root, n, (void *)less, meta ? meta->record : NULL, off); |
2117 | } |
2118 | |
2119 | __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) |
2120 | { |
2121 | struct rb_root_cached *r = (struct rb_root_cached *)root; |
2122 | |
2123 | return (struct bpf_rb_node *)rb_first_cached(r); |
2124 | } |
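
/* Example usage from a BPF program (a hedged sketch; bpf_rbtree_add() refers
 * to the convenience macro in the BPF selftests' bpf_experimental.h, and
 * node_data/groot/glock are hypothetical program-side declarations):
 *
 *	struct node_data {
 *		long key;
 *		struct bpf_rb_node node;
 *	};
 *
 *	static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
 *	{
 *		struct node_data *na, *nb;
 *
 *		na = container_of(a, struct node_data, node);
 *		nb = container_of(b, struct node_data, node);
 *		return na->key < nb->key;
 *	}
 *
 *	bpf_spin_lock(&glock);
 *	bpf_rbtree_add(&groot, &n->node, less);
 *	bpf_spin_unlock(&glock);
 */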
2125 | |
2126 | /** |
2127 | * bpf_task_acquire - Acquire a reference to a task. A task acquired by this |
2128 | * kfunc which is not stored in a map as a kptr must be released by calling |
2129 | * bpf_task_release(). |
2130 | * @p: The task on which a reference is being acquired. |
2131 | */ |
2132 | __bpf_kfunc struct task_struct *bpf_task_acquire(struct task_struct *p) |
2133 | { |
2134 | if (refcount_inc_not_zero(&p->rcu_users)) |
2135 | return p; |
2136 | return NULL; |
2137 | } |
2138 | |
2139 | /** |
2140 | * bpf_task_release - Release the reference acquired on a task. |
2141 | * @p: The task on which a reference is being released. |
2142 | */ |
2143 | __bpf_kfunc void bpf_task_release(struct task_struct *p) |
2144 | { |
2145 | put_task_struct_rcu_user(p); |
2146 | } |
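
/* Example usage from a BPF program (a hedged sketch): take a reference on a
 * trusted task pointer before using it, and drop it when done:
 *
 *	struct task_struct *acquired;
 *
 *	acquired = bpf_task_acquire(task);
 *	if (!acquired)
 *		return 0;
 *	... use acquired ...
 *	bpf_task_release(acquired);
 */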
2147 | |
2148 | #ifdef CONFIG_CGROUPS |
2149 | /** |
2150 | * bpf_cgroup_acquire - Acquire a reference to a cgroup. A cgroup acquired by |
2151 | * this kfunc which is not stored in a map as a kptr must be released by |
2152 | * calling bpf_cgroup_release(). |
2153 | * @cgrp: The cgroup on which a reference is being acquired. |
2154 | */ |
2155 | __bpf_kfunc struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp) |
2156 | { |
2157 | return cgroup_tryget(cgrp) ? cgrp : NULL; |
2158 | } |
2159 | |
2160 | /** |
2161 | * bpf_cgroup_release - Release the reference acquired on a cgroup. |
2162 | * If this kfunc is invoked in an RCU read region, the cgroup is guaranteed to |
2163 | * not be freed until the current grace period has ended, even if its refcount |
2164 | * drops to 0. |
2165 | * @cgrp: The cgroup on which a reference is being released. |
2166 | */ |
2167 | __bpf_kfunc void bpf_cgroup_release(struct cgroup *cgrp) |
2168 | { |
2169 | cgroup_put(cgrp); |
2170 | } |
2171 | |
2172 | /** |
2173 | * bpf_cgroup_ancestor - Perform a lookup on an entry in a cgroup's ancestor |
2174 | * array. A cgroup returned by this kfunc which is not subsequently stored in a |
2175 | * map must be released by calling bpf_cgroup_release(). |
2176 | * @cgrp: The cgroup for which we're performing a lookup. |
2177 | * @level: The level of ancestor to look up. |
2178 | */ |
2179 | __bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) |
2180 | { |
2181 | struct cgroup *ancestor; |
2182 | |
2183 | if (level > cgrp->level || level < 0) |
2184 | return NULL; |
2185 | |
2186 | /* cgrp's refcnt could be 0 here, but ancestors can still be accessed */ |
2187 | ancestor = cgrp->ancestors[level]; |
2188 | if (!cgroup_tryget(ancestor)) |
2189 | return NULL; |
2190 | return ancestor; |
2191 | } |
2192 | |
2193 | /** |
2194 | * bpf_cgroup_from_id - Find a cgroup from its ID. A cgroup returned by this |
2195 | * kfunc which is not subsequently stored in a map must be released by calling |
2196 | * bpf_cgroup_release(). |
2197 | * @cgid: cgroup id. |
2198 | */ |
2199 | __bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid) |
2200 | { |
2201 | struct cgroup *cgrp; |
2202 | |
2203 | cgrp = cgroup_get_from_id(cgid); |
2204 | if (IS_ERR(cgrp)) |
2205 | return NULL; |
2206 | return cgrp; |
2207 | } |
2208 | |
2209 | /** |
2210 | * bpf_task_under_cgroup - wrap task_under_cgroup_hierarchy() as a kfunc, test |
2211 | * task's membership of cgroup ancestry. |
2212 | * @task: the task to be tested |
2213 | * @ancestor: possible ancestor of @task's cgroup |
2214 | * |
2215 | * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor. |
2216 | * It follows all the same rules as cgroup_is_descendant, and only applies |
2217 | * to the default hierarchy. |
2218 | */ |
2219 | __bpf_kfunc long bpf_task_under_cgroup(struct task_struct *task, |
2220 | struct cgroup *ancestor) |
2221 | { |
2222 | long ret; |
2223 | |
2224 | rcu_read_lock(); |
2225 | ret = task_under_cgroup_hierarchy(task, ancestor); |
2226 | rcu_read_unlock(); |
2227 | return ret; |
2228 | } |
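
/* Example usage from a BPF program (a hedged sketch; cgid is assumed to hold a
 * cgroup id supplied by userspace): look up a cgroup by id and test whether
 * the current task runs somewhere underneath it:
 *
 *	struct task_struct *task = bpf_get_current_task_btf();
 *	struct cgroup *cgrp;
 *
 *	cgrp = bpf_cgroup_from_id(cgid);
 *	if (!cgrp)
 *		return 0;
 *	if (bpf_task_under_cgroup(task, cgrp))
 *		... task is in the hierarchy under cgrp ...
 *	bpf_cgroup_release(cgrp);
 */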
2229 | #endif /* CONFIG_CGROUPS */ |
2230 | |
2231 | /** |
2232 | * bpf_task_from_pid - Find a struct task_struct from its pid by looking it up |
2233 | * in the root pid namespace idr. If a task is returned, it must either be |
2234 | * stored in a map, or released with bpf_task_release(). |
2235 | * @pid: The pid of the task being looked up. |
2236 | */ |
2237 | __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid) |
2238 | { |
2239 | struct task_struct *p; |
2240 | |
2241 | rcu_read_lock(); |
2242 | p = find_task_by_pid_ns(pid, &init_pid_ns); |
2243 | if (p) |
2244 | p = bpf_task_acquire(p); |
2245 | rcu_read_unlock(); |
2246 | |
2247 | return p; |
2248 | } |
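
/* Example usage from a BPF program (a hedged sketch): look up init by pid and
 * release the acquired reference when done:
 *
 *	struct task_struct *p = bpf_task_from_pid(1);
 *
 *	if (!p)
 *		return 0;
 *	... inspect p, e.g. read p->comm ...
 *	bpf_task_release(p);
 */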
2249 | |
2250 | /** |
2251 | * bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data. |
2252 | * @ptr: The dynptr whose data slice to retrieve |
2253 | * @offset: Offset into the dynptr |
2254 | * @buffer__opt: User-provided buffer to copy contents into. May be NULL |
2255 | * @buffer__szk: Size (in bytes) of the buffer if present. This is the |
2256 | * length of the requested slice. This must be a constant. |
2257 | * |
2258 | * For non-skb and non-xdp type dynptrs, there is no difference between |
2259 | * bpf_dynptr_slice and bpf_dynptr_data. |
2260 | * |
2261 | * If buffer__opt is NULL, the call will fail if a buffer is needed. |
2262 | * |
2263 | * If the intention is to write to the data slice, please use |
2264 | * bpf_dynptr_slice_rdwr. |
2265 | * |
2266 | * The user must check that the returned pointer is not null before using it. |
2267 | * |
2268 | * Please note that in the case of skb and xdp dynptrs, bpf_dynptr_slice |
2269 | * does not change the underlying packet data pointers, so a call to |
2270 | * bpf_dynptr_slice will not invalidate any ctx->data/data_end pointers in |
2271 | * the bpf program. |
2272 | * |
2273 | * Return: NULL if the call failed (eg invalid dynptr), pointer to a read-only |
2274 | * data slice (can be either direct pointer to the data or a pointer to the user |
2275 | * provided buffer, with its contents containing the data, if unable to obtain |
2276 | * direct pointer) |
2277 | */ |
2278 | __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset, |
2279 | void *buffer__opt, u32 buffer__szk) |
2280 | { |
2281 | enum bpf_dynptr_type type; |
2282 | u32 len = buffer__szk; |
2283 | int err; |
2284 | |
2285 | if (!ptr->data) |
2286 | return NULL; |
2287 | |
2288 | err = bpf_dynptr_check_off_len(ptr, offset, len); |
2289 | if (err) |
2290 | return NULL; |
2291 | |
2292 | type = bpf_dynptr_get_type(ptr); |
2293 | |
2294 | switch (type) { |
2295 | case BPF_DYNPTR_TYPE_LOCAL: |
2296 | case BPF_DYNPTR_TYPE_RINGBUF: |
2297 | return ptr->data + ptr->offset + offset; |
2298 | case BPF_DYNPTR_TYPE_SKB: |
2299 | if (buffer__opt) |
2300 | return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer__opt); |
2301 | else |
2302 | return skb_pointer_if_linear(ptr->data, ptr->offset + offset, len); |
2303 | case BPF_DYNPTR_TYPE_XDP: |
2304 | { |
2305 | void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len); |
2306 | if (!IS_ERR_OR_NULL(xdp_ptr)) |
2307 | return xdp_ptr; |
2308 | |
2309 | if (!buffer__opt) |
2310 | return NULL; |
2311 | bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer__opt, len, false); |
2312 | return buffer__opt; |
2313 | } |
2314 | default: |
2315 | WARN_ONCE(true, "unknown dynptr type %d\n", type); |
2316 | return NULL; |
2317 | } |
2318 | } |
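
/* Example usage from a BPF program (a hedged sketch; it assumes an skb dynptr
 * obtained with bpf_dynptr_from_skb() and bpf_htons() from bpf_endian.h):
 *
 *	struct ethhdr buf, *eth;
 *
 *	eth = bpf_dynptr_slice(&dynptr, 0, &buf, sizeof(buf));
 *	if (!eth)
 *		return TC_ACT_SHOT;
 *	if (eth->h_proto == bpf_htons(ETH_P_IP))
 *		... parse further ...
 *
 * eth may point either directly into the skb head or into buf; for read-only
 * access the caller does not need to know which.
 */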
2319 | |
2320 | /** |
2321 | * bpf_dynptr_slice_rdwr() - Obtain a writable pointer to the dynptr data. |
2322 | * @ptr: The dynptr whose data slice to retrieve |
2323 | * @offset: Offset into the dynptr |
2324 | * @buffer__opt: User-provided buffer to copy contents into. May be NULL |
2325 | * @buffer__szk: Size (in bytes) of the buffer if present. This is the |
2326 | * length of the requested slice. This must be a constant. |
2327 | * |
2328 | * For non-skb and non-xdp type dynptrs, there is no difference between |
2329 | * bpf_dynptr_slice and bpf_dynptr_data. |
2330 | * |
2331 | * If buffer__opt is NULL, the call will fail if a buffer is needed. |
2332 | * |
2333 | * The returned pointer is writable and may point to either directly the dynptr |
2334 | * data at the requested offset or to the buffer if unable to obtain a direct |
2335 | * data pointer to (example: the requested slice is to the paged area of an skb |
2336 | * packet). In the case where the returned pointer is to the buffer, the user |
2337 | * is responsible for persisting writes through calling bpf_dynptr_write(). This |
2338 | * usually looks something like this pattern: |
2339 | * |
2340 | * struct eth_hdr *eth = bpf_dynptr_slice_rdwr(&dynptr, 0, buffer, sizeof(buffer)); |
2341 | * if (!eth) |
2342 | * return TC_ACT_SHOT; |
2343 | * |
2344 | * // mutate eth header // |
2345 | * |
2346 | * if (eth == buffer) |
2347 | * bpf_dynptr_write(&ptr, 0, buffer, sizeof(buffer), 0); |
2348 | * |
2349 | * Please note that, as in the example above, the user must check that the |
2350 | * returned pointer is not null before using it. |
2351 | * |
2352 | * Please also note that in the case of skb and xdp dynptrs, bpf_dynptr_slice_rdwr |
2353 | * does not change the underlying packet data pointers, so a call to |
2354 | * bpf_dynptr_slice_rdwr will not invalidate any ctx->data/data_end pointers in |
2355 | * the bpf program. |
2356 | * |
2357 | * Return: NULL if the call failed (eg invalid dynptr), pointer to a |
2358 | * data slice (can be either direct pointer to the data or a pointer to the user |
2359 | * provided buffer, with its contents containing the data, if unable to obtain |
2360 | * direct pointer) |
2361 | */ |
2362 | __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 offset, |
2363 | void *buffer__opt, u32 buffer__szk) |
2364 | { |
2365 | if (!ptr->data || __bpf_dynptr_is_rdonly(ptr)) |
2366 | return NULL; |
2367 | |
2368 | /* bpf_dynptr_slice_rdwr is the same logic as bpf_dynptr_slice. |
2369 | * |
2370 | * For skb-type dynptrs, it is safe to write into the returned pointer |
2371 | * if the bpf program allows skb data writes. There are two possibilities |
2372 | * that may occur when calling bpf_dynptr_slice_rdwr: |
2373 | * |
2374 | * 1) The requested slice is in the head of the skb. In this case, the |
2375 | * returned pointer is directly to skb data, and if the skb is cloned, the |
2376 | * verifier will have uncloned it (see bpf_unclone_prologue()) already. |
2377 | * The pointer can be directly written into. |
2378 | * |
2379 | * 2) Some portion of the requested slice is in the paged buffer area. |
2380 | * In this case, the requested data will be copied out into the buffer |
2381 | * and the returned pointer will be a pointer to the buffer. The skb |
2382 | * will not be pulled. To persist the write, the user will need to call |
2383 | * bpf_dynptr_write(), which will pull the skb and commit the write. |
2384 | * |
2385 | * Similarly for xdp programs, if the requested slice is not across xdp |
2386 | * fragments, then a direct pointer will be returned, otherwise the data |
2387 | * will be copied out into the buffer and the user will need to call |
2388 | * bpf_dynptr_write() to commit changes. |
2389 | */ |
2390 | return bpf_dynptr_slice(ptr, offset, buffer__opt, buffer__szk); |
2391 | } |
2392 | |
2393 | __bpf_kfunc int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 start, u32 end) |
2394 | { |
2395 | u32 size; |
2396 | |
2397 | if (!ptr->data || start > end) |
2398 | return -EINVAL; |
2399 | |
2400 | size = __bpf_dynptr_size(ptr); |
2401 | |
2402 | if (start > size || end > size) |
2403 | return -ERANGE; |
2404 | |
2405 | ptr->offset += start; |
2406 | bpf_dynptr_set_size(ptr, end - start); |
2407 | |
2408 | return 0; |
2409 | } |
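
/* Example usage from a BPF program (a hedged sketch): narrow a dynptr to bytes
 * [4, 12) of its current view:
 *
 *	if (bpf_dynptr_adjust(&ptr, 4, 12))
 *		return 0;
 *
 * On success the dynptr's offset has advanced by 4 and bpf_dynptr_size(&ptr)
 * now reports 8.
 */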
2410 | |
2411 | __bpf_kfunc bool bpf_dynptr_is_null(struct bpf_dynptr_kern *ptr) |
2412 | { |
2413 | return !ptr->data; |
2414 | } |
2415 | |
2416 | __bpf_kfunc bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr) |
2417 | { |
2418 | if (!ptr->data) |
2419 | return false; |
2420 | |
2421 | return __bpf_dynptr_is_rdonly(ptr); |
2422 | } |
2423 | |
2424 | __bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr_kern *ptr) |
2425 | { |
2426 | if (!ptr->data) |
2427 | return -EINVAL; |
2428 | |
2429 | return __bpf_dynptr_size(ptr); |
2430 | } |
2431 | |
2432 | __bpf_kfunc int bpf_dynptr_clone(struct bpf_dynptr_kern *ptr, |
2433 | struct bpf_dynptr_kern *clone__uninit) |
2434 | { |
2435 | if (!ptr->data) { |
2436 | bpf_dynptr_set_null(clone__uninit); |
2437 | return -EINVAL; |
2438 | } |
2439 | |
2440 | *clone__uninit = *ptr; |
2441 | |
2442 | return 0; |
2443 | } |
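
/* Example usage from a BPF program (a hedged sketch): clone a dynptr so the
 * original view stays intact while the clone is narrowed:
 *
 *	struct bpf_dynptr clone;
 *
 *	if (bpf_dynptr_clone(&ptr, &clone))
 *		return 0;
 *	bpf_dynptr_adjust(&clone, 4, 12);
 *
 * ptr still covers the full range, clone now covers bytes [4, 12) of it, and
 * both views alias the same underlying data.
 */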
2444 | |
2445 | __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj) |
2446 | { |
2447 | return obj; |
2448 | } |
2449 | |
2450 | __bpf_kfunc void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k) |
2451 | { |
2452 | return obj__ign; |
2453 | } |
2454 | |
2455 | __bpf_kfunc void bpf_rcu_read_lock(void) |
2456 | { |
2457 | rcu_read_lock(); |
2458 | } |
2459 | |
2460 | __bpf_kfunc void bpf_rcu_read_unlock(void) |
2461 | { |
2462 | rcu_read_unlock(); |
2463 | } |
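
/* Example usage from a BPF program (a hedged sketch): bracket a walk of an
 * RCU-protected pointer chain with the kfunc pair so the verifier treats the
 * dereferenced pointers as RCU-protected inside the section:
 *
 *	bpf_rcu_read_lock();
 *	parent = task->real_parent;
 *	if (parent)
 *		ppid = parent->pid;
 *	bpf_rcu_read_unlock();
 */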
2464 | |
2465 | struct bpf_throw_ctx { |
2466 | struct bpf_prog_aux *aux; |
2467 | u64 sp; |
2468 | u64 bp; |
2469 | int cnt; |
2470 | }; |
2471 | |
2472 | static bool bpf_stack_walker(void *cookie, u64 ip, u64 sp, u64 bp) |
2473 | { |
2474 | struct bpf_throw_ctx *ctx = cookie; |
2475 | struct bpf_prog *prog; |
2476 | |
2477 | if (!is_bpf_text_address(ip)) |
2478 | return !ctx->cnt; |
2479 | prog = bpf_prog_ksym_find(ip); |
2480 | ctx->cnt++; |
2481 | if (bpf_is_subprog(prog)) |
2482 | return true; |
2483 | ctx->aux = prog->aux; |
2484 | ctx->sp = sp; |
2485 | ctx->bp = bp; |
2486 | return false; |
2487 | } |
2488 | |
2489 | __bpf_kfunc void bpf_throw(u64 cookie) |
2490 | { |
2491 | struct bpf_throw_ctx ctx = {}; |
2492 | |
2493 | arch_bpf_stack_walk(bpf_stack_walker, &ctx); |
2494 | WARN_ON_ONCE(!ctx.aux); |
2495 | if (ctx.aux) |
2496 | WARN_ON_ONCE(!ctx.aux->exception_boundary); |
2497 | WARN_ON_ONCE(!ctx.bp); |
2498 | WARN_ON_ONCE(!ctx.cnt); |
2499 | /* Prevent KASAN false positives for CONFIG_KASAN_STACK by unpoisoning |
2500 | * deeper stack depths than ctx.sp as we do not return from bpf_throw, |
2501 | * which skips compiler generated instrumentation to do the same. |
2502 | */ |
2503 | kasan_unpoison_task_stack_below((void *)(long)ctx.sp); |
2504 | ctx.aux->bpf_exception_cb(cookie, ctx.sp, ctx.bp); |
2505 | WARN(1, "A call to BPF exception callback should never return\n"); |
2506 | } |
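
/* Example usage from a BPF program (a hedged sketch; bpf_assert() refers to
 * the wrapper in the BPF selftests' bpf_experimental.h, which expands to a
 * conditional bpf_throw(0)):
 *
 *	SEC("?tc")
 *	int check_len(struct __sk_buff *ctx)
 *	{
 *		bpf_assert(ctx->len <= 1500);
 *		return ctx->len;
 *	}
 *
 * The throw unwinds the BPF frames walked by bpf_stack_walker() and transfers
 * control to the program's exception callback, which supplies the program's
 * final return value.
 */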
2507 | |
2508 | __diag_pop(); |
2509 | |
2510 | BTF_SET8_START(generic_btf_ids) |
2511 | #ifdef CONFIG_KEXEC_CORE |
2512 | BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) |
2513 | #endif |
2514 | BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) |
2515 | BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) |
2516 | BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE) |
2517 | BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE) |
2518 | BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL) |
2519 | BTF_ID_FLAGS(func, bpf_list_push_front_impl) |
2520 | BTF_ID_FLAGS(func, bpf_list_push_back_impl) |
2521 | BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) |
2522 | BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL) |
2523 | BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL) |
2524 | BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE) |
2525 | BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL) |
2526 | BTF_ID_FLAGS(func, bpf_rbtree_add_impl) |
2527 | BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL) |
2528 | |
2529 | #ifdef CONFIG_CGROUPS |
2530 | BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL) |
2531 | BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE) |
2532 | BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL) |
2533 | BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL) |
2534 | BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU) |
2535 | #endif |
2536 | BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL) |
2537 | BTF_ID_FLAGS(func, bpf_throw) |
2538 | BTF_SET8_END(generic_btf_ids) |
2539 | |
2540 | static const struct btf_kfunc_id_set generic_kfunc_set = { |
2541 | .owner = THIS_MODULE, |
2542 | .set = &generic_btf_ids, |
2543 | }; |
2544 | |
2545 | |
2546 | BTF_ID_LIST(generic_dtor_ids) |
2547 | BTF_ID(struct, task_struct) |
2548 | BTF_ID(func, bpf_task_release) |
2549 | #ifdef CONFIG_CGROUPS |
2550 | BTF_ID(struct, cgroup) |
2551 | BTF_ID(func, bpf_cgroup_release) |
2552 | #endif |
2553 | |
2554 | BTF_SET8_START(common_btf_ids) |
2555 | BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx) |
2556 | BTF_ID_FLAGS(func, bpf_rdonly_cast) |
2557 | BTF_ID_FLAGS(func, bpf_rcu_read_lock) |
2558 | BTF_ID_FLAGS(func, bpf_rcu_read_unlock) |
2559 | BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL) |
2560 | BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL) |
2561 | BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW) |
2562 | BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL) |
2563 | BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY) |
2564 | BTF_ID_FLAGS(func, bpf_iter_task_vma_new, KF_ITER_NEW | KF_RCU) |
2565 | BTF_ID_FLAGS(func, bpf_iter_task_vma_next, KF_ITER_NEXT | KF_RET_NULL) |
2566 | BTF_ID_FLAGS(func, bpf_iter_task_vma_destroy, KF_ITER_DESTROY) |
2567 | BTF_ID_FLAGS(func, bpf_iter_css_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS) |
2568 | BTF_ID_FLAGS(func, bpf_iter_css_task_next, KF_ITER_NEXT | KF_RET_NULL) |
2569 | BTF_ID_FLAGS(func, bpf_iter_css_task_destroy, KF_ITER_DESTROY) |
2570 | BTF_ID_FLAGS(func, bpf_iter_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED) |
2571 | BTF_ID_FLAGS(func, bpf_iter_task_next, KF_ITER_NEXT | KF_RET_NULL) |
2572 | BTF_ID_FLAGS(func, bpf_iter_task_destroy, KF_ITER_DESTROY) |
2573 | BTF_ID_FLAGS(func, bpf_iter_css_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED) |
2574 | BTF_ID_FLAGS(func, bpf_iter_css_next, KF_ITER_NEXT | KF_RET_NULL) |
2575 | BTF_ID_FLAGS(func, bpf_iter_css_destroy, KF_ITER_DESTROY) |
2576 | BTF_ID_FLAGS(func, bpf_dynptr_adjust) |
2577 | BTF_ID_FLAGS(func, bpf_dynptr_is_null) |
2578 | BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) |
2579 | BTF_ID_FLAGS(func, bpf_dynptr_size) |
2580 | BTF_ID_FLAGS(func, bpf_dynptr_clone) |
2581 | BTF_SET8_END(common_btf_ids) |
2582 | |
2583 | static const struct btf_kfunc_id_set common_kfunc_set = { |
2584 | .owner = THIS_MODULE, |
2585 | .set = &common_btf_ids, |
2586 | }; |
2587 | |
2588 | static int __init kfunc_init(void) |
2589 | { |
2590 | int ret; |
2591 | const struct btf_id_dtor_kfunc generic_dtors[] = { |
2592 | { |
2593 | .btf_id = generic_dtor_ids[0], |
2594 | .kfunc_btf_id = generic_dtor_ids[1] |
2595 | }, |
2596 | #ifdef CONFIG_CGROUPS |
2597 | { |
2598 | .btf_id = generic_dtor_ids[2], |
2599 | .kfunc_btf_id = generic_dtor_ids[3] |
2600 | }, |
2601 | #endif |
2602 | }; |
2603 | |
2604 | ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &generic_kfunc_set); |
2605 | ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &generic_kfunc_set); |
2606 | ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set); |
2607 | ret = ret ?: register_btf_id_dtor_kfuncs(generic_dtors, |
2608 | ARRAY_SIZE(generic_dtors), |
2609 | THIS_MODULE); |
2610 | return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set); |
2611 | } |
2612 | |
2613 | late_initcall(kfunc_init); |
2614 | |