1// SPDX-License-Identifier: GPL-2.0-only
2/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3 * Copyright (c) 2016 Facebook
4 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
5 */
6#include <uapi/linux/btf.h>
7#include <linux/bpf-cgroup.h>
8#include <linux/kernel.h>
9#include <linux/types.h>
10#include <linux/slab.h>
11#include <linux/bpf.h>
12#include <linux/btf.h>
13#include <linux/bpf_verifier.h>
14#include <linux/filter.h>
15#include <net/netlink.h>
16#include <linux/file.h>
17#include <linux/vmalloc.h>
18#include <linux/stringify.h>
19#include <linux/bsearch.h>
20#include <linux/sort.h>
21#include <linux/perf_event.h>
22#include <linux/ctype.h>
23#include <linux/error-injection.h>
24#include <linux/bpf_lsm.h>
25#include <linux/btf_ids.h>
26#include <linux/poison.h>
27#include <linux/module.h>
28#include <linux/cpumask.h>
29#include <net/xdp.h>
30
31#include "disasm.h"
32
33static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
34#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
35 [_id] = & _name ## _verifier_ops,
36#define BPF_MAP_TYPE(_id, _ops)
37#define BPF_LINK_TYPE(_id, _name)
38#include <linux/bpf_types.h>
39#undef BPF_PROG_TYPE
40#undef BPF_MAP_TYPE
41#undef BPF_LINK_TYPE
42};
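
/* For illustration only: each BPF_PROG_TYPE() entry in linux/bpf_types.h
 * expands to one initializer in the table above. Assuming the usual
 * definitions in bpf_types.h, the expansion looks roughly like:
 *
 *	[BPF_PROG_TYPE_SOCKET_FILTER] = &sk_filter_verifier_ops,
 *	[BPF_PROG_TYPE_XDP]           = &xdp_verifier_ops,
 *	...
 *
 * so looking up env->ops is just an array index by prog->type.
 */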
43
44/* bpf_check() is a static code analyzer that walks eBPF program
45 * instruction by instruction and updates register/stack state.
46 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
47 *
48 * The first pass is depth-first-search to check that the program is a DAG.
49 * It rejects the following programs:
50 * - larger than BPF_MAXINSNS insns
51 * - if loop is present (detected via back-edge)
52 * - unreachable insns exist (shouldn't be a forest. program = one function)
53 * - out of bounds or malformed jumps
 * The second pass descends all possible paths from the 1st insn.
 * Since it analyzes all paths through the program, the length of the
 * analysis is limited to 64k insn, which may be hit even if the total number
 * of insns is less than 4K, when there are too many branches that change
 * stack/regs.
 * The number of 'branches to be analyzed' is limited to 1k.
59 *
60 * On entry to each instruction, each register has a type, and the instruction
61 * changes the types of the registers depending on instruction semantics.
62 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
63 * copied to R1.
64 *
65 * All registers are 64-bit.
66 * R0 - return register
67 * R1-R5 argument passing registers
68 * R6-R9 callee saved registers
69 * R10 - frame pointer read-only
70 *
71 * At the start of BPF program the register R1 contains a pointer to bpf_context
72 * and has type PTR_TO_CTX.
73 *
74 * Verifier tracks arithmetic operations on pointers in case:
75 * BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
76 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
77 * 1st insn copies R10 (which has FRAME_PTR) type into R1
78 * and 2nd arithmetic instruction is pattern matched to recognize
79 * that it wants to construct a pointer to some element within stack.
80 * So after 2nd insn, the register R1 has type PTR_TO_STACK
81 * (and -20 constant is saved for further stack bounds checking).
82 * Meaning that this reg is a pointer to stack plus known immediate constant.
83 *
84 * Most of the time the registers have SCALAR_VALUE type, which
85 * means the register has some value, but it's not a valid pointer.
86 * (like pointer plus pointer becomes SCALAR_VALUE type)
87 *
88 * When verifier sees load or store instructions the type of base register
89 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
90 * four pointer types recognized by check_mem_access() function.
91 *
92 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
93 * and the range of [ptr, ptr + map's value_size) is accessible.
94 *
95 * registers used to pass values to function calls are checked against
96 * function argument constraints.
97 *
98 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
99 * It means that the register type passed to this function must be
100 * PTR_TO_STACK and it will be used inside the function as
101 * 'pointer to map element key'
102 *
103 * For example the argument constraints for bpf_map_lookup_elem():
104 * .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
105 * .arg1_type = ARG_CONST_MAP_PTR,
106 * .arg2_type = ARG_PTR_TO_MAP_KEY,
107 *
108 * ret_type says that this function returns 'pointer to map elem value or null'
109 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
110 * 2nd argument should be a pointer to stack, which will be used inside
111 * the helper function as a pointer to map element key.
112 *
113 * On the kernel side the helper function looks like:
114 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
115 * {
116 * struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
117 * void *key = (void *) (unsigned long) r2;
118 * void *value;
119 *
120 * here kernel can access 'key' and 'map' pointers safely, knowing that
121 * [key, key + map->key_size) bytes are valid and were initialized on
122 * the stack of eBPF program.
123 * }
124 *
125 * Corresponding eBPF program may look like:
126 * BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // after this insn R2 type is FRAME_PTR
127 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
128 * BPF_LD_MAP_FD(BPF_REG_1, map_fd), // after this insn R1 type is CONST_PTR_TO_MAP
129 * BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
130 * here verifier looks at prototype of map_lookup_elem() and sees:
131 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
132 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
133 *
134 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
135 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
136 * and were initialized prior to this call.
137 * If it's ok, then verifier allows this BPF_CALL insn and looks at
138 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
139 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
140 * returns either pointer to map value or NULL.
141 *
142 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
143 * insn, the register holding that pointer in the true branch changes state to
144 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
145 * branch. See check_cond_jmp_op().
146 *
147 * After the call R0 is set to return type of the function and registers R1-R5
148 * are set to NOT_INIT to indicate that they are no longer readable.
149 *
150 * The following reference types represent a potential reference to a kernel
151 * resource which, after first being allocated, must be checked and freed by
152 * the BPF program:
153 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
154 *
155 * When the verifier sees a helper call return a reference type, it allocates a
156 * pointer id for the reference and stores it in the current function state.
157 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
158 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
159 * passes through a NULL-check conditional. For the branch wherein the state is
160 * changed to CONST_IMM, the verifier releases the reference.
161 *
162 * For each helper function that allocates a reference, such as
163 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
164 * bpf_sk_release(). When a reference type passes into the release function,
165 * the verifier also releases the reference. If any unchecked or unreleased
166 * reference remains at the end of the program, the verifier rejects it.
167 */
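
/* A minimal illustrative sketch (not taken from any particular program) of
 * the NULL-check pattern described above, assuming a map whose value_size is
 * at least 8 bytes:
 *
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 *   BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),  // R0 is PTR_TO_MAP_VALUE_OR_NULL
 *   BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),   // only reached when R0 != NULL,
 *                                           // so R0 is PTR_TO_MAP_VALUE here
 *   BPF_MOV64_IMM(BPF_REG_0, 0),
 *   BPF_EXIT_INSN(),
 *
 * Dereferencing R0 before the conditional jump would be rejected by
 * check_mem_access() because PTR_TO_MAP_VALUE_OR_NULL may be NULL.
 */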
168
169/* verifier_state + insn_idx are pushed to stack when branch is encountered */
170struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
172 * before processing instruction 'insn_idx'
173 * and after processing instruction 'prev_insn_idx'
174 */
175 struct bpf_verifier_state st;
176 int insn_idx;
177 int prev_insn_idx;
178 struct bpf_verifier_stack_elem *next;
179 /* length of verifier log at the time this state was pushed on stack */
180 u32 log_pos;
181};
182
183#define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192
184#define BPF_COMPLEXITY_LIMIT_STATES 64
185
186#define BPF_MAP_KEY_POISON (1ULL << 63)
187#define BPF_MAP_KEY_SEEN (1ULL << 62)
188
189#define BPF_MAP_PTR_UNPRIV 1UL
190#define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \
191 POISON_POINTER_DELTA))
192#define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
193
194static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
195static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
196static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
197static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
198static int ref_set_non_owning(struct bpf_verifier_env *env,
199 struct bpf_reg_state *reg);
200static void specialize_kfunc(struct bpf_verifier_env *env,
201 u32 func_id, u16 offset, unsigned long *addr);
202static bool is_trusted_reg(const struct bpf_reg_state *reg);
203
204static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
205{
206 return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
207}
208
209static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
210{
211 return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
212}
213
214static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
215 const struct bpf_map *map, bool unpriv)
216{
217 BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
218 unpriv |= bpf_map_ptr_unpriv(aux);
219 aux->map_ptr_state = (unsigned long)map |
220 (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
221}
222
223static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
224{
225 return aux->map_key_state & BPF_MAP_KEY_POISON;
226}
227
228static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
229{
230 return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
231}
232
233static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
234{
235 return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
236}
237
238static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
239{
240 bool poisoned = bpf_map_key_poisoned(aux);
241
242 aux->map_key_state = state | BPF_MAP_KEY_SEEN |
243 (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
244}
245
246static bool bpf_helper_call(const struct bpf_insn *insn)
247{
248 return insn->code == (BPF_JMP | BPF_CALL) &&
249 insn->src_reg == 0;
250}
251
252static bool bpf_pseudo_call(const struct bpf_insn *insn)
253{
254 return insn->code == (BPF_JMP | BPF_CALL) &&
255 insn->src_reg == BPF_PSEUDO_CALL;
256}
257
258static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
259{
260 return insn->code == (BPF_JMP | BPF_CALL) &&
261 insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
262}
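
/* Illustrative encodings of the three call flavours distinguished above
 * (macros from linux/filter.h; operands are made up):
 *
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem)
 *	src_reg == 0, imm == helper id		-> bpf_helper_call()
 *   BPF_CALL_REL(insn_delta_to_subprog)
 *	src_reg == BPF_PSEUDO_CALL, imm == pc-relative offset to a
 *	bpf-to-bpf subprog			-> bpf_pseudo_call()
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, btf_id)
 *	src_reg == BPF_PSEUDO_KFUNC_CALL, imm == BTF id of the kernel
 *	function				-> bpf_pseudo_kfunc_call()
 */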
263
264struct bpf_call_arg_meta {
265 struct bpf_map *map_ptr;
266 bool raw_mode;
267 bool pkt_access;
268 u8 release_regno;
269 int regno;
270 int access_size;
271 int mem_size;
272 u64 msize_max_value;
273 int ref_obj_id;
274 int dynptr_id;
275 int map_uid;
276 int func_id;
277 struct btf *btf;
278 u32 btf_id;
279 struct btf *ret_btf;
280 u32 ret_btf_id;
281 u32 subprogno;
282 struct btf_field *kptr_field;
283};
284
285struct bpf_kfunc_call_arg_meta {
286 /* In parameters */
287 struct btf *btf;
288 u32 func_id;
289 u32 kfunc_flags;
290 const struct btf_type *func_proto;
291 const char *func_name;
292 /* Out parameters */
293 u32 ref_obj_id;
294 u8 release_regno;
295 bool r0_rdonly;
296 u32 ret_btf_id;
297 u64 r0_size;
298 u32 subprogno;
299 struct {
300 u64 value;
301 bool found;
302 } arg_constant;
303
304 /* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,
305 * generally to pass info about user-defined local kptr types to later
306 * verification logic
307 * bpf_obj_drop/bpf_percpu_obj_drop
308 * Record the local kptr type to be drop'd
309 * bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
310 * Record the local kptr type to be refcount_incr'd and use
311 * arg_owning_ref to determine whether refcount_acquire should be
312 * fallible
313 */
314 struct btf *arg_btf;
315 u32 arg_btf_id;
316 bool arg_owning_ref;
317
318 struct {
319 struct btf_field *field;
320 } arg_list_head;
321 struct {
322 struct btf_field *field;
323 } arg_rbtree_root;
324 struct {
325 enum bpf_dynptr_type type;
326 u32 id;
327 u32 ref_obj_id;
328 } initialized_dynptr;
329 struct {
330 u8 spi;
331 u8 frameno;
332 } iter;
333 u64 mem_size;
334};
335
336struct btf *btf_vmlinux;
337
338static DEFINE_MUTEX(bpf_verifier_lock);
339
340static const struct bpf_line_info *
341find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
342{
343 const struct bpf_line_info *linfo;
344 const struct bpf_prog *prog;
345 u32 i, nr_linfo;
346
347 prog = env->prog;
348 nr_linfo = prog->aux->nr_linfo;
349
350 if (!nr_linfo || insn_off >= prog->len)
351 return NULL;
352
353 linfo = prog->aux->linfo;
354 for (i = 1; i < nr_linfo; i++)
355 if (insn_off < linfo[i].insn_off)
356 break;
357
358 return &linfo[i - 1];
359}
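
/* Worked example (hypothetical numbers): with linfo[].insn_off = {0, 5, 12}
 * and insn_off = 7, the loop above breaks at i = 2 (12 > 7) and returns
 * &linfo[1], i.e. the last line_info entry that starts at or before the
 * requested instruction.
 */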
360
361__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
362{
363 struct bpf_verifier_env *env = private_data;
364 va_list args;
365
	if (!bpf_verifier_log_needed(&env->log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(&env->log, fmt, args);
371 va_end(args);
372}
373
374static const char *ltrim(const char *s)
375{
376 while (isspace(*s))
377 s++;
378
379 return s;
380}
381
382__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
383 u32 insn_off,
384 const char *prefix_fmt, ...)
385{
386 const struct bpf_line_info *linfo;
387
	if (!bpf_verifier_log_needed(&env->log))
389 return;
390
391 linfo = find_linfo(env, insn_off);
392 if (!linfo || linfo == env->prev_linfo)
393 return;
394
395 if (prefix_fmt) {
396 va_list args;
397
398 va_start(args, prefix_fmt);
		bpf_verifier_vlog(&env->log, prefix_fmt, args);
400 va_end(args);
401 }
402
	verbose(env, "%s\n",
		ltrim(btf_name_by_offset(env->prog->aux->btf,
					 linfo->line_off)));
406
407 env->prev_linfo = linfo;
408}
409
410static void verbose_invalid_scalar(struct bpf_verifier_env *env,
411 struct bpf_reg_state *reg,
412 struct tnum *range, const char *ctx,
413 const char *reg_name)
414{
415 char tn_buf[48];
416
	verbose(env, "At %s the register %s ", ctx, reg_name);
	if (!tnum_is_unknown(reg->var_off)) {
		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "has value %s", tn_buf);
	} else {
		verbose(env, "has unknown scalar value");
	}
	tnum_strn(tn_buf, sizeof(tn_buf), *range);
	verbose(env, " should have been in %s\n", tn_buf);
426}
427
428static bool type_is_pkt_pointer(enum bpf_reg_type type)
429{
430 type = base_type(type);
431 return type == PTR_TO_PACKET ||
432 type == PTR_TO_PACKET_META;
433}
434
435static bool type_is_sk_pointer(enum bpf_reg_type type)
436{
437 return type == PTR_TO_SOCKET ||
438 type == PTR_TO_SOCK_COMMON ||
439 type == PTR_TO_TCP_SOCK ||
440 type == PTR_TO_XDP_SOCK;
441}
442
443static bool type_may_be_null(u32 type)
444{
445 return type & PTR_MAYBE_NULL;
446}
447
448static bool reg_not_null(const struct bpf_reg_state *reg)
449{
450 enum bpf_reg_type type;
451
452 type = reg->type;
453 if (type_may_be_null(type))
454 return false;
455
456 type = base_type(type);
457 return type == PTR_TO_SOCKET ||
458 type == PTR_TO_TCP_SOCK ||
459 type == PTR_TO_MAP_VALUE ||
460 type == PTR_TO_MAP_KEY ||
461 type == PTR_TO_SOCK_COMMON ||
462 (type == PTR_TO_BTF_ID && is_trusted_reg(reg)) ||
463 type == PTR_TO_MEM;
464}
465
466static bool type_is_ptr_alloc_obj(u32 type)
467{
468 return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC;
469}
470
471static bool type_is_non_owning_ref(u32 type)
472{
473 return type_is_ptr_alloc_obj(type) && type_flag(type) & NON_OWN_REF;
474}
475
476static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
477{
478 struct btf_record *rec = NULL;
479 struct btf_struct_meta *meta;
480
481 if (reg->type == PTR_TO_MAP_VALUE) {
482 rec = reg->map_ptr->record;
	} else if (type_is_ptr_alloc_obj(reg->type)) {
		meta = btf_find_struct_meta(reg->btf, reg->btf_id);
485 if (meta)
486 rec = meta->record;
487 }
488 return rec;
489}
490
491static bool subprog_is_global(const struct bpf_verifier_env *env, int subprog)
492{
493 struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux;
494
495 return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL;
496}
497
498static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
499{
	return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK);
501}
502
503static bool type_is_rdonly_mem(u32 type)
504{
505 return type & MEM_RDONLY;
506}
507
508static bool is_acquire_function(enum bpf_func_id func_id,
509 const struct bpf_map *map)
510{
511 enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
512
513 if (func_id == BPF_FUNC_sk_lookup_tcp ||
514 func_id == BPF_FUNC_sk_lookup_udp ||
515 func_id == BPF_FUNC_skc_lookup_tcp ||
516 func_id == BPF_FUNC_ringbuf_reserve ||
517 func_id == BPF_FUNC_kptr_xchg)
518 return true;
519
520 if (func_id == BPF_FUNC_map_lookup_elem &&
521 (map_type == BPF_MAP_TYPE_SOCKMAP ||
522 map_type == BPF_MAP_TYPE_SOCKHASH))
523 return true;
524
525 return false;
526}
527
528static bool is_ptr_cast_function(enum bpf_func_id func_id)
529{
530 return func_id == BPF_FUNC_tcp_sock ||
531 func_id == BPF_FUNC_sk_fullsock ||
532 func_id == BPF_FUNC_skc_to_tcp_sock ||
533 func_id == BPF_FUNC_skc_to_tcp6_sock ||
534 func_id == BPF_FUNC_skc_to_udp6_sock ||
535 func_id == BPF_FUNC_skc_to_mptcp_sock ||
536 func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
537 func_id == BPF_FUNC_skc_to_tcp_request_sock;
538}
539
540static bool is_dynptr_ref_function(enum bpf_func_id func_id)
541{
542 return func_id == BPF_FUNC_dynptr_data;
543}
544
545static bool is_callback_calling_kfunc(u32 btf_id);
546static bool is_bpf_throw_kfunc(struct bpf_insn *insn);
547
548static bool is_callback_calling_function(enum bpf_func_id func_id)
549{
550 return func_id == BPF_FUNC_for_each_map_elem ||
551 func_id == BPF_FUNC_timer_set_callback ||
552 func_id == BPF_FUNC_find_vma ||
553 func_id == BPF_FUNC_loop ||
554 func_id == BPF_FUNC_user_ringbuf_drain;
555}
556
557static bool is_async_callback_calling_function(enum bpf_func_id func_id)
558{
559 return func_id == BPF_FUNC_timer_set_callback;
560}
561
562static bool is_storage_get_function(enum bpf_func_id func_id)
563{
564 return func_id == BPF_FUNC_sk_storage_get ||
565 func_id == BPF_FUNC_inode_storage_get ||
566 func_id == BPF_FUNC_task_storage_get ||
567 func_id == BPF_FUNC_cgrp_storage_get;
568}
569
570static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
571 const struct bpf_map *map)
572{
573 int ref_obj_uses = 0;
574
575 if (is_ptr_cast_function(func_id))
576 ref_obj_uses++;
577 if (is_acquire_function(func_id, map))
578 ref_obj_uses++;
579 if (is_dynptr_ref_function(func_id))
580 ref_obj_uses++;
581
582 return ref_obj_uses > 1;
583}
584
585static bool is_cmpxchg_insn(const struct bpf_insn *insn)
586{
587 return BPF_CLASS(insn->code) == BPF_STX &&
588 BPF_MODE(insn->code) == BPF_ATOMIC &&
589 insn->imm == BPF_CMPXCHG;
590}
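
/* For reference, an atomic compare-and-exchange instruction as emitted by the
 * BPF_ATOMIC_OP() macro from linux/filter.h (illustrative operands):
 *
 *   BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_2, BPF_REG_3, 0)
 *
 * i.e. class BPF_STX, mode BPF_ATOMIC, imm BPF_CMPXCHG: compare R0 with
 * *(u64 *)(R2 + 0) and, if equal, store R3 there; the old value is loaded
 * into R0.
 */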
591
592/* string representation of 'enum bpf_reg_type'
593 *
594 * Note that reg_type_str() can not appear more than once in a single verbose()
595 * statement.
596 */
597static const char *reg_type_str(struct bpf_verifier_env *env,
598 enum bpf_reg_type type)
599{
600 char postfix[16] = {0}, prefix[64] = {0};
601 static const char * const str[] = {
602 [NOT_INIT] = "?",
603 [SCALAR_VALUE] = "scalar",
604 [PTR_TO_CTX] = "ctx",
605 [CONST_PTR_TO_MAP] = "map_ptr",
606 [PTR_TO_MAP_VALUE] = "map_value",
607 [PTR_TO_STACK] = "fp",
608 [PTR_TO_PACKET] = "pkt",
609 [PTR_TO_PACKET_META] = "pkt_meta",
610 [PTR_TO_PACKET_END] = "pkt_end",
611 [PTR_TO_FLOW_KEYS] = "flow_keys",
612 [PTR_TO_SOCKET] = "sock",
613 [PTR_TO_SOCK_COMMON] = "sock_common",
614 [PTR_TO_TCP_SOCK] = "tcp_sock",
615 [PTR_TO_TP_BUFFER] = "tp_buffer",
616 [PTR_TO_XDP_SOCK] = "xdp_sock",
617 [PTR_TO_BTF_ID] = "ptr_",
618 [PTR_TO_MEM] = "mem",
619 [PTR_TO_BUF] = "buf",
620 [PTR_TO_FUNC] = "func",
621 [PTR_TO_MAP_KEY] = "map_key",
622 [CONST_PTR_TO_DYNPTR] = "dynptr_ptr",
623 };
624
625 if (type & PTR_MAYBE_NULL) {
626 if (base_type(type) == PTR_TO_BTF_ID)
			strncpy(postfix, "or_null_", 16);
		else
			strncpy(postfix, "_or_null", 16);
	}

	snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s",
633 type & MEM_RDONLY ? "rdonly_" : "",
634 type & MEM_RINGBUF ? "ringbuf_" : "",
635 type & MEM_USER ? "user_" : "",
636 type & MEM_PERCPU ? "percpu_" : "",
637 type & MEM_RCU ? "rcu_" : "",
638 type & PTR_UNTRUSTED ? "untrusted_" : "",
639 type & PTR_TRUSTED ? "trusted_" : ""
640 );
641
	snprintf(env->tmp_str_buf, TMP_STR_BUF_LEN, "%s%s%s",
643 prefix, str[base_type(type)], postfix);
644 return env->tmp_str_buf;
645}
646
647static char slot_type_char[] = {
648 [STACK_INVALID] = '?',
649 [STACK_SPILL] = 'r',
650 [STACK_MISC] = 'm',
651 [STACK_ZERO] = '0',
652 [STACK_DYNPTR] = 'd',
653 [STACK_ITER] = 'i',
654};
655
656static void print_liveness(struct bpf_verifier_env *env,
657 enum bpf_reg_liveness live)
658{
659 if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
		verbose(env, "_");
	if (live & REG_LIVE_READ)
		verbose(env, "r");
	if (live & REG_LIVE_WRITTEN)
		verbose(env, "w");
	if (live & REG_LIVE_DONE)
		verbose(env, "D");
667}
668
669static int __get_spi(s32 off)
670{
671 return (-off - 1) / BPF_REG_SIZE;
672}
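
/* Worked example: stack offsets are negative and each slot is BPF_REG_SIZE (8)
 * bytes, so off = -8 maps to spi 0, off = -16 to spi 1, off = -24 to spi 2,
 * i.e. spi counts 8-byte slots downwards from the frame pointer.
 */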
673
674static struct bpf_func_state *func(struct bpf_verifier_env *env,
675 const struct bpf_reg_state *reg)
676{
677 struct bpf_verifier_state *cur = env->cur_state;
678
679 return cur->frame[reg->frameno];
680}
681
682static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
683{
684 int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
685
686 /* We need to check that slots between [spi - nr_slots + 1, spi] are
687 * within [0, allocated_stack).
688 *
689 * Please note that the spi grows downwards. For example, a dynptr
690 * takes the size of two stack slots; the first slot will be at
691 * spi and the second slot will be at spi - 1.
692 */
693 return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
694}
695
696static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
697 const char *obj_kind, int nr_slots)
698{
699 int off, spi;
700
	if (!tnum_is_const(reg->var_off)) {
		verbose(env, "%s has to be at a constant offset\n", obj_kind);
		return -EINVAL;
	}

	off = reg->off + reg->var_off.value;
	if (off % BPF_REG_SIZE) {
		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
		return -EINVAL;
	}

	spi = __get_spi(off);
	if (spi + 1 < nr_slots) {
		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
		return -EINVAL;
	}

	if (!is_spi_bounds_valid(func(env, reg), spi, nr_slots))
719 return -ERANGE;
720 return spi;
721}
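
/* Worked example (hypothetical register): for a PTR_TO_STACK reg with
 * reg->off = -16 and a constant var_off of 0, off = -16 and spi = 1. With
 * nr_slots = 2 (a dynptr) the object occupies slots 1 and 0, so both the
 * "spi + 1 < nr_slots" check and is_spi_bounds_valid() pass as long as at
 * least 16 bytes of stack have been allocated.
 */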
722
723static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
724{
	return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS);
726}
727
728static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots)
729{
	return stack_slot_obj_get_spi(env, reg, "iter", nr_slots);
731}
732
733static const char *btf_type_name(const struct btf *btf, u32 id)
734{
	return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
736}
737
738static const char *dynptr_type_str(enum bpf_dynptr_type type)
739{
740 switch (type) {
741 case BPF_DYNPTR_TYPE_LOCAL:
742 return "local";
743 case BPF_DYNPTR_TYPE_RINGBUF:
744 return "ringbuf";
745 case BPF_DYNPTR_TYPE_SKB:
746 return "skb";
747 case BPF_DYNPTR_TYPE_XDP:
748 return "xdp";
749 case BPF_DYNPTR_TYPE_INVALID:
750 return "<invalid>";
751 default:
752 WARN_ONCE(1, "unknown dynptr type %d\n", type);
753 return "<unknown>";
754 }
755}
756
757static const char *iter_type_str(const struct btf *btf, u32 btf_id)
758{
759 if (!btf || btf_id == 0)
760 return "<invalid>";
761
762 /* we already validated that type is valid and has conforming name */
	return btf_type_name(btf, btf_id) + sizeof(ITER_PREFIX) - 1;
764}
765
766static const char *iter_state_str(enum bpf_iter_state state)
767{
768 switch (state) {
769 case BPF_ITER_STATE_ACTIVE:
770 return "active";
771 case BPF_ITER_STATE_DRAINED:
772 return "drained";
773 case BPF_ITER_STATE_INVALID:
774 return "<invalid>";
775 default:
776 WARN_ONCE(1, "unknown iter state %d\n", state);
777 return "<unknown>";
778 }
779}
780
781static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
782{
783 env->scratched_regs |= 1U << regno;
784}
785
786static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
787{
788 env->scratched_stack_slots |= 1ULL << spi;
789}
790
791static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
792{
793 return (env->scratched_regs >> regno) & 1;
794}
795
796static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno)
797{
798 return (env->scratched_stack_slots >> regno) & 1;
799}
800
801static bool verifier_state_scratched(const struct bpf_verifier_env *env)
802{
803 return env->scratched_regs || env->scratched_stack_slots;
804}
805
806static void mark_verifier_state_clean(struct bpf_verifier_env *env)
807{
808 env->scratched_regs = 0U;
809 env->scratched_stack_slots = 0ULL;
810}
811
812/* Used for printing the entire verifier state. */
813static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
814{
815 env->scratched_regs = ~0U;
816 env->scratched_stack_slots = ~0ULL;
817}
818
819static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
820{
821 switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
822 case DYNPTR_TYPE_LOCAL:
823 return BPF_DYNPTR_TYPE_LOCAL;
824 case DYNPTR_TYPE_RINGBUF:
825 return BPF_DYNPTR_TYPE_RINGBUF;
826 case DYNPTR_TYPE_SKB:
827 return BPF_DYNPTR_TYPE_SKB;
828 case DYNPTR_TYPE_XDP:
829 return BPF_DYNPTR_TYPE_XDP;
830 default:
831 return BPF_DYNPTR_TYPE_INVALID;
832 }
833}
834
835static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
836{
837 switch (type) {
838 case BPF_DYNPTR_TYPE_LOCAL:
839 return DYNPTR_TYPE_LOCAL;
840 case BPF_DYNPTR_TYPE_RINGBUF:
841 return DYNPTR_TYPE_RINGBUF;
842 case BPF_DYNPTR_TYPE_SKB:
843 return DYNPTR_TYPE_SKB;
844 case BPF_DYNPTR_TYPE_XDP:
845 return DYNPTR_TYPE_XDP;
846 default:
847 return 0;
848 }
849}
850
851static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
852{
853 return type == BPF_DYNPTR_TYPE_RINGBUF;
854}
855
856static void __mark_dynptr_reg(struct bpf_reg_state *reg,
857 enum bpf_dynptr_type type,
858 bool first_slot, int dynptr_id);
859
860static void __mark_reg_not_init(const struct bpf_verifier_env *env,
861 struct bpf_reg_state *reg);
862
863static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
864 struct bpf_reg_state *sreg1,
865 struct bpf_reg_state *sreg2,
866 enum bpf_dynptr_type type)
867{
868 int id = ++env->id_gen;
869
	__mark_dynptr_reg(sreg1, type, true, id);
	__mark_dynptr_reg(sreg2, type, false, id);
872}
873
874static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
875 struct bpf_reg_state *reg,
876 enum bpf_dynptr_type type)
877{
	__mark_dynptr_reg(reg, type, true, ++env->id_gen);
879}
880
881static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
882 struct bpf_func_state *state, int spi);
883
884static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
885 enum bpf_arg_type arg_type, int insn_idx, int clone_ref_obj_id)
886{
887 struct bpf_func_state *state = func(env, reg);
888 enum bpf_dynptr_type type;
889 int spi, i, err;
890
891 spi = dynptr_get_spi(env, reg);
892 if (spi < 0)
893 return spi;
894
895 /* We cannot assume both spi and spi - 1 belong to the same dynptr,
896 * hence we need to call destroy_if_dynptr_stack_slot twice for both,
897 * to ensure that for the following example:
898 * [d1][d1][d2][d2]
899 * spi 3 2 1 0
900 * So marking spi = 2 should lead to destruction of both d1 and d2. In
901 * case they do belong to same dynptr, second call won't see slot_type
902 * as STACK_DYNPTR and will simply skip destruction.
903 */
904 err = destroy_if_dynptr_stack_slot(env, state, spi);
905 if (err)
906 return err;
	err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
908 if (err)
909 return err;
910
911 for (i = 0; i < BPF_REG_SIZE; i++) {
912 state->stack[spi].slot_type[i] = STACK_DYNPTR;
913 state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
914 }
915
916 type = arg_to_dynptr_type(arg_type);
917 if (type == BPF_DYNPTR_TYPE_INVALID)
918 return -EINVAL;
919
	mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
			       &state->stack[spi - 1].spilled_ptr, type);
922
923 if (dynptr_type_refcounted(type)) {
924 /* The id is used to track proper releasing */
925 int id;
926
927 if (clone_ref_obj_id)
928 id = clone_ref_obj_id;
929 else
930 id = acquire_reference_state(env, insn_idx);
931
932 if (id < 0)
933 return id;
934
935 state->stack[spi].spilled_ptr.ref_obj_id = id;
936 state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
937 }
938
939 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
940 state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
941
942 return 0;
943}
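
/* From the program side, a sketch of what drives this marking (using the
 * existing ringbuf dynptr helpers; rb is some BPF_MAP_TYPE_RINGBUF map and
 * error handling is elided):
 *
 *   struct bpf_dynptr ptr;			// 16 bytes -> 2 stack slots
 *
 *   bpf_ringbuf_reserve_dynptr(&rb, 64, 0, &ptr);
 *   // both slots become STACK_DYNPTR of type BPF_DYNPTR_TYPE_RINGBUF and a
 *   // ref_obj_id is acquired, because the ringbuf dynptr is refcounted
 *   ...
 *   bpf_ringbuf_submit_dynptr(&ptr, 0);	// releases the reference
 */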
944
945static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi)
946{
947 int i;
948
949 for (i = 0; i < BPF_REG_SIZE; i++) {
950 state->stack[spi].slot_type[i] = STACK_INVALID;
951 state->stack[spi - 1].slot_type[i] = STACK_INVALID;
952 }
953
	__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
	__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
956
957 /* Why do we need to set REG_LIVE_WRITTEN for STACK_INVALID slot?
958 *
959 * While we don't allow reading STACK_INVALID, it is still possible to
960 * do <8 byte writes marking some but not all slots as STACK_MISC. Then,
961 * helpers or insns can do partial read of that part without failing,
962 * but check_stack_range_initialized, check_stack_read_var_off, and
963 * check_stack_read_fixed_off will do mark_reg_read for all 8-bytes of
964 * the slot conservatively. Hence we need to prevent those liveness
965 * marking walks.
966 *
967 * This was not a problem before because STACK_INVALID is only set by
968 * default (where the default reg state has its reg->parent as NULL), or
969 * in clean_live_states after REG_LIVE_DONE (at which point
970 * mark_reg_read won't walk reg->parent chain), but not randomly during
971 * verifier state exploration (like we did above). Hence, for our case
972 * parentage chain will still be live (i.e. reg->parent may be
973 * non-NULL), while earlier reg->parent was NULL, so we need
	 * REG_LIVE_WRITTEN to screen off read marker propagation when reads or
	 * mark_dynptr_read happen later, which would otherwise unnecessarily
	 * mark registers in the parent verifier state.
977 */
978 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
979 state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
980}
981
982static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
983{
984 struct bpf_func_state *state = func(env, reg);
985 int spi, ref_obj_id, i;
986
987 spi = dynptr_get_spi(env, reg);
988 if (spi < 0)
989 return spi;
990
	if (!dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
992 invalidate_dynptr(env, state, spi);
993 return 0;
994 }
995
996 ref_obj_id = state->stack[spi].spilled_ptr.ref_obj_id;
997
998 /* If the dynptr has a ref_obj_id, then we need to invalidate
999 * two things:
1000 *
1001 * 1) Any dynptrs with a matching ref_obj_id (clones)
1002 * 2) Any slices derived from this dynptr.
1003 */
1004
1005 /* Invalidate any slices associated with this dynptr */
1006 WARN_ON_ONCE(release_reference(env, ref_obj_id));
1007
1008 /* Invalidate any dynptr clones */
1009 for (i = 1; i < state->allocated_stack / BPF_REG_SIZE; i++) {
1010 if (state->stack[i].spilled_ptr.ref_obj_id != ref_obj_id)
1011 continue;
1012
1013 /* it should always be the case that if the ref obj id
1014 * matches then the stack slot also belongs to a
1015 * dynptr
1016 */
1017 if (state->stack[i].slot_type[0] != STACK_DYNPTR) {
			verbose(env, "verifier internal error: misconfigured ref_obj_id\n");
1019 return -EFAULT;
1020 }
1021 if (state->stack[i].spilled_ptr.dynptr.first_slot)
			invalidate_dynptr(env, state, i);
1023 }
1024
1025 return 0;
1026}
1027
1028static void __mark_reg_unknown(const struct bpf_verifier_env *env,
1029 struct bpf_reg_state *reg);
1030
1031static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1032{
1033 if (!env->allow_ptr_leaks)
1034 __mark_reg_not_init(env, reg);
1035 else
1036 __mark_reg_unknown(env, reg);
1037}
1038
1039static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
1040 struct bpf_func_state *state, int spi)
1041{
1042 struct bpf_func_state *fstate;
1043 struct bpf_reg_state *dreg;
1044 int i, dynptr_id;
1045
1046 /* We always ensure that STACK_DYNPTR is never set partially,
1047 * hence just checking for slot_type[0] is enough. This is
1048 * different for STACK_SPILL, where it may be only set for
1049 * 1 byte, so code has to use is_spilled_reg.
1050 */
1051 if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
1052 return 0;
1053
1054 /* Reposition spi to first slot */
1055 if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
1056 spi = spi + 1;
1057
	if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
		verbose(env, "cannot overwrite referenced dynptr\n");
1060 return -EINVAL;
1061 }
1062
1063 mark_stack_slot_scratched(env, spi);
	mark_stack_slot_scratched(env, spi - 1);
1065
1066 /* Writing partially to one dynptr stack slot destroys both. */
1067 for (i = 0; i < BPF_REG_SIZE; i++) {
1068 state->stack[spi].slot_type[i] = STACK_INVALID;
1069 state->stack[spi - 1].slot_type[i] = STACK_INVALID;
1070 }
1071
1072 dynptr_id = state->stack[spi].spilled_ptr.id;
1073 /* Invalidate any slices associated with this dynptr */
1074 bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({
1075 /* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */
1076 if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
1077 continue;
1078 if (dreg->dynptr_id == dynptr_id)
1079 mark_reg_invalid(env, dreg);
1080 }));
1081
1082 /* Do not release reference state, we are destroying dynptr on stack,
1083 * not using some helper to release it. Just reset register.
1084 */
	__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
	__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
1087
1088 /* Same reason as unmark_stack_slots_dynptr above */
1089 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1090 state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
1091
1092 return 0;
1093}
1094
1095static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1096{
1097 int spi;
1098
1099 if (reg->type == CONST_PTR_TO_DYNPTR)
1100 return false;
1101
1102 spi = dynptr_get_spi(env, reg);
1103
1104 /* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
1105 * error because this just means the stack state hasn't been updated yet.
1106 * We will do check_mem_access to check and update stack bounds later.
1107 */
1108 if (spi < 0 && spi != -ERANGE)
1109 return false;
1110
1111 /* We don't need to check if the stack slots are marked by previous
1112 * dynptr initializations because we allow overwriting existing unreferenced
1113 * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
1114 * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
1115 * touching are completely destructed before we reinitialize them for a new
1116 * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
1117 * instead of delaying it until the end where the user will get "Unreleased
1118 * reference" error.
1119 */
1120 return true;
1121}
1122
1123static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1124{
1125 struct bpf_func_state *state = func(env, reg);
1126 int i, spi;
1127
1128 /* This already represents first slot of initialized bpf_dynptr.
1129 *
1130 * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
1131 * check_func_arg_reg_off's logic, so we don't need to check its
1132 * offset and alignment.
1133 */
1134 if (reg->type == CONST_PTR_TO_DYNPTR)
1135 return true;
1136
1137 spi = dynptr_get_spi(env, reg);
1138 if (spi < 0)
1139 return false;
1140 if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
1141 return false;
1142
1143 for (i = 0; i < BPF_REG_SIZE; i++) {
1144 if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
1145 state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
1146 return false;
1147 }
1148
1149 return true;
1150}
1151
1152static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1153 enum bpf_arg_type arg_type)
1154{
1155 struct bpf_func_state *state = func(env, reg);
1156 enum bpf_dynptr_type dynptr_type;
1157 int spi;
1158
1159 /* ARG_PTR_TO_DYNPTR takes any type of dynptr */
1160 if (arg_type == ARG_PTR_TO_DYNPTR)
1161 return true;
1162
1163 dynptr_type = arg_to_dynptr_type(arg_type);
1164 if (reg->type == CONST_PTR_TO_DYNPTR) {
1165 return reg->dynptr.type == dynptr_type;
1166 } else {
1167 spi = dynptr_get_spi(env, reg);
1168 if (spi < 0)
1169 return false;
1170 return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
1171 }
1172}
1173
1174static void __mark_reg_known_zero(struct bpf_reg_state *reg);
1175
1176static bool in_rcu_cs(struct bpf_verifier_env *env);
1177
1178static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta);
1179
1180static int mark_stack_slots_iter(struct bpf_verifier_env *env,
1181 struct bpf_kfunc_call_arg_meta *meta,
1182 struct bpf_reg_state *reg, int insn_idx,
1183 struct btf *btf, u32 btf_id, int nr_slots)
1184{
1185 struct bpf_func_state *state = func(env, reg);
1186 int spi, i, j, id;
1187
1188 spi = iter_get_spi(env, reg, nr_slots);
1189 if (spi < 0)
1190 return spi;
1191
1192 id = acquire_reference_state(env, insn_idx);
1193 if (id < 0)
1194 return id;
1195
1196 for (i = 0; i < nr_slots; i++) {
1197 struct bpf_stack_state *slot = &state->stack[spi - i];
1198 struct bpf_reg_state *st = &slot->spilled_ptr;
1199
		__mark_reg_known_zero(st);
1201 st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
1202 if (is_kfunc_rcu_protected(meta)) {
1203 if (in_rcu_cs(env))
1204 st->type |= MEM_RCU;
1205 else
1206 st->type |= PTR_UNTRUSTED;
1207 }
1208 st->live |= REG_LIVE_WRITTEN;
1209 st->ref_obj_id = i == 0 ? id : 0;
1210 st->iter.btf = btf;
1211 st->iter.btf_id = btf_id;
1212 st->iter.state = BPF_ITER_STATE_ACTIVE;
1213 st->iter.depth = 0;
1214
1215 for (j = 0; j < BPF_REG_SIZE; j++)
1216 slot->slot_type[j] = STACK_ITER;
1217
		mark_stack_slot_scratched(env, spi - i);
1219 }
1220
1221 return 0;
1222}
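
/* From the program side, the open-coded iterator kfuncs that lead to this
 * marking look roughly like (numbers iterator shown as an example):
 *
 *   struct bpf_iter_num it;			// lives on the program stack
 *   int *v, sum = 0;
 *
 *   bpf_iter_num_new(&it, 0, 10);		// slots become STACK_ITER,
 *						// main slot gets a ref_obj_id
 *   while ((v = bpf_iter_num_next(&it)))
 *		sum += *v;
 *   bpf_iter_num_destroy(&it);			// reference released,
 *						// slots invalidated again
 */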
1223
1224static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
1225 struct bpf_reg_state *reg, int nr_slots)
1226{
1227 struct bpf_func_state *state = func(env, reg);
1228 int spi, i, j;
1229
1230 spi = iter_get_spi(env, reg, nr_slots);
1231 if (spi < 0)
1232 return spi;
1233
1234 for (i = 0; i < nr_slots; i++) {
1235 struct bpf_stack_state *slot = &state->stack[spi - i];
1236 struct bpf_reg_state *st = &slot->spilled_ptr;
1237
1238 if (i == 0)
1239 WARN_ON_ONCE(release_reference(env, st->ref_obj_id));
1240
		__mark_reg_not_init(env, st);
1242
1243 /* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */
1244 st->live |= REG_LIVE_WRITTEN;
1245
1246 for (j = 0; j < BPF_REG_SIZE; j++)
1247 slot->slot_type[j] = STACK_INVALID;
1248
		mark_stack_slot_scratched(env, spi - i);
1250 }
1251
1252 return 0;
1253}
1254
1255static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
1256 struct bpf_reg_state *reg, int nr_slots)
1257{
1258 struct bpf_func_state *state = func(env, reg);
1259 int spi, i, j;
1260
1261 /* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1262 * will do check_mem_access to check and update stack bounds later, so
1263 * return true for that case.
1264 */
1265 spi = iter_get_spi(env, reg, nr_slots);
1266 if (spi == -ERANGE)
1267 return true;
1268 if (spi < 0)
1269 return false;
1270
1271 for (i = 0; i < nr_slots; i++) {
1272 struct bpf_stack_state *slot = &state->stack[spi - i];
1273
1274 for (j = 0; j < BPF_REG_SIZE; j++)
1275 if (slot->slot_type[j] == STACK_ITER)
1276 return false;
1277 }
1278
1279 return true;
1280}
1281
1282static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1283 struct btf *btf, u32 btf_id, int nr_slots)
1284{
1285 struct bpf_func_state *state = func(env, reg);
1286 int spi, i, j;
1287
1288 spi = iter_get_spi(env, reg, nr_slots);
1289 if (spi < 0)
1290 return -EINVAL;
1291
1292 for (i = 0; i < nr_slots; i++) {
1293 struct bpf_stack_state *slot = &state->stack[spi - i];
1294 struct bpf_reg_state *st = &slot->spilled_ptr;
1295
1296 if (st->type & PTR_UNTRUSTED)
1297 return -EPROTO;
1298 /* only main (first) slot has ref_obj_id set */
1299 if (i == 0 && !st->ref_obj_id)
1300 return -EINVAL;
1301 if (i != 0 && st->ref_obj_id)
1302 return -EINVAL;
1303 if (st->iter.btf != btf || st->iter.btf_id != btf_id)
1304 return -EINVAL;
1305
1306 for (j = 0; j < BPF_REG_SIZE; j++)
1307 if (slot->slot_type[j] != STACK_ITER)
1308 return -EINVAL;
1309 }
1310
1311 return 0;
1312}
1313
1314/* Check if given stack slot is "special":
1315 * - spilled register state (STACK_SPILL);
1316 * - dynptr state (STACK_DYNPTR);
1317 * - iter state (STACK_ITER).
1318 */
1319static bool is_stack_slot_special(const struct bpf_stack_state *stack)
1320{
1321 enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1];
1322
1323 switch (type) {
1324 case STACK_SPILL:
1325 case STACK_DYNPTR:
1326 case STACK_ITER:
1327 return true;
1328 case STACK_INVALID:
1329 case STACK_MISC:
1330 case STACK_ZERO:
1331 return false;
1332 default:
1333 WARN_ONCE(1, "unknown stack slot type %d\n", type);
1334 return true;
1335 }
1336}
1337
1338/* The reg state of a pointer or a bounded scalar was saved when
1339 * it was spilled to the stack.
1340 */
1341static bool is_spilled_reg(const struct bpf_stack_state *stack)
1342{
1343 return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
1344}
1345
1346static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack)
1347{
1348 return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL &&
1349 stack->spilled_ptr.type == SCALAR_VALUE;
1350}
1351
1352static void scrub_spilled_slot(u8 *stype)
1353{
1354 if (*stype != STACK_INVALID)
1355 *stype = STACK_MISC;
1356}
1357
1358static void print_scalar_ranges(struct bpf_verifier_env *env,
1359 const struct bpf_reg_state *reg,
1360 const char **sep)
1361{
1362 struct {
1363 const char *name;
1364 u64 val;
1365 bool omit;
1366 } minmaxs[] = {
1367 {"smin", reg->smin_value, reg->smin_value == S64_MIN},
1368 {"smax", reg->smax_value, reg->smax_value == S64_MAX},
1369 {"umin", reg->umin_value, reg->umin_value == 0},
1370 {"umax", reg->umax_value, reg->umax_value == U64_MAX},
1371 {"smin32", (s64)reg->s32_min_value, reg->s32_min_value == S32_MIN},
1372 {"smax32", (s64)reg->s32_max_value, reg->s32_max_value == S32_MAX},
1373 {"umin32", reg->u32_min_value, reg->u32_min_value == 0},
1374 {"umax32", reg->u32_max_value, reg->u32_max_value == U32_MAX},
1375 }, *m1, *m2, *mend = &minmaxs[ARRAY_SIZE(minmaxs)];
1376 bool neg1, neg2;
1377
1378 for (m1 = &minmaxs[0]; m1 < mend; m1++) {
1379 if (m1->omit)
1380 continue;
1381
1382 neg1 = m1->name[0] == 's' && (s64)m1->val < 0;
1383
		verbose(env, "%s%s=", *sep, m1->name);
1385 *sep = ",";
1386
1387 for (m2 = m1 + 2; m2 < mend; m2 += 2) {
1388 if (m2->omit || m2->val != m1->val)
1389 continue;
1390 /* don't mix negatives with positives */
1391 neg2 = m2->name[0] == 's' && (s64)m2->val < 0;
1392 if (neg2 != neg1)
1393 continue;
1394 m2->omit = true;
			verbose(env, "%s=", m2->name);
1396 }
1397
		verbose(env, m1->name[0] == 's' ? "%lld" : "%llu", m1->val);
1399 }
1400}
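
/* Worked example (hypothetical register): for a scalar known to be in
 * [0, 255] in both the 64-bit and 32-bit domains, the function above prints
 *
 *   smin=smin32=0,smax=umax=smax32=umax32=255
 *
 * because umin/umin32 are omitted (they are 0) and equal bounds are folded
 * into a single "name1=name2=value" group.
 */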
1401
1402static void print_verifier_state(struct bpf_verifier_env *env,
1403 const struct bpf_func_state *state,
1404 bool print_all)
1405{
1406 const struct bpf_reg_state *reg;
1407 enum bpf_reg_type t;
1408 int i;
1409
1410 if (state->frameno)
1411 verbose(private_data: env, fmt: " frame%d:", state->frameno);
1412 for (i = 0; i < MAX_BPF_REG; i++) {
1413 reg = &state->regs[i];
1414 t = reg->type;
1415 if (t == NOT_INIT)
1416 continue;
1417 if (!print_all && !reg_scratched(env, regno: i))
1418 continue;
1419 verbose(private_data: env, fmt: " R%d", i);
1420 print_liveness(env, live: reg->live);
1421 verbose(private_data: env, fmt: "=");
1422 if (t == SCALAR_VALUE && reg->precise)
1423 verbose(private_data: env, fmt: "P");
1424 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
1425 tnum_is_const(a: reg->var_off)) {
1426 /* reg->off should be 0 for SCALAR_VALUE */
1427 verbose(private_data: env, fmt: "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, type: t));
1428 verbose(private_data: env, fmt: "%lld", reg->var_off.value + reg->off);
1429 } else {
1430 const char *sep = "";
1431
1432 verbose(private_data: env, fmt: "%s", reg_type_str(env, type: t));
1433 if (base_type(type: t) == PTR_TO_BTF_ID)
1434 verbose(private_data: env, fmt: "%s", btf_type_name(btf: reg->btf, id: reg->btf_id));
1435 verbose(private_data: env, fmt: "(");
1436/*
1437 * _a stands for append, was shortened to avoid multiline statements below.
1438 * This macro is used to output a comma separated list of attributes.
1439 */
1440#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; })
1441
1442 if (reg->id)
1443 verbose_a("id=%d", reg->id);
1444 if (reg->ref_obj_id)
1445 verbose_a("ref_obj_id=%d", reg->ref_obj_id);
1446 if (type_is_non_owning_ref(type: reg->type))
1447 verbose_a("%s", "non_own_ref");
1448 if (t != SCALAR_VALUE)
1449 verbose_a("off=%d", reg->off);
1450 if (type_is_pkt_pointer(type: t))
1451 verbose_a("r=%d", reg->range);
1452 else if (base_type(type: t) == CONST_PTR_TO_MAP ||
1453 base_type(type: t) == PTR_TO_MAP_KEY ||
1454 base_type(type: t) == PTR_TO_MAP_VALUE)
1455 verbose_a("ks=%d,vs=%d",
1456 reg->map_ptr->key_size,
1457 reg->map_ptr->value_size);
1458 if (tnum_is_const(a: reg->var_off)) {
1459 /* Typically an immediate SCALAR_VALUE, but
1460 * could be a pointer whose offset is too big
1461 * for reg->off
1462 */
1463 verbose_a("imm=%llx", reg->var_off.value);
1464 } else {
1465 print_scalar_ranges(env, reg, sep: &sep);
1466 if (!tnum_is_unknown(a: reg->var_off)) {
1467 char tn_buf[48];
1468
1469 tnum_strn(str: tn_buf, size: sizeof(tn_buf), a: reg->var_off);
1470 verbose_a("var_off=%s", tn_buf);
1471 }
1472 }
1473#undef verbose_a
1474
1475 verbose(private_data: env, fmt: ")");
1476 }
1477 }
1478 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
1479 char types_buf[BPF_REG_SIZE + 1];
1480 bool valid = false;
1481 int j;
1482
1483 for (j = 0; j < BPF_REG_SIZE; j++) {
1484 if (state->stack[i].slot_type[j] != STACK_INVALID)
1485 valid = true;
1486 types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
1487 }
1488 types_buf[BPF_REG_SIZE] = 0;
1489 if (!valid)
1490 continue;
1491 if (!print_all && !stack_slot_scratched(env, regno: i))
1492 continue;
1493 switch (state->stack[i].slot_type[BPF_REG_SIZE - 1]) {
1494 case STACK_SPILL:
1495 reg = &state->stack[i].spilled_ptr;
1496 t = reg->type;
1497
1498 verbose(private_data: env, fmt: " fp%d", (-i - 1) * BPF_REG_SIZE);
1499 print_liveness(env, live: reg->live);
1500 verbose(private_data: env, fmt: "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, type: t));
1501 if (t == SCALAR_VALUE && reg->precise)
1502 verbose(private_data: env, fmt: "P");
1503 if (t == SCALAR_VALUE && tnum_is_const(a: reg->var_off))
1504 verbose(private_data: env, fmt: "%lld", reg->var_off.value + reg->off);
1505 break;
1506 case STACK_DYNPTR:
1507 i += BPF_DYNPTR_NR_SLOTS - 1;
1508 reg = &state->stack[i].spilled_ptr;
1509
1510 verbose(private_data: env, fmt: " fp%d", (-i - 1) * BPF_REG_SIZE);
1511 print_liveness(env, live: reg->live);
1512 verbose(private_data: env, fmt: "=dynptr_%s", dynptr_type_str(type: reg->dynptr.type));
1513 if (reg->ref_obj_id)
1514 verbose(private_data: env, fmt: "(ref_id=%d)", reg->ref_obj_id);
1515 break;
1516 case STACK_ITER:
1517 /* only main slot has ref_obj_id set; skip others */
1518 reg = &state->stack[i].spilled_ptr;
1519 if (!reg->ref_obj_id)
1520 continue;
1521
1522 verbose(private_data: env, fmt: " fp%d", (-i - 1) * BPF_REG_SIZE);
1523 print_liveness(env, live: reg->live);
1524 verbose(private_data: env, fmt: "=iter_%s(ref_id=%d,state=%s,depth=%u)",
1525 iter_type_str(btf: reg->iter.btf, btf_id: reg->iter.btf_id),
1526 reg->ref_obj_id, iter_state_str(state: reg->iter.state),
1527 reg->iter.depth);
1528 break;
1529 case STACK_MISC:
1530 case STACK_ZERO:
1531 default:
1532 reg = &state->stack[i].spilled_ptr;
1533
1534 for (j = 0; j < BPF_REG_SIZE; j++)
1535 types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
1536 types_buf[BPF_REG_SIZE] = 0;
1537
1538 verbose(private_data: env, fmt: " fp%d", (-i - 1) * BPF_REG_SIZE);
1539 print_liveness(env, live: reg->live);
1540 verbose(private_data: env, fmt: "=%s", types_buf);
1541 break;
1542 }
1543 }
1544 if (state->acquired_refs && state->refs[0].id) {
1545 verbose(private_data: env, fmt: " refs=%d", state->refs[0].id);
1546 for (i = 1; i < state->acquired_refs; i++)
1547 if (state->refs[i].id)
1548 verbose(private_data: env, fmt: ",%d", state->refs[i].id);
1549 }
1550 if (state->in_callback_fn)
1551 verbose(private_data: env, fmt: " cb");
1552 if (state->in_async_callback_fn)
1553 verbose(private_data: env, fmt: " async_cb");
1554 verbose(private_data: env, fmt: "\n");
1555 if (!print_all)
1556 mark_verifier_state_clean(env);
1557}
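
/* Purely illustrative sample of the output produced above for a simple
 * program right after entry (exact fields vary by kernel version and program
 * type):
 *
 *   R1=ctx(off=0,imm=0) R10=fp0
 *
 * i.e. R1 is the context pointer and R10 the read-only frame pointer; stack
 * slots and acquired references are appended once they exist.
 */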
1558
1559static inline u32 vlog_alignment(u32 pos)
1560{
1561 return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT),
1562 BPF_LOG_MIN_ALIGNMENT) - pos - 1;
1563}
1564
1565static void print_insn_state(struct bpf_verifier_env *env,
1566 const struct bpf_func_state *state)
1567{
1568 if (env->prev_log_pos && env->prev_log_pos == env->log.end_pos) {
1569 /* remove new line character */
1570 bpf_vlog_reset(log: &env->log, new_pos: env->prev_log_pos - 1);
1571 verbose(private_data: env, fmt: "%*c;", vlog_alignment(pos: env->prev_insn_print_pos), ' ');
1572 } else {
1573 verbose(private_data: env, fmt: "%d:", env->insn_idx);
1574 }
1575 print_verifier_state(env, state, print_all: false);
1576}
1577
1578/* copy array src of length n * size bytes to dst. dst is reallocated if it's too
1579 * small to hold src. This is different from krealloc since we don't want to preserve
1580 * the contents of dst.
1581 *
1582 * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
1583 * not be allocated.
1584 */
1585static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
1586{
1587 size_t alloc_bytes;
1588 void *orig = dst;
1589 size_t bytes;
1590
1591 if (ZERO_OR_NULL_PTR(src))
1592 goto out;
1593
1594 if (unlikely(check_mul_overflow(n, size, &bytes)))
1595 return NULL;
1596
1597 alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
	dst = krealloc(orig, alloc_bytes, flags);
	if (!dst) {
		kfree(orig);
1601 return NULL;
1602 }
1603
1604 memcpy(dst, src, bytes);
1605out:
1606 return dst ? dst : ZERO_SIZE_PTR;
1607}
1608
1609/* resize an array from old_n items to new_n items. the array is reallocated if it's too
1610 * small to hold new_n items. new items are zeroed out if the array grows.
1611 *
1612 * Contrary to krealloc_array, does not free arr if new_n is zero.
1613 */
1614static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
1615{
1616 size_t alloc_size;
1617 void *new_arr;
1618
1619 if (!new_n || old_n == new_n)
1620 goto out;
1621
	alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
	new_arr = krealloc(arr, alloc_size, GFP_KERNEL);
	if (!new_arr) {
		kfree(arr);
1626 return NULL;
1627 }
1628 arr = new_arr;
1629
1630 if (new_n > old_n)
1631 memset(arr + old_n * size, 0, (new_n - old_n) * size);
1632
1633out:
1634 return arr ? arr : ZERO_SIZE_PTR;
1635}
1636
1637static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1638{
	dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
			       sizeof(struct bpf_reference_state), GFP_KERNEL);
1641 if (!dst->refs)
1642 return -ENOMEM;
1643
1644 dst->acquired_refs = src->acquired_refs;
1645 return 0;
1646}
1647
1648static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1649{
1650 size_t n = src->allocated_stack / BPF_REG_SIZE;
1651
	dst->stack = copy_array(dst->stack, src->stack, n,
				sizeof(struct bpf_stack_state), GFP_KERNEL);
1654 if (!dst->stack)
1655 return -ENOMEM;
1656
1657 dst->allocated_stack = src->allocated_stack;
1658 return 0;
1659}
1660
1661static int resize_reference_state(struct bpf_func_state *state, size_t n)
1662{
	state->refs = realloc_array(state->refs, state->acquired_refs, n,
				    sizeof(struct bpf_reference_state));
1665 if (!state->refs)
1666 return -ENOMEM;
1667
1668 state->acquired_refs = n;
1669 return 0;
1670}
1671
1672static int grow_stack_state(struct bpf_func_state *state, int size)
1673{
1674 size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
1675
1676 if (old_n >= n)
1677 return 0;
1678
	state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
1680 if (!state->stack)
1681 return -ENOMEM;
1682
1683 state->allocated_stack = size;
1684 return 0;
1685}
1686
1687/* Acquire a pointer id from the env and update the state->refs to include
1688 * this new pointer reference.
1689 * On success, returns a valid pointer id to associate with the register
1690 * On failure, returns a negative errno.
1691 */
1692static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
1693{
1694 struct bpf_func_state *state = cur_func(env);
1695 int new_ofs = state->acquired_refs;
1696 int id, err;
1697
	err = resize_reference_state(state, state->acquired_refs + 1);
1699 if (err)
1700 return err;
1701 id = ++env->id_gen;
1702 state->refs[new_ofs].id = id;
1703 state->refs[new_ofs].insn_idx = insn_idx;
1704 state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : 0;
1705
1706 return id;
1707}
1708
1709/* release function corresponding to acquire_reference_state(). Idempotent. */
1710static int release_reference_state(struct bpf_func_state *state, int ptr_id)
1711{
1712 int i, last_idx;
1713
1714 last_idx = state->acquired_refs - 1;
1715 for (i = 0; i < state->acquired_refs; i++) {
1716 if (state->refs[i].id == ptr_id) {
1717 /* Cannot release caller references in callbacks */
1718 if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
1719 return -EINVAL;
1720 if (last_idx && i != last_idx)
1721 memcpy(&state->refs[i], &state->refs[last_idx],
1722 sizeof(*state->refs));
1723 memset(&state->refs[last_idx], 0, sizeof(*state->refs));
1724 state->acquired_refs--;
1725 return 0;
1726 }
1727 }
1728 return -EINVAL;
1729}
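
/* From the program side, a sketch of the acquire/release pairing that these
 * two functions track (socket lookup shown as an example; tuple setup is
 * elided):
 *
 *   struct bpf_sock *sk;
 *
 *   sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 *   // acquire_reference_state() associates a new id with R0
 *   if (sk)
 *		bpf_sk_release(sk);
 *   // release_reference_state() drops that id; leaving the reference
 *   // unreleased on any path makes the verifier reject the program
 */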
1730
1731static void free_func_state(struct bpf_func_state *state)
1732{
1733 if (!state)
1734 return;
	kfree(state->refs);
	kfree(state->stack);
	kfree(state);
1738}
1739
1740static void clear_jmp_history(struct bpf_verifier_state *state)
1741{
	kfree(state->jmp_history);
1743 state->jmp_history = NULL;
1744 state->jmp_history_cnt = 0;
1745}
1746
1747static void free_verifier_state(struct bpf_verifier_state *state,
1748 bool free_self)
1749{
1750 int i;
1751
1752 for (i = 0; i <= state->curframe; i++) {
1753 free_func_state(state: state->frame[i]);
1754 state->frame[i] = NULL;
1755 }
1756 clear_jmp_history(state);
1757 if (free_self)
1758 kfree(objp: state);
1759}
1760
1761/* copy verifier state from src to dst growing dst stack space
1762 * when necessary to accommodate larger src stack
1763 */
1764static int copy_func_state(struct bpf_func_state *dst,
1765 const struct bpf_func_state *src)
1766{
1767 int err;
1768
1769 memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
1770 err = copy_reference_state(dst, src);
1771 if (err)
1772 return err;
1773 return copy_stack_state(dst, src);
1774}
1775
1776static int copy_verifier_state(struct bpf_verifier_state *dst_state,
1777 const struct bpf_verifier_state *src)
1778{
1779 struct bpf_func_state *dst;
1780 int i, err;
1781
1782 dst_state->jmp_history = copy_array(dst: dst_state->jmp_history, src: src->jmp_history,
1783 n: src->jmp_history_cnt, size: sizeof(struct bpf_idx_pair),
1784 GFP_USER);
1785 if (!dst_state->jmp_history)
1786 return -ENOMEM;
1787 dst_state->jmp_history_cnt = src->jmp_history_cnt;
1788
1789 /* if dst has more stack frames than src, free them; this is also
1790 * necessary in case of exceptional exits using bpf_throw.
1791 */
1792 for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
1793 free_func_state(state: dst_state->frame[i]);
1794 dst_state->frame[i] = NULL;
1795 }
1796 dst_state->speculative = src->speculative;
1797 dst_state->active_rcu_lock = src->active_rcu_lock;
1798 dst_state->curframe = src->curframe;
1799 dst_state->active_lock.ptr = src->active_lock.ptr;
1800 dst_state->active_lock.id = src->active_lock.id;
1801 dst_state->branches = src->branches;
1802 dst_state->parent = src->parent;
1803 dst_state->first_insn_idx = src->first_insn_idx;
1804 dst_state->last_insn_idx = src->last_insn_idx;
1805 dst_state->dfs_depth = src->dfs_depth;
1806 dst_state->used_as_loop_entry = src->used_as_loop_entry;
1807 for (i = 0; i <= src->curframe; i++) {
1808 dst = dst_state->frame[i];
1809 if (!dst) {
1810 dst = kzalloc(size: sizeof(*dst), GFP_KERNEL);
1811 if (!dst)
1812 return -ENOMEM;
1813 dst_state->frame[i] = dst;
1814 }
1815 err = copy_func_state(dst, src: src->frame[i]);
1816 if (err)
1817 return err;
1818 }
1819 return 0;
1820}
1821
1822static u32 state_htab_size(struct bpf_verifier_env *env)
1823{
1824 return env->prog->len;
1825}
1826
1827static struct bpf_verifier_state_list **explored_state(struct bpf_verifier_env *env, int idx)
1828{
1829 struct bpf_verifier_state *cur = env->cur_state;
1830 struct bpf_func_state *state = cur->frame[cur->curframe];
1831
1832 return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
1833}
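
/* The bucket index above mixes the instruction index with the callsite of
 * the current frame, so states reached at the same insn via different call
 * chains land in different lists. An illustrative computation (numbers are
 * made up):
 *
 *	idx = 42, callsite = 7, prog->len = 100
 *	bucket = (42 ^ 7) % 100 = 45
 */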
1834
1835static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_state *b)
1836{
1837 int fr;
1838
1839 if (a->curframe != b->curframe)
1840 return false;
1841
1842 for (fr = a->curframe; fr >= 0; fr--)
1843 if (a->frame[fr]->callsite != b->frame[fr]->callsite)
1844 return false;
1845
1846 return true;
1847}
1848
1849/* Open coded iterators allow back-edges in the state graph in order to
1850 * check unbounded loops that use iterators.
1851 *
1852 * In is_state_visited() it is necessary to know if explored states are
1853 * part of some loops in order to decide whether non-exact states
1854 * comparison could be used:
1855 * - non-exact states comparison establishes sub-state relation and uses
1856 * read and precision marks to do so, these marks are propagated from
1857 * children states and thus are not guaranteed to be final in a loop;
1858 * - exact states comparison just checks if current and explored states
1859 * are identical (and thus form a back-edge).
1860 *
1861 * Paper "A New Algorithm for Identifying Loops in Decompilation"
1862 * by Tao Wei, Jian Mao, Wei Zou and Yu Chen [1] presents a convenient
1863 * algorithm for loop structure detection and gives an overview of
1864 * relevant terminology. It also has helpful illustrations.
1865 *
1866 * [1] https://api.semanticscholar.org/CorpusID:15784067
1867 *
1868 * We use a similar algorithm, but because the loop nesting structure is
1869 * irrelevant for the verifier ours is significantly simpler and resembles
1870 * the strongly connected components algorithm from Sedgewick's textbook.
1871 *
1872 * Define topmost loop entry as a first node of the loop traversed in a
1873 * depth first search starting from initial state. The goal of the loop
1874 * tracking algorithm is to associate topmost loop entries with states
1875 * derived from these entries.
1876 *
1877 * At each step of the DFS states traversal the algorithm needs to identify
1878 * the following situations:
1879 *
1880 * initial initial initial
1881 * | | |
1882 * V V V
1883 * ... ... .---------> hdr
1884 * | | | |
1885 * V V | V
1886 * cur .-> succ | .------...
1887 * | | | | | |
1888 * V | V | V V
1889 * succ '-- cur | ... ...
1890 * | | |
1891 * | V V
1892 * | succ <- cur
1893 * | |
1894 * | V
1895 * | ...
1896 * | |
1897 * '----'
1898 *
1899 * (A) successor state of cur (B) successor state of cur or its entry
1900 * not yet traversed are in current DFS path, thus cur and succ
1901 * are members of the same outermost loop
1902 *
1903 * initial initial
1904 * | |
1905 * V V
1906 * ... ...
1907 * | |
1908 * V V
1909 * .------... .------...
1910 * | | | |
1911 * V V V V
1912 * .-> hdr ... ... ...
1913 * | | | | |
1914 * | V V V V
1915 * | succ <- cur succ <- cur
1916 * | | |
1917 * | V V
1918 * | ... ...
1919 * | | |
1920 * '----' exit
1921 *
1922 * (C) successor state of cur is a part of some loop but this loop
1923 * does not include cur or successor state is not in a loop at all.
1924 *
1925 * Algorithm could be described as the following python code:
1926 *
1927 * traversed = set() # Set of traversed nodes
1928 * entries = {} # Mapping from node to loop entry
1929 * depths = {} # Depth level assigned to graph node
1930 * path = set() # Current DFS path
1931 *
1932 * # Find outermost loop entry known for n
1933 * def get_loop_entry(n):
1934 * h = entries.get(n, None)
1935 * while h in entries and entries[h] != h:
1936 * h = entries[h]
1937 * return h
1938 *
1939 * # Update n's loop entry if h's outermost entry comes
1940 * # before n's outermost entry in current DFS path.
1941 * def update_loop_entry(n, h):
1942 * n1 = get_loop_entry(n) or n
1943 * h1 = get_loop_entry(h) or h
1944 * if h1 in path and depths[h1] <= depths[n1]:
1945 * entries[n] = h1
1946 *
1947 * def dfs(n, depth):
1948 * traversed.add(n)
1949 * path.add(n)
1950 * depths[n] = depth
1951 * for succ in G.successors(n):
1952 * if succ not in traversed:
1953 * # Case A: explore succ and update cur's loop entry
1954 * # only if succ's entry is in current DFS path.
1955 * dfs(succ, depth + 1)
1956 * h = get_loop_entry(succ)
1957 * update_loop_entry(n, h)
1958 * else:
1959 * # Case B or C depending on `h1 in path` check in update_loop_entry().
1960 * update_loop_entry(n, succ)
1961 * path.remove(n)
1962 *
1963 * To adapt this algorithm for use with the verifier:
1964 * - use st->branches == 0 as a signal that the DFS of succ has finished
1965 * and cur's loop entry has to be updated (case A), handle this in
1966 * update_branch_counts();
1967 * - use st->branches > 0 as a signal that st is in the current DFS path;
1968 * - handle cases B and C in is_state_visited();
1969 * - update topmost loop entry for intermediate states in get_loop_entry().
1970 */
1971static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_state *st)
1972{
1973 struct bpf_verifier_state *topmost = st->loop_entry, *old;
1974
1975 while (topmost && topmost->loop_entry && topmost != topmost->loop_entry)
1976 topmost = topmost->loop_entry;
1977 /* Update loop entries for intermediate states to avoid this
1978 * traversal in future get_loop_entry() calls.
1979 */
1980 while (st && st->loop_entry != topmost) {
1981 old = st->loop_entry;
1982 st->loop_entry = topmost;
1983 st = old;
1984 }
1985 return topmost;
1986}
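
/* get_loop_entry() also performs path compression. A small sketch, for a
 * chain of states s3 -> s2 -> s1 where each loop_entry points one step up
 * and s1 is the topmost entry:
 *
 *	before: s3->loop_entry == s2, s2->loop_entry == s1, s1->loop_entry == s1
 *	after:  s3->loop_entry == s1, s2->loop_entry == s1
 *
 * so subsequent calls resolve the topmost entry in a single step.
 */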
1987
1988static void update_loop_entry(struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr)
1989{
1990 struct bpf_verifier_state *cur1, *hdr1;
1991
1992 cur1 = get_loop_entry(st: cur) ?: cur;
1993 hdr1 = get_loop_entry(st: hdr) ?: hdr;
1994 /* The hdr1->branches check decides between cases B and C in
1995 * comment for get_loop_entry(). If hdr1->branches == 0 then
1996 * hdr's topmost loop entry is not in current DFS path,
1997 * hence 'cur' and 'hdr' are not in the same loop and there is
1998 * no need to update cur->loop_entry.
1999 */
2000 if (hdr1->branches && hdr1->dfs_depth <= cur1->dfs_depth) {
2001 cur->loop_entry = hdr;
2002 hdr->used_as_loop_entry = true;
2003 }
2004}
2005
2006static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
2007{
2008 while (st) {
2009 u32 br = --st->branches;
2010
2011 /* br == 0 signals that DFS exploration for 'st' is finished,
2012 * thus it is necessary to update parent's loop entry if it
2013 * turned out that st is a part of some loop.
2014 * This is a part of 'case A' in get_loop_entry() comment.
2015 */
2016 if (br == 0 && st->parent && st->loop_entry)
2017 update_loop_entry(cur: st->parent, hdr: st->loop_entry);
2018
2019 /* WARN_ON(br > 1) technically makes sense here,
2020 * but see comment in push_stack(), hence:
2021 */
2022 WARN_ONCE((int)br < 0,
2023 "BUG update_branch_counts:branches_to_explore=%d\n",
2024 br);
2025 if (br)
2026 break;
2027 st = st->parent;
2028 }
2029}
2030
2031static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
2032 int *insn_idx, bool pop_log)
2033{
2034 struct bpf_verifier_state *cur = env->cur_state;
2035 struct bpf_verifier_stack_elem *elem, *head = env->head;
2036 int err;
2037
2038 if (env->head == NULL)
2039 return -ENOENT;
2040
2041 if (cur) {
2042 err = copy_verifier_state(dst_state: cur, src: &head->st);
2043 if (err)
2044 return err;
2045 }
2046 if (pop_log)
2047 bpf_vlog_reset(log: &env->log, new_pos: head->log_pos);
2048 if (insn_idx)
2049 *insn_idx = head->insn_idx;
2050 if (prev_insn_idx)
2051 *prev_insn_idx = head->prev_insn_idx;
2052 elem = head->next;
2053 free_verifier_state(state: &head->st, free_self: false);
2054 kfree(objp: head);
2055 env->head = elem;
2056 env->stack_size--;
2057 return 0;
2058}
2059
2060static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
2061 int insn_idx, int prev_insn_idx,
2062 bool speculative)
2063{
2064 struct bpf_verifier_state *cur = env->cur_state;
2065 struct bpf_verifier_stack_elem *elem;
2066 int err;
2067
2068 elem = kzalloc(size: sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
2069 if (!elem)
2070 goto err;
2071
2072 elem->insn_idx = insn_idx;
2073 elem->prev_insn_idx = prev_insn_idx;
2074 elem->next = env->head;
2075 elem->log_pos = env->log.end_pos;
2076 env->head = elem;
2077 env->stack_size++;
2078 err = copy_verifier_state(dst_state: &elem->st, src: cur);
2079 if (err)
2080 goto err;
2081 elem->st.speculative |= speculative;
2082 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
2083 verbose(private_data: env, fmt: "The sequence of %d jumps is too complex.\n",
2084 env->stack_size);
2085 goto err;
2086 }
2087 if (elem->st.parent) {
2088 ++elem->st.parent->branches;
2089 /* WARN_ON(branches > 2) technically makes sense here,
2090 * but
2091 * 1. speculative states will bump 'branches' for non-branch
2092 * instructions
2093 * 2. is_state_visited() heuristics may decide not to create
2094 * a new state for a sequence of branches and all such current
2095 * and cloned states will be pointing to a single parent state
2096 * which might have large 'branches' count.
2097 */
2098 }
2099 return &elem->st;
2100err:
2101 free_verifier_state(state: env->cur_state, free_self: true);
2102 env->cur_state = NULL;
2103 /* pop all elements and return */
2104 while (!pop_stack(env, NULL, NULL, pop_log: false));
2105 return NULL;
2106}
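
/* push_stack() and pop_stack() implement the work list for the all-paths
 * walk: a conditional branch pushes the not-yet-explored path (a clone of
 * the current state) and verification later pops it and continues from its
 * insn_idx. A minimal sketch of the pattern as used by the branch handling
 * code:
 *
 *	other_branch = push_stack(env, target_insn, env->insn_idx, false);
 *	if (!other_branch)
 *		return -EFAULT;
 *	// keep verifying the fall-through path with env->cur_state
 *
 * The stack depth is bounded by BPF_COMPLEXITY_LIMIT_JMP_SEQ to cap the
 * number of outstanding, not-yet-explored branches.
 */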
2107
2108#define CALLER_SAVED_REGS 6
2109static const int caller_saved[CALLER_SAVED_REGS] = {
2110 BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
2111};
2112
2113/* This helper doesn't clear reg->id */
2114static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
2115{
2116 reg->var_off = tnum_const(value: imm);
2117 reg->smin_value = (s64)imm;
2118 reg->smax_value = (s64)imm;
2119 reg->umin_value = imm;
2120 reg->umax_value = imm;
2121
2122 reg->s32_min_value = (s32)imm;
2123 reg->s32_max_value = (s32)imm;
2124 reg->u32_min_value = (u32)imm;
2125 reg->u32_max_value = (u32)imm;
2126}
2127
2128/* Mark the unknown part of a register (variable offset or scalar value) as
2129 * known to have the value @imm.
2130 */
2131static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
2132{
2133 /* Clear off and union(map_ptr, range) */
2134 memset(((u8 *)reg) + sizeof(reg->type), 0,
2135 offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
2136 reg->id = 0;
2137 reg->ref_obj_id = 0;
2138 ___mark_reg_known(reg, imm);
2139}
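
/* For instance, marking a register as known 5 via __mark_reg_known() leaves
 * it in the following state (a sketch of the relevant fields only):
 *
 *	var_off       = (value = 5, mask = 0)   // every bit known
 *	smin = smax   = 5, umin = umax = 5
 *	s32/u32 min   = s32/u32 max = 5
 */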
2140
2141static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
2142{
2143 reg->var_off = tnum_const_subreg(a: reg->var_off, value: imm);
2144 reg->s32_min_value = (s32)imm;
2145 reg->s32_max_value = (s32)imm;
2146 reg->u32_min_value = (u32)imm;
2147 reg->u32_max_value = (u32)imm;
2148}
2149
2150/* Mark the 'variable offset' part of a register as zero. This should be
2151 * used only on registers holding a pointer type.
2152 */
2153static void __mark_reg_known_zero(struct bpf_reg_state *reg)
2154{
2155 __mark_reg_known(reg, imm: 0);
2156}
2157
2158static void __mark_reg_const_zero(struct bpf_reg_state *reg)
2159{
2160 __mark_reg_known(reg, imm: 0);
2161 reg->type = SCALAR_VALUE;
2162}
2163
2164static void mark_reg_known_zero(struct bpf_verifier_env *env,
2165 struct bpf_reg_state *regs, u32 regno)
2166{
2167 if (WARN_ON(regno >= MAX_BPF_REG)) {
2168 verbose(private_data: env, fmt: "mark_reg_known_zero(regs, %u)\n", regno);
2169 /* Something bad happened, let's kill all regs */
2170 for (regno = 0; regno < MAX_BPF_REG; regno++)
2171 __mark_reg_not_init(env, reg: regs + regno);
2172 return;
2173 }
2174 __mark_reg_known_zero(reg: regs + regno);
2175}
2176
2177static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
2178 bool first_slot, int dynptr_id)
2179{
2180 /* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
2181 * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
2182 * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
2183 */
2184 __mark_reg_known_zero(reg);
2185 reg->type = CONST_PTR_TO_DYNPTR;
2186 /* Give each dynptr a unique id to uniquely associate slices to it. */
2187 reg->id = dynptr_id;
2188 reg->dynptr.type = type;
2189 reg->dynptr.first_slot = first_slot;
2190}
2191
2192static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
2193{
2194 if (base_type(type: reg->type) == PTR_TO_MAP_VALUE) {
2195 const struct bpf_map *map = reg->map_ptr;
2196
2197 if (map->inner_map_meta) {
2198 reg->type = CONST_PTR_TO_MAP;
2199 reg->map_ptr = map->inner_map_meta;
2200 /* transfer reg's id, which is unique for every map_lookup_elem,
2201 * as the UID of the inner map.
2202 */
2203 if (btf_record_has_field(rec: map->inner_map_meta->record, type: BPF_TIMER))
2204 reg->map_uid = reg->id;
2205 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
2206 reg->type = PTR_TO_XDP_SOCK;
2207 } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
2208 map->map_type == BPF_MAP_TYPE_SOCKHASH) {
2209 reg->type = PTR_TO_SOCKET;
2210 } else {
2211 reg->type = PTR_TO_MAP_VALUE;
2212 }
2213 return;
2214 }
2215
2216 reg->type &= ~PTR_MAYBE_NULL;
2217}
2218
2219static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
2220 struct btf_field_graph_root *ds_head)
2221{
2222 __mark_reg_known_zero(reg: &regs[regno]);
2223 regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
2224 regs[regno].btf = ds_head->btf;
2225 regs[regno].btf_id = ds_head->value_btf_id;
2226 regs[regno].off = ds_head->node_offset;
2227}
2228
2229static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
2230{
2231 return type_is_pkt_pointer(type: reg->type);
2232}
2233
2234static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
2235{
2236 return reg_is_pkt_pointer(reg) ||
2237 reg->type == PTR_TO_PACKET_END;
2238}
2239
2240static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
2241{
2242 return base_type(type: reg->type) == PTR_TO_MEM &&
2243 (reg->type & DYNPTR_TYPE_SKB || reg->type & DYNPTR_TYPE_XDP);
2244}
2245
2246/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
2247static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
2248 enum bpf_reg_type which)
2249{
2250 /* The register can already have a range from prior markings.
2251 * This is fine as long as it hasn't been advanced from its
2252 * origin.
2253 */
2254 return reg->type == which &&
2255 reg->id == 0 &&
2256 reg->off == 0 &&
2257 tnum_equals_const(a: reg->var_off, b: 0);
2258}
2259
2260/* Reset the min/max bounds of a register */
2261static void __mark_reg_unbounded(struct bpf_reg_state *reg)
2262{
2263 reg->smin_value = S64_MIN;
2264 reg->smax_value = S64_MAX;
2265 reg->umin_value = 0;
2266 reg->umax_value = U64_MAX;
2267
2268 reg->s32_min_value = S32_MIN;
2269 reg->s32_max_value = S32_MAX;
2270 reg->u32_min_value = 0;
2271 reg->u32_max_value = U32_MAX;
2272}
2273
2274static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
2275{
2276 reg->smin_value = S64_MIN;
2277 reg->smax_value = S64_MAX;
2278 reg->umin_value = 0;
2279 reg->umax_value = U64_MAX;
2280}
2281
2282static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
2283{
2284 reg->s32_min_value = S32_MIN;
2285 reg->s32_max_value = S32_MAX;
2286 reg->u32_min_value = 0;
2287 reg->u32_max_value = U32_MAX;
2288}
2289
2290static void __update_reg32_bounds(struct bpf_reg_state *reg)
2291{
2292 struct tnum var32_off = tnum_subreg(a: reg->var_off);
2293
2294 /* min signed is max(sign bit) | min(other bits) */
2295 reg->s32_min_value = max_t(s32, reg->s32_min_value,
2296 var32_off.value | (var32_off.mask & S32_MIN));
2297 /* max signed is min(sign bit) | max(other bits) */
2298 reg->s32_max_value = min_t(s32, reg->s32_max_value,
2299 var32_off.value | (var32_off.mask & S32_MAX));
2300 reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
2301 reg->u32_max_value = min(reg->u32_max_value,
2302 (u32)(var32_off.value | var32_off.mask));
2303}
2304
2305static void __update_reg64_bounds(struct bpf_reg_state *reg)
2306{
2307 /* min signed is max(sign bit) | min(other bits) */
2308 reg->smin_value = max_t(s64, reg->smin_value,
2309 reg->var_off.value | (reg->var_off.mask & S64_MIN));
2310 /* max signed is min(sign bit) | max(other bits) */
2311 reg->smax_value = min_t(s64, reg->smax_value,
2312 reg->var_off.value | (reg->var_off.mask & S64_MAX));
2313 reg->umin_value = max(reg->umin_value, reg->var_off.value);
2314 reg->umax_value = min(reg->umax_value,
2315 reg->var_off.value | reg->var_off.mask);
2316}
2317
2318static void __update_reg_bounds(struct bpf_reg_state *reg)
2319{
2320 __update_reg32_bounds(reg);
2321 __update_reg64_bounds(reg);
2322}
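
/* A worked example of the formulas above, for a 64-bit register whose
 * var_off is (value = 0x0, mask = 0xff), i.e. only the low byte is unknown:
 *
 *	smin >= value | (mask & S64_MIN) = 0x00   (sign bit is known 0)
 *	smax <= value | (mask & S64_MAX) = 0xff
 *	umin >= value                    = 0x00
 *	umax <= value | mask             = 0xff
 *
 * so the register ends up bounded to [0, 255] in both the signed and the
 * unsigned domain.
 */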
2323
2324/* Uses signed min/max values to inform unsigned, and vice-versa */
2325static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
2326{
2327 /* Learn sign from signed bounds.
2328 * If we cannot cross the sign boundary, then signed and unsigned bounds
2329 * are the same, so combine. This works even in the negative case, e.g.
2330 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
2331 */
2332 if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
2333 reg->s32_min_value = reg->u32_min_value =
2334 max_t(u32, reg->s32_min_value, reg->u32_min_value);
2335 reg->s32_max_value = reg->u32_max_value =
2336 min_t(u32, reg->s32_max_value, reg->u32_max_value);
2337 return;
2338 }
2339 /* Learn sign from unsigned bounds. Signed bounds cross the sign
2340 * boundary, so we must be careful.
2341 */
2342 if ((s32)reg->u32_max_value >= 0) {
2343 /* Positive. We can't learn anything from the smin, but smax
2344 * is positive, hence safe.
2345 */
2346 reg->s32_min_value = reg->u32_min_value;
2347 reg->s32_max_value = reg->u32_max_value =
2348 min_t(u32, reg->s32_max_value, reg->u32_max_value);
2349 } else if ((s32)reg->u32_min_value < 0) {
2350 /* Negative. We can't learn anything from the smax, but smin
2351 * is negative, hence safe.
2352 */
2353 reg->s32_min_value = reg->u32_min_value =
2354 max_t(u32, reg->s32_min_value, reg->u32_min_value);
2355 reg->s32_max_value = reg->u32_max_value;
2356 }
2357}
2358
2359static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
2360{
2361 /* Learn sign from signed bounds.
2362 * If we cannot cross the sign boundary, then signed and unsigned bounds
2363 * are the same, so combine. This works even in the negative case, e.g.
2364 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
2365 */
2366 if (reg->smin_value >= 0 || reg->smax_value < 0) {
2367 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
2368 reg->umin_value);
2369 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
2370 reg->umax_value);
2371 return;
2372 }
2373 /* Learn sign from unsigned bounds. Signed bounds cross the sign
2374 * boundary, so we must be careful.
2375 */
2376 if ((s64)reg->umax_value >= 0) {
2377 /* Positive. We can't learn anything from the smin, but smax
2378 * is positive, hence safe.
2379 */
2380 reg->smin_value = reg->umin_value;
2381 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
2382 reg->umax_value);
2383 } else if ((s64)reg->umin_value < 0) {
2384 /* Negative. We can't learn anything from the smax, but smin
2385 * is negative, hence safe.
2386 */
2387 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
2388 reg->umin_value);
2389 reg->smax_value = reg->umax_value;
2390 }
2391}
2392
2393static void __reg_deduce_bounds(struct bpf_reg_state *reg)
2394{
2395 __reg32_deduce_bounds(reg);
2396 __reg64_deduce_bounds(reg);
2397}
2398
2399/* Attempts to improve var_off based on unsigned min/max information */
2400static void __reg_bound_offset(struct bpf_reg_state *reg)
2401{
2402 struct tnum var64_off = tnum_intersect(a: reg->var_off,
2403 b: tnum_range(min: reg->umin_value,
2404 max: reg->umax_value));
2405 struct tnum var32_off = tnum_intersect(a: tnum_subreg(a: var64_off),
2406 b: tnum_range(min: reg->u32_min_value,
2407 max: reg->u32_max_value));
2408
2409 reg->var_off = tnum_or(a: tnum_clear_subreg(a: var64_off), b: var32_off);
2410}
2411
2412static void reg_bounds_sync(struct bpf_reg_state *reg)
2413{
2414 /* We might have learned new bounds from the var_off. */
2415 __update_reg_bounds(reg);
2416 /* We might have learned something about the sign bit. */
2417 __reg_deduce_bounds(reg);
2418 /* We might have learned some bits from the bounds. */
2419 __reg_bound_offset(reg);
2420 /* Intersecting with the old var_off might have improved our bounds
2421 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
2422 * then new var_off is (0; 0x7f...fc) which improves our umax.
2423 */
2424 __update_reg_bounds(reg);
2425}
2426
2427static bool __reg32_bound_s64(s32 a)
2428{
2429 return a >= 0 && a <= S32_MAX;
2430}
2431
2432static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
2433{
2434 reg->umin_value = reg->u32_min_value;
2435 reg->umax_value = reg->u32_max_value;
2436
2437 /* Attempt to pull 32-bit signed bounds into 64-bit bounds; they must
2438 * be positive, otherwise set to worst-case bounds and refine later
2439 * from the tnum.
2440 */
2441 if (__reg32_bound_s64(a: reg->s32_min_value) &&
2442 __reg32_bound_s64(a: reg->s32_max_value)) {
2443 reg->smin_value = reg->s32_min_value;
2444 reg->smax_value = reg->s32_max_value;
2445 } else {
2446 reg->smin_value = 0;
2447 reg->smax_value = U32_MAX;
2448 }
2449}
2450
2451static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
2452{
2453 /* special case when 64-bit register has its upper 32 bits
2454 * zeroed. Typically happens after a zext or <<32, >>32 sequence
2455 * allowing us to use the 32-bit bounds directly.
2456 */
2457 if (tnum_equals_const(a: tnum_clear_subreg(a: reg->var_off), b: 0)) {
2458 __reg_assign_32_into_64(reg);
2459 } else {
2460 /* Otherwise the best we can do is push the lower 32-bit known and
2461 * unknown bits into the register (var_off set from jmp logic),
2462 * then learn as much as possible from the 64-bit tnum
2463 * known and unknown bits. The previous smin/smax bounds are
2464 * invalid here because of the jmp32 compare, so mark them unknown
2465 * so they do not impact the tnum bounds calculation.
2466 */
2467 __mark_reg64_unbounded(reg);
2468 }
2469 reg_bounds_sync(reg);
2470}
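
/* Example of the special case above: after a 32-bit ALU op such as
 * "w1 = w1 + 0" the upper 32 bits of r1 are known to be zero, so
 * tnum_clear_subreg(var_off) is the constant 0 and the 32-bit bounds can be
 * copied into the 64-bit ones directly. Otherwise the 64-bit bounds are
 * reset and later re-derived from the tnum by reg_bounds_sync().
 */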
2471
2472static bool __reg64_bound_s32(s64 a)
2473{
2474 return a >= S32_MIN && a <= S32_MAX;
2475}
2476
2477static bool __reg64_bound_u32(u64 a)
2478{
2479 return a >= U32_MIN && a <= U32_MAX;
2480}
2481
2482static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
2483{
2484 __mark_reg32_unbounded(reg);
2485 if (__reg64_bound_s32(a: reg->smin_value) && __reg64_bound_s32(a: reg->smax_value)) {
2486 reg->s32_min_value = (s32)reg->smin_value;
2487 reg->s32_max_value = (s32)reg->smax_value;
2488 }
2489 if (__reg64_bound_u32(a: reg->umin_value) && __reg64_bound_u32(a: reg->umax_value)) {
2490 reg->u32_min_value = (u32)reg->umin_value;
2491 reg->u32_max_value = (u32)reg->umax_value;
2492 }
2493 reg_bounds_sync(reg);
2494}
2495
2496/* Mark a register as having a completely unknown (scalar) value. */
2497static void __mark_reg_unknown(const struct bpf_verifier_env *env,
2498 struct bpf_reg_state *reg)
2499{
2500 /*
2501 * Clear type, off, and union(map_ptr, range) and
2502 * padding between 'type' and union
2503 */
2504 memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
2505 reg->type = SCALAR_VALUE;
2506 reg->id = 0;
2507 reg->ref_obj_id = 0;
2508 reg->var_off = tnum_unknown;
2509 reg->frameno = 0;
2510 reg->precise = !env->bpf_capable;
2511 __mark_reg_unbounded(reg);
2512}
2513
2514static void mark_reg_unknown(struct bpf_verifier_env *env,
2515 struct bpf_reg_state *regs, u32 regno)
2516{
2517 if (WARN_ON(regno >= MAX_BPF_REG)) {
2518 verbose(private_data: env, fmt: "mark_reg_unknown(regs, %u)\n", regno);
2519 /* Something bad happened, let's kill all regs except FP */
2520 for (regno = 0; regno < BPF_REG_FP; regno++)
2521 __mark_reg_not_init(env, reg: regs + regno);
2522 return;
2523 }
2524 __mark_reg_unknown(env, reg: regs + regno);
2525}
2526
2527static void __mark_reg_not_init(const struct bpf_verifier_env *env,
2528 struct bpf_reg_state *reg)
2529{
2530 __mark_reg_unknown(env, reg);
2531 reg->type = NOT_INIT;
2532}
2533
2534static void mark_reg_not_init(struct bpf_verifier_env *env,
2535 struct bpf_reg_state *regs, u32 regno)
2536{
2537 if (WARN_ON(regno >= MAX_BPF_REG)) {
2538 verbose(private_data: env, fmt: "mark_reg_not_init(regs, %u)\n", regno);
2539 /* Something bad happened, let's kill all regs except FP */
2540 for (regno = 0; regno < BPF_REG_FP; regno++)
2541 __mark_reg_not_init(env, reg: regs + regno);
2542 return;
2543 }
2544 __mark_reg_not_init(env, reg: regs + regno);
2545}
2546
2547static void mark_btf_ld_reg(struct bpf_verifier_env *env,
2548 struct bpf_reg_state *regs, u32 regno,
2549 enum bpf_reg_type reg_type,
2550 struct btf *btf, u32 btf_id,
2551 enum bpf_type_flag flag)
2552{
2553 if (reg_type == SCALAR_VALUE) {
2554 mark_reg_unknown(env, regs, regno);
2555 return;
2556 }
2557 mark_reg_known_zero(env, regs, regno);
2558 regs[regno].type = PTR_TO_BTF_ID | flag;
2559 regs[regno].btf = btf;
2560 regs[regno].btf_id = btf_id;
2561}
2562
2563#define DEF_NOT_SUBREG (0)
2564static void init_reg_state(struct bpf_verifier_env *env,
2565 struct bpf_func_state *state)
2566{
2567 struct bpf_reg_state *regs = state->regs;
2568 int i;
2569
2570 for (i = 0; i < MAX_BPF_REG; i++) {
2571 mark_reg_not_init(env, regs, regno: i);
2572 regs[i].live = REG_LIVE_NONE;
2573 regs[i].parent = NULL;
2574 regs[i].subreg_def = DEF_NOT_SUBREG;
2575 }
2576
2577 /* frame pointer */
2578 regs[BPF_REG_FP].type = PTR_TO_STACK;
2579 mark_reg_known_zero(env, regs, BPF_REG_FP);
2580 regs[BPF_REG_FP].frameno = state->frameno;
2581}
2582
2583#define BPF_MAIN_FUNC (-1)
2584static void init_func_state(struct bpf_verifier_env *env,
2585 struct bpf_func_state *state,
2586 int callsite, int frameno, int subprogno)
2587{
2588 state->callsite = callsite;
2589 state->frameno = frameno;
2590 state->subprogno = subprogno;
2591 state->callback_ret_range = tnum_range(min: 0, max: 0);
2592 init_reg_state(env, state);
2593 mark_verifier_state_scratched(env);
2594}
2595
2596/* Similar to push_stack(), but for async callbacks */
2597static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
2598 int insn_idx, int prev_insn_idx,
2599 int subprog)
2600{
2601 struct bpf_verifier_stack_elem *elem;
2602 struct bpf_func_state *frame;
2603
2604 elem = kzalloc(size: sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
2605 if (!elem)
2606 goto err;
2607
2608 elem->insn_idx = insn_idx;
2609 elem->prev_insn_idx = prev_insn_idx;
2610 elem->next = env->head;
2611 elem->log_pos = env->log.end_pos;
2612 env->head = elem;
2613 env->stack_size++;
2614 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
2615 verbose(private_data: env,
2616 fmt: "The sequence of %d jumps is too complex for async cb.\n",
2617 env->stack_size);
2618 goto err;
2619 }
2620 /* Unlike push_stack() do not copy_verifier_state().
2621 * The caller state doesn't matter.
2622 * This is an async callback. It starts with a fresh stack.
2623 * Initialize it similarly to do_check_common().
2624 */
2625 elem->st.branches = 1;
2626 frame = kzalloc(size: sizeof(*frame), GFP_KERNEL);
2627 if (!frame)
2628 goto err;
2629 init_func_state(env, state: frame,
2630 BPF_MAIN_FUNC /* callsite */,
2631 frameno: 0 /* frameno within this callchain */,
2632 subprogno: subprog /* subprog number within this prog */);
2633 elem->st.frame[0] = frame;
2634 return &elem->st;
2635err:
2636 free_verifier_state(state: env->cur_state, free_self: true);
2637 env->cur_state = NULL;
2638 /* pop all elements and return */
2639 while (!pop_stack(env, NULL, NULL, pop_log: false));
2640 return NULL;
2641}
2642
2643
2644enum reg_arg_type {
2645 SRC_OP, /* register is used as source operand */
2646 DST_OP, /* register is used as destination operand */
2647 DST_OP_NO_MARK /* same as above, check only, don't mark */
2648};
2649
2650static int cmp_subprogs(const void *a, const void *b)
2651{
2652 return ((struct bpf_subprog_info *)a)->start -
2653 ((struct bpf_subprog_info *)b)->start;
2654}
2655
2656static int find_subprog(struct bpf_verifier_env *env, int off)
2657{
2658 struct bpf_subprog_info *p;
2659
2660 p = bsearch(key: &off, base: env->subprog_info, num: env->subprog_cnt,
2661 size: sizeof(env->subprog_info[0]), cmp: cmp_subprogs);
2662 if (!p)
2663 return -ENOENT;
2664 return p - env->subprog_info;
2666}
2667
2668static int add_subprog(struct bpf_verifier_env *env, int off)
2669{
2670 int insn_cnt = env->prog->len;
2671 int ret;
2672
2673 if (off >= insn_cnt || off < 0) {
2674 verbose(private_data: env, fmt: "call to invalid destination\n");
2675 return -EINVAL;
2676 }
2677 ret = find_subprog(env, off);
2678 if (ret >= 0)
2679 return ret;
2680 if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
2681 verbose(private_data: env, fmt: "too many subprograms\n");
2682 return -E2BIG;
2683 }
2684 /* determine subprog starts. The end is one before the next starts */
2685 env->subprog_info[env->subprog_cnt++].start = off;
2686 sort(base: env->subprog_info, num: env->subprog_cnt,
2687 size: sizeof(env->subprog_info[0]), cmp_func: cmp_subprogs, NULL);
2688 return env->subprog_cnt - 1;
2689}
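
/* subprog_info is kept sorted by start instruction so that find_subprog()
 * can use bsearch(). A small sketch of the layout after the starts 0, 5 and
 * 12 have been added to a 20-insn program:
 *
 *	subprog_info[0].start = 0    // main
 *	subprog_info[1].start = 5
 *	subprog_info[2].start = 12
 *	subprog_info[3].start = 20   // fake 'exit' subprog, added later
 *
 * find_subprog(env, 5) then returns index 1.
 */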
2690
2691static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env)
2692{
2693 struct bpf_prog_aux *aux = env->prog->aux;
2694 struct btf *btf = aux->btf;
2695 const struct btf_type *t;
2696 u32 main_btf_id, id;
2697 const char *name;
2698 int ret, i;
2699
2700 /* Non-zero func_info_cnt implies valid btf */
2701 if (!aux->func_info_cnt)
2702 return 0;
2703 main_btf_id = aux->func_info[0].type_id;
2704
2705 t = btf_type_by_id(btf, type_id: main_btf_id);
2706 if (!t) {
2707 verbose(private_data: env, fmt: "invalid btf id for main subprog in func_info\n");
2708 return -EINVAL;
2709 }
2710
2711 name = btf_find_decl_tag_value(btf, pt: t, comp_idx: -1, tag_key: "exception_callback:");
2712 if (IS_ERR(ptr: name)) {
2713 ret = PTR_ERR(ptr: name);
2714 /* If there is no tag present, there is no exception callback */
2715 if (ret == -ENOENT)
2716 ret = 0;
2717 else if (ret == -EEXIST)
2718 verbose(private_data: env, fmt: "multiple exception callback tags for main subprog\n");
2719 return ret;
2720 }
2721
2722 ret = btf_find_by_name_kind(btf, name, kind: BTF_KIND_FUNC);
2723 if (ret < 0) {
2724 verbose(private_data: env, fmt: "exception callback '%s' could not be found in BTF\n", name);
2725 return ret;
2726 }
2727 id = ret;
2728 t = btf_type_by_id(btf, type_id: id);
2729 if (btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
2730 verbose(private_data: env, fmt: "exception callback '%s' must have global linkage\n", name);
2731 return -EINVAL;
2732 }
2733 ret = 0;
2734 for (i = 0; i < aux->func_info_cnt; i++) {
2735 if (aux->func_info[i].type_id != id)
2736 continue;
2737 ret = aux->func_info[i].insn_off;
2738 /* Further func_info and subprog checks will also happen
2739 * later, so assume this is the right insn_off for now.
2740 */
2741 if (!ret) {
2742 verbose(private_data: env, fmt: "invalid exception callback insn_off in func_info: 0\n");
2743 ret = -EINVAL;
2744 }
2745 }
2746 if (!ret) {
2747 verbose(private_data: env, fmt: "exception callback type id not found in func_info\n");
2748 ret = -EINVAL;
2749 }
2750 return ret;
2751}
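
/* The "exception_callback:" decl tag above is attached to the main subprog
 * in the program's BTF. A hedged sketch of how a program might declare it,
 * using the __exception_cb() convenience macro from the BPF selftests
 * (function names here are illustrative only):
 *
 *	int my_exception_cb(u64 cookie);
 *
 *	SEC("tc")
 *	__exception_cb(my_exception_cb)
 *	int main_prog(struct __sk_buff *ctx)
 *	{
 *		...
 *	}
 *
 * which lets bpf_find_exception_callback_insn_off() resolve the first
 * instruction of my_exception_cb.
 */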
2752
2753#define MAX_KFUNC_DESCS 256
2754#define MAX_KFUNC_BTFS 256
2755
2756struct bpf_kfunc_desc {
2757 struct btf_func_model func_model;
2758 u32 func_id;
2759 s32 imm;
2760 u16 offset;
2761 unsigned long addr;
2762};
2763
2764struct bpf_kfunc_btf {
2765 struct btf *btf;
2766 struct module *module;
2767 u16 offset;
2768};
2769
2770struct bpf_kfunc_desc_tab {
2771 /* Sorted by func_id (BTF ID) and offset (fd_array offset) during
2772 * verification. JITs do lookups by bpf_insn, where func_id may not be
2773 * available, therefore at the end of verification do_misc_fixups()
2774 * sorts this by imm and offset.
2775 */
2776 struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
2777 u32 nr_descs;
2778};
2779
2780struct bpf_kfunc_btf_tab {
2781 struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
2782 u32 nr_descs;
2783};
2784
2785static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
2786{
2787 const struct bpf_kfunc_desc *d0 = a;
2788 const struct bpf_kfunc_desc *d1 = b;
2789
2790 /* func_id is not greater than BTF_MAX_TYPE */
2791 return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
2792}
2793
2794static int kfunc_btf_cmp_by_off(const void *a, const void *b)
2795{
2796 const struct bpf_kfunc_btf *d0 = a;
2797 const struct bpf_kfunc_btf *d1 = b;
2798
2799 return d0->offset - d1->offset;
2800}
2801
2802static const struct bpf_kfunc_desc *
2803find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
2804{
2805 struct bpf_kfunc_desc desc = {
2806 .func_id = func_id,
2807 .offset = offset,
2808 };
2809 struct bpf_kfunc_desc_tab *tab;
2810
2811 tab = prog->aux->kfunc_tab;
2812 return bsearch(key: &desc, base: tab->descs, num: tab->nr_descs,
2813 size: sizeof(tab->descs[0]), cmp: kfunc_desc_cmp_by_id_off);
2814}
2815
2816int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
2817 u16 btf_fd_idx, u8 **func_addr)
2818{
2819 const struct bpf_kfunc_desc *desc;
2820
2821 desc = find_kfunc_desc(prog, func_id, offset: btf_fd_idx);
2822 if (!desc)
2823 return -EFAULT;
2824
2825 *func_addr = (u8 *)desc->addr;
2826 return 0;
2827}
2828
2829static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
2830 s16 offset)
2831{
2832 struct bpf_kfunc_btf kf_btf = { .offset = offset };
2833 struct bpf_kfunc_btf_tab *tab;
2834 struct bpf_kfunc_btf *b;
2835 struct module *mod;
2836 struct btf *btf;
2837 int btf_fd;
2838
2839 tab = env->prog->aux->kfunc_btf_tab;
2840 b = bsearch(key: &kf_btf, base: tab->descs, num: tab->nr_descs,
2841 size: sizeof(tab->descs[0]), cmp: kfunc_btf_cmp_by_off);
2842 if (!b) {
2843 if (tab->nr_descs == MAX_KFUNC_BTFS) {
2844 verbose(private_data: env, fmt: "too many different module BTFs\n");
2845 return ERR_PTR(error: -E2BIG);
2846 }
2847
2848 if (bpfptr_is_null(bpfptr: env->fd_array)) {
2849 verbose(private_data: env, fmt: "kfunc offset > 0 without fd_array is invalid\n");
2850 return ERR_PTR(error: -EPROTO);
2851 }
2852
2853 if (copy_from_bpfptr_offset(dst: &btf_fd, src: env->fd_array,
2854 offset: offset * sizeof(btf_fd),
2855 size: sizeof(btf_fd)))
2856 return ERR_PTR(error: -EFAULT);
2857
2858 btf = btf_get_by_fd(fd: btf_fd);
2859 if (IS_ERR(ptr: btf)) {
2860 verbose(private_data: env, fmt: "invalid module BTF fd specified\n");
2861 return btf;
2862 }
2863
2864 if (!btf_is_module(btf)) {
2865 verbose(private_data: env, fmt: "BTF fd for kfunc is not a module BTF\n");
2866 btf_put(btf);
2867 return ERR_PTR(error: -EINVAL);
2868 }
2869
2870 mod = btf_try_get_module(btf);
2871 if (!mod) {
2872 btf_put(btf);
2873 return ERR_PTR(error: -ENXIO);
2874 }
2875
2876 b = &tab->descs[tab->nr_descs++];
2877 b->btf = btf;
2878 b->module = mod;
2879 b->offset = offset;
2880
2881 sort(base: tab->descs, num: tab->nr_descs, size: sizeof(tab->descs[0]),
2882 cmp_func: kfunc_btf_cmp_by_off, NULL);
2883 }
2884 return b->btf;
2885}
2886
2887void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
2888{
2889 if (!tab)
2890 return;
2891
2892 while (tab->nr_descs--) {
2893 module_put(module: tab->descs[tab->nr_descs].module);
2894 btf_put(btf: tab->descs[tab->nr_descs].btf);
2895 }
2896 kfree(objp: tab);
2897}
2898
2899static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
2900{
2901 if (offset) {
2902 if (offset < 0) {
2903 /* In the future, this could be allowed in order to increase the
2904 * limit of the fd index into fd_array, by interpreting it as a u16.
2905 */
2906 verbose(private_data: env, fmt: "negative offset disallowed for kernel module function call\n");
2907 return ERR_PTR(error: -EINVAL);
2908 }
2909
2910 return __find_kfunc_desc_btf(env, offset);
2911 }
2912 return btf_vmlinux ?: ERR_PTR(error: -ENOENT);
2913}
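
/* In other words: a zero insn->off means the kfunc is resolved against
 * vmlinux BTF, while a positive off is an index into the fd_array passed at
 * BPF_PROG_LOAD time, pointing at a module BTF fd. A made-up example:
 *
 *	fd_array = [ <unused>, 7 ]           // fd 7: BTF of some module
 *	kfunc call with off = 0  -> vmlinux BTF
 *	kfunc call with off = 1  -> btf_get_by_fd(fd_array[1]) == module BTF
 */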
2914
2915static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
2916{
2917 const struct btf_type *func, *func_proto;
2918 struct bpf_kfunc_btf_tab *btf_tab;
2919 struct bpf_kfunc_desc_tab *tab;
2920 struct bpf_prog_aux *prog_aux;
2921 struct bpf_kfunc_desc *desc;
2922 const char *func_name;
2923 struct btf *desc_btf;
2924 unsigned long call_imm;
2925 unsigned long addr;
2926 int err;
2927
2928 prog_aux = env->prog->aux;
2929 tab = prog_aux->kfunc_tab;
2930 btf_tab = prog_aux->kfunc_btf_tab;
2931 if (!tab) {
2932 if (!btf_vmlinux) {
2933 verbose(private_data: env, fmt: "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
2934 return -ENOTSUPP;
2935 }
2936
2937 if (!env->prog->jit_requested) {
2938 verbose(private_data: env, fmt: "JIT is required for calling kernel function\n");
2939 return -ENOTSUPP;
2940 }
2941
2942 if (!bpf_jit_supports_kfunc_call()) {
2943 verbose(private_data: env, fmt: "JIT does not support calling kernel function\n");
2944 return -ENOTSUPP;
2945 }
2946
2947 if (!env->prog->gpl_compatible) {
2948 verbose(private_data: env, fmt: "cannot call kernel function from non-GPL compatible program\n");
2949 return -EINVAL;
2950 }
2951
2952 tab = kzalloc(size: sizeof(*tab), GFP_KERNEL);
2953 if (!tab)
2954 return -ENOMEM;
2955 prog_aux->kfunc_tab = tab;
2956 }
2957
2958 /* func_id == 0 is always invalid, but instead of returning an error, be
2959 * conservative and wait until the code elimination pass before returning
2960 * the error, so that invalid calls that get pruned out remain allowed in
2961 * BPF programs loaded from userspace. It is also required that the offset
2962 * be untouched for such calls.
2963 */
2964 if (!func_id && !offset)
2965 return 0;
2966
2967 if (!btf_tab && offset) {
2968 btf_tab = kzalloc(size: sizeof(*btf_tab), GFP_KERNEL);
2969 if (!btf_tab)
2970 return -ENOMEM;
2971 prog_aux->kfunc_btf_tab = btf_tab;
2972 }
2973
2974 desc_btf = find_kfunc_desc_btf(env, offset);
2975 if (IS_ERR(ptr: desc_btf)) {
2976 verbose(private_data: env, fmt: "failed to find BTF for kernel function\n");
2977 return PTR_ERR(ptr: desc_btf);
2978 }
2979
2980 if (find_kfunc_desc(prog: env->prog, func_id, offset))
2981 return 0;
2982
2983 if (tab->nr_descs == MAX_KFUNC_DESCS) {
2984 verbose(private_data: env, fmt: "too many different kernel function calls\n");
2985 return -E2BIG;
2986 }
2987
2988 func = btf_type_by_id(btf: desc_btf, type_id: func_id);
2989 if (!func || !btf_type_is_func(t: func)) {
2990 verbose(private_data: env, fmt: "kernel btf_id %u is not a function\n",
2991 func_id);
2992 return -EINVAL;
2993 }
2994 func_proto = btf_type_by_id(btf: desc_btf, type_id: func->type);
2995 if (!func_proto || !btf_type_is_func_proto(t: func_proto)) {
2996 verbose(private_data: env, fmt: "kernel function btf_id %u does not have a valid func_proto\n",
2997 func_id);
2998 return -EINVAL;
2999 }
3000
3001 func_name = btf_name_by_offset(btf: desc_btf, offset: func->name_off);
3002 addr = kallsyms_lookup_name(name: func_name);
3003 if (!addr) {
3004 verbose(private_data: env, fmt: "cannot find address for kernel function %s\n",
3005 func_name);
3006 return -EINVAL;
3007 }
3008 specialize_kfunc(env, func_id, offset, addr: &addr);
3009
3010 if (bpf_jit_supports_far_kfunc_call()) {
3011 call_imm = func_id;
3012 } else {
3013 call_imm = BPF_CALL_IMM(addr);
3014 /* Check whether the relative offset overflows desc->imm */
3015 if ((unsigned long)(s32)call_imm != call_imm) {
3016 verbose(private_data: env, fmt: "address of kernel function %s is out of range\n",
3017 func_name);
3018 return -EINVAL;
3019 }
3020 }
3021
3022 if (bpf_dev_bound_kfunc_id(btf_id: func_id)) {
3023 err = bpf_dev_bound_kfunc_check(log: &env->log, prog_aux);
3024 if (err)
3025 return err;
3026 }
3027
3028 desc = &tab->descs[tab->nr_descs++];
3029 desc->func_id = func_id;
3030 desc->imm = call_imm;
3031 desc->offset = offset;
3032 desc->addr = addr;
3033 err = btf_distill_func_proto(log: &env->log, btf: desc_btf,
3034 func_proto, func_name,
3035 m: &desc->func_model);
3036 if (!err)
3037 sort(base: tab->descs, num: tab->nr_descs, size: sizeof(tab->descs[0]),
3038 cmp_func: kfunc_desc_cmp_by_id_off, NULL);
3039 return err;
3040}
3041
3042static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
3043{
3044 const struct bpf_kfunc_desc *d0 = a;
3045 const struct bpf_kfunc_desc *d1 = b;
3046
3047 if (d0->imm != d1->imm)
3048 return d0->imm < d1->imm ? -1 : 1;
3049 if (d0->offset != d1->offset)
3050 return d0->offset < d1->offset ? -1 : 1;
3051 return 0;
3052}
3053
3054static void sort_kfunc_descs_by_imm_off(struct bpf_prog *prog)
3055{
3056 struct bpf_kfunc_desc_tab *tab;
3057
3058 tab = prog->aux->kfunc_tab;
3059 if (!tab)
3060 return;
3061
3062 sort(base: tab->descs, num: tab->nr_descs, size: sizeof(tab->descs[0]),
3063 cmp_func: kfunc_desc_cmp_by_imm_off, NULL);
3064}
3065
3066bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
3067{
3068 return !!prog->aux->kfunc_tab;
3069}
3070
3071const struct btf_func_model *
3072bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
3073 const struct bpf_insn *insn)
3074{
3075 const struct bpf_kfunc_desc desc = {
3076 .imm = insn->imm,
3077 .offset = insn->off,
3078 };
3079 const struct bpf_kfunc_desc *res;
3080 struct bpf_kfunc_desc_tab *tab;
3081
3082 tab = prog->aux->kfunc_tab;
3083 res = bsearch(key: &desc, base: tab->descs, num: tab->nr_descs,
3084 size: sizeof(tab->descs[0]), cmp: kfunc_desc_cmp_by_imm_off);
3085
3086 return res ? &res->func_model : NULL;
3087}
3088
3089static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
3090{
3091 struct bpf_subprog_info *subprog = env->subprog_info;
3092 int i, ret, insn_cnt = env->prog->len, ex_cb_insn;
3093 struct bpf_insn *insn = env->prog->insnsi;
3094
3095 /* Add entry function. */
3096 ret = add_subprog(env, off: 0);
3097 if (ret)
3098 return ret;
3099
3100 for (i = 0; i < insn_cnt; i++, insn++) {
3101 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
3102 !bpf_pseudo_kfunc_call(insn))
3103 continue;
3104
3105 if (!env->bpf_capable) {
3106 verbose(private_data: env, fmt: "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
3107 return -EPERM;
3108 }
3109
3110 if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
3111 ret = add_subprog(env, off: i + insn->imm + 1);
3112 else
3113 ret = add_kfunc_call(env, func_id: insn->imm, offset: insn->off);
3114
3115 if (ret < 0)
3116 return ret;
3117 }
3118
3119 ret = bpf_find_exception_callback_insn_off(env);
3120 if (ret < 0)
3121 return ret;
3122 ex_cb_insn = ret;
3123
3124 /* If ex_cb_insn > 0, this means that the main program has a subprog
3125 * marked using BTF decl tag to serve as the exception callback.
3126 */
3127 if (ex_cb_insn) {
3128 ret = add_subprog(env, off: ex_cb_insn);
3129 if (ret < 0)
3130 return ret;
3131 for (i = 1; i < env->subprog_cnt; i++) {
3132 if (env->subprog_info[i].start != ex_cb_insn)
3133 continue;
3134 env->exception_callback_subprog = i;
3135 break;
3136 }
3137 }
3138
3139 /* Add a fake 'exit' subprog which could simplify subprog iteration
3140 * logic. 'subprog_cnt' should not be increased.
3141 */
3142 subprog[env->subprog_cnt].start = insn_cnt;
3143
3144 if (env->log.level & BPF_LOG_LEVEL2)
3145 for (i = 0; i < env->subprog_cnt; i++)
3146 verbose(private_data: env, fmt: "func#%d @%d\n", i, subprog[i].start);
3147
3148 return 0;
3149}
3150
3151static int check_subprogs(struct bpf_verifier_env *env)
3152{
3153 int i, subprog_start, subprog_end, off, cur_subprog = 0;
3154 struct bpf_subprog_info *subprog = env->subprog_info;
3155 struct bpf_insn *insn = env->prog->insnsi;
3156 int insn_cnt = env->prog->len;
3157
3158 /* now check that all jumps are within the same subprog */
3159 subprog_start = subprog[cur_subprog].start;
3160 subprog_end = subprog[cur_subprog + 1].start;
3161 for (i = 0; i < insn_cnt; i++) {
3162 u8 code = insn[i].code;
3163
3164 if (code == (BPF_JMP | BPF_CALL) &&
3165 insn[i].src_reg == 0 &&
3166 insn[i].imm == BPF_FUNC_tail_call)
3167 subprog[cur_subprog].has_tail_call = true;
3168 if (BPF_CLASS(code) == BPF_LD &&
3169 (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
3170 subprog[cur_subprog].has_ld_abs = true;
3171 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
3172 goto next;
3173 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
3174 goto next;
3175 if (code == (BPF_JMP32 | BPF_JA))
3176 off = i + insn[i].imm + 1;
3177 else
3178 off = i + insn[i].off + 1;
3179 if (off < subprog_start || off >= subprog_end) {
3180 verbose(private_data: env, fmt: "jump out of range from insn %d to %d\n", i, off);
3181 return -EINVAL;
3182 }
3183next:
3184 if (i == subprog_end - 1) {
3185 /* to avoid fall-through from one subprog into another
3186 * the last insn of the subprog should be either an exit,
3187 * an unconditional jump back, or a bpf_throw call
3188 */
3189 if (code != (BPF_JMP | BPF_EXIT) &&
3190 code != (BPF_JMP32 | BPF_JA) &&
3191 code != (BPF_JMP | BPF_JA)) {
3192 verbose(private_data: env, fmt: "last insn is not an exit or jmp\n");
3193 return -EINVAL;
3194 }
3195 subprog_start = subprog_end;
3196 cur_subprog++;
3197 if (cur_subprog < env->subprog_cnt)
3198 subprog_end = subprog[cur_subprog + 1].start;
3199 }
3200 }
3201 return 0;
3202}
3203
3204/* Parentage chain of this register (or stack slot) should take care of all
3205 * issues like callee-saved registers, stack slot allocation time, etc.
3206 */
3207static int mark_reg_read(struct bpf_verifier_env *env,
3208 const struct bpf_reg_state *state,
3209 struct bpf_reg_state *parent, u8 flag)
3210{
3211 bool writes = parent == state->parent; /* Observe write marks */
3212 int cnt = 0;
3213
3214 while (parent) {
3215 /* if read wasn't screened by an earlier write ... */
3216 if (writes && state->live & REG_LIVE_WRITTEN)
3217 break;
3218 if (parent->live & REG_LIVE_DONE) {
3219 verbose(private_data: env, fmt: "verifier BUG type %s var_off %lld off %d\n",
3220 reg_type_str(env, type: parent->type),
3221 parent->var_off.value, parent->off);
3222 return -EFAULT;
3223 }
3224 /* The first condition is more likely to be true than the
3225 * second, so check it first.
3226 */
3227 if ((parent->live & REG_LIVE_READ) == flag ||
3228 parent->live & REG_LIVE_READ64)
3229 /* The parentage chain never changes and
3230 * this parent was already marked as LIVE_READ.
3231 * There is no need to keep walking the chain again and
3232 * keep re-marking all parents as LIVE_READ.
3233 * This case happens when the same register is read
3234 * multiple times without writes into it in-between.
3235 * Also, if parent has the stronger REG_LIVE_READ64 set,
3236 * then no need to set the weak REG_LIVE_READ32.
3237 */
3238 break;
3239 /* ... then we depend on parent's value */
3240 parent->live |= flag;
3241 /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
3242 if (flag == REG_LIVE_READ64)
3243 parent->live &= ~REG_LIVE_READ32;
3244 state = parent;
3245 parent = state->parent;
3246 writes = true;
3247 cnt++;
3248 }
3249
3250 if (env->longest_mark_read_walk < cnt)
3251 env->longest_mark_read_walk = cnt;
3252 return 0;
3253}
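
/* A short sketch of the walk above: if the current state's r6 is read and
 * its parent chain looks like
 *
 *	cur.r6 (no WRITTEN mark) -> parent1.r6 (WRITTEN) -> parent2.r6
 *
 * then parent1.r6 gets the REG_LIVE_READ mark and the walk stops there: the
 * write in parent1 screens the read off from parent2 and older states.
 * These liveness marks are what later lets state pruning ignore registers
 * that were never read.
 */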
3254
3255static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
3256{
3257 struct bpf_func_state *state = func(env, reg);
3258 int spi, ret;
3259
3260 /* For CONST_PTR_TO_DYNPTR, it must have already been done by
3261 * check_reg_arg in check_helper_call and mark_btf_func_reg_size in
3262 * check_kfunc_call.
3263 */
3264 if (reg->type == CONST_PTR_TO_DYNPTR)
3265 return 0;
3266 spi = dynptr_get_spi(env, reg);
3267 if (spi < 0)
3268 return spi;
3269 /* Caller ensures dynptr is valid and initialized, which means spi is in
3270 * bounds and spi is the first dynptr slot. Simply mark stack slot as
3271 * read.
3272 */
3273 ret = mark_reg_read(env, state: &state->stack[spi].spilled_ptr,
3274 parent: state->stack[spi].spilled_ptr.parent, flag: REG_LIVE_READ64);
3275 if (ret)
3276 return ret;
3277 return mark_reg_read(env, state: &state->stack[spi - 1].spilled_ptr,
3278 parent: state->stack[spi - 1].spilled_ptr.parent, flag: REG_LIVE_READ64);
3279}
3280
3281static int mark_iter_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
3282 int spi, int nr_slots)
3283{
3284 struct bpf_func_state *state = func(env, reg);
3285 int err, i;
3286
3287 for (i = 0; i < nr_slots; i++) {
3288 struct bpf_reg_state *st = &state->stack[spi - i].spilled_ptr;
3289
3290 err = mark_reg_read(env, state: st, parent: st->parent, flag: REG_LIVE_READ64);
3291 if (err)
3292 return err;
3293
3294 mark_stack_slot_scratched(env, spi: spi - i);
3295 }
3296
3297 return 0;
3298}
3299
3300/* This function is supposed to be used by the following 32-bit optimization
3301 * code only. It returns TRUE if the source or destination register operates
3302 * on 64 bits, otherwise it returns FALSE.
3303 */
3304static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
3305 u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
3306{
3307 u8 code, class, op;
3308
3309 code = insn->code;
3310 class = BPF_CLASS(code);
3311 op = BPF_OP(code);
3312 if (class == BPF_JMP) {
3313 /* BPF_EXIT for "main" will reach here. Return TRUE
3314 * conservatively.
3315 */
3316 if (op == BPF_EXIT)
3317 return true;
3318 if (op == BPF_CALL) {
3319 /* A BPF-to-BPF call will reach here because the caller-saved
3320 * clobbers are marked with DST_OP_NO_MARK; we don't care about
3321 * the register def because those registers are already marked
3322 * as NOT_INIT anyway.
3323 */
3324 if (insn->src_reg == BPF_PSEUDO_CALL)
3325 return false;
3326 /* Helper call will reach here because of arg type
3327 * check, conservatively return TRUE.
3328 */
3329 if (t == SRC_OP)
3330 return true;
3331
3332 return false;
3333 }
3334 }
3335
3336 if (class == BPF_ALU64 && op == BPF_END && (insn->imm == 16 || insn->imm == 32))
3337 return false;
3338
3339 if (class == BPF_ALU64 || class == BPF_JMP ||
3340 (class == BPF_ALU && op == BPF_END && insn->imm == 64))
3341 return true;
3342
3343 if (class == BPF_ALU || class == BPF_JMP32)
3344 return false;
3345
3346 if (class == BPF_LDX) {
3347 if (t != SRC_OP)
3348 return BPF_SIZE(code) == BPF_DW || BPF_MODE(code) == BPF_MEMSX;
3349 /* LDX source must be ptr. */
3350 return true;
3351 }
3352
3353 if (class == BPF_STX) {
3354 /* BPF_STX (including atomic variants) has multiple source
3355 * operands, one of which is a ptr. Check whether the caller is
3356 * asking about it.
3357 */
3358 if (t == SRC_OP && reg->type != SCALAR_VALUE)
3359 return true;
3360 return BPF_SIZE(code) == BPF_DW;
3361 }
3362
3363 if (class == BPF_LD) {
3364 u8 mode = BPF_MODE(code);
3365
3366 /* LD_IMM64 */
3367 if (mode == BPF_IMM)
3368 return true;
3369
3370 /* Both LD_IND and LD_ABS return 32-bit data. */
3371 if (t != SRC_OP)
3372 return false;
3373
3374 /* Implicit ctx ptr. */
3375 if (regno == BPF_REG_6)
3376 return true;
3377
3378 /* Explicit source could be any width. */
3379 return true;
3380 }
3381
3382 if (class == BPF_ST)
3383 /* The only source register for BPF_ST is a ptr. */
3384 return true;
3385
3386 /* Conservatively return true at default. */
3387 return true;
3388}
3389
3390/* Return the regno defined by the insn, or -1. */
3391static int insn_def_regno(const struct bpf_insn *insn)
3392{
3393 switch (BPF_CLASS(insn->code)) {
3394 case BPF_JMP:
3395 case BPF_JMP32:
3396 case BPF_ST:
3397 return -1;
3398 case BPF_STX:
3399 if (BPF_MODE(insn->code) == BPF_ATOMIC &&
3400 (insn->imm & BPF_FETCH)) {
3401 if (insn->imm == BPF_CMPXCHG)
3402 return BPF_REG_0;
3403 else
3404 return insn->src_reg;
3405 } else {
3406 return -1;
3407 }
3408 default:
3409 return insn->dst_reg;
3410 }
3411}
3412
3413/* Return TRUE if INSN has defined any 32-bit value explicitly. */
3414static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
3415{
3416 int dst_reg = insn_def_regno(insn);
3417
3418 if (dst_reg == -1)
3419 return false;
3420
3421 return !is_reg64(env, insn, regno: dst_reg, NULL, t: DST_OP);
3422}
3423
3424static void mark_insn_zext(struct bpf_verifier_env *env,
3425 struct bpf_reg_state *reg)
3426{
3427 s32 def_idx = reg->subreg_def;
3428
3429 if (def_idx == DEF_NOT_SUBREG)
3430 return;
3431
3432 env->insn_aux_data[def_idx - 1].zext_dst = true;
3433 /* The dst will be zero extended, so won't be sub-register anymore. */
3434 reg->subreg_def = DEF_NOT_SUBREG;
3435}
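
/* Putting is_reg64(), insn_def_regno() and mark_insn_zext() together, a
 * sketch of the 32-bit def / 64-bit use pattern they track:
 *
 *	insn 5: w3 = w1 + w2     // 32-bit def, r3->subreg_def = 6
 *	insn 9: r4 = r3          // 64-bit read of r3
 *
 * the read at insn 9 calls mark_insn_zext(), which sets zext_dst on insn 5
 * so that a zero-extension can be inserted there if the JIT needs it, and
 * resets r3->subreg_def to DEF_NOT_SUBREG.
 */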
3436
3437static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
3438 enum reg_arg_type t)
3439{
3440 struct bpf_verifier_state *vstate = env->cur_state;
3441 struct bpf_func_state *state = vstate->frame[vstate->curframe];
3442 struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
3443 struct bpf_reg_state *reg, *regs = state->regs;
3444 bool rw64;
3445
3446 if (regno >= MAX_BPF_REG) {
3447 verbose(private_data: env, fmt: "R%d is invalid\n", regno);
3448 return -EINVAL;
3449 }
3450
3451 mark_reg_scratched(env, regno);
3452
3453 reg = &regs[regno];
3454 rw64 = is_reg64(env, insn, regno, reg, t);
3455 if (t == SRC_OP) {
3456 /* check whether register used as source operand can be read */
3457 if (reg->type == NOT_INIT) {
3458 verbose(private_data: env, fmt: "R%d !read_ok\n", regno);
3459 return -EACCES;
3460 }
3461 /* We don't need to worry about FP liveness because it's read-only */
3462 if (regno == BPF_REG_FP)
3463 return 0;
3464
3465 if (rw64)
3466 mark_insn_zext(env, reg);
3467
3468 return mark_reg_read(env, state: reg, parent: reg->parent,
3469 flag: rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
3470 } else {
3471 /* check whether register used as dest operand can be written to */
3472 if (regno == BPF_REG_FP) {
3473 verbose(private_data: env, fmt: "frame pointer is read only\n");
3474 return -EACCES;
3475 }
3476 reg->live |= REG_LIVE_WRITTEN;
3477 reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
3478 if (t == DST_OP)
3479 mark_reg_unknown(env, regs, regno);
3480 }
3481 return 0;
3482}
3483
3484static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
3485{
3486 env->insn_aux_data[idx].jmp_point = true;
3487}
3488
3489static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
3490{
3491 return env->insn_aux_data[insn_idx].jmp_point;
3492}
3493
3494/* for any branch, call, exit record the history of jmps in the given state */
3495static int push_jmp_history(struct bpf_verifier_env *env,
3496 struct bpf_verifier_state *cur)
3497{
3498 u32 cnt = cur->jmp_history_cnt;
3499 struct bpf_idx_pair *p;
3500 size_t alloc_size;
3501
3502 if (!is_jmp_point(env, insn_idx: env->insn_idx))
3503 return 0;
3504
3505 cnt++;
3506 alloc_size = kmalloc_size_roundup(size: size_mul(factor1: cnt, factor2: sizeof(*p)));
3507 p = krealloc(objp: cur->jmp_history, new_size: alloc_size, GFP_USER);
3508 if (!p)
3509 return -ENOMEM;
3510 p[cnt - 1].idx = env->insn_idx;
3511 p[cnt - 1].prev_idx = env->prev_insn_idx;
3512 cur->jmp_history = p;
3513 cur->jmp_history_cnt = cnt;
3514 return 0;
3515}
3516
3517/* Backtrack one insn at a time. If idx is not at the top of recorded
3518 * history then previous instruction came from straight line execution.
3519 */
3520static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
3521 u32 *history)
3522{
3523 u32 cnt = *history;
3524
3525 if (cnt && st->jmp_history[cnt - 1].idx == i) {
3526 i = st->jmp_history[cnt - 1].prev_idx;
3527 (*history)--;
3528 } else {
3529 i--;
3530 }
3531 return i;
3532}
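/* Worked example: suppose insn 7 is "if r1 > 5 goto +10" and its taken
 * branch lands on insn 18, a marked jump point. When verification
 * reaches insn 18 with insn 7 as its predecessor, push_jmp_history()
 * records {idx = 18, prev_idx = 7}. Backtracking walks instructions in
 * reverse: at i == 18 the top history entry matches, so the previous
 * instruction is 7 and the entry is consumed; everywhere else the
 * predecessor is simply i - 1 (straight-line execution).
 */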
3533
3534static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
3535{
3536 const struct btf_type *func;
3537 struct btf *desc_btf;
3538
3539 if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
3540 return NULL;
3541
3542 desc_btf = find_kfunc_desc_btf(env: data, offset: insn->off);
3543 if (IS_ERR(ptr: desc_btf))
3544 return "<error>";
3545
3546 func = btf_type_by_id(btf: desc_btf, type_id: insn->imm);
3547 return btf_name_by_offset(btf: desc_btf, offset: func->name_off);
3548}
3549
3550static inline void bt_init(struct backtrack_state *bt, u32 frame)
3551{
3552 bt->frame = frame;
3553}
3554
3555static inline void bt_reset(struct backtrack_state *bt)
3556{
3557 struct bpf_verifier_env *env = bt->env;
3558
3559 memset(bt, 0, sizeof(*bt));
3560 bt->env = env;
3561}
3562
3563static inline u32 bt_empty(struct backtrack_state *bt)
3564{
3565 u64 mask = 0;
3566 int i;
3567
3568 for (i = 0; i <= bt->frame; i++)
3569 mask |= bt->reg_masks[i] | bt->stack_masks[i];
3570
3571 return mask == 0;
3572}
3573
3574static inline int bt_subprog_enter(struct backtrack_state *bt)
3575{
3576 if (bt->frame == MAX_CALL_FRAMES - 1) {
3577 verbose(private_data: bt->env, fmt: "BUG subprog enter from frame %d\n", bt->frame);
3578 WARN_ONCE(1, "verifier backtracking bug");
3579 return -EFAULT;
3580 }
3581 bt->frame++;
3582 return 0;
3583}
3584
3585static inline int bt_subprog_exit(struct backtrack_state *bt)
3586{
3587 if (bt->frame == 0) {
3588 verbose(private_data: bt->env, fmt: "BUG subprog exit from frame 0\n");
3589 WARN_ONCE(1, "verifier backtracking bug");
3590 return -EFAULT;
3591 }
3592 bt->frame--;
3593 return 0;
3594}
3595
3596static inline void bt_set_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
3597{
3598 bt->reg_masks[frame] |= 1 << reg;
3599}
3600
3601static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
3602{
3603 bt->reg_masks[frame] &= ~(1 << reg);
3604}
3605
3606static inline void bt_set_reg(struct backtrack_state *bt, u32 reg)
3607{
3608 bt_set_frame_reg(bt, frame: bt->frame, reg);
3609}
3610
3611static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg)
3612{
3613 bt_clear_frame_reg(bt, frame: bt->frame, reg);
3614}
3615
3616static inline void bt_set_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
3617{
3618 bt->stack_masks[frame] |= 1ull << slot;
3619}
3620
3621static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
3622{
3623 bt->stack_masks[frame] &= ~(1ull << slot);
3624}
3625
3626static inline void bt_set_slot(struct backtrack_state *bt, u32 slot)
3627{
3628 bt_set_frame_slot(bt, frame: bt->frame, slot);
3629}
3630
3631static inline void bt_clear_slot(struct backtrack_state *bt, u32 slot)
3632{
3633 bt_clear_frame_slot(bt, frame: bt->frame, slot);
3634}
3635
3636static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame)
3637{
3638 return bt->reg_masks[frame];
3639}
3640
3641static inline u32 bt_reg_mask(struct backtrack_state *bt)
3642{
3643 return bt->reg_masks[bt->frame];
3644}
3645
3646static inline u64 bt_frame_stack_mask(struct backtrack_state *bt, u32 frame)
3647{
3648 return bt->stack_masks[frame];
3649}
3650
3651static inline u64 bt_stack_mask(struct backtrack_state *bt)
3652{
3653 return bt->stack_masks[bt->frame];
3654}
3655
3656static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg)
3657{
3658 return bt->reg_masks[bt->frame] & (1 << reg);
3659}
3660
3661static inline bool bt_is_slot_set(struct backtrack_state *bt, u32 slot)
3662{
3663 return bt->stack_masks[bt->frame] & (1ull << slot);
3664}
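/* The backtrack state is a pair of plain bitmaps per call frame:
 * bt_set_reg(bt, BPF_REG_3) sets bit 3 in reg_masks[bt->frame], and
 * bt_set_slot(bt, 1) sets bit 1 in stack_masks[bt->frame], which
 * stands for the 8-byte stack slot at fp-16 (slot N covers
 * fp-(N+1)*8).
 */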
3665
3666/* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */
3667static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask)
3668{
3669 DECLARE_BITMAP(mask, 64);
3670 bool first = true;
3671 int i, n;
3672
3673 buf[0] = '\0';
3674
3675 bitmap_from_u64(dst: mask, mask: reg_mask);
3676 for_each_set_bit(i, mask, 32) {
3677 n = snprintf(buf, size: buf_sz, fmt: "%sr%d", first ? "" : ",", i);
3678 first = false;
3679 buf += n;
3680 buf_sz -= n;
3681 if (buf_sz < 0)
3682 break;
3683 }
3684}
3685/* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */
3686static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
3687{
3688 DECLARE_BITMAP(mask, 64);
3689 bool first = true;
3690 int i, n;
3691
3692 buf[0] = '\0';
3693
3694 bitmap_from_u64(dst: mask, mask: stack_mask);
3695 for_each_set_bit(i, mask, 64) {
3696 n = snprintf(buf, size: buf_sz, fmt: "%s%d", first ? "" : ",", -(i + 1) * 8);
3697 first = false;
3698 buf += n;
3699 buf_sz -= n;
3700 if (buf_sz < 0)
3701 break;
3702 }
3703}
3704
3705/* For given verifier state backtrack_insn() is called from the last insn to
3706 * the first insn. Its purpose is to compute a bitmask of registers and
3707 * stack slots that need precision in the parent verifier state.
3708 *
3709 * @idx is an index of the instruction we are currently processing;
3710 * @subseq_idx is an index of the subsequent instruction that:
3711 * - *would be* executed next, if jump history is viewed in forward order;
3712 * - *was* processed previously during backtracking.
3713 */
3714static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
3715 struct backtrack_state *bt)
3716{
3717 const struct bpf_insn_cbs cbs = {
3718 .cb_call = disasm_kfunc_name,
3719 .cb_print = verbose,
3720 .private_data = env,
3721 };
3722 struct bpf_insn *insn = env->prog->insnsi + idx;
3723 u8 class = BPF_CLASS(insn->code);
3724 u8 opcode = BPF_OP(insn->code);
3725 u8 mode = BPF_MODE(insn->code);
3726 u32 dreg = insn->dst_reg;
3727 u32 sreg = insn->src_reg;
3728 u32 spi, i;
3729
3730 if (insn->code == 0)
3731 return 0;
3732 if (env->log.level & BPF_LOG_LEVEL2) {
3733 fmt_reg_mask(buf: env->tmp_str_buf, TMP_STR_BUF_LEN, reg_mask: bt_reg_mask(bt));
3734 verbose(private_data: env, fmt: "mark_precise: frame%d: regs=%s ",
3735 bt->frame, env->tmp_str_buf);
3736 fmt_stack_mask(buf: env->tmp_str_buf, TMP_STR_BUF_LEN, stack_mask: bt_stack_mask(bt));
3737 verbose(private_data: env, fmt: "stack=%s before ", env->tmp_str_buf);
3738 verbose(private_data: env, fmt: "%d: ", idx);
3739 print_bpf_insn(cbs: &cbs, insn, allow_ptr_leaks: env->allow_ptr_leaks);
3740 }
3741
3742 if (class == BPF_ALU || class == BPF_ALU64) {
3743 if (!bt_is_reg_set(bt, reg: dreg))
3744 return 0;
3745 if (opcode == BPF_MOV) {
3746 if (BPF_SRC(insn->code) == BPF_X) {
3747 /* dreg = sreg or dreg = (s8, s16, s32)sreg
3748 * dreg needs precision after this insn
3749 * sreg needs precision before this insn
3750 */
3751 bt_clear_reg(bt, reg: dreg);
3752 bt_set_reg(bt, reg: sreg);
3753 } else {
3754 /* dreg = K
3755 * dreg needs precision after this insn.
3756 * Corresponding register is already marked
3757 * as precise=true in this verifier state.
3758 * No further markings in parent are necessary
3759 */
3760 bt_clear_reg(bt, reg: dreg);
3761 }
3762 } else {
3763 if (BPF_SRC(insn->code) == BPF_X) {
3764 /* dreg += sreg
3765 * both dreg and sreg need precision
3766 * before this insn
3767 */
3768 bt_set_reg(bt, reg: sreg);
3769 } /* else dreg += K
3770 * dreg still needs precision before this insn
3771 */
3772 }
3773 } else if (class == BPF_LDX) {
3774 if (!bt_is_reg_set(bt, reg: dreg))
3775 return 0;
3776 bt_clear_reg(bt, reg: dreg);
3777
3778 /* scalars can only be spilled into stack w/o losing precision.
3779 * Load from any other memory can be zero extended.
3780 * The desire to keep that precision is already indicated
3781 * by 'precise' mark in corresponding register of this state.
3782 * No further tracking necessary.
3783 */
3784 if (insn->src_reg != BPF_REG_FP)
3785 return 0;
3786
3787 /* dreg = *(u64 *)[fp - off] was a fill from the stack.
3788 * that [fp - off] slot contains scalar that needs to be
3789 * tracked with precision
3790 */
3791 spi = (-insn->off - 1) / BPF_REG_SIZE;
3792 if (spi >= 64) {
3793 verbose(private_data: env, fmt: "BUG spi %d\n", spi);
3794 WARN_ONCE(1, "verifier backtracking bug");
3795 return -EFAULT;
3796 }
3797 bt_set_slot(bt, slot: spi);
3798 } else if (class == BPF_STX || class == BPF_ST) {
3799 if (bt_is_reg_set(bt, reg: dreg))
3800 /* stx & st shouldn't be using _scalar_ dst_reg
3801 * to access memory. It means backtracking
3802 * encountered a case of pointer subtraction.
3803 */
3804 return -ENOTSUPP;
3805 /* scalars can only be spilled into stack */
3806 if (insn->dst_reg != BPF_REG_FP)
3807 return 0;
3808 spi = (-insn->off - 1) / BPF_REG_SIZE;
3809 if (spi >= 64) {
3810 verbose(private_data: env, fmt: "BUG spi %d\n", spi);
3811 WARN_ONCE(1, "verifier backtracking bug");
3812 return -EFAULT;
3813 }
3814 if (!bt_is_slot_set(bt, slot: spi))
3815 return 0;
3816 bt_clear_slot(bt, slot: spi);
3817 if (class == BPF_STX)
3818 bt_set_reg(bt, reg: sreg);
3819 } else if (class == BPF_JMP || class == BPF_JMP32) {
3820 if (bpf_pseudo_call(insn)) {
3821 int subprog_insn_idx, subprog;
3822
3823 subprog_insn_idx = idx + insn->imm + 1;
3824 subprog = find_subprog(env, off: subprog_insn_idx);
3825 if (subprog < 0)
3826 return -EFAULT;
3827
3828 if (subprog_is_global(env, subprog)) {
3829 /* check that jump history doesn't have any
3830 * extra instructions from subprog; the next
3831 * instruction after call to global subprog
3832 * should be literally next instruction in
3833 * caller program
3834 */
3835 WARN_ONCE(idx + 1 != subseq_idx, "verifier backtracking bug");
3836 /* r1-r5 are invalidated after subprog call,
3837 * so for global func call it shouldn't be set
3838 * anymore
3839 */
3840 if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
3841 verbose(private_data: env, fmt: "BUG regs %x\n", bt_reg_mask(bt));
3842 WARN_ONCE(1, "verifier backtracking bug");
3843 return -EFAULT;
3844 }
3845 /* global subprog always sets R0 */
3846 bt_clear_reg(bt, reg: BPF_REG_0);
3847 return 0;
3848 } else {
3849 /* static subprog call instruction, which
3850 * means that we are exiting current subprog,
3851 * so only r1-r5 could be still requested as
3852 * so only r1-r5 could still be requested as
3853 * the current frame should be zero by now
3854 */
3855 if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
3856 verbose(private_data: env, fmt: "BUG regs %x\n", bt_reg_mask(bt));
3857 WARN_ONCE(1, "verifier backtracking bug");
3858 return -EFAULT;
3859 }
3860 /* we don't track register spills perfectly,
3861 * so fallback to force-precise instead of failing */
3862 if (bt_stack_mask(bt) != 0)
3863 return -ENOTSUPP;
3864 /* propagate r1-r5 to the caller */
3865 for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
3866 if (bt_is_reg_set(bt, reg: i)) {
3867 bt_clear_reg(bt, reg: i);
3868 bt_set_frame_reg(bt, frame: bt->frame - 1, reg: i);
3869 }
3870 }
3871 if (bt_subprog_exit(bt))
3872 return -EFAULT;
3873 return 0;
3874 }
3875 } else if ((bpf_helper_call(insn) &&
3876 is_callback_calling_function(func_id: insn->imm) &&
3877 !is_async_callback_calling_function(func_id: insn->imm)) ||
3878 (bpf_pseudo_kfunc_call(insn) && is_callback_calling_kfunc(btf_id: insn->imm))) {
3879 /* callback-calling helper or kfunc call, which means
3880 * we are exiting from subprog, but unlike the subprog
3881 * call handling above, we shouldn't propagate
3882 * precision of r1-r5 (if any requested), as they are
3883 * not actually arguments passed directly to callback
3884 * subprogs
3885 */
3886 if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
3887 verbose(private_data: env, fmt: "BUG regs %x\n", bt_reg_mask(bt));
3888 WARN_ONCE(1, "verifier backtracking bug");
3889 return -EFAULT;
3890 }
3891 if (bt_stack_mask(bt) != 0)
3892 return -ENOTSUPP;
3893 /* clear r1-r5 in callback subprog's mask */
3894 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
3895 bt_clear_reg(bt, reg: i);
3896 if (bt_subprog_exit(bt))
3897 return -EFAULT;
3898 return 0;
3899 } else if (opcode == BPF_CALL) {
3900 /* kfunc with imm==0 is invalid and fixup_kfunc_call will
3901 * catch this error later. Make backtracking conservative
3902 * with ENOTSUPP.
3903 */
3904 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
3905 return -ENOTSUPP;
3906 /* regular helper call sets R0 */
3907 bt_clear_reg(bt, reg: BPF_REG_0);
3908 if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
3909 /* if backtracing was looking for registers R1-R5
3910 * they should have been found already.
3911 */
3912 verbose(private_data: env, fmt: "BUG regs %x\n", bt_reg_mask(bt));
3913 WARN_ONCE(1, "verifier backtracking bug");
3914 return -EFAULT;
3915 }
3916 } else if (opcode == BPF_EXIT) {
3917 bool r0_precise;
3918
3919 if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
3920 /* if backtracing was looking for registers R1-R5
3921 * they should have been found already.
3922 */
3923 verbose(private_data: env, fmt: "BUG regs %x\n", bt_reg_mask(bt));
3924 WARN_ONCE(1, "verifier backtracking bug");
3925 return -EFAULT;
3926 }
3927
3928 /* BPF_EXIT in subprog or callback always returns
3929 * right after the call instruction, so by checking
3930 * whether the instruction at subseq_idx-1 is subprog
3931 * call or not we can distinguish actual exit from
3932 * *subprog* from exit from *callback*. In the former
3933 * case, we need to propagate r0 precision, if
3934 * necessary. In the latter case we never do that.
3935 */
3936 r0_precise = subseq_idx - 1 >= 0 &&
3937 bpf_pseudo_call(insn: &env->prog->insnsi[subseq_idx - 1]) &&
3938 bt_is_reg_set(bt, reg: BPF_REG_0);
3939
3940 bt_clear_reg(bt, reg: BPF_REG_0);
3941 if (bt_subprog_enter(bt))
3942 return -EFAULT;
3943
3944 if (r0_precise)
3945 bt_set_reg(bt, reg: BPF_REG_0);
3946 /* r6-r9 and stack slots will stay set in caller frame
3947 * bitmasks until we return back from callee(s)
3948 */
3949 return 0;
3950 } else if (BPF_SRC(insn->code) == BPF_X) {
3951 if (!bt_is_reg_set(bt, reg: dreg) && !bt_is_reg_set(bt, reg: sreg))
3952 return 0;
3953 /* dreg <cond> sreg
3954 * Both dreg and sreg need precision before
3955 * this insn. If only sreg was marked precise
3956 * before it would be equally necessary to
3957 * propagate it to dreg.
3958 */
3959 bt_set_reg(bt, reg: dreg);
3960 bt_set_reg(bt, reg: sreg);
3961 /* else dreg <cond> K
3962 * Only dreg still needs precision before
3963 * this insn, so for the K-based conditional
3964 * there is nothing new to be marked.
3965 */
3966 }
3967 } else if (class == BPF_LD) {
3968 if (!bt_is_reg_set(bt, reg: dreg))
3969 return 0;
3970 bt_clear_reg(bt, reg: dreg);
3971 /* It's ld_imm64 or ld_abs or ld_ind.
3972 * For ld_imm64 no further tracking of precision
3973 * into parent is necessary
3974 */
3975 if (mode == BPF_IND || mode == BPF_ABS)
3976 /* to be analyzed */
3977 return -ENOTSUPP;
3978 }
3979 return 0;
3980}
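/* Illustrative backtrack of a short sequence, assuming precision was
 * requested for r0 after insn 5:
 *
 *   3: r6 = *(u64 *)(r10 - 8)   // fill from stack
 *   4: r0 = r6
 *   5: r0 += 4
 *
 * backtrack_insn() visits 5, 4, 3 in that order. At insn 5 (dreg += K)
 * r0 stays in the register mask. At insn 4 (MOV) r0 is cleared and r6
 * is set. At insn 3 (LDX from fp-8) r6 is cleared and stack slot fp-8
 * is set instead, so precision tracking continues into whatever
 * spilled that constant in a parent state.
 */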
3981
3982/* the scalar precision tracking algorithm:
3983 * . at the start all registers have precise=false.
3984 * . scalar ranges are tracked as normal through alu and jmp insns.
3985 * . once precise value of the scalar register is used in:
3986 * . ptr + scalar alu
3987 * . if (scalar cond K|scalar)
3988 * . helper_call(.., scalar, ...) where ARG_CONST is expected
3989 * backtrack through the verifier states and mark all registers and
3990 * stack slots with spilled constants that these scalar registers
3991 * should be precise.
3992 * . during state pruning two registers (or spilled stack slots)
3993 * are equivalent if both are not precise.
3994 *
3995 * Note the verifier cannot simply walk register parentage chain,
3996 * since many different registers and stack slots could have been
3997 * used to compute single precise scalar.
3998 *
3999 * The approach of starting with precise=true for all registers and then
4000 * backtrack to mark a register as not precise when the verifier detects
4001 * that program doesn't care about specific value (e.g., when helper
4002 * takes register as ARG_ANYTHING parameter) is not safe.
4003 *
4004 * It's ok to walk single parentage chain of the verifier states.
4005 * It's possible that this backtracking will go all the way till 1st insn.
4006 * All other branches will be explored for needing precision later.
4007 *
4008 * The backtracking needs to deal with cases like:
4009 * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
4010 * r9 -= r8
4011 * r5 = r9
4012 * if r5 > 0x79f goto pc+7
4013 * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
4014 * r5 += 1
4015 * ...
4016 * call bpf_perf_event_output#25
4017 * where .arg5_type = ARG_CONST_SIZE_OR_ZERO
4018 *
4019 * and this case:
4020 * r6 = 1
4021 * call foo // uses callee's r6 inside to compute r0
4022 * r0 += r6
4023 * if r0 == 0 goto
4024 *
4025 * to track above reg_mask/stack_mask needs to be independent for each frame.
4026 *
4027 * Also if parent's curframe > frame where backtracking started,
4028 * the verifier needs to mark registers in both frames, otherwise callees
4029 * may incorrectly prune callers. This is similar to
4030 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
4031 *
4032 * For now backtracking falls back into conservative marking.
4033 */
4034static void mark_all_scalars_precise(struct bpf_verifier_env *env,
4035 struct bpf_verifier_state *st)
4036{
4037 struct bpf_func_state *func;
4038 struct bpf_reg_state *reg;
4039 int i, j;
4040
4041 if (env->log.level & BPF_LOG_LEVEL2) {
4042 verbose(private_data: env, fmt: "mark_precise: frame%d: falling back to forcing all scalars precise\n",
4043 st->curframe);
4044 }
4045
4046 /* big hammer: mark all scalars precise in this path.
4047 * pop_stack may still get !precise scalars.
4048 * We also skip current state and go straight to first parent state,
4049 * because precision markings in current non-checkpointed state are
4050 * not needed. See why in the comment in __mark_chain_precision below.
4051 */
4052 for (st = st->parent; st; st = st->parent) {
4053 for (i = 0; i <= st->curframe; i++) {
4054 func = st->frame[i];
4055 for (j = 0; j < BPF_REG_FP; j++) {
4056 reg = &func->regs[j];
4057 if (reg->type != SCALAR_VALUE || reg->precise)
4058 continue;
4059 reg->precise = true;
4060 if (env->log.level & BPF_LOG_LEVEL2) {
4061 verbose(private_data: env, fmt: "force_precise: frame%d: forcing r%d to be precise\n",
4062 i, j);
4063 }
4064 }
4065 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
4066 if (!is_spilled_reg(stack: &func->stack[j]))
4067 continue;
4068 reg = &func->stack[j].spilled_ptr;
4069 if (reg->type != SCALAR_VALUE || reg->precise)
4070 continue;
4071 reg->precise = true;
4072 if (env->log.level & BPF_LOG_LEVEL2) {
4073 verbose(private_data: env, fmt: "force_precise: frame%d: forcing fp%d to be precise\n",
4074 i, -(j + 1) * 8);
4075 }
4076 }
4077 }
4078 }
4079}
4080
4081static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
4082{
4083 struct bpf_func_state *func;
4084 struct bpf_reg_state *reg;
4085 int i, j;
4086
4087 for (i = 0; i <= st->curframe; i++) {
4088 func = st->frame[i];
4089 for (j = 0; j < BPF_REG_FP; j++) {
4090 reg = &func->regs[j];
4091 if (reg->type != SCALAR_VALUE)
4092 continue;
4093 reg->precise = false;
4094 }
4095 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
4096 if (!is_spilled_reg(stack: &func->stack[j]))
4097 continue;
4098 reg = &func->stack[j].spilled_ptr;
4099 if (reg->type != SCALAR_VALUE)
4100 continue;
4101 reg->precise = false;
4102 }
4103 }
4104}
4105
4106static bool idset_contains(struct bpf_idset *s, u32 id)
4107{
4108 u32 i;
4109
4110 for (i = 0; i < s->count; ++i)
4111 if (s->ids[i] == id)
4112 return true;
4113
4114 return false;
4115}
4116
4117static int idset_push(struct bpf_idset *s, u32 id)
4118{
4119 if (WARN_ON_ONCE(s->count >= ARRAY_SIZE(s->ids)))
4120 return -EFAULT;
4121 s->ids[s->count++] = id;
4122 return 0;
4123}
4124
4125static void idset_reset(struct bpf_idset *s)
4126{
4127 s->count = 0;
4128}
4129
4130/* Collect a set of IDs for all registers currently marked as precise in env->bt.
4131 * Mark all registers with these IDs as precise.
4132 */
4133static int mark_precise_scalar_ids(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
4134{
4135 struct bpf_idset *precise_ids = &env->idset_scratch;
4136 struct backtrack_state *bt = &env->bt;
4137 struct bpf_func_state *func;
4138 struct bpf_reg_state *reg;
4139 DECLARE_BITMAP(mask, 64);
4140 int i, fr;
4141
4142 idset_reset(s: precise_ids);
4143
4144 for (fr = bt->frame; fr >= 0; fr--) {
4145 func = st->frame[fr];
4146
4147 bitmap_from_u64(dst: mask, mask: bt_frame_reg_mask(bt, frame: fr));
4148 for_each_set_bit(i, mask, 32) {
4149 reg = &func->regs[i];
4150 if (!reg->id || reg->type != SCALAR_VALUE)
4151 continue;
4152 if (idset_push(s: precise_ids, id: reg->id))
4153 return -EFAULT;
4154 }
4155
4156 bitmap_from_u64(dst: mask, mask: bt_frame_stack_mask(bt, frame: fr));
4157 for_each_set_bit(i, mask, 64) {
4158 if (i >= func->allocated_stack / BPF_REG_SIZE)
4159 break;
4160 if (!is_spilled_scalar_reg(stack: &func->stack[i]))
4161 continue;
4162 reg = &func->stack[i].spilled_ptr;
4163 if (!reg->id)
4164 continue;
4165 if (idset_push(s: precise_ids, id: reg->id))
4166 return -EFAULT;
4167 }
4168 }
4169
4170 for (fr = 0; fr <= st->curframe; ++fr) {
4171 func = st->frame[fr];
4172
4173 for (i = BPF_REG_0; i < BPF_REG_10; ++i) {
4174 reg = &func->regs[i];
4175 if (!reg->id)
4176 continue;
4177 if (!idset_contains(s: precise_ids, id: reg->id))
4178 continue;
4179 bt_set_frame_reg(bt, frame: fr, reg: i);
4180 }
4181 for (i = 0; i < func->allocated_stack / BPF_REG_SIZE; ++i) {
4182 if (!is_spilled_scalar_reg(stack: &func->stack[i]))
4183 continue;
4184 reg = &func->stack[i].spilled_ptr;
4185 if (!reg->id)
4186 continue;
4187 if (!idset_contains(s: precise_ids, id: reg->id))
4188 continue;
4189 bt_set_frame_slot(bt, frame: fr, slot: i);
4190 }
4191 }
4192
4193 return 0;
4194}
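/* Example: if env->bt has r1 of frame 0 marked and r1.id == r2.id == A
 * (say, after an earlier "r2 = r1"), the first pass above collects id A
 * and the second pass additionally marks r2, plus any stack slot
 * holding a spilled scalar with id A, in that frame's masks. A range
 * later learned for one register via find_equal_scalars() is thereby
 * backed by precision on every register sharing the id.
 */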
4195
4196/*
4197 * __mark_chain_precision() backtracks BPF program instruction sequence and
4198 * chain of verifier states making sure that register *regno* (if regno >= 0)
4199 * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
4200 * SCALARS, as well as any other registers and slots that contribute to
4201 * a tracked state of given registers/stack slots, depending on specific BPF
4202 * assembly instructions (see backtrack_insn() for exact instruction handling
4203 * logic). This backtracking relies on recorded jmp_history and is able to
4204 * traverse entire chain of parent states. This process ends only when all the
4205 * necessary registers/slots and their transitive dependencies are marked as
4206 * precise.
4207 *
4208 * One important and subtle aspect is that precise marks *do not matter* in
4209 * the currently verified state (current state). It is important to understand
4210 * why this is the case.
4211 *
4212 * First, note that current state is the state that is not yet "checkpointed",
4213 * i.e., it is not yet put into env->explored_states, and it has no children
4214 * states as well. It's ephemeral, and can end up either a) being discarded if
4215 * compatible explored state is found at some point or BPF_EXIT instruction is
4216 * reached or b) checkpointed and put into env->explored_states, branching out
4217 * into one or more children states.
4218 *
4219 * In the former case, precise markings in current state are completely
4220 * ignored by state comparison code (see regsafe() for details). Only
4221 * checkpointed ("old") state precise markings are important, and if old
4222 * state's register/slot is precise, regsafe() assumes current state's
4223 * register/slot as precise and checks value ranges exactly and precisely. If
4224 * states turn out to be compatible, current state's necessary precise
4225 * markings and any required parent states' precise markings are enforced
4226 * after the fact with propagate_precision() logic. But it's
4227 * important to realize that in this case, even after marking current state
4228 * registers/slots as precise, we immediately discard current state. So what
4229 * actually matters is any of the precise markings propagated into current
4230 * state's parent states, which are always checkpointed (due to b) case above).
4231 * As such, for scenario a) it doesn't matter if current state has precise
4232 * markings set or not.
4233 *
4234 * Now, for the scenario b), checkpointing and forking into child(ren)
4235 * state(s). Note that before current state gets to checkpointing step, any
4236 * processed instruction always assumes precise SCALAR register/slot
4237 * knowledge: if precise value or range is useful to prune jump branch, BPF
4238 * verifier takes this opportunity enthusiastically. Similarly, when
4239 * register's value is used to calculate offset or memory address, exact
4240 * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
4241 * what we mentioned above about state comparison ignoring precise markings
4242 * during state comparison, BPF verifier ignores and also assumes precise
4243 * markings *at will* during instruction verification process. But as verifier
4244 * assumes precision, it also propagates any precision dependencies across
4245 * parent states, which are not yet finalized, so can be further restricted
4246 * based on new knowledge gained from restrictions enforced by their children
4247 * states. This is so that once those parent states are finalized, i.e., when
4248 * they have no more active children state, state comparison logic in
4249 * is_state_visited() would enforce strict and precise SCALAR ranges, if
4250 * required for correctness.
4251 *
4252 * To build a bit more intuition, note also that once a state is checkpointed,
4253 * the path we took to get to that state is not important. This is crucial
4254 * property for state pruning. When state is checkpointed and finalized at
4255 * some instruction index, it can be correctly and safely used to "short
4256 * circuit" any *compatible* state that reaches exactly the same instruction
4257 * index. I.e., if we jumped to that instruction from a completely different
4258 * code path than original finalized state was derived from, it doesn't
4259 * matter, current state can be discarded because from that instruction
4260 * forward having a compatible state will ensure we will safely reach the
4261 * exit. States describe preconditions for further exploration, but completely
4262 * forget the history of how we got here.
4263 *
4264 * This also means that even if we needed precise SCALAR range to get to
4265 * finalized state, but from that point forward *that same* SCALAR register is
4266 * never used in a precise context (i.e., its precise value is not needed for
4267 * correctness), it's correct and safe to mark such register as "imprecise"
4268 * (i.e., precise marking set to false). This is what we rely on when we do
4269 * not set precise marking in current state. If no child state requires
4270 * precision for any given SCALAR register, it's safe to dictate that it can
4271 * be imprecise. If any child state does require this register to be precise,
4272 * we'll mark it precise later retroactively during precise markings
4273 * propagation from child state to parent states.
4274 *
4275 * Skipping precise marking setting in current state is a mild version of
4276 * relying on the above observation. But we can utilize this property even
4277 * more aggressively by proactively forgetting any precise marking in the
4278 * current state (which we inherited from the parent state), right before we
4279 * checkpoint it and branch off into new child state. This is done by
4280 * mark_all_scalars_imprecise() to hopefully get more permissive and generic
4281 * finalized states which help in short circuiting more future states.
4282 */
4283static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
4284{
4285 struct backtrack_state *bt = &env->bt;
4286 struct bpf_verifier_state *st = env->cur_state;
4287 int first_idx = st->first_insn_idx;
4288 int last_idx = env->insn_idx;
4289 int subseq_idx = -1;
4290 struct bpf_func_state *func;
4291 struct bpf_reg_state *reg;
4292 bool skip_first = true;
4293 int i, fr, err;
4294
4295 if (!env->bpf_capable)
4296 return 0;
4297
4298 /* set frame number from which we are starting to backtrack */
4299 bt_init(bt, frame: env->cur_state->curframe);
4300
4301 /* Do sanity checks against current state of register and/or stack
4302 * slot, but don't set precise flag in current state, as precision
4303 * tracking in the current state is unnecessary.
4304 */
4305 func = st->frame[bt->frame];
4306 if (regno >= 0) {
4307 reg = &func->regs[regno];
4308 if (reg->type != SCALAR_VALUE) {
4309 WARN_ONCE(1, "backtracing misuse");
4310 return -EFAULT;
4311 }
4312 bt_set_reg(bt, reg: regno);
4313 }
4314
4315 if (bt_empty(bt))
4316 return 0;
4317
4318 for (;;) {
4319 DECLARE_BITMAP(mask, 64);
4320 u32 history = st->jmp_history_cnt;
4321
4322 if (env->log.level & BPF_LOG_LEVEL2) {
4323 verbose(private_data: env, fmt: "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n",
4324 bt->frame, last_idx, first_idx, subseq_idx);
4325 }
4326
4327 /* If some register with scalar ID is marked as precise,
4328 * make sure that all registers sharing this ID are also precise.
4329 * This is needed to estimate effect of find_equal_scalars().
4330 * Do this at the last instruction of each state,
4331 * bpf_reg_state::id fields are valid for these instructions.
4332 *
4333 * Allows to track precision in situation like below:
4334 *
4335 * r2 = unknown value
4336 * ...
4337 * --- state #0 ---
4338 * ...
4339 * r1 = r2 // r1 and r2 now share the same ID
4340 * ...
4341 * --- state #1 {r1.id = A, r2.id = A} ---
4342 * ...
4343 * if (r2 > 10) goto exit; // find_equal_scalars() assigns range to r1
4344 * ...
4345 * --- state #2 {r1.id = A, r2.id = A} ---
4346 * r3 = r10
4347 * r3 += r1 // need to mark both r1 and r2
4348 */
4349 if (mark_precise_scalar_ids(env, st))
4350 return -EFAULT;
4351
4352 if (last_idx < 0) {
4353 /* we are at the entry into subprog, which
4354 * is expected for global funcs, but only if
4355 * requested precise registers are R1-R5
4356 * (which are global func's input arguments)
4357 */
4358 if (st->curframe == 0 &&
4359 st->frame[0]->subprogno > 0 &&
4360 st->frame[0]->callsite == BPF_MAIN_FUNC &&
4361 bt_stack_mask(bt) == 0 &&
4362 (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) == 0) {
4363 bitmap_from_u64(dst: mask, mask: bt_reg_mask(bt));
4364 for_each_set_bit(i, mask, 32) {
4365 reg = &st->frame[0]->regs[i];
4366 bt_clear_reg(bt, reg: i);
4367 if (reg->type == SCALAR_VALUE)
4368 reg->precise = true;
4369 }
4370 return 0;
4371 }
4372
4373 verbose(private_data: env, fmt: "BUG backtracking func entry subprog %d reg_mask %x stack_mask %llx\n",
4374 st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt));
4375 WARN_ONCE(1, "verifier backtracking bug");
4376 return -EFAULT;
4377 }
4378
4379 for (i = last_idx;;) {
4380 if (skip_first) {
4381 err = 0;
4382 skip_first = false;
4383 } else {
4384 err = backtrack_insn(env, idx: i, subseq_idx, bt);
4385 }
4386 if (err == -ENOTSUPP) {
4387 mark_all_scalars_precise(env, st: env->cur_state);
4388 bt_reset(bt);
4389 return 0;
4390 } else if (err) {
4391 return err;
4392 }
4393 if (bt_empty(bt))
4394 /* Found assignment(s) into tracked register in this state.
4395 * Since this state is already marked, just return.
4396 * Nothing to be tracked further in the parent state.
4397 */
4398 return 0;
4399 if (i == first_idx)
4400 break;
4401 subseq_idx = i;
4402 i = get_prev_insn_idx(st, i, history: &history);
4403 if (i >= env->prog->len) {
4404 /* This can happen if backtracking reached insn 0
4405 * and there are still reg_mask or stack_mask
4406 * to backtrack.
4407 * It means the backtracking missed the spot where
4408 * particular register was initialized with a constant.
4409 */
4410 verbose(private_data: env, fmt: "BUG backtracking idx %d\n", i);
4411 WARN_ONCE(1, "verifier backtracking bug");
4412 return -EFAULT;
4413 }
4414 }
4415 st = st->parent;
4416 if (!st)
4417 break;
4418
4419 for (fr = bt->frame; fr >= 0; fr--) {
4420 func = st->frame[fr];
4421 bitmap_from_u64(dst: mask, mask: bt_frame_reg_mask(bt, frame: fr));
4422 for_each_set_bit(i, mask, 32) {
4423 reg = &func->regs[i];
4424 if (reg->type != SCALAR_VALUE) {
4425 bt_clear_frame_reg(bt, frame: fr, reg: i);
4426 continue;
4427 }
4428 if (reg->precise)
4429 bt_clear_frame_reg(bt, frame: fr, reg: i);
4430 else
4431 reg->precise = true;
4432 }
4433
4434 bitmap_from_u64(dst: mask, mask: bt_frame_stack_mask(bt, frame: fr));
4435 for_each_set_bit(i, mask, 64) {
4436 if (i >= func->allocated_stack / BPF_REG_SIZE) {
4437 /* the sequence of instructions:
4438 * 2: (bf) r3 = r10
4439 * 3: (7b) *(u64 *)(r3 -8) = r0
4440 * 4: (79) r4 = *(u64 *)(r10 -8)
4441 * doesn't contain jmps. It's backtracked
4442 * as a single block.
4443 * During backtracking insn 3 is not recognized as
4444 * stack access, so at the end of backtracking
4445 * stack slot fp-8 is still marked in stack_mask.
4446 * However the parent state may not have accessed
4447 * fp-8 and it's "unallocated" stack space.
4448 * In such case fallback to conservative.
4449 */
4450 mark_all_scalars_precise(env, st: env->cur_state);
4451 bt_reset(bt);
4452 return 0;
4453 }
4454
4455 if (!is_spilled_scalar_reg(stack: &func->stack[i])) {
4456 bt_clear_frame_slot(bt, frame: fr, slot: i);
4457 continue;
4458 }
4459 reg = &func->stack[i].spilled_ptr;
4460 if (reg->precise)
4461 bt_clear_frame_slot(bt, frame: fr, slot: i);
4462 else
4463 reg->precise = true;
4464 }
4465 if (env->log.level & BPF_LOG_LEVEL2) {
4466 fmt_reg_mask(buf: env->tmp_str_buf, TMP_STR_BUF_LEN,
4467 reg_mask: bt_frame_reg_mask(bt, frame: fr));
4468 verbose(private_data: env, fmt: "mark_precise: frame%d: parent state regs=%s ",
4469 fr, env->tmp_str_buf);
4470 fmt_stack_mask(buf: env->tmp_str_buf, TMP_STR_BUF_LEN,
4471 stack_mask: bt_frame_stack_mask(bt, frame: fr));
4472 verbose(private_data: env, fmt: "stack=%s: ", env->tmp_str_buf);
4473 print_verifier_state(env, state: func, print_all: true);
4474 }
4475 }
4476
4477 if (bt_empty(bt))
4478 return 0;
4479
4480 subseq_idx = first_idx;
4481 last_idx = st->last_insn_idx;
4482 first_idx = st->first_insn_idx;
4483 }
4484
4485 /* if we still have requested precise regs or slots, we missed
4486 * something (e.g., stack access through non-r10 register), so
4487 * fallback to marking all precise
4488 */
4489 if (!bt_empty(bt)) {
4490 mark_all_scalars_precise(env, st: env->cur_state);
4491 bt_reset(bt);
4492 }
4493
4494 return 0;
4495}
4496
4497int mark_chain_precision(struct bpf_verifier_env *env, int regno)
4498{
4499 return __mark_chain_precision(env, regno);
4500}
4501
4502/* mark_chain_precision_batch() assumes that env->bt is set in the caller to
4503 * desired reg and stack masks across all relevant frames
4504 */
4505static int mark_chain_precision_batch(struct bpf_verifier_env *env)
4506{
4507 return __mark_chain_precision(env, regno: -1);
4508}
4509
4510static bool is_spillable_regtype(enum bpf_reg_type type)
4511{
4512 switch (base_type(type)) {
4513 case PTR_TO_MAP_VALUE:
4514 case PTR_TO_STACK:
4515 case PTR_TO_CTX:
4516 case PTR_TO_PACKET:
4517 case PTR_TO_PACKET_META:
4518 case PTR_TO_PACKET_END:
4519 case PTR_TO_FLOW_KEYS:
4520 case CONST_PTR_TO_MAP:
4521 case PTR_TO_SOCKET:
4522 case PTR_TO_SOCK_COMMON:
4523 case PTR_TO_TCP_SOCK:
4524 case PTR_TO_XDP_SOCK:
4525 case PTR_TO_BTF_ID:
4526 case PTR_TO_BUF:
4527 case PTR_TO_MEM:
4528 case PTR_TO_FUNC:
4529 case PTR_TO_MAP_KEY:
4530 return true;
4531 default:
4532 return false;
4533 }
4534}
4535
4536/* Does this register contain a constant zero? */
4537static bool register_is_null(struct bpf_reg_state *reg)
4538{
4539 return reg->type == SCALAR_VALUE && tnum_equals_const(a: reg->var_off, b: 0);
4540}
4541
4542static bool register_is_const(struct bpf_reg_state *reg)
4543{
4544 return reg->type == SCALAR_VALUE && tnum_is_const(a: reg->var_off);
4545}
4546
4547static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
4548{
4549 return tnum_is_unknown(a: reg->var_off) &&
4550 reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
4551 reg->umin_value == 0 && reg->umax_value == U64_MAX &&
4552 reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
4553 reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
4554}
4555
4556static bool register_is_bounded(struct bpf_reg_state *reg)
4557{
4558 return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
4559}
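/* For reference, var_off is a tnum (value, mask): the register is a
 * known constant when mask == 0, e.g. after "r1 = 42" var_off is
 * (42, 0) and register_is_const() is true; register_is_null()
 * additionally requires the constant to be 0. register_is_bounded() is
 * weaker: it only asks that at least one of the 64-bit/32-bit ranges or
 * var_off carries more information than the fully-unknown defaults.
 */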
4560
4561static bool __is_pointer_value(bool allow_ptr_leaks,
4562 const struct bpf_reg_state *reg)
4563{
4564 if (allow_ptr_leaks)
4565 return false;
4566
4567 return reg->type != SCALAR_VALUE;
4568}
4569
4570/* Copy src state preserving dst->parent and dst->live fields */
4571static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
4572{
4573 struct bpf_reg_state *parent = dst->parent;
4574 enum bpf_reg_liveness live = dst->live;
4575
4576 *dst = *src;
4577 dst->parent = parent;
4578 dst->live = live;
4579}
4580
4581static void save_register_state(struct bpf_func_state *state,
4582 int spi, struct bpf_reg_state *reg,
4583 int size)
4584{
4585 int i;
4586
4587 copy_register_state(dst: &state->stack[spi].spilled_ptr, src: reg);
4588 if (size == BPF_REG_SIZE)
4589 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
4590
4591 for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
4592 state->stack[spi].slot_type[i - 1] = STACK_SPILL;
4593
4594 /* size < 8 bytes spill */
4595 for (; i; i--)
4596 scrub_spilled_slot(stype: &state->stack[spi].slot_type[i - 1]);
4597}
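/* Example: spilling a bounded scalar with a 4-byte store to fp-8 lands
 * in spi = 0 (slot = -off - 1 = 7, 7 / BPF_REG_SIZE = 0). The register
 * state is copied into stack[0].spilled_ptr, the top 4 slot_type bytes
 * become STACK_SPILL and the remaining 4 are scrubbed, and
 * REG_LIVE_WRITTEN is only set for full 8-byte spills so that partial
 * writes don't terminate read propagation too early.
 */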
4598
4599static bool is_bpf_st_mem(struct bpf_insn *insn)
4600{
4601 return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
4602}
4603
4604/* check_stack_{read,write}_fixed_off functions track spill/fill of registers;
4605 * stack boundary and alignment are checked in check_mem_access()
4606 */
4607static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
4608 /* stack frame we're writing to */
4609 struct bpf_func_state *state,
4610 int off, int size, int value_regno,
4611 int insn_idx)
4612{
4613 struct bpf_func_state *cur; /* state of the current function */
4614 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
4615 struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
4616 struct bpf_reg_state *reg = NULL;
4617 u32 dst_reg = insn->dst_reg;
4618
4619 err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
4620 if (err)
4621 return err;
4622 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
4623 * so it's aligned access and [off, off + size) are within stack limits
4624 */
4625 if (!env->allow_ptr_leaks &&
4626 state->stack[spi].slot_type[0] == STACK_SPILL &&
4627 size != BPF_REG_SIZE) {
4628 verbose(private_data: env, fmt: "attempt to corrupt spilled pointer on stack\n");
4629 return -EACCES;
4630 }
4631
4632 cur = env->cur_state->frame[env->cur_state->curframe];
4633 if (value_regno >= 0)
4634 reg = &cur->regs[value_regno];
4635 if (!env->bypass_spec_v4) {
4636 bool sanitize = reg && is_spillable_regtype(type: reg->type);
4637
4638 for (i = 0; i < size; i++) {
4639 u8 type = state->stack[spi].slot_type[i];
4640
4641 if (type != STACK_MISC && type != STACK_ZERO) {
4642 sanitize = true;
4643 break;
4644 }
4645 }
4646
4647 if (sanitize)
4648 env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
4649 }
4650
4651 err = destroy_if_dynptr_stack_slot(env, state, spi);
4652 if (err)
4653 return err;
4654
4655 mark_stack_slot_scratched(env, spi);
4656 if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
4657 !register_is_null(reg) && env->bpf_capable) {
4658 if (dst_reg != BPF_REG_FP) {
4659 /* The backtracking logic can only recognize explicit
4660 * stack slot address like [fp - 8]. Other spill of
4661 * scalar via different register has to be conservative.
4662 * Backtrack from here and mark all registers as precise
4663 * that contributed into 'reg' being a constant.
4664 */
4665 err = mark_chain_precision(env, regno: value_regno);
4666 if (err)
4667 return err;
4668 }
4669 save_register_state(state, spi, reg, size);
4670 /* Break the relation on a narrowing spill. */
4671 if (fls64(x: reg->umax_value) > BITS_PER_BYTE * size)
4672 state->stack[spi].spilled_ptr.id = 0;
4673 } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
4674 insn->imm != 0 && env->bpf_capable) {
4675 struct bpf_reg_state fake_reg = {};
4676
4677 __mark_reg_known(reg: &fake_reg, imm: (u32)insn->imm);
4678 fake_reg.type = SCALAR_VALUE;
4679 save_register_state(state, spi, reg: &fake_reg, size);
4680 } else if (reg && is_spillable_regtype(type: reg->type)) {
4681 /* register containing pointer is being spilled into stack */
4682 if (size != BPF_REG_SIZE) {
4683 verbose_linfo(env, insn_off: insn_idx, prefix_fmt: "; ");
4684 verbose(private_data: env, fmt: "invalid size of register spill\n");
4685 return -EACCES;
4686 }
4687 if (state != cur && reg->type == PTR_TO_STACK) {
4688 verbose(private_data: env, fmt: "cannot spill pointers to stack into stack frame of the caller\n");
4689 return -EINVAL;
4690 }
4691 save_register_state(state, spi, reg, size);
4692 } else {
4693 u8 type = STACK_MISC;
4694
4695 /* regular write of data into stack destroys any spilled ptr */
4696 state->stack[spi].spilled_ptr.type = NOT_INIT;
4697 /* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */
4698 if (is_stack_slot_special(stack: &state->stack[spi]))
4699 for (i = 0; i < BPF_REG_SIZE; i++)
4700 scrub_spilled_slot(stype: &state->stack[spi].slot_type[i]);
4701
4702 /* only mark the slot as written if all 8 bytes were written
4703 * otherwise read propagation may incorrectly stop too soon
4704 * when stack slots are partially written.
4705 * This heuristic means that read propagation will be
4706 * conservative, since it will add reg_live_read marks
4707 * to stack slots all the way to the first state when a program
4708 * writes+reads less than 8 bytes
4709 */
4710 if (size == BPF_REG_SIZE)
4711 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
4712
4713 /* when we zero initialize stack slots mark them as such */
4714 if ((reg && register_is_null(reg)) ||
4715 (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
4716 /* backtracking doesn't work for STACK_ZERO yet. */
4717 err = mark_chain_precision(env, regno: value_regno);
4718 if (err)
4719 return err;
4720 type = STACK_ZERO;
4721 }
4722
4723 /* Mark slots affected by this stack write. */
4724 for (i = 0; i < size; i++)
4725 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
4726 type;
4727 }
4728 return 0;
4729}
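/* A few representative outcomes of the fixed-offset write above:
 *  - *(u64 *)(r10 - 8) = r1, r1 a bounded scalar: the scalar is spilled
 *    with its bounds preserved (precision is chained first when the
 *    store base register is not r10);
 *  - *(u64 *)(r10 - 16) = r2, r2 = PTR_TO_MAP_VALUE: an 8-byte pointer
 *    spill is tracked; a narrower pointer spill is rejected with
 *    "invalid size of register spill";
 *  - storing a completely unknown scalar falls through to the generic
 *    path: the written bytes become STACK_MISC (or STACK_ZERO for a
 *    known zero) and any previously spilled pointer in the slot is
 *    destroyed.
 */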
4730
4731/* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
4732 * known to contain a variable offset.
4733 * This function checks whether the write is permitted and conservatively
4734 * tracks the effects of the write, considering that each stack slot in the
4735 * dynamic range is potentially written to.
4736 *
4737 * 'off' includes 'regno->off'.
4738 * 'value_regno' can be -1, meaning that an unknown value is being written to
4739 * the stack.
4740 *
4741 * Spilled pointers in range are not marked as written because we don't know
4742 * what's going to be actually written. This means that read propagation for
4743 * future reads cannot be terminated by this write.
4744 *
4745 * For privileged programs, uninitialized stack slots are considered
4746 * initialized by this write (even though we don't know exactly what offsets
4747 * are going to be written to). The idea is that we don't want the verifier to
4748 * reject future reads that access slots written to through variable offsets.
4749 */
4750static int check_stack_write_var_off(struct bpf_verifier_env *env,
4751 /* func where register points to */
4752 struct bpf_func_state *state,
4753 int ptr_regno, int off, int size,
4754 int value_regno, int insn_idx)
4755{
4756 struct bpf_func_state *cur; /* state of the current function */
4757 int min_off, max_off;
4758 int i, err;
4759 struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
4760 struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
4761 bool writing_zero = false;
4762 /* set if the fact that we're writing a zero is used to let any
4763 * stack slots remain STACK_ZERO
4764 */
4765 bool zero_used = false;
4766
4767 cur = env->cur_state->frame[env->cur_state->curframe];
4768 ptr_reg = &cur->regs[ptr_regno];
4769 min_off = ptr_reg->smin_value + off;
4770 max_off = ptr_reg->smax_value + off + size;
4771 if (value_regno >= 0)
4772 value_reg = &cur->regs[value_regno];
4773 if ((value_reg && register_is_null(reg: value_reg)) ||
4774 (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
4775 writing_zero = true;
4776
4777 err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
4778 if (err)
4779 return err;
4780
4781 for (i = min_off; i < max_off; i++) {
4782 int spi;
4783
4784 spi = __get_spi(off: i);
4785 err = destroy_if_dynptr_stack_slot(env, state, spi);
4786 if (err)
4787 return err;
4788 }
4789
4790 /* Variable offset writes destroy any spilled pointers in range. */
4791 for (i = min_off; i < max_off; i++) {
4792 u8 new_type, *stype;
4793 int slot, spi;
4794
4795 slot = -i - 1;
4796 spi = slot / BPF_REG_SIZE;
4797 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
4798 mark_stack_slot_scratched(env, spi);
4799
4800 if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
4801 /* Reject the write if range we may write to has not
4802 * been initialized beforehand. If we didn't reject
4803 * here, the ptr status would be erased below (even
4804 * though not all slots are actually overwritten),
4805 * possibly opening the door to leaks.
4806 *
4807 * We do however catch STACK_INVALID case below, and
4808 * only allow reading possibly uninitialized memory
4809 * later for CAP_PERFMON, as the write may not happen to
4810 * that slot.
4811 */
4812 verbose(private_data: env, fmt: "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
4813 insn_idx, i);
4814 return -EINVAL;
4815 }
4816
4817 /* Erase all spilled pointers. */
4818 state->stack[spi].spilled_ptr.type = NOT_INIT;
4819
4820 /* Update the slot type. */
4821 new_type = STACK_MISC;
4822 if (writing_zero && *stype == STACK_ZERO) {
4823 new_type = STACK_ZERO;
4824 zero_used = true;
4825 }
4826 /* If the slot is STACK_INVALID, we check whether it's OK to
4827 * pretend that it will be initialized by this write. The slot
4828 * might not actually be written to, and so if we mark it as
4829 * initialized future reads might leak uninitialized memory.
4830 * For privileged programs, we will accept such reads to slots
4831 * that may or may not be written because, if we rejected
4832 * them, the error would be too confusing.
4833 */
4834 if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
4835 verbose(private_data: env, fmt: "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
4836 insn_idx, i);
4837 return -EINVAL;
4838 }
4839 *stype = new_type;
4840 }
4841 if (zero_used) {
4842 /* backtracking doesn't work for STACK_ZERO yet. */
4843 err = mark_chain_precision(env, regno: value_regno);
4844 if (err)
4845 return err;
4846 }
4847 return 0;
4848}
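/* Example: with r3 = r10 + r2 and r2 known to be in [-24, -16], an
 * 8-byte store through r3 gives min_off = -24 and max_off = -8, so
 * every stack byte in [-24, -8) is conservatively treated as written:
 * spilled pointers there are erased and the bytes become STACK_MISC
 * (staying STACK_ZERO only when a zero overwrites already-zero bytes).
 * The write is rejected when the range covers a slot that isn't plain
 * data for unprivileged programs, or uninitialized stack when
 * allow_uninit_stack is not set.
 */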
4849
4850/* When register 'dst_regno' is assigned some values from stack[min_off,
4851 * max_off), we set the register's type according to the types of the
4852 * respective stack slots. If all the stack values are known to be zeros, then
4853 * so is the destination reg. Otherwise, the register is considered to be
4854 * SCALAR. This function does not deal with register filling; the caller must
4855 * ensure that all spilled registers in the stack range have been marked as
4856 * read.
4857 */
4858static void mark_reg_stack_read(struct bpf_verifier_env *env,
4859 /* func where src register points to */
4860 struct bpf_func_state *ptr_state,
4861 int min_off, int max_off, int dst_regno)
4862{
4863 struct bpf_verifier_state *vstate = env->cur_state;
4864 struct bpf_func_state *state = vstate->frame[vstate->curframe];
4865 int i, slot, spi;
4866 u8 *stype;
4867 int zeros = 0;
4868
4869 for (i = min_off; i < max_off; i++) {
4870 slot = -i - 1;
4871 spi = slot / BPF_REG_SIZE;
4872 mark_stack_slot_scratched(env, spi);
4873 stype = ptr_state->stack[spi].slot_type;
4874 if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
4875 break;
4876 zeros++;
4877 }
4878 if (zeros == max_off - min_off) {
4879 /* any access_size read into register is zero extended,
4880 * so the whole register == const_zero
4881 */
4882 __mark_reg_const_zero(reg: &state->regs[dst_regno]);
4883 /* backtracking doesn't support STACK_ZERO yet,
4884 * so mark it precise here, so that later
4885 * backtracking can stop here.
4886 * Backtracking may not need this if this register
4887 * doesn't participate in pointer adjustment.
4888 * Forward propagation of precise flag is not
4889 * necessary either. This mark is only to stop
4890 * backtracking. Any register that contributed
4891 * to const 0 was marked precise before spill.
4892 */
4893 state->regs[dst_regno].precise = true;
4894 } else {
4895 /* have read misc data from the stack */
4896 mark_reg_unknown(env, regs: state->regs, regno: dst_regno);
4897 }
4898 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
4899}
4900
4901/* Read the stack at 'off' and put the results into the register indicated by
4902 * 'dst_regno'. It handles reg filling if the addressed stack slot is a
4903 * spilled reg.
4904 *
4905 * 'dst_regno' can be -1, meaning that the read value is not going to a
4906 * register.
4907 *
4908 * The access is assumed to be within the current stack bounds.
4909 */
4910static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
4911 /* func where src register points to */
4912 struct bpf_func_state *reg_state,
4913 int off, int size, int dst_regno)
4914{
4915 struct bpf_verifier_state *vstate = env->cur_state;
4916 struct bpf_func_state *state = vstate->frame[vstate->curframe];
4917 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
4918 struct bpf_reg_state *reg;
4919 u8 *stype, type;
4920
4921 stype = reg_state->stack[spi].slot_type;
4922 reg = &reg_state->stack[spi].spilled_ptr;
4923
4924 mark_stack_slot_scratched(env, spi);
4925
4926 if (is_spilled_reg(stack: &reg_state->stack[spi])) {
4927 u8 spill_size = 1;
4928
4929 for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
4930 spill_size++;
4931
4932 if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
4933 if (reg->type != SCALAR_VALUE) {
4934 verbose_linfo(env, insn_off: env->insn_idx, prefix_fmt: "; ");
4935 verbose(private_data: env, fmt: "invalid size of register fill\n");
4936 return -EACCES;
4937 }
4938
4939 mark_reg_read(env, state: reg, parent: reg->parent, flag: REG_LIVE_READ64);
4940 if (dst_regno < 0)
4941 return 0;
4942
4943 if (!(off % BPF_REG_SIZE) && size == spill_size) {
4944 /* The earlier check_reg_arg() has decided the
4945 * subreg_def for this insn. Save it first.
4946 */
4947 s32 subreg_def = state->regs[dst_regno].subreg_def;
4948
4949 copy_register_state(dst: &state->regs[dst_regno], src: reg);
4950 state->regs[dst_regno].subreg_def = subreg_def;
4951 } else {
4952 for (i = 0; i < size; i++) {
4953 type = stype[(slot - i) % BPF_REG_SIZE];
4954 if (type == STACK_SPILL)
4955 continue;
4956 if (type == STACK_MISC)
4957 continue;
4958 if (type == STACK_INVALID && env->allow_uninit_stack)
4959 continue;
4960 verbose(private_data: env, fmt: "invalid read from stack off %d+%d size %d\n",
4961 off, i, size);
4962 return -EACCES;
4963 }
4964 mark_reg_unknown(env, regs: state->regs, regno: dst_regno);
4965 }
4966 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
4967 return 0;
4968 }
4969
4970 if (dst_regno >= 0) {
4971 /* restore register state from stack */
4972 copy_register_state(dst: &state->regs[dst_regno], src: reg);
4973 /* mark reg as written since spilled pointer state likely
4974 * has its liveness marks cleared by is_state_visited()
4975 * which resets stack/reg liveness for state transitions
4976 */
4977 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
4978 } else if (__is_pointer_value(allow_ptr_leaks: env->allow_ptr_leaks, reg)) {
4979 /* If dst_regno==-1, the caller is asking us whether
4980 * it is acceptable to use this value as a SCALAR_VALUE
4981 * (e.g. for XADD).
4982 * We must not allow unprivileged callers to do that
4983 * with spilled pointers.
4984 */
4985 verbose(private_data: env, fmt: "leaking pointer from stack off %d\n",
4986 off);
4987 return -EACCES;
4988 }
4989 mark_reg_read(env, state: reg, parent: reg->parent, flag: REG_LIVE_READ64);
4990 } else {
4991 for (i = 0; i < size; i++) {
4992 type = stype[(slot - i) % BPF_REG_SIZE];
4993 if (type == STACK_MISC)
4994 continue;
4995 if (type == STACK_ZERO)
4996 continue;
4997 if (type == STACK_INVALID && env->allow_uninit_stack)
4998 continue;
4999 verbose(private_data: env, fmt: "invalid read from stack off %d+%d size %d\n",
5000 off, i, size);
5001 return -EACCES;
5002 }
5003 mark_reg_read(env, state: reg, parent: reg->parent, flag: REG_LIVE_READ64);
5004 if (dst_regno >= 0)
5005 mark_reg_stack_read(env, ptr_state: reg_state, min_off: off, max_off: off + size, dst_regno);
5006 }
5007 return 0;
5008}
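/* Typical outcomes of the fixed-offset read above:
 *  - r1 = *(u64 *)(r10 - 8) where fp-8 holds a full 8-byte spill: the
 *    spilled register state is copied back into r1;
 *  - a narrower read of an 8-byte spilled scalar, e.g.
 *    r1 = *(u32 *)(r10 - 8), is allowed but r1 becomes unknown;
 *  - the same narrow read of a spilled pointer fails with
 *    "invalid size of register fill";
 *  - reading STACK_MISC/STACK_ZERO bytes goes through
 *    mark_reg_stack_read(): all-zero bytes yield a known zero, anything
 *    else an unknown scalar.
 */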
5009
5010enum bpf_access_src {
5011 ACCESS_DIRECT = 1, /* the access is performed by an instruction */
5012 ACCESS_HELPER = 2, /* the access is performed by a helper */
5013};
5014
5015static int check_stack_range_initialized(struct bpf_verifier_env *env,
5016 int regno, int off, int access_size,
5017 bool zero_size_allowed,
5018 enum bpf_access_src type,
5019 struct bpf_call_arg_meta *meta);
5020
5021static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
5022{
5023 return cur_regs(env) + regno;
5024}
5025
5026/* Read the stack at 'ptr_regno + off' and put the result into the register
5027 * 'dst_regno'.
5028 * 'off' includes the pointer register's fixed offset (i.e. 'ptr_regno.off'),
5029 * but not its variable offset.
5030 * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
5031 *
5032 * As opposed to check_stack_read_fixed_off, this function doesn't deal with
5033 * filling registers (i.e. reads of spilled register cannot be detected when
5034 * the offset is not fixed). We conservatively mark 'dst_regno' as containing
5035 * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
5036 * offset; for a fixed offset check_stack_read_fixed_off should be used
5037 * instead.
5038 */
5039static int check_stack_read_var_off(struct bpf_verifier_env *env,
5040 int ptr_regno, int off, int size, int dst_regno)
5041{
5042 /* The state of the source register. */
5043 struct bpf_reg_state *reg = reg_state(env, regno: ptr_regno);
5044 struct bpf_func_state *ptr_state = func(env, reg);
5045 int err;
5046 int min_off, max_off;
5047
5048 /* Note that we pass a NULL meta, so raw access will not be permitted.
5049 */
5050 err = check_stack_range_initialized(env, regno: ptr_regno, off, access_size: size,
5051 zero_size_allowed: false, type: ACCESS_DIRECT, NULL);
5052 if (err)
5053 return err;
5054
5055 min_off = reg->smin_value + off;
5056 max_off = reg->smax_value + off;
5057 mark_reg_stack_read(env, ptr_state, min_off, max_off: max_off + size, dst_regno);
5058 return 0;
5059}
5060
5061/* check_stack_read dispatches to check_stack_read_fixed_off or
5062 * check_stack_read_var_off.
5063 *
5064 * The caller must ensure that the offset falls within the allocated stack
5065 * bounds.
5066 *
5067 * 'dst_regno' is a register which will receive the value from the stack. It
5068 * can be -1, meaning that the read value is not going to a register.
5069 */
5070static int check_stack_read(struct bpf_verifier_env *env,
5071 int ptr_regno, int off, int size,
5072 int dst_regno)
5073{
5074 struct bpf_reg_state *reg = reg_state(env, regno: ptr_regno);
5075 struct bpf_func_state *state = func(env, reg);
5076 int err;
5077 /* Some accesses are only permitted with a static offset. */
	bool var_off = !tnum_is_const(reg->var_off);
5079
5080 /* The offset is required to be static when reads don't go to a
5081 * register, in order to not leak pointers (see
5082 * check_stack_read_fixed_off).
5083 */
5084 if (dst_regno < 0 && var_off) {
5085 char tn_buf[48];
5086
		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
			tn_buf, off, size);
5090 return -EACCES;
5091 }
5092 /* Variable offset is prohibited for unprivileged mode for simplicity
5093 * since it requires corresponding support in Spectre masking for stack
5094 * ALU. See also retrieve_ptr_limit(). The check in
5095 * check_stack_access_for_ptr_arithmetic() called by
5096 * adjust_ptr_min_max_vals() prevents users from creating stack pointers
5097 * with variable offsets, therefore no check is required here. Further,
5098 * just checking it here would be insufficient as speculative stack
5099 * writes could still lead to unsafe speculative behaviour.
5100 */
5101 if (!var_off) {
5102 off += reg->var_off.value;
		err = check_stack_read_fixed_off(env, state, off, size,
5104 dst_regno);
5105 } else {
5106 /* Variable offset stack reads need more conservative handling
5107 * than fixed offset ones. Note that dst_regno >= 0 on this
5108 * branch.
5109 */
5110 err = check_stack_read_var_off(env, ptr_regno, off, size,
5111 dst_regno);
5112 }
5113 return err;
5114}
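
/* Illustrative sketch (not part of the verifier): the dispatch above means a
 * read through a variable-offset stack pointer is only acceptable when the
 * value lands in a register (dst_regno >= 0). A hypothetical access with
 * dst_regno == -1, e.g. the memory operand of an atomic RMW at a var-off
 * stack address, is rejected with
 *
 *   "variable offset stack pointer cannot be passed into helper function; ..."
 *
 * whereas the same access at a constant offset goes through
 * check_stack_read_fixed_off() and may even refill a spilled register.
 */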
5115
5116
5117/* check_stack_write dispatches to check_stack_write_fixed_off or
5118 * check_stack_write_var_off.
5119 *
5120 * 'ptr_regno' is the register used as a pointer into the stack.
5121 * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
5122 * 'value_regno' is the register whose value we're writing to the stack. It can
5123 * be -1, meaning that we're not writing from a register.
5124 *
5125 * The caller must ensure that the offset falls within the maximum stack size.
5126 */
5127static int check_stack_write(struct bpf_verifier_env *env,
5128 int ptr_regno, int off, int size,
5129 int value_regno, int insn_idx)
5130{
	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
	struct bpf_func_state *state = func(env, reg);
	int err;

	if (tnum_is_const(reg->var_off)) {
		off += reg->var_off.value;
		err = check_stack_write_fixed_off(env, state, off, size,
						  value_regno, insn_idx);
	} else {
		/* Variable offset stack writes need more conservative handling
		 * than fixed offset ones.
		 */
		err = check_stack_write_var_off(env, state,
						ptr_regno, off, size,
						value_regno, insn_idx);
5146 }
5147 return err;
5148}
5149
5150static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
5151 int off, int size, enum bpf_access_type type)
5152{
5153 struct bpf_reg_state *regs = cur_regs(env);
5154 struct bpf_map *map = regs[regno].map_ptr;
5155 u32 cap = bpf_map_flags_to_cap(map);
5156
	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
		verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
			map->value_size, off, size);
		return -EACCES;
	}

	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
		verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
			map->value_size, off, size);
		return -EACCES;
	}
5168
5169 return 0;
5170}
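
/* Illustrative sketch (not part of the verifier), using hypothetical map and
 * key names: for a map created with BPF_F_RDONLY_PROG the write capability is
 * dropped by bpf_map_flags_to_cap(), so a program doing
 *
 *   int *v = bpf_map_lookup_elem(&ro_map, &key);
 *   if (v)
 *           *v = 1;
 *
 * is rejected here with "write into map forbidden, ...", while plain reads of
 * *v remain allowed.
 */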
5171
5172/* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
5173static int __check_mem_access(struct bpf_verifier_env *env, int regno,
5174 int off, int size, u32 mem_size,
5175 bool zero_size_allowed)
5176{
5177 bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
5178 struct bpf_reg_state *reg;
5179
5180 if (off >= 0 && size_ok && (u64)off + size <= mem_size)
5181 return 0;
5182
5183 reg = &cur_regs(env)[regno];
	switch (reg->type) {
	case PTR_TO_MAP_KEY:
		verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
			mem_size, off, size);
		break;
	case PTR_TO_MAP_VALUE:
		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
			mem_size, off, size);
		break;
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET_END:
		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
			off, size, regno, reg->id, off, mem_size);
		break;
	case PTR_TO_MEM:
	default:
		verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
			mem_size, off, size);
	}
5204
5205 return -EACCES;
5206}
5207
5208/* check read/write into a memory region with possible variable offset */
5209static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
5210 int off, int size, u32 mem_size,
5211 bool zero_size_allowed)
5212{
5213 struct bpf_verifier_state *vstate = env->cur_state;
5214 struct bpf_func_state *state = vstate->frame[vstate->curframe];
5215 struct bpf_reg_state *reg = &state->regs[regno];
5216 int err;
5217
5218 /* We may have adjusted the register pointing to memory region, so we
5219 * need to try adding each of min_value and max_value to off
5220 * to make sure our theoretical access will be safe.
5221 *
	 * The minimum value is only important with signed
	 * comparisons where we can't assume the floor of a
	 * value is 0. If we are using signed variables for our
	 * indexes, we need to make sure that whatever we use
	 * will have a set floor within our range.
5227 */
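	/* Worked example (illustrative numbers): with mem_size = 64, off = 8,
	 * size = 4 and a register known to be in [0, 16], the low check uses
	 * smin_value + off = 8 and the high check uses umax_value + off = 24,
	 * so 24 + 4 <= 64 passes. If the register were only bounded by
	 * [0, 60], the high check would see 60 + 8 + 4 = 72 > 64 and the
	 * access would be rejected.
	 */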
	if (reg->smin_value < 0 &&
	    (reg->smin_value == S64_MIN ||
	     (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
	     reg->smin_value + off < 0)) {
		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
			regno);
		return -EACCES;
	}
	err = __check_mem_access(env, regno, reg->smin_value + off, size,
				 mem_size, zero_size_allowed);
	if (err) {
		verbose(env, "R%d min value is outside of the allowed memory range\n",
			regno);
		return err;
	}

	/* If we haven't set a max value then we need to bail since we can't be
	 * sure we won't do bad things.
	 * If reg->umax_value + off could overflow, treat that as unbounded too.
	 */
	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
		verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
			regno);
		return -EACCES;
	}
	err = __check_mem_access(env, regno, reg->umax_value + off, size,
				 mem_size, zero_size_allowed);
	if (err) {
		verbose(env, "R%d max value is outside of the allowed memory range\n",
			regno);
		return err;
	}
5260
5261 return 0;
5262}
5263
5264static int __check_ptr_off_reg(struct bpf_verifier_env *env,
5265 const struct bpf_reg_state *reg, int regno,
5266 bool fixed_off_ok)
5267{
5268 /* Access to this pointer-typed register or passing it to a helper
5269 * is only allowed in its original, unmodified form.
5270 */
5271
	if (reg->off < 0) {
		verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
			reg_type_str(env, reg->type), regno, reg->off);
		return -EACCES;
	}

	if (!fixed_off_ok && reg->off) {
		verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
			reg_type_str(env, reg->type), regno, reg->off);
		return -EACCES;
	}

	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "variable %s access var_off=%s disallowed\n",
			reg_type_str(env, reg->type), tn_buf);
		return -EACCES;
	}
5292
5293 return 0;
5294}
5295
5296int check_ptr_off_reg(struct bpf_verifier_env *env,
5297 const struct bpf_reg_state *reg, int regno)
5298{
	return __check_ptr_off_reg(env, reg, regno, false);
5300}
5301
5302static int map_kptr_match_type(struct bpf_verifier_env *env,
5303 struct btf_field *kptr_field,
5304 struct bpf_reg_state *reg, u32 regno)
5305{
	const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
5307 int perm_flags;
5308 const char *reg_name = "";
5309
	if (btf_is_kernel(reg->btf)) {
5311 perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;
5312
5313 /* Only unreferenced case accepts untrusted pointers */
5314 if (kptr_field->type == BPF_KPTR_UNREF)
5315 perm_flags |= PTR_UNTRUSTED;
5316 } else {
5317 perm_flags = PTR_MAYBE_NULL | MEM_ALLOC;
5318 if (kptr_field->type == BPF_KPTR_PERCPU)
5319 perm_flags |= MEM_PERCPU;
5320 }
5321
	if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
5323 goto bad_type;
5324
5325 /* We need to verify reg->type and reg->btf, before accessing reg->btf */
	reg_name = btf_type_name(reg->btf, reg->btf_id);
5327
5328 /* For ref_ptr case, release function check should ensure we get one
5329 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
5330 * normal store of unreferenced kptr, we must ensure var_off is zero.
5331 * Since ref_ptr cannot be accessed directly by BPF insns, checks for
5332 * reg->off and reg->ref_obj_id are not needed here.
5333 */
	if (__check_ptr_off_reg(env, reg, regno, true))
5335 return -EACCES;
5336
5337 /* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and
5338 * we also need to take into account the reg->off.
5339 *
5340 * We want to support cases like:
5341 *
5342 * struct foo {
5343 * struct bar br;
5344 * struct baz bz;
5345 * };
5346 *
5347 * struct foo *v;
5348 * v = func(); // PTR_TO_BTF_ID
5349 * val->foo = v; // reg->off is zero, btf and btf_id match type
5350 * val->bar = &v->br; // reg->off is still zero, but we need to retry with
5351 * // first member type of struct after comparison fails
5352 * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
5353 * // to match type
5354 *
5355 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
5356 * is zero. We must also ensure that btf_struct_ids_match does not walk
5357 * the struct to match type against first member of struct, i.e. reject
5358 * second case from above. Hence, when type is BPF_KPTR_REF, we set
5359 * strict mode to true for type match.
5360 */
	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
				  kptr_field->kptr.btf, kptr_field->kptr.btf_id,
				  kptr_field->type != BPF_KPTR_UNREF))
5364 goto bad_type;
5365 return 0;
5366bad_type:
	verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
		reg_type_str(env, reg->type), reg_name);
	verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
	if (kptr_field->type == BPF_KPTR_UNREF)
		verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
			targ_name);
	else
		verbose(env, "\n");
5375 return -EINVAL;
5376}
5377
/* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
 * can dereference RCU-protected pointers and the result is PTR_TRUSTED.
 */
5381static bool in_rcu_cs(struct bpf_verifier_env *env)
5382{
5383 return env->cur_state->active_rcu_lock ||
5384 env->cur_state->active_lock.ptr ||
5385 !env->prog->aux->sleepable;
5386}
5387
5388/* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */
5389BTF_SET_START(rcu_protected_types)
5390BTF_ID(struct, prog_test_ref_kfunc)
5391BTF_ID(struct, cgroup)
5392BTF_ID(struct, bpf_cpumask)
5393BTF_ID(struct, task_struct)
5394BTF_SET_END(rcu_protected_types)
5395
5396static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
5397{
5398 if (!btf_is_kernel(btf))
5399 return false;
5400 return btf_id_set_contains(set: &rcu_protected_types, id: btf_id);
5401}
5402
5403static bool rcu_safe_kptr(const struct btf_field *field)
5404{
5405 const struct btf_field_kptr *kptr = &field->kptr;
5406
5407 return field->type == BPF_KPTR_PERCPU ||
5408 (field->type == BPF_KPTR_REF && rcu_protected_object(btf: kptr->btf, btf_id: kptr->btf_id));
5409}
5410
5411static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field)
5412{
5413 if (rcu_safe_kptr(field: kptr_field) && in_rcu_cs(env)) {
5414 if (kptr_field->type != BPF_KPTR_PERCPU)
5415 return PTR_MAYBE_NULL | MEM_RCU;
5416 return PTR_MAYBE_NULL | MEM_RCU | MEM_PERCPU;
5417 }
5418 return PTR_MAYBE_NULL | PTR_UNTRUSTED;
5419}
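
/* Illustrative sketch (not part of the verifier), using a hypothetical map
 * value layout:
 *
 *   struct map_val {
 *           struct task_struct __kptr *task;   // BPF_KPTR_REF
 *   };
 *
 * task_struct is in rcu_protected_types, so loading this field while in an
 * RCU critical section yields PTR_TO_BTF_ID | MEM_RCU | PTR_MAYBE_NULL;
 * outside of one (e.g. a sleepable program without bpf_rcu_read_lock()) the
 * same load yields PTR_TO_BTF_ID | PTR_UNTRUSTED | PTR_MAYBE_NULL.
 */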
5420
5421static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
5422 int value_regno, int insn_idx,
5423 struct btf_field *kptr_field)
5424{
5425 struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
5426 int class = BPF_CLASS(insn->code);
5427 struct bpf_reg_state *val_reg;
5428
5429 /* Things we already checked for in check_map_access and caller:
5430 * - Reject cases where variable offset may touch kptr
5431 * - size of access (must be BPF_DW)
5432 * - tnum_is_const(reg->var_off)
5433 * - kptr_field->offset == off + reg->var_off.value
5434 */
5435 /* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
5436 if (BPF_MODE(insn->code) != BPF_MEM) {
5437 verbose(private_data: env, fmt: "kptr in map can only be accessed using BPF_MEM instruction mode\n");
5438 return -EACCES;
5439 }
5440
5441 /* We only allow loading referenced kptr, since it will be marked as
5442 * untrusted, similar to unreferenced kptr.
5443 */
5444 if (class != BPF_LDX &&
5445 (kptr_field->type == BPF_KPTR_REF || kptr_field->type == BPF_KPTR_PERCPU)) {
5446 verbose(private_data: env, fmt: "store to referenced kptr disallowed\n");
5447 return -EACCES;
5448 }
5449
5450 if (class == BPF_LDX) {
5451 val_reg = reg_state(env, regno: value_regno);
5452 /* We can simply mark the value_regno receiving the pointer
5453 * value from map as PTR_TO_BTF_ID, with the correct type.
5454 */
5455 mark_btf_ld_reg(env, regs: cur_regs(env), regno: value_regno, reg_type: PTR_TO_BTF_ID, btf: kptr_field->kptr.btf,
5456 btf_id: kptr_field->kptr.btf_id, flag: btf_ld_kptr_type(env, kptr_field));
5457 /* For mark_ptr_or_null_reg */
5458 val_reg->id = ++env->id_gen;
5459 } else if (class == BPF_STX) {
5460 val_reg = reg_state(env, regno: value_regno);
5461 if (!register_is_null(reg: val_reg) &&
5462 map_kptr_match_type(env, kptr_field, reg: val_reg, regno: value_regno))
5463 return -EACCES;
5464 } else if (class == BPF_ST) {
5465 if (insn->imm) {
5466 verbose(private_data: env, fmt: "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
5467 kptr_field->offset);
5468 return -EACCES;
5469 }
5470 } else {
5471 verbose(private_data: env, fmt: "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
5472 return -EACCES;
5473 }
5474 return 0;
5475}
5476
5477/* check read/write into a map element with possible variable offset */
5478static int check_map_access(struct bpf_verifier_env *env, u32 regno,
5479 int off, int size, bool zero_size_allowed,
5480 enum bpf_access_src src)
5481{
5482 struct bpf_verifier_state *vstate = env->cur_state;
5483 struct bpf_func_state *state = vstate->frame[vstate->curframe];
5484 struct bpf_reg_state *reg = &state->regs[regno];
5485 struct bpf_map *map = reg->map_ptr;
5486 struct btf_record *rec;
5487 int err, i;
5488
5489 err = check_mem_region_access(env, regno, off, size, mem_size: map->value_size,
5490 zero_size_allowed);
5491 if (err)
5492 return err;
5493
5494 if (IS_ERR_OR_NULL(ptr: map->record))
5495 return 0;
5496 rec = map->record;
5497 for (i = 0; i < rec->cnt; i++) {
5498 struct btf_field *field = &rec->fields[i];
5499 u32 p = field->offset;
5500
5501 /* If any part of a field can be touched by load/store, reject
5502 * this program. To check that [x1, x2) overlaps with [y1, y2),
5503 * it is sufficient to check x1 < y2 && y1 < x2.
5504 */
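		/* Worked example (illustrative numbers): a kptr field at
		 * offset 16 occupies [16, 24). An access with off = 12,
		 * size = 8 and a register bounded by [0, 4] spans [12, 24):
		 * 12 < 24 and 16 < 4 + 12 + 8 = 24, so the intervals overlap
		 * and the field-specific checks below run.
		 */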
		if (reg->smin_value + off < p + btf_field_type_size(field->type) &&
		    p < reg->umax_value + off + size) {
5507 switch (field->type) {
5508 case BPF_KPTR_UNREF:
5509 case BPF_KPTR_REF:
5510 case BPF_KPTR_PERCPU:
5511 if (src != ACCESS_DIRECT) {
5512 verbose(private_data: env, fmt: "kptr cannot be accessed indirectly by helper\n");
5513 return -EACCES;
5514 }
5515 if (!tnum_is_const(a: reg->var_off)) {
5516 verbose(private_data: env, fmt: "kptr access cannot have variable offset\n");
5517 return -EACCES;
5518 }
5519 if (p != off + reg->var_off.value) {
5520 verbose(private_data: env, fmt: "kptr access misaligned expected=%u off=%llu\n",
5521 p, off + reg->var_off.value);
5522 return -EACCES;
5523 }
5524 if (size != bpf_size_to_bytes(BPF_DW)) {
5525 verbose(private_data: env, fmt: "kptr access size must be BPF_DW\n");
5526 return -EACCES;
5527 }
5528 break;
5529 default:
5530 verbose(private_data: env, fmt: "%s cannot be accessed directly by load/store\n",
5531 btf_field_type_name(type: field->type));
5532 return -EACCES;
5533 }
5534 }
5535 }
5536 return 0;
5537}
5538
5539#define MAX_PACKET_OFF 0xffff
5540
5541static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
5542 const struct bpf_call_arg_meta *meta,
5543 enum bpf_access_type t)
5544{
5545 enum bpf_prog_type prog_type = resolve_prog_type(prog: env->prog);
5546
5547 switch (prog_type) {
5548 /* Program types only with direct read access go here! */
5549 case BPF_PROG_TYPE_LWT_IN:
5550 case BPF_PROG_TYPE_LWT_OUT:
5551 case BPF_PROG_TYPE_LWT_SEG6LOCAL:
5552 case BPF_PROG_TYPE_SK_REUSEPORT:
5553 case BPF_PROG_TYPE_FLOW_DISSECTOR:
5554 case BPF_PROG_TYPE_CGROUP_SKB:
5555 if (t == BPF_WRITE)
5556 return false;
5557 fallthrough;
5558
5559 /* Program types with direct read + write access go here! */
5560 case BPF_PROG_TYPE_SCHED_CLS:
5561 case BPF_PROG_TYPE_SCHED_ACT:
5562 case BPF_PROG_TYPE_XDP:
5563 case BPF_PROG_TYPE_LWT_XMIT:
5564 case BPF_PROG_TYPE_SK_SKB:
5565 case BPF_PROG_TYPE_SK_MSG:
5566 if (meta)
5567 return meta->pkt_access;
5568
5569 env->seen_direct_write = true;
5570 return true;
5571
5572 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
5573 if (t == BPF_WRITE)
5574 env->seen_direct_write = true;
5575
5576 return true;
5577
5578 default:
5579 return false;
5580 }
5581}
5582
5583static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
5584 int size, bool zero_size_allowed)
5585{
5586 struct bpf_reg_state *regs = cur_regs(env);
5587 struct bpf_reg_state *reg = &regs[regno];
5588 int err;
5589
5590 /* We may have added a variable offset to the packet pointer; but any
5591 * reg->range we have comes after that. We are only checking the fixed
5592 * offset.
5593 */
5594
5595 /* We don't allow negative numbers, because we aren't tracking enough
5596 * detail to prove they're safe.
5597 */
5598 if (reg->smin_value < 0) {
5599 verbose(private_data: env, fmt: "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
5600 regno);
5601 return -EACCES;
5602 }
5603
5604 err = reg->range < 0 ? -EINVAL :
5605 __check_mem_access(env, regno, off, size, mem_size: reg->range,
5606 zero_size_allowed);
5607 if (err) {
5608 verbose(private_data: env, fmt: "R%d offset is outside of the packet\n", regno);
5609 return err;
5610 }
5611
	/* __check_mem_access has made sure "off + size - 1" is within u16.
	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
	 * otherwise find_good_pkt_pointers would have refused to set the range
	 * info and __check_mem_access would have rejected this pkt access.
	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
	 */
5618 env->prog->aux->max_pkt_offset =
5619 max_t(u32, env->prog->aux->max_pkt_offset,
5620 off + reg->umax_value + size - 1);
5621
5622 return err;
5623}
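
/* Illustrative sketch (not part of the verifier): reg->range is established by
 * the usual direct packet access pattern, roughly:
 *
 *   void *data = (void *)(long)skb->data;
 *   void *data_end = (void *)(long)skb->data_end;
 *   struct ethhdr *eth = data;
 *
 *   if (data + sizeof(*eth) > data_end)    // find_good_pkt_pointers() sets
 *           return TC_ACT_OK;              // range = sizeof(*eth) on the
 *   ... eth->h_proto ...                   // fall-through path, so this
 *                                          // fixed-offset load passes here
 */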
5624
5625/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
5626static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
5627 enum bpf_access_type t, enum bpf_reg_type *reg_type,
5628 struct btf **btf, u32 *btf_id)
5629{
5630 struct bpf_insn_access_aux info = {
5631 .reg_type = *reg_type,
5632 .log = &env->log,
5633 };
5634
5635 if (env->ops->is_valid_access &&
5636 env->ops->is_valid_access(off, size, t, env->prog, &info)) {
5637 /* A non zero info.ctx_field_size indicates that this field is a
5638 * candidate for later verifier transformation to load the whole
5639 * field and then apply a mask when accessed with a narrower
5640 * access than actual ctx access size. A zero info.ctx_field_size
5641 * will only allow for whole field access and rejects any other
5642 * type of narrower access.
5643 */
5644 *reg_type = info.reg_type;
5645
5646 if (base_type(type: *reg_type) == PTR_TO_BTF_ID) {
5647 *btf = info.btf;
5648 *btf_id = info.btf_id;
5649 } else {
5650 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
5651 }
5652 /* remember the offset of last byte accessed in ctx */
5653 if (env->prog->aux->max_ctx_offset < off + size)
5654 env->prog->aux->max_ctx_offset = off + size;
5655 return 0;
5656 }
5657
5658 verbose(private_data: env, fmt: "invalid bpf_context access off=%d size=%d\n", off, size);
5659 return -EACCES;
5660}
5661
5662static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
5663 int size)
5664{
5665 if (size < 0 || off < 0 ||
5666 (u64)off + size > sizeof(struct bpf_flow_keys)) {
5667 verbose(private_data: env, fmt: "invalid access to flow keys off=%d size=%d\n",
5668 off, size);
5669 return -EACCES;
5670 }
5671 return 0;
5672}
5673
5674static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
5675 u32 regno, int off, int size,
5676 enum bpf_access_type t)
5677{
5678 struct bpf_reg_state *regs = cur_regs(env);
5679 struct bpf_reg_state *reg = &regs[regno];
5680 struct bpf_insn_access_aux info = {};
5681 bool valid;
5682
5683 if (reg->smin_value < 0) {
5684 verbose(private_data: env, fmt: "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
5685 regno);
5686 return -EACCES;
5687 }
5688
5689 switch (reg->type) {
5690 case PTR_TO_SOCK_COMMON:
5691 valid = bpf_sock_common_is_valid_access(off, size, type: t, info: &info);
5692 break;
5693 case PTR_TO_SOCKET:
5694 valid = bpf_sock_is_valid_access(off, size, type: t, info: &info);
5695 break;
5696 case PTR_TO_TCP_SOCK:
5697 valid = bpf_tcp_sock_is_valid_access(off, size, type: t, info: &info);
5698 break;
5699 case PTR_TO_XDP_SOCK:
5700 valid = bpf_xdp_sock_is_valid_access(off, size, type: t, info: &info);
5701 break;
5702 default:
5703 valid = false;
5704 }
5705
5706
5707 if (valid) {
5708 env->insn_aux_data[insn_idx].ctx_field_size =
5709 info.ctx_field_size;
5710 return 0;
5711 }
5712
5713 verbose(private_data: env, fmt: "R%d invalid %s access off=%d size=%d\n",
5714 regno, reg_type_str(env, type: reg->type), off, size);
5715
5716 return -EACCES;
5717}
5718
5719static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
5720{
5721 return __is_pointer_value(allow_ptr_leaks: env->allow_ptr_leaks, reg: reg_state(env, regno));
5722}
5723
5724static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
5725{
5726 const struct bpf_reg_state *reg = reg_state(env, regno);
5727
5728 return reg->type == PTR_TO_CTX;
5729}
5730
5731static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
5732{
5733 const struct bpf_reg_state *reg = reg_state(env, regno);
5734
5735 return type_is_sk_pointer(type: reg->type);
5736}
5737
5738static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
5739{
5740 const struct bpf_reg_state *reg = reg_state(env, regno);
5741
5742 return type_is_pkt_pointer(type: reg->type);
5743}
5744
5745static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
5746{
5747 const struct bpf_reg_state *reg = reg_state(env, regno);
5748
5749 /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
5750 return reg->type == PTR_TO_FLOW_KEYS;
5751}
5752
5753static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
5754#ifdef CONFIG_NET
5755 [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
5756 [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
5757 [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
5758#endif
5759 [CONST_PTR_TO_MAP] = btf_bpf_map_id,
5760};
5761
5762static bool is_trusted_reg(const struct bpf_reg_state *reg)
5763{
5764 /* A referenced register is always trusted. */
5765 if (reg->ref_obj_id)
5766 return true;
5767
5768 /* Types listed in the reg2btf_ids are always trusted */
5769 if (reg2btf_ids[base_type(type: reg->type)])
5770 return true;
5771
5772 /* If a register is not referenced, it is trusted if it has the
5773 * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
5774 * other type modifiers may be safe, but we elect to take an opt-in
5775 * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
5776 * not.
5777 *
5778 * Eventually, we should make PTR_TRUSTED the single source of truth
5779 * for whether a register is trusted.
5780 */
5781 return type_flag(type: reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
5782 !bpf_type_has_unsafe_modifiers(type: reg->type);
5783}
5784
5785static bool is_rcu_reg(const struct bpf_reg_state *reg)
5786{
5787 return reg->type & MEM_RCU;
5788}
5789
5790static void clear_trusted_flags(enum bpf_type_flag *flag)
5791{
5792 *flag &= ~(BPF_REG_TRUSTED_MODIFIERS | MEM_RCU);
5793}
5794
5795static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
5796 const struct bpf_reg_state *reg,
5797 int off, int size, bool strict)
5798{
5799 struct tnum reg_off;
5800 int ip_align;
5801
5802 /* Byte size accesses are always allowed. */
5803 if (!strict || size == 1)
5804 return 0;
5805
5806 /* For platforms that do not have a Kconfig enabling
5807 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
5808 * NET_IP_ALIGN is universally set to '2'. And on platforms
5809 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
5810 * to this code only in strict mode where we want to emulate
5811 * the NET_IP_ALIGN==2 checking. Therefore use an
5812 * unconditional IP align value of '2'.
5813 */
5814 ip_align = 2;
5815
5816 reg_off = tnum_add(a: reg->var_off, b: tnum_const(value: ip_align + reg->off + off));
5817 if (!tnum_is_aligned(a: reg_off, size)) {
5818 char tn_buf[48];
5819
5820 tnum_strn(str: tn_buf, size: sizeof(tn_buf), a: reg->var_off);
5821 verbose(private_data: env,
5822 fmt: "misaligned packet access off %d+%s+%d+%d size %d\n",
5823 ip_align, tn_buf, reg->off, off, size);
5824 return -EACCES;
5825 }
5826
5827 return 0;
5828}
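
/* Worked example (illustrative numbers): with the emulated NET_IP_ALIGN of 2,
 * a 4-byte load at packet offset 14 (just past an Ethernet header) checks
 * 2 + 14 = 16, which is 4-byte aligned and passes; the same load at offset 12
 * would check 2 + 12 = 14 and be rejected under strict alignment.
 */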
5829
5830static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
5831 const struct bpf_reg_state *reg,
5832 const char *pointer_desc,
5833 int off, int size, bool strict)
5834{
5835 struct tnum reg_off;
5836
5837 /* Byte size accesses are always allowed. */
5838 if (!strict || size == 1)
5839 return 0;
5840
5841 reg_off = tnum_add(a: reg->var_off, b: tnum_const(value: reg->off + off));
5842 if (!tnum_is_aligned(a: reg_off, size)) {
5843 char tn_buf[48];
5844
5845 tnum_strn(str: tn_buf, size: sizeof(tn_buf), a: reg->var_off);
5846 verbose(private_data: env, fmt: "misaligned %saccess off %s+%d+%d size %d\n",
5847 pointer_desc, tn_buf, reg->off, off, size);
5848 return -EACCES;
5849 }
5850
5851 return 0;
5852}
5853
5854static int check_ptr_alignment(struct bpf_verifier_env *env,
5855 const struct bpf_reg_state *reg, int off,
5856 int size, bool strict_alignment_once)
5857{
5858 bool strict = env->strict_alignment || strict_alignment_once;
5859 const char *pointer_desc = "";
5860
5861 switch (reg->type) {
5862 case PTR_TO_PACKET:
5863 case PTR_TO_PACKET_META:
5864 /* Special case, because of NET_IP_ALIGN. Given metadata sits
5865 * right in front, treat it the very same way.
5866 */
5867 return check_pkt_ptr_alignment(env, reg, off, size, strict);
5868 case PTR_TO_FLOW_KEYS:
5869 pointer_desc = "flow keys ";
5870 break;
5871 case PTR_TO_MAP_KEY:
5872 pointer_desc = "key ";
5873 break;
5874 case PTR_TO_MAP_VALUE:
5875 pointer_desc = "value ";
5876 break;
5877 case PTR_TO_CTX:
5878 pointer_desc = "context ";
5879 break;
5880 case PTR_TO_STACK:
5881 pointer_desc = "stack ";
5882 /* The stack spill tracking logic in check_stack_write_fixed_off()
5883 * and check_stack_read_fixed_off() relies on stack accesses being
5884 * aligned.
5885 */
5886 strict = true;
5887 break;
5888 case PTR_TO_SOCKET:
5889 pointer_desc = "sock ";
5890 break;
5891 case PTR_TO_SOCK_COMMON:
5892 pointer_desc = "sock_common ";
5893 break;
5894 case PTR_TO_TCP_SOCK:
5895 pointer_desc = "tcp_sock ";
5896 break;
5897 case PTR_TO_XDP_SOCK:
5898 pointer_desc = "xdp_sock ";
5899 break;
5900 default:
5901 break;
5902 }
5903 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
5904 strict);
5905}
5906
5907static int update_stack_depth(struct bpf_verifier_env *env,
5908 const struct bpf_func_state *func,
5909 int off)
5910{
5911 u16 stack = env->subprog_info[func->subprogno].stack_depth;
5912
5913 if (stack >= -off)
5914 return 0;
5915
5916 /* update known max for given subprogram */
5917 env->subprog_info[func->subprogno].stack_depth = -off;
5918 return 0;
5919}
5920
5921/* starting from main bpf function walk all instructions of the function
5922 * and recursively walk all callees that given function can call.
5923 * Ignore jump and exit insns.
5924 * Since recursion is prevented by check_cfg() this algorithm
5925 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
5926 */
5927static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
5928{
5929 struct bpf_subprog_info *subprog = env->subprog_info;
5930 struct bpf_insn *insn = env->prog->insnsi;
5931 int depth = 0, frame = 0, i, subprog_end;
5932 bool tail_call_reachable = false;
5933 int ret_insn[MAX_CALL_FRAMES];
5934 int ret_prog[MAX_CALL_FRAMES];
5935 int j;
5936
5937 i = subprog[idx].start;
5938process_func:
5939 /* protect against potential stack overflow that might happen when
5940 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
5941 * depth for such case down to 256 so that the worst case scenario
5942 * would result in 8k stack size (32 which is tailcall limit * 256 =
5943 * 8k).
5944 *
5945 * To get the idea what might happen, see an example:
5946 * func1 -> sub rsp, 128
5947 * subfunc1 -> sub rsp, 256
5948 * tailcall1 -> add rsp, 256
5949 * func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
5950 * subfunc2 -> sub rsp, 64
5951 * subfunc22 -> sub rsp, 128
5952 * tailcall2 -> add rsp, 128
5953 * func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
5954 *
5955 * tailcall will unwind the current stack frame but it will not get rid
5956 * of caller's stack as shown on the example above.
5957 */
5958 if (idx && subprog[idx].has_tail_call && depth >= 256) {
		verbose(env,
			"tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
			depth);
5962 return -EACCES;
5963 }
5964 /* round up to 32-bytes, since this is granularity
5965 * of interpreter stack size
5966 */
5967 depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
5968 if (depth > MAX_BPF_STACK) {
5969 verbose(private_data: env, fmt: "combined stack size of %d calls is %d. Too large\n",
5970 frame + 1, depth);
5971 return -EACCES;
5972 }
5973continue_func:
5974 subprog_end = subprog[idx + 1].start;
5975 for (; i < subprog_end; i++) {
5976 int next_insn, sidx;
5977
5978 if (bpf_pseudo_kfunc_call(insn: insn + i) && !insn[i].off) {
5979 bool err = false;
5980
5981 if (!is_bpf_throw_kfunc(insn: insn + i))
5982 continue;
5983 if (subprog[idx].is_cb)
5984 err = true;
5985 for (int c = 0; c < frame && !err; c++) {
5986 if (subprog[ret_prog[c]].is_cb) {
5987 err = true;
5988 break;
5989 }
5990 }
5991 if (!err)
5992 continue;
5993 verbose(private_data: env,
5994 fmt: "bpf_throw kfunc (insn %d) cannot be called from callback subprog %d\n",
5995 i, idx);
5996 return -EINVAL;
5997 }
5998
5999 if (!bpf_pseudo_call(insn: insn + i) && !bpf_pseudo_func(insn: insn + i))
6000 continue;
6001 /* remember insn and function to return to */
6002 ret_insn[frame] = i + 1;
6003 ret_prog[frame] = idx;
6004
6005 /* find the callee */
6006 next_insn = i + insn[i].imm + 1;
6007 sidx = find_subprog(env, off: next_insn);
6008 if (sidx < 0) {
6009 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
6010 next_insn);
6011 return -EFAULT;
6012 }
6013 if (subprog[sidx].is_async_cb) {
6014 if (subprog[sidx].has_tail_call) {
6015 verbose(private_data: env, fmt: "verifier bug. subprog has tail_call and async cb\n");
6016 return -EFAULT;
6017 }
6018 /* async callbacks don't increase bpf prog stack size unless called directly */
6019 if (!bpf_pseudo_call(insn: insn + i))
6020 continue;
6021 if (subprog[sidx].is_exception_cb) {
6022 verbose(private_data: env, fmt: "insn %d cannot call exception cb directly\n", i);
6023 return -EINVAL;
6024 }
6025 }
6026 i = next_insn;
6027 idx = sidx;
6028
6029 if (subprog[idx].has_tail_call)
6030 tail_call_reachable = true;
6031
6032 frame++;
6033 if (frame >= MAX_CALL_FRAMES) {
6034 verbose(private_data: env, fmt: "the call stack of %d frames is too deep !\n",
6035 frame);
6036 return -E2BIG;
6037 }
6038 goto process_func;
6039 }
6040 /* if tail call got detected across bpf2bpf calls then mark each of the
6041 * currently present subprog frames as tail call reachable subprogs;
6042 * this info will be utilized by JIT so that we will be preserving the
6043 * tail call counter throughout bpf2bpf calls combined with tailcalls
6044 */
6045 if (tail_call_reachable)
6046 for (j = 0; j < frame; j++) {
6047 if (subprog[ret_prog[j]].is_exception_cb) {
6048 verbose(private_data: env, fmt: "cannot tail call within exception cb\n");
6049 return -EINVAL;
6050 }
6051 subprog[ret_prog[j]].tail_call_reachable = true;
6052 }
6053 if (subprog[0].tail_call_reachable)
6054 env->prog->aux->tail_call_reachable = true;
6055
6056 /* end of for() loop means the last insn of the 'subprog'
6057 * was reached. Doesn't matter whether it was JA or EXIT
6058 */
6059 if (frame == 0)
6060 return 0;
6061 depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
6062 frame--;
6063 i = ret_insn[frame];
6064 idx = ret_prog[frame];
6065 goto continue_func;
6066}
6067
6068static int check_max_stack_depth(struct bpf_verifier_env *env)
6069{
6070 struct bpf_subprog_info *si = env->subprog_info;
6071 int ret;
6072
6073 for (int i = 0; i < env->subprog_cnt; i++) {
6074 if (!i || si[i].is_async_cb) {
6075 ret = check_max_stack_depth_subprog(env, idx: i);
6076 if (ret < 0)
6077 return ret;
6078 }
6079 continue;
6080 }
6081 return 0;
6082}
6083
6084#ifndef CONFIG_BPF_JIT_ALWAYS_ON
6085static int get_callee_stack_depth(struct bpf_verifier_env *env,
6086 const struct bpf_insn *insn, int idx)
6087{
6088 int start = idx + insn->imm + 1, subprog;
6089
6090 subprog = find_subprog(env, start);
6091 if (subprog < 0) {
6092 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
6093 start);
6094 return -EFAULT;
6095 }
6096 return env->subprog_info[subprog].stack_depth;
6097}
6098#endif
6099
6100static int __check_buffer_access(struct bpf_verifier_env *env,
6101 const char *buf_info,
6102 const struct bpf_reg_state *reg,
6103 int regno, int off, int size)
6104{
6105 if (off < 0) {
6106 verbose(private_data: env,
6107 fmt: "R%d invalid %s buffer access: off=%d, size=%d\n",
6108 regno, buf_info, off, size);
6109 return -EACCES;
6110 }
6111 if (!tnum_is_const(a: reg->var_off) || reg->var_off.value) {
6112 char tn_buf[48];
6113
6114 tnum_strn(str: tn_buf, size: sizeof(tn_buf), a: reg->var_off);
6115 verbose(private_data: env,
6116 fmt: "R%d invalid variable buffer offset: off=%d, var_off=%s\n",
6117 regno, off, tn_buf);
6118 return -EACCES;
6119 }
6120
6121 return 0;
6122}
6123
6124static int check_tp_buffer_access(struct bpf_verifier_env *env,
6125 const struct bpf_reg_state *reg,
6126 int regno, int off, int size)
6127{
6128 int err;
6129
6130 err = __check_buffer_access(env, buf_info: "tracepoint", reg, regno, off, size);
6131 if (err)
6132 return err;
6133
6134 if (off + size > env->prog->aux->max_tp_access)
6135 env->prog->aux->max_tp_access = off + size;
6136
6137 return 0;
6138}
6139
6140static int check_buffer_access(struct bpf_verifier_env *env,
6141 const struct bpf_reg_state *reg,
6142 int regno, int off, int size,
6143 bool zero_size_allowed,
6144 u32 *max_access)
6145{
6146 const char *buf_info = type_is_rdonly_mem(type: reg->type) ? "rdonly" : "rdwr";
6147 int err;
6148
6149 err = __check_buffer_access(env, buf_info, reg, regno, off, size);
6150 if (err)
6151 return err;
6152
6153 if (off + size > *max_access)
6154 *max_access = off + size;
6155
6156 return 0;
6157}
6158
/* BPF architecture zero extends alu32 ops into 64-bit registers */
6160static void zext_32_to_64(struct bpf_reg_state *reg)
6161{
6162 reg->var_off = tnum_subreg(a: reg->var_off);
6163 __reg_assign_32_into_64(reg);
6164}
6165
6166/* truncate register to smaller size (in bytes)
6167 * must be called with size < BPF_REG_SIZE
6168 */
6169static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
6170{
6171 u64 mask;
6172
6173 /* clear high bits in bit representation */
6174 reg->var_off = tnum_cast(a: reg->var_off, size);
6175
6176 /* fix arithmetic bounds */
6177 mask = ((u64)1 << (size * 8)) - 1;
6178 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
6179 reg->umin_value &= mask;
6180 reg->umax_value &= mask;
6181 } else {
6182 reg->umin_value = 0;
6183 reg->umax_value = mask;
6184 }
6185 reg->smin_value = reg->umin_value;
6186 reg->smax_value = reg->umax_value;
6187
6188 /* If size is smaller than 32bit register the 32bit register
6189 * values are also truncated so we push 64-bit bounds into
6190 * 32-bit bounds. Above were truncated < 32-bits already.
6191 */
6192 if (size >= 4)
6193 return;
6194 __reg_combine_64_into_32(reg);
6195}
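
/* Worked example (illustrative numbers): truncating a register bounded by
 * [0, 0x10002] to size = 2 keeps only var_off's low 16 bits; since the high
 * bits of umin (0) and umax (0x10000) differ, the bounds are reset to
 * [0, 0xffff]. Had the register been bounded by [0x10001, 0x10002], the high
 * bits would agree and the result would be the tighter [1, 2].
 */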
6196
6197static void set_sext64_default_val(struct bpf_reg_state *reg, int size)
6198{
6199 if (size == 1) {
6200 reg->smin_value = reg->s32_min_value = S8_MIN;
6201 reg->smax_value = reg->s32_max_value = S8_MAX;
6202 } else if (size == 2) {
6203 reg->smin_value = reg->s32_min_value = S16_MIN;
6204 reg->smax_value = reg->s32_max_value = S16_MAX;
6205 } else {
6206 /* size == 4 */
6207 reg->smin_value = reg->s32_min_value = S32_MIN;
6208 reg->smax_value = reg->s32_max_value = S32_MAX;
6209 }
6210 reg->umin_value = reg->u32_min_value = 0;
6211 reg->umax_value = U64_MAX;
6212 reg->u32_max_value = U32_MAX;
6213 reg->var_off = tnum_unknown;
6214}
6215
6216static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size)
6217{
6218 s64 init_s64_max, init_s64_min, s64_max, s64_min, u64_cval;
6219 u64 top_smax_value, top_smin_value;
6220 u64 num_bits = size * 8;
6221
6222 if (tnum_is_const(a: reg->var_off)) {
6223 u64_cval = reg->var_off.value;
6224 if (size == 1)
6225 reg->var_off = tnum_const(value: (s8)u64_cval);
6226 else if (size == 2)
6227 reg->var_off = tnum_const(value: (s16)u64_cval);
6228 else
6229 /* size == 4 */
6230 reg->var_off = tnum_const(value: (s32)u64_cval);
6231
6232 u64_cval = reg->var_off.value;
6233 reg->smax_value = reg->smin_value = u64_cval;
6234 reg->umax_value = reg->umin_value = u64_cval;
6235 reg->s32_max_value = reg->s32_min_value = u64_cval;
6236 reg->u32_max_value = reg->u32_min_value = u64_cval;
6237 return;
6238 }
6239
6240 top_smax_value = ((u64)reg->smax_value >> num_bits) << num_bits;
6241 top_smin_value = ((u64)reg->smin_value >> num_bits) << num_bits;
6242
6243 if (top_smax_value != top_smin_value)
6244 goto out;
6245
	/* find the s64_min and s64_max after sign extension */
6247 if (size == 1) {
6248 init_s64_max = (s8)reg->smax_value;
6249 init_s64_min = (s8)reg->smin_value;
6250 } else if (size == 2) {
6251 init_s64_max = (s16)reg->smax_value;
6252 init_s64_min = (s16)reg->smin_value;
6253 } else {
6254 init_s64_max = (s32)reg->smax_value;
6255 init_s64_min = (s32)reg->smin_value;
6256 }
6257
6258 s64_max = max(init_s64_max, init_s64_min);
6259 s64_min = min(init_s64_max, init_s64_min);
6260
6261 /* both of s64_max/s64_min positive or negative */
6262 if ((s64_max >= 0) == (s64_min >= 0)) {
6263 reg->smin_value = reg->s32_min_value = s64_min;
6264 reg->smax_value = reg->s32_max_value = s64_max;
6265 reg->umin_value = reg->u32_min_value = s64_min;
6266 reg->umax_value = reg->u32_max_value = s64_max;
6267 reg->var_off = tnum_range(min: s64_min, max: s64_max);
6268 return;
6269 }
6270
6271out:
6272 set_sext64_default_val(reg, size);
6273}
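
/* Worked example (illustrative numbers): sign-extending a register bounded by
 * [0x7f0, 0x7f5] to size = 1: the bits above bit 7 agree for both bounds, the
 * low bytes 0xf0 and 0xf5 sign-extend to -16 and -11, and both results are
 * negative, so the register ends up with smin = -16 and smax = -11. If the
 * sign-extended bounds straddled zero, set_sext64_default_val() would be used
 * instead.
 */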
6274
6275static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
6276{
6277 if (size == 1) {
6278 reg->s32_min_value = S8_MIN;
6279 reg->s32_max_value = S8_MAX;
6280 } else {
6281 /* size == 2 */
6282 reg->s32_min_value = S16_MIN;
6283 reg->s32_max_value = S16_MAX;
6284 }
6285 reg->u32_min_value = 0;
6286 reg->u32_max_value = U32_MAX;
6287}
6288
6289static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
6290{
6291 s32 init_s32_max, init_s32_min, s32_max, s32_min, u32_val;
6292 u32 top_smax_value, top_smin_value;
6293 u32 num_bits = size * 8;
6294
6295 if (tnum_is_const(a: reg->var_off)) {
6296 u32_val = reg->var_off.value;
6297 if (size == 1)
6298 reg->var_off = tnum_const(value: (s8)u32_val);
6299 else
6300 reg->var_off = tnum_const(value: (s16)u32_val);
6301
6302 u32_val = reg->var_off.value;
6303 reg->s32_min_value = reg->s32_max_value = u32_val;
6304 reg->u32_min_value = reg->u32_max_value = u32_val;
6305 return;
6306 }
6307
6308 top_smax_value = ((u32)reg->s32_max_value >> num_bits) << num_bits;
6309 top_smin_value = ((u32)reg->s32_min_value >> num_bits) << num_bits;
6310
6311 if (top_smax_value != top_smin_value)
6312 goto out;
6313
	/* find the s32_min and s32_max after sign extension */
6315 if (size == 1) {
6316 init_s32_max = (s8)reg->s32_max_value;
6317 init_s32_min = (s8)reg->s32_min_value;
6318 } else {
6319 /* size == 2 */
6320 init_s32_max = (s16)reg->s32_max_value;
6321 init_s32_min = (s16)reg->s32_min_value;
6322 }
6323 s32_max = max(init_s32_max, init_s32_min);
6324 s32_min = min(init_s32_max, init_s32_min);
6325
6326 if ((s32_min >= 0) == (s32_max >= 0)) {
6327 reg->s32_min_value = s32_min;
6328 reg->s32_max_value = s32_max;
6329 reg->u32_min_value = (u32)s32_min;
6330 reg->u32_max_value = (u32)s32_max;
6331 return;
6332 }
6333
6334out:
6335 set_sext32_default_val(reg, size);
6336}
6337
6338static bool bpf_map_is_rdonly(const struct bpf_map *map)
6339{
	/* A map is considered read-only if the following conditions are true:
	 *
	 * 1) BPF program side cannot change any of the map content. The
	 *    BPF_F_RDONLY_PROG flag is set for the whole lifetime of a map
	 *    and was set at map creation time.
6345 * 2) The map value(s) have been initialized from user space by a
6346 * loader and then "frozen", such that no new map update/delete
6347 * operations from syscall side are possible for the rest of
6348 * the map's lifetime from that point onwards.
6349 * 3) Any parallel/pending map update/delete operations from syscall
6350 * side have been completed. Only after that point, it's safe to
6351 * assume that map value(s) are immutable.
6352 */
6353 return (map->map_flags & BPF_F_RDONLY_PROG) &&
6354 READ_ONCE(map->frozen) &&
6355 !bpf_map_write_active(map);
6356}
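
/* Illustrative sketch (not part of the verifier): user space typically makes a
 * map eligible for this, roughly, with libbpf-style calls such as
 *
 *   fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, ...);   // .map_flags = BPF_F_RDONLY_PROG
 *   bpf_map_update_elem(fd, &key, &init_val, 0);    // populate from user space
 *   bpf_map_freeze(fd);                             // no further syscall-side writes
 *
 * Only once all three conditions hold can bpf_map_direct_read() below safely
 * treat map values as constants in the verifier's view.
 */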
6357
6358static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
6359 bool is_ldsx)
6360{
6361 void *ptr;
6362 u64 addr;
6363 int err;
6364
6365 err = map->ops->map_direct_value_addr(map, &addr, off);
6366 if (err)
6367 return err;
6368 ptr = (void *)(long)addr + off;
6369
6370 switch (size) {
6371 case sizeof(u8):
6372 *val = is_ldsx ? (s64)*(s8 *)ptr : (u64)*(u8 *)ptr;
6373 break;
6374 case sizeof(u16):
6375 *val = is_ldsx ? (s64)*(s16 *)ptr : (u64)*(u16 *)ptr;
6376 break;
6377 case sizeof(u32):
6378 *val = is_ldsx ? (s64)*(s32 *)ptr : (u64)*(u32 *)ptr;
6379 break;
6380 case sizeof(u64):
6381 *val = *(u64 *)ptr;
6382 break;
6383 default:
6384 return -EINVAL;
6385 }
6386 return 0;
6387}
6388
6389#define BTF_TYPE_SAFE_RCU(__type) __PASTE(__type, __safe_rcu)
6390#define BTF_TYPE_SAFE_RCU_OR_NULL(__type) __PASTE(__type, __safe_rcu_or_null)
6391#define BTF_TYPE_SAFE_TRUSTED(__type) __PASTE(__type, __safe_trusted)
6392
/*
 * Allow-list a few fields as RCU trusted or fully trusted.
 * This logic doesn't allow mixed tagging and will be removed once GCC supports
 * btf_type_tag.
 */
6398
6399/* RCU trusted: these fields are trusted in RCU CS and never NULL */
6400BTF_TYPE_SAFE_RCU(struct task_struct) {
6401 const cpumask_t *cpus_ptr;
6402 struct css_set __rcu *cgroups;
6403 struct task_struct __rcu *real_parent;
6404 struct task_struct *group_leader;
6405};
6406
6407BTF_TYPE_SAFE_RCU(struct cgroup) {
6408 /* cgrp->kn is always accessible as documented in kernel/cgroup/cgroup.c */
6409 struct kernfs_node *kn;
6410};
6411
6412BTF_TYPE_SAFE_RCU(struct css_set) {
6413 struct cgroup *dfl_cgrp;
6414};
6415
6416/* RCU trusted: these fields are trusted in RCU CS and can be NULL */
6417BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) {
6418 struct file __rcu *exe_file;
6419};
6420
6421/* skb->sk, req->sk are not RCU protected, but we mark them as such
6422 * because bpf prog accessible sockets are SOCK_RCU_FREE.
6423 */
6424BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff) {
6425 struct sock *sk;
6426};
6427
6428BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock) {
6429 struct sock *sk;
6430};
6431
6432/* full trusted: these fields are trusted even outside of RCU CS and never NULL */
6433BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
6434 struct seq_file *seq;
6435};
6436
6437BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
6438 struct bpf_iter_meta *meta;
6439 struct task_struct *task;
6440};
6441
6442BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
6443 struct file *file;
6444};
6445
6446BTF_TYPE_SAFE_TRUSTED(struct file) {
6447 struct inode *f_inode;
6448};
6449
6450BTF_TYPE_SAFE_TRUSTED(struct dentry) {
6451 /* no negative dentry-s in places where bpf can see it */
6452 struct inode *d_inode;
6453};
6454
6455BTF_TYPE_SAFE_TRUSTED(struct socket) {
6456 struct sock *sk;
6457};
6458
6459static bool type_is_rcu(struct bpf_verifier_env *env,
6460 struct bpf_reg_state *reg,
6461 const char *field_name, u32 btf_id)
6462{
6463 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
6464 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup));
6465 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
6466
6467 return btf_nested_type_is_trusted(log: &env->log, reg, field_name, btf_id, suffix: "__safe_rcu");
6468}
6469
6470static bool type_is_rcu_or_null(struct bpf_verifier_env *env,
6471 struct bpf_reg_state *reg,
6472 const char *field_name, u32 btf_id)
6473{
6474 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct));
6475 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff));
6476 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock));
6477
6478 return btf_nested_type_is_trusted(log: &env->log, reg, field_name, btf_id, suffix: "__safe_rcu_or_null");
6479}
6480
6481static bool type_is_trusted(struct bpf_verifier_env *env,
6482 struct bpf_reg_state *reg,
6483 const char *field_name, u32 btf_id)
6484{
6485 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
6486 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
6487 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
6488 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
6489 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry));
6490 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket));
6491
6492 return btf_nested_type_is_trusted(log: &env->log, reg, field_name, btf_id, suffix: "__safe_trusted");
6493}
6494
6495static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
6496 struct bpf_reg_state *regs,
6497 int regno, int off, int size,
6498 enum bpf_access_type atype,
6499 int value_regno)
6500{
6501 struct bpf_reg_state *reg = regs + regno;
6502 const struct btf_type *t = btf_type_by_id(btf: reg->btf, type_id: reg->btf_id);
6503 const char *tname = btf_name_by_offset(btf: reg->btf, offset: t->name_off);
6504 const char *field_name = NULL;
6505 enum bpf_type_flag flag = 0;
6506 u32 btf_id = 0;
6507 int ret;
6508
6509 if (!env->allow_ptr_leaks) {
6510 verbose(private_data: env,
6511 fmt: "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
6512 tname);
6513 return -EPERM;
6514 }
6515 if (!env->prog->gpl_compatible && btf_is_kernel(btf: reg->btf)) {
6516 verbose(private_data: env,
6517 fmt: "Cannot access kernel 'struct %s' from non-GPL compatible program\n",
6518 tname);
6519 return -EINVAL;
6520 }
6521 if (off < 0) {
6522 verbose(private_data: env,
6523 fmt: "R%d is ptr_%s invalid negative access: off=%d\n",
6524 regno, tname, off);
6525 return -EACCES;
6526 }
6527 if (!tnum_is_const(a: reg->var_off) || reg->var_off.value) {
6528 char tn_buf[48];
6529
6530 tnum_strn(str: tn_buf, size: sizeof(tn_buf), a: reg->var_off);
6531 verbose(private_data: env,
6532 fmt: "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
6533 regno, tname, off, tn_buf);
6534 return -EACCES;
6535 }
6536
6537 if (reg->type & MEM_USER) {
6538 verbose(private_data: env,
6539 fmt: "R%d is ptr_%s access user memory: off=%d\n",
6540 regno, tname, off);
6541 return -EACCES;
6542 }
6543
6544 if (reg->type & MEM_PERCPU) {
6545 verbose(private_data: env,
6546 fmt: "R%d is ptr_%s access percpu memory: off=%d\n",
6547 regno, tname, off);
6548 return -EACCES;
6549 }
6550
6551 if (env->ops->btf_struct_access && !type_is_alloc(type: reg->type) && atype == BPF_WRITE) {
6552 if (!btf_is_kernel(btf: reg->btf)) {
6553 verbose(private_data: env, fmt: "verifier internal error: reg->btf must be kernel btf\n");
6554 return -EFAULT;
6555 }
6556 ret = env->ops->btf_struct_access(&env->log, reg, off, size);
6557 } else {
6558 /* Writes are permitted with default btf_struct_access for
6559 * program allocated objects (which always have ref_obj_id > 0),
6560 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
6561 */
6562 if (atype != BPF_READ && !type_is_ptr_alloc_obj(type: reg->type)) {
6563 verbose(private_data: env, fmt: "only read is supported\n");
6564 return -EACCES;
6565 }
6566
6567 if (type_is_alloc(type: reg->type) && !type_is_non_owning_ref(type: reg->type) &&
6568 !(reg->type & MEM_RCU) && !reg->ref_obj_id) {
6569 verbose(private_data: env, fmt: "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
6570 return -EFAULT;
6571 }
6572
6573 ret = btf_struct_access(log: &env->log, reg, off, size, atype, next_btf_id: &btf_id, flag: &flag, field_name: &field_name);
6574 }
6575
6576 if (ret < 0)
6577 return ret;
6578
6579 if (ret != PTR_TO_BTF_ID) {
6580 /* just mark; */
6581
6582 } else if (type_flag(type: reg->type) & PTR_UNTRUSTED) {
6583 /* If this is an untrusted pointer, all pointers formed by walking it
6584 * also inherit the untrusted flag.
6585 */
6586 flag = PTR_UNTRUSTED;
6587
6588 } else if (is_trusted_reg(reg) || is_rcu_reg(reg)) {
6589 /* By default any pointer obtained from walking a trusted pointer is no
6590 * longer trusted, unless the field being accessed has explicitly been
6591 * marked as inheriting its parent's state of trust (either full or RCU).
6592 * For example:
6593 * 'cgroups' pointer is untrusted if task->cgroups dereference
6594 * happened in a sleepable program outside of bpf_rcu_read_lock()
6595 * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
6596 * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
6597 *
6598 * A regular RCU-protected pointer with __rcu tag can also be deemed
6599 * trusted if we are in an RCU CS. Such pointer can be NULL.
6600 */
6601 if (type_is_trusted(env, reg, field_name, btf_id)) {
6602 flag |= PTR_TRUSTED;
6603 } else if (in_rcu_cs(env) && !type_may_be_null(type: reg->type)) {
6604 if (type_is_rcu(env, reg, field_name, btf_id)) {
6605 /* ignore __rcu tag and mark it MEM_RCU */
6606 flag |= MEM_RCU;
6607 } else if (flag & MEM_RCU ||
6608 type_is_rcu_or_null(env, reg, field_name, btf_id)) {
6609 /* __rcu tagged pointers can be NULL */
6610 flag |= MEM_RCU | PTR_MAYBE_NULL;
6611
6612 /* We always trust them */
6613 if (type_is_rcu_or_null(env, reg, field_name, btf_id) &&
6614 flag & PTR_UNTRUSTED)
6615 flag &= ~PTR_UNTRUSTED;
6616 } else if (flag & (MEM_PERCPU | MEM_USER)) {
6617 /* keep as-is */
6618 } else {
6619 /* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */
6620 clear_trusted_flags(flag: &flag);
6621 }
6622 } else {
6623 /*
6624 * If not in RCU CS or MEM_RCU pointer can be NULL then
6625 * aggressively mark as untrusted otherwise such
6626 * pointers will be plain PTR_TO_BTF_ID without flags
6627 * and will be allowed to be passed into helpers for
6628 * compat reasons.
6629 */
6630 flag = PTR_UNTRUSTED;
6631 }
6632 } else {
6633 /* Old compat. Deprecated */
6634 clear_trusted_flags(flag: &flag);
6635 }
6636
6637 if (atype == BPF_READ && value_regno >= 0)
6638 mark_btf_ld_reg(env, regs, regno: value_regno, reg_type: ret, btf: reg->btf, btf_id, flag);
6639
6640 return 0;
6641}
6642
6643static int check_ptr_to_map_access(struct bpf_verifier_env *env,
6644 struct bpf_reg_state *regs,
6645 int regno, int off, int size,
6646 enum bpf_access_type atype,
6647 int value_regno)
6648{
6649 struct bpf_reg_state *reg = regs + regno;
6650 struct bpf_map *map = reg->map_ptr;
6651 struct bpf_reg_state map_reg;
6652 enum bpf_type_flag flag = 0;
6653 const struct btf_type *t;
6654 const char *tname;
6655 u32 btf_id;
6656 int ret;
6657
6658 if (!btf_vmlinux) {
6659 verbose(private_data: env, fmt: "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
6660 return -ENOTSUPP;
6661 }
6662
6663 if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
6664 verbose(private_data: env, fmt: "map_ptr access not supported for map type %d\n",
6665 map->map_type);
6666 return -ENOTSUPP;
6667 }
6668
6669 t = btf_type_by_id(btf: btf_vmlinux, type_id: *map->ops->map_btf_id);
6670 tname = btf_name_by_offset(btf: btf_vmlinux, offset: t->name_off);
6671
6672 if (!env->allow_ptr_leaks) {
6673 verbose(private_data: env,
6674 fmt: "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
6675 tname);
6676 return -EPERM;
6677 }
6678
6679 if (off < 0) {
6680 verbose(private_data: env, fmt: "R%d is %s invalid negative access: off=%d\n",
6681 regno, tname, off);
6682 return -EACCES;
6683 }
6684
6685 if (atype != BPF_READ) {
6686 verbose(private_data: env, fmt: "only read from %s is supported\n", tname);
6687 return -EACCES;
6688 }
6689
6690 /* Simulate access to a PTR_TO_BTF_ID */
6691 memset(&map_reg, 0, sizeof(map_reg));
6692 mark_btf_ld_reg(env, regs: &map_reg, regno: 0, reg_type: PTR_TO_BTF_ID, btf: btf_vmlinux, btf_id: *map->ops->map_btf_id, flag: 0);
6693 ret = btf_struct_access(log: &env->log, reg: &map_reg, off, size, atype, next_btf_id: &btf_id, flag: &flag, NULL);
6694 if (ret < 0)
6695 return ret;
6696
6697 if (value_regno >= 0)
6698 mark_btf_ld_reg(env, regs, regno: value_regno, reg_type: ret, btf: btf_vmlinux, btf_id, flag);
6699
6700 return 0;
6701}
6702
6703/* Check that the stack access at the given offset is within bounds. The
6704 * maximum valid offset is -1.
6705 *
6706 * The minimum valid offset is -MAX_BPF_STACK for writes, and
6707 * -state->allocated_stack for reads.
6708 */
6709static int check_stack_slot_within_bounds(int off,
6710 struct bpf_func_state *state,
6711 enum bpf_access_type t)
6712{
6713 int min_valid_off;
6714
6715 if (t == BPF_WRITE)
6716 min_valid_off = -MAX_BPF_STACK;
6717 else
6718 min_valid_off = -state->allocated_stack;
6719
6720 if (off < min_valid_off || off > -1)
6721 return -EACCES;
6722 return 0;
6723}
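
/* Worked example (illustrative numbers): with MAX_BPF_STACK = 512, a write at
 * off = -520 fails the min_valid_off = -512 check, and any access at off >= 0
 * is rejected outright. A read at off = -16 is only accepted once at least 16
 * bytes of stack have actually been allocated (state->allocated_stack >= 16).
 */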
6724
6725/* Check that the stack access at 'regno + off' falls within the maximum stack
6726 * bounds.
6727 *
6728 * 'off' includes `regno->offset`, but not its dynamic part (if any).
6729 */
6730static int check_stack_access_within_bounds(
6731 struct bpf_verifier_env *env,
6732 int regno, int off, int access_size,
6733 enum bpf_access_src src, enum bpf_access_type type)
6734{
6735 struct bpf_reg_state *regs = cur_regs(env);
6736 struct bpf_reg_state *reg = regs + regno;
6737 struct bpf_func_state *state = func(env, reg);
6738 int min_off, max_off;
6739 int err;
6740 char *err_extra;
6741
6742 if (src == ACCESS_HELPER)
6743 /* We don't know if helpers are reading or writing (or both). */
6744 err_extra = " indirect access to";
6745 else if (type == BPF_READ)
6746 err_extra = " read from";
6747 else
6748 err_extra = " write to";
6749
6750 if (tnum_is_const(a: reg->var_off)) {
6751 min_off = reg->var_off.value + off;
6752 if (access_size > 0)
6753 max_off = min_off + access_size - 1;
6754 else
6755 max_off = min_off;
6756 } else {
6757 if (reg->smax_value >= BPF_MAX_VAR_OFF ||
6758 reg->smin_value <= -BPF_MAX_VAR_OFF) {
6759 verbose(private_data: env, fmt: "invalid unbounded variable-offset%s stack R%d\n",
6760 err_extra, regno);
6761 return -EACCES;
6762 }
6763 min_off = reg->smin_value + off;
6764 if (access_size > 0)
6765 max_off = reg->smax_value + off + access_size - 1;
6766 else
6767 max_off = min_off;
6768 }
6769
6770 err = check_stack_slot_within_bounds(off: min_off, state, t: type);
6771 if (!err)
6772 err = check_stack_slot_within_bounds(off: max_off, state, t: type);
6773
6774 if (err) {
6775 if (tnum_is_const(a: reg->var_off)) {
6776 verbose(private_data: env, fmt: "invalid%s stack R%d off=%d size=%d\n",
6777 err_extra, regno, off, access_size);
6778 } else {
6779 char tn_buf[48];
6780
6781 tnum_strn(str: tn_buf, size: sizeof(tn_buf), a: reg->var_off);
6782 verbose(private_data: env, fmt: "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
6783 err_extra, regno, tn_buf, access_size);
6784 }
6785 }
6786 return err;
6787}
6788
/* check whether memory at (regno + off) is accessible for t = (read | write)
 * if t==write, value_regno is the register whose value is stored into memory
 * if t==read, value_regno is the register which will receive the value from memory
 * if t==write && value_regno==-1, some unknown value is stored into memory
 * if t==read && value_regno==-1, don't care what we read from memory
 */
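/* For illustration (a hedged sketch): BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 8),
 * i.e. r0 = *(u32 *)(r6 + 8), reaches this function roughly as
 * check_mem_access(env, insn_idx, regno=6, off=8, BPF_W, BPF_READ, value_regno=0, ...),
 * so R6 must point to readable memory and R0 receives the loaded value.
 */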
6795static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
6796 int off, int bpf_size, enum bpf_access_type t,
6797 int value_regno, bool strict_alignment_once, bool is_ldsx)
6798{
6799 struct bpf_reg_state *regs = cur_regs(env);
6800 struct bpf_reg_state *reg = regs + regno;
6801 struct bpf_func_state *state;
6802 int size, err = 0;
6803
6804 size = bpf_size_to_bytes(bpf_size);
6805 if (size < 0)
6806 return size;
6807
6808 /* alignment checks will add in reg->off themselves */
6809 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
6810 if (err)
6811 return err;
6812
6813 /* for access checks, reg->off is just part of off */
6814 off += reg->off;
6815
6816 if (reg->type == PTR_TO_MAP_KEY) {
6817 if (t == BPF_WRITE) {
6818 verbose(private_data: env, fmt: "write to change key R%d not allowed\n", regno);
6819 return -EACCES;
6820 }
6821
6822 err = check_mem_region_access(env, regno, off, size,
6823 mem_size: reg->map_ptr->key_size, zero_size_allowed: false);
6824 if (err)
6825 return err;
6826 if (value_regno >= 0)
6827 mark_reg_unknown(env, regs, regno: value_regno);
6828 } else if (reg->type == PTR_TO_MAP_VALUE) {
6829 struct btf_field *kptr_field = NULL;
6830
6831 if (t == BPF_WRITE && value_regno >= 0 &&
6832 is_pointer_value(env, regno: value_regno)) {
6833 verbose(private_data: env, fmt: "R%d leaks addr into map\n", value_regno);
6834 return -EACCES;
6835 }
6836 err = check_map_access_type(env, regno, off, size, type: t);
6837 if (err)
6838 return err;
6839 err = check_map_access(env, regno, off, size, zero_size_allowed: false, src: ACCESS_DIRECT);
6840 if (err)
6841 return err;
6842 if (tnum_is_const(a: reg->var_off))
6843 kptr_field = btf_record_find(rec: reg->map_ptr->record,
6844 offset: off + reg->var_off.value, field_mask: BPF_KPTR);
6845 if (kptr_field) {
6846 err = check_map_kptr_access(env, regno, value_regno, insn_idx, kptr_field);
6847 } else if (t == BPF_READ && value_regno >= 0) {
6848 struct bpf_map *map = reg->map_ptr;
6849
6850 /* if map is read-only, track its contents as scalars */
6851 if (tnum_is_const(a: reg->var_off) &&
6852 bpf_map_is_rdonly(map) &&
6853 map->ops->map_direct_value_addr) {
6854 int map_off = off + reg->var_off.value;
6855 u64 val = 0;
6856
6857 err = bpf_map_direct_read(map, off: map_off, size,
6858 val: &val, is_ldsx);
6859 if (err)
6860 return err;
6861
6862 regs[value_regno].type = SCALAR_VALUE;
6863 __mark_reg_known(reg: &regs[value_regno], imm: val);
6864 } else {
6865 mark_reg_unknown(env, regs, regno: value_regno);
6866 }
6867 }
6868 } else if (base_type(type: reg->type) == PTR_TO_MEM) {
6869 bool rdonly_mem = type_is_rdonly_mem(type: reg->type);
6870
6871 if (type_may_be_null(type: reg->type)) {
6872 verbose(private_data: env, fmt: "R%d invalid mem access '%s'\n", regno,
6873 reg_type_str(env, type: reg->type));
6874 return -EACCES;
6875 }
6876
6877 if (t == BPF_WRITE && rdonly_mem) {
6878 verbose(private_data: env, fmt: "R%d cannot write into %s\n",
6879 regno, reg_type_str(env, type: reg->type));
6880 return -EACCES;
6881 }
6882
6883 if (t == BPF_WRITE && value_regno >= 0 &&
6884 is_pointer_value(env, regno: value_regno)) {
6885 verbose(private_data: env, fmt: "R%d leaks addr into mem\n", value_regno);
6886 return -EACCES;
6887 }
6888
6889 err = check_mem_region_access(env, regno, off, size,
6890 mem_size: reg->mem_size, zero_size_allowed: false);
6891 if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
6892 mark_reg_unknown(env, regs, regno: value_regno);
6893 } else if (reg->type == PTR_TO_CTX) {
6894 enum bpf_reg_type reg_type = SCALAR_VALUE;
6895 struct btf *btf = NULL;
6896 u32 btf_id = 0;
6897
6898 if (t == BPF_WRITE && value_regno >= 0 &&
6899 is_pointer_value(env, regno: value_regno)) {
6900 verbose(private_data: env, fmt: "R%d leaks addr into ctx\n", value_regno);
6901 return -EACCES;
6902 }
6903
6904 err = check_ptr_off_reg(env, reg, regno);
6905 if (err < 0)
6906 return err;
6907
6908 err = check_ctx_access(env, insn_idx, off, size, t, reg_type: &reg_type, btf: &btf,
6909 btf_id: &btf_id);
6910 if (err)
6911 verbose_linfo(env, insn_off: insn_idx, prefix_fmt: "; ");
6912 if (!err && t == BPF_READ && value_regno >= 0) {
6913 /* ctx access returns either a scalar, or a
6914 * PTR_TO_PACKET[_META,_END]. In the latter
6915 * case, we know the offset is zero.
6916 */
6917 if (reg_type == SCALAR_VALUE) {
6918 mark_reg_unknown(env, regs, regno: value_regno);
6919 } else {
6920 mark_reg_known_zero(env, regs,
6921 regno: value_regno);
6922 if (type_may_be_null(type: reg_type))
6923 regs[value_regno].id = ++env->id_gen;
				/* A load of a ctx field could have an actual
				 * load size that differs from the one encoded
				 * in the insn. When the dst is a pointer, it
				 * is for sure not a sub-register.
				 */
6929 regs[value_regno].subreg_def = DEF_NOT_SUBREG;
6930 if (base_type(type: reg_type) == PTR_TO_BTF_ID) {
6931 regs[value_regno].btf = btf;
6932 regs[value_regno].btf_id = btf_id;
6933 }
6934 }
6935 regs[value_regno].type = reg_type;
6936 }
6937
6938 } else if (reg->type == PTR_TO_STACK) {
6939 /* Basic bounds checks. */
6940 err = check_stack_access_within_bounds(env, regno, off, access_size: size, src: ACCESS_DIRECT, type: t);
6941 if (err)
6942 return err;
6943
6944 state = func(env, reg);
6945 err = update_stack_depth(env, func: state, off);
6946 if (err)
6947 return err;
6948
6949 if (t == BPF_READ)
6950 err = check_stack_read(env, ptr_regno: regno, off, size,
6951 dst_regno: value_regno);
6952 else
6953 err = check_stack_write(env, ptr_regno: regno, off, size,
6954 value_regno, insn_idx);
6955 } else if (reg_is_pkt_pointer(reg)) {
6956 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
6957 verbose(private_data: env, fmt: "cannot write into packet\n");
6958 return -EACCES;
6959 }
6960 if (t == BPF_WRITE && value_regno >= 0 &&
6961 is_pointer_value(env, regno: value_regno)) {
6962 verbose(private_data: env, fmt: "R%d leaks addr into packet\n",
6963 value_regno);
6964 return -EACCES;
6965 }
6966 err = check_packet_access(env, regno, off, size, zero_size_allowed: false);
6967 if (!err && t == BPF_READ && value_regno >= 0)
6968 mark_reg_unknown(env, regs, regno: value_regno);
6969 } else if (reg->type == PTR_TO_FLOW_KEYS) {
6970 if (t == BPF_WRITE && value_regno >= 0 &&
6971 is_pointer_value(env, regno: value_regno)) {
6972 verbose(private_data: env, fmt: "R%d leaks addr into flow keys\n",
6973 value_regno);
6974 return -EACCES;
6975 }
6976
6977 err = check_flow_keys_access(env, off, size);
6978 if (!err && t == BPF_READ && value_regno >= 0)
6979 mark_reg_unknown(env, regs, regno: value_regno);
6980 } else if (type_is_sk_pointer(type: reg->type)) {
6981 if (t == BPF_WRITE) {
6982 verbose(private_data: env, fmt: "R%d cannot write into %s\n",
6983 regno, reg_type_str(env, type: reg->type));
6984 return -EACCES;
6985 }
6986 err = check_sock_access(env, insn_idx, regno, off, size, t);
6987 if (!err && value_regno >= 0)
6988 mark_reg_unknown(env, regs, regno: value_regno);
6989 } else if (reg->type == PTR_TO_TP_BUFFER) {
6990 err = check_tp_buffer_access(env, reg, regno, off, size);
6991 if (!err && t == BPF_READ && value_regno >= 0)
6992 mark_reg_unknown(env, regs, regno: value_regno);
6993 } else if (base_type(type: reg->type) == PTR_TO_BTF_ID &&
6994 !type_may_be_null(type: reg->type)) {
6995 err = check_ptr_to_btf_access(env, regs, regno, off, size, atype: t,
6996 value_regno);
6997 } else if (reg->type == CONST_PTR_TO_MAP) {
6998 err = check_ptr_to_map_access(env, regs, regno, off, size, atype: t,
6999 value_regno);
7000 } else if (base_type(type: reg->type) == PTR_TO_BUF) {
7001 bool rdonly_mem = type_is_rdonly_mem(type: reg->type);
7002 u32 *max_access;
7003
7004 if (rdonly_mem) {
7005 if (t == BPF_WRITE) {
7006 verbose(private_data: env, fmt: "R%d cannot write into %s\n",
7007 regno, reg_type_str(env, type: reg->type));
7008 return -EACCES;
7009 }
7010 max_access = &env->prog->aux->max_rdonly_access;
7011 } else {
7012 max_access = &env->prog->aux->max_rdwr_access;
7013 }
7014
7015 err = check_buffer_access(env, reg, regno, off, size, zero_size_allowed: false,
7016 max_access);
7017
7018 if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
7019 mark_reg_unknown(env, regs, regno: value_regno);
7020 } else {
7021 verbose(private_data: env, fmt: "R%d invalid mem access '%s'\n", regno,
7022 reg_type_str(env, type: reg->type));
7023 return -EACCES;
7024 }
7025
7026 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
7027 regs[value_regno].type == SCALAR_VALUE) {
7028 if (!is_ldsx)
7029 /* b/h/w load zero-extends, mark upper bits as known 0 */
7030 coerce_reg_to_size(reg: &regs[value_regno], size);
7031 else
7032 coerce_reg_to_size_sx(reg: &regs[value_regno], size);
7033 }
7034 return err;
7035}
7036
7037static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
7038{
7039 int load_reg;
7040 int err;
7041
7042 switch (insn->imm) {
7043 case BPF_ADD:
7044 case BPF_ADD | BPF_FETCH:
7045 case BPF_AND:
7046 case BPF_AND | BPF_FETCH:
7047 case BPF_OR:
7048 case BPF_OR | BPF_FETCH:
7049 case BPF_XOR:
7050 case BPF_XOR | BPF_FETCH:
7051 case BPF_XCHG:
7052 case BPF_CMPXCHG:
7053 break;
7054 default:
7055 verbose(private_data: env, fmt: "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
7056 return -EINVAL;
7057 }
7058
7059 if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
7060 verbose(private_data: env, fmt: "invalid atomic operand size\n");
7061 return -EINVAL;
7062 }
7063
7064 /* check src1 operand */
7065 err = check_reg_arg(env, regno: insn->src_reg, t: SRC_OP);
7066 if (err)
7067 return err;
7068
7069 /* check src2 operand */
7070 err = check_reg_arg(env, regno: insn->dst_reg, t: SRC_OP);
7071 if (err)
7072 return err;
7073
7074 if (insn->imm == BPF_CMPXCHG) {
7075 /* Check comparison of R0 with memory location */
7076 const u32 aux_reg = BPF_REG_0;
7077
7078 err = check_reg_arg(env, regno: aux_reg, t: SRC_OP);
7079 if (err)
7080 return err;
7081
7082 if (is_pointer_value(env, regno: aux_reg)) {
7083 verbose(private_data: env, fmt: "R%d leaks addr into mem\n", aux_reg);
7084 return -EACCES;
7085 }
7086 }
7087
7088 if (is_pointer_value(env, regno: insn->src_reg)) {
7089 verbose(private_data: env, fmt: "R%d leaks addr into mem\n", insn->src_reg);
7090 return -EACCES;
7091 }
7092
7093 if (is_ctx_reg(env, regno: insn->dst_reg) ||
7094 is_pkt_reg(env, regno: insn->dst_reg) ||
7095 is_flow_key_reg(env, regno: insn->dst_reg) ||
7096 is_sk_reg(env, regno: insn->dst_reg)) {
7097 verbose(private_data: env, fmt: "BPF_ATOMIC stores into R%d %s is not allowed\n",
7098 insn->dst_reg,
7099 reg_type_str(env, type: reg_state(env, regno: insn->dst_reg)->type));
7100 return -EACCES;
7101 }
7102
7103 if (insn->imm & BPF_FETCH) {
7104 if (insn->imm == BPF_CMPXCHG)
7105 load_reg = BPF_REG_0;
7106 else
7107 load_reg = insn->src_reg;
7108
7109 /* check and record load of old value */
7110 err = check_reg_arg(env, regno: load_reg, t: DST_OP);
7111 if (err)
7112 return err;
7113 } else {
7114 /* This instruction accesses a memory location but doesn't
7115 * actually load it into a register.
7116 */
7117 load_reg = -1;
7118 }
7119
	/* Check whether we can read the memory, with a second call for the
	 * fetch case to simulate the register fill.
	 */
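	/* Sketch: for BPF_ATOMIC | BPF_DW with imm == (BPF_ADD | BPF_FETCH),
	 * the first call only verifies that *(u64 *)(dst_reg + off) is
	 * readable, while the second fills src_reg with an unknown scalar
	 * standing in for the old value returned by the fetch.
	 */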
7123 err = check_mem_access(env, insn_idx, regno: insn->dst_reg, off: insn->off,
7124 BPF_SIZE(insn->code), t: BPF_READ, value_regno: -1, strict_alignment_once: true, is_ldsx: false);
7125 if (!err && load_reg >= 0)
7126 err = check_mem_access(env, insn_idx, regno: insn->dst_reg, off: insn->off,
7127 BPF_SIZE(insn->code), t: BPF_READ, value_regno: load_reg,
7128 strict_alignment_once: true, is_ldsx: false);
7129 if (err)
7130 return err;
7131
7132 /* Check whether we can write into the same memory. */
7133 err = check_mem_access(env, insn_idx, regno: insn->dst_reg, off: insn->off,
7134 BPF_SIZE(insn->code), t: BPF_WRITE, value_regno: -1, strict_alignment_once: true, is_ldsx: false);
7135 if (err)
7136 return err;
7137
7138 return 0;
7139}
7140
/* When register 'regno' is used to read the stack (either directly or through
 * a helper function) make sure that it's within the stack bounds and,
 * depending on the access type, that all elements of the stack are
 * initialized.
 *
 * 'off' includes 'regno->off', but not its dynamic part (if any).
 *
 * All registers that have been spilled on the stack in the slots within the
 * read offsets are marked as read.
 */
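/* For example (a sketch): passing fp-8 as the key to bpf_map_lookup_elem()
 * on a map with key_size == 8 ends up here with off == -8 and
 * access_size == 8; all eight stack bytes must already be STACK_MISC,
 * STACK_ZERO or part of a spilled register, otherwise the read of
 * uninitialized stack is rejected (unless allow_uninit_stack is set).
 */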
7150static int check_stack_range_initialized(
7151 struct bpf_verifier_env *env, int regno, int off,
7152 int access_size, bool zero_size_allowed,
7153 enum bpf_access_src type, struct bpf_call_arg_meta *meta)
7154{
7155 struct bpf_reg_state *reg = reg_state(env, regno);
7156 struct bpf_func_state *state = func(env, reg);
7157 int err, min_off, max_off, i, j, slot, spi;
7158 char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
7159 enum bpf_access_type bounds_check_type;
7160 /* Some accesses can write anything into the stack, others are
7161 * read-only.
7162 */
7163 bool clobber = false;
7164
7165 if (access_size == 0 && !zero_size_allowed) {
7166 verbose(private_data: env, fmt: "invalid zero-sized read\n");
7167 return -EACCES;
7168 }
7169
7170 if (type == ACCESS_HELPER) {
7171 /* The bounds checks for writes are more permissive than for
7172 * reads. However, if raw_mode is not set, we'll do extra
7173 * checks below.
7174 */
7175 bounds_check_type = BPF_WRITE;
7176 clobber = true;
7177 } else {
7178 bounds_check_type = BPF_READ;
7179 }
7180 err = check_stack_access_within_bounds(env, regno, off, access_size,
7181 src: type, type: bounds_check_type);
7182 if (err)
7183 return err;
7184
7185
7186 if (tnum_is_const(a: reg->var_off)) {
7187 min_off = max_off = reg->var_off.value + off;
7188 } else {
7189 /* Variable offset is prohibited for unprivileged mode for
7190 * simplicity since it requires corresponding support in
7191 * Spectre masking for stack ALU.
7192 * See also retrieve_ptr_limit().
7193 */
7194 if (!env->bypass_spec_v1) {
7195 char tn_buf[48];
7196
7197 tnum_strn(str: tn_buf, size: sizeof(tn_buf), a: reg->var_off);
7198 verbose(private_data: env, fmt: "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
7199 regno, err_extra, tn_buf);
7200 return -EACCES;
7201 }
		/* Only an initialized buffer on the stack may be accessed with
		 * a variable offset. With an uninitialized buffer it's hard to
		 * guarantee that the whole memory is marked as initialized on
		 * helper return, since the exact bounds are unknown, which may
		 * cause uninitialized stack to be leaked.
		 */
7208 if (meta && meta->raw_mode)
7209 meta = NULL;
7210
7211 min_off = reg->smin_value + off;
7212 max_off = reg->smax_value + off;
7213 }
7214
7215 if (meta && meta->raw_mode) {
		/* Ensure we won't be overwriting dynptrs when simulating byte
		 * by byte access in check_helper_call using meta.access_size.
		 * This would be a problem if we have a helper in the future
		 * which takes:
		 *
		 * helper(uninit_mem, len, dynptr)
		 *
		 * Now, uninit_mem may overlap with dynptr pointer. Hence, it
		 * may end up writing to dynptr itself when touching memory from
		 * arg 1. This can be relaxed on a case-by-case basis for known
		 * safe cases, but reject due to the possibility of aliasing by
		 * default.
		 */
7229 for (i = min_off; i < max_off + access_size; i++) {
7230 int stack_off = -i - 1;
7231
7232 spi = __get_spi(off: i);
7233 /* raw_mode may write past allocated_stack */
7234 if (state->allocated_stack <= stack_off)
7235 continue;
7236 if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
7237 verbose(private_data: env, fmt: "potential write to dynptr at off=%d disallowed\n", i);
7238 return -EACCES;
7239 }
7240 }
7241 meta->access_size = access_size;
7242 meta->regno = regno;
7243 return 0;
7244 }
7245
7246 for (i = min_off; i < max_off + access_size; i++) {
7247 u8 *stype;
7248
7249 slot = -i - 1;
7250 spi = slot / BPF_REG_SIZE;
7251 if (state->allocated_stack <= slot)
7252 goto err;
7253 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
7254 if (*stype == STACK_MISC)
7255 goto mark;
7256 if ((*stype == STACK_ZERO) ||
7257 (*stype == STACK_INVALID && env->allow_uninit_stack)) {
7258 if (clobber) {
7259 /* helper can write anything into the stack */
7260 *stype = STACK_MISC;
7261 }
7262 goto mark;
7263 }
7264
7265 if (is_spilled_reg(stack: &state->stack[spi]) &&
7266 (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
7267 env->allow_ptr_leaks)) {
7268 if (clobber) {
7269 __mark_reg_unknown(env, reg: &state->stack[spi].spilled_ptr);
7270 for (j = 0; j < BPF_REG_SIZE; j++)
7271 scrub_spilled_slot(stype: &state->stack[spi].slot_type[j]);
7272 }
7273 goto mark;
7274 }
7275
7276err:
7277 if (tnum_is_const(a: reg->var_off)) {
7278 verbose(private_data: env, fmt: "invalid%s read from stack R%d off %d+%d size %d\n",
7279 err_extra, regno, min_off, i - min_off, access_size);
7280 } else {
7281 char tn_buf[48];
7282
7283 tnum_strn(str: tn_buf, size: sizeof(tn_buf), a: reg->var_off);
7284 verbose(private_data: env, fmt: "invalid%s read from stack R%d var_off %s+%d size %d\n",
7285 err_extra, regno, tn_buf, i - min_off, access_size);
7286 }
7287 return -EACCES;
7288mark:
7289 /* reading any byte out of 8-byte 'spill_slot' will cause
7290 * the whole slot to be marked as 'read'
7291 */
7292 mark_reg_read(env, state: &state->stack[spi].spilled_ptr,
7293 parent: state->stack[spi].spilled_ptr.parent,
7294 flag: REG_LIVE_READ64);
		/* We do not set REG_LIVE_WRITTEN for the stack slot, as we
		 * cannot be sure whether the stack slot is actually written to.
		 * Hence, we must still conservatively propagate reads upwards,
		 * even if the helper may write to the entire memory range.
		 */
7300 }
7301 return update_stack_depth(env, func: state, off: min_off);
7302}
7303
7304static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
7305 int access_size, bool zero_size_allowed,
7306 struct bpf_call_arg_meta *meta)
7307{
7308 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7309 u32 *max_access;
7310
7311 switch (base_type(type: reg->type)) {
7312 case PTR_TO_PACKET:
7313 case PTR_TO_PACKET_META:
7314 return check_packet_access(env, regno, off: reg->off, size: access_size,
7315 zero_size_allowed);
7316 case PTR_TO_MAP_KEY:
7317 if (meta && meta->raw_mode) {
7318 verbose(private_data: env, fmt: "R%d cannot write into %s\n", regno,
7319 reg_type_str(env, type: reg->type));
7320 return -EACCES;
7321 }
7322 return check_mem_region_access(env, regno, off: reg->off, size: access_size,
7323 mem_size: reg->map_ptr->key_size, zero_size_allowed: false);
7324 case PTR_TO_MAP_VALUE:
7325 if (check_map_access_type(env, regno, off: reg->off, size: access_size,
7326 type: meta && meta->raw_mode ? BPF_WRITE :
7327 BPF_READ))
7328 return -EACCES;
7329 return check_map_access(env, regno, off: reg->off, size: access_size,
7330 zero_size_allowed, src: ACCESS_HELPER);
7331 case PTR_TO_MEM:
7332 if (type_is_rdonly_mem(type: reg->type)) {
7333 if (meta && meta->raw_mode) {
7334 verbose(private_data: env, fmt: "R%d cannot write into %s\n", regno,
7335 reg_type_str(env, type: reg->type));
7336 return -EACCES;
7337 }
7338 }
7339 return check_mem_region_access(env, regno, off: reg->off,
7340 size: access_size, mem_size: reg->mem_size,
7341 zero_size_allowed);
7342 case PTR_TO_BUF:
7343 if (type_is_rdonly_mem(type: reg->type)) {
7344 if (meta && meta->raw_mode) {
7345 verbose(private_data: env, fmt: "R%d cannot write into %s\n", regno,
7346 reg_type_str(env, type: reg->type));
7347 return -EACCES;
7348 }
7349
7350 max_access = &env->prog->aux->max_rdonly_access;
7351 } else {
7352 max_access = &env->prog->aux->max_rdwr_access;
7353 }
7354 return check_buffer_access(env, reg, regno, off: reg->off,
7355 size: access_size, zero_size_allowed,
7356 max_access);
7357 case PTR_TO_STACK:
7358 return check_stack_range_initialized(
7359 env,
7360 regno, off: reg->off, access_size,
7361 zero_size_allowed, type: ACCESS_HELPER, meta);
7362 case PTR_TO_BTF_ID:
7363 return check_ptr_to_btf_access(env, regs, regno, off: reg->off,
7364 size: access_size, atype: BPF_READ, value_regno: -1);
7365 case PTR_TO_CTX:
		/* In case the function doesn't know how to access the context
		 * (because we are in a program of type SYSCALL, for example),
		 * we cannot statically check its size, so check it dynamically
		 * now.
		 */
7371 if (!env->ops->convert_ctx_access) {
7372 enum bpf_access_type atype = meta && meta->raw_mode ? BPF_WRITE : BPF_READ;
7373 int offset = access_size - 1;
7374
7375 /* Allow zero-byte read from PTR_TO_CTX */
7376 if (access_size == 0)
7377 return zero_size_allowed ? 0 : -EACCES;
7378
7379 return check_mem_access(env, insn_idx: env->insn_idx, regno, off: offset, BPF_B,
7380 t: atype, value_regno: -1, strict_alignment_once: false, is_ldsx: false);
7381 }
7382
7383 fallthrough;
7384 default: /* scalar_value or invalid ptr */
7385 /* Allow zero-byte read from NULL, regardless of pointer type */
7386 if (zero_size_allowed && access_size == 0 &&
7387 register_is_null(reg))
7388 return 0;
7389
7390 verbose(private_data: env, fmt: "R%d type=%s ", regno,
7391 reg_type_str(env, type: reg->type));
7392 verbose(private_data: env, fmt: "expected=%s\n", reg_type_str(env, type: PTR_TO_STACK));
7393 return -EACCES;
7394 }
7395}
7396
7397static int check_mem_size_reg(struct bpf_verifier_env *env,
7398 struct bpf_reg_state *reg, u32 regno,
7399 bool zero_size_allowed,
7400 struct bpf_call_arg_meta *meta)
7401{
7402 int err;
7403
	/* This is used to refine r0 return value bounds for helpers
	 * that enforce this value as an upper bound on return values.
	 * See do_refine_retval_range() for helpers that can refine
	 * the return value. The C type of the helper is u32, so we pull
	 * the register bound from umax_value; if it is negative, the
	 * verifier errors out. Only upper bounds can be learned because
	 * the retval is an int type and negative retvals are allowed.
	 */
7412 meta->msize_max_value = reg->umax_value;
7413
7414 /* The register is SCALAR_VALUE; the access check
7415 * happens using its boundaries.
7416 */
7417 if (!tnum_is_const(a: reg->var_off))
7418 /* For unprivileged variable accesses, disable raw
7419 * mode so that the program is required to
7420 * initialize all the memory that the helper could
7421 * just partially fill up.
7422 */
7423 meta = NULL;
7424
7425 if (reg->smin_value < 0) {
7426 verbose(private_data: env, fmt: "R%d min value is negative, either use unsigned or 'var &= const'\n",
7427 regno);
7428 return -EACCES;
7429 }
7430
7431 if (reg->umin_value == 0) {
7432 err = check_helper_mem_access(env, regno: regno - 1, access_size: 0,
7433 zero_size_allowed,
7434 meta);
7435 if (err)
7436 return err;
7437 }
7438
7439 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
7440 verbose(private_data: env, fmt: "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
7441 regno);
7442 return -EACCES;
7443 }
7444 err = check_helper_mem_access(env, regno: regno - 1,
7445 access_size: reg->umax_value,
7446 zero_size_allowed, meta);
7447 if (!err)
7448 err = mark_chain_precision(env, regno);
7449 return err;
7450}
7451
7452int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
7453 u32 regno, u32 mem_size)
7454{
7455 bool may_be_null = type_may_be_null(type: reg->type);
7456 struct bpf_reg_state saved_reg;
7457 struct bpf_call_arg_meta meta;
7458 int err;
7459
7460 if (register_is_null(reg))
7461 return 0;
7462
7463 memset(&meta, 0, sizeof(meta));
	/* Assuming that the register contains a value, check if the memory
	 * access is safe. Temporarily save and restore the register's state,
	 * as the conversion shouldn't be visible to a caller.
	 */
7468 if (may_be_null) {
7469 saved_reg = *reg;
7470 mark_ptr_not_null_reg(reg);
7471 }
7472
7473 err = check_helper_mem_access(env, regno, access_size: mem_size, zero_size_allowed: true, meta: &meta);
7474 /* Check access for BPF_WRITE */
7475 meta.raw_mode = true;
7476 err = err ?: check_helper_mem_access(env, regno, access_size: mem_size, zero_size_allowed: true, meta: &meta);
7477
7478 if (may_be_null)
7479 *reg = saved_reg;
7480
7481 return err;
7482}
7483
7484static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
7485 u32 regno)
7486{
7487 struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
7488 bool may_be_null = type_may_be_null(type: mem_reg->type);
7489 struct bpf_reg_state saved_reg;
7490 struct bpf_call_arg_meta meta;
7491 int err;
7492
7493 WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
7494
7495 memset(&meta, 0, sizeof(meta));
7496
7497 if (may_be_null) {
7498 saved_reg = *mem_reg;
7499 mark_ptr_not_null_reg(reg: mem_reg);
7500 }
7501
7502 err = check_mem_size_reg(env, reg, regno, zero_size_allowed: true, meta: &meta);
7503 /* Check access for BPF_WRITE */
7504 meta.raw_mode = true;
7505 err = err ?: check_mem_size_reg(env, reg, regno, zero_size_allowed: true, meta: &meta);
7506
7507 if (may_be_null)
7508 *mem_reg = saved_reg;
7509 return err;
7510}
7511
7512/* Implementation details:
7513 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
7514 * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL.
7515 * Two bpf_map_lookups (even with the same key) will have different reg->id.
7516 * Two separate bpf_obj_new will also have different reg->id.
7517 * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier
7518 * clears reg->id after value_or_null->value transition, since the verifier only
7519 * cares about the range of access to valid map value pointer and doesn't care
7520 * about actual address of the map element.
7521 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
7522 * reg->id > 0 after value_or_null->value transition. By doing so
7523 * two bpf_map_lookups will be considered two different pointers that
7524 * point to different bpf_spin_locks. Likewise for pointers to allocated objects
7525 * returned from bpf_obj_new.
 * The verifier allows taking only one bpf_spin_lock at a time to avoid
 * deadlocks.
7528 * Since only one bpf_spin_lock is allowed the checks are simpler than
7529 * reg_is_refcounted() logic. The verifier needs to remember only
7530 * one spin_lock instead of array of acquired_refs.
7531 * cur_state->active_lock remembers which map value element or allocated
7532 * object got locked and clears it after bpf_spin_unlock.
7533 */
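/* A minimal usage sketch from the BPF program side (map and field names are
 * hypothetical):
 *
 *   struct val { struct bpf_spin_lock lock; int cnt; };
 *   struct val *v = bpf_map_lookup_elem(&map, &key);
 *   if (v) {
 *           bpf_spin_lock(&v->lock);
 *           v->cnt++;
 *           bpf_spin_unlock(&v->lock);
 *   }
 *
 * On bpf_spin_lock() the verifier records the map and reg->id in
 * cur_state->active_lock; bpf_spin_unlock() must see the same pair.
 */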
7534static int process_spin_lock(struct bpf_verifier_env *env, int regno,
7535 bool is_lock)
7536{
7537 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7538 struct bpf_verifier_state *cur = env->cur_state;
7539 bool is_const = tnum_is_const(a: reg->var_off);
7540 u64 val = reg->var_off.value;
7541 struct bpf_map *map = NULL;
7542 struct btf *btf = NULL;
7543 struct btf_record *rec;
7544
7545 if (!is_const) {
7546 verbose(private_data: env,
7547 fmt: "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
7548 regno);
7549 return -EINVAL;
7550 }
7551 if (reg->type == PTR_TO_MAP_VALUE) {
7552 map = reg->map_ptr;
7553 if (!map->btf) {
7554 verbose(private_data: env,
7555 fmt: "map '%s' has to have BTF in order to use bpf_spin_lock\n",
7556 map->name);
7557 return -EINVAL;
7558 }
7559 } else {
7560 btf = reg->btf;
7561 }
7562
7563 rec = reg_btf_record(reg);
7564 if (!btf_record_has_field(rec, type: BPF_SPIN_LOCK)) {
7565 verbose(private_data: env, fmt: "%s '%s' has no valid bpf_spin_lock\n", map ? "map" : "local",
7566 map ? map->name : "kptr");
7567 return -EINVAL;
7568 }
7569 if (rec->spin_lock_off != val + reg->off) {
7570 verbose(private_data: env, fmt: "off %lld doesn't point to 'struct bpf_spin_lock' that is at %d\n",
7571 val + reg->off, rec->spin_lock_off);
7572 return -EINVAL;
7573 }
7574 if (is_lock) {
7575 if (cur->active_lock.ptr) {
7576 verbose(private_data: env,
7577 fmt: "Locking two bpf_spin_locks are not allowed\n");
7578 return -EINVAL;
7579 }
7580 if (map)
7581 cur->active_lock.ptr = map;
7582 else
7583 cur->active_lock.ptr = btf;
7584 cur->active_lock.id = reg->id;
7585 } else {
7586 void *ptr;
7587
7588 if (map)
7589 ptr = map;
7590 else
7591 ptr = btf;
7592
7593 if (!cur->active_lock.ptr) {
7594 verbose(private_data: env, fmt: "bpf_spin_unlock without taking a lock\n");
7595 return -EINVAL;
7596 }
7597 if (cur->active_lock.ptr != ptr ||
7598 cur->active_lock.id != reg->id) {
7599 verbose(private_data: env, fmt: "bpf_spin_unlock of different lock\n");
7600 return -EINVAL;
7601 }
7602
7603 invalidate_non_owning_refs(env);
7604
7605 cur->active_lock.ptr = NULL;
7606 cur->active_lock.id = 0;
7607 }
7608 return 0;
7609}
7610
7611static int process_timer_func(struct bpf_verifier_env *env, int regno,
7612 struct bpf_call_arg_meta *meta)
7613{
7614 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7615 bool is_const = tnum_is_const(a: reg->var_off);
7616 struct bpf_map *map = reg->map_ptr;
7617 u64 val = reg->var_off.value;
7618
7619 if (!is_const) {
7620 verbose(private_data: env,
7621 fmt: "R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
7622 regno);
7623 return -EINVAL;
7624 }
7625 if (!map->btf) {
7626 verbose(private_data: env, fmt: "map '%s' has to have BTF in order to use bpf_timer\n",
7627 map->name);
7628 return -EINVAL;
7629 }
7630 if (!btf_record_has_field(rec: map->record, type: BPF_TIMER)) {
7631 verbose(private_data: env, fmt: "map '%s' has no valid bpf_timer\n", map->name);
7632 return -EINVAL;
7633 }
7634 if (map->record->timer_off != val + reg->off) {
7635 verbose(private_data: env, fmt: "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
7636 val + reg->off, map->record->timer_off);
7637 return -EINVAL;
7638 }
7639 if (meta->map_ptr) {
7640 verbose(private_data: env, fmt: "verifier bug. Two map pointers in a timer helper\n");
7641 return -EFAULT;
7642 }
7643 meta->map_uid = reg->map_uid;
7644 meta->map_ptr = map;
7645 return 0;
7646}
7647
7648static int process_kptr_func(struct bpf_verifier_env *env, int regno,
7649 struct bpf_call_arg_meta *meta)
7650{
7651 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7652 struct bpf_map *map_ptr = reg->map_ptr;
7653 struct btf_field *kptr_field;
7654 u32 kptr_off;
7655
7656 if (!tnum_is_const(a: reg->var_off)) {
7657 verbose(private_data: env,
7658 fmt: "R%d doesn't have constant offset. kptr has to be at the constant offset\n",
7659 regno);
7660 return -EINVAL;
7661 }
7662 if (!map_ptr->btf) {
7663 verbose(private_data: env, fmt: "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
7664 map_ptr->name);
7665 return -EINVAL;
7666 }
7667 if (!btf_record_has_field(rec: map_ptr->record, type: BPF_KPTR)) {
7668 verbose(private_data: env, fmt: "map '%s' has no valid kptr\n", map_ptr->name);
7669 return -EINVAL;
7670 }
7671
7672 meta->map_ptr = map_ptr;
7673 kptr_off = reg->off + reg->var_off.value;
7674 kptr_field = btf_record_find(rec: map_ptr->record, offset: kptr_off, field_mask: BPF_KPTR);
7675 if (!kptr_field) {
7676 verbose(private_data: env, fmt: "off=%d doesn't point to kptr\n", kptr_off);
7677 return -EACCES;
7678 }
7679 if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) {
7680 verbose(private_data: env, fmt: "off=%d kptr isn't referenced kptr\n", kptr_off);
7681 return -EACCES;
7682 }
7683 meta->kptr_field = kptr_field;
7684 return 0;
7685}
7686
7687/* There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
7688 * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
7689 *
7690 * In both cases we deal with the first 8 bytes, but need to mark the next 8
7691 * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
7692 * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
7693 *
7694 * Mutability of bpf_dynptr is at two levels, one is at the level of struct
7695 * bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct
7696 * bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can
7697 * mutate the view of the dynptr and also possibly destroy it. In the latter
7698 * case, it cannot mutate the bpf_dynptr itself but it can still mutate the
7699 * memory that dynptr points to.
7700 *
7701 * The verifier will keep track both levels of mutation (bpf_dynptr's in
7702 * reg->type and the memory's in reg->dynptr.type), but there is no support for
7703 * readonly dynptr view yet, hence only the first case is tracked and checked.
7704 *
7705 * This is consistent with how C applies the const modifier to a struct object,
7706 * where the pointer itself inside bpf_dynptr becomes const but not what it
7707 * points to.
7708 *
7709 * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument
7710 * type, and declare it as 'const struct bpf_dynptr *' in their prototype.
7711 */
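/* A rough sketch of the three argument flavours (helper names from the UAPI,
 * usage abbreviated):
 *
 *   bpf_ringbuf_reserve_dynptr(&rb, 8, 0, &dptr); // MEM_UNINIT: constructs dptr
 *   bpf_dynptr_read(buf, 8, &dptr, 0, 0);         // MEM_RDONLY: 'const struct bpf_dynptr *'
 *   bpf_ringbuf_submit_dynptr(&dptr, 0);          // no flag: may destroy dptr
 */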
7712static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx,
7713 enum bpf_arg_type arg_type, int clone_ref_obj_id)
7714{
7715 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7716 int err;
7717
7718 /* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
7719 * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
7720 */
7721 if ((arg_type & (MEM_UNINIT | MEM_RDONLY)) == (MEM_UNINIT | MEM_RDONLY)) {
7722 verbose(private_data: env, fmt: "verifier internal error: misconfigured dynptr helper type flags\n");
7723 return -EFAULT;
7724 }
7725
	/* MEM_UNINIT - Points to memory that is an appropriate candidate for
	 *		constructing a mutable bpf_dynptr object.
	 *
	 *		Currently, this is only possible with PTR_TO_STACK
	 *		pointing to a region of at least 16 bytes which doesn't
	 *		contain an existing bpf_dynptr.
	 *
	 * MEM_RDONLY - Points to an initialized bpf_dynptr that will not be
	 *		mutated or destroyed. However, the memory it points to
	 *		may be mutated.
	 *
	 * None       - Points to an initialized dynptr that can be mutated and
	 *		destroyed, including mutation of the memory it points
	 *		to.
	 */
7741 if (arg_type & MEM_UNINIT) {
7742 int i;
7743
7744 if (!is_dynptr_reg_valid_uninit(env, reg)) {
7745 verbose(private_data: env, fmt: "Dynptr has to be an uninitialized dynptr\n");
7746 return -EINVAL;
7747 }
7748
		/* we write one BPF_DW chunk (8 bytes) at a time */
7750 for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
7751 err = check_mem_access(env, insn_idx, regno,
7752 off: i, BPF_DW, t: BPF_WRITE, value_regno: -1, strict_alignment_once: false, is_ldsx: false);
7753 if (err)
7754 return err;
7755 }
7756
7757 err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, clone_ref_obj_id);
7758 } else /* MEM_RDONLY and None case from above */ {
7759 /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
7760 if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
7761 verbose(private_data: env, fmt: "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
7762 return -EINVAL;
7763 }
7764
7765 if (!is_dynptr_reg_valid_init(env, reg)) {
7766 verbose(private_data: env,
7767 fmt: "Expected an initialized dynptr as arg #%d\n",
7768 regno);
7769 return -EINVAL;
7770 }
7771
7772 /* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */
7773 if (!is_dynptr_type_expected(env, reg, arg_type: arg_type & ~MEM_RDONLY)) {
7774 verbose(private_data: env,
7775 fmt: "Expected a dynptr of type %s as arg #%d\n",
7776 dynptr_type_str(type: arg_to_dynptr_type(arg_type)), regno);
7777 return -EINVAL;
7778 }
7779
7780 err = mark_dynptr_read(env, reg);
7781 }
7782 return err;
7783}
7784
7785static u32 iter_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi)
7786{
7787 struct bpf_func_state *state = func(env, reg);
7788
7789 return state->stack[spi].spilled_ptr.ref_obj_id;
7790}
7791
7792static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7793{
7794 return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
7795}
7796
7797static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7798{
7799 return meta->kfunc_flags & KF_ITER_NEW;
7800}
7801
7802static bool is_iter_next_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7803{
7804 return meta->kfunc_flags & KF_ITER_NEXT;
7805}
7806
7807static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7808{
7809 return meta->kfunc_flags & KF_ITER_DESTROY;
7810}
7811
7812static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg)
7813{
	/* btf_check_iter_kfuncs() guarantees that the first argument of any
	 * iter kfunc is a pointer to the iter state
	 */
7817 return arg == 0 && is_iter_kfunc(meta);
7818}
7819
7820static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx,
7821 struct bpf_kfunc_call_arg_meta *meta)
7822{
7823 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7824 const struct btf_type *t;
7825 const struct btf_param *arg;
7826 int spi, err, i, nr_slots;
7827 u32 btf_id;
7828
7829 /* btf_check_iter_kfuncs() ensures we don't need to validate anything here */
7830 arg = &btf_params(t: meta->func_proto)[0];
7831 t = btf_type_skip_modifiers(btf: meta->btf, id: arg->type, NULL); /* PTR */
7832 t = btf_type_skip_modifiers(btf: meta->btf, id: t->type, res_id: &btf_id); /* STRUCT */
7833 nr_slots = t->size / BPF_REG_SIZE;
7834
7835 if (is_iter_new_kfunc(meta)) {
7836 /* bpf_iter_<type>_new() expects pointer to uninit iter state */
7837 if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) {
7838 verbose(private_data: env, fmt: "expected uninitialized iter_%s as arg #%d\n",
7839 iter_type_str(btf: meta->btf, btf_id), regno);
7840 return -EINVAL;
7841 }
7842
7843 for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) {
7844 err = check_mem_access(env, insn_idx, regno,
7845 off: i, BPF_DW, t: BPF_WRITE, value_regno: -1, strict_alignment_once: false, is_ldsx: false);
7846 if (err)
7847 return err;
7848 }
7849
7850 err = mark_stack_slots_iter(env, meta, reg, insn_idx, btf: meta->btf, btf_id, nr_slots);
7851 if (err)
7852 return err;
7853 } else {
		/* iter_next() or iter_destroy() expect an initialized iter state */
7855 err = is_iter_reg_valid_init(env, reg, btf: meta->btf, btf_id, nr_slots);
7856 switch (err) {
7857 case 0:
7858 break;
7859 case -EINVAL:
7860 verbose(private_data: env, fmt: "expected an initialized iter_%s as arg #%d\n",
7861 iter_type_str(btf: meta->btf, btf_id), regno);
7862 return err;
7863 case -EPROTO:
7864 verbose(private_data: env, fmt: "expected an RCU CS when using %s\n", meta->func_name);
7865 return err;
7866 default:
7867 return err;
7868 }
7869
7870 spi = iter_get_spi(env, reg, nr_slots);
7871 if (spi < 0)
7872 return spi;
7873
7874 err = mark_iter_read(env, reg, spi, nr_slots);
7875 if (err)
7876 return err;
7877
7878 /* remember meta->iter info for process_iter_next_call() */
7879 meta->iter.spi = spi;
7880 meta->iter.frameno = reg->frameno;
7881 meta->ref_obj_id = iter_ref_obj_id(env, reg, spi);
7882
7883 if (is_iter_destroy_kfunc(meta)) {
7884 err = unmark_stack_slots_iter(env, reg, nr_slots);
7885 if (err)
7886 return err;
7887 }
7888 }
7889
7890 return 0;
7891}
7892
7893/* Look for a previous loop entry at insn_idx: nearest parent state
7894 * stopped at insn_idx with callsites matching those in cur->frame.
7895 */
7896static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
7897 struct bpf_verifier_state *cur,
7898 int insn_idx)
7899{
7900 struct bpf_verifier_state_list *sl;
7901 struct bpf_verifier_state *st;
7902
7903 /* Explored states are pushed in stack order, most recent states come first */
7904 sl = *explored_state(env, idx: insn_idx);
7905 for (; sl; sl = sl->next) {
		/* If st->branches != 0, the state is part of the current DFS
		 * verification path, hence cur and st form a loop.
		 */
7909 st = &sl->state;
7910 if (st->insn_idx == insn_idx && st->branches && same_callsites(a: st, b: cur) &&
7911 st->dfs_depth < cur->dfs_depth)
7912 return st;
7913 }
7914
7915 return NULL;
7916}
7917
7918static void reset_idmap_scratch(struct bpf_verifier_env *env);
7919static bool regs_exact(const struct bpf_reg_state *rold,
7920 const struct bpf_reg_state *rcur,
7921 struct bpf_idmap *idmap);
7922
7923static void maybe_widen_reg(struct bpf_verifier_env *env,
7924 struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
7925 struct bpf_idmap *idmap)
7926{
7927 if (rold->type != SCALAR_VALUE)
7928 return;
7929 if (rold->type != rcur->type)
7930 return;
7931 if (rold->precise || rcur->precise || regs_exact(rold, rcur, idmap))
7932 return;
7933 __mark_reg_unknown(env, reg: rcur);
7934}
7935
7936static int widen_imprecise_scalars(struct bpf_verifier_env *env,
7937 struct bpf_verifier_state *old,
7938 struct bpf_verifier_state *cur)
7939{
7940 struct bpf_func_state *fold, *fcur;
7941 int i, fr;
7942
7943 reset_idmap_scratch(env);
7944 for (fr = old->curframe; fr >= 0; fr--) {
7945 fold = old->frame[fr];
7946 fcur = cur->frame[fr];
7947
7948 for (i = 0; i < MAX_BPF_REG; i++)
7949 maybe_widen_reg(env,
7950 rold: &fold->regs[i],
7951 rcur: &fcur->regs[i],
7952 idmap: &env->idmap_scratch);
7953
7954 for (i = 0; i < fold->allocated_stack / BPF_REG_SIZE; i++) {
7955 if (!is_spilled_reg(stack: &fold->stack[i]) ||
7956 !is_spilled_reg(stack: &fcur->stack[i]))
7957 continue;
7958
7959 maybe_widen_reg(env,
7960 rold: &fold->stack[i].spilled_ptr,
7961 rcur: &fcur->stack[i].spilled_ptr,
7962 idmap: &env->idmap_scratch);
7963 }
7964 }
7965 return 0;
7966}
7967
7968/* process_iter_next_call() is called when verifier gets to iterator's next
7969 * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
7970 * to it as just "iter_next()" in comments below.
7971 *
7972 * BPF verifier relies on a crucial contract for any iter_next()
7973 * implementation: it should *eventually* return NULL, and once that happens
7974 * it should keep returning NULL. That is, once iterator exhausts elements to
7975 * iterate, it should never reset or spuriously return new elements.
7976 *
7977 * With the assumption of such contract, process_iter_next_call() simulates
7978 * a fork in the verifier state to validate loop logic correctness and safety
7979 * without having to simulate infinite amount of iterations.
7980 *
7981 * In current state, we first assume that iter_next() returned NULL and
7982 * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such
7983 * conditions we should not form an infinite loop and should eventually reach
7984 * exit.
7985 *
7986 * Besides that, we also fork current state and enqueue it for later
7987 * verification. In a forked state we keep iterator state as ACTIVE
7988 * (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We
7989 * also bump iteration depth to prevent erroneous infinite loop detection
7990 * later on (see iter_active_depths_differ() comment for details). In this
7991 * state we assume that we'll eventually loop back to another iter_next()
7992 * calls (it could be in exactly same location or in some other instruction,
7993 * it doesn't matter, we don't make any unnecessary assumptions about this,
7994 * everything revolves around iterator state in a stack slot, not which
7995 * instruction is calling iter_next()). When that happens, we either will come
7996 * to iter_next() with equivalent state and can conclude that next iteration
7997 * will proceed in exactly the same way as we just verified, so it's safe to
7998 * assume that loop converges. If not, we'll go on another iteration
7999 * simulation with a different input state, until all possible starting states
8000 * are validated or we reach maximum number of instructions limit.
8001 *
8002 * This way, we will either exhaustively discover all possible input states
8003 * that iterator loop can start with and eventually will converge, or we'll
8004 * effectively regress into bounded loop simulation logic and either reach
8005 * maximum number of instructions if loop is not provably convergent, or there
8006 * is some statically known limit on number of iterations (e.g., if there is
8007 * an explicit `if n > 100 then break;` statement somewhere in the loop).
8008 *
8009 * Iteration convergence logic in is_state_visited() relies on exact
8010 * states comparison, which ignores read and precision marks.
8011 * This is necessary because read and precision marks are not finalized
8012 * while in the loop. Exact comparison might preclude convergence for
8013 * simple programs like below:
8014 *
8015 * i = 0;
8016 * while(iter_next(&it))
8017 * i++;
8018 *
8019 * At each iteration step i++ would produce a new distinct state and
8020 * eventually instruction processing limit would be reached.
8021 *
8022 * To avoid such behavior speculatively forget (widen) range for
8023 * imprecise scalar registers, if those registers were not precise at the
8024 * end of the previous iteration and do not match exactly.
8025 *
 * This is a conservative heuristic that allows verifying a wide range of
 * programs, however it precludes verification of programs that conjure an
 * imprecise value on the first loop iteration and use it as precise on the
 * second.
8029 * For example, the following safe program would fail to verify:
8030 *
8031 * struct bpf_num_iter it;
8032 * int arr[10];
8033 * int i = 0, a = 0;
8034 * bpf_iter_num_new(&it, 0, 10);
8035 * while (bpf_iter_num_next(&it)) {
8036 * if (a == 0) {
8037 * a = 1;
 *         i = 7; // Because i changed, the verifier would forget
 *                // its range on second loop entry.
8040 * } else {
8041 * arr[i] = 42; // This would fail to verify.
8042 * }
8043 * }
8044 * bpf_iter_num_destroy(&it);
8045 */
8046static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
8047 struct bpf_kfunc_call_arg_meta *meta)
8048{
8049 struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
8050 struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
8051 struct bpf_reg_state *cur_iter, *queued_iter;
8052 int iter_frameno = meta->iter.frameno;
8053 int iter_spi = meta->iter.spi;
8054
8055 BTF_TYPE_EMIT(struct bpf_iter);
8056
8057 cur_iter = &env->cur_state->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
8058
8059 if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
8060 cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
8061 verbose(private_data: env, fmt: "verifier internal error: unexpected iterator state %d (%s)\n",
8062 cur_iter->iter.state, iter_state_str(state: cur_iter->iter.state));
8063 return -EFAULT;
8064 }
8065
8066 if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
		/* Because an iter_next() call is a checkpoint, is_state_visited()
		 * should guarantee a parent state with the same call sites and
		 * insn_idx.
		 */
8070 if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx ||
8071 !same_callsites(a: cur_st->parent, b: cur_st)) {
8072 verbose(private_data: env, fmt: "bug: bad parent state for iter next call");
8073 return -EFAULT;
8074 }
		/* Note cur_st->parent in the call below; it is necessary to skip
		 * the checkpoint created for cur_st by is_state_visited()
		 * right at this instruction.
		 */
8079 prev_st = find_prev_entry(env, cur: cur_st->parent, insn_idx);
8080 /* branch out active iter state */
8081 queued_st = push_stack(env, insn_idx: insn_idx + 1, prev_insn_idx: insn_idx, speculative: false);
8082 if (!queued_st)
8083 return -ENOMEM;
8084
8085 queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
8086 queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
8087 queued_iter->iter.depth++;
8088 if (prev_st)
8089 widen_imprecise_scalars(env, old: prev_st, cur: queued_st);
8090
8091 queued_fr = queued_st->frame[queued_st->curframe];
8092 mark_ptr_not_null_reg(reg: &queued_fr->regs[BPF_REG_0]);
8093 }
8094
	/* mark current iter state as drained, i.e. assume iter_next() returned
	 * NULL; keep the iteration depth unchanged
	 */
8097 cur_iter->iter.state = BPF_ITER_STATE_DRAINED;
8098 __mark_reg_const_zero(reg: &cur_fr->regs[BPF_REG_0]);
8099
8100 return 0;
8101}
8102
8103static bool arg_type_is_mem_size(enum bpf_arg_type type)
8104{
8105 return type == ARG_CONST_SIZE ||
8106 type == ARG_CONST_SIZE_OR_ZERO;
8107}
8108
8109static bool arg_type_is_release(enum bpf_arg_type type)
8110{
8111 return type & OBJ_RELEASE;
8112}
8113
8114static bool arg_type_is_dynptr(enum bpf_arg_type type)
8115{
8116 return base_type(type) == ARG_PTR_TO_DYNPTR;
8117}
8118
8119static int int_ptr_type_to_size(enum bpf_arg_type type)
8120{
8121 if (type == ARG_PTR_TO_INT)
8122 return sizeof(u32);
8123 else if (type == ARG_PTR_TO_LONG)
8124 return sizeof(u64);
8125
8126 return -EINVAL;
8127}
8128
8129static int resolve_map_arg_type(struct bpf_verifier_env *env,
8130 const struct bpf_call_arg_meta *meta,
8131 enum bpf_arg_type *arg_type)
8132{
8133 if (!meta->map_ptr) {
8134 /* kernel subsystem misconfigured verifier */
8135 verbose(private_data: env, fmt: "invalid map_ptr to access map->type\n");
8136 return -EACCES;
8137 }
8138
8139 switch (meta->map_ptr->map_type) {
8140 case BPF_MAP_TYPE_SOCKMAP:
8141 case BPF_MAP_TYPE_SOCKHASH:
8142 if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
8143 *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
8144 } else {
8145 verbose(private_data: env, fmt: "invalid arg_type for sockmap/sockhash\n");
8146 return -EINVAL;
8147 }
8148 break;
8149 case BPF_MAP_TYPE_BLOOM_FILTER:
8150 if (meta->func_id == BPF_FUNC_map_peek_elem)
8151 *arg_type = ARG_PTR_TO_MAP_VALUE;
8152 break;
8153 default:
8154 break;
8155 }
8156 return 0;
8157}
8158
8159struct bpf_reg_types {
8160 const enum bpf_reg_type types[10];
8161 u32 *btf_id;
8162};
8163
8164static const struct bpf_reg_types sock_types = {
8165 .types = {
8166 PTR_TO_SOCK_COMMON,
8167 PTR_TO_SOCKET,
8168 PTR_TO_TCP_SOCK,
8169 PTR_TO_XDP_SOCK,
8170 },
8171};
8172
8173#ifdef CONFIG_NET
8174static const struct bpf_reg_types btf_id_sock_common_types = {
8175 .types = {
8176 PTR_TO_SOCK_COMMON,
8177 PTR_TO_SOCKET,
8178 PTR_TO_TCP_SOCK,
8179 PTR_TO_XDP_SOCK,
8180 PTR_TO_BTF_ID,
8181 PTR_TO_BTF_ID | PTR_TRUSTED,
8182 },
8183 .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
8184};
8185#endif
8186
8187static const struct bpf_reg_types mem_types = {
8188 .types = {
8189 PTR_TO_STACK,
8190 PTR_TO_PACKET,
8191 PTR_TO_PACKET_META,
8192 PTR_TO_MAP_KEY,
8193 PTR_TO_MAP_VALUE,
8194 PTR_TO_MEM,
8195 PTR_TO_MEM | MEM_RINGBUF,
8196 PTR_TO_BUF,
8197 PTR_TO_BTF_ID | PTR_TRUSTED,
8198 },
8199};
8200
8201static const struct bpf_reg_types int_ptr_types = {
8202 .types = {
8203 PTR_TO_STACK,
8204 PTR_TO_PACKET,
8205 PTR_TO_PACKET_META,
8206 PTR_TO_MAP_KEY,
8207 PTR_TO_MAP_VALUE,
8208 },
8209};
8210
8211static const struct bpf_reg_types spin_lock_types = {
8212 .types = {
8213 PTR_TO_MAP_VALUE,
8214 PTR_TO_BTF_ID | MEM_ALLOC,
8215 }
8216};
8217
8218static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
8219static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
8220static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
8221static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } };
8222static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
8223static const struct bpf_reg_types btf_ptr_types = {
8224 .types = {
8225 PTR_TO_BTF_ID,
8226 PTR_TO_BTF_ID | PTR_TRUSTED,
8227 PTR_TO_BTF_ID | MEM_RCU,
8228 },
8229};
8230static const struct bpf_reg_types percpu_btf_ptr_types = {
8231 .types = {
8232 PTR_TO_BTF_ID | MEM_PERCPU,
8233 PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU,
8234 PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
8235 }
8236};
8237static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
8238static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
8239static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
8240static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
8241static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
8242static const struct bpf_reg_types dynptr_types = {
8243 .types = {
8244 PTR_TO_STACK,
8245 CONST_PTR_TO_DYNPTR,
8246 }
8247};
8248
8249static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
8250 [ARG_PTR_TO_MAP_KEY] = &mem_types,
8251 [ARG_PTR_TO_MAP_VALUE] = &mem_types,
8252 [ARG_CONST_SIZE] = &scalar_types,
8253 [ARG_CONST_SIZE_OR_ZERO] = &scalar_types,
8254 [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types,
8255 [ARG_CONST_MAP_PTR] = &const_map_ptr_types,
8256 [ARG_PTR_TO_CTX] = &context_types,
8257 [ARG_PTR_TO_SOCK_COMMON] = &sock_types,
8258#ifdef CONFIG_NET
8259 [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
8260#endif
8261 [ARG_PTR_TO_SOCKET] = &fullsock_types,
8262 [ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
8263 [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
8264 [ARG_PTR_TO_MEM] = &mem_types,
8265 [ARG_PTR_TO_RINGBUF_MEM] = &ringbuf_mem_types,
8266 [ARG_PTR_TO_INT] = &int_ptr_types,
8267 [ARG_PTR_TO_LONG] = &int_ptr_types,
8268 [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
8269 [ARG_PTR_TO_FUNC] = &func_ptr_types,
8270 [ARG_PTR_TO_STACK] = &stack_ptr_types,
8271 [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
8272 [ARG_PTR_TO_TIMER] = &timer_types,
8273 [ARG_PTR_TO_KPTR] = &kptr_types,
8274 [ARG_PTR_TO_DYNPTR] = &dynptr_types,
8275};
8276
8277static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
8278 enum bpf_arg_type arg_type,
8279 const u32 *arg_btf_id,
8280 struct bpf_call_arg_meta *meta)
8281{
8282 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8283 enum bpf_reg_type expected, type = reg->type;
8284 const struct bpf_reg_types *compatible;
8285 int i, j;
8286
8287 compatible = compatible_reg_types[base_type(type: arg_type)];
8288 if (!compatible) {
8289 verbose(private_data: env, fmt: "verifier internal error: unsupported arg type %d\n", arg_type);
8290 return -EFAULT;
8291 }
8292
8293 /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
8294 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
8295 *
8296 * Same for MAYBE_NULL:
8297 *
8298 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
8299 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
8300 *
8301 * ARG_PTR_TO_MEM is compatible with PTR_TO_MEM that is tagged with a dynptr type.
8302 *
8303 * Therefore we fold these flags depending on the arg_type before comparison.
8304 */
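	/* E.g. (a sketch): an argument declared ARG_PTR_TO_MEM | MEM_RDONLY
	 * accepts a register of type PTR_TO_MEM | MEM_RDONLY because
	 * MEM_RDONLY is stripped from 'type' before the comparison below.
	 */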
8305 if (arg_type & MEM_RDONLY)
8306 type &= ~MEM_RDONLY;
8307 if (arg_type & PTR_MAYBE_NULL)
8308 type &= ~PTR_MAYBE_NULL;
8309 if (base_type(type: arg_type) == ARG_PTR_TO_MEM)
8310 type &= ~DYNPTR_TYPE_FLAG_MASK;
8311
8312 if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) {
8313 type &= ~MEM_ALLOC;
8314 type &= ~MEM_PERCPU;
8315 }
8316
8317 for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
8318 expected = compatible->types[i];
8319 if (expected == NOT_INIT)
8320 break;
8321
8322 if (type == expected)
8323 goto found;
8324 }
8325
8326 verbose(private_data: env, fmt: "R%d type=%s expected=", regno, reg_type_str(env, type: reg->type));
8327 for (j = 0; j + 1 < i; j++)
8328 verbose(private_data: env, fmt: "%s, ", reg_type_str(env, type: compatible->types[j]));
8329 verbose(private_data: env, fmt: "%s\n", reg_type_str(env, type: compatible->types[j]));
8330 return -EACCES;
8331
8332found:
8333 if (base_type(type: reg->type) != PTR_TO_BTF_ID)
8334 return 0;
8335
8336 if (compatible == &mem_types) {
8337 if (!(arg_type & MEM_RDONLY)) {
8338 verbose(private_data: env,
8339 fmt: "%s() may write into memory pointed by R%d type=%s\n",
8340 func_id_name(id: meta->func_id),
8341 regno, reg_type_str(env, type: reg->type));
8342 return -EACCES;
8343 }
8344 return 0;
8345 }
8346
8347 switch ((int)reg->type) {
8348 case PTR_TO_BTF_ID:
8349 case PTR_TO_BTF_ID | PTR_TRUSTED:
8350 case PTR_TO_BTF_ID | MEM_RCU:
8351 case PTR_TO_BTF_ID | PTR_MAYBE_NULL:
8352 case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU:
8353 {
8354 /* For bpf_sk_release, it needs to match against first member
8355 * 'struct sock_common', hence make an exception for it. This
8356 * allows bpf_sk_release to work for multiple socket types.
8357 */
8358 bool strict_type_match = arg_type_is_release(type: arg_type) &&
8359 meta->func_id != BPF_FUNC_sk_release;
8360
8361 if (type_may_be_null(type: reg->type) &&
8362 (!type_may_be_null(type: arg_type) || arg_type_is_release(type: arg_type))) {
8363 verbose(private_data: env, fmt: "Possibly NULL pointer passed to helper arg%d\n", regno);
8364 return -EACCES;
8365 }
8366
8367 if (!arg_btf_id) {
8368 if (!compatible->btf_id) {
8369 verbose(private_data: env, fmt: "verifier internal error: missing arg compatible BTF ID\n");
8370 return -EFAULT;
8371 }
8372 arg_btf_id = compatible->btf_id;
8373 }
8374
8375 if (meta->func_id == BPF_FUNC_kptr_xchg) {
8376 if (map_kptr_match_type(env, kptr_field: meta->kptr_field, reg, regno))
8377 return -EACCES;
8378 } else {
8379 if (arg_btf_id == BPF_PTR_POISON) {
8380 verbose(private_data: env, fmt: "verifier internal error:");
8381 verbose(private_data: env, fmt: "R%d has non-overwritten BPF_PTR_POISON type\n",
8382 regno);
8383 return -EACCES;
8384 }
8385
8386 if (!btf_struct_ids_match(log: &env->log, btf: reg->btf, id: reg->btf_id, off: reg->off,
8387 need_btf: btf_vmlinux, need_type_id: *arg_btf_id,
8388 strict: strict_type_match)) {
8389 verbose(private_data: env, fmt: "R%d is of type %s but %s is expected\n",
8390 regno, btf_type_name(btf: reg->btf, id: reg->btf_id),
8391 btf_type_name(btf: btf_vmlinux, id: *arg_btf_id));
8392 return -EACCES;
8393 }
8394 }
8395 break;
8396 }
8397 case PTR_TO_BTF_ID | MEM_ALLOC:
8398 case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC:
8399 if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
8400 meta->func_id != BPF_FUNC_kptr_xchg) {
8401 verbose(private_data: env, fmt: "verifier internal error: unimplemented handling of MEM_ALLOC\n");
8402 return -EFAULT;
8403 }
8404 if (meta->func_id == BPF_FUNC_kptr_xchg) {
8405 if (map_kptr_match_type(env, kptr_field: meta->kptr_field, reg, regno))
8406 return -EACCES;
8407 }
8408 break;
8409 case PTR_TO_BTF_ID | MEM_PERCPU:
8410 case PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU:
8411 case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED:
8412 /* Handled by helper specific checks */
8413 break;
8414 default:
8415 verbose(private_data: env, fmt: "verifier internal error: invalid PTR_TO_BTF_ID register for type match\n");
8416 return -EFAULT;
8417 }
8418 return 0;
8419}
8420
static struct btf_field *
reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
{
	struct btf_field *field;
	struct btf_record *rec;

	rec = reg_btf_record(reg);
	if (!rec)
		return NULL;

	field = btf_record_find(rec, off, fields);
	if (!field)
		return NULL;

	return field;
}
8437
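/* Validate the offset of a register passed as a helper/kfunc argument:
 * release arguments must have a zero fixed offset, a handful of pointer
 * types allow both fixed and variable offsets, the PTR_TO_BTF_ID variants
 * allow only a fixed offset, and everything else allows neither.
 */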
int check_func_arg_reg_off(struct bpf_verifier_env *env,
			   const struct bpf_reg_state *reg, int regno,
			   enum bpf_arg_type arg_type)
{
	u32 type = reg->type;

	/* When a referenced register is passed to a release function, its
	 * fixed offset must be 0.
	 *
	 * For arg_type_is_release() arguments we check that the reg has a
	 * ref_obj_id when storing meta->release_regno.
	 */
	if (arg_type_is_release(arg_type)) {
		/* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
		 * may not directly point to the object being released, but to
		 * a dynptr pointing to such object, which might be at some
		 * offset on the stack. In that case, we simply fall back to
		 * the default handling.
		 */
		if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
			return 0;

		/* Doing check_ptr_off_reg check for the offset will catch this
		 * because fixed_off_ok is false, but checking here allows us
		 * to give the user a better error message.
		 */
		if (reg->off) {
			verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
				regno);
			return -EINVAL;
		}
		return __check_ptr_off_reg(env, reg, regno, false);
	}

	switch (type) {
	/* Pointer types where both fixed and variable offset is explicitly allowed: */
	case PTR_TO_STACK:
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
	case PTR_TO_MAP_KEY:
	case PTR_TO_MAP_VALUE:
	case PTR_TO_MEM:
	case PTR_TO_MEM | MEM_RDONLY:
	case PTR_TO_MEM | MEM_RINGBUF:
	case PTR_TO_BUF:
	case PTR_TO_BUF | MEM_RDONLY:
	case SCALAR_VALUE:
		return 0;
	/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
	 * fixed offset.
	 */
	case PTR_TO_BTF_ID:
	case PTR_TO_BTF_ID | MEM_ALLOC:
	case PTR_TO_BTF_ID | PTR_TRUSTED:
	case PTR_TO_BTF_ID | MEM_RCU:
	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
		/* When referenced PTR_TO_BTF_ID is passed to release function,
		 * its fixed offset must be 0. In the other cases, fixed offset
		 * can be non-zero. This was already checked above. So pass
		 * fixed_off_ok as true to allow fixed offset for all other
		 * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
		 * still need to do checks instead of returning.
		 */
		return __check_ptr_off_reg(env, reg, regno, true);
	default:
		return __check_ptr_off_reg(env, reg, regno, false);
	}
}
8507
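/* Return the register holding the single dynptr argument of a helper
 * prototype; a prototype with zero or multiple dynptr args is a verifier
 * internal error.
 */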
static struct bpf_reg_state *get_dynptr_arg_reg(struct bpf_verifier_env *env,
						const struct bpf_func_proto *fn,
						struct bpf_reg_state *regs)
{
	struct bpf_reg_state *state = NULL;
	int i;

	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
		if (arg_type_is_dynptr(fn->arg_type[i])) {
			if (state) {
				verbose(env, "verifier internal error: multiple dynptr args\n");
				return NULL;
			}
			state = &regs[BPF_REG_1 + i];
		}

	if (!state)
		verbose(env, "verifier internal error: no dynptr arg found\n");

	return state;
}
8529
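/* A dynptr is either described directly by a CONST_PTR_TO_DYNPTR register or
 * lives in a stack slot referenced via a PTR_TO_STACK register.  The helpers
 * below fetch the dynptr's id, ref_obj_id and type from whichever location
 * applies.
 */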
8530static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
8531{
8532 struct bpf_func_state *state = func(env, reg);
8533 int spi;
8534
8535 if (reg->type == CONST_PTR_TO_DYNPTR)
8536 return reg->id;
8537 spi = dynptr_get_spi(env, reg);
8538 if (spi < 0)
8539 return spi;
8540 return state->stack[spi].spilled_ptr.id;
8541}
8542
8543static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
8544{
8545 struct bpf_func_state *state = func(env, reg);
8546 int spi;
8547
8548 if (reg->type == CONST_PTR_TO_DYNPTR)
8549 return reg->ref_obj_id;
8550 spi = dynptr_get_spi(env, reg);
8551 if (spi < 0)
8552 return spi;
8553 return state->stack[spi].spilled_ptr.ref_obj_id;
8554}
8555
8556static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
8557 struct bpf_reg_state *reg)
8558{
8559 struct bpf_func_state *state = func(env, reg);
8560 int spi;
8561
8562 if (reg->type == CONST_PTR_TO_DYNPTR)
8563 return reg->dynptr.type;
8564
8565 spi = __get_spi(off: reg->off);
8566 if (spi < 0) {
8567 verbose(private_data: env, fmt: "verifier internal error: invalid spi when querying dynptr type\n");
8568 return BPF_DYNPTR_TYPE_INVALID;
8569 }
8570
8571 return state->stack[spi].spilled_ptr.dynptr.type;
8572}
8573
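/* Verify one helper argument: check register type and offset compatibility,
 * track references and the release argument, and do the argument-specific
 * processing (map pointers, spin locks, timers, dynptrs, constant strings,
 * kptrs, ...).
 */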
8574static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
8575 struct bpf_call_arg_meta *meta,
8576 const struct bpf_func_proto *fn,
8577 int insn_idx)
8578{
8579 u32 regno = BPF_REG_1 + arg;
8580 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8581 enum bpf_arg_type arg_type = fn->arg_type[arg];
8582 enum bpf_reg_type type = reg->type;
8583 u32 *arg_btf_id = NULL;
8584 int err = 0;
8585
8586 if (arg_type == ARG_DONTCARE)
8587 return 0;
8588
8589 err = check_reg_arg(env, regno, t: SRC_OP);
8590 if (err)
8591 return err;
8592
8593 if (arg_type == ARG_ANYTHING) {
8594 if (is_pointer_value(env, regno)) {
8595 verbose(private_data: env, fmt: "R%d leaks addr into helper function\n",
8596 regno);
8597 return -EACCES;
8598 }
8599 return 0;
8600 }
8601
8602 if (type_is_pkt_pointer(type) &&
8603 !may_access_direct_pkt_data(env, meta, t: BPF_READ)) {
8604 verbose(private_data: env, fmt: "helper access to the packet is not allowed\n");
8605 return -EACCES;
8606 }
8607
8608 if (base_type(type: arg_type) == ARG_PTR_TO_MAP_VALUE) {
8609 err = resolve_map_arg_type(env, meta, arg_type: &arg_type);
8610 if (err)
8611 return err;
8612 }
8613
8614 if (register_is_null(reg) && type_may_be_null(type: arg_type))
8615 /* A NULL register has a SCALAR_VALUE type, so skip
8616 * type checking.
8617 */
8618 goto skip_type_check;
8619
8620 /* arg_btf_id and arg_size are in a union. */
8621 if (base_type(type: arg_type) == ARG_PTR_TO_BTF_ID ||
8622 base_type(type: arg_type) == ARG_PTR_TO_SPIN_LOCK)
8623 arg_btf_id = fn->arg_btf_id[arg];
8624
8625 err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
8626 if (err)
8627 return err;
8628
8629 err = check_func_arg_reg_off(env, reg, regno, arg_type);
8630 if (err)
8631 return err;
8632
8633skip_type_check:
8634 if (arg_type_is_release(type: arg_type)) {
8635 if (arg_type_is_dynptr(type: arg_type)) {
8636 struct bpf_func_state *state = func(env, reg);
8637 int spi;
8638
8639 /* Only dynptr created on stack can be released, thus
8640 * the get_spi and stack state checks for spilled_ptr
8641 * should only be done before process_dynptr_func for
8642 * PTR_TO_STACK.
8643 */
8644 if (reg->type == PTR_TO_STACK) {
8645 spi = dynptr_get_spi(env, reg);
8646 if (spi < 0 || !state->stack[spi].spilled_ptr.ref_obj_id) {
8647 verbose(private_data: env, fmt: "arg %d is an unacquired reference\n", regno);
8648 return -EINVAL;
8649 }
8650 } else {
8651 verbose(private_data: env, fmt: "cannot release unowned const bpf_dynptr\n");
8652 return -EINVAL;
8653 }
8654 } else if (!reg->ref_obj_id && !register_is_null(reg)) {
8655 verbose(private_data: env, fmt: "R%d must be referenced when passed to release function\n",
8656 regno);
8657 return -EINVAL;
8658 }
8659 if (meta->release_regno) {
8660 verbose(private_data: env, fmt: "verifier internal error: more than one release argument\n");
8661 return -EFAULT;
8662 }
8663 meta->release_regno = regno;
8664 }
8665
8666 if (reg->ref_obj_id) {
8667 if (meta->ref_obj_id) {
8668 verbose(private_data: env, fmt: "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
8669 regno, reg->ref_obj_id,
8670 meta->ref_obj_id);
8671 return -EFAULT;
8672 }
8673 meta->ref_obj_id = reg->ref_obj_id;
8674 }
8675
8676 switch (base_type(type: arg_type)) {
8677 case ARG_CONST_MAP_PTR:
8678 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
		if (meta->map_ptr) {
			/* Use map_uid (which is unique id of inner map) to reject:
			 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
			 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
			 * if (inner_map1 && inner_map2) {
			 *     timer = bpf_map_lookup_elem(inner_map1);
			 *     if (timer)
			 *         // mismatch would have been allowed
			 *         bpf_timer_init(timer, inner_map2);
			 * }
			 *
			 * Comparing map_ptr alone is not enough to distinguish
			 * different inner maps of the same outer map, hence
			 * also compare map_uid.
			 */
			if (meta->map_ptr != reg->map_ptr ||
			    meta->map_uid != reg->map_uid) {
				verbose(env,
					"timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
					meta->map_uid, reg->map_uid);
				return -EINVAL;
			}
		}
8700 meta->map_ptr = reg->map_ptr;
8701 meta->map_uid = reg->map_uid;
8702 break;
8703 case ARG_PTR_TO_MAP_KEY:
8704 /* bpf_map_xxx(..., map_ptr, ..., key) call:
8705 * check that [key, key + map->key_size) are within
8706 * stack limits and initialized
8707 */
		if (!meta->map_ptr) {
			/* In the function declaration map_ptr must come before
			 * map_key, so that it's verified and known before we
			 * have to check map_key here. Otherwise it means that
			 * the kernel subsystem misconfigured the verifier.
			 */
			verbose(env, "invalid map_ptr to access map->key\n");
			return -EACCES;
		}
		err = check_helper_mem_access(env, regno,
					      meta->map_ptr->key_size, false,
					      NULL);
8720 break;
8721 case ARG_PTR_TO_MAP_VALUE:
8722 if (type_may_be_null(type: arg_type) && register_is_null(reg))
8723 return 0;
8724
8725 /* bpf_map_xxx(..., map_ptr, ..., value) call:
8726 * check [value, value + map->value_size) validity
8727 */
8728 if (!meta->map_ptr) {
8729 /* kernel subsystem misconfigured verifier */
8730 verbose(private_data: env, fmt: "invalid map_ptr to access map->value\n");
8731 return -EACCES;
8732 }
8733 meta->raw_mode = arg_type & MEM_UNINIT;
8734 err = check_helper_mem_access(env, regno,
8735 access_size: meta->map_ptr->value_size, zero_size_allowed: false,
8736 meta);
8737 break;
8738 case ARG_PTR_TO_PERCPU_BTF_ID:
8739 if (!reg->btf_id) {
8740 verbose(private_data: env, fmt: "Helper has invalid btf_id in R%d\n", regno);
8741 return -EACCES;
8742 }
8743 meta->ret_btf = reg->btf;
8744 meta->ret_btf_id = reg->btf_id;
8745 break;
8746 case ARG_PTR_TO_SPIN_LOCK:
8747 if (in_rbtree_lock_required_cb(env)) {
8748 verbose(private_data: env, fmt: "can't spin_{lock,unlock} in rbtree cb\n");
8749 return -EACCES;
8750 }
8751 if (meta->func_id == BPF_FUNC_spin_lock) {
8752 err = process_spin_lock(env, regno, is_lock: true);
8753 if (err)
8754 return err;
8755 } else if (meta->func_id == BPF_FUNC_spin_unlock) {
8756 err = process_spin_lock(env, regno, is_lock: false);
8757 if (err)
8758 return err;
8759 } else {
8760 verbose(private_data: env, fmt: "verifier internal error\n");
8761 return -EFAULT;
8762 }
8763 break;
8764 case ARG_PTR_TO_TIMER:
8765 err = process_timer_func(env, regno, meta);
8766 if (err)
8767 return err;
8768 break;
8769 case ARG_PTR_TO_FUNC:
8770 meta->subprogno = reg->subprogno;
8771 break;
8772 case ARG_PTR_TO_MEM:
8773 /* The access to this pointer is only checked when we hit the
8774 * next is_mem_size argument below.
8775 */
8776 meta->raw_mode = arg_type & MEM_UNINIT;
8777 if (arg_type & MEM_FIXED_SIZE) {
8778 err = check_helper_mem_access(env, regno,
8779 access_size: fn->arg_size[arg], zero_size_allowed: false,
8780 meta);
8781 }
8782 break;
8783 case ARG_CONST_SIZE:
8784 err = check_mem_size_reg(env, reg, regno, zero_size_allowed: false, meta);
8785 break;
8786 case ARG_CONST_SIZE_OR_ZERO:
8787 err = check_mem_size_reg(env, reg, regno, zero_size_allowed: true, meta);
8788 break;
8789 case ARG_PTR_TO_DYNPTR:
8790 err = process_dynptr_func(env, regno, insn_idx, arg_type, clone_ref_obj_id: 0);
8791 if (err)
8792 return err;
8793 break;
8794 case ARG_CONST_ALLOC_SIZE_OR_ZERO:
		if (!tnum_is_const(reg->var_off)) {
			verbose(env, "R%d is not a known constant\n",
				regno);
			return -EACCES;
		}
8800 meta->mem_size = reg->var_off.value;
8801 err = mark_chain_precision(env, regno);
8802 if (err)
8803 return err;
8804 break;
8805 case ARG_PTR_TO_INT:
8806 case ARG_PTR_TO_LONG:
8807 {
8808 int size = int_ptr_type_to_size(type: arg_type);
8809
8810 err = check_helper_mem_access(env, regno, access_size: size, zero_size_allowed: false, meta);
8811 if (err)
8812 return err;
8813 err = check_ptr_alignment(env, reg, off: 0, size, strict_alignment_once: true);
8814 break;
8815 }
8816 case ARG_PTR_TO_CONST_STR:
8817 {
8818 struct bpf_map *map = reg->map_ptr;
8819 int map_off;
8820 u64 map_addr;
8821 char *str_ptr;
8822
		if (!bpf_map_is_rdonly(map)) {
			verbose(env, "R%d does not point to a readonly map\n", regno);
			return -EACCES;
		}

		if (!tnum_is_const(reg->var_off)) {
			verbose(env, "R%d is not a constant address\n", regno);
			return -EACCES;
		}
8832
8833 if (!map->ops->map_direct_value_addr) {
8834 verbose(private_data: env, fmt: "no direct value access support for this map type\n");
8835 return -EACCES;
8836 }
8837
8838 err = check_map_access(env, regno, off: reg->off,
8839 size: map->value_size - reg->off, zero_size_allowed: false,
8840 src: ACCESS_HELPER);
8841 if (err)
8842 return err;
8843
8844 map_off = reg->off + reg->var_off.value;
8845 err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
8846 if (err) {
8847 verbose(private_data: env, fmt: "direct value access on string failed\n");
8848 return err;
8849 }
8850
8851 str_ptr = (char *)(long)(map_addr);
8852 if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
8853 verbose(private_data: env, fmt: "string is not zero-terminated\n");
8854 return -EINVAL;
8855 }
8856 break;
8857 }
8858 case ARG_PTR_TO_KPTR:
8859 err = process_kptr_func(env, regno, meta);
8860 if (err)
8861 return err;
8862 break;
8863 }
8864
8865 return err;
8866}
8867
8868static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
8869{
8870 enum bpf_attach_type eatype = env->prog->expected_attach_type;
8871 enum bpf_prog_type type = resolve_prog_type(prog: env->prog);
8872
8873 if (func_id != BPF_FUNC_map_update_elem)
8874 return false;
8875
8876 /* It's not possible to get access to a locked struct sock in these
8877 * contexts, so updating is safe.
8878 */
8879 switch (type) {
8880 case BPF_PROG_TYPE_TRACING:
8881 if (eatype == BPF_TRACE_ITER)
8882 return true;
8883 break;
8884 case BPF_PROG_TYPE_SOCKET_FILTER:
8885 case BPF_PROG_TYPE_SCHED_CLS:
8886 case BPF_PROG_TYPE_SCHED_ACT:
8887 case BPF_PROG_TYPE_XDP:
8888 case BPF_PROG_TYPE_SK_REUSEPORT:
8889 case BPF_PROG_TYPE_FLOW_DISSECTOR:
8890 case BPF_PROG_TYPE_SK_LOOKUP:
8891 return true;
8892 default:
8893 break;
8894 }
8895
8896 verbose(private_data: env, fmt: "cannot update sockmap in this context\n");
8897 return false;
8898}
8899
8900static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
8901{
8902 return env->prog->jit_requested &&
8903 bpf_jit_supports_subprog_tailcalls();
8904}
8905
8906static int check_map_func_compatibility(struct bpf_verifier_env *env,
8907 struct bpf_map *map, int func_id)
8908{
8909 if (!map)
8910 return 0;
8911
8912 /* We need a two way check, first is from map perspective ... */
8913 switch (map->map_type) {
8914 case BPF_MAP_TYPE_PROG_ARRAY:
8915 if (func_id != BPF_FUNC_tail_call)
8916 goto error;
8917 break;
8918 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
8919 if (func_id != BPF_FUNC_perf_event_read &&
8920 func_id != BPF_FUNC_perf_event_output &&
8921 func_id != BPF_FUNC_skb_output &&
8922 func_id != BPF_FUNC_perf_event_read_value &&
8923 func_id != BPF_FUNC_xdp_output)
8924 goto error;
8925 break;
8926 case BPF_MAP_TYPE_RINGBUF:
8927 if (func_id != BPF_FUNC_ringbuf_output &&
8928 func_id != BPF_FUNC_ringbuf_reserve &&
8929 func_id != BPF_FUNC_ringbuf_query &&
8930 func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
8931 func_id != BPF_FUNC_ringbuf_submit_dynptr &&
8932 func_id != BPF_FUNC_ringbuf_discard_dynptr)
8933 goto error;
8934 break;
8935 case BPF_MAP_TYPE_USER_RINGBUF:
8936 if (func_id != BPF_FUNC_user_ringbuf_drain)
8937 goto error;
8938 break;
8939 case BPF_MAP_TYPE_STACK_TRACE:
8940 if (func_id != BPF_FUNC_get_stackid)
8941 goto error;
8942 break;
8943 case BPF_MAP_TYPE_CGROUP_ARRAY:
8944 if (func_id != BPF_FUNC_skb_under_cgroup &&
8945 func_id != BPF_FUNC_current_task_under_cgroup)
8946 goto error;
8947 break;
8948 case BPF_MAP_TYPE_CGROUP_STORAGE:
8949 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
8950 if (func_id != BPF_FUNC_get_local_storage)
8951 goto error;
8952 break;
8953 case BPF_MAP_TYPE_DEVMAP:
8954 case BPF_MAP_TYPE_DEVMAP_HASH:
8955 if (func_id != BPF_FUNC_redirect_map &&
8956 func_id != BPF_FUNC_map_lookup_elem)
8957 goto error;
8958 break;
8959 /* Restrict bpf side of cpumap and xskmap, open when use-cases
8960 * appear.
8961 */
8962 case BPF_MAP_TYPE_CPUMAP:
8963 if (func_id != BPF_FUNC_redirect_map)
8964 goto error;
8965 break;
8966 case BPF_MAP_TYPE_XSKMAP:
8967 if (func_id != BPF_FUNC_redirect_map &&
8968 func_id != BPF_FUNC_map_lookup_elem)
8969 goto error;
8970 break;
8971 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
8972 case BPF_MAP_TYPE_HASH_OF_MAPS:
8973 if (func_id != BPF_FUNC_map_lookup_elem)
8974 goto error;
8975 break;
8976 case BPF_MAP_TYPE_SOCKMAP:
8977 if (func_id != BPF_FUNC_sk_redirect_map &&
8978 func_id != BPF_FUNC_sock_map_update &&
8979 func_id != BPF_FUNC_map_delete_elem &&
8980 func_id != BPF_FUNC_msg_redirect_map &&
8981 func_id != BPF_FUNC_sk_select_reuseport &&
8982 func_id != BPF_FUNC_map_lookup_elem &&
8983 !may_update_sockmap(env, func_id))
8984 goto error;
8985 break;
8986 case BPF_MAP_TYPE_SOCKHASH:
8987 if (func_id != BPF_FUNC_sk_redirect_hash &&
8988 func_id != BPF_FUNC_sock_hash_update &&
8989 func_id != BPF_FUNC_map_delete_elem &&
8990 func_id != BPF_FUNC_msg_redirect_hash &&
8991 func_id != BPF_FUNC_sk_select_reuseport &&
8992 func_id != BPF_FUNC_map_lookup_elem &&
8993 !may_update_sockmap(env, func_id))
8994 goto error;
8995 break;
8996 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
8997 if (func_id != BPF_FUNC_sk_select_reuseport)
8998 goto error;
8999 break;
9000 case BPF_MAP_TYPE_QUEUE:
9001 case BPF_MAP_TYPE_STACK:
9002 if (func_id != BPF_FUNC_map_peek_elem &&
9003 func_id != BPF_FUNC_map_pop_elem &&
9004 func_id != BPF_FUNC_map_push_elem)
9005 goto error;
9006 break;
9007 case BPF_MAP_TYPE_SK_STORAGE:
9008 if (func_id != BPF_FUNC_sk_storage_get &&
9009 func_id != BPF_FUNC_sk_storage_delete &&
9010 func_id != BPF_FUNC_kptr_xchg)
9011 goto error;
9012 break;
9013 case BPF_MAP_TYPE_INODE_STORAGE:
9014 if (func_id != BPF_FUNC_inode_storage_get &&
9015 func_id != BPF_FUNC_inode_storage_delete &&
9016 func_id != BPF_FUNC_kptr_xchg)
9017 goto error;
9018 break;
9019 case BPF_MAP_TYPE_TASK_STORAGE:
9020 if (func_id != BPF_FUNC_task_storage_get &&
9021 func_id != BPF_FUNC_task_storage_delete &&
9022 func_id != BPF_FUNC_kptr_xchg)
9023 goto error;
9024 break;
9025 case BPF_MAP_TYPE_CGRP_STORAGE:
9026 if (func_id != BPF_FUNC_cgrp_storage_get &&
9027 func_id != BPF_FUNC_cgrp_storage_delete &&
9028 func_id != BPF_FUNC_kptr_xchg)
9029 goto error;
9030 break;
9031 case BPF_MAP_TYPE_BLOOM_FILTER:
9032 if (func_id != BPF_FUNC_map_peek_elem &&
9033 func_id != BPF_FUNC_map_push_elem)
9034 goto error;
9035 break;
9036 default:
9037 break;
9038 }
9039
9040 /* ... and second from the function itself. */
9041 switch (func_id) {
9042 case BPF_FUNC_tail_call:
9043 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
9044 goto error;
9045 if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
9046 verbose(private_data: env, fmt: "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
9047 return -EINVAL;
9048 }
9049 break;
9050 case BPF_FUNC_perf_event_read:
9051 case BPF_FUNC_perf_event_output:
9052 case BPF_FUNC_perf_event_read_value:
9053 case BPF_FUNC_skb_output:
9054 case BPF_FUNC_xdp_output:
9055 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
9056 goto error;
9057 break;
9058 case BPF_FUNC_ringbuf_output:
9059 case BPF_FUNC_ringbuf_reserve:
9060 case BPF_FUNC_ringbuf_query:
9061 case BPF_FUNC_ringbuf_reserve_dynptr:
9062 case BPF_FUNC_ringbuf_submit_dynptr:
9063 case BPF_FUNC_ringbuf_discard_dynptr:
9064 if (map->map_type != BPF_MAP_TYPE_RINGBUF)
9065 goto error;
9066 break;
9067 case BPF_FUNC_user_ringbuf_drain:
9068 if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
9069 goto error;
9070 break;
9071 case BPF_FUNC_get_stackid:
9072 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
9073 goto error;
9074 break;
9075 case BPF_FUNC_current_task_under_cgroup:
9076 case BPF_FUNC_skb_under_cgroup:
9077 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
9078 goto error;
9079 break;
9080 case BPF_FUNC_redirect_map:
9081 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
9082 map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
9083 map->map_type != BPF_MAP_TYPE_CPUMAP &&
9084 map->map_type != BPF_MAP_TYPE_XSKMAP)
9085 goto error;
9086 break;
9087 case BPF_FUNC_sk_redirect_map:
9088 case BPF_FUNC_msg_redirect_map:
9089 case BPF_FUNC_sock_map_update:
9090 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
9091 goto error;
9092 break;
9093 case BPF_FUNC_sk_redirect_hash:
9094 case BPF_FUNC_msg_redirect_hash:
9095 case BPF_FUNC_sock_hash_update:
9096 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
9097 goto error;
9098 break;
9099 case BPF_FUNC_get_local_storage:
9100 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
9101 map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
9102 goto error;
9103 break;
9104 case BPF_FUNC_sk_select_reuseport:
9105 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
9106 map->map_type != BPF_MAP_TYPE_SOCKMAP &&
9107 map->map_type != BPF_MAP_TYPE_SOCKHASH)
9108 goto error;
9109 break;
9110 case BPF_FUNC_map_pop_elem:
9111 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
9112 map->map_type != BPF_MAP_TYPE_STACK)
9113 goto error;
9114 break;
9115 case BPF_FUNC_map_peek_elem:
9116 case BPF_FUNC_map_push_elem:
9117 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
9118 map->map_type != BPF_MAP_TYPE_STACK &&
9119 map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
9120 goto error;
9121 break;
9122 case BPF_FUNC_map_lookup_percpu_elem:
9123 if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
9124 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
9125 map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
9126 goto error;
9127 break;
9128 case BPF_FUNC_sk_storage_get:
9129 case BPF_FUNC_sk_storage_delete:
9130 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
9131 goto error;
9132 break;
9133 case BPF_FUNC_inode_storage_get:
9134 case BPF_FUNC_inode_storage_delete:
9135 if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
9136 goto error;
9137 break;
9138 case BPF_FUNC_task_storage_get:
9139 case BPF_FUNC_task_storage_delete:
9140 if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
9141 goto error;
9142 break;
9143 case BPF_FUNC_cgrp_storage_get:
9144 case BPF_FUNC_cgrp_storage_delete:
9145 if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE)
9146 goto error;
9147 break;
9148 default:
9149 break;
9150 }
9151
9152 return 0;
9153error:
9154 verbose(private_data: env, fmt: "cannot pass map_type %d into func %s#%d\n",
9155 map->map_type, func_id_name(id: func_id), func_id);
9156 return -EINVAL;
9157}
9158
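/* Helpers may have at most one ARG_PTR_TO_UNINIT_MEM ("raw mode") argument,
 * i.e. memory the helper writes into without the program initializing it
 * first.
 */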
9159static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
9160{
9161 int count = 0;
9162
9163 if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
9164 count++;
9165 if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
9166 count++;
9167 if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
9168 count++;
9169 if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
9170 count++;
9171 if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
9172 count++;
9173
9174 /* We only support one arg being in raw mode at the moment,
9175 * which is sufficient for the helper functions we have
9176 * right now.
9177 */
9178 return count <= 1;
9179}
9180
9181static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
9182{
9183 bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
9184 bool has_size = fn->arg_size[arg] != 0;
9185 bool is_next_size = false;
9186
9187 if (arg + 1 < ARRAY_SIZE(fn->arg_type))
9188 is_next_size = arg_type_is_mem_size(type: fn->arg_type[arg + 1]);
9189
9190 if (base_type(type: fn->arg_type[arg]) != ARG_PTR_TO_MEM)
9191 return is_next_size;
9192
9193 return has_size == is_next_size || is_next_size == is_fixed;
9194}
9195
9196static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
9197{
9198 /* bpf_xxx(..., buf, len) call will access 'len'
9199 * bytes from memory 'buf'. Both arg types need
9200 * to be paired, so make sure there's no buggy
9201 * helper function specification.
9202 */
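	/* e.g. bpf_probe_read_kernel()'s proto pairs them as
	 *	.arg1_type = ARG_PTR_TO_UNINIT_MEM,
	 *	.arg2_type = ARG_CONST_SIZE_OR_ZERO,
	 */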
9203 if (arg_type_is_mem_size(type: fn->arg1_type) ||
9204 check_args_pair_invalid(fn, arg: 0) ||
9205 check_args_pair_invalid(fn, arg: 1) ||
9206 check_args_pair_invalid(fn, arg: 2) ||
9207 check_args_pair_invalid(fn, arg: 3) ||
9208 check_args_pair_invalid(fn, arg: 4))
9209 return false;
9210
9211 return true;
9212}
9213
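/* ARG_PTR_TO_BTF_ID arguments must supply a BTF ID and ARG_PTR_TO_SPIN_LOCK
 * must use the BPF_PTR_POISON placeholder; other argument types must not
 * carry a BTF ID, since arg_btf_id shares a union with arg_size, which only
 * fixed-size ARG_PTR_TO_MEM may use.
 */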
9214static bool check_btf_id_ok(const struct bpf_func_proto *fn)
9215{
9216 int i;
9217
9218 for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
9219 if (base_type(type: fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
9220 return !!fn->arg_btf_id[i];
9221 if (base_type(type: fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
9222 return fn->arg_btf_id[i] == BPF_PTR_POISON;
9223 if (base_type(type: fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
9224 /* arg_btf_id and arg_size are in a union. */
9225 (base_type(type: fn->arg_type[i]) != ARG_PTR_TO_MEM ||
9226 !(fn->arg_type[i] & MEM_FIXED_SIZE)))
9227 return false;
9228 }
9229
9230 return true;
9231}
9232
9233static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
9234{
9235 return check_raw_mode_ok(fn) &&
9236 check_arg_pair_ok(fn) &&
9237 check_btf_id_ok(fn) ? 0 : -EINVAL;
9238}
9239
9240/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
9241 * are now invalid, so turn them into unknown SCALAR_VALUE.
9242 *
9243 * This also applies to dynptr slices belonging to skb and xdp dynptrs,
9244 * since these slices point to packet data.
9245 */
9246static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
9247{
9248 struct bpf_func_state *state;
9249 struct bpf_reg_state *reg;
9250
9251 bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
9252 if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg))
9253 mark_reg_invalid(env, reg);
9254 }));
9255}
9256
9257enum {
9258 AT_PKT_END = -1,
9259 BEYOND_PKT_END = -2,
9260};
9261
9262static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
9263{
9264 struct bpf_func_state *state = vstate->frame[vstate->curframe];
9265 struct bpf_reg_state *reg = &state->regs[regn];
9266
9267 if (reg->type != PTR_TO_PACKET)
9268 /* PTR_TO_PACKET_META is not supported yet */
9269 return;
9270
9271 /* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
9272 * How far beyond pkt_end it goes is unknown.
9273 * if (!range_open) it's the case of pkt >= pkt_end
9274 * if (range_open) it's the case of pkt > pkt_end
9275 * hence this pointer is at least 1 byte bigger than pkt_end
9276 */
9277 if (range_open)
9278 reg->range = BEYOND_PKT_END;
9279 else
9280 reg->range = AT_PKT_END;
9281}
9282
9283/* The pointer with the specified id has released its reference to kernel
9284 * resources. Identify all copies of the same pointer and clear the reference.
9285 */
9286static int release_reference(struct bpf_verifier_env *env,
9287 int ref_obj_id)
9288{
9289 struct bpf_func_state *state;
9290 struct bpf_reg_state *reg;
9291 int err;
9292
9293 err = release_reference_state(state: cur_func(env), ptr_id: ref_obj_id);
9294 if (err)
9295 return err;
9296
9297 bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
9298 if (reg->ref_obj_id == ref_obj_id)
9299 mark_reg_invalid(env, reg);
9300 }));
9301
9302 return 0;
9303}
9304
9305static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
9306{
9307 struct bpf_func_state *unused;
9308 struct bpf_reg_state *reg;
9309
9310 bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
9311 if (type_is_non_owning_ref(reg->type))
9312 mark_reg_invalid(env, reg);
9313 }));
9314}
9315
9316static void clear_caller_saved_regs(struct bpf_verifier_env *env,
9317 struct bpf_reg_state *regs)
9318{
9319 int i;
9320
9321 /* after the call registers r0 - r5 were scratched */
9322 for (i = 0; i < CALLER_SAVED_REGS; i++) {
9323 mark_reg_not_init(env, regs, regno: caller_saved[i]);
9324 check_reg_arg(env, regno: caller_saved[i], t: DST_OP_NO_MARK);
9325 }
9326}
9327
9328typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
9329 struct bpf_func_state *caller,
9330 struct bpf_func_state *callee,
9331 int insn_idx);
9332
9333static int set_callee_state(struct bpf_verifier_env *env,
9334 struct bpf_func_state *caller,
9335 struct bpf_func_state *callee, int insn_idx);
9336
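/* Common handling for calls into another BPF function, either a direct
 * subprog call or a helper/kfunc invoking a callback: validate the call,
 * allocate and initialize the callee frame (or an async frame for timer
 * callbacks), transfer references, and continue verification at the
 * callee's first instruction.
 */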
9337static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
9338 int *insn_idx, int subprog,
9339 set_callee_state_fn set_callee_state_cb)
9340{
9341 struct bpf_verifier_state *state = env->cur_state;
9342 struct bpf_func_state *caller, *callee;
9343 int err;
9344
9345 if (state->curframe + 1 >= MAX_CALL_FRAMES) {
9346 verbose(private_data: env, fmt: "the call stack of %d frames is too deep\n",
9347 state->curframe + 2);
9348 return -E2BIG;
9349 }
9350
9351 caller = state->frame[state->curframe];
9352 if (state->frame[state->curframe + 1]) {
9353 verbose(private_data: env, fmt: "verifier bug. Frame %d already allocated\n",
9354 state->curframe + 1);
9355 return -EFAULT;
9356 }
9357
9358 err = btf_check_subprog_call(env, subprog, regs: caller->regs);
9359 if (err == -EFAULT)
9360 return err;
9361 if (subprog_is_global(env, subprog)) {
9362 if (err) {
9363 verbose(private_data: env, fmt: "Caller passes invalid args into func#%d\n",
9364 subprog);
9365 return err;
9366 } else {
9367 if (env->log.level & BPF_LOG_LEVEL)
9368 verbose(private_data: env,
9369 fmt: "Func#%d is global and valid. Skipping.\n",
9370 subprog);
9371 clear_caller_saved_regs(env, regs: caller->regs);
9372
9373 /* All global functions return a 64-bit SCALAR_VALUE */
9374 mark_reg_unknown(env, regs: caller->regs, regno: BPF_REG_0);
9375 caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
9376
9377 /* continue with next insn after call */
9378 return 0;
9379 }
9380 }
9381
9382 /* set_callee_state is used for direct subprog calls, but we are
9383 * interested in validating only BPF helpers that can call subprogs as
9384 * callbacks
9385 */
9386 if (set_callee_state_cb != set_callee_state) {
9387 env->subprog_info[subprog].is_cb = true;
9388 if (bpf_pseudo_kfunc_call(insn) &&
9389 !is_callback_calling_kfunc(btf_id: insn->imm)) {
9390 verbose(private_data: env, fmt: "verifier bug: kfunc %s#%d not marked as callback-calling\n",
9391 func_id_name(id: insn->imm), insn->imm);
9392 return -EFAULT;
9393 } else if (!bpf_pseudo_kfunc_call(insn) &&
9394 !is_callback_calling_function(func_id: insn->imm)) { /* helper */
9395 verbose(private_data: env, fmt: "verifier bug: helper %s#%d not marked as callback-calling\n",
9396 func_id_name(id: insn->imm), insn->imm);
9397 return -EFAULT;
9398 }
9399 }
9400
9401 if (insn->code == (BPF_JMP | BPF_CALL) &&
9402 insn->src_reg == 0 &&
9403 insn->imm == BPF_FUNC_timer_set_callback) {
9404 struct bpf_verifier_state *async_cb;
9405
9406 /* there is no real recursion here. timer callbacks are async */
9407 env->subprog_info[subprog].is_async_cb = true;
9408 async_cb = push_async_cb(env, insn_idx: env->subprog_info[subprog].start,
9409 prev_insn_idx: *insn_idx, subprog);
9410 if (!async_cb)
9411 return -EFAULT;
9412 callee = async_cb->frame[0];
9413 callee->async_entry_cnt = caller->async_entry_cnt + 1;
9414
9415 /* Convert bpf_timer_set_callback() args into timer callback args */
9416 err = set_callee_state_cb(env, caller, callee, *insn_idx);
9417 if (err)
9418 return err;
9419
9420 clear_caller_saved_regs(env, regs: caller->regs);
9421 mark_reg_unknown(env, regs: caller->regs, regno: BPF_REG_0);
9422 caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
9423 /* continue with next insn after call */
9424 return 0;
9425 }
9426
9427 callee = kzalloc(size: sizeof(*callee), GFP_KERNEL);
9428 if (!callee)
9429 return -ENOMEM;
9430 state->frame[state->curframe + 1] = callee;
9431
9432 /* callee cannot access r0, r6 - r9 for reading and has to write
9433 * into its own stack before reading from it.
9434 * callee can read/write into caller's stack
9435 */
9436 init_func_state(env, state: callee,
9437 /* remember the callsite, it will be used by bpf_exit */
9438 callsite: *insn_idx /* callsite */,
9439 frameno: state->curframe + 1 /* frameno within this callchain */,
9440 subprogno: subprog /* subprog number within this prog */);
9441
9442 /* Transfer references to the callee */
9443 err = copy_reference_state(dst: callee, src: caller);
9444 if (err)
9445 goto err_out;
9446
9447 err = set_callee_state_cb(env, caller, callee, *insn_idx);
9448 if (err)
9449 goto err_out;
9450
9451 clear_caller_saved_regs(env, regs: caller->regs);
9452
9453 /* only increment it after check_reg_arg() finished */
9454 state->curframe++;
9455
9456 /* and go analyze first insn of the callee */
9457 *insn_idx = env->subprog_info[subprog].start - 1;
9458
9459 if (env->log.level & BPF_LOG_LEVEL) {
9460 verbose(private_data: env, fmt: "caller:\n");
9461 print_verifier_state(env, state: caller, print_all: true);
9462 verbose(private_data: env, fmt: "callee:\n");
9463 print_verifier_state(env, state: callee, print_all: true);
9464 }
9465 return 0;
9466
9467err_out:
9468 free_func_state(state: callee);
9469 state->frame[state->curframe + 1] = NULL;
9470 return err;
9471}
9472
9473int map_set_for_each_callback_args(struct bpf_verifier_env *env,
9474 struct bpf_func_state *caller,
9475 struct bpf_func_state *callee)
9476{
9477 /* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
9478 * void *callback_ctx, u64 flags);
9479 * callback_fn(struct bpf_map *map, void *key, void *value,
9480 * void *callback_ctx);
9481 */
9482 callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
9483
9484 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9485 __mark_reg_known_zero(reg: &callee->regs[BPF_REG_2]);
9486 callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9487
9488 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9489 __mark_reg_known_zero(reg: &callee->regs[BPF_REG_3]);
9490 callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9491
9492 /* pointer to stack or null */
9493 callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
9494
9495 /* unused */
9496 __mark_reg_not_init(env, reg: &callee->regs[BPF_REG_5]);
9497 return 0;
9498}
9499
9500static int set_callee_state(struct bpf_verifier_env *env,
9501 struct bpf_func_state *caller,
9502 struct bpf_func_state *callee, int insn_idx)
9503{
9504 int i;
9505
9506 /* copy r1 - r5 args that callee can access. The copy includes parent
9507 * pointers, which connects us up to the liveness chain
9508 */
9509 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
9510 callee->regs[i] = caller->regs[i];
9511 return 0;
9512}
9513
9514static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
9515 int *insn_idx)
9516{
9517 int subprog, target_insn;
9518
9519 target_insn = *insn_idx + insn->imm + 1;
9520 subprog = find_subprog(env, off: target_insn);
9521 if (subprog < 0) {
9522 verbose(private_data: env, fmt: "verifier bug. No program starts at insn %d\n",
9523 target_insn);
9524 return -EFAULT;
9525 }
9526
9527 return __check_func_call(env, insn, insn_idx, subprog, set_callee_state_cb: set_callee_state);
9528}
9529
9530static int set_map_elem_callback_state(struct bpf_verifier_env *env,
9531 struct bpf_func_state *caller,
9532 struct bpf_func_state *callee,
9533 int insn_idx)
9534{
9535 struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
9536 struct bpf_map *map;
9537 int err;
9538
9539 if (bpf_map_ptr_poisoned(aux: insn_aux)) {
9540 verbose(private_data: env, fmt: "tail_call abusing map_ptr\n");
9541 return -EINVAL;
9542 }
9543
9544 map = BPF_MAP_PTR(insn_aux->map_ptr_state);
9545 if (!map->ops->map_set_for_each_callback_args ||
9546 !map->ops->map_for_each_callback) {
9547 verbose(private_data: env, fmt: "callback function not allowed for map\n");
9548 return -ENOTSUPP;
9549 }
9550
9551 err = map->ops->map_set_for_each_callback_args(env, caller, callee);
9552 if (err)
9553 return err;
9554
9555 callee->in_callback_fn = true;
9556 callee->callback_ret_range = tnum_range(min: 0, max: 1);
9557 return 0;
9558}
9559
9560static int set_loop_callback_state(struct bpf_verifier_env *env,
9561 struct bpf_func_state *caller,
9562 struct bpf_func_state *callee,
9563 int insn_idx)
9564{
9565 /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
9566 * u64 flags);
9567 * callback_fn(u32 index, void *callback_ctx);
9568 */
9569 callee->regs[BPF_REG_1].type = SCALAR_VALUE;
9570 callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9571
9572 /* unused */
9573 __mark_reg_not_init(env, reg: &callee->regs[BPF_REG_3]);
9574 __mark_reg_not_init(env, reg: &callee->regs[BPF_REG_4]);
9575 __mark_reg_not_init(env, reg: &callee->regs[BPF_REG_5]);
9576
9577 callee->in_callback_fn = true;
9578 callee->callback_ret_range = tnum_range(min: 0, max: 1);
9579 return 0;
9580}
9581
9582static int set_timer_callback_state(struct bpf_verifier_env *env,
9583 struct bpf_func_state *caller,
9584 struct bpf_func_state *callee,
9585 int insn_idx)
9586{
9587 struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
9588
9589 /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
9590 * callback_fn(struct bpf_map *map, void *key, void *value);
9591 */
9592 callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
9593 __mark_reg_known_zero(reg: &callee->regs[BPF_REG_1]);
9594 callee->regs[BPF_REG_1].map_ptr = map_ptr;
9595
9596 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9597 __mark_reg_known_zero(reg: &callee->regs[BPF_REG_2]);
9598 callee->regs[BPF_REG_2].map_ptr = map_ptr;
9599
9600 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9601 __mark_reg_known_zero(reg: &callee->regs[BPF_REG_3]);
9602 callee->regs[BPF_REG_3].map_ptr = map_ptr;
9603
9604 /* unused */
9605 __mark_reg_not_init(env, reg: &callee->regs[BPF_REG_4]);
9606 __mark_reg_not_init(env, reg: &callee->regs[BPF_REG_5]);
9607 callee->in_async_callback_fn = true;
9608 callee->callback_ret_range = tnum_range(min: 0, max: 1);
9609 return 0;
9610}
9611
9612static int set_find_vma_callback_state(struct bpf_verifier_env *env,
9613 struct bpf_func_state *caller,
9614 struct bpf_func_state *callee,
9615 int insn_idx)
9616{
9617 /* bpf_find_vma(struct task_struct *task, u64 addr,
9618 * void *callback_fn, void *callback_ctx, u64 flags)
9619 * (callback_fn)(struct task_struct *task,
9620 * struct vm_area_struct *vma, void *callback_ctx);
9621 */
9622 callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
9623
9624 callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
9625 __mark_reg_known_zero(reg: &callee->regs[BPF_REG_2]);
9626 callee->regs[BPF_REG_2].btf = btf_vmlinux;
9627 callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA],
9628
9629 /* pointer to stack or null */
9630 callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
9631
9632 /* unused */
9633 __mark_reg_not_init(env, reg: &callee->regs[BPF_REG_4]);
9634 __mark_reg_not_init(env, reg: &callee->regs[BPF_REG_5]);
9635 callee->in_callback_fn = true;
9636 callee->callback_ret_range = tnum_range(min: 0, max: 1);
9637 return 0;
9638}
9639
9640static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
9641 struct bpf_func_state *caller,
9642 struct bpf_func_state *callee,
9643 int insn_idx)
9644{
9645 /* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void
9646 * callback_ctx, u64 flags);
9647 * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
9648 */
9649 __mark_reg_not_init(env, reg: &callee->regs[BPF_REG_0]);
9650 mark_dynptr_cb_reg(env, reg: &callee->regs[BPF_REG_1], type: BPF_DYNPTR_TYPE_LOCAL);
9651 callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9652
9653 /* unused */
9654 __mark_reg_not_init(env, reg: &callee->regs[BPF_REG_3]);
9655 __mark_reg_not_init(env, reg: &callee->regs[BPF_REG_4]);
9656 __mark_reg_not_init(env, reg: &callee->regs[BPF_REG_5]);
9657
9658 callee->in_callback_fn = true;
9659 callee->callback_ret_range = tnum_range(min: 0, max: 1);
9660 return 0;
9661}
9662
9663static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
9664 struct bpf_func_state *caller,
9665 struct bpf_func_state *callee,
9666 int insn_idx)
9667{
9668 /* void bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
9669 * bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
9670 *
9671 * 'struct bpf_rb_node *node' arg to bpf_rbtree_add_impl is the same PTR_TO_BTF_ID w/ offset
9672 * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
9673 * by this point, so look at 'root'
9674 */
9675 struct btf_field *field;
9676
9677 field = reg_find_field_offset(reg: &caller->regs[BPF_REG_1], off: caller->regs[BPF_REG_1].off,
9678 fields: BPF_RB_ROOT);
9679 if (!field || !field->graph_root.value_btf_id)
9680 return -EFAULT;
9681
9682 mark_reg_graph_node(regs: callee->regs, regno: BPF_REG_1, ds_head: &field->graph_root);
9683 ref_set_non_owning(env, reg: &callee->regs[BPF_REG_1]);
9684 mark_reg_graph_node(regs: callee->regs, regno: BPF_REG_2, ds_head: &field->graph_root);
9685 ref_set_non_owning(env, reg: &callee->regs[BPF_REG_2]);
9686
9687 __mark_reg_not_init(env, reg: &callee->regs[BPF_REG_3]);
9688 __mark_reg_not_init(env, reg: &callee->regs[BPF_REG_4]);
9689 __mark_reg_not_init(env, reg: &callee->regs[BPF_REG_5]);
9690 callee->in_callback_fn = true;
9691 callee->callback_ret_range = tnum_range(min: 0, max: 1);
9692 return 0;
9693}
9694
9695static bool is_rbtree_lock_required_kfunc(u32 btf_id);
9696
9697/* Are we currently verifying the callback for a rbtree helper that must
9698 * be called with lock held? If so, no need to complain about unreleased
9699 * lock
9700 */
9701static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
9702{
9703 struct bpf_verifier_state *state = env->cur_state;
9704 struct bpf_insn *insn = env->prog->insnsi;
9705 struct bpf_func_state *callee;
9706 int kfunc_btf_id;
9707
9708 if (!state->curframe)
9709 return false;
9710
9711 callee = state->frame[state->curframe];
9712
9713 if (!callee->in_callback_fn)
9714 return false;
9715
9716 kfunc_btf_id = insn[callee->callsite].imm;
9717 return is_rbtree_lock_required_kfunc(btf_id: kfunc_btf_id);
9718}
9719
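/* Handle BPF_EXIT from a callee frame: for callbacks enforce the return
 * value range, otherwise propagate r0 and reference state back to the
 * caller; then resume verification at the instruction after the call site.
 */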
9720static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
9721{
9722 struct bpf_verifier_state *state = env->cur_state;
9723 struct bpf_func_state *caller, *callee;
9724 struct bpf_reg_state *r0;
9725 int err;
9726
9727 callee = state->frame[state->curframe];
9728 r0 = &callee->regs[BPF_REG_0];
9729 if (r0->type == PTR_TO_STACK) {
9730 /* technically it's ok to return caller's stack pointer
9731 * (or caller's caller's pointer) back to the caller,
9732 * since these pointers are valid. Only current stack
9733 * pointer will be invalid as soon as function exits,
9734 * but let's be conservative
9735 */
9736 verbose(private_data: env, fmt: "cannot return stack pointer to the caller\n");
9737 return -EINVAL;
9738 }
9739
9740 caller = state->frame[state->curframe - 1];
9741 if (callee->in_callback_fn) {
9742 /* enforce R0 return value range [0, 1]. */
9743 struct tnum range = callee->callback_ret_range;
9744
9745 if (r0->type != SCALAR_VALUE) {
9746 verbose(private_data: env, fmt: "R0 not a scalar value\n");
9747 return -EACCES;
9748 }
9749 if (!tnum_in(a: range, b: r0->var_off)) {
9750 verbose_invalid_scalar(env, reg: r0, range: &range, ctx: "callback return", reg_name: "R0");
9751 return -EINVAL;
9752 }
9753 } else {
9754 /* return to the caller whatever r0 had in the callee */
9755 caller->regs[BPF_REG_0] = *r0;
9756 }
9757
9758 /* callback_fn frame should have released its own additions to parent's
9759 * reference state at this point, or check_reference_leak would
9760 * complain, hence it must be the same as the caller. There is no need
9761 * to copy it back.
9762 */
9763 if (!callee->in_callback_fn) {
9764 /* Transfer references to the caller */
9765 err = copy_reference_state(dst: caller, src: callee);
9766 if (err)
9767 return err;
9768 }
9769
9770 *insn_idx = callee->callsite + 1;
9771 if (env->log.level & BPF_LOG_LEVEL) {
9772 verbose(private_data: env, fmt: "returning from callee:\n");
9773 print_verifier_state(env, state: callee, print_all: true);
9774 verbose(private_data: env, fmt: "to caller at %d:\n", *insn_idx);
9775 print_verifier_state(env, state: caller, print_all: true);
9776 }
9777 /* clear everything in the callee. In case of exceptional exits using
9778 * bpf_throw, this will be done by copy_verifier_state for extra frames. */
9779 free_func_state(state: callee);
9780 state->frame[state->curframe--] = NULL;
9781 return 0;
9782}
9783
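/* Tighten the bounds of R0 for helpers whose integer return value has a
 * known range, e.g. bpf_get_stack() returns at most the supplied buffer
 * size and bpf_get_smp_processor_id() returns less than nr_cpu_ids.
 */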
9784static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
9785 int func_id,
9786 struct bpf_call_arg_meta *meta)
9787{
9788 struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
9789
9790 if (ret_type != RET_INTEGER)
9791 return;
9792
9793 switch (func_id) {
9794 case BPF_FUNC_get_stack:
9795 case BPF_FUNC_get_task_stack:
9796 case BPF_FUNC_probe_read_str:
9797 case BPF_FUNC_probe_read_kernel_str:
9798 case BPF_FUNC_probe_read_user_str:
9799 ret_reg->smax_value = meta->msize_max_value;
9800 ret_reg->s32_max_value = meta->msize_max_value;
9801 ret_reg->smin_value = -MAX_ERRNO;
9802 ret_reg->s32_min_value = -MAX_ERRNO;
9803 reg_bounds_sync(reg: ret_reg);
9804 break;
9805 case BPF_FUNC_get_smp_processor_id:
9806 ret_reg->umax_value = nr_cpu_ids - 1;
9807 ret_reg->u32_max_value = nr_cpu_ids - 1;
9808 ret_reg->smax_value = nr_cpu_ids - 1;
9809 ret_reg->s32_max_value = nr_cpu_ids - 1;
9810 ret_reg->umin_value = 0;
9811 ret_reg->u32_min_value = 0;
9812 ret_reg->smin_value = 0;
9813 ret_reg->s32_min_value = 0;
9814 reg_bounds_sync(reg: ret_reg);
9815 break;
9816 }
9817}
9818
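/* For map-manipulating helpers, remember the map in insn_aux_data so later
 * passes can specialize the call, poison the entry if the same insn sees
 * different maps, and reject writes to BPF_F_RDONLY_PROG maps.
 */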
9819static int
9820record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
9821 int func_id, int insn_idx)
9822{
9823 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
9824 struct bpf_map *map = meta->map_ptr;
9825
9826 if (func_id != BPF_FUNC_tail_call &&
9827 func_id != BPF_FUNC_map_lookup_elem &&
9828 func_id != BPF_FUNC_map_update_elem &&
9829 func_id != BPF_FUNC_map_delete_elem &&
9830 func_id != BPF_FUNC_map_push_elem &&
9831 func_id != BPF_FUNC_map_pop_elem &&
9832 func_id != BPF_FUNC_map_peek_elem &&
9833 func_id != BPF_FUNC_for_each_map_elem &&
9834 func_id != BPF_FUNC_redirect_map &&
9835 func_id != BPF_FUNC_map_lookup_percpu_elem)
9836 return 0;
9837
9838 if (map == NULL) {
9839 verbose(private_data: env, fmt: "kernel subsystem misconfigured verifier\n");
9840 return -EINVAL;
9841 }
9842
9843 /* In case of read-only, some additional restrictions
9844 * need to be applied in order to prevent altering the
9845 * state of the map from program side.
9846 */
9847 if ((map->map_flags & BPF_F_RDONLY_PROG) &&
9848 (func_id == BPF_FUNC_map_delete_elem ||
9849 func_id == BPF_FUNC_map_update_elem ||
9850 func_id == BPF_FUNC_map_push_elem ||
9851 func_id == BPF_FUNC_map_pop_elem)) {
9852 verbose(private_data: env, fmt: "write into map forbidden\n");
9853 return -EACCES;
9854 }
9855
9856 if (!BPF_MAP_PTR(aux->map_ptr_state))
9857 bpf_map_ptr_store(aux, map: meta->map_ptr,
9858 unpriv: !meta->map_ptr->bypass_spec_v1);
9859 else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
9860 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
9861 unpriv: !meta->map_ptr->bypass_spec_v1);
9862 return 0;
9863}
9864
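/* For bpf_tail_call(), record a constant in-range index into the prog array
 * (or poison the recorded key), allowing the tail call to be patched into a
 * direct jump later on.
 */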
9865static int
9866record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
9867 int func_id, int insn_idx)
9868{
9869 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
9870 struct bpf_reg_state *regs = cur_regs(env), *reg;
9871 struct bpf_map *map = meta->map_ptr;
9872 u64 val, max;
9873 int err;
9874
9875 if (func_id != BPF_FUNC_tail_call)
9876 return 0;
9877 if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
9878 verbose(private_data: env, fmt: "kernel subsystem misconfigured verifier\n");
9879 return -EINVAL;
9880 }
9881
9882 reg = &regs[BPF_REG_3];
9883 val = reg->var_off.value;
9884 max = map->max_entries;
9885
9886 if (!(register_is_const(reg) && val < max)) {
9887 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
9888 return 0;
9889 }
9890
9891 err = mark_chain_precision(env, regno: BPF_REG_3);
9892 if (err)
9893 return err;
9894 if (bpf_map_key_unseen(aux))
9895 bpf_map_key_store(aux, state: val);
9896 else if (!bpf_map_key_poisoned(aux) &&
9897 bpf_map_key_immediate(aux) != val)
9898 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
9899 return 0;
9900}
9901
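/* At exit from the main frame or a callback (or on an exception exit), all
 * references acquired in that frame must have been released.
 */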
9902static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit)
9903{
9904 struct bpf_func_state *state = cur_func(env);
9905 bool refs_lingering = false;
9906 int i;
9907
9908 if (!exception_exit && state->frameno && !state->in_callback_fn)
9909 return 0;
9910
9911 for (i = 0; i < state->acquired_refs; i++) {
9912 if (!exception_exit && state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
9913 continue;
9914 verbose(private_data: env, fmt: "Unreleased reference id=%d alloc_insn=%d\n",
9915 state->refs[i].id, state->refs[i].insn_idx);
9916 refs_lingering = true;
9917 }
9918 return refs_lingering ? -EINVAL : 0;
9919}
9920
9921static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
9922 struct bpf_reg_state *regs)
9923{
9924 struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
9925 struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
9926 struct bpf_map *fmt_map = fmt_reg->map_ptr;
9927 struct bpf_bprintf_data data = {};
9928 int err, fmt_map_off, num_args;
9929 u64 fmt_addr;
9930 char *fmt;
9931
9932 /* data must be an array of u64 */
9933 if (data_len_reg->var_off.value % 8)
9934 return -EINVAL;
9935 num_args = data_len_reg->var_off.value / 8;
9936
9937 /* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
9938 * and map_direct_value_addr is set.
9939 */
9940 fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
9941 err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
9942 fmt_map_off);
9943 if (err) {
9944 verbose(private_data: env, fmt: "verifier bug\n");
9945 return -EFAULT;
9946 }
9947 fmt = (char *)(long)fmt_addr + fmt_map_off;
9948
	/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, so
	 * we can focus on validating the format specifiers.
	 */
	err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
	if (err < 0)
		verbose(env, "Invalid format string\n");
9955
9956 return err;
9957}
9958
9959static int check_get_func_ip(struct bpf_verifier_env *env)
9960{
9961 enum bpf_prog_type type = resolve_prog_type(prog: env->prog);
9962 int func_id = BPF_FUNC_get_func_ip;
9963
9964 if (type == BPF_PROG_TYPE_TRACING) {
9965 if (!bpf_prog_has_trampoline(prog: env->prog)) {
9966 verbose(private_data: env, fmt: "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
9967 func_id_name(id: func_id), func_id);
9968 return -ENOTSUPP;
9969 }
9970 return 0;
9971 } else if (type == BPF_PROG_TYPE_KPROBE) {
9972 return 0;
9973 }
9974
9975 verbose(private_data: env, fmt: "func %s#%d not supported for program type %d\n",
9976 func_id_name(id: func_id), func_id, type);
9977 return -ENOTSUPP;
9978}
9979
9980static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
9981{
9982 return &env->insn_aux_data[env->insn_idx];
9983}
9984
9985static bool loop_flag_is_zero(struct bpf_verifier_env *env)
9986{
9987 struct bpf_reg_state *regs = cur_regs(env);
9988 struct bpf_reg_state *reg = &regs[BPF_REG_4];
9989 bool reg_is_null = register_is_null(reg);
9990
9991 if (reg_is_null)
9992 mark_chain_precision(env, regno: BPF_REG_4);
9993
9994 return reg_is_null;
9995}
9996
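/* A bpf_loop() call site is fit for inlining only if its flags argument is
 * always known to be zero and it always uses the same callback subprog;
 * track that per call site here.
 */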
9997static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
9998{
9999 struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
10000
10001 if (!state->initialized) {
10002 state->initialized = 1;
10003 state->fit_for_inline = loop_flag_is_zero(env);
10004 state->callback_subprogno = subprogno;
10005 return;
10006 }
10007
10008 if (!state->fit_for_inline)
10009 return;
10010
10011 state->fit_for_inline = (loop_flag_is_zero(env) &&
10012 state->callback_subprogno == subprogno);
10013}
10014
static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
			     int *insn_idx_p)
{
	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
	bool returns_cpu_specific_alloc_ptr = false;
	const struct bpf_func_proto *fn = NULL;
	enum bpf_return_type ret_type;
	enum bpf_type_flag ret_flag;
	struct bpf_reg_state *regs;
	struct bpf_call_arg_meta meta;
	int insn_idx = *insn_idx_p;
	bool changes_data;
	int i, err, func_id;

	/* find function prototype */
	func_id = insn->imm;
	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
		verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
			func_id);
		return -EINVAL;
	}

	if (env->ops->get_func_proto)
		fn = env->ops->get_func_proto(func_id, env->prog);
	if (!fn) {
		verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
			func_id);
		return -EINVAL;
	}

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	if (!env->prog->gpl_compatible && fn->gpl_only) {
		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
		return -EINVAL;
	}

	if (fn->allowed && !fn->allowed(env->prog)) {
		verbose(env, "helper call is not allowed in probe\n");
		return -EINVAL;
	}

	if (!env->prog->aux->sleepable && fn->might_sleep) {
		verbose(env, "helper call might sleep in a non-sleepable prog\n");
		return -EINVAL;
	}

	/* With LD_ABS/IND some JITs save/restore skb from r1. */
	changes_data = bpf_helper_changes_pkt_data(fn->func);
	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
			func_id_name(func_id), func_id);
		return -EINVAL;
	}

	memset(&meta, 0, sizeof(meta));
	meta.pkt_access = fn->pkt_access;

	err = check_func_proto(fn, func_id);
	if (err) {
		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
			func_id_name(func_id), func_id);
		return err;
	}

	if (env->cur_state->active_rcu_lock) {
		if (fn->might_sleep) {
			verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n",
				func_id_name(func_id), func_id);
			return -EINVAL;
		}

		if (env->prog->aux->sleepable && is_storage_get_function(func_id))
			env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
	}

	meta.func_id = func_id;
	/* check args */
	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
		err = check_func_arg(env, i, &meta, fn, insn_idx);
		if (err)
			return err;
	}

	err = record_func_map(env, &meta, func_id, insn_idx);
	if (err)
		return err;

	err = record_func_key(env, &meta, func_id, insn_idx);
	if (err)
		return err;

	/* Mark slots with STACK_MISC in case of raw mode, stack offset
	 * is inferred from register state.
	 */
	for (i = 0; i < meta.access_size; i++) {
		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
				       BPF_WRITE, -1, false, false);
		if (err)
			return err;
	}

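/* Illustrative sketch (not part of the verifier): the loop above covers
 * helpers that take a possibly uninitialized memory argument plus a size,
 * e.g. on the BPF program side:
 *
 *	char buf[16];
 *
 *	bpf_probe_read_kernel(buf, sizeof(buf), ptr);
 *
 * meta.access_size is the constant size argument; each byte of the stack
 * buffer is marked as written (STACK_MISC), so later reads of buf on this
 * path are accepted even though the program itself never wrote to it.
 */
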
	regs = cur_regs(env);

	if (meta.release_regno) {
		err = -EINVAL;
		/* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
		 * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
		 * is safe to do directly.
		 */
		if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
			if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) {
				verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be released\n");
				return -EFAULT;
			}
			err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
		} else if (func_id == BPF_FUNC_kptr_xchg && meta.ref_obj_id) {
			u32 ref_obj_id = meta.ref_obj_id;
			bool in_rcu = in_rcu_cs(env);
			struct bpf_func_state *state;
			struct bpf_reg_state *reg;

			err = release_reference_state(cur_func(env), ref_obj_id);
			if (!err) {
				bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
					if (reg->ref_obj_id == ref_obj_id) {
						if (in_rcu && (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) {
							reg->ref_obj_id = 0;
							reg->type &= ~MEM_ALLOC;
							reg->type |= MEM_RCU;
						} else {
							mark_reg_invalid(env, reg);
						}
					}
				}));
			}
		} else if (meta.ref_obj_id) {
			err = release_reference(env, meta.ref_obj_id);
		} else if (register_is_null(&regs[meta.release_regno])) {
			/* meta.ref_obj_id can only be 0 if register that is meant to be
			 * released is NULL, which must be > R0.
			 */
			err = 0;
		}
		if (err) {
			verbose(env, "func %s#%d reference has not been acquired before\n",
				func_id_name(func_id), func_id);
			return err;
		}
	}

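/* Illustrative sketch (not part of the verifier): the BPF_FUNC_kptr_xchg
 * branch above handles reference transfer for kptr fields. On the BPF program
 * side this is roughly (field and type names here are hypothetical):
 *
 *	struct map_value {
 *		struct my_obj __kptr *ptr;
 *	};
 *
 *	old = bpf_kptr_xchg(&v->ptr, new);
 *
 * The reference previously held by 'new' leaves the verifier's bookkeeping
 * (it now lives in the map value), and R0 ('old') may carry a reference that
 * the program must in turn release or store back.
 */
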
	switch (func_id) {
	case BPF_FUNC_tail_call:
		err = check_reference_leak(env, false);
		if (err) {
			verbose(env, "tail_call would lead to reference leak\n");
			return err;
		}
		break;
	case BPF_FUNC_get_local_storage:
		/* check that flags argument in get_local_storage(map, flags) is 0,
		 * this is required because get_local_storage() can't return an error.
		 */
		if (!register_is_null(&regs[BPF_REG_2])) {
			verbose(env, "get_local_storage() doesn't support non-zero flags\n");
			return -EINVAL;
		}
		break;
	case BPF_FUNC_for_each_map_elem:
		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
					set_map_elem_callback_state);
		break;
	case BPF_FUNC_timer_set_callback:
		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
					set_timer_callback_state);
		break;
	case BPF_FUNC_find_vma:
		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
					set_find_vma_callback_state);
		break;
	case BPF_FUNC_snprintf:
		err = check_bpf_snprintf_call(env, regs);
		break;
	case BPF_FUNC_loop:
		update_loop_inline_state(env, meta.subprogno);
		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
					set_loop_callback_state);
		break;
	case BPF_FUNC_dynptr_from_mem:
		if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
			verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
				reg_type_str(env, regs[BPF_REG_1].type));
			return -EACCES;
		}
		break;
	case BPF_FUNC_set_retval:
		if (prog_type == BPF_PROG_TYPE_LSM &&
		    env->prog->expected_attach_type == BPF_LSM_CGROUP) {
			if (!env->prog->aux->attach_func_proto->type) {
				/* Make sure programs that attach to void
				 * hooks don't try to modify return value.
				 */
				verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
				return -EINVAL;
			}
		}
		break;
	case BPF_FUNC_dynptr_data:
	{
		struct bpf_reg_state *reg;
		int id, ref_obj_id;

		reg = get_dynptr_arg_reg(env, fn, regs);
		if (!reg)
			return -EFAULT;

		if (meta.dynptr_id) {
			verbose(env, "verifier internal error: meta.dynptr_id already set\n");
			return -EFAULT;
		}
		if (meta.ref_obj_id) {
			verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
			return -EFAULT;
		}

		id = dynptr_id(env, reg);
		if (id < 0) {
			verbose(env, "verifier internal error: failed to obtain dynptr id\n");
			return id;
		}

		ref_obj_id = dynptr_ref_obj_id(env, reg);
		if (ref_obj_id < 0) {
			verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n");
			return ref_obj_id;
		}

		meta.dynptr_id = id;
		meta.ref_obj_id = ref_obj_id;

		break;
	}
	case BPF_FUNC_dynptr_write:
	{
		enum bpf_dynptr_type dynptr_type;
		struct bpf_reg_state *reg;

		reg = get_dynptr_arg_reg(env, fn, regs);
		if (!reg)
			return -EFAULT;

		dynptr_type = dynptr_get_type(env, reg);
		if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
			return -EFAULT;

		if (dynptr_type == BPF_DYNPTR_TYPE_SKB)
			/* this will trigger clear_all_pkt_pointers(), which will
			 * invalidate all dynptr slices associated with the skb
			 */
			changes_data = true;

		break;
	}
	case BPF_FUNC_per_cpu_ptr:
	case BPF_FUNC_this_cpu_ptr:
	{
		struct bpf_reg_state *reg = &regs[BPF_REG_1];
		const struct btf_type *type;

		if (reg->type & MEM_RCU) {
			type = btf_type_by_id(reg->btf, reg->btf_id);
			if (!type || !btf_type_is_struct(type)) {
				verbose(env, "Helper has invalid btf/btf_id in R1\n");
				return -EFAULT;
			}
			returns_cpu_specific_alloc_ptr = true;
			env->insn_aux_data[insn_idx].call_with_percpu_alloc_ptr = true;
		}
		break;
	}
	case BPF_FUNC_user_ringbuf_drain:
		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
					set_user_ringbuf_callback_state);
		break;
	}

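/* Illustrative sketch (not part of the verifier): several of the cases above
 * verify callback-calling helpers through __check_func_call(). For
 * bpf_for_each_map_elem(), for example, the callback subprog is verified
 * against roughly this shape on the BPF program side:
 *
 *	static long check_elem(struct bpf_map *map, void *key, void *value,
 *			       void *ctx)
 *	{
 *		return 0;
 *	}
 *	...
 *	bpf_for_each_map_elem(&my_map, check_elem, &cb_ctx, 0);
 */
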
	if (err)
		return err;

	/* reset caller saved regs */
	for (i = 0; i < CALLER_SAVED_REGS; i++) {
		mark_reg_not_init(env, regs, caller_saved[i]);
		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
	}

	/* helper call returns 64-bit value. */
	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;

	/* update return register (already marked as written above) */
	ret_type = fn->ret_type;
	ret_flag = type_flag(ret_type);

	switch (base_type(ret_type)) {
	case RET_INTEGER:
		/* sets type to SCALAR_VALUE */
		mark_reg_unknown(env, regs, BPF_REG_0);
		break;
	case RET_VOID:
		regs[BPF_REG_0].type = NOT_INIT;
		break;
	case RET_PTR_TO_MAP_VALUE:
		/* There is no offset yet applied, variable or fixed */
		mark_reg_known_zero(env, regs, BPF_REG_0);
		/* remember map_ptr, so that check_map_access()
		 * can check 'value_size' boundary of memory access
		 * to map element returned from bpf_map_lookup_elem()
		 */
		if (meta.map_ptr == NULL) {
			verbose(env,
				"kernel subsystem misconfigured verifier\n");
			return -EINVAL;
		}
		regs[BPF_REG_0].map_ptr = meta.map_ptr;
		regs[BPF_REG_0].map_uid = meta.map_uid;
		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
		if (!type_may_be_null(ret_type) &&
		    btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK)) {
			regs[BPF_REG_0].id = ++env->id_gen;
		}
		break;
	case RET_PTR_TO_SOCKET:
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
		break;
	case RET_PTR_TO_SOCK_COMMON:
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
		break;
	case RET_PTR_TO_TCP_SOCK:
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
		break;
	case RET_PTR_TO_MEM:
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
		regs[BPF_REG_0].mem_size = meta.mem_size;
		break;
	case RET_PTR_TO_MEM_OR_BTF_ID:
	{
		const struct btf_type *t;

		mark_reg_known_zero(env, regs, BPF_REG_0);
		t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
		if (!btf_type_is_struct(t)) {
			u32 tsize;
			const struct btf_type *ret;
			const char *tname;

			/* resolve the type size of ksym. */
			ret = btf_resolve_size(meta.ret_btf, t, &tsize);
			if (IS_ERR(ret)) {
				tname = btf_name_by_offset(meta.ret_btf, t->name_off);
				verbose(env, "unable to resolve the size of type '%s': %ld\n",
					tname, PTR_ERR(ret));
				return -EINVAL;
			}
			regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
			regs[BPF_REG_0].mem_size = tsize;
		} else {
			if (returns_cpu_specific_alloc_ptr) {
				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC | MEM_RCU;
			} else {
				/* MEM_RDONLY may be carried from ret_flag, but it
				 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
				 * it will confuse the check of PTR_TO_BTF_ID in
				 * check_mem_access().
				 */
				ret_flag &= ~MEM_RDONLY;
				regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
			}

			regs[BPF_REG_0].btf = meta.ret_btf;
			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
		}
		break;
	}
	case RET_PTR_TO_BTF_ID:
	{
		struct btf *ret_btf;
		int ret_btf_id;

		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
		if (func_id == BPF_FUNC_kptr_xchg) {
			ret_btf = meta.kptr_field->kptr.btf;
			ret_btf_id = meta.kptr_field->kptr.btf_id;
			if (!btf_is_kernel(ret_btf)) {
				regs[BPF_REG_0].type |= MEM_ALLOC;
				if (meta.kptr_field->type == BPF_KPTR_PERCPU)
					regs[BPF_REG_0].type |= MEM_PERCPU;
			}
		} else {
			if (fn->ret_btf_id == BPF_PTR_POISON) {
				verbose(env, "verifier internal error:");
				verbose(env, "func %s has non-overwritten BPF_PTR_POISON return type\n",
					func_id_name(func_id));
				return -EINVAL;
			}
			ret_btf = btf_vmlinux;
			ret_btf_id = *fn->ret_btf_id;
		}
		if (ret_btf_id == 0) {
			verbose(env, "invalid return type %u of func %s#%d\n",
				base_type(ret_type), func_id_name(func_id),
				func_id);
			return -EINVAL;
		}
		regs[BPF_REG_0].btf = ret_btf;
		regs[BPF_REG_0].btf_id = ret_btf_id;
		break;
	}
	default:
		verbose(env, "unknown return type %u of func %s#%d\n",
			base_type(ret_type), func_id_name(func_id), func_id);
		return -EINVAL;
	}

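/* Illustrative sketch (not part of the verifier): for a *_OR_NULL return type
 * the id assigned just below lets mark_ptr_or_null_reg() prune the NULL branch,
 * so the usual pattern on the BPF program side is:
 *
 *	struct val *v = bpf_map_lookup_elem(&my_map, &key);
 *
 *	if (!v)
 *		return 0;
 *	v->counter++;
 *
 * After the NULL check, v is a plain PTR_TO_MAP_VALUE on the taken path and
 * the dereference is accepted.
 */
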
	if (type_may_be_null(regs[BPF_REG_0].type))
		regs[BPF_REG_0].id = ++env->id_gen;

	if (helper_multiple_ref_obj_use(func_id, meta.map_ptr)) {
		verbose(env, "verifier internal error: func %s#%d sets ref_obj_id more than once\n",
			func_id_name(func_id), func_id);
		return -EFAULT;
	}

	if (is_dynptr_ref_function(func_id))
		regs[BPF_REG_0].dynptr_id = meta.dynptr_id;

	if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
		/* For release_reference() */
		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
	} else if (is_acquire_function(func_id, meta.map_ptr)) {
		int id = acquire_reference_state(env, insn_idx);

		if (id < 0)
			return id;
		/* For mark_ptr_or_null_reg() */
		regs[BPF_REG_0].id = id;
		/* For release_reference() */
		regs[BPF_REG_0].ref_obj_id = id;
	}

	do_refine_retval_range(regs, fn->ret_type, func_id, &meta);

	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
	if (err)
		return err;

	if ((func_id == BPF_FUNC_get_stack ||
	     func_id == BPF_FUNC_get_task_stack) &&
	    !env->prog->has_callchain_buf) {
		const char *err_str;

#ifdef CONFIG_PERF_EVENTS
		err = get_callchain_buffers(sysctl_perf_event_max_stack);
		err_str = "cannot get callchain buffer for func %s#%d\n";
#else
		err = -ENOTSUPP;
		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
#endif
		if (err) {
			verbose(env, err_str, func_id_name(func_id), func_id);
			return err;
		}

		env->prog->has_callchain_buf = true;
	}

	if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
		env->prog->call_get_stack = true;

	if (func_id == BPF_FUNC_get_func_ip) {
		if (check_get_func_ip(env))
			return -ENOTSUPP;
		env->prog->call_get_func_ip = true;
	}

	if (changes_data)
		clear_all_pkt_pointers(env);
	return 0;
}

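/* Illustrative sketch (not part of the verifier): when a helper has
 * changes_data set (one that may move or reallocate packet memory, such as
 * bpf_skb_change_tail()), clear_all_pkt_pointers() above invalidates every
 * previously derived packet pointer, so the program must re-load and re-bound
 * them:
 *
 *	void *data = (void *)(long)skb->data;
 *	...
 *	bpf_skb_change_tail(skb, new_len, 0);
 *	data = (void *)(long)skb->data;
 */
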
/* mark_btf_func_reg_size() is used when the reg size is determined by
 * the BTF func_proto's return value size and argument.
 */
static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
				   size_t reg_size)
{
	struct bpf_reg_state *reg = &cur_regs(env)[regno];

	if (regno == BPF_REG_0) {
		/* Function return value */
		reg->live |= REG_LIVE_WRITTEN;
		reg->subreg_def = reg_size == sizeof(u64) ?
			DEF_NOT_SUBREG : env->insn_idx + 1;
	} else {
		/* Function argument */
		if (reg_size == sizeof(u64)) {
			mark_insn_zext(env, reg);
			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
		} else {
			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
		}
	}
}

10532static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
10533{
10534 return meta->kfunc_flags & KF_ACQUIRE;
10535}
10536
10537static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
10538{
10539 return meta->kfunc_flags & KF_RELEASE;
10540}
10541
10542static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta)
10543{
10544 return (meta->kfunc_flags & KF_TRUSTED_ARGS) || is_kfunc_release(meta);
10545}
10546
10547static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
10548{
10549 return meta->kfunc_flags & KF_SLEEPABLE;
10550}
10551
10552static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
10553{
10554 return meta->kfunc_flags & KF_DESTRUCTIVE;
10555}
10556
10557static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
10558{
10559 return meta->kfunc_flags & KF_RCU;
10560}
10561
10562static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta)
10563{
10564 return meta->kfunc_flags & KF_RCU_PROTECTED;
10565}
10566
10567static bool __kfunc_param_match_suffix(const struct btf *btf,
10568 const struct btf_param *arg,
10569 const char *suffix)
10570{
10571 int suffix_len = strlen(suffix), len;
10572 const char *param_name;
10573
10574 /* In the future, this can be ported to use BTF tagging */
10575 param_name = btf_name_by_offset(btf, offset: arg->name_off);
10576 if (str_is_empty(s: param_name))
10577 return false;
10578 len = strlen(param_name);
10579 if (len < suffix_len)
10580 return false;
10581 param_name += len - suffix_len;
10582 return !strncmp(param_name, suffix, suffix_len);
10583}
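
/* Illustrative sketch (not part of the verifier): the suffix matching above is
 * how kfunc authors annotate parameters by name. For instance, the dynptr
 * slice kfunc is declared roughly as
 *
 *	void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset,
 *			       void *buffer__opt, u32 buffer__szk);
 *
 * where "__opt" marks the buffer as optional and "__szk" marks the following
 * argument as its constant size; the helpers below translate such suffixes
 * into KF_ARG_* classifications.
 */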
10584
10585static bool is_kfunc_arg_mem_size(const struct btf *btf,
10586 const struct btf_param *arg,
10587 const struct bpf_reg_state *reg)
10588{
10589 const struct btf_type *t;
10590
10591 t = btf_type_skip_modifiers(btf, id: arg->type, NULL);
10592 if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
10593 return false;
10594
10595 return __kfunc_param_match_suffix(btf, arg, suffix: "__sz");
10596}
10597
10598static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
10599 const struct btf_param *arg,
10600 const struct bpf_reg_state *reg)
10601{
10602 const struct btf_type *t;
10603
10604 t = btf_type_skip_modifiers(btf, id: arg->type, NULL);
10605 if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
10606 return false;
10607
10608 return __kfunc_param_match_suffix(btf, arg, suffix: "__szk");
10609}
10610
10611static bool is_kfunc_arg_optional(const struct btf *btf, const struct btf_param *arg)
10612{
10613 return __kfunc_param_match_suffix(btf, arg, suffix: "__opt");
10614}
10615
10616static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
10617{
10618 return __kfunc_param_match_suffix(btf, arg, suffix: "__k");
10619}
10620
10621static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
10622{
10623 return __kfunc_param_match_suffix(btf, arg, suffix: "__ign");
10624}
10625
10626static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
10627{
10628 return __kfunc_param_match_suffix(btf, arg, suffix: "__alloc");
10629}
10630
10631static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
10632{
10633 return __kfunc_param_match_suffix(btf, arg, suffix: "__uninit");
10634}
10635
10636static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg)
10637{
10638 return __kfunc_param_match_suffix(btf, arg, suffix: "__refcounted_kptr");
10639}
10640
10641static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param *arg)
10642{
10643 return __kfunc_param_match_suffix(btf, arg, suffix: "__nullable");
10644}
10645
10646static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
10647 const struct btf_param *arg,
10648 const char *name)
10649{
10650 int len, target_len = strlen(name);
10651 const char *param_name;
10652
10653 param_name = btf_name_by_offset(btf, offset: arg->name_off);
10654 if (str_is_empty(s: param_name))
10655 return false;
10656 len = strlen(param_name);
10657 if (len != target_len)
10658 return false;
10659 if (strcmp(param_name, name))
10660 return false;
10661
10662 return true;
10663}
10664
10665enum {
10666 KF_ARG_DYNPTR_ID,
10667 KF_ARG_LIST_HEAD_ID,
10668 KF_ARG_LIST_NODE_ID,
10669 KF_ARG_RB_ROOT_ID,
10670 KF_ARG_RB_NODE_ID,
10671};
10672
10673BTF_ID_LIST(kf_arg_btf_ids)
10674BTF_ID(struct, bpf_dynptr_kern)
10675BTF_ID(struct, bpf_list_head)
10676BTF_ID(struct, bpf_list_node)
10677BTF_ID(struct, bpf_rb_root)
10678BTF_ID(struct, bpf_rb_node)
10679
10680static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
10681 const struct btf_param *arg, int type)
10682{
10683 const struct btf_type *t;
10684 u32 res_id;
10685
10686 t = btf_type_skip_modifiers(btf, id: arg->type, NULL);
10687 if (!t)
10688 return false;
10689 if (!btf_type_is_ptr(t))
10690 return false;
10691 t = btf_type_skip_modifiers(btf, id: t->type, res_id: &res_id);
10692 if (!t)
10693 return false;
10694 return btf_types_are_same(btf1: btf, id1: res_id, btf2: btf_vmlinux, id2: kf_arg_btf_ids[type]);
10695}
10696
10697static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg)
10698{
10699 return __is_kfunc_ptr_arg_type(btf, arg, type: KF_ARG_DYNPTR_ID);
10700}
10701
10702static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg)
10703{
10704 return __is_kfunc_ptr_arg_type(btf, arg, type: KF_ARG_LIST_HEAD_ID);
10705}
10706
10707static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg)
10708{
10709 return __is_kfunc_ptr_arg_type(btf, arg, type: KF_ARG_LIST_NODE_ID);
10710}
10711
10712static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg)
10713{
10714 return __is_kfunc_ptr_arg_type(btf, arg, type: KF_ARG_RB_ROOT_ID);
10715}
10716
10717static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg)
10718{
10719 return __is_kfunc_ptr_arg_type(btf, arg, type: KF_ARG_RB_NODE_ID);
10720}
10721
10722static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
10723 const struct btf_param *arg)
10724{
10725 const struct btf_type *t;
10726
10727 t = btf_type_resolve_func_ptr(btf, id: arg->type, NULL);
10728 if (!t)
10729 return false;
10730
10731 return true;
10732}
10733
10734/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
10735static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
10736 const struct btf *btf,
10737 const struct btf_type *t, int rec)
10738{
10739 const struct btf_type *member_type;
10740 const struct btf_member *member;
10741 u32 i;
10742
10743 if (!btf_type_is_struct(t))
10744 return false;
10745
10746 for_each_member(i, t, member) {
10747 const struct btf_array *array;
10748
10749 member_type = btf_type_skip_modifiers(btf, id: member->type, NULL);
10750 if (btf_type_is_struct(t: member_type)) {
10751 if (rec >= 3) {
10752 verbose(private_data: env, fmt: "max struct nesting depth exceeded\n");
10753 return false;
10754 }
10755 if (!__btf_type_is_scalar_struct(env, btf, t: member_type, rec: rec + 1))
10756 return false;
10757 continue;
10758 }
10759 if (btf_type_is_array(t: member_type)) {
10760 array = btf_array(t: member_type);
10761 if (!array->nelems)
10762 return false;
10763 member_type = btf_type_skip_modifiers(btf, id: array->type, NULL);
10764 if (!btf_type_is_scalar(t: member_type))
10765 return false;
10766 continue;
10767 }
10768 if (!btf_type_is_scalar(t: member_type))
10769 return false;
10770 }
10771 return true;
10772}
10773
10774enum kfunc_ptr_arg_type {
10775 KF_ARG_PTR_TO_CTX,
10776 KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */
10777 KF_ARG_PTR_TO_REFCOUNTED_KPTR, /* Refcounted local kptr */
10778 KF_ARG_PTR_TO_DYNPTR,
10779 KF_ARG_PTR_TO_ITER,
10780 KF_ARG_PTR_TO_LIST_HEAD,
10781 KF_ARG_PTR_TO_LIST_NODE,
10782 KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */
10783 KF_ARG_PTR_TO_MEM,
10784 KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it */
10785 KF_ARG_PTR_TO_CALLBACK,
10786 KF_ARG_PTR_TO_RB_ROOT,
10787 KF_ARG_PTR_TO_RB_NODE,
10788 KF_ARG_PTR_TO_NULL,
10789};
10790
10791enum special_kfunc_type {
10792 KF_bpf_obj_new_impl,
10793 KF_bpf_obj_drop_impl,
10794 KF_bpf_refcount_acquire_impl,
10795 KF_bpf_list_push_front_impl,
10796 KF_bpf_list_push_back_impl,
10797 KF_bpf_list_pop_front,
10798 KF_bpf_list_pop_back,
10799 KF_bpf_cast_to_kern_ctx,
10800 KF_bpf_rdonly_cast,
10801 KF_bpf_rcu_read_lock,
10802 KF_bpf_rcu_read_unlock,
10803 KF_bpf_rbtree_remove,
10804 KF_bpf_rbtree_add_impl,
10805 KF_bpf_rbtree_first,
10806 KF_bpf_dynptr_from_skb,
10807 KF_bpf_dynptr_from_xdp,
10808 KF_bpf_dynptr_slice,
10809 KF_bpf_dynptr_slice_rdwr,
10810 KF_bpf_dynptr_clone,
10811 KF_bpf_percpu_obj_new_impl,
10812 KF_bpf_percpu_obj_drop_impl,
10813 KF_bpf_throw,
10814 KF_bpf_iter_css_task_new,
10815};
10816
10817BTF_SET_START(special_kfunc_set)
10818BTF_ID(func, bpf_obj_new_impl)
10819BTF_ID(func, bpf_obj_drop_impl)
10820BTF_ID(func, bpf_refcount_acquire_impl)
10821BTF_ID(func, bpf_list_push_front_impl)
10822BTF_ID(func, bpf_list_push_back_impl)
10823BTF_ID(func, bpf_list_pop_front)
10824BTF_ID(func, bpf_list_pop_back)
10825BTF_ID(func, bpf_cast_to_kern_ctx)
10826BTF_ID(func, bpf_rdonly_cast)
10827BTF_ID(func, bpf_rbtree_remove)
10828BTF_ID(func, bpf_rbtree_add_impl)
10829BTF_ID(func, bpf_rbtree_first)
10830BTF_ID(func, bpf_dynptr_from_skb)
10831BTF_ID(func, bpf_dynptr_from_xdp)
10832BTF_ID(func, bpf_dynptr_slice)
10833BTF_ID(func, bpf_dynptr_slice_rdwr)
10834BTF_ID(func, bpf_dynptr_clone)
10835BTF_ID(func, bpf_percpu_obj_new_impl)
10836BTF_ID(func, bpf_percpu_obj_drop_impl)
10837BTF_ID(func, bpf_throw)
10838BTF_ID(func, bpf_iter_css_task_new)
10839BTF_SET_END(special_kfunc_set)
10840
10841BTF_ID_LIST(special_kfunc_list)
10842BTF_ID(func, bpf_obj_new_impl)
10843BTF_ID(func, bpf_obj_drop_impl)
10844BTF_ID(func, bpf_refcount_acquire_impl)
10845BTF_ID(func, bpf_list_push_front_impl)
10846BTF_ID(func, bpf_list_push_back_impl)
10847BTF_ID(func, bpf_list_pop_front)
10848BTF_ID(func, bpf_list_pop_back)
10849BTF_ID(func, bpf_cast_to_kern_ctx)
10850BTF_ID(func, bpf_rdonly_cast)
10851BTF_ID(func, bpf_rcu_read_lock)
10852BTF_ID(func, bpf_rcu_read_unlock)
10853BTF_ID(func, bpf_rbtree_remove)
10854BTF_ID(func, bpf_rbtree_add_impl)
10855BTF_ID(func, bpf_rbtree_first)
10856BTF_ID(func, bpf_dynptr_from_skb)
10857BTF_ID(func, bpf_dynptr_from_xdp)
10858BTF_ID(func, bpf_dynptr_slice)
10859BTF_ID(func, bpf_dynptr_slice_rdwr)
10860BTF_ID(func, bpf_dynptr_clone)
10861BTF_ID(func, bpf_percpu_obj_new_impl)
10862BTF_ID(func, bpf_percpu_obj_drop_impl)
10863BTF_ID(func, bpf_throw)
10864BTF_ID(func, bpf_iter_css_task_new)
10865
10866static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
10867{
10868 if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
10869 meta->arg_owning_ref) {
10870 return false;
10871 }
10872
10873 return meta->kfunc_flags & KF_RET_NULL;
10874}
10875
10876static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
10877{
10878 return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
10879}
10880
10881static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
10882{
10883 return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
10884}
10885
static enum kfunc_ptr_arg_type
get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
		       struct bpf_kfunc_call_arg_meta *meta,
		       const struct btf_type *t, const struct btf_type *ref_t,
		       const char *ref_tname, const struct btf_param *args,
		       int argno, int nargs)
{
	u32 regno = argno + 1;
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_reg_state *reg = &regs[regno];
	bool arg_mem_size = false;

	if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
		return KF_ARG_PTR_TO_CTX;

	/* In this function, we verify the kfunc's BTF as per the argument type,
	 * leaving the rest of the verification with respect to the register
	 * type to our caller. When a set of conditions hold in the BTF type of
	 * arguments, we resolve it to a known kfunc_ptr_arg_type.
	 */
	if (btf_get_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
		return KF_ARG_PTR_TO_CTX;

	if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_ALLOC_BTF_ID;

	if (is_kfunc_arg_refcounted_kptr(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_REFCOUNTED_KPTR;

	if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_DYNPTR;

	if (is_kfunc_arg_iter(meta, argno))
		return KF_ARG_PTR_TO_ITER;

	if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_LIST_HEAD;

	if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_LIST_NODE;

	if (is_kfunc_arg_rbtree_root(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_RB_ROOT;

	if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_RB_NODE;

	if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
		if (!btf_type_is_struct(ref_t)) {
			verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
				meta->func_name, argno, btf_type_str(ref_t), ref_tname);
			return -EINVAL;
		}
		return KF_ARG_PTR_TO_BTF_ID;
	}

	if (is_kfunc_arg_callback(env, meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_CALLBACK;

	if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && register_is_null(reg))
		return KF_ARG_PTR_TO_NULL;

	if (argno + 1 < nargs &&
	    (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]) ||
	     is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1])))
		arg_mem_size = true;

	/* This is the catch all argument type of register types supported by
	 * check_helper_mem_access. However, we only allow when argument type is
	 * pointer to scalar, or struct composed (recursively) of scalars. When
	 * arg_mem_size is true, the pointer can be void *.
	 */
	if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
	    (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
		verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
			argno, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
		return -EINVAL;
	}
	return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
}
10966
10967static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
10968 struct bpf_reg_state *reg,
10969 const struct btf_type *ref_t,
10970 const char *ref_tname, u32 ref_id,
10971 struct bpf_kfunc_call_arg_meta *meta,
10972 int argno)
10973{
10974 const struct btf_type *reg_ref_t;
10975 bool strict_type_match = false;
10976 const struct btf *reg_btf;
10977 const char *reg_ref_tname;
10978 u32 reg_ref_id;
10979
10980 if (base_type(type: reg->type) == PTR_TO_BTF_ID) {
10981 reg_btf = reg->btf;
10982 reg_ref_id = reg->btf_id;
10983 } else {
10984 reg_btf = btf_vmlinux;
10985 reg_ref_id = *reg2btf_ids[base_type(type: reg->type)];
10986 }
10987
10988 /* Enforce strict type matching for calls to kfuncs that are acquiring
10989 * or releasing a reference, or are no-cast aliases. We do _not_
10990 * enforce strict matching for plain KF_TRUSTED_ARGS kfuncs by default,
10991 * as we want to enable BPF programs to pass types that are bitwise
10992 * equivalent without forcing them to explicitly cast with something
10993 * like bpf_cast_to_kern_ctx().
10994 *
10995 * For example, say we had a type like the following:
10996 *
10997 * struct bpf_cpumask {
10998 * cpumask_t cpumask;
10999 * refcount_t usage;
11000 * };
11001 *
11002 * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
11003 * to a struct cpumask, so it would be safe to pass a struct
11004 * bpf_cpumask * to a kfunc expecting a struct cpumask *.
11005 *
11006 * The philosophy here is similar to how we allow scalars of different
11007 * types to be passed to kfuncs as long as the size is the same. The
11008 * only difference here is that we're simply allowing
11009 * btf_struct_ids_match() to walk the struct at the 0th offset, and
11010 * resolve types.
11011 */
11012 if (is_kfunc_acquire(meta) ||
11013 (is_kfunc_release(meta) && reg->ref_obj_id) ||
11014 btf_type_ids_nocast_alias(log: &env->log, reg_btf, reg_id: reg_ref_id, arg_btf: meta->btf, arg_id: ref_id))
11015 strict_type_match = true;
11016
11017 WARN_ON_ONCE(is_kfunc_trusted_args(meta) && reg->off);
11018
11019 reg_ref_t = btf_type_skip_modifiers(btf: reg_btf, id: reg_ref_id, res_id: &reg_ref_id);
11020 reg_ref_tname = btf_name_by_offset(btf: reg_btf, offset: reg_ref_t->name_off);
11021 if (!btf_struct_ids_match(log: &env->log, btf: reg_btf, id: reg_ref_id, off: reg->off, need_btf: meta->btf, need_type_id: ref_id, strict: strict_type_match)) {
11022 verbose(private_data: env, fmt: "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
11023 meta->func_name, argno, btf_type_str(t: ref_t), ref_tname, argno + 1,
11024 btf_type_str(t: reg_ref_t), reg_ref_tname);
11025 return -EINVAL;
11026 }
11027 return 0;
11028}
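
/* Illustrative sketch (not part of the verifier): with the relaxed
 * (non-strict) matching described above, a program may hand a
 * struct bpf_cpumask * to a kfunc that expects a struct cpumask *, since
 * btf_struct_ids_match() can walk the struct at offset 0:
 *
 *	struct bpf_cpumask *mask = bpf_cpumask_create();
 *
 *	if (mask && bpf_cpumask_test_cpu(0, (const struct cpumask *)mask))
 *		...;
 *	if (mask)
 *		bpf_cpumask_release(mask);
 */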
11029
11030static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
11031{
11032 struct bpf_verifier_state *state = env->cur_state;
11033 struct btf_record *rec = reg_btf_record(reg);
11034
11035 if (!state->active_lock.ptr) {
11036 verbose(private_data: env, fmt: "verifier internal error: ref_set_non_owning w/o active lock\n");
11037 return -EFAULT;
11038 }
11039
11040 if (type_flag(type: reg->type) & NON_OWN_REF) {
11041 verbose(private_data: env, fmt: "verifier internal error: NON_OWN_REF already set\n");
11042 return -EFAULT;
11043 }
11044
11045 reg->type |= NON_OWN_REF;
11046 if (rec->refcount_off >= 0)
11047 reg->type |= MEM_RCU;
11048
11049 return 0;
11050}
11051
11052static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id)
11053{
11054 struct bpf_func_state *state, *unused;
11055 struct bpf_reg_state *reg;
11056 int i;
11057
11058 state = cur_func(env);
11059
11060 if (!ref_obj_id) {
11061 verbose(private_data: env, fmt: "verifier internal error: ref_obj_id is zero for "
11062 "owning -> non-owning conversion\n");
11063 return -EFAULT;
11064 }
11065
11066 for (i = 0; i < state->acquired_refs; i++) {
11067 if (state->refs[i].id != ref_obj_id)
11068 continue;
11069
11070 /* Clear ref_obj_id here so release_reference doesn't clobber
11071 * the whole reg
11072 */
11073 bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
11074 if (reg->ref_obj_id == ref_obj_id) {
11075 reg->ref_obj_id = 0;
11076 ref_set_non_owning(env, reg);
11077 }
11078 }));
11079 return 0;
11080 }
11081
11082 verbose(private_data: env, fmt: "verifier internal error: ref state missing for ref_obj_id\n");
11083 return -EFAULT;
11084}
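
/* Illustrative sketch (not part of the verifier): owning -> non-owning
 * conversion happens when an allocated node is inserted into a collection,
 * e.g. (using the bpf_experimental.h wrappers):
 *
 *	n = bpf_obj_new(typeof(*n));
 *	if (!n)
 *		return 0;
 *	bpf_spin_lock(&glock);
 *	bpf_rbtree_add(&groot, &n->node, less);
 *	bpf_spin_unlock(&glock);
 *
 * After bpf_rbtree_add() the tree owns the node; registers that still point
 * to it lose their ref_obj_id and become non-owning references as done above.
 */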
11085
/* Implementation details:
 *
 * Each register points to some region of memory, which we define as an
 * allocation. Each allocation may embed a bpf_spin_lock which protects any
 * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same
 * allocation. The lock and the data it protects are colocated in the same
 * memory region.
 *
 * Hence, every time a register holds a pointer value pointing to such
 * allocation, the verifier preserves a unique reg->id for it.
 *
 * The verifier remembers the lock 'ptr' and the lock 'id' whenever
 * bpf_spin_lock is called.
 *
 * To enable this, lock state in the verifier captures two values:
 *	active_lock.ptr = Register's type specific pointer
 *	active_lock.id  = A unique ID for each register pointer value
 *
 * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
 * supported register types.
 *
 * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
 * allocated objects is the reg->btf pointer.
 *
 * The active_lock.id is non-unique for maps supporting direct_value_addr, as we
 * can establish the provenance of the map value statically for each distinct
 * lookup into such maps. They always contain a single map value hence unique
 * IDs for each pseudo load pessimizes the algorithm and rejects valid programs.
 *
 * So, in case of global variables, they use array maps with max_entries = 1,
 * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point
 * into the same map value as max_entries is 1, as described above).
 *
 * In case of inner map lookups, the inner map pointer has same map_ptr as the
 * outer map pointer (in verifier context), but each lookup into an inner map
 * assigns a fresh reg->id to the lookup, so while lookups into distinct inner
 * maps from the same outer map share the same map_ptr as active_lock.ptr, they
 * will get different reg->id assigned to each lookup, hence different
 * active_lock.id.
 *
 * In case of allocated objects, active_lock.ptr is the reg->btf, and the
 * reg->id is a unique ID preserved after the NULL pointer check on the pointer
 * returned from bpf_obj_new. Each allocation receives a new reg->id.
 */
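
/* Illustrative sketch (not part of the verifier): the rule enforced by
 * check_reg_allocation_locked() below is that the held bpf_spin_lock and the
 * list/rbtree it protects live in the same allocation, e.g. the same map
 * value:
 *
 *	struct elem {
 *		struct bpf_spin_lock lock;
 *		struct bpf_list_head head __contains(foo, node);
 *	};
 *
 *	bpf_spin_lock(&e->lock);
 *	bpf_list_push_front(&e->head, &f->node);
 *	bpf_spin_unlock(&e->lock);
 */
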
11130static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
11131{
11132 void *ptr;
11133 u32 id;
11134
11135 switch ((int)reg->type) {
11136 case PTR_TO_MAP_VALUE:
11137 ptr = reg->map_ptr;
11138 break;
11139 case PTR_TO_BTF_ID | MEM_ALLOC:
11140 ptr = reg->btf;
11141 break;
11142 default:
11143 verbose(private_data: env, fmt: "verifier internal error: unknown reg type for lock check\n");
11144 return -EFAULT;
11145 }
11146 id = reg->id;
11147
11148 if (!env->cur_state->active_lock.ptr)
11149 return -EINVAL;
11150 if (env->cur_state->active_lock.ptr != ptr ||
11151 env->cur_state->active_lock.id != id) {
11152 verbose(private_data: env, fmt: "held lock and object are not in the same allocation\n");
11153 return -EINVAL;
11154 }
11155 return 0;
11156}
11157
11158static bool is_bpf_list_api_kfunc(u32 btf_id)
11159{
11160 return btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
11161 btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
11162 btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
11163 btf_id == special_kfunc_list[KF_bpf_list_pop_back];
11164}
11165
11166static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
11167{
11168 return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
11169 btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
11170 btf_id == special_kfunc_list[KF_bpf_rbtree_first];
11171}
11172
11173static bool is_bpf_graph_api_kfunc(u32 btf_id)
11174{
11175 return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id) ||
11176 btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
11177}
11178
11179static bool is_callback_calling_kfunc(u32 btf_id)
11180{
11181 return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
11182}
11183
11184static bool is_bpf_throw_kfunc(struct bpf_insn *insn)
11185{
11186 return bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
11187 insn->imm == special_kfunc_list[KF_bpf_throw];
11188}
11189
11190static bool is_rbtree_lock_required_kfunc(u32 btf_id)
11191{
11192 return is_bpf_rbtree_api_kfunc(btf_id);
11193}
11194
11195static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
11196 enum btf_field_type head_field_type,
11197 u32 kfunc_btf_id)
11198{
11199 bool ret;
11200
11201 switch (head_field_type) {
11202 case BPF_LIST_HEAD:
11203 ret = is_bpf_list_api_kfunc(btf_id: kfunc_btf_id);
11204 break;
11205 case BPF_RB_ROOT:
11206 ret = is_bpf_rbtree_api_kfunc(btf_id: kfunc_btf_id);
11207 break;
11208 default:
11209 verbose(private_data: env, fmt: "verifier internal error: unexpected graph root argument type %s\n",
11210 btf_field_type_name(type: head_field_type));
11211 return false;
11212 }
11213
11214 if (!ret)
11215 verbose(private_data: env, fmt: "verifier internal error: %s head arg for unknown kfunc\n",
11216 btf_field_type_name(type: head_field_type));
11217 return ret;
11218}
11219
11220static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
11221 enum btf_field_type node_field_type,
11222 u32 kfunc_btf_id)
11223{
11224 bool ret;
11225
11226 switch (node_field_type) {
11227 case BPF_LIST_NODE:
11228 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
11229 kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl]);
11230 break;
11231 case BPF_RB_NODE:
11232 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
11233 kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]);
11234 break;
11235 default:
11236 verbose(private_data: env, fmt: "verifier internal error: unexpected graph node argument type %s\n",
11237 btf_field_type_name(type: node_field_type));
11238 return false;
11239 }
11240
11241 if (!ret)
11242 verbose(private_data: env, fmt: "verifier internal error: %s node arg for unknown kfunc\n",
11243 btf_field_type_name(type: node_field_type));
11244 return ret;
11245}
11246
11247static int
11248__process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
11249 struct bpf_reg_state *reg, u32 regno,
11250 struct bpf_kfunc_call_arg_meta *meta,
11251 enum btf_field_type head_field_type,
11252 struct btf_field **head_field)
11253{
11254 const char *head_type_name;
11255 struct btf_field *field;
11256 struct btf_record *rec;
11257 u32 head_off;
11258
11259 if (meta->btf != btf_vmlinux) {
11260 verbose(private_data: env, fmt: "verifier internal error: unexpected btf mismatch in kfunc call\n");
11261 return -EFAULT;
11262 }
11263
11264 if (!check_kfunc_is_graph_root_api(env, head_field_type, kfunc_btf_id: meta->func_id))
11265 return -EFAULT;
11266
11267 head_type_name = btf_field_type_name(type: head_field_type);
11268 if (!tnum_is_const(a: reg->var_off)) {
11269 verbose(private_data: env,
11270 fmt: "R%d doesn't have constant offset. %s has to be at the constant offset\n",
11271 regno, head_type_name);
11272 return -EINVAL;
11273 }
11274
11275 rec = reg_btf_record(reg);
11276 head_off = reg->off + reg->var_off.value;
11277 field = btf_record_find(rec, offset: head_off, field_mask: head_field_type);
11278 if (!field) {
11279 verbose(private_data: env, fmt: "%s not found at offset=%u\n", head_type_name, head_off);
11280 return -EINVAL;
11281 }
11282
11283 /* All functions require bpf_list_head to be protected using a bpf_spin_lock */
11284 if (check_reg_allocation_locked(env, reg)) {
11285 verbose(private_data: env, fmt: "bpf_spin_lock at off=%d must be held for %s\n",
11286 rec->spin_lock_off, head_type_name);
11287 return -EINVAL;
11288 }
11289
11290 if (*head_field) {
11291 verbose(private_data: env, fmt: "verifier internal error: repeating %s arg\n", head_type_name);
11292 return -EFAULT;
11293 }
11294 *head_field = field;
11295 return 0;
11296}
11297
11298static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
11299 struct bpf_reg_state *reg, u32 regno,
11300 struct bpf_kfunc_call_arg_meta *meta)
11301{
11302 return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, head_field_type: BPF_LIST_HEAD,
11303 head_field: &meta->arg_list_head.field);
11304}
11305
11306static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
11307 struct bpf_reg_state *reg, u32 regno,
11308 struct bpf_kfunc_call_arg_meta *meta)
11309{
11310 return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, head_field_type: BPF_RB_ROOT,
11311 head_field: &meta->arg_rbtree_root.field);
11312}
11313
11314static int
11315__process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
11316 struct bpf_reg_state *reg, u32 regno,
11317 struct bpf_kfunc_call_arg_meta *meta,
11318 enum btf_field_type head_field_type,
11319 enum btf_field_type node_field_type,
11320 struct btf_field **node_field)
11321{
11322 const char *node_type_name;
11323 const struct btf_type *et, *t;
11324 struct btf_field *field;
11325 u32 node_off;
11326
11327 if (meta->btf != btf_vmlinux) {
11328 verbose(private_data: env, fmt: "verifier internal error: unexpected btf mismatch in kfunc call\n");
11329 return -EFAULT;
11330 }
11331
11332 if (!check_kfunc_is_graph_node_api(env, node_field_type, kfunc_btf_id: meta->func_id))
11333 return -EFAULT;
11334
11335 node_type_name = btf_field_type_name(type: node_field_type);
11336 if (!tnum_is_const(a: reg->var_off)) {
11337 verbose(private_data: env,
11338 fmt: "R%d doesn't have constant offset. %s has to be at the constant offset\n",
11339 regno, node_type_name);
11340 return -EINVAL;
11341 }
11342
11343 node_off = reg->off + reg->var_off.value;
11344 field = reg_find_field_offset(reg, off: node_off, fields: node_field_type);
11345 if (!field || field->offset != node_off) {
11346 verbose(private_data: env, fmt: "%s not found at offset=%u\n", node_type_name, node_off);
11347 return -EINVAL;
11348 }
11349
11350 field = *node_field;
11351
11352 et = btf_type_by_id(btf: field->graph_root.btf, type_id: field->graph_root.value_btf_id);
11353 t = btf_type_by_id(btf: reg->btf, type_id: reg->btf_id);
11354 if (!btf_struct_ids_match(log: &env->log, btf: reg->btf, id: reg->btf_id, off: 0, need_btf: field->graph_root.btf,
11355 need_type_id: field->graph_root.value_btf_id, strict: true)) {
11356 verbose(private_data: env, fmt: "operation on %s expects arg#1 %s at offset=%d "
11357 "in struct %s, but arg is at offset=%d in struct %s\n",
11358 btf_field_type_name(type: head_field_type),
11359 btf_field_type_name(type: node_field_type),
11360 field->graph_root.node_offset,
11361 btf_name_by_offset(btf: field->graph_root.btf, offset: et->name_off),
11362 node_off, btf_name_by_offset(btf: reg->btf, offset: t->name_off));
11363 return -EINVAL;
11364 }
11365 meta->arg_btf = reg->btf;
11366 meta->arg_btf_id = reg->btf_id;
11367
11368 if (node_off != field->graph_root.node_offset) {
11369 verbose(private_data: env, fmt: "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
11370 node_off, btf_field_type_name(type: node_field_type),
11371 field->graph_root.node_offset,
11372 btf_name_by_offset(btf: field->graph_root.btf, offset: et->name_off));
11373 return -EINVAL;
11374 }
11375
11376 return 0;
11377}
11378
11379static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
11380 struct bpf_reg_state *reg, u32 regno,
11381 struct bpf_kfunc_call_arg_meta *meta)
11382{
11383 return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
11384 head_field_type: BPF_LIST_HEAD, node_field_type: BPF_LIST_NODE,
11385 node_field: &meta->arg_list_head.field);
11386}
11387
11388static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
11389 struct bpf_reg_state *reg, u32 regno,
11390 struct bpf_kfunc_call_arg_meta *meta)
11391{
11392 return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
11393 head_field_type: BPF_RB_ROOT, node_field_type: BPF_RB_NODE,
11394 node_field: &meta->arg_rbtree_root.field);
11395}
11396
static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env)
{
	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);

	switch (prog_type) {
	case BPF_PROG_TYPE_LSM:
		return true;
	case BPF_PROG_TYPE_TRACING:
		if (env->prog->expected_attach_type == BPF_TRACE_ITER)
			return env->prog->aux->sleepable;
		return false;
	default:
		return false;
	}
}
11410
11411static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
11412 int insn_idx)
11413{
11414 const char *func_name = meta->func_name, *ref_tname;
11415 const struct btf *btf = meta->btf;
11416 const struct btf_param *args;
11417 struct btf_record *rec;
11418 u32 i, nargs;
11419 int ret;
11420
11421 args = (const struct btf_param *)(meta->func_proto + 1);
11422 nargs = btf_type_vlen(t: meta->func_proto);
11423 if (nargs > MAX_BPF_FUNC_REG_ARGS) {
11424 verbose(private_data: env, fmt: "Function %s has %d > %d args\n", func_name, nargs,
11425 MAX_BPF_FUNC_REG_ARGS);
11426 return -EINVAL;
11427 }
11428
11429 /* Check that BTF function arguments match actual types that the
11430 * verifier sees.
11431 */
11432 for (i = 0; i < nargs; i++) {
11433 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[i + 1];
11434 const struct btf_type *t, *ref_t, *resolve_ret;
11435 enum bpf_arg_type arg_type = ARG_DONTCARE;
11436 u32 regno = i + 1, ref_id, type_size;
11437 bool is_ret_buf_sz = false;
11438 int kf_arg_type;
11439
11440 t = btf_type_skip_modifiers(btf, id: args[i].type, NULL);
11441
11442 if (is_kfunc_arg_ignore(btf, arg: &args[i]))
11443 continue;
11444
11445 if (btf_type_is_scalar(t)) {
11446 if (reg->type != SCALAR_VALUE) {
11447 verbose(private_data: env, fmt: "R%d is not a scalar\n", regno);
11448 return -EINVAL;
11449 }
11450
11451 if (is_kfunc_arg_constant(btf: meta->btf, arg: &args[i])) {
11452 if (meta->arg_constant.found) {
11453 verbose(private_data: env, fmt: "verifier internal error: only one constant argument permitted\n");
11454 return -EFAULT;
11455 }
11456 if (!tnum_is_const(a: reg->var_off)) {
11457 verbose(private_data: env, fmt: "R%d must be a known constant\n", regno);
11458 return -EINVAL;
11459 }
11460 ret = mark_chain_precision(env, regno);
11461 if (ret < 0)
11462 return ret;
11463 meta->arg_constant.found = true;
11464 meta->arg_constant.value = reg->var_off.value;
11465 } else if (is_kfunc_arg_scalar_with_name(btf, arg: &args[i], name: "rdonly_buf_size")) {
11466 meta->r0_rdonly = true;
11467 is_ret_buf_sz = true;
11468 } else if (is_kfunc_arg_scalar_with_name(btf, arg: &args[i], name: "rdwr_buf_size")) {
11469 is_ret_buf_sz = true;
11470 }
11471
11472 if (is_ret_buf_sz) {
11473 if (meta->r0_size) {
11474 verbose(private_data: env, fmt: "2 or more rdonly/rdwr_buf_size parameters for kfunc");
11475 return -EINVAL;
11476 }
11477
11478 if (!tnum_is_const(a: reg->var_off)) {
11479 verbose(private_data: env, fmt: "R%d is not a const\n", regno);
11480 return -EINVAL;
11481 }
11482
11483 meta->r0_size = reg->var_off.value;
11484 ret = mark_chain_precision(env, regno);
11485 if (ret)
11486 return ret;
11487 }
11488 continue;
11489 }
11490
11491 if (!btf_type_is_ptr(t)) {
11492 verbose(private_data: env, fmt: "Unrecognized arg#%d type %s\n", i, btf_type_str(t));
11493 return -EINVAL;
11494 }
11495
11496 if ((is_kfunc_trusted_args(meta) || is_kfunc_rcu(meta)) &&
11497 (register_is_null(reg) || type_may_be_null(type: reg->type)) &&
11498 !is_kfunc_arg_nullable(btf: meta->btf, arg: &args[i])) {
11499 verbose(private_data: env, fmt: "Possibly NULL pointer passed to trusted arg%d\n", i);
11500 return -EACCES;
11501 }
11502
11503 if (reg->ref_obj_id) {
11504 if (is_kfunc_release(meta) && meta->ref_obj_id) {
11505 verbose(private_data: env, fmt: "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
11506 regno, reg->ref_obj_id,
11507 meta->ref_obj_id);
11508 return -EFAULT;
11509 }
11510 meta->ref_obj_id = reg->ref_obj_id;
11511 if (is_kfunc_release(meta))
11512 meta->release_regno = regno;
11513 }
11514
11515 ref_t = btf_type_skip_modifiers(btf, id: t->type, res_id: &ref_id);
11516 ref_tname = btf_name_by_offset(btf, offset: ref_t->name_off);
11517
11518 kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, argno: i, nargs);
11519 if (kf_arg_type < 0)
11520 return kf_arg_type;
11521
11522 switch (kf_arg_type) {
11523 case KF_ARG_PTR_TO_NULL:
11524 continue;
11525 case KF_ARG_PTR_TO_ALLOC_BTF_ID:
11526 case KF_ARG_PTR_TO_BTF_ID:
11527 if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta))
11528 break;
11529
11530 if (!is_trusted_reg(reg)) {
11531 if (!is_kfunc_rcu(meta)) {
11532 verbose(private_data: env, fmt: "R%d must be referenced or trusted\n", regno);
11533 return -EINVAL;
11534 }
11535 if (!is_rcu_reg(reg)) {
11536 verbose(private_data: env, fmt: "R%d must be a rcu pointer\n", regno);
11537 return -EINVAL;
11538 }
11539 }
11540
11541 fallthrough;
11542 case KF_ARG_PTR_TO_CTX:
11543 /* Trusted arguments have the same offset checks as release arguments */
11544 arg_type |= OBJ_RELEASE;
11545 break;
11546 case KF_ARG_PTR_TO_DYNPTR:
11547 case KF_ARG_PTR_TO_ITER:
11548 case KF_ARG_PTR_TO_LIST_HEAD:
11549 case KF_ARG_PTR_TO_LIST_NODE:
11550 case KF_ARG_PTR_TO_RB_ROOT:
11551 case KF_ARG_PTR_TO_RB_NODE:
11552 case KF_ARG_PTR_TO_MEM:
11553 case KF_ARG_PTR_TO_MEM_SIZE:
11554 case KF_ARG_PTR_TO_CALLBACK:
11555 case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
11556 /* Trusted by default */
11557 break;
11558 default:
11559 WARN_ON_ONCE(1);
11560 return -EFAULT;
11561 }
11562
11563 if (is_kfunc_release(meta) && reg->ref_obj_id)
11564 arg_type |= OBJ_RELEASE;
11565 ret = check_func_arg_reg_off(env, reg, regno, arg_type);
11566 if (ret < 0)
11567 return ret;
11568
11569 switch (kf_arg_type) {
11570 case KF_ARG_PTR_TO_CTX:
11571 if (reg->type != PTR_TO_CTX) {
11572 verbose(private_data: env, fmt: "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t));
11573 return -EINVAL;
11574 }
11575
11576 if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
11577 ret = get_kern_ctx_btf_id(log: &env->log, prog_type: resolve_prog_type(prog: env->prog));
11578 if (ret < 0)
11579 return -EINVAL;
11580 meta->ret_btf_id = ret;
11581 }
11582 break;
11583 case KF_ARG_PTR_TO_ALLOC_BTF_ID:
11584 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
11585 if (meta->func_id != special_kfunc_list[KF_bpf_obj_drop_impl]) {
11586 verbose(private_data: env, fmt: "arg#%d expected for bpf_obj_drop_impl()\n", i);
11587 return -EINVAL;
11588 }
11589 } else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) {
11590 if (meta->func_id != special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
11591 verbose(private_data: env, fmt: "arg#%d expected for bpf_percpu_obj_drop_impl()\n", i);
11592 return -EINVAL;
11593 }
11594 } else {
11595 verbose(private_data: env, fmt: "arg#%d expected pointer to allocated object\n", i);
11596 return -EINVAL;
11597 }
11598 if (!reg->ref_obj_id) {
11599 verbose(private_data: env, fmt: "allocated object must be referenced\n");
11600 return -EINVAL;
11601 }
11602 if (meta->btf == btf_vmlinux) {
11603 meta->arg_btf = reg->btf;
11604 meta->arg_btf_id = reg->btf_id;
11605 }
11606 break;
11607 case KF_ARG_PTR_TO_DYNPTR:
11608 {
11609 enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
11610 int clone_ref_obj_id = 0;
11611
11612 if (reg->type != PTR_TO_STACK &&
11613 reg->type != CONST_PTR_TO_DYNPTR) {
11614 verbose(private_data: env, fmt: "arg#%d expected pointer to stack or dynptr_ptr\n", i);
11615 return -EINVAL;
11616 }
11617
11618 if (reg->type == CONST_PTR_TO_DYNPTR)
11619 dynptr_arg_type |= MEM_RDONLY;
11620
11621 if (is_kfunc_arg_uninit(btf, arg: &args[i]))
11622 dynptr_arg_type |= MEM_UNINIT;
11623
11624 if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
11625 dynptr_arg_type |= DYNPTR_TYPE_SKB;
11626 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) {
11627 dynptr_arg_type |= DYNPTR_TYPE_XDP;
11628 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] &&
11629 (dynptr_arg_type & MEM_UNINIT)) {
11630 enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type;
11631
11632 if (parent_type == BPF_DYNPTR_TYPE_INVALID) {
11633 verbose(private_data: env, fmt: "verifier internal error: no dynptr type for parent of clone\n");
11634 return -EFAULT;
11635 }
11636
11637 dynptr_arg_type |= (unsigned int)get_dynptr_type_flag(type: parent_type);
11638 clone_ref_obj_id = meta->initialized_dynptr.ref_obj_id;
11639 if (dynptr_type_refcounted(type: parent_type) && !clone_ref_obj_id) {
11640 verbose(private_data: env, fmt: "verifier internal error: missing ref obj id for parent of clone\n");
11641 return -EFAULT;
11642 }
11643 }
11644
11645 ret = process_dynptr_func(env, regno, insn_idx, arg_type: dynptr_arg_type, clone_ref_obj_id);
11646 if (ret < 0)
11647 return ret;
11648
11649 if (!(dynptr_arg_type & MEM_UNINIT)) {
11650 int id = dynptr_id(env, reg);
11651
11652 if (id < 0) {
11653 verbose(private_data: env, fmt: "verifier internal error: failed to obtain dynptr id\n");
11654 return id;
11655 }
11656 meta->initialized_dynptr.id = id;
11657 meta->initialized_dynptr.type = dynptr_get_type(env, reg);
11658 meta->initialized_dynptr.ref_obj_id = dynptr_ref_obj_id(env, reg);
11659 }
11660
11661 break;
11662 }
11663 case KF_ARG_PTR_TO_ITER:
11664 if (meta->func_id == special_kfunc_list[KF_bpf_iter_css_task_new]) {
11665 if (!check_css_task_iter_allowlist(env)) {
11666 verbose(private_data: env, fmt: "css_task_iter is only allowed in bpf_lsm and bpf iter-s\n");
11667 return -EINVAL;
11668 }
11669 }
11670 ret = process_iter_arg(env, regno, insn_idx, meta);
11671 if (ret < 0)
11672 return ret;
11673 break;
11674 case KF_ARG_PTR_TO_LIST_HEAD:
11675 if (reg->type != PTR_TO_MAP_VALUE &&
11676 reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
11677 verbose(private_data: env, fmt: "arg#%d expected pointer to map value or allocated object\n", i);
11678 return -EINVAL;
11679 }
11680 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
11681 verbose(private_data: env, fmt: "allocated object must be referenced\n");
11682 return -EINVAL;
11683 }
11684 ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta);
11685 if (ret < 0)
11686 return ret;
11687 break;
11688 case KF_ARG_PTR_TO_RB_ROOT:
11689 if (reg->type != PTR_TO_MAP_VALUE &&
11690 reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
11691 verbose(private_data: env, fmt: "arg#%d expected pointer to map value or allocated object\n", i);
11692 return -EINVAL;
11693 }
11694 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
11695 verbose(private_data: env, fmt: "allocated object must be referenced\n");
11696 return -EINVAL;
11697 }
11698 ret = process_kf_arg_ptr_to_rbtree_root(env, reg, regno, meta);
11699 if (ret < 0)
11700 return ret;
11701 break;
11702 case KF_ARG_PTR_TO_LIST_NODE:
11703 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
11704 verbose(private_data: env, fmt: "arg#%d expected pointer to allocated object\n", i);
11705 return -EINVAL;
11706 }
11707 if (!reg->ref_obj_id) {
11708 verbose(private_data: env, fmt: "allocated object must be referenced\n");
11709 return -EINVAL;
11710 }
11711 ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta);
11712 if (ret < 0)
11713 return ret;
11714 break;
11715 case KF_ARG_PTR_TO_RB_NODE:
11716 if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
11717 if (!type_is_non_owning_ref(type: reg->type) || reg->ref_obj_id) {
11718 verbose(private_data: env, fmt: "rbtree_remove node input must be non-owning ref\n");
11719 return -EINVAL;
11720 }
11721 if (in_rbtree_lock_required_cb(env)) {
11722 verbose(private_data: env, fmt: "rbtree_remove not allowed in rbtree cb\n");
11723 return -EINVAL;
11724 }
11725 } else {
11726 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
11727 verbose(private_data: env, fmt: "arg#%d expected pointer to allocated object\n", i);
11728 return -EINVAL;
11729 }
11730 if (!reg->ref_obj_id) {
11731 verbose(private_data: env, fmt: "allocated object must be referenced\n");
11732 return -EINVAL;
11733 }
11734 }
11735
11736 ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta);
11737 if (ret < 0)
11738 return ret;
11739 break;
11740 case KF_ARG_PTR_TO_BTF_ID:
11741 /* Only base_type was checked so far; further checks are done here */
11742 if ((base_type(type: reg->type) != PTR_TO_BTF_ID ||
11743 (bpf_type_has_unsafe_modifiers(type: reg->type) && !is_rcu_reg(reg))) &&
11744 !reg2btf_ids[base_type(type: reg->type)]) {
11745 verbose(private_data: env, fmt: "arg#%d is %s ", i, reg_type_str(env, type: reg->type));
11746 verbose(private_data: env, fmt: "expected %s or socket\n",
11747 reg_type_str(env, type: base_type(type: reg->type) |
11748 (type_flag(type: reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
11749 return -EINVAL;
11750 }
11751 ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, argno: i);
11752 if (ret < 0)
11753 return ret;
11754 break;
11755 case KF_ARG_PTR_TO_MEM:
11756 resolve_ret = btf_resolve_size(btf, type: ref_t, type_size: &type_size);
11757 if (IS_ERR(ptr: resolve_ret)) {
11758 verbose(private_data: env, fmt: "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
11759 i, btf_type_str(t: ref_t), ref_tname, PTR_ERR(ptr: resolve_ret));
11760 return -EINVAL;
11761 }
11762 ret = check_mem_reg(env, reg, regno, mem_size: type_size);
11763 if (ret < 0)
11764 return ret;
11765 break;
11766 case KF_ARG_PTR_TO_MEM_SIZE:
11767 {
11768 struct bpf_reg_state *buff_reg = &regs[regno];
11769 const struct btf_param *buff_arg = &args[i];
11770 struct bpf_reg_state *size_reg = &regs[regno + 1];
11771 const struct btf_param *size_arg = &args[i + 1];
11772
11773 if (!register_is_null(reg: buff_reg) || !is_kfunc_arg_optional(btf: meta->btf, arg: buff_arg)) {
11774 ret = check_kfunc_mem_size_reg(env, reg: size_reg, regno: regno + 1);
11775 if (ret < 0) {
11776 verbose(private_data: env, fmt: "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
11777 return ret;
11778 }
11779 }
11780
11781 if (is_kfunc_arg_const_mem_size(btf: meta->btf, arg: size_arg, reg: size_reg)) {
11782 if (meta->arg_constant.found) {
11783 verbose(private_data: env, fmt: "verifier internal error: only one constant argument permitted\n");
11784 return -EFAULT;
11785 }
11786 if (!tnum_is_const(a: size_reg->var_off)) {
11787 verbose(private_data: env, fmt: "R%d must be a known constant\n", regno + 1);
11788 return -EINVAL;
11789 }
11790 meta->arg_constant.found = true;
11791 meta->arg_constant.value = size_reg->var_off.value;
11792 }
11793
11794 /* Skip next '__sz' or '__szk' argument */
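 /* The suffix convention pairs a memory pointer with its size argument; a
  * hypothetical kfunc using it might be declared as
  *   void my_kfunc(void *data, u32 data__sz);     size taken from register bounds
  *   void my_kfunc_k(void *data, u32 data__szk);  size must be a known constant
  * so the size argument has already been validated together with the pointer
  * above and only needs to be skipped here.
  */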
11795 i++;
11796 break;
11797 }
11798 case KF_ARG_PTR_TO_CALLBACK:
11799 if (reg->type != PTR_TO_FUNC) {
11800 verbose(private_data: env, fmt: "arg%d expected pointer to func\n", i);
11801 return -EINVAL;
11802 }
11803 meta->subprogno = reg->subprogno;
11804 break;
11805 case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
11806 if (!type_is_ptr_alloc_obj(type: reg->type)) {
11807 verbose(private_data: env, fmt: "arg#%d is neither owning nor non-owning ref\n", i);
11808 return -EINVAL;
11809 }
11810 if (!type_is_non_owning_ref(type: reg->type))
11811 meta->arg_owning_ref = true;
11812
11813 rec = reg_btf_record(reg);
11814 if (!rec) {
11815 verbose(private_data: env, fmt: "verifier internal error: Couldn't find btf_record\n");
11816 return -EFAULT;
11817 }
11818
11819 if (rec->refcount_off < 0) {
11820 verbose(private_data: env, fmt: "arg#%d doesn't point to a type with bpf_refcount field\n", i);
11821 return -EINVAL;
11822 }
11823
11824 meta->arg_btf = reg->btf;
11825 meta->arg_btf_id = reg->btf_id;
11826 break;
11827 }
11828 }
11829
11830 if (is_kfunc_release(meta) && !meta->release_regno) {
11831 verbose(private_data: env, fmt: "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
11832 func_name);
11833 return -EINVAL;
11834 }
11835
11836 return 0;
11837}
11838
11839static int fetch_kfunc_meta(struct bpf_verifier_env *env,
11840 struct bpf_insn *insn,
11841 struct bpf_kfunc_call_arg_meta *meta,
11842 const char **kfunc_name)
11843{
11844 const struct btf_type *func, *func_proto;
11845 u32 func_id, *kfunc_flags;
11846 const char *func_name;
11847 struct btf *desc_btf;
11848
11849 if (kfunc_name)
11850 *kfunc_name = NULL;
11851
11852 if (!insn->imm)
11853 return -EINVAL;
11854
11855 desc_btf = find_kfunc_desc_btf(env, offset: insn->off);
11856 if (IS_ERR(ptr: desc_btf))
11857 return PTR_ERR(ptr: desc_btf);
11858
11859 func_id = insn->imm;
11860 func = btf_type_by_id(btf: desc_btf, type_id: func_id);
11861 func_name = btf_name_by_offset(btf: desc_btf, offset: func->name_off);
11862 if (kfunc_name)
11863 *kfunc_name = func_name;
11864 func_proto = btf_type_by_id(btf: desc_btf, type_id: func->type);
11865
11866 kfunc_flags = btf_kfunc_id_set_contains(btf: desc_btf, kfunc_btf_id: func_id, prog: env->prog);
11867 if (!kfunc_flags)
11868 return -EACCES;
11870
11871 memset(meta, 0, sizeof(*meta));
11872 meta->btf = desc_btf;
11873 meta->func_id = func_id;
11874 meta->kfunc_flags = *kfunc_flags;
11875 meta->func_proto = func_proto;
11876 meta->func_name = func_name;
11877
11878 return 0;
11879}
11880
11881static int check_return_code(struct bpf_verifier_env *env, int regno);
11882
11883static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
11884 int *insn_idx_p)
11885{
11886 const struct btf_type *t, *ptr_type;
11887 u32 i, nargs, ptr_type_id, release_ref_obj_id;
11888 struct bpf_reg_state *regs = cur_regs(env);
11889 const char *func_name, *ptr_type_name;
11890 bool sleepable, rcu_lock, rcu_unlock;
11891 struct bpf_kfunc_call_arg_meta meta;
11892 struct bpf_insn_aux_data *insn_aux;
11893 int err, insn_idx = *insn_idx_p;
11894 const struct btf_param *args;
11895 const struct btf_type *ret_t;
11896 struct btf *desc_btf;
11897
11898 /* skip for now, but return error when we find this in fixup_kfunc_call */
11899 if (!insn->imm)
11900 return 0;
11901
11902 err = fetch_kfunc_meta(env, insn, meta: &meta, kfunc_name: &func_name);
11903 if (err == -EACCES && func_name)
11904 verbose(private_data: env, fmt: "calling kernel function %s is not allowed\n", func_name);
11905 if (err)
11906 return err;
11907 desc_btf = meta.btf;
11908 insn_aux = &env->insn_aux_data[insn_idx];
11909
11910 insn_aux->is_iter_next = is_iter_next_kfunc(meta: &meta);
11911
11912 if (is_kfunc_destructive(meta: &meta) && !capable(CAP_SYS_BOOT)) {
11913 verbose(private_data: env, fmt: "destructive kfunc calls require CAP_SYS_BOOT capability\n");
11914 return -EACCES;
11915 }
11916
11917 sleepable = is_kfunc_sleepable(meta: &meta);
11918 if (sleepable && !env->prog->aux->sleepable) {
11919 verbose(private_data: env, fmt: "program must be sleepable to call sleepable kfunc %s\n", func_name);
11920 return -EACCES;
11921 }
11922
11923 rcu_lock = is_kfunc_bpf_rcu_read_lock(meta: &meta);
11924 rcu_unlock = is_kfunc_bpf_rcu_read_unlock(meta: &meta);
11925
11926 if (env->cur_state->active_rcu_lock) {
11927 struct bpf_func_state *state;
11928 struct bpf_reg_state *reg;
11929 u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER);
11930
11931 if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
11932 verbose(private_data: env, fmt: "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
11933 return -EACCES;
11934 }
11935
11936 if (rcu_lock) {
11937 verbose(private_data: env, fmt: "nested rcu read lock (kernel function %s)\n", func_name);
11938 return -EINVAL;
11939 } else if (rcu_unlock) {
11940 bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, clear_mask, ({
11941 if (reg->type & MEM_RCU) {
11942 reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
11943 reg->type |= PTR_UNTRUSTED;
11944 }
11945 }));
11946 env->cur_state->active_rcu_lock = false;
11947 } else if (sleepable) {
11948 verbose(private_data: env, fmt: "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
11949 return -EACCES;
11950 }
11951 } else if (rcu_lock) {
11952 env->cur_state->active_rcu_lock = true;
11953 } else if (rcu_unlock) {
11954 verbose(private_data: env, fmt: "unmatched rcu read unlock (kernel function %s)\n", func_name);
11955 return -EINVAL;
11956 }
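	/* As an illustration (assuming task->real_parent is in the RCU-trusted
	 * field list, as in the selftests), a program pairing the two kfuncs
	 * might do:
	 *
	 *   bpf_rcu_read_lock();
	 *   parent = task->real_parent;   // load tagged MEM_RCU while lock held
	 *   ... dereference parent ...
	 *   bpf_rcu_read_unlock();        // MEM_RCU regs become PTR_UNTRUSTED
	 *
	 * which matches the state transitions above: active_rcu_lock is set on
	 * lock, and on unlock every MEM_RCU register is downgraded so it can no
	 * longer be passed where a trusted or rcu pointer is required.
	 */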
11957
11958 /* Check the arguments */
11959 err = check_kfunc_args(env, meta: &meta, insn_idx);
11960 if (err < 0)
11961 return err;
11962 /* For a release kfunc, check_kfunc_args() recorded the register number of
11963 * the refcounted PTR_TO_BTF_ID argument in bpf_kfunc_call_arg_meta; do the release now.
11964 */
11965 if (meta.release_regno) {
11966 err = release_reference(env, ref_obj_id: regs[meta.release_regno].ref_obj_id);
11967 if (err) {
11968 verbose(private_data: env, fmt: "kfunc %s#%d reference has not been acquired before\n",
11969 func_name, meta.func_id);
11970 return err;
11971 }
11972 }
11973
11974 if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
11975 meta.func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
11976 meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
11977 release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
11978 insn_aux->insert_off = regs[BPF_REG_2].off;
11979 insn_aux->kptr_struct_meta = btf_find_struct_meta(btf: meta.arg_btf, btf_id: meta.arg_btf_id);
11980 err = ref_convert_owning_non_owning(env, ref_obj_id: release_ref_obj_id);
11981 if (err) {
11982 verbose(private_data: env, fmt: "kfunc %s#%d conversion of owning ref to non-owning failed\n",
11983 func_name, meta.func_id);
11984 return err;
11985 }
11986
11987 err = release_reference(env, ref_obj_id: release_ref_obj_id);
11988 if (err) {
11989 verbose(private_data: env, fmt: "kfunc %s#%d reference has not been acquired before\n",
11990 func_name, meta.func_id);
11991 return err;
11992 }
11993 }
11994
11995 if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
11996 err = __check_func_call(env, insn, insn_idx: insn_idx_p, subprog: meta.subprogno,
11997 set_callee_state_cb: set_rbtree_add_callback_state);
11998 if (err) {
11999 verbose(private_data: env, fmt: "kfunc %s#%d failed callback verification\n",
12000 func_name, meta.func_id);
12001 return err;
12002 }
12003 }
12004
12005 if (meta.func_id == special_kfunc_list[KF_bpf_throw]) {
12006 if (!bpf_jit_supports_exceptions()) {
12007 verbose(private_data: env, fmt: "JIT does not support calling kfunc %s#%d\n",
12008 func_name, meta.func_id);
12009 return -ENOTSUPP;
12010 }
12011 env->seen_exception = true;
12012
12013 /* In the case of the default callback, the cookie value passed
12014 * to bpf_throw becomes the return value of the program.
12015 */
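		/* For example (illustrative values), with no custom exception
		 * callback, bpf_throw(42) makes the program return 42, so the
		 * cookie in R1 must satisfy the same constraints as a normal
		 * return value - hence the check_return_code() call below.
		 */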
12016 if (!env->exception_callback_subprog) {
12017 err = check_return_code(env, regno: BPF_REG_1);
12018 if (err < 0)
12019 return err;
12020 }
12021 }
12022
12023 for (i = 0; i < CALLER_SAVED_REGS; i++)
12024 mark_reg_not_init(env, regs, regno: caller_saved[i]);
12025
12026 /* Check return type */
12027 t = btf_type_skip_modifiers(btf: desc_btf, id: meta.func_proto->type, NULL);
12028
12029 if (is_kfunc_acquire(meta: &meta) && !btf_type_is_struct_ptr(btf: meta.btf, t)) {
12030 /* The only exceptions are bpf_obj_new_impl, bpf_percpu_obj_new_impl and bpf_refcount_acquire_impl */
12031 if (meta.btf != btf_vmlinux ||
12032 (meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] &&
12033 meta.func_id != special_kfunc_list[KF_bpf_percpu_obj_new_impl] &&
12034 meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) {
12035 verbose(private_data: env, fmt: "acquire kernel function does not return PTR_TO_BTF_ID\n");
12036 return -EINVAL;
12037 }
12038 }
12039
12040 if (btf_type_is_scalar(t)) {
12041 mark_reg_unknown(env, regs, regno: BPF_REG_0);
12042 mark_btf_func_reg_size(env, regno: BPF_REG_0, reg_size: t->size);
12043 } else if (btf_type_is_ptr(t)) {
12044 ptr_type = btf_type_skip_modifiers(btf: desc_btf, id: t->type, res_id: &ptr_type_id);
12045
12046 if (meta.btf == btf_vmlinux && btf_id_set_contains(set: &special_kfunc_set, id: meta.func_id)) {
12047 if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
12048 meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
12049 struct btf_struct_meta *struct_meta;
12050 struct btf *ret_btf;
12051 u32 ret_btf_id;
12052
12053 if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
12054 return -ENOMEM;
12055
12056 if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && !bpf_global_percpu_ma_set)
12057 return -ENOMEM;
12058
12059 if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
12060 verbose(private_data: env, fmt: "local type ID argument must be in range [0, U32_MAX]\n");
12061 return -EINVAL;
12062 }
12063
12064 ret_btf = env->prog->aux->btf;
12065 ret_btf_id = meta.arg_constant.value;
12066
12067 /* This may be NULL due to user not supplying a BTF */
12068 if (!ret_btf) {
12069 verbose(private_data: env, fmt: "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
12070 return -EINVAL;
12071 }
12072
12073 ret_t = btf_type_by_id(btf: ret_btf, type_id: ret_btf_id);
12074 if (!ret_t || !__btf_type_is_struct(t: ret_t)) {
12075 verbose(private_data: env, fmt: "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
12076 return -EINVAL;
12077 }
12078
12079 struct_meta = btf_find_struct_meta(btf: ret_btf, btf_id: ret_btf_id);
12080 if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
12081 if (!__btf_type_is_scalar_struct(env, btf: ret_btf, t: ret_t, rec: 0)) {
12082 verbose(private_data: env, fmt: "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
12083 return -EINVAL;
12084 }
12085
12086 if (struct_meta) {
12087 verbose(private_data: env, fmt: "bpf_percpu_obj_new type ID argument must not contain special fields\n");
12088 return -EINVAL;
12089 }
12090 }
12091
12092 mark_reg_known_zero(env, regs, regno: BPF_REG_0);
12093 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
12094 regs[BPF_REG_0].btf = ret_btf;
12095 regs[BPF_REG_0].btf_id = ret_btf_id;
12096 if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl])
12097 regs[BPF_REG_0].type |= MEM_PERCPU;
12098
12099 insn_aux->obj_new_size = ret_t->size;
12100 insn_aux->kptr_struct_meta = struct_meta;
12101 } else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
12102 mark_reg_known_zero(env, regs, regno: BPF_REG_0);
12103 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
12104 regs[BPF_REG_0].btf = meta.arg_btf;
12105 regs[BPF_REG_0].btf_id = meta.arg_btf_id;
12106
12107 insn_aux->kptr_struct_meta =
12108 btf_find_struct_meta(btf: meta.arg_btf,
12109 btf_id: meta.arg_btf_id);
12110 } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] ||
12111 meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
12112 struct btf_field *field = meta.arg_list_head.field;
12113
12114 mark_reg_graph_node(regs, regno: BPF_REG_0, ds_head: &field->graph_root);
12115 } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
12116 meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
12117 struct btf_field *field = meta.arg_rbtree_root.field;
12118
12119 mark_reg_graph_node(regs, regno: BPF_REG_0, ds_head: &field->graph_root);
12120 } else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
12121 mark_reg_known_zero(env, regs, regno: BPF_REG_0);
12122 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
12123 regs[BPF_REG_0].btf = desc_btf;
12124 regs[BPF_REG_0].btf_id = meta.ret_btf_id;
12125 } else if (meta.func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
12126 ret_t = btf_type_by_id(btf: desc_btf, type_id: meta.arg_constant.value);
12127 if (!ret_t || !btf_type_is_struct(t: ret_t)) {
12128 verbose(private_data: env,
12129 fmt: "kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
12130 return -EINVAL;
12131 }
12132
12133 mark_reg_known_zero(env, regs, regno: BPF_REG_0);
12134 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
12135 regs[BPF_REG_0].btf = desc_btf;
12136 regs[BPF_REG_0].btf_id = meta.arg_constant.value;
12137 } else if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
12138 meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
12139 enum bpf_type_flag type_flag = get_dynptr_type_flag(type: meta.initialized_dynptr.type);
12140
12141 mark_reg_known_zero(env, regs, regno: BPF_REG_0);
12142
12143 if (!meta.arg_constant.found) {
12144 verbose(private_data: env, fmt: "verifier internal error: bpf_dynptr_slice(_rdwr) no constant size\n");
12145 return -EFAULT;
12146 }
12147
12148 regs[BPF_REG_0].mem_size = meta.arg_constant.value;
12149
12150 /* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
12151 regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
12152
12153 if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
12154 regs[BPF_REG_0].type |= MEM_RDONLY;
12155 } else {
12156 /* this will set env->seen_direct_write to true */
12157 if (!may_access_direct_pkt_data(env, NULL, t: BPF_WRITE)) {
12158 verbose(private_data: env, fmt: "the prog does not allow writes to packet data\n");
12159 return -EINVAL;
12160 }
12161 }
12162
12163 if (!meta.initialized_dynptr.id) {
12164 verbose(private_data: env, fmt: "verifier internal error: no dynptr id\n");
12165 return -EFAULT;
12166 }
12167 regs[BPF_REG_0].dynptr_id = meta.initialized_dynptr.id;
12168
12169 /* we don't need to set BPF_REG_0's ref obj id
12170 * because packet slices are not refcounted (see
12171 * dynptr_type_refcounted)
12172 */
12173 } else {
12174 verbose(private_data: env, fmt: "kernel function %s unhandled dynamic return type\n",
12175 meta.func_name);
12176 return -EFAULT;
12177 }
12178 } else if (!__btf_type_is_struct(t: ptr_type)) {
12179 if (!meta.r0_size) {
12180 __u32 sz;
12181
12182 if (!IS_ERR(ptr: btf_resolve_size(btf: desc_btf, type: ptr_type, type_size: &sz))) {
12183 meta.r0_size = sz;
12184 meta.r0_rdonly = true;
12185 }
12186 }
12187 if (!meta.r0_size) {
12188 ptr_type_name = btf_name_by_offset(btf: desc_btf,
12189 offset: ptr_type->name_off);
12190 verbose(private_data: env,
12191 fmt: "kernel function %s returns pointer type %s %s is not supported\n",
12192 func_name,
12193 btf_type_str(t: ptr_type),
12194 ptr_type_name);
12195 return -EINVAL;
12196 }
12197
12198 mark_reg_known_zero(env, regs, regno: BPF_REG_0);
12199 regs[BPF_REG_0].type = PTR_TO_MEM;
12200 regs[BPF_REG_0].mem_size = meta.r0_size;
12201
12202 if (meta.r0_rdonly)
12203 regs[BPF_REG_0].type |= MEM_RDONLY;
12204
12205 /* Ensures we don't access the memory after a release_reference() */
12206 if (meta.ref_obj_id)
12207 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
12208 } else {
12209 mark_reg_known_zero(env, regs, regno: BPF_REG_0);
12210 regs[BPF_REG_0].btf = desc_btf;
12211 regs[BPF_REG_0].type = PTR_TO_BTF_ID;
12212 regs[BPF_REG_0].btf_id = ptr_type_id;
12213 }
12214
12215 if (is_kfunc_ret_null(meta: &meta)) {
12216 regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
12217 /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
12218 regs[BPF_REG_0].id = ++env->id_gen;
12219 }
12220 mark_btf_func_reg_size(env, regno: BPF_REG_0, reg_size: sizeof(void *));
12221 if (is_kfunc_acquire(meta: &meta)) {
12222 int id = acquire_reference_state(env, insn_idx);
12223
12224 if (id < 0)
12225 return id;
12226 if (is_kfunc_ret_null(meta: &meta))
12227 regs[BPF_REG_0].id = id;
12228 regs[BPF_REG_0].ref_obj_id = id;
12229 } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
12230 ref_set_non_owning(env, reg: &regs[BPF_REG_0]);
12231 }
12232
12233 if (reg_may_point_to_spin_lock(reg: &regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
12234 regs[BPF_REG_0].id = ++env->id_gen;
12235 } else if (btf_type_is_void(t)) {
12236 if (meta.btf == btf_vmlinux && btf_id_set_contains(set: &special_kfunc_set, id: meta.func_id)) {
12237 if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
12238 meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
12239 insn_aux->kptr_struct_meta =
12240 btf_find_struct_meta(btf: meta.arg_btf,
12241 btf_id: meta.arg_btf_id);
12242 }
12243 }
12244 }
12245
12246 nargs = btf_type_vlen(t: meta.func_proto);
12247 args = (const struct btf_param *)(meta.func_proto + 1);
12248 for (i = 0; i < nargs; i++) {
12249 u32 regno = i + 1;
12250
12251 t = btf_type_skip_modifiers(btf: desc_btf, id: args[i].type, NULL);
12252 if (btf_type_is_ptr(t))
12253 mark_btf_func_reg_size(env, regno, reg_size: sizeof(void *));
12254 else
12255 /* scalar. ensured by btf_check_kfunc_arg_match() */
12256 mark_btf_func_reg_size(env, regno, reg_size: t->size);
12257 }
12258
12259 if (is_iter_next_kfunc(meta: &meta)) {
12260 err = process_iter_next_call(env, insn_idx, meta: &meta);
12261 if (err)
12262 return err;
12263 }
12264
12265 return 0;
12266}
12267
12268static bool signed_add_overflows(s64 a, s64 b)
12269{
12270 /* Do the add in u64, where overflow is well-defined */
12271 s64 res = (s64)((u64)a + (u64)b);
12272
12273 if (b < 0)
12274 return res > a;
12275 return res < a;
12276}
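/* Worked example: a = S64_MAX, b = 1. The u64 addition wraps to
 * 0x8000000000000000 (S64_MIN when reinterpreted as s64); since b >= 0 the
 * "res < a" check fires (S64_MIN < S64_MAX) and the overflow is reported.
 */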
12277
12278static bool signed_add32_overflows(s32 a, s32 b)
12279{
12280 /* Do the add in u32, where overflow is well-defined */
12281 s32 res = (s32)((u32)a + (u32)b);
12282
12283 if (b < 0)
12284 return res > a;
12285 return res < a;
12286}
12287
12288static bool signed_sub_overflows(s64 a, s64 b)
12289{
12290 /* Do the sub in u64, where overflow is well-defined */
12291 s64 res = (s64)((u64)a - (u64)b);
12292
12293 if (b < 0)
12294 return res < a;
12295 return res > a;
12296}
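/* Worked example: a = S64_MIN, b = 1. The u64 subtraction wraps to S64_MAX;
 * since b >= 0 the "res > a" check fires (S64_MAX > S64_MIN) and the
 * underflow is reported.
 */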
12297
12298static bool signed_sub32_overflows(s32 a, s32 b)
12299{
12300 /* Do the sub in u32, where overflow is well-defined */
12301 s32 res = (s32)((u32)a - (u32)b);
12302
12303 if (b < 0)
12304 return res < a;
12305 return res > a;
12306}
12307
12308static bool check_reg_sane_offset(struct bpf_verifier_env *env,
12309 const struct bpf_reg_state *reg,
12310 enum bpf_reg_type type)
12311{
12312 bool known = tnum_is_const(a: reg->var_off);
12313 s64 val = reg->var_off.value;
12314 s64 smin = reg->smin_value;
12315
12316 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
12317 verbose(private_data: env, fmt: "math between %s pointer and %lld is not allowed\n",
12318 reg_type_str(env, type), val);
12319 return false;
12320 }
12321
12322 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
12323 verbose(private_data: env, fmt: "%s pointer offset %d is not allowed\n",
12324 reg_type_str(env, type), reg->off);
12325 return false;
12326 }
12327
12328 if (smin == S64_MIN) {
12329 verbose(private_data: env, fmt: "math between %s pointer and register with unbounded min value is not allowed\n",
12330 reg_type_str(env, type));
12331 return false;
12332 }
12333
12334 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
12335 verbose(private_data: env, fmt: "value %lld makes %s pointer be out of bounds\n",
12336 smin, reg_type_str(env, type));
12337 return false;
12338 }
12339
12340 return true;
12341}
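/* Concrete bound (assuming BPF_MAX_VAR_OFF is still 1 << 29): adding a known
 * scalar of 0x20000000 (536870912) to a map value pointer fails the first
 * check above, while any known value strictly below that limit passes it.
 */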
12342
12343enum {
12344 REASON_BOUNDS = -1,
12345 REASON_TYPE = -2,
12346 REASON_PATHS = -3,
12347 REASON_LIMIT = -4,
12348 REASON_STACK = -5,
12349};
12350
12351static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
12352 u32 *alu_limit, bool mask_to_left)
12353{
12354 u32 max = 0, ptr_limit = 0;
12355
12356 switch (ptr_reg->type) {
12357 case PTR_TO_STACK:
12358 /* Offset 0 is out-of-bounds, but acceptable start for the
12359 * left direction, see BPF_REG_FP. Also, unknown scalar
12360 * offset where we would need to deal with min/max bounds is
12361 * currently prohibited for unprivileged.
12362 */
12363 max = MAX_BPF_STACK + mask_to_left;
12364 ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
12365 break;
12366 case PTR_TO_MAP_VALUE:
12367 max = ptr_reg->map_ptr->value_size;
12368 ptr_limit = (mask_to_left ?
12369 ptr_reg->smin_value :
12370 ptr_reg->umax_value) + ptr_reg->off;
12371 break;
12372 default:
12373 return REASON_TYPE;
12374 }
12375
12376 if (ptr_limit >= max)
12377 return REASON_LIMIT;
12378 *alu_limit = ptr_limit;
12379 return 0;
12380}
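/* Worked example: for a PTR_TO_STACK register with off == -64 and a constant
 * var_off of 0, masking to the right (mask_to_left == false) gives
 * ptr_limit = -(0 + -64) = 64 and max = MAX_BPF_STACK = 512, so *alu_limit
 * becomes 64: under speculation the ALU op may move the pointer by at most
 * 64 bytes before the masking emitted by do_misc_fixups() forces the offset
 * operand to 0.
 */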
12381
12382static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
12383 const struct bpf_insn *insn)
12384{
12385 return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
12386}
12387
12388static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
12389 u32 alu_state, u32 alu_limit)
12390{
12391 /* If we arrived here from different branches with different
12392 * state or limits to sanitize, then this won't work.
12393 */
12394 if (aux->alu_state &&
12395 (aux->alu_state != alu_state ||
12396 aux->alu_limit != alu_limit))
12397 return REASON_PATHS;
12398
12399 /* Corresponding fixup done in do_misc_fixups(). */
12400 aux->alu_state = alu_state;
12401 aux->alu_limit = alu_limit;
12402 return 0;
12403}
12404
12405static int sanitize_val_alu(struct bpf_verifier_env *env,
12406 struct bpf_insn *insn)
12407{
12408 struct bpf_insn_aux_data *aux = cur_aux(env);
12409
12410 if (can_skip_alu_sanitation(env, insn))
12411 return 0;
12412
12413 return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, alu_limit: 0);
12414}
12415
12416static bool sanitize_needed(u8 opcode)
12417{
12418 return opcode == BPF_ADD || opcode == BPF_SUB;
12419}
12420
12421struct bpf_sanitize_info {
12422 struct bpf_insn_aux_data aux;
12423 bool mask_to_left;
12424};
12425
12426static struct bpf_verifier_state *
12427sanitize_speculative_path(struct bpf_verifier_env *env,
12428 const struct bpf_insn *insn,
12429 u32 next_idx, u32 curr_idx)
12430{
12431 struct bpf_verifier_state *branch;
12432 struct bpf_reg_state *regs;
12433
12434 branch = push_stack(env, insn_idx: next_idx, prev_insn_idx: curr_idx, speculative: true);
12435 if (branch && insn) {
12436 regs = branch->frame[branch->curframe]->regs;
12437 if (BPF_SRC(insn->code) == BPF_K) {
12438 mark_reg_unknown(env, regs, regno: insn->dst_reg);
12439 } else if (BPF_SRC(insn->code) == BPF_X) {
12440 mark_reg_unknown(env, regs, regno: insn->dst_reg);
12441 mark_reg_unknown(env, regs, regno: insn->src_reg);
12442 }
12443 }
12444 return branch;
12445}
12446
12447static int sanitize_ptr_alu(struct bpf_verifier_env *env,
12448 struct bpf_insn *insn,
12449 const struct bpf_reg_state *ptr_reg,
12450 const struct bpf_reg_state *off_reg,
12451 struct bpf_reg_state *dst_reg,
12452 struct bpf_sanitize_info *info,
12453 const bool commit_window)
12454{
12455 struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
12456 struct bpf_verifier_state *vstate = env->cur_state;
12457 bool off_is_imm = tnum_is_const(a: off_reg->var_off);
12458 bool off_is_neg = off_reg->smin_value < 0;
12459 bool ptr_is_dst_reg = ptr_reg == dst_reg;
12460 u8 opcode = BPF_OP(insn->code);
12461 u32 alu_state, alu_limit;
12462 struct bpf_reg_state tmp;
12463 bool ret;
12464 int err;
12465
12466 if (can_skip_alu_sanitation(env, insn))
12467 return 0;
12468
12469 /* We already marked aux for masking from non-speculative
12470 * paths, thus we got here in the first place. We only care
12471 * to explore bad access from here.
12472 */
12473 if (vstate->speculative)
12474 goto do_sim;
12475
12476 if (!commit_window) {
12477 if (!tnum_is_const(a: off_reg->var_off) &&
12478 (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
12479 return REASON_BOUNDS;
12480
12481 info->mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
12482 (opcode == BPF_SUB && !off_is_neg);
12483 }
12484
12485 err = retrieve_ptr_limit(ptr_reg, alu_limit: &alu_limit, mask_to_left: info->mask_to_left);
12486 if (err < 0)
12487 return err;
12488
12489 if (commit_window) {
12490 /* In commit phase we narrow the masking window based on
12491 * the observed pointer move after the simulated operation.
12492 */
12493 alu_state = info->aux.alu_state;
12494 alu_limit = abs(info->aux.alu_limit - alu_limit);
12495 } else {
12496 alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
12497 alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
12498 alu_state |= ptr_is_dst_reg ?
12499 BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
12500
12501 /* Limit pruning on unknown scalars to enable deep search for
12502 * potential masking differences from other program paths.
12503 */
12504 if (!off_is_imm)
12505 env->explore_alu_limits = true;
12506 }
12507
12508 err = update_alu_sanitation_state(aux, alu_state, alu_limit);
12509 if (err < 0)
12510 return err;
12511do_sim:
12512 /* If we're in commit phase, we're done here given we already
12513 * pushed the truncated dst_reg into the speculative verification
12514 * stack.
12515 *
12516 * Also, when register is a known constant, we rewrite register-based
12517 * operation to immediate-based, and thus do not need masking (and as
12518 * a consequence, do not need to simulate the zero-truncation either).
12519 */
12520 if (commit_window || off_is_imm)
12521 return 0;
12522
12523 /* Simulate and find potential out-of-bounds access under
12524 * speculative execution from truncation as a result of
12525 * masking when off was not within expected range. If off
12526 * sits in dst, then we temporarily need to move ptr there
12527 * to simulate dst (== 0) +/-= ptr. Needed, for example,
12528 * for cases where we use K-based arithmetic in one direction
12529 * and truncated reg-based in the other in order to explore
12530 * bad access.
12531 */
12532 if (!ptr_is_dst_reg) {
12533 tmp = *dst_reg;
12534 copy_register_state(dst: dst_reg, src: ptr_reg);
12535 }
12536 ret = sanitize_speculative_path(env, NULL, next_idx: env->insn_idx + 1,
12537 curr_idx: env->insn_idx);
12538 if (!ptr_is_dst_reg && ret)
12539 *dst_reg = tmp;
12540 return !ret ? REASON_STACK : 0;
12541}
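/* Worked example of the commit-phase narrowing above: a PTR_TO_STACK at
 * off -64 yields alu_limit 64 in the collection pass; after "ptr += 8" (a
 * known non-negative constant) the resulting pointer sits at off -56, the
 * commit pass recomputes a limit of 56, and abs(64 - 56) = 8 becomes the
 * final masking window - exactly the distance the pointer was observed to
 * move.
 */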
12542
12543static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
12544{
12545 struct bpf_verifier_state *vstate = env->cur_state;
12546
12547 /* If we simulate paths under speculation, we don't update the
12548 * insn as 'seen' such that when we verify unreachable paths in
12549 * the non-speculative domain, sanitize_dead_code() can still
12550 * rewrite/sanitize them.
12551 */
12552 if (!vstate->speculative)
12553 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
12554}
12555
12556static int sanitize_err(struct bpf_verifier_env *env,
12557 const struct bpf_insn *insn, int reason,
12558 const struct bpf_reg_state *off_reg,
12559 const struct bpf_reg_state *dst_reg)
12560{
12561 static const char *err = "pointer arithmetic with it prohibited for !root";
12562 const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
12563 u32 dst = insn->dst_reg, src = insn->src_reg;
12564
12565 switch (reason) {
12566 case REASON_BOUNDS:
12567 verbose(private_data: env, fmt: "R%d has unknown scalar with mixed signed bounds, %s\n",
12568 off_reg == dst_reg ? dst : src, err);
12569 break;
12570 case REASON_TYPE:
12571 verbose(private_data: env, fmt: "R%d has pointer with unsupported alu operation, %s\n",
12572 off_reg == dst_reg ? src : dst, err);
12573 break;
12574 case REASON_PATHS:
12575 verbose(private_data: env, fmt: "R%d tried to %s from different maps, paths or scalars, %s\n",
12576 dst, op, err);
12577 break;
12578 case REASON_LIMIT:
12579 verbose(private_data: env, fmt: "R%d tried to %s beyond pointer bounds, %s\n",
12580 dst, op, err);
12581 break;
12582 case REASON_STACK:
12583 verbose(private_data: env, fmt: "R%d could not be pushed for speculative verification, %s\n",
12584 dst, err);
12585 break;
12586 default:
12587 verbose(private_data: env, fmt: "verifier internal error: unknown reason (%d)\n",
12588 reason);
12589 break;
12590 }
12591
12592 return -EACCES;
12593}
12594
12595/* check that stack access falls within stack limits and that 'reg' doesn't
12596 * have a variable offset.
12597 *
12598 * Variable offset is prohibited for unprivileged mode for simplicity since it
12599 * requires corresponding support in Spectre masking for stack ALU. See also
12600 * retrieve_ptr_limit().
12601 *
12603 * 'off' includes 'reg->off'.
12604 */
12605static int check_stack_access_for_ptr_arithmetic(
12606 struct bpf_verifier_env *env,
12607 int regno,
12608 const struct bpf_reg_state *reg,
12609 int off)
12610{
12611 if (!tnum_is_const(a: reg->var_off)) {
12612 char tn_buf[48];
12613
12614 tnum_strn(str: tn_buf, size: sizeof(tn_buf), a: reg->var_off);
12615 verbose(private_data: env, fmt: "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
12616 regno, tn_buf, off);
12617 return -EACCES;
12618 }
12619
12620 if (off >= 0 || off < -MAX_BPF_STACK) {
12621 verbose(private_data: env, fmt: "R%d stack pointer arithmetic goes out of range, "
12622 "prohibited for !root; off=%d\n", regno, off);
12623 return -EACCES;
12624 }
12625
12626 return 0;
12627}
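/* For example, with MAX_BPF_STACK == 512 an unprivileged program may only form
 * stack pointers whose total offset lands in [-512, -1]: "fp += 8" (off == 8)
 * fails the "off >= 0" check and "fp -= 520" (off == -520) falls below
 * -MAX_BPF_STACK, while "fp -= 16" is accepted.
 */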
12628
12629static int sanitize_check_bounds(struct bpf_verifier_env *env,
12630 const struct bpf_insn *insn,
12631 const struct bpf_reg_state *dst_reg)
12632{
12633 u32 dst = insn->dst_reg;
12634
12635 /* For unprivileged we require that resulting offset must be in bounds
12636 * in order to be able to sanitize access later on.
12637 */
12638 if (env->bypass_spec_v1)
12639 return 0;
12640
12641 switch (dst_reg->type) {
12642 case PTR_TO_STACK:
12643 if (check_stack_access_for_ptr_arithmetic(env, regno: dst, reg: dst_reg,
12644 off: dst_reg->off + dst_reg->var_off.value))
12645 return -EACCES;
12646 break;
12647 case PTR_TO_MAP_VALUE:
12648 if (check_map_access(env, regno: dst, off: dst_reg->off, size: 1, zero_size_allowed: false, src: ACCESS_HELPER)) {
12649 verbose(private_data: env, fmt: "R%d pointer arithmetic of map value goes out of range, "
12650 "prohibited for !root\n", dst);
12651 return -EACCES;
12652 }
12653 break;
12654 default:
12655 break;
12656 }
12657
12658 return 0;
12659}
12660
12661/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
12662 * Caller should also handle BPF_MOV case separately.
12663 * If we return -EACCES, caller may want to try again treating pointer as a
12664 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
12665 */
12666static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
12667 struct bpf_insn *insn,
12668 const struct bpf_reg_state *ptr_reg,
12669 const struct bpf_reg_state *off_reg)
12670{
12671 struct bpf_verifier_state *vstate = env->cur_state;
12672 struct bpf_func_state *state = vstate->frame[vstate->curframe];
12673 struct bpf_reg_state *regs = state->regs, *dst_reg;
12674 bool known = tnum_is_const(a: off_reg->var_off);
12675 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
12676 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
12677 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
12678 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
12679 struct bpf_sanitize_info info = {};
12680 u8 opcode = BPF_OP(insn->code);
12681 u32 dst = insn->dst_reg;
12682 int ret;
12683
12684 dst_reg = &regs[dst];
12685
12686 if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
12687 smin_val > smax_val || umin_val > umax_val) {
12688 /* Taint dst register if offset had invalid bounds derived from
12689 * e.g. dead branches.
12690 */
12691 __mark_reg_unknown(env, reg: dst_reg);
12692 return 0;
12693 }
12694
12695 if (BPF_CLASS(insn->code) != BPF_ALU64) {
12696 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
12697 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
12698 __mark_reg_unknown(env, reg: dst_reg);
12699 return 0;
12700 }
12701
12702 verbose(private_data: env,
12703 fmt: "R%d 32-bit pointer arithmetic prohibited\n",
12704 dst);
12705 return -EACCES;
12706 }
12707
12708 if (ptr_reg->type & PTR_MAYBE_NULL) {
12709 verbose(private_data: env, fmt: "R%d pointer arithmetic on %s prohibited, null-check it first\n",
12710 dst, reg_type_str(env, type: ptr_reg->type));
12711 return -EACCES;
12712 }
12713
12714 switch (base_type(type: ptr_reg->type)) {
12715 case CONST_PTR_TO_MAP:
12716 /* smin_val represents the known value */
12717 if (known && smin_val == 0 && opcode == BPF_ADD)
12718 break;
12719 fallthrough;
12720 case PTR_TO_PACKET_END:
12721 case PTR_TO_SOCKET:
12722 case PTR_TO_SOCK_COMMON:
12723 case PTR_TO_TCP_SOCK:
12724 case PTR_TO_XDP_SOCK:
12725 verbose(private_data: env, fmt: "R%d pointer arithmetic on %s prohibited\n",
12726 dst, reg_type_str(env, type: ptr_reg->type));
12727 return -EACCES;
12728 default:
12729 break;
12730 }
12731
12732 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
12733 * The id may be overwritten later if we create a new variable offset.
12734 */
12735 dst_reg->type = ptr_reg->type;
12736 dst_reg->id = ptr_reg->id;
12737
12738 if (!check_reg_sane_offset(env, reg: off_reg, type: ptr_reg->type) ||
12739 !check_reg_sane_offset(env, reg: ptr_reg, type: ptr_reg->type))
12740 return -EINVAL;
12741
12742 /* pointer types do not carry 32-bit bounds at the moment. */
12743 __mark_reg32_unbounded(reg: dst_reg);
12744
12745 if (sanitize_needed(opcode)) {
12746 ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
12747 info: &info, commit_window: false);
12748 if (ret < 0)
12749 return sanitize_err(env, insn, reason: ret, off_reg, dst_reg);
12750 }
12751
12752 switch (opcode) {
12753 case BPF_ADD:
12754 /* We can take a fixed offset as long as it doesn't overflow
12755 * the s32 'off' field
12756 */
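		/* e.g. a PTR_TO_MAP_VALUE with off 16 plus a known constant 8
		 * simply becomes the same pointer with off 24 and unchanged
		 * min/max bounds; only when ptr_reg->off + smin_val no longer
		 * fits in s32 do we fall through to the variable-offset
		 * handling below.
		 */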
12757 if (known && (ptr_reg->off + smin_val ==
12758 (s64)(s32)(ptr_reg->off + smin_val))) {
12759 /* pointer += K. Accumulate it into fixed offset */
12760 dst_reg->smin_value = smin_ptr;
12761 dst_reg->smax_value = smax_ptr;
12762 dst_reg->umin_value = umin_ptr;
12763 dst_reg->umax_value = umax_ptr;
12764 dst_reg->var_off = ptr_reg->var_off;
12765 dst_reg->off = ptr_reg->off + smin_val;
12766 dst_reg->raw = ptr_reg->raw;
12767 break;
12768 }
12769 /* A new variable offset is created. Note that off_reg->off
12770 * == 0, since it's a scalar.
12771 * dst_reg gets the pointer type and, since a variable value
12772 * was added to the pointer, a new 'id' if it's a
12773 * PTR_TO_PACKET.
12774 * This creates a new 'base' pointer: off_reg (variable) gets
12775 * added into the variable offset, and we copy the fixed offset
12776 * from ptr_reg.
12777 */
12778 if (signed_add_overflows(a: smin_ptr, b: smin_val) ||
12779 signed_add_overflows(a: smax_ptr, b: smax_val)) {
12780 dst_reg->smin_value = S64_MIN;
12781 dst_reg->smax_value = S64_MAX;
12782 } else {
12783 dst_reg->smin_value = smin_ptr + smin_val;
12784 dst_reg->smax_value = smax_ptr + smax_val;
12785 }
12786 if (umin_ptr + umin_val < umin_ptr ||
12787 umax_ptr + umax_val < umax_ptr) {
12788 dst_reg->umin_value = 0;
12789 dst_reg->umax_value = U64_MAX;
12790 } else {
12791 dst_reg->umin_value = umin_ptr + umin_val;
12792 dst_reg->umax_value = umax_ptr + umax_val;
12793 }
12794 dst_reg->var_off = tnum_add(a: ptr_reg->var_off, b: off_reg->var_off);
12795 dst_reg->off = ptr_reg->off;
12796 dst_reg->raw = ptr_reg->raw;
12797 if (reg_is_pkt_pointer(reg: ptr_reg)) {
12798 dst_reg->id = ++env->id_gen;
12799 /* something was added to pkt_ptr, set range to zero */
12800 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
12801 }
12802 break;
12803 case BPF_SUB:
12804 if (dst_reg == off_reg) {
12805 /* scalar -= pointer. Creates an unknown scalar */
12806 verbose(private_data: env, fmt: "R%d tried to subtract pointer from scalar\n",
12807 dst);
12808 return -EACCES;
12809 }
12810 /* We don't allow subtraction from FP because, according to
12811 * the test_verifier.c test "invalid fp arithmetic", JITs might not
12812 * be able to deal with it.
12813 */
12814 if (ptr_reg->type == PTR_TO_STACK) {
12815 verbose(private_data: env, fmt: "R%d subtraction from stack pointer prohibited\n",
12816 dst);
12817 return -EACCES;
12818 }
12819 if (known && (ptr_reg->off - smin_val ==
12820 (s64)(s32)(ptr_reg->off - smin_val))) {
12821 /* pointer -= K. Subtract it from fixed offset */
12822 dst_reg->smin_value = smin_ptr;
12823 dst_reg->smax_value = smax_ptr;
12824 dst_reg->umin_value = umin_ptr;
12825 dst_reg->umax_value = umax_ptr;
12826 dst_reg->var_off = ptr_reg->var_off;
12827 dst_reg->id = ptr_reg->id;
12828 dst_reg->off = ptr_reg->off - smin_val;
12829 dst_reg->raw = ptr_reg->raw;
12830 break;
12831 }
12832 /* A new variable offset is created. If the subtrahend is known
12833 * nonnegative, then any reg->range we had before is still good.
12834 */
12835 if (signed_sub_overflows(a: smin_ptr, b: smax_val) ||
12836 signed_sub_overflows(a: smax_ptr, b: smin_val)) {
12837 /* Overflow possible, we know nothing */
12838 dst_reg->smin_value = S64_MIN;
12839 dst_reg->smax_value = S64_MAX;
12840 } else {
12841 dst_reg->smin_value = smin_ptr - smax_val;
12842 dst_reg->smax_value = smax_ptr - smin_val;
12843 }
12844 if (umin_ptr < umax_val) {
12845 /* Overflow possible, we know nothing */
12846 dst_reg->umin_value = 0;
12847 dst_reg->umax_value = U64_MAX;
12848 } else {
12849 /* Cannot overflow (as long as bounds are consistent) */
12850 dst_reg->umin_value = umin_ptr - umax_val;
12851 dst_reg->umax_value = umax_ptr - umin_val;
12852 }
12853 dst_reg->var_off = tnum_sub(a: ptr_reg->var_off, b: off_reg->var_off);
12854 dst_reg->off = ptr_reg->off;
12855 dst_reg->raw = ptr_reg->raw;
12856 if (reg_is_pkt_pointer(reg: ptr_reg)) {
12857 dst_reg->id = ++env->id_gen;
12858 /* subtracting a possibly negative value may advance pkt_ptr, so reset the range */
12859 if (smin_val < 0)
12860 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
12861 }
12862 break;
12863 case BPF_AND:
12864 case BPF_OR:
12865 case BPF_XOR:
12866 /* bitwise ops on pointers are troublesome, prohibit. */
12867 verbose(private_data: env, fmt: "R%d bitwise operator %s on pointer prohibited\n",
12868 dst, bpf_alu_string[opcode >> 4]);
12869 return -EACCES;
12870 default:
12871 /* other operators (e.g. MUL, LSH) produce non-pointer results */
12872 verbose(private_data: env, fmt: "R%d pointer arithmetic with %s operator prohibited\n",
12873 dst, bpf_alu_string[opcode >> 4]);
12874 return -EACCES;
12875 }
12876
12877 if (!check_reg_sane_offset(env, reg: dst_reg, type: ptr_reg->type))
12878 return -EINVAL;
12879 reg_bounds_sync(reg: dst_reg);
12880 if (sanitize_check_bounds(env, insn, dst_reg) < 0)
12881 return -EACCES;
12882 if (sanitize_needed(opcode)) {
12883 ret = sanitize_ptr_alu(env, insn, ptr_reg: dst_reg, off_reg, dst_reg,
12884 info: &info, commit_window: true);
12885 if (ret < 0)
12886 return sanitize_err(env, insn, reason: ret, off_reg, dst_reg);
12887 }
12888
12889 return 0;
12890}
12891
12892static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
12893 struct bpf_reg_state *src_reg)
12894{
12895 s32 smin_val = src_reg->s32_min_value;
12896 s32 smax_val = src_reg->s32_max_value;
12897 u32 umin_val = src_reg->u32_min_value;
12898 u32 umax_val = src_reg->u32_max_value;
12899
12900 if (signed_add32_overflows(a: dst_reg->s32_min_value, b: smin_val) ||
12901 signed_add32_overflows(a: dst_reg->s32_max_value, b: smax_val)) {
12902 dst_reg->s32_min_value = S32_MIN;
12903 dst_reg->s32_max_value = S32_MAX;
12904 } else {
12905 dst_reg->s32_min_value += smin_val;
12906 dst_reg->s32_max_value += smax_val;
12907 }
12908 if (dst_reg->u32_min_value + umin_val < umin_val ||
12909 dst_reg->u32_max_value + umax_val < umax_val) {
12910 dst_reg->u32_min_value = 0;
12911 dst_reg->u32_max_value = U32_MAX;
12912 } else {
12913 dst_reg->u32_min_value += umin_val;
12914 dst_reg->u32_max_value += umax_val;
12915 }
12916}
12917
12918static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
12919 struct bpf_reg_state *src_reg)
12920{
12921 s64 smin_val = src_reg->smin_value;
12922 s64 smax_val = src_reg->smax_value;
12923 u64 umin_val = src_reg->umin_value;
12924 u64 umax_val = src_reg->umax_value;
12925
12926 if (signed_add_overflows(a: dst_reg->smin_value, b: smin_val) ||
12927 signed_add_overflows(a: dst_reg->smax_value, b: smax_val)) {
12928 dst_reg->smin_value = S64_MIN;
12929 dst_reg->smax_value = S64_MAX;
12930 } else {
12931 dst_reg->smin_value += smin_val;
12932 dst_reg->smax_value += smax_val;
12933 }
12934 if (dst_reg->umin_value + umin_val < umin_val ||
12935 dst_reg->umax_value + umax_val < umax_val) {
12936 dst_reg->umin_value = 0;
12937 dst_reg->umax_value = U64_MAX;
12938 } else {
12939 dst_reg->umin_value += umin_val;
12940 dst_reg->umax_value += umax_val;
12941 }
12942}
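/* Worked example: dst in [10, 20] plus src in [3, 5] tightens to [13, 25] in
 * both the signed and unsigned domains. If instead dst->umax_value were
 * U64_MAX - 1 and umax_val were 5, the wrapped sum (3) would be smaller than
 * umax_val, so the unsigned range would be reset to [0, U64_MAX].
 */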
12943
12944static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
12945 struct bpf_reg_state *src_reg)
12946{
12947 s32 smin_val = src_reg->s32_min_value;
12948 s32 smax_val = src_reg->s32_max_value;
12949 u32 umin_val = src_reg->u32_min_value;
12950 u32 umax_val = src_reg->u32_max_value;
12951
12952 if (signed_sub32_overflows(a: dst_reg->s32_min_value, b: smax_val) ||
12953 signed_sub32_overflows(a: dst_reg->s32_max_value, b: smin_val)) {
12954 /* Overflow possible, we know nothing */
12955 dst_reg->s32_min_value = S32_MIN;
12956 dst_reg->s32_max_value = S32_MAX;
12957 } else {
12958 dst_reg->s32_min_value -= smax_val;
12959 dst_reg->s32_max_value -= smin_val;
12960 }
12961 if (dst_reg->u32_min_value < umax_val) {
12962 /* Overflow possible, we know nothing */
12963 dst_reg->u32_min_value = 0;
12964 dst_reg->u32_max_value = U32_MAX;
12965 } else {
12966 /* Cannot overflow (as long as bounds are consistent) */
12967 dst_reg->u32_min_value -= umax_val;
12968 dst_reg->u32_max_value -= umin_val;
12969 }
12970}
12971
12972static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
12973 struct bpf_reg_state *src_reg)
12974{
12975 s64 smin_val = src_reg->smin_value;
12976 s64 smax_val = src_reg->smax_value;
12977 u64 umin_val = src_reg->umin_value;
12978 u64 umax_val = src_reg->umax_value;
12979
12980 if (signed_sub_overflows(a: dst_reg->smin_value, b: smax_val) ||
12981 signed_sub_overflows(a: dst_reg->smax_value, b: smin_val)) {
12982 /* Overflow possible, we know nothing */
12983 dst_reg->smin_value = S64_MIN;
12984 dst_reg->smax_value = S64_MAX;
12985 } else {
12986 dst_reg->smin_value -= smax_val;
12987 dst_reg->smax_value -= smin_val;
12988 }
12989 if (dst_reg->umin_value < umax_val) {
12990 /* Overflow possible, we know nothing */
12991 dst_reg->umin_value = 0;
12992 dst_reg->umax_value = U64_MAX;
12993 } else {
12994 /* Cannot overflow (as long as bounds are consistent) */
12995 dst_reg->umin_value -= umax_val;
12996 dst_reg->umax_value -= umin_val;
12997 }
12998}
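/* Worked example: dst in [10, 20] minus src in [3, 7] gives smin = 10 - 7 = 3
 * and smax = 20 - 3 = 17; since dst->umin_value (10) >= umax_val (7) the
 * unsigned range is likewise [3, 17]. Had dst->umin_value been smaller than
 * umax_val, the result could wrap and the unsigned bounds would be reset.
 */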
12999
13000static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
13001 struct bpf_reg_state *src_reg)
13002{
13003 s32 smin_val = src_reg->s32_min_value;
13004 u32 umin_val = src_reg->u32_min_value;
13005 u32 umax_val = src_reg->u32_max_value;
13006
13007 if (smin_val < 0 || dst_reg->s32_min_value < 0) {
13008 /* Ain't nobody got time to multiply that sign */
13009 __mark_reg32_unbounded(reg: dst_reg);
13010 return;
13011 }
13012 /* Both values are positive, so we can work with unsigned and
13013 * copy the result to signed (unless it exceeds S32_MAX).
13014 */
13015 if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
13016 /* Potential overflow, we know nothing */
13017 __mark_reg32_unbounded(reg: dst_reg);
13018 return;
13019 }
13020 dst_reg->u32_min_value *= umin_val;
13021 dst_reg->u32_max_value *= umax_val;
13022 if (dst_reg->u32_max_value > S32_MAX) {
13023 /* Overflow possible, we know nothing */
13024 dst_reg->s32_min_value = S32_MIN;
13025 dst_reg->s32_max_value = S32_MAX;
13026 } else {
13027 dst_reg->s32_min_value = dst_reg->u32_min_value;
13028 dst_reg->s32_max_value = dst_reg->u32_max_value;
13029 }
13030}
13031
13032static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
13033 struct bpf_reg_state *src_reg)
13034{
13035 s64 smin_val = src_reg->smin_value;
13036 u64 umin_val = src_reg->umin_value;
13037 u64 umax_val = src_reg->umax_value;
13038
13039 if (smin_val < 0 || dst_reg->smin_value < 0) {
13040 /* Ain't nobody got time to multiply that sign */
13041 __mark_reg64_unbounded(reg: dst_reg);
13042 return;
13043 }
13044 /* Both values are positive, so we can work with unsigned and
13045 * copy the result to signed (unless it exceeds S64_MAX).
13046 */
13047 if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
13048 /* Potential overflow, we know nothing */
13049 __mark_reg64_unbounded(reg: dst_reg);
13050 return;
13051 }
13052 dst_reg->umin_value *= umin_val;
13053 dst_reg->umax_value *= umax_val;
13054 if (dst_reg->umax_value > S64_MAX) {
13055 /* Overflow possible, we know nothing */
13056 dst_reg->smin_value = S64_MIN;
13057 dst_reg->smax_value = S64_MAX;
13058 } else {
13059 dst_reg->smin_value = dst_reg->umin_value;
13060 dst_reg->smax_value = dst_reg->umax_value;
13061 }
13062}
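/* Worked example: dst in [2, 10] times src in [3, 4], with all bounds
 * non-negative and no operand above U32_MAX, gives [6, 40] and the signed
 * bounds mirror it. If either umax_value exceeded U32_MAX the product could
 * overflow 64 bits, so the register would be marked unbounded instead.
 */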
13063
13064static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
13065 struct bpf_reg_state *src_reg)
13066{
13067 bool src_known = tnum_subreg_is_const(a: src_reg->var_off);
13068 bool dst_known = tnum_subreg_is_const(a: dst_reg->var_off);
13069 struct tnum var32_off = tnum_subreg(a: dst_reg->var_off);
13070 s32 smin_val = src_reg->s32_min_value;
13071 u32 umax_val = src_reg->u32_max_value;
13072
13073 if (src_known && dst_known) {
13074 __mark_reg32_known(reg: dst_reg, imm: var32_off.value);
13075 return;
13076 }
13077
13078 /* We get our minimum from the var_off, since that's inherently
13079 * bitwise. Our maximum is the minimum of the operands' maxima.
13080 */
13081 dst_reg->u32_min_value = var32_off.value;
13082 dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
13083 if (dst_reg->s32_min_value < 0 || smin_val < 0) {
13084 /* Lose signed bounds when ANDing negative numbers,
13085 * ain't nobody got time for that.
13086 */
13087 dst_reg->s32_min_value = S32_MIN;
13088 dst_reg->s32_max_value = S32_MAX;
13089 } else {
13090 /* ANDing two positives gives a positive, so safe to
13091 * cast result into s32.
13092 */
13093 dst_reg->s32_min_value = dst_reg->u32_min_value;
13094 dst_reg->s32_max_value = dst_reg->u32_max_value;
13095 }
13096}
13097
13098static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
13099 struct bpf_reg_state *src_reg)
13100{
13101 bool src_known = tnum_is_const(a: src_reg->var_off);
13102 bool dst_known = tnum_is_const(a: dst_reg->var_off);
13103 s64 smin_val = src_reg->smin_value;
13104 u64 umax_val = src_reg->umax_value;
13105
13106 if (src_known && dst_known) {
13107 __mark_reg_known(reg: dst_reg, imm: dst_reg->var_off.value);
13108 return;
13109 }
13110
13111 /* We get our minimum from the var_off, since that's inherently
13112 * bitwise. Our maximum is the minimum of the operands' maxima.
13113 */
13114 dst_reg->umin_value = dst_reg->var_off.value;
13115 dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
13116 if (dst_reg->smin_value < 0 || smin_val < 0) {
13117 /* Lose signed bounds when ANDing negative numbers,
13118 * ain't nobody got time for that.
13119 */
13120 dst_reg->smin_value = S64_MIN;
13121 dst_reg->smax_value = S64_MAX;
13122 } else {
13123 /* ANDing two positives gives a positive, so safe to
13124 * cast result into s64.
13125 */
13126 dst_reg->smin_value = dst_reg->umin_value;
13127 dst_reg->smax_value = dst_reg->umax_value;
13128 }
13129 /* We may learn something more from the var_off */
13130 __update_reg_bounds(reg: dst_reg);
13131}
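/* Worked example: a fully unknown u64 ANDed with a constant 0xf0 has var_off
 * {value = 0, mask = 0xf0} by the time we get here, so umin_value becomes 0
 * and umax_value becomes min(U64_MAX, 0xf0) = 0xf0. Because dst could have
 * been negative, the signed bounds are first widened and the
 * __update_reg_bounds() call can then re-derive [0, 0xf0] from var_off.
 */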
13132
13133static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
13134 struct bpf_reg_state *src_reg)
13135{
13136 bool src_known = tnum_subreg_is_const(a: src_reg->var_off);
13137 bool dst_known = tnum_subreg_is_const(a: dst_reg->var_off);
13138 struct tnum var32_off = tnum_subreg(a: dst_reg->var_off);
13139 s32 smin_val = src_reg->s32_min_value;
13140 u32 umin_val = src_reg->u32_min_value;
13141
13142 if (src_known && dst_known) {
13143 __mark_reg32_known(reg: dst_reg, imm: var32_off.value);
13144 return;
13145 }
13146
13147 /* We get our maximum from the var_off, and our minimum is the
13148 * maximum of the operands' minima
13149 */
13150 dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
13151 dst_reg->u32_max_value = var32_off.value | var32_off.mask;
13152 if (dst_reg->s32_min_value < 0 || smin_val < 0) {
13153 /* Lose signed bounds when ORing negative numbers,
13154 * ain't nobody got time for that.
13155 */
13156 dst_reg->s32_min_value = S32_MIN;
13157 dst_reg->s32_max_value = S32_MAX;
13158 } else {
13159 /* ORing two positives gives a positive, so safe to
13160 * cast result into s32.
13161 */
13162 dst_reg->s32_min_value = dst_reg->u32_min_value;
13163 dst_reg->s32_max_value = dst_reg->u32_max_value;
13164 }
13165}
13166
13167static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
13168 struct bpf_reg_state *src_reg)
13169{
13170 bool src_known = tnum_is_const(a: src_reg->var_off);
13171 bool dst_known = tnum_is_const(a: dst_reg->var_off);
13172 s64 smin_val = src_reg->smin_value;
13173 u64 umin_val = src_reg->umin_value;
13174
13175 if (src_known && dst_known) {
13176 __mark_reg_known(reg: dst_reg, imm: dst_reg->var_off.value);
13177 return;
13178 }
13179
13180 /* We get our maximum from the var_off, and our minimum is the
13181 * maximum of the operands' minima
13182 */
13183 dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
13184 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
13185 if (dst_reg->smin_value < 0 || smin_val < 0) {
13186 /* Lose signed bounds when ORing negative numbers,
13187 * ain't nobody got time for that.
13188 */
13189 dst_reg->smin_value = S64_MIN;
13190 dst_reg->smax_value = S64_MAX;
13191 } else {
13192 /* ORing two positives gives a positive, so safe to
13193 * cast result into s64.
13194 */
13195 dst_reg->smin_value = dst_reg->umin_value;
13196 dst_reg->smax_value = dst_reg->umax_value;
13197 }
13198 /* We may learn something more from the var_off */
13199 __update_reg_bounds(reg: dst_reg);
13200}
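
/* Worked example for the OR bounds above: suppose r1 is known to be in
 * [16, 31] with var_off = {value = 0x10, mask = 0x0f}, and then
 *
 *   r1 |= 0x3;
 *
 * tnum_or() (applied by the caller) yields var_off = {value = 0x13, mask = 0x0c},
 * so umax_value = 0x13 | 0x0c = 31, while umin_value = max(16, 3) = 16 -- a
 * sound, if not maximally tight, lower bound.
 */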
13201
13202static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
13203 struct bpf_reg_state *src_reg)
13204{
13205 bool src_known = tnum_subreg_is_const(a: src_reg->var_off);
13206 bool dst_known = tnum_subreg_is_const(a: dst_reg->var_off);
13207 struct tnum var32_off = tnum_subreg(a: dst_reg->var_off);
13208 s32 smin_val = src_reg->s32_min_value;
13209
13210 if (src_known && dst_known) {
13211 __mark_reg32_known(reg: dst_reg, imm: var32_off.value);
13212 return;
13213 }
13214
13215 /* We get both minimum and maximum from the var32_off. */
13216 dst_reg->u32_min_value = var32_off.value;
13217 dst_reg->u32_max_value = var32_off.value | var32_off.mask;
13218
13219 if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
13220 /* XORing two positive sign numbers gives a positive,
13221 * so safe to cast u32 result into s32.
13222 */
13223 dst_reg->s32_min_value = dst_reg->u32_min_value;
13224 dst_reg->s32_max_value = dst_reg->u32_max_value;
13225 } else {
13226 dst_reg->s32_min_value = S32_MIN;
13227 dst_reg->s32_max_value = S32_MAX;
13228 }
13229}
13230
13231static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
13232 struct bpf_reg_state *src_reg)
13233{
13234 bool src_known = tnum_is_const(a: src_reg->var_off);
13235 bool dst_known = tnum_is_const(a: dst_reg->var_off);
13236 s64 smin_val = src_reg->smin_value;
13237
13238 if (src_known && dst_known) {
13239 /* dst_reg->var_off.value has been updated earlier */
13240 __mark_reg_known(reg: dst_reg, imm: dst_reg->var_off.value);
13241 return;
13242 }
13243
13244 /* We get both minimum and maximum from the var_off. */
13245 dst_reg->umin_value = dst_reg->var_off.value;
13246 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
13247
13248 if (dst_reg->smin_value >= 0 && smin_val >= 0) {
13249 /* XORing two positive sign numbers gives a positive,
13250 * so safe to cast u64 result into s64.
13251 */
13252 dst_reg->smin_value = dst_reg->umin_value;
13253 dst_reg->smax_value = dst_reg->umax_value;
13254 } else {
13255 dst_reg->smin_value = S64_MIN;
13256 dst_reg->smax_value = S64_MAX;
13257 }
13258
13259 __update_reg_bounds(reg: dst_reg);
13260}
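
/* Worked example for the XOR bounds above: suppose r1 has
 * var_off = {value = 0x10, mask = 0x0f} (i.e. r1 is in [16, 31]), and then
 *
 *   r1 ^= 0x7;
 *
 * tnum_xor() (applied by the caller) again gives {value = 0x10, mask = 0x0f},
 * so the unsigned bounds stay [16, 31]. Both operands are known nonnegative
 * here, so the signed bounds can safely mirror the unsigned ones.
 */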
13261
13262static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
13263 u64 umin_val, u64 umax_val)
13264{
13265 /* We lose all sign bit information (except what we can pick
13266 * up from var_off)
13267 */
13268 dst_reg->s32_min_value = S32_MIN;
13269 dst_reg->s32_max_value = S32_MAX;
13270 /* If we might shift our top bit out, then we know nothing */
13271 if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
13272 dst_reg->u32_min_value = 0;
13273 dst_reg->u32_max_value = U32_MAX;
13274 } else {
13275 dst_reg->u32_min_value <<= umin_val;
13276 dst_reg->u32_max_value <<= umax_val;
13277 }
13278}
13279
13280static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
13281 struct bpf_reg_state *src_reg)
13282{
13283 u32 umax_val = src_reg->u32_max_value;
13284 u32 umin_val = src_reg->u32_min_value;
13285 /* u32 alu operation will zext upper bits */
13286 struct tnum subreg = tnum_subreg(a: dst_reg->var_off);
13287
13288 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
13289 dst_reg->var_off = tnum_subreg(a: tnum_lshift(a: subreg, shift: umin_val));
	/* Not strictly required, but to be careful mark the reg64 bounds as
	 * unknown so that we are forced to pick them up from the tnum and
	 * zext later; if some path skips this step we are still safe.
	 */
13294 __mark_reg64_unbounded(reg: dst_reg);
13295 __update_reg32_bounds(reg: dst_reg);
13296}
13297
13298static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
13299 u64 umin_val, u64 umax_val)
13300{
13301 /* Special case <<32 because it is a common compiler pattern to sign
13302 * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
13303 * positive we know this shift will also be positive so we can track
13304 * bounds correctly. Otherwise we lose all sign bit information except
13305 * what we can pick up from var_off. Perhaps we can generalize this
13306 * later to shifts of any length.
13307 */
13308 if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
13309 dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
13310 else
13311 dst_reg->smax_value = S64_MAX;
13312
13313 if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
13314 dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
13315 else
13316 dst_reg->smin_value = S64_MIN;
13317
13318 /* If we might shift our top bit out, then we know nothing */
13319 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
13320 dst_reg->umin_value = 0;
13321 dst_reg->umax_value = U64_MAX;
13322 } else {
13323 dst_reg->umin_value <<= umin_val;
13324 dst_reg->umax_value <<= umax_val;
13325 }
13326}
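
/* Illustration of the <<32 special case above: compilers commonly emit
 *
 *   r1 <<= 32;
 *   r1 s>>= 32;
 *
 * to sign-extend a 32-bit subregister. If the 32-bit bounds were, say,
 * [0, 100], the left shift produces smin = 0 and smax = (s64)100 << 32, and
 * the following arithmetic right shift restores [0, 100], so the bounds
 * survive the round trip instead of collapsing to [S64_MIN, S64_MAX].
 */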
13327
13328static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
13329 struct bpf_reg_state *src_reg)
13330{
13331 u64 umax_val = src_reg->umax_value;
13332 u64 umin_val = src_reg->umin_value;
13333
13334 /* scalar64 calc uses 32bit unshifted bounds so must be called first */
13335 __scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
13336 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
13337
13338 dst_reg->var_off = tnum_lshift(a: dst_reg->var_off, shift: umin_val);
13339 /* We may learn something more from the var_off */
13340 __update_reg_bounds(reg: dst_reg);
13341}
13342
13343static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
13344 struct bpf_reg_state *src_reg)
13345{
13346 struct tnum subreg = tnum_subreg(a: dst_reg->var_off);
13347 u32 umax_val = src_reg->u32_max_value;
13348 u32 umin_val = src_reg->u32_min_value;
13349
13350 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
13351 * be negative, then either:
13352 * 1) src_reg might be zero, so the sign bit of the result is
13353 * unknown, so we lose our signed bounds
13354 * 2) it's known negative, thus the unsigned bounds capture the
13355 * signed bounds
13356 * 3) the signed bounds cross zero, so they tell us nothing
13357 * about the result
13358 * If the value in dst_reg is known nonnegative, then again the
13359 * unsigned bounds capture the signed bounds.
13360 * Thus, in all cases it suffices to blow away our signed bounds
13361 * and rely on inferring new ones from the unsigned bounds and
13362 * var_off of the result.
13363 */
13364 dst_reg->s32_min_value = S32_MIN;
13365 dst_reg->s32_max_value = S32_MAX;
13366
13367 dst_reg->var_off = tnum_rshift(a: subreg, shift: umin_val);
13368 dst_reg->u32_min_value >>= umax_val;
13369 dst_reg->u32_max_value >>= umin_val;
13370
13371 __mark_reg64_unbounded(reg: dst_reg);
13372 __update_reg32_bounds(reg: dst_reg);
13373}
13374
13375static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
13376 struct bpf_reg_state *src_reg)
13377{
13378 u64 umax_val = src_reg->umax_value;
13379 u64 umin_val = src_reg->umin_value;
13380
13381 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
13382 * be negative, then either:
13383 * 1) src_reg might be zero, so the sign bit of the result is
13384 * unknown, so we lose our signed bounds
13385 * 2) it's known negative, thus the unsigned bounds capture the
13386 * signed bounds
13387 * 3) the signed bounds cross zero, so they tell us nothing
13388 * about the result
13389 * If the value in dst_reg is known nonnegative, then again the
13390 * unsigned bounds capture the signed bounds.
13391 * Thus, in all cases it suffices to blow away our signed bounds
13392 * and rely on inferring new ones from the unsigned bounds and
13393 * var_off of the result.
13394 */
13395 dst_reg->smin_value = S64_MIN;
13396 dst_reg->smax_value = S64_MAX;
13397 dst_reg->var_off = tnum_rshift(a: dst_reg->var_off, shift: umin_val);
13398 dst_reg->umin_value >>= umax_val;
13399 dst_reg->umax_value >>= umin_val;
13400
	/* It's not easy to operate on the alu32 bounds here because they
	 * depend on bits being shifted in. Take the easy way out and mark
	 * them unbounded so we can recalculate them later from the tnum.
	 */
13405 __mark_reg32_unbounded(reg: dst_reg);
13406 __update_reg_bounds(reg: dst_reg);
13407}
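
/* Example of why the signed bounds are discarded above: if r1 may hold -1,
 * then
 *
 *   r1 >>= 1;
 *
 * turns 0xffffffffffffffff into 0x7fffffffffffffff, so a prior signed range
 * such as [-1, 5] says nothing about the result. The unsigned bounds and
 * var_off remain meaningful and are used to re-derive signed bounds later.
 */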
13408
13409static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
13410 struct bpf_reg_state *src_reg)
13411{
13412 u64 umin_val = src_reg->u32_min_value;
13413
13414 /* Upon reaching here, src_known is true and
13415 * umax_val is equal to umin_val.
13416 */
13417 dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
13418 dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
13419
13420 dst_reg->var_off = tnum_arshift(a: tnum_subreg(a: dst_reg->var_off), min_shift: umin_val, insn_bitness: 32);
13421
13422 /* blow away the dst_reg umin_value/umax_value and rely on
13423 * dst_reg var_off to refine the result.
13424 */
13425 dst_reg->u32_min_value = 0;
13426 dst_reg->u32_max_value = U32_MAX;
13427
13428 __mark_reg64_unbounded(reg: dst_reg);
13429 __update_reg32_bounds(reg: dst_reg);
13430}
13431
13432static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
13433 struct bpf_reg_state *src_reg)
13434{
13435 u64 umin_val = src_reg->umin_value;
13436
13437 /* Upon reaching here, src_known is true and umax_val is equal
13438 * to umin_val.
13439 */
13440 dst_reg->smin_value >>= umin_val;
13441 dst_reg->smax_value >>= umin_val;
13442
13443 dst_reg->var_off = tnum_arshift(a: dst_reg->var_off, min_shift: umin_val, insn_bitness: 64);
13444
13445 /* blow away the dst_reg umin_value/umax_value and rely on
13446 * dst_reg var_off to refine the result.
13447 */
13448 dst_reg->umin_value = 0;
13449 dst_reg->umax_value = U64_MAX;
13450
	/* It's not easy to operate on the alu32 bounds here because they
	 * depend on bits being shifted in from the upper 32 bits. Take the
	 * easy way out and mark them unbounded so we can recalculate them
	 * later from the tnum.
	 */
13455 __mark_reg32_unbounded(reg: dst_reg);
13456 __update_reg_bounds(reg: dst_reg);
13457}
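
/* Example for the constant arithmetic shift above: with r1 in [-8, 4],
 *
 *   r1 s>>= 1;
 *
 * gives [-4, 2], since an arithmetic shift preserves the sign. The unsigned
 * bounds, however, cannot be shifted the same way (-8 is a huge u64 value),
 * so they are cleared and recomputed from the resulting var_off.
 */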
13458
13459/* WARNING: This function does calculations on 64-bit values, but the actual
13460 * execution may occur on 32-bit values. Therefore, things like bitshifts
13461 * need extra checks in the 32-bit case.
13462 */
13463static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
13464 struct bpf_insn *insn,
13465 struct bpf_reg_state *dst_reg,
13466 struct bpf_reg_state src_reg)
13467{
13468 struct bpf_reg_state *regs = cur_regs(env);
13469 u8 opcode = BPF_OP(insn->code);
13470 bool src_known;
13471 s64 smin_val, smax_val;
13472 u64 umin_val, umax_val;
13473 s32 s32_min_val, s32_max_val;
13474 u32 u32_min_val, u32_max_val;
13475 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
13476 bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
13477 int ret;
13478
13479 smin_val = src_reg.smin_value;
13480 smax_val = src_reg.smax_value;
13481 umin_val = src_reg.umin_value;
13482 umax_val = src_reg.umax_value;
13483
13484 s32_min_val = src_reg.s32_min_value;
13485 s32_max_val = src_reg.s32_max_value;
13486 u32_min_val = src_reg.u32_min_value;
13487 u32_max_val = src_reg.u32_max_value;
13488
13489 if (alu32) {
13490 src_known = tnum_subreg_is_const(a: src_reg.var_off);
13491 if ((src_known &&
13492 (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
13493 s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
13494 /* Taint dst register if offset had invalid bounds
13495 * derived from e.g. dead branches.
13496 */
13497 __mark_reg_unknown(env, reg: dst_reg);
13498 return 0;
13499 }
13500 } else {
13501 src_known = tnum_is_const(a: src_reg.var_off);
13502 if ((src_known &&
13503 (smin_val != smax_val || umin_val != umax_val)) ||
13504 smin_val > smax_val || umin_val > umax_val) {
13505 /* Taint dst register if offset had invalid bounds
13506 * derived from e.g. dead branches.
13507 */
13508 __mark_reg_unknown(env, reg: dst_reg);
13509 return 0;
13510 }
13511 }
13512
13513 if (!src_known &&
13514 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
13515 __mark_reg_unknown(env, reg: dst_reg);
13516 return 0;
13517 }
13518
13519 if (sanitize_needed(opcode)) {
13520 ret = sanitize_val_alu(env, insn);
13521 if (ret < 0)
13522 return sanitize_err(env, insn, reason: ret, NULL, NULL);
13523 }
13524
	/* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
	 * There are two classes of instructions: for the first class we track
	 * both alu32 and alu64 sign/unsigned bounds independently, which
	 * provides the greatest amount of precision when alu operations are
	 * mixed with jmp32 operations. These operations are BPF_ADD, BPF_SUB,
	 * BPF_MUL, BPF_AND, BPF_OR and BPF_XOR. This is possible because these
	 * ops have fairly easy to understand and calculate behavior in both
	 * 32-bit and 64-bit alu ops. See the alu32 verifier tests for examples.
	 * The second class of operations, BPF_LSH, BPF_RSH, and BPF_ARSH,
	 * however, are not so easy with regard to tracking sign/unsigned bounds
	 * because the bits may cross subreg boundaries in the alu64 case. When
	 * this happens we mark the reg unbounded in the subreg bound space and
	 * use the resulting tnum to calculate an approximation of the
	 * sign/unsigned bounds.
	 */
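	/* For instance, a 64-bit "r1 >>= 8" moves bits from the upper half
	 * into the low 32-bit subregister, so the old subreg bounds cannot
	 * simply be shifted; they are marked unbounded and recomputed from
	 * the resulting tnum by the bound-sync helpers below.
	 */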
13539 switch (opcode) {
13540 case BPF_ADD:
13541 scalar32_min_max_add(dst_reg, src_reg: &src_reg);
13542 scalar_min_max_add(dst_reg, src_reg: &src_reg);
13543 dst_reg->var_off = tnum_add(a: dst_reg->var_off, b: src_reg.var_off);
13544 break;
13545 case BPF_SUB:
13546 scalar32_min_max_sub(dst_reg, src_reg: &src_reg);
13547 scalar_min_max_sub(dst_reg, src_reg: &src_reg);
13548 dst_reg->var_off = tnum_sub(a: dst_reg->var_off, b: src_reg.var_off);
13549 break;
13550 case BPF_MUL:
13551 dst_reg->var_off = tnum_mul(a: dst_reg->var_off, b: src_reg.var_off);
13552 scalar32_min_max_mul(dst_reg, src_reg: &src_reg);
13553 scalar_min_max_mul(dst_reg, src_reg: &src_reg);
13554 break;
13555 case BPF_AND:
13556 dst_reg->var_off = tnum_and(a: dst_reg->var_off, b: src_reg.var_off);
13557 scalar32_min_max_and(dst_reg, src_reg: &src_reg);
13558 scalar_min_max_and(dst_reg, src_reg: &src_reg);
13559 break;
13560 case BPF_OR:
13561 dst_reg->var_off = tnum_or(a: dst_reg->var_off, b: src_reg.var_off);
13562 scalar32_min_max_or(dst_reg, src_reg: &src_reg);
13563 scalar_min_max_or(dst_reg, src_reg: &src_reg);
13564 break;
13565 case BPF_XOR:
13566 dst_reg->var_off = tnum_xor(a: dst_reg->var_off, b: src_reg.var_off);
13567 scalar32_min_max_xor(dst_reg, src_reg: &src_reg);
13568 scalar_min_max_xor(dst_reg, src_reg: &src_reg);
13569 break;
13570 case BPF_LSH:
13571 if (umax_val >= insn_bitness) {
13572 /* Shifts greater than 31 or 63 are undefined.
13573 * This includes shifts by a negative number.
13574 */
13575 mark_reg_unknown(env, regs, regno: insn->dst_reg);
13576 break;
13577 }
13578 if (alu32)
13579 scalar32_min_max_lsh(dst_reg, src_reg: &src_reg);
13580 else
13581 scalar_min_max_lsh(dst_reg, src_reg: &src_reg);
13582 break;
13583 case BPF_RSH:
13584 if (umax_val >= insn_bitness) {
13585 /* Shifts greater than 31 or 63 are undefined.
13586 * This includes shifts by a negative number.
13587 */
13588 mark_reg_unknown(env, regs, regno: insn->dst_reg);
13589 break;
13590 }
13591 if (alu32)
13592 scalar32_min_max_rsh(dst_reg, src_reg: &src_reg);
13593 else
13594 scalar_min_max_rsh(dst_reg, src_reg: &src_reg);
13595 break;
13596 case BPF_ARSH:
13597 if (umax_val >= insn_bitness) {
13598 /* Shifts greater than 31 or 63 are undefined.
13599 * This includes shifts by a negative number.
13600 */
13601 mark_reg_unknown(env, regs, regno: insn->dst_reg);
13602 break;
13603 }
13604 if (alu32)
13605 scalar32_min_max_arsh(dst_reg, src_reg: &src_reg);
13606 else
13607 scalar_min_max_arsh(dst_reg, src_reg: &src_reg);
13608 break;
13609 default:
13610 mark_reg_unknown(env, regs, regno: insn->dst_reg);
13611 break;
13612 }
13613
13614 /* ALU32 ops are zero extended into 64bit register */
13615 if (alu32)
13616 zext_32_to_64(reg: dst_reg);
13617 reg_bounds_sync(reg: dst_reg);
13618 return 0;
13619}
13620
13621/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
13622 * and var_off.
13623 */
13624static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
13625 struct bpf_insn *insn)
13626{
13627 struct bpf_verifier_state *vstate = env->cur_state;
13628 struct bpf_func_state *state = vstate->frame[vstate->curframe];
13629 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
13630 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
13631 u8 opcode = BPF_OP(insn->code);
13632 int err;
13633
13634 dst_reg = &regs[insn->dst_reg];
13635 src_reg = NULL;
13636 if (dst_reg->type != SCALAR_VALUE)
13637 ptr_reg = dst_reg;
13638 else
13639 /* Make sure ID is cleared otherwise dst_reg min/max could be
13640 * incorrectly propagated into other registers by find_equal_scalars()
13641 */
13642 dst_reg->id = 0;
13643 if (BPF_SRC(insn->code) == BPF_X) {
13644 src_reg = &regs[insn->src_reg];
13645 if (src_reg->type != SCALAR_VALUE) {
13646 if (dst_reg->type != SCALAR_VALUE) {
13647 /* Combining two pointers by any ALU op yields
13648 * an arbitrary scalar. Disallow all math except
13649 * pointer subtraction
13650 */
13651 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
13652 mark_reg_unknown(env, regs, regno: insn->dst_reg);
13653 return 0;
13654 }
13655 verbose(private_data: env, fmt: "R%d pointer %s pointer prohibited\n",
13656 insn->dst_reg,
13657 bpf_alu_string[opcode >> 4]);
13658 return -EACCES;
13659 } else {
13660 /* scalar += pointer
13661 * This is legal, but we have to reverse our
13662 * src/dest handling in computing the range
13663 */
13664 err = mark_chain_precision(env, regno: insn->dst_reg);
13665 if (err)
13666 return err;
13667 return adjust_ptr_min_max_vals(env, insn,
13668 ptr_reg: src_reg, off_reg: dst_reg);
13669 }
13670 } else if (ptr_reg) {
13671 /* pointer += scalar */
13672 err = mark_chain_precision(env, regno: insn->src_reg);
13673 if (err)
13674 return err;
13675 return adjust_ptr_min_max_vals(env, insn,
13676 ptr_reg: dst_reg, off_reg: src_reg);
13677 } else if (dst_reg->precise) {
13678 /* if dst_reg is precise, src_reg should be precise as well */
13679 err = mark_chain_precision(env, regno: insn->src_reg);
13680 if (err)
13681 return err;
13682 }
13683 } else {
13684 /* Pretend the src is a reg with a known value, since we only
13685 * need to be able to read from this state.
13686 */
13687 off_reg.type = SCALAR_VALUE;
13688 __mark_reg_known(reg: &off_reg, imm: insn->imm);
13689 src_reg = &off_reg;
13690 if (ptr_reg) /* pointer += K */
13691 return adjust_ptr_min_max_vals(env, insn,
13692 ptr_reg, off_reg: src_reg);
13693 }
13694
13695 /* Got here implies adding two SCALAR_VALUEs */
13696 if (WARN_ON_ONCE(ptr_reg)) {
13697 print_verifier_state(env, state, print_all: true);
13698 verbose(private_data: env, fmt: "verifier internal error: unexpected ptr_reg\n");
13699 return -EINVAL;
13700 }
13701 if (WARN_ON(!src_reg)) {
13702 print_verifier_state(env, state, print_all: true);
13703 verbose(private_data: env, fmt: "verifier internal error: no src_reg\n");
13704 return -EINVAL;
13705 }
13706 return adjust_scalar_min_max_vals(env, insn, dst_reg, src_reg: *src_reg);
13707}
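
/* Example of the operand reversal above: for "scalar += pointer", e.g.
 *
 *   r1 = -8;
 *   r1 += r10;
 *
 * dst_reg (r1) is the scalar and src_reg (r10) the pointer, so
 * adjust_ptr_min_max_vals() is called with the roles swapped and r1 ends up
 * as a stack pointer at fp-8, just as if "r1 = r10; r1 += -8;" had been used.
 */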
13708
13709/* check validity of 32-bit and 64-bit arithmetic operations */
13710static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
13711{
13712 struct bpf_reg_state *regs = cur_regs(env);
13713 u8 opcode = BPF_OP(insn->code);
13714 int err;
13715
13716 if (opcode == BPF_END || opcode == BPF_NEG) {
13717 if (opcode == BPF_NEG) {
13718 if (BPF_SRC(insn->code) != BPF_K ||
13719 insn->src_reg != BPF_REG_0 ||
13720 insn->off != 0 || insn->imm != 0) {
13721 verbose(private_data: env, fmt: "BPF_NEG uses reserved fields\n");
13722 return -EINVAL;
13723 }
13724 } else {
13725 if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
13726 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
13727 (BPF_CLASS(insn->code) == BPF_ALU64 &&
13728 BPF_SRC(insn->code) != BPF_TO_LE)) {
13729 verbose(private_data: env, fmt: "BPF_END uses reserved fields\n");
13730 return -EINVAL;
13731 }
13732 }
13733
13734 /* check src operand */
13735 err = check_reg_arg(env, regno: insn->dst_reg, t: SRC_OP);
13736 if (err)
13737 return err;
13738
13739 if (is_pointer_value(env, regno: insn->dst_reg)) {
13740 verbose(private_data: env, fmt: "R%d pointer arithmetic prohibited\n",
13741 insn->dst_reg);
13742 return -EACCES;
13743 }
13744
13745 /* check dest operand */
13746 err = check_reg_arg(env, regno: insn->dst_reg, t: DST_OP);
13747 if (err)
13748 return err;
13749
13750 } else if (opcode == BPF_MOV) {
13751
13752 if (BPF_SRC(insn->code) == BPF_X) {
13753 if (insn->imm != 0) {
13754 verbose(private_data: env, fmt: "BPF_MOV uses reserved fields\n");
13755 return -EINVAL;
13756 }
13757
13758 if (BPF_CLASS(insn->code) == BPF_ALU) {
13759 if (insn->off != 0 && insn->off != 8 && insn->off != 16) {
13760 verbose(private_data: env, fmt: "BPF_MOV uses reserved fields\n");
13761 return -EINVAL;
13762 }
13763 } else {
13764 if (insn->off != 0 && insn->off != 8 && insn->off != 16 &&
13765 insn->off != 32) {
13766 verbose(private_data: env, fmt: "BPF_MOV uses reserved fields\n");
13767 return -EINVAL;
13768 }
13769 }
13770
13771 /* check src operand */
13772 err = check_reg_arg(env, regno: insn->src_reg, t: SRC_OP);
13773 if (err)
13774 return err;
13775 } else {
13776 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
13777 verbose(private_data: env, fmt: "BPF_MOV uses reserved fields\n");
13778 return -EINVAL;
13779 }
13780 }
13781
13782 /* check dest operand, mark as required later */
13783 err = check_reg_arg(env, regno: insn->dst_reg, t: DST_OP_NO_MARK);
13784 if (err)
13785 return err;
13786
13787 if (BPF_SRC(insn->code) == BPF_X) {
13788 struct bpf_reg_state *src_reg = regs + insn->src_reg;
13789 struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
13790 bool need_id = src_reg->type == SCALAR_VALUE && !src_reg->id &&
13791 !tnum_is_const(a: src_reg->var_off);
13792
13793 if (BPF_CLASS(insn->code) == BPF_ALU64) {
13794 if (insn->off == 0) {
13795 /* case: R1 = R2
13796 * copy register state to dest reg
13797 */
13798 if (need_id)
13799 /* Assign src and dst registers the same ID
13800 * that will be used by find_equal_scalars()
13801 * to propagate min/max range.
13802 */
13803 src_reg->id = ++env->id_gen;
13804 copy_register_state(dst: dst_reg, src: src_reg);
13805 dst_reg->live |= REG_LIVE_WRITTEN;
13806 dst_reg->subreg_def = DEF_NOT_SUBREG;
13807 } else {
					/* case: R1 = (s8, s16, s32)R2 */
13809 if (is_pointer_value(env, regno: insn->src_reg)) {
13810 verbose(private_data: env,
13811 fmt: "R%d sign-extension part of pointer\n",
13812 insn->src_reg);
13813 return -EACCES;
13814 } else if (src_reg->type == SCALAR_VALUE) {
13815 bool no_sext;
13816
13817 no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
13818 if (no_sext && need_id)
13819 src_reg->id = ++env->id_gen;
13820 copy_register_state(dst: dst_reg, src: src_reg);
13821 if (!no_sext)
13822 dst_reg->id = 0;
13823 coerce_reg_to_size_sx(reg: dst_reg, size: insn->off >> 3);
13824 dst_reg->live |= REG_LIVE_WRITTEN;
13825 dst_reg->subreg_def = DEF_NOT_SUBREG;
13826 } else {
13827 mark_reg_unknown(env, regs, regno: insn->dst_reg);
13828 }
13829 }
13830 } else {
13831 /* R1 = (u32) R2 */
13832 if (is_pointer_value(env, regno: insn->src_reg)) {
13833 verbose(private_data: env,
13834 fmt: "R%d partial copy of pointer\n",
13835 insn->src_reg);
13836 return -EACCES;
13837 } else if (src_reg->type == SCALAR_VALUE) {
13838 if (insn->off == 0) {
13839 bool is_src_reg_u32 = src_reg->umax_value <= U32_MAX;
13840
13841 if (is_src_reg_u32 && need_id)
13842 src_reg->id = ++env->id_gen;
13843 copy_register_state(dst: dst_reg, src: src_reg);
13844 /* Make sure ID is cleared if src_reg is not in u32
13845 * range otherwise dst_reg min/max could be incorrectly
13846 * propagated into src_reg by find_equal_scalars()
13847 */
13848 if (!is_src_reg_u32)
13849 dst_reg->id = 0;
13850 dst_reg->live |= REG_LIVE_WRITTEN;
13851 dst_reg->subreg_def = env->insn_idx + 1;
13852 } else {
13853 /* case: W1 = (s8, s16)W2 */
13854 bool no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
13855
13856 if (no_sext && need_id)
13857 src_reg->id = ++env->id_gen;
13858 copy_register_state(dst: dst_reg, src: src_reg);
13859 if (!no_sext)
13860 dst_reg->id = 0;
13861 dst_reg->live |= REG_LIVE_WRITTEN;
13862 dst_reg->subreg_def = env->insn_idx + 1;
13863 coerce_subreg_to_size_sx(reg: dst_reg, size: insn->off >> 3);
13864 }
13865 } else {
13866 mark_reg_unknown(env, regs,
13867 regno: insn->dst_reg);
13868 }
13869 zext_32_to_64(reg: dst_reg);
13870 reg_bounds_sync(reg: dst_reg);
13871 }
13872 } else {
13873 /* case: R = imm
13874 * remember the value we stored into this reg
13875 */
13876 /* clear any state __mark_reg_known doesn't set */
13877 mark_reg_unknown(env, regs, regno: insn->dst_reg);
13878 regs[insn->dst_reg].type = SCALAR_VALUE;
13879 if (BPF_CLASS(insn->code) == BPF_ALU64) {
13880 __mark_reg_known(reg: regs + insn->dst_reg,
13881 imm: insn->imm);
13882 } else {
13883 __mark_reg_known(reg: regs + insn->dst_reg,
13884 imm: (u32)insn->imm);
13885 }
13886 }
13887
13888 } else if (opcode > BPF_END) {
13889 verbose(private_data: env, fmt: "invalid BPF_ALU opcode %x\n", opcode);
13890 return -EINVAL;
13891
13892 } else { /* all other ALU ops: and, sub, xor, add, ... */
13893
13894 if (BPF_SRC(insn->code) == BPF_X) {
13895 if (insn->imm != 0 || insn->off > 1 ||
13896 (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
13897 verbose(private_data: env, fmt: "BPF_ALU uses reserved fields\n");
13898 return -EINVAL;
13899 }
13900 /* check src1 operand */
13901 err = check_reg_arg(env, regno: insn->src_reg, t: SRC_OP);
13902 if (err)
13903 return err;
13904 } else {
13905 if (insn->src_reg != BPF_REG_0 || insn->off > 1 ||
13906 (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
13907 verbose(private_data: env, fmt: "BPF_ALU uses reserved fields\n");
13908 return -EINVAL;
13909 }
13910 }
13911
13912 /* check src2 operand */
13913 err = check_reg_arg(env, regno: insn->dst_reg, t: SRC_OP);
13914 if (err)
13915 return err;
13916
13917 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
13918 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
13919 verbose(private_data: env, fmt: "div by zero\n");
13920 return -EINVAL;
13921 }
13922
13923 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
13924 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
13925 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
13926
13927 if (insn->imm < 0 || insn->imm >= size) {
13928 verbose(private_data: env, fmt: "invalid shift %d\n", insn->imm);
13929 return -EINVAL;
13930 }
13931 }
13932
13933 /* check dest operand */
13934 err = check_reg_arg(env, regno: insn->dst_reg, t: DST_OP_NO_MARK);
13935 if (err)
13936 return err;
13937
13938 return adjust_reg_min_max_vals(env, insn);
13939 }
13940
13941 return 0;
13942}
13943
13944static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
13945 struct bpf_reg_state *dst_reg,
13946 enum bpf_reg_type type,
13947 bool range_right_open)
13948{
13949 struct bpf_func_state *state;
13950 struct bpf_reg_state *reg;
13951 int new_range;
13952
13953 if (dst_reg->off < 0 ||
13954 (dst_reg->off == 0 && range_right_open))
13955 /* This doesn't give us any range */
13956 return;
13957
13958 if (dst_reg->umax_value > MAX_PACKET_OFF ||
13959 dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
13960 /* Risk of overflow. For instance, ptr + (1<<63) may be less
13961 * than pkt_end, but that's because it's also less than pkt.
13962 */
13963 return;
13964
13965 new_range = dst_reg->off;
13966 if (range_right_open)
13967 new_range++;
13968
13969 /* Examples for register markings:
13970 *
13971 * pkt_data in dst register:
13972 *
13973 * r2 = r3;
13974 * r2 += 8;
13975 * if (r2 > pkt_end) goto <handle exception>
13976 * <access okay>
13977 *
13978 * r2 = r3;
13979 * r2 += 8;
13980 * if (r2 < pkt_end) goto <access okay>
13981 * <handle exception>
13982 *
13983 * Where:
13984 * r2 == dst_reg, pkt_end == src_reg
13985 * r2=pkt(id=n,off=8,r=0)
13986 * r3=pkt(id=n,off=0,r=0)
13987 *
13988 * pkt_data in src register:
13989 *
13990 * r2 = r3;
13991 * r2 += 8;
13992 * if (pkt_end >= r2) goto <access okay>
13993 * <handle exception>
13994 *
13995 * r2 = r3;
13996 * r2 += 8;
13997 * if (pkt_end <= r2) goto <handle exception>
13998 * <access okay>
13999 *
14000 * Where:
14001 * pkt_end == dst_reg, r2 == src_reg
14002 * r2=pkt(id=n,off=8,r=0)
14003 * r3=pkt(id=n,off=0,r=0)
14004 *
14005 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
14006 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
14007 * and [r3, r3 + 8-1) respectively is safe to access depending on
14008 * the check.
14009 */
14010
14011 /* If our ids match, then we must have the same max_value. And we
14012 * don't care about the other reg's fixed offset, since if it's too big
14013 * the range won't allow anything.
14014 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
14015 */
14016 bpf_for_each_reg_in_vstate(vstate, state, reg, ({
14017 if (reg->type == type && reg->id == dst_reg->id)
14018 /* keep the maximum range already checked */
14019 reg->range = max(reg->range, new_range);
14020 }));
14021}
14022
14023static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
14024{
14025 struct tnum subreg = tnum_subreg(a: reg->var_off);
14026 s32 sval = (s32)val;
14027
14028 switch (opcode) {
14029 case BPF_JEQ:
14030 if (tnum_is_const(a: subreg))
14031 return !!tnum_equals_const(a: subreg, b: val);
14032 else if (val < reg->u32_min_value || val > reg->u32_max_value)
14033 return 0;
14034 else if (sval < reg->s32_min_value || sval > reg->s32_max_value)
14035 return 0;
14036 break;
14037 case BPF_JNE:
14038 if (tnum_is_const(a: subreg))
14039 return !tnum_equals_const(a: subreg, b: val);
14040 else if (val < reg->u32_min_value || val > reg->u32_max_value)
14041 return 1;
14042 else if (sval < reg->s32_min_value || sval > reg->s32_max_value)
14043 return 1;
14044 break;
14045 case BPF_JSET:
14046 if ((~subreg.mask & subreg.value) & val)
14047 return 1;
14048 if (!((subreg.mask | subreg.value) & val))
14049 return 0;
14050 break;
14051 case BPF_JGT:
14052 if (reg->u32_min_value > val)
14053 return 1;
14054 else if (reg->u32_max_value <= val)
14055 return 0;
14056 break;
14057 case BPF_JSGT:
14058 if (reg->s32_min_value > sval)
14059 return 1;
14060 else if (reg->s32_max_value <= sval)
14061 return 0;
14062 break;
14063 case BPF_JLT:
14064 if (reg->u32_max_value < val)
14065 return 1;
14066 else if (reg->u32_min_value >= val)
14067 return 0;
14068 break;
14069 case BPF_JSLT:
14070 if (reg->s32_max_value < sval)
14071 return 1;
14072 else if (reg->s32_min_value >= sval)
14073 return 0;
14074 break;
14075 case BPF_JGE:
14076 if (reg->u32_min_value >= val)
14077 return 1;
14078 else if (reg->u32_max_value < val)
14079 return 0;
14080 break;
14081 case BPF_JSGE:
14082 if (reg->s32_min_value >= sval)
14083 return 1;
14084 else if (reg->s32_max_value < sval)
14085 return 0;
14086 break;
14087 case BPF_JLE:
14088 if (reg->u32_max_value <= val)
14089 return 1;
14090 else if (reg->u32_min_value > val)
14091 return 0;
14092 break;
14093 case BPF_JSLE:
14094 if (reg->s32_max_value <= sval)
14095 return 1;
14096 else if (reg->s32_min_value > sval)
14097 return 0;
14098 break;
14099 }
14100
14101 return -1;
14102}
14103
14104
14105static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
14106{
14107 s64 sval = (s64)val;
14108
14109 switch (opcode) {
14110 case BPF_JEQ:
14111 if (tnum_is_const(a: reg->var_off))
14112 return !!tnum_equals_const(a: reg->var_off, b: val);
14113 else if (val < reg->umin_value || val > reg->umax_value)
14114 return 0;
14115 else if (sval < reg->smin_value || sval > reg->smax_value)
14116 return 0;
14117 break;
14118 case BPF_JNE:
14119 if (tnum_is_const(a: reg->var_off))
14120 return !tnum_equals_const(a: reg->var_off, b: val);
14121 else if (val < reg->umin_value || val > reg->umax_value)
14122 return 1;
14123 else if (sval < reg->smin_value || sval > reg->smax_value)
14124 return 1;
14125 break;
14126 case BPF_JSET:
14127 if ((~reg->var_off.mask & reg->var_off.value) & val)
14128 return 1;
14129 if (!((reg->var_off.mask | reg->var_off.value) & val))
14130 return 0;
14131 break;
14132 case BPF_JGT:
14133 if (reg->umin_value > val)
14134 return 1;
14135 else if (reg->umax_value <= val)
14136 return 0;
14137 break;
14138 case BPF_JSGT:
14139 if (reg->smin_value > sval)
14140 return 1;
14141 else if (reg->smax_value <= sval)
14142 return 0;
14143 break;
14144 case BPF_JLT:
14145 if (reg->umax_value < val)
14146 return 1;
14147 else if (reg->umin_value >= val)
14148 return 0;
14149 break;
14150 case BPF_JSLT:
14151 if (reg->smax_value < sval)
14152 return 1;
14153 else if (reg->smin_value >= sval)
14154 return 0;
14155 break;
14156 case BPF_JGE:
14157 if (reg->umin_value >= val)
14158 return 1;
14159 else if (reg->umax_value < val)
14160 return 0;
14161 break;
14162 case BPF_JSGE:
14163 if (reg->smin_value >= sval)
14164 return 1;
14165 else if (reg->smax_value < sval)
14166 return 0;
14167 break;
14168 case BPF_JLE:
14169 if (reg->umax_value <= val)
14170 return 1;
14171 else if (reg->umin_value > val)
14172 return 0;
14173 break;
14174 case BPF_JSLE:
14175 if (reg->smax_value <= sval)
14176 return 1;
14177 else if (reg->smin_value > sval)
14178 return 0;
14179 break;
14180 }
14181
14182 return -1;
14183}
14184
14185/* compute branch direction of the expression "if (reg opcode val) goto target;"
14186 * and return:
14187 * 1 - branch will be taken and "goto target" will be executed
14188 * 0 - branch will not be taken and fall-through to next insn
 * -1 - unknown. Example: "if (reg < 5)" is unknown when the register's
 *      value range is [0,10]
14191 */
14192static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
14193 bool is_jmp32)
14194{
14195 if (__is_pointer_value(allow_ptr_leaks: false, reg)) {
14196 if (!reg_not_null(reg))
14197 return -1;
14198
		/* If the pointer is valid, tests against zero will fail, so
		 * we can use this to determine the branch direction.
		 */
14202 if (val != 0)
14203 return -1;
14204
14205 switch (opcode) {
14206 case BPF_JEQ:
14207 return 0;
14208 case BPF_JNE:
14209 return 1;
14210 default:
14211 return -1;
14212 }
14213 }
14214
14215 if (is_jmp32)
14216 return is_branch32_taken(reg, val, opcode);
14217 return is_branch64_taken(reg, val, opcode);
14218}
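
/* Examples: for a pointer already known to be non-NULL, "if (r0 == 0)" is
 * known not taken (returns 0) and "if (r0 != 0)" is known taken (returns 1).
 * For a scalar with range [0, 3], "if (r1 < 5)" returns 1, while with range
 * [0, 10] the same test returns -1 since either outcome is possible.
 */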
14219
14220static int flip_opcode(u32 opcode)
14221{
14222 /* How can we transform "a <op> b" into "b <op> a"? */
14223 static const u8 opcode_flip[16] = {
14224 /* these stay the same */
14225 [BPF_JEQ >> 4] = BPF_JEQ,
14226 [BPF_JNE >> 4] = BPF_JNE,
14227 [BPF_JSET >> 4] = BPF_JSET,
14228 /* these swap "lesser" and "greater" (L and G in the opcodes) */
14229 [BPF_JGE >> 4] = BPF_JLE,
14230 [BPF_JGT >> 4] = BPF_JLT,
14231 [BPF_JLE >> 4] = BPF_JGE,
14232 [BPF_JLT >> 4] = BPF_JGT,
14233 [BPF_JSGE >> 4] = BPF_JSLE,
14234 [BPF_JSGT >> 4] = BPF_JSLT,
14235 [BPF_JSLE >> 4] = BPF_JSGE,
14236 [BPF_JSLT >> 4] = BPF_JSGT
14237 };
14238 return opcode_flip[opcode >> 4];
14239}
14240
14241static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
14242 struct bpf_reg_state *src_reg,
14243 u8 opcode)
14244{
14245 struct bpf_reg_state *pkt;
14246
14247 if (src_reg->type == PTR_TO_PACKET_END) {
14248 pkt = dst_reg;
14249 } else if (dst_reg->type == PTR_TO_PACKET_END) {
14250 pkt = src_reg;
14251 opcode = flip_opcode(opcode);
14252 } else {
14253 return -1;
14254 }
14255
14256 if (pkt->range >= 0)
14257 return -1;
14258
14259 switch (opcode) {
14260 case BPF_JLE:
14261 /* pkt <= pkt_end */
14262 fallthrough;
14263 case BPF_JGT:
14264 /* pkt > pkt_end */
14265 if (pkt->range == BEYOND_PKT_END)
			/* pkt has at least one extra byte beyond pkt_end */
14267 return opcode == BPF_JGT;
14268 break;
14269 case BPF_JLT:
14270 /* pkt < pkt_end */
14271 fallthrough;
14272 case BPF_JGE:
14273 /* pkt >= pkt_end */
14274 if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
14275 return opcode == BPF_JGE;
14276 break;
14277 }
14278 return -1;
14279}
14280
14281/* Adjusts the register min/max values in the case that the dst_reg is the
14282 * variable register that we are working on, and src_reg is a constant or we're
14283 * simply doing a BPF_K check.
14284 * In JEQ/JNE cases we also adjust the var_off values.
14285 */
14286static void reg_set_min_max(struct bpf_reg_state *true_reg,
14287 struct bpf_reg_state *false_reg,
14288 u64 val, u32 val32,
14289 u8 opcode, bool is_jmp32)
14290{
14291 struct tnum false_32off = tnum_subreg(a: false_reg->var_off);
14292 struct tnum false_64off = false_reg->var_off;
14293 struct tnum true_32off = tnum_subreg(a: true_reg->var_off);
14294 struct tnum true_64off = true_reg->var_off;
14295 s64 sval = (s64)val;
14296 s32 sval32 = (s32)val32;
14297
14298 /* If the dst_reg is a pointer, we can't learn anything about its
14299 * variable offset from the compare (unless src_reg were a pointer into
	 * the same object, but we don't bother with that).
14301 * Since false_reg and true_reg have the same type by construction, we
14302 * only need to check one of them for pointerness.
14303 */
14304 if (__is_pointer_value(allow_ptr_leaks: false, reg: false_reg))
14305 return;
14306
14307 switch (opcode) {
14308 /* JEQ/JNE comparison doesn't change the register equivalence.
14309 *
14310 * r1 = r2;
14311 * if (r1 == 42) goto label;
14312 * ...
14313 * label: // here both r1 and r2 are known to be 42.
14314 *
	 * Hence when marking a register as known, preserve its ID.
14316 */
14317 case BPF_JEQ:
14318 if (is_jmp32) {
14319 __mark_reg32_known(reg: true_reg, imm: val32);
14320 true_32off = tnum_subreg(a: true_reg->var_off);
14321 } else {
14322 ___mark_reg_known(reg: true_reg, imm: val);
14323 true_64off = true_reg->var_off;
14324 }
14325 break;
14326 case BPF_JNE:
14327 if (is_jmp32) {
14328 __mark_reg32_known(reg: false_reg, imm: val32);
14329 false_32off = tnum_subreg(a: false_reg->var_off);
14330 } else {
14331 ___mark_reg_known(reg: false_reg, imm: val);
14332 false_64off = false_reg->var_off;
14333 }
14334 break;
14335 case BPF_JSET:
14336 if (is_jmp32) {
14337 false_32off = tnum_and(a: false_32off, b: tnum_const(value: ~val32));
14338 if (is_power_of_2(n: val32))
14339 true_32off = tnum_or(a: true_32off,
14340 b: tnum_const(value: val32));
14341 } else {
14342 false_64off = tnum_and(a: false_64off, b: tnum_const(value: ~val));
14343 if (is_power_of_2(n: val))
14344 true_64off = tnum_or(a: true_64off,
14345 b: tnum_const(value: val));
14346 }
14347 break;
14348 case BPF_JGE:
14349 case BPF_JGT:
14350 {
14351 if (is_jmp32) {
14352 u32 false_umax = opcode == BPF_JGT ? val32 : val32 - 1;
14353 u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
14354
14355 false_reg->u32_max_value = min(false_reg->u32_max_value,
14356 false_umax);
14357 true_reg->u32_min_value = max(true_reg->u32_min_value,
14358 true_umin);
14359 } else {
14360 u64 false_umax = opcode == BPF_JGT ? val : val - 1;
14361 u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
14362
14363 false_reg->umax_value = min(false_reg->umax_value, false_umax);
14364 true_reg->umin_value = max(true_reg->umin_value, true_umin);
14365 }
14366 break;
14367 }
14368 case BPF_JSGE:
14369 case BPF_JSGT:
14370 {
14371 if (is_jmp32) {
14372 s32 false_smax = opcode == BPF_JSGT ? sval32 : sval32 - 1;
14373 s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
14374
14375 false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
14376 true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
14377 } else {
14378 s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1;
14379 s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
14380
14381 false_reg->smax_value = min(false_reg->smax_value, false_smax);
14382 true_reg->smin_value = max(true_reg->smin_value, true_smin);
14383 }
14384 break;
14385 }
14386 case BPF_JLE:
14387 case BPF_JLT:
14388 {
14389 if (is_jmp32) {
14390 u32 false_umin = opcode == BPF_JLT ? val32 : val32 + 1;
14391 u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
14392
14393 false_reg->u32_min_value = max(false_reg->u32_min_value,
14394 false_umin);
14395 true_reg->u32_max_value = min(true_reg->u32_max_value,
14396 true_umax);
14397 } else {
14398 u64 false_umin = opcode == BPF_JLT ? val : val + 1;
14399 u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
14400
14401 false_reg->umin_value = max(false_reg->umin_value, false_umin);
14402 true_reg->umax_value = min(true_reg->umax_value, true_umax);
14403 }
14404 break;
14405 }
14406 case BPF_JSLE:
14407 case BPF_JSLT:
14408 {
14409 if (is_jmp32) {
14410 s32 false_smin = opcode == BPF_JSLT ? sval32 : sval32 + 1;
14411 s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
14412
14413 false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
14414 true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
14415 } else {
14416 s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1;
14417 s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
14418
14419 false_reg->smin_value = max(false_reg->smin_value, false_smin);
14420 true_reg->smax_value = min(true_reg->smax_value, true_smax);
14421 }
14422 break;
14423 }
14424 default:
14425 return;
14426 }
14427
14428 if (is_jmp32) {
14429 false_reg->var_off = tnum_or(a: tnum_clear_subreg(a: false_64off),
14430 b: tnum_subreg(a: false_32off));
14431 true_reg->var_off = tnum_or(a: tnum_clear_subreg(a: true_64off),
14432 b: tnum_subreg(a: true_32off));
14433 __reg_combine_32_into_64(reg: false_reg);
14434 __reg_combine_32_into_64(reg: true_reg);
14435 } else {
14436 false_reg->var_off = false_64off;
14437 true_reg->var_off = true_64off;
14438 __reg_combine_64_into_32(reg: false_reg);
14439 __reg_combine_64_into_32(reg: true_reg);
14440 }
14441}
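
/* Worked example for the unsigned comparisons above: with r1 in [0, 100],
 *
 *   if r1 > 7 goto target;
 *
 * leaves r1 in [8, 100] in the taken (true) branch (umin raised to val + 1)
 * and in [0, 7] in the fall-through (false) branch (umax lowered to val).
 */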
14442
14443/* Same as above, but for the case that dst_reg holds a constant and src_reg is
14444 * the variable reg.
14445 */
14446static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
14447 struct bpf_reg_state *false_reg,
14448 u64 val, u32 val32,
14449 u8 opcode, bool is_jmp32)
14450{
14451 opcode = flip_opcode(opcode);
14452 /* This uses zero as "not present in table"; luckily the zero opcode,
14453 * BPF_JA, can't get here.
14454 */
14455 if (opcode)
14456 reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
14457}
14458
14459/* Regs are known to be equal, so intersect their min/max/var_off */
14460static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
14461 struct bpf_reg_state *dst_reg)
14462{
14463 src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
14464 dst_reg->umin_value);
14465 src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
14466 dst_reg->umax_value);
14467 src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
14468 dst_reg->smin_value);
14469 src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
14470 dst_reg->smax_value);
14471 src_reg->var_off = dst_reg->var_off = tnum_intersect(a: src_reg->var_off,
14472 b: dst_reg->var_off);
14473 reg_bounds_sync(reg: src_reg);
14474 reg_bounds_sync(reg: dst_reg);
14475}
14476
14477static void reg_combine_min_max(struct bpf_reg_state *true_src,
14478 struct bpf_reg_state *true_dst,
14479 struct bpf_reg_state *false_src,
14480 struct bpf_reg_state *false_dst,
14481 u8 opcode)
14482{
14483 switch (opcode) {
14484 case BPF_JEQ:
14485 __reg_combine_min_max(src_reg: true_src, dst_reg: true_dst);
14486 break;
14487 case BPF_JNE:
14488 __reg_combine_min_max(src_reg: false_src, dst_reg: false_dst);
14489 break;
14490 }
14491}
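
/* Example of combining knowledge on equality: after "if (r1 == r2)" with
 * r1 in [0, 10] and r2 in [5, 20], both registers are known to lie in
 * [5, 10] in the taken branch: the minima are max()ed, the maxima are
 * min()ed and the tnums are intersected.
 */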
14492
14493static void mark_ptr_or_null_reg(struct bpf_func_state *state,
14494 struct bpf_reg_state *reg, u32 id,
14495 bool is_null)
14496{
14497 if (type_may_be_null(type: reg->type) && reg->id == id &&
14498 (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
14499 /* Old offset (both fixed and variable parts) should have been
14500 * known-zero, because we don't allow pointer arithmetic on
14501 * pointers that might be NULL. If we see this happening, don't
14502 * convert the register.
14503 *
		 * But in some cases, helpers that return local kptrs advance
		 * the offset of the returned pointer. In those cases, it is
		 * fine to expect to see a non-zero reg->off.
14507 */
14508 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0)))
14509 return;
14510 if (!(type_is_ptr_alloc_obj(type: reg->type) || type_is_non_owning_ref(type: reg->type)) &&
14511 WARN_ON_ONCE(reg->off))
14512 return;
14513
14514 if (is_null) {
14515 reg->type = SCALAR_VALUE;
14516 /* We don't need id and ref_obj_id from this point
14517 * onwards anymore, thus we should better reset it,
14518 * so that state pruning has chances to take effect.
14519 */
14520 reg->id = 0;
14521 reg->ref_obj_id = 0;
14522
14523 return;
14524 }
14525
14526 mark_ptr_not_null_reg(reg);
14527
14528 if (!reg_may_point_to_spin_lock(reg)) {
14529 /* For not-NULL ptr, reg->ref_obj_id will be reset
14530 * in release_reference().
14531 *
14532 * reg->id is still used by spin_lock ptr. Other
14533 * than spin_lock ptr type, reg->id can be reset.
14534 */
14535 reg->id = 0;
14536 }
14537 }
14538}
14539
14540/* The logic is similar to find_good_pkt_pointers(), both could eventually
14541 * be folded together at some point.
14542 */
14543static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
14544 bool is_null)
14545{
14546 struct bpf_func_state *state = vstate->frame[vstate->curframe];
14547 struct bpf_reg_state *regs = state->regs, *reg;
14548 u32 ref_obj_id = regs[regno].ref_obj_id;
14549 u32 id = regs[regno].id;
14550
14551 if (ref_obj_id && ref_obj_id == id && is_null)
14552 /* regs[regno] is in the " == NULL" branch.
14553 * No one could have freed the reference state before
14554 * doing the NULL check.
14555 */
14556 WARN_ON_ONCE(release_reference_state(state, id));
14557
14558 bpf_for_each_reg_in_vstate(vstate, state, reg, ({
14559 mark_ptr_or_null_reg(state, reg, id, is_null);
14560 }));
14561}
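
/* Example: after "r0 = bpf_map_lookup_elem(...)" the register r0 (and any
 * copy sharing its id) is PTR_TO_MAP_VALUE_OR_NULL. Following
 * "if (r0 == 0)", the null branch downgrades those registers to
 * SCALAR_VALUE and clears id/ref_obj_id, while the other branch drops the
 * MAYBE_NULL flag so r0 becomes a plain PTR_TO_MAP_VALUE.
 */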
14562
14563static bool try_match_pkt_pointers(const struct bpf_insn *insn,
14564 struct bpf_reg_state *dst_reg,
14565 struct bpf_reg_state *src_reg,
14566 struct bpf_verifier_state *this_branch,
14567 struct bpf_verifier_state *other_branch)
14568{
14569 if (BPF_SRC(insn->code) != BPF_X)
14570 return false;
14571
14572 /* Pointers are always 64-bit. */
14573 if (BPF_CLASS(insn->code) == BPF_JMP32)
14574 return false;
14575
14576 switch (BPF_OP(insn->code)) {
14577 case BPF_JGT:
14578 if ((dst_reg->type == PTR_TO_PACKET &&
14579 src_reg->type == PTR_TO_PACKET_END) ||
14580 (dst_reg->type == PTR_TO_PACKET_META &&
14581 reg_is_init_pkt_pointer(reg: src_reg, which: PTR_TO_PACKET))) {
14582 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
14583 find_good_pkt_pointers(vstate: this_branch, dst_reg,
14584 type: dst_reg->type, range_right_open: false);
14585 mark_pkt_end(vstate: other_branch, regn: insn->dst_reg, range_open: true);
14586 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
14587 src_reg->type == PTR_TO_PACKET) ||
14588 (reg_is_init_pkt_pointer(reg: dst_reg, which: PTR_TO_PACKET) &&
14589 src_reg->type == PTR_TO_PACKET_META)) {
14590 /* pkt_end > pkt_data', pkt_data > pkt_meta' */
14591 find_good_pkt_pointers(vstate: other_branch, dst_reg: src_reg,
14592 type: src_reg->type, range_right_open: true);
14593 mark_pkt_end(vstate: this_branch, regn: insn->src_reg, range_open: false);
14594 } else {
14595 return false;
14596 }
14597 break;
14598 case BPF_JLT:
14599 if ((dst_reg->type == PTR_TO_PACKET &&
14600 src_reg->type == PTR_TO_PACKET_END) ||
14601 (dst_reg->type == PTR_TO_PACKET_META &&
14602 reg_is_init_pkt_pointer(reg: src_reg, which: PTR_TO_PACKET))) {
14603 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
14604 find_good_pkt_pointers(vstate: other_branch, dst_reg,
14605 type: dst_reg->type, range_right_open: true);
14606 mark_pkt_end(vstate: this_branch, regn: insn->dst_reg, range_open: false);
14607 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
14608 src_reg->type == PTR_TO_PACKET) ||
14609 (reg_is_init_pkt_pointer(reg: dst_reg, which: PTR_TO_PACKET) &&
14610 src_reg->type == PTR_TO_PACKET_META)) {
14611 /* pkt_end < pkt_data', pkt_data > pkt_meta' */
14612 find_good_pkt_pointers(vstate: this_branch, dst_reg: src_reg,
14613 type: src_reg->type, range_right_open: false);
14614 mark_pkt_end(vstate: other_branch, regn: insn->src_reg, range_open: true);
14615 } else {
14616 return false;
14617 }
14618 break;
14619 case BPF_JGE:
14620 if ((dst_reg->type == PTR_TO_PACKET &&
14621 src_reg->type == PTR_TO_PACKET_END) ||
14622 (dst_reg->type == PTR_TO_PACKET_META &&
14623 reg_is_init_pkt_pointer(reg: src_reg, which: PTR_TO_PACKET))) {
14624 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
14625 find_good_pkt_pointers(vstate: this_branch, dst_reg,
14626 type: dst_reg->type, range_right_open: true);
14627 mark_pkt_end(vstate: other_branch, regn: insn->dst_reg, range_open: false);
14628 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
14629 src_reg->type == PTR_TO_PACKET) ||
14630 (reg_is_init_pkt_pointer(reg: dst_reg, which: PTR_TO_PACKET) &&
14631 src_reg->type == PTR_TO_PACKET_META)) {
14632 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
14633 find_good_pkt_pointers(vstate: other_branch, dst_reg: src_reg,
14634 type: src_reg->type, range_right_open: false);
14635 mark_pkt_end(vstate: this_branch, regn: insn->src_reg, range_open: true);
14636 } else {
14637 return false;
14638 }
14639 break;
14640 case BPF_JLE:
14641 if ((dst_reg->type == PTR_TO_PACKET &&
14642 src_reg->type == PTR_TO_PACKET_END) ||
14643 (dst_reg->type == PTR_TO_PACKET_META &&
14644 reg_is_init_pkt_pointer(reg: src_reg, which: PTR_TO_PACKET))) {
14645 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
14646 find_good_pkt_pointers(vstate: other_branch, dst_reg,
14647 type: dst_reg->type, range_right_open: false);
14648 mark_pkt_end(vstate: this_branch, regn: insn->dst_reg, range_open: true);
14649 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
14650 src_reg->type == PTR_TO_PACKET) ||
14651 (reg_is_init_pkt_pointer(reg: dst_reg, which: PTR_TO_PACKET) &&
14652 src_reg->type == PTR_TO_PACKET_META)) {
14653 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
14654 find_good_pkt_pointers(vstate: this_branch, dst_reg: src_reg,
14655 type: src_reg->type, range_right_open: true);
14656 mark_pkt_end(vstate: other_branch, regn: insn->src_reg, range_open: false);
14657 } else {
14658 return false;
14659 }
14660 break;
14661 default:
14662 return false;
14663 }
14664
14665 return true;
14666}
14667
14668static void find_equal_scalars(struct bpf_verifier_state *vstate,
14669 struct bpf_reg_state *known_reg)
14670{
14671 struct bpf_func_state *state;
14672 struct bpf_reg_state *reg;
14673
14674 bpf_for_each_reg_in_vstate(vstate, state, reg, ({
14675 if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
14676 copy_register_state(reg, known_reg);
14677 }));
14678}
14679
14680static int check_cond_jmp_op(struct bpf_verifier_env *env,
14681 struct bpf_insn *insn, int *insn_idx)
14682{
14683 struct bpf_verifier_state *this_branch = env->cur_state;
14684 struct bpf_verifier_state *other_branch;
14685 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
14686 struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
14687 struct bpf_reg_state *eq_branch_regs;
14688 u8 opcode = BPF_OP(insn->code);
14689 bool is_jmp32;
14690 int pred = -1;
14691 int err;
14692
14693 /* Only conditional jumps are expected to reach here. */
14694 if (opcode == BPF_JA || opcode > BPF_JSLE) {
14695 verbose(private_data: env, fmt: "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
14696 return -EINVAL;
14697 }
14698
14699 /* check src2 operand */
14700 err = check_reg_arg(env, regno: insn->dst_reg, t: SRC_OP);
14701 if (err)
14702 return err;
14703
14704 dst_reg = &regs[insn->dst_reg];
14705 if (BPF_SRC(insn->code) == BPF_X) {
14706 if (insn->imm != 0) {
14707 verbose(private_data: env, fmt: "BPF_JMP/JMP32 uses reserved fields\n");
14708 return -EINVAL;
14709 }
14710
14711 /* check src1 operand */
14712 err = check_reg_arg(env, regno: insn->src_reg, t: SRC_OP);
14713 if (err)
14714 return err;
14715
14716 src_reg = &regs[insn->src_reg];
14717 if (!(reg_is_pkt_pointer_any(reg: dst_reg) && reg_is_pkt_pointer_any(reg: src_reg)) &&
14718 is_pointer_value(env, regno: insn->src_reg)) {
14719 verbose(private_data: env, fmt: "R%d pointer comparison prohibited\n",
14720 insn->src_reg);
14721 return -EACCES;
14722 }
14723 } else {
14724 if (insn->src_reg != BPF_REG_0) {
14725 verbose(private_data: env, fmt: "BPF_JMP/JMP32 uses reserved fields\n");
14726 return -EINVAL;
14727 }
14728 }
14729
14730 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
14731
14732 if (BPF_SRC(insn->code) == BPF_K) {
14733 pred = is_branch_taken(reg: dst_reg, val: insn->imm, opcode, is_jmp32);
14734 } else if (src_reg->type == SCALAR_VALUE &&
14735 is_jmp32 && tnum_is_const(a: tnum_subreg(a: src_reg->var_off))) {
14736 pred = is_branch_taken(reg: dst_reg,
14737 val: tnum_subreg(a: src_reg->var_off).value,
14738 opcode,
14739 is_jmp32);
14740 } else if (src_reg->type == SCALAR_VALUE &&
14741 !is_jmp32 && tnum_is_const(a: src_reg->var_off)) {
14742 pred = is_branch_taken(reg: dst_reg,
14743 val: src_reg->var_off.value,
14744 opcode,
14745 is_jmp32);
14746 } else if (dst_reg->type == SCALAR_VALUE &&
14747 is_jmp32 && tnum_is_const(a: tnum_subreg(a: dst_reg->var_off))) {
14748 pred = is_branch_taken(reg: src_reg,
14749 val: tnum_subreg(a: dst_reg->var_off).value,
14750 opcode: flip_opcode(opcode),
14751 is_jmp32);
14752 } else if (dst_reg->type == SCALAR_VALUE &&
14753 !is_jmp32 && tnum_is_const(a: dst_reg->var_off)) {
14754 pred = is_branch_taken(reg: src_reg,
14755 val: dst_reg->var_off.value,
14756 opcode: flip_opcode(opcode),
14757 is_jmp32);
14758 } else if (reg_is_pkt_pointer_any(reg: dst_reg) &&
14759 reg_is_pkt_pointer_any(reg: src_reg) &&
14760 !is_jmp32) {
14761 pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode);
14762 }
14763
14764 if (pred >= 0) {
14765 /* If we get here with a dst_reg pointer type it is because
14766 * above is_branch_taken() special cased the 0 comparison.
14767 */
14768 if (!__is_pointer_value(allow_ptr_leaks: false, reg: dst_reg))
14769 err = mark_chain_precision(env, regno: insn->dst_reg);
14770 if (BPF_SRC(insn->code) == BPF_X && !err &&
14771 !__is_pointer_value(allow_ptr_leaks: false, reg: src_reg))
14772 err = mark_chain_precision(env, regno: insn->src_reg);
14773 if (err)
14774 return err;
14775 }
14776
14777 if (pred == 1) {
14778 /* Only follow the goto, ignore fall-through. If needed, push
14779 * the fall-through branch for simulation under speculative
14780 * execution.
14781 */
14782 if (!env->bypass_spec_v1 &&
14783 !sanitize_speculative_path(env, insn, next_idx: *insn_idx + 1,
14784 curr_idx: *insn_idx))
14785 return -EFAULT;
14786 if (env->log.level & BPF_LOG_LEVEL)
14787 print_insn_state(env, state: this_branch->frame[this_branch->curframe]);
14788 *insn_idx += insn->off;
14789 return 0;
14790 } else if (pred == 0) {
14791 /* Only follow the fall-through branch, since that's where the
14792 * program will go. If needed, push the goto branch for
14793 * simulation under speculative execution.
14794 */
14795 if (!env->bypass_spec_v1 &&
14796 !sanitize_speculative_path(env, insn,
14797 next_idx: *insn_idx + insn->off + 1,
14798 curr_idx: *insn_idx))
14799 return -EFAULT;
14800 if (env->log.level & BPF_LOG_LEVEL)
14801 print_insn_state(env, state: this_branch->frame[this_branch->curframe]);
14802 return 0;
14803 }
14804
14805 other_branch = push_stack(env, insn_idx: *insn_idx + insn->off + 1, prev_insn_idx: *insn_idx,
14806 speculative: false);
14807 if (!other_branch)
14808 return -EFAULT;
14809 other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
14810
14811 /* detect if we are comparing against a constant value so we can adjust
14812 * our min/max values for our dst register.
14813 * this is only legit if both are scalars (or pointers to the same
14814 * object, I suppose, see the PTR_MAYBE_NULL related if block below),
14815 * because otherwise the different base pointers mean the offsets aren't
14816 * comparable.
14817 */
14818 if (BPF_SRC(insn->code) == BPF_X) {
14819 struct bpf_reg_state *src_reg = &regs[insn->src_reg];
14820
14821 if (dst_reg->type == SCALAR_VALUE &&
14822 src_reg->type == SCALAR_VALUE) {
14823 if (tnum_is_const(a: src_reg->var_off) ||
14824 (is_jmp32 &&
14825 tnum_is_const(a: tnum_subreg(a: src_reg->var_off))))
14826 reg_set_min_max(true_reg: &other_branch_regs[insn->dst_reg],
14827 false_reg: dst_reg,
14828 val: src_reg->var_off.value,
14829 val32: tnum_subreg(a: src_reg->var_off).value,
14830 opcode, is_jmp32);
14831 else if (tnum_is_const(a: dst_reg->var_off) ||
14832 (is_jmp32 &&
14833 tnum_is_const(a: tnum_subreg(a: dst_reg->var_off))))
14834 reg_set_min_max_inv(true_reg: &other_branch_regs[insn->src_reg],
14835 false_reg: src_reg,
14836 val: dst_reg->var_off.value,
14837 val32: tnum_subreg(a: dst_reg->var_off).value,
14838 opcode, is_jmp32);
14839 else if (!is_jmp32 &&
14840 (opcode == BPF_JEQ || opcode == BPF_JNE))
14841 /* Comparing for equality, we can combine knowledge */
14842 reg_combine_min_max(true_src: &other_branch_regs[insn->src_reg],
14843 true_dst: &other_branch_regs[insn->dst_reg],
14844 false_src: src_reg, false_dst: dst_reg, opcode);
14845 if (src_reg->id &&
14846 !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
14847 find_equal_scalars(vstate: this_branch, known_reg: src_reg);
14848 find_equal_scalars(vstate: other_branch, known_reg: &other_branch_regs[insn->src_reg]);
14849 }
14850
14851 }
14852 } else if (dst_reg->type == SCALAR_VALUE) {
14853 reg_set_min_max(true_reg: &other_branch_regs[insn->dst_reg],
14854 false_reg: dst_reg, val: insn->imm, val32: (u32)insn->imm,
14855 opcode, is_jmp32);
14856 }
14857
14858 if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
14859 !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
14860 find_equal_scalars(vstate: this_branch, known_reg: dst_reg);
14861 find_equal_scalars(vstate: other_branch, known_reg: &other_branch_regs[insn->dst_reg]);
14862 }
14863
14864 /* if one pointer register is compared to another pointer
14865 * register check if PTR_MAYBE_NULL could be lifted.
14866 * E.g. register A - maybe null
14867 * register B - not null
14868 * for JNE A, B, ... - A is not null in the false branch;
14869 * for JEQ A, B, ... - A is not null in the true branch.
14870 *
14871 * PTR_TO_BTF_ID points to a kernel struct that does not
14872 * need to be null checked by the BPF program, i.e. it
14873 * could be null even without the PTR_MAYBE_NULL marking,
14874 * so only propagate nullness when neither reg is of that type.
14875 */
14876 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
14877 __is_pointer_value(allow_ptr_leaks: false, reg: src_reg) && __is_pointer_value(allow_ptr_leaks: false, reg: dst_reg) &&
14878 type_may_be_null(type: src_reg->type) != type_may_be_null(type: dst_reg->type) &&
14879 base_type(type: src_reg->type) != PTR_TO_BTF_ID &&
14880 base_type(type: dst_reg->type) != PTR_TO_BTF_ID) {
14881 eq_branch_regs = NULL;
14882 switch (opcode) {
14883 case BPF_JEQ:
14884 eq_branch_regs = other_branch_regs;
14885 break;
14886 case BPF_JNE:
14887 eq_branch_regs = regs;
14888 break;
14889 default:
14890 /* do nothing */
14891 break;
14892 }
14893 if (eq_branch_regs) {
14894 if (type_may_be_null(type: src_reg->type))
14895 mark_ptr_not_null_reg(reg: &eq_branch_regs[insn->src_reg]);
14896 else
14897 mark_ptr_not_null_reg(reg: &eq_branch_regs[insn->dst_reg]);
14898 }
14899 }
14900
14901 /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
14902  * NOTE: the optimizations below relate to pointer comparisons,
14903  * which are never JMP32.
14904  */
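	/* E.g. the canonical pattern handled here is:
	 *   r0 = bpf_map_lookup_elem(...) ; r0 is PTR_TO_MAP_VALUE_OR_NULL
	 *   if r0 == 0 goto out           ; taken branch: r0 is known NULL
	 *   *(u64 *)(r0 + 0) = 1          ; fall-through: r0 is PTR_TO_MAP_VALUE
	 * so the dereference is only accepted on the branch where the NULL
	 * case has been ruled out.
	 */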
14905 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
14906 insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
14907 type_may_be_null(type: dst_reg->type)) {
14908 /* Mark all identical registers in each branch as either
14909  * safe or unknown, depending on the R == 0 or R != 0 condition.
14910  */
14911 mark_ptr_or_null_regs(vstate: this_branch, regno: insn->dst_reg,
14912 is_null: opcode == BPF_JNE);
14913 mark_ptr_or_null_regs(vstate: other_branch, regno: insn->dst_reg,
14914 is_null: opcode == BPF_JEQ);
14915 } else if (!try_match_pkt_pointers(insn, dst_reg, src_reg: &regs[insn->src_reg],
14916 this_branch, other_branch) &&
14917 is_pointer_value(env, regno: insn->dst_reg)) {
14918 verbose(private_data: env, fmt: "R%d pointer comparison prohibited\n",
14919 insn->dst_reg);
14920 return -EACCES;
14921 }
14922 if (env->log.level & BPF_LOG_LEVEL)
14923 print_insn_state(env, state: this_branch->frame[this_branch->curframe]);
14924 return 0;
14925}
14926
14927/* verify BPF_LD_IMM64 instruction */
14928static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
14929{
14930 struct bpf_insn_aux_data *aux = cur_aux(env);
14931 struct bpf_reg_state *regs = cur_regs(env);
14932 struct bpf_reg_state *dst_reg;
14933 struct bpf_map *map;
14934 int err;
14935
14936 if (BPF_SIZE(insn->code) != BPF_DW) {
14937 verbose(private_data: env, fmt: "invalid BPF_LD_IMM insn\n");
14938 return -EINVAL;
14939 }
14940 if (insn->off != 0) {
14941 verbose(private_data: env, fmt: "BPF_LD_IMM64 uses reserved fields\n");
14942 return -EINVAL;
14943 }
14944
14945 err = check_reg_arg(env, regno: insn->dst_reg, t: DST_OP);
14946 if (err)
14947 return err;
14948
14949 dst_reg = &regs[insn->dst_reg];
14950 if (insn->src_reg == 0) {
14951 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
14952
14953 dst_reg->type = SCALAR_VALUE;
14954 __mark_reg_known(reg: &regs[insn->dst_reg], imm);
14955 return 0;
14956 }
14957
14958 /* All special src_reg cases are listed below. From this point onwards
14959 * we either succeed and assign a corresponding dst_reg->type after
14960 * zeroing the offset, or fail and reject the program.
14961 */
14962 mark_reg_known_zero(env, regs, regno: insn->dst_reg);
14963
14964 if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
14965 dst_reg->type = aux->btf_var.reg_type;
14966 switch (base_type(type: dst_reg->type)) {
14967 case PTR_TO_MEM:
14968 dst_reg->mem_size = aux->btf_var.mem_size;
14969 break;
14970 case PTR_TO_BTF_ID:
14971 dst_reg->btf = aux->btf_var.btf;
14972 dst_reg->btf_id = aux->btf_var.btf_id;
14973 break;
14974 default:
14975 verbose(private_data: env, fmt: "bpf verifier is misconfigured\n");
14976 return -EFAULT;
14977 }
14978 return 0;
14979 }
14980
14981 if (insn->src_reg == BPF_PSEUDO_FUNC) {
14982 struct bpf_prog_aux *aux = env->prog->aux;
14983 u32 subprogno = find_subprog(env,
14984 off: env->insn_idx + insn->imm + 1);
14985
14986 if (!aux->func_info) {
14987 verbose(private_data: env, fmt: "missing btf func_info\n");
14988 return -EINVAL;
14989 }
14990 if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
14991 verbose(private_data: env, fmt: "callback function not static\n");
14992 return -EINVAL;
14993 }
14994
14995 dst_reg->type = PTR_TO_FUNC;
14996 dst_reg->subprogno = subprogno;
14997 return 0;
14998 }
14999
15000 map = env->used_maps[aux->map_index];
15001 dst_reg->map_ptr = map;
15002
15003 if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
15004 insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
15005 dst_reg->type = PTR_TO_MAP_VALUE;
15006 dst_reg->off = aux->map_off;
15007 WARN_ON_ONCE(map->max_entries != 1);
15008 /* We want reg->id to stay the same (0), since this map_value is not distinct */
15009 } else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
15010 insn->src_reg == BPF_PSEUDO_MAP_IDX) {
15011 dst_reg->type = CONST_PTR_TO_MAP;
15012 } else {
15013 verbose(private_data: env, fmt: "bpf verifier is misconfigured\n");
15014 return -EINVAL;
15015 }
15016
15017 return 0;
15018}
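
/* For example, BPF_LD_IMM64(BPF_REG_1, 0x1122334455667788ULL) occupies two
 * struct bpf_insn slots: insn[0] carries the low 32 bits in insn->imm and
 * insn[1] carries the high 32 bits in (insn + 1)->imm, which is why the
 * constant is reassembled above as ((u64)(insn + 1)->imm << 32) | (u32)insn->imm.
 * The BPF_PSEUDO_* values in src_reg reuse the same 16-byte encoding but turn
 * the destination into a map, BTF variable or function pointer instead of a
 * scalar.
 */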
15019
15020static bool may_access_skb(enum bpf_prog_type type)
15021{
15022 switch (type) {
15023 case BPF_PROG_TYPE_SOCKET_FILTER:
15024 case BPF_PROG_TYPE_SCHED_CLS:
15025 case BPF_PROG_TYPE_SCHED_ACT:
15026 return true;
15027 default:
15028 return false;
15029 }
15030}
15031
15032/* verify safety of LD_ABS|LD_IND instructions:
15033 * - they can only appear in the programs where ctx == skb
15034 * - since they are wrappers of function calls, they scratch R1-R5 registers,
15035 * preserve R6-R9, and store return value into R0
15036 *
15037 * Implicit input:
15038 * ctx == skb == R6 == CTX
15039 *
15040 * Explicit input:
15041 * SRC == any register
15042 * IMM == 32-bit immediate
15043 *
15044 * Output:
15045 * R0 - 8/16/32-bit skb data converted to cpu endianness
15046 */
15047static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
15048{
15049 struct bpf_reg_state *regs = cur_regs(env);
15050 static const int ctx_reg = BPF_REG_6;
15051 u8 mode = BPF_MODE(insn->code);
15052 int i, err;
15053
15054 if (!may_access_skb(type: resolve_prog_type(prog: env->prog))) {
15055 verbose(private_data: env, fmt: "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
15056 return -EINVAL;
15057 }
15058
15059 if (!env->ops->gen_ld_abs) {
15060 verbose(private_data: env, fmt: "bpf verifier is misconfigured\n");
15061 return -EINVAL;
15062 }
15063
15064 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
15065 BPF_SIZE(insn->code) == BPF_DW ||
15066 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
15067 verbose(private_data: env, fmt: "BPF_LD_[ABS|IND] uses reserved fields\n");
15068 return -EINVAL;
15069 }
15070
15071 /* check whether implicit source operand (register R6) is readable */
15072 err = check_reg_arg(env, regno: ctx_reg, t: SRC_OP);
15073 if (err)
15074 return err;
15075
15076 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
15077 * gen_ld_abs() may terminate the program at runtime, leading to
15078 * reference leak.
15079 */
15080 err = check_reference_leak(env, exception_exit: false);
15081 if (err) {
15082 verbose(private_data: env, fmt: "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
15083 return err;
15084 }
15085
15086 if (env->cur_state->active_lock.ptr) {
15087 verbose(private_data: env, fmt: "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
15088 return -EINVAL;
15089 }
15090
15091 if (env->cur_state->active_rcu_lock) {
15092 verbose(private_data: env, fmt: "BPF_LD_[ABS|IND] cannot be used inside bpf_rcu_read_lock-ed region\n");
15093 return -EINVAL;
15094 }
15095
15096 if (regs[ctx_reg].type != PTR_TO_CTX) {
15097 verbose(private_data: env,
15098 fmt: "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
15099 return -EINVAL;
15100 }
15101
15102 if (mode == BPF_IND) {
15103 /* check explicit source operand */
15104 err = check_reg_arg(env, regno: insn->src_reg, t: SRC_OP);
15105 if (err)
15106 return err;
15107 }
15108
15109 err = check_ptr_off_reg(env, reg: &regs[ctx_reg], regno: ctx_reg);
15110 if (err < 0)
15111 return err;
15112
15113 /* reset caller saved regs to unreadable */
15114 for (i = 0; i < CALLER_SAVED_REGS; i++) {
15115 mark_reg_not_init(env, regs, regno: caller_saved[i]);
15116 check_reg_arg(env, regno: caller_saved[i], t: DST_OP_NO_MARK);
15117 }
15118
15119 /* mark destination R0 register as readable, since it contains
15120 * the value fetched from the packet.
15121 * Already marked as written above.
15122 */
15123 mark_reg_unknown(env, regs, regno: BPF_REG_0);
15124 /* ld_abs loads up to 32 bits of skb data. */
15125 regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
15126 return 0;
15127}
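
/* For example, a classic socket filter fragment
 *   BPF_LD_ABS(BPF_H, 12),                        // R0 = EtherType from skb
 *   BPF_JMP_IMM(BPF_JNE, BPF_REG_0, ETH_P_IP, 1),
 * loads a 16-bit field at absolute packet offset 12, already converted to CPU
 * endianness, and is only accepted for the skb-based program types above;
 * R1-R5 are clobbered by the load while R6 keeps the context pointer.
 */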
15128
15129static int check_return_code(struct bpf_verifier_env *env, int regno)
15130{
15131 struct tnum enforce_attach_type_range = tnum_unknown;
15132 const struct bpf_prog *prog = env->prog;
15133 struct bpf_reg_state *reg;
15134 struct tnum range = tnum_range(min: 0, max: 1), const_0 = tnum_const(value: 0);
15135 enum bpf_prog_type prog_type = resolve_prog_type(prog: env->prog);
15136 int err;
15137 struct bpf_func_state *frame = env->cur_state->frame[0];
15138 const bool is_subprog = frame->subprogno;
15139
15140 /* LSM and struct_ops func-ptr's return type could be "void" */
15141 if (!is_subprog || frame->in_exception_callback_fn) {
15142 switch (prog_type) {
15143 case BPF_PROG_TYPE_LSM:
15144 if (prog->expected_attach_type == BPF_LSM_CGROUP)
15145 /* See below, can be 0 or 0-1 depending on hook. */
15146 break;
15147 fallthrough;
15148 case BPF_PROG_TYPE_STRUCT_OPS:
15149 if (!prog->aux->attach_func_proto->type)
15150 return 0;
15151 break;
15152 default:
15153 break;
15154 }
15155 }
15156
15157 /* The eBPF calling convention is such that R0 is used
15158  * to return the value from an eBPF program.
15159  * Make sure that it is readable at this point,
15160  * i.e. at bpf_exit, which means that the program wrote
15161  * something into it earlier.
15162 */
15163 err = check_reg_arg(env, regno, t: SRC_OP);
15164 if (err)
15165 return err;
15166
15167 if (is_pointer_value(env, regno)) {
15168 verbose(private_data: env, fmt: "R%d leaks addr as return value\n", regno);
15169 return -EACCES;
15170 }
15171
15172 reg = cur_regs(env) + regno;
15173
15174 if (frame->in_async_callback_fn) {
15175 /* enforce return zero from async callbacks like timer */
15176 if (reg->type != SCALAR_VALUE) {
15177 verbose(private_data: env, fmt: "In async callback the register R%d is not a known value (%s)\n",
15178 regno, reg_type_str(env, type: reg->type));
15179 return -EINVAL;
15180 }
15181
15182 if (!tnum_in(a: const_0, b: reg->var_off)) {
15183 verbose_invalid_scalar(env, reg, range: &const_0, ctx: "async callback", reg_name: "R0");
15184 return -EINVAL;
15185 }
15186 return 0;
15187 }
15188
15189 if (is_subprog && !frame->in_exception_callback_fn) {
15190 if (reg->type != SCALAR_VALUE) {
15191 verbose(private_data: env, fmt: "At subprogram exit the register R%d is not a scalar value (%s)\n",
15192 regno, reg_type_str(env, type: reg->type));
15193 return -EINVAL;
15194 }
15195 return 0;
15196 }
15197
15198 switch (prog_type) {
15199 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
15200 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
15201 env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
15202 env->prog->expected_attach_type == BPF_CGROUP_UNIX_RECVMSG ||
15203 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
15204 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
15205 env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETPEERNAME ||
15206 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
15207 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME ||
15208 env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME)
15209 range = tnum_range(min: 1, max: 1);
15210 if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
15211 env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
15212 range = tnum_range(min: 0, max: 3);
15213 break;
15214 case BPF_PROG_TYPE_CGROUP_SKB:
15215 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
15216 range = tnum_range(min: 0, max: 3);
15217 enforce_attach_type_range = tnum_range(min: 2, max: 3);
15218 }
15219 break;
15220 case BPF_PROG_TYPE_CGROUP_SOCK:
15221 case BPF_PROG_TYPE_SOCK_OPS:
15222 case BPF_PROG_TYPE_CGROUP_DEVICE:
15223 case BPF_PROG_TYPE_CGROUP_SYSCTL:
15224 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
15225 break;
15226 case BPF_PROG_TYPE_RAW_TRACEPOINT:
15227 if (!env->prog->aux->attach_btf_id)
15228 return 0;
15229 range = tnum_const(value: 0);
15230 break;
15231 case BPF_PROG_TYPE_TRACING:
15232 switch (env->prog->expected_attach_type) {
15233 case BPF_TRACE_FENTRY:
15234 case BPF_TRACE_FEXIT:
15235 range = tnum_const(value: 0);
15236 break;
15237 case BPF_TRACE_RAW_TP:
15238 case BPF_MODIFY_RETURN:
15239 return 0;
15240 case BPF_TRACE_ITER:
15241 break;
15242 default:
15243 return -ENOTSUPP;
15244 }
15245 break;
15246 case BPF_PROG_TYPE_SK_LOOKUP:
15247 range = tnum_range(min: SK_DROP, max: SK_PASS);
15248 break;
15249
15250 case BPF_PROG_TYPE_LSM:
15251 if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
15252 /* Regular BPF_PROG_TYPE_LSM programs can return
15253 * any value.
15254 */
15255 return 0;
15256 }
15257 if (!env->prog->aux->attach_func_proto->type) {
15258 /* Make sure programs that attach to void
15259 * hooks don't try to modify return value.
15260 */
15261 range = tnum_range(min: 1, max: 1);
15262 }
15263 break;
15264
15265 case BPF_PROG_TYPE_NETFILTER:
15266 range = tnum_range(NF_DROP, NF_ACCEPT);
15267 break;
15268 case BPF_PROG_TYPE_EXT:
15269 /* freplace program can return anything as its return value
15270 * depends on the to-be-replaced kernel func or bpf program.
15271 */
15272 default:
15273 return 0;
15274 }
15275
15276 if (reg->type != SCALAR_VALUE) {
15277 verbose(private_data: env, fmt: "At program exit the register R%d is not a known value (%s)\n",
15278 regno, reg_type_str(env, type: reg->type));
15279 return -EINVAL;
15280 }
15281
15282 if (!tnum_in(a: range, b: reg->var_off)) {
15283 verbose_invalid_scalar(env, reg, range: &range, ctx: "program exit", reg_name: "R0");
15284 if (prog->expected_attach_type == BPF_LSM_CGROUP &&
15285 prog_type == BPF_PROG_TYPE_LSM &&
15286 !prog->aux->attach_func_proto->type)
15287 verbose(private_data: env, fmt: "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
15288 return -EINVAL;
15289 }
15290
15291 if (!tnum_is_unknown(a: enforce_attach_type_range) &&
15292 tnum_in(a: enforce_attach_type_range, b: reg->var_off))
15293 env->prog->enforce_expected_attach_type = 1;
15294 return 0;
15295}
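
/* For example, a BPF_PROG_TYPE_SK_LOOKUP program that ends with
 *   r0 = 2
 *   exit
 * is rejected here because 2 lies outside [SK_DROP, SK_PASS], while a
 * BPF_CGROUP_INET_EGRESS skb program whose return value is proven to be in
 * [2, 3] is accepted but gets enforce_expected_attach_type set, tying it to
 * the attach type those return codes were defined for.
 */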
15296
15297/* non-recursive DFS pseudo code
15298 * 1 procedure DFS-iterative(G,v):
15299 * 2 label v as discovered
15300 * 3 let S be a stack
15301 * 4 S.push(v)
15302 * 5 while S is not empty
15303 * 6 t <- S.peek()
15304 * 7 if t is what we're looking for:
15305 * 8 return t
15306 * 9 for all edges e in G.adjacentEdges(t) do
15307 * 10 if edge e is already labelled
15308 * 11 continue with the next edge
15309 * 12 w <- G.adjacentVertex(t,e)
15310 * 13 if vertex w is not discovered and not explored
15311 * 14 label e as tree-edge
15312 * 15 label w as discovered
15313 * 16 S.push(w)
15314 * 17 continue at 5
15315 * 18 else if vertex w is discovered
15316 * 19 label e as back-edge
15317 * 20 else
15318 * 21 // vertex w is explored
15319 * 22 label e as forward- or cross-edge
15320 * 23 label t as explored
15321 * 24 S.pop()
15322 *
15323 * convention:
15324 * 0x10 - discovered
15325 * 0x11 - discovered and fall-through edge labelled
15326 * 0x12 - discovered and fall-through and branch edges labelled
15327 * 0x20 - explored
15328 */
15329
15330enum {
15331 DISCOVERED = 0x10,
15332 EXPLORED = 0x20,
15333 FALLTHROUGH = 1,
15334 BRANCH = 2,
15335};
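
/* For a conditional jump at insn t the insn_state[] entry typically evolves as
 *   0x00 -> 0x10 (DISCOVERED, t pushed on insn_stack)
 *        -> 0x11 (DISCOVERED | FALLTHROUGH, successor t + 1 pushed)
 *        -> 0x12 (DISCOVERED | BRANCH, branch target pushed)
 *        -> 0x20 (EXPLORED, t popped from insn_stack)
 * and push_insn() reports a back-edge (i.e. a loop) whenever a successor is
 * still in the DISCOVERED stage.
 */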
15336
15337static void mark_prune_point(struct bpf_verifier_env *env, int idx)
15338{
15339 env->insn_aux_data[idx].prune_point = true;
15340}
15341
15342static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx)
15343{
15344 return env->insn_aux_data[insn_idx].prune_point;
15345}
15346
15347static void mark_force_checkpoint(struct bpf_verifier_env *env, int idx)
15348{
15349 env->insn_aux_data[idx].force_checkpoint = true;
15350}
15351
15352static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx)
15353{
15354 return env->insn_aux_data[insn_idx].force_checkpoint;
15355}
15356
15357
15358enum {
15359 DONE_EXPLORING = 0,
15360 KEEP_EXPLORING = 1,
15361};
15362
15363/* t, w, e - match pseudo-code above:
15364 * t - index of current instruction
15365 * w - next instruction
15366 * e - edge
15367 */
15368static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
15369 bool loop_ok)
15370{
15371 int *insn_stack = env->cfg.insn_stack;
15372 int *insn_state = env->cfg.insn_state;
15373
15374 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
15375 return DONE_EXPLORING;
15376
15377 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
15378 return DONE_EXPLORING;
15379
15380 if (w < 0 || w >= env->prog->len) {
15381 verbose_linfo(env, insn_off: t, prefix_fmt: "%d: ", t);
15382 verbose(private_data: env, fmt: "jump out of range from insn %d to %d\n", t, w);
15383 return -EINVAL;
15384 }
15385
15386 if (e == BRANCH) {
15387 /* mark branch target for state pruning */
15388 mark_prune_point(env, idx: w);
15389 mark_jmp_point(env, idx: w);
15390 }
15391
15392 if (insn_state[w] == 0) {
15393 /* tree-edge */
15394 insn_state[t] = DISCOVERED | e;
15395 insn_state[w] = DISCOVERED;
15396 if (env->cfg.cur_stack >= env->prog->len)
15397 return -E2BIG;
15398 insn_stack[env->cfg.cur_stack++] = w;
15399 return KEEP_EXPLORING;
15400 } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
15401 if (loop_ok && env->bpf_capable)
15402 return DONE_EXPLORING;
15403 verbose_linfo(env, insn_off: t, prefix_fmt: "%d: ", t);
15404 verbose_linfo(env, insn_off: w, prefix_fmt: "%d: ", w);
15405 verbose(private_data: env, fmt: "back-edge from insn %d to %d\n", t, w);
15406 return -EINVAL;
15407 } else if (insn_state[w] == EXPLORED) {
15408 /* forward- or cross-edge */
15409 insn_state[t] = DISCOVERED | e;
15410 } else {
15411 verbose(private_data: env, fmt: "insn state internal bug\n");
15412 return -EFAULT;
15413 }
15414 return DONE_EXPLORING;
15415}
15416
15417static int visit_func_call_insn(int t, struct bpf_insn *insns,
15418 struct bpf_verifier_env *env,
15419 bool visit_callee)
15420{
15421 int ret;
15422
15423 ret = push_insn(t, w: t + 1, e: FALLTHROUGH, env, loop_ok: false);
15424 if (ret)
15425 return ret;
15426
15427 mark_prune_point(env, idx: t + 1);
15428 /* when we exit from subprog, we need to record non-linear history */
15429 mark_jmp_point(env, idx: t + 1);
15430
15431 if (visit_callee) {
15432 mark_prune_point(env, idx: t);
15433 ret = push_insn(t, w: t + insns[t].imm + 1, e: BRANCH, env,
15434 /* It's ok to allow recursion from CFG point of
15435 * view. __check_func_call() will do the actual
15436 * check.
15437 */
15438 loop_ok: bpf_pseudo_func(insn: insns + t));
15439 }
15440 return ret;
15441}
15442
15443/* Visits the instruction at index t and returns one of the following:
15444 * < 0 - an error occurred
15445 * DONE_EXPLORING - the instruction was fully explored
15446 * KEEP_EXPLORING - there is still work to be done before it is fully explored
15447 */
15448static int visit_insn(int t, struct bpf_verifier_env *env)
15449{
15450 struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
15451 int ret, off;
15452
15453 if (bpf_pseudo_func(insn))
15454 return visit_func_call_insn(t, insns, env, visit_callee: true);
15455
15456 /* All non-branch instructions have a single fall-through edge. */
15457 if (BPF_CLASS(insn->code) != BPF_JMP &&
15458 BPF_CLASS(insn->code) != BPF_JMP32)
15459 return push_insn(t, w: t + 1, e: FALLTHROUGH, env, loop_ok: false);
15460
15461 switch (BPF_OP(insn->code)) {
15462 case BPF_EXIT:
15463 return DONE_EXPLORING;
15464
15465 case BPF_CALL:
15466 if (insn->src_reg == 0 && insn->imm == BPF_FUNC_timer_set_callback)
15467 /* Mark this call insn as a prune point to trigger
15468 * is_state_visited() check before call itself is
15469 * processed by __check_func_call(). Otherwise new
15470 * async state will be pushed for further exploration.
15471 */
15472 mark_prune_point(env, idx: t);
15473 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
15474 struct bpf_kfunc_call_arg_meta meta;
15475
15476 ret = fetch_kfunc_meta(env, insn, meta: &meta, NULL);
15477 if (ret == 0 && is_iter_next_kfunc(meta: &meta)) {
15478 mark_prune_point(env, idx: t);
15479 /* Checking and saving state checkpoints at iter_next() call
15480 * is crucial for fast convergence of open-coded iterator loop
15481 * logic, so we need to force it. If we don't do that,
15482  * is_state_visited() might skip saving a checkpoint, causing an
15483  * unnecessarily long sequence of non-checkpointed
15484  * instructions and jumps, leading to exhaustion of the jump
15485  * history buffer and potentially other undesired outcomes.
15486 * It is expected that with correct open-coded iterators
15487 * convergence will happen quickly, so we don't run a risk of
15488 * exhausting memory.
15489 */
15490 mark_force_checkpoint(env, idx: t);
15491 }
15492 }
15493 return visit_func_call_insn(t, insns, env, visit_callee: insn->src_reg == BPF_PSEUDO_CALL);
15494
15495 case BPF_JA:
15496 if (BPF_SRC(insn->code) != BPF_K)
15497 return -EINVAL;
15498
15499 if (BPF_CLASS(insn->code) == BPF_JMP)
15500 off = insn->off;
15501 else
15502 off = insn->imm;
15503
15504 /* unconditional jump with single edge */
15505 ret = push_insn(t, w: t + off + 1, e: FALLTHROUGH, env,
15506 loop_ok: true);
15507 if (ret)
15508 return ret;
15509
15510 mark_prune_point(env, idx: t + off + 1);
15511 mark_jmp_point(env, idx: t + off + 1);
15512
15513 return ret;
15514
15515 default:
15516 /* conditional jump with two edges */
15517 mark_prune_point(env, idx: t);
15518
15519 ret = push_insn(t, w: t + 1, e: FALLTHROUGH, env, loop_ok: true);
15520 if (ret)
15521 return ret;
15522
15523 return push_insn(t, w: t + insn->off + 1, e: BRANCH, env, loop_ok: true);
15524 }
15525}
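
/* For example, for the program
 *   0: r1 = 0
 *   1: if r1 > 10 goto +2
 *   2: r0 = 1
 *   3: exit
 *   4: r0 = 2
 *   5: exit
 * visit_insn() pushes fall-through edges 0->1, 1->2, 2->3 and 4->5 plus the
 * branch edge 1->4, marking insn 1 and its branch target 4 as prune points.
 */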
15526
15527/* non-recursive depth-first-search to detect loops in BPF program
15528 * loop == back-edge in directed graph
15529 */
15530static int check_cfg(struct bpf_verifier_env *env)
15531{
15532 int insn_cnt = env->prog->len;
15533 int *insn_stack, *insn_state;
15534 int ex_insn_beg, i, ret = 0;
15535 bool ex_done = false;
15536
15537 insn_state = env->cfg.insn_state = kvcalloc(n: insn_cnt, size: sizeof(int), GFP_KERNEL);
15538 if (!insn_state)
15539 return -ENOMEM;
15540
15541 insn_stack = env->cfg.insn_stack = kvcalloc(n: insn_cnt, size: sizeof(int), GFP_KERNEL);
15542 if (!insn_stack) {
15543 kvfree(addr: insn_state);
15544 return -ENOMEM;
15545 }
15546
15547 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
15548 insn_stack[0] = 0; /* 0 is the first instruction */
15549 env->cfg.cur_stack = 1;
15550
15551walk_cfg:
15552 while (env->cfg.cur_stack > 0) {
15553 int t = insn_stack[env->cfg.cur_stack - 1];
15554
15555 ret = visit_insn(t, env);
15556 switch (ret) {
15557 case DONE_EXPLORING:
15558 insn_state[t] = EXPLORED;
15559 env->cfg.cur_stack--;
15560 break;
15561 case KEEP_EXPLORING:
15562 break;
15563 default:
15564 if (ret > 0) {
15565 verbose(private_data: env, fmt: "visit_insn internal bug\n");
15566 ret = -EFAULT;
15567 }
15568 goto err_free;
15569 }
15570 }
15571
15572 if (env->cfg.cur_stack < 0) {
15573 verbose(private_data: env, fmt: "pop stack internal bug\n");
15574 ret = -EFAULT;
15575 goto err_free;
15576 }
15577
15578 if (env->exception_callback_subprog && !ex_done) {
15579 ex_insn_beg = env->subprog_info[env->exception_callback_subprog].start;
15580
15581 insn_state[ex_insn_beg] = DISCOVERED;
15582 insn_stack[0] = ex_insn_beg;
15583 env->cfg.cur_stack = 1;
15584 ex_done = true;
15585 goto walk_cfg;
15586 }
15587
15588 for (i = 0; i < insn_cnt; i++) {
15589 if (insn_state[i] != EXPLORED) {
15590 verbose(private_data: env, fmt: "unreachable insn %d\n", i);
15591 ret = -EINVAL;
15592 goto err_free;
15593 }
15594 }
15595 ret = 0; /* cfg looks good */
15596
15597err_free:
15598 kvfree(addr: insn_state);
15599 kvfree(addr: insn_stack);
15600 env->cfg.insn_state = env->cfg.insn_stack = NULL;
15601 return ret;
15602}
15603
15604static int check_abnormal_return(struct bpf_verifier_env *env)
15605{
15606 int i;
15607
15608 for (i = 1; i < env->subprog_cnt; i++) {
15609 if (env->subprog_info[i].has_ld_abs) {
15610 verbose(private_data: env, fmt: "LD_ABS is not allowed in subprogs without BTF\n");
15611 return -EINVAL;
15612 }
15613 if (env->subprog_info[i].has_tail_call) {
15614 verbose(private_data: env, fmt: "tail_call is not allowed in subprogs without BTF\n");
15615 return -EINVAL;
15616 }
15617 }
15618 return 0;
15619}
15620
15621/* The minimum supported BTF func info size */
15622#define MIN_BPF_FUNCINFO_SIZE 8
15623#define MAX_FUNCINFO_REC_SIZE 252
15624
15625static int check_btf_func_early(struct bpf_verifier_env *env,
15626 const union bpf_attr *attr,
15627 bpfptr_t uattr)
15628{
15629 u32 krec_size = sizeof(struct bpf_func_info);
15630 const struct btf_type *type, *func_proto;
15631 u32 i, nfuncs, urec_size, min_size;
15632 struct bpf_func_info *krecord;
15633 struct bpf_prog *prog;
15634 const struct btf *btf;
15635 u32 prev_offset = 0;
15636 bpfptr_t urecord;
15637 int ret = -ENOMEM;
15638
15639 nfuncs = attr->func_info_cnt;
15640 if (!nfuncs) {
15641 if (check_abnormal_return(env))
15642 return -EINVAL;
15643 return 0;
15644 }
15645
15646 urec_size = attr->func_info_rec_size;
15647 if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
15648 urec_size > MAX_FUNCINFO_REC_SIZE ||
15649 urec_size % sizeof(u32)) {
15650 verbose(private_data: env, fmt: "invalid func info rec size %u\n", urec_size);
15651 return -EINVAL;
15652 }
15653
15654 prog = env->prog;
15655 btf = prog->aux->btf;
15656
15657 urecord = make_bpfptr(addr: attr->func_info, is_kernel: uattr.is_kernel);
15658 min_size = min_t(u32, krec_size, urec_size);
15659
15660 krecord = kvcalloc(n: nfuncs, size: krec_size, GFP_KERNEL | __GFP_NOWARN);
15661 if (!krecord)
15662 return -ENOMEM;
15663
15664 for (i = 0; i < nfuncs; i++) {
15665 ret = bpf_check_uarg_tail_zero(uaddr: urecord, expected_size: krec_size, actual_size: urec_size);
15666 if (ret) {
15667 if (ret == -E2BIG) {
15668 verbose(private_data: env, fmt: "nonzero tailing record in func info");
15669 /* set the size the kernel expects so the loader can zero
15670 * out the rest of the record.
15671 */
15672 if (copy_to_bpfptr_offset(dst: uattr,
15673 offsetof(union bpf_attr, func_info_rec_size),
15674 src: &min_size, size: sizeof(min_size)))
15675 ret = -EFAULT;
15676 }
15677 goto err_free;
15678 }
15679
15680 if (copy_from_bpfptr(dst: &krecord[i], src: urecord, size: min_size)) {
15681 ret = -EFAULT;
15682 goto err_free;
15683 }
15684
15685 /* check insn_off */
15686 ret = -EINVAL;
15687 if (i == 0) {
15688 if (krecord[i].insn_off) {
15689 verbose(private_data: env,
15690 fmt: "nonzero insn_off %u for the first func info record",
15691 krecord[i].insn_off);
15692 goto err_free;
15693 }
15694 } else if (krecord[i].insn_off <= prev_offset) {
15695 verbose(private_data: env,
15696 fmt: "same or smaller insn offset (%u) than previous func info record (%u)",
15697 krecord[i].insn_off, prev_offset);
15698 goto err_free;
15699 }
15700
15701 /* check type_id */
15702 type = btf_type_by_id(btf, type_id: krecord[i].type_id);
15703 if (!type || !btf_type_is_func(t: type)) {
15704 verbose(private_data: env, fmt: "invalid type id %d in func info",
15705 krecord[i].type_id);
15706 goto err_free;
15707 }
15708
15709 func_proto = btf_type_by_id(btf, type_id: type->type);
15710 if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
15711 /* btf_func_check() already verified it during BTF load */
15712 goto err_free;
15713
15714 prev_offset = krecord[i].insn_off;
15715 bpfptr_add(bpfptr: &urecord, val: urec_size);
15716 }
15717
15718 prog->aux->func_info = krecord;
15719 prog->aux->func_info_cnt = nfuncs;
15720 return 0;
15721
15722err_free:
15723 kvfree(addr: krecord);
15724 return ret;
15725}
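
/* For example, should struct bpf_func_info ever grow, an older loader passing
 * 8-byte records still works because only min_size bytes are copied, while a
 * loader passing records larger than the kernel expects has the tail beyond
 * the known fields checked to be zero by bpf_check_uarg_tail_zero(); on
 * -E2BIG the expected record size is written back through uattr so the loader
 * can zero out the rest of each record and retry.
 */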
15726
15727static int check_btf_func(struct bpf_verifier_env *env,
15728 const union bpf_attr *attr,
15729 bpfptr_t uattr)
15730{
15731 const struct btf_type *type, *func_proto, *ret_type;
15732 u32 i, nfuncs, urec_size;
15733 struct bpf_func_info *krecord;
15734 struct bpf_func_info_aux *info_aux = NULL;
15735 struct bpf_prog *prog;
15736 const struct btf *btf;
15737 bpfptr_t urecord;
15738 bool scalar_return;
15739 int ret = -ENOMEM;
15740
15741 nfuncs = attr->func_info_cnt;
15742 if (!nfuncs) {
15743 if (check_abnormal_return(env))
15744 return -EINVAL;
15745 return 0;
15746 }
15747 if (nfuncs != env->subprog_cnt) {
15748 verbose(private_data: env, fmt: "number of funcs in func_info doesn't match number of subprogs\n");
15749 return -EINVAL;
15750 }
15751
15752 urec_size = attr->func_info_rec_size;
15753
15754 prog = env->prog;
15755 btf = prog->aux->btf;
15756
15757 urecord = make_bpfptr(addr: attr->func_info, is_kernel: uattr.is_kernel);
15758
15759 krecord = prog->aux->func_info;
15760 info_aux = kcalloc(n: nfuncs, size: sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
15761 if (!info_aux)
15762 return -ENOMEM;
15763
15764 for (i = 0; i < nfuncs; i++) {
15765 /* check insn_off */
15766 ret = -EINVAL;
15767
15768 if (env->subprog_info[i].start != krecord[i].insn_off) {
15769 verbose(private_data: env, fmt: "func_info BTF section doesn't match subprog layout in BPF program\n");
15770 goto err_free;
15771 }
15772
15773 /* Already checked type_id */
15774 type = btf_type_by_id(btf, type_id: krecord[i].type_id);
15775 info_aux[i].linkage = BTF_INFO_VLEN(type->info);
15776 /* Already checked func_proto */
15777 func_proto = btf_type_by_id(btf, type_id: type->type);
15778
15779 ret_type = btf_type_skip_modifiers(btf, id: func_proto->type, NULL);
15780 scalar_return =
15781 btf_type_is_small_int(t: ret_type) || btf_is_any_enum(t: ret_type);
15782 if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
15783 verbose(private_data: env, fmt: "LD_ABS is only allowed in functions that return 'int'.\n");
15784 goto err_free;
15785 }
15786 if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
15787 verbose(private_data: env, fmt: "tail_call is only allowed in functions that return 'int'.\n");
15788 goto err_free;
15789 }
15790
15791 bpfptr_add(bpfptr: &urecord, val: urec_size);
15792 }
15793
15794 prog->aux->func_info_aux = info_aux;
15795 return 0;
15796
15797err_free:
15798 kfree(objp: info_aux);
15799 return ret;
15800}
15801
15802static void adjust_btf_func(struct bpf_verifier_env *env)
15803{
15804 struct bpf_prog_aux *aux = env->prog->aux;
15805 int i;
15806
15807 if (!aux->func_info)
15808 return;
15809
15810 /* func_info is not available for hidden subprogs */
15811 for (i = 0; i < env->subprog_cnt - env->hidden_subprog_cnt; i++)
15812 aux->func_info[i].insn_off = env->subprog_info[i].start;
15813}
15814
15815#define MIN_BPF_LINEINFO_SIZE offsetofend(struct bpf_line_info, line_col)
15816#define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
15817
15818static int check_btf_line(struct bpf_verifier_env *env,
15819 const union bpf_attr *attr,
15820 bpfptr_t uattr)
15821{
15822 u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
15823 struct bpf_subprog_info *sub;
15824 struct bpf_line_info *linfo;
15825 struct bpf_prog *prog;
15826 const struct btf *btf;
15827 bpfptr_t ulinfo;
15828 int err;
15829
15830 nr_linfo = attr->line_info_cnt;
15831 if (!nr_linfo)
15832 return 0;
15833 if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
15834 return -EINVAL;
15835
15836 rec_size = attr->line_info_rec_size;
15837 if (rec_size < MIN_BPF_LINEINFO_SIZE ||
15838 rec_size > MAX_LINEINFO_REC_SIZE ||
15839 rec_size & (sizeof(u32) - 1))
15840 return -EINVAL;
15841
15842 /* Need to zero it in case userspace passes in a
15843  * smaller bpf_line_info object.
15844 */
15845 linfo = kvcalloc(n: nr_linfo, size: sizeof(struct bpf_line_info),
15846 GFP_KERNEL | __GFP_NOWARN);
15847 if (!linfo)
15848 return -ENOMEM;
15849
15850 prog = env->prog;
15851 btf = prog->aux->btf;
15852
15853 s = 0;
15854 sub = env->subprog_info;
15855 ulinfo = make_bpfptr(addr: attr->line_info, is_kernel: uattr.is_kernel);
15856 expected_size = sizeof(struct bpf_line_info);
15857 ncopy = min_t(u32, expected_size, rec_size);
15858 for (i = 0; i < nr_linfo; i++) {
15859 err = bpf_check_uarg_tail_zero(uaddr: ulinfo, expected_size, actual_size: rec_size);
15860 if (err) {
15861 if (err == -E2BIG) {
15862 verbose(private_data: env, fmt: "nonzero tailing record in line_info");
15863 if (copy_to_bpfptr_offset(dst: uattr,
15864 offsetof(union bpf_attr, line_info_rec_size),
15865 src: &expected_size, size: sizeof(expected_size)))
15866 err = -EFAULT;
15867 }
15868 goto err_free;
15869 }
15870
15871 if (copy_from_bpfptr(dst: &linfo[i], src: ulinfo, size: ncopy)) {
15872 err = -EFAULT;
15873 goto err_free;
15874 }
15875
15876 /*
15877 * Check insn_off to ensure
15878 * 1) strictly increasing AND
15879 * 2) bounded by prog->len
15880 *
15881 * The linfo[0].insn_off == 0 check logically falls into
15882 * the later "missing bpf_line_info for func..." case
15883  * because the first linfo[0].insn_off must also be the
15884  * start of the first subprog, and the first subprog must
15885  * have subprog_info[0].start == 0.
15886 */
15887 if ((i && linfo[i].insn_off <= prev_offset) ||
15888 linfo[i].insn_off >= prog->len) {
15889 verbose(private_data: env, fmt: "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
15890 i, linfo[i].insn_off, prev_offset,
15891 prog->len);
15892 err = -EINVAL;
15893 goto err_free;
15894 }
15895
15896 if (!prog->insnsi[linfo[i].insn_off].code) {
15897 verbose(private_data: env,
15898 fmt: "Invalid insn code at line_info[%u].insn_off\n",
15899 i);
15900 err = -EINVAL;
15901 goto err_free;
15902 }
15903
15904 if (!btf_name_by_offset(btf, offset: linfo[i].line_off) ||
15905 !btf_name_by_offset(btf, offset: linfo[i].file_name_off)) {
15906 verbose(private_data: env, fmt: "Invalid line_info[%u].line_off or .file_name_off\n", i);
15907 err = -EINVAL;
15908 goto err_free;
15909 }
15910
15911 if (s != env->subprog_cnt) {
15912 if (linfo[i].insn_off == sub[s].start) {
15913 sub[s].linfo_idx = i;
15914 s++;
15915 } else if (sub[s].start < linfo[i].insn_off) {
15916 verbose(private_data: env, fmt: "missing bpf_line_info for func#%u\n", s);
15917 err = -EINVAL;
15918 goto err_free;
15919 }
15920 }
15921
15922 prev_offset = linfo[i].insn_off;
15923 bpfptr_add(bpfptr: &ulinfo, val: rec_size);
15924 }
15925
15926 if (s != env->subprog_cnt) {
15927 verbose(private_data: env, fmt: "missing bpf_line_info for %u funcs starting from func#%u\n",
15928 env->subprog_cnt - s, s);
15929 err = -EINVAL;
15930 goto err_free;
15931 }
15932
15933 prog->aux->linfo = linfo;
15934 prog->aux->nr_linfo = nr_linfo;
15935
15936 return 0;
15937
15938err_free:
15939 kvfree(addr: linfo);
15940 return err;
15941}
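
/* For example, with subprogs starting at insns 0 and 10, the layout
 *   line_info[0].insn_off = 0    -> sub[0].linfo_idx = 0
 *   line_info[1].insn_off = 4
 *   line_info[2].insn_off = 10   -> sub[1].linfo_idx = 2
 * is accepted: offsets strictly increase, stay below prog->len and every
 * subprog start has a record. If no record had insn_off == 10, the walk above
 * would report "missing bpf_line_info for func#1".
 */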
15942
15943#define MIN_CORE_RELO_SIZE sizeof(struct bpf_core_relo)
15944#define MAX_CORE_RELO_SIZE MAX_FUNCINFO_REC_SIZE
15945
15946static int check_core_relo(struct bpf_verifier_env *env,
15947 const union bpf_attr *attr,
15948 bpfptr_t uattr)
15949{
15950 u32 i, nr_core_relo, ncopy, expected_size, rec_size;
15951 struct bpf_core_relo core_relo = {};
15952 struct bpf_prog *prog = env->prog;
15953 const struct btf *btf = prog->aux->btf;
15954 struct bpf_core_ctx ctx = {
15955 .log = &env->log,
15956 .btf = btf,
15957 };
15958 bpfptr_t u_core_relo;
15959 int err;
15960
15961 nr_core_relo = attr->core_relo_cnt;
15962 if (!nr_core_relo)
15963 return 0;
15964 if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
15965 return -EINVAL;
15966
15967 rec_size = attr->core_relo_rec_size;
15968 if (rec_size < MIN_CORE_RELO_SIZE ||
15969 rec_size > MAX_CORE_RELO_SIZE ||
15970 rec_size % sizeof(u32))
15971 return -EINVAL;
15972
15973 u_core_relo = make_bpfptr(addr: attr->core_relos, is_kernel: uattr.is_kernel);
15974 expected_size = sizeof(struct bpf_core_relo);
15975 ncopy = min_t(u32, expected_size, rec_size);
15976
15977 /* Unlike func_info and line_info, copy and apply each CO-RE
15978 * relocation record one at a time.
15979 */
15980 for (i = 0; i < nr_core_relo; i++) {
15981 /* future proofing when sizeof(bpf_core_relo) changes */
15982 err = bpf_check_uarg_tail_zero(uaddr: u_core_relo, expected_size, actual_size: rec_size);
15983 if (err) {
15984 if (err == -E2BIG) {
15985 verbose(private_data: env, fmt: "nonzero tailing record in core_relo");
15986 if (copy_to_bpfptr_offset(dst: uattr,
15987 offsetof(union bpf_attr, core_relo_rec_size),
15988 src: &expected_size, size: sizeof(expected_size)))
15989 err = -EFAULT;
15990 }
15991 break;
15992 }
15993
15994 if (copy_from_bpfptr(dst: &core_relo, src: u_core_relo, size: ncopy)) {
15995 err = -EFAULT;
15996 break;
15997 }
15998
15999 if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
16000 verbose(private_data: env, fmt: "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
16001 i, core_relo.insn_off, prog->len);
16002 err = -EINVAL;
16003 break;
16004 }
16005
16006 err = bpf_core_apply(ctx: &ctx, relo: &core_relo, relo_idx: i,
16007 insn: &prog->insnsi[core_relo.insn_off / 8]);
16008 if (err)
16009 break;
16010 bpfptr_add(bpfptr: &u_core_relo, val: rec_size);
16011 }
16012 return err;
16013}
16014
16015static int check_btf_info_early(struct bpf_verifier_env *env,
16016 const union bpf_attr *attr,
16017 bpfptr_t uattr)
16018{
16019 struct btf *btf;
16020 int err;
16021
16022 if (!attr->func_info_cnt && !attr->line_info_cnt) {
16023 if (check_abnormal_return(env))
16024 return -EINVAL;
16025 return 0;
16026 }
16027
16028 btf = btf_get_by_fd(fd: attr->prog_btf_fd);
16029 if (IS_ERR(ptr: btf))
16030 return PTR_ERR(ptr: btf);
16031 if (btf_is_kernel(btf)) {
16032 btf_put(btf);
16033 return -EACCES;
16034 }
16035 env->prog->aux->btf = btf;
16036
16037 err = check_btf_func_early(env, attr, uattr);
16038 if (err)
16039 return err;
16040 return 0;
16041}
16042
16043static int check_btf_info(struct bpf_verifier_env *env,
16044 const union bpf_attr *attr,
16045 bpfptr_t uattr)
16046{
16047 int err;
16048
16049 if (!attr->func_info_cnt && !attr->line_info_cnt) {
16050 if (check_abnormal_return(env))
16051 return -EINVAL;
16052 return 0;
16053 }
16054
16055 err = check_btf_func(env, attr, uattr);
16056 if (err)
16057 return err;
16058
16059 err = check_btf_line(env, attr, uattr);
16060 if (err)
16061 return err;
16062
16063 err = check_core_relo(env, attr, uattr);
16064 if (err)
16065 return err;
16066
16067 return 0;
16068}
16069
16070/* check %cur's range satisfies %old's */
16071static bool range_within(struct bpf_reg_state *old,
16072 struct bpf_reg_state *cur)
16073{
16074 return old->umin_value <= cur->umin_value &&
16075 old->umax_value >= cur->umax_value &&
16076 old->smin_value <= cur->smin_value &&
16077 old->smax_value >= cur->smax_value &&
16078 old->u32_min_value <= cur->u32_min_value &&
16079 old->u32_max_value >= cur->u32_max_value &&
16080 old->s32_min_value <= cur->s32_min_value &&
16081 old->s32_max_value >= cur->s32_max_value;
16082}
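
/* E.g. an old (explored) register with umin_value == 0 and umax_value == 100
 * still covers a current register with umin_value == 10 and umax_value == 20
 * (provided the signed and 32-bit bounds nest the same way), but not one with
 * umax_value == 200: accesses proven safe for [0, 100] say nothing about 200.
 */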
16083
16084/* If in the old state two registers had the same id, then they need to have
16085 * the same id in the new state as well. But that id could be different from
16086 * the old state, so we need to track the mapping from old to new ids.
16087 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
16088 * regs with old id 5 must also have new id 9 for the new state to be safe. But
16089 * regs with a different old id could still have new id 9, we don't care about
16090 * that.
16091 * So we look through our idmap to see if this old id has been seen before. If
16092 * so, we require the new id to match; otherwise, we add the id pair to the map.
16093 */
16094static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
16095{
16096 struct bpf_id_pair *map = idmap->map;
16097 unsigned int i;
16098
16099 /* either both IDs should be set or both should be zero */
16100 if (!!old_id != !!cur_id)
16101 return false;
16102
16103 if (old_id == 0) /* cur_id == 0 as well */
16104 return true;
16105
16106 for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
16107 if (!map[i].old) {
16108 /* Reached an empty slot; haven't seen this id before */
16109 map[i].old = old_id;
16110 map[i].cur = cur_id;
16111 return true;
16112 }
16113 if (map[i].old == old_id)
16114 return map[i].cur == cur_id;
16115 if (map[i].cur == cur_id)
16116 return false;
16117 }
16118 /* We ran out of idmap slots, which should be impossible */
16119 WARN_ON_ONCE(1);
16120 return false;
16121}
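
/* E.g. if the old state had r1.id == r2.id == 5 and the current state has
 * r1.id == r2.id == 9, the first check_ids() call records the 5 -> 9 mapping
 * and the second call succeeds against it. If r2.id were 7 in the current
 * state instead, old id 5 would already map to 9 != 7 and the states would
 * not be accepted as equivalent.
 */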
16122
16123/* Similar to check_ids(), but allocate a unique temporary ID
16124 * for 'old_id' or 'cur_id' of zero.
16125 * This makes pairs like '0 vs unique ID', 'unique ID vs 0' valid.
16126 */
16127static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
16128{
16129 old_id = old_id ? old_id : ++idmap->tmp_id_gen;
16130 cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
16131
16132 return check_ids(old_id, cur_id, idmap);
16133}
16134
16135static void clean_func_state(struct bpf_verifier_env *env,
16136 struct bpf_func_state *st)
16137{
16138 enum bpf_reg_liveness live;
16139 int i, j;
16140
16141 for (i = 0; i < BPF_REG_FP; i++) {
16142 live = st->regs[i].live;
16143 /* liveness must not touch this register anymore */
16144 st->regs[i].live |= REG_LIVE_DONE;
16145 if (!(live & REG_LIVE_READ))
16146 /* since the register is unused, clear its state
16147 * to make further comparison simpler
16148 */
16149 __mark_reg_not_init(env, reg: &st->regs[i]);
16150 }
16151
16152 for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
16153 live = st->stack[i].spilled_ptr.live;
16154 /* liveness must not touch this stack slot anymore */
16155 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
16156 if (!(live & REG_LIVE_READ)) {
16157 __mark_reg_not_init(env, reg: &st->stack[i].spilled_ptr);
16158 for (j = 0; j < BPF_REG_SIZE; j++)
16159 st->stack[i].slot_type[j] = STACK_INVALID;
16160 }
16161 }
16162}
16163
16164static void clean_verifier_state(struct bpf_verifier_env *env,
16165 struct bpf_verifier_state *st)
16166{
16167 int i;
16168
16169 if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
16170 /* all regs in this state in all frames were already marked */
16171 return;
16172
16173 for (i = 0; i <= st->curframe; i++)
16174 clean_func_state(env, st: st->frame[i]);
16175}
16176
16177/* the parentage chains form a tree.
16178 * the verifier states are added to state lists at given insn and
16179 * pushed into state stack for future exploration.
16180  * when the verifier reaches the bpf_exit insn, some of the verifier states
16181 * stored in the state lists have their final liveness state already,
16182 * but a lot of states will get revised from liveness point of view when
16183 * the verifier explores other branches.
16184 * Example:
16185 * 1: r0 = 1
16186 * 2: if r1 == 100 goto pc+1
16187 * 3: r0 = 2
16188 * 4: exit
16189 * when the verifier reaches exit insn the register r0 in the state list of
16190 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
16191 * of insn 2 and goes exploring further. At the insn 4 it will walk the
16192 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
16193 *
16194 * Since the verifier pushes the branch states as it sees them while exploring
16195 * the program the condition of walking the branch instruction for the second
16196 * time means that all states below this branch were already explored and
16197 * their final liveness marks are already propagated.
16198 * Hence when the verifier completes the search of state list in is_state_visited()
16199 * we can call this clean_live_states() function to mark all liveness states
16200 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
16201 * will not be used.
16202  * This function also clears the registers and stack slots that were not
16203  * READ, to simplify state merging.
16204 *
16205  * An important note here is that walking the same branch instruction in the
16206  * callee doesn't mean that the states are DONE. The verifier has to compare
16207  * the callsites.
16208 */
16209static void clean_live_states(struct bpf_verifier_env *env, int insn,
16210 struct bpf_verifier_state *cur)
16211{
16212 struct bpf_verifier_state_list *sl;
16213
16214 sl = *explored_state(env, idx: insn);
16215 while (sl) {
16216 if (sl->state.branches)
16217 goto next;
16218 if (sl->state.insn_idx != insn ||
16219 !same_callsites(a: &sl->state, b: cur))
16220 goto next;
16221 clean_verifier_state(env, st: &sl->state);
16222next:
16223 sl = sl->next;
16224 }
16225}
16226
16227static bool regs_exact(const struct bpf_reg_state *rold,
16228 const struct bpf_reg_state *rcur,
16229 struct bpf_idmap *idmap)
16230{
16231 return memcmp(p: rold, q: rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
16232 check_ids(old_id: rold->id, cur_id: rcur->id, idmap) &&
16233 check_ids(old_id: rold->ref_obj_id, cur_id: rcur->ref_obj_id, idmap);
16234}
16235
16236/* Returns true if (rold safe implies rcur safe) */
16237static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
16238 struct bpf_reg_state *rcur, struct bpf_idmap *idmap, bool exact)
16239{
16240 if (exact)
16241 return regs_exact(rold, rcur, idmap);
16242
16243 if (!(rold->live & REG_LIVE_READ))
16244 /* explored state didn't use this */
16245 return true;
16246 if (rold->type == NOT_INIT)
16247 /* explored state can't have used this */
16248 return true;
16249 if (rcur->type == NOT_INIT)
16250 return false;
16251
16252 /* Enforce that register types have to match exactly, including their
16253 * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
16254 * rule.
16255 *
16256 * One can make a point that using a pointer register as unbounded
16257 * SCALAR would be technically acceptable, but this could lead to
16258 * pointer leaks because scalars are allowed to leak while pointers
16259 * are not. We could make this safe in special cases if root is
16260 * calling us, but it's probably not worth the hassle.
16261 *
16262 * Also, register types that are *not* MAYBE_NULL could technically be
16263 * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
16264 * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
16265 * to the same map).
16266 * However, if the old MAYBE_NULL register then got NULL checked,
16267 * doing so could have affected others with the same id, and we can't
16268 * check for that because we lost the id when we converted to
16269 * a non-MAYBE_NULL variant.
16270 * So, as a general rule we don't allow mixing MAYBE_NULL and
16271 * non-MAYBE_NULL registers as well.
16272 */
16273 if (rold->type != rcur->type)
16274 return false;
16275
16276 switch (base_type(type: rold->type)) {
16277 case SCALAR_VALUE:
16278 if (env->explore_alu_limits) {
16279 /* explore_alu_limits disables tnum_in() and range_within()
16280 * logic and requires everything to be strict
16281 */
16282 return memcmp(p: rold, q: rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
16283 check_scalar_ids(old_id: rold->id, cur_id: rcur->id, idmap);
16284 }
16285 if (!rold->precise)
16286 return true;
16287 /* Why check_ids() for scalar registers?
16288 *
16289 * Consider the following BPF code:
16290 * 1: r6 = ... unbound scalar, ID=a ...
16291 * 2: r7 = ... unbound scalar, ID=b ...
16292 * 3: if (r6 > r7) goto +1
16293 * 4: r6 = r7
16294 * 5: if (r6 > X) goto ...
16295 * 6: ... memory operation using r7 ...
16296 *
16297 * First verification path is [1-6]:
16298 * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7;
16299 * - at (5) r6 would be marked <= X, find_equal_scalars() would also mark
16300  * r7 <= X, because r6 and r7 share the same id.
16301 * Next verification path is [1-4, 6].
16302 *
16303 * Instruction (6) would be reached in two states:
16304 * I. r6{.id=b}, r7{.id=b} via path 1-6;
16305 * II. r6{.id=a}, r7{.id=b} via path 1-4, 6.
16306 *
16307 * Use check_ids() to distinguish these states.
16308 * ---
16309 * Also verify that new value satisfies old value range knowledge.
16310 */
16311 return range_within(old: rold, cur: rcur) &&
16312 tnum_in(a: rold->var_off, b: rcur->var_off) &&
16313 check_scalar_ids(old_id: rold->id, cur_id: rcur->id, idmap);
16314 case PTR_TO_MAP_KEY:
16315 case PTR_TO_MAP_VALUE:
16316 case PTR_TO_MEM:
16317 case PTR_TO_BUF:
16318 case PTR_TO_TP_BUFFER:
16319 /* If the new min/max/var_off satisfy the old ones and
16320 * everything else matches, we are OK.
16321 */
16322 return memcmp(p: rold, q: rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
16323 range_within(old: rold, cur: rcur) &&
16324 tnum_in(a: rold->var_off, b: rcur->var_off) &&
16325 check_ids(old_id: rold->id, cur_id: rcur->id, idmap) &&
16326 check_ids(old_id: rold->ref_obj_id, cur_id: rcur->ref_obj_id, idmap);
16327 case PTR_TO_PACKET_META:
16328 case PTR_TO_PACKET:
16329 /* We must have at least as much range as the old ptr
16330 * did, so that any accesses which were safe before are
16331 * still safe. This is true even if old range < old off,
16332 * since someone could have accessed through (ptr - k), or
16333 * even done ptr -= k in a register, to get a safe access.
16334 */
16335 if (rold->range > rcur->range)
16336 return false;
16337 /* If the offsets don't match, we can't trust our alignment;
16338 * nor can we be sure that we won't fall out of range.
16339 */
16340 if (rold->off != rcur->off)
16341 return false;
16342 /* id relations must be preserved */
16343 if (!check_ids(old_id: rold->id, cur_id: rcur->id, idmap))
16344 return false;
16345 /* new val must satisfy old val knowledge */
16346 return range_within(old: rold, cur: rcur) &&
16347 tnum_in(a: rold->var_off, b: rcur->var_off);
16348 case PTR_TO_STACK:
16349 /* two stack pointers are equal only if they're pointing to
16350 * the same stack frame, since fp-8 in foo != fp-8 in bar
16351 */
16352 return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
16353 default:
16354 return regs_exact(rold, rcur, idmap);
16355 }
16356}
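
/* E.g. for PTR_TO_PACKET the old state may have proven pkt + 8 <= data_end
 * (range == 8); a current state with range == 16 is at least as safe, whereas
 * range == 4 is not, since an 8-byte access validated against the old state
 * would no longer be covered.
 */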
16357
16358static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
16359 struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
16360{
16361 int i, spi;
16362
16363 /* walk slots of the explored stack and ignore any additional
16364 * slots in the current stack, since explored(safe) state
16365 * didn't use them
16366 */
16367 for (i = 0; i < old->allocated_stack; i++) {
16368 struct bpf_reg_state *old_reg, *cur_reg;
16369
16370 spi = i / BPF_REG_SIZE;
16371
16372 if (exact &&
16373 old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
16374 cur->stack[spi].slot_type[i % BPF_REG_SIZE])
16375 return false;
16376
16377 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ) && !exact) {
16378 i += BPF_REG_SIZE - 1;
16379 /* explored state didn't use this */
16380 continue;
16381 }
16382
16383 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
16384 continue;
16385
16386 if (env->allow_uninit_stack &&
16387 old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
16388 continue;
16389
16390 /* explored stack has more populated slots than current stack
16391 * and these slots were used
16392 */
16393 if (i >= cur->allocated_stack)
16394 return false;
16395
16396 /* if old state was safe with misc data in the stack
16397 * it will be safe with zero-initialized stack.
16398 * The opposite is not true
16399 */
16400 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
16401 cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
16402 continue;
16403 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
16404 cur->stack[spi].slot_type[i % BPF_REG_SIZE])
16405 /* Ex: the old explored (safe) state has STACK_SPILL in
16406  * this stack slot, but the current one has STACK_MISC ->
16407  * these verifier states are not equivalent;
16408  * return false to continue verification of this path
16409 */
16410 return false;
16411 if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
16412 continue;
16413 /* Both old and cur are having same slot_type */
16414 switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
16415 case STACK_SPILL:
16416 /* when the explored and current stack slots both store
16417  * spilled registers, check that the stored pointers' types
16418  * are the same as well.
16419  * Ex: the explored safe path could have stored
16420  * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
16421  * but the current path has stored:
16422  * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16};
16423  * such verifier states are not equivalent.
16424  * Return false to continue verification of this path.
16425  */
16426 if (!regsafe(env, rold: &old->stack[spi].spilled_ptr,
16427 rcur: &cur->stack[spi].spilled_ptr, idmap, exact))
16428 return false;
16429 break;
16430 case STACK_DYNPTR:
16431 old_reg = &old->stack[spi].spilled_ptr;
16432 cur_reg = &cur->stack[spi].spilled_ptr;
16433 if (old_reg->dynptr.type != cur_reg->dynptr.type ||
16434 old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
16435 !check_ids(old_id: old_reg->ref_obj_id, cur_id: cur_reg->ref_obj_id, idmap))
16436 return false;
16437 break;
16438 case STACK_ITER:
16439 old_reg = &old->stack[spi].spilled_ptr;
16440 cur_reg = &cur->stack[spi].spilled_ptr;
16441 /* iter.depth is not compared between states as it
16442 * doesn't matter for correctness and would otherwise
16443 * prevent convergence; we maintain it only to prevent
16444 * infinite loop check triggering, see
16445 * iter_active_depths_differ()
16446 */
16447 if (old_reg->iter.btf != cur_reg->iter.btf ||
16448 old_reg->iter.btf_id != cur_reg->iter.btf_id ||
16449 old_reg->iter.state != cur_reg->iter.state ||
16450 /* ignore {old_reg,cur_reg}->iter.depth, see above */
			    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
16452 return false;
16453 break;
16454 case STACK_MISC:
16455 case STACK_ZERO:
16456 case STACK_INVALID:
16457 continue;
16458 /* Ensure that new unhandled slot types return false by default */
16459 default:
16460 return false;
16461 }
16462 }
16463 return true;
16464}
16465
16466static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
16467 struct bpf_idmap *idmap)
16468{
16469 int i;
16470
16471 if (old->acquired_refs != cur->acquired_refs)
16472 return false;
16473
16474 for (i = 0; i < old->acquired_refs; i++) {
		if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap))
16476 return false;
16477 }
16478
16479 return true;
16480}
16481
16482/* compare two verifier states
16483 *
16484 * all states stored in state_list are known to be valid, since
16485 * verifier reached 'bpf_exit' instruction through them
16486 *
16487 * this function is called when verifier exploring different branches of
16488 * execution popped from the state stack. If it sees an old state that has
16489 * more strict register state and more strict stack state then this execution
16490 * branch doesn't need to be explored further, since verifier already
16491 * concluded that more strict state leads to valid finish.
16492 *
16493 * Therefore two states are equivalent if register state is more conservative
16494 * and explored stack state is more conservative than the current one.
16495 * Example:
16496 * explored current
16497 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
16498 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
16499 *
16500 * In other words if current stack state (one being explored) has more
16501 * valid slots than old one that already passed validation, it means
16502 * the verifier can stop exploring and conclude that current state is valid too
16503 *
16504 * Similarly with registers. If explored state has register type as invalid
16505 * whereas register type in current state is meaningful, it means that
16506 * the current state will reach 'bpf_exit' instruction safely
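 *
 * A rough scalar example of the same idea (illustrative only):
 *   explored: R1=scalar(unknown)     current: R1=scalar(umin=0, umax=10)
 * is safe, because every value the current R1 may hold was already covered
 * by the explored R1; the opposite direction would not be safe.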
16507 */
16508static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
16509 struct bpf_func_state *cur, bool exact)
16510{
16511 int i;
16512
16513 for (i = 0; i < MAX_BPF_REG; i++)
		if (!regsafe(env, &old->regs[i], &cur->regs[i],
			     &env->idmap_scratch, exact))
16516 return false;
16517
	if (!stacksafe(env, old, cur, &env->idmap_scratch, exact))
16519 return false;
16520
	if (!refsafe(old, cur, &env->idmap_scratch))
16522 return false;
16523
16524 return true;
16525}
16526
16527static void reset_idmap_scratch(struct bpf_verifier_env *env)
16528{
16529 env->idmap_scratch.tmp_id_gen = env->id_gen;
16530 memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map));
16531}
16532
16533static bool states_equal(struct bpf_verifier_env *env,
16534 struct bpf_verifier_state *old,
16535 struct bpf_verifier_state *cur,
16536 bool exact)
16537{
16538 int i;
16539
16540 if (old->curframe != cur->curframe)
16541 return false;
16542
16543 reset_idmap_scratch(env);
16544
16545 /* Verification state from speculative execution simulation
16546 * must never prune a non-speculative execution one.
16547 */
16548 if (old->speculative && !cur->speculative)
16549 return false;
16550
16551 if (old->active_lock.ptr != cur->active_lock.ptr)
16552 return false;
16553
16554 /* Old and cur active_lock's have to be either both present
16555 * or both absent.
16556 */
16557 if (!!old->active_lock.id != !!cur->active_lock.id)
16558 return false;
16559
16560 if (old->active_lock.id &&
	    !check_ids(old->active_lock.id, cur->active_lock.id, &env->idmap_scratch))
16562 return false;
16563
16564 if (old->active_rcu_lock != cur->active_rcu_lock)
16565 return false;
16566
16567 /* for states to be equal callsites have to be the same
16568 * and all frame states need to be equivalent
16569 */
16570 for (i = 0; i <= old->curframe; i++) {
16571 if (old->frame[i]->callsite != cur->frame[i]->callsite)
16572 return false;
		if (!func_states_equal(env, old->frame[i], cur->frame[i], exact))
16574 return false;
16575 }
16576 return true;
16577}
16578
16579/* Return 0 if no propagation happened. Return negative error code if error
16580 * happened. Otherwise, return the propagated bit.
16581 */
16582static int propagate_liveness_reg(struct bpf_verifier_env *env,
16583 struct bpf_reg_state *reg,
16584 struct bpf_reg_state *parent_reg)
16585{
16586 u8 parent_flag = parent_reg->live & REG_LIVE_READ;
16587 u8 flag = reg->live & REG_LIVE_READ;
16588 int err;
16589
	/* By the time we get here, the read flags of PARENT_REG and REG can be
	 * any of REG_LIVE_READ64, REG_LIVE_READ32 or REG_LIVE_NONE. There is
	 * no need for propagation if PARENT_REG already has the strongest
	 * flag, REG_LIVE_READ64.
	 */
16594 if (parent_flag == REG_LIVE_READ64 ||
16595 /* Or if there is no read flag from REG. */
16596 !flag ||
16597 /* Or if the read flag from REG is the same as PARENT_REG. */
16598 parent_flag == flag)
16599 return 0;
16600
	err = mark_reg_read(env, reg, parent_reg, flag);
16602 if (err)
16603 return err;
16604
16605 return flag;
16606}
16607
16608/* A write screens off any subsequent reads; but write marks come from the
16609 * straight-line code between a state and its parent. When we arrive at an
16610 * equivalent state (jump target or such) we didn't arrive by the straight-line
16611 * code, so read marks in the state must propagate to the parent regardless
16612 * of the state's write marks. That's what 'parent == state->parent' comparison
16613 * in mark_reg_read() is for.
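 *
 * Illustration (hypothetical registers): suppose the explored state read r6
 * (REG_LIVE_READ on r6) on its way to bpf_exit, while the current path wrote
 * r6 in the straight-line code leading to this pruning point. Pruning here
 * means the continuation's read of r6 is performed on behalf of the current
 * path as well, so the read mark is still copied into the current state and
 * propagated up its parentage chain.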
16614 */
16615static int propagate_liveness(struct bpf_verifier_env *env,
16616 const struct bpf_verifier_state *vstate,
16617 struct bpf_verifier_state *vparent)
16618{
16619 struct bpf_reg_state *state_reg, *parent_reg;
16620 struct bpf_func_state *state, *parent;
16621 int i, frame, err = 0;
16622
16623 if (vparent->curframe != vstate->curframe) {
16624 WARN(1, "propagate_live: parent frame %d current frame %d\n",
16625 vparent->curframe, vstate->curframe);
16626 return -EFAULT;
16627 }
16628 /* Propagate read liveness of registers... */
16629 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
16630 for (frame = 0; frame <= vstate->curframe; frame++) {
16631 parent = vparent->frame[frame];
16632 state = vstate->frame[frame];
16633 parent_reg = parent->regs;
16634 state_reg = state->regs;
16635 /* We don't need to worry about FP liveness, it's read-only */
16636 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
			err = propagate_liveness_reg(env, &state_reg[i],
						     &parent_reg[i]);
16639 if (err < 0)
16640 return err;
16641 if (err == REG_LIVE_READ64)
				mark_insn_zext(env, &parent_reg[i]);
16643 }
16644
16645 /* Propagate stack slots. */
16646 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
16647 i < parent->allocated_stack / BPF_REG_SIZE; i++) {
16648 parent_reg = &parent->stack[i].spilled_ptr;
16649 state_reg = &state->stack[i].spilled_ptr;
			err = propagate_liveness_reg(env, state_reg,
						     parent_reg);
16652 if (err < 0)
16653 return err;
16654 }
16655 }
16656 return 0;
16657}
16658
16659/* find precise scalars in the previous equivalent state and
16660 * propagate them into the current state
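 *
 * Illustrative example (made-up register): if the old state reached bpf_exit
 * safely only because r7 was tracked as an exact constant (r7 is marked
 * precise and read there), the equivalent current state must end up with the
 * same precise mark, which is done by collecting the registers/slots below
 * and backtracking the current path via mark_chain_precision_batch().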
16661 */
16662static int propagate_precision(struct bpf_verifier_env *env,
16663 const struct bpf_verifier_state *old)
16664{
16665 struct bpf_reg_state *state_reg;
16666 struct bpf_func_state *state;
16667 int i, err = 0, fr;
16668 bool first;
16669
16670 for (fr = old->curframe; fr >= 0; fr--) {
16671 state = old->frame[fr];
16672 state_reg = state->regs;
16673 first = true;
16674 for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
16675 if (state_reg->type != SCALAR_VALUE ||
16676 !state_reg->precise ||
16677 !(state_reg->live & REG_LIVE_READ))
16678 continue;
16679 if (env->log.level & BPF_LOG_LEVEL2) {
16680 if (first)
					verbose(env, "frame %d: propagating r%d", fr, i);
				else
					verbose(env, ",r%d", i);
			}
			bt_set_frame_reg(&env->bt, fr, i);
16686 first = false;
16687 }
16688
16689 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
			if (!is_spilled_reg(&state->stack[i]))
16691 continue;
16692 state_reg = &state->stack[i].spilled_ptr;
16693 if (state_reg->type != SCALAR_VALUE ||
16694 !state_reg->precise ||
16695 !(state_reg->live & REG_LIVE_READ))
16696 continue;
16697 if (env->log.level & BPF_LOG_LEVEL2) {
16698 if (first)
					verbose(env, "frame %d: propagating fp%d",
						fr, (-i - 1) * BPF_REG_SIZE);
				else
					verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE);
			}
			bt_set_frame_slot(&env->bt, fr, i);
16705 first = false;
16706 }
16707 if (!first)
			verbose(env, "\n");
16709 }
16710
16711 err = mark_chain_precision_batch(env);
16712 if (err < 0)
16713 return err;
16714
16715 return 0;
16716}
16717
16718static bool states_maybe_looping(struct bpf_verifier_state *old,
16719 struct bpf_verifier_state *cur)
16720{
16721 struct bpf_func_state *fold, *fcur;
16722 int i, fr = cur->curframe;
16723
16724 if (old->curframe != fr)
16725 return false;
16726
16727 fold = old->frame[fr];
16728 fcur = cur->frame[fr];
16729 for (i = 0; i < MAX_BPF_REG; i++)
		if (memcmp(&fold->regs[i], &fcur->regs[i],
			   offsetof(struct bpf_reg_state, parent)))
16732 return false;
16733 return true;
16734}
16735
16736static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx)
16737{
16738 return env->insn_aux_data[insn_idx].is_iter_next;
16739}
16740
16741/* is_state_visited() handles iter_next() (see process_iter_next_call() for
16742 * terminology) calls specially: as opposed to bounded BPF loops, it *expects*
16743 * states to match, which otherwise would look like an infinite loop. So while
16744 * iter_next() calls are taken care of, we still need to be careful and
 * prevent an erroneous and too eager declaration of "infinite loop", when
16746 * iterators are involved.
16747 *
16748 * Here's a situation in pseudo-BPF assembly form:
16749 *
16750 * 0: again: ; set up iter_next() call args
16751 * 1: r1 = &it ; <CHECKPOINT HERE>
16752 * 2: call bpf_iter_num_next ; this is iter_next() call
16753 * 3: if r0 == 0 goto done
16754 * 4: ... something useful here ...
16755 * 5: goto again ; another iteration
16756 * 6: done:
16757 * 7: r1 = &it
16758 * 8: call bpf_iter_num_destroy ; clean up iter state
16759 * 9: exit
16760 *
16761 * This is a typical loop. Let's assume that we have a prune point at 1:,
16762 * before we get to `call bpf_iter_num_next` (e.g., because of that `goto
16763 * again`, assuming other heuristics don't get in a way).
16764 *
16765 * When we first time come to 1:, let's say we have some state X. We proceed
16766 * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit.
16767 * Now we come back to validate that forked ACTIVE state. We proceed through
16768 * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we
16769 * are converging. But the problem is that we don't know that yet, as this
16770 * convergence has to happen at iter_next() call site only. So if nothing is
16771 * done, at 1: verifier will use bounded loop logic and declare infinite
16772 * looping (and would be *technically* correct, if not for iterator's
16773 * "eventual sticky NULL" contract, see process_iter_next_call()). But we
16774 * don't want that. So what we do in process_iter_next_call() when we go on
16775 * another ACTIVE iteration, we bump slot->iter.depth, to mark that it's
16776 * a different iteration. So when we suspect an infinite loop, we additionally
16777 * check if any of the *ACTIVE* iterator states depths differ. If yes, we
16778 * pretend we are not looping and wait for next iter_next() call.
16779 *
16780 * This only applies to ACTIVE state. In DRAINED state we don't expect to
16781 * loop, because that would actually mean infinite loop, as DRAINED state is
16782 * "sticky", and so we'll keep returning into the same instruction with the
16783 * same state (at least in one of possible code paths).
16784 *
 * This approach allows keeping the infinite loop heuristic even in the face
 * of an active iterator. E.g., the C snippet below is (and will be detected
 * as) infinitely looping:
16788 *
16789 * struct bpf_iter_num it;
16790 * int *p, x;
16791 *
16792 * bpf_iter_num_new(&it, 0, 10);
 *	while ((p = bpf_iter_num_next(&it))) {
 *		x = *p;
16795 * while (x--) {} // <<-- infinite loop here
16796 * }
16797 *
16798 */
16799static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
16800{
16801 struct bpf_reg_state *slot, *cur_slot;
16802 struct bpf_func_state *state;
16803 int i, fr;
16804
16805 for (fr = old->curframe; fr >= 0; fr--) {
16806 state = old->frame[fr];
16807 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
16808 if (state->stack[i].slot_type[0] != STACK_ITER)
16809 continue;
16810
16811 slot = &state->stack[i].spilled_ptr;
16812 if (slot->iter.state != BPF_ITER_STATE_ACTIVE)
16813 continue;
16814
16815 cur_slot = &cur->frame[fr]->stack[i].spilled_ptr;
16816 if (cur_slot->iter.depth != slot->iter.depth)
16817 return true;
16818 }
16819 }
16820 return false;
16821}
16822
16823static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
16824{
16825 struct bpf_verifier_state_list *new_sl;
16826 struct bpf_verifier_state_list *sl, **pprev;
16827 struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry;
16828 int i, j, n, err, states_cnt = 0;
16829 bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx);
16830 bool add_new_state = force_new_state;
16831 bool force_exact;
16832
16833 /* bpf progs typically have pruning point every 4 instructions
16834 * http://vger.kernel.org/bpfconf2019.html#session-1
16835 * Do not add new state for future pruning if the verifier hasn't seen
16836 * at least 2 jumps and at least 8 instructions.
	 * This heuristic helps decrease 'total_states' and 'peak_states' metrics.
	 * In tests it amounts to up to a 50% reduction in total verifier
	 * memory consumption and a 20% verifier time speedup.
16840 */
16841 if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
16842 env->insn_processed - env->prev_insn_processed >= 8)
16843 add_new_state = true;
16844
	pprev = explored_state(env, insn_idx);
16846 sl = *pprev;
16847
	clean_live_states(env, insn_idx, cur);
16849
16850 while (sl) {
16851 states_cnt++;
16852 if (sl->state.insn_idx != insn_idx)
16853 goto next;
16854
16855 if (sl->state.branches) {
16856 struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
16857
16858 if (frame->in_async_callback_fn &&
16859 frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
16860 /* Different async_entry_cnt means that the verifier is
16861 * processing another entry into async callback.
16862 * Seeing the same state is not an indication of infinite
16863 * loop or infinite recursion.
16864 * But finding the same state doesn't mean that it's safe
16865 * to stop processing the current state. The previous state
16866 * hasn't yet reached bpf_exit, since state.branches > 0.
16867 * Checking in_async_callback_fn alone is not enough either.
16868 * Since the verifier still needs to catch infinite loops
16869 * inside async callbacks.
16870 */
16871 goto skip_inf_loop_check;
16872 }
16873 /* BPF open-coded iterators loop detection is special.
16874 * states_maybe_looping() logic is too simplistic in detecting
16875 * states that *might* be equivalent, because it doesn't know
16876 * about ID remapping, so don't even perform it.
16877 * See process_iter_next_call() and iter_active_depths_differ()
16878 * for overview of the logic. When current and one of parent
16879 * states are detected as equivalent, it's a good thing: we prove
16880 * convergence and can stop simulating further iterations.
16881 * It's safe to assume that iterator loop will finish, taking into
16882 * account iter_next() contract of eventually returning
16883 * sticky NULL result.
16884 *
16885 * Note, that states have to be compared exactly in this case because
16886 * read and precision marks might not be finalized inside the loop.
16887 * E.g. as in the program below:
16888 *
16889 * 1. r7 = -16
16890 * 2. r6 = bpf_get_prandom_u32()
16891 * 3. while (bpf_iter_num_next(&fp[-8])) {
16892 * 4. if (r6 != 42) {
16893 * 5. r7 = -32
16894 * 6. r6 = bpf_get_prandom_u32()
16895 * 7. continue
16896 * 8. }
16897 * 9. r0 = r10
16898 * 10. r0 += r7
16899 * 11. r8 = *(u64 *)(r0 + 0)
16900 * 12. r6 = bpf_get_prandom_u32()
16901 * 13. }
16902 *
16903 * Here verifier would first visit path 1-3, create a checkpoint at 3
16904 * with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does
16905 * not have read or precision mark for r7 yet, thus inexact states
16906 * comparison would discard current state with r7=-32
16907 * => unsafe memory access at 11 would not be caught.
16908 */
16909 if (is_iter_next_insn(env, insn_idx)) {
				if (states_equal(env, &sl->state, cur, true)) {
16911 struct bpf_func_state *cur_frame;
16912 struct bpf_reg_state *iter_state, *iter_reg;
16913 int spi;
16914
16915 cur_frame = cur->frame[cur->curframe];
16916 /* btf_check_iter_kfuncs() enforces that
16917 * iter state pointer is always the first arg
16918 */
16919 iter_reg = &cur_frame->regs[BPF_REG_1];
16920 /* current state is valid due to states_equal(),
16921 * so we can assume valid iter and reg state,
16922 * no need for extra (re-)validations
16923 */
					spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
					iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
					if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
						update_loop_entry(cur, &sl->state);
16928 goto hit;
16929 }
16930 }
16931 goto skip_inf_loop_check;
16932 }
16933 /* attempt to detect infinite loop to avoid unnecessary doomed work */
			if (states_maybe_looping(&sl->state, cur) &&
			    states_equal(env, &sl->state, cur, false) &&
			    !iter_active_depths_differ(&sl->state, cur)) {
				verbose_linfo(env, insn_idx, "; ");
				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
				verbose(env, "cur state:");
				print_verifier_state(env, cur->frame[cur->curframe], true);
				verbose(env, "old state:");
				print_verifier_state(env, sl->state.frame[cur->curframe], true);
16943 return -EINVAL;
16944 }
16945 /* if the verifier is processing a loop, avoid adding new state
16946 * too often, since different loop iterations have distinct
16947 * states and may not help future pruning.
16948 * This threshold shouldn't be too low to make sure that
16949 * a loop with large bound will be rejected quickly.
16950 * The most abusive loop will be:
16951 * r1 += 1
16952 * if r1 < 1000000 goto pc-2
			 * 1M insn_processed limit / 100 == 10k peak states.
16954 * This threshold shouldn't be too high either, since states
16955 * at the end of the loop are likely to be useful in pruning.
16956 */
16957skip_inf_loop_check:
16958 if (!force_new_state &&
16959 env->jmps_processed - env->prev_jmps_processed < 20 &&
16960 env->insn_processed - env->prev_insn_processed < 100)
16961 add_new_state = false;
16962 goto miss;
16963 }
16964 /* If sl->state is a part of a loop and this loop's entry is a part of
16965 * current verification path then states have to be compared exactly.
16966 * 'force_exact' is needed to catch the following case:
16967 *
16968 * initial Here state 'succ' was processed first,
16969 * | it was eventually tracked to produce a
16970 * V state identical to 'hdr'.
16971 * .---------> hdr All branches from 'succ' had been explored
16972 * | | and thus 'succ' has its .branches == 0.
16973 * | V
16974 * | .------... Suppose states 'cur' and 'succ' correspond
16975 * | | | to the same instruction + callsites.
16976 * | V V In such case it is necessary to check
16977 * | ... ... if 'succ' and 'cur' are states_equal().
16978 * | | | If 'succ' and 'cur' are a part of the
16979 * | V V same loop exact flag has to be set.
16980 * | succ <- cur To check if that is the case, verify
16981 * | | if loop entry of 'succ' is in current
16982 * | V DFS path.
16983 * | ...
16984 * | |
16985 * '----'
16986 *
16987 * Additional details are in the comment before get_loop_entry().
16988 */
		loop_entry = get_loop_entry(&sl->state);
		force_exact = loop_entry && loop_entry->branches > 0;
		if (states_equal(env, &sl->state, cur, force_exact)) {
			if (force_exact)
				update_loop_entry(cur, loop_entry);
16994hit:
16995 sl->hit_cnt++;
16996 /* reached equivalent register/stack state,
16997 * prune the search.
16998 * Registers read by the continuation are read by us.
16999 * If we have any write marks in env->cur_state, they
17000 * will prevent corresponding reads in the continuation
17001 * from reaching our parent (an explored_state). Our
17002 * own state will get the read marks recorded, but
17003 * they'll be immediately forgotten as we're pruning
17004 * this state and will pop a new one.
17005 */
			err = propagate_liveness(env, &sl->state, cur);
17007
17008 /* if previous state reached the exit with precision and
			 * current state is equivalent to it (except precision marks)
17010 * the precision needs to be propagated back in
17011 * the current state.
17012 */
17013 err = err ? : push_jmp_history(env, cur);
			err = err ? : propagate_precision(env, &sl->state);
17015 if (err)
17016 return err;
17017 return 1;
17018 }
17019miss:
17020 /* when new state is not going to be added do not increase miss count.
17021 * Otherwise several loop iterations will remove the state
17022 * recorded earlier. The goal of these heuristics is to have
17023 * states from some iterations of the loop (some in the beginning
17024 * and some at the end) to help pruning.
17025 */
17026 if (add_new_state)
17027 sl->miss_cnt++;
17028 /* heuristic to determine whether this state is beneficial
17029 * to keep checking from state equivalence point of view.
17030 * Higher numbers increase max_states_per_insn and verification time,
17031 * but do not meaningfully decrease insn_processed.
17032 * 'n' controls how many times state could miss before eviction.
17033 * Use bigger 'n' for checkpoints because evicting checkpoint states
17034 * too early would hinder iterator convergence.
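		 *
		 * Worked example of the check below: with n == 3, a state that
		 * was never hit (hit_cnt == 0) is evicted once miss_cnt exceeds
		 * 3, while a state with hit_cnt == 2 survives until miss_cnt
		 * exceeds 2 * 3 + 3 == 9.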
17035 */
17036 n = is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
17037 if (sl->miss_cnt > sl->hit_cnt * n + n) {
17038 /* the state is unlikely to be useful. Remove it to
17039 * speed up verification
17040 */
17041 *pprev = sl->next;
17042 if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE &&
17043 !sl->state.used_as_loop_entry) {
17044 u32 br = sl->state.branches;
17045
17046 WARN_ONCE(br,
17047 "BUG live_done but branches_to_explore %d\n",
17048 br);
				free_verifier_state(&sl->state, false);
				kfree(sl);
17051 env->peak_states--;
17052 } else {
17053 /* cannot free this state, since parentage chain may
			 * walk it later. Add it to the free_list instead, to
17055 * be freed at the end of verification
17056 */
17057 sl->next = env->free_list;
17058 env->free_list = sl;
17059 }
17060 sl = *pprev;
17061 continue;
17062 }
17063next:
17064 pprev = &sl->next;
17065 sl = *pprev;
17066 }
17067
17068 if (env->max_states_per_insn < states_cnt)
17069 env->max_states_per_insn = states_cnt;
17070
17071 if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
17072 return 0;
17073
17074 if (!add_new_state)
17075 return 0;
17076
17077 /* There were no equivalent states, remember the current one.
17078 * Technically the current state is not proven to be safe yet,
17079 * but it will either reach outer most bpf_exit (which means it's safe)
17080 * or it will be rejected. When there are no loops the verifier won't be
17081 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
17082 * again on the way to bpf_exit.
17083 * When looping the sl->state.branches will be > 0 and this state
17084 * will not be considered for equivalence until branches == 0.
17085 */
	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
17087 if (!new_sl)
17088 return -ENOMEM;
17089 env->total_states++;
17090 env->peak_states++;
17091 env->prev_jmps_processed = env->jmps_processed;
17092 env->prev_insn_processed = env->insn_processed;
17093
17094 /* forget precise markings we inherited, see __mark_chain_precision */
17095 if (env->bpf_capable)
		mark_all_scalars_imprecise(env, cur);
17097
17098 /* add new state to the head of linked list */
17099 new = &new_sl->state;
	err = copy_verifier_state(new, cur);
	if (err) {
		free_verifier_state(new, false);
		kfree(new_sl);
17104 return err;
17105 }
17106 new->insn_idx = insn_idx;
17107 WARN_ONCE(new->branches != 1,
17108 "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
17109
17110 cur->parent = new;
17111 cur->first_insn_idx = insn_idx;
17112 cur->dfs_depth = new->dfs_depth + 1;
	clear_jmp_history(cur);
	new_sl->next = *explored_state(env, insn_idx);
	*explored_state(env, insn_idx) = new_sl;
17116 /* connect new state to parentage chain. Current frame needs all
17117 * registers connected. Only r6 - r9 of the callers are alive (pushed
17118 * to the stack implicitly by JITs) so in callers' frames connect just
17119 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
17120 * the state of the call instruction (with WRITTEN set), and r0 comes
17121 * from callee with its full parentage chain, anyway.
17122 */
17123 /* clear write marks in current state: the writes we did are not writes
17124 * our child did, so they don't screen off its reads from us.
17125 * (There are no read marks in current state, because reads always mark
17126 * their parent and current state never has children yet. Only
17127 * explored_states can get read marks.)
17128 */
17129 for (j = 0; j <= cur->curframe; j++) {
17130 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
17131 cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
17132 for (i = 0; i < BPF_REG_FP; i++)
17133 cur->frame[j]->regs[i].live = REG_LIVE_NONE;
17134 }
17135
17136 /* all stack frames are accessible from callee, clear them all */
17137 for (j = 0; j <= cur->curframe; j++) {
17138 struct bpf_func_state *frame = cur->frame[j];
17139 struct bpf_func_state *newframe = new->frame[j];
17140
17141 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
17142 frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
17143 frame->stack[i].spilled_ptr.parent =
17144 &newframe->stack[i].spilled_ptr;
17145 }
17146 }
17147 return 0;
17148}
17149
17150/* Return true if it's OK to have the same insn return a different type. */
17151static bool reg_type_mismatch_ok(enum bpf_reg_type type)
17152{
17153 switch (base_type(type)) {
17154 case PTR_TO_CTX:
17155 case PTR_TO_SOCKET:
17156 case PTR_TO_SOCK_COMMON:
17157 case PTR_TO_TCP_SOCK:
17158 case PTR_TO_XDP_SOCK:
17159 case PTR_TO_BTF_ID:
17160 return false;
17161 default:
17162 return true;
17163 }
17164}
17165
17166/* If an instruction was previously used with particular pointer types, then we
 * need to be careful to avoid cases such as the one below, where it may be ok
 * for one branch to access the pointer, but not for the other branch:
17169 *
17170 * R1 = sock_ptr
17171 * goto X;
17172 * ...
17173 * R1 = some_other_valid_ptr;
17174 * goto X;
17175 * ...
17176 * R2 = *(u32 *)(R1 + 0);
17177 */
17178static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
17179{
	return src != prev && (!reg_type_mismatch_ok(src) ||
			       !reg_type_mismatch_ok(prev));
17182}
17183
17184static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
17185 bool allow_trust_missmatch)
17186{
17187 enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
17188
17189 if (*prev_type == NOT_INIT) {
17190 /* Saw a valid insn
17191 * dst_reg = *(u32 *)(src_reg + off)
17192 * save type to validate intersecting paths
17193 */
17194 *prev_type = type;
	} else if (reg_type_mismatch(type, *prev_type)) {
17196 /* Abuser program is trying to use the same insn
17197 * dst_reg = *(u32*) (src_reg + off)
17198 * with different pointer types:
17199 * src_reg == ctx in one branch and
17200 * src_reg == stack|map in some other branch.
17201 * Reject it.
17202 */
17203 if (allow_trust_missmatch &&
17204 base_type(type) == PTR_TO_BTF_ID &&
17205 base_type(type: *prev_type) == PTR_TO_BTF_ID) {
17206 /*
17207 * Have to support a use case when one path through
17208 * the program yields TRUSTED pointer while another
17209 * is UNTRUSTED. Fallback to UNTRUSTED to generate
17210 * BPF_PROBE_MEM/BPF_PROBE_MEMSX.
17211 */
17212 *prev_type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
17213 } else {
			verbose(env, "same insn cannot be used with different pointers\n");
17215 return -EINVAL;
17216 }
17217 }
17218
17219 return 0;
17220}
17221
17222static int do_check(struct bpf_verifier_env *env)
17223{
17224 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
17225 struct bpf_verifier_state *state = env->cur_state;
17226 struct bpf_insn *insns = env->prog->insnsi;
17227 struct bpf_reg_state *regs;
17228 int insn_cnt = env->prog->len;
17229 bool do_print_state = false;
17230 int prev_insn_idx = -1;
17231
17232 for (;;) {
17233 bool exception_exit = false;
17234 struct bpf_insn *insn;
17235 u8 class;
17236 int err;
17237
17238 env->prev_insn_idx = prev_insn_idx;
17239 if (env->insn_idx >= insn_cnt) {
17240 verbose(private_data: env, fmt: "invalid insn idx %d insn_cnt %d\n",
17241 env->insn_idx, insn_cnt);
17242 return -EFAULT;
17243 }
17244
17245 insn = &insns[env->insn_idx];
17246 class = BPF_CLASS(insn->code);
17247
17248 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
			verbose(env,
				"BPF program is too large. Processed %d insn\n",
17251 env->insn_processed);
17252 return -E2BIG;
17253 }
17254
17255 state->last_insn_idx = env->prev_insn_idx;
17256
		if (is_prune_point(env, env->insn_idx)) {
			err = is_state_visited(env, env->insn_idx);
17259 if (err < 0)
17260 return err;
17261 if (err == 1) {
17262 /* found equivalent state, can prune the search */
17263 if (env->log.level & BPF_LOG_LEVEL) {
17264 if (do_print_state)
17265 verbose(private_data: env, fmt: "\nfrom %d to %d%s: safe\n",
17266 env->prev_insn_idx, env->insn_idx,
17267 env->cur_state->speculative ?
17268 " (speculative execution)" : "");
17269 else
17270 verbose(private_data: env, fmt: "%d: safe\n", env->insn_idx);
17271 }
17272 goto process_bpf_exit;
17273 }
17274 }
17275
		if (is_jmp_point(env, env->insn_idx)) {
			err = push_jmp_history(env, state);
17278 if (err)
17279 return err;
17280 }
17281
17282 if (signal_pending(current))
17283 return -EAGAIN;
17284
17285 if (need_resched())
17286 cond_resched();
17287
17288 if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
			verbose(env, "\nfrom %d to %d%s:",
17290 env->prev_insn_idx, env->insn_idx,
17291 env->cur_state->speculative ?
17292 " (speculative execution)" : "");
			print_verifier_state(env, state->frame[state->curframe], true);
17294 do_print_state = false;
17295 }
17296
17297 if (env->log.level & BPF_LOG_LEVEL) {
17298 const struct bpf_insn_cbs cbs = {
17299 .cb_call = disasm_kfunc_name,
17300 .cb_print = verbose,
17301 .private_data = env,
17302 };
17303
17304 if (verifier_state_scratched(env))
				print_insn_state(env, state->frame[state->curframe]);

			verbose_linfo(env, env->insn_idx, "; ");
			env->prev_log_pos = env->log.end_pos;
			verbose(env, "%d: ", env->insn_idx);
			print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
17311 env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos;
17312 env->prev_log_pos = env->log.end_pos;
17313 }
17314
		if (bpf_prog_is_offloaded(env->prog->aux)) {
			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
							   env->prev_insn_idx);
17318 if (err)
17319 return err;
17320 }
17321
17322 regs = cur_regs(env);
17323 sanitize_mark_insn_seen(env);
17324 prev_insn_idx = env->insn_idx;
17325
17326 if (class == BPF_ALU || class == BPF_ALU64) {
17327 err = check_alu_op(env, insn);
17328 if (err)
17329 return err;
17330
17331 } else if (class == BPF_LDX) {
17332 enum bpf_reg_type src_reg_type;
17333
17334 /* check for reserved fields is already done */
17335
17336 /* check src operand */
17337 err = check_reg_arg(env, regno: insn->src_reg, t: SRC_OP);
17338 if (err)
17339 return err;
17340
17341 err = check_reg_arg(env, regno: insn->dst_reg, t: DST_OP_NO_MARK);
17342 if (err)
17343 return err;
17344
17345 src_reg_type = regs[insn->src_reg].type;
17346
17347 /* check that memory (src_reg + off) is readable,
17348 * the state of dst_reg will be updated by this func
17349 */
			err = check_mem_access(env, env->insn_idx, insn->src_reg,
					       insn->off, BPF_SIZE(insn->code),
					       BPF_READ, insn->dst_reg, false,
					       BPF_MODE(insn->code) == BPF_MEMSX);
17354 if (err)
17355 return err;
17356
			err = save_aux_ptr_type(env, src_reg_type, true);
17358 if (err)
17359 return err;
17360 } else if (class == BPF_STX) {
17361 enum bpf_reg_type dst_reg_type;
17362
17363 if (BPF_MODE(insn->code) == BPF_ATOMIC) {
17364 err = check_atomic(env, insn_idx: env->insn_idx, insn);
17365 if (err)
17366 return err;
17367 env->insn_idx++;
17368 continue;
17369 }
17370
17371 if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
17372 verbose(private_data: env, fmt: "BPF_STX uses reserved fields\n");
17373 return -EINVAL;
17374 }
17375
17376 /* check src1 operand */
17377 err = check_reg_arg(env, regno: insn->src_reg, t: SRC_OP);
17378 if (err)
17379 return err;
17380 /* check src2 operand */
17381 err = check_reg_arg(env, regno: insn->dst_reg, t: SRC_OP);
17382 if (err)
17383 return err;
17384
17385 dst_reg_type = regs[insn->dst_reg].type;
17386
17387 /* check that memory (dst_reg + off) is writeable */
			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
					       insn->off, BPF_SIZE(insn->code),
					       BPF_WRITE, insn->src_reg, false, false);
17391 if (err)
17392 return err;
17393
17394 err = save_aux_ptr_type(env, type: dst_reg_type, allow_trust_missmatch: false);
17395 if (err)
17396 return err;
17397 } else if (class == BPF_ST) {
17398 enum bpf_reg_type dst_reg_type;
17399
17400 if (BPF_MODE(insn->code) != BPF_MEM ||
17401 insn->src_reg != BPF_REG_0) {
17402 verbose(private_data: env, fmt: "BPF_ST uses reserved fields\n");
17403 return -EINVAL;
17404 }
17405 /* check src operand */
17406 err = check_reg_arg(env, regno: insn->dst_reg, t: SRC_OP);
17407 if (err)
17408 return err;
17409
17410 dst_reg_type = regs[insn->dst_reg].type;
17411
17412 /* check that memory (dst_reg + off) is writeable */
			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
					       insn->off, BPF_SIZE(insn->code),
					       BPF_WRITE, -1, false, false);
17416 if (err)
17417 return err;
17418
17419 err = save_aux_ptr_type(env, type: dst_reg_type, allow_trust_missmatch: false);
17420 if (err)
17421 return err;
17422 } else if (class == BPF_JMP || class == BPF_JMP32) {
17423 u8 opcode = BPF_OP(insn->code);
17424
17425 env->jmps_processed++;
17426 if (opcode == BPF_CALL) {
17427 if (BPF_SRC(insn->code) != BPF_K ||
17428 (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
17429 && insn->off != 0) ||
17430 (insn->src_reg != BPF_REG_0 &&
17431 insn->src_reg != BPF_PSEUDO_CALL &&
17432 insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
17433 insn->dst_reg != BPF_REG_0 ||
17434 class == BPF_JMP32) {
17435 verbose(private_data: env, fmt: "BPF_CALL uses reserved fields\n");
17436 return -EINVAL;
17437 }
17438
17439 if (env->cur_state->active_lock.ptr) {
17440 if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) ||
17441 (insn->src_reg == BPF_PSEUDO_CALL) ||
17442 (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
17443 (insn->off != 0 || !is_bpf_graph_api_kfunc(btf_id: insn->imm)))) {
17444 verbose(private_data: env, fmt: "function calls are not allowed while holding a lock\n");
17445 return -EINVAL;
17446 }
17447 }
17448 if (insn->src_reg == BPF_PSEUDO_CALL) {
17449 err = check_func_call(env, insn, insn_idx: &env->insn_idx);
17450 } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
17451 err = check_kfunc_call(env, insn, insn_idx_p: &env->insn_idx);
17452 if (!err && is_bpf_throw_kfunc(insn)) {
17453 exception_exit = true;
17454 goto process_bpf_exit_full;
17455 }
17456 } else {
17457 err = check_helper_call(env, insn, insn_idx_p: &env->insn_idx);
17458 }
17459 if (err)
17460 return err;
17461
17462 mark_reg_scratched(env, regno: BPF_REG_0);
17463 } else if (opcode == BPF_JA) {
17464 if (BPF_SRC(insn->code) != BPF_K ||
17465 insn->src_reg != BPF_REG_0 ||
17466 insn->dst_reg != BPF_REG_0 ||
17467 (class == BPF_JMP && insn->imm != 0) ||
17468 (class == BPF_JMP32 && insn->off != 0)) {
17469 verbose(private_data: env, fmt: "BPF_JA uses reserved fields\n");
17470 return -EINVAL;
17471 }
17472
17473 if (class == BPF_JMP)
17474 env->insn_idx += insn->off + 1;
17475 else
17476 env->insn_idx += insn->imm + 1;
17477 continue;
17478
17479 } else if (opcode == BPF_EXIT) {
17480 if (BPF_SRC(insn->code) != BPF_K ||
17481 insn->imm != 0 ||
17482 insn->src_reg != BPF_REG_0 ||
17483 insn->dst_reg != BPF_REG_0 ||
17484 class == BPF_JMP32) {
17485 verbose(private_data: env, fmt: "BPF_EXIT uses reserved fields\n");
17486 return -EINVAL;
17487 }
17488process_bpf_exit_full:
17489 if (env->cur_state->active_lock.ptr &&
17490 !in_rbtree_lock_required_cb(env)) {
17491 verbose(private_data: env, fmt: "bpf_spin_unlock is missing\n");
17492 return -EINVAL;
17493 }
17494
17495 if (env->cur_state->active_rcu_lock &&
17496 !in_rbtree_lock_required_cb(env)) {
17497 verbose(private_data: env, fmt: "bpf_rcu_read_unlock is missing\n");
17498 return -EINVAL;
17499 }
17500
17501 /* We must do check_reference_leak here before
17502 * prepare_func_exit to handle the case when
17503 * state->curframe > 0, it may be a callback
17504 * function, for which reference_state must
17505 * match caller reference state when it exits.
17506 */
17507 err = check_reference_leak(env, exception_exit);
17508 if (err)
17509 return err;
17510
17511 /* The side effect of the prepare_func_exit
17512 * which is being skipped is that it frees
17513 * bpf_func_state. Typically, process_bpf_exit
17514 * will only be hit with outermost exit.
17515 * copy_verifier_state in pop_stack will handle
17516 * freeing of any extra bpf_func_state left over
17517 * from not processing all nested function
17518 * exits. We also skip return code checks as
17519 * they are not needed for exceptional exits.
17520 */
17521 if (exception_exit)
17522 goto process_bpf_exit;
17523
17524 if (state->curframe) {
17525 /* exit from nested function */
17526 err = prepare_func_exit(env, insn_idx: &env->insn_idx);
17527 if (err)
17528 return err;
17529 do_print_state = true;
17530 continue;
17531 }
17532
17533 err = check_return_code(env, regno: BPF_REG_0);
17534 if (err)
17535 return err;
17536process_bpf_exit:
17537 mark_verifier_state_scratched(env);
				update_branch_counts(env, env->cur_state);
				err = pop_stack(env, &prev_insn_idx,
						&env->insn_idx, pop_log);
17541 if (err < 0) {
17542 if (err != -ENOENT)
17543 return err;
17544 break;
17545 } else {
17546 do_print_state = true;
17547 continue;
17548 }
17549 } else {
17550 err = check_cond_jmp_op(env, insn, insn_idx: &env->insn_idx);
17551 if (err)
17552 return err;
17553 }
17554 } else if (class == BPF_LD) {
17555 u8 mode = BPF_MODE(insn->code);
17556
17557 if (mode == BPF_ABS || mode == BPF_IND) {
17558 err = check_ld_abs(env, insn);
17559 if (err)
17560 return err;
17561
17562 } else if (mode == BPF_IMM) {
17563 err = check_ld_imm(env, insn);
17564 if (err)
17565 return err;
17566
17567 env->insn_idx++;
17568 sanitize_mark_insn_seen(env);
17569 } else {
17570 verbose(private_data: env, fmt: "invalid BPF_LD mode\n");
17571 return -EINVAL;
17572 }
17573 } else {
17574 verbose(private_data: env, fmt: "unknown insn class %d\n", class);
17575 return -EINVAL;
17576 }
17577
17578 env->insn_idx++;
17579 }
17580
17581 return 0;
17582}
17583
17584static int find_btf_percpu_datasec(struct btf *btf)
17585{
17586 const struct btf_type *t;
17587 const char *tname;
17588 int i, n;
17589
17590 /*
17591 * Both vmlinux and module each have their own ".data..percpu"
17592 * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF
17593 * types to look at only module's own BTF types.
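	 *
	 * This works because a module's split BTF continues the type ID space
	 * of the base vmlinux BTF: type IDs below btf_nr_types(btf_vmlinux)
	 * refer to vmlinux types, so the scan below starts right after them.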
17594 */
17595 n = btf_nr_types(btf);
17596 if (btf_is_module(btf))
17597 i = btf_nr_types(btf: btf_vmlinux);
17598 else
17599 i = 1;
17600
17601 for(; i < n; i++) {
17602 t = btf_type_by_id(btf, type_id: i);
17603 if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
17604 continue;
17605
17606 tname = btf_name_by_offset(btf, offset: t->name_off);
17607 if (!strcmp(tname, ".data..percpu"))
17608 return i;
17609 }
17610
17611 return -ENOENT;
17612}
17613
17614/* replace pseudo btf_id with kernel symbol address */
17615static int check_pseudo_btf_id(struct bpf_verifier_env *env,
17616 struct bpf_insn *insn,
17617 struct bpf_insn_aux_data *aux)
17618{
17619 const struct btf_var_secinfo *vsi;
17620 const struct btf_type *datasec;
17621 struct btf_mod_pair *btf_mod;
17622 const struct btf_type *t;
17623 const char *sym_name;
17624 bool percpu = false;
17625 u32 type, id = insn->imm;
17626 struct btf *btf;
17627 s32 datasec_id;
17628 u64 addr;
17629 int i, btf_fd, err;
17630
17631 btf_fd = insn[1].imm;
17632 if (btf_fd) {
17633 btf = btf_get_by_fd(fd: btf_fd);
17634 if (IS_ERR(ptr: btf)) {
17635 verbose(private_data: env, fmt: "invalid module BTF object FD specified.\n");
17636 return -EINVAL;
17637 }
17638 } else {
17639 if (!btf_vmlinux) {
17640 verbose(private_data: env, fmt: "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
17641 return -EINVAL;
17642 }
17643 btf = btf_vmlinux;
17644 btf_get(btf);
17645 }
17646
17647 t = btf_type_by_id(btf, type_id: id);
17648 if (!t) {
17649 verbose(private_data: env, fmt: "ldimm64 insn specifies invalid btf_id %d.\n", id);
17650 err = -ENOENT;
17651 goto err_put;
17652 }
17653
17654 if (!btf_type_is_var(t) && !btf_type_is_func(t)) {
17655 verbose(private_data: env, fmt: "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id);
17656 err = -EINVAL;
17657 goto err_put;
17658 }
17659
	sym_name = btf_name_by_offset(btf, t->name_off);
	addr = kallsyms_lookup_name(sym_name);
	if (!addr) {
		verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
17664 sym_name);
17665 err = -ENOENT;
17666 goto err_put;
17667 }
17668 insn[0].imm = (u32)addr;
17669 insn[1].imm = addr >> 32;
17670
17671 if (btf_type_is_func(t)) {
17672 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
17673 aux->btf_var.mem_size = 0;
17674 goto check_btf;
17675 }
17676
17677 datasec_id = find_btf_percpu_datasec(btf);
17678 if (datasec_id > 0) {
17679 datasec = btf_type_by_id(btf, type_id: datasec_id);
17680 for_each_vsi(i, datasec, vsi) {
17681 if (vsi->type == id) {
17682 percpu = true;
17683 break;
17684 }
17685 }
17686 }
17687
17688 type = t->type;
17689 t = btf_type_skip_modifiers(btf, id: type, NULL);
17690 if (percpu) {
17691 aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
17692 aux->btf_var.btf = btf;
17693 aux->btf_var.btf_id = type;
17694 } else if (!btf_type_is_struct(t)) {
17695 const struct btf_type *ret;
17696 const char *tname;
17697 u32 tsize;
17698
17699 /* resolve the type size of ksym. */
17700 ret = btf_resolve_size(btf, type: t, type_size: &tsize);
17701 if (IS_ERR(ptr: ret)) {
17702 tname = btf_name_by_offset(btf, offset: t->name_off);
17703 verbose(private_data: env, fmt: "ldimm64 unable to resolve the size of type '%s': %ld\n",
17704 tname, PTR_ERR(ptr: ret));
17705 err = -EINVAL;
17706 goto err_put;
17707 }
17708 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
17709 aux->btf_var.mem_size = tsize;
17710 } else {
17711 aux->btf_var.reg_type = PTR_TO_BTF_ID;
17712 aux->btf_var.btf = btf;
17713 aux->btf_var.btf_id = type;
17714 }
17715check_btf:
17716 /* check whether we recorded this BTF (and maybe module) already */
17717 for (i = 0; i < env->used_btf_cnt; i++) {
17718 if (env->used_btfs[i].btf == btf) {
17719 btf_put(btf);
17720 return 0;
17721 }
17722 }
17723
17724 if (env->used_btf_cnt >= MAX_USED_BTFS) {
17725 err = -E2BIG;
17726 goto err_put;
17727 }
17728
17729 btf_mod = &env->used_btfs[env->used_btf_cnt];
17730 btf_mod->btf = btf;
17731 btf_mod->module = NULL;
17732
17733 /* if we reference variables from kernel module, bump its refcount */
17734 if (btf_is_module(btf)) {
17735 btf_mod->module = btf_try_get_module(btf);
17736 if (!btf_mod->module) {
17737 err = -ENXIO;
17738 goto err_put;
17739 }
17740 }
17741
17742 env->used_btf_cnt++;
17743
17744 return 0;
17745err_put:
17746 btf_put(btf);
17747 return err;
17748}
17749
17750static bool is_tracing_prog_type(enum bpf_prog_type type)
17751{
17752 switch (type) {
17753 case BPF_PROG_TYPE_KPROBE:
17754 case BPF_PROG_TYPE_TRACEPOINT:
17755 case BPF_PROG_TYPE_PERF_EVENT:
17756 case BPF_PROG_TYPE_RAW_TRACEPOINT:
17757 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
17758 return true;
17759 default:
17760 return false;
17761 }
17762}
17763
17764static int check_map_prog_compatibility(struct bpf_verifier_env *env,
17765 struct bpf_map *map,
17766 struct bpf_prog *prog)
17767
17768{
17769 enum bpf_prog_type prog_type = resolve_prog_type(prog);
17770
17771 if (btf_record_has_field(rec: map->record, type: BPF_LIST_HEAD) ||
17772 btf_record_has_field(rec: map->record, type: BPF_RB_ROOT)) {
17773 if (is_tracing_prog_type(type: prog_type)) {
17774 verbose(private_data: env, fmt: "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
17775 return -EINVAL;
17776 }
17777 }
17778
17779 if (btf_record_has_field(rec: map->record, type: BPF_SPIN_LOCK)) {
17780 if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
17781 verbose(private_data: env, fmt: "socket filter progs cannot use bpf_spin_lock yet\n");
17782 return -EINVAL;
17783 }
17784
17785 if (is_tracing_prog_type(type: prog_type)) {
17786 verbose(private_data: env, fmt: "tracing progs cannot use bpf_spin_lock yet\n");
17787 return -EINVAL;
17788 }
17789 }
17790
17791 if (btf_record_has_field(rec: map->record, type: BPF_TIMER)) {
17792 if (is_tracing_prog_type(type: prog_type)) {
17793 verbose(private_data: env, fmt: "tracing progs cannot use bpf_timer yet\n");
17794 return -EINVAL;
17795 }
17796 }
17797
17798 if ((bpf_prog_is_offloaded(aux: prog->aux) || bpf_map_is_offloaded(map)) &&
17799 !bpf_offload_prog_map_match(prog, map)) {
17800 verbose(private_data: env, fmt: "offload device mismatch between prog and map\n");
17801 return -EINVAL;
17802 }
17803
17804 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
17805 verbose(private_data: env, fmt: "bpf_struct_ops map cannot be used in prog\n");
17806 return -EINVAL;
17807 }
17808
17809 if (prog->aux->sleepable)
17810 switch (map->map_type) {
17811 case BPF_MAP_TYPE_HASH:
17812 case BPF_MAP_TYPE_LRU_HASH:
17813 case BPF_MAP_TYPE_ARRAY:
17814 case BPF_MAP_TYPE_PERCPU_HASH:
17815 case BPF_MAP_TYPE_PERCPU_ARRAY:
17816 case BPF_MAP_TYPE_LRU_PERCPU_HASH:
17817 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
17818 case BPF_MAP_TYPE_HASH_OF_MAPS:
17819 case BPF_MAP_TYPE_RINGBUF:
17820 case BPF_MAP_TYPE_USER_RINGBUF:
17821 case BPF_MAP_TYPE_INODE_STORAGE:
17822 case BPF_MAP_TYPE_SK_STORAGE:
17823 case BPF_MAP_TYPE_TASK_STORAGE:
17824 case BPF_MAP_TYPE_CGRP_STORAGE:
17825 break;
17826 default:
17827 verbose(private_data: env,
17828 fmt: "Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
17829 return -EINVAL;
17830 }
17831
17832 return 0;
17833}
17834
17835static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
17836{
17837 return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
17838 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
17839}
17840
17841/* find and rewrite pseudo imm in ld_imm64 instructions:
17842 *
17843 * 1. if it accesses map FD, replace it with actual map pointer.
17844 * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
17845 *
17846 * NOTE: btf_vmlinux is required for converting pseudo btf_id.
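 *
 * Sketch of case 1 (fd value is made up): a BPF_LD | BPF_IMM | BPF_DW insn
 * pair with src_reg == BPF_PSEUDO_MAP_FD and insn[0].imm == 5 (a map fd) is
 * rewritten so that insn[0].imm holds the lower 32 bits and insn[1].imm the
 * upper 32 bits of the in-kernel 'struct bpf_map *' looked up from fd 5.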
17847 */
17848static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
17849{
17850 struct bpf_insn *insn = env->prog->insnsi;
17851 int insn_cnt = env->prog->len;
17852 int i, j, err;
17853
17854 err = bpf_prog_calc_tag(fp: env->prog);
17855 if (err)
17856 return err;
17857
17858 for (i = 0; i < insn_cnt; i++, insn++) {
17859 if (BPF_CLASS(insn->code) == BPF_LDX &&
17860 ((BPF_MODE(insn->code) != BPF_MEM && BPF_MODE(insn->code) != BPF_MEMSX) ||
17861 insn->imm != 0)) {
17862 verbose(private_data: env, fmt: "BPF_LDX uses reserved fields\n");
17863 return -EINVAL;
17864 }
17865
17866 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
17867 struct bpf_insn_aux_data *aux;
17868 struct bpf_map *map;
17869 struct fd f;
17870 u64 addr;
17871 u32 fd;
17872
17873 if (i == insn_cnt - 1 || insn[1].code != 0 ||
17874 insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
17875 insn[1].off != 0) {
17876 verbose(private_data: env, fmt: "invalid bpf_ld_imm64 insn\n");
17877 return -EINVAL;
17878 }
17879
17880 if (insn[0].src_reg == 0)
17881 /* valid generic load 64-bit imm */
17882 goto next_insn;
17883
17884 if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
17885 aux = &env->insn_aux_data[i];
17886 err = check_pseudo_btf_id(env, insn, aux);
17887 if (err)
17888 return err;
17889 goto next_insn;
17890 }
17891
17892 if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
17893 aux = &env->insn_aux_data[i];
17894 aux->ptr_type = PTR_TO_FUNC;
17895 goto next_insn;
17896 }
17897
17898 /* In final convert_pseudo_ld_imm64() step, this is
17899 * converted into regular 64-bit imm load insn.
17900 */
17901 switch (insn[0].src_reg) {
17902 case BPF_PSEUDO_MAP_VALUE:
17903 case BPF_PSEUDO_MAP_IDX_VALUE:
17904 break;
17905 case BPF_PSEUDO_MAP_FD:
17906 case BPF_PSEUDO_MAP_IDX:
17907 if (insn[1].imm == 0)
17908 break;
17909 fallthrough;
17910 default:
17911 verbose(private_data: env, fmt: "unrecognized bpf_ld_imm64 insn\n");
17912 return -EINVAL;
17913 }
17914
17915 switch (insn[0].src_reg) {
17916 case BPF_PSEUDO_MAP_IDX_VALUE:
17917 case BPF_PSEUDO_MAP_IDX:
17918 if (bpfptr_is_null(bpfptr: env->fd_array)) {
17919 verbose(private_data: env, fmt: "fd_idx without fd_array is invalid\n");
17920 return -EPROTO;
17921 }
				if (copy_from_bpfptr_offset(&fd, env->fd_array,
							    insn[0].imm * sizeof(fd),
							    sizeof(fd)))
17925 return -EFAULT;
17926 break;
17927 default:
17928 fd = insn[0].imm;
17929 break;
17930 }
17931
17932 f = fdget(fd);
17933 map = __bpf_map_get(f);
17934 if (IS_ERR(ptr: map)) {
17935 verbose(private_data: env, fmt: "fd %d is not pointing to valid bpf_map\n",
17936 insn[0].imm);
17937 return PTR_ERR(ptr: map);
17938 }
17939
17940 err = check_map_prog_compatibility(env, map, prog: env->prog);
17941 if (err) {
17942 fdput(fd: f);
17943 return err;
17944 }
17945
17946 aux = &env->insn_aux_data[i];
17947 if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
17948 insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
17949 addr = (unsigned long)map;
17950 } else {
17951 u32 off = insn[1].imm;
17952
17953 if (off >= BPF_MAX_VAR_OFF) {
17954 verbose(private_data: env, fmt: "direct value offset of %u is not allowed\n", off);
17955 fdput(fd: f);
17956 return -EINVAL;
17957 }
17958
17959 if (!map->ops->map_direct_value_addr) {
17960 verbose(private_data: env, fmt: "no direct value access support for this map type\n");
17961 fdput(fd: f);
17962 return -EINVAL;
17963 }
17964
17965 err = map->ops->map_direct_value_addr(map, &addr, off);
17966 if (err) {
17967 verbose(private_data: env, fmt: "invalid access to map value pointer, value_size=%u off=%u\n",
17968 map->value_size, off);
17969 fdput(fd: f);
17970 return err;
17971 }
17972
17973 aux->map_off = off;
17974 addr += off;
17975 }
17976
17977 insn[0].imm = (u32)addr;
17978 insn[1].imm = addr >> 32;
17979
17980 /* check whether we recorded this map already */
17981 for (j = 0; j < env->used_map_cnt; j++) {
17982 if (env->used_maps[j] == map) {
17983 aux->map_index = j;
17984 fdput(fd: f);
17985 goto next_insn;
17986 }
17987 }
17988
17989 if (env->used_map_cnt >= MAX_USED_MAPS) {
17990 fdput(fd: f);
17991 return -E2BIG;
17992 }
17993
17994 /* hold the map. If the program is rejected by verifier,
17995 * the map will be released by release_maps() or it
17996 * will be used by the valid program until it's unloaded
17997 * and all maps are released in free_used_maps()
17998 */
17999 bpf_map_inc(map);
18000
18001 aux->map_index = env->used_map_cnt;
18002 env->used_maps[env->used_map_cnt++] = map;
18003
18004 if (bpf_map_is_cgroup_storage(map) &&
18005 bpf_cgroup_storage_assign(aux: env->prog->aux, map)) {
18006 verbose(private_data: env, fmt: "only one cgroup storage of each type is allowed\n");
18007 fdput(fd: f);
18008 return -EBUSY;
18009 }
18010
18011 fdput(fd: f);
18012next_insn:
18013 insn++;
18014 i++;
18015 continue;
18016 }
18017
18018 /* Basic sanity check before we invest more work here. */
18019 if (!bpf_opcode_in_insntable(code: insn->code)) {
18020 verbose(private_data: env, fmt: "unknown opcode %02x\n", insn->code);
18021 return -EINVAL;
18022 }
18023 }
18024
18025 /* now all pseudo BPF_LD_IMM64 instructions load valid
18026 * 'struct bpf_map *' into a register instead of user map_fd.
18027 * These pointers will be used later by verifier to validate map access.
18028 */
18029 return 0;
18030}
18031
18032/* drop refcnt of maps used by the rejected program */
18033static void release_maps(struct bpf_verifier_env *env)
18034{
	__bpf_free_used_maps(env->prog->aux, env->used_maps,
			     env->used_map_cnt);
18037}
18038
18039/* drop refcnt of maps used by the rejected program */
18040static void release_btfs(struct bpf_verifier_env *env)
18041{
	__bpf_free_used_btfs(env->prog->aux, env->used_btfs,
			     env->used_btf_cnt);
18044}
18045
18046/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
18047static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
18048{
18049 struct bpf_insn *insn = env->prog->insnsi;
18050 int insn_cnt = env->prog->len;
18051 int i;
18052
18053 for (i = 0; i < insn_cnt; i++, insn++) {
18054 if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
18055 continue;
18056 if (insn->src_reg == BPF_PSEUDO_FUNC)
18057 continue;
18058 insn->src_reg = 0;
18059 }
18060}
18061
/* A single env->prog->insnsi[off] instruction was replaced with the range
 * insnsi[off, off + cnt). Adjust the corresponding insn_aux_data by copying
18064 * [0, off) and [off, end) to new locations, so the patched range stays zero
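 *
 * Worked example (hypothetical numbers): patching one insn at off == 5 with a
 * cnt == 3 replacement copies old aux[0..4] to the same indices, moves old
 * aux[5..] to new indices starting at off + cnt - 1 == 7, and leaves the two
 * freshly inserted slots at indices 5 and 6 zeroed except for inheriting the
 * 'seen' flag of the original insn at off (and a recomputed zext_dst).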
18065 */
18066static void adjust_insn_aux_data(struct bpf_verifier_env *env,
18067 struct bpf_insn_aux_data *new_data,
18068 struct bpf_prog *new_prog, u32 off, u32 cnt)
18069{
18070 struct bpf_insn_aux_data *old_data = env->insn_aux_data;
18071 struct bpf_insn *insn = new_prog->insnsi;
18072 u32 old_seen = old_data[off].seen;
18073 u32 prog_len;
18074 int i;
18075
18076 /* aux info at OFF always needs adjustment, no matter fast path
18077 * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
18078 * original insn at old prog.
18079 */
18080 old_data[off].zext_dst = insn_has_def32(env, insn: insn + off + cnt - 1);
18081
18082 if (cnt == 1)
18083 return;
18084 prog_len = new_prog->len;
18085
18086 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
18087 memcpy(new_data + off + cnt - 1, old_data + off,
18088 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
18089 for (i = off; i < off + cnt - 1; i++) {
18090 /* Expand insni[off]'s seen count to the patched range. */
18091 new_data[i].seen = old_seen;
18092 new_data[i].zext_dst = insn_has_def32(env, insn + i);
18093 }
18094 env->insn_aux_data = new_data;
18095 vfree(old_data);
18096}
18097
18098static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
18099{
18100 int i;
18101
18102 if (len == 1)
18103 return;
18104 /* NOTE: fake 'exit' subprog should be updated as well. */
18105 for (i = 0; i <= env->subprog_cnt; i++) {
18106 if (env->subprog_info[i].start <= off)
18107 continue;
18108 env->subprog_info[i].start += len - 1;
18109 }
18110}
18111
18112static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
18113{
18114 struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
18115 int i, sz = prog->aux->size_poke_tab;
18116 struct bpf_jit_poke_descriptor *desc;
18117
18118 for (i = 0; i < sz; i++) {
18119 desc = &tab[i];
18120 if (desc->insn_idx <= off)
18121 continue;
18122 desc->insn_idx += len - 1;
18123 }
18124}
18125
18126static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
18127 const struct bpf_insn *patch, u32 len)
18128{
18129 struct bpf_prog *new_prog;
18130 struct bpf_insn_aux_data *new_data = NULL;
18131
18132 if (len > 1) {
18133 new_data = vzalloc(array_size(env->prog->len + len - 1,
18134 sizeof(struct bpf_insn_aux_data)));
18135 if (!new_data)
18136 return NULL;
18137 }
18138
18139 new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
18140 if (IS_ERR(new_prog)) {
18141 if (PTR_ERR(new_prog) == -ERANGE)
18142 verbose(env,
18143 "insn %d cannot be patched due to 16-bit range\n",
18144 env->insn_aux_data[off].orig_idx);
18145 vfree(new_data);
18146 return NULL;
18147 }
18148 adjust_insn_aux_data(env, new_data, new_prog, off, len);
18149 adjust_subprog_starts(env, off, len);
18150 adjust_poke_descs(new_prog, off, len);
18151 return new_prog;
18152}
18153
18154static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
18155 u32 off, u32 cnt)
18156{
18157 int i, j;
18158
18159 /* find first prog starting at or after off (first to remove) */
18160 for (i = 0; i < env->subprog_cnt; i++)
18161 if (env->subprog_info[i].start >= off)
18162 break;
18163 /* find first prog starting at or after off + cnt (first to stay) */
18164 for (j = i; j < env->subprog_cnt; j++)
18165 if (env->subprog_info[j].start >= off + cnt)
18166 break;
18167 /* if j doesn't start exactly at off + cnt, we are just removing
18168 * the front of previous prog
18169 */
18170 if (env->subprog_info[j].start != off + cnt)
18171 j--;
18172
18173 if (j > i) {
18174 struct bpf_prog_aux *aux = env->prog->aux;
18175 int move;
18176
18177 /* move fake 'exit' subprog as well */
18178 move = env->subprog_cnt + 1 - j;
18179
18180 memmove(env->subprog_info + i,
18181 env->subprog_info + j,
18182 sizeof(*env->subprog_info) * move);
18183 env->subprog_cnt -= j - i;
18184
18185 /* remove func_info */
18186 if (aux->func_info) {
18187 move = aux->func_info_cnt - j;
18188
18189 memmove(aux->func_info + i,
18190 aux->func_info + j,
18191 sizeof(*aux->func_info) * move);
18192 aux->func_info_cnt -= j - i;
18193 /* func_info->insn_off is set after all code rewrites,
18194 * in adjust_btf_func() - no need to adjust
18195 */
18196 }
18197 } else {
18198 /* convert i from "first prog to remove" to "first to adjust" */
18199 if (env->subprog_info[i].start == off)
18200 i++;
18201 }
18202
18203 /* update fake 'exit' subprog as well */
18204 for (; i <= env->subprog_cnt; i++)
18205 env->subprog_info[i].start -= cnt;
18206
18207 return 0;
18208}
18209
18210static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
18211 u32 cnt)
18212{
18213 struct bpf_prog *prog = env->prog;
18214 u32 i, l_off, l_cnt, nr_linfo;
18215 struct bpf_line_info *linfo;
18216
18217 nr_linfo = prog->aux->nr_linfo;
18218 if (!nr_linfo)
18219 return 0;
18220
18221 linfo = prog->aux->linfo;
18222
18223 /* find first line info to remove, count lines to be removed */
18224 for (i = 0; i < nr_linfo; i++)
18225 if (linfo[i].insn_off >= off)
18226 break;
18227
18228 l_off = i;
18229 l_cnt = 0;
18230 for (; i < nr_linfo; i++)
18231 if (linfo[i].insn_off < off + cnt)
18232 l_cnt++;
18233 else
18234 break;
18235
18236 /* If the first live insn doesn't match the first live linfo, it needs to
18237 * "inherit" the last removed linfo. prog is already modified, so prog->len
18238 * == off means no live instructions after (tail of the program was removed).
18239 */
18240 if (prog->len != off && l_cnt &&
18241 (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
18242 l_cnt--;
18243 linfo[--i].insn_off = off + cnt;
18244 }
18245
18246 /* remove the line info entries that refer to the removed instructions */
18247 if (l_cnt) {
18248 memmove(linfo + l_off, linfo + i,
18249 sizeof(*linfo) * (nr_linfo - i));
18250
18251 prog->aux->nr_linfo -= l_cnt;
18252 nr_linfo = prog->aux->nr_linfo;
18253 }
18254
18255 /* pull all linfo[i].insn_off >= off + cnt in by cnt */
18256 for (i = l_off; i < nr_linfo; i++)
18257 linfo[i].insn_off -= cnt;
18258
18259 /* fix up all subprogs (incl. 'exit') which start >= off */
18260 for (i = 0; i <= env->subprog_cnt; i++)
18261 if (env->subprog_info[i].linfo_idx > l_off) {
18262 /* program may have started in the removed region but
18263 * may not be fully removed
18264 */
18265 if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
18266 env->subprog_info[i].linfo_idx -= l_cnt;
18267 else
18268 env->subprog_info[i].linfo_idx = l_off;
18269 }
18270
18271 return 0;
18272}
18273
18274static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
18275{
18276 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
18277 unsigned int orig_prog_len = env->prog->len;
18278 int err;
18279
18280 if (bpf_prog_is_offloaded(env->prog->aux))
18281 bpf_prog_offload_remove_insns(env, off, cnt);
18282
18283 err = bpf_remove_insns(env->prog, off, cnt);
18284 if (err)
18285 return err;
18286
18287 err = adjust_subprog_starts_after_remove(env, off, cnt);
18288 if (err)
18289 return err;
18290
18291 err = bpf_adj_linfo_after_remove(env, off, cnt);
18292 if (err)
18293 return err;
18294
18295 memmove(aux_data + off, aux_data + off + cnt,
18296 sizeof(*aux_data) * (orig_prog_len - off - cnt));
18297
18298 return 0;
18299}
18300
18301/* The verifier does more data flow analysis than llvm and will not
18302 * explore branches that are dead at run time. Malicious programs can
18303 * have dead code too. Therefore replace all dead at-run-time code
18304 * with 'ja -1'.
18305 *
18306 * Plain nops would not be ideal: if they sat at the end of the
18307 * program and another bug let us jump there, we would execute
18308 * beyond program memory. Returning an exception code also
18309 * wouldn't work, since we can have subprogs where the dead
18310 * code could be located.
18311 */
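/* For example (illustrative), an unreachable tail such as
 *
 *   12: r0 = 42
 *   13: exit
 *
 * becomes
 *
 *   12: ja -1
 *   13: ja -1
 *
 * so that a stray jump into the region can only spin in place instead of
 * running off the end of the program.
 */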
18312static void sanitize_dead_code(struct bpf_verifier_env *env)
18313{
18314 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
18315 struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
18316 struct bpf_insn *insn = env->prog->insnsi;
18317 const int insn_cnt = env->prog->len;
18318 int i;
18319
18320 for (i = 0; i < insn_cnt; i++) {
18321 if (aux_data[i].seen)
18322 continue;
18323 memcpy(insn + i, &trap, sizeof(trap));
18324 aux_data[i].zext_dst = false;
18325 }
18326}
18327
18328static bool insn_is_cond_jump(u8 code)
18329{
18330 u8 op;
18331
18332 op = BPF_OP(code);
18333 if (BPF_CLASS(code) == BPF_JMP32)
18334 return op != BPF_JA;
18335
18336 if (BPF_CLASS(code) != BPF_JMP)
18337 return false;
18338
18339 return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
18340}
18341
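/* Turn conditional jumps whose never-taken side was not marked 'seen' by the
 * verifier into unconditional ones. An illustrative sketch:
 *
 *   if r1 == 0 goto +5   with insn i+1 dead  ->  ja +5  (always jump)
 *   if r1 == 0 goto +5   with insn i+6 dead  ->  ja +0  (always fall through)
 *
 * The dead side itself is handled by sanitize_dead_code() or
 * opt_remove_dead_code().
 */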
18342static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
18343{
18344 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
18345 struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
18346 struct bpf_insn *insn = env->prog->insnsi;
18347 const int insn_cnt = env->prog->len;
18348 int i;
18349
18350 for (i = 0; i < insn_cnt; i++, insn++) {
18351 if (!insn_is_cond_jump(insn->code))
18352 continue;
18353
18354 if (!aux_data[i + 1].seen)
18355 ja.off = insn->off;
18356 else if (!aux_data[i + 1 + insn->off].seen)
18357 ja.off = 0;
18358 else
18359 continue;
18360
18361 if (bpf_prog_is_offloaded(env->prog->aux))
18362 bpf_prog_offload_replace_insn(env, i, &ja);
18363
18364 memcpy(insn, &ja, sizeof(ja));
18365 }
18366}
18367
18368static int opt_remove_dead_code(struct bpf_verifier_env *env)
18369{
18370 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
18371 int insn_cnt = env->prog->len;
18372 int i, err;
18373
18374 for (i = 0; i < insn_cnt; i++) {
18375 int j;
18376
18377 j = 0;
18378 while (i + j < insn_cnt && !aux_data[i + j].seen)
18379 j++;
18380 if (!j)
18381 continue;
18382
18383 err = verifier_remove_insns(env, i, j);
18384 if (err)
18385 return err;
18386 insn_cnt = env->prog->len;
18387 }
18388
18389 return 0;
18390}
18391
18392static int opt_remove_nops(struct bpf_verifier_env *env)
18393{
18394 const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
18395 struct bpf_insn *insn = env->prog->insnsi;
18396 int insn_cnt = env->prog->len;
18397 int i, err;
18398
18399 for (i = 0; i < insn_cnt; i++) {
18400 if (memcmp(&insn[i], &ja, sizeof(ja)))
18401 continue;
18402
18403 err = verifier_remove_insns(env, i, 1);
18404 if (err)
18405 return err;
18406 insn_cnt--;
18407 i--;
18408 }
18409
18410 return 0;
18411}
18412
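/* Insert an explicit zero extension after instructions that define a 32-bit
 * subregister when the JIT requests it, and, under BPF_F_TEST_RND_HI32,
 * poison the high 32 bits of subregister definitions the program must not
 * rely on. An illustrative sketch of the two patch shapes for dst r2:
 *
 *   zext:      w2 = w1  ->  w2 = w1 ; r2 = (u32)r2
 *   rnd_hi32:  w2 = w1  ->  w2 = w1 ; rAX = rnd ; rAX <<= 32 ; r2 |= rAX
 */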
18413static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
18414 const union bpf_attr *attr)
18415{
18416 struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
18417 struct bpf_insn_aux_data *aux = env->insn_aux_data;
18418 int i, patch_len, delta = 0, len = env->prog->len;
18419 struct bpf_insn *insns = env->prog->insnsi;
18420 struct bpf_prog *new_prog;
18421 bool rnd_hi32;
18422
18423 rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
18424 zext_patch[1] = BPF_ZEXT_REG(0);
18425 rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
18426 rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
18427 rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
18428 for (i = 0; i < len; i++) {
18429 int adj_idx = i + delta;
18430 struct bpf_insn insn;
18431 int load_reg;
18432
18433 insn = insns[adj_idx];
18434 load_reg = insn_def_regno(&insn);
18435 if (!aux[adj_idx].zext_dst) {
18436 u8 code, class;
18437 u32 imm_rnd;
18438
18439 if (!rnd_hi32)
18440 continue;
18441
18442 code = insn.code;
18443 class = BPF_CLASS(code);
18444 if (load_reg == -1)
18445 continue;
18446
18447 /* NOTE: arg "reg" (the fourth one) is only used for
18448 * BPF_STX + SRC_OP, so it is safe to pass NULL
18449 * here.
18450 */
18451 if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
18452 if (class == BPF_LD &&
18453 BPF_MODE(code) == BPF_IMM)
18454 i++;
18455 continue;
18456 }
18457
18458 /* ctx load could be transformed into wider load. */
18459 if (class == BPF_LDX &&
18460 aux[adj_idx].ptr_type == PTR_TO_CTX)
18461 continue;
18462
18463 imm_rnd = get_random_u32();
18464 rnd_hi32_patch[0] = insn;
18465 rnd_hi32_patch[1].imm = imm_rnd;
18466 rnd_hi32_patch[3].dst_reg = load_reg;
18467 patch = rnd_hi32_patch;
18468 patch_len = 4;
18469 goto apply_patch_buffer;
18470 }
18471
18472 /* Add in a zero-extend instruction if a) the JIT has requested
18473 * it or b) it's a CMPXCHG.
18474 *
18475 * The latter is because: BPF_CMPXCHG always loads a value into
18476 * R0, therefore always zero-extends. However some archs'
18477 * equivalent instruction only does this load when the
18478 * comparison is successful. This detail of CMPXCHG is
18479 * orthogonal to the general zero-extension behaviour of the
18480 * CPU, so it's treated independently of bpf_jit_needs_zext.
18481 */
18482 if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
18483 continue;
18484
18485 /* Zero-extension is done by the caller. */
18486 if (bpf_pseudo_kfunc_call(&insn))
18487 continue;
18488
18489 if (WARN_ON(load_reg == -1)) {
18490 verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
18491 return -EFAULT;
18492 }
18493
18494 zext_patch[0] = insn;
18495 zext_patch[1].dst_reg = load_reg;
18496 zext_patch[1].src_reg = load_reg;
18497 patch = zext_patch;
18498 patch_len = 2;
18499apply_patch_buffer:
18500 new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
18501 if (!new_prog)
18502 return -ENOMEM;
18503 env->prog = new_prog;
18504 insns = new_prog->insnsi;
18505 aux = env->insn_aux_data;
18506 delta += patch_len - 1;
18507 }
18508
18509 return 0;
18510}
18511
18512/* convert load instructions that access fields of a context type into a
18513 * sequence of instructions that access fields of the underlying structure:
18514 * struct __sk_buff -> struct sk_buff
18515 * struct bpf_sock_ops -> struct sock
18516 */
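/* An illustrative sketch (offsets made up): a context access such as
 *
 *   r0 = *(u32 *)(r1 + offsetof(struct __sk_buff, len))
 *
 * is rewritten by the program type's convert_ctx_access() callback into an
 * access to the underlying kernel object, roughly
 *
 *   r0 = *(u32 *)(r1 + offsetof(struct sk_buff, len))
 *
 * possibly expanding into several instructions when shifting or masking is
 * needed.
 */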
18517static int convert_ctx_accesses(struct bpf_verifier_env *env)
18518{
18519 const struct bpf_verifier_ops *ops = env->ops;
18520 int i, cnt, size, ctx_field_size, delta = 0;
18521 const int insn_cnt = env->prog->len;
18522 struct bpf_insn insn_buf[16], *insn;
18523 u32 target_size, size_default, off;
18524 struct bpf_prog *new_prog;
18525 enum bpf_access_type type;
18526 bool is_narrower_load;
18527
18528 if (ops->gen_prologue || env->seen_direct_write) {
18529 if (!ops->gen_prologue) {
18530 verbose(env, "bpf verifier is misconfigured\n");
18531 return -EINVAL;
18532 }
18533 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
18534 env->prog);
18535 if (cnt >= ARRAY_SIZE(insn_buf)) {
18536 verbose(env, "bpf verifier is misconfigured\n");
18537 return -EINVAL;
18538 } else if (cnt) {
18539 new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
18540 if (!new_prog)
18541 return -ENOMEM;
18542
18543 env->prog = new_prog;
18544 delta += cnt - 1;
18545 }
18546 }
18547
18548 if (bpf_prog_is_offloaded(env->prog->aux))
18549 return 0;
18550
18551 insn = env->prog->insnsi + delta;
18552
18553 for (i = 0; i < insn_cnt; i++, insn++) {
18554 bpf_convert_ctx_access_t convert_ctx_access;
18555 u8 mode;
18556
18557 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
18558 insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
18559 insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
18560 insn->code == (BPF_LDX | BPF_MEM | BPF_DW) ||
18561 insn->code == (BPF_LDX | BPF_MEMSX | BPF_B) ||
18562 insn->code == (BPF_LDX | BPF_MEMSX | BPF_H) ||
18563 insn->code == (BPF_LDX | BPF_MEMSX | BPF_W)) {
18564 type = BPF_READ;
18565 } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
18566 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
18567 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
18568 insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
18569 insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
18570 insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
18571 insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
18572 insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
18573 type = BPF_WRITE;
18574 } else {
18575 continue;
18576 }
18577
18578 if (type == BPF_WRITE &&
18579 env->insn_aux_data[i + delta].sanitize_stack_spill) {
18580 struct bpf_insn patch[] = {
18581 *insn,
18582 BPF_ST_NOSPEC(),
18583 };
18584
18585 cnt = ARRAY_SIZE(patch);
18586 new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
18587 if (!new_prog)
18588 return -ENOMEM;
18589
18590 delta += cnt - 1;
18591 env->prog = new_prog;
18592 insn = new_prog->insnsi + i + delta;
18593 continue;
18594 }
18595
18596 switch ((int)env->insn_aux_data[i + delta].ptr_type) {
18597 case PTR_TO_CTX:
18598 if (!ops->convert_ctx_access)
18599 continue;
18600 convert_ctx_access = ops->convert_ctx_access;
18601 break;
18602 case PTR_TO_SOCKET:
18603 case PTR_TO_SOCK_COMMON:
18604 convert_ctx_access = bpf_sock_convert_ctx_access;
18605 break;
18606 case PTR_TO_TCP_SOCK:
18607 convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
18608 break;
18609 case PTR_TO_XDP_SOCK:
18610 convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
18611 break;
18612 case PTR_TO_BTF_ID:
18613 case PTR_TO_BTF_ID | PTR_UNTRUSTED:
18614 /* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
18615 * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
18616 * be said once it is marked PTR_UNTRUSTED, hence we must handle
18617 * any faults for loads into such types. BPF_WRITE is disallowed
18618 * for this case.
18619 */
18620 case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
18621 if (type == BPF_READ) {
18622 if (BPF_MODE(insn->code) == BPF_MEM)
18623 insn->code = BPF_LDX | BPF_PROBE_MEM |
18624 BPF_SIZE((insn)->code);
18625 else
18626 insn->code = BPF_LDX | BPF_PROBE_MEMSX |
18627 BPF_SIZE((insn)->code);
18628 env->prog->aux->num_exentries++;
18629 }
18630 continue;
18631 default:
18632 continue;
18633 }
18634
18635 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
18636 size = BPF_LDST_BYTES(insn);
18637 mode = BPF_MODE(insn->code);
18638
18639 /* If the read access is a narrower load of the field,
18640 * convert it to a 4/8-byte load, to minimize program type specific
18641 * convert_ctx_access changes. If the conversion is successful,
18642 * we will apply the proper mask to the result.
18643 */
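/* For example (illustrative): a 1-byte read of a 4-byte context field
 *
 *   r0 = *(u8 *)(r1 + off)
 *
 * is widened below to
 *
 *   r0 = *(u32 *)(r1 + (off & ~3))
 *
 * and the result is then shifted (if needed) and masked back down:
 *
 *   r0 >>= shift ; w0 &= 0xff
 */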
18644 is_narrower_load = size < ctx_field_size;
18645 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
18646 off = insn->off;
18647 if (is_narrower_load) {
18648 u8 size_code;
18649
18650 if (type == BPF_WRITE) {
18651 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
18652 return -EINVAL;
18653 }
18654
18655 size_code = BPF_H;
18656 if (ctx_field_size == 4)
18657 size_code = BPF_W;
18658 else if (ctx_field_size == 8)
18659 size_code = BPF_DW;
18660
18661 insn->off = off & ~(size_default - 1);
18662 insn->code = BPF_LDX | BPF_MEM | size_code;
18663 }
18664
18665 target_size = 0;
18666 cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
18667 &target_size);
18668 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
18669 (ctx_field_size && !target_size)) {
18670 verbose(env, "bpf verifier is misconfigured\n");
18671 return -EINVAL;
18672 }
18673
18674 if (is_narrower_load && size < target_size) {
18675 u8 shift = bpf_ctx_narrow_access_offset(
18676 off, size, size_default) * 8;
18677 if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
18678 verbose(env, "bpf verifier narrow ctx load misconfigured\n");
18679 return -EINVAL;
18680 }
18681 if (ctx_field_size <= 4) {
18682 if (shift)
18683 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
18684 insn->dst_reg,
18685 shift);
18686 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
18687 (1 << size * 8) - 1);
18688 } else {
18689 if (shift)
18690 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
18691 insn->dst_reg,
18692 shift);
18693 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
18694 (1ULL << size * 8) - 1);
18695 }
18696 }
18697 if (mode == BPF_MEMSX)
18698 insn_buf[cnt++] = BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X,
18699 insn->dst_reg, insn->dst_reg,
18700 size * 8, 0);
18701
18702 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
18703 if (!new_prog)
18704 return -ENOMEM;
18705
18706 delta += cnt - 1;
18707
18708 /* keep walking new program and skip insns we just inserted */
18709 env->prog = new_prog;
18710 insn = new_prog->insnsi + i + delta;
18711 }
18712
18713 return 0;
18714}
18715
18716static int jit_subprogs(struct bpf_verifier_env *env)
18717{
18718 struct bpf_prog *prog = env->prog, **func, *tmp;
18719 int i, j, subprog_start, subprog_end = 0, len, subprog;
18720 struct bpf_map *map_ptr;
18721 struct bpf_insn *insn;
18722 void *old_bpf_func;
18723 int err, num_exentries;
18724
18725 if (env->subprog_cnt <= 1)
18726 return 0;
18727
18728 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
18729 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
18730 continue;
18731
18732 /* Upon error here we cannot fall back to interpreter but
18733 * need a hard reject of the program. Thus -EFAULT is
18734 * propagated in any case.
18735 */
18736 subprog = find_subprog(env, i + insn->imm + 1);
18737 if (subprog < 0) {
18738 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
18739 i + insn->imm + 1);
18740 return -EFAULT;
18741 }
18742 /* temporarily remember subprog id inside insn instead of
18743 * aux_data, since next loop will split up all insns into funcs
18744 */
18745 insn->off = subprog;
18746 /* remember original imm in case JIT fails and fallback
18747 * to interpreter will be needed
18748 */
18749 env->insn_aux_data[i].call_imm = insn->imm;
18750 /* point imm to __bpf_call_base+1 from JITs point of view */
18751 insn->imm = 1;
18752 if (bpf_pseudo_func(insn))
18753 /* jit (e.g. x86_64) may emit fewer instructions
18754 * if it learns a u32 imm is the same as a u64 imm.
18755 * Force a non zero here.
18756 */
18757 insn[1].imm = 1;
18758 }
18759
18760 err = bpf_prog_alloc_jited_linfo(prog);
18761 if (err)
18762 goto out_undo_insn;
18763
18764 err = -ENOMEM;
18765 func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
18766 if (!func)
18767 goto out_undo_insn;
18768
18769 for (i = 0; i < env->subprog_cnt; i++) {
18770 subprog_start = subprog_end;
18771 subprog_end = env->subprog_info[i + 1].start;
18772
18773 len = subprog_end - subprog_start;
18774 /* bpf_prog_run() doesn't call subprogs directly,
18775 * hence main prog stats include the runtime of subprogs.
18776 * subprogs don't have IDs and are not reachable via prog_get_next_id;
18777 * func[i]->stats will never be accessed and stays NULL.
18778 */
18779 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
18780 if (!func[i])
18781 goto out_free;
18782 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
18783 len * sizeof(struct bpf_insn));
18784 func[i]->type = prog->type;
18785 func[i]->len = len;
18786 if (bpf_prog_calc_tag(func[i]))
18787 goto out_free;
18788 func[i]->is_func = 1;
18789 func[i]->aux->func_idx = i;
18790 /* Below members will be freed only at prog->aux */
18791 func[i]->aux->btf = prog->aux->btf;
18792 func[i]->aux->func_info = prog->aux->func_info;
18793 func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
18794 func[i]->aux->poke_tab = prog->aux->poke_tab;
18795 func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
18796
18797 for (j = 0; j < prog->aux->size_poke_tab; j++) {
18798 struct bpf_jit_poke_descriptor *poke;
18799
18800 poke = &prog->aux->poke_tab[j];
18801 if (poke->insn_idx < subprog_end &&
18802 poke->insn_idx >= subprog_start)
18803 poke->aux = func[i]->aux;
18804 }
18805
18806 func[i]->aux->name[0] = 'F';
18807 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
18808 func[i]->jit_requested = 1;
18809 func[i]->blinding_requested = prog->blinding_requested;
18810 func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
18811 func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
18812 func[i]->aux->linfo = prog->aux->linfo;
18813 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
18814 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
18815 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
18816 num_exentries = 0;
18817 insn = func[i]->insnsi;
18818 for (j = 0; j < func[i]->len; j++, insn++) {
18819 if (BPF_CLASS(insn->code) == BPF_LDX &&
18820 (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
18821 BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
18822 num_exentries++;
18823 }
18824 func[i]->aux->num_exentries = num_exentries;
18825 func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
18826 func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb;
18827 if (!i)
18828 func[i]->aux->exception_boundary = env->seen_exception;
18829 func[i] = bpf_int_jit_compile(func[i]);
18830 if (!func[i]->jited) {
18831 err = -ENOTSUPP;
18832 goto out_free;
18833 }
18834 cond_resched();
18835 }
18836
18837 /* at this point all bpf functions were successfully JITed
18838 * now populate all bpf_calls with correct addresses and
18839 * run last pass of JIT
18840 */
18841 for (i = 0; i < env->subprog_cnt; i++) {
18842 insn = func[i]->insnsi;
18843 for (j = 0; j < func[i]->len; j++, insn++) {
18844 if (bpf_pseudo_func(insn)) {
18845 subprog = insn->off;
18846 insn[0].imm = (u32)(long)func[subprog]->bpf_func;
18847 insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
18848 continue;
18849 }
18850 if (!bpf_pseudo_call(insn))
18851 continue;
18852 subprog = insn->off;
18853 insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
18854 }
18855
18856 /* we use the aux data to keep a list of the start addresses
18857 * of the JITed images for each function in the program
18858 *
18859 * for some architectures, such as powerpc64, the imm field
18860 * might not be large enough to hold the offset of the start
18861 * address of the callee's JITed image from __bpf_call_base
18862 *
18863 * in such cases, we can lookup the start address of a callee
18864 * by using its subprog id, available from the off field of
18865 * the call instruction, as an index for this list
18866 */
18867 func[i]->aux->func = func;
18868 func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
18869 func[i]->aux->real_func_cnt = env->subprog_cnt;
18870 }
18871 for (i = 0; i < env->subprog_cnt; i++) {
18872 old_bpf_func = func[i]->bpf_func;
18873 tmp = bpf_int_jit_compile(func[i]);
18874 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
18875 verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
18876 err = -ENOTSUPP;
18877 goto out_free;
18878 }
18879 cond_resched();
18880 }
18881
18882 /* finally lock prog and jit images for all functions and
18883 * populate kallsyms. Begin at the first subprogram, since
18884 * bpf_prog_load will add the kallsyms for the main program.
18885 */
18886 for (i = 1; i < env->subprog_cnt; i++) {
18887 bpf_prog_lock_ro(func[i]);
18888 bpf_prog_kallsyms_add(func[i]);
18889 }
18890
18891 /* Last step: make now unused interpreter insns from main
18892 * prog consistent for later dump requests, so they can
18893 * later look the same as if they were interpreted only.
18894 */
18895 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
18896 if (bpf_pseudo_func(insn)) {
18897 insn[0].imm = env->insn_aux_data[i].call_imm;
18898 insn[1].imm = insn->off;
18899 insn->off = 0;
18900 continue;
18901 }
18902 if (!bpf_pseudo_call(insn))
18903 continue;
18904 insn->off = env->insn_aux_data[i].call_imm;
18905 subprog = find_subprog(env, i + insn->off + 1);
18906 insn->imm = subprog;
18907 }
18908
18909 prog->jited = 1;
18910 prog->bpf_func = func[0]->bpf_func;
18911 prog->jited_len = func[0]->jited_len;
18912 prog->aux->extable = func[0]->aux->extable;
18913 prog->aux->num_exentries = func[0]->aux->num_exentries;
18914 prog->aux->func = func;
18915 prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
18916 prog->aux->real_func_cnt = env->subprog_cnt;
18917 prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func;
18918 prog->aux->exception_boundary = func[0]->aux->exception_boundary;
18919 bpf_prog_jit_attempt_done(prog);
18920 return 0;
18921out_free:
18922 /* We failed JIT'ing, so at this point we need to unregister poke
18923 * descriptors from subprogs, so that kernel is not attempting to
18924 * patch it anymore as we're freeing the subprog JIT memory.
18925 */
18926 for (i = 0; i < prog->aux->size_poke_tab; i++) {
18927 map_ptr = prog->aux->poke_tab[i].tail_call.map;
18928 map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
18929 }
18930 /* At this point we're guaranteed that poke descriptors are not
18931 * live anymore. We can just unlink its descriptor table as it's
18932 * released with the main prog.
18933 */
18934 for (i = 0; i < env->subprog_cnt; i++) {
18935 if (!func[i])
18936 continue;
18937 func[i]->aux->poke_tab = NULL;
18938 bpf_jit_free(func[i]);
18939 }
18940 kfree(func);
18941out_undo_insn:
18942 /* cleanup main prog to be interpreted */
18943 prog->jit_requested = 0;
18944 prog->blinding_requested = 0;
18945 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
18946 if (!bpf_pseudo_call(insn))
18947 continue;
18948 insn->off = 0;
18949 insn->imm = env->insn_aux_data[i].call_imm;
18950 }
18951 bpf_prog_jit_attempt_done(prog);
18952 return err;
18953}
18954
18955static int fixup_call_args(struct bpf_verifier_env *env)
18956{
18957#ifndef CONFIG_BPF_JIT_ALWAYS_ON
18958 struct bpf_prog *prog = env->prog;
18959 struct bpf_insn *insn = prog->insnsi;
18960 bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
18961 int i, depth;
18962#endif
18963 int err = 0;
18964
18965 if (env->prog->jit_requested &&
18966 !bpf_prog_is_offloaded(env->prog->aux)) {
18967 err = jit_subprogs(env);
18968 if (err == 0)
18969 return 0;
18970 if (err == -EFAULT)
18971 return err;
18972 }
18973#ifndef CONFIG_BPF_JIT_ALWAYS_ON
18974 if (has_kfunc_call) {
18975 verbose(env, "calling kernel functions is not allowed in non-JITed programs\n");
18976 return -EINVAL;
18977 }
18978 if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
18979 /* When JIT fails the progs with bpf2bpf calls and tail_calls
18980 * have to be rejected, since interpreter doesn't support them yet.
18981 */
18982 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
18983 return -EINVAL;
18984 }
18985 for (i = 0; i < prog->len; i++, insn++) {
18986 if (bpf_pseudo_func(insn)) {
18987 /* When JIT fails the progs with callback calls
18988 * have to be rejected, since interpreter doesn't support them yet.
18989 */
18990 verbose(env, "callbacks are not allowed in non-JITed programs\n");
18991 return -EINVAL;
18992 }
18993
18994 if (!bpf_pseudo_call(insn))
18995 continue;
18996 depth = get_callee_stack_depth(env, insn, i);
18997 if (depth < 0)
18998 return depth;
18999 bpf_patch_call_args(insn, depth);
19000 }
19001 err = 0;
19002#endif
19003 return err;
19004}
19005
19006/* replace a generic kfunc with a specialized version if necessary */
19007static void specialize_kfunc(struct bpf_verifier_env *env,
19008 u32 func_id, u16 offset, unsigned long *addr)
19009{
19010 struct bpf_prog *prog = env->prog;
19011 bool seen_direct_write;
19012 void *xdp_kfunc;
19013 bool is_rdonly;
19014
19015 if (bpf_dev_bound_kfunc_id(func_id)) {
19016 xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
19017 if (xdp_kfunc) {
19018 *addr = (unsigned long)xdp_kfunc;
19019 return;
19020 }
19021 /* fallback to default kfunc when not supported by netdev */
19022 }
19023
19024 if (offset)
19025 return;
19026
19027 if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
19028 seen_direct_write = env->seen_direct_write;
19029 is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
19030
19031 if (is_rdonly)
19032 *addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
19033
19034 /* restore env->seen_direct_write to its original value, since
19035 * may_access_direct_pkt_data mutates it
19036 */
19037 env->seen_direct_write = seen_direct_write;
19038 }
19039}
19040
19041static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
19042 u16 struct_meta_reg,
19043 u16 node_offset_reg,
19044 struct bpf_insn *insn,
19045 struct bpf_insn *insn_buf,
19046 int *cnt)
19047{
19048 struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta;
19049 struct bpf_insn addr[2] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) };
19050
19051 insn_buf[0] = addr[0];
19052 insn_buf[1] = addr[1];
19053 insn_buf[2] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off);
19054 insn_buf[3] = *insn;
19055 *cnt = 4;
19056}
19057
19058static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
19059 struct bpf_insn *insn_buf, int insn_idx, int *cnt)
19060{
19061 const struct bpf_kfunc_desc *desc;
19062
19063 if (!insn->imm) {
19064 verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
19065 return -EINVAL;
19066 }
19067
19068 *cnt = 0;
19069
19070 /* insn->imm has the btf func_id. Replace it with an offset relative to
19071 * __bpf_call_base, unless the JIT needs to call functions that are
19072 * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
19073 */
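/* e.g. for a near call the rewrite below is roughly
 *   insn->imm = desc->addr - __bpf_call_base
 * (see BPF_CALL_IMM()); far-call capable JITs instead keep the BTF func_id
 * in insn->imm and resolve the target address via the kfunc descriptors.
 */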
19074 desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
19075 if (!desc) {
19076 verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
19077 insn->imm);
19078 return -EFAULT;
19079 }
19080
19081 if (!bpf_jit_supports_far_kfunc_call())
19082 insn->imm = BPF_CALL_IMM(desc->addr);
19083 if (insn->off)
19084 return 0;
19085 if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
19086 desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
19087 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19088 struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
19089 u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
19090
19091 if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && kptr_struct_meta) {
19092 verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
19093 insn_idx);
19094 return -EFAULT;
19095 }
19096
19097 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
19098 insn_buf[1] = addr[0];
19099 insn_buf[2] = addr[1];
19100 insn_buf[3] = *insn;
19101 *cnt = 4;
19102 } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
19103 desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] ||
19104 desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
19105 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19106 struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
19107
19108 if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] && kptr_struct_meta) {
19109 verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
19110 insn_idx);
19111 return -EFAULT;
19112 }
19113
19114 if (desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
19115 !kptr_struct_meta) {
19116 verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
19117 insn_idx);
19118 return -EFAULT;
19119 }
19120
19121 insn_buf[0] = addr[0];
19122 insn_buf[1] = addr[1];
19123 insn_buf[2] = *insn;
19124 *cnt = 3;
19125 } else if (desc->func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
19126 desc->func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
19127 desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
19128 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19129 int struct_meta_reg = BPF_REG_3;
19130 int node_offset_reg = BPF_REG_4;
19131
19132 /* rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs */
19133 if (desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
19134 struct_meta_reg = BPF_REG_4;
19135 node_offset_reg = BPF_REG_5;
19136 }
19137
19138 if (!kptr_struct_meta) {
19139 verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
19140 insn_idx);
19141 return -EFAULT;
19142 }
19143
19144 __fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
19145 node_offset_reg, insn, insn_buf, cnt);
19146 } else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
19147 desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
19148 insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
19149 *cnt = 1;
19150 }
19151 return 0;
19152}
19153
19154/* The function requires that first instruction in 'patch' is insnsi[prog->len - 1] */
19155static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len)
19156{
19157 struct bpf_subprog_info *info = env->subprog_info;
19158 int cnt = env->subprog_cnt;
19159 struct bpf_prog *prog;
19160
19161 /* We only reserve one slot for hidden subprogs in subprog_info. */
19162 if (env->hidden_subprog_cnt) {
19163 verbose(env, "verifier internal error: only one hidden subprog supported\n");
19164 return -EFAULT;
19165 }
19166 /* We're not patching any existing instruction, just appending the new
19167 * ones for the hidden subprog. Hence all of the adjustment operations
19168 * in bpf_patch_insn_data are no-ops.
19169 */
19170 prog = bpf_patch_insn_data(env, env->prog->len - 1, patch, len);
19171 if (!prog)
19172 return -ENOMEM;
19173 env->prog = prog;
19174 info[cnt + 1].start = info[cnt].start;
19175 info[cnt].start = prog->len - len + 1;
19176 env->subprog_cnt++;
19177 env->hidden_subprog_cnt++;
19178 return 0;
19179}
19180
19181/* Do various post-verification rewrites in a single program pass.
19182 * These rewrites simplify JIT and interpreter implementations.
19183 */
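/* Examples of rewrites done below: guarding div/mod against a zero divisor,
 * lowering LD_ABS/LD_IND, sanitizing pointer ALU, masking tail-call indices,
 * inlining selected map and tracing helpers, and finally turning helper call
 * numbers into relative kernel addresses.
 */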
19184static int do_misc_fixups(struct bpf_verifier_env *env)
19185{
19186 struct bpf_prog *prog = env->prog;
19187 enum bpf_attach_type eatype = prog->expected_attach_type;
19188 enum bpf_prog_type prog_type = resolve_prog_type(prog);
19189 struct bpf_insn *insn = prog->insnsi;
19190 const struct bpf_func_proto *fn;
19191 const int insn_cnt = prog->len;
19192 const struct bpf_map_ops *ops;
19193 struct bpf_insn_aux_data *aux;
19194 struct bpf_insn insn_buf[16];
19195 struct bpf_prog *new_prog;
19196 struct bpf_map *map_ptr;
19197 int i, ret, cnt, delta = 0;
19198
19199 if (env->seen_exception && !env->exception_callback_subprog) {
19200 struct bpf_insn patch[] = {
19201 env->prog->insnsi[insn_cnt - 1],
19202 BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
19203 BPF_EXIT_INSN(),
19204 };
19205
19206 ret = add_hidden_subprog(env, patch, ARRAY_SIZE(patch));
19207 if (ret < 0)
19208 return ret;
19209 prog = env->prog;
19210 insn = prog->insnsi;
19211
19212 env->exception_callback_subprog = env->subprog_cnt - 1;
19213 /* Don't update insn_cnt, as add_hidden_subprog always appends insns */
19214 env->subprog_info[env->exception_callback_subprog].is_cb = true;
19215 env->subprog_info[env->exception_callback_subprog].is_async_cb = true;
19216 env->subprog_info[env->exception_callback_subprog].is_exception_cb = true;
19217 }
19218
19219 for (i = 0; i < insn_cnt; i++, insn++) {
19220 /* Make divide-by-zero exceptions impossible. */
19221 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
19222 insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
19223 insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
19224 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
19225 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
19226 bool isdiv = BPF_OP(insn->code) == BPF_DIV;
19227 struct bpf_insn *patchlet;
19228 struct bpf_insn chk_and_div[] = {
19229 /* [R,W]x div 0 -> 0 */
19230 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
19231 BPF_JNE | BPF_K, insn->src_reg,
19232 0, 2, 0),
19233 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
19234 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
19235 *insn,
19236 };
19237 struct bpf_insn chk_and_mod[] = {
19238 /* [R,W]x mod 0 -> [R,W]x */
19239 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
19240 BPF_JEQ | BPF_K, insn->src_reg,
19241 0, 1 + (is64 ? 0 : 1), 0),
19242 *insn,
19243 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
19244 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
19245 };
19246
19247 patchlet = isdiv ? chk_and_div : chk_and_mod;
19248 cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
19249 ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
19250
19251 new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
19252 if (!new_prog)
19253 return -ENOMEM;
19254
19255 delta += cnt - 1;
19256 env->prog = prog = new_prog;
19257 insn = new_prog->insnsi + i + delta;
19258 continue;
19259 }
19260
19261 /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
19262 if (BPF_CLASS(insn->code) == BPF_LD &&
19263 (BPF_MODE(insn->code) == BPF_ABS ||
19264 BPF_MODE(insn->code) == BPF_IND)) {
19265 cnt = env->ops->gen_ld_abs(insn, insn_buf);
19266 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
19267 verbose(env, "bpf verifier is misconfigured\n");
19268 return -EINVAL;
19269 }
19270
19271 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
19272 if (!new_prog)
19273 return -ENOMEM;
19274
19275 delta += cnt - 1;
19276 env->prog = prog = new_prog;
19277 insn = new_prog->insnsi + i + delta;
19278 continue;
19279 }
19280
19281 /* Rewrite pointer arithmetic to mitigate speculation attacks. */
19282 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
19283 insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
19284 const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
19285 const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
19286 struct bpf_insn *patch = &insn_buf[0];
19287 bool issrc, isneg, isimm;
19288 u32 off_reg;
19289
19290 aux = &env->insn_aux_data[i + delta];
19291 if (!aux->alu_state ||
19292 aux->alu_state == BPF_ALU_NON_POINTER)
19293 continue;
19294
19295 isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
19296 issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
19297 BPF_ALU_SANITIZE_SRC;
19298 isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
19299
19300 off_reg = issrc ? insn->src_reg : insn->dst_reg;
19301 if (isimm) {
19302 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
19303 } else {
19304 if (isneg)
19305 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
19306 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
19307 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
19308 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
19309 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
19310 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
19311 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
19312 }
19313 if (!issrc)
19314 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
19315 insn->src_reg = BPF_REG_AX;
19316 if (isneg)
19317 insn->code = insn->code == code_add ?
19318 code_sub : code_add;
19319 *patch++ = *insn;
19320 if (issrc && isneg && !isimm)
19321 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
19322 cnt = patch - insn_buf;
19323
19324 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
19325 if (!new_prog)
19326 return -ENOMEM;
19327
19328 delta += cnt - 1;
19329 env->prog = prog = new_prog;
19330 insn = new_prog->insnsi + i + delta;
19331 continue;
19332 }
19333
19334 if (insn->code != (BPF_JMP | BPF_CALL))
19335 continue;
19336 if (insn->src_reg == BPF_PSEUDO_CALL)
19337 continue;
19338 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
19339 ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
19340 if (ret)
19341 return ret;
19342 if (cnt == 0)
19343 continue;
19344
19345 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
19346 if (!new_prog)
19347 return -ENOMEM;
19348
19349 delta += cnt - 1;
19350 env->prog = prog = new_prog;
19351 insn = new_prog->insnsi + i + delta;
19352 continue;
19353 }
19354
19355 if (insn->imm == BPF_FUNC_get_route_realm)
19356 prog->dst_needed = 1;
19357 if (insn->imm == BPF_FUNC_get_prandom_u32)
19358 bpf_user_rnd_init_once();
19359 if (insn->imm == BPF_FUNC_override_return)
19360 prog->kprobe_override = 1;
19361 if (insn->imm == BPF_FUNC_tail_call) {
19362 /* If we tail call into other programs, we
19363 * cannot make any assumptions since they can
19364 * be replaced dynamically during runtime in
19365 * the program array.
19366 */
19367 prog->cb_access = 1;
19368 if (!allow_tail_call_in_subprogs(env))
19369 prog->aux->stack_depth = MAX_BPF_STACK;
19370 prog->aux->max_pkt_offset = MAX_PACKET_OFF;
19371
19372 /* mark bpf_tail_call as a different opcode to avoid a
19373 * conditional branch in the interpreter for every normal
19374 * call, and to prevent accidental JITing by a JIT compiler
19375 * that doesn't support bpf_tail_call yet
19376 */
19377 insn->imm = 0;
19378 insn->code = BPF_JMP | BPF_TAIL_CALL;
19379
19380 aux = &env->insn_aux_data[i + delta];
19381 if (env->bpf_capable && !prog->blinding_requested &&
19382 prog->jit_requested &&
19383 !bpf_map_key_poisoned(aux) &&
19384 !bpf_map_ptr_poisoned(aux) &&
19385 !bpf_map_ptr_unpriv(aux)) {
19386 struct bpf_jit_poke_descriptor desc = {
19387 .reason = BPF_POKE_REASON_TAIL_CALL,
19388 .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
19389 .tail_call.key = bpf_map_key_immediate(aux),
19390 .insn_idx = i + delta,
19391 };
19392
19393 ret = bpf_jit_add_poke_descriptor(prog, &desc);
19394 if (ret < 0) {
19395 verbose(env, "adding tail call poke descriptor failed\n");
19396 return ret;
19397 }
19398
19399 insn->imm = ret + 1;
19400 continue;
19401 }
19402
19403 if (!bpf_map_ptr_unpriv(aux))
19404 continue;
19405
19406 /* instead of changing every JIT dealing with tail_call
19407 * emit two extra insns:
19408 * if (index >= max_entries) goto out;
19409 * index &= array->index_mask;
19410 * to avoid out-of-bounds cpu speculation
19411 */
19412 if (bpf_map_ptr_poisoned(aux)) {
19413 verbose(env, "tail_call abusing map_ptr\n");
19414 return -EINVAL;
19415 }
19416
19417 map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
19418 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
19419 map_ptr->max_entries, 2);
19420 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
19421 container_of(map_ptr,
19422 struct bpf_array,
19423 map)->index_mask);
19424 insn_buf[2] = *insn;
19425 cnt = 3;
19426 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
19427 if (!new_prog)
19428 return -ENOMEM;
19429
19430 delta += cnt - 1;
19431 env->prog = prog = new_prog;
19432 insn = new_prog->insnsi + i + delta;
19433 continue;
19434 }
19435
19436 if (insn->imm == BPF_FUNC_timer_set_callback) {
19437 /* The verifier will process callback_fn as many times as necessary
19438 * with different maps and the register states prepared by
19439 * set_timer_callback_state will be accurate.
19440 *
19441 * The following use case is valid:
19442 * map1 is shared by prog1, prog2, prog3.
19443 * prog1 calls bpf_timer_init for some map1 elements
19444 * prog2 calls bpf_timer_set_callback for some map1 elements.
19445 * Those that were not bpf_timer_init-ed will return -EINVAL.
19446 * prog3 calls bpf_timer_start for some map1 elements.
19447 * Those that were not both bpf_timer_init-ed and
19448 * bpf_timer_set_callback-ed will return -EINVAL.
19449 */
19450 struct bpf_insn ld_addrs[2] = {
19451 BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
19452 };
19453
19454 insn_buf[0] = ld_addrs[0];
19455 insn_buf[1] = ld_addrs[1];
19456 insn_buf[2] = *insn;
19457 cnt = 3;
19458
19459 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
19460 if (!new_prog)
19461 return -ENOMEM;
19462
19463 delta += cnt - 1;
19464 env->prog = prog = new_prog;
19465 insn = new_prog->insnsi + i + delta;
19466 goto patch_call_imm;
19467 }
19468
19469 if (is_storage_get_function(insn->imm)) {
19470 if (!env->prog->aux->sleepable ||
19471 env->insn_aux_data[i + delta].storage_get_func_atomic)
19472 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
19473 else
19474 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
19475 insn_buf[1] = *insn;
19476 cnt = 2;
19477
19478 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
19479 if (!new_prog)
19480 return -ENOMEM;
19481
19482 delta += cnt - 1;
19483 env->prog = prog = new_prog;
19484 insn = new_prog->insnsi + i + delta;
19485 goto patch_call_imm;
19486 }
19487
19488 /* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */
19489 if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) {
19490 /* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data,
19491 * bpf_mem_alloc() returns a ptr to the percpu data ptr.
19492 */
19493 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
19494 insn_buf[1] = *insn;
19495 cnt = 2;
19496
19497 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
19498 if (!new_prog)
19499 return -ENOMEM;
19500
19501 delta += cnt - 1;
19502 env->prog = prog = new_prog;
19503 insn = new_prog->insnsi + i + delta;
19504 goto patch_call_imm;
19505 }
19506
19507 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
19508 * and other inlining handlers are currently limited to 64 bit
19509 * only.
19510 */
19511 if (prog->jit_requested && BITS_PER_LONG == 64 &&
19512 (insn->imm == BPF_FUNC_map_lookup_elem ||
19513 insn->imm == BPF_FUNC_map_update_elem ||
19514 insn->imm == BPF_FUNC_map_delete_elem ||
19515 insn->imm == BPF_FUNC_map_push_elem ||
19516 insn->imm == BPF_FUNC_map_pop_elem ||
19517 insn->imm == BPF_FUNC_map_peek_elem ||
19518 insn->imm == BPF_FUNC_redirect_map ||
19519 insn->imm == BPF_FUNC_for_each_map_elem ||
19520 insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
19521 aux = &env->insn_aux_data[i + delta];
19522 if (bpf_map_ptr_poisoned(aux))
19523 goto patch_call_imm;
19524
19525 map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
19526 ops = map_ptr->ops;
19527 if (insn->imm == BPF_FUNC_map_lookup_elem &&
19528 ops->map_gen_lookup) {
19529 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
19530 if (cnt == -EOPNOTSUPP)
19531 goto patch_map_ops_generic;
19532 if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
19533 verbose(env, "bpf verifier is misconfigured\n");
19534 return -EINVAL;
19535 }
19536
19537 new_prog = bpf_patch_insn_data(env, i + delta,
19538 insn_buf, cnt);
19539 if (!new_prog)
19540 return -ENOMEM;
19541
19542 delta += cnt - 1;
19543 env->prog = prog = new_prog;
19544 insn = new_prog->insnsi + i + delta;
19545 continue;
19546 }
19547
19548 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
19549 (void *(*)(struct bpf_map *map, void *key))NULL));
19550 BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
19551 (long (*)(struct bpf_map *map, void *key))NULL));
19552 BUILD_BUG_ON(!__same_type(ops->map_update_elem,
19553 (long (*)(struct bpf_map *map, void *key, void *value,
19554 u64 flags))NULL));
19555 BUILD_BUG_ON(!__same_type(ops->map_push_elem,
19556 (long (*)(struct bpf_map *map, void *value,
19557 u64 flags))NULL));
19558 BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
19559 (long (*)(struct bpf_map *map, void *value))NULL));
19560 BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
19561 (long (*)(struct bpf_map *map, void *value))NULL));
19562 BUILD_BUG_ON(!__same_type(ops->map_redirect,
19563 (long (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
19564 BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
19565 (long (*)(struct bpf_map *map,
19566 bpf_callback_t callback_fn,
19567 void *callback_ctx,
19568 u64 flags))NULL));
19569 BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
19570 (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
19571
19572patch_map_ops_generic:
19573 switch (insn->imm) {
19574 case BPF_FUNC_map_lookup_elem:
19575 insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
19576 continue;
19577 case BPF_FUNC_map_update_elem:
19578 insn->imm = BPF_CALL_IMM(ops->map_update_elem);
19579 continue;
19580 case BPF_FUNC_map_delete_elem:
19581 insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
19582 continue;
19583 case BPF_FUNC_map_push_elem:
19584 insn->imm = BPF_CALL_IMM(ops->map_push_elem);
19585 continue;
19586 case BPF_FUNC_map_pop_elem:
19587 insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
19588 continue;
19589 case BPF_FUNC_map_peek_elem:
19590 insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
19591 continue;
19592 case BPF_FUNC_redirect_map:
19593 insn->imm = BPF_CALL_IMM(ops->map_redirect);
19594 continue;
19595 case BPF_FUNC_for_each_map_elem:
19596 insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
19597 continue;
19598 case BPF_FUNC_map_lookup_percpu_elem:
19599 insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
19600 continue;
19601 }
19602
19603 goto patch_call_imm;
19604 }
19605
19606 /* Implement bpf_jiffies64 inline. */
19607 if (prog->jit_requested && BITS_PER_LONG == 64 &&
19608 insn->imm == BPF_FUNC_jiffies64) {
19609 struct bpf_insn ld_jiffies_addr[2] = {
19610 BPF_LD_IMM64(BPF_REG_0,
19611 (unsigned long)&jiffies),
19612 };
19613
19614 insn_buf[0] = ld_jiffies_addr[0];
19615 insn_buf[1] = ld_jiffies_addr[1];
19616 insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
19617 BPF_REG_0, 0);
19618 cnt = 3;
19619
19620 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
19621 cnt);
19622 if (!new_prog)
19623 return -ENOMEM;
19624
19625 delta += cnt - 1;
19626 env->prog = prog = new_prog;
19627 insn = new_prog->insnsi + i + delta;
19628 continue;
19629 }
19630
19631 /* Implement bpf_get_func_arg inline. */
19632 if (prog_type == BPF_PROG_TYPE_TRACING &&
19633 insn->imm == BPF_FUNC_get_func_arg) {
19634 /* Load nr_args from ctx - 8 */
19635 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
19636 insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
19637 insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
19638 insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
19639 insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
19640 insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
19641 insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
19642 insn_buf[7] = BPF_JMP_A(1);
19643 insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
19644 cnt = 9;
19645
19646 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
19647 if (!new_prog)
19648 return -ENOMEM;
19649
19650 delta += cnt - 1;
19651 env->prog = prog = new_prog;
19652 insn = new_prog->insnsi + i + delta;
19653 continue;
19654 }
19655
19656 /* Implement bpf_get_func_ret inline. */
19657 if (prog_type == BPF_PROG_TYPE_TRACING &&
19658 insn->imm == BPF_FUNC_get_func_ret) {
19659 if (eatype == BPF_TRACE_FEXIT ||
19660 eatype == BPF_MODIFY_RETURN) {
19661 /* Load nr_args from ctx - 8 */
19662 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
19663 insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
19664 insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
19665 insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
19666 insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
19667 insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0);
19668 cnt = 6;
19669 } else {
19670 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
19671 cnt = 1;
19672 }
19673
19674 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
19675 if (!new_prog)
19676 return -ENOMEM;
19677
19678 delta += cnt - 1;
19679 env->prog = prog = new_prog;
19680 insn = new_prog->insnsi + i + delta;
19681 continue;
19682 }
19683
19684 /* Implement get_func_arg_cnt inline. */
19685 if (prog_type == BPF_PROG_TYPE_TRACING &&
19686 insn->imm == BPF_FUNC_get_func_arg_cnt) {
19687 /* Load nr_args from ctx - 8 */
19688 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
19689
19690 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
19691 if (!new_prog)
19692 return -ENOMEM;
19693
19694 env->prog = prog = new_prog;
19695 insn = new_prog->insnsi + i + delta;
19696 continue;
19697 }
19698
19699 /* Implement bpf_get_func_ip inline. */
19700 if (prog_type == BPF_PROG_TYPE_TRACING &&
19701 insn->imm == BPF_FUNC_get_func_ip) {
19702 /* Load IP address from ctx - 16 */
19703 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
19704
19705 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
19706 if (!new_prog)
19707 return -ENOMEM;
19708
19709 env->prog = prog = new_prog;
19710 insn = new_prog->insnsi + i + delta;
19711 continue;
19712 }
19713
19714patch_call_imm:
19715 fn = env->ops->get_func_proto(insn->imm, env->prog);
19716 /* all functions that have a prototype and that the verifier allowed
19717 * programs to call must be real in-kernel functions
19718 */
19719 if (!fn->func) {
19720 verbose(env,
19721 "kernel subsystem misconfigured func %s#%d\n",
19722 func_id_name(insn->imm), insn->imm);
19723 return -EFAULT;
19724 }
19725 insn->imm = fn->func - __bpf_call_base;
19726 }
19727
19728 /* Since poke tab is now finalized, publish aux to tracker. */
19729 for (i = 0; i < prog->aux->size_poke_tab; i++) {
19730 map_ptr = prog->aux->poke_tab[i].tail_call.map;
19731 if (!map_ptr->ops->map_poke_track ||
19732 !map_ptr->ops->map_poke_untrack ||
19733 !map_ptr->ops->map_poke_run) {
19734 verbose(env, "bpf verifier is misconfigured\n");
19735 return -EINVAL;
19736 }
19737
19738 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
19739 if (ret < 0) {
19740 verbose(env, "tracking tail call prog failed\n");
19741 return ret;
19742 }
19743 }
19744
19745 sort_kfunc_descs_by_imm_off(env->prog);
19746
19747 return 0;
19748}
19749
19750static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
19751 int position,
19752 s32 stack_base,
19753 u32 callback_subprogno,
19754 u32 *cnt)
19755{
19756 s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
19757 s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
19758 s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
19759 int reg_loop_max = BPF_REG_6;
19760 int reg_loop_cnt = BPF_REG_7;
19761 int reg_loop_ctx = BPF_REG_8;
19762
19763 struct bpf_prog *new_prog;
19764 u32 callback_start;
19765 u32 call_insn_offset;
19766 s32 callback_offset;
19767
19768 /* This is an inlined version of bpf_iter.c:bpf_loop();
19769 * take care to keep the two implementations in sync.
19770 */
19771 struct bpf_insn insn_buf[] = {
19772 /* Return error and jump to the end of the patch if
19773 * expected number of iterations is too big.
19774 */
19775 BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
19776 BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
19777 BPF_JMP_IMM(BPF_JA, 0, 0, 16),
19778 /* spill R6, R7, R8 to use these as loop vars */
19779 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
19780 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
19781 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
19782 /* initialize loop vars */
19783 BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
19784 BPF_MOV32_IMM(reg_loop_cnt, 0),
19785 BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
19786 /* loop header,
19787 * if reg_loop_cnt >= reg_loop_max skip the loop body
19788 */
19789 BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
19790 /* callback call,
19791 * correct callback offset would be set after patching
19792 */
19793 BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
19794 BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
19795 BPF_CALL_REL(0),
19796 /* increment loop counter */
19797 BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
19798 /* jump to loop header if callback returned 0 */
19799 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
19800 /* return value of bpf_loop,
19801 * set R0 to the number of iterations
19802 */
19803 BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
19804 /* restore original values of R6, R7, R8 */
19805 BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
19806 BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
19807 BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
19808 };
19809
19810 *cnt = ARRAY_SIZE(insn_buf);
19811 new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
19812 if (!new_prog)
19813 return new_prog;
19814
19815 /* callback start is known only after patching */
19816 callback_start = env->subprog_info[callback_subprogno].start;
19817 /* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */
19818 call_insn_offset = position + 12;
19819 callback_offset = callback_start - call_insn_offset - 1;
19820 new_prog->insnsi[call_insn_offset].imm = callback_offset;
19821
19822 return new_prog;
19823}
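/* Editor's illustrative sketch (not part of verifier.c): the kind of BPF C
 * program whose bpf_loop() call the inlining above replaces. The callback
 * prototype and SEC() name follow common libbpf usage and are assumptions,
 * not something defined in this file.
 *
 *   static long sum_cb(u64 index, void *ctx)
 *   {
 *           *(long *)ctx += index;
 *           return 0;               // 0 == keep iterating, 1 == break
 *   }
 *
 *   SEC("tc")
 *   int sum_first_hundred(struct __sk_buff *skb)
 *   {
 *           long sum = 0;
 *
 *           bpf_loop(100, sum_cb, &sum, 0);   // flags must currently be 0
 *           return 0;
 *   }
 */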
19824
19825static bool is_bpf_loop_call(struct bpf_insn *insn)
19826{
19827 return insn->code == (BPF_JMP | BPF_CALL) &&
19828 insn->src_reg == 0 &&
19829 insn->imm == BPF_FUNC_loop;
19830}
19831
19832/* For all sub-programs in the program (including main) check
19833 * insn_aux_data to see if there are bpf_loop calls that require
19834 * inlining. Each such call is replaced with the instruction
19835 * sequence produced by inline_bpf_loop(), and the subprog's
19836 * stack_depth is increased by the size of three registers.
19837 * That stack space is used to spill R6, R7 and R8, which hold
19838 * the loop bound, the loop counter and the context pointer
19839 * while the inlined loop runs.
19840 */
19841static int optimize_bpf_loop(struct bpf_verifier_env *env)
19842{
19843 struct bpf_subprog_info *subprogs = env->subprog_info;
19844 int i, cur_subprog = 0, cnt, delta = 0;
19845 struct bpf_insn *insn = env->prog->insnsi;
19846 int insn_cnt = env->prog->len;
19847 u16 stack_depth = subprogs[cur_subprog].stack_depth;
19848 u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
19849 u16 stack_depth_extra = 0;
19850
19851 for (i = 0; i < insn_cnt; i++, insn++) {
19852 struct bpf_loop_inline_state *inline_state =
19853 &env->insn_aux_data[i + delta].loop_inline_state;
19854
19855 if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
19856 struct bpf_prog *new_prog;
19857
19858 stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
19859 new_prog = inline_bpf_loop(env,
19860 i + delta,
19861 -(stack_depth + stack_depth_extra),
19862 inline_state->callback_subprogno,
19863 &cnt);
19864 if (!new_prog)
19865 return -ENOMEM;
19866
19867 delta += cnt - 1;
19868 env->prog = new_prog;
19869 insn = new_prog->insnsi + i + delta;
19870 }
19871
19872 if (subprogs[cur_subprog + 1].start == i + delta + 1) {
19873 subprogs[cur_subprog].stack_depth += stack_depth_extra;
19874 cur_subprog++;
19875 stack_depth = subprogs[cur_subprog].stack_depth;
19876 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
19877 stack_depth_extra = 0;
19878 }
19879 }
19880
19881 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
19882
19883 return 0;
19884}
19885
19886static void free_states(struct bpf_verifier_env *env)
19887{
19888 struct bpf_verifier_state_list *sl, *sln;
19889 int i;
19890
19891 sl = env->free_list;
19892 while (sl) {
19893 sln = sl->next;
19894 free_verifier_state(&sl->state, false);
19895 kfree(sl);
19896 sl = sln;
19897 }
19898 env->free_list = NULL;
19899
19900 if (!env->explored_states)
19901 return;
19902
19903 for (i = 0; i < state_htab_size(env); i++) {
19904 sl = env->explored_states[i];
19905
19906 while (sl) {
19907 sln = sl->next;
19908 free_verifier_state(&sl->state, false);
19909 kfree(sl);
19910 sl = sln;
19911 }
19912 env->explored_states[i] = NULL;
19913 }
19914}
19915
19916static int do_check_common(struct bpf_verifier_env *env, int subprog, bool is_ex_cb)
19917{
19918 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
19919 struct bpf_verifier_state *state;
19920 struct bpf_reg_state *regs;
19921 int ret, i;
19922
19923 env->prev_linfo = NULL;
19924 env->pass_cnt++;
19925
19926 state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
19927 if (!state)
19928 return -ENOMEM;
19929 state->curframe = 0;
19930 state->speculative = false;
19931 state->branches = 1;
19932 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
19933 if (!state->frame[0]) {
19934 kfree(state);
19935 return -ENOMEM;
19936 }
19937 env->cur_state = state;
19938 init_func_state(env, state->frame[0],
19939 BPF_MAIN_FUNC /* callsite */,
19940 0 /* frameno */,
19941 subprog);
19942 state->first_insn_idx = env->subprog_info[subprog].start;
19943 state->last_insn_idx = -1;
19944
19945 regs = state->frame[state->curframe]->regs;
19946 if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
19947 ret = btf_prepare_func_args(env, subprog, regs, is_ex_cb);
19948 if (ret)
19949 goto out;
19950 for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
19951 if (regs[i].type == PTR_TO_CTX)
19952 mark_reg_known_zero(env, regs, i);
19953 else if (regs[i].type == SCALAR_VALUE)
19954 mark_reg_unknown(env, regs, i);
19955 else if (base_type(regs[i].type) == PTR_TO_MEM) {
19956 const u32 mem_size = regs[i].mem_size;
19957
19958 mark_reg_known_zero(env, regs, i);
19959 regs[i].mem_size = mem_size;
19960 regs[i].id = ++env->id_gen;
19961 }
19962 }
19963 if (is_ex_cb) {
19964 state->frame[0]->in_exception_callback_fn = true;
19965 env->subprog_info[subprog].is_cb = true;
19966 env->subprog_info[subprog].is_async_cb = true;
19967 env->subprog_info[subprog].is_exception_cb = true;
19968 }
19969 } else {
19970 /* 1st arg to a function */
19971 regs[BPF_REG_1].type = PTR_TO_CTX;
19972 mark_reg_known_zero(env, regs, BPF_REG_1);
19973 ret = btf_check_subprog_arg_match(env, subprog, regs);
19974 if (ret == -EFAULT)
19975 /* an unlikely verifier bug, abort.
19976 * For the main() function both ret == 0 and ret < 0
19977 * are, sadly, acceptable for backward compatibility:
19978 * e.g. a socket filter program may be written as
19979 * int bpf_prog(struct pt_regs *ctx)
19980 * and never dereference that ctx in the program, even
19981 * though 'struct pt_regs' is a type mismatch for a
19982 * socket filter, which should use 'struct __sk_buff'.
19983 */
19984 goto out;
19985 }
19986
19987 ret = do_check(env);
19988out:
19989 /* check for NULL is necessary, since cur_state can be freed inside
19990 * do_check() under memory pressure.
19991 */
19992 if (env->cur_state) {
19993 free_verifier_state(env->cur_state, true);
19994 env->cur_state = NULL;
19995 }
19996 while (!pop_stack(env, NULL, NULL, false));
19997 if (!ret && pop_log)
19998 bpf_vlog_reset(&env->log, 0);
19999 free_states(env);
20000 return ret;
20001}
20002
20003/* Verify all global functions in a BPF program one by one based on their BTF.
20004 * All global functions must pass verification. Otherwise the whole program is rejected.
20005 * Consider:
20006 * int bar(int);
20007 * int foo(int f)
20008 * {
20009 * return bar(f);
20010 * }
20011 * int bar(int b)
20012 * {
20013 * ...
20014 * }
20015 * foo() will be verified first for R1=any_scalar_value. During verification it
20016 * will be assumed that bar() already verified successfully and call to bar()
20017 * from foo() will be checked for type match only. Later bar() will be verified
20018 * independently to check that it's safe for R1=any_scalar_value.
20019 */
20020static int do_check_subprogs(struct bpf_verifier_env *env)
20021{
20022 struct bpf_prog_aux *aux = env->prog->aux;
20023 int i, ret;
20024
20025 if (!aux->func_info)
20026 return 0;
20027
20028 for (i = 1; i < env->subprog_cnt; i++) {
20029 if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
20030 continue;
20031 env->insn_idx = env->subprog_info[i].start;
20032 WARN_ON_ONCE(env->insn_idx == 0);
20033 ret = do_check_common(env, i, env->exception_callback_subprog == i);
20034 if (ret) {
20035 return ret;
20036 } else if (env->log.level & BPF_LOG_LEVEL) {
20037 verbose(env,
20038 "Func#%d is safe for any args that match its prototype\n",
20039 i);
20040 }
20041 }
20042 return 0;
20043}
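/* Editor's illustrative sketch (not part of verifier.c): the foo()/bar()
 * pair from the comment above as it might look in BPF C built with BTF,
 * so that each global function is verified once against its own prototype.
 * The SEC() name and the __noinline annotation are assumptions about the
 * user-space toolchain.
 *
 *   __noinline int bar(int b)
 *   {
 *           return b + 1;
 *   }
 *
 *   SEC("tc")
 *   int foo(struct __sk_buff *skb)
 *   {
 *           return bar(skb->len);
 *   }
 */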
20044
20045static int do_check_main(struct bpf_verifier_env *env)
20046{
20047 int ret;
20048
20049 env->insn_idx = 0;
20050 ret = do_check_common(env, 0, false);
20051 if (!ret)
20052 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
20053 return ret;
20054}
20055
20056
20057static void print_verification_stats(struct bpf_verifier_env *env)
20058{
20059 int i;
20060
20061 if (env->log.level & BPF_LOG_STATS) {
20062 verbose(env, "verification time %lld usec\n",
20063 div_u64(env->verification_time, 1000));
20064 verbose(env, "stack depth ");
20065 for (i = 0; i < env->subprog_cnt; i++) {
20066 u32 depth = env->subprog_info[i].stack_depth;
20067
20068 verbose(env, "%d", depth);
20069 if (i + 1 < env->subprog_cnt)
20070 verbose(env, "+");
20071 }
20072 verbose(env, "\n");
20073 }
20074 verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
20075 "total_states %d peak_states %d mark_read %d\n",
20076 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
20077 env->max_states_per_insn, env->total_states,
20078 env->peak_states, env->longest_mark_read_walk);
20079}
20080
20081static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
20082{
20083 const struct btf_type *t, *func_proto;
20084 const struct bpf_struct_ops *st_ops;
20085 const struct btf_member *member;
20086 struct bpf_prog *prog = env->prog;
20087 u32 btf_id, member_idx;
20088 const char *mname;
20089
20090 if (!prog->gpl_compatible) {
20091 verbose(env, "struct ops programs must have a GPL compatible license\n");
20092 return -EINVAL;
20093 }
20094
20095 btf_id = prog->aux->attach_btf_id;
20096 st_ops = bpf_struct_ops_find(btf_id);
20097 if (!st_ops) {
20098 verbose(env, "attach_btf_id %u is not a supported struct\n",
20099 btf_id);
20100 return -ENOTSUPP;
20101 }
20102
20103 t = st_ops->type;
20104 member_idx = prog->expected_attach_type;
20105 if (member_idx >= btf_type_vlen(t)) {
20106 verbose(env, "attach to invalid member idx %u of struct %s\n",
20107 member_idx, st_ops->name);
20108 return -EINVAL;
20109 }
20110
20111 member = &btf_type_member(t)[member_idx];
20112 mname = btf_name_by_offset(btf_vmlinux, member->name_off);
20113 func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
20114 NULL);
20115 if (!func_proto) {
20116 verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
20117 mname, member_idx, st_ops->name);
20118 return -EINVAL;
20119 }
20120
20121 if (st_ops->check_member) {
20122 int err = st_ops->check_member(t, member, prog);
20123
20124 if (err) {
20125 verbose(env, "attach to unsupported member %s of struct %s\n",
20126 mname, st_ops->name);
20127 return err;
20128 }
20129 }
20130
20131 prog->aux->attach_func_proto = func_proto;
20132 prog->aux->attach_func_name = mname;
20133 env->ops = st_ops->verifier_ops;
20134
20135 return 0;
20136}
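/* Editor's illustrative sketch (not part of verifier.c): a struct_ops
 * program of the kind check_struct_ops_btf_id() validates, in the usual
 * libbpf SEC("struct_ops") style for tcp_congestion_ops. Names are
 * placeholders and only the members relevant to the example are shown;
 * a real congestion control implementation must fill in the remaining
 * mandatory callbacks.
 *
 *   SEC("struct_ops/bpf_cc_init")
 *   void BPF_PROG(bpf_cc_init, struct sock *sk)
 *   {
 *   }
 *
 *   SEC(".struct_ops")
 *   struct tcp_congestion_ops bpf_cc = {
 *           .init = (void *)bpf_cc_init,
 *           .name = "bpf_cc",
 *   };
 */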
20137#define SECURITY_PREFIX "security_"
20138
20139static int check_attach_modify_return(unsigned long addr, const char *func_name)
20140{
20141 if (within_error_injection_list(addr) ||
20142 !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
20143 return 0;
20144
20145 return -EINVAL;
20146}
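/* Editor's illustrative sketch (not part of verifier.c): an fmod_ret
 * program that check_attach_modify_return() would accept because the
 * attach target carries the "security_" prefix. The section name and the
 * BPF_PROG() macro are libbpf conventions, assumed here.
 *
 *   SEC("fmod_ret/security_file_open")
 *   int BPF_PROG(restrict_open, struct file *file, int ret)
 *   {
 *           return ret ? ret : -EPERM;   // override the hook's return value
 *   }
 */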
20147
20148/* list of non-sleepable functions that are otherwise on
20149 * ALLOW_ERROR_INJECTION list
20150 */
20151BTF_SET_START(btf_non_sleepable_error_inject)
20152/* Three functions below can be called from sleepable and non-sleepable context.
20153 * Assume non-sleepable from bpf safety point of view.
20154 */
20155BTF_ID(func, __filemap_add_folio)
20156BTF_ID(func, should_fail_alloc_page)
20157BTF_ID(func, should_failslab)
20158BTF_SET_END(btf_non_sleepable_error_inject)
20159
20160static int check_non_sleepable_error_inject(u32 btf_id)
20161{
20162 return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
20163}
20164
20165int bpf_check_attach_target(struct bpf_verifier_log *log,
20166 const struct bpf_prog *prog,
20167 const struct bpf_prog *tgt_prog,
20168 u32 btf_id,
20169 struct bpf_attach_target_info *tgt_info)
20170{
20171 bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
20172 const char prefix[] = "btf_trace_";
20173 int ret = 0, subprog = -1, i;
20174 const struct btf_type *t;
20175 bool conservative = true;
20176 const char *tname;
20177 struct btf *btf;
20178 long addr = 0;
20179 struct module *mod = NULL;
20180
20181 if (!btf_id) {
20182 bpf_log(log, "Tracing programs must provide btf_id\n");
20183 return -EINVAL;
20184 }
20185 btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
20186 if (!btf) {
20187 bpf_log(log,
20188 "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
20189 return -EINVAL;
20190 }
20191 t = btf_type_by_id(btf, btf_id);
20192 if (!t) {
20193 bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
20194 return -EINVAL;
20195 }
20196 tname = btf_name_by_offset(btf, t->name_off);
20197 if (!tname) {
20198 bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
20199 return -EINVAL;
20200 }
20201 if (tgt_prog) {
20202 struct bpf_prog_aux *aux = tgt_prog->aux;
20203
20204 if (bpf_prog_is_dev_bound(prog->aux) &&
20205 !bpf_prog_dev_bound_match(prog, tgt_prog)) {
20206 bpf_log(log, "Target program bound device mismatch");
20207 return -EINVAL;
20208 }
20209
20210 for (i = 0; i < aux->func_info_cnt; i++)
20211 if (aux->func_info[i].type_id == btf_id) {
20212 subprog = i;
20213 break;
20214 }
20215 if (subprog == -1) {
20216 bpf_log(log, "Subprog %s doesn't exist\n", tname);
20217 return -EINVAL;
20218 }
20219 if (aux->func && aux->func[subprog]->aux->exception_cb) {
20220 bpf_log(log,
20221 "%s programs cannot attach to exception callback\n",
20222 prog_extension ? "Extension" : "FENTRY/FEXIT");
20223 return -EINVAL;
20224 }
20225 conservative = aux->func_info_aux[subprog].unreliable;
20226 if (prog_extension) {
20227 if (conservative) {
20228 bpf_log(log,
20229 "Cannot replace static functions\n");
20230 return -EINVAL;
20231 }
20232 if (!prog->jit_requested) {
20233 bpf_log(log,
20234 "Extension programs should be JITed\n");
20235 return -EINVAL;
20236 }
20237 }
20238 if (!tgt_prog->jited) {
20239 bpf_log(log, "Can attach to only JITed progs\n");
20240 return -EINVAL;
20241 }
20242 if (tgt_prog->type == prog->type) {
20243 /* Cannot fentry/fexit another fentry/fexit program.
20244 * Cannot attach program extension to another extension.
20245 * It's ok to attach fentry/fexit to extension program.
20246 */
20247 bpf_log(log, "Cannot recursively attach\n");
20248 return -EINVAL;
20249 }
20250 if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
20251 prog_extension &&
20252 (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
20253 tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
20254 /* Program extensions can extend all program types
20255 * except fentry/fexit, for the following reason:
20256 * fentry/fexit programs are used for performance
20257 * analysis and stats and can be attached to any
20258 * program type except themselves. When an extension
20259 * program replaces an XDP function, it must remain
20260 * possible to do performance analysis of all
20261 * functions, both the original XDP program and its
20262 * program extension. Hence attaching fentry/fexit to
20263 * BPF_PROG_TYPE_EXT is allowed. If extending of
20264 * fentry/fexit were allowed, it would be possible to
20265 * create a long call chain
20266 * fentry->extension->fentry->extension beyond any
20267 * reasonable stack size. Hence extending fentry is not allowed.
20268 */
20269 bpf_log(log, "Cannot extend fentry/fexit\n");
20270 return -EINVAL;
20271 }
20272 } else {
20273 if (prog_extension) {
20274 bpf_log(log, "Cannot replace kernel functions\n");
20275 return -EINVAL;
20276 }
20277 }
20278
20279 switch (prog->expected_attach_type) {
20280 case BPF_TRACE_RAW_TP:
20281 if (tgt_prog) {
20282 bpf_log(log,
20283 "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
20284 return -EINVAL;
20285 }
20286 if (!btf_type_is_typedef(t)) {
20287 bpf_log(log, "attach_btf_id %u is not a typedef\n",
20288 btf_id);
20289 return -EINVAL;
20290 }
20291 if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
20292 bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
20293 btf_id, tname);
20294 return -EINVAL;
20295 }
20296 tname += sizeof(prefix) - 1;
20297 t = btf_type_by_id(btf, t->type);
20298 if (!btf_type_is_ptr(t))
20299 /* should never happen in valid vmlinux build */
20300 return -EINVAL;
20301 t = btf_type_by_id(btf, t->type);
20302 if (!btf_type_is_func_proto(t))
20303 /* should never happen in valid vmlinux build */
20304 return -EINVAL;
20305
20306 break;
20307 case BPF_TRACE_ITER:
20308 if (!btf_type_is_func(t)) {
20309 bpf_log(log, "attach_btf_id %u is not a function\n",
20310 btf_id);
20311 return -EINVAL;
20312 }
20313 t = btf_type_by_id(btf, t->type);
20314 if (!btf_type_is_func_proto(t))
20315 return -EINVAL;
20316 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
20317 if (ret)
20318 return ret;
20319 break;
20320 default:
20321 if (!prog_extension)
20322 return -EINVAL;
20323 fallthrough;
20324 case BPF_MODIFY_RETURN:
20325 case BPF_LSM_MAC:
20326 case BPF_LSM_CGROUP:
20327 case BPF_TRACE_FENTRY:
20328 case BPF_TRACE_FEXIT:
20329 if (!btf_type_is_func(t)) {
20330 bpf_log(log, "attach_btf_id %u is not a function\n",
20331 btf_id);
20332 return -EINVAL;
20333 }
20334 if (prog_extension &&
20335 btf_check_type_match(log, prog, btf, t))
20336 return -EINVAL;
20337 t = btf_type_by_id(btf, t->type);
20338 if (!btf_type_is_func_proto(t))
20339 return -EINVAL;
20340
20341 if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
20342 (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
20343 prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
20344 return -EINVAL;
20345
20346 if (tgt_prog && conservative)
20347 t = NULL;
20348
20349 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
20350 if (ret < 0)
20351 return ret;
20352
20353 if (tgt_prog) {
20354 if (subprog == 0)
20355 addr = (long) tgt_prog->bpf_func;
20356 else
20357 addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
20358 } else {
20359 if (btf_is_module(btf)) {
20360 mod = btf_try_get_module(btf);
20361 if (mod)
20362 addr = find_kallsyms_symbol_value(mod, tname);
20363 else
20364 addr = 0;
20365 } else {
20366 addr = kallsyms_lookup_name(tname);
20367 }
20368 if (!addr) {
20369 module_put(mod);
20370 bpf_log(log,
20371 "The address of function %s cannot be found\n",
20372 tname);
20373 return -ENOENT;
20374 }
20375 }
20376
20377 if (prog->aux->sleepable) {
20378 ret = -EINVAL;
20379 switch (prog->type) {
20380 case BPF_PROG_TYPE_TRACING:
20381
20382 /* fentry/fexit/fmod_ret progs can be sleepable if they are
20383 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
20384 */
20385 if (!check_non_sleepable_error_inject(btf_id) &&
20386 within_error_injection_list(addr))
20387 ret = 0;
20388 /* fentry/fexit/fmod_ret progs can also be sleepable if they are
20389 * in the fmodret id set with the KF_SLEEPABLE flag.
20390 */
20391 else {
20392 u32 *flags = btf_kfunc_is_modify_return(btf, btf_id,
20393 prog);
20394
20395 if (flags && (*flags & KF_SLEEPABLE))
20396 ret = 0;
20397 }
20398 break;
20399 case BPF_PROG_TYPE_LSM:
20400 /* LSM progs check that they are attached to bpf_lsm_*() funcs.
20401 * Only some of them are sleepable.
20402 */
20403 if (bpf_lsm_is_sleepable_hook(btf_id))
20404 ret = 0;
20405 break;
20406 default:
20407 break;
20408 }
20409 if (ret) {
20410 module_put(mod);
20411 bpf_log(log, "%s is not sleepable\n", tname);
20412 return ret;
20413 }
20414 } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
20415 if (tgt_prog) {
20416 module_put(mod);
20417 bpf_log(log, "can't modify return codes of BPF programs\n");
20418 return -EINVAL;
20419 }
20420 ret = -EINVAL;
20421 if (btf_kfunc_is_modify_return(btf, btf_id, prog) ||
20422 !check_attach_modify_return(addr, tname))
20423 ret = 0;
20424 if (ret) {
20425 module_put(mod);
20426 bpf_log(log, "%s() is not modifiable\n", tname);
20427 return ret;
20428 }
20429 }
20430
20431 break;
20432 }
20433 tgt_info->tgt_addr = addr;
20434 tgt_info->tgt_name = tname;
20435 tgt_info->tgt_type = t;
20436 tgt_info->tgt_mod = mod;
20437 return 0;
20438}
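/* Editor's illustrative sketch (not part of verifier.c): how user space
 * typically names the target that bpf_check_attach_target() resolves,
 * using libbpf. The program and kernel function names are placeholders.
 *
 *   struct bpf_program *p;
 *
 *   p = bpf_object__find_program_by_name(obj, "my_fexit");
 *   bpf_program__set_attach_target(p, 0, "tcp_v4_connect");  // 0 == vmlinux
 *   // After bpf_object__load(), the verifier resolves the name to a BTF id
 *   // and fills struct bpf_attach_target_info (addr, func model, module).
 */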
20439
20440BTF_SET_START(btf_id_deny)
20441BTF_ID_UNUSED
20442#ifdef CONFIG_SMP
20443BTF_ID(func, migrate_disable)
20444BTF_ID(func, migrate_enable)
20445#endif
20446#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
20447BTF_ID(func, rcu_read_unlock_strict)
20448#endif
20449#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
20450BTF_ID(func, preempt_count_add)
20451BTF_ID(func, preempt_count_sub)
20452#endif
20453#ifdef CONFIG_PREEMPT_RCU
20454BTF_ID(func, __rcu_read_lock)
20455BTF_ID(func, __rcu_read_unlock)
20456#endif
20457BTF_SET_END(btf_id_deny)
20458
20459static bool can_be_sleepable(struct bpf_prog *prog)
20460{
20461 if (prog->type == BPF_PROG_TYPE_TRACING) {
20462 switch (prog->expected_attach_type) {
20463 case BPF_TRACE_FENTRY:
20464 case BPF_TRACE_FEXIT:
20465 case BPF_MODIFY_RETURN:
20466 case BPF_TRACE_ITER:
20467 return true;
20468 default:
20469 return false;
20470 }
20471 }
20472 return prog->type == BPF_PROG_TYPE_LSM ||
20473 prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
20474 prog->type == BPF_PROG_TYPE_STRUCT_OPS;
20475}
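/* Editor's illustrative sketch (not part of verifier.c): the libbpf SEC()
 * spellings that correspond to the program types can_be_sleepable()
 * accepts; the ".s" suffix is what marks a program sleepable at load time.
 * These section names are libbpf conventions, assumed here.
 *
 *   SEC("fentry.s/...")   // sleepable tracing program
 *   SEC("lsm.s/...")      // sleepable LSM program
 *   SEC("uprobe.s/...")   // sleepable uprobe (kprobe prog type)
 */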
20476
20477static int check_attach_btf_id(struct bpf_verifier_env *env)
20478{
20479 struct bpf_prog *prog = env->prog;
20480 struct bpf_prog *tgt_prog = prog->aux->dst_prog;
20481 struct bpf_attach_target_info tgt_info = {};
20482 u32 btf_id = prog->aux->attach_btf_id;
20483 struct bpf_trampoline *tr;
20484 int ret;
20485 u64 key;
20486
20487 if (prog->type == BPF_PROG_TYPE_SYSCALL) {
20488 if (prog->aux->sleepable)
20489 /* attach_btf_id checked to be zero already */
20490 return 0;
20491 verbose(env, "Syscall programs can only be sleepable\n");
20492 return -EINVAL;
20493 }
20494
20495 if (prog->aux->sleepable && !can_be_sleepable(prog)) {
20496 verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
20497 return -EINVAL;
20498 }
20499
20500 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
20501 return check_struct_ops_btf_id(env);
20502
20503 if (prog->type != BPF_PROG_TYPE_TRACING &&
20504 prog->type != BPF_PROG_TYPE_LSM &&
20505 prog->type != BPF_PROG_TYPE_EXT)
20506 return 0;
20507
20508 ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
20509 if (ret)
20510 return ret;
20511
20512 if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
20513 /* to make freplace equivalent to their targets, they need to
20514 * inherit env->ops and expected_attach_type for the rest of the
20515 * verification
20516 */
20517 env->ops = bpf_verifier_ops[tgt_prog->type];
20518 prog->expected_attach_type = tgt_prog->expected_attach_type;
20519 }
20520
20521 /* store info about the attachment target that will be used later */
20522 prog->aux->attach_func_proto = tgt_info.tgt_type;
20523 prog->aux->attach_func_name = tgt_info.tgt_name;
20524 prog->aux->mod = tgt_info.tgt_mod;
20525
20526 if (tgt_prog) {
20527 prog->aux->saved_dst_prog_type = tgt_prog->type;
20528 prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
20529 }
20530
20531 if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
20532 prog->aux->attach_btf_trace = true;
20533 return 0;
20534 } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
20535 if (!bpf_iter_prog_supported(prog))
20536 return -EINVAL;
20537 return 0;
20538 }
20539
20540 if (prog->type == BPF_PROG_TYPE_LSM) {
20541 ret = bpf_lsm_verify_prog(&env->log, prog);
20542 if (ret < 0)
20543 return ret;
20544 } else if (prog->type == BPF_PROG_TYPE_TRACING &&
20545 btf_id_set_contains(&btf_id_deny, btf_id)) {
20546 return -EINVAL;
20547 }
20548
20549 key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
20550 tr = bpf_trampoline_get(key, &tgt_info);
20551 if (!tr)
20552 return -ENOMEM;
20553
20554 if (tgt_prog && tgt_prog->aux->tail_call_reachable)
20555 tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX;
20556
20557 prog->aux->dst_trampoline = tr;
20558 return 0;
20559}
20560
20561struct btf *bpf_get_btf_vmlinux(void)
20562{
20563 if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
20564 mutex_lock(&bpf_verifier_lock);
20565 if (!btf_vmlinux)
20566 btf_vmlinux = btf_parse_vmlinux();
20567 mutex_unlock(&bpf_verifier_lock);
20568 }
20569 return btf_vmlinux;
20570}
20571
20572int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
20573{
20574 u64 start_time = ktime_get_ns();
20575 struct bpf_verifier_env *env;
20576 int i, len, ret = -EINVAL, err;
20577 u32 log_true_size;
20578 bool is_priv;
20579
20580 /* no program is valid */
20581 if (ARRAY_SIZE(bpf_verifier_ops) == 0)
20582 return -EINVAL;
20583
20584 /* 'struct bpf_verifier_env' can be global, but since it's not small,
20585 * allocate/free it every time bpf_check() is called
20586 */
20587 env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
20588 if (!env)
20589 return -ENOMEM;
20590
20591 env->bt.env = env;
20592
20593 len = (*prog)->len;
20594 env->insn_aux_data =
20595 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
20596 ret = -ENOMEM;
20597 if (!env->insn_aux_data)
20598 goto err_free_env;
20599 for (i = 0; i < len; i++)
20600 env->insn_aux_data[i].orig_idx = i;
20601 env->prog = *prog;
20602 env->ops = bpf_verifier_ops[env->prog->type];
20603 env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
20604 is_priv = bpf_capable();
20605
20606 bpf_get_btf_vmlinux();
20607
20608 /* grab the mutex to protect few globals used by verifier */
20609 if (!is_priv)
20610 mutex_lock(&bpf_verifier_lock);
20611
20612 /* user could have requested verbose verifier output
20613 * and supplied buffer to store the verification trace
20614 */
20615 ret = bpf_vlog_init(&env->log, attr->log_level,
20616 (char __user *) (unsigned long) attr->log_buf,
20617 attr->log_size);
20618 if (ret)
20619 goto err_unlock;
20620
20621 mark_verifier_state_clean(env);
20622
20623 if (IS_ERR(btf_vmlinux)) {
20624 /* Either gcc or pahole or the kernel is broken. */
20625 verbose(env, "in-kernel BTF is malformed\n");
20626 ret = PTR_ERR(btf_vmlinux);
20627 goto skip_full_check;
20628 }
20629
20630 env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
20631 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
20632 env->strict_alignment = true;
20633 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
20634 env->strict_alignment = false;
20635
20636 env->allow_ptr_leaks = bpf_allow_ptr_leaks();
20637 env->allow_uninit_stack = bpf_allow_uninit_stack();
20638 env->bypass_spec_v1 = bpf_bypass_spec_v1();
20639 env->bypass_spec_v4 = bpf_bypass_spec_v4();
20640 env->bpf_capable = bpf_capable();
20641
20642 if (is_priv)
20643 env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
20644
20645 env->explored_states = kvcalloc(state_htab_size(env),
20646 sizeof(struct bpf_verifier_state_list *),
20647 GFP_USER);
20648 ret = -ENOMEM;
20649 if (!env->explored_states)
20650 goto skip_full_check;
20651
20652 ret = check_btf_info_early(env, attr, uattr);
20653 if (ret < 0)
20654 goto skip_full_check;
20655
20656 ret = add_subprog_and_kfunc(env);
20657 if (ret < 0)
20658 goto skip_full_check;
20659
20660 ret = check_subprogs(env);
20661 if (ret < 0)
20662 goto skip_full_check;
20663
20664 ret = check_btf_info(env, attr, uattr);
20665 if (ret < 0)
20666 goto skip_full_check;
20667
20668 ret = check_attach_btf_id(env);
20669 if (ret)
20670 goto skip_full_check;
20671
20672 ret = resolve_pseudo_ldimm64(env);
20673 if (ret < 0)
20674 goto skip_full_check;
20675
20676 if (bpf_prog_is_offloaded(env->prog->aux)) {
20677 ret = bpf_prog_offload_verifier_prep(env->prog);
20678 if (ret)
20679 goto skip_full_check;
20680 }
20681
20682 ret = check_cfg(env);
20683 if (ret < 0)
20684 goto skip_full_check;
20685
20686 ret = do_check_subprogs(env);
20687 ret = ret ?: do_check_main(env);
20688
20689 if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
20690 ret = bpf_prog_offload_finalize(env);
20691
20692skip_full_check:
20693 kvfree(env->explored_states);
20694
20695 if (ret == 0)
20696 ret = check_max_stack_depth(env);
20697
20698 /* instruction rewrites happen after this point */
20699 if (ret == 0)
20700 ret = optimize_bpf_loop(env);
20701
20702 if (is_priv) {
20703 if (ret == 0)
20704 opt_hard_wire_dead_code_branches(env);
20705 if (ret == 0)
20706 ret = opt_remove_dead_code(env);
20707 if (ret == 0)
20708 ret = opt_remove_nops(env);
20709 } else {
20710 if (ret == 0)
20711 sanitize_dead_code(env);
20712 }
20713
20714 if (ret == 0)
20715 /* program is valid, convert *(u32*)(ctx + off) accesses */
20716 ret = convert_ctx_accesses(env);
20717
20718 if (ret == 0)
20719 ret = do_misc_fixups(env);
20720
20721 /* do 32-bit optimization after insn patching has done so those patched
20722 * insns could be handled correctly.
20723 */
20724 if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
20725 ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
20726 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
20727 : false;
20728 }
20729
20730 if (ret == 0)
20731 ret = fixup_call_args(env);
20732
20733 env->verification_time = ktime_get_ns() - start_time;
20734 print_verification_stats(env);
20735 env->prog->aux->verified_insns = env->insn_processed;
20736
20737 /* preserve original error even if log finalization is successful */
20738 err = bpf_vlog_finalize(&env->log, &log_true_size);
20739 if (err)
20740 ret = err;
20741
20742 if (uattr_size >= offsetofend(union bpf_attr, log_true_size) &&
20743 copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
20744 &log_true_size, sizeof(log_true_size))) {
20745 ret = -EFAULT;
20746 goto err_release_maps;
20747 }
20748
20749 if (ret)
20750 goto err_release_maps;
20751
20752 if (env->used_map_cnt) {
20753 /* if program passed verifier, update used_maps in bpf_prog_info */
20754 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
20755 sizeof(env->used_maps[0]),
20756 GFP_KERNEL);
20757
20758 if (!env->prog->aux->used_maps) {
20759 ret = -ENOMEM;
20760 goto err_release_maps;
20761 }
20762
20763 memcpy(env->prog->aux->used_maps, env->used_maps,
20764 sizeof(env->used_maps[0]) * env->used_map_cnt);
20765 env->prog->aux->used_map_cnt = env->used_map_cnt;
20766 }
20767 if (env->used_btf_cnt) {
20768 /* if program passed verifier, update used_btfs in bpf_prog_aux */
20769 env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
20770 sizeof(env->used_btfs[0]),
20771 GFP_KERNEL);
20772 if (!env->prog->aux->used_btfs) {
20773 ret = -ENOMEM;
20774 goto err_release_maps;
20775 }
20776
20777 memcpy(env->prog->aux->used_btfs, env->used_btfs,
20778 sizeof(env->used_btfs[0]) * env->used_btf_cnt);
20779 env->prog->aux->used_btf_cnt = env->used_btf_cnt;
20780 }
20781 if (env->used_map_cnt || env->used_btf_cnt) {
20782 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
20783 * bpf_ld_imm64 instructions
20784 */
20785 convert_pseudo_ld_imm64(env);
20786 }
20787
20788 adjust_btf_func(env);
20789
20790err_release_maps:
20791 if (!env->prog->aux->used_maps)
20792 /* if we didn't copy map pointers into bpf_prog_info, release
20793 * them now. Otherwise free_used_maps() will release them.
20794 */
20795 release_maps(env);
20796 if (!env->prog->aux->used_btfs)
20797 release_btfs(env);
20798
20799 /* extension progs temporarily inherit the attach_type of their targets
20800 for verification purposes, so set it back to zero before returning
20801 */
20802 if (env->prog->type == BPF_PROG_TYPE_EXT)
20803 env->prog->expected_attach_type = 0;
20804
20805 *prog = env->prog;
20806err_unlock:
20807 if (!is_priv)
20808 mutex_unlock(&bpf_verifier_lock);
20809 vfree(env->insn_aux_data);
20810err_free_env:
20811 kfree(env);
20812 return ret;
20813}
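/* Editor's illustrative sketch (not part of verifier.c): the user-space
 * path that ends in bpf_check(), i.e. the bpf(BPF_PROG_LOAD, ...) syscall.
 * Field names follow the UAPI union bpf_attr in <linux/bpf.h>; the insns
 * array, log buffer and error handling are assumed or omitted.
 *
 *   union bpf_attr attr = {
 *           .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
 *           .insns     = (__u64)(unsigned long)insns,
 *           .insn_cnt  = insn_cnt,
 *           .license   = (__u64)(unsigned long)"GPL",
 *           .log_buf   = (__u64)(unsigned long)log_buf,
 *           .log_size  = sizeof(log_buf),
 *           .log_level = 1,
 *   };
 *   int prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 */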
20814
