// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016,2017 Facebook
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/btf_ids.h>

#include "map_in_map.h"

#define ARRAY_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
	 BPF_F_PRESERVE_ELEMS | BPF_F_INNER_MAP)

static void bpf_array_free_percpu(struct bpf_array *array)
{
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		free_percpu(array->pptrs[i]);
		cond_resched();
	}
}

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
	void __percpu *ptr;
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		ptr = bpf_map_alloc_percpu(&array->map, array->elem_size, 8,
					   GFP_USER | __GFP_NOWARN);
		if (!ptr) {
			bpf_array_free_percpu(array);
			return -ENOMEM;
		}
		array->pptrs[i] = ptr;
		cond_resched();
	}

	return 0;
}
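
/* Note: both the free and alloc loops above may touch millions of entries
 * for large maps, hence the cond_resched() per iteration to avoid hogging
 * the CPU during map creation and teardown.
 */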

/* Called from syscall */
int array_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags) ||
	    (percpu && numa_node != NUMA_NO_NODE))
		return -EINVAL;

	if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
	    attr->map_flags & (BPF_F_MMAPABLE | BPF_F_INNER_MAP))
		return -EINVAL;

	if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
	    attr->map_flags & BPF_F_PRESERVE_ELEMS)
		return -EINVAL;

	/* avoid overflow on round_up(map->value_size) */
	if (attr->value_size > INT_MAX)
		return -E2BIG;

	return 0;
}

static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);
	u32 elem_size, index_mask, max_entries;
	bool bypass_spec_v1 = bpf_bypass_spec_v1();
	u64 array_size, mask64;
	struct bpf_array *array;

	elem_size = round_up(attr->value_size, 8);

	max_entries = attr->max_entries;

	/* On 32-bit archs, roundup_pow_of_two() with a max_entries that has
	 * the uppermost bit set in u32 space is undefined behavior due to
	 * the resulting 1U << 32, so do it manually here in u64 space.
	 */
	mask64 = fls_long(max_entries - 1);
	mask64 = 1ULL << mask64;
	mask64 -= 1;

	index_mask = mask64;
	if (!bypass_spec_v1) {
		/* round up array size to nearest power of 2,
		 * since cpu will speculate within index_mask limits
		 */
		max_entries = index_mask + 1;
		/* Check for overflows. */
		if (max_entries < attr->max_entries)
			return ERR_PTR(-E2BIG);
	}

	array_size = sizeof(*array);
	if (percpu) {
		array_size += (u64) max_entries * sizeof(void *);
	} else {
		/* rely on vmalloc() to return page-aligned memory and
		 * ensure array->value is exactly page-aligned
		 */
		if (attr->map_flags & BPF_F_MMAPABLE) {
			array_size = PAGE_ALIGN(array_size);
			array_size += PAGE_ALIGN((u64) max_entries * elem_size);
		} else {
			array_size += (u64) max_entries * elem_size;
		}
	}

	/* allocate all map elements and zero-initialize them */
	if (attr->map_flags & BPF_F_MMAPABLE) {
		void *data;

		/* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
		data = bpf_map_area_mmapable_alloc(array_size, numa_node);
		if (!data)
			return ERR_PTR(-ENOMEM);
		array = data + PAGE_ALIGN(sizeof(struct bpf_array))
			- offsetof(struct bpf_array, value);
	} else {
		array = bpf_map_area_alloc(array_size, numa_node);
	}
	if (!array)
		return ERR_PTR(-ENOMEM);
	array->index_mask = index_mask;
	array->map.bypass_spec_v1 = bypass_spec_v1;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&array->map, attr);
	array->elem_size = elem_size;

	if (percpu && bpf_array_alloc_percpu(array)) {
		bpf_map_area_free(array);
		return ERR_PTR(-ENOMEM);
	}

	return &array->map;
}
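
/* Rough layout sketch for a BPF_F_MMAPABLE (non-percpu) array, assuming
 * bpf_map_area_mmapable_alloc() returns page-aligned memory as relied on
 * above:
 *
 *	data                      data + PAGE_ALIGN(sizeof(struct bpf_array))
 *	|                         |
 *	v                         v
 *	+------------------------+---------------------------------------+
 *	| (unused) ... bpf_array |  value[0] | value[1] | ...             |
 *	+------------------------+---------------------------------------+
 *
 * i.e. the struct header is shifted so that array->value lands exactly on
 * a page boundary, which is what makes the element area mmap'able by user
 * space (see array_map_mmap() below).
 */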

static void *array_map_elem_ptr(struct bpf_array *array, u32 index)
{
	return array->value + (u64)array->elem_size * index;
}

/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return array->value + (u64)array->elem_size * (index & array->index_mask);
}

static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
				       u32 off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (off >= map->value_size)
		return -EINVAL;

	*imm = (unsigned long)array->value;
	return 0;
}

static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
				       u32 *off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u64 base = (unsigned long)array->value;
	u64 range = array->elem_size;

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (imm < base || imm >= base + range)
		return -ENOENT;

	*off = imm - base;
	return 0;
}

/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_insn *insn = insn_buf;
	u32 elem_size = array->elem_size;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	if (map->map_flags & BPF_F_INNER_MAP)
		return -EOPNOTSUPP;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (!map->bypass_spec_v1) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
	}

	if (is_power_of_2(elem_size)) {
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	} else {
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	}
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);
	return insn - insn_buf;
}
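
/* The inline sequence emitted above corresponds roughly to this pseudo-C
 * (sketch; the AND with index_mask is only emitted when the Spectre v1
 * mitigation is active, i.e. !bypass_spec_v1):
 *
 *	r1 += offsetof(struct bpf_array, value);
 *	r0 = *(u32 *)(r2 + 0);			// index
 *	if (r0 >= max_entries) goto out_null;
 *	r0 &= index_mask;
 *	r0 *= elem_size;			// LSH when elem_size is a power of 2
 *	r0 += r1;				// &array->value[index]
 *	goto out;
 * out_null:
 *	r0 = 0;
 * out:
 */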

/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}

static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (cpu >= nr_cpu_ids)
		return NULL;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(index >= array->map.max_entries))
		return -ENOENT;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = array->elem_size;
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
		check_and_init_map_value(map, value + off);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = (u32 *)next_key;

	if (index >= array->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == array->map.max_entries - 1)
		return -ENOENT;

	*next = index + 1;
	return 0;
}
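
/* Given the handling above (a NULL or out-of-range key yields index 0,
 * the last index yields -ENOENT), user space can walk all keys roughly
 * like this (sketch using libbpf's bpf_map_get_next_key()):
 *
 *	u32 key, next;
 *	int err;
 *
 *	for (err = bpf_map_get_next_key(map_fd, NULL, &next); !err;
 *	     key = next, err = bpf_map_get_next_key(map_fd, &key, &next))
 *		// use next
 */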

/* Called from syscall or from eBPF program */
static long array_map_update_elem(struct bpf_map *map, void *key, void *value,
				  u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	char *val;

	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags & BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	if (unlikely((map_flags & BPF_F_LOCK) &&
		     !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
		return -EINVAL;

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		val = this_cpu_ptr(array->pptrs[index & array->index_mask]);
		copy_map_value(map, val, value);
		bpf_obj_free_fields(array->map.record, val);
	} else {
		val = array->value +
			(u64)array->elem_size * (index & array->index_mask);
		if (map_flags & BPF_F_LOCK)
			copy_map_value_locked(map, val, value, false);
		else
			copy_map_value(map, val, value);
		bpf_obj_free_fields(array->map.record, val);
	}
	return 0;
}

int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	/* user space will provide round_up(value_size, 8) bytes that
	 * will be copied into the per-cpu area. bpf programs can only
	 * access value_size of it. During lookup the same extra bytes
	 * will be returned or zeros which were zero-filled by
	 * percpu_alloc, so no kernel data leaks are possible.
	 */
	size = array->elem_size;
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
		bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
		off += size;
	}
	rcu_read_unlock();
	return 0;
}
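
/* For both syscall-side copy helpers above, the user-visible value is a
 * flat buffer with one slot of round_up(value_size, 8) bytes (== elem_size)
 * per possible CPU, so the buffer must hold at least
 * elem_size * num_possible_cpus() bytes in total.
 */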

/* Called from syscall or from eBPF program */
static long array_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

static void *array_map_vmalloc_addr(struct bpf_array *array)
{
	return (void *)round_down((unsigned long)array, PAGE_SIZE);
}

static void array_map_free_timers(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	/* We don't reset or free fields other than timer on uref dropping to zero. */
	if (!btf_record_has_field(map->record, BPF_TIMER))
		return;

	for (i = 0; i < array->map.max_entries; i++)
		bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	if (!IS_ERR_OR_NULL(map->record)) {
		if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
			for (i = 0; i < array->map.max_entries; i++) {
				void __percpu *pptr = array->pptrs[i & array->index_mask];
				int cpu;

				for_each_possible_cpu(cpu) {
					bpf_obj_free_fields(map->record, per_cpu_ptr(pptr, cpu));
					cond_resched();
				}
			}
		} else {
			for (i = 0; i < array->map.max_entries; i++)
				bpf_obj_free_fields(map->record, array_map_elem_ptr(array, i));
		}
	}

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		bpf_array_free_percpu(array);

	if (array->map.map_flags & BPF_F_MMAPABLE)
		bpf_map_area_free(array_map_vmalloc_addr(array));
	else
		bpf_map_area_free(array);
}

static void array_map_seq_show_elem(struct bpf_map *map, void *key,
				    struct seq_file *m)
{
	void *value;

	rcu_read_lock();

	value = array_map_lookup_elem(map, key);
	if (!value) {
		rcu_read_unlock();
		return;
	}

	if (map->btf_key_type_id)
		seq_printf(m, "%u: ", *(u32 *)key);
	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
	seq_puts(m, "\n");

	rcu_read_unlock();
}

static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
					   struct seq_file *m)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu;

	rcu_read_lock();

	seq_printf(m, "%u: {\n", *(u32 *)key);
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		seq_printf(m, "\tcpu%d: ", cpu);
		btf_type_seq_show(map->btf, map->btf_value_type_id,
				  per_cpu_ptr(pptr, cpu), m);
		seq_puts(m, "\n");
	}
	seq_puts(m, "}\n");

	rcu_read_unlock();
}

static int array_map_check_btf(const struct bpf_map *map,
			       const struct btf *btf,
			       const struct btf_type *key_type,
			       const struct btf_type *value_type)
{
	u32 int_data;

	/* One exception for keyless BTF: .bss/.data/.rodata map */
	if (btf_type_is_void(key_type)) {
		if (map->map_type != BPF_MAP_TYPE_ARRAY ||
		    map->max_entries != 1)
			return -EINVAL;

		if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC)
			return -EINVAL;

		return 0;
	}

	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
		return -EINVAL;

	int_data = *(u32 *)(key_type + 1);
	/* bpf array can only take a u32 key. This check makes sure
	 * that the btf matches the attr used during map_create.
	 */
	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
		return -EINVAL;

	return 0;
}

static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	pgoff_t pgoff = PAGE_ALIGN(sizeof(*array)) >> PAGE_SHIFT;

	if (!(map->map_flags & BPF_F_MMAPABLE))
		return -EINVAL;

	if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) >
	    PAGE_ALIGN((u64)array->map.max_entries * array->elem_size))
		return -EINVAL;

	return remap_vmalloc_range(vma, array_map_vmalloc_addr(array),
				   vma->vm_pgoff + pgoff);
}
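
/* Given the checks above, user space can map the element area of a
 * BPF_F_MMAPABLE array directly, e.g. (sketch):
 *
 *	len = round_up(max_entries * round_up(value_size, 8), page_size);
 *	ptr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0);
 *
 * The pgoff adjustment skips the pages holding struct bpf_array itself,
 * so offset 0 of the mapping corresponds to element 0 of the array.
 */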

static bool array_map_meta_equal(const struct bpf_map *meta0,
				 const struct bpf_map *meta1)
{
	if (!bpf_map_meta_equal(meta0, meta1))
		return false;
	return meta0->map_flags & BPF_F_INNER_MAP ? true :
	       meta0->max_entries == meta1->max_entries;
}

struct bpf_iter_seq_array_map_info {
	struct bpf_map *map;
	void *percpu_value_buf;
	u32 index;
};

static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_map *map = info->map;
	struct bpf_array *array;
	u32 index;

	if (info->index >= map->max_entries)
		return NULL;

	if (*pos == 0)
		++*pos;
	array = container_of(map, struct bpf_array, map);
	index = info->index & array->index_mask;
	if (info->percpu_value_buf)
		return array->pptrs[index];
	return array_map_elem_ptr(array, index);
}

static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_map *map = info->map;
	struct bpf_array *array;
	u32 index;

	++*pos;
	++info->index;
	if (info->index >= map->max_entries)
		return NULL;

	array = container_of(map, struct bpf_array, map);
	index = info->index & array->index_mask;
	if (info->percpu_value_buf)
		return array->pptrs[index];
	return array_map_elem_ptr(array, index);
}

static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_iter__bpf_map_elem ctx = {};
	struct bpf_map *map = info->map;
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	int off = 0, cpu = 0;
	void __percpu **pptr;
	u32 size;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, v == NULL);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.map = info->map;
	if (v) {
		ctx.key = &info->index;

		if (!info->percpu_value_buf) {
			ctx.value = v;
		} else {
			pptr = v;
			size = array->elem_size;
			for_each_possible_cpu(cpu) {
				copy_map_value_long(map, info->percpu_value_buf + off,
						    per_cpu_ptr(pptr, cpu));
				check_and_init_map_value(map, info->percpu_value_buf + off);
				off += size;
			}
			ctx.value = info->percpu_value_buf;
		}
	}

	return bpf_iter_run_prog(prog, &ctx);
}

static int bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_array_map_seq_show(seq, v);
}

static void bpf_array_map_seq_stop(struct seq_file *seq, void *v)
{
	if (!v)
		(void)__bpf_array_map_seq_show(seq, NULL);
}

static int bpf_iter_init_array_map(void *priv_data,
				   struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_array_map_info *seq_info = priv_data;
	struct bpf_map *map = aux->map;
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *value_buf;
	u32 buf_size;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		buf_size = array->elem_size * num_possible_cpus();
		value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
		if (!value_buf)
			return -ENOMEM;

		seq_info->percpu_value_buf = value_buf;
	}

	/* bpf_iter_attach_map() acquires a map uref, and the uref may be
	 * released before or in the middle of iterating map elements, so
	 * acquire an extra map uref for the iterator.
	 */
	bpf_map_inc_with_uref(map);
	seq_info->map = map;
	return 0;
}

static void bpf_iter_fini_array_map(void *priv_data)
{
	struct bpf_iter_seq_array_map_info *seq_info = priv_data;

	bpf_map_put_with_uref(seq_info->map);
	kfree(seq_info->percpu_value_buf);
}

static const struct seq_operations bpf_array_map_seq_ops = {
	.start = bpf_array_map_seq_start,
	.next = bpf_array_map_seq_next,
	.stop = bpf_array_map_seq_stop,
	.show = bpf_array_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_ops = &bpf_array_map_seq_ops,
	.init_seq_private = bpf_iter_init_array_map,
	.fini_seq_private = bpf_iter_fini_array_map,
	.seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info),
};

static long bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn,
				    void *callback_ctx, u64 flags)
{
	u32 i, key, num_elems = 0;
	struct bpf_array *array;
	bool is_percpu;
	u64 ret = 0;
	void *val;

	if (flags != 0)
		return -EINVAL;

	is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	array = container_of(map, struct bpf_array, map);
	if (is_percpu)
		migrate_disable();
	for (i = 0; i < map->max_entries; i++) {
		if (is_percpu)
			val = this_cpu_ptr(array->pptrs[i]);
		else
			val = array_map_elem_ptr(array, i);
		num_elems++;
		key = i;
		ret = callback_fn((u64)(long)map, (u64)(long)&key,
				  (u64)(long)val, (u64)(long)callback_ctx, 0);
		/* return value: 0 - continue, 1 - stop and return */
		if (ret)
			break;
	}

	if (is_percpu)
		migrate_enable();
	return num_elems;
}
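
/* This is the backend of the bpf_for_each_map_elem() helper for array
 * maps: callback_fn is invoked as callback_fn(map, &key, value,
 * callback_ctx) for each element, returning 0 to continue or 1 to stop
 * early, and the helper's return value is the number of elements visited.
 */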

static u64 array_map_mem_usage(const struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	bool percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	u32 elem_size = array->elem_size;
	u64 entries = map->max_entries;
	u64 usage = sizeof(*array);

	if (percpu) {
		usage += entries * sizeof(void *);
		usage += entries * elem_size * num_possible_cpus();
	} else {
		if (map->map_flags & BPF_F_MMAPABLE) {
			usage = PAGE_ALIGN(usage);
			usage += PAGE_ALIGN(entries * elem_size);
		} else {
			usage += entries * elem_size;
		}
	}
	return usage;
}

BTF_ID_LIST_SINGLE(array_map_btf_ids, struct, bpf_array)
const struct bpf_map_ops array_map_ops = {
	.map_meta_equal = array_map_meta_equal,
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_release_uref = array_map_free_timers,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_gen_lookup = array_map_gen_lookup,
	.map_direct_value_addr = array_map_direct_value_addr,
	.map_direct_value_meta = array_map_direct_value_meta,
	.map_mmap = array_map_mmap,
	.map_seq_show_elem = array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_array_elem,
	.map_mem_usage = array_map_mem_usage,
	.map_btf_id = &array_map_btf_ids[0],
	.iter_seq_info = &iter_seq_info,
};

const struct bpf_map_ops percpu_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = percpu_array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem,
	.map_seq_show_elem = percpu_array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_array_elem,
	.map_mem_usage = array_map_mem_usage,
	.map_btf_id = &array_map_btf_ids[0],
	.iter_seq_info = &iter_seq_info,
};

static int fd_array_map_alloc_check(union bpf_attr *attr)
{
	/* only file descriptors can be stored in this type of map */
	if (attr->value_size != sizeof(u32))
		return -EINVAL;
	/* Program read-only/write-only not supported for special maps yet. */
	if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG))
		return -EINVAL;
	return array_map_alloc_check(attr);
}

static void fd_array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	/* make sure it's empty */
	for (i = 0; i < array->map.max_entries; i++)
		BUG_ON(array->ptrs[i] != NULL);

	bpf_map_area_free(array);
}

static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EOPNOTSUPP);
}

/* only called from syscall */
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
	void **elem, *ptr;
	int ret = 0;

	if (!map->ops->map_fd_sys_lookup_elem)
		return -ENOTSUPP;

	rcu_read_lock();
	elem = array_map_lookup_elem(map, key);
	if (elem && (ptr = READ_ONCE(*elem)))
		*value = map->ops->map_fd_sys_lookup_elem(ptr);
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}

/* only called from syscall */
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *new_ptr, *old_ptr;
	u32 index = *(u32 *)key, ufd;

	if (map_flags != BPF_ANY)
		return -EINVAL;

	if (index >= array->map.max_entries)
		return -E2BIG;

	ufd = *(u32 *)value;
	new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
	if (IS_ERR(new_ptr))
		return PTR_ERR(new_ptr);

	if (map->ops->map_poke_run) {
		mutex_lock(&array->aux->poke_mutex);
		old_ptr = xchg(array->ptrs + index, new_ptr);
		map->ops->map_poke_run(map, index, old_ptr, new_ptr);
		mutex_unlock(&array->aux->poke_mutex);
	} else {
		old_ptr = xchg(array->ptrs + index, new_ptr);
	}

	if (old_ptr)
		map->ops->map_fd_put_ptr(old_ptr);
	return 0;
}

static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *old_ptr;
	u32 index = *(u32 *)key;

	if (index >= array->map.max_entries)
		return -E2BIG;

	if (map->ops->map_poke_run) {
		mutex_lock(&array->aux->poke_mutex);
		old_ptr = xchg(array->ptrs + index, NULL);
		map->ops->map_poke_run(map, index, old_ptr, NULL);
		mutex_unlock(&array->aux->poke_mutex);
	} else {
		old_ptr = xchg(array->ptrs + index, NULL);
	}

	if (old_ptr) {
		map->ops->map_fd_put_ptr(old_ptr);
		return 0;
	} else {
		return -ENOENT;
	}
}

static void *prog_fd_array_get_ptr(struct bpf_map *map,
				   struct file *map_file, int fd)
{
	struct bpf_prog *prog = bpf_prog_get(fd);

	if (IS_ERR(prog))
		return prog;

	if (!bpf_prog_map_compatible(map, prog)) {
		bpf_prog_put(prog);
		return ERR_PTR(-EINVAL);
	}

	return prog;
}

static void prog_fd_array_put_ptr(void *ptr)
{
	bpf_prog_put(ptr);
}

static u32 prog_fd_array_sys_lookup_elem(void *ptr)
{
	return ((struct bpf_prog *)ptr)->aux->id;
}

/* decrement refcnt of all bpf_progs that are stored in this map */
static void bpf_fd_array_map_clear(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	for (i = 0; i < array->map.max_entries; i++)
		fd_array_map_delete_elem(map, &i);
}

static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
					 struct seq_file *m)
{
	void **elem, *ptr;
	u32 prog_id;

	rcu_read_lock();

	elem = array_map_lookup_elem(map, key);
	if (elem) {
		ptr = READ_ONCE(*elem);
		if (ptr) {
			seq_printf(m, "%u: ", *(u32 *)key);
			prog_id = prog_fd_array_sys_lookup_elem(ptr);
			btf_type_seq_show(map->btf, map->btf_value_type_id,
					  &prog_id, m);
			seq_puts(m, "\n");
		}
	}

	rcu_read_unlock();
}

struct prog_poke_elem {
	struct list_head list;
	struct bpf_prog_aux *aux;
};

static int prog_array_map_poke_track(struct bpf_map *map,
				     struct bpf_prog_aux *prog_aux)
{
	struct prog_poke_elem *elem;
	struct bpf_array_aux *aux;
	int ret = 0;

	aux = container_of(map, struct bpf_array, map)->aux;
	mutex_lock(&aux->poke_mutex);
	list_for_each_entry(elem, &aux->poke_progs, list) {
		if (elem->aux == prog_aux)
			goto out;
	}

	elem = kmalloc(sizeof(*elem), GFP_KERNEL);
	if (!elem) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&elem->list);
	/* We must track the program's aux info at this point in time
	 * since the program pointer itself may not be stable yet, see
	 * also comment in prog_array_map_poke_run().
	 */
	elem->aux = prog_aux;

	list_add_tail(&elem->list, &aux->poke_progs);
out:
	mutex_unlock(&aux->poke_mutex);
	return ret;
}

static void prog_array_map_poke_untrack(struct bpf_map *map,
					struct bpf_prog_aux *prog_aux)
{
	struct prog_poke_elem *elem, *tmp;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	mutex_lock(&aux->poke_mutex);
	list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
		if (elem->aux == prog_aux) {
			list_del_init(&elem->list);
			kfree(elem);
			break;
		}
	}
	mutex_unlock(&aux->poke_mutex);
}

static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
				    struct bpf_prog *old,
				    struct bpf_prog *new)
{
	u8 *old_addr, *new_addr, *old_bypass_addr;
	struct prog_poke_elem *elem;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	WARN_ON_ONCE(!mutex_is_locked(&aux->poke_mutex));

	list_for_each_entry(elem, &aux->poke_progs, list) {
		struct bpf_jit_poke_descriptor *poke;
		int i, ret;

		for (i = 0; i < elem->aux->size_poke_tab; i++) {
			poke = &elem->aux->poke_tab[i];

			/* Few things to be aware of:
			 *
			 * 1) We can only ever access aux in this context, but
			 *    not aux->prog since it might not be stable yet and
			 *    there could be danger of use after free otherwise.
			 * 2) Initially when we start tracking aux, the program
			 *    is not JITed yet and also does not have a kallsyms
			 *    entry. We skip these as poke->tailcall_target_stable
			 *    is not active yet. The JIT will do the final fixup
			 *    before setting it stable. The various
			 *    poke->tailcall_target_stable are successively
			 *    activated, so tail call updates can arrive from here
			 *    while JIT is still finishing its final fixup for
			 *    non-activated poke entries.
			 * 3) On program teardown, the program's kallsym entry gets
			 *    removed out of an RCU callback, but we can only untrack
			 *    from sleepable context, therefore bpf_arch_text_poke()
			 *    might not see that this is in a BPF text section and
			 *    bails out with -EINVAL. As these are unreachable since
			 *    the RCU grace period already passed, we simply skip them.
			 * 4) Also, programs reaching a refcount of zero while patching
			 *    is in progress are okay since we're protected under
			 *    poke_mutex and untrack the programs before the JIT
			 *    buffer is freed. When we're still in the middle of
			 *    patching and suddenly the kallsyms entry of the program
			 *    gets evicted, we just skip the rest, which is fine due
			 *    to point 3).
			 * 5) Any other error happening below from bpf_arch_text_poke()
			 *    is an unexpected bug.
			 */
			if (!READ_ONCE(poke->tailcall_target_stable))
				continue;
			if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
				continue;
			if (poke->tail_call.map != map ||
			    poke->tail_call.key != key)
				continue;

			old_bypass_addr = old ? NULL : poke->bypass_addr;
			old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
			new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;

			if (new) {
				ret = bpf_arch_text_poke(poke->tailcall_target,
							 BPF_MOD_JUMP,
							 old_addr, new_addr);
				BUG_ON(ret < 0 && ret != -EINVAL);
				if (!old) {
					ret = bpf_arch_text_poke(poke->tailcall_bypass,
								 BPF_MOD_JUMP,
								 poke->bypass_addr,
								 NULL);
					BUG_ON(ret < 0 && ret != -EINVAL);
				}
			} else {
				ret = bpf_arch_text_poke(poke->tailcall_bypass,
							 BPF_MOD_JUMP,
							 old_bypass_addr,
							 poke->bypass_addr);
				BUG_ON(ret < 0 && ret != -EINVAL);
				/* let other CPUs finish the execution of the
				 * program so that it will not be possible to
				 * expose them to an invalid nop, stack unwind,
				 * or nop state
				 */
				if (!ret)
					synchronize_rcu();
				ret = bpf_arch_text_poke(poke->tailcall_target,
							 BPF_MOD_JUMP,
							 old_addr, NULL);
				BUG_ON(ret < 0 && ret != -EINVAL);
			}
		}
	}
}

static void prog_array_map_clear_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_array_aux,
					   work)->map;
	bpf_fd_array_map_clear(map);
	bpf_map_put(map);
}

static void prog_array_map_clear(struct bpf_map *map)
{
	struct bpf_array_aux *aux = container_of(map, struct bpf_array,
						 map)->aux;
	bpf_map_inc(map);
	schedule_work(&aux->work);
}

static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
{
	struct bpf_array_aux *aux;
	struct bpf_map *map;

	aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT);
	if (!aux)
		return ERR_PTR(-ENOMEM);

	INIT_WORK(&aux->work, prog_array_map_clear_deferred);
	INIT_LIST_HEAD(&aux->poke_progs);
	mutex_init(&aux->poke_mutex);

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		kfree(aux);
		return map;
	}

	container_of(map, struct bpf_array, map)->aux = aux;
	aux->map = map;

	return map;
}

static void prog_array_map_free(struct bpf_map *map)
{
	struct prog_poke_elem *elem, *tmp;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
		list_del_init(&elem->list);
		kfree(elem);
	}
	kfree(aux);
	fd_array_map_free(map);
}

/* prog_array->aux->{type,jited} is a runtime binding.
 * Doing a static check alone in the verifier is not enough.
 * Thus, prog_array_map cannot be used as an inner_map
 * and map_meta_equal is not implemented.
 */
const struct bpf_map_ops prog_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = prog_array_map_alloc,
	.map_free = prog_array_map_free,
	.map_poke_track = prog_array_map_poke_track,
	.map_poke_untrack = prog_array_map_poke_untrack,
	.map_poke_run = prog_array_map_poke_run,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
	.map_release_uref = prog_array_map_clear,
	.map_seq_show_elem = prog_array_map_seq_show_elem,
	.map_mem_usage = array_map_mem_usage,
	.map_btf_id = &array_map_btf_ids[0],
};

static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
						   struct file *map_file)
{
	struct bpf_event_entry *ee;

	ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
	if (ee) {
		ee->event = perf_file->private_data;
		ee->perf_file = perf_file;
		ee->map_file = map_file;
	}

	return ee;
}

static void __bpf_event_entry_free(struct rcu_head *rcu)
{
	struct bpf_event_entry *ee;

	ee = container_of(rcu, struct bpf_event_entry, rcu);
	fput(ee->perf_file);
	kfree(ee);
}

static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
{
	call_rcu(&ee->rcu, __bpf_event_entry_free);
}
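
/* The entry (and its perf_file reference) is freed only after an RCU
 * grace period, since BPF programs running under rcu_read_lock() on
 * other CPUs may still be dereferencing it.
 */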

static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
					 struct file *map_file, int fd)
{
	struct bpf_event_entry *ee;
	struct perf_event *event;
	struct file *perf_file;
	u64 value;

	perf_file = perf_event_get(fd);
	if (IS_ERR(perf_file))
		return perf_file;

	ee = ERR_PTR(-EOPNOTSUPP);
	event = perf_file->private_data;
	if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
		goto err_out;

	ee = bpf_event_entry_gen(perf_file, map_file);
	if (ee)
		return ee;
	ee = ERR_PTR(-ENOMEM);
err_out:
	fput(perf_file);
	return ee;
}

static void perf_event_fd_array_put_ptr(void *ptr)
{
	bpf_event_entry_free_rcu(ptr);
}

static void perf_event_fd_array_release(struct bpf_map *map,
					struct file *map_file)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_event_entry *ee;
	int i;

	if (map->map_flags & BPF_F_PRESERVE_ELEMS)
		return;

	rcu_read_lock();
	for (i = 0; i < array->map.max_entries; i++) {
		ee = READ_ONCE(array->ptrs[i]);
		if (ee && ee->map_file == map_file)
			fd_array_map_delete_elem(map, &i);
	}
	rcu_read_unlock();
}

static void perf_event_fd_array_map_free(struct bpf_map *map)
{
	if (map->map_flags & BPF_F_PRESERVE_ELEMS)
		bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

const struct bpf_map_ops perf_event_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = perf_event_fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
	.map_release = perf_event_fd_array_release,
	.map_check_btf = map_check_no_btf,
	.map_mem_usage = array_map_mem_usage,
	.map_btf_id = &array_map_btf_ids[0],
};

#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
				     struct file *map_file /* not used */,
				     int fd)
{
	return cgroup_get_from_fd(fd);
}

static void cgroup_fd_array_put_ptr(void *ptr)
{
	/* cgroup_put() frees the cgroup after an RCU grace period */
	cgroup_put(ptr);
}

static void cgroup_fd_array_free(struct bpf_map *map)
{
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

const struct bpf_map_ops cgroup_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = cgroup_fd_array_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
	.map_check_btf = map_check_no_btf,
	.map_mem_usage = array_map_mem_usage,
	.map_btf_id = &array_map_btf_ids[0],
};
#endif

static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *map, *inner_map_meta;

	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
	if (IS_ERR(inner_map_meta))
		return inner_map_meta;

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		bpf_map_meta_free(inner_map_meta);
		return map;
	}

	map->inner_map_meta = inner_map_meta;

	return map;
}

static void array_of_map_free(struct bpf_map *map)
{
	/* map->inner_map_meta is only accessed by syscall which
	 * is protected by fdget/fdput.
	 */
	bpf_map_meta_free(map->inner_map_meta);
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_map **inner_map = array_map_lookup_elem(map, key);

	if (!inner_map)
		return NULL;

	return READ_ONCE(*inner_map);
}

static int array_of_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 elem_size = array->elem_size;
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (!map->bypass_spec_v1) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
	}
	if (is_power_of_2(elem_size))
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	else
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);

	return insn - insn_buf;
}
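
/* Same inline sequence as array_map_gen_lookup(), with two extra steps:
 * a BPF_LDX_MEM(BPF_DW, ...) that dereferences the slot to load the
 * stored inner map pointer, and a JEQ that returns NULL when the slot
 * is empty.
 */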

const struct bpf_map_ops array_of_maps_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_of_map_alloc,
	.map_free = array_of_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_of_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,
	.map_fd_put_ptr = bpf_map_fd_put_ptr,
	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
	.map_gen_lookup = array_of_map_gen_lookup,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_check_btf = map_check_no_btf,
	.map_mem_usage = array_map_mem_usage,
	.map_btf_id = &array_map_btf_ids[0],
};