filter.c source code [linux/net/core/filter.c]

1	// SPDX-License-Identifier: GPL-2.0-or-later
2	/*
3	* Linux Socket Filter - Kernel level socket filtering
4	*
5	* Based on the design of the Berkeley Packet Filter. The new
6	* internal format has been designed by PLUMgrid:
7	*
8	* Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
9	*
10	* Authors:
11	*
12	* Jay Schulist <jschlst@samba.org>
13	* Alexei Starovoitov <ast@plumgrid.com>
14	* Daniel Borkmann <dborkman@redhat.com>
15	*
16	* Andi Kleen - Fix a few bad bugs and races.
17	* Kris Katterjohn - Added many additional checks in bpf_check_classic()
18	*/
19
20	#include <linux/atomic.h>
21	#include <linux/bpf_verifier.h>
22	#include <linux/module.h>
23	#include <linux/types.h>
24	#include <linux/mm.h>
25	#include <linux/fcntl.h>
26	#include <linux/socket.h>
27	#include <linux/sock_diag.h>
28	#include <linux/in.h>
29	#include <linux/inet.h>
30	#include <linux/netdevice.h>
31	#include <linux/if_packet.h>
32	#include <linux/if_arp.h>
33	#include <linux/gfp.h>
34	#include <net/inet_common.h>
35	#include <net/ip.h>
36	#include <net/protocol.h>
37	#include <net/netlink.h>
38	#include <linux/skbuff.h>
39	#include <linux/skmsg.h>
40	#include <net/sock.h>
41	#include <net/flow_dissector.h>
42	#include <linux/errno.h>
43	#include <linux/timer.h>
44	#include <linux/uaccess.h>
45	#include <asm/unaligned.h>
46	#include <linux/filter.h>
47	#include <linux/ratelimit.h>
48	#include <linux/seccomp.h>
49	#include <linux/if_vlan.h>
50	#include <linux/bpf.h>
51	#include <linux/btf.h>
52	#include <net/sch_generic.h>
53	#include <net/cls_cgroup.h>
54	#include <net/dst_metadata.h>
55	#include <net/dst.h>
56	#include <net/sock_reuseport.h>
57	#include <net/busy_poll.h>
58	#include <net/tcp.h>
59	#include <net/xfrm.h>
60	#include <net/udp.h>
61	#include <linux/bpf_trace.h>
62	#include <net/xdp_sock.h>
63	#include <linux/inetdevice.h>
64	#include <net/inet_hashtables.h>
65	#include <net/inet6_hashtables.h>
66	#include <net/ip_fib.h>
67	#include <net/nexthop.h>
68	#include <net/flow.h>
69	#include <net/arp.h>
70	#include <net/ipv6.h>
71	#include <net/net_namespace.h>
72	#include <linux/seg6_local.h>
73	#include <net/seg6.h>
74	#include <net/seg6_local.h>
75	#include <net/lwtunnel.h>
76	#include <net/ipv6_stubs.h>
77	#include <net/bpf_sk_storage.h>
78	#include <net/transp_v6.h>
79	#include <linux/btf_ids.h>
80	#include <net/tls.h>
81	#include <net/xdp.h>
82	#include <net/mptcp.h>
83	#include <net/netfilter/nf_conntrack_bpf.h>
84	#include <linux/un.h>
85
86	#include "dev.h"
87
88	static const struct bpf_func_proto *
89	bpf_sk_base_func_proto(enum bpf_func_id func_id);
90
91	int copy_bpf_fprog_from_user(struct sock_fprog dst, sockptr_t src, int* len)
92	{
93	if (in_compat_syscall()) {
94	struct compat_sock_fprog f32;
95
96	if (len != sizeof(f32))
97	return -EINVAL;
98	if (copy_from_sockptr(dst: &f32, src, size: sizeof(f32)))
99	return -EFAULT;
100	memset(dst, `0`, sizeof(*dst));
101	dst->len = f32.len;
102	dst->filter = compat_ptr(uptr: f32.filter);
103	} else {
104	if (len != sizeof(*dst))
105	return -EINVAL;
106	if (copy_from_sockptr(dst, src, size: sizeof(*dst)))
107	return -EFAULT;
108	}
109
110	return `0`;
111	}
112	EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);
113
114	/**
115	* sk_filter_trim_cap - run a packet through a socket filter
116	* @sk: sock associated with &sk_buff
117	* @skb: buffer to filter
118	* @cap: limit on how short the eBPF program may trim the packet
119	*
120	* Run the eBPF program and then cut skb->data to correct size returned by
121	* the program. If pkt_len is 0 we toss packet. If skb->len is smaller
122	* than pkt_len we keep whole skb->data. This is the socket level
123	* wrapper to bpf_prog_run. It returns 0 if the packet should
124	* be accepted or -EPERM if the packet should be tossed.
125	*
126	*/
127	int sk_filter_trim_cap(struct sock sk, struct* sk_buff skb, unsigned* int cap)
128	{
129	int err;
130	struct sk_filter *filter;
131
132	/*
133	* If the skb was allocated from pfmemalloc reserves, only
134	* allow SOCK_MEMALLOC sockets to use it as this socket is
135	* helping free memory
136	*/
137	if (skb_pfmemalloc(skb) && !sock_flag(sk, flag: SOCK_MEMALLOC)) {
138	NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
139	return -ENOMEM;
140	}
141	err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
142	if (err)
143	return err;
144
145	err = security_sock_rcv_skb(sk, skb);
146	if (err)
147	return err;
148
149	rcu_read_lock();
150	filter = rcu_dereference(sk->sk_filter);
151	if (filter) {
152	struct sock *save_sk = skb->sk;
153	unsigned int pkt_len;
154
155	skb->sk = sk;
156	pkt_len = bpf_prog_run_save_cb(prog: filter->prog, skb);
157	skb->sk = save_sk;
158	err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
159	}
160	rcu_read_unlock();
161
162	return err;
163	}
164	EXPORT_SYMBOL(sk_filter_trim_cap);
165
166	BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb)
167	{
168	return skb_get_poff(skb);
169	}
170
171	BPF_CALL_3(bpf_skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
172	{
173	struct nlattr *nla;
174
175	if (skb_is_nonlinear(skb))
176	return `0`;
177
178	if (skb->len < sizeof(struct nlattr))
179	return `0`;
180
181	if (a > skb->len - sizeof(struct nlattr))
182	return `0`;
183
184	nla = nla_find(head: (struct nlattr *) &skb->data[a], len: skb->len - a, attrtype: x);
185	if (nla)
186	return (void ) nla - (void* *) skb->data;
187
188	return `0`;
189	}
190
191	BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
192	{
193	struct nlattr *nla;
194
195	if (skb_is_nonlinear(skb))
196	return `0`;
197
198	if (skb->len < sizeof(struct nlattr))
199	return `0`;
200
201	if (a > skb->len - sizeof(struct nlattr))
202	return `0`;
203
204	nla = (struct nlattr *) &skb->data[a];
205	if (nla->nla_len > skb->len - a)
206	return `0`;
207
208	nla = nla_find_nested(nla, attrtype: x);
209	if (nla)
210	return (void ) nla - (void* *) skb->data;
211
212	return `0`;
213	}
214
215	BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff , skb, const* void *,
216	data, int, headlen, int, offset)
217	{
218	u8 tmp, *ptr;
219	const int len = sizeof(tmp);
220
221	if (offset >= `0`) {
222	if (headlen - offset >= len)
223	return (u8 )(data + offset);
224	if (!skb_copy_bits(skb, offset, to: &tmp, len: sizeof(tmp)))
225	return tmp;
226	} else {
227	ptr = bpf_internal_load_pointer_neg_helper(skb, k: offset, size: len);
228	if (likely(ptr))
229	return (u8 )ptr;
230	}
231
232	return -EFAULT;
233	}
234
235	BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
236	int, offset)
237	{
238	return ____bpf_skb_load_helper_8(skb, data: skb->data, headlen: skb->len - skb->data_len,
239	offset);
240	}
241
242	BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff , skb, const* void *,
243	data, int, headlen, int, offset)
244	{
245	__be16 tmp, *ptr;
246	const int len = sizeof(tmp);
247
248	if (offset >= `0`) {
249	if (headlen - offset >= len)
250	return get_unaligned_be16(p: data + offset);
251	if (!skb_copy_bits(skb, offset, to: &tmp, len: sizeof(tmp)))
252	return be16_to_cpu(tmp);
253	} else {
254	ptr = bpf_internal_load_pointer_neg_helper(skb, k: offset, size: len);
255	if (likely(ptr))
256	return get_unaligned_be16(p: ptr);
257	}
258
259	return -EFAULT;
260	}
261
262	BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
263	int, offset)
264	{
265	return ____bpf_skb_load_helper_16(skb, data: skb->data, headlen: skb->len - skb->data_len,
266	offset);
267	}
268
269	BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff , skb, const* void *,
270	data, int, headlen, int, offset)
271	{
272	__be32 tmp, *ptr;
273	const int len = sizeof(tmp);
274
275	if (likely(offset >= `0`)) {
276	if (headlen - offset >= len)
277	return get_unaligned_be32(p: data + offset);
278	if (!skb_copy_bits(skb, offset, to: &tmp, len: sizeof(tmp)))
279	return be32_to_cpu(tmp);
280	} else {
281	ptr = bpf_internal_load_pointer_neg_helper(skb, k: offset, size: len);
282	if (likely(ptr))
283	return get_unaligned_be32(p: ptr);
284	}
285
286	return -EFAULT;
287	}
288
289	BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,
290	int, offset)
291	{
292	return ____bpf_skb_load_helper_32(skb, data: skb->data, headlen: skb->len - skb->data_len,
293	offset);
294	}
295
296	static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
297	struct bpf_insn *insn_buf)
298	{
299	struct bpf_insn *insn = insn_buf;
300
301	switch (skb_field) {
302	case SKF_AD_MARK:
303	BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != `4`);
304
305	*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
306	offsetof(struct sk_buff, mark));
307	break;
308
309	case SKF_AD_PKTTYPE:
310	*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET);
311	*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
312	#ifdef __BIG_ENDIAN_BITFIELD
313	*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, `5`);
314	#endif
315	break;
316
317	case SKF_AD_QUEUE:
318	BUILD_BUG_ON(sizeof_field(struct sk_buff, queue_mapping) != `2`);
319
320	*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
321	offsetof(struct sk_buff, queue_mapping));
322	break;
323
324	case SKF_AD_VLAN_TAG:
325	BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != `2`);
326
327	/ dst_reg = (u16 ) (src_reg + offsetof(vlan_tci)) /
328	*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
329	offsetof(struct sk_buff, vlan_tci));
330	break;
331	case SKF_AD_VLAN_TAG_PRESENT:
332	BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_all) != `4`);
333	*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
334	offsetof(struct sk_buff, vlan_all));
335	*insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, `0`, `1`);
336	*insn++ = BPF_ALU32_IMM(BPF_MOV, dst_reg, `1`);
337	break;
338	}
339
340	return insn - insn_buf;
341	}
342
343	static bool convert_bpf_extensions(struct sock_filter *fp,
344	struct bpf_insn **insnp)
345	{
346	struct bpf_insn insn = insnp;
347	u32 cnt;
348
349	switch (fp->k) {
350	case SKF_AD_OFF + SKF_AD_PROTOCOL:
351	BUILD_BUG_ON(sizeof_field(struct sk_buff, protocol) != `2`);
352
353	/ A = (u16 ) (CTX + offsetof(protocol)) /
354	*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
355	offsetof(struct sk_buff, protocol));
356	/ A = ntohs(A) [emitting a nop or swap16] /
357	*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, `16`);
358	break;
359
360	case SKF_AD_OFF + SKF_AD_PKTTYPE:
361	cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn_buf: insn);
362	insn += cnt - `1`;
363	break;
364
365	case SKF_AD_OFF + SKF_AD_IFINDEX:
366	case SKF_AD_OFF + SKF_AD_HATYPE:
367	BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != `4`);
368	BUILD_BUG_ON(sizeof_field(struct net_device, type) != `2`);
369
370	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, dev),
371	BPF_REG_TMP, BPF_REG_CTX,
372	offsetof(struct sk_buff, dev));
373	/ if (tmp != 0) goto pc + 1 /
374	*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, `0`, `1`);
375	*insn++ = BPF_EXIT_INSN();
376	if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
377	*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
378	offsetof(struct net_device, ifindex));
379	else
380	*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
381	offsetof(struct net_device, type));
382	break;
383
384	case SKF_AD_OFF + SKF_AD_MARK:
385	cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn_buf: insn);
386	insn += cnt - `1`;
387	break;
388
389	case SKF_AD_OFF + SKF_AD_RXHASH:
390	BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != `4`);
391
392	*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
393	offsetof(struct sk_buff, hash));
394	break;
395
396	case SKF_AD_OFF + SKF_AD_QUEUE:
397	cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn_buf: insn);
398	insn += cnt - `1`;
399	break;
400
401	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
402	cnt = convert_skb_access(SKF_AD_VLAN_TAG,
403	BPF_REG_A, BPF_REG_CTX, insn_buf: insn);
404	insn += cnt - `1`;
405	break;
406
407	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
408	cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
409	BPF_REG_A, BPF_REG_CTX, insn_buf: insn);
410	insn += cnt - `1`;
411	break;
412
413	case SKF_AD_OFF + SKF_AD_VLAN_TPID:
414	BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_proto) != `2`);
415
416	/ A = (u16 ) (CTX + offsetof(vlan_proto)) /
417	*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
418	offsetof(struct sk_buff, vlan_proto));
419	/ A = ntohs(A) [emitting a nop or swap16] /
420	*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, `16`);
421	break;
422
423	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
424	case SKF_AD_OFF + SKF_AD_NLATTR:
425	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
426	case SKF_AD_OFF + SKF_AD_CPU:
427	case SKF_AD_OFF + SKF_AD_RANDOM:
428	/ arg1 = CTX /
429	*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
430	/ arg2 = A /
431	*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
432	/ arg3 = X /
433	*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
434	/ Emit call(arg1=CTX, arg2=A, arg3=X) /
435	switch (fp->k) {
436	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
437	*insn = BPF_EMIT_CALL(bpf_skb_get_pay_offset);
438	break;
439	case SKF_AD_OFF + SKF_AD_NLATTR:
440	*insn = BPF_EMIT_CALL(bpf_skb_get_nlattr);
441	break;
442	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
443	*insn = BPF_EMIT_CALL(bpf_skb_get_nlattr_nest);
444	break;
445	case SKF_AD_OFF + SKF_AD_CPU:
446	*insn = BPF_EMIT_CALL(bpf_get_raw_cpu_id);
447	break;
448	case SKF_AD_OFF + SKF_AD_RANDOM:
449	*insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
450	bpf_user_rnd_init_once();
451	break;
452	}
453	break;
454
455	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
456	/ A ^= X /
457	*insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
458	break;
459
460	default:
461	/ This is just a dummy call to avoid letting the compiler*
462	* evict __bpf_call_base() as an optimization. Placed here
463	* where no-one bothers.
464	*/
465	BUG_ON(__bpf_call_base(`0`, `0`, `0`, `0`, `0`) != `0`);
466	return false;
467	}
468
469	*insnp = insn;
470	return true;
471	}
472
473	static bool convert_bpf_ld_abs(struct sock_filter fp, struct* bpf_insn **insnp)
474	{
475	const bool unaligned_ok = IS_BUILTIN(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS);
476	int size = bpf_size_to_bytes(BPF_SIZE(fp->code));
477	bool endian = BPF_SIZE(fp->code) == BPF_H \|\|
478	BPF_SIZE(fp->code) == BPF_W;
479	bool indirect = BPF_MODE(fp->code) == BPF_IND;
480	const int ip_align = NET_IP_ALIGN;
481	struct bpf_insn insn = insnp;
482	int offset = fp->k;
483
484	if (!indirect &&
485	((unaligned_ok && offset >= `0`) \|\|
486	(!unaligned_ok && offset >= `0` &&
487	offset + ip_align >= `0` &&
488	offset + ip_align % size == `0`))) {
489	bool ldx_off_ok = offset <= S16_MAX;
490
491	*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
492	if (offset)
493	*insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
494	*insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
495	size, `2` + endian + (!ldx_off_ok * `2`));
496	if (ldx_off_ok) {
497	*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
498	BPF_REG_D, offset);
499	} else {
500	*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D);
501	*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset);
502	*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
503	BPF_REG_TMP, `0`);
504	}
505	if (endian)
506	insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size `8`);
507	*insn++ = BPF_JMP_A(`8`);
508	}
509
510	*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
511	*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_D);
512	*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_H);
513	if (!indirect) {
514	*insn++ = BPF_MOV64_IMM(BPF_REG_ARG4, offset);
515	} else {
516	*insn++ = BPF_MOV64_REG(BPF_REG_ARG4, BPF_REG_X);
517	if (fp->k)
518	*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG4, offset);
519	}
520
521	switch (BPF_SIZE(fp->code)) {
522	case BPF_B:
523	*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8);
524	break;
525	case BPF_H:
526	*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16);
527	break;
528	case BPF_W:
529	*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32);
530	break;
531	default:
532	return false;
533	}
534
535	*insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_A, `0`, `2`);
536	*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
537	*insn = BPF_EXIT_INSN();
538
539	*insnp = insn;
540	return true;
541	}
542
543	/**
544	* bpf_convert_filter - convert filter program
545	* @prog: the user passed filter program
546	* @len: the length of the user passed filter program
547	* @new_prog: allocated 'struct bpf_prog' or NULL
548	* @new_len: pointer to store length of converted program
549	* @seen_ld_abs: bool whether we've seen ld_abs/ind
550	*
551	* Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
552	* style extended BPF (eBPF).
553	* Conversion workflow:
554	*
555	* 1) First pass for calculating the new program length:
556	* bpf_convert_filter(old_prog, old_len, NULL, &new_len, &seen_ld_abs)
557	*
558	* 2) 2nd pass to remap in two passes: 1st pass finds new
559	* jump offsets, 2nd pass remapping:
560	* bpf_convert_filter(old_prog, old_len, new_prog, &new_len, &seen_ld_abs)
561	*/
562	static int bpf_convert_filter(struct sock_filter prog, int* len,
563	struct bpf_prog new_prog, int* *new_len,
564	bool *seen_ld_abs)
565	{
566	int new_flen = `0`, pass = `0`, target, i, stack_off;
567	struct bpf_insn new_insn, first_insn = NULL;
568	struct sock_filter *fp;
569	int *addrs = NULL;
570	u8 bpf_src;
571
572	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
573	BUILD_BUG_ON(BPF_REG_FP + `1` != MAX_BPF_REG);
574
575	if (len <= `0` \|\| len > BPF_MAXINSNS)
576	return -EINVAL;
577
578	if (new_prog) {
579	first_insn = new_prog->insnsi;
580	addrs = kcalloc(n: len, size: sizeof(*addrs),
581	GFP_KERNEL \| __GFP_NOWARN);
582	if (!addrs)
583	return -ENOMEM;
584	}
585
586	do_pass:
587	new_insn = first_insn;
588	fp = prog;
589
590	/ Classic BPF related prologue emission. /
591	if (new_prog) {
592	/ Classic BPF expects A and X to be reset first. These need*
593	* to be guaranteed to be the first two instructions.
594	*/
595	*new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
596	*new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
597
598	/ All programs must keep CTX in callee saved BPF_REG_CTX.*
599	* In eBPF case it's done by the compiler, here we need to
600	* do this ourself. Initial CTX is present in BPF_REG_ARG1.
601	*/
602	*new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
603	if (*seen_ld_abs) {
604	/ For packet access in classic BPF, cache skb->data*
605	* in callee-saved BPF R8 and skb->len - skb->data_len
606	* (headlen) in BPF R9. Since classic BPF is read-only
607	* on CTX, we only need to cache it once.
608	*/
609	new_insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, data),
610	BPF_REG_D, BPF_REG_CTX,
611	offsetof(struct sk_buff, data));
612	*new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_H, BPF_REG_CTX,
613	offsetof(struct sk_buff, len));
614	*new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_TMP, BPF_REG_CTX,
615	offsetof(struct sk_buff, data_len));
616	*new_insn++ = BPF_ALU32_REG(BPF_SUB, BPF_REG_H, BPF_REG_TMP);
617	}
618	} else {
619	new_insn += `3`;
620	}
621
622	for (i = `0`; i < len; fp++, i++) {
623	struct bpf_insn tmp_insns[`32`] = { };
624	struct bpf_insn *insn = tmp_insns;
625
626	if (addrs)
627	addrs[i] = new_insn - first_insn;
628
629	switch (fp->code) {
630	/ All arithmetic insns and skb loads map as-is. /
631	case BPF_ALU \| BPF_ADD \| BPF_X:
632	case BPF_ALU \| BPF_ADD \| BPF_K:
633	case BPF_ALU \| BPF_SUB \| BPF_X:
634	case BPF_ALU \| BPF_SUB \| BPF_K:
635	case BPF_ALU \| BPF_AND \| BPF_X:
636	case BPF_ALU \| BPF_AND \| BPF_K:
637	case BPF_ALU \| BPF_OR \| BPF_X:
638	case BPF_ALU \| BPF_OR \| BPF_K:
639	case BPF_ALU \| BPF_LSH \| BPF_X:
640	case BPF_ALU \| BPF_LSH \| BPF_K:
641	case BPF_ALU \| BPF_RSH \| BPF_X:
642	case BPF_ALU \| BPF_RSH \| BPF_K:
643	case BPF_ALU \| BPF_XOR \| BPF_X:
644	case BPF_ALU \| BPF_XOR \| BPF_K:
645	case BPF_ALU \| BPF_MUL \| BPF_X:
646	case BPF_ALU \| BPF_MUL \| BPF_K:
647	case BPF_ALU \| BPF_DIV \| BPF_X:
648	case BPF_ALU \| BPF_DIV \| BPF_K:
649	case BPF_ALU \| BPF_MOD \| BPF_X:
650	case BPF_ALU \| BPF_MOD \| BPF_K:
651	case BPF_ALU \| BPF_NEG:
652	case BPF_LD \| BPF_ABS \| BPF_W:
653	case BPF_LD \| BPF_ABS \| BPF_H:
654	case BPF_LD \| BPF_ABS \| BPF_B:
655	case BPF_LD \| BPF_IND \| BPF_W:
656	case BPF_LD \| BPF_IND \| BPF_H:
657	case BPF_LD \| BPF_IND \| BPF_B:
658	/ Check for overloaded BPF extension and*
659	* directly convert it if found, otherwise
660	* just move on with mapping.
661	*/
662	if (BPF_CLASS(fp->code) == BPF_LD &&
663	BPF_MODE(fp->code) == BPF_ABS &&
664	convert_bpf_extensions(fp, insnp: &insn))
665	break;
666	if (BPF_CLASS(fp->code) == BPF_LD &&
667	convert_bpf_ld_abs(fp, insnp: &insn)) {
668	*seen_ld_abs = true;
669	break;
670	}
671
672	if (fp->code == (BPF_ALU \| BPF_DIV \| BPF_X) \|\|
673	fp->code == (BPF_ALU \| BPF_MOD \| BPF_X)) {
674	*insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
675	/ Error with exception code on div/mod by 0.*
676	* For cBPF programs, this was always return 0.
677	*/
678	*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, `0`, `2`);
679	*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
680	*insn++ = BPF_EXIT_INSN();
681	}
682
683	*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, `0`, fp->k);
684	break;
685
686	/ Jump transformation cannot use BPF block macros*
687	* everywhere as offset calculation and target updates
688	* require a bit more work than the rest, i.e. jump
689	* opcodes map as-is, but offsets need adjustment.
690	*/
691
692	#define BPF_EMIT_JMP \
693	do { \
694	const s32 off_min = S16_MIN, off_max = S16_MAX; \
695	s32 off; \
696	\
697	if (target >= len \|\| target < 0) \
698	goto err; \
699	off = addrs ? addrs[target] - addrs[i] - 1 : 0; \
700	/* Adjust pc relative offset for 2nd or 3rd insn. */ \
701	off -= insn - tmp_insns; \
702	/* Reject anything not fitting into insn->off. */ \
703	if (off < off_min \|\| off > off_max) \
704	goto err; \
705	insn->off = off; \
706	} while (0)
707
708	case BPF_JMP \| BPF_JA:
709	target = i + fp->k + `1`;
710	insn->code = fp->code;
711	BPF_EMIT_JMP;
712	break;
713
714	case BPF_JMP \| BPF_JEQ \| BPF_K:
715	case BPF_JMP \| BPF_JEQ \| BPF_X:
716	case BPF_JMP \| BPF_JSET \| BPF_K:
717	case BPF_JMP \| BPF_JSET \| BPF_X:
718	case BPF_JMP \| BPF_JGT \| BPF_K:
719	case BPF_JMP \| BPF_JGT \| BPF_X:
720	case BPF_JMP \| BPF_JGE \| BPF_K:
721	case BPF_JMP \| BPF_JGE \| BPF_X:
722	if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < `0`) {
723	/ BPF immediates are signed, zero extend*
724	* immediate into tmp register and use it
725	* in compare insn.
726	*/
727	*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);
728
729	insn->dst_reg = BPF_REG_A;
730	insn->src_reg = BPF_REG_TMP;
731	bpf_src = BPF_X;
732	} else {
733	insn->dst_reg = BPF_REG_A;
734	insn->imm = fp->k;
735	bpf_src = BPF_SRC(fp->code);
736	insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : `0`;
737	}
738
739	/ Common case where 'jump_false' is next insn. /
740	if (fp->jf == `0`) {
741	insn->code = BPF_JMP \| BPF_OP(fp->code) \| bpf_src;
742	target = i + fp->jt + `1`;
743	BPF_EMIT_JMP;
744	break;
745	}
746
747	/ Convert some jumps when 'jump_true' is next insn. /
748	if (fp->jt == `0`) {
749	switch (BPF_OP(fp->code)) {
750	case BPF_JEQ:
751	insn->code = BPF_JMP \| BPF_JNE \| bpf_src;
752	break;
753	case BPF_JGT:
754	insn->code = BPF_JMP \| BPF_JLE \| bpf_src;
755	break;
756	case BPF_JGE:
757	insn->code = BPF_JMP \| BPF_JLT \| bpf_src;
758	break;
759	default:
760	goto jmp_rest;
761	}
762
763	target = i + fp->jf + `1`;
764	BPF_EMIT_JMP;
765	break;
766	}
767	jmp_rest:
768	/ Other jumps are mapped into two insns: Jxx and JA. /
769	target = i + fp->jt + `1`;
770	insn->code = BPF_JMP \| BPF_OP(fp->code) \| bpf_src;
771	BPF_EMIT_JMP;
772	insn++;
773
774	insn->code = BPF_JMP \| BPF_JA;
775	target = i + fp->jf + `1`;
776	BPF_EMIT_JMP;
777	break;
778
779	/ ldxb 4 * ([14] & 0xf) is remaped into 6 insns. /
780	case BPF_LDX \| BPF_MSH \| BPF_B: {
781	struct sock_filter tmp = {
782	.code = BPF_LD \| BPF_ABS \| BPF_B,
783	.k = fp->k,
784	};
785
786	*seen_ld_abs = true;
787
788	/ X = A /
789	*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
790	/ A = BPF_R0 = (u8 ) (skb->data + K) /
791	convert_bpf_ld_abs(fp: &tmp, insnp: &insn);
792	insn++;
793	/ A &= 0xf /
794	*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, `0xf`);
795	/ A <<= 2 /
796	*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, `2`);
797	/ tmp = X /
798	*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_X);
799	/ X = A /
800	*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
801	/ A = tmp /
802	*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
803	break;
804	}
805	/ RET_K is remaped into 2 insns. RET_A case doesn't need an*
806	* extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
807	*/
808	case BPF_RET \| BPF_A:
809	case BPF_RET \| BPF_K:
810	if (BPF_RVAL(fp->code) == BPF_K)
811	*insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
812	`0`, fp->k);
813	*insn = BPF_EXIT_INSN();
814	break;
815
816	/ Store to stack. /
817	case BPF_ST:
818	case BPF_STX:
819	stack_off = fp->k * `4` + `4`;
820	*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
821	BPF_ST ? BPF_REG_A : BPF_REG_X,
822	-stack_off);
823	/ check_load_and_stores() verifies that classic BPF can*
824	* load from stack only after write, so tracking
825	* stack_depth for ST\|STX insns is enough
826	*/
827	if (new_prog && new_prog->aux->stack_depth < stack_off)
828	new_prog->aux->stack_depth = stack_off;
829	break;
830
831	/ Load from stack. /
832	case BPF_LD \| BPF_MEM:
833	case BPF_LDX \| BPF_MEM:
834	stack_off = fp->k * `4` + `4`;
835	*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
836	BPF_REG_A : BPF_REG_X, BPF_REG_FP,
837	-stack_off);
838	break;
839
840	/ A = K or X = K /
841	case BPF_LD \| BPF_IMM:
842	case BPF_LDX \| BPF_IMM:
843	*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
844	BPF_REG_A : BPF_REG_X, fp->k);
845	break;
846
847	/ X = A /
848	case BPF_MISC \| BPF_TAX:
849	*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
850	break;
851
852	/ A = X /
853	case BPF_MISC \| BPF_TXA:
854	*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
855	break;
856
857	/ A = skb->len or X = skb->len /
858	case BPF_LD \| BPF_W \| BPF_LEN:
859	case BPF_LDX \| BPF_W \| BPF_LEN:
860	*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
861	BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
862	offsetof(struct sk_buff, len));
863	break;
864
865	/ Access seccomp_data fields. /
866	case BPF_LDX \| BPF_ABS \| BPF_W:
867	/ A = (u32 ) (ctx + K) /
868	*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
869	break;
870
871	/ Unknown instruction. /
872	default:
873	goto err;
874	}
875
876	insn++;
877	if (new_prog)
878	memcpy(new_insn, tmp_insns,
879	sizeof(insn) (insn - tmp_insns));
880	new_insn += insn - tmp_insns;
881	}
882
883	if (!new_prog) {
884	/ Only calculating new length. /
885	*new_len = new_insn - first_insn;
886	if (*seen_ld_abs)
887	new_len += `4`; /* Prologue bits. /
888	return `0`;
889	}
890
891	pass++;
892	if (new_flen != new_insn - first_insn) {
893	new_flen = new_insn - first_insn;
894	if (pass > `2`)
895	goto err;
896	goto do_pass;
897	}
898
899	kfree(objp: addrs);
900	BUG_ON(*new_len != new_flen);
901	return `0`;
902	err:
903	kfree(objp: addrs);
904	return -EINVAL;
905	}
906
907	/ Security:*
908	*
909	* As we dont want to clear mem[] array for each packet going through
910	* __bpf_prog_run(), we check that filter loaded by user never try to read
911	* a cell if not previously written, and we check all branches to be sure
912	* a malicious user doesn't try to abuse us.
913	*/
914	static int check_load_and_stores(const struct sock_filter filter, int* flen)
915	{
916	u16 masks, memvalid = `0`; /* One bit per cell, 16 cells /
917	int pc, ret = `0`;
918
919	BUILD_BUG_ON(BPF_MEMWORDS > `16`);
920
921	masks = kmalloc_array(n: flen, size: sizeof(*masks), GFP_KERNEL);
922	if (!masks)
923	return -ENOMEM;
924
925	memset(masks, `0xff`, flen * sizeof(*masks));
926
927	for (pc = `0`; pc < flen; pc++) {
928	memvalid &= masks[pc];
929
930	switch (filter[pc].code) {
931	case BPF_ST:
932	case BPF_STX:
933	memvalid \|= (`1` << filter[pc].k);
934	break;
935	case BPF_LD \| BPF_MEM:
936	case BPF_LDX \| BPF_MEM:
937	if (!(memvalid & (`1` << filter[pc].k))) {
938	ret = -EINVAL;
939	goto error;
940	}
941	break;
942	case BPF_JMP \| BPF_JA:
943	/ A jump must set masks on target /
944	masks[pc + `1` + filter[pc].k] &= memvalid;
945	memvalid = ~`0`;
946	break;
947	case BPF_JMP \| BPF_JEQ \| BPF_K:
948	case BPF_JMP \| BPF_JEQ \| BPF_X:
949	case BPF_JMP \| BPF_JGE \| BPF_K:
950	case BPF_JMP \| BPF_JGE \| BPF_X:
951	case BPF_JMP \| BPF_JGT \| BPF_K:
952	case BPF_JMP \| BPF_JGT \| BPF_X:
953	case BPF_JMP \| BPF_JSET \| BPF_K:
954	case BPF_JMP \| BPF_JSET \| BPF_X:
955	/ A jump must set masks on targets /
956	masks[pc + `1` + filter[pc].jt] &= memvalid;
957	masks[pc + `1` + filter[pc].jf] &= memvalid;
958	memvalid = ~`0`;
959	break;
960	}
961	}
962	error:
963	kfree(objp: masks);
964	return ret;
965	}
966
967	static bool chk_code_allowed(u16 code_to_probe)
968	{
969	static const bool codes[] = {
970	/ 32 bit ALU operations /
971	[BPF_ALU \| BPF_ADD \| BPF_K] = true,
972	[BPF_ALU \| BPF_ADD \| BPF_X] = true,
973	[BPF_ALU \| BPF_SUB \| BPF_K] = true,
974	[BPF_ALU \| BPF_SUB \| BPF_X] = true,
975	[BPF_ALU \| BPF_MUL \| BPF_K] = true,
976	[BPF_ALU \| BPF_MUL \| BPF_X] = true,
977	[BPF_ALU \| BPF_DIV \| BPF_K] = true,
978	[BPF_ALU \| BPF_DIV \| BPF_X] = true,
979	[BPF_ALU \| BPF_MOD \| BPF_K] = true,
980	[BPF_ALU \| BPF_MOD \| BPF_X] = true,
981	[BPF_ALU \| BPF_AND \| BPF_K] = true,
982	[BPF_ALU \| BPF_AND \| BPF_X] = true,
983	[BPF_ALU \| BPF_OR \| BPF_K] = true,
984	[BPF_ALU \| BPF_OR \| BPF_X] = true,
985	[BPF_ALU \| BPF_XOR \| BPF_K] = true,
986	[BPF_ALU \| BPF_XOR \| BPF_X] = true,
987	[BPF_ALU \| BPF_LSH \| BPF_K] = true,
988	[BPF_ALU \| BPF_LSH \| BPF_X] = true,
989	[BPF_ALU \| BPF_RSH \| BPF_K] = true,
990	[BPF_ALU \| BPF_RSH \| BPF_X] = true,
991	[BPF_ALU \| BPF_NEG] = true,
992	/ Load instructions /
993	[BPF_LD \| BPF_W \| BPF_ABS] = true,
994	[BPF_LD \| BPF_H \| BPF_ABS] = true,
995	[BPF_LD \| BPF_B \| BPF_ABS] = true,
996	[BPF_LD \| BPF_W \| BPF_LEN] = true,
997	[BPF_LD \| BPF_W \| BPF_IND] = true,
998	[BPF_LD \| BPF_H \| BPF_IND] = true,
999	[BPF_LD \| BPF_B \| BPF_IND] = true,
1000	[BPF_LD \| BPF_IMM] = true,
1001	[BPF_LD \| BPF_MEM] = true,
1002	[BPF_LDX \| BPF_W \| BPF_LEN] = true,
1003	[BPF_LDX \| BPF_B \| BPF_MSH] = true,
1004	[BPF_LDX \| BPF_IMM] = true,
1005	[BPF_LDX \| BPF_MEM] = true,
1006	/ Store instructions /
1007	[BPF_ST] = true,
1008	[BPF_STX] = true,
1009	/ Misc instructions /
1010	[BPF_MISC \| BPF_TAX] = true,
1011	[BPF_MISC \| BPF_TXA] = true,
1012	/ Return instructions /
1013	[BPF_RET \| BPF_K] = true,
1014	[BPF_RET \| BPF_A] = true,
1015	/ Jump instructions /
1016	[BPF_JMP \| BPF_JA] = true,
1017	[BPF_JMP \| BPF_JEQ \| BPF_K] = true,
1018	[BPF_JMP \| BPF_JEQ \| BPF_X] = true,
1019	[BPF_JMP \| BPF_JGE \| BPF_K] = true,
1020	[BPF_JMP \| BPF_JGE \| BPF_X] = true,
1021	[BPF_JMP \| BPF_JGT \| BPF_K] = true,
1022	[BPF_JMP \| BPF_JGT \| BPF_X] = true,
1023	[BPF_JMP \| BPF_JSET \| BPF_K] = true,
1024	[BPF_JMP \| BPF_JSET \| BPF_X] = true,
1025	};
1026
1027	if (code_to_probe >= ARRAY_SIZE(codes))
1028	return false;
1029
1030	return codes[code_to_probe];
1031	}
1032
1033	static bool bpf_check_basics_ok(const struct sock_filter *filter,
1034	unsigned int flen)
1035	{
1036	if (filter == NULL)
1037	return false;
1038	if (flen == `0` \|\| flen > BPF_MAXINSNS)
1039	return false;
1040
1041	return true;
1042	}
1043
1044	/**
1045	* bpf_check_classic - verify socket filter code
1046	* @filter: filter to verify
1047	* @flen: length of filter
1048	*
1049	* Check the user's filter code. If we let some ugly
1050	* filter code slip through kaboom! The filter must contain
1051	* no references or jumps that are out of range, no illegal
1052	* instructions, and must end with a RET instruction.
1053	*
1054	* All jumps are forward as they are not signed.
1055	*
1056	* Returns 0 if the rule set is legal or -EINVAL if not.
1057	*/
1058	static int bpf_check_classic(const struct sock_filter *filter,
1059	unsigned int flen)
1060	{
1061	bool anc_found;
1062	int pc;
1063
1064	/ Check the filter code now /
1065	for (pc = `0`; pc < flen; pc++) {
1066	const struct sock_filter *ftest = &filter[pc];
1067
1068	/ May we actually operate on this code? /
1069	if (!chk_code_allowed(code_to_probe: ftest->code))
1070	return -EINVAL;
1071
1072	/ Some instructions need special checks /
1073	switch (ftest->code) {
1074	case BPF_ALU \| BPF_DIV \| BPF_K:
1075	case BPF_ALU \| BPF_MOD \| BPF_K:
1076	/ Check for division by zero /
1077	if (ftest->k == `0`)
1078	return -EINVAL;
1079	break;
1080	case BPF_ALU \| BPF_LSH \| BPF_K:
1081	case BPF_ALU \| BPF_RSH \| BPF_K:
1082	if (ftest->k >= `32`)
1083	return -EINVAL;
1084	break;
1085	case BPF_LD \| BPF_MEM:
1086	case BPF_LDX \| BPF_MEM:
1087	case BPF_ST:
1088	case BPF_STX:
1089	/ Check for invalid memory addresses /
1090	if (ftest->k >= BPF_MEMWORDS)
1091	return -EINVAL;
1092	break;
1093	case BPF_JMP \| BPF_JA:
1094	/ Note, the large ftest->k might cause loops.*
1095	* Compare this with conditional jumps below,
1096	* where offsets are limited. --ANK (981016)
1097	*/
1098	if (ftest->k >= (unsigned int)(flen - pc - `1`))
1099	return -EINVAL;
1100	break;
1101	case BPF_JMP \| BPF_JEQ \| BPF_K:
1102	case BPF_JMP \| BPF_JEQ \| BPF_X:
1103	case BPF_JMP \| BPF_JGE \| BPF_K:
1104	case BPF_JMP \| BPF_JGE \| BPF_X:
1105	case BPF_JMP \| BPF_JGT \| BPF_K:
1106	case BPF_JMP \| BPF_JGT \| BPF_X:
1107	case BPF_JMP \| BPF_JSET \| BPF_K:
1108	case BPF_JMP \| BPF_JSET \| BPF_X:
1109	/ Both conditionals must be safe /
1110	if (pc + ftest->jt + `1` >= flen \|\|
1111	pc + ftest->jf + `1` >= flen)
1112	return -EINVAL;
1113	break;
1114	case BPF_LD \| BPF_W \| BPF_ABS:
1115	case BPF_LD \| BPF_H \| BPF_ABS:
1116	case BPF_LD \| BPF_B \| BPF_ABS:
1117	anc_found = false;
1118	if (bpf_anc_helper(ftest) & BPF_ANC)
1119	anc_found = true;
1120	/ Ancillary operation unknown or unsupported /
1121	if (anc_found == false && ftest->k >= SKF_AD_OFF)
1122	return -EINVAL;
1123	}
1124	}
1125
1126	/ Last instruction must be a RET code /
1127	switch (filter[flen - `1`].code) {
1128	case BPF_RET \| BPF_K:
1129	case BPF_RET \| BPF_A:
1130	return check_load_and_stores(filter, flen);
1131	}
1132
1133	return -EINVAL;
1134	}
1135
1136	static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
1137	const struct sock_fprog *fprog)
1138	{
1139	unsigned int fsize = bpf_classic_proglen(fprog);
1140	struct sock_fprog_kern *fkprog;
1141
1142	fp->orig_prog = kmalloc(size: sizeof(*fkprog), GFP_KERNEL);
1143	if (!fp->orig_prog)
1144	return -ENOMEM;
1145
1146	fkprog = fp->orig_prog;
1147	fkprog->len = fprog->len;
1148
1149	fkprog->filter = kmemdup(p: fp->insns, size: fsize,
1150	GFP_KERNEL \| __GFP_NOWARN);
1151	if (!fkprog->filter) {
1152	kfree(objp: fp->orig_prog);
1153	return -ENOMEM;
1154	}
1155
1156	return `0`;
1157	}
1158
1159	static void bpf_release_orig_filter(struct bpf_prog *fp)
1160	{
1161	struct sock_fprog_kern *fprog = fp->orig_prog;
1162
1163	if (fprog) {
1164	kfree(objp: fprog->filter);
1165	kfree(objp: fprog);
1166	}
1167	}
1168
1169	static void __bpf_prog_release(struct bpf_prog *prog)
1170	{
1171	if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
1172	bpf_prog_put(prog);
1173	} else {
1174	bpf_release_orig_filter(fp: prog);
1175	bpf_prog_free(fp: prog);
1176	}
1177	}
1178
1179	static void __sk_filter_release(struct sk_filter *fp)
1180	{
1181	__bpf_prog_release(prog: fp->prog);
1182	kfree(objp: fp);
1183	}
1184
1185	/**
1186	* sk_filter_release_rcu - Release a socket filter by rcu_head
1187	* @rcu: rcu_head that contains the sk_filter to free
1188	*/
1189	static void sk_filter_release_rcu(struct rcu_head *rcu)
1190	{
1191	struct sk_filter fp = container_of(rcu, struct* sk_filter, rcu);
1192
1193	__sk_filter_release(fp);
1194	}
1195
1196	/**
1197	* sk_filter_release - release a socket filter
1198	* @fp: filter to remove
1199	*
1200	* Remove a filter from a socket and release its resources.
1201	*/
1202	static void sk_filter_release(struct sk_filter *fp)
1203	{
1204	if (refcount_dec_and_test(r: &fp->refcnt))
1205	call_rcu(head: &fp->rcu, func: sk_filter_release_rcu);
1206	}
1207
1208	void sk_filter_uncharge(struct sock sk, struct* sk_filter *fp)
1209	{
1210	u32 filter_size = bpf_prog_size(proglen: fp->prog->len);
1211
1212	atomic_sub(i: filter_size, v: &sk->sk_omem_alloc);
1213	sk_filter_release(fp);
1214	}
1215
1216	/ try to charge the socket memory if there is space available*
1217	* return true on success
1218	*/
1219	static bool __sk_filter_charge(struct sock sk, struct* sk_filter *fp)
1220	{
1221	u32 filter_size = bpf_prog_size(proglen: fp->prog->len);
1222	int optmem_max = READ_ONCE(sysctl_optmem_max);
1223
1224	/ same check as in sock_kmalloc() /
1225	if (filter_size <= optmem_max &&
1226	atomic_read(v: &sk->sk_omem_alloc) + filter_size < optmem_max) {
1227	atomic_add(i: filter_size, v: &sk->sk_omem_alloc);
1228	return true;
1229	}
1230	return false;
1231	}
1232
1233	bool sk_filter_charge(struct sock sk, struct* sk_filter *fp)
1234	{
1235	if (!refcount_inc_not_zero(r: &fp->refcnt))
1236	return false;
1237
1238	if (!__sk_filter_charge(sk, fp)) {
1239	sk_filter_release(fp);
1240	return false;
1241	}
1242	return true;
1243	}
1244
1245	static struct bpf_prog bpf_migrate_filter(struct* bpf_prog *fp)
1246	{
1247	struct sock_filter *old_prog;
1248	struct bpf_prog *old_fp;
1249	int err, new_len, old_len = fp->len;
1250	bool seen_ld_abs = false;
1251
1252	/ We are free to overwrite insns et al right here as it won't be used at*
1253	* this point in time anymore internally after the migration to the eBPF
1254	* instruction representation.
1255	*/
1256	BUILD_BUG_ON(sizeof(struct sock_filter) !=
1257	sizeof(struct bpf_insn));
1258
1259	/ Conversion cannot happen on overlapping memory areas,*
1260	* so we need to keep the user BPF around until the 2nd
1261	* pass. At this time, the user BPF is stored in fp->insns.
1262	*/
1263	old_prog = kmemdup(p: fp->insns, size: old_len * sizeof(struct sock_filter),
1264	GFP_KERNEL \| __GFP_NOWARN);
1265	if (!old_prog) {
1266	err = -ENOMEM;
1267	goto out_err;
1268	}
1269
1270	/ 1st pass: calculate the new program length. /
1271	err = bpf_convert_filter(prog: old_prog, len: old_len, NULL, new_len: &new_len,
1272	seen_ld_abs: &seen_ld_abs);
1273	if (err)
1274	goto out_err_free;
1275
1276	/ Expand fp for appending the new filter representation. /
1277	old_fp = fp;
1278	fp = bpf_prog_realloc(fp_old: old_fp, size: bpf_prog_size(proglen: new_len), gfp_extra_flags: `0`);
1279	if (!fp) {
1280	/ The old_fp is still around in case we couldn't*
1281	* allocate new memory, so uncharge on that one.
1282	*/
1283	fp = old_fp;
1284	err = -ENOMEM;
1285	goto out_err_free;
1286	}
1287
1288	fp->len = new_len;
1289
1290	/ 2nd pass: remap sock_filter insns into bpf_insn insns. /
1291	err = bpf_convert_filter(prog: old_prog, len: old_len, new_prog: fp, new_len: &new_len,
1292	seen_ld_abs: &seen_ld_abs);
1293	if (err)
1294	/ 2nd bpf_convert_filter() can fail only if it fails*
1295	* to allocate memory, remapping must succeed. Note,
1296	* that at this time old_fp has already been released
1297	* by krealloc().
1298	*/
1299	goto out_err_free;
1300
1301	fp = bpf_prog_select_runtime(fp, err: &err);
1302	if (err)
1303	goto out_err_free;
1304
1305	kfree(objp: old_prog);
1306	return fp;
1307
1308	out_err_free:
1309	kfree(objp: old_prog);
1310	out_err:
1311	__bpf_prog_release(prog: fp);
1312	return ERR_PTR(error: err);
1313	}
1314
1315	static struct bpf_prog bpf_prepare_filter(struct* bpf_prog *fp,
1316	bpf_aux_classic_check_t trans)
1317	{
1318	int err;
1319
1320	fp->bpf_func = NULL;
1321	fp->jited = `0`;
1322
1323	err = bpf_check_classic(filter: fp->insns, flen: fp->len);
1324	if (err) {
1325	__bpf_prog_release(prog: fp);
1326	return ERR_PTR(error: err);
1327	}
1328
1329	/ There might be additional checks and transformations*
1330	* needed on classic filters, f.e. in case of seccomp.
1331	*/
1332	if (trans) {
1333	err = trans(fp->insns, fp->len);
1334	if (err) {
1335	__bpf_prog_release(prog: fp);
1336	return ERR_PTR(error: err);
1337	}
1338	}
1339
1340	/ Probe if we can JIT compile the filter and if so, do*
1341	* the compilation of the filter.
1342	*/
1343	bpf_jit_compile(prog: fp);
1344
1345	/ JIT compiler couldn't process this filter, so do the eBPF translation*
1346	* for the optimized interpreter.
1347	*/
1348	if (!fp->jited)
1349	fp = bpf_migrate_filter(fp);
1350
1351	return fp;
1352	}
1353
1354	/**
1355	* bpf_prog_create - create an unattached filter
1356	* @pfp: the unattached filter that is created
1357	* @fprog: the filter program
1358	*
1359	* Create a filter independent of any socket. We first run some
1360	* sanity checks on it to make sure it does not explode on us later.
1361	* If an error occurs or there is insufficient memory for the filter
1362	* a negative errno code is returned. On success the return is zero.
1363	*/
1364	int bpf_prog_create(struct bpf_prog pfp, struct** sock_fprog_kern *fprog)
1365	{
1366	unsigned int fsize = bpf_classic_proglen(fprog);
1367	struct bpf_prog *fp;
1368
1369	/ Make sure new filter is there and in the right amounts. /
1370	if (!bpf_check_basics_ok(filter: fprog->filter, flen: fprog->len))
1371	return -EINVAL;
1372
1373	fp = bpf_prog_alloc(size: bpf_prog_size(proglen: fprog->len), gfp_extra_flags: `0`);
1374	if (!fp)
1375	return -ENOMEM;
1376
1377	memcpy(fp->insns, fprog->filter, fsize);
1378
1379	fp->len = fprog->len;
1380	/ Since unattached filters are not copied back to user*
1381	* space through sk_get_filter(), we do not need to hold
1382	* a copy here, and can spare us the work.
1383	*/
1384	fp->orig_prog = NULL;
1385
1386	/ bpf_prepare_filter() already takes care of freeing*
1387	* memory in case something goes wrong.
1388	*/
1389	fp = bpf_prepare_filter(fp, NULL);
1390	if (IS_ERR(ptr: fp))
1391	return PTR_ERR(ptr: fp);
1392
1393	*pfp = fp;
1394	return `0`;
1395	}
1396	EXPORT_SYMBOL_GPL(bpf_prog_create);
1397
1398	/**
1399	* bpf_prog_create_from_user - create an unattached filter from user buffer
1400	* @pfp: the unattached filter that is created
1401	* @fprog: the filter program
1402	* @trans: post-classic verifier transformation handler
1403	* @save_orig: save classic BPF program
1404	*
1405	* This function effectively does the same as bpf_prog_create(), only
1406	* that it builds up its insns buffer from user space provided buffer.
1407	* It also allows for passing a bpf_aux_classic_check_t handler.
1408	*/
1409	int bpf_prog_create_from_user(struct bpf_prog pfp, struct** sock_fprog *fprog,
1410	bpf_aux_classic_check_t trans, bool save_orig)
1411	{
1412	unsigned int fsize = bpf_classic_proglen(fprog);
1413	struct bpf_prog *fp;
1414	int err;
1415
1416	/ Make sure new filter is there and in the right amounts. /
1417	if (!bpf_check_basics_ok(filter: fprog->filter, flen: fprog->len))
1418	return -EINVAL;
1419
1420	fp = bpf_prog_alloc(size: bpf_prog_size(proglen: fprog->len), gfp_extra_flags: `0`);
1421	if (!fp)
1422	return -ENOMEM;
1423
1424	if (copy_from_user(to: fp->insns, from: fprog->filter, n: fsize)) {
1425	__bpf_prog_free(fp);
1426	return -EFAULT;
1427	}
1428
1429	fp->len = fprog->len;
1430	fp->orig_prog = NULL;
1431
1432	if (save_orig) {
1433	err = bpf_prog_store_orig_filter(fp, fprog);
1434	if (err) {
1435	__bpf_prog_free(fp);
1436	return -ENOMEM;
1437	}
1438	}
1439
1440	/ bpf_prepare_filter() already takes care of freeing*
1441	* memory in case something goes wrong.
1442	*/
1443	fp = bpf_prepare_filter(fp, trans);
1444	if (IS_ERR(ptr: fp))
1445	return PTR_ERR(ptr: fp);
1446
1447	*pfp = fp;
1448	return `0`;
1449	}
1450	EXPORT_SYMBOL_GPL(bpf_prog_create_from_user);
1451
1452	void bpf_prog_destroy(struct bpf_prog *fp)
1453	{
1454	__bpf_prog_release(prog: fp);
1455	}
1456	EXPORT_SYMBOL_GPL(bpf_prog_destroy);
1457
1458	static int __sk_attach_prog(struct bpf_prog prog, struct* sock *sk)
1459	{
1460	struct sk_filter fp, old_fp;
1461
1462	fp = kmalloc(size: sizeof(*fp), GFP_KERNEL);
1463	if (!fp)
1464	return -ENOMEM;
1465
1466	fp->prog = prog;
1467
1468	if (!__sk_filter_charge(sk, fp)) {
1469	kfree(objp: fp);
1470	return -ENOMEM;
1471	}
1472	refcount_set(r: &fp->refcnt, n: `1`);
1473
1474	old_fp = rcu_dereference_protected(sk->sk_filter,
1475	lockdep_sock_is_held(sk));
1476	rcu_assign_pointer(sk->sk_filter, fp);
1477
1478	if (old_fp)
1479	sk_filter_uncharge(sk, fp: old_fp);
1480
1481	return `0`;
1482	}
1483
1484	static
1485	struct bpf_prog __get_filter(struct* sock_fprog fprog, struct* sock *sk)
1486	{
1487	unsigned int fsize = bpf_classic_proglen(fprog);
1488	struct bpf_prog *prog;
1489	int err;
1490
1491	if (sock_flag(sk, flag: SOCK_FILTER_LOCKED))
1492	return ERR_PTR(error: -EPERM);
1493
1494	/ Make sure new filter is there and in the right amounts. /
1495	if (!bpf_check_basics_ok(filter: fprog->filter, flen: fprog->len))
1496	return ERR_PTR(error: -EINVAL);
1497
1498	prog = bpf_prog_alloc(size: bpf_prog_size(proglen: fprog->len), gfp_extra_flags: `0`);
1499	if (!prog)
1500	return ERR_PTR(error: -ENOMEM);
1501
1502	if (copy_from_user(to: prog->insns, from: fprog->filter, n: fsize)) {
1503	__bpf_prog_free(fp: prog);
1504	return ERR_PTR(error: -EFAULT);
1505	}
1506
1507	prog->len = fprog->len;
1508
1509	err = bpf_prog_store_orig_filter(fp: prog, fprog);
1510	if (err) {
1511	__bpf_prog_free(fp: prog);
1512	return ERR_PTR(error: -ENOMEM);
1513	}
1514
1515	/ bpf_prepare_filter() already takes care of freeing*
1516	* memory in case something goes wrong.
1517	*/
1518	return bpf_prepare_filter(fp: prog, NULL);
1519	}
1520
1521	/**
1522	* sk_attach_filter - attach a socket filter
1523	* @fprog: the filter program
1524	* @sk: the socket to use
1525	*
1526	* Attach the user's filter code. We first run some sanity checks on
1527	* it to make sure it does not explode on us later. If an error
1528	* occurs or there is insufficient memory for the filter a negative
1529	* errno code is returned. On success the return is zero.
1530	*/
1531	int sk_attach_filter(struct sock_fprog fprog, struct* sock *sk)
1532	{
1533	struct bpf_prog *prog = __get_filter(fprog, sk);
1534	int err;
1535
1536	if (IS_ERR(ptr: prog))
1537	return PTR_ERR(ptr: prog);
1538
1539	err = __sk_attach_prog(prog, sk);
1540	if (err < `0`) {
1541	__bpf_prog_release(prog);
1542	return err;
1543	}
1544
1545	return `0`;
1546	}
1547	EXPORT_SYMBOL_GPL(sk_attach_filter);
1548
1549	int sk_reuseport_attach_filter(struct sock_fprog fprog, struct* sock *sk)
1550	{
1551	struct bpf_prog *prog = __get_filter(fprog, sk);
1552	int err;
1553
1554	if (IS_ERR(ptr: prog))
1555	return PTR_ERR(ptr: prog);
1556
1557	if (bpf_prog_size(proglen: prog->len) > READ_ONCE(sysctl_optmem_max))
1558	err = -ENOMEM;
1559	else
1560	err = reuseport_attach_prog(sk, prog);
1561
1562	if (err)
1563	__bpf_prog_release(prog);
1564
1565	return err;
1566	}
1567
1568	static struct bpf_prog __get_bpf(u32 ufd, struct* sock *sk)
1569	{
1570	if (sock_flag(sk, flag: SOCK_FILTER_LOCKED))
1571	return ERR_PTR(error: -EPERM);
1572
1573	return bpf_prog_get_type(ufd, type: BPF_PROG_TYPE_SOCKET_FILTER);
1574	}
1575
1576	int sk_attach_bpf(u32 ufd, struct sock *sk)
1577	{
1578	struct bpf_prog *prog = __get_bpf(ufd, sk);
1579	int err;
1580
1581	if (IS_ERR(ptr: prog))
1582	return PTR_ERR(ptr: prog);
1583
1584	err = __sk_attach_prog(prog, sk);
1585	if (err < `0`) {
1586	bpf_prog_put(prog);
1587	return err;
1588	}
1589
1590	return `0`;
1591	}
1592
1593	int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
1594	{
1595	struct bpf_prog *prog;
1596	int err;
1597
1598	if (sock_flag(sk, flag: SOCK_FILTER_LOCKED))
1599	return -EPERM;
1600
1601	prog = bpf_prog_get_type(ufd, type: BPF_PROG_TYPE_SOCKET_FILTER);
1602	if (PTR_ERR(ptr: prog) == -EINVAL)
1603	prog = bpf_prog_get_type(ufd, type: BPF_PROG_TYPE_SK_REUSEPORT);
1604	if (IS_ERR(ptr: prog))
1605	return PTR_ERR(ptr: prog);
1606
1607	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) {
1608	/ Like other non BPF_PROG_TYPE_SOCKET_FILTER*
1609	* bpf prog (e.g. sockmap). It depends on the
1610	* limitation imposed by bpf_prog_load().
1611	* Hence, sysctl_optmem_max is not checked.
1612	*/
1613	if ((sk->sk_type != SOCK_STREAM &&
1614	sk->sk_type != SOCK_DGRAM) \|\|
1615	(sk->sk_protocol != IPPROTO_UDP &&
1616	sk->sk_protocol != IPPROTO_TCP) \|\|
1617	(sk->sk_family != AF_INET &&
1618	sk->sk_family != AF_INET6)) {
1619	err = -ENOTSUPP;
1620	goto err_prog_put;
1621	}
1622	} else {
1623	/ BPF_PROG_TYPE_SOCKET_FILTER /
1624	if (bpf_prog_size(proglen: prog->len) > READ_ONCE(sysctl_optmem_max)) {
1625	err = -ENOMEM;
1626	goto err_prog_put;
1627	}
1628	}
1629
1630	err = reuseport_attach_prog(sk, prog);
1631	err_prog_put:
1632	if (err)
1633	bpf_prog_put(prog);
1634
1635	return err;
1636	}
1637
1638	void sk_reuseport_prog_free(struct bpf_prog *prog)
1639	{
1640	if (!prog)
1641	return;
1642
1643	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
1644	bpf_prog_put(prog);
1645	else
1646	bpf_prog_destroy(prog);
1647	}
1648
1649	struct bpf_scratchpad {
1650	union {
1651	__be32 diff[MAX_BPF_STACK / sizeof(__be32)];
1652	u8 buff[MAX_BPF_STACK];
1653	};
1654	};
1655
1656	static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
1657
1658	static inline int __bpf_try_make_writable(struct sk_buff *skb,
1659	unsigned int write_len)
1660	{
1661	return skb_ensure_writable(skb, write_len);
1662	}
1663
1664	static inline int bpf_try_make_writable(struct sk_buff *skb,
1665	unsigned int write_len)
1666	{
1667	int err = __bpf_try_make_writable(skb, write_len);
1668
1669	bpf_compute_data_pointers(skb);
1670	return err;
1671	}
1672
1673	static int bpf_try_make_head_writable(struct sk_buff *skb)
1674	{
1675	return bpf_try_make_writable(skb, write_len: skb_headlen(skb));
1676	}
1677
1678	static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
1679	{
1680	if (skb_at_tc_ingress(skb))
1681	skb_postpush_rcsum(skb, start: skb_mac_header(skb), len: skb->mac_len);
1682	}
1683
1684	static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
1685	{
1686	if (skb_at_tc_ingress(skb))
1687	skb_postpull_rcsum(skb, start: skb_mac_header(skb), len: skb->mac_len);
1688	}
1689
1690	BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
1691	const void *, from, u32, len, u64, flags)
1692	{
1693	void *ptr;
1694
1695	if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM \| BPF_F_INVALIDATE_HASH)))
1696	return -EINVAL;
1697	if (unlikely(offset > INT_MAX))
1698	return -EFAULT;
1699	if (unlikely(bpf_try_make_writable(skb, offset + len)))
1700	return -EFAULT;
1701
1702	ptr = skb->data + offset;
1703	if (flags & BPF_F_RECOMPUTE_CSUM)
1704	__skb_postpull_rcsum(skb, start: ptr, len, off: offset);
1705
1706	memcpy(ptr, from, len);
1707
1708	if (flags & BPF_F_RECOMPUTE_CSUM)
1709	__skb_postpush_rcsum(skb, start: ptr, len, off: offset);
1710	if (flags & BPF_F_INVALIDATE_HASH)
1711	skb_clear_hash(skb);
1712
1713	return `0`;
1714	}
1715
1716	static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
1717	.func = bpf_skb_store_bytes,
1718	.gpl_only = false,
1719	.ret_type = RET_INTEGER,
1720	.arg1_type = ARG_PTR_TO_CTX,
1721	.arg2_type = ARG_ANYTHING,
1722	.arg3_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
1723	.arg4_type = ARG_CONST_SIZE,
1724	.arg5_type = ARG_ANYTHING,
1725	};
1726
1727	int __bpf_skb_store_bytes(struct sk_buff skb, u32 offset, const* void *from,
1728	u32 len, u64 flags)
1729	{
1730	return ____bpf_skb_store_bytes(skb, offset, from, len, flags);
1731	}
1732
1733	BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
1734	void *, to, u32, len)
1735	{
1736	void *ptr;
1737
1738	if (unlikely(offset > INT_MAX))
1739	goto err_clear;
1740
1741	ptr = skb_header_pointer(skb, offset, len, buffer: to);
1742	if (unlikely(!ptr))
1743	goto err_clear;
1744	if (ptr != to)
1745	memcpy(to, ptr, len);
1746
1747	return `0`;
1748	err_clear:
1749	memset(to, `0`, len);
1750	return -EFAULT;
1751	}
1752
1753	static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
1754	.func = bpf_skb_load_bytes,
1755	.gpl_only = false,
1756	.ret_type = RET_INTEGER,
1757	.arg1_type = ARG_PTR_TO_CTX,
1758	.arg2_type = ARG_ANYTHING,
1759	.arg3_type = ARG_PTR_TO_UNINIT_MEM,
1760	.arg4_type = ARG_CONST_SIZE,
1761	};
1762
1763	int __bpf_skb_load_bytes(const struct sk_buff skb, u32 offset, void* *to, u32 len)
1764	{
1765	return ____bpf_skb_load_bytes(skb, offset, to, len);
1766	}
1767
1768	BPF_CALL_4(bpf_flow_dissector_load_bytes,
1769	const struct bpf_flow_dissector *, ctx, u32, offset,
1770	void *, to, u32, len)
1771	{
1772	void *ptr;
1773
1774	if (unlikely(offset > `0xffff`))
1775	goto err_clear;
1776
1777	if (unlikely(!ctx->skb))
1778	goto err_clear;
1779
1780	ptr = skb_header_pointer(skb: ctx->skb, offset, len, buffer: to);
1781	if (unlikely(!ptr))
1782	goto err_clear;
1783	if (ptr != to)
1784	memcpy(to, ptr, len);
1785
1786	return `0`;
1787	err_clear:
1788	memset(to, `0`, len);
1789	return -EFAULT;
1790	}
1791
1792	static const struct bpf_func_proto bpf_flow_dissector_load_bytes_proto = {
1793	.func = bpf_flow_dissector_load_bytes,
1794	.gpl_only = false,
1795	.ret_type = RET_INTEGER,
1796	.arg1_type = ARG_PTR_TO_CTX,
1797	.arg2_type = ARG_ANYTHING,
1798	.arg3_type = ARG_PTR_TO_UNINIT_MEM,
1799	.arg4_type = ARG_CONST_SIZE,
1800	};
1801
1802	BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
1803	u32, offset, void *, to, u32, len, u32, start_header)
1804	{
1805	u8 *end = skb_tail_pointer(skb);
1806	u8 start, ptr;
1807
1808	if (unlikely(offset > `0xffff`))
1809	goto err_clear;
1810
1811	switch (start_header) {
1812	case BPF_HDR_START_MAC:
1813	if (unlikely(!skb_mac_header_was_set(skb)))
1814	goto err_clear;
1815	start = skb_mac_header(skb);
1816	break;
1817	case BPF_HDR_START_NET:
1818	start = skb_network_header(skb);
1819	break;
1820	default:
1821	goto err_clear;
1822	}
1823
1824	ptr = start + offset;
1825
1826	if (likely(ptr + len <= end)) {
1827	memcpy(to, ptr, len);
1828	return `0`;
1829	}
1830
1831	err_clear:
1832	memset(to, `0`, len);
1833	return -EFAULT;
1834	}
1835
1836	static const struct bpf_func_proto bpf_skb_load_bytes_relative_proto = {
1837	.func = bpf_skb_load_bytes_relative,
1838	.gpl_only = false,
1839	.ret_type = RET_INTEGER,
1840	.arg1_type = ARG_PTR_TO_CTX,
1841	.arg2_type = ARG_ANYTHING,
1842	.arg3_type = ARG_PTR_TO_UNINIT_MEM,
1843	.arg4_type = ARG_CONST_SIZE,
1844	.arg5_type = ARG_ANYTHING,
1845	};
1846
1847	BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
1848	{
1849	/ Idea is the following: should the needed direct read/write*
1850	* test fail during runtime, we can pull in more data and redo
1851	* again, since implicitly, we invalidate previous checks here.
1852	*
1853	* Or, since we know how much we need to make read/writeable,
1854	* this can be done once at the program beginning for direct
1855	* access case. By this we overcome limitations of only current
1856	* headroom being accessible.
1857	*/
1858	return bpf_try_make_writable(skb, write_len: len ? : skb_headlen(skb));
1859	}
1860
1861	static const struct bpf_func_proto bpf_skb_pull_data_proto = {
1862	.func = bpf_skb_pull_data,
1863	.gpl_only = false,
1864	.ret_type = RET_INTEGER,
1865	.arg1_type = ARG_PTR_TO_CTX,
1866	.arg2_type = ARG_ANYTHING,
1867	};
1868
1869	BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
1870	{
1871	return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
1872	}
1873
1874	static const struct bpf_func_proto bpf_sk_fullsock_proto = {
1875	.func = bpf_sk_fullsock,
1876	.gpl_only = false,
1877	.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
1878	.arg1_type = ARG_PTR_TO_SOCK_COMMON,
1879	};
1880
1881	static inline int sk_skb_try_make_writable(struct sk_buff *skb,
1882	unsigned int write_len)
1883	{
1884	return __bpf_try_make_writable(skb, write_len);
1885	}
1886
1887	BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
1888	{
1889	/ Idea is the following: should the needed direct read/write*
1890	* test fail during runtime, we can pull in more data and redo
1891	* again, since implicitly, we invalidate previous checks here.
1892	*
1893	* Or, since we know how much we need to make read/writeable,
1894	* this can be done once at the program beginning for direct
1895	* access case. By this we overcome limitations of only current
1896	* headroom being accessible.
1897	*/
1898	return sk_skb_try_make_writable(skb, write_len: len ? : skb_headlen(skb));
1899	}
1900
1901	static const struct bpf_func_proto sk_skb_pull_data_proto = {
1902	.func = sk_skb_pull_data,
1903	.gpl_only = false,
1904	.ret_type = RET_INTEGER,
1905	.arg1_type = ARG_PTR_TO_CTX,
1906	.arg2_type = ARG_ANYTHING,
1907	};
1908
1909	BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
1910	u64, from, u64, to, u64, flags)
1911	{
1912	__sum16 *ptr;
1913
1914	if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
1915	return -EINVAL;
1916	if (unlikely(offset > `0xffff` \|\| offset & `1`))
1917	return -EFAULT;
1918	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
1919	return -EFAULT;
1920
1921	ptr = (__sum16 *)(skb->data + offset);
1922	switch (flags & BPF_F_HDR_FIELD_MASK) {
1923	case `0`:
1924	if (unlikely(from != `0`))
1925	return -EINVAL;
1926
1927	csum_replace_by_diff(sum: ptr, diff: to);
1928	break;
1929	case `2`:
1930	csum_replace2(sum: ptr, old: from, new: to);
1931	break;
1932	case `4`:
1933	csum_replace4(sum: ptr, from, to);
1934	break;
1935	default:
1936	return -EINVAL;
1937	}
1938
1939	return `0`;
1940	}
1941
1942	static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
1943	.func = bpf_l3_csum_replace,
1944	.gpl_only = false,
1945	.ret_type = RET_INTEGER,
1946	.arg1_type = ARG_PTR_TO_CTX,
1947	.arg2_type = ARG_ANYTHING,
1948	.arg3_type = ARG_ANYTHING,
1949	.arg4_type = ARG_ANYTHING,
1950	.arg5_type = ARG_ANYTHING,
1951	};
1952
1953	BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
1954	u64, from, u64, to, u64, flags)
1955	{
1956	bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
1957	bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
1958	bool do_mforce = flags & BPF_F_MARK_ENFORCE;
1959	__sum16 *ptr;
1960
1961	if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 \| BPF_F_MARK_ENFORCE \|
1962	BPF_F_PSEUDO_HDR \| BPF_F_HDR_FIELD_MASK)))
1963	return -EINVAL;
1964	if (unlikely(offset > `0xffff` \|\| offset & `1`))
1965	return -EFAULT;
1966	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
1967	return -EFAULT;
1968
1969	ptr = (__sum16 *)(skb->data + offset);
1970	if (is_mmzero && !do_mforce && !*ptr)
1971	return `0`;
1972
1973	switch (flags & BPF_F_HDR_FIELD_MASK) {
1974	case `0`:
1975	if (unlikely(from != `0`))
1976	return -EINVAL;
1977
1978	inet_proto_csum_replace_by_diff(sum: ptr, skb, diff: to, pseudohdr: is_pseudo);
1979	break;
1980	case `2`:
1981	inet_proto_csum_replace2(sum: ptr, skb, from, to, pseudohdr: is_pseudo);
1982	break;
1983	case `4`:
1984	inet_proto_csum_replace4(sum: ptr, skb, from, to, pseudohdr: is_pseudo);
1985	break;
1986	default:
1987	return -EINVAL;
1988	}
1989
1990	if (is_mmzero && !*ptr)
1991	*ptr = CSUM_MANGLED_0;
1992	return `0`;
1993	}
1994
1995	static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
1996	.func = bpf_l4_csum_replace,
1997	.gpl_only = false,
1998	.ret_type = RET_INTEGER,
1999	.arg1_type = ARG_PTR_TO_CTX,
2000	.arg2_type = ARG_ANYTHING,
2001	.arg3_type = ARG_ANYTHING,
2002	.arg4_type = ARG_ANYTHING,
2003	.arg5_type = ARG_ANYTHING,
2004	};
2005
2006	BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
2007	__be32 *, to, u32, to_size, __wsum, seed)
2008	{
2009	struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
2010	u32 diff_size = from_size + to_size;
2011	int i, j = `0`;
2012
2013	/ This is quite flexible, some examples:*
2014	*
2015	* from_size == 0, to_size > 0, seed := csum --> pushing data
2016	* from_size > 0, to_size == 0, seed := csum --> pulling data
2017	* from_size > 0, to_size > 0, seed := 0 --> diffing data
2018	*
2019	* Even for diffing, from_size and to_size don't need to be equal.
2020	*/
2021	if (unlikely(((from_size \| to_size) & (sizeof(__be32) - `1`)) \|\|
2022	diff_size > sizeof(sp->diff)))
2023	return -EINVAL;
2024
2025	for (i = `0`; i < from_size / sizeof(__be32); i++, j++)
2026	sp->diff[j] = ~from[i];
2027	for (i = `0`; i < to_size / sizeof(__be32); i++, j++)
2028	sp->diff[j] = to[i];
2029
2030	return csum_partial(buff: sp->diff, len: diff_size, sum: seed);
2031	}
2032
2033	static const struct bpf_func_proto bpf_csum_diff_proto = {
2034	.func = bpf_csum_diff,
2035	.gpl_only = false,
2036	.pkt_access = true,
2037	.ret_type = RET_INTEGER,
2038	.arg1_type = ARG_PTR_TO_MEM \| PTR_MAYBE_NULL \| MEM_RDONLY,
2039	.arg2_type = ARG_CONST_SIZE_OR_ZERO,
2040	.arg3_type = ARG_PTR_TO_MEM \| PTR_MAYBE_NULL \| MEM_RDONLY,
2041	.arg4_type = ARG_CONST_SIZE_OR_ZERO,
2042	.arg5_type = ARG_ANYTHING,
2043	};
2044
2045	BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
2046	{
2047	/ The interface is to be used in combination with bpf_csum_diff()*
2048	* for direct packet writes. csum rotation for alignment as well
2049	* as emulating csum_sub() can be done from the eBPF program.
2050	*/
2051	if (skb->ip_summed == CHECKSUM_COMPLETE)
2052	return (skb->csum = csum_add(csum: skb->csum, addend: csum));
2053
2054	return -ENOTSUPP;
2055	}
2056
2057	static const struct bpf_func_proto bpf_csum_update_proto = {
2058	.func = bpf_csum_update,
2059	.gpl_only = false,
2060	.ret_type = RET_INTEGER,
2061	.arg1_type = ARG_PTR_TO_CTX,
2062	.arg2_type = ARG_ANYTHING,
2063	};
2064
2065	BPF_CALL_2(bpf_csum_level, struct sk_buff *, skb, u64, level)
2066	{
2067	/ The interface is to be used in combination with bpf_skb_adjust_room()*
2068	* for encap/decap of packet headers when BPF_F_ADJ_ROOM_NO_CSUM_RESET
2069	* is passed as flags, for example.
2070	*/
2071	switch (level) {
2072	case BPF_CSUM_LEVEL_INC:
2073	__skb_incr_checksum_unnecessary(skb);
2074	break;
2075	case BPF_CSUM_LEVEL_DEC:
2076	__skb_decr_checksum_unnecessary(skb);
2077	break;
2078	case BPF_CSUM_LEVEL_RESET:
2079	__skb_reset_checksum_unnecessary(skb);
2080	break;
2081	case BPF_CSUM_LEVEL_QUERY:
2082	return skb->ip_summed == CHECKSUM_UNNECESSARY ?
2083	skb->csum_level : -EACCES;
2084	default:
2085	return -EINVAL;
2086	}
2087
2088	return `0`;
2089	}
2090
2091	static const struct bpf_func_proto bpf_csum_level_proto = {
2092	.func = bpf_csum_level,
2093	.gpl_only = false,
2094	.ret_type = RET_INTEGER,
2095	.arg1_type = ARG_PTR_TO_CTX,
2096	.arg2_type = ARG_ANYTHING,
2097	};
2098
2099	static inline int __bpf_rx_skb(struct net_device dev, struct* sk_buff *skb)
2100	{
2101	return dev_forward_skb_nomtu(dev, skb);
2102	}
2103
2104	static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
2105	struct sk_buff *skb)
2106	{
2107	int ret = ____dev_forward_skb(dev, skb, check_mtu: false);
2108
2109	if (likely(!ret)) {
2110	skb->dev = dev;
2111	ret = netif_rx(skb);
2112	}
2113
2114	return ret;
2115	}
2116
2117	static inline int __bpf_tx_skb(struct net_device dev, struct* sk_buff *skb)
2118	{
2119	int ret;
2120
2121	if (dev_xmit_recursion()) {
2122	net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2123	kfree_skb(skb);
2124	return -ENETDOWN;
2125	}
2126
2127	skb->dev = dev;
2128	skb_set_redirected_noclear(skb, from_ingress: skb_at_tc_ingress(skb));
2129	skb_clear_tstamp(skb);
2130
2131	dev_xmit_recursion_inc();
2132	ret = dev_queue_xmit(skb);
2133	dev_xmit_recursion_dec();
2134
2135	return ret;
2136	}
2137
2138	static int __bpf_redirect_no_mac(struct sk_buff skb, struct* net_device *dev,
2139	u32 flags)
2140	{
2141	unsigned int mlen = skb_network_offset(skb);
2142
2143	if (unlikely(skb->len <= mlen)) {
2144	kfree_skb(skb);
2145	return -ERANGE;
2146	}
2147
2148	if (mlen) {
2149	__skb_pull(skb, len: mlen);
2150
2151	/ At ingress, the mac header has already been pulled once.*
2152	* At egress, skb_pospull_rcsum has to be done in case that
2153	* the skb is originated from ingress (i.e. a forwarded skb)
2154	* to ensure that rcsum starts at net header.
2155	*/
2156	if (!skb_at_tc_ingress(skb))
2157	skb_postpull_rcsum(skb, start: skb_mac_header(skb), len: mlen);
2158	}
2159	skb_pop_mac_header(skb);
2160	skb_reset_mac_len(skb);
2161	return flags & BPF_F_INGRESS ?
2162	__bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
2163	}
2164
2165	static int __bpf_redirect_common(struct sk_buff skb, struct* net_device *dev,
2166	u32 flags)
2167	{
2168	/ Verify that a link layer header is carried /
2169	if (unlikely(skb->mac_header >= skb->network_header \|\| skb->len == `0`)) {
2170	kfree_skb(skb);
2171	return -ERANGE;
2172	}
2173
2174	bpf_push_mac_rcsum(skb);
2175	return flags & BPF_F_INGRESS ?
2176	__bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
2177	}
2178
2179	static int __bpf_redirect(struct sk_buff skb, struct* net_device *dev,
2180	u32 flags)
2181	{
2182	if (dev_is_mac_header_xmit(dev))
2183	return __bpf_redirect_common(skb, dev, flags);
2184	else
2185	return __bpf_redirect_no_mac(skb, dev, flags);
2186	}
2187
2188	#if IS_ENABLED(CONFIG_IPV6)
2189	static int bpf_out_neigh_v6(struct net net, struct* sk_buff *skb,
2190	struct net_device dev, struct* bpf_nh_params *nh)
2191	{
2192	u32 hh_len = LL_RESERVED_SPACE(dev);
2193	const struct in6_addr *nexthop;
2194	struct dst_entry *dst = NULL;
2195	struct neighbour *neigh;
2196
2197	if (dev_xmit_recursion()) {
2198	net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2199	goto out_drop;
2200	}
2201
2202	skb->dev = dev;
2203	skb_clear_tstamp(skb);
2204
2205	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
2206	skb = skb_expand_head(skb, headroom: hh_len);
2207	if (!skb)
2208	return -ENOMEM;
2209	}
2210
2211	rcu_read_lock();
2212	if (!nh) {
2213	dst = skb_dst(skb);
2214	nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
2215	daddr: &ipv6_hdr(skb)->daddr);
2216	} else {
2217	nexthop = &nh->ipv6_nh;
2218	}
2219	neigh = ip_neigh_gw6(dev, addr: nexthop);
2220	if (likely(!IS_ERR(neigh))) {
2221	int ret;
2222
2223	sock_confirm_neigh(skb, n: neigh);
2224	local_bh_disable();
2225	dev_xmit_recursion_inc();
2226	ret = neigh_output(n: neigh, skb, skip_cache: false);
2227	dev_xmit_recursion_dec();
2228	local_bh_enable();
2229	rcu_read_unlock();
2230	return ret;
2231	}
2232	rcu_read_unlock_bh();
2233	if (dst)
2234	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
2235	out_drop:
2236	kfree_skb(skb);
2237	return -ENETDOWN;
2238	}
2239
2240	static int __bpf_redirect_neigh_v6(struct sk_buff skb, struct* net_device *dev,
2241	struct bpf_nh_params *nh)
2242	{
2243	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
2244	struct net *net = dev_net(dev);
2245	int err, ret = NET_XMIT_DROP;
2246
2247	if (!nh) {
2248	struct dst_entry *dst;
2249	struct flowi6 fl6 = {
2250	.flowi6_flags = FLOWI_FLAG_ANYSRC,
2251	.flowi6_mark = skb->mark,
2252	.flowlabel = ip6_flowinfo(hdr: ip6h),
2253	.flowi6_oif = dev->ifindex,
2254	.flowi6_proto = ip6h->nexthdr,
2255	.daddr = ip6h->daddr,
2256	.saddr = ip6h->saddr,
2257	};
2258
2259	dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
2260	if (IS_ERR(ptr: dst))
2261	goto out_drop;
2262
2263	skb_dst_set(skb, dst);
2264	} else if (nh->nh_family != AF_INET6) {
2265	goto out_drop;
2266	}
2267
2268	err = bpf_out_neigh_v6(net, skb, dev, nh);
2269	if (unlikely(net_xmit_eval(err)))
2270	dev->stats.tx_errors++;
2271	else
2272	ret = NET_XMIT_SUCCESS;
2273	goto out_xmit;
2274	out_drop:
2275	dev->stats.tx_errors++;
2276	kfree_skb(skb);
2277	out_xmit:
2278	return ret;
2279	}
2280	#else
2281	static int __bpf_redirect_neigh_v6(struct sk_buff skb, struct* net_device *dev,
2282	struct bpf_nh_params *nh)
2283	{
2284	kfree_skb(skb);
2285	return NET_XMIT_DROP;
2286	}
2287	#endif /* CONFIG_IPV6 */
2288
2289	#if IS_ENABLED(CONFIG_INET)
2290	static int bpf_out_neigh_v4(struct net net, struct* sk_buff *skb,
2291	struct net_device dev, struct* bpf_nh_params *nh)
2292	{
2293	u32 hh_len = LL_RESERVED_SPACE(dev);
2294	struct neighbour *neigh;
2295	bool is_v6gw = false;
2296
2297	if (dev_xmit_recursion()) {
2298	net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2299	goto out_drop;
2300	}
2301
2302	skb->dev = dev;
2303	skb_clear_tstamp(skb);
2304
2305	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
2306	skb = skb_expand_head(skb, headroom: hh_len);
2307	if (!skb)
2308	return -ENOMEM;
2309	}
2310
2311	rcu_read_lock();
2312	if (!nh) {
2313	struct dst_entry *dst = skb_dst(skb);
2314	struct rtable rt = container_of(dst, struct* rtable, dst);
2315
2316	neigh = ip_neigh_for_gw(rt, skb, is_v6gw: &is_v6gw);
2317	} else if (nh->nh_family == AF_INET6) {
2318	neigh = ip_neigh_gw6(dev, addr: &nh->ipv6_nh);
2319	is_v6gw = true;
2320	} else if (nh->nh_family == AF_INET) {
2321	neigh = ip_neigh_gw4(dev, daddr: nh->ipv4_nh);
2322	} else {
2323	rcu_read_unlock();
2324	goto out_drop;
2325	}
2326
2327	if (likely(!IS_ERR(neigh))) {
2328	int ret;
2329
2330	sock_confirm_neigh(skb, n: neigh);
2331	local_bh_disable();
2332	dev_xmit_recursion_inc();
2333	ret = neigh_output(n: neigh, skb, skip_cache: is_v6gw);
2334	dev_xmit_recursion_dec();
2335	local_bh_enable();
2336	rcu_read_unlock();
2337	return ret;
2338	}
2339	rcu_read_unlock();
2340	out_drop:
2341	kfree_skb(skb);
2342	return -ENETDOWN;
2343	}
2344
2345	static int __bpf_redirect_neigh_v4(struct sk_buff skb, struct* net_device *dev,
2346	struct bpf_nh_params *nh)
2347	{
2348	const struct iphdr *ip4h = ip_hdr(skb);
2349	struct net *net = dev_net(dev);
2350	int err, ret = NET_XMIT_DROP;
2351
2352	if (!nh) {
2353	struct flowi4 fl4 = {
2354	.flowi4_flags = FLOWI_FLAG_ANYSRC,
2355	.flowi4_mark = skb->mark,
2356	.flowi4_tos = RT_TOS(ip4h->tos),
2357	.flowi4_oif = dev->ifindex,
2358	.flowi4_proto = ip4h->protocol,
2359	.daddr = ip4h->daddr,
2360	.saddr = ip4h->saddr,
2361	};
2362	struct rtable *rt;
2363
2364	rt = ip_route_output_flow(net, flp: &fl4, NULL);
2365	if (IS_ERR(ptr: rt))
2366	goto out_drop;
2367	if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
2368	ip_rt_put(rt);
2369	goto out_drop;
2370	}
2371
2372	skb_dst_set(skb, dst: &rt->dst);
2373	}
2374
2375	err = bpf_out_neigh_v4(net, skb, dev, nh);
2376	if (unlikely(net_xmit_eval(err)))
2377	dev->stats.tx_errors++;
2378	else
2379	ret = NET_XMIT_SUCCESS;
2380	goto out_xmit;
2381	out_drop:
2382	dev->stats.tx_errors++;
2383	kfree_skb(skb);
2384	out_xmit:
2385	return ret;
2386	}
2387	#else
2388	static int __bpf_redirect_neigh_v4(struct sk_buff skb, struct* net_device *dev,
2389	struct bpf_nh_params *nh)
2390	{
2391	kfree_skb(skb);
2392	return NET_XMIT_DROP;
2393	}
2394	#endif /* CONFIG_INET */
2395
2396	static int __bpf_redirect_neigh(struct sk_buff skb, struct* net_device *dev,
2397	struct bpf_nh_params *nh)
2398	{
2399	struct ethhdr *ethh = eth_hdr(skb);
2400
2401	if (unlikely(skb->mac_header >= skb->network_header))
2402	goto out;
2403	bpf_push_mac_rcsum(skb);
2404	if (is_multicast_ether_addr(addr: ethh->h_dest))
2405	goto out;
2406
2407	skb_pull(skb, len: sizeof(*ethh));
2408	skb_unset_mac_header(skb);
2409	skb_reset_network_header(skb);
2410
2411	if (skb->protocol == htons(ETH_P_IP))
2412	return __bpf_redirect_neigh_v4(skb, dev, nh);
2413	else if (skb->protocol == htons(ETH_P_IPV6))
2414	return __bpf_redirect_neigh_v6(skb, dev, nh);
2415	out:
2416	kfree_skb(skb);
2417	return -ENOTSUPP;
2418	}
2419
2420	/ Internal, non-exposed redirect flags. /
2421	enum {
2422	BPF_F_NEIGH = (`1ULL` << `1`),
2423	BPF_F_PEER = (`1ULL` << `2`),
2424	BPF_F_NEXTHOP = (`1ULL` << `3`),
2425	#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH \| BPF_F_PEER \| BPF_F_NEXTHOP)
2426	};
2427
2428	BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
2429	{
2430	struct net_device *dev;
2431	struct sk_buff *clone;
2432	int ret;
2433
2434	if (unlikely(flags & (~(BPF_F_INGRESS) \| BPF_F_REDIRECT_INTERNAL)))
2435	return -EINVAL;
2436
2437	dev = dev_get_by_index_rcu(net: dev_net(dev: skb->dev), ifindex);
2438	if (unlikely(!dev))
2439	return -EINVAL;
2440
2441	clone = skb_clone(skb, GFP_ATOMIC);
2442	if (unlikely(!clone))
2443	return -ENOMEM;
2444
2445	/ For direct write, we need to keep the invariant that the skbs*
2446	* we're dealing with need to be uncloned. Should uncloning fail
2447	* here, we need to free the just generated clone to unclone once
2448	* again.
2449	*/
2450	ret = bpf_try_make_head_writable(skb);
2451	if (unlikely(ret)) {
2452	kfree_skb(skb: clone);
2453	return -ENOMEM;
2454	}
2455
2456	return __bpf_redirect(skb: clone, dev, flags);
2457	}
2458
2459	static const struct bpf_func_proto bpf_clone_redirect_proto = {
2460	.func = bpf_clone_redirect,
2461	.gpl_only = false,
2462	.ret_type = RET_INTEGER,
2463	.arg1_type = ARG_PTR_TO_CTX,
2464	.arg2_type = ARG_ANYTHING,
2465	.arg3_type = ARG_ANYTHING,
2466	};
2467
2468	DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
2469	EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
2470
2471	int skb_do_redirect(struct sk_buff *skb)
2472	{
2473	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2474	struct net *net = dev_net(dev: skb->dev);
2475	struct net_device *dev;
2476	u32 flags = ri->flags;
2477
2478	dev = dev_get_by_index_rcu(net, ifindex: ri->tgt_index);
2479	ri->tgt_index = `0`;
2480	ri->flags = `0`;
2481	if (unlikely(!dev))
2482	goto out_drop;
2483	if (flags & BPF_F_PEER) {
2484	const struct net_device_ops *ops = dev->netdev_ops;
2485
2486	if (unlikely(!ops->ndo_get_peer_dev \|\|
2487	!skb_at_tc_ingress(skb)))
2488	goto out_drop;
2489	dev = ops->ndo_get_peer_dev(dev);
2490	if (unlikely(!dev \|\|
2491	!(dev->flags & IFF_UP) \|\|
2492	net_eq(net, dev_net(dev))))
2493	goto out_drop;
2494	skb->dev = dev;
2495	return -EAGAIN;
2496	}
2497	return flags & BPF_F_NEIGH ?
2498	__bpf_redirect_neigh(skb, dev, nh: flags & BPF_F_NEXTHOP ?
2499	&ri->nh : NULL) :
2500	__bpf_redirect(skb, dev, flags);
2501	out_drop:
2502	kfree_skb(skb);
2503	return -EINVAL;
2504	}
2505
2506	BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
2507	{
2508	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2509
2510	if (unlikely(flags & (~(BPF_F_INGRESS) \| BPF_F_REDIRECT_INTERNAL)))
2511	return TC_ACT_SHOT;
2512
2513	ri->flags = flags;
2514	ri->tgt_index = ifindex;
2515
2516	return TC_ACT_REDIRECT;
2517	}
2518
2519	static const struct bpf_func_proto bpf_redirect_proto = {
2520	.func = bpf_redirect,
2521	.gpl_only = false,
2522	.ret_type = RET_INTEGER,
2523	.arg1_type = ARG_ANYTHING,
2524	.arg2_type = ARG_ANYTHING,
2525	};
2526
2527	BPF_CALL_2(bpf_redirect_peer, u32, ifindex, u64, flags)
2528	{
2529	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2530
2531	if (unlikely(flags))
2532	return TC_ACT_SHOT;
2533
2534	ri->flags = BPF_F_PEER;
2535	ri->tgt_index = ifindex;
2536
2537	return TC_ACT_REDIRECT;
2538	}
2539
2540	static const struct bpf_func_proto bpf_redirect_peer_proto = {
2541	.func = bpf_redirect_peer,
2542	.gpl_only = false,
2543	.ret_type = RET_INTEGER,
2544	.arg1_type = ARG_ANYTHING,
2545	.arg2_type = ARG_ANYTHING,
2546	};
2547
2548	BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params,
2549	int, plen, u64, flags)
2550	{
2551	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2552
2553	if (unlikely((plen && plen < sizeof(*params)) \|\| flags))
2554	return TC_ACT_SHOT;
2555
2556	ri->flags = BPF_F_NEIGH \| (plen ? BPF_F_NEXTHOP : `0`);
2557	ri->tgt_index = ifindex;
2558
2559	BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params));
2560	if (plen)
2561	memcpy(&ri->nh, params, sizeof(ri->nh));
2562
2563	return TC_ACT_REDIRECT;
2564	}
2565
2566	static const struct bpf_func_proto bpf_redirect_neigh_proto = {
2567	.func = bpf_redirect_neigh,
2568	.gpl_only = false,
2569	.ret_type = RET_INTEGER,
2570	.arg1_type = ARG_ANYTHING,
2571	.arg2_type = ARG_PTR_TO_MEM \| PTR_MAYBE_NULL \| MEM_RDONLY,
2572	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
2573	.arg4_type = ARG_ANYTHING,
2574	};
2575
2576	BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
2577	{
2578	msg->apply_bytes = bytes;
2579	return `0`;
2580	}
2581
2582	static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
2583	.func = bpf_msg_apply_bytes,
2584	.gpl_only = false,
2585	.ret_type = RET_INTEGER,
2586	.arg1_type = ARG_PTR_TO_CTX,
2587	.arg2_type = ARG_ANYTHING,
2588	};
2589
2590	BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes)
2591	{
2592	msg->cork_bytes = bytes;
2593	return `0`;
2594	}
2595
2596	static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
2597	.func = bpf_msg_cork_bytes,
2598	.gpl_only = false,
2599	.ret_type = RET_INTEGER,
2600	.arg1_type = ARG_PTR_TO_CTX,
2601	.arg2_type = ARG_ANYTHING,
2602	};
2603
2604	BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
2605	u32, end, u64, flags)
2606	{
2607	u32 len = `0`, offset = `0`, copy = `0`, poffset = `0`, bytes = end - start;
2608	u32 first_sge, last_sge, i, shift, bytes_sg_total;
2609	struct scatterlist *sge;
2610	u8 raw, to, *from;
2611	struct page *page;
2612
2613	if (unlikely(flags \|\| end <= start))
2614	return -EINVAL;
2615
2616	/ First find the starting scatterlist element /
2617	i = msg->sg.start;
2618	do {
2619	offset += len;
2620	len = sk_msg_elem(msg, which: i)->length;
2621	if (start < offset + len)
2622	break;
2623	sk_msg_iter_var_next(i);
2624	} while (i != msg->sg.end);
2625
2626	if (unlikely(start >= offset + len))
2627	return -EINVAL;
2628
2629	first_sge = i;
2630	/ The start may point into the sg element so we need to also*
2631	* account for the headroom.
2632	*/
2633	bytes_sg_total = start - offset + bytes;
2634	if (!test_bit(i, msg->sg.copy) && bytes_sg_total <= len)
2635	goto out;
2636
2637	/ At this point we need to linearize multiple scatterlist*
2638	* elements or a single shared page. Either way we need to
2639	* copy into a linear buffer exclusively owned by BPF. Then
2640	* place the buffer in the scatterlist and fixup the original
2641	* entries by removing the entries now in the linear buffer
2642	* and shifting the remaining entries. For now we do not try
2643	* to copy partial entries to avoid complexity of running out
2644	* of sg_entry slots. The downside is reading a single byte
2645	* will copy the entire sg entry.
2646	*/
2647	do {
2648	copy += sk_msg_elem(msg, which: i)->length;
2649	sk_msg_iter_var_next(i);
2650	if (bytes_sg_total <= copy)
2651	break;
2652	} while (i != msg->sg.end);
2653	last_sge = i;
2654
2655	if (unlikely(bytes_sg_total > copy))
2656	return -EINVAL;
2657
2658	page = alloc_pages(__GFP_NOWARN \| GFP_ATOMIC \| __GFP_COMP,
2659	order: get_order(size: copy));
2660	if (unlikely(!page))
2661	return -ENOMEM;
2662
2663	raw = page_address(page);
2664	i = first_sge;
2665	do {
2666	sge = sk_msg_elem(msg, which: i);
2667	from = sg_virt(sg: sge);
2668	len = sge->length;
2669	to = raw + poffset;
2670
2671	memcpy(to, from, len);
2672	poffset += len;
2673	sge->length = `0`;
2674	put_page(page: sg_page(sg: sge));
2675
2676	sk_msg_iter_var_next(i);
2677	} while (i != last_sge);
2678
2679	sg_set_page(sg: &msg->sg.data[first_sge], page, len: copy, offset: `0`);
2680
2681	/ To repair sg ring we need to shift entries. If we only*
2682	* had a single entry though we can just replace it and
2683	* be done. Otherwise walk the ring and shift the entries.
2684	*/
2685	WARN_ON_ONCE(last_sge == first_sge);
2686	shift = last_sge > first_sge ?
2687	last_sge - first_sge - `1` :
2688	NR_MSG_FRAG_IDS - first_sge + last_sge - `1`;
2689	if (!shift)
2690	goto out;
2691
2692	i = first_sge;
2693	sk_msg_iter_var_next(i);
2694	do {
2695	u32 move_from;
2696
2697	if (i + shift >= NR_MSG_FRAG_IDS)
2698	move_from = i + shift - NR_MSG_FRAG_IDS;
2699	else
2700	move_from = i + shift;
2701	if (move_from == msg->sg.end)
2702	break;
2703
2704	msg->sg.data[i] = msg->sg.data[move_from];
2705	msg->sg.data[move_from].length = `0`;
2706	msg->sg.data[move_from].page_link = `0`;
2707	msg->sg.data[move_from].offset = `0`;
2708	sk_msg_iter_var_next(i);
2709	} while (`1`);
2710
2711	msg->sg.end = msg->sg.end - shift > msg->sg.end ?
2712	msg->sg.end - shift + NR_MSG_FRAG_IDS :
2713	msg->sg.end - shift;
2714	out:
2715	msg->data = sg_virt(sg: &msg->sg.data[first_sge]) + start - offset;
2716	msg->data_end = msg->data + bytes;
2717	return `0`;
2718	}
2719
2720	static const struct bpf_func_proto bpf_msg_pull_data_proto = {
2721	.func = bpf_msg_pull_data,
2722	.gpl_only = false,
2723	.ret_type = RET_INTEGER,
2724	.arg1_type = ARG_PTR_TO_CTX,
2725	.arg2_type = ARG_ANYTHING,
2726	.arg3_type = ARG_ANYTHING,
2727	.arg4_type = ARG_ANYTHING,
2728	};
2729
2730	BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
2731	u32, len, u64, flags)
2732	{
2733	struct scatterlist sge, nsge, nnsge, rsge = {`0`}, *psge;
2734	u32 new, i = `0`, l = `0`, space, copy = `0`, offset = `0`;
2735	u8 raw, to, *from;
2736	struct page *page;
2737
2738	if (unlikely(flags))
2739	return -EINVAL;
2740
2741	if (unlikely(len == `0`))
2742	return `0`;
2743
2744	/ First find the starting scatterlist element /
2745	i = msg->sg.start;
2746	do {
2747	offset += l;
2748	l = sk_msg_elem(msg, which: i)->length;
2749
2750	if (start < offset + l)
2751	break;
2752	sk_msg_iter_var_next(i);
2753	} while (i != msg->sg.end);
2754
2755	if (start >= offset + l)
2756	return -EINVAL;
2757
2758	space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
2759
2760	/ If no space available will fallback to copy, we need at*
2761	* least one scatterlist elem available to push data into
2762	* when start aligns to the beginning of an element or two
2763	* when it falls inside an element. We handle the start equals
2764	* offset case because its the common case for inserting a
2765	* header.
2766	*/
2767	if (!space \|\| (space == `1` && start != offset))
2768	copy = msg->sg.data[i].length;
2769
2770	page = alloc_pages(__GFP_NOWARN \| GFP_ATOMIC \| __GFP_COMP,
2771	order: get_order(size: copy + len));
2772	if (unlikely(!page))
2773	return -ENOMEM;
2774
2775	if (copy) {
2776	int front, back;
2777
2778	raw = page_address(page);
2779
2780	psge = sk_msg_elem(msg, which: i);
2781	front = start - offset;
2782	back = psge->length - front;
2783	from = sg_virt(sg: psge);
2784
2785	if (front)
2786	memcpy(raw, from, front);
2787
2788	if (back) {
2789	from += front;
2790	to = raw + front + len;
2791
2792	memcpy(to, from, back);
2793	}
2794
2795	put_page(page: sg_page(sg: psge));
2796	} else if (start - offset) {
2797	psge = sk_msg_elem(msg, which: i);
2798	rsge = sk_msg_elem_cpy(msg, which: i);
2799
2800	psge->length = start - offset;
2801	rsge.length -= psge->length;
2802	rsge.offset += start;
2803
2804	sk_msg_iter_var_next(i);
2805	sg_unmark_end(sg: psge);
2806	sg_unmark_end(sg: &rsge);
2807	sk_msg_iter_next(msg, end);
2808	}
2809
2810	/ Slot(s) to place newly allocated data /
2811	new = i;
2812
2813	/ Shift one or two slots as needed /
2814	if (!copy) {
2815	sge = sk_msg_elem_cpy(msg, which: i);
2816
2817	sk_msg_iter_var_next(i);
2818	sg_unmark_end(sg: &sge);
2819	sk_msg_iter_next(msg, end);
2820
2821	nsge = sk_msg_elem_cpy(msg, which: i);
2822	if (rsge.length) {
2823	sk_msg_iter_var_next(i);
2824	nnsge = sk_msg_elem_cpy(msg, which: i);
2825	}
2826
2827	while (i != msg->sg.end) {
2828	msg->sg.data[i] = sge;
2829	sge = nsge;
2830	sk_msg_iter_var_next(i);
2831	if (rsge.length) {
2832	nsge = nnsge;
2833	nnsge = sk_msg_elem_cpy(msg, which: i);
2834	} else {
2835	nsge = sk_msg_elem_cpy(msg, which: i);
2836	}
2837	}
2838	}
2839
2840	/ Place newly allocated data buffer /
2841	sk_mem_charge(sk: msg->sk, size: len);
2842	msg->sg.size += len;
2843	__clear_bit(new, msg->sg.copy);
2844	sg_set_page(sg: &msg->sg.data[new], page, len: len + copy, offset: `0`);
2845	if (rsge.length) {
2846	get_page(page: sg_page(sg: &rsge));
2847	sk_msg_iter_var_next(new);
2848	msg->sg.data[new] = rsge;
2849	}
2850
2851	sk_msg_compute_data_pointers(msg);
2852	return `0`;
2853	}
2854
2855	static const struct bpf_func_proto bpf_msg_push_data_proto = {
2856	.func = bpf_msg_push_data,
2857	.gpl_only = false,
2858	.ret_type = RET_INTEGER,
2859	.arg1_type = ARG_PTR_TO_CTX,
2860	.arg2_type = ARG_ANYTHING,
2861	.arg3_type = ARG_ANYTHING,
2862	.arg4_type = ARG_ANYTHING,
2863	};
2864
2865	static void sk_msg_shift_left(struct sk_msg msg, int* i)
2866	{
2867	int prev;
2868
2869	do {
2870	prev = i;
2871	sk_msg_iter_var_next(i);
2872	msg->sg.data[prev] = msg->sg.data[i];
2873	} while (i != msg->sg.end);
2874
2875	sk_msg_iter_prev(msg, end);
2876	}
2877
2878	static void sk_msg_shift_right(struct sk_msg msg, int* i)
2879	{
2880	struct scatterlist tmp, sge;
2881
2882	sk_msg_iter_next(msg, end);
2883	sge = sk_msg_elem_cpy(msg, which: i);
2884	sk_msg_iter_var_next(i);
2885	tmp = sk_msg_elem_cpy(msg, which: i);
2886
2887	while (i != msg->sg.end) {
2888	msg->sg.data[i] = sge;
2889	sk_msg_iter_var_next(i);
2890	sge = tmp;
2891	tmp = sk_msg_elem_cpy(msg, which: i);
2892	}
2893	}
2894
2895	BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
2896	u32, len, u64, flags)
2897	{
2898	u32 i = `0`, l = `0`, space, offset = `0`;
2899	u64 last = start + len;
2900	int pop;
2901
2902	if (unlikely(flags))
2903	return -EINVAL;
2904
2905	/ First find the starting scatterlist element /
2906	i = msg->sg.start;
2907	do {
2908	offset += l;
2909	l = sk_msg_elem(msg, which: i)->length;
2910
2911	if (start < offset + l)
2912	break;
2913	sk_msg_iter_var_next(i);
2914	} while (i != msg->sg.end);
2915
2916	/ Bounds checks: start and pop must be inside message /
2917	if (start >= offset + l \|\| last >= msg->sg.size)
2918	return -EINVAL;
2919
2920	space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
2921
2922	pop = len;
2923	/ --------------\| offset*
2924	* -\| start \|-------- len -------\|
2925	*
2926	* \|----- a ----\|-------- pop -------\|----- b ----\|
2927	* \|______________________________________________\| length
2928	*
2929	*
2930	* a: region at front of scatter element to save
2931	* b: region at back of scatter element to save when length > A + pop
2932	* pop: region to pop from element, same as input 'pop' here will be
2933	* decremented below per iteration.
2934	*
2935	* Two top-level cases to handle when start != offset, first B is non
2936	* zero and second B is zero corresponding to when a pop includes more
2937	* than one element.
2938	*
2939	* Then if B is non-zero AND there is no space allocate space and
2940	* compact A, B regions into page. If there is space shift ring to
2941	* the rigth free'ing the next element in ring to place B, leaving
2942	* A untouched except to reduce length.
2943	*/
2944	if (start != offset) {
2945	struct scatterlist nsge, sge = sk_msg_elem(msg, which: i);
2946	int a = start;
2947	int b = sge->length - pop - a;
2948
2949	sk_msg_iter_var_next(i);
2950
2951	if (pop < sge->length - a) {
2952	if (space) {
2953	sge->length = a;
2954	sk_msg_shift_right(msg, i);
2955	nsge = sk_msg_elem(msg, which: i);
2956	get_page(page: sg_page(sg: sge));
2957	sg_set_page(sg: nsge,
2958	page: sg_page(sg: sge),
2959	len: b, offset: sge->offset + pop + a);
2960	} else {
2961	struct page page, orig;
2962	u8 to, from;
2963
2964	page = alloc_pages(__GFP_NOWARN \|
2965	__GFP_COMP \| GFP_ATOMIC,
2966	order: get_order(size: a + b));
2967	if (unlikely(!page))
2968	return -ENOMEM;
2969
2970	sge->length = a;
2971	orig = sg_page(sg: sge);
2972	from = sg_virt(sg: sge);
2973	to = page_address(page);
2974	memcpy(to, from, a);
2975	memcpy(to + a, from + a + pop, b);
2976	sg_set_page(sg: sge, page, len: a + b, offset: `0`);
2977	put_page(page: orig);
2978	}
2979	pop = `0`;
2980	} else if (pop >= sge->length - a) {
2981	pop -= (sge->length - a);
2982	sge->length = a;
2983	}
2984	}
2985
2986	/ From above the current layout _must_ be as follows,*
2987	*
2988	* -\| offset
2989	* -\| start
2990	*
2991	* \|---- pop ---\|---------------- b ------------\|
2992	* \|____________________________________________\| length
2993	*
2994	* Offset and start of the current msg elem are equal because in the
2995	* previous case we handled offset != start and either consumed the
2996	* entire element and advanced to the next element OR pop == 0.
2997	*
2998	* Two cases to handle here are first pop is less than the length
2999	* leaving some remainder b above. Simply adjust the element's layout
3000	* in this case. Or pop >= length of the element so that b = 0. In this
3001	* case advance to next element decrementing pop.
3002	*/
3003	while (pop) {
3004	struct scatterlist *sge = sk_msg_elem(msg, which: i);
3005
3006	if (pop < sge->length) {
3007	sge->length -= pop;
3008	sge->offset += pop;
3009	pop = `0`;
3010	} else {
3011	pop -= sge->length;
3012	sk_msg_shift_left(msg, i);
3013	}
3014	sk_msg_iter_var_next(i);
3015	}
3016
3017	sk_mem_uncharge(sk: msg->sk, size: len - pop);
3018	msg->sg.size -= (len - pop);
3019	sk_msg_compute_data_pointers(msg);
3020	return `0`;
3021	}
3022
3023	static const struct bpf_func_proto bpf_msg_pop_data_proto = {
3024	.func = bpf_msg_pop_data,
3025	.gpl_only = false,
3026	.ret_type = RET_INTEGER,
3027	.arg1_type = ARG_PTR_TO_CTX,
3028	.arg2_type = ARG_ANYTHING,
3029	.arg3_type = ARG_ANYTHING,
3030	.arg4_type = ARG_ANYTHING,
3031	};
3032
3033	#ifdef CONFIG_CGROUP_NET_CLASSID
3034	BPF_CALL_0(bpf_get_cgroup_classid_curr)
3035	{
3036	return __task_get_classid(current);
3037	}
3038
3039	const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = {
3040	.func = bpf_get_cgroup_classid_curr,
3041	.gpl_only = false,
3042	.ret_type = RET_INTEGER,
3043	};
3044
3045	BPF_CALL_1(bpf_skb_cgroup_classid, const struct sk_buff *, skb)
3046	{
3047	struct sock *sk = skb_to_full_sk(skb);
3048
3049	if (!sk \|\| !sk_fullsock(sk))
3050	return `0`;
3051
3052	return sock_cgroup_classid(skcd: &sk->sk_cgrp_data);
3053	}
3054
3055	static const struct bpf_func_proto bpf_skb_cgroup_classid_proto = {
3056	.func = bpf_skb_cgroup_classid,
3057	.gpl_only = false,
3058	.ret_type = RET_INTEGER,
3059	.arg1_type = ARG_PTR_TO_CTX,
3060	};
3061	#endif
3062
3063	BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
3064	{
3065	return task_get_classid(skb);
3066	}
3067
3068	static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
3069	.func = bpf_get_cgroup_classid,
3070	.gpl_only = false,
3071	.ret_type = RET_INTEGER,
3072	.arg1_type = ARG_PTR_TO_CTX,
3073	};
3074
3075	BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb)
3076	{
3077	return dst_tclassid(skb);
3078	}
3079
3080	static const struct bpf_func_proto bpf_get_route_realm_proto = {
3081	.func = bpf_get_route_realm,
3082	.gpl_only = false,
3083	.ret_type = RET_INTEGER,
3084	.arg1_type = ARG_PTR_TO_CTX,
3085	};
3086
3087	BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb)
3088	{
3089	/ If skb_clear_hash() was called due to mangling, we can*
3090	* trigger SW recalculation here. Later access to hash
3091	* can then use the inline skb->hash via context directly
3092	* instead of calling this helper again.
3093	*/
3094	return skb_get_hash(skb);
3095	}
3096
3097	static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
3098	.func = bpf_get_hash_recalc,
3099	.gpl_only = false,
3100	.ret_type = RET_INTEGER,
3101	.arg1_type = ARG_PTR_TO_CTX,
3102	};
3103
3104	BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb)
3105	{
3106	/ After all direct packet write, this can be used once for*
3107	* triggering a lazy recalc on next skb_get_hash() invocation.
3108	*/
3109	skb_clear_hash(skb);
3110	return `0`;
3111	}
3112
3113	static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
3114	.func = bpf_set_hash_invalid,
3115	.gpl_only = false,
3116	.ret_type = RET_INTEGER,
3117	.arg1_type = ARG_PTR_TO_CTX,
3118	};
3119
3120	BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash)
3121	{
3122	/ Set user specified hash as L4(+), so that it gets returned*
3123	* on skb_get_hash() call unless BPF prog later on triggers a
3124	* skb_clear_hash().
3125	*/
3126	__skb_set_sw_hash(skb, hash, is_l4: true);
3127	return `0`;
3128	}
3129
3130	static const struct bpf_func_proto bpf_set_hash_proto = {
3131	.func = bpf_set_hash,
3132	.gpl_only = false,
3133	.ret_type = RET_INTEGER,
3134	.arg1_type = ARG_PTR_TO_CTX,
3135	.arg2_type = ARG_ANYTHING,
3136	};
3137
3138	BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
3139	u16, vlan_tci)
3140	{
3141	int ret;
3142
3143	if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
3144	vlan_proto != htons(ETH_P_8021AD)))
3145	vlan_proto = htons(ETH_P_8021Q);
3146
3147	bpf_push_mac_rcsum(skb);
3148	ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
3149	bpf_pull_mac_rcsum(skb);
3150
3151	bpf_compute_data_pointers(skb);
3152	return ret;
3153	}
3154
3155	static const struct bpf_func_proto bpf_skb_vlan_push_proto = {
3156	.func = bpf_skb_vlan_push,
3157	.gpl_only = false,
3158	.ret_type = RET_INTEGER,
3159	.arg1_type = ARG_PTR_TO_CTX,
3160	.arg2_type = ARG_ANYTHING,
3161	.arg3_type = ARG_ANYTHING,
3162	};
3163
3164	BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
3165	{
3166	int ret;
3167
3168	bpf_push_mac_rcsum(skb);
3169	ret = skb_vlan_pop(skb);
3170	bpf_pull_mac_rcsum(skb);
3171
3172	bpf_compute_data_pointers(skb);
3173	return ret;
3174	}
3175
3176	static const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
3177	.func = bpf_skb_vlan_pop,
3178	.gpl_only = false,
3179	.ret_type = RET_INTEGER,
3180	.arg1_type = ARG_PTR_TO_CTX,
3181	};
3182
3183	static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
3184	{
3185	/ Caller already did skb_cow() with len as headroom,*
3186	* so no need to do it here.
3187	*/
3188	skb_push(skb, len);
3189	memmove(skb->data, skb->data + len, off);
3190	memset(skb->data + off, `0`, len);
3191
3192	/ No skb_postpush_rcsum(skb, skb->data + off, len)*
3193	* needed here as it does not change the skb->csum
3194	* result for checksum complete when summing over
3195	* zeroed blocks.
3196	*/
3197	return `0`;
3198	}
3199
3200	static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
3201	{
3202	void *old_data;
3203
3204	/ skb_ensure_writable() is not needed here, as we're*
3205	* already working on an uncloned skb.
3206	*/
3207	if (unlikely(!pskb_may_pull(skb, off + len)))
3208	return -ENOMEM;
3209
3210	old_data = skb->data;
3211	__skb_pull(skb, len);
3212	skb_postpull_rcsum(skb, start: old_data + off, len);
3213	memmove(skb->data, old_data, off);
3214
3215	return `0`;
3216	}
3217
3218	static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len)
3219	{
3220	bool trans_same = skb->transport_header == skb->network_header;
3221	int ret;
3222
3223	/ There's no need for __skb_push()/__skb_pull() pair to*
3224	* get to the start of the mac header as we're guaranteed
3225	* to always start from here under eBPF.
3226	*/
3227	ret = bpf_skb_generic_push(skb, off, len);
3228	if (likely(!ret)) {
3229	skb->mac_header -= len;
3230	skb->network_header -= len;
3231	if (trans_same)
3232	skb->transport_header = skb->network_header;
3233	}
3234
3235	return ret;
3236	}
3237
3238	static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
3239	{
3240	bool trans_same = skb->transport_header == skb->network_header;
3241	int ret;
3242
3243	/ Same here, __skb_push()/__skb_pull() pair not needed. /
3244	ret = bpf_skb_generic_pop(skb, off, len);
3245	if (likely(!ret)) {
3246	skb->mac_header += len;
3247	skb->network_header += len;
3248	if (trans_same)
3249	skb->transport_header = skb->network_header;
3250	}
3251
3252	return ret;
3253	}
3254
3255	static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
3256	{
3257	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
3258	u32 off = skb_mac_header_len(skb);
3259	int ret;
3260
3261	ret = skb_cow(skb, headroom: len_diff);
3262	if (unlikely(ret < `0`))
3263	return ret;
3264
3265	ret = bpf_skb_net_hdr_push(skb, off, len: len_diff);
3266	if (unlikely(ret < `0`))
3267	return ret;
3268
3269	if (skb_is_gso(skb)) {
3270	struct skb_shared_info *shinfo = skb_shinfo(skb);
3271
3272	/ SKB_GSO_TCPV4 needs to be changed into SKB_GSO_TCPV6. /
3273	if (shinfo->gso_type & SKB_GSO_TCPV4) {
3274	shinfo->gso_type &= ~SKB_GSO_TCPV4;
3275	shinfo->gso_type \|= SKB_GSO_TCPV6;
3276	}
3277	}
3278
3279	skb->protocol = htons(ETH_P_IPV6);
3280	skb_clear_hash(skb);
3281
3282	return `0`;
3283	}
3284
3285	static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
3286	{
3287	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
3288	u32 off = skb_mac_header_len(skb);
3289	int ret;
3290
3291	ret = skb_unclone(skb, GFP_ATOMIC);
3292	if (unlikely(ret < `0`))
3293	return ret;
3294
3295	ret = bpf_skb_net_hdr_pop(skb, off, len: len_diff);
3296	if (unlikely(ret < `0`))
3297	return ret;
3298
3299	if (skb_is_gso(skb)) {
3300	struct skb_shared_info *shinfo = skb_shinfo(skb);
3301
3302	/ SKB_GSO_TCPV6 needs to be changed into SKB_GSO_TCPV4. /
3303	if (shinfo->gso_type & SKB_GSO_TCPV6) {
3304	shinfo->gso_type &= ~SKB_GSO_TCPV6;
3305	shinfo->gso_type \|= SKB_GSO_TCPV4;
3306	}
3307	}
3308
3309	skb->protocol = htons(ETH_P_IP);
3310	skb_clear_hash(skb);
3311
3312	return `0`;
3313	}
3314
3315	static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
3316	{
3317	__be16 from_proto = skb->protocol;
3318
3319	if (from_proto == htons(ETH_P_IP) &&
3320	to_proto == htons(ETH_P_IPV6))
3321	return bpf_skb_proto_4_to_6(skb);
3322
3323	if (from_proto == htons(ETH_P_IPV6) &&
3324	to_proto == htons(ETH_P_IP))
3325	return bpf_skb_proto_6_to_4(skb);
3326
3327	return -ENOTSUPP;
3328	}
3329
3330	BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
3331	u64, flags)
3332	{
3333	int ret;
3334
3335	if (unlikely(flags))
3336	return -EINVAL;
3337
3338	/ General idea is that this helper does the basic groundwork*
3339	* needed for changing the protocol, and eBPF program fills the
3340	* rest through bpf_skb_store_bytes(), bpf_lX_csum_replace()
3341	* and other helpers, rather than passing a raw buffer here.
3342	*
3343	* The rationale is to keep this minimal and without a need to
3344	* deal with raw packet data. F.e. even if we would pass buffers
3345	* here, the program still needs to call the bpf_lX_csum_replace()
3346	* helpers anyway. Plus, this way we keep also separation of
3347	* concerns, since f.e. bpf_skb_store_bytes() should only take
3348	* care of stores.
3349	*
3350	* Currently, additional options and extension header space are
3351	* not supported, but flags register is reserved so we can adapt
3352	* that. For offloads, we mark packet as dodgy, so that headers
3353	* need to be verified first.
3354	*/
3355	ret = bpf_skb_proto_xlat(skb, to_proto: proto);
3356	bpf_compute_data_pointers(skb);
3357	return ret;
3358	}
3359
3360	static const struct bpf_func_proto bpf_skb_change_proto_proto = {
3361	.func = bpf_skb_change_proto,
3362	.gpl_only = false,
3363	.ret_type = RET_INTEGER,
3364	.arg1_type = ARG_PTR_TO_CTX,
3365	.arg2_type = ARG_ANYTHING,
3366	.arg3_type = ARG_ANYTHING,
3367	};
3368
3369	BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type)
3370	{
3371	/ We only allow a restricted subset to be changed for now. /
3372	if (unlikely(!skb_pkt_type_ok(skb->pkt_type) \|\|
3373	!skb_pkt_type_ok(pkt_type)))
3374	return -EINVAL;
3375
3376	skb->pkt_type = pkt_type;
3377	return `0`;
3378	}
3379
3380	static const struct bpf_func_proto bpf_skb_change_type_proto = {
3381	.func = bpf_skb_change_type,
3382	.gpl_only = false,
3383	.ret_type = RET_INTEGER,
3384	.arg1_type = ARG_PTR_TO_CTX,
3385	.arg2_type = ARG_ANYTHING,
3386	};
3387
3388	static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
3389	{
3390	switch (skb->protocol) {
3391	case htons(ETH_P_IP):
3392	return sizeof(struct iphdr);
3393	case htons(ETH_P_IPV6):
3394	return sizeof(struct ipv6hdr);
3395	default:
3396	return ~`0U`;
3397	}
3398	}
3399
3400	#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 \| \
3401	BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3402
3403	#define BPF_F_ADJ_ROOM_DECAP_L3_MASK (BPF_F_ADJ_ROOM_DECAP_L3_IPV4 \| \
3404	BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
3405
3406	#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO \| \
3407	BPF_F_ADJ_ROOM_ENCAP_L3_MASK \| \
3408	BPF_F_ADJ_ROOM_ENCAP_L4_GRE \| \
3409	BPF_F_ADJ_ROOM_ENCAP_L4_UDP \| \
3410	BPF_F_ADJ_ROOM_ENCAP_L2_ETH \| \
3411	BPF_F_ADJ_ROOM_ENCAP_L2( \
3412	BPF_ADJ_ROOM_ENCAP_L2_MASK) \| \
3413	BPF_F_ADJ_ROOM_DECAP_L3_MASK)
3414
3415	static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
3416	u64 flags)
3417	{
3418	u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT;
3419	bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
3420	u16 mac_len = `0`, inner_net = `0`, inner_trans = `0`;
3421	unsigned int gso_type = SKB_GSO_DODGY;
3422	int ret;
3423
3424	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
3425	/ udp gso_size delineates datagrams, only allow if fixed /
3426	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) \|\|
3427	!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3428	return -ENOTSUPP;
3429	}
3430
3431	ret = skb_cow_head(skb, headroom: len_diff);
3432	if (unlikely(ret < `0`))
3433	return ret;
3434
3435	if (encap) {
3436	if (skb->protocol != htons(ETH_P_IP) &&
3437	skb->protocol != htons(ETH_P_IPV6))
3438	return -ENOTSUPP;
3439
3440	if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 &&
3441	flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3442	return -EINVAL;
3443
3444	if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE &&
3445	flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
3446	return -EINVAL;
3447
3448	if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH &&
3449	inner_mac_len < ETH_HLEN)
3450	return -EINVAL;
3451
3452	if (skb->encapsulation)
3453	return -EALREADY;
3454
3455	mac_len = skb->network_header - skb->mac_header;
3456	inner_net = skb->network_header;
3457	if (inner_mac_len > len_diff)
3458	return -EINVAL;
3459	inner_trans = skb->transport_header;
3460	}
3461
3462	ret = bpf_skb_net_hdr_push(skb, off, len: len_diff);
3463	if (unlikely(ret < `0`))
3464	return ret;
3465
3466	if (encap) {
3467	skb->inner_mac_header = inner_net - inner_mac_len;
3468	skb->inner_network_header = inner_net;
3469	skb->inner_transport_header = inner_trans;
3470
3471	if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH)
3472	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
3473	else
3474	skb_set_inner_protocol(skb, protocol: skb->protocol);
3475
3476	skb->encapsulation = `1`;
3477	skb_set_network_header(skb, offset: mac_len);
3478
3479	if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
3480	gso_type \|= SKB_GSO_UDP_TUNNEL;
3481	else if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE)
3482	gso_type \|= SKB_GSO_GRE;
3483	else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3484	gso_type \|= SKB_GSO_IPXIP6;
3485	else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
3486	gso_type \|= SKB_GSO_IPXIP4;
3487
3488	if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE \|\|
3489	flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) {
3490	int nh_len = flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 ?
3491	sizeof(struct ipv6hdr) :
3492	sizeof(struct iphdr);
3493
3494	skb_set_transport_header(skb, offset: mac_len + nh_len);
3495	}
3496
3497	/ Match skb->protocol to new outer l3 protocol /
3498	if (skb->protocol == htons(ETH_P_IP) &&
3499	flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3500	skb->protocol = htons(ETH_P_IPV6);
3501	else if (skb->protocol == htons(ETH_P_IPV6) &&
3502	flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
3503	skb->protocol = htons(ETH_P_IP);
3504	}
3505
3506	if (skb_is_gso(skb)) {
3507	struct skb_shared_info *shinfo = skb_shinfo(skb);
3508
3509	/ Due to header grow, MSS needs to be downgraded. /
3510	if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3511	skb_decrease_gso_size(shinfo, decrement: len_diff);
3512
3513	/ Header must be checked, and gso_segs recomputed. /
3514	shinfo->gso_type \|= gso_type;
3515	shinfo->gso_segs = `0`;
3516	}
3517
3518	return `0`;
3519	}
3520
3521	static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
3522	u64 flags)
3523	{
3524	int ret;
3525
3526	if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO \|
3527	BPF_F_ADJ_ROOM_DECAP_L3_MASK \|
3528	BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
3529	return -EINVAL;
3530
3531	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
3532	/ udp gso_size delineates datagrams, only allow if fixed /
3533	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) \|\|
3534	!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3535	return -ENOTSUPP;
3536	}
3537
3538	ret = skb_unclone(skb, GFP_ATOMIC);
3539	if (unlikely(ret < `0`))
3540	return ret;
3541
3542	ret = bpf_skb_net_hdr_pop(skb, off, len: len_diff);
3543	if (unlikely(ret < `0`))
3544	return ret;
3545
3546	/ Match skb->protocol to new outer l3 protocol /
3547	if (skb->protocol == htons(ETH_P_IP) &&
3548	flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
3549	skb->protocol = htons(ETH_P_IPV6);
3550	else if (skb->protocol == htons(ETH_P_IPV6) &&
3551	flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
3552	skb->protocol = htons(ETH_P_IP);
3553
3554	if (skb_is_gso(skb)) {
3555	struct skb_shared_info *shinfo = skb_shinfo(skb);
3556
3557	/ Due to header shrink, MSS can be upgraded. /
3558	if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3559	skb_increase_gso_size(shinfo, increment: len_diff);
3560
3561	/ Header must be checked, and gso_segs recomputed. /
3562	shinfo->gso_type \|= SKB_GSO_DODGY;
3563	shinfo->gso_segs = `0`;
3564	}
3565
3566	return `0`;
3567	}
3568
3569	#define BPF_SKB_MAX_LEN SKB_MAX_ALLOC
3570
3571	BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
3572	u32, mode, u64, flags)
3573	{
3574	u32 len_diff_abs = abs(len_diff);
3575	bool shrink = len_diff < `0`;
3576	int ret = `0`;
3577
3578	if (unlikely(flags \|\| mode))
3579	return -EINVAL;
3580	if (unlikely(len_diff_abs > `0xfffU`))
3581	return -EFAULT;
3582
3583	if (!shrink) {
3584	ret = skb_cow(skb, headroom: len_diff);
3585	if (unlikely(ret < `0`))
3586	return ret;
3587	__skb_push(skb, len: len_diff_abs);
3588	memset(skb->data, `0`, len_diff_abs);
3589	} else {
3590	if (unlikely(!pskb_may_pull(skb, len_diff_abs)))
3591	return -ENOMEM;
3592	__skb_pull(skb, len: len_diff_abs);
3593	}
3594	if (tls_sw_has_ctx_rx(sk: skb->sk)) {
3595	struct strp_msg *rxm = strp_msg(skb);
3596
3597	rxm->full_len += len_diff;
3598	}
3599	return ret;
3600	}
3601
3602	static const struct bpf_func_proto sk_skb_adjust_room_proto = {
3603	.func = sk_skb_adjust_room,
3604	.gpl_only = false,
3605	.ret_type = RET_INTEGER,
3606	.arg1_type = ARG_PTR_TO_CTX,
3607	.arg2_type = ARG_ANYTHING,
3608	.arg3_type = ARG_ANYTHING,
3609	.arg4_type = ARG_ANYTHING,
3610	};
3611
3612	BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
3613	u32, mode, u64, flags)
3614	{
3615	u32 len_cur, len_diff_abs = abs(len_diff);
3616	u32 len_min = bpf_skb_net_base_len(skb);
3617	u32 len_max = BPF_SKB_MAX_LEN;
3618	__be16 proto = skb->protocol;
3619	bool shrink = len_diff < `0`;
3620	u32 off;
3621	int ret;
3622
3623	if (unlikely(flags & ~(BPF_F_ADJ_ROOM_MASK \|
3624	BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
3625	return -EINVAL;
3626	if (unlikely(len_diff_abs > `0xfffU`))
3627	return -EFAULT;
3628	if (unlikely(proto != htons(ETH_P_IP) &&
3629	proto != htons(ETH_P_IPV6)))
3630	return -ENOTSUPP;
3631
3632	off = skb_mac_header_len(skb);
3633	switch (mode) {
3634	case BPF_ADJ_ROOM_NET:
3635	off += bpf_skb_net_base_len(skb);
3636	break;
3637	case BPF_ADJ_ROOM_MAC:
3638	break;
3639	default:
3640	return -ENOTSUPP;
3641	}
3642
3643	if (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
3644	if (!shrink)
3645	return -EINVAL;
3646
3647	switch (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
3648	case BPF_F_ADJ_ROOM_DECAP_L3_IPV4:
3649	len_min = sizeof(struct iphdr);
3650	break;
3651	case BPF_F_ADJ_ROOM_DECAP_L3_IPV6:
3652	len_min = sizeof(struct ipv6hdr);
3653	break;
3654	default:
3655	return -EINVAL;
3656	}
3657	}
3658
3659	len_cur = skb->len - skb_network_offset(skb);
3660	if ((shrink && (len_diff_abs >= len_cur \|\|
3661	len_cur - len_diff_abs < len_min)) \|\|
3662	(!shrink && (skb->len + len_diff_abs > len_max &&
3663	!skb_is_gso(skb))))
3664	return -ENOTSUPP;
3665
3666	ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff: len_diff_abs, flags) :
3667	bpf_skb_net_grow(skb, off, len_diff: len_diff_abs, flags);
3668	if (!ret && !(flags & BPF_F_ADJ_ROOM_NO_CSUM_RESET))
3669	__skb_reset_checksum_unnecessary(skb);
3670
3671	bpf_compute_data_pointers(skb);
3672	return ret;
3673	}
3674
3675	static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
3676	.func = bpf_skb_adjust_room,
3677	.gpl_only = false,
3678	.ret_type = RET_INTEGER,
3679	.arg1_type = ARG_PTR_TO_CTX,
3680	.arg2_type = ARG_ANYTHING,
3681	.arg3_type = ARG_ANYTHING,
3682	.arg4_type = ARG_ANYTHING,
3683	};
3684
3685	static u32 __bpf_skb_min_len(const struct sk_buff *skb)
3686	{
3687	u32 min_len = skb_network_offset(skb);
3688
3689	if (skb_transport_header_was_set(skb))
3690	min_len = skb_transport_offset(skb);
3691	if (skb->ip_summed == CHECKSUM_PARTIAL)
3692	min_len = skb_checksum_start_offset(skb) +
3693	skb->csum_offset + sizeof(__sum16);
3694	return min_len;
3695	}
3696
3697	static int bpf_skb_grow_rcsum(struct sk_buff skb, unsigned* int new_len)
3698	{
3699	unsigned int old_len = skb->len;
3700	int ret;
3701
3702	ret = __skb_grow_rcsum(skb, len: new_len);
3703	if (!ret)
3704	memset(skb->data + old_len, `0`, new_len - old_len);
3705	return ret;
3706	}
3707
3708	static int bpf_skb_trim_rcsum(struct sk_buff skb, unsigned* int new_len)
3709	{
3710	return __skb_trim_rcsum(skb, len: new_len);
3711	}
3712
3713	static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
3714	u64 flags)
3715	{
3716	u32 max_len = BPF_SKB_MAX_LEN;
3717	u32 min_len = __bpf_skb_min_len(skb);
3718	int ret;
3719
3720	if (unlikely(flags \|\| new_len > max_len \|\| new_len < min_len))
3721	return -EINVAL;
3722	if (skb->encapsulation)
3723	return -ENOTSUPP;
3724
3725	/ The basic idea of this helper is that it's performing the*
3726	* needed work to either grow or trim an skb, and eBPF program
3727	* rewrites the rest via helpers like bpf_skb_store_bytes(),
3728	* bpf_lX_csum_replace() and others rather than passing a raw
3729	* buffer here. This one is a slow path helper and intended
3730	* for replies with control messages.
3731	*
3732	* Like in bpf_skb_change_proto(), we want to keep this rather
3733	* minimal and without protocol specifics so that we are able
3734	* to separate concerns as in bpf_skb_store_bytes() should only
3735	* be the one responsible for writing buffers.
3736	*
3737	* It's really expected to be a slow path operation here for
3738	* control message replies, so we're implicitly linearizing,
3739	* uncloning and drop offloads from the skb by this.
3740	*/
3741	ret = __bpf_try_make_writable(skb, write_len: skb->len);
3742	if (!ret) {
3743	if (new_len > skb->len)
3744	ret = bpf_skb_grow_rcsum(skb, new_len);
3745	else if (new_len < skb->len)
3746	ret = bpf_skb_trim_rcsum(skb, new_len);
3747	if (!ret && skb_is_gso(skb))
3748	skb_gso_reset(skb);
3749	}
3750	return ret;
3751	}
3752
3753	BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
3754	u64, flags)
3755	{
3756	int ret = __bpf_skb_change_tail(skb, new_len, flags);
3757
3758	bpf_compute_data_pointers(skb);
3759	return ret;
3760	}
3761
3762	static const struct bpf_func_proto bpf_skb_change_tail_proto = {
3763	.func = bpf_skb_change_tail,
3764	.gpl_only = false,
3765	.ret_type = RET_INTEGER,
3766	.arg1_type = ARG_PTR_TO_CTX,
3767	.arg2_type = ARG_ANYTHING,
3768	.arg3_type = ARG_ANYTHING,
3769	};
3770
3771	BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
3772	u64, flags)
3773	{
3774	return __bpf_skb_change_tail(skb, new_len, flags);
3775	}
3776
3777	static const struct bpf_func_proto sk_skb_change_tail_proto = {
3778	.func = sk_skb_change_tail,
3779	.gpl_only = false,
3780	.ret_type = RET_INTEGER,
3781	.arg1_type = ARG_PTR_TO_CTX,
3782	.arg2_type = ARG_ANYTHING,
3783	.arg3_type = ARG_ANYTHING,
3784	};
3785
3786	static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
3787	u64 flags)
3788	{
3789	u32 max_len = BPF_SKB_MAX_LEN;
3790	u32 new_len = skb->len + head_room;
3791	int ret;
3792
3793	if (unlikely(flags \|\| (!skb_is_gso(skb) && new_len > max_len) \|\|
3794	new_len < skb->len))
3795	return -EINVAL;
3796
3797	ret = skb_cow(skb, headroom: head_room);
3798	if (likely(!ret)) {
3799	/ Idea for this helper is that we currently only*
3800	* allow to expand on mac header. This means that
3801	* skb->protocol network header, etc, stay as is.
3802	* Compared to bpf_skb_change_tail(), we're more
3803	* flexible due to not needing to linearize or
3804	* reset GSO. Intention for this helper is to be
3805	* used by an L3 skb that needs to push mac header
3806	* for redirection into L2 device.
3807	*/
3808	__skb_push(skb, len: head_room);
3809	memset(skb->data, `0`, head_room);
3810	skb_reset_mac_header(skb);
3811	skb_reset_mac_len(skb);
3812	}
3813
3814	return ret;
3815	}
3816
3817	BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
3818	u64, flags)
3819	{
3820	int ret = __bpf_skb_change_head(skb, head_room, flags);
3821
3822	bpf_compute_data_pointers(skb);
3823	return ret;
3824	}
3825
3826	static const struct bpf_func_proto bpf_skb_change_head_proto = {
3827	.func = bpf_skb_change_head,
3828	.gpl_only = false,
3829	.ret_type = RET_INTEGER,
3830	.arg1_type = ARG_PTR_TO_CTX,
3831	.arg2_type = ARG_ANYTHING,
3832	.arg3_type = ARG_ANYTHING,
3833	};
3834
3835	BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
3836	u64, flags)
3837	{
3838	return __bpf_skb_change_head(skb, head_room, flags);
3839	}
3840
3841	static const struct bpf_func_proto sk_skb_change_head_proto = {
3842	.func = sk_skb_change_head,
3843	.gpl_only = false,
3844	.ret_type = RET_INTEGER,
3845	.arg1_type = ARG_PTR_TO_CTX,
3846	.arg2_type = ARG_ANYTHING,
3847	.arg3_type = ARG_ANYTHING,
3848	};
3849
3850	BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp)
3851	{
3852	return xdp_get_buff_len(xdp);
3853	}
3854
3855	static const struct bpf_func_proto bpf_xdp_get_buff_len_proto = {
3856	.func = bpf_xdp_get_buff_len,
3857	.gpl_only = false,
3858	.ret_type = RET_INTEGER,
3859	.arg1_type = ARG_PTR_TO_CTX,
3860	};
3861
3862	BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff)
3863
3864	const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = {
3865	.func = bpf_xdp_get_buff_len,
3866	.gpl_only = false,
3867	.arg1_type = ARG_PTR_TO_BTF_ID,
3868	.arg1_btf_id = &bpf_xdp_get_buff_len_bpf_ids[`0`],
3869	};
3870
3871	static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
3872	{
3873	return xdp_data_meta_unsupported(xdp) ? `0` :
3874	xdp->data - xdp->data_meta;
3875	}
3876
3877	BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff , xdp, int*, offset)
3878	{
3879	void xdp_frame_end = xdp->data_hard_start + sizeof(struct* xdp_frame);
3880	unsigned long metalen = xdp_get_metalen(xdp);
3881	void *data_start = xdp_frame_end + metalen;
3882	void *data = xdp->data + offset;
3883
3884	if (unlikely(data < data_start \|\|
3885	data > xdp->data_end - ETH_HLEN))
3886	return -EINVAL;
3887
3888	if (metalen)
3889	memmove(xdp->data_meta + offset,
3890	xdp->data_meta, metalen);
3891	xdp->data_meta += offset;
3892	xdp->data = data;
3893
3894	return `0`;
3895	}
3896
3897	static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
3898	.func = bpf_xdp_adjust_head,
3899	.gpl_only = false,
3900	.ret_type = RET_INTEGER,
3901	.arg1_type = ARG_PTR_TO_CTX,
3902	.arg2_type = ARG_ANYTHING,
3903	};
3904
3905	void bpf_xdp_copy_buf(struct xdp_buff xdp, unsigned* long off,
3906	void buf, unsigned* long len, bool flush)
3907	{
3908	unsigned long ptr_len, ptr_off = `0`;
3909	skb_frag_t next_frag, end_frag;
3910	struct skb_shared_info *sinfo;
3911	void src, dst;
3912	u8 *ptr_buf;
3913
3914	if (likely(xdp->data_end - xdp->data >= off + len)) {
3915	src = flush ? buf : xdp->data + off;
3916	dst = flush ? xdp->data + off : buf;
3917	memcpy(dst, src, len);
3918	return;
3919	}
3920
3921	sinfo = xdp_get_shared_info_from_buff(xdp);
3922	end_frag = &sinfo->frags[sinfo->nr_frags];
3923	next_frag = &sinfo->frags[`0`];
3924
3925	ptr_len = xdp->data_end - xdp->data;
3926	ptr_buf = xdp->data;
3927
3928	while (true) {
3929	if (off < ptr_off + ptr_len) {
3930	unsigned long copy_off = off - ptr_off;
3931	unsigned long copy_len = min(len, ptr_len - copy_off);
3932
3933	src = flush ? buf : ptr_buf + copy_off;
3934	dst = flush ? ptr_buf + copy_off : buf;
3935	memcpy(dst, src, copy_len);
3936
3937	off += copy_len;
3938	len -= copy_len;
3939	buf += copy_len;
3940	}
3941
3942	if (!len \|\| next_frag == end_frag)
3943	break;
3944
3945	ptr_off += ptr_len;
3946	ptr_buf = skb_frag_address(frag: next_frag);
3947	ptr_len = skb_frag_size(frag: next_frag);
3948	next_frag++;
3949	}
3950	}
3951
3952	void bpf_xdp_pointer(struct* xdp_buff *xdp, u32 offset, u32 len)
3953	{
3954	u32 size = xdp->data_end - xdp->data;
3955	struct skb_shared_info *sinfo;
3956	void *addr = xdp->data;
3957	int i;
3958
3959	if (unlikely(offset > `0xffff` \|\| len > `0xffff`))
3960	return ERR_PTR(error: -EFAULT);
3961
3962	if (unlikely(offset + len > xdp_get_buff_len(xdp)))
3963	return ERR_PTR(error: -EINVAL);
3964
3965	if (likely(offset < size)) / linear area /
3966	goto out;
3967
3968	sinfo = xdp_get_shared_info_from_buff(xdp);
3969	offset -= size;
3970	for (i = `0`; i < sinfo->nr_frags; i++) { / paged area /
3971	u32 frag_size = skb_frag_size(frag: &sinfo->frags[i]);
3972
3973	if (offset < frag_size) {
3974	addr = skb_frag_address(frag: &sinfo->frags[i]);
3975	size = frag_size;
3976	break;
3977	}
3978	offset -= frag_size;
3979	}
3980	out:
3981	return offset + len <= size ? addr + offset : NULL;
3982	}
3983
3984	BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset,
3985	void *, buf, u32, len)
3986	{
3987	void *ptr;
3988
3989	ptr = bpf_xdp_pointer(xdp, offset, len);
3990	if (IS_ERR(ptr))
3991	return PTR_ERR(ptr);
3992
3993	if (!ptr)
3994	bpf_xdp_copy_buf(xdp, off: offset, buf, len, flush: false);
3995	else
3996	memcpy(buf, ptr, len);
3997
3998	return `0`;
3999	}
4000
4001	static const struct bpf_func_proto bpf_xdp_load_bytes_proto = {
4002	.func = bpf_xdp_load_bytes,
4003	.gpl_only = false,
4004	.ret_type = RET_INTEGER,
4005	.arg1_type = ARG_PTR_TO_CTX,
4006	.arg2_type = ARG_ANYTHING,
4007	.arg3_type = ARG_PTR_TO_UNINIT_MEM,
4008	.arg4_type = ARG_CONST_SIZE,
4009	};
4010
4011	int __bpf_xdp_load_bytes(struct xdp_buff xdp, u32 offset, void* *buf, u32 len)
4012	{
4013	return ____bpf_xdp_load_bytes(xdp, offset, buf, len);
4014	}
4015
4016	BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset,
4017	void *, buf, u32, len)
4018	{
4019	void *ptr;
4020
4021	ptr = bpf_xdp_pointer(xdp, offset, len);
4022	if (IS_ERR(ptr))
4023	return PTR_ERR(ptr);
4024
4025	if (!ptr)
4026	bpf_xdp_copy_buf(xdp, off: offset, buf, len, flush: true);
4027	else
4028	memcpy(ptr, buf, len);
4029
4030	return `0`;
4031	}
4032
4033	static const struct bpf_func_proto bpf_xdp_store_bytes_proto = {
4034	.func = bpf_xdp_store_bytes,
4035	.gpl_only = false,
4036	.ret_type = RET_INTEGER,
4037	.arg1_type = ARG_PTR_TO_CTX,
4038	.arg2_type = ARG_ANYTHING,
4039	.arg3_type = ARG_PTR_TO_UNINIT_MEM,
4040	.arg4_type = ARG_CONST_SIZE,
4041	};
4042
4043	int __bpf_xdp_store_bytes(struct xdp_buff xdp, u32 offset, void* *buf, u32 len)
4044	{
4045	return ____bpf_xdp_store_bytes(xdp, offset, buf, len);
4046	}
4047
4048	static int bpf_xdp_frags_increase_tail(struct xdp_buff xdp, int* offset)
4049	{
4050	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
4051	skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - `1`];
4052	struct xdp_rxq_info *rxq = xdp->rxq;
4053	unsigned int tailroom;
4054
4055	if (!rxq->frag_size \|\| rxq->frag_size > xdp->frame_sz)
4056	return -EOPNOTSUPP;
4057
4058	tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag);
4059	if (unlikely(offset > tailroom))
4060	return -EINVAL;
4061
4062	memset(skb_frag_address(frag) + skb_frag_size(frag), `0`, offset);
4063	skb_frag_size_add(frag, delta: offset);
4064	sinfo->xdp_frags_size += offset;
4065
4066	return `0`;
4067	}
4068
4069	static int bpf_xdp_frags_shrink_tail(struct xdp_buff xdp, int* offset)
4070	{
4071	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
4072	int i, n_frags_free = `0`, len_free = `0`;
4073
4074	if (unlikely(offset > (int)xdp_get_buff_len(xdp) - ETH_HLEN))
4075	return -EINVAL;
4076
4077	for (i = sinfo->nr_frags - `1`; i >= `0` && offset > `0`; i--) {
4078	skb_frag_t *frag = &sinfo->frags[i];
4079	int shrink = min_t(int, offset, skb_frag_size(frag));
4080
4081	len_free += shrink;
4082	offset -= shrink;
4083
4084	if (skb_frag_size(frag) == shrink) {
4085	struct page *page = skb_frag_page(frag);
4086
4087	__xdp_return(page_address(page), mem: &xdp->rxq->mem,
4088	napi_direct: false, NULL);
4089	n_frags_free++;
4090	} else {
4091	skb_frag_size_sub(frag, delta: shrink);
4092	break;
4093	}
4094	}
4095	sinfo->nr_frags -= n_frags_free;
4096	sinfo->xdp_frags_size -= len_free;
4097
4098	if (unlikely(!sinfo->nr_frags)) {
4099	xdp_buff_clear_frags_flag(xdp);
4100	xdp->data_end -= offset;
4101	}
4102
4103	return `0`;
4104	}
4105
4106	BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff , xdp, int*, offset)
4107	{
4108	void data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz /
4109	void *data_end = xdp->data_end + offset;
4110
4111	if (unlikely(xdp_buff_has_frags(xdp))) { / non-linear xdp buff /
4112	if (offset < `0`)
4113	return bpf_xdp_frags_shrink_tail(xdp, offset: -offset);
4114
4115	return bpf_xdp_frags_increase_tail(xdp, offset);
4116	}
4117
4118	/ Notice that xdp_data_hard_end have reserved some tailroom /
4119	if (unlikely(data_end > data_hard_end))
4120	return -EINVAL;
4121
4122	if (unlikely(data_end < xdp->data + ETH_HLEN))
4123	return -EINVAL;
4124
4125	/ Clear memory area on grow, can contain uninit kernel memory /
4126	if (offset > `0`)
4127	memset(xdp->data_end, `0`, offset);
4128
4129	xdp->data_end = data_end;
4130
4131	return `0`;
4132	}
4133
4134	static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = {
4135	.func = bpf_xdp_adjust_tail,
4136	.gpl_only = false,
4137	.ret_type = RET_INTEGER,
4138	.arg1_type = ARG_PTR_TO_CTX,
4139	.arg2_type = ARG_ANYTHING,
4140	};
4141
4142	BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff , xdp, int*, offset)
4143	{
4144	void xdp_frame_end = xdp->data_hard_start + sizeof(struct* xdp_frame);
4145	void *meta = xdp->data_meta + offset;
4146	unsigned long metalen = xdp->data - meta;
4147
4148	if (xdp_data_meta_unsupported(xdp))
4149	return -ENOTSUPP;
4150	if (unlikely(meta < xdp_frame_end \|\|
4151	meta > xdp->data))
4152	return -EINVAL;
4153	if (unlikely(xdp_metalen_invalid(metalen)))
4154	return -EACCES;
4155
4156	xdp->data_meta = meta;
4157
4158	return `0`;
4159	}
4160
4161	static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
4162	.func = bpf_xdp_adjust_meta,
4163	.gpl_only = false,
4164	.ret_type = RET_INTEGER,
4165	.arg1_type = ARG_PTR_TO_CTX,
4166	.arg2_type = ARG_ANYTHING,
4167	};
4168
4169	/**
4170	* DOC: xdp redirect
4171	*
4172	* XDP_REDIRECT works by a three-step process, implemented in the functions
4173	* below:
4174	*
4175	* 1. The bpf_redirect() and bpf_redirect_map() helpers will lookup the target
4176	* of the redirect and store it (along with some other metadata) in a per-CPU
4177	* struct bpf_redirect_info.
4178	*
4179	* 2. When the program returns the XDP_REDIRECT return code, the driver will
4180	* call xdp_do_redirect() which will use the information in struct
4181	* bpf_redirect_info to actually enqueue the frame into a map type-specific
4182	* bulk queue structure.
4183	*
4184	* 3. Before exiting its NAPI poll loop, the driver will call
4185	* xdp_do_flush(), which will flush all the different bulk queues,
4186	* thus completing the redirect. Note that xdp_do_flush() must be
4187	* called before napi_complete_done() in the driver, as the
4188	* XDP_REDIRECT logic relies on being inside a single NAPI instance
4189	* through to the xdp_do_flush() call for RCU protection of all
4190	* in-kernel data structures.
4191	*/
4192	/*
4193	* Pointers to the map entries will be kept around for this whole sequence of
4194	* steps, protected by RCU. However, there is no top-level rcu_read_lock() in
4195	* the core code; instead, the RCU protection relies on everything happening
4196	* inside a single NAPI poll sequence, which means it's between a pair of calls
4197	* to local_bh_disable()/local_bh_enable().
4198	*
4199	* The map entries are marked as __rcu and the map code makes sure to
4200	* dereference those pointers with rcu_dereference_check() in a way that works
4201	* for both sections that to hold an rcu_read_lock() and sections that are
4202	* called from NAPI without a separate rcu_read_lock(). The code below does not
4203	* use RCU annotations, but relies on those in the map code.
4204	*/
4205	void xdp_do_flush(void)
4206	{
4207	__dev_flush();
4208	__cpu_map_flush();
4209	__xsk_map_flush();
4210	}
4211	EXPORT_SYMBOL_GPL(xdp_do_flush);
4212
4213	#if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL)
4214	void xdp_do_check_flushed(struct napi_struct *napi)
4215	{
4216	bool ret;
4217
4218	ret = dev_check_flush();
4219	ret \|= cpu_map_check_flush();
4220	ret \|= xsk_map_check_flush();
4221
4222	WARN_ONCE(ret, "Missing xdp_do_flush() invocation after NAPI by %ps\n",
4223	napi->poll);
4224	}
4225	#endif
4226
4227	void bpf_clear_redirect_map(struct bpf_map *map)
4228	{
4229	struct bpf_redirect_info *ri;
4230	int cpu;
4231
4232	for_each_possible_cpu(cpu) {
4233	ri = per_cpu_ptr(&bpf_redirect_info, cpu);
4234	/ Avoid polluting remote cacheline due to writes if*
4235	* not needed. Once we pass this test, we need the
4236	* cmpxchg() to make sure it hasn't been changed in
4237	* the meantime by remote CPU.
4238	*/
4239	if (unlikely(READ_ONCE(ri->map) == map))
4240	cmpxchg(&ri->map, map, NULL);
4241	}
4242	}
4243
4244	DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
4245	EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key);
4246
4247	u32 xdp_master_redirect(struct xdp_buff *xdp)
4248	{
4249	struct net_device master, slave;
4250	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4251
4252	master = netdev_master_upper_dev_get_rcu(dev: xdp->rxq->dev);
4253	slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp);
4254	if (slave && slave != xdp->rxq->dev) {
4255	/ The target device is different from the receiving device, so*
4256	* redirect it to the new device.
4257	* Using XDP_REDIRECT gets the correct behaviour from XDP enabled
4258	* drivers to unmap the packet from their rx ring.
4259	*/
4260	ri->tgt_index = slave->ifindex;
4261	ri->map_id = INT_MAX;
4262	ri->map_type = BPF_MAP_TYPE_UNSPEC;
4263	return XDP_REDIRECT;
4264	}
4265	return XDP_TX;
4266	}
4267	EXPORT_SYMBOL_GPL(xdp_master_redirect);
4268
4269	static inline int __xdp_do_redirect_xsk(struct bpf_redirect_info *ri,
4270	struct net_device *dev,
4271	struct xdp_buff *xdp,
4272	struct bpf_prog *xdp_prog)
4273	{
4274	enum bpf_map_type map_type = ri->map_type;
4275	void *fwd = ri->tgt_value;
4276	u32 map_id = ri->map_id;
4277	int err;
4278
4279	ri->map_id = `0`; / Valid map id idr range: [1,INT_MAX[ /
4280	ri->map_type = BPF_MAP_TYPE_UNSPEC;
4281
4282	err = __xsk_map_redirect(xs: fwd, xdp);
4283	if (unlikely(err))
4284	goto err;
4285
4286	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
4287	return `0`;
4288	err:
4289	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
4290	return err;
4291	}
4292
4293	static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
4294	struct net_device *dev,
4295	struct xdp_frame *xdpf,
4296	struct bpf_prog *xdp_prog)
4297	{
4298	enum bpf_map_type map_type = ri->map_type;
4299	void *fwd = ri->tgt_value;
4300	u32 map_id = ri->map_id;
4301	struct bpf_map *map;
4302	int err;
4303
4304	ri->map_id = `0`; / Valid map id idr range: [1,INT_MAX[ /
4305	ri->map_type = BPF_MAP_TYPE_UNSPEC;
4306
4307	if (unlikely(!xdpf)) {
4308	err = -EOVERFLOW;
4309	goto err;
4310	}
4311
4312	switch (map_type) {
4313	case BPF_MAP_TYPE_DEVMAP:
4314	fallthrough;
4315	case BPF_MAP_TYPE_DEVMAP_HASH:
4316	map = READ_ONCE(ri->map);
4317	if (unlikely(map)) {
4318	WRITE_ONCE(ri->map, NULL);
4319	err = dev_map_enqueue_multi(xdpf, dev_rx: dev, map,
4320	exclude_ingress: ri->flags & BPF_F_EXCLUDE_INGRESS);
4321	} else {
4322	err = dev_map_enqueue(dst: fwd, xdpf, dev_rx: dev);
4323	}
4324	break;
4325	case BPF_MAP_TYPE_CPUMAP:
4326	err = cpu_map_enqueue(rcpu: fwd, xdpf, dev_rx: dev);
4327	break;
4328	case BPF_MAP_TYPE_UNSPEC:
4329	if (map_id == INT_MAX) {
4330	fwd = dev_get_by_index_rcu(net: dev_net(dev), ifindex: ri->tgt_index);
4331	if (unlikely(!fwd)) {
4332	err = -EINVAL;
4333	break;
4334	}
4335	err = dev_xdp_enqueue(dev: fwd, xdpf, dev_rx: dev);
4336	break;
4337	}
4338	fallthrough;
4339	default:
4340	err = -EBADRQC;
4341	}
4342
4343	if (unlikely(err))
4344	goto err;
4345
4346	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
4347	return `0`;
4348	err:
4349	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
4350	return err;
4351	}
4352
4353	int xdp_do_redirect(struct net_device dev, struct* xdp_buff *xdp,
4354	struct bpf_prog *xdp_prog)
4355	{
4356	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4357	enum bpf_map_type map_type = ri->map_type;
4358
4359	if (map_type == BPF_MAP_TYPE_XSKMAP)
4360	return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
4361
4362	return __xdp_do_redirect_frame(ri, dev, xdpf: xdp_convert_buff_to_frame(xdp),
4363	xdp_prog);
4364	}
4365	EXPORT_SYMBOL_GPL(xdp_do_redirect);
4366
4367	int xdp_do_redirect_frame(struct net_device dev, struct* xdp_buff *xdp,
4368	struct xdp_frame xdpf, struct* bpf_prog *xdp_prog)
4369	{
4370	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4371	enum bpf_map_type map_type = ri->map_type;
4372
4373	if (map_type == BPF_MAP_TYPE_XSKMAP)
4374	return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
4375
4376	return __xdp_do_redirect_frame(ri, dev, xdpf, xdp_prog);
4377	}
4378	EXPORT_SYMBOL_GPL(xdp_do_redirect_frame);
4379
4380	static int xdp_do_generic_redirect_map(struct net_device *dev,
4381	struct sk_buff *skb,
4382	struct xdp_buff *xdp,
4383	struct bpf_prog *xdp_prog,
4384	void *fwd,
4385	enum bpf_map_type map_type, u32 map_id)
4386	{
4387	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4388	struct bpf_map *map;
4389	int err;
4390
4391	switch (map_type) {
4392	case BPF_MAP_TYPE_DEVMAP:
4393	fallthrough;
4394	case BPF_MAP_TYPE_DEVMAP_HASH:
4395	map = READ_ONCE(ri->map);
4396	if (unlikely(map)) {
4397	WRITE_ONCE(ri->map, NULL);
4398	err = dev_map_redirect_multi(dev, skb, xdp_prog, map,
4399	exclude_ingress: ri->flags & BPF_F_EXCLUDE_INGRESS);
4400	} else {
4401	err = dev_map_generic_redirect(dst: fwd, skb, xdp_prog);
4402	}
4403	if (unlikely(err))
4404	goto err;
4405	break;
4406	case BPF_MAP_TYPE_XSKMAP:
4407	err = xsk_generic_rcv(xs: fwd, xdp);
4408	if (err)
4409	goto err;
4410	consume_skb(skb);
4411	break;
4412	case BPF_MAP_TYPE_CPUMAP:
4413	err = cpu_map_generic_redirect(rcpu: fwd, skb);
4414	if (unlikely(err))
4415	goto err;
4416	break;
4417	default:
4418	err = -EBADRQC;
4419	goto err;
4420	}
4421
4422	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
4423	return `0`;
4424	err:
4425	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
4426	return err;
4427	}
4428
4429	int xdp_do_generic_redirect(struct net_device dev, struct* sk_buff *skb,
4430	struct xdp_buff xdp, struct* bpf_prog *xdp_prog)
4431	{
4432	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4433	enum bpf_map_type map_type = ri->map_type;
4434	void *fwd = ri->tgt_value;
4435	u32 map_id = ri->map_id;
4436	int err;
4437
4438	ri->map_id = `0`; / Valid map id idr range: [1,INT_MAX[ /
4439	ri->map_type = BPF_MAP_TYPE_UNSPEC;
4440
4441	if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
4442	fwd = dev_get_by_index_rcu(net: dev_net(dev), ifindex: ri->tgt_index);
4443	if (unlikely(!fwd)) {
4444	err = -EINVAL;
4445	goto err;
4446	}
4447
4448	err = xdp_ok_fwd_dev(fwd, pktlen: skb->len);
4449	if (unlikely(err))
4450	goto err;
4451
4452	skb->dev = fwd;
4453	_trace_xdp_redirect(dev, xdp_prog, ri->tgt_index);
4454	generic_xdp_tx(skb, xdp_prog);
4455	return `0`;
4456	}
4457
4458	return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id);
4459	err:
4460	_trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err);
4461	return err;
4462	}
4463
4464	BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
4465	{
4466	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4467
4468	if (unlikely(flags))
4469	return XDP_ABORTED;
4470
4471	/ NB! Map type UNSPEC and map_id == INT_MAX (never generated*
4472	* by map_idr) is used for ifindex based XDP redirect.
4473	*/
4474	ri->tgt_index = ifindex;
4475	ri->map_id = INT_MAX;
4476	ri->map_type = BPF_MAP_TYPE_UNSPEC;
4477
4478	return XDP_REDIRECT;
4479	}
4480
4481	static const struct bpf_func_proto bpf_xdp_redirect_proto = {
4482	.func = bpf_xdp_redirect,
4483	.gpl_only = false,
4484	.ret_type = RET_INTEGER,
4485	.arg1_type = ARG_ANYTHING,
4486	.arg2_type = ARG_ANYTHING,
4487	};
4488
4489	BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u64, key,
4490	u64, flags)
4491	{
4492	return map->ops->map_redirect(map, key, flags);
4493	}
4494
4495	static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
4496	.func = bpf_xdp_redirect_map,
4497	.gpl_only = false,
4498	.ret_type = RET_INTEGER,
4499	.arg1_type = ARG_CONST_MAP_PTR,
4500	.arg2_type = ARG_ANYTHING,
4501	.arg3_type = ARG_ANYTHING,
4502	};
4503
4504	static unsigned long bpf_skb_copy(void dst_buff, const* void *skb,
4505	unsigned long off, unsigned long len)
4506	{
4507	void *ptr = skb_header_pointer(skb, offset: off, len, buffer: dst_buff);
4508
4509	if (unlikely(!ptr))
4510	return len;
4511	if (ptr != dst_buff)
4512	memcpy(dst_buff, ptr, len);
4513
4514	return `0`;
4515	}
4516
4517	BPF_CALL_5(bpf_skb_event_output, struct sk_buff , skb, struct* bpf_map *, map,
4518	u64, flags, void *, meta, u64, meta_size)
4519	{
4520	u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> `32`;
4521
4522	if (unlikely(flags & ~(BPF_F_CTXLEN_MASK \| BPF_F_INDEX_MASK)))
4523	return -EINVAL;
4524	if (unlikely(!skb \|\| skb_size > skb->len))
4525	return -EFAULT;
4526
4527	return bpf_event_output(map, flags, meta, meta_size, ctx: skb, ctx_size: skb_size,
4528	ctx_copy: bpf_skb_copy);
4529	}
4530
4531	static const struct bpf_func_proto bpf_skb_event_output_proto = {
4532	.func = bpf_skb_event_output,
4533	.gpl_only = true,
4534	.ret_type = RET_INTEGER,
4535	.arg1_type = ARG_PTR_TO_CTX,
4536	.arg2_type = ARG_CONST_MAP_PTR,
4537	.arg3_type = ARG_ANYTHING,
4538	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
4539	.arg5_type = ARG_CONST_SIZE_OR_ZERO,
4540	};
4541
4542	BTF_ID_LIST_SINGLE(bpf_skb_output_btf_ids, struct, sk_buff)
4543
4544	const struct bpf_func_proto bpf_skb_output_proto = {
4545	.func = bpf_skb_event_output,
4546	.gpl_only = true,
4547	.ret_type = RET_INTEGER,
4548	.arg1_type = ARG_PTR_TO_BTF_ID,
4549	.arg1_btf_id = &bpf_skb_output_btf_ids[`0`],
4550	.arg2_type = ARG_CONST_MAP_PTR,
4551	.arg3_type = ARG_ANYTHING,
4552	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
4553	.arg5_type = ARG_CONST_SIZE_OR_ZERO,
4554	};
4555
4556	static unsigned short bpf_tunnel_key_af(u64 flags)
4557	{
4558	return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
4559	}
4560
4561	BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff , skb, struct* bpf_tunnel_key *, to,
4562	u32, size, u64, flags)
4563	{
4564	const struct ip_tunnel_info *info = skb_tunnel_info(skb);
4565	u8 compat[sizeof(struct bpf_tunnel_key)];
4566	void *to_orig = to;
4567	int err;
4568
4569	if (unlikely(!info \|\| (flags & ~(BPF_F_TUNINFO_IPV6 \|
4570	BPF_F_TUNINFO_FLAGS)))) {
4571	err = -EINVAL;
4572	goto err_clear;
4573	}
4574	if (ip_tunnel_info_af(tun_info: info) != bpf_tunnel_key_af(flags)) {
4575	err = -EPROTO;
4576	goto err_clear;
4577	}
4578	if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
4579	err = -EINVAL;
4580	switch (size) {
4581	case offsetof(struct bpf_tunnel_key, local_ipv6[`0`]):
4582	case offsetof(struct bpf_tunnel_key, tunnel_label):
4583	case offsetof(struct bpf_tunnel_key, tunnel_ext):
4584	goto set_compat;
4585	case offsetof(struct bpf_tunnel_key, remote_ipv6[`1`]):
4586	/ Fixup deprecated structure layouts here, so we have*
4587	* a common path later on.
4588	*/
4589	if (ip_tunnel_info_af(tun_info: info) != AF_INET)
4590	goto err_clear;
4591	set_compat:
4592	to = (struct bpf_tunnel_key *)compat;
4593	break;
4594	default:
4595	goto err_clear;
4596	}
4597	}
4598
4599	to->tunnel_id = be64_to_cpu(info->key.tun_id);
4600	to->tunnel_tos = info->key.tos;
4601	to->tunnel_ttl = info->key.ttl;
4602	if (flags & BPF_F_TUNINFO_FLAGS)
4603	to->tunnel_flags = info->key.tun_flags;
4604	else
4605	to->tunnel_ext = `0`;
4606
4607	if (flags & BPF_F_TUNINFO_IPV6) {
4608	memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
4609	sizeof(to->remote_ipv6));
4610	memcpy(to->local_ipv6, &info->key.u.ipv6.dst,
4611	sizeof(to->local_ipv6));
4612	to->tunnel_label = be32_to_cpu(info->key.label);
4613	} else {
4614	to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
4615	memset(&to->remote_ipv6[`1`], `0`, sizeof(__u32) * `3`);
4616	to->local_ipv4 = be32_to_cpu(info->key.u.ipv4.dst);
4617	memset(&to->local_ipv6[`1`], `0`, sizeof(__u32) * `3`);
4618	to->tunnel_label = `0`;
4619	}
4620
4621	if (unlikely(size != sizeof(struct bpf_tunnel_key)))
4622	memcpy(to_orig, to, size);
4623
4624	return `0`;
4625	err_clear:
4626	memset(to_orig, `0`, size);
4627	return err;
4628	}
4629
4630	static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
4631	.func = bpf_skb_get_tunnel_key,
4632	.gpl_only = false,
4633	.ret_type = RET_INTEGER,
4634	.arg1_type = ARG_PTR_TO_CTX,
4635	.arg2_type = ARG_PTR_TO_UNINIT_MEM,
4636	.arg3_type = ARG_CONST_SIZE,
4637	.arg4_type = ARG_ANYTHING,
4638	};
4639
4640	BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff , skb, u8 , to, u32, size)
4641	{
4642	const struct ip_tunnel_info *info = skb_tunnel_info(skb);
4643	int err;
4644
4645	if (unlikely(!info \|\|
4646	!(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) {
4647	err = -ENOENT;
4648	goto err_clear;
4649	}
4650	if (unlikely(size < info->options_len)) {
4651	err = -ENOMEM;
4652	goto err_clear;
4653	}
4654
4655	ip_tunnel_info_opts_get(to, info);
4656	if (size > info->options_len)
4657	memset(to + info->options_len, `0`, size - info->options_len);
4658
4659	return info->options_len;
4660	err_clear:
4661	memset(to, `0`, size);
4662	return err;
4663	}
4664
4665	static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
4666	.func = bpf_skb_get_tunnel_opt,
4667	.gpl_only = false,
4668	.ret_type = RET_INTEGER,
4669	.arg1_type = ARG_PTR_TO_CTX,
4670	.arg2_type = ARG_PTR_TO_UNINIT_MEM,
4671	.arg3_type = ARG_CONST_SIZE,
4672	};
4673
4674	static struct metadata_dst __percpu *md_dst;
4675
4676	BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
4677	const struct bpf_tunnel_key *, from, u32, size, u64, flags)
4678	{
4679	struct metadata_dst *md = this_cpu_ptr(md_dst);
4680	u8 compat[sizeof(struct bpf_tunnel_key)];
4681	struct ip_tunnel_info *info;
4682
4683	if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 \| BPF_F_ZERO_CSUM_TX \|
4684	BPF_F_DONT_FRAGMENT \| BPF_F_SEQ_NUMBER \|
4685	BPF_F_NO_TUNNEL_KEY)))
4686	return -EINVAL;
4687	if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
4688	switch (size) {
4689	case offsetof(struct bpf_tunnel_key, local_ipv6[`0`]):
4690	case offsetof(struct bpf_tunnel_key, tunnel_label):
4691	case offsetof(struct bpf_tunnel_key, tunnel_ext):
4692	case offsetof(struct bpf_tunnel_key, remote_ipv6[`1`]):
4693	/ Fixup deprecated structure layouts here, so we have*
4694	* a common path later on.
4695	*/
4696	memcpy(compat, from, size);
4697	memset(compat + size, `0`, sizeof(compat) - size);
4698	from = (const struct bpf_tunnel_key *) compat;
4699	break;
4700	default:
4701	return -EINVAL;
4702	}
4703	}
4704	if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) \|\|
4705	from->tunnel_ext))
4706	return -EINVAL;
4707
4708	skb_dst_drop(skb);
4709	dst_hold(dst: (struct dst_entry *) md);
4710	skb_dst_set(skb, dst: (struct dst_entry *) md);
4711
4712	info = &md->u.tun_info;
4713	memset(info, `0`, sizeof(*info));
4714	info->mode = IP_TUNNEL_INFO_TX;
4715
4716	info->key.tun_flags = TUNNEL_KEY \| TUNNEL_CSUM \| TUNNEL_NOCACHE;
4717	if (flags & BPF_F_DONT_FRAGMENT)
4718	info->key.tun_flags \|= TUNNEL_DONT_FRAGMENT;
4719	if (flags & BPF_F_ZERO_CSUM_TX)
4720	info->key.tun_flags &= ~TUNNEL_CSUM;
4721	if (flags & BPF_F_SEQ_NUMBER)
4722	info->key.tun_flags \|= TUNNEL_SEQ;
4723	if (flags & BPF_F_NO_TUNNEL_KEY)
4724	info->key.tun_flags &= ~TUNNEL_KEY;
4725
4726	info->key.tun_id = cpu_to_be64(from->tunnel_id);
4727	info->key.tos = from->tunnel_tos;
4728	info->key.ttl = from->tunnel_ttl;
4729
4730	if (flags & BPF_F_TUNINFO_IPV6) {
4731	info->mode \|= IP_TUNNEL_INFO_IPV6;
4732	memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
4733	sizeof(from->remote_ipv6));
4734	memcpy(&info->key.u.ipv6.src, from->local_ipv6,
4735	sizeof(from->local_ipv6));
4736	info->key.label = cpu_to_be32(from->tunnel_label) &
4737	IPV6_FLOWLABEL_MASK;
4738	} else {
4739	info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
4740	info->key.u.ipv4.src = cpu_to_be32(from->local_ipv4);
4741	info->key.flow_flags = FLOWI_FLAG_ANYSRC;
4742	}
4743
4744	return `0`;
4745	}
4746
4747	static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
4748	.func = bpf_skb_set_tunnel_key,
4749	.gpl_only = false,
4750	.ret_type = RET_INTEGER,
4751	.arg1_type = ARG_PTR_TO_CTX,
4752	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
4753	.arg3_type = ARG_CONST_SIZE,
4754	.arg4_type = ARG_ANYTHING,
4755	};
4756
4757	BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
4758	const u8 *, from, u32, size)
4759	{
4760	struct ip_tunnel_info *info = skb_tunnel_info(skb);
4761	const struct metadata_dst *md = this_cpu_ptr(md_dst);
4762
4763	if (unlikely(info != &md->u.tun_info \|\| (size & (sizeof(u32) - `1`))))
4764	return -EINVAL;
4765	if (unlikely(size > IP_TUNNEL_OPTS_MAX))
4766	return -ENOMEM;
4767
4768	ip_tunnel_info_opts_set(info, from, len: size, TUNNEL_OPTIONS_PRESENT);
4769
4770	return `0`;
4771	}
4772
4773	static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
4774	.func = bpf_skb_set_tunnel_opt,
4775	.gpl_only = false,
4776	.ret_type = RET_INTEGER,
4777	.arg1_type = ARG_PTR_TO_CTX,
4778	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
4779	.arg3_type = ARG_CONST_SIZE,
4780	};
4781
4782	static const struct bpf_func_proto *
4783	bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
4784	{
4785	if (!md_dst) {
4786	struct metadata_dst __percpu *tmp;
4787
4788	tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
4789	type: METADATA_IP_TUNNEL,
4790	GFP_KERNEL);
4791	if (!tmp)
4792	return NULL;
4793	if (cmpxchg(&md_dst, NULL, tmp))
4794	metadata_dst_free_percpu(md_dst: tmp);
4795	}
4796
4797	switch (which) {
4798	case BPF_FUNC_skb_set_tunnel_key:
4799	return &bpf_skb_set_tunnel_key_proto;
4800	case BPF_FUNC_skb_set_tunnel_opt:
4801	return &bpf_skb_set_tunnel_opt_proto;
4802	default:
4803	return NULL;
4804	}
4805	}
4806
4807	BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff , skb, struct* bpf_map *, map,
4808	u32, idx)
4809	{
4810	struct bpf_array array = container_of(map, struct* bpf_array, map);
4811	struct cgroup *cgrp;
4812	struct sock *sk;
4813
4814	sk = skb_to_full_sk(skb);
4815	if (!sk \|\| !sk_fullsock(sk))
4816	return -ENOENT;
4817	if (unlikely(idx >= array->map.max_entries))
4818	return -E2BIG;
4819
4820	cgrp = READ_ONCE(array->ptrs[idx]);
4821	if (unlikely(!cgrp))
4822	return -EAGAIN;
4823
4824	return sk_under_cgroup_hierarchy(sk, ancestor: cgrp);
4825	}
4826
4827	static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
4828	.func = bpf_skb_under_cgroup,
4829	.gpl_only = false,
4830	.ret_type = RET_INTEGER,
4831	.arg1_type = ARG_PTR_TO_CTX,
4832	.arg2_type = ARG_CONST_MAP_PTR,
4833	.arg3_type = ARG_ANYTHING,
4834	};
4835
4836	#ifdef CONFIG_SOCK_CGROUP_DATA
4837	static inline u64 __bpf_sk_cgroup_id(struct sock *sk)
4838	{
4839	struct cgroup *cgrp;
4840
4841	sk = sk_to_full_sk(sk);
4842	if (!sk \|\| !sk_fullsock(sk))
4843	return `0`;
4844
4845	cgrp = sock_cgroup_ptr(skcd: &sk->sk_cgrp_data);
4846	return cgroup_id(cgrp);
4847	}
4848
4849	BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
4850	{
4851	return __bpf_sk_cgroup_id(sk: skb->sk);
4852	}
4853
4854	static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
4855	.func = bpf_skb_cgroup_id,
4856	.gpl_only = false,
4857	.ret_type = RET_INTEGER,
4858	.arg1_type = ARG_PTR_TO_CTX,
4859	};
4860
4861	static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk,
4862	int ancestor_level)
4863	{
4864	struct cgroup *ancestor;
4865	struct cgroup *cgrp;
4866
4867	sk = sk_to_full_sk(sk);
4868	if (!sk \|\| !sk_fullsock(sk))
4869	return `0`;
4870
4871	cgrp = sock_cgroup_ptr(skcd: &sk->sk_cgrp_data);
4872	ancestor = cgroup_ancestor(cgrp, ancestor_level);
4873	if (!ancestor)
4874	return `0`;
4875
4876	return cgroup_id(cgrp: ancestor);
4877	}
4878
4879	BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff , skb, int*,
4880	ancestor_level)
4881	{
4882	return __bpf_sk_ancestor_cgroup_id(sk: skb->sk, ancestor_level);
4883	}
4884
4885	static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
4886	.func = bpf_skb_ancestor_cgroup_id,
4887	.gpl_only = false,
4888	.ret_type = RET_INTEGER,
4889	.arg1_type = ARG_PTR_TO_CTX,
4890	.arg2_type = ARG_ANYTHING,
4891	};
4892
4893	BPF_CALL_1(bpf_sk_cgroup_id, struct sock *, sk)
4894	{
4895	return __bpf_sk_cgroup_id(sk);
4896	}
4897
4898	static const struct bpf_func_proto bpf_sk_cgroup_id_proto = {
4899	.func = bpf_sk_cgroup_id,
4900	.gpl_only = false,
4901	.ret_type = RET_INTEGER,
4902	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
4903	};
4904
4905	BPF_CALL_2(bpf_sk_ancestor_cgroup_id, struct sock , sk, int*, ancestor_level)
4906	{
4907	return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level);
4908	}
4909
4910	static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
4911	.func = bpf_sk_ancestor_cgroup_id,
4912	.gpl_only = false,
4913	.ret_type = RET_INTEGER,
4914	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
4915	.arg2_type = ARG_ANYTHING,
4916	};
4917	#endif
4918
4919	static unsigned long bpf_xdp_copy(void dst, const* void *ctx,
4920	unsigned long off, unsigned long len)
4921	{
4922	struct xdp_buff xdp = (struct* xdp_buff *)ctx;
4923
4924	bpf_xdp_copy_buf(xdp, off, buf: dst, len, flush: false);
4925	return `0`;
4926	}
4927
4928	BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff , xdp, struct* bpf_map *, map,
4929	u64, flags, void *, meta, u64, meta_size)
4930	{
4931	u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> `32`;
4932
4933	if (unlikely(flags & ~(BPF_F_CTXLEN_MASK \| BPF_F_INDEX_MASK)))
4934	return -EINVAL;
4935
4936	if (unlikely(!xdp \|\| xdp_size > xdp_get_buff_len(xdp)))
4937	return -EFAULT;
4938
4939	return bpf_event_output(map, flags, meta, meta_size, ctx: xdp,
4940	ctx_size: xdp_size, ctx_copy: bpf_xdp_copy);
4941	}
4942
4943	static const struct bpf_func_proto bpf_xdp_event_output_proto = {
4944	.func = bpf_xdp_event_output,
4945	.gpl_only = true,
4946	.ret_type = RET_INTEGER,
4947	.arg1_type = ARG_PTR_TO_CTX,
4948	.arg2_type = ARG_CONST_MAP_PTR,
4949	.arg3_type = ARG_ANYTHING,
4950	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
4951	.arg5_type = ARG_CONST_SIZE_OR_ZERO,
4952	};
4953
4954	BTF_ID_LIST_SINGLE(bpf_xdp_output_btf_ids, struct, xdp_buff)
4955
4956	const struct bpf_func_proto bpf_xdp_output_proto = {
4957	.func = bpf_xdp_event_output,
4958	.gpl_only = true,
4959	.ret_type = RET_INTEGER,
4960	.arg1_type = ARG_PTR_TO_BTF_ID,
4961	.arg1_btf_id = &bpf_xdp_output_btf_ids[`0`],
4962	.arg2_type = ARG_CONST_MAP_PTR,
4963	.arg3_type = ARG_ANYTHING,
4964	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
4965	.arg5_type = ARG_CONST_SIZE_OR_ZERO,
4966	};
4967
4968	BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
4969	{
4970	return skb->sk ? __sock_gen_cookie(sk: skb->sk) : `0`;
4971	}
4972
4973	static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
4974	.func = bpf_get_socket_cookie,
4975	.gpl_only = false,
4976	.ret_type = RET_INTEGER,
4977	.arg1_type = ARG_PTR_TO_CTX,
4978	};
4979
4980	BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
4981	{
4982	return __sock_gen_cookie(sk: ctx->sk);
4983	}
4984
4985	static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
4986	.func = bpf_get_socket_cookie_sock_addr,
4987	.gpl_only = false,
4988	.ret_type = RET_INTEGER,
4989	.arg1_type = ARG_PTR_TO_CTX,
4990	};
4991
4992	BPF_CALL_1(bpf_get_socket_cookie_sock, struct sock *, ctx)
4993	{
4994	return __sock_gen_cookie(sk: ctx);
4995	}
4996
4997	static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = {
4998	.func = bpf_get_socket_cookie_sock,
4999	.gpl_only = false,
5000	.ret_type = RET_INTEGER,
5001	.arg1_type = ARG_PTR_TO_CTX,
5002	};
5003
5004	BPF_CALL_1(bpf_get_socket_ptr_cookie, struct sock *, sk)
5005	{
5006	return sk ? sock_gen_cookie(sk) : `0`;
5007	}
5008
5009	const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto = {
5010	.func = bpf_get_socket_ptr_cookie,
5011	.gpl_only = false,
5012	.ret_type = RET_INTEGER,
5013	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON \| PTR_MAYBE_NULL,
5014	};
5015
5016	BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
5017	{
5018	return __sock_gen_cookie(sk: ctx->sk);
5019	}
5020
5021	static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
5022	.func = bpf_get_socket_cookie_sock_ops,
5023	.gpl_only = false,
5024	.ret_type = RET_INTEGER,
5025	.arg1_type = ARG_PTR_TO_CTX,
5026	};
5027
5028	static u64 __bpf_get_netns_cookie(struct sock *sk)
5029	{
5030	const struct net *net = sk ? sock_net(sk) : &init_net;
5031
5032	return net->net_cookie;
5033	}
5034
5035	BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx)
5036	{
5037	return __bpf_get_netns_cookie(sk: ctx);
5038	}
5039
5040	static const struct bpf_func_proto bpf_get_netns_cookie_sock_proto = {
5041	.func = bpf_get_netns_cookie_sock,
5042	.gpl_only = false,
5043	.ret_type = RET_INTEGER,
5044	.arg1_type = ARG_PTR_TO_CTX_OR_NULL,
5045	};
5046
5047	BPF_CALL_1(bpf_get_netns_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
5048	{
5049	return __bpf_get_netns_cookie(sk: ctx ? ctx->sk : NULL);
5050	}
5051
5052	static const struct bpf_func_proto bpf_get_netns_cookie_sock_addr_proto = {
5053	.func = bpf_get_netns_cookie_sock_addr,
5054	.gpl_only = false,
5055	.ret_type = RET_INTEGER,
5056	.arg1_type = ARG_PTR_TO_CTX_OR_NULL,
5057	};
5058
5059	BPF_CALL_1(bpf_get_netns_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
5060	{
5061	return __bpf_get_netns_cookie(sk: ctx ? ctx->sk : NULL);
5062	}
5063
5064	static const struct bpf_func_proto bpf_get_netns_cookie_sock_ops_proto = {
5065	.func = bpf_get_netns_cookie_sock_ops,
5066	.gpl_only = false,
5067	.ret_type = RET_INTEGER,
5068	.arg1_type = ARG_PTR_TO_CTX_OR_NULL,
5069	};
5070
5071	BPF_CALL_1(bpf_get_netns_cookie_sk_msg, struct sk_msg *, ctx)
5072	{
5073	return __bpf_get_netns_cookie(sk: ctx ? ctx->sk : NULL);
5074	}
5075
5076	static const struct bpf_func_proto bpf_get_netns_cookie_sk_msg_proto = {
5077	.func = bpf_get_netns_cookie_sk_msg,
5078	.gpl_only = false,
5079	.ret_type = RET_INTEGER,
5080	.arg1_type = ARG_PTR_TO_CTX_OR_NULL,
5081	};
5082
5083	BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
5084	{
5085	struct sock *sk = sk_to_full_sk(sk: skb->sk);
5086	kuid_t kuid;
5087
5088	if (!sk \|\| !sk_fullsock(sk))
5089	return overflowuid;
5090	kuid = sock_net_uid(net: sock_net(sk), sk);
5091	return from_kuid_munged(to: sock_net(sk)->user_ns, uid: kuid);
5092	}
5093
5094	static const struct bpf_func_proto bpf_get_socket_uid_proto = {
5095	.func = bpf_get_socket_uid,
5096	.gpl_only = false,
5097	.ret_type = RET_INTEGER,
5098	.arg1_type = ARG_PTR_TO_CTX,
5099	};
5100
5101	static int sol_socket_sockopt(struct sock sk, int* optname,
5102	char optval, int* *optlen,
5103	bool getopt)
5104	{
5105	switch (optname) {
5106	case SO_REUSEADDR:
5107	case SO_SNDBUF:
5108	case SO_RCVBUF:
5109	case SO_KEEPALIVE:
5110	case SO_PRIORITY:
5111	case SO_REUSEPORT:
5112	case SO_RCVLOWAT:
5113	case SO_MARK:
5114	case SO_MAX_PACING_RATE:
5115	case SO_BINDTOIFINDEX:
5116	case SO_TXREHASH:
5117	if (optlen != sizeof(int*))
5118	return -EINVAL;
5119	break;
5120	case SO_BINDTODEVICE:
5121	break;
5122	default:
5123	return -EINVAL;
5124	}
5125
5126	if (getopt) {
5127	if (optname == SO_BINDTODEVICE)
5128	return -EINVAL;
5129	return sk_getsockopt(sk, SOL_SOCKET, optname,
5130	optval: KERNEL_SOCKPTR(p: optval),
5131	optlen: KERNEL_SOCKPTR(p: optlen));
5132	}
5133
5134	return sk_setsockopt(sk, SOL_SOCKET, optname,
5135	optval: KERNEL_SOCKPTR(p: optval), optlen: *optlen);
5136	}
5137
5138	static int bpf_sol_tcp_setsockopt(struct sock sk, int* optname,
5139	char optval, int* optlen)
5140	{
5141	struct tcp_sock *tp = tcp_sk(sk);
5142	unsigned long timeout;
5143	int val;
5144
5145	if (optlen != sizeof(int))
5146	return -EINVAL;
5147
5148	val = (int* *)optval;
5149
5150	/ Only some options are supported /
5151	switch (optname) {
5152	case TCP_BPF_IW:
5153	if (val <= `0` \|\| tp->data_segs_out > tp->syn_data)
5154	return -EINVAL;
5155	tcp_snd_cwnd_set(tp, val);
5156	break;
5157	case TCP_BPF_SNDCWND_CLAMP:
5158	if (val <= `0`)
5159	return -EINVAL;
5160	tp->snd_cwnd_clamp = val;
5161	tp->snd_ssthresh = val;
5162	break;
5163	case TCP_BPF_DELACK_MAX:
5164	timeout = usecs_to_jiffies(u: val);
5165	if (timeout > TCP_DELACK_MAX \|\|
5166	timeout < TCP_TIMEOUT_MIN)
5167	return -EINVAL;
5168	inet_csk(sk)->icsk_delack_max = timeout;
5169	break;
5170	case TCP_BPF_RTO_MIN:
5171	timeout = usecs_to_jiffies(u: val);
5172	if (timeout > TCP_RTO_MIN \|\|
5173	timeout < TCP_TIMEOUT_MIN)
5174	return -EINVAL;
5175	inet_csk(sk)->icsk_rto_min = timeout;
5176	break;
5177	default:
5178	return -EINVAL;
5179	}
5180
5181	return `0`;
5182	}
5183
5184	static int sol_tcp_sockopt_congestion(struct sock sk, char* *optval,
5185	int *optlen, bool getopt)
5186	{
5187	struct tcp_sock *tp;
5188	int ret;
5189
5190	if (*optlen < `2`)
5191	return -EINVAL;
5192
5193	if (getopt) {
5194	if (!inet_csk(sk)->icsk_ca_ops)
5195	return -EINVAL;
5196	/ BPF expects NULL-terminated tcp-cc string /
5197	optval[--(*optlen)] = `'\0'`;
5198	return do_tcp_getsockopt(sk, SOL_TCP, TCP_CONGESTION,
5199	optval: KERNEL_SOCKPTR(p: optval),
5200	optlen: KERNEL_SOCKPTR(p: optlen));
5201	}
5202
5203	/ "cdg" is the only cc that alloc a ptr*
5204	* in inet_csk_ca area. The bpf-tcp-cc may
5205	* overwrite this ptr after switching to cdg.
5206	*/
5207	if (optlen >= sizeof("cdg") - `1` && !strncmp("cdg", optval, optlen))
5208	return -ENOTSUPP;
5209
5210	/ It stops this looping*
5211	*
5212	* .init => bpf_setsockopt(tcp_cc) => .init =>
5213	* bpf_setsockopt(tcp_cc)" => .init => ....
5214	*
5215	* The second bpf_setsockopt(tcp_cc) is not allowed
5216	* in order to break the loop when both .init
5217	* are the same bpf prog.
5218	*
5219	* This applies even the second bpf_setsockopt(tcp_cc)
5220	* does not cause a loop. This limits only the first
5221	* '.init' can call bpf_setsockopt(TCP_CONGESTION) to
5222	* pick a fallback cc (eg. peer does not support ECN)
5223	* and the second '.init' cannot fallback to
5224	* another.
5225	*/
5226	tp = tcp_sk(sk);
5227	if (tp->bpf_chg_cc_inprogress)
5228	return -EBUSY;
5229
5230	tp->bpf_chg_cc_inprogress = `1`;
5231	ret = do_tcp_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
5232	optval: KERNEL_SOCKPTR(p: optval), optlen: *optlen);
5233	tp->bpf_chg_cc_inprogress = `0`;
5234	return ret;
5235	}
5236
5237	static int sol_tcp_sockopt(struct sock sk, int* optname,
5238	char optval, int* *optlen,
5239	bool getopt)
5240	{
5241	if (sk->sk_protocol != IPPROTO_TCP)
5242	return -EINVAL;
5243
5244	switch (optname) {
5245	case TCP_NODELAY:
5246	case TCP_MAXSEG:
5247	case TCP_KEEPIDLE:
5248	case TCP_KEEPINTVL:
5249	case TCP_KEEPCNT:
5250	case TCP_SYNCNT:
5251	case TCP_WINDOW_CLAMP:
5252	case TCP_THIN_LINEAR_TIMEOUTS:
5253	case TCP_USER_TIMEOUT:
5254	case TCP_NOTSENT_LOWAT:
5255	case TCP_SAVE_SYN:
5256	if (optlen != sizeof(int*))
5257	return -EINVAL;
5258	break;
5259	case TCP_CONGESTION:
5260	return sol_tcp_sockopt_congestion(sk, optval, optlen, getopt);
5261	case TCP_SAVED_SYN:
5262	if (*optlen < `1`)
5263	return -EINVAL;
5264	break;
5265	default:
5266	if (getopt)
5267	return -EINVAL;
5268	return bpf_sol_tcp_setsockopt(sk, optname, optval, optlen: *optlen);
5269	}
5270
5271	if (getopt) {
5272	if (optname == TCP_SAVED_SYN) {
5273	struct tcp_sock *tp = tcp_sk(sk);
5274
5275	if (!tp->saved_syn \|\|
5276	*optlen > tcp_saved_syn_len(saved_syn: tp->saved_syn))
5277	return -EINVAL;
5278	memcpy(optval, tp->saved_syn->data, *optlen);
5279	/ It cannot free tp->saved_syn here because it*
5280	* does not know if the user space still needs it.
5281	*/
5282	return `0`;
5283	}
5284
5285	return do_tcp_getsockopt(sk, SOL_TCP, optname,
5286	optval: KERNEL_SOCKPTR(p: optval),
5287	optlen: KERNEL_SOCKPTR(p: optlen));
5288	}
5289
5290	return do_tcp_setsockopt(sk, SOL_TCP, optname,
5291	optval: KERNEL_SOCKPTR(p: optval), optlen: *optlen);
5292	}
5293
5294	static int sol_ip_sockopt(struct sock sk, int* optname,
5295	char optval, int* *optlen,
5296	bool getopt)
5297	{
5298	if (sk->sk_family != AF_INET)
5299	return -EINVAL;
5300
5301	switch (optname) {
5302	case IP_TOS:
5303	if (optlen != sizeof(int*))
5304	return -EINVAL;
5305	break;
5306	default:
5307	return -EINVAL;
5308	}
5309
5310	if (getopt)
5311	return do_ip_getsockopt(sk, SOL_IP, optname,
5312	optval: KERNEL_SOCKPTR(p: optval),
5313	optlen: KERNEL_SOCKPTR(p: optlen));
5314
5315	return do_ip_setsockopt(sk, SOL_IP, optname,
5316	optval: KERNEL_SOCKPTR(p: optval), optlen: *optlen);
5317	}
5318
5319	static int sol_ipv6_sockopt(struct sock sk, int* optname,
5320	char optval, int* *optlen,
5321	bool getopt)
5322	{
5323	if (sk->sk_family != AF_INET6)
5324	return -EINVAL;
5325
5326	switch (optname) {
5327	case IPV6_TCLASS:
5328	case IPV6_AUTOFLOWLABEL:
5329	if (optlen != sizeof(int*))
5330	return -EINVAL;
5331	break;
5332	default:
5333	return -EINVAL;
5334	}
5335
5336	if (getopt)
5337	return ipv6_bpf_stub->ipv6_getsockopt(sk, SOL_IPV6, optname,
5338	KERNEL_SOCKPTR(p: optval),
5339	KERNEL_SOCKPTR(p: optlen));
5340
5341	return ipv6_bpf_stub->ipv6_setsockopt(sk, SOL_IPV6, optname,
5342	KERNEL_SOCKPTR(p: optval), *optlen);
5343	}
5344
5345	static int __bpf_setsockopt(struct sock sk, int* level, int optname,
5346	char optval, int* optlen)
5347	{
5348	if (!sk_fullsock(sk))
5349	return -EINVAL;
5350
5351	if (level == SOL_SOCKET)
5352	return sol_socket_sockopt(sk, optname, optval, optlen: &optlen, getopt: false);
5353	else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP)
5354	return sol_ip_sockopt(sk, optname, optval, optlen: &optlen, getopt: false);
5355	else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6)
5356	return sol_ipv6_sockopt(sk, optname, optval, optlen: &optlen, getopt: false);
5357	else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP)
5358	return sol_tcp_sockopt(sk, optname, optval, optlen: &optlen, getopt: false);
5359
5360	return -EINVAL;
5361	}
5362
5363	static int _bpf_setsockopt(struct sock sk, int* level, int optname,
5364	char optval, int* optlen)
5365	{
5366	if (sk_fullsock(sk))
5367	sock_owned_by_me(sk);
5368	return __bpf_setsockopt(sk, level, optname, optval, optlen);
5369	}
5370
5371	static int __bpf_getsockopt(struct sock sk, int* level, int optname,
5372	char optval, int* optlen)
5373	{
5374	int err, saved_optlen = optlen;
5375
5376	if (!sk_fullsock(sk)) {
5377	err = -EINVAL;
5378	goto done;
5379	}
5380
5381	if (level == SOL_SOCKET)
5382	err = sol_socket_sockopt(sk, optname, optval, optlen: &optlen, getopt: true);
5383	else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP)
5384	err = sol_tcp_sockopt(sk, optname, optval, optlen: &optlen, getopt: true);
5385	else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP)
5386	err = sol_ip_sockopt(sk, optname, optval, optlen: &optlen, getopt: true);
5387	else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6)
5388	err = sol_ipv6_sockopt(sk, optname, optval, optlen: &optlen, getopt: true);
5389	else
5390	err = -EINVAL;
5391
5392	done:
5393	if (err)
5394	optlen = `0`;
5395	if (optlen < saved_optlen)
5396	memset(optval + optlen, `0`, saved_optlen - optlen);
5397	return err;
5398	}
5399
5400	static int _bpf_getsockopt(struct sock sk, int* level, int optname,
5401	char optval, int* optlen)
5402	{
5403	if (sk_fullsock(sk))
5404	sock_owned_by_me(sk);
5405	return __bpf_getsockopt(sk, level, optname, optval, optlen);
5406	}
5407
5408	BPF_CALL_5(bpf_sk_setsockopt, struct sock , sk, int*, level,
5409	int, optname, char , optval, int*, optlen)
5410	{
5411	return _bpf_setsockopt(sk, level, optname, optval, optlen);
5412	}
5413
5414	const struct bpf_func_proto bpf_sk_setsockopt_proto = {
5415	.func = bpf_sk_setsockopt,
5416	.gpl_only = false,
5417	.ret_type = RET_INTEGER,
5418	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
5419	.arg2_type = ARG_ANYTHING,
5420	.arg3_type = ARG_ANYTHING,
5421	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
5422	.arg5_type = ARG_CONST_SIZE,
5423	};
5424
5425	BPF_CALL_5(bpf_sk_getsockopt, struct sock , sk, int*, level,
5426	int, optname, char , optval, int*, optlen)
5427	{
5428	return _bpf_getsockopt(sk, level, optname, optval, optlen);
5429	}
5430
5431	const struct bpf_func_proto bpf_sk_getsockopt_proto = {
5432	.func = bpf_sk_getsockopt,
5433	.gpl_only = false,
5434	.ret_type = RET_INTEGER,
5435	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
5436	.arg2_type = ARG_ANYTHING,
5437	.arg3_type = ARG_ANYTHING,
5438	.arg4_type = ARG_PTR_TO_UNINIT_MEM,
5439	.arg5_type = ARG_CONST_SIZE,
5440	};
5441
5442	BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock , sk, int*, level,
5443	int, optname, char , optval, int*, optlen)
5444	{
5445	return __bpf_setsockopt(sk, level, optname, optval, optlen);
5446	}
5447
5448	const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto = {
5449	.func = bpf_unlocked_sk_setsockopt,
5450	.gpl_only = false,
5451	.ret_type = RET_INTEGER,
5452	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
5453	.arg2_type = ARG_ANYTHING,
5454	.arg3_type = ARG_ANYTHING,
5455	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
5456	.arg5_type = ARG_CONST_SIZE,
5457	};
5458
5459	BPF_CALL_5(bpf_unlocked_sk_getsockopt, struct sock , sk, int*, level,
5460	int, optname, char , optval, int*, optlen)
5461	{
5462	return __bpf_getsockopt(sk, level, optname, optval, optlen);
5463	}
5464
5465	const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto = {
5466	.func = bpf_unlocked_sk_getsockopt,
5467	.gpl_only = false,
5468	.ret_type = RET_INTEGER,
5469	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
5470	.arg2_type = ARG_ANYTHING,
5471	.arg3_type = ARG_ANYTHING,
5472	.arg4_type = ARG_PTR_TO_UNINIT_MEM,
5473	.arg5_type = ARG_CONST_SIZE,
5474	};
5475
5476	BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
5477	int, level, int, optname, char , optval, int*, optlen)
5478	{
5479	return _bpf_setsockopt(sk: ctx->sk, level, optname, optval, optlen);
5480	}
5481
5482	static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = {
5483	.func = bpf_sock_addr_setsockopt,
5484	.gpl_only = false,
5485	.ret_type = RET_INTEGER,
5486	.arg1_type = ARG_PTR_TO_CTX,
5487	.arg2_type = ARG_ANYTHING,
5488	.arg3_type = ARG_ANYTHING,
5489	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
5490	.arg5_type = ARG_CONST_SIZE,
5491	};
5492
5493	BPF_CALL_5(bpf_sock_addr_getsockopt, struct bpf_sock_addr_kern *, ctx,
5494	int, level, int, optname, char , optval, int*, optlen)
5495	{
5496	return _bpf_getsockopt(sk: ctx->sk, level, optname, optval, optlen);
5497	}
5498
5499	static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = {
5500	.func = bpf_sock_addr_getsockopt,
5501	.gpl_only = false,
5502	.ret_type = RET_INTEGER,
5503	.arg1_type = ARG_PTR_TO_CTX,
5504	.arg2_type = ARG_ANYTHING,
5505	.arg3_type = ARG_ANYTHING,
5506	.arg4_type = ARG_PTR_TO_UNINIT_MEM,
5507	.arg5_type = ARG_CONST_SIZE,
5508	};
5509
5510	BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
5511	int, level, int, optname, char , optval, int*, optlen)
5512	{
5513	return _bpf_setsockopt(sk: bpf_sock->sk, level, optname, optval, optlen);
5514	}
5515
5516	static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
5517	.func = bpf_sock_ops_setsockopt,
5518	.gpl_only = false,
5519	.ret_type = RET_INTEGER,
5520	.arg1_type = ARG_PTR_TO_CTX,
5521	.arg2_type = ARG_ANYTHING,
5522	.arg3_type = ARG_ANYTHING,
5523	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
5524	.arg5_type = ARG_CONST_SIZE,
5525	};
5526
5527	static int bpf_sock_ops_get_syn(struct bpf_sock_ops_kern *bpf_sock,
5528	int optname, const u8 **start)
5529	{
5530	struct sk_buff *syn_skb = bpf_sock->syn_skb;
5531	const u8 *hdr_start;
5532	int ret;
5533
5534	if (syn_skb) {
5535	/ sk is a request_sock here /
5536
5537	if (optname == TCP_BPF_SYN) {
5538	hdr_start = syn_skb->data;
5539	ret = tcp_hdrlen(skb: syn_skb);
5540	} else if (optname == TCP_BPF_SYN_IP) {
5541	hdr_start = skb_network_header(skb: syn_skb);
5542	ret = skb_network_header_len(skb: syn_skb) +
5543	tcp_hdrlen(skb: syn_skb);
5544	} else {
5545	/ optname == TCP_BPF_SYN_MAC /
5546	hdr_start = skb_mac_header(skb: syn_skb);
5547	ret = skb_mac_header_len(skb: syn_skb) +
5548	skb_network_header_len(skb: syn_skb) +
5549	tcp_hdrlen(skb: syn_skb);
5550	}
5551	} else {
5552	struct sock *sk = bpf_sock->sk;
5553	struct saved_syn *saved_syn;
5554
5555	if (sk->sk_state == TCP_NEW_SYN_RECV)
5556	/ synack retransmit. bpf_sock->syn_skb will*
5557	* not be available. It has to resort to
5558	* saved_syn (if it is saved).
5559	*/
5560	saved_syn = inet_reqsk(sk)->saved_syn;
5561	else
5562	saved_syn = tcp_sk(sk)->saved_syn;
5563
5564	if (!saved_syn)
5565	return -ENOENT;
5566
5567	if (optname == TCP_BPF_SYN) {
5568	hdr_start = saved_syn->data +
5569	saved_syn->mac_hdrlen +
5570	saved_syn->network_hdrlen;
5571	ret = saved_syn->tcp_hdrlen;
5572	} else if (optname == TCP_BPF_SYN_IP) {
5573	hdr_start = saved_syn->data +
5574	saved_syn->mac_hdrlen;
5575	ret = saved_syn->network_hdrlen +
5576	saved_syn->tcp_hdrlen;
5577	} else {
5578	/ optname == TCP_BPF_SYN_MAC /
5579
5580	/ TCP_SAVE_SYN may not have saved the mac hdr /
5581	if (!saved_syn->mac_hdrlen)
5582	return -ENOENT;
5583
5584	hdr_start = saved_syn->data;
5585	ret = saved_syn->mac_hdrlen +
5586	saved_syn->network_hdrlen +
5587	saved_syn->tcp_hdrlen;
5588	}
5589	}
5590
5591	*start = hdr_start;
5592	return ret;
5593	}
5594
5595	BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
5596	int, level, int, optname, char , optval, int*, optlen)
5597	{
5598	if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP &&
5599	optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_MAC) {
5600	int ret, copy_len = `0`;
5601	const u8 *start;
5602
5603	ret = bpf_sock_ops_get_syn(bpf_sock, optname, start: &start);
5604	if (ret > `0`) {
5605	copy_len = ret;
5606	if (optlen < copy_len) {
5607	copy_len = optlen;
5608	ret = -ENOSPC;
5609	}
5610
5611	memcpy(optval, start, copy_len);
5612	}
5613
5614	/ Zero out unused buffer at the end /
5615	memset(optval + copy_len, `0`, optlen - copy_len);
5616
5617	return ret;
5618	}
5619
5620	return _bpf_getsockopt(sk: bpf_sock->sk, level, optname, optval, optlen);
5621	}
5622
5623	static const struct bpf_func_proto bpf_sock_ops_getsockopt_proto = {
5624	.func = bpf_sock_ops_getsockopt,
5625	.gpl_only = false,
5626	.ret_type = RET_INTEGER,
5627	.arg1_type = ARG_PTR_TO_CTX,
5628	.arg2_type = ARG_ANYTHING,
5629	.arg3_type = ARG_ANYTHING,
5630	.arg4_type = ARG_PTR_TO_UNINIT_MEM,
5631	.arg5_type = ARG_CONST_SIZE,
5632	};
5633
5634	BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
5635	int, argval)
5636	{
5637	struct sock *sk = bpf_sock->sk;
5638	int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
5639
5640	if (!IS_ENABLED(CONFIG_INET) \|\| !sk_fullsock(sk))
5641	return -EINVAL;
5642
5643	tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
5644
5645	return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
5646	}
5647
5648	static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
5649	.func = bpf_sock_ops_cb_flags_set,
5650	.gpl_only = false,
5651	.ret_type = RET_INTEGER,
5652	.arg1_type = ARG_PTR_TO_CTX,
5653	.arg2_type = ARG_ANYTHING,
5654	};
5655
5656	const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
5657	EXPORT_SYMBOL_GPL(ipv6_bpf_stub);
5658
5659	BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern , ctx, struct* sockaddr *, addr,
5660	int, addr_len)
5661	{
5662	#ifdef CONFIG_INET
5663	struct sock *sk = ctx->sk;
5664	u32 flags = BIND_FROM_BPF;
5665	int err;
5666
5667	err = -EINVAL;
5668	if (addr_len < offsetofend(struct sockaddr, sa_family))
5669	return err;
5670	if (addr->sa_family == AF_INET) {
5671	if (addr_len < sizeof(struct sockaddr_in))
5672	return err;
5673	if (((struct sockaddr_in *)addr)->sin_port == htons(`0`))
5674	flags \|= BIND_FORCE_ADDRESS_NO_PORT;
5675	return __inet_bind(sk, uaddr: addr, addr_len, flags);
5676	#if IS_ENABLED(CONFIG_IPV6)
5677	} else if (addr->sa_family == AF_INET6) {
5678	if (addr_len < SIN6_LEN_RFC2133)
5679	return err;
5680	if (((struct sockaddr_in6 *)addr)->sin6_port == htons(`0`))
5681	flags \|= BIND_FORCE_ADDRESS_NO_PORT;
5682	/ ipv6_bpf_stub cannot be NULL, since it's called from*
5683	* bpf_cgroup_inet6_connect hook and ipv6 is already loaded
5684	*/
5685	return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, flags);
5686	#endif /* CONFIG_IPV6 */
5687	}
5688	#endif /* CONFIG_INET */
5689
5690	return -EAFNOSUPPORT;
5691	}
5692
5693	static const struct bpf_func_proto bpf_bind_proto = {
5694	.func = bpf_bind,
5695	.gpl_only = false,
5696	.ret_type = RET_INTEGER,
5697	.arg1_type = ARG_PTR_TO_CTX,
5698	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
5699	.arg3_type = ARG_CONST_SIZE,
5700	};
5701
5702	#ifdef CONFIG_XFRM
5703
5704	#if (IS_BUILTIN(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) \|\| \
5705	(IS_MODULE(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
5706
5707	struct metadata_dst __percpu *xfrm_bpf_md_dst;
5708	EXPORT_SYMBOL_GPL(xfrm_bpf_md_dst);
5709
5710	#endif
5711
5712	BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index,
5713	struct bpf_xfrm_state *, to, u32, size, u64, flags)
5714	{
5715	const struct sec_path *sp = skb_sec_path(skb);
5716	const struct xfrm_state *x;
5717
5718	if (!sp \|\| unlikely(index >= sp->len \|\| flags))
5719	goto err_clear;
5720
5721	x = sp->xvec[index];
5722
5723	if (unlikely(size != sizeof(struct bpf_xfrm_state)))
5724	goto err_clear;
5725
5726	to->reqid = x->props.reqid;
5727	to->spi = x->id.spi;
5728	to->family = x->props.family;
5729	to->ext = `0`;
5730
5731	if (to->family == AF_INET6) {
5732	memcpy(to->remote_ipv6, x->props.saddr.a6,
5733	sizeof(to->remote_ipv6));
5734	} else {
5735	to->remote_ipv4 = x->props.saddr.a4;
5736	memset(&to->remote_ipv6[`1`], `0`, sizeof(__u32) * `3`);
5737	}
5738
5739	return `0`;
5740	err_clear:
5741	memset(to, `0`, size);
5742	return -EINVAL;
5743	}
5744
5745	static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
5746	.func = bpf_skb_get_xfrm_state,
5747	.gpl_only = false,
5748	.ret_type = RET_INTEGER,
5749	.arg1_type = ARG_PTR_TO_CTX,
5750	.arg2_type = ARG_ANYTHING,
5751	.arg3_type = ARG_PTR_TO_UNINIT_MEM,
5752	.arg4_type = ARG_CONST_SIZE,
5753	.arg5_type = ARG_ANYTHING,
5754	};
5755	#endif
5756
5757	#if IS_ENABLED(CONFIG_INET) \|\| IS_ENABLED(CONFIG_IPV6)
5758	static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, u32 mtu)
5759	{
5760	params->h_vlan_TCI = `0`;
5761	params->h_vlan_proto = `0`;
5762	if (mtu)
5763	params->mtu_result = mtu; / union with tot_len /
5764
5765	return `0`;
5766	}
5767	#endif
5768
5769	#if IS_ENABLED(CONFIG_INET)
5770	static int bpf_ipv4_fib_lookup(struct net net, struct* bpf_fib_lookup *params,
5771	u32 flags, bool check_mtu)
5772	{
5773	struct fib_nh_common *nhc;
5774	struct in_device *in_dev;
5775	struct neighbour *neigh;
5776	struct net_device *dev;
5777	struct fib_result res;
5778	struct flowi4 fl4;
5779	u32 mtu = `0`;
5780	int err;
5781
5782	dev = dev_get_by_index_rcu(net, ifindex: params->ifindex);
5783	if (unlikely(!dev))
5784	return -ENODEV;
5785
5786	/ verify forwarding is enabled on this interface /
5787	in_dev = __in_dev_get_rcu(dev);
5788	if (unlikely(!in_dev \|\| !IN_DEV_FORWARD(in_dev)))
5789	return BPF_FIB_LKUP_RET_FWD_DISABLED;
5790
5791	if (flags & BPF_FIB_LOOKUP_OUTPUT) {
5792	fl4.flowi4_iif = `1`;
5793	fl4.flowi4_oif = params->ifindex;
5794	} else {
5795	fl4.flowi4_iif = params->ifindex;
5796	fl4.flowi4_oif = `0`;
5797	}
5798	fl4.flowi4_tos = params->tos & IPTOS_RT_MASK;
5799	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
5800	fl4.flowi4_flags = `0`;
5801
5802	fl4.flowi4_proto = params->l4_protocol;
5803	fl4.daddr = params->ipv4_dst;
5804	fl4.saddr = params->ipv4_src;
5805	fl4.fl4_sport = params->sport;
5806	fl4.fl4_dport = params->dport;
5807	fl4.flowi4_multipath_hash = `0`;
5808
5809	if (flags & BPF_FIB_LOOKUP_DIRECT) {
5810	u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
5811	struct fib_table *tb;
5812
5813	if (flags & BPF_FIB_LOOKUP_TBID) {
5814	tbid = params->tbid;
5815	/ zero out for vlan output /
5816	params->tbid = `0`;
5817	}
5818
5819	tb = fib_get_table(net, id: tbid);
5820	if (unlikely(!tb))
5821	return BPF_FIB_LKUP_RET_NOT_FWDED;
5822
5823	err = fib_table_lookup(tb, flp: &fl4, res: &res, FIB_LOOKUP_NOREF);
5824	} else {
5825	fl4.flowi4_mark = `0`;
5826	fl4.flowi4_secid = `0`;
5827	fl4.flowi4_tun_key.tun_id = `0`;
5828	fl4.flowi4_uid = sock_net_uid(net, NULL);
5829
5830	err = fib_lookup(net, flp: &fl4, res: &res, FIB_LOOKUP_NOREF);
5831	}
5832
5833	if (err) {
5834	/ map fib lookup errors to RTN_ type /
5835	if (err == -EINVAL)
5836	return BPF_FIB_LKUP_RET_BLACKHOLE;
5837	if (err == -EHOSTUNREACH)
5838	return BPF_FIB_LKUP_RET_UNREACHABLE;
5839	if (err == -EACCES)
5840	return BPF_FIB_LKUP_RET_PROHIBIT;
5841
5842	return BPF_FIB_LKUP_RET_NOT_FWDED;
5843	}
5844
5845	if (res.type != RTN_UNICAST)
5846	return BPF_FIB_LKUP_RET_NOT_FWDED;
5847
5848	if (fib_info_num_path(fi: res.fi) > `1`)
5849	fib_select_path(net, res: &res, fl4: &fl4, NULL);
5850
5851	if (check_mtu) {
5852	mtu = ip_mtu_from_fib_result(res: &res, daddr: params->ipv4_dst);
5853	if (params->tot_len > mtu) {
5854	params->mtu_result = mtu; / union with tot_len /
5855	return BPF_FIB_LKUP_RET_FRAG_NEEDED;
5856	}
5857	}
5858
5859	nhc = res.nhc;
5860
5861	/ do not handle lwt encaps right now /
5862	if (nhc->nhc_lwtstate)
5863	return BPF_FIB_LKUP_RET_UNSUPP_LWT;
5864
5865	dev = nhc->nhc_dev;
5866
5867	params->rt_metric = res.fi->fib_priority;
5868	params->ifindex = dev->ifindex;
5869
5870	if (flags & BPF_FIB_LOOKUP_SRC)
5871	params->ipv4_src = fib_result_prefsrc(net, res: &res);
5872
5873	/ xdp and cls_bpf programs are run in RCU-bh so*
5874	* rcu_read_lock_bh is not needed here
5875	*/
5876	if (likely(nhc->nhc_gw_family != AF_INET6)) {
5877	if (nhc->nhc_gw_family)
5878	params->ipv4_dst = nhc->nhc_gw.ipv4;
5879	} else {
5880	struct in6_addr dst = (struct* in6_addr *)params->ipv6_dst;
5881
5882	params->family = AF_INET6;
5883	*dst = nhc->nhc_gw.ipv6;
5884	}
5885
5886	if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
5887	goto set_fwd_params;
5888
5889	if (likely(nhc->nhc_gw_family != AF_INET6))
5890	neigh = __ipv4_neigh_lookup_noref(dev,
5891	key: (__force u32)params->ipv4_dst);
5892	else
5893	neigh = __ipv6_neigh_lookup_noref_stub(dev, pkey: params->ipv6_dst);
5894
5895	if (!neigh \|\| !(READ_ONCE(neigh->nud_state) & NUD_VALID))
5896	return BPF_FIB_LKUP_RET_NO_NEIGH;
5897	memcpy(params->dmac, neigh->ha, ETH_ALEN);
5898	memcpy(params->smac, dev->dev_addr, ETH_ALEN);
5899
5900	set_fwd_params:
5901	return bpf_fib_set_fwd_params(params, mtu);
5902	}
5903	#endif
5904
5905	#if IS_ENABLED(CONFIG_IPV6)
5906	static int bpf_ipv6_fib_lookup(struct net net, struct* bpf_fib_lookup *params,
5907	u32 flags, bool check_mtu)
5908	{
5909	struct in6_addr src = (struct* in6_addr *) params->ipv6_src;
5910	struct in6_addr dst = (struct* in6_addr *) params->ipv6_dst;
5911	struct fib6_result res = {};
5912	struct neighbour *neigh;
5913	struct net_device *dev;
5914	struct inet6_dev *idev;
5915	struct flowi6 fl6;
5916	int strict = `0`;
5917	int oif, err;
5918	u32 mtu = `0`;
5919
5920	/ link local addresses are never forwarded /
5921	if (rt6_need_strict(daddr: dst) \|\| rt6_need_strict(daddr: src))
5922	return BPF_FIB_LKUP_RET_NOT_FWDED;
5923
5924	dev = dev_get_by_index_rcu(net, ifindex: params->ifindex);
5925	if (unlikely(!dev))
5926	return -ENODEV;
5927
5928	idev = __in6_dev_get_safely(dev);
5929	if (unlikely(!idev \|\| !idev->cnf.forwarding))
5930	return BPF_FIB_LKUP_RET_FWD_DISABLED;
5931
5932	if (flags & BPF_FIB_LOOKUP_OUTPUT) {
5933	fl6.flowi6_iif = `1`;
5934	oif = fl6.flowi6_oif = params->ifindex;
5935	} else {
5936	oif = fl6.flowi6_iif = params->ifindex;
5937	fl6.flowi6_oif = `0`;
5938	strict = RT6_LOOKUP_F_HAS_SADDR;
5939	}
5940	fl6.flowlabel = params->flowinfo;
5941	fl6.flowi6_scope = `0`;
5942	fl6.flowi6_flags = `0`;
5943	fl6.mp_hash = `0`;
5944
5945	fl6.flowi6_proto = params->l4_protocol;
5946	fl6.daddr = *dst;
5947	fl6.saddr = *src;
5948	fl6.fl6_sport = params->sport;
5949	fl6.fl6_dport = params->dport;
5950
5951	if (flags & BPF_FIB_LOOKUP_DIRECT) {
5952	u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
5953	struct fib6_table *tb;
5954
5955	if (flags & BPF_FIB_LOOKUP_TBID) {
5956	tbid = params->tbid;
5957	/ zero out for vlan output /
5958	params->tbid = `0`;
5959	}
5960
5961	tb = ipv6_stub->fib6_get_table(net, tbid);
5962	if (unlikely(!tb))
5963	return BPF_FIB_LKUP_RET_NOT_FWDED;
5964
5965	err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
5966	strict);
5967	} else {
5968	fl6.flowi6_mark = `0`;
5969	fl6.flowi6_secid = `0`;
5970	fl6.flowi6_tun_key.tun_id = `0`;
5971	fl6.flowi6_uid = sock_net_uid(net, NULL);
5972
5973	err = ipv6_stub->fib6_lookup(net, oif, &fl6, &res, strict);
5974	}
5975
5976	if (unlikely(err \|\| IS_ERR_OR_NULL(res.f6i) \|\|
5977	res.f6i == net->ipv6.fib6_null_entry))
5978	return BPF_FIB_LKUP_RET_NOT_FWDED;
5979
5980	switch (res.fib6_type) {
5981	/ only unicast is forwarded /
5982	case RTN_UNICAST:
5983	break;
5984	case RTN_BLACKHOLE:
5985	return BPF_FIB_LKUP_RET_BLACKHOLE;
5986	case RTN_UNREACHABLE:
5987	return BPF_FIB_LKUP_RET_UNREACHABLE;
5988	case RTN_PROHIBIT:
5989	return BPF_FIB_LKUP_RET_PROHIBIT;
5990	default:
5991	return BPF_FIB_LKUP_RET_NOT_FWDED;
5992	}
5993
5994	ipv6_stub->fib6_select_path(net, &res, &fl6, fl6.flowi6_oif,
5995	fl6.flowi6_oif != `0`, NULL, strict);
5996
5997	if (check_mtu) {
5998	mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src);
5999	if (params->tot_len > mtu) {
6000	params->mtu_result = mtu; / union with tot_len /
6001	return BPF_FIB_LKUP_RET_FRAG_NEEDED;
6002	}
6003	}
6004
6005	if (res.nh->fib_nh_lws)
6006	return BPF_FIB_LKUP_RET_UNSUPP_LWT;
6007
6008	if (res.nh->fib_nh_gw_family)
6009	*dst = res.nh->fib_nh_gw6;
6010
6011	dev = res.nh->fib_nh_dev;
6012	params->rt_metric = res.f6i->fib6_metric;
6013	params->ifindex = dev->ifindex;
6014
6015	if (flags & BPF_FIB_LOOKUP_SRC) {
6016	if (res.f6i->fib6_prefsrc.plen) {
6017	*src = res.f6i->fib6_prefsrc.addr;
6018	} else {
6019	err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev,
6020	&fl6.daddr, `0`,
6021	src);
6022	if (err)
6023	return BPF_FIB_LKUP_RET_NO_SRC_ADDR;
6024	}
6025	}
6026
6027	if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
6028	goto set_fwd_params;
6029
6030	/ xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is*
6031	* not needed here.
6032	*/
6033	neigh = __ipv6_neigh_lookup_noref_stub(dev, pkey: dst);
6034	if (!neigh \|\| !(READ_ONCE(neigh->nud_state) & NUD_VALID))
6035	return BPF_FIB_LKUP_RET_NO_NEIGH;
6036	memcpy(params->dmac, neigh->ha, ETH_ALEN);
6037	memcpy(params->smac, dev->dev_addr, ETH_ALEN);
6038
6039	set_fwd_params:
6040	return bpf_fib_set_fwd_params(params, mtu);
6041	}
6042	#endif
6043
6044	#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT \| BPF_FIB_LOOKUP_OUTPUT \| \
6045	BPF_FIB_LOOKUP_SKIP_NEIGH \| BPF_FIB_LOOKUP_TBID \| \
6046	BPF_FIB_LOOKUP_SRC)
6047
6048	BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
6049	struct bpf_fib_lookup , params, int*, plen, u32, flags)
6050	{
6051	if (plen < sizeof(*params))
6052	return -EINVAL;
6053
6054	if (flags & ~BPF_FIB_LOOKUP_MASK)
6055	return -EINVAL;
6056
6057	switch (params->family) {
6058	#if IS_ENABLED(CONFIG_INET)
6059	case AF_INET:
6060	return bpf_ipv4_fib_lookup(net: dev_net(dev: ctx->rxq->dev), params,
6061	flags, check_mtu: true);
6062	#endif
6063	#if IS_ENABLED(CONFIG_IPV6)
6064	case AF_INET6:
6065	return bpf_ipv6_fib_lookup(net: dev_net(dev: ctx->rxq->dev), params,
6066	flags, check_mtu: true);
6067	#endif
6068	}
6069	return -EAFNOSUPPORT;
6070	}
6071
6072	static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = {
6073	.func = bpf_xdp_fib_lookup,
6074	.gpl_only = true,
6075	.ret_type = RET_INTEGER,
6076	.arg1_type = ARG_PTR_TO_CTX,
6077	.arg2_type = ARG_PTR_TO_MEM,
6078	.arg3_type = ARG_CONST_SIZE,
6079	.arg4_type = ARG_ANYTHING,
6080	};
6081
6082	BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
6083	struct bpf_fib_lookup , params, int*, plen, u32, flags)
6084	{
6085	struct net *net = dev_net(dev: skb->dev);
6086	int rc = -EAFNOSUPPORT;
6087	bool check_mtu = false;
6088
6089	if (plen < sizeof(*params))
6090	return -EINVAL;
6091
6092	if (flags & ~BPF_FIB_LOOKUP_MASK)
6093	return -EINVAL;
6094
6095	if (params->tot_len)
6096	check_mtu = true;
6097
6098	switch (params->family) {
6099	#if IS_ENABLED(CONFIG_INET)
6100	case AF_INET:
6101	rc = bpf_ipv4_fib_lookup(net, params, flags, check_mtu);
6102	break;
6103	#endif
6104	#if IS_ENABLED(CONFIG_IPV6)
6105	case AF_INET6:
6106	rc = bpf_ipv6_fib_lookup(net, params, flags, check_mtu);
6107	break;
6108	#endif
6109	}
6110
6111	if (rc == BPF_FIB_LKUP_RET_SUCCESS && !check_mtu) {
6112	struct net_device *dev;
6113
6114	/ When tot_len isn't provided by user, check skb*
6115	* against MTU of FIB lookup resulting net_device
6116	*/
6117	dev = dev_get_by_index_rcu(net, ifindex: params->ifindex);
6118	if (!is_skb_forwardable(dev, skb))
6119	rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
6120
6121	params->mtu_result = dev->mtu; / union with tot_len /
6122	}
6123
6124	return rc;
6125	}
6126
6127	static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
6128	.func = bpf_skb_fib_lookup,
6129	.gpl_only = true,
6130	.ret_type = RET_INTEGER,
6131	.arg1_type = ARG_PTR_TO_CTX,
6132	.arg2_type = ARG_PTR_TO_MEM,
6133	.arg3_type = ARG_CONST_SIZE,
6134	.arg4_type = ARG_ANYTHING,
6135	};
6136
6137	static struct net_device __dev_via_ifindex(struct* net_device *dev_curr,
6138	u32 ifindex)
6139	{
6140	struct net *netns = dev_net(dev: dev_curr);
6141
6142	/ Non-redirect use-cases can use ifindex=0 and save ifindex lookup /
6143	if (ifindex == `0`)
6144	return dev_curr;
6145
6146	return dev_get_by_index_rcu(net: netns, ifindex);
6147	}
6148
6149	BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
6150	u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags)
6151	{
6152	int ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
6153	struct net_device *dev = skb->dev;
6154	int skb_len, dev_len;
6155	int mtu;
6156
6157	if (unlikely(flags & ~(BPF_MTU_CHK_SEGS)))
6158	return -EINVAL;
6159
6160	if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff \|\| *mtu_len)))
6161	return -EINVAL;
6162
6163	dev = __dev_via_ifindex(dev_curr: dev, ifindex);
6164	if (unlikely(!dev))
6165	return -ENODEV;
6166
6167	mtu = READ_ONCE(dev->mtu);
6168
6169	dev_len = mtu + dev->hard_header_len;
6170
6171	/ If set use mtu_len as input, L3 as iph->tot_len (like fib_lookup) /*
6172	skb_len = mtu_len ? mtu_len + dev->hard_header_len : skb->len;
6173
6174	skb_len += len_diff; / minus result pass check /
6175	if (skb_len <= dev_len) {
6176	ret = BPF_MTU_CHK_RET_SUCCESS;
6177	goto out;
6178	}
6179	/ At this point, skb->len exceed MTU, but as it include length of all*
6180	* segments, it can still be below MTU. The SKB can possibly get
6181	* re-segmented in transmit path (see validate_xmit_skb). Thus, user
6182	* must choose if segs are to be MTU checked.
6183	*/
6184	if (skb_is_gso(skb)) {
6185	ret = BPF_MTU_CHK_RET_SUCCESS;
6186
6187	if (flags & BPF_MTU_CHK_SEGS &&
6188	!skb_gso_validate_network_len(skb, mtu))
6189	ret = BPF_MTU_CHK_RET_SEGS_TOOBIG;
6190	}
6191	out:
6192	/ BPF verifier guarantees valid pointer /
6193	*mtu_len = mtu;
6194
6195	return ret;
6196	}
6197
6198	BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp,
6199	u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags)
6200	{
6201	struct net_device *dev = xdp->rxq->dev;
6202	int xdp_len = xdp->data_end - xdp->data;
6203	int ret = BPF_MTU_CHK_RET_SUCCESS;
6204	int mtu, dev_len;
6205
6206	/ XDP variant doesn't support multi-buffer segment check (yet) /
6207	if (unlikely(flags))
6208	return -EINVAL;
6209
6210	dev = __dev_via_ifindex(dev_curr: dev, ifindex);
6211	if (unlikely(!dev))
6212	return -ENODEV;
6213
6214	mtu = READ_ONCE(dev->mtu);
6215
6216	/ Add L2-header as dev MTU is L3 size /
6217	dev_len = mtu + dev->hard_header_len;
6218
6219	/ Use mtu_len as input, L3 as iph->tot_len (like fib_lookup) /*
6220	if (*mtu_len)
6221	xdp_len = *mtu_len + dev->hard_header_len;
6222
6223	xdp_len += len_diff; / minus result pass check /
6224	if (xdp_len > dev_len)
6225	ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
6226
6227	/ BPF verifier guarantees valid pointer /
6228	*mtu_len = mtu;
6229
6230	return ret;
6231	}
6232
6233	static const struct bpf_func_proto bpf_skb_check_mtu_proto = {
6234	.func = bpf_skb_check_mtu,
6235	.gpl_only = true,
6236	.ret_type = RET_INTEGER,
6237	.arg1_type = ARG_PTR_TO_CTX,
6238	.arg2_type = ARG_ANYTHING,
6239	.arg3_type = ARG_PTR_TO_INT,
6240	.arg4_type = ARG_ANYTHING,
6241	.arg5_type = ARG_ANYTHING,
6242	};
6243
6244	static const struct bpf_func_proto bpf_xdp_check_mtu_proto = {
6245	.func = bpf_xdp_check_mtu,
6246	.gpl_only = true,
6247	.ret_type = RET_INTEGER,
6248	.arg1_type = ARG_PTR_TO_CTX,
6249	.arg2_type = ARG_ANYTHING,
6250	.arg3_type = ARG_PTR_TO_INT,
6251	.arg4_type = ARG_ANYTHING,
6252	.arg5_type = ARG_ANYTHING,
6253	};
6254
6255	#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
6256	static int bpf_push_seg6_encap(struct sk_buff skb, u32 type, void* *hdr, u32 len)
6257	{
6258	int err;
6259	struct ipv6_sr_hdr srh = (struct* ipv6_sr_hdr *)hdr;
6260
6261	if (!seg6_validate_srh(srh, len, reduced: false))
6262	return -EINVAL;
6263
6264	switch (type) {
6265	case BPF_LWT_ENCAP_SEG6_INLINE:
6266	if (skb->protocol != htons(ETH_P_IPV6))
6267	return -EBADMSG;
6268
6269	err = seg6_do_srh_inline(skb, osrh: srh);
6270	break;
6271	case BPF_LWT_ENCAP_SEG6:
6272	skb_reset_inner_headers(skb);
6273	skb->encapsulation = `1`;
6274	err = seg6_do_srh_encap(skb, osrh: srh, IPPROTO_IPV6);
6275	break;
6276	default:
6277	return -EINVAL;
6278	}
6279
6280	bpf_compute_data_pointers(skb);
6281	if (err)
6282	return err;
6283
6284	skb_set_transport_header(skb, offset: sizeof(struct ipv6hdr));
6285
6286	return seg6_lookup_nexthop(skb, NULL, tbl_id: `0`);
6287	}
6288	#endif /* CONFIG_IPV6_SEG6_BPF */
6289
6290	#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
6291	static int bpf_push_ip_encap(struct sk_buff skb, void* *hdr, u32 len,
6292	bool ingress)
6293	{
6294	return bpf_lwt_push_ip_encap(skb, hdr, len, ingress);
6295	}
6296	#endif
6297
6298	BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff , skb, u32, type, void* *, hdr,
6299	u32, len)
6300	{
6301	switch (type) {
6302	#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
6303	case BPF_LWT_ENCAP_SEG6:
6304	case BPF_LWT_ENCAP_SEG6_INLINE:
6305	return bpf_push_seg6_encap(skb, type, hdr, len);
6306	#endif
6307	#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
6308	case BPF_LWT_ENCAP_IP:
6309	return bpf_push_ip_encap(skb, hdr, len, ingress: true / ingress /);
6310	#endif
6311	default:
6312	return -EINVAL;
6313	}
6314	}
6315
6316	BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type,
6317	void *, hdr, u32, len)
6318	{
6319	switch (type) {
6320	#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
6321	case BPF_LWT_ENCAP_IP:
6322	return bpf_push_ip_encap(skb, hdr, len, ingress: false / egress /);
6323	#endif
6324	default:
6325	return -EINVAL;
6326	}
6327	}
6328
6329	static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = {
6330	.func = bpf_lwt_in_push_encap,
6331	.gpl_only = false,
6332	.ret_type = RET_INTEGER,
6333	.arg1_type = ARG_PTR_TO_CTX,
6334	.arg2_type = ARG_ANYTHING,
6335	.arg3_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6336	.arg4_type = ARG_CONST_SIZE
6337	};
6338
6339	static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = {
6340	.func = bpf_lwt_xmit_push_encap,
6341	.gpl_only = false,
6342	.ret_type = RET_INTEGER,
6343	.arg1_type = ARG_PTR_TO_CTX,
6344	.arg2_type = ARG_ANYTHING,
6345	.arg3_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6346	.arg4_type = ARG_CONST_SIZE
6347	};
6348
6349	#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
6350	BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
6351	const void *, from, u32, len)
6352	{
6353	struct seg6_bpf_srh_state *srh_state =
6354	this_cpu_ptr(&seg6_bpf_srh_states);
6355	struct ipv6_sr_hdr *srh = srh_state->srh;
6356	void srh_tlvs, srh_end, *ptr;
6357	int srhoff = `0`;
6358
6359	if (srh == NULL)
6360	return -EINVAL;
6361
6362	srh_tlvs = (void )((char* *)srh + ((srh->first_segment + `1`) << `4`));
6363	srh_end = (void )((char* )srh + sizeof(srh) + srh_state->hdrlen);
6364
6365	ptr = skb->data + offset;
6366	if (ptr >= srh_tlvs && ptr + len <= srh_end)
6367	srh_state->valid = false;
6368	else if (ptr < (void *)&srh->flags \|\|
6369	ptr + len > (void *)&srh->segments)
6370	return -EFAULT;
6371
6372	if (unlikely(bpf_try_make_writable(skb, offset + len)))
6373	return -EFAULT;
6374	if (ipv6_find_hdr(skb, offset: &srhoff, IPPROTO_ROUTING, NULL, NULL) < `0`)
6375	return -EINVAL;
6376	srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
6377
6378	memcpy(skb->data + offset, from, len);
6379	return `0`;
6380	}
6381
6382	static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
6383	.func = bpf_lwt_seg6_store_bytes,
6384	.gpl_only = false,
6385	.ret_type = RET_INTEGER,
6386	.arg1_type = ARG_PTR_TO_CTX,
6387	.arg2_type = ARG_ANYTHING,
6388	.arg3_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6389	.arg4_type = ARG_CONST_SIZE
6390	};
6391
6392	static void bpf_update_srh_state(struct sk_buff *skb)
6393	{
6394	struct seg6_bpf_srh_state *srh_state =
6395	this_cpu_ptr(&seg6_bpf_srh_states);
6396	int srhoff = `0`;
6397
6398	if (ipv6_find_hdr(skb, offset: &srhoff, IPPROTO_ROUTING, NULL, NULL) < `0`) {
6399	srh_state->srh = NULL;
6400	} else {
6401	srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
6402	srh_state->hdrlen = srh_state->srh->hdrlen << `3`;
6403	srh_state->valid = true;
6404	}
6405	}
6406
6407	BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
6408	u32, action, void *, param, u32, param_len)
6409	{
6410	struct seg6_bpf_srh_state *srh_state =
6411	this_cpu_ptr(&seg6_bpf_srh_states);
6412	int hdroff = `0`;
6413	int err;
6414
6415	switch (action) {
6416	case SEG6_LOCAL_ACTION_END_X:
6417	if (!seg6_bpf_has_valid_srh(skb))
6418	return -EBADMSG;
6419	if (param_len != sizeof(struct in6_addr))
6420	return -EINVAL;
6421	return seg6_lookup_nexthop(skb, nhaddr: (struct in6_addr *)param, tbl_id: `0`);
6422	case SEG6_LOCAL_ACTION_END_T:
6423	if (!seg6_bpf_has_valid_srh(skb))
6424	return -EBADMSG;
6425	if (param_len != sizeof(int))
6426	return -EINVAL;
6427	return seg6_lookup_nexthop(skb, NULL, tbl_id: (int* *)param);
6428	case SEG6_LOCAL_ACTION_END_DT6:
6429	if (!seg6_bpf_has_valid_srh(skb))
6430	return -EBADMSG;
6431	if (param_len != sizeof(int))
6432	return -EINVAL;
6433
6434	if (ipv6_find_hdr(skb, offset: &hdroff, IPPROTO_IPV6, NULL, NULL) < `0`)
6435	return -EBADMSG;
6436	if (!pskb_pull(skb, len: hdroff))
6437	return -EBADMSG;
6438
6439	skb_postpull_rcsum(skb, start: skb_network_header(skb), len: hdroff);
6440	skb_reset_network_header(skb);
6441	skb_reset_transport_header(skb);
6442	skb->encapsulation = `0`;
6443
6444	bpf_compute_data_pointers(skb);
6445	bpf_update_srh_state(skb);
6446	return seg6_lookup_nexthop(skb, NULL, tbl_id: (int* *)param);
6447	case SEG6_LOCAL_ACTION_END_B6:
6448	if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
6449	return -EBADMSG;
6450	err = bpf_push_seg6_encap(skb, type: BPF_LWT_ENCAP_SEG6_INLINE,
6451	hdr: param, len: param_len);
6452	if (!err)
6453	bpf_update_srh_state(skb);
6454
6455	return err;
6456	case SEG6_LOCAL_ACTION_END_B6_ENCAP:
6457	if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
6458	return -EBADMSG;
6459	err = bpf_push_seg6_encap(skb, type: BPF_LWT_ENCAP_SEG6,
6460	hdr: param, len: param_len);
6461	if (!err)
6462	bpf_update_srh_state(skb);
6463
6464	return err;
6465	default:
6466	return -EINVAL;
6467	}
6468	}
6469
6470	static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
6471	.func = bpf_lwt_seg6_action,
6472	.gpl_only = false,
6473	.ret_type = RET_INTEGER,
6474	.arg1_type = ARG_PTR_TO_CTX,
6475	.arg2_type = ARG_ANYTHING,
6476	.arg3_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6477	.arg4_type = ARG_CONST_SIZE
6478	};
6479
6480	BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
6481	s32, len)
6482	{
6483	struct seg6_bpf_srh_state *srh_state =
6484	this_cpu_ptr(&seg6_bpf_srh_states);
6485	struct ipv6_sr_hdr *srh = srh_state->srh;
6486	void srh_end, srh_tlvs, *ptr;
6487	struct ipv6hdr *hdr;
6488	int srhoff = `0`;
6489	int ret;
6490
6491	if (unlikely(srh == NULL))
6492	return -EINVAL;
6493
6494	srh_tlvs = (void )((unsigned* char )srh + sizeof(srh) +
6495	((srh->first_segment + `1`) << `4`));
6496	srh_end = (void )((unsigned* char )srh + sizeof(srh) +
6497	srh_state->hdrlen);
6498	ptr = skb->data + offset;
6499
6500	if (unlikely(ptr < srh_tlvs \|\| ptr > srh_end))
6501	return -EFAULT;
6502	if (unlikely(len < `0` && (void )((char* *)ptr - len) > srh_end))
6503	return -EFAULT;
6504
6505	if (len > `0`) {
6506	ret = skb_cow_head(skb, headroom: len);
6507	if (unlikely(ret < `0`))
6508	return ret;
6509
6510	ret = bpf_skb_net_hdr_push(skb, off: offset, len);
6511	} else {
6512	ret = bpf_skb_net_hdr_pop(skb, off: offset, len: -`1` * len);
6513	}
6514
6515	bpf_compute_data_pointers(skb);
6516	if (unlikely(ret < `0`))
6517	return ret;
6518
6519	hdr = (struct ipv6hdr *)skb->data;
6520	hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
6521
6522	if (ipv6_find_hdr(skb, offset: &srhoff, IPPROTO_ROUTING, NULL, NULL) < `0`)
6523	return -EINVAL;
6524	srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
6525	srh_state->hdrlen += len;
6526	srh_state->valid = false;
6527	return `0`;
6528	}
6529
6530	static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
6531	.func = bpf_lwt_seg6_adjust_srh,
6532	.gpl_only = false,
6533	.ret_type = RET_INTEGER,
6534	.arg1_type = ARG_PTR_TO_CTX,
6535	.arg2_type = ARG_ANYTHING,
6536	.arg3_type = ARG_ANYTHING,
6537	};
6538	#endif /* CONFIG_IPV6_SEG6_BPF */
6539
6540	#ifdef CONFIG_INET
6541	static struct sock sk_lookup(struct* net net, struct* bpf_sock_tuple *tuple,
6542	int dif, int sdif, u8 family, u8 proto)
6543	{
6544	struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo;
6545	bool refcounted = false;
6546	struct sock *sk = NULL;
6547
6548	if (family == AF_INET) {
6549	__be32 src4 = tuple->ipv4.saddr;
6550	__be32 dst4 = tuple->ipv4.daddr;
6551
6552	if (proto == IPPROTO_TCP)
6553	sk = __inet_lookup(net, hashinfo: hinfo, NULL, doff: `0`,
6554	saddr: src4, sport: tuple->ipv4.sport,
6555	daddr: dst4, dport: tuple->ipv4.dport,
6556	dif, sdif, refcounted: &refcounted);
6557	else
6558	sk = __udp4_lib_lookup(net, saddr: src4, sport: tuple->ipv4.sport,
6559	daddr: dst4, dport: tuple->ipv4.dport,
6560	dif, sdif, tbl: net->ipv4.udp_table, NULL);
6561	#if IS_ENABLED(CONFIG_IPV6)
6562	} else {
6563	struct in6_addr src6 = (struct* in6_addr *)&tuple->ipv6.saddr;
6564	struct in6_addr dst6 = (struct* in6_addr *)&tuple->ipv6.daddr;
6565
6566	if (proto == IPPROTO_TCP)
6567	sk = __inet6_lookup(net, hashinfo: hinfo, NULL, doff: `0`,
6568	saddr: src6, sport: tuple->ipv6.sport,
6569	daddr: dst6, ntohs(tuple->ipv6.dport),
6570	dif, sdif, refcounted: &refcounted);
6571	else if (likely(ipv6_bpf_stub))
6572	sk = ipv6_bpf_stub->udp6_lib_lookup(net,
6573	src6, tuple->ipv6.sport,
6574	dst6, tuple->ipv6.dport,
6575	dif, sdif,
6576	net->ipv4.udp_table, NULL);
6577	#endif
6578	}
6579
6580	if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) {
6581	WARN_ONCE(`1`, "Found non-RCU, unreferenced socket!");
6582	sk = NULL;
6583	}
6584	return sk;
6585	}
6586
6587	/ bpf_skc_lookup performs the core lookup for different types of sockets,*
6588	* taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
6589	*/
6590	static struct sock *
6591	__bpf_skc_lookup(struct sk_buff skb, struct* bpf_sock_tuple *tuple, u32 len,
6592	struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
6593	u64 flags, int sdif)
6594	{
6595	struct sock *sk = NULL;
6596	struct net *net;
6597	u8 family;
6598
6599	if (len == sizeof(tuple->ipv4))
6600	family = AF_INET;
6601	else if (len == sizeof(tuple->ipv6))
6602	family = AF_INET6;
6603	else
6604	return NULL;
6605
6606	if (unlikely(flags \|\| !((s32)netns_id < `0` \|\| netns_id <= S32_MAX)))
6607	goto out;
6608
6609	if (sdif < `0`) {
6610	if (family == AF_INET)
6611	sdif = inet_sdif(skb);
6612	else
6613	sdif = inet6_sdif(skb);
6614	}
6615
6616	if ((s32)netns_id < `0`) {
6617	net = caller_net;
6618	sk = sk_lookup(net, tuple, dif: ifindex, sdif, family, proto);
6619	} else {
6620	net = get_net_ns_by_id(net: caller_net, id: netns_id);
6621	if (unlikely(!net))
6622	goto out;
6623	sk = sk_lookup(net, tuple, dif: ifindex, sdif, family, proto);
6624	put_net(net);
6625	}
6626
6627	out:
6628	return sk;
6629	}
6630
6631	static struct sock *
6632	__bpf_sk_lookup(struct sk_buff skb, struct* bpf_sock_tuple *tuple, u32 len,
6633	struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
6634	u64 flags, int sdif)
6635	{
6636	struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net,
6637	ifindex, proto, netns_id, flags,
6638	sdif);
6639
6640	if (sk) {
6641	struct sock *sk2 = sk_to_full_sk(sk);
6642
6643	/ sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk*
6644	* sock refcnt is decremented to prevent a request_sock leak.
6645	*/
6646	if (!sk_fullsock(sk: sk2))
6647	sk2 = NULL;
6648	if (sk2 != sk) {
6649	sock_gen_put(sk);
6650	/ Ensure there is no need to bump sk2 refcnt /
6651	if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) {
6652	WARN_ONCE(`1`, "Found non-RCU, unreferenced socket!");
6653	return NULL;
6654	}
6655	sk = sk2;
6656	}
6657	}
6658
6659	return sk;
6660	}
6661
6662	static struct sock *
6663	bpf_skc_lookup(struct sk_buff skb, struct* bpf_sock_tuple *tuple, u32 len,
6664	u8 proto, u64 netns_id, u64 flags)
6665	{
6666	struct net *caller_net;
6667	int ifindex;
6668
6669	if (skb->dev) {
6670	caller_net = dev_net(dev: skb->dev);
6671	ifindex = skb->dev->ifindex;
6672	} else {
6673	caller_net = sock_net(sk: skb->sk);
6674	ifindex = `0`;
6675	}
6676
6677	return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto,
6678	netns_id, flags, sdif: -`1`);
6679	}
6680
6681	static struct sock *
6682	bpf_sk_lookup(struct sk_buff skb, struct* bpf_sock_tuple *tuple, u32 len,
6683	u8 proto, u64 netns_id, u64 flags)
6684	{
6685	struct sock *sk = bpf_skc_lookup(skb, tuple, len, proto, netns_id,
6686	flags);
6687
6688	if (sk) {
6689	struct sock *sk2 = sk_to_full_sk(sk);
6690
6691	/ sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk*
6692	* sock refcnt is decremented to prevent a request_sock leak.
6693	*/
6694	if (!sk_fullsock(sk: sk2))
6695	sk2 = NULL;
6696	if (sk2 != sk) {
6697	sock_gen_put(sk);
6698	/ Ensure there is no need to bump sk2 refcnt /
6699	if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) {
6700	WARN_ONCE(`1`, "Found non-RCU, unreferenced socket!");
6701	return NULL;
6702	}
6703	sk = sk2;
6704	}
6705	}
6706
6707	return sk;
6708	}
6709
6710	BPF_CALL_5(bpf_skc_lookup_tcp, struct sk_buff *, skb,
6711	struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6712	{
6713	return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP,
6714	netns_id, flags);
6715	}
6716
6717	static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = {
6718	.func = bpf_skc_lookup_tcp,
6719	.gpl_only = false,
6720	.pkt_access = true,
6721	.ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
6722	.arg1_type = ARG_PTR_TO_CTX,
6723	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6724	.arg3_type = ARG_CONST_SIZE,
6725	.arg4_type = ARG_ANYTHING,
6726	.arg5_type = ARG_ANYTHING,
6727	};
6728
6729	BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
6730	struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6731	{
6732	return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP,
6733	netns_id, flags);
6734	}
6735
6736	static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
6737	.func = bpf_sk_lookup_tcp,
6738	.gpl_only = false,
6739	.pkt_access = true,
6740	.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
6741	.arg1_type = ARG_PTR_TO_CTX,
6742	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6743	.arg3_type = ARG_CONST_SIZE,
6744	.arg4_type = ARG_ANYTHING,
6745	.arg5_type = ARG_ANYTHING,
6746	};
6747
6748	BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb,
6749	struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6750	{
6751	return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP,
6752	netns_id, flags);
6753	}
6754
6755	static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
6756	.func = bpf_sk_lookup_udp,
6757	.gpl_only = false,
6758	.pkt_access = true,
6759	.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
6760	.arg1_type = ARG_PTR_TO_CTX,
6761	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6762	.arg3_type = ARG_CONST_SIZE,
6763	.arg4_type = ARG_ANYTHING,
6764	.arg5_type = ARG_ANYTHING,
6765	};
6766
6767	BPF_CALL_5(bpf_tc_skc_lookup_tcp, struct sk_buff *, skb,
6768	struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6769	{
6770	struct net_device *dev = skb->dev;
6771	int ifindex = dev->ifindex, sdif = dev_sdif(dev);
6772	struct net *caller_net = dev_net(dev);
6773
6774	return (unsigned long)__bpf_skc_lookup(skb, tuple, len, caller_net,
6775	ifindex, IPPROTO_TCP, netns_id,
6776	flags, sdif);
6777	}
6778
6779	static const struct bpf_func_proto bpf_tc_skc_lookup_tcp_proto = {
6780	.func = bpf_tc_skc_lookup_tcp,
6781	.gpl_only = false,
6782	.pkt_access = true,
6783	.ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
6784	.arg1_type = ARG_PTR_TO_CTX,
6785	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6786	.arg3_type = ARG_CONST_SIZE,
6787	.arg4_type = ARG_ANYTHING,
6788	.arg5_type = ARG_ANYTHING,
6789	};
6790
6791	BPF_CALL_5(bpf_tc_sk_lookup_tcp, struct sk_buff *, skb,
6792	struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6793	{
6794	struct net_device *dev = skb->dev;
6795	int ifindex = dev->ifindex, sdif = dev_sdif(dev);
6796	struct net *caller_net = dev_net(dev);
6797
6798	return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net,
6799	ifindex, IPPROTO_TCP, netns_id,
6800	flags, sdif);
6801	}
6802
6803	static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = {
6804	.func = bpf_tc_sk_lookup_tcp,
6805	.gpl_only = false,
6806	.pkt_access = true,
6807	.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
6808	.arg1_type = ARG_PTR_TO_CTX,
6809	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6810	.arg3_type = ARG_CONST_SIZE,
6811	.arg4_type = ARG_ANYTHING,
6812	.arg5_type = ARG_ANYTHING,
6813	};
6814
6815	BPF_CALL_5(bpf_tc_sk_lookup_udp, struct sk_buff *, skb,
6816	struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6817	{
6818	struct net_device *dev = skb->dev;
6819	int ifindex = dev->ifindex, sdif = dev_sdif(dev);
6820	struct net *caller_net = dev_net(dev);
6821
6822	return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net,
6823	ifindex, IPPROTO_UDP, netns_id,
6824	flags, sdif);
6825	}
6826
6827	static const struct bpf_func_proto bpf_tc_sk_lookup_udp_proto = {
6828	.func = bpf_tc_sk_lookup_udp,
6829	.gpl_only = false,
6830	.pkt_access = true,
6831	.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
6832	.arg1_type = ARG_PTR_TO_CTX,
6833	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6834	.arg3_type = ARG_CONST_SIZE,
6835	.arg4_type = ARG_ANYTHING,
6836	.arg5_type = ARG_ANYTHING,
6837	};
6838
6839	BPF_CALL_1(bpf_sk_release, struct sock *, sk)
6840	{
6841	if (sk && sk_is_refcounted(sk))
6842	sock_gen_put(sk);
6843	return `0`;
6844	}
6845
6846	static const struct bpf_func_proto bpf_sk_release_proto = {
6847	.func = bpf_sk_release,
6848	.gpl_only = false,
6849	.ret_type = RET_INTEGER,
6850	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON \| OBJ_RELEASE,
6851	};
6852
6853	BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
6854	struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
6855	{
6856	struct net_device *dev = ctx->rxq->dev;
6857	int ifindex = dev->ifindex, sdif = dev_sdif(dev);
6858	struct net *caller_net = dev_net(dev);
6859
6860	return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
6861	ifindex, IPPROTO_UDP, netns_id,
6862	flags, sdif);
6863	}
6864
6865	static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
6866	.func = bpf_xdp_sk_lookup_udp,
6867	.gpl_only = false,
6868	.pkt_access = true,
6869	.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
6870	.arg1_type = ARG_PTR_TO_CTX,
6871	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6872	.arg3_type = ARG_CONST_SIZE,
6873	.arg4_type = ARG_ANYTHING,
6874	.arg5_type = ARG_ANYTHING,
6875	};
6876
6877	BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx,
6878	struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
6879	{
6880	struct net_device *dev = ctx->rxq->dev;
6881	int ifindex = dev->ifindex, sdif = dev_sdif(dev);
6882	struct net *caller_net = dev_net(dev);
6883
6884	return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net,
6885	ifindex, IPPROTO_TCP, netns_id,
6886	flags, sdif);
6887	}
6888
6889	static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
6890	.func = bpf_xdp_skc_lookup_tcp,
6891	.gpl_only = false,
6892	.pkt_access = true,
6893	.ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
6894	.arg1_type = ARG_PTR_TO_CTX,
6895	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6896	.arg3_type = ARG_CONST_SIZE,
6897	.arg4_type = ARG_ANYTHING,
6898	.arg5_type = ARG_ANYTHING,
6899	};
6900
6901	BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
6902	struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
6903	{
6904	struct net_device *dev = ctx->rxq->dev;
6905	int ifindex = dev->ifindex, sdif = dev_sdif(dev);
6906	struct net *caller_net = dev_net(dev);
6907
6908	return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
6909	ifindex, IPPROTO_TCP, netns_id,
6910	flags, sdif);
6911	}
6912
6913	static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
6914	.func = bpf_xdp_sk_lookup_tcp,
6915	.gpl_only = false,
6916	.pkt_access = true,
6917	.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
6918	.arg1_type = ARG_PTR_TO_CTX,
6919	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6920	.arg3_type = ARG_CONST_SIZE,
6921	.arg4_type = ARG_ANYTHING,
6922	.arg5_type = ARG_ANYTHING,
6923	};
6924
6925	BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
6926	struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6927	{
6928	return (unsigned long)__bpf_skc_lookup(NULL, tuple, len,
6929	caller_net: sock_net(sk: ctx->sk), ifindex: `0`,
6930	IPPROTO_TCP, netns_id, flags,
6931	sdif: -`1`);
6932	}
6933
6934	static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
6935	.func = bpf_sock_addr_skc_lookup_tcp,
6936	.gpl_only = false,
6937	.ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
6938	.arg1_type = ARG_PTR_TO_CTX,
6939	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6940	.arg3_type = ARG_CONST_SIZE,
6941	.arg4_type = ARG_ANYTHING,
6942	.arg5_type = ARG_ANYTHING,
6943	};
6944
6945	BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
6946	struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6947	{
6948	return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
6949	caller_net: sock_net(sk: ctx->sk), ifindex: `0`, IPPROTO_TCP,
6950	netns_id, flags, sdif: -`1`);
6951	}
6952
6953	static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
6954	.func = bpf_sock_addr_sk_lookup_tcp,
6955	.gpl_only = false,
6956	.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
6957	.arg1_type = ARG_PTR_TO_CTX,
6958	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6959	.arg3_type = ARG_CONST_SIZE,
6960	.arg4_type = ARG_ANYTHING,
6961	.arg5_type = ARG_ANYTHING,
6962	};
6963
6964	BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
6965	struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6966	{
6967	return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
6968	caller_net: sock_net(sk: ctx->sk), ifindex: `0`, IPPROTO_UDP,
6969	netns_id, flags, sdif: -`1`);
6970	}
6971
6972	static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
6973	.func = bpf_sock_addr_sk_lookup_udp,
6974	.gpl_only = false,
6975	.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
6976	.arg1_type = ARG_PTR_TO_CTX,
6977	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6978	.arg3_type = ARG_CONST_SIZE,
6979	.arg4_type = ARG_ANYTHING,
6980	.arg5_type = ARG_ANYTHING,
6981	};
6982
6983	bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
6984	struct bpf_insn_access_aux *info)
6985	{
6986	if (off < `0` \|\| off >= offsetofend(struct bpf_tcp_sock,
6987	icsk_retransmits))
6988	return false;
6989
6990	if (off % size != `0`)
6991	return false;
6992
6993	switch (off) {
6994	case offsetof(struct bpf_tcp_sock, bytes_received):
6995	case offsetof(struct bpf_tcp_sock, bytes_acked):
6996	return size == sizeof(__u64);
6997	default:
6998	return size == sizeof(__u32);
6999	}
7000	}
7001
7002	u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
7003	const struct bpf_insn *si,
7004	struct bpf_insn *insn_buf,
7005	struct bpf_prog prog, u32 target_size)
7006	{
7007	struct bpf_insn *insn = insn_buf;
7008
7009	#define BPF_TCP_SOCK_GET_COMMON(FIELD) \
7010	do { \
7011	BUILD_BUG_ON(sizeof_field(struct tcp_sock, FIELD) > \
7012	sizeof_field(struct bpf_tcp_sock, FIELD)); \
7013	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
7014	si->dst_reg, si->src_reg, \
7015	offsetof(struct tcp_sock, FIELD)); \
7016	} while (0)
7017
7018	#define BPF_INET_SOCK_GET_COMMON(FIELD) \
7019	do { \
7020	BUILD_BUG_ON(sizeof_field(struct inet_connection_sock, \
7021	FIELD) > \
7022	sizeof_field(struct bpf_tcp_sock, FIELD)); \
7023	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
7024	struct inet_connection_sock, \
7025	FIELD), \
7026	si->dst_reg, si->src_reg, \
7027	offsetof( \
7028	struct inet_connection_sock, \
7029	FIELD)); \
7030	} while (0)
7031
7032	BTF_TYPE_EMIT(struct bpf_tcp_sock);
7033
7034	switch (si->off) {
7035	case offsetof(struct bpf_tcp_sock, rtt_min):
7036	BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
7037	sizeof(struct minmax));
7038	BUILD_BUG_ON(sizeof(struct minmax) <
7039	sizeof(struct minmax_sample));
7040
7041	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
7042	offsetof(struct tcp_sock, rtt_min) +
7043	offsetof(struct minmax_sample, v));
7044	break;
7045	case offsetof(struct bpf_tcp_sock, snd_cwnd):
7046	BPF_TCP_SOCK_GET_COMMON(snd_cwnd);
7047	break;
7048	case offsetof(struct bpf_tcp_sock, srtt_us):
7049	BPF_TCP_SOCK_GET_COMMON(srtt_us);
7050	break;
7051	case offsetof(struct bpf_tcp_sock, snd_ssthresh):
7052	BPF_TCP_SOCK_GET_COMMON(snd_ssthresh);
7053	break;
7054	case offsetof(struct bpf_tcp_sock, rcv_nxt):
7055	BPF_TCP_SOCK_GET_COMMON(rcv_nxt);
7056	break;
7057	case offsetof(struct bpf_tcp_sock, snd_nxt):
7058	BPF_TCP_SOCK_GET_COMMON(snd_nxt);
7059	break;
7060	case offsetof(struct bpf_tcp_sock, snd_una):
7061	BPF_TCP_SOCK_GET_COMMON(snd_una);
7062	break;
7063	case offsetof(struct bpf_tcp_sock, mss_cache):
7064	BPF_TCP_SOCK_GET_COMMON(mss_cache);
7065	break;
7066	case offsetof(struct bpf_tcp_sock, ecn_flags):
7067	BPF_TCP_SOCK_GET_COMMON(ecn_flags);
7068	break;
7069	case offsetof(struct bpf_tcp_sock, rate_delivered):
7070	BPF_TCP_SOCK_GET_COMMON(rate_delivered);
7071	break;
7072	case offsetof(struct bpf_tcp_sock, rate_interval_us):
7073	BPF_TCP_SOCK_GET_COMMON(rate_interval_us);
7074	break;
7075	case offsetof(struct bpf_tcp_sock, packets_out):
7076	BPF_TCP_SOCK_GET_COMMON(packets_out);
7077	break;
7078	case offsetof(struct bpf_tcp_sock, retrans_out):
7079	BPF_TCP_SOCK_GET_COMMON(retrans_out);
7080	break;
7081	case offsetof(struct bpf_tcp_sock, total_retrans):
7082	BPF_TCP_SOCK_GET_COMMON(total_retrans);
7083	break;
7084	case offsetof(struct bpf_tcp_sock, segs_in):
7085	BPF_TCP_SOCK_GET_COMMON(segs_in);
7086	break;
7087	case offsetof(struct bpf_tcp_sock, data_segs_in):
7088	BPF_TCP_SOCK_GET_COMMON(data_segs_in);
7089	break;
7090	case offsetof(struct bpf_tcp_sock, segs_out):
7091	BPF_TCP_SOCK_GET_COMMON(segs_out);
7092	break;
7093	case offsetof(struct bpf_tcp_sock, data_segs_out):
7094	BPF_TCP_SOCK_GET_COMMON(data_segs_out);
7095	break;
7096	case offsetof(struct bpf_tcp_sock, lost_out):
7097	BPF_TCP_SOCK_GET_COMMON(lost_out);
7098	break;
7099	case offsetof(struct bpf_tcp_sock, sacked_out):
7100	BPF_TCP_SOCK_GET_COMMON(sacked_out);
7101	break;
7102	case offsetof(struct bpf_tcp_sock, bytes_received):
7103	BPF_TCP_SOCK_GET_COMMON(bytes_received);
7104	break;
7105	case offsetof(struct bpf_tcp_sock, bytes_acked):
7106	BPF_TCP_SOCK_GET_COMMON(bytes_acked);
7107	break;
7108	case offsetof(struct bpf_tcp_sock, dsack_dups):
7109	BPF_TCP_SOCK_GET_COMMON(dsack_dups);
7110	break;
7111	case offsetof(struct bpf_tcp_sock, delivered):
7112	BPF_TCP_SOCK_GET_COMMON(delivered);
7113	break;
7114	case offsetof(struct bpf_tcp_sock, delivered_ce):
7115	BPF_TCP_SOCK_GET_COMMON(delivered_ce);
7116	break;
7117	case offsetof(struct bpf_tcp_sock, icsk_retransmits):
7118	BPF_INET_SOCK_GET_COMMON(icsk_retransmits);
7119	break;
7120	}
7121
7122	return insn - insn_buf;
7123	}
7124
7125	BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
7126	{
7127	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
7128	return (unsigned long)sk;
7129
7130	return (unsigned long)NULL;
7131	}
7132
7133	const struct bpf_func_proto bpf_tcp_sock_proto = {
7134	.func = bpf_tcp_sock,
7135	.gpl_only = false,
7136	.ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL,
7137	.arg1_type = ARG_PTR_TO_SOCK_COMMON,
7138	};
7139
7140	BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
7141	{
7142	sk = sk_to_full_sk(sk);
7143
7144	if (sk->sk_state == TCP_LISTEN && sock_flag(sk, flag: SOCK_RCU_FREE))
7145	return (unsigned long)sk;
7146
7147	return (unsigned long)NULL;
7148	}
7149
7150	static const struct bpf_func_proto bpf_get_listener_sock_proto = {
7151	.func = bpf_get_listener_sock,
7152	.gpl_only = false,
7153	.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
7154	.arg1_type = ARG_PTR_TO_SOCK_COMMON,
7155	};
7156
7157	BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
7158	{
7159	unsigned int iphdr_len;
7160
7161	switch (skb_protocol(skb, skip_vlan: true)) {
7162	case cpu_to_be16(ETH_P_IP):
7163	iphdr_len = sizeof(struct iphdr);
7164	break;
7165	case cpu_to_be16(ETH_P_IPV6):
7166	iphdr_len = sizeof(struct ipv6hdr);
7167	break;
7168	default:
7169	return `0`;
7170	}
7171
7172	if (skb_headlen(skb) < iphdr_len)
7173	return `0`;
7174
7175	if (skb_cloned(skb) && !skb_clone_writable(skb, len: iphdr_len))
7176	return `0`;
7177
7178	return INET_ECN_set_ce(skb);
7179	}
7180
7181	bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
7182	struct bpf_insn_access_aux *info)
7183	{
7184	if (off < `0` \|\| off >= offsetofend(struct bpf_xdp_sock, queue_id))
7185	return false;
7186
7187	if (off % size != `0`)
7188	return false;
7189
7190	switch (off) {
7191	default:
7192	return size == sizeof(__u32);
7193	}
7194	}
7195
7196	u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
7197	const struct bpf_insn *si,
7198	struct bpf_insn *insn_buf,
7199	struct bpf_prog prog, u32 target_size)
7200	{
7201	struct bpf_insn *insn = insn_buf;
7202
7203	#define BPF_XDP_SOCK_GET(FIELD) \
7204	do { \
7205	BUILD_BUG_ON(sizeof_field(struct xdp_sock, FIELD) > \
7206	sizeof_field(struct bpf_xdp_sock, FIELD)); \
7207	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\
7208	si->dst_reg, si->src_reg, \
7209	offsetof(struct xdp_sock, FIELD)); \
7210	} while (0)
7211
7212	switch (si->off) {
7213	case offsetof(struct bpf_xdp_sock, queue_id):
7214	BPF_XDP_SOCK_GET(queue_id);
7215	break;
7216	}
7217
7218	return insn - insn_buf;
7219	}
7220
7221	static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {
7222	.func = bpf_skb_ecn_set_ce,
7223	.gpl_only = false,
7224	.ret_type = RET_INTEGER,
7225	.arg1_type = ARG_PTR_TO_CTX,
7226	};
7227
7228	BPF_CALL_5(bpf_tcp_check_syncookie, struct sock , sk, void* *, iph, u32, iph_len,
7229	struct tcphdr *, th, u32, th_len)
7230	{
7231	#ifdef CONFIG_SYN_COOKIES
7232	u32 cookie;
7233	int ret;
7234
7235	if (unlikely(!sk \|\| th_len < sizeof(*th)))
7236	return -EINVAL;
7237
7238	/ sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. /
7239	if (sk->sk_protocol != IPPROTO_TCP \|\| sk->sk_state != TCP_LISTEN)
7240	return -EINVAL;
7241
7242	if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
7243	return -EINVAL;
7244
7245	if (!th->ack \|\| th->rst \|\| th->syn)
7246	return -ENOENT;
7247
7248	if (unlikely(iph_len < sizeof(struct iphdr)))
7249	return -EINVAL;
7250
7251	if (tcp_synq_no_recent_overflow(sk))
7252	return -ENOENT;
7253
7254	cookie = ntohl(th->ack_seq) - `1`;
7255
7256	/ Both struct iphdr and struct ipv6hdr have the version field at the*
7257	* same offset so we can cast to the shorter header (struct iphdr).
7258	*/
7259	switch (((struct iphdr *)iph)->version) {
7260	case `4`:
7261	if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
7262	return -EINVAL;
7263
7264	ret = __cookie_v4_check(iph: (struct iphdr *)iph, th, cookie);
7265	break;
7266
7267	#if IS_BUILTIN(CONFIG_IPV6)
7268	case `6`:
7269	if (unlikely(iph_len < sizeof(struct ipv6hdr)))
7270	return -EINVAL;
7271
7272	if (sk->sk_family != AF_INET6)
7273	return -EINVAL;
7274
7275	ret = __cookie_v6_check(iph: (struct ipv6hdr *)iph, th, cookie);
7276	break;
7277	#endif /* CONFIG_IPV6 */
7278
7279	default:
7280	return -EPROTONOSUPPORT;
7281	}
7282
7283	if (ret > `0`)
7284	return `0`;
7285
7286	return -ENOENT;
7287	#else
7288	return -ENOTSUPP;
7289	#endif
7290	}
7291
7292	static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
7293	.func = bpf_tcp_check_syncookie,
7294	.gpl_only = true,
7295	.pkt_access = true,
7296	.ret_type = RET_INTEGER,
7297	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
7298	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
7299	.arg3_type = ARG_CONST_SIZE,
7300	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
7301	.arg5_type = ARG_CONST_SIZE,
7302	};
7303
7304	BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock , sk, void* *, iph, u32, iph_len,
7305	struct tcphdr *, th, u32, th_len)
7306	{
7307	#ifdef CONFIG_SYN_COOKIES
7308	u32 cookie;
7309	u16 mss;
7310
7311	if (unlikely(!sk \|\| th_len < sizeof(th) \|\| th_len != th->doff `4`))
7312	return -EINVAL;
7313
7314	if (sk->sk_protocol != IPPROTO_TCP \|\| sk->sk_state != TCP_LISTEN)
7315	return -EINVAL;
7316
7317	if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
7318	return -ENOENT;
7319
7320	if (!th->syn \|\| th->ack \|\| th->fin \|\| th->rst)
7321	return -EINVAL;
7322
7323	if (unlikely(iph_len < sizeof(struct iphdr)))
7324	return -EINVAL;
7325
7326	/ Both struct iphdr and struct ipv6hdr have the version field at the*
7327	* same offset so we can cast to the shorter header (struct iphdr).
7328	*/
7329	switch (((struct iphdr *)iph)->version) {
7330	case `4`:
7331	if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
7332	return -EINVAL;
7333
7334	mss = tcp_v4_get_syncookie(sk, iph, th, cookie: &cookie);
7335	break;
7336
7337	#if IS_BUILTIN(CONFIG_IPV6)
7338	case `6`:
7339	if (unlikely(iph_len < sizeof(struct ipv6hdr)))
7340	return -EINVAL;
7341
7342	if (sk->sk_family != AF_INET6)
7343	return -EINVAL;
7344
7345	mss = tcp_v6_get_syncookie(sk, iph, th, cookie: &cookie);
7346	break;
7347	#endif /* CONFIG_IPV6 */
7348
7349	default:
7350	return -EPROTONOSUPPORT;
7351	}
7352	if (mss == `0`)
7353	return -ENOENT;
7354
7355	return cookie \| ((u64)mss << `32`);
7356	#else
7357	return -EOPNOTSUPP;
7358	#endif /* CONFIG_SYN_COOKIES */
7359	}
7360
7361	static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
7362	.func = bpf_tcp_gen_syncookie,
7363	.gpl_only = true, / __cookie_v_init_sequence() is GPL /*
7364	.pkt_access = true,
7365	.ret_type = RET_INTEGER,
7366	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
7367	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
7368	.arg3_type = ARG_CONST_SIZE,
7369	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
7370	.arg5_type = ARG_CONST_SIZE,
7371	};
7372
7373	BPF_CALL_3(bpf_sk_assign, struct sk_buff , skb, struct* sock *, sk, u64, flags)
7374	{
7375	if (!sk \|\| flags != `0`)
7376	return -EINVAL;
7377	if (!skb_at_tc_ingress(skb))
7378	return -EOPNOTSUPP;
7379	if (unlikely(dev_net(skb->dev) != sock_net(sk)))
7380	return -ENETUNREACH;
7381	if (sk_unhashed(sk))
7382	return -EOPNOTSUPP;
7383	if (sk_is_refcounted(sk) &&
7384	unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
7385	return -ENOENT;
7386
7387	skb_orphan(skb);
7388	skb->sk = sk;
7389	skb->destructor = sock_pfree;
7390
7391	return `0`;
7392	}
7393
7394	static const struct bpf_func_proto bpf_sk_assign_proto = {
7395	.func = bpf_sk_assign,
7396	.gpl_only = false,
7397	.ret_type = RET_INTEGER,
7398	.arg1_type = ARG_PTR_TO_CTX,
7399	.arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
7400	.arg3_type = ARG_ANYTHING,
7401	};
7402
7403	static const u8 bpf_search_tcp_opt(const* u8 op, const* u8 *opend,
7404	u8 search_kind, const u8 *magic,
7405	u8 magic_len, bool *eol)
7406	{
7407	u8 kind, kind_len;
7408
7409	*eol = false;
7410
7411	while (op < opend) {
7412	kind = op[`0`];
7413
7414	if (kind == TCPOPT_EOL) {
7415	*eol = true;
7416	return ERR_PTR(error: -ENOMSG);
7417	} else if (kind == TCPOPT_NOP) {
7418	op++;
7419	continue;
7420	}
7421
7422	if (opend - op < `2` \|\| opend - op < op[`1`] \|\| op[`1`] < `2`)
7423	/ Something is wrong in the received header.*
7424	* Follow the TCP stack's tcp_parse_options()
7425	* and just bail here.
7426	*/
7427	return ERR_PTR(error: -EFAULT);
7428
7429	kind_len = op[`1`];
7430	if (search_kind == kind) {
7431	if (!magic_len)
7432	return op;
7433
7434	if (magic_len > kind_len - `2`)
7435	return ERR_PTR(error: -ENOMSG);
7436
7437	if (!memcmp(p: &op[`2`], q: magic, size: magic_len))
7438	return op;
7439	}
7440
7441	op += kind_len;
7442	}
7443
7444	return ERR_PTR(error: -ENOMSG);
7445	}
7446
7447	BPF_CALL_4(bpf_sock_ops_load_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
7448	void *, search_res, u32, len, u64, flags)
7449	{
7450	bool eol, load_syn = flags & BPF_LOAD_HDR_OPT_TCP_SYN;
7451	const u8 op, opend, magic, search = search_res;
7452	u8 search_kind, search_len, copy_len, magic_len;
7453	int ret;
7454
7455	/ 2 byte is the minimal option len except TCPOPT_NOP and*
7456	* TCPOPT_EOL which are useless for the bpf prog to learn
7457	* and this helper disallow loading them also.
7458	*/
7459	if (len < `2` \|\| flags & ~BPF_LOAD_HDR_OPT_TCP_SYN)
7460	return -EINVAL;
7461
7462	search_kind = search[`0`];
7463	search_len = search[`1`];
7464
7465	if (search_len > len \|\| search_kind == TCPOPT_NOP \|\|
7466	search_kind == TCPOPT_EOL)
7467	return -EINVAL;
7468
7469	if (search_kind == TCPOPT_EXP \|\| search_kind == `253`) {
7470	/ 16 or 32 bit magic. +2 for kind and kind length /
7471	if (search_len != `4` && search_len != `6`)
7472	return -EINVAL;
7473	magic = &search[`2`];
7474	magic_len = search_len - `2`;
7475	} else {
7476	if (search_len)
7477	return -EINVAL;
7478	magic = NULL;
7479	magic_len = `0`;
7480	}
7481
7482	if (load_syn) {
7483	ret = bpf_sock_ops_get_syn(bpf_sock, optname: TCP_BPF_SYN, start: &op);
7484	if (ret < `0`)
7485	return ret;
7486
7487	opend = op + ret;
7488	op += sizeof(struct tcphdr);
7489	} else {
7490	if (!bpf_sock->skb \|\|
7491	bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB)
7492	/ This bpf_sock->op cannot call this helper /
7493	return -EPERM;
7494
7495	opend = bpf_sock->skb_data_end;
7496	op = bpf_sock->skb->data + sizeof(struct tcphdr);
7497	}
7498
7499	op = bpf_search_tcp_opt(op, opend, search_kind, magic, magic_len,
7500	eol: &eol);
7501	if (IS_ERR(ptr: op))
7502	return PTR_ERR(ptr: op);
7503
7504	copy_len = op[`1`];
7505	ret = copy_len;
7506	if (copy_len > len) {
7507	ret = -ENOSPC;
7508	copy_len = len;
7509	}
7510
7511	memcpy(search_res, op, copy_len);
7512	return ret;
7513	}
7514
7515	static const struct bpf_func_proto bpf_sock_ops_load_hdr_opt_proto = {
7516	.func = bpf_sock_ops_load_hdr_opt,
7517	.gpl_only = false,
7518	.ret_type = RET_INTEGER,
7519	.arg1_type = ARG_PTR_TO_CTX,
7520	.arg2_type = ARG_PTR_TO_MEM,
7521	.arg3_type = ARG_CONST_SIZE,
7522	.arg4_type = ARG_ANYTHING,
7523	};
7524
7525	BPF_CALL_4(bpf_sock_ops_store_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
7526	const void *, from, u32, len, u64, flags)
7527	{
7528	u8 new_kind, new_kind_len, magic_len = `0`, *opend;
7529	const u8 op, new_op, *magic = NULL;
7530	struct sk_buff *skb;
7531	bool eol;
7532
7533	if (bpf_sock->op != BPF_SOCK_OPS_WRITE_HDR_OPT_CB)
7534	return -EPERM;
7535
7536	if (len < `2` \|\| flags)
7537	return -EINVAL;
7538
7539	new_op = from;
7540	new_kind = new_op[`0`];
7541	new_kind_len = new_op[`1`];
7542
7543	if (new_kind_len > len \|\| new_kind == TCPOPT_NOP \|\|
7544	new_kind == TCPOPT_EOL)
7545	return -EINVAL;
7546
7547	if (new_kind_len > bpf_sock->remaining_opt_len)
7548	return -ENOSPC;
7549
7550	/ 253 is another experimental kind /
7551	if (new_kind == TCPOPT_EXP \|\| new_kind == `253`) {
7552	if (new_kind_len < `4`)
7553	return -EINVAL;
7554	/ Match for the 2 byte magic also.*
7555	* RFC 6994: the magic could be 2 or 4 bytes.
7556	* Hence, matching by 2 byte only is on the
7557	* conservative side but it is the right
7558	* thing to do for the 'search-for-duplication'
7559	* purpose.
7560	*/
7561	magic = &new_op[`2`];
7562	magic_len = `2`;
7563	}
7564
7565	/ Check for duplication /
7566	skb = bpf_sock->skb;
7567	op = skb->data + sizeof(struct tcphdr);
7568	opend = bpf_sock->skb_data_end;
7569
7570	op = bpf_search_tcp_opt(op, opend, search_kind: new_kind, magic, magic_len,
7571	eol: &eol);
7572	if (!IS_ERR(ptr: op))
7573	return -EEXIST;
7574
7575	if (PTR_ERR(ptr: op) != -ENOMSG)
7576	return PTR_ERR(ptr: op);
7577
7578	if (eol)
7579	/ The option has been ended. Treat it as no more*
7580	* header option can be written.
7581	*/
7582	return -ENOSPC;
7583
7584	/ No duplication found. Store the header option. /
7585	memcpy(opend, from, new_kind_len);
7586
7587	bpf_sock->remaining_opt_len -= new_kind_len;
7588	bpf_sock->skb_data_end += new_kind_len;
7589
7590	return `0`;
7591	}
7592
7593	static const struct bpf_func_proto bpf_sock_ops_store_hdr_opt_proto = {
7594	.func = bpf_sock_ops_store_hdr_opt,
7595	.gpl_only = false,
7596	.ret_type = RET_INTEGER,
7597	.arg1_type = ARG_PTR_TO_CTX,
7598	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
7599	.arg3_type = ARG_CONST_SIZE,
7600	.arg4_type = ARG_ANYTHING,
7601	};
7602
7603	BPF_CALL_3(bpf_sock_ops_reserve_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
7604	u32, len, u64, flags)
7605	{
7606	if (bpf_sock->op != BPF_SOCK_OPS_HDR_OPT_LEN_CB)
7607	return -EPERM;
7608
7609	if (flags \|\| len < `2`)
7610	return -EINVAL;
7611
7612	if (len > bpf_sock->remaining_opt_len)
7613	return -ENOSPC;
7614
7615	bpf_sock->remaining_opt_len -= len;
7616
7617	return `0`;
7618	}
7619
7620	static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = {
7621	.func = bpf_sock_ops_reserve_hdr_opt,
7622	.gpl_only = false,
7623	.ret_type = RET_INTEGER,
7624	.arg1_type = ARG_PTR_TO_CTX,
7625	.arg2_type = ARG_ANYTHING,
7626	.arg3_type = ARG_ANYTHING,
7627	};
7628
7629	BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb,
7630	u64, tstamp, u32, tstamp_type)
7631	{
7632	/ skb_clear_delivery_time() is done for inet protocol /
7633	if (skb->protocol != htons(ETH_P_IP) &&
7634	skb->protocol != htons(ETH_P_IPV6))
7635	return -EOPNOTSUPP;
7636
7637	switch (tstamp_type) {
7638	case BPF_SKB_TSTAMP_DELIVERY_MONO:
7639	if (!tstamp)
7640	return -EINVAL;
7641	skb->tstamp = tstamp;
7642	skb->mono_delivery_time = `1`;
7643	break;
7644	case BPF_SKB_TSTAMP_UNSPEC:
7645	if (tstamp)
7646	return -EINVAL;
7647	skb->tstamp = `0`;
7648	skb->mono_delivery_time = `0`;
7649	break;
7650	default:
7651	return -EINVAL;
7652	}
7653
7654	return `0`;
7655	}
7656
7657	static const struct bpf_func_proto bpf_skb_set_tstamp_proto = {
7658	.func = bpf_skb_set_tstamp,
7659	.gpl_only = false,
7660	.ret_type = RET_INTEGER,
7661	.arg1_type = ARG_PTR_TO_CTX,
7662	.arg2_type = ARG_ANYTHING,
7663	.arg3_type = ARG_ANYTHING,
7664	};
7665
7666	#ifdef CONFIG_SYN_COOKIES
7667	BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv4, struct iphdr *, iph,
7668	struct tcphdr *, th, u32, th_len)
7669	{
7670	u32 cookie;
7671	u16 mss;
7672
7673	if (unlikely(th_len < sizeof(th) \|\| th_len != th->doff `4`))
7674	return -EINVAL;
7675
7676	mss = tcp_parse_mss_option(th, user_mss: `0`) ?: TCP_MSS_DEFAULT;
7677	cookie = __cookie_v4_init_sequence(iph, th, mssp: &mss);
7678
7679	return cookie \| ((u64)mss << `32`);
7680	}
7681
7682	static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = {
7683	.func = bpf_tcp_raw_gen_syncookie_ipv4,
7684	.gpl_only = true, / __cookie_v4_init_sequence() is GPL /
7685	.pkt_access = true,
7686	.ret_type = RET_INTEGER,
7687	.arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
7688	.arg1_size = sizeof(struct iphdr),
7689	.arg2_type = ARG_PTR_TO_MEM,
7690	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
7691	};
7692
7693	BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv6, struct ipv6hdr *, iph,
7694	struct tcphdr *, th, u32, th_len)
7695	{
7696	#if IS_BUILTIN(CONFIG_IPV6)
7697	const u16 mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
7698	sizeof(struct ipv6hdr);
7699	u32 cookie;
7700	u16 mss;
7701
7702	if (unlikely(th_len < sizeof(th) \|\| th_len != th->doff `4`))
7703	return -EINVAL;
7704
7705	mss = tcp_parse_mss_option(th, user_mss: `0`) ?: mss_clamp;
7706	cookie = __cookie_v6_init_sequence(iph, th, mssp: &mss);
7707
7708	return cookie \| ((u64)mss << `32`);
7709	#else
7710	return -EPROTONOSUPPORT;
7711	#endif
7712	}
7713
7714	static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = {
7715	.func = bpf_tcp_raw_gen_syncookie_ipv6,
7716	.gpl_only = true, / __cookie_v6_init_sequence() is GPL /
7717	.pkt_access = true,
7718	.ret_type = RET_INTEGER,
7719	.arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
7720	.arg1_size = sizeof(struct ipv6hdr),
7721	.arg2_type = ARG_PTR_TO_MEM,
7722	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
7723	};
7724
7725	BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph,
7726	struct tcphdr *, th)
7727	{
7728	u32 cookie = ntohl(th->ack_seq) - `1`;
7729
7730	if (__cookie_v4_check(iph, th, cookie) > `0`)
7731	return `0`;
7732
7733	return -EACCES;
7734	}
7735
7736	static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv4_proto = {
7737	.func = bpf_tcp_raw_check_syncookie_ipv4,
7738	.gpl_only = true, / __cookie_v4_check is GPL /
7739	.pkt_access = true,
7740	.ret_type = RET_INTEGER,
7741	.arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
7742	.arg1_size = sizeof(struct iphdr),
7743	.arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM,
7744	.arg2_size = sizeof(struct tcphdr),
7745	};
7746
7747	BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph,
7748	struct tcphdr *, th)
7749	{
7750	#if IS_BUILTIN(CONFIG_IPV6)
7751	u32 cookie = ntohl(th->ack_seq) - `1`;
7752
7753	if (__cookie_v6_check(iph, th, cookie) > `0`)
7754	return `0`;
7755
7756	return -EACCES;
7757	#else
7758	return -EPROTONOSUPPORT;
7759	#endif
7760	}
7761
7762	static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv6_proto = {
7763	.func = bpf_tcp_raw_check_syncookie_ipv6,
7764	.gpl_only = true, / __cookie_v6_check is GPL /
7765	.pkt_access = true,
7766	.ret_type = RET_INTEGER,
7767	.arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
7768	.arg1_size = sizeof(struct ipv6hdr),
7769	.arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM,
7770	.arg2_size = sizeof(struct tcphdr),
7771	};
7772	#endif /* CONFIG_SYN_COOKIES */
7773
7774	#endif /* CONFIG_INET */
7775
7776	bool bpf_helper_changes_pkt_data(void *func)
7777	{
7778	if (func == bpf_skb_vlan_push \|\|
7779	func == bpf_skb_vlan_pop \|\|
7780	func == bpf_skb_store_bytes \|\|
7781	func == bpf_skb_change_proto \|\|
7782	func == bpf_skb_change_head \|\|
7783	func == sk_skb_change_head \|\|
7784	func == bpf_skb_change_tail \|\|
7785	func == sk_skb_change_tail \|\|
7786	func == bpf_skb_adjust_room \|\|
7787	func == sk_skb_adjust_room \|\|
7788	func == bpf_skb_pull_data \|\|
7789	func == sk_skb_pull_data \|\|
7790	func == bpf_clone_redirect \|\|
7791	func == bpf_l3_csum_replace \|\|
7792	func == bpf_l4_csum_replace \|\|
7793	func == bpf_xdp_adjust_head \|\|
7794	func == bpf_xdp_adjust_meta \|\|
7795	func == bpf_msg_pull_data \|\|
7796	func == bpf_msg_push_data \|\|
7797	func == bpf_msg_pop_data \|\|
7798	func == bpf_xdp_adjust_tail \|\|
7799	#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
7800	func == bpf_lwt_seg6_store_bytes \|\|
7801	func == bpf_lwt_seg6_adjust_srh \|\|
7802	func == bpf_lwt_seg6_action \|\|
7803	#endif
7804	#ifdef CONFIG_INET
7805	func == bpf_sock_ops_store_hdr_opt \|\|
7806	#endif
7807	func == bpf_lwt_in_push_encap \|\|
7808	func == bpf_lwt_xmit_push_encap)
7809	return true;
7810
7811	return false;
7812	}
7813
7814	const struct bpf_func_proto bpf_event_output_data_proto __weak;
7815	const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto __weak;
7816
7817	static const struct bpf_func_proto *
7818	sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7819	{
7820	const struct bpf_func_proto *func_proto;
7821
7822	func_proto = cgroup_common_func_proto(func_id, prog);
7823	if (func_proto)
7824	return func_proto;
7825
7826	func_proto = cgroup_current_func_proto(func_id, prog);
7827	if (func_proto)
7828	return func_proto;
7829
7830	switch (func_id) {
7831	case BPF_FUNC_get_socket_cookie:
7832	return &bpf_get_socket_cookie_sock_proto;
7833	case BPF_FUNC_get_netns_cookie:
7834	return &bpf_get_netns_cookie_sock_proto;
7835	case BPF_FUNC_perf_event_output:
7836	return &bpf_event_output_data_proto;
7837	case BPF_FUNC_sk_storage_get:
7838	return &bpf_sk_storage_get_cg_sock_proto;
7839	case BPF_FUNC_ktime_get_coarse_ns:
7840	return &bpf_ktime_get_coarse_ns_proto;
7841	default:
7842	return bpf_base_func_proto(func_id);
7843	}
7844	}
7845
7846	static const struct bpf_func_proto *
7847	sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7848	{
7849	const struct bpf_func_proto *func_proto;
7850
7851	func_proto = cgroup_common_func_proto(func_id, prog);
7852	if (func_proto)
7853	return func_proto;
7854
7855	func_proto = cgroup_current_func_proto(func_id, prog);
7856	if (func_proto)
7857	return func_proto;
7858
7859	switch (func_id) {
7860	case BPF_FUNC_bind:
7861	switch (prog->expected_attach_type) {
7862	case BPF_CGROUP_INET4_CONNECT:
7863	case BPF_CGROUP_INET6_CONNECT:
7864	return &bpf_bind_proto;
7865	default:
7866	return NULL;
7867	}
7868	case BPF_FUNC_get_socket_cookie:
7869	return &bpf_get_socket_cookie_sock_addr_proto;
7870	case BPF_FUNC_get_netns_cookie:
7871	return &bpf_get_netns_cookie_sock_addr_proto;
7872	case BPF_FUNC_perf_event_output:
7873	return &bpf_event_output_data_proto;
7874	#ifdef CONFIG_INET
7875	case BPF_FUNC_sk_lookup_tcp:
7876	return &bpf_sock_addr_sk_lookup_tcp_proto;
7877	case BPF_FUNC_sk_lookup_udp:
7878	return &bpf_sock_addr_sk_lookup_udp_proto;
7879	case BPF_FUNC_sk_release:
7880	return &bpf_sk_release_proto;
7881	case BPF_FUNC_skc_lookup_tcp:
7882	return &bpf_sock_addr_skc_lookup_tcp_proto;
7883	#endif /* CONFIG_INET */
7884	case BPF_FUNC_sk_storage_get:
7885	return &bpf_sk_storage_get_proto;
7886	case BPF_FUNC_sk_storage_delete:
7887	return &bpf_sk_storage_delete_proto;
7888	case BPF_FUNC_setsockopt:
7889	switch (prog->expected_attach_type) {
7890	case BPF_CGROUP_INET4_BIND:
7891	case BPF_CGROUP_INET6_BIND:
7892	case BPF_CGROUP_INET4_CONNECT:
7893	case BPF_CGROUP_INET6_CONNECT:
7894	case BPF_CGROUP_UNIX_CONNECT:
7895	case BPF_CGROUP_UDP4_RECVMSG:
7896	case BPF_CGROUP_UDP6_RECVMSG:
7897	case BPF_CGROUP_UNIX_RECVMSG:
7898	case BPF_CGROUP_UDP4_SENDMSG:
7899	case BPF_CGROUP_UDP6_SENDMSG:
7900	case BPF_CGROUP_UNIX_SENDMSG:
7901	case BPF_CGROUP_INET4_GETPEERNAME:
7902	case BPF_CGROUP_INET6_GETPEERNAME:
7903	case BPF_CGROUP_UNIX_GETPEERNAME:
7904	case BPF_CGROUP_INET4_GETSOCKNAME:
7905	case BPF_CGROUP_INET6_GETSOCKNAME:
7906	case BPF_CGROUP_UNIX_GETSOCKNAME:
7907	return &bpf_sock_addr_setsockopt_proto;
7908	default:
7909	return NULL;
7910	}
7911	case BPF_FUNC_getsockopt:
7912	switch (prog->expected_attach_type) {
7913	case BPF_CGROUP_INET4_BIND:
7914	case BPF_CGROUP_INET6_BIND:
7915	case BPF_CGROUP_INET4_CONNECT:
7916	case BPF_CGROUP_INET6_CONNECT:
7917	case BPF_CGROUP_UNIX_CONNECT:
7918	case BPF_CGROUP_UDP4_RECVMSG:
7919	case BPF_CGROUP_UDP6_RECVMSG:
7920	case BPF_CGROUP_UNIX_RECVMSG:
7921	case BPF_CGROUP_UDP4_SENDMSG:
7922	case BPF_CGROUP_UDP6_SENDMSG:
7923	case BPF_CGROUP_UNIX_SENDMSG:
7924	case BPF_CGROUP_INET4_GETPEERNAME:
7925	case BPF_CGROUP_INET6_GETPEERNAME:
7926	case BPF_CGROUP_UNIX_GETPEERNAME:
7927	case BPF_CGROUP_INET4_GETSOCKNAME:
7928	case BPF_CGROUP_INET6_GETSOCKNAME:
7929	case BPF_CGROUP_UNIX_GETSOCKNAME:
7930	return &bpf_sock_addr_getsockopt_proto;
7931	default:
7932	return NULL;
7933	}
7934	default:
7935	return bpf_sk_base_func_proto(func_id);
7936	}
7937	}
7938
7939	static const struct bpf_func_proto *
7940	sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7941	{
7942	switch (func_id) {
7943	case BPF_FUNC_skb_load_bytes:
7944	return &bpf_skb_load_bytes_proto;
7945	case BPF_FUNC_skb_load_bytes_relative:
7946	return &bpf_skb_load_bytes_relative_proto;
7947	case BPF_FUNC_get_socket_cookie:
7948	return &bpf_get_socket_cookie_proto;
7949	case BPF_FUNC_get_socket_uid:
7950	return &bpf_get_socket_uid_proto;
7951	case BPF_FUNC_perf_event_output:
7952	return &bpf_skb_event_output_proto;
7953	default:
7954	return bpf_sk_base_func_proto(func_id);
7955	}
7956	}
7957
7958	const struct bpf_func_proto bpf_sk_storage_get_proto __weak;
7959	const struct bpf_func_proto bpf_sk_storage_delete_proto __weak;
7960
7961	static const struct bpf_func_proto *
7962	cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7963	{
7964	const struct bpf_func_proto *func_proto;
7965
7966	func_proto = cgroup_common_func_proto(func_id, prog);
7967	if (func_proto)
7968	return func_proto;
7969
7970	switch (func_id) {
7971	case BPF_FUNC_sk_fullsock:
7972	return &bpf_sk_fullsock_proto;
7973	case BPF_FUNC_sk_storage_get:
7974	return &bpf_sk_storage_get_proto;
7975	case BPF_FUNC_sk_storage_delete:
7976	return &bpf_sk_storage_delete_proto;
7977	case BPF_FUNC_perf_event_output:
7978	return &bpf_skb_event_output_proto;
7979	#ifdef CONFIG_SOCK_CGROUP_DATA
7980	case BPF_FUNC_skb_cgroup_id:
7981	return &bpf_skb_cgroup_id_proto;
7982	case BPF_FUNC_skb_ancestor_cgroup_id:
7983	return &bpf_skb_ancestor_cgroup_id_proto;
7984	case BPF_FUNC_sk_cgroup_id:
7985	return &bpf_sk_cgroup_id_proto;
7986	case BPF_FUNC_sk_ancestor_cgroup_id:
7987	return &bpf_sk_ancestor_cgroup_id_proto;
7988	#endif
7989	#ifdef CONFIG_INET
7990	case BPF_FUNC_sk_lookup_tcp:
7991	return &bpf_sk_lookup_tcp_proto;
7992	case BPF_FUNC_sk_lookup_udp:
7993	return &bpf_sk_lookup_udp_proto;
7994	case BPF_FUNC_sk_release:
7995	return &bpf_sk_release_proto;
7996	case BPF_FUNC_skc_lookup_tcp:
7997	return &bpf_skc_lookup_tcp_proto;
7998	case BPF_FUNC_tcp_sock:
7999	return &bpf_tcp_sock_proto;
8000	case BPF_FUNC_get_listener_sock:
8001	return &bpf_get_listener_sock_proto;
8002	case BPF_FUNC_skb_ecn_set_ce:
8003	return &bpf_skb_ecn_set_ce_proto;
8004	#endif
8005	default:
8006	return sk_filter_func_proto(func_id, prog);
8007	}
8008	}
8009
8010	static const struct bpf_func_proto *
8011	tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8012	{
8013	switch (func_id) {
8014	case BPF_FUNC_skb_store_bytes:
8015	return &bpf_skb_store_bytes_proto;
8016	case BPF_FUNC_skb_load_bytes:
8017	return &bpf_skb_load_bytes_proto;
8018	case BPF_FUNC_skb_load_bytes_relative:
8019	return &bpf_skb_load_bytes_relative_proto;
8020	case BPF_FUNC_skb_pull_data:
8021	return &bpf_skb_pull_data_proto;
8022	case BPF_FUNC_csum_diff:
8023	return &bpf_csum_diff_proto;
8024	case BPF_FUNC_csum_update:
8025	return &bpf_csum_update_proto;
8026	case BPF_FUNC_csum_level:
8027	return &bpf_csum_level_proto;
8028	case BPF_FUNC_l3_csum_replace:
8029	return &bpf_l3_csum_replace_proto;
8030	case BPF_FUNC_l4_csum_replace:
8031	return &bpf_l4_csum_replace_proto;
8032	case BPF_FUNC_clone_redirect:
8033	return &bpf_clone_redirect_proto;
8034	case BPF_FUNC_get_cgroup_classid:
8035	return &bpf_get_cgroup_classid_proto;
8036	case BPF_FUNC_skb_vlan_push:
8037	return &bpf_skb_vlan_push_proto;
8038	case BPF_FUNC_skb_vlan_pop:
8039	return &bpf_skb_vlan_pop_proto;
8040	case BPF_FUNC_skb_change_proto:
8041	return &bpf_skb_change_proto_proto;
8042	case BPF_FUNC_skb_change_type:
8043	return &bpf_skb_change_type_proto;
8044	case BPF_FUNC_skb_adjust_room:
8045	return &bpf_skb_adjust_room_proto;
8046	case BPF_FUNC_skb_change_tail:
8047	return &bpf_skb_change_tail_proto;
8048	case BPF_FUNC_skb_change_head:
8049	return &bpf_skb_change_head_proto;
8050	case BPF_FUNC_skb_get_tunnel_key:
8051	return &bpf_skb_get_tunnel_key_proto;
8052	case BPF_FUNC_skb_set_tunnel_key:
8053	return bpf_get_skb_set_tunnel_proto(which: func_id);
8054	case BPF_FUNC_skb_get_tunnel_opt:
8055	return &bpf_skb_get_tunnel_opt_proto;
8056	case BPF_FUNC_skb_set_tunnel_opt:
8057	return bpf_get_skb_set_tunnel_proto(which: func_id);
8058	case BPF_FUNC_redirect:
8059	return &bpf_redirect_proto;
8060	case BPF_FUNC_redirect_neigh:
8061	return &bpf_redirect_neigh_proto;
8062	case BPF_FUNC_redirect_peer:
8063	return &bpf_redirect_peer_proto;
8064	case BPF_FUNC_get_route_realm:
8065	return &bpf_get_route_realm_proto;
8066	case BPF_FUNC_get_hash_recalc:
8067	return &bpf_get_hash_recalc_proto;
8068	case BPF_FUNC_set_hash_invalid:
8069	return &bpf_set_hash_invalid_proto;
8070	case BPF_FUNC_set_hash:
8071	return &bpf_set_hash_proto;
8072	case BPF_FUNC_perf_event_output:
8073	return &bpf_skb_event_output_proto;
8074	case BPF_FUNC_get_smp_processor_id:
8075	return &bpf_get_smp_processor_id_proto;
8076	case BPF_FUNC_skb_under_cgroup:
8077	return &bpf_skb_under_cgroup_proto;
8078	case BPF_FUNC_get_socket_cookie:
8079	return &bpf_get_socket_cookie_proto;
8080	case BPF_FUNC_get_socket_uid:
8081	return &bpf_get_socket_uid_proto;
8082	case BPF_FUNC_fib_lookup:
8083	return &bpf_skb_fib_lookup_proto;
8084	case BPF_FUNC_check_mtu:
8085	return &bpf_skb_check_mtu_proto;
8086	case BPF_FUNC_sk_fullsock:
8087	return &bpf_sk_fullsock_proto;
8088	case BPF_FUNC_sk_storage_get:
8089	return &bpf_sk_storage_get_proto;
8090	case BPF_FUNC_sk_storage_delete:
8091	return &bpf_sk_storage_delete_proto;
8092	#ifdef CONFIG_XFRM
8093	case BPF_FUNC_skb_get_xfrm_state:
8094	return &bpf_skb_get_xfrm_state_proto;
8095	#endif
8096	#ifdef CONFIG_CGROUP_NET_CLASSID
8097	case BPF_FUNC_skb_cgroup_classid:
8098	return &bpf_skb_cgroup_classid_proto;
8099	#endif
8100	#ifdef CONFIG_SOCK_CGROUP_DATA
8101	case BPF_FUNC_skb_cgroup_id:
8102	return &bpf_skb_cgroup_id_proto;
8103	case BPF_FUNC_skb_ancestor_cgroup_id:
8104	return &bpf_skb_ancestor_cgroup_id_proto;
8105	#endif
8106	#ifdef CONFIG_INET
8107	case BPF_FUNC_sk_lookup_tcp:
8108	return &bpf_tc_sk_lookup_tcp_proto;
8109	case BPF_FUNC_sk_lookup_udp:
8110	return &bpf_tc_sk_lookup_udp_proto;
8111	case BPF_FUNC_sk_release:
8112	return &bpf_sk_release_proto;
8113	case BPF_FUNC_tcp_sock:
8114	return &bpf_tcp_sock_proto;
8115	case BPF_FUNC_get_listener_sock:
8116	return &bpf_get_listener_sock_proto;
8117	case BPF_FUNC_skc_lookup_tcp:
8118	return &bpf_tc_skc_lookup_tcp_proto;
8119	case BPF_FUNC_tcp_check_syncookie:
8120	return &bpf_tcp_check_syncookie_proto;
8121	case BPF_FUNC_skb_ecn_set_ce:
8122	return &bpf_skb_ecn_set_ce_proto;
8123	case BPF_FUNC_tcp_gen_syncookie:
8124	return &bpf_tcp_gen_syncookie_proto;
8125	case BPF_FUNC_sk_assign:
8126	return &bpf_sk_assign_proto;
8127	case BPF_FUNC_skb_set_tstamp:
8128	return &bpf_skb_set_tstamp_proto;
8129	#ifdef CONFIG_SYN_COOKIES
8130	case BPF_FUNC_tcp_raw_gen_syncookie_ipv4:
8131	return &bpf_tcp_raw_gen_syncookie_ipv4_proto;
8132	case BPF_FUNC_tcp_raw_gen_syncookie_ipv6:
8133	return &bpf_tcp_raw_gen_syncookie_ipv6_proto;
8134	case BPF_FUNC_tcp_raw_check_syncookie_ipv4:
8135	return &bpf_tcp_raw_check_syncookie_ipv4_proto;
8136	case BPF_FUNC_tcp_raw_check_syncookie_ipv6:
8137	return &bpf_tcp_raw_check_syncookie_ipv6_proto;
8138	#endif
8139	#endif
8140	default:
8141	return bpf_sk_base_func_proto(func_id);
8142	}
8143	}
8144
8145	static const struct bpf_func_proto *
8146	xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8147	{
8148	switch (func_id) {
8149	case BPF_FUNC_perf_event_output:
8150	return &bpf_xdp_event_output_proto;
8151	case BPF_FUNC_get_smp_processor_id:
8152	return &bpf_get_smp_processor_id_proto;
8153	case BPF_FUNC_csum_diff:
8154	return &bpf_csum_diff_proto;
8155	case BPF_FUNC_xdp_adjust_head:
8156	return &bpf_xdp_adjust_head_proto;
8157	case BPF_FUNC_xdp_adjust_meta:
8158	return &bpf_xdp_adjust_meta_proto;
8159	case BPF_FUNC_redirect:
8160	return &bpf_xdp_redirect_proto;
8161	case BPF_FUNC_redirect_map:
8162	return &bpf_xdp_redirect_map_proto;
8163	case BPF_FUNC_xdp_adjust_tail:
8164	return &bpf_xdp_adjust_tail_proto;
8165	case BPF_FUNC_xdp_get_buff_len:
8166	return &bpf_xdp_get_buff_len_proto;
8167	case BPF_FUNC_xdp_load_bytes:
8168	return &bpf_xdp_load_bytes_proto;
8169	case BPF_FUNC_xdp_store_bytes:
8170	return &bpf_xdp_store_bytes_proto;
8171	case BPF_FUNC_fib_lookup:
8172	return &bpf_xdp_fib_lookup_proto;
8173	case BPF_FUNC_check_mtu:
8174	return &bpf_xdp_check_mtu_proto;
8175	#ifdef CONFIG_INET
8176	case BPF_FUNC_sk_lookup_udp:
8177	return &bpf_xdp_sk_lookup_udp_proto;
8178	case BPF_FUNC_sk_lookup_tcp:
8179	return &bpf_xdp_sk_lookup_tcp_proto;
8180	case BPF_FUNC_sk_release:
8181	return &bpf_sk_release_proto;
8182	case BPF_FUNC_skc_lookup_tcp:
8183	return &bpf_xdp_skc_lookup_tcp_proto;
8184	case BPF_FUNC_tcp_check_syncookie:
8185	return &bpf_tcp_check_syncookie_proto;
8186	case BPF_FUNC_tcp_gen_syncookie:
8187	return &bpf_tcp_gen_syncookie_proto;
8188	#ifdef CONFIG_SYN_COOKIES
8189	case BPF_FUNC_tcp_raw_gen_syncookie_ipv4:
8190	return &bpf_tcp_raw_gen_syncookie_ipv4_proto;
8191	case BPF_FUNC_tcp_raw_gen_syncookie_ipv6:
8192	return &bpf_tcp_raw_gen_syncookie_ipv6_proto;
8193	case BPF_FUNC_tcp_raw_check_syncookie_ipv4:
8194	return &bpf_tcp_raw_check_syncookie_ipv4_proto;
8195	case BPF_FUNC_tcp_raw_check_syncookie_ipv6:
8196	return &bpf_tcp_raw_check_syncookie_ipv6_proto;
8197	#endif
8198	#endif
8199	default:
8200	return bpf_sk_base_func_proto(func_id);
8201	}
8202
8203	#if IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)
8204	/ The nf_conn___init type is used in the NF_CONNTRACK kfuncs. The*
8205	* kfuncs are defined in two different modules, and we want to be able
8206	* to use them interchangably with the same BTF type ID. Because modules
8207	* can't de-duplicate BTF IDs between each other, we need the type to be
8208	* referenced in the vmlinux BTF or the verifier will get confused about
8209	* the different types. So we add this dummy type reference which will
8210	* be included in vmlinux BTF, allowing both modules to refer to the
8211	* same type ID.
8212	*/
8213	BTF_TYPE_EMIT(struct nf_conn___init);
8214	#endif
8215	}
8216
8217	const struct bpf_func_proto bpf_sock_map_update_proto __weak;
8218	const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
8219
8220	static const struct bpf_func_proto *
8221	sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8222	{
8223	const struct bpf_func_proto *func_proto;
8224
8225	func_proto = cgroup_common_func_proto(func_id, prog);
8226	if (func_proto)
8227	return func_proto;
8228
8229	switch (func_id) {
8230	case BPF_FUNC_setsockopt:
8231	return &bpf_sock_ops_setsockopt_proto;
8232	case BPF_FUNC_getsockopt:
8233	return &bpf_sock_ops_getsockopt_proto;
8234	case BPF_FUNC_sock_ops_cb_flags_set:
8235	return &bpf_sock_ops_cb_flags_set_proto;
8236	case BPF_FUNC_sock_map_update:
8237	return &bpf_sock_map_update_proto;
8238	case BPF_FUNC_sock_hash_update:
8239	return &bpf_sock_hash_update_proto;
8240	case BPF_FUNC_get_socket_cookie:
8241	return &bpf_get_socket_cookie_sock_ops_proto;
8242	case BPF_FUNC_perf_event_output:
8243	return &bpf_event_output_data_proto;
8244	case BPF_FUNC_sk_storage_get:
8245	return &bpf_sk_storage_get_proto;
8246	case BPF_FUNC_sk_storage_delete:
8247	return &bpf_sk_storage_delete_proto;
8248	case BPF_FUNC_get_netns_cookie:
8249	return &bpf_get_netns_cookie_sock_ops_proto;
8250	#ifdef CONFIG_INET
8251	case BPF_FUNC_load_hdr_opt:
8252	return &bpf_sock_ops_load_hdr_opt_proto;
8253	case BPF_FUNC_store_hdr_opt:
8254	return &bpf_sock_ops_store_hdr_opt_proto;
8255	case BPF_FUNC_reserve_hdr_opt:
8256	return &bpf_sock_ops_reserve_hdr_opt_proto;
8257	case BPF_FUNC_tcp_sock:
8258	return &bpf_tcp_sock_proto;
8259	#endif /* CONFIG_INET */
8260	default:
8261	return bpf_sk_base_func_proto(func_id);
8262	}
8263	}
8264
8265	const struct bpf_func_proto bpf_msg_redirect_map_proto __weak;
8266	const struct bpf_func_proto bpf_msg_redirect_hash_proto __weak;
8267
8268	static const struct bpf_func_proto *
8269	sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8270	{
8271	switch (func_id) {
8272	case BPF_FUNC_msg_redirect_map:
8273	return &bpf_msg_redirect_map_proto;
8274	case BPF_FUNC_msg_redirect_hash:
8275	return &bpf_msg_redirect_hash_proto;
8276	case BPF_FUNC_msg_apply_bytes:
8277	return &bpf_msg_apply_bytes_proto;
8278	case BPF_FUNC_msg_cork_bytes:
8279	return &bpf_msg_cork_bytes_proto;
8280	case BPF_FUNC_msg_pull_data:
8281	return &bpf_msg_pull_data_proto;
8282	case BPF_FUNC_msg_push_data:
8283	return &bpf_msg_push_data_proto;
8284	case BPF_FUNC_msg_pop_data:
8285	return &bpf_msg_pop_data_proto;
8286	case BPF_FUNC_perf_event_output:
8287	return &bpf_event_output_data_proto;
8288	case BPF_FUNC_get_current_uid_gid:
8289	return &bpf_get_current_uid_gid_proto;
8290	case BPF_FUNC_get_current_pid_tgid:
8291	return &bpf_get_current_pid_tgid_proto;
8292	case BPF_FUNC_sk_storage_get:
8293	return &bpf_sk_storage_get_proto;
8294	case BPF_FUNC_sk_storage_delete:
8295	return &bpf_sk_storage_delete_proto;
8296	case BPF_FUNC_get_netns_cookie:
8297	return &bpf_get_netns_cookie_sk_msg_proto;
8298	#ifdef CONFIG_CGROUP_NET_CLASSID
8299	case BPF_FUNC_get_cgroup_classid:
8300	return &bpf_get_cgroup_classid_curr_proto;
8301	#endif
8302	default:
8303	return bpf_sk_base_func_proto(func_id);
8304	}
8305	}
8306
8307	const struct bpf_func_proto bpf_sk_redirect_map_proto __weak;
8308	const struct bpf_func_proto bpf_sk_redirect_hash_proto __weak;
8309
8310	static const struct bpf_func_proto *
8311	sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8312	{
8313	switch (func_id) {
8314	case BPF_FUNC_skb_store_bytes:
8315	return &bpf_skb_store_bytes_proto;
8316	case BPF_FUNC_skb_load_bytes:
8317	return &bpf_skb_load_bytes_proto;
8318	case BPF_FUNC_skb_pull_data:
8319	return &sk_skb_pull_data_proto;
8320	case BPF_FUNC_skb_change_tail:
8321	return &sk_skb_change_tail_proto;
8322	case BPF_FUNC_skb_change_head:
8323	return &sk_skb_change_head_proto;
8324	case BPF_FUNC_skb_adjust_room:
8325	return &sk_skb_adjust_room_proto;
8326	case BPF_FUNC_get_socket_cookie:
8327	return &bpf_get_socket_cookie_proto;
8328	case BPF_FUNC_get_socket_uid:
8329	return &bpf_get_socket_uid_proto;
8330	case BPF_FUNC_sk_redirect_map:
8331	return &bpf_sk_redirect_map_proto;
8332	case BPF_FUNC_sk_redirect_hash:
8333	return &bpf_sk_redirect_hash_proto;
8334	case BPF_FUNC_perf_event_output:
8335	return &bpf_skb_event_output_proto;
8336	#ifdef CONFIG_INET
8337	case BPF_FUNC_sk_lookup_tcp:
8338	return &bpf_sk_lookup_tcp_proto;
8339	case BPF_FUNC_sk_lookup_udp:
8340	return &bpf_sk_lookup_udp_proto;
8341	case BPF_FUNC_sk_release:
8342	return &bpf_sk_release_proto;
8343	case BPF_FUNC_skc_lookup_tcp:
8344	return &bpf_skc_lookup_tcp_proto;
8345	#endif
8346	default:
8347	return bpf_sk_base_func_proto(func_id);
8348	}
8349	}
8350
8351	static const struct bpf_func_proto *
8352	flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8353	{
8354	switch (func_id) {
8355	case BPF_FUNC_skb_load_bytes:
8356	return &bpf_flow_dissector_load_bytes_proto;
8357	default:
8358	return bpf_sk_base_func_proto(func_id);
8359	}
8360	}
8361
8362	static const struct bpf_func_proto *
8363	lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8364	{
8365	switch (func_id) {
8366	case BPF_FUNC_skb_load_bytes:
8367	return &bpf_skb_load_bytes_proto;
8368	case BPF_FUNC_skb_pull_data:
8369	return &bpf_skb_pull_data_proto;
8370	case BPF_FUNC_csum_diff:
8371	return &bpf_csum_diff_proto;
8372	case BPF_FUNC_get_cgroup_classid:
8373	return &bpf_get_cgroup_classid_proto;
8374	case BPF_FUNC_get_route_realm:
8375	return &bpf_get_route_realm_proto;
8376	case BPF_FUNC_get_hash_recalc:
8377	return &bpf_get_hash_recalc_proto;
8378	case BPF_FUNC_perf_event_output:
8379	return &bpf_skb_event_output_proto;
8380	case BPF_FUNC_get_smp_processor_id:
8381	return &bpf_get_smp_processor_id_proto;
8382	case BPF_FUNC_skb_under_cgroup:
8383	return &bpf_skb_under_cgroup_proto;
8384	default:
8385	return bpf_sk_base_func_proto(func_id);
8386	}
8387	}
8388
8389	static const struct bpf_func_proto *
8390	lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8391	{
8392	switch (func_id) {
8393	case BPF_FUNC_lwt_push_encap:
8394	return &bpf_lwt_in_push_encap_proto;
8395	default:
8396	return lwt_out_func_proto(func_id, prog);
8397	}
8398	}
8399
8400	static const struct bpf_func_proto *
8401	lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8402	{
8403	switch (func_id) {
8404	case BPF_FUNC_skb_get_tunnel_key:
8405	return &bpf_skb_get_tunnel_key_proto;
8406	case BPF_FUNC_skb_set_tunnel_key:
8407	return bpf_get_skb_set_tunnel_proto(which: func_id);
8408	case BPF_FUNC_skb_get_tunnel_opt:
8409	return &bpf_skb_get_tunnel_opt_proto;
8410	case BPF_FUNC_skb_set_tunnel_opt:
8411	return bpf_get_skb_set_tunnel_proto(which: func_id);
8412	case BPF_FUNC_redirect:
8413	return &bpf_redirect_proto;
8414	case BPF_FUNC_clone_redirect:
8415	return &bpf_clone_redirect_proto;
8416	case BPF_FUNC_skb_change_tail:
8417	return &bpf_skb_change_tail_proto;
8418	case BPF_FUNC_skb_change_head:
8419	return &bpf_skb_change_head_proto;
8420	case BPF_FUNC_skb_store_bytes:
8421	return &bpf_skb_store_bytes_proto;
8422	case BPF_FUNC_csum_update:
8423	return &bpf_csum_update_proto;
8424	case BPF_FUNC_csum_level:
8425	return &bpf_csum_level_proto;
8426	case BPF_FUNC_l3_csum_replace:
8427	return &bpf_l3_csum_replace_proto;
8428	case BPF_FUNC_l4_csum_replace:
8429	return &bpf_l4_csum_replace_proto;
8430	case BPF_FUNC_set_hash_invalid:
8431	return &bpf_set_hash_invalid_proto;
8432	case BPF_FUNC_lwt_push_encap:
8433	return &bpf_lwt_xmit_push_encap_proto;
8434	default:
8435	return lwt_out_func_proto(func_id, prog);
8436	}
8437	}
8438
8439	static const struct bpf_func_proto *
8440	lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8441	{
8442	switch (func_id) {
8443	#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
8444	case BPF_FUNC_lwt_seg6_store_bytes:
8445	return &bpf_lwt_seg6_store_bytes_proto;
8446	case BPF_FUNC_lwt_seg6_action:
8447	return &bpf_lwt_seg6_action_proto;
8448	case BPF_FUNC_lwt_seg6_adjust_srh:
8449	return &bpf_lwt_seg6_adjust_srh_proto;
8450	#endif
8451	default:
8452	return lwt_out_func_proto(func_id, prog);
8453	}
8454	}
8455
8456	static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
8457	const struct bpf_prog *prog,
8458	struct bpf_insn_access_aux *info)
8459	{
8460	const int size_default = sizeof(__u32);
8461
8462	if (off < `0` \|\| off >= sizeof(struct __sk_buff))
8463	return false;
8464
8465	/ The verifier guarantees that size > 0. /
8466	if (off % size != `0`)
8467	return false;
8468
8469	switch (off) {
8470	case bpf_ctx_range_till(struct __sk_buff, cb[`0`], cb[`4`]):
8471	if (off + size > offsetofend(struct __sk_buff, cb[`4`]))
8472	return false;
8473	break;
8474	case bpf_ctx_range_till(struct __sk_buff, remote_ip6[`0`], remote_ip6[`3`]):
8475	case bpf_ctx_range_till(struct __sk_buff, local_ip6[`0`], local_ip6[`3`]):
8476	case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
8477	case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
8478	case bpf_ctx_range(struct __sk_buff, data):
8479	case bpf_ctx_range(struct __sk_buff, data_meta):
8480	case bpf_ctx_range(struct __sk_buff, data_end):
8481	if (size != size_default)
8482	return false;
8483	break;
8484	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
8485	return false;
8486	case bpf_ctx_range(struct __sk_buff, hwtstamp):
8487	if (type == BPF_WRITE \|\| size != sizeof(__u64))
8488	return false;
8489	break;
8490	case bpf_ctx_range(struct __sk_buff, tstamp):
8491	if (size != sizeof(__u64))
8492	return false;
8493	break;
8494	case offsetof(struct __sk_buff, sk):
8495	if (type == BPF_WRITE \|\| size != sizeof(__u64))
8496	return false;
8497	info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
8498	break;
8499	case offsetof(struct __sk_buff, tstamp_type):
8500	return false;
8501	case offsetofend(struct __sk_buff, tstamp_type) ... offsetof(struct __sk_buff, hwtstamp) - `1`:
8502	/ Explicitly prohibit access to padding in __sk_buff. /
8503	return false;
8504	default:
8505	/ Only narrow read access allowed for now. /
8506	if (type == BPF_WRITE) {
8507	if (size != size_default)
8508	return false;
8509	} else {
8510	bpf_ctx_record_field_size(aux: info, size: size_default);
8511	if (!bpf_ctx_narrow_access_ok(off, size, size_default))
8512	return false;
8513	}
8514	}
8515
8516	return true;
8517	}
8518
8519	static bool sk_filter_is_valid_access(int off, int size,
8520	enum bpf_access_type type,
8521	const struct bpf_prog *prog,
8522	struct bpf_insn_access_aux *info)
8523	{
8524	switch (off) {
8525	case bpf_ctx_range(struct __sk_buff, tc_classid):
8526	case bpf_ctx_range(struct __sk_buff, data):
8527	case bpf_ctx_range(struct __sk_buff, data_meta):
8528	case bpf_ctx_range(struct __sk_buff, data_end):
8529	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
8530	case bpf_ctx_range(struct __sk_buff, tstamp):
8531	case bpf_ctx_range(struct __sk_buff, wire_len):
8532	case bpf_ctx_range(struct __sk_buff, hwtstamp):
8533	return false;
8534	}
8535
8536	if (type == BPF_WRITE) {
8537	switch (off) {
8538	case bpf_ctx_range_till(struct __sk_buff, cb[`0`], cb[`4`]):
8539	break;
8540	default:
8541	return false;
8542	}
8543	}
8544
8545	return bpf_skb_is_valid_access(off, size, type, prog, info);
8546	}
8547
8548	static bool cg_skb_is_valid_access(int off, int size,
8549	enum bpf_access_type type,
8550	const struct bpf_prog *prog,
8551	struct bpf_insn_access_aux *info)
8552	{
8553	switch (off) {
8554	case bpf_ctx_range(struct __sk_buff, tc_classid):
8555	case bpf_ctx_range(struct __sk_buff, data_meta):
8556	case bpf_ctx_range(struct __sk_buff, wire_len):
8557	return false;
8558	case bpf_ctx_range(struct __sk_buff, data):
8559	case bpf_ctx_range(struct __sk_buff, data_end):
8560	if (!bpf_capable())
8561	return false;
8562	break;
8563	}
8564
8565	if (type == BPF_WRITE) {
8566	switch (off) {
8567	case bpf_ctx_range(struct __sk_buff, mark):
8568	case bpf_ctx_range(struct __sk_buff, priority):
8569	case bpf_ctx_range_till(struct __sk_buff, cb[`0`], cb[`4`]):
8570	break;
8571	case bpf_ctx_range(struct __sk_buff, tstamp):
8572	if (!bpf_capable())
8573	return false;
8574	break;
8575	default:
8576	return false;
8577	}
8578	}
8579
8580	switch (off) {
8581	case bpf_ctx_range(struct __sk_buff, data):
8582	info->reg_type = PTR_TO_PACKET;
8583	break;
8584	case bpf_ctx_range(struct __sk_buff, data_end):
8585	info->reg_type = PTR_TO_PACKET_END;
8586	break;
8587	}
8588
8589	return bpf_skb_is_valid_access(off, size, type, prog, info);
8590	}
8591
8592	static bool lwt_is_valid_access(int off, int size,
8593	enum bpf_access_type type,
8594	const struct bpf_prog *prog,
8595	struct bpf_insn_access_aux *info)
8596	{
8597	switch (off) {
8598	case bpf_ctx_range(struct __sk_buff, tc_classid):
8599	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
8600	case bpf_ctx_range(struct __sk_buff, data_meta):
8601	case bpf_ctx_range(struct __sk_buff, tstamp):
8602	case bpf_ctx_range(struct __sk_buff, wire_len):
8603	case bpf_ctx_range(struct __sk_buff, hwtstamp):
8604	return false;
8605	}
8606
8607	if (type == BPF_WRITE) {
8608	switch (off) {
8609	case bpf_ctx_range(struct __sk_buff, mark):
8610	case bpf_ctx_range(struct __sk_buff, priority):
8611	case bpf_ctx_range_till(struct __sk_buff, cb[`0`], cb[`4`]):
8612	break;
8613	default:
8614	return false;
8615	}
8616	}
8617
8618	switch (off) {
8619	case bpf_ctx_range(struct __sk_buff, data):
8620	info->reg_type = PTR_TO_PACKET;
8621	break;
8622	case bpf_ctx_range(struct __sk_buff, data_end):
8623	info->reg_type = PTR_TO_PACKET_END;
8624	break;
8625	}
8626
8627	return bpf_skb_is_valid_access(off, size, type, prog, info);
8628	}
8629
8630	/ Attach type specific accesses /
8631	static bool __sock_filter_check_attach_type(int off,
8632	enum bpf_access_type access_type,
8633	enum bpf_attach_type attach_type)
8634	{
8635	switch (off) {
8636	case offsetof(struct bpf_sock, bound_dev_if):
8637	case offsetof(struct bpf_sock, mark):
8638	case offsetof(struct bpf_sock, priority):
8639	switch (attach_type) {
8640	case BPF_CGROUP_INET_SOCK_CREATE:
8641	case BPF_CGROUP_INET_SOCK_RELEASE:
8642	goto full_access;
8643	default:
8644	return false;
8645	}
8646	case bpf_ctx_range(struct bpf_sock, src_ip4):
8647	switch (attach_type) {
8648	case BPF_CGROUP_INET4_POST_BIND:
8649	goto read_only;
8650	default:
8651	return false;
8652	}
8653	case bpf_ctx_range_till(struct bpf_sock, src_ip6[`0`], src_ip6[`3`]):
8654	switch (attach_type) {
8655	case BPF_CGROUP_INET6_POST_BIND:
8656	goto read_only;
8657	default:
8658	return false;
8659	}
8660	case bpf_ctx_range(struct bpf_sock, src_port):
8661	switch (attach_type) {
8662	case BPF_CGROUP_INET4_POST_BIND:
8663	case BPF_CGROUP_INET6_POST_BIND:
8664	goto read_only;
8665	default:
8666	return false;
8667	}
8668	}
8669	read_only:
8670	return access_type == BPF_READ;
8671	full_access:
8672	return true;
8673	}
8674
8675	bool bpf_sock_common_is_valid_access(int off, int size,
8676	enum bpf_access_type type,
8677	struct bpf_insn_access_aux *info)
8678	{
8679	switch (off) {
8680	case bpf_ctx_range_till(struct bpf_sock, type, priority):
8681	return false;
8682	default:
8683	return bpf_sock_is_valid_access(off, size, type, info);
8684	}
8685	}
8686
8687	bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
8688	struct bpf_insn_access_aux *info)
8689	{
8690	const int size_default = sizeof(__u32);
8691	int field_size;
8692
8693	if (off < `0` \|\| off >= sizeof(struct bpf_sock))
8694	return false;
8695	if (off % size != `0`)
8696	return false;
8697
8698	switch (off) {
8699	case offsetof(struct bpf_sock, state):
8700	case offsetof(struct bpf_sock, family):
8701	case offsetof(struct bpf_sock, type):
8702	case offsetof(struct bpf_sock, protocol):
8703	case offsetof(struct bpf_sock, src_port):
8704	case offsetof(struct bpf_sock, rx_queue_mapping):
8705	case bpf_ctx_range(struct bpf_sock, src_ip4):
8706	case bpf_ctx_range_till(struct bpf_sock, src_ip6[`0`], src_ip6[`3`]):
8707	case bpf_ctx_range(struct bpf_sock, dst_ip4):
8708	case bpf_ctx_range_till(struct bpf_sock, dst_ip6[`0`], dst_ip6[`3`]):
8709	bpf_ctx_record_field_size(aux: info, size: size_default);
8710	return bpf_ctx_narrow_access_ok(off, size, size_default);
8711	case bpf_ctx_range(struct bpf_sock, dst_port):
8712	field_size = size == size_default ?
8713	size_default : sizeof_field(struct bpf_sock, dst_port);
8714	bpf_ctx_record_field_size(aux: info, size: field_size);
8715	return bpf_ctx_narrow_access_ok(off, size, size_default: field_size);
8716	case offsetofend(struct bpf_sock, dst_port) ...
8717	offsetof(struct bpf_sock, dst_ip4) - `1`:
8718	return false;
8719	}
8720
8721	return size == size_default;
8722	}
8723
8724	static bool sock_filter_is_valid_access(int off, int size,
8725	enum bpf_access_type type,
8726	const struct bpf_prog *prog,
8727	struct bpf_insn_access_aux *info)
8728	{
8729	if (!bpf_sock_is_valid_access(off, size, type, info))
8730	return false;
8731	return __sock_filter_check_attach_type(off, access_type: type,
8732	attach_type: prog->expected_attach_type);
8733	}
8734
8735	static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write,
8736	const struct bpf_prog *prog)
8737	{
8738	/ Neither direct read nor direct write requires any preliminary*
8739	* action.
8740	*/
8741	return `0`;
8742	}
8743
8744	static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
8745	const struct bpf_prog prog, int* drop_verdict)
8746	{
8747	struct bpf_insn *insn = insn_buf;
8748
8749	if (!direct_write)
8750	return `0`;
8751
8752	/ if (!skb->cloned)*
8753	* goto start;
8754	*
8755	* (Fast-path, otherwise approximation that we might be
8756	* a clone, do the rest in helper.)
8757	*/
8758	*insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET);
8759	*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK);
8760	*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, `0`, `7`);
8761
8762	/ ret = bpf_skb_pull_data(skb, 0); /
8763	*insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
8764	*insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2);
8765	*insn++ = BPF_RAW_INSN(BPF_JMP \| BPF_CALL, `0`, `0`, `0`,
8766	BPF_FUNC_skb_pull_data);
8767	/ if (!ret)*
8768	* goto restore;
8769	* return TC_ACT_SHOT;
8770	*/
8771	*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, `0`, `2`);
8772	*insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict);
8773	*insn++ = BPF_EXIT_INSN();
8774
8775	/ restore: /
8776	*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
8777	/ start: /
8778	*insn++ = prog->insnsi[`0`];
8779
8780	return insn - insn_buf;
8781	}
8782
8783	static int bpf_gen_ld_abs(const struct bpf_insn *orig,
8784	struct bpf_insn *insn_buf)
8785	{
8786	bool indirect = BPF_MODE(orig->code) == BPF_IND;
8787	struct bpf_insn *insn = insn_buf;
8788
8789	if (!indirect) {
8790	*insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm);
8791	} else {
8792	*insn++ = BPF_MOV64_REG(BPF_REG_2, orig->src_reg);
8793	if (orig->imm)
8794	*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm);
8795	}
8796	/ We're guaranteed here that CTX is in R6. /
8797	*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX);
8798
8799	switch (BPF_SIZE(orig->code)) {
8800	case BPF_B:
8801	*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8_no_cache);
8802	break;
8803	case BPF_H:
8804	*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16_no_cache);
8805	break;
8806	case BPF_W:
8807	*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32_no_cache);
8808	break;
8809	}
8810
8811	*insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, `0`, `2`);
8812	*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
8813	*insn++ = BPF_EXIT_INSN();
8814
8815	return insn - insn_buf;
8816	}
8817
8818	static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
8819	const struct bpf_prog *prog)
8820	{
8821	return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT);
8822	}
8823
8824	static bool tc_cls_act_is_valid_access(int off, int size,
8825	enum bpf_access_type type,
8826	const struct bpf_prog *prog,
8827	struct bpf_insn_access_aux *info)
8828	{
8829	if (type == BPF_WRITE) {
8830	switch (off) {
8831	case bpf_ctx_range(struct __sk_buff, mark):
8832	case bpf_ctx_range(struct __sk_buff, tc_index):
8833	case bpf_ctx_range(struct __sk_buff, priority):
8834	case bpf_ctx_range(struct __sk_buff, tc_classid):
8835	case bpf_ctx_range_till(struct __sk_buff, cb[`0`], cb[`4`]):
8836	case bpf_ctx_range(struct __sk_buff, tstamp):
8837	case bpf_ctx_range(struct __sk_buff, queue_mapping):
8838	break;
8839	default:
8840	return false;
8841	}
8842	}
8843
8844	switch (off) {
8845	case bpf_ctx_range(struct __sk_buff, data):
8846	info->reg_type = PTR_TO_PACKET;
8847	break;
8848	case bpf_ctx_range(struct __sk_buff, data_meta):
8849	info->reg_type = PTR_TO_PACKET_META;
8850	break;
8851	case bpf_ctx_range(struct __sk_buff, data_end):
8852	info->reg_type = PTR_TO_PACKET_END;
8853	break;
8854	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
8855	return false;
8856	case offsetof(struct __sk_buff, tstamp_type):
8857	/ The convert_ctx_access() on reading and writing*
8858	* __sk_buff->tstamp depends on whether the bpf prog
8859	* has used __sk_buff->tstamp_type or not.
8860	* Thus, we need to set prog->tstamp_type_access
8861	* earlier during is_valid_access() here.
8862	*/
8863	((struct bpf_prog *)prog)->tstamp_type_access = `1`;
8864	return size == sizeof(__u8);
8865	}
8866
8867	return bpf_skb_is_valid_access(off, size, type, prog, info);
8868	}
8869
8870	DEFINE_MUTEX(nf_conn_btf_access_lock);
8871	EXPORT_SYMBOL_GPL(nf_conn_btf_access_lock);
8872
8873	int (nfct_btf_struct_access)(struct* bpf_verifier_log *log,
8874	const struct bpf_reg_state *reg,
8875	int off, int size);
8876	EXPORT_SYMBOL_GPL(nfct_btf_struct_access);
8877
8878	static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log,
8879	const struct bpf_reg_state *reg,
8880	int off, int size)
8881	{
8882	int ret = -EACCES;
8883
8884	mutex_lock(&nf_conn_btf_access_lock);
8885	if (nfct_btf_struct_access)
8886	ret = nfct_btf_struct_access(log, reg, off, size);
8887	mutex_unlock(lock: &nf_conn_btf_access_lock);
8888
8889	return ret;
8890	}
8891
8892	static bool __is_valid_xdp_access(int off, int size)
8893	{
8894	if (off < `0` \|\| off >= sizeof(struct xdp_md))
8895	return false;
8896	if (off % size != `0`)
8897	return false;
8898	if (size != sizeof(__u32))
8899	return false;
8900
8901	return true;
8902	}
8903
8904	static bool xdp_is_valid_access(int off, int size,
8905	enum bpf_access_type type,
8906	const struct bpf_prog *prog,
8907	struct bpf_insn_access_aux *info)
8908	{
8909	if (prog->expected_attach_type != BPF_XDP_DEVMAP) {
8910	switch (off) {
8911	case offsetof(struct xdp_md, egress_ifindex):
8912	return false;
8913	}
8914	}
8915
8916	if (type == BPF_WRITE) {
8917	if (bpf_prog_is_offloaded(aux: prog->aux)) {
8918	switch (off) {
8919	case offsetof(struct xdp_md, rx_queue_index):
8920	return __is_valid_xdp_access(off, size);
8921	}
8922	}
8923	return false;
8924	}
8925
8926	switch (off) {
8927	case offsetof(struct xdp_md, data):
8928	info->reg_type = PTR_TO_PACKET;
8929	break;
8930	case offsetof(struct xdp_md, data_meta):
8931	info->reg_type = PTR_TO_PACKET_META;
8932	break;
8933	case offsetof(struct xdp_md, data_end):
8934	info->reg_type = PTR_TO_PACKET_END;
8935	break;
8936	}
8937
8938	return __is_valid_xdp_access(off, size);
8939	}
8940
8941	void bpf_warn_invalid_xdp_action(struct net_device dev, struct* bpf_prog *prog, u32 act)
8942	{
8943	const u32 act_max = XDP_REDIRECT;
8944
8945	pr_warn_once("%s XDP return value %u on prog %s (id %d) dev %s, expect packet loss!\n",
8946	act > act_max ? "Illegal" : "Driver unsupported",
8947	act, prog->aux->name, prog->aux->id, dev ? dev->name : "N/A");
8948	}
8949	EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
8950
8951	static int xdp_btf_struct_access(struct bpf_verifier_log *log,
8952	const struct bpf_reg_state *reg,
8953	int off, int size)
8954	{
8955	int ret = -EACCES;
8956
8957	mutex_lock(&nf_conn_btf_access_lock);
8958	if (nfct_btf_struct_access)
8959	ret = nfct_btf_struct_access(log, reg, off, size);
8960	mutex_unlock(lock: &nf_conn_btf_access_lock);
8961
8962	return ret;
8963	}
8964
8965	static bool sock_addr_is_valid_access(int off, int size,
8966	enum bpf_access_type type,
8967	const struct bpf_prog *prog,
8968	struct bpf_insn_access_aux *info)
8969	{
8970	const int size_default = sizeof(__u32);
8971
8972	if (off < `0` \|\| off >= sizeof(struct bpf_sock_addr))
8973	return false;
8974	if (off % size != `0`)
8975	return false;
8976
8977	/ Disallow access to fields not belonging to the attach type's address*
8978	* family.
8979	*/
8980	switch (off) {
8981	case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
8982	switch (prog->expected_attach_type) {
8983	case BPF_CGROUP_INET4_BIND:
8984	case BPF_CGROUP_INET4_CONNECT:
8985	case BPF_CGROUP_INET4_GETPEERNAME:
8986	case BPF_CGROUP_INET4_GETSOCKNAME:
8987	case BPF_CGROUP_UDP4_SENDMSG:
8988	case BPF_CGROUP_UDP4_RECVMSG:
8989	break;
8990	default:
8991	return false;
8992	}
8993	break;
8994	case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[`0`], user_ip6[`3`]):
8995	switch (prog->expected_attach_type) {
8996	case BPF_CGROUP_INET6_BIND:
8997	case BPF_CGROUP_INET6_CONNECT:
8998	case BPF_CGROUP_INET6_GETPEERNAME:
8999	case BPF_CGROUP_INET6_GETSOCKNAME:
9000	case BPF_CGROUP_UDP6_SENDMSG:
9001	case BPF_CGROUP_UDP6_RECVMSG:
9002	break;
9003	default:
9004	return false;
9005	}
9006	break;
9007	case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
9008	switch (prog->expected_attach_type) {
9009	case BPF_CGROUP_UDP4_SENDMSG:
9010	break;
9011	default:
9012	return false;
9013	}
9014	break;
9015	case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[`0`],
9016	msg_src_ip6[`3`]):
9017	switch (prog->expected_attach_type) {
9018	case BPF_CGROUP_UDP6_SENDMSG:
9019	break;
9020	default:
9021	return false;
9022	}
9023	break;
9024	}
9025
9026	switch (off) {
9027	case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
9028	case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[`0`], user_ip6[`3`]):
9029	case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
9030	case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[`0`],
9031	msg_src_ip6[`3`]):
9032	case bpf_ctx_range(struct bpf_sock_addr, user_port):
9033	if (type == BPF_READ) {
9034	bpf_ctx_record_field_size(aux: info, size: size_default);
9035
9036	if (bpf_ctx_wide_access_ok(off, size,
9037	struct bpf_sock_addr,
9038	user_ip6))
9039	return true;
9040
9041	if (bpf_ctx_wide_access_ok(off, size,
9042	struct bpf_sock_addr,
9043	msg_src_ip6))
9044	return true;
9045
9046	if (!bpf_ctx_narrow_access_ok(off, size, size_default))
9047	return false;
9048	} else {
9049	if (bpf_ctx_wide_access_ok(off, size,
9050	struct bpf_sock_addr,
9051	user_ip6))
9052	return true;
9053
9054	if (bpf_ctx_wide_access_ok(off, size,
9055	struct bpf_sock_addr,
9056	msg_src_ip6))
9057	return true;
9058
9059	if (size != size_default)
9060	return false;
9061	}
9062	break;
9063	case offsetof(struct bpf_sock_addr, sk):
9064	if (type != BPF_READ)
9065	return false;
9066	if (size != sizeof(__u64))
9067	return false;
9068	info->reg_type = PTR_TO_SOCKET;
9069	break;
9070	default:
9071	if (type == BPF_READ) {
9072	if (size != size_default)
9073	return false;
9074	} else {
9075	return false;
9076	}
9077	}
9078
9079	return true;
9080	}
9081
9082	static bool sock_ops_is_valid_access(int off, int size,
9083	enum bpf_access_type type,
9084	const struct bpf_prog *prog,
9085	struct bpf_insn_access_aux *info)
9086	{
9087	const int size_default = sizeof(__u32);
9088
9089	if (off < `0` \|\| off >= sizeof(struct bpf_sock_ops))
9090	return false;
9091
9092	/ The verifier guarantees that size > 0. /
9093	if (off % size != `0`)
9094	return false;
9095
9096	if (type == BPF_WRITE) {
9097	switch (off) {
9098	case offsetof(struct bpf_sock_ops, reply):
9099	case offsetof(struct bpf_sock_ops, sk_txhash):
9100	if (size != size_default)
9101	return false;
9102	break;
9103	default:
9104	return false;
9105	}
9106	} else {
9107	switch (off) {
9108	case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,
9109	bytes_acked):
9110	if (size != sizeof(__u64))
9111	return false;
9112	break;
9113	case offsetof(struct bpf_sock_ops, sk):
9114	if (size != sizeof(__u64))
9115	return false;
9116	info->reg_type = PTR_TO_SOCKET_OR_NULL;
9117	break;
9118	case offsetof(struct bpf_sock_ops, skb_data):
9119	if (size != sizeof(__u64))
9120	return false;
9121	info->reg_type = PTR_TO_PACKET;
9122	break;
9123	case offsetof(struct bpf_sock_ops, skb_data_end):
9124	if (size != sizeof(__u64))
9125	return false;
9126	info->reg_type = PTR_TO_PACKET_END;
9127	break;
9128	case offsetof(struct bpf_sock_ops, skb_tcp_flags):
9129	bpf_ctx_record_field_size(aux: info, size: size_default);
9130	return bpf_ctx_narrow_access_ok(off, size,
9131	size_default);
9132	case offsetof(struct bpf_sock_ops, skb_hwtstamp):
9133	if (size != sizeof(__u64))
9134	return false;
9135	break;
9136	default:
9137	if (size != size_default)
9138	return false;
9139	break;
9140	}
9141	}
9142
9143	return true;
9144	}
9145
9146	static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
9147	const struct bpf_prog *prog)
9148	{
9149	return bpf_unclone_prologue(insn_buf, direct_write, prog, drop_verdict: SK_DROP);
9150	}
9151
9152	static bool sk_skb_is_valid_access(int off, int size,
9153	enum bpf_access_type type,
9154	const struct bpf_prog *prog,
9155	struct bpf_insn_access_aux *info)
9156	{
9157	switch (off) {
9158	case bpf_ctx_range(struct __sk_buff, tc_classid):
9159	case bpf_ctx_range(struct __sk_buff, data_meta):
9160	case bpf_ctx_range(struct __sk_buff, tstamp):
9161	case bpf_ctx_range(struct __sk_buff, wire_len):
9162	case bpf_ctx_range(struct __sk_buff, hwtstamp):
9163	return false;
9164	}
9165
9166	if (type == BPF_WRITE) {
9167	switch (off) {
9168	case bpf_ctx_range(struct __sk_buff, tc_index):
9169	case bpf_ctx_range(struct __sk_buff, priority):
9170	break;
9171	default:
9172	return false;
9173	}
9174	}
9175
9176	switch (off) {
9177	case bpf_ctx_range(struct __sk_buff, mark):
9178	return false;
9179	case bpf_ctx_range(struct __sk_buff, data):
9180	info->reg_type = PTR_TO_PACKET;
9181	break;
9182	case bpf_ctx_range(struct __sk_buff, data_end):
9183	info->reg_type = PTR_TO_PACKET_END;
9184	break;
9185	}
9186
9187	return bpf_skb_is_valid_access(off, size, type, prog, info);
9188	}
9189
9190	static bool sk_msg_is_valid_access(int off, int size,
9191	enum bpf_access_type type,
9192	const struct bpf_prog *prog,
9193	struct bpf_insn_access_aux *info)
9194	{
9195	if (type == BPF_WRITE)
9196	return false;
9197
9198	if (off % size != `0`)
9199	return false;
9200
9201	switch (off) {
9202	case offsetof(struct sk_msg_md, data):
9203	info->reg_type = PTR_TO_PACKET;
9204	if (size != sizeof(__u64))
9205	return false;
9206	break;
9207	case offsetof(struct sk_msg_md, data_end):
9208	info->reg_type = PTR_TO_PACKET_END;
9209	if (size != sizeof(__u64))
9210	return false;
9211	break;
9212	case offsetof(struct sk_msg_md, sk):
9213	if (size != sizeof(__u64))
9214	return false;
9215	info->reg_type = PTR_TO_SOCKET;
9216	break;
9217	case bpf_ctx_range(struct sk_msg_md, family):
9218	case bpf_ctx_range(struct sk_msg_md, remote_ip4):
9219	case bpf_ctx_range(struct sk_msg_md, local_ip4):
9220	case bpf_ctx_range_till(struct sk_msg_md, remote_ip6[`0`], remote_ip6[`3`]):
9221	case bpf_ctx_range_till(struct sk_msg_md, local_ip6[`0`], local_ip6[`3`]):
9222	case bpf_ctx_range(struct sk_msg_md, remote_port):
9223	case bpf_ctx_range(struct sk_msg_md, local_port):
9224	case bpf_ctx_range(struct sk_msg_md, size):
9225	if (size != sizeof(__u32))
9226	return false;
9227	break;
9228	default:
9229	return false;
9230	}
9231	return true;
9232	}
9233
9234	static bool flow_dissector_is_valid_access(int off, int size,
9235	enum bpf_access_type type,
9236	const struct bpf_prog *prog,
9237	struct bpf_insn_access_aux *info)
9238	{
9239	const int size_default = sizeof(__u32);
9240
9241	if (off < `0` \|\| off >= sizeof(struct __sk_buff))
9242	return false;
9243
9244	if (type == BPF_WRITE)
9245	return false;
9246
9247	switch (off) {
9248	case bpf_ctx_range(struct __sk_buff, data):
9249	if (size != size_default)
9250	return false;
9251	info->reg_type = PTR_TO_PACKET;
9252	return true;
9253	case bpf_ctx_range(struct __sk_buff, data_end):
9254	if (size != size_default)
9255	return false;
9256	info->reg_type = PTR_TO_PACKET_END;
9257	return true;
9258	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
9259	if (size != sizeof(__u64))
9260	return false;
9261	info->reg_type = PTR_TO_FLOW_KEYS;
9262	return true;
9263	default:
9264	return false;
9265	}
9266	}
9267
9268	static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
9269	const struct bpf_insn *si,
9270	struct bpf_insn *insn_buf,
9271	struct bpf_prog *prog,
9272	u32 *target_size)
9273
9274	{
9275	struct bpf_insn *insn = insn_buf;
9276
9277	switch (si->off) {
9278	case offsetof(struct __sk_buff, data):
9279	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_flow_dissector, data),
9280	si->dst_reg, si->src_reg,
9281	offsetof(struct bpf_flow_dissector, data));
9282	break;
9283
9284	case offsetof(struct __sk_buff, data_end):
9285	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_flow_dissector, data_end),
9286	si->dst_reg, si->src_reg,
9287	offsetof(struct bpf_flow_dissector, data_end));
9288	break;
9289
9290	case offsetof(struct __sk_buff, flow_keys):
9291	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_flow_dissector, flow_keys),
9292	si->dst_reg, si->src_reg,
9293	offsetof(struct bpf_flow_dissector, flow_keys));
9294	break;
9295	}
9296
9297	return insn - insn_buf;
9298	}
9299
9300	static struct bpf_insn bpf_convert_tstamp_type_read(const* struct bpf_insn *si,
9301	struct bpf_insn *insn)
9302	{
9303	__u8 value_reg = si->dst_reg;
9304	__u8 skb_reg = si->src_reg;
9305	/ AX is needed because src_reg and dst_reg could be the same /
9306	__u8 tmp_reg = BPF_REG_AX;
9307
9308	*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
9309	SKB_BF_MONO_TC_OFFSET);
9310	*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
9311	SKB_MONO_DELIVERY_TIME_MASK, `2`);
9312	*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC);
9313	*insn++ = BPF_JMP_A(`1`);
9314	*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO);
9315
9316	return insn;
9317	}
9318
9319	static struct bpf_insn *bpf_convert_shinfo_access(__u8 dst_reg, __u8 skb_reg,
9320	struct bpf_insn *insn)
9321	{
9322	/ si->dst_reg = skb_shinfo(SKB); /
9323	#ifdef NET_SKBUFF_DATA_USES_OFFSET
9324	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, end),
9325	BPF_REG_AX, skb_reg,
9326	offsetof(struct sk_buff, end));
9327	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, head),
9328	dst_reg, skb_reg,
9329	offsetof(struct sk_buff, head));
9330	*insn++ = BPF_ALU64_REG(BPF_ADD, dst_reg, BPF_REG_AX);
9331	#else
9332	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, end),
9333	dst_reg, skb_reg,
9334	offsetof(struct sk_buff, end));
9335	#endif
9336
9337	return insn;
9338	}
9339
9340	static struct bpf_insn bpf_convert_tstamp_read(const* struct bpf_prog *prog,
9341	const struct bpf_insn *si,
9342	struct bpf_insn *insn)
9343	{
9344	__u8 value_reg = si->dst_reg;
9345	__u8 skb_reg = si->src_reg;
9346
9347	#ifdef CONFIG_NET_XGRESS
9348	/ If the tstamp_type is read,*
9349	* the bpf prog is aware the tstamp could have delivery time.
9350	* Thus, read skb->tstamp as is if tstamp_type_access is true.
9351	*/
9352	if (!prog->tstamp_type_access) {
9353	/ AX is needed because src_reg and dst_reg could be the same /
9354	__u8 tmp_reg = BPF_REG_AX;
9355
9356	*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
9357	*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
9358	TC_AT_INGRESS_MASK \| SKB_MONO_DELIVERY_TIME_MASK);
9359	*insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg,
9360	TC_AT_INGRESS_MASK \| SKB_MONO_DELIVERY_TIME_MASK, `2`);
9361	/ skb->tc_at_ingress && skb->mono_delivery_time,*
9362	* read 0 as the (rcv) timestamp.
9363	*/
9364	*insn++ = BPF_MOV64_IMM(value_reg, `0`);
9365	*insn++ = BPF_JMP_A(`1`);
9366	}
9367	#endif
9368
9369	*insn++ = BPF_LDX_MEM(BPF_DW, value_reg, skb_reg,
9370	offsetof(struct sk_buff, tstamp));
9371	return insn;
9372	}
9373
9374	static struct bpf_insn bpf_convert_tstamp_write(const* struct bpf_prog *prog,
9375	const struct bpf_insn *si,
9376	struct bpf_insn *insn)
9377	{
9378	__u8 value_reg = si->src_reg;
9379	__u8 skb_reg = si->dst_reg;
9380
9381	#ifdef CONFIG_NET_XGRESS
9382	/ If the tstamp_type is read,*
9383	* the bpf prog is aware the tstamp could have delivery time.
9384	* Thus, write skb->tstamp as is if tstamp_type_access is true.
9385	* Otherwise, writing at ingress will have to clear the
9386	* mono_delivery_time bit also.
9387	*/
9388	if (!prog->tstamp_type_access) {
9389	__u8 tmp_reg = BPF_REG_AX;
9390
9391	*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
9392	/ Writing __sk_buff->tstamp as ingress, goto <clear> /
9393	*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, `1`);
9394	/ goto <store> /
9395	*insn++ = BPF_JMP_A(`2`);
9396	/ <clear>: mono_delivery_time /
9397	*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK);
9398	*insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET);
9399	}
9400	#endif
9401
9402	/ <store>: skb->tstamp = tstamp /
9403	*insn++ = BPF_RAW_INSN(BPF_CLASS(si->code) \| BPF_DW \| BPF_MEM,
9404	skb_reg, value_reg, offsetof(struct sk_buff, tstamp), si->imm);
9405	return insn;
9406	}
9407
9408	#define BPF_EMIT_STORE(size, si, off) \
9409	BPF_RAW_INSN(BPF_CLASS((si)->code) \| (size) \| BPF_MEM, \
9410	(si)->dst_reg, (si)->src_reg, (off), (si)->imm)
9411
9412	static u32 bpf_convert_ctx_access(enum bpf_access_type type,
9413	const struct bpf_insn *si,
9414	struct bpf_insn *insn_buf,
9415	struct bpf_prog prog, u32 target_size)
9416	{
9417	struct bpf_insn *insn = insn_buf;
9418	int off;
9419
9420	switch (si->off) {
9421	case offsetof(struct __sk_buff, len):
9422	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9423	bpf_target_off(struct sk_buff, len, `4`,
9424	target_size));
9425	break;
9426
9427	case offsetof(struct __sk_buff, protocol):
9428	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
9429	bpf_target_off(struct sk_buff, protocol, `2`,
9430	target_size));
9431	break;
9432
9433	case offsetof(struct __sk_buff, vlan_proto):
9434	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
9435	bpf_target_off(struct sk_buff, vlan_proto, `2`,
9436	target_size));
9437	break;
9438
9439	case offsetof(struct __sk_buff, priority):
9440	if (type == BPF_WRITE)
9441	*insn++ = BPF_EMIT_STORE(BPF_W, si,
9442	bpf_target_off(struct sk_buff, priority, `4`,
9443	target_size));
9444	else
9445	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9446	bpf_target_off(struct sk_buff, priority, `4`,
9447	target_size));
9448	break;
9449
9450	case offsetof(struct __sk_buff, ingress_ifindex):
9451	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9452	bpf_target_off(struct sk_buff, skb_iif, `4`,
9453	target_size));
9454	break;
9455
9456	case offsetof(struct __sk_buff, ifindex):
9457	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, dev),
9458	si->dst_reg, si->src_reg,
9459	offsetof(struct sk_buff, dev));
9460	*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, `0`, `1`);
9461	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9462	bpf_target_off(struct net_device, ifindex, `4`,
9463	target_size));
9464	break;
9465
9466	case offsetof(struct __sk_buff, hash):
9467	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9468	bpf_target_off(struct sk_buff, hash, `4`,
9469	target_size));
9470	break;
9471
9472	case offsetof(struct __sk_buff, mark):
9473	if (type == BPF_WRITE)
9474	*insn++ = BPF_EMIT_STORE(BPF_W, si,
9475	bpf_target_off(struct sk_buff, mark, `4`,
9476	target_size));
9477	else
9478	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9479	bpf_target_off(struct sk_buff, mark, `4`,
9480	target_size));
9481	break;
9482
9483	case offsetof(struct __sk_buff, pkt_type):
9484	*target_size = `1`;
9485	*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
9486	PKT_TYPE_OFFSET);
9487	*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX);
9488	#ifdef __BIG_ENDIAN_BITFIELD
9489	*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, `5`);
9490	#endif
9491	break;
9492
9493	case offsetof(struct __sk_buff, queue_mapping):
9494	if (type == BPF_WRITE) {
9495	u32 off = bpf_target_off(struct sk_buff, queue_mapping, `2`, target_size);
9496
9497	if (BPF_CLASS(si->code) == BPF_ST && si->imm >= NO_QUEUE_MAPPING) {
9498	insn++ = BPF_JMP_A(`0`); /* noop /
9499	break;
9500	}
9501
9502	if (BPF_CLASS(si->code) == BPF_STX)
9503	*insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, `1`);
9504	*insn++ = BPF_EMIT_STORE(BPF_H, si, off);
9505	} else {
9506	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
9507	bpf_target_off(struct sk_buff,
9508	queue_mapping,
9509	`2`, target_size));
9510	}
9511	break;
9512
9513	case offsetof(struct __sk_buff, vlan_present):
9514	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9515	bpf_target_off(struct sk_buff,
9516	vlan_all, `4`, target_size));
9517	*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, `0`, `1`);
9518	*insn++ = BPF_ALU32_IMM(BPF_MOV, si->dst_reg, `1`);
9519	break;
9520
9521	case offsetof(struct __sk_buff, vlan_tci):
9522	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
9523	bpf_target_off(struct sk_buff, vlan_tci, `2`,
9524	target_size));
9525	break;
9526
9527	case offsetof(struct __sk_buff, cb[`0`]) ...
9528	offsetofend(struct __sk_buff, cb[`4`]) - `1`:
9529	BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, data) < `20`);
9530	BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
9531	offsetof(struct qdisc_skb_cb, data)) %
9532	sizeof(__u64));
9533
9534	prog->cb_access = `1`;
9535	off = si->off;
9536	off -= offsetof(struct __sk_buff, cb[`0`]);
9537	off += offsetof(struct sk_buff, cb);
9538	off += offsetof(struct qdisc_skb_cb, data);
9539	if (type == BPF_WRITE)
9540	*insn++ = BPF_EMIT_STORE(BPF_SIZE(si->code), si, off);
9541	else
9542	*insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
9543	si->src_reg, off);
9544	break;
9545
9546	case offsetof(struct __sk_buff, tc_classid):
9547	BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, tc_classid) != `2`);
9548
9549	off = si->off;
9550	off -= offsetof(struct __sk_buff, tc_classid);
9551	off += offsetof(struct sk_buff, cb);
9552	off += offsetof(struct qdisc_skb_cb, tc_classid);
9553	*target_size = `2`;
9554	if (type == BPF_WRITE)
9555	*insn++ = BPF_EMIT_STORE(BPF_H, si, off);
9556	else
9557	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg,
9558	si->src_reg, off);
9559	break;
9560
9561	case offsetof(struct __sk_buff, data):
9562	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, data),
9563	si->dst_reg, si->src_reg,
9564	offsetof(struct sk_buff, data));
9565	break;
9566
9567	case offsetof(struct __sk_buff, data_meta):
9568	off = si->off;
9569	off -= offsetof(struct __sk_buff, data_meta);
9570	off += offsetof(struct sk_buff, cb);
9571	off += offsetof(struct bpf_skb_data_end, data_meta);
9572	insn++ = BPF_LDX_MEM(BPF_SIZEOF(void* *), si->dst_reg,
9573	si->src_reg, off);
9574	break;
9575
9576	case offsetof(struct __sk_buff, data_end):
9577	off = si->off;
9578	off -= offsetof(struct __sk_buff, data_end);
9579	off += offsetof(struct sk_buff, cb);
9580	off += offsetof(struct bpf_skb_data_end, data_end);
9581	insn++ = BPF_LDX_MEM(BPF_SIZEOF(void* *), si->dst_reg,
9582	si->src_reg, off);
9583	break;
9584
9585	case offsetof(struct __sk_buff, tc_index):
9586	#ifdef CONFIG_NET_SCHED
9587	if (type == BPF_WRITE)
9588	*insn++ = BPF_EMIT_STORE(BPF_H, si,
9589	bpf_target_off(struct sk_buff, tc_index, `2`,
9590	target_size));
9591	else
9592	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
9593	bpf_target_off(struct sk_buff, tc_index, `2`,
9594	target_size));
9595	#else
9596	*target_size = `2`;
9597	if (type == BPF_WRITE)
9598	*insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
9599	else
9600	*insn++ = BPF_MOV64_IMM(si->dst_reg, `0`);
9601	#endif
9602	break;
9603
9604	case offsetof(struct __sk_buff, napi_id):
9605	#if defined(CONFIG_NET_RX_BUSY_POLL)
9606	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9607	bpf_target_off(struct sk_buff, napi_id, `4`,
9608	target_size));
9609	*insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, `1`);
9610	*insn++ = BPF_MOV64_IMM(si->dst_reg, `0`);
9611	#else
9612	*target_size = `4`;
9613	*insn++ = BPF_MOV64_IMM(si->dst_reg, `0`);
9614	#endif
9615	break;
9616	case offsetof(struct __sk_buff, family):
9617	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != `2`);
9618
9619	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, sk),
9620	si->dst_reg, si->src_reg,
9621	offsetof(struct sk_buff, sk));
9622	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9623	bpf_target_off(struct sock_common,
9624	skc_family,
9625	`2`, target_size));
9626	break;
9627	case offsetof(struct __sk_buff, remote_ip4):
9628	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != `4`);
9629
9630	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, sk),
9631	si->dst_reg, si->src_reg,
9632	offsetof(struct sk_buff, sk));
9633	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9634	bpf_target_off(struct sock_common,
9635	skc_daddr,
9636	`4`, target_size));
9637	break;
9638	case offsetof(struct __sk_buff, local_ip4):
9639	BUILD_BUG_ON(sizeof_field(struct sock_common,
9640	skc_rcv_saddr) != `4`);
9641
9642	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, sk),
9643	si->dst_reg, si->src_reg,
9644	offsetof(struct sk_buff, sk));
9645	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9646	bpf_target_off(struct sock_common,
9647	skc_rcv_saddr,
9648	`4`, target_size));
9649	break;
9650	case offsetof(struct __sk_buff, remote_ip6[`0`]) ...
9651	offsetof(struct __sk_buff, remote_ip6[`3`]):
9652	#if IS_ENABLED(CONFIG_IPV6)
9653	BUILD_BUG_ON(sizeof_field(struct sock_common,
9654	skc_v6_daddr.s6_addr32[`0`]) != `4`);
9655
9656	off = si->off;
9657	off -= offsetof(struct __sk_buff, remote_ip6[`0`]);
9658
9659	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, sk),
9660	si->dst_reg, si->src_reg,
9661	offsetof(struct sk_buff, sk));
9662	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9663	offsetof(struct sock_common,
9664	skc_v6_daddr.s6_addr32[`0`]) +
9665	off);
9666	#else
9667	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
9668	#endif
9669	break;
9670	case offsetof(struct __sk_buff, local_ip6[`0`]) ...
9671	offsetof(struct __sk_buff, local_ip6[`3`]):
9672	#if IS_ENABLED(CONFIG_IPV6)
9673	BUILD_BUG_ON(sizeof_field(struct sock_common,
9674	skc_v6_rcv_saddr.s6_addr32[`0`]) != `4`);
9675
9676	off = si->off;
9677	off -= offsetof(struct __sk_buff, local_ip6[`0`]);
9678
9679	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, sk),
9680	si->dst_reg, si->src_reg,
9681	offsetof(struct sk_buff, sk));
9682	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9683	offsetof(struct sock_common,
9684	skc_v6_rcv_saddr.s6_addr32[`0`]) +
9685	off);
9686	#else
9687	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
9688	#endif
9689	break;
9690
9691	case offsetof(struct __sk_buff, remote_port):
9692	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != `2`);
9693
9694	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, sk),
9695	si->dst_reg, si->src_reg,
9696	offsetof(struct sk_buff, sk));
9697	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9698	bpf_target_off(struct sock_common,
9699	skc_dport,
9700	`2`, target_size));
9701	#ifndef __BIG_ENDIAN_BITFIELD
9702	*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, `16`);
9703	#endif
9704	break;
9705
9706	case offsetof(struct __sk_buff, local_port):
9707	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != `2`);
9708
9709	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, sk),
9710	si->dst_reg, si->src_reg,
9711	offsetof(struct sk_buff, sk));
9712	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9713	bpf_target_off(struct sock_common,
9714	skc_num, `2`, target_size));
9715	break;
9716
9717	case offsetof(struct __sk_buff, tstamp):
9718	BUILD_BUG_ON(sizeof_field(struct sk_buff, tstamp) != `8`);
9719
9720	if (type == BPF_WRITE)
9721	insn = bpf_convert_tstamp_write(prog, si, insn);
9722	else
9723	insn = bpf_convert_tstamp_read(prog, si, insn);
9724	break;
9725
9726	case offsetof(struct __sk_buff, tstamp_type):
9727	insn = bpf_convert_tstamp_type_read(si, insn);
9728	break;
9729
9730	case offsetof(struct __sk_buff, gso_segs):
9731	insn = bpf_convert_shinfo_access(dst_reg: si->dst_reg, skb_reg: si->src_reg, insn);
9732	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* skb_shared_info, gso_segs),
9733	si->dst_reg, si->dst_reg,
9734	bpf_target_off(struct skb_shared_info,
9735	gso_segs, `2`,
9736	target_size));
9737	break;
9738	case offsetof(struct __sk_buff, gso_size):
9739	insn = bpf_convert_shinfo_access(dst_reg: si->dst_reg, skb_reg: si->src_reg, insn);
9740	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* skb_shared_info, gso_size),
9741	si->dst_reg, si->dst_reg,
9742	bpf_target_off(struct skb_shared_info,
9743	gso_size, `2`,
9744	target_size));
9745	break;
9746	case offsetof(struct __sk_buff, wire_len):
9747	BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, pkt_len) != `4`);
9748
9749	off = si->off;
9750	off -= offsetof(struct __sk_buff, wire_len);
9751	off += offsetof(struct sk_buff, cb);
9752	off += offsetof(struct qdisc_skb_cb, pkt_len);
9753	*target_size = `4`;
9754	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
9755	break;
9756
9757	case offsetof(struct __sk_buff, sk):
9758	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, sk),
9759	si->dst_reg, si->src_reg,
9760	offsetof(struct sk_buff, sk));
9761	break;
9762	case offsetof(struct __sk_buff, hwtstamp):
9763	BUILD_BUG_ON(sizeof_field(struct skb_shared_hwtstamps, hwtstamp) != `8`);
9764	BUILD_BUG_ON(offsetof(struct skb_shared_hwtstamps, hwtstamp) != `0`);
9765
9766	insn = bpf_convert_shinfo_access(dst_reg: si->dst_reg, skb_reg: si->src_reg, insn);
9767	*insn++ = BPF_LDX_MEM(BPF_DW,
9768	si->dst_reg, si->dst_reg,
9769	bpf_target_off(struct skb_shared_info,
9770	hwtstamps, `8`,
9771	target_size));
9772	break;
9773	}
9774
9775	return insn - insn_buf;
9776	}
9777
9778	u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
9779	const struct bpf_insn *si,
9780	struct bpf_insn *insn_buf,
9781	struct bpf_prog prog, u32 target_size)
9782	{
9783	struct bpf_insn *insn = insn_buf;
9784	int off;
9785
9786	switch (si->off) {
9787	case offsetof(struct bpf_sock, bound_dev_if):
9788	BUILD_BUG_ON(sizeof_field(struct sock, sk_bound_dev_if) != `4`);
9789
9790	if (type == BPF_WRITE)
9791	*insn++ = BPF_EMIT_STORE(BPF_W, si,
9792	offsetof(struct sock, sk_bound_dev_if));
9793	else
9794	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9795	offsetof(struct sock, sk_bound_dev_if));
9796	break;
9797
9798	case offsetof(struct bpf_sock, mark):
9799	BUILD_BUG_ON(sizeof_field(struct sock, sk_mark) != `4`);
9800
9801	if (type == BPF_WRITE)
9802	*insn++ = BPF_EMIT_STORE(BPF_W, si,
9803	offsetof(struct sock, sk_mark));
9804	else
9805	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9806	offsetof(struct sock, sk_mark));
9807	break;
9808
9809	case offsetof(struct bpf_sock, priority):
9810	BUILD_BUG_ON(sizeof_field(struct sock, sk_priority) != `4`);
9811
9812	if (type == BPF_WRITE)
9813	*insn++ = BPF_EMIT_STORE(BPF_W, si,
9814	offsetof(struct sock, sk_priority));
9815	else
9816	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9817	offsetof(struct sock, sk_priority));
9818	break;
9819
9820	case offsetof(struct bpf_sock, family):
9821	*insn++ = BPF_LDX_MEM(
9822	BPF_FIELD_SIZEOF(struct sock_common, skc_family),
9823	si->dst_reg, si->src_reg,
9824	bpf_target_off(struct sock_common,
9825	skc_family,
9826	sizeof_field(struct sock_common,
9827	skc_family),
9828	target_size));
9829	break;
9830
9831	case offsetof(struct bpf_sock, type):
9832	*insn++ = BPF_LDX_MEM(
9833	BPF_FIELD_SIZEOF(struct sock, sk_type),
9834	si->dst_reg, si->src_reg,
9835	bpf_target_off(struct sock, sk_type,
9836	sizeof_field(struct sock, sk_type),
9837	target_size));
9838	break;
9839
9840	case offsetof(struct bpf_sock, protocol):
9841	*insn++ = BPF_LDX_MEM(
9842	BPF_FIELD_SIZEOF(struct sock, sk_protocol),
9843	si->dst_reg, si->src_reg,
9844	bpf_target_off(struct sock, sk_protocol,
9845	sizeof_field(struct sock, sk_protocol),
9846	target_size));
9847	break;
9848
9849	case offsetof(struct bpf_sock, src_ip4):
9850	*insn++ = BPF_LDX_MEM(
9851	BPF_SIZE(si->code), si->dst_reg, si->src_reg,
9852	bpf_target_off(struct sock_common, skc_rcv_saddr,
9853	sizeof_field(struct sock_common,
9854	skc_rcv_saddr),
9855	target_size));
9856	break;
9857
9858	case offsetof(struct bpf_sock, dst_ip4):
9859	*insn++ = BPF_LDX_MEM(
9860	BPF_SIZE(si->code), si->dst_reg, si->src_reg,
9861	bpf_target_off(struct sock_common, skc_daddr,
9862	sizeof_field(struct sock_common,
9863	skc_daddr),
9864	target_size));
9865	break;
9866
9867	case bpf_ctx_range_till(struct bpf_sock, src_ip6[`0`], src_ip6[`3`]):
9868	#if IS_ENABLED(CONFIG_IPV6)
9869	off = si->off;
9870	off -= offsetof(struct bpf_sock, src_ip6[`0`]);
9871	*insn++ = BPF_LDX_MEM(
9872	BPF_SIZE(si->code), si->dst_reg, si->src_reg,
9873	bpf_target_off(
9874	struct sock_common,
9875	skc_v6_rcv_saddr.s6_addr32[`0`],
9876	sizeof_field(struct sock_common,
9877	skc_v6_rcv_saddr.s6_addr32[`0`]),
9878	target_size) + off);
9879	#else
9880	(void)off;
9881	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
9882	#endif
9883	break;
9884
9885	case bpf_ctx_range_till(struct bpf_sock, dst_ip6[`0`], dst_ip6[`3`]):
9886	#if IS_ENABLED(CONFIG_IPV6)
9887	off = si->off;
9888	off -= offsetof(struct bpf_sock, dst_ip6[`0`]);
9889	*insn++ = BPF_LDX_MEM(
9890	BPF_SIZE(si->code), si->dst_reg, si->src_reg,
9891	bpf_target_off(struct sock_common,
9892	skc_v6_daddr.s6_addr32[`0`],
9893	sizeof_field(struct sock_common,
9894	skc_v6_daddr.s6_addr32[`0`]),
9895	target_size) + off);
9896	#else
9897	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
9898	*target_size = `4`;
9899	#endif
9900	break;
9901
9902	case offsetof(struct bpf_sock, src_port):
9903	*insn++ = BPF_LDX_MEM(
9904	BPF_FIELD_SIZEOF(struct sock_common, skc_num),
9905	si->dst_reg, si->src_reg,
9906	bpf_target_off(struct sock_common, skc_num,
9907	sizeof_field(struct sock_common,
9908	skc_num),
9909	target_size));
9910	break;
9911
9912	case offsetof(struct bpf_sock, dst_port):
9913	*insn++ = BPF_LDX_MEM(
9914	BPF_FIELD_SIZEOF(struct sock_common, skc_dport),
9915	si->dst_reg, si->src_reg,
9916	bpf_target_off(struct sock_common, skc_dport,
9917	sizeof_field(struct sock_common,
9918	skc_dport),
9919	target_size));
9920	break;
9921
9922	case offsetof(struct bpf_sock, state):
9923	*insn++ = BPF_LDX_MEM(
9924	BPF_FIELD_SIZEOF(struct sock_common, skc_state),
9925	si->dst_reg, si->src_reg,
9926	bpf_target_off(struct sock_common, skc_state,
9927	sizeof_field(struct sock_common,
9928	skc_state),
9929	target_size));
9930	break;
9931	case offsetof(struct bpf_sock, rx_queue_mapping):
9932	#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
9933	*insn++ = BPF_LDX_MEM(
9934	BPF_FIELD_SIZEOF(struct sock, sk_rx_queue_mapping),
9935	si->dst_reg, si->src_reg,
9936	bpf_target_off(struct sock, sk_rx_queue_mapping,
9937	sizeof_field(struct sock,
9938	sk_rx_queue_mapping),
9939	target_size));
9940	*insn++ = BPF_JMP_IMM(BPF_JNE, si->dst_reg, NO_QUEUE_MAPPING,
9941	`1`);
9942	*insn++ = BPF_MOV64_IMM(si->dst_reg, -`1`);
9943	#else
9944	*insn++ = BPF_MOV64_IMM(si->dst_reg, -`1`);
9945	*target_size = `2`;
9946	#endif
9947	break;
9948	}
9949
9950	return insn - insn_buf;
9951	}
9952
9953	static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
9954	const struct bpf_insn *si,
9955	struct bpf_insn *insn_buf,
9956	struct bpf_prog prog, u32 target_size)
9957	{
9958	struct bpf_insn *insn = insn_buf;
9959
9960	switch (si->off) {
9961	case offsetof(struct __sk_buff, ifindex):
9962	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, dev),
9963	si->dst_reg, si->src_reg,
9964	offsetof(struct sk_buff, dev));
9965	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9966	bpf_target_off(struct net_device, ifindex, `4`,
9967	target_size));
9968	break;
9969	default:
9970	return bpf_convert_ctx_access(type, si, insn_buf, prog,
9971	target_size);
9972	}
9973
9974	return insn - insn_buf;
9975	}
9976
9977	static u32 xdp_convert_ctx_access(enum bpf_access_type type,
9978	const struct bpf_insn *si,
9979	struct bpf_insn *insn_buf,
9980	struct bpf_prog prog, u32 target_size)
9981	{
9982	struct bpf_insn *insn = insn_buf;
9983
9984	switch (si->off) {
9985	case offsetof(struct xdp_md, data):
9986	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* xdp_buff, data),
9987	si->dst_reg, si->src_reg,
9988	offsetof(struct xdp_buff, data));
9989	break;
9990	case offsetof(struct xdp_md, data_meta):
9991	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* xdp_buff, data_meta),
9992	si->dst_reg, si->src_reg,
9993	offsetof(struct xdp_buff, data_meta));
9994	break;
9995	case offsetof(struct xdp_md, data_end):
9996	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* xdp_buff, data_end),
9997	si->dst_reg, si->src_reg,
9998	offsetof(struct xdp_buff, data_end));
9999	break;
10000	case offsetof(struct xdp_md, ingress_ifindex):
10001	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* xdp_buff, rxq),
10002	si->dst_reg, si->src_reg,
10003	offsetof(struct xdp_buff, rxq));
10004	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* xdp_rxq_info, dev),
10005	si->dst_reg, si->dst_reg,
10006	offsetof(struct xdp_rxq_info, dev));
10007	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10008	offsetof(struct net_device, ifindex));
10009	break;
10010	case offsetof(struct xdp_md, rx_queue_index):
10011	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* xdp_buff, rxq),
10012	si->dst_reg, si->src_reg,
10013	offsetof(struct xdp_buff, rxq));
10014	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10015	offsetof(struct xdp_rxq_info,
10016	queue_index));
10017	break;
10018	case offsetof(struct xdp_md, egress_ifindex):
10019	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* xdp_buff, txq),
10020	si->dst_reg, si->src_reg,
10021	offsetof(struct xdp_buff, txq));
10022	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* xdp_txq_info, dev),
10023	si->dst_reg, si->dst_reg,
10024	offsetof(struct xdp_txq_info, dev));
10025	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10026	offsetof(struct net_device, ifindex));
10027	break;
10028	}
10029
10030	return insn - insn_buf;
10031	}
10032
10033	/ SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is type of*
10034	* context Structure, F is Field in context structure that contains a pointer
10035	* to Nested Structure of type NS that has the field NF.
10036	*
10037	* SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to caller to make
10038	* sure that SIZE is not greater than actual size of S.F.NF.
10039	*
10040	* If offset OFF is provided, the load happens from that offset relative to
10041	* offset of NF.
10042	*/
10043	#define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF) \
10044	do { \
10045	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg, \
10046	si->src_reg, offsetof(S, F)); \
10047	*insn++ = BPF_LDX_MEM( \
10048	SIZE, si->dst_reg, si->dst_reg, \
10049	bpf_target_off(NS, NF, sizeof_field(NS, NF), \
10050	target_size) \
10051	+ OFF); \
10052	} while (0)
10053
10054	#define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF) \
10055	SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, \
10056	BPF_FIELD_SIZEOF(NS, NF), 0)
10057
10058	/ SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to*
10059	* SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation.
10060	*
10061	* In addition it uses Temporary Field TF (member of struct S) as the 3rd
10062	* "register" since two registers available in convert_ctx_access are not
10063	* enough: we can't override neither SRC, since it contains value to store, nor
10064	* DST since it contains pointer to context that may be used by later
10065	* instructions. But we need a temporary place to save pointer to nested
10066	* structure whose field we want to store to.
10067	*/
10068	#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, OFF, TF) \
10069	do { \
10070	int tmp_reg = BPF_REG_9; \
10071	if (si->src_reg == tmp_reg \|\| si->dst_reg == tmp_reg) \
10072	--tmp_reg; \
10073	if (si->src_reg == tmp_reg \|\| si->dst_reg == tmp_reg) \
10074	--tmp_reg; \
10075	*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg, \
10076	offsetof(S, TF)); \
10077	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
10078	si->dst_reg, offsetof(S, F)); \
10079	*insn++ = BPF_RAW_INSN(SIZE \| BPF_MEM \| BPF_CLASS(si->code), \
10080	tmp_reg, si->src_reg, \
10081	bpf_target_off(NS, NF, sizeof_field(NS, NF), \
10082	target_size) \
10083	+ OFF, \
10084	si->imm); \
10085	*insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \
10086	offsetof(S, TF)); \
10087	} while (0)
10088
10089	#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \
10090	TF) \
10091	do { \
10092	if (type == BPF_WRITE) { \
10093	SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, \
10094	OFF, TF); \
10095	} else { \
10096	SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \
10097	S, NS, F, NF, SIZE, OFF); \
10098	} \
10099	} while (0)
10100
10101	#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \
10102	SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \
10103	S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF)
10104
10105	static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
10106	const struct bpf_insn *si,
10107	struct bpf_insn *insn_buf,
10108	struct bpf_prog prog, u32 target_size)
10109	{
10110	int off, port_size = sizeof_field(struct sockaddr_in6, sin6_port);
10111	struct bpf_insn *insn = insn_buf;
10112
10113	switch (si->off) {
10114	case offsetof(struct bpf_sock_addr, user_family):
10115	SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
10116	struct sockaddr, uaddr, sa_family);
10117	break;
10118
10119	case offsetof(struct bpf_sock_addr, user_ip4):
10120	SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
10121	struct bpf_sock_addr_kern, struct sockaddr_in, uaddr,
10122	sin_addr, BPF_SIZE(si->code), `0`, tmp_reg);
10123	break;
10124
10125	case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[`0`], user_ip6[`3`]):
10126	off = si->off;
10127	off -= offsetof(struct bpf_sock_addr, user_ip6[`0`]);
10128	SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
10129	struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
10130	sin6_addr.s6_addr32[`0`], BPF_SIZE(si->code), off,
10131	tmp_reg);
10132	break;
10133
10134	case offsetof(struct bpf_sock_addr, user_port):
10135	/ To get port we need to know sa_family first and then treat*
10136	* sockaddr as either sockaddr_in or sockaddr_in6.
10137	* Though we can simplify since port field has same offset and
10138	* size in both structures.
10139	* Here we check this invariant and use just one of the
10140	* structures if it's true.
10141	*/
10142	BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
10143	offsetof(struct sockaddr_in6, sin6_port));
10144	BUILD_BUG_ON(sizeof_field(struct sockaddr_in, sin_port) !=
10145	sizeof_field(struct sockaddr_in6, sin6_port));
10146	/ Account for sin6_port being smaller than user_port. /
10147	port_size = min(port_size, BPF_LDST_BYTES(si));
10148	SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
10149	struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
10150	sin6_port, bytes_to_bpf_size(port_size), `0`, tmp_reg);
10151	break;
10152
10153	case offsetof(struct bpf_sock_addr, family):
10154	SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
10155	struct sock, sk, sk_family);
10156	break;
10157
10158	case offsetof(struct bpf_sock_addr, type):
10159	SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
10160	struct sock, sk, sk_type);
10161	break;
10162
10163	case offsetof(struct bpf_sock_addr, protocol):
10164	SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
10165	struct sock, sk, sk_protocol);
10166	break;
10167
10168	case offsetof(struct bpf_sock_addr, msg_src_ip4):
10169	/ Treat t_ctx as struct in_addr for msg_src_ip4. /
10170	SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
10171	struct bpf_sock_addr_kern, struct in_addr, t_ctx,
10172	s_addr, BPF_SIZE(si->code), `0`, tmp_reg);
10173	break;
10174
10175	case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[`0`],
10176	msg_src_ip6[`3`]):
10177	off = si->off;
10178	off -= offsetof(struct bpf_sock_addr, msg_src_ip6[`0`]);
10179	/ Treat t_ctx as struct in6_addr for msg_src_ip6. /
10180	SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
10181	struct bpf_sock_addr_kern, struct in6_addr, t_ctx,
10182	s6_addr32[`0`], BPF_SIZE(si->code), off, tmp_reg);
10183	break;
10184	case offsetof(struct bpf_sock_addr, sk):
10185	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_sock_addr_kern, sk),
10186	si->dst_reg, si->src_reg,
10187	offsetof(struct bpf_sock_addr_kern, sk));
10188	break;
10189	}
10190
10191	return insn - insn_buf;
10192	}
10193
10194	static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
10195	const struct bpf_insn *si,
10196	struct bpf_insn *insn_buf,
10197	struct bpf_prog *prog,
10198	u32 *target_size)
10199	{
10200	struct bpf_insn *insn = insn_buf;
10201	int off;
10202
10203	/ Helper macro for adding read access to tcp_sock or sock fields. /
10204	#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
10205	do { \
10206	int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 2; \
10207	BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) > \
10208	sizeof_field(struct bpf_sock_ops, BPF_FIELD)); \
10209	if (si->dst_reg == reg \|\| si->src_reg == reg) \
10210	reg--; \
10211	if (si->dst_reg == reg \|\| si->src_reg == reg) \
10212	reg--; \
10213	if (si->dst_reg == si->src_reg) { \
10214	*insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, \
10215	offsetof(struct bpf_sock_ops_kern, \
10216	temp)); \
10217	fullsock_reg = reg; \
10218	jmp += 2; \
10219	} \
10220	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
10221	struct bpf_sock_ops_kern, \
10222	is_fullsock), \
10223	fullsock_reg, si->src_reg, \
10224	offsetof(struct bpf_sock_ops_kern, \
10225	is_fullsock)); \
10226	*insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \
10227	if (si->dst_reg == si->src_reg) \
10228	*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
10229	offsetof(struct bpf_sock_ops_kern, \
10230	temp)); \
10231	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
10232	struct bpf_sock_ops_kern, sk),\
10233	si->dst_reg, si->src_reg, \
10234	offsetof(struct bpf_sock_ops_kern, sk));\
10235	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \
10236	OBJ_FIELD), \
10237	si->dst_reg, si->dst_reg, \
10238	offsetof(OBJ, OBJ_FIELD)); \
10239	if (si->dst_reg == si->src_reg) { \
10240	*insn++ = BPF_JMP_A(1); \
10241	*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
10242	offsetof(struct bpf_sock_ops_kern, \
10243	temp)); \
10244	} \
10245	} while (0)
10246
10247	#define SOCK_OPS_GET_SK() \
10248	do { \
10249	int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 1; \
10250	if (si->dst_reg == reg \|\| si->src_reg == reg) \
10251	reg--; \
10252	if (si->dst_reg == reg \|\| si->src_reg == reg) \
10253	reg--; \
10254	if (si->dst_reg == si->src_reg) { \
10255	*insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, \
10256	offsetof(struct bpf_sock_ops_kern, \
10257	temp)); \
10258	fullsock_reg = reg; \
10259	jmp += 2; \
10260	} \
10261	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
10262	struct bpf_sock_ops_kern, \
10263	is_fullsock), \
10264	fullsock_reg, si->src_reg, \
10265	offsetof(struct bpf_sock_ops_kern, \
10266	is_fullsock)); \
10267	*insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \
10268	if (si->dst_reg == si->src_reg) \
10269	*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
10270	offsetof(struct bpf_sock_ops_kern, \
10271	temp)); \
10272	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
10273	struct bpf_sock_ops_kern, sk),\
10274	si->dst_reg, si->src_reg, \
10275	offsetof(struct bpf_sock_ops_kern, sk));\
10276	if (si->dst_reg == si->src_reg) { \
10277	*insn++ = BPF_JMP_A(1); \
10278	*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
10279	offsetof(struct bpf_sock_ops_kern, \
10280	temp)); \
10281	} \
10282	} while (0)
10283
10284	#define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \
10285	SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock)
10286
10287	/ Helper macro for adding write access to tcp_sock or sock fields.*
10288	* The macro is called with two registers, dst_reg which contains a pointer
10289	* to ctx (context) and src_reg which contains the value that should be
10290	* stored. However, we need an additional register since we cannot overwrite
10291	* dst_reg because it may be used later in the program.
10292	* Instead we "borrow" one of the other register. We first save its value
10293	* into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
10294	* it at the end of the macro.
10295	*/
10296	#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
10297	do { \
10298	int reg = BPF_REG_9; \
10299	BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) > \
10300	sizeof_field(struct bpf_sock_ops, BPF_FIELD)); \
10301	if (si->dst_reg == reg \|\| si->src_reg == reg) \
10302	reg--; \
10303	if (si->dst_reg == reg \|\| si->src_reg == reg) \
10304	reg--; \
10305	*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \
10306	offsetof(struct bpf_sock_ops_kern, \
10307	temp)); \
10308	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
10309	struct bpf_sock_ops_kern, \
10310	is_fullsock), \
10311	reg, si->dst_reg, \
10312	offsetof(struct bpf_sock_ops_kern, \
10313	is_fullsock)); \
10314	*insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
10315	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
10316	struct bpf_sock_ops_kern, sk),\
10317	reg, si->dst_reg, \
10318	offsetof(struct bpf_sock_ops_kern, sk));\
10319	*insn++ = BPF_RAW_INSN(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD) \| \
10320	BPF_MEM \| BPF_CLASS(si->code), \
10321	reg, si->src_reg, \
10322	offsetof(OBJ, OBJ_FIELD), \
10323	si->imm); \
10324	*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \
10325	offsetof(struct bpf_sock_ops_kern, \
10326	temp)); \
10327	} while (0)
10328
10329	#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \
10330	do { \
10331	if (TYPE == BPF_WRITE) \
10332	SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
10333	else \
10334	SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
10335	} while (0)
10336
10337	switch (si->off) {
10338	case offsetof(struct bpf_sock_ops, op):
10339	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_sock_ops_kern,
10340	op),
10341	si->dst_reg, si->src_reg,
10342	offsetof(struct bpf_sock_ops_kern, op));
10343	break;
10344
10345	case offsetof(struct bpf_sock_ops, replylong[`0`]) ...
10346	offsetof(struct bpf_sock_ops, replylong[`3`]):
10347	BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, reply) !=
10348	sizeof_field(struct bpf_sock_ops_kern, reply));
10349	BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, replylong) !=
10350	sizeof_field(struct bpf_sock_ops_kern, replylong));
10351	off = si->off;
10352	off -= offsetof(struct bpf_sock_ops, replylong[`0`]);
10353	off += offsetof(struct bpf_sock_ops_kern, replylong[`0`]);
10354	if (type == BPF_WRITE)
10355	*insn++ = BPF_EMIT_STORE(BPF_W, si, off);
10356	else
10357	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
10358	off);
10359	break;
10360
10361	case offsetof(struct bpf_sock_ops, family):
10362	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != `2`);
10363
10364	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10365	struct bpf_sock_ops_kern, sk),
10366	si->dst_reg, si->src_reg,
10367	offsetof(struct bpf_sock_ops_kern, sk));
10368	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
10369	offsetof(struct sock_common, skc_family));
10370	break;
10371
10372	case offsetof(struct bpf_sock_ops, remote_ip4):
10373	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != `4`);
10374
10375	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10376	struct bpf_sock_ops_kern, sk),
10377	si->dst_reg, si->src_reg,
10378	offsetof(struct bpf_sock_ops_kern, sk));
10379	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10380	offsetof(struct sock_common, skc_daddr));
10381	break;
10382
10383	case offsetof(struct bpf_sock_ops, local_ip4):
10384	BUILD_BUG_ON(sizeof_field(struct sock_common,
10385	skc_rcv_saddr) != `4`);
10386
10387	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10388	struct bpf_sock_ops_kern, sk),
10389	si->dst_reg, si->src_reg,
10390	offsetof(struct bpf_sock_ops_kern, sk));
10391	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10392	offsetof(struct sock_common,
10393	skc_rcv_saddr));
10394	break;
10395
10396	case offsetof(struct bpf_sock_ops, remote_ip6[`0`]) ...
10397	offsetof(struct bpf_sock_ops, remote_ip6[`3`]):
10398	#if IS_ENABLED(CONFIG_IPV6)
10399	BUILD_BUG_ON(sizeof_field(struct sock_common,
10400	skc_v6_daddr.s6_addr32[`0`]) != `4`);
10401
10402	off = si->off;
10403	off -= offsetof(struct bpf_sock_ops, remote_ip6[`0`]);
10404	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10405	struct bpf_sock_ops_kern, sk),
10406	si->dst_reg, si->src_reg,
10407	offsetof(struct bpf_sock_ops_kern, sk));
10408	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10409	offsetof(struct sock_common,
10410	skc_v6_daddr.s6_addr32[`0`]) +
10411	off);
10412	#else
10413	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
10414	#endif
10415	break;
10416
10417	case offsetof(struct bpf_sock_ops, local_ip6[`0`]) ...
10418	offsetof(struct bpf_sock_ops, local_ip6[`3`]):
10419	#if IS_ENABLED(CONFIG_IPV6)
10420	BUILD_BUG_ON(sizeof_field(struct sock_common,
10421	skc_v6_rcv_saddr.s6_addr32[`0`]) != `4`);
10422
10423	off = si->off;
10424	off -= offsetof(struct bpf_sock_ops, local_ip6[`0`]);
10425	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10426	struct bpf_sock_ops_kern, sk),
10427	si->dst_reg, si->src_reg,
10428	offsetof(struct bpf_sock_ops_kern, sk));
10429	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10430	offsetof(struct sock_common,
10431	skc_v6_rcv_saddr.s6_addr32[`0`]) +
10432	off);
10433	#else
10434	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
10435	#endif
10436	break;
10437
10438	case offsetof(struct bpf_sock_ops, remote_port):
10439	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != `2`);
10440
10441	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10442	struct bpf_sock_ops_kern, sk),
10443	si->dst_reg, si->src_reg,
10444	offsetof(struct bpf_sock_ops_kern, sk));
10445	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
10446	offsetof(struct sock_common, skc_dport));
10447	#ifndef __BIG_ENDIAN_BITFIELD
10448	*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, `16`);
10449	#endif
10450	break;
10451
10452	case offsetof(struct bpf_sock_ops, local_port):
10453	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != `2`);
10454
10455	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10456	struct bpf_sock_ops_kern, sk),
10457	si->dst_reg, si->src_reg,
10458	offsetof(struct bpf_sock_ops_kern, sk));
10459	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
10460	offsetof(struct sock_common, skc_num));
10461	break;
10462
10463	case offsetof(struct bpf_sock_ops, is_fullsock):
10464	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10465	struct bpf_sock_ops_kern,
10466	is_fullsock),
10467	si->dst_reg, si->src_reg,
10468	offsetof(struct bpf_sock_ops_kern,
10469	is_fullsock));
10470	break;
10471
10472	case offsetof(struct bpf_sock_ops, state):
10473	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_state) != `1`);
10474
10475	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10476	struct bpf_sock_ops_kern, sk),
10477	si->dst_reg, si->src_reg,
10478	offsetof(struct bpf_sock_ops_kern, sk));
10479	*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg,
10480	offsetof(struct sock_common, skc_state));
10481	break;
10482
10483	case offsetof(struct bpf_sock_ops, rtt_min):
10484	BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
10485	sizeof(struct minmax));
10486	BUILD_BUG_ON(sizeof(struct minmax) <
10487	sizeof(struct minmax_sample));
10488
10489	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10490	struct bpf_sock_ops_kern, sk),
10491	si->dst_reg, si->src_reg,
10492	offsetof(struct bpf_sock_ops_kern, sk));
10493	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10494	offsetof(struct tcp_sock, rtt_min) +
10495	sizeof_field(struct minmax_sample, t));
10496	break;
10497
10498	case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
10499	SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
10500	struct tcp_sock);
10501	break;
10502
10503	case offsetof(struct bpf_sock_ops, sk_txhash):
10504	SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
10505	struct sock, type);
10506	break;
10507	case offsetof(struct bpf_sock_ops, snd_cwnd):
10508	SOCK_OPS_GET_TCP_SOCK_FIELD(snd_cwnd);
10509	break;
10510	case offsetof(struct bpf_sock_ops, srtt_us):
10511	SOCK_OPS_GET_TCP_SOCK_FIELD(srtt_us);
10512	break;
10513	case offsetof(struct bpf_sock_ops, snd_ssthresh):
10514	SOCK_OPS_GET_TCP_SOCK_FIELD(snd_ssthresh);
10515	break;
10516	case offsetof(struct bpf_sock_ops, rcv_nxt):
10517	SOCK_OPS_GET_TCP_SOCK_FIELD(rcv_nxt);
10518	break;
10519	case offsetof(struct bpf_sock_ops, snd_nxt):
10520	SOCK_OPS_GET_TCP_SOCK_FIELD(snd_nxt);
10521	break;
10522	case offsetof(struct bpf_sock_ops, snd_una):
10523	SOCK_OPS_GET_TCP_SOCK_FIELD(snd_una);
10524	break;
10525	case offsetof(struct bpf_sock_ops, mss_cache):
10526	SOCK_OPS_GET_TCP_SOCK_FIELD(mss_cache);
10527	break;
10528	case offsetof(struct bpf_sock_ops, ecn_flags):
10529	SOCK_OPS_GET_TCP_SOCK_FIELD(ecn_flags);
10530	break;
10531	case offsetof(struct bpf_sock_ops, rate_delivered):
10532	SOCK_OPS_GET_TCP_SOCK_FIELD(rate_delivered);
10533	break;
10534	case offsetof(struct bpf_sock_ops, rate_interval_us):
10535	SOCK_OPS_GET_TCP_SOCK_FIELD(rate_interval_us);
10536	break;
10537	case offsetof(struct bpf_sock_ops, packets_out):
10538	SOCK_OPS_GET_TCP_SOCK_FIELD(packets_out);
10539	break;
10540	case offsetof(struct bpf_sock_ops, retrans_out):
10541	SOCK_OPS_GET_TCP_SOCK_FIELD(retrans_out);
10542	break;
10543	case offsetof(struct bpf_sock_ops, total_retrans):
10544	SOCK_OPS_GET_TCP_SOCK_FIELD(total_retrans);
10545	break;
10546	case offsetof(struct bpf_sock_ops, segs_in):
10547	SOCK_OPS_GET_TCP_SOCK_FIELD(segs_in);
10548	break;
10549	case offsetof(struct bpf_sock_ops, data_segs_in):
10550	SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_in);
10551	break;
10552	case offsetof(struct bpf_sock_ops, segs_out):
10553	SOCK_OPS_GET_TCP_SOCK_FIELD(segs_out);
10554	break;
10555	case offsetof(struct bpf_sock_ops, data_segs_out):
10556	SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_out);
10557	break;
10558	case offsetof(struct bpf_sock_ops, lost_out):
10559	SOCK_OPS_GET_TCP_SOCK_FIELD(lost_out);
10560	break;
10561	case offsetof(struct bpf_sock_ops, sacked_out):
10562	SOCK_OPS_GET_TCP_SOCK_FIELD(sacked_out);
10563	break;
10564	case offsetof(struct bpf_sock_ops, bytes_received):
10565	SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_received);
10566	break;
10567	case offsetof(struct bpf_sock_ops, bytes_acked):
10568	SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_acked);
10569	break;
10570	case offsetof(struct bpf_sock_ops, sk):
10571	SOCK_OPS_GET_SK();
10572	break;
10573	case offsetof(struct bpf_sock_ops, skb_data_end):
10574	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_sock_ops_kern,
10575	skb_data_end),
10576	si->dst_reg, si->src_reg,
10577	offsetof(struct bpf_sock_ops_kern,
10578	skb_data_end));
10579	break;
10580	case offsetof(struct bpf_sock_ops, skb_data):
10581	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_sock_ops_kern,
10582	skb),
10583	si->dst_reg, si->src_reg,
10584	offsetof(struct bpf_sock_ops_kern,
10585	skb));
10586	*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, `0`, `1`);
10587	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, data),
10588	si->dst_reg, si->dst_reg,
10589	offsetof(struct sk_buff, data));
10590	break;
10591	case offsetof(struct bpf_sock_ops, skb_len):
10592	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_sock_ops_kern,
10593	skb),
10594	si->dst_reg, si->src_reg,
10595	offsetof(struct bpf_sock_ops_kern,
10596	skb));
10597	*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, `0`, `1`);
10598	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, len),
10599	si->dst_reg, si->dst_reg,
10600	offsetof(struct sk_buff, len));
10601	break;
10602	case offsetof(struct bpf_sock_ops, skb_tcp_flags):
10603	off = offsetof(struct sk_buff, cb);
10604	off += offsetof(struct tcp_skb_cb, tcp_flags);
10605	target_size = sizeof_field(struct* tcp_skb_cb, tcp_flags);
10606	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_sock_ops_kern,
10607	skb),
10608	si->dst_reg, si->src_reg,
10609	offsetof(struct bpf_sock_ops_kern,
10610	skb));
10611	*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, `0`, `1`);
10612	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* tcp_skb_cb,
10613	tcp_flags),
10614	si->dst_reg, si->dst_reg, off);
10615	break;
10616	case offsetof(struct bpf_sock_ops, skb_hwtstamp): {
10617	struct bpf_insn *jmp_on_null_skb;
10618
10619	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_sock_ops_kern,
10620	skb),
10621	si->dst_reg, si->src_reg,
10622	offsetof(struct bpf_sock_ops_kern,
10623	skb));
10624	/ Reserve one insn to test skb == NULL /
10625	jmp_on_null_skb = insn++;
10626	insn = bpf_convert_shinfo_access(dst_reg: si->dst_reg, skb_reg: si->dst_reg, insn);
10627	*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
10628	bpf_target_off(struct skb_shared_info,
10629	hwtstamps, `8`,
10630	target_size));
10631	*jmp_on_null_skb = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, `0`,
10632	insn - jmp_on_null_skb - `1`);
10633	break;
10634	}
10635	}
10636	return insn - insn_buf;
10637	}
10638
10639	/ data_end = skb->data + skb_headlen() /
10640	static struct bpf_insn bpf_convert_data_end_access(const* struct bpf_insn *si,
10641	struct bpf_insn *insn)
10642	{
10643	int reg;
10644	int temp_reg_off = offsetof(struct sk_buff, cb) +
10645	offsetof(struct sk_skb_cb, temp_reg);
10646
10647	if (si->src_reg == si->dst_reg) {
10648	/ We need an extra register, choose and save a register. /
10649	reg = BPF_REG_9;
10650	if (si->src_reg == reg \|\| si->dst_reg == reg)
10651	reg--;
10652	if (si->src_reg == reg \|\| si->dst_reg == reg)
10653	reg--;
10654	*insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, temp_reg_off);
10655	} else {
10656	reg = si->dst_reg;
10657	}
10658
10659	/ reg = skb->data /
10660	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, data),
10661	reg, si->src_reg,
10662	offsetof(struct sk_buff, data));
10663	/ AX = skb->len /
10664	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, len),
10665	BPF_REG_AX, si->src_reg,
10666	offsetof(struct sk_buff, len));
10667	/ reg = skb->data + skb->len /
10668	*insn++ = BPF_ALU64_REG(BPF_ADD, reg, BPF_REG_AX);
10669	/ AX = skb->data_len /
10670	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, data_len),
10671	BPF_REG_AX, si->src_reg,
10672	offsetof(struct sk_buff, data_len));
10673
10674	/ reg = skb->data + skb->len - skb->data_len /
10675	*insn++ = BPF_ALU64_REG(BPF_SUB, reg, BPF_REG_AX);
10676
10677	if (si->src_reg == si->dst_reg) {
10678	/ Restore the saved register /
10679	*insn++ = BPF_MOV64_REG(BPF_REG_AX, si->src_reg);
10680	*insn++ = BPF_MOV64_REG(si->dst_reg, reg);
10681	*insn++ = BPF_LDX_MEM(BPF_DW, reg, BPF_REG_AX, temp_reg_off);
10682	}
10683
10684	return insn;
10685	}
10686
10687	static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
10688	const struct bpf_insn *si,
10689	struct bpf_insn *insn_buf,
10690	struct bpf_prog prog, u32 target_size)
10691	{
10692	struct bpf_insn *insn = insn_buf;
10693	int off;
10694
10695	switch (si->off) {
10696	case offsetof(struct __sk_buff, data_end):
10697	insn = bpf_convert_data_end_access(si, insn);
10698	break;
10699	case offsetof(struct __sk_buff, cb[`0`]) ...
10700	offsetofend(struct __sk_buff, cb[`4`]) - `1`:
10701	BUILD_BUG_ON(sizeof_field(struct sk_skb_cb, data) < `20`);
10702	BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
10703	offsetof(struct sk_skb_cb, data)) %
10704	sizeof(__u64));
10705
10706	prog->cb_access = `1`;
10707	off = si->off;
10708	off -= offsetof(struct __sk_buff, cb[`0`]);
10709	off += offsetof(struct sk_buff, cb);
10710	off += offsetof(struct sk_skb_cb, data);
10711	if (type == BPF_WRITE)
10712	*insn++ = BPF_EMIT_STORE(BPF_SIZE(si->code), si, off);
10713	else
10714	*insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
10715	si->src_reg, off);
10716	break;
10717
10718
10719	default:
10720	return bpf_convert_ctx_access(type, si, insn_buf, prog,
10721	target_size);
10722	}
10723
10724	return insn - insn_buf;
10725	}
10726
10727	static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
10728	const struct bpf_insn *si,
10729	struct bpf_insn *insn_buf,
10730	struct bpf_prog prog, u32 target_size)
10731	{
10732	struct bpf_insn *insn = insn_buf;
10733	#if IS_ENABLED(CONFIG_IPV6)
10734	int off;
10735	#endif
10736
10737	/ convert ctx uses the fact sg element is first in struct /
10738	BUILD_BUG_ON(offsetof(struct sk_msg, sg) != `0`);
10739
10740	switch (si->off) {
10741	case offsetof(struct sk_msg_md, data):
10742	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_msg, data),
10743	si->dst_reg, si->src_reg,
10744	offsetof(struct sk_msg, data));
10745	break;
10746	case offsetof(struct sk_msg_md, data_end):
10747	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_msg, data_end),
10748	si->dst_reg, si->src_reg,
10749	offsetof(struct sk_msg, data_end));
10750	break;
10751	case offsetof(struct sk_msg_md, family):
10752	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != `2`);
10753
10754	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10755	struct sk_msg, sk),
10756	si->dst_reg, si->src_reg,
10757	offsetof(struct sk_msg, sk));
10758	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
10759	offsetof(struct sock_common, skc_family));
10760	break;
10761
10762	case offsetof(struct sk_msg_md, remote_ip4):
10763	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != `4`);
10764
10765	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10766	struct sk_msg, sk),
10767	si->dst_reg, si->src_reg,
10768	offsetof(struct sk_msg, sk));
10769	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10770	offsetof(struct sock_common, skc_daddr));
10771	break;
10772
10773	case offsetof(struct sk_msg_md, local_ip4):
10774	BUILD_BUG_ON(sizeof_field(struct sock_common,
10775	skc_rcv_saddr) != `4`);
10776
10777	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10778	struct sk_msg, sk),
10779	si->dst_reg, si->src_reg,
10780	offsetof(struct sk_msg, sk));
10781	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10782	offsetof(struct sock_common,
10783	skc_rcv_saddr));
10784	break;
10785
10786	case offsetof(struct sk_msg_md, remote_ip6[`0`]) ...
10787	offsetof(struct sk_msg_md, remote_ip6[`3`]):
10788	#if IS_ENABLED(CONFIG_IPV6)
10789	BUILD_BUG_ON(sizeof_field(struct sock_common,
10790	skc_v6_daddr.s6_addr32[`0`]) != `4`);
10791
10792	off = si->off;
10793	off -= offsetof(struct sk_msg_md, remote_ip6[`0`]);
10794	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10795	struct sk_msg, sk),
10796	si->dst_reg, si->src_reg,
10797	offsetof(struct sk_msg, sk));
10798	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10799	offsetof(struct sock_common,
10800	skc_v6_daddr.s6_addr32[`0`]) +
10801	off);
10802	#else
10803	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
10804	#endif
10805	break;
10806
10807	case offsetof(struct sk_msg_md, local_ip6[`0`]) ...
10808	offsetof(struct sk_msg_md, local_ip6[`3`]):
10809	#if IS_ENABLED(CONFIG_IPV6)
10810	BUILD_BUG_ON(sizeof_field(struct sock_common,
10811	skc_v6_rcv_saddr.s6_addr32[`0`]) != `4`);
10812
10813	off = si->off;
10814	off -= offsetof(struct sk_msg_md, local_ip6[`0`]);
10815	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10816	struct sk_msg, sk),
10817	si->dst_reg, si->src_reg,
10818	offsetof(struct sk_msg, sk));
10819	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10820	offsetof(struct sock_common,
10821	skc_v6_rcv_saddr.s6_addr32[`0`]) +
10822	off);
10823	#else
10824	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
10825	#endif
10826	break;
10827
10828	case offsetof(struct sk_msg_md, remote_port):
10829	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != `2`);
10830
10831	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10832	struct sk_msg, sk),
10833	si->dst_reg, si->src_reg,
10834	offsetof(struct sk_msg, sk));
10835	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
10836	offsetof(struct sock_common, skc_dport));
10837	#ifndef __BIG_ENDIAN_BITFIELD
10838	*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, `16`);
10839	#endif
10840	break;
10841
10842	case offsetof(struct sk_msg_md, local_port):
10843	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != `2`);
10844
10845	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10846	struct sk_msg, sk),
10847	si->dst_reg, si->src_reg,
10848	offsetof(struct sk_msg, sk));
10849	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
10850	offsetof(struct sock_common, skc_num));
10851	break;
10852
10853	case offsetof(struct sk_msg_md, size):
10854	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_msg_sg, size),
10855	si->dst_reg, si->src_reg,
10856	offsetof(struct sk_msg_sg, size));
10857	break;
10858
10859	case offsetof(struct sk_msg_md, sk):
10860	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_msg, sk),
10861	si->dst_reg, si->src_reg,
10862	offsetof(struct sk_msg, sk));
10863	break;
10864	}
10865
10866	return insn - insn_buf;
10867	}
10868
10869	const struct bpf_verifier_ops sk_filter_verifier_ops = {
10870	.get_func_proto = sk_filter_func_proto,
10871	.is_valid_access = sk_filter_is_valid_access,
10872	.convert_ctx_access = bpf_convert_ctx_access,
10873	.gen_ld_abs = bpf_gen_ld_abs,
10874	};
10875
10876	const struct bpf_prog_ops sk_filter_prog_ops = {
10877	.test_run = bpf_prog_test_run_skb,
10878	};
10879
10880	const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
10881	.get_func_proto = tc_cls_act_func_proto,
10882	.is_valid_access = tc_cls_act_is_valid_access,
10883	.convert_ctx_access = tc_cls_act_convert_ctx_access,
10884	.gen_prologue = tc_cls_act_prologue,
10885	.gen_ld_abs = bpf_gen_ld_abs,
10886	.btf_struct_access = tc_cls_act_btf_struct_access,
10887	};
10888
10889	const struct bpf_prog_ops tc_cls_act_prog_ops = {
10890	.test_run = bpf_prog_test_run_skb,
10891	};
10892
10893	const struct bpf_verifier_ops xdp_verifier_ops = {
10894	.get_func_proto = xdp_func_proto,
10895	.is_valid_access = xdp_is_valid_access,
10896	.convert_ctx_access = xdp_convert_ctx_access,
10897	.gen_prologue = bpf_noop_prologue,
10898	.btf_struct_access = xdp_btf_struct_access,
10899	};
10900
10901	const struct bpf_prog_ops xdp_prog_ops = {
10902	.test_run = bpf_prog_test_run_xdp,
10903	};
10904
10905	const struct bpf_verifier_ops cg_skb_verifier_ops = {
10906	.get_func_proto = cg_skb_func_proto,
10907	.is_valid_access = cg_skb_is_valid_access,
10908	.convert_ctx_access = bpf_convert_ctx_access,
10909	};
10910
10911	const struct bpf_prog_ops cg_skb_prog_ops = {
10912	.test_run = bpf_prog_test_run_skb,
10913	};
10914
10915	const struct bpf_verifier_ops lwt_in_verifier_ops = {
10916	.get_func_proto = lwt_in_func_proto,
10917	.is_valid_access = lwt_is_valid_access,
10918	.convert_ctx_access = bpf_convert_ctx_access,
10919	};
10920
10921	const struct bpf_prog_ops lwt_in_prog_ops = {
10922	.test_run = bpf_prog_test_run_skb,
10923	};
10924
10925	const struct bpf_verifier_ops lwt_out_verifier_ops = {
10926	.get_func_proto = lwt_out_func_proto,
10927	.is_valid_access = lwt_is_valid_access,
10928	.convert_ctx_access = bpf_convert_ctx_access,
10929	};
10930
10931	const struct bpf_prog_ops lwt_out_prog_ops = {
10932	.test_run = bpf_prog_test_run_skb,
10933	};
10934
10935	const struct bpf_verifier_ops lwt_xmit_verifier_ops = {
10936	.get_func_proto = lwt_xmit_func_proto,
10937	.is_valid_access = lwt_is_valid_access,
10938	.convert_ctx_access = bpf_convert_ctx_access,
10939	.gen_prologue = tc_cls_act_prologue,
10940	};
10941
10942	const struct bpf_prog_ops lwt_xmit_prog_ops = {
10943	.test_run = bpf_prog_test_run_skb,
10944	};
10945
10946	const struct bpf_verifier_ops lwt_seg6local_verifier_ops = {
10947	.get_func_proto = lwt_seg6local_func_proto,
10948	.is_valid_access = lwt_is_valid_access,
10949	.convert_ctx_access = bpf_convert_ctx_access,
10950	};
10951
10952	const struct bpf_prog_ops lwt_seg6local_prog_ops = {
10953	.test_run = bpf_prog_test_run_skb,
10954	};
10955
10956	const struct bpf_verifier_ops cg_sock_verifier_ops = {
10957	.get_func_proto = sock_filter_func_proto,
10958	.is_valid_access = sock_filter_is_valid_access,
10959	.convert_ctx_access = bpf_sock_convert_ctx_access,
10960	};
10961
10962	const struct bpf_prog_ops cg_sock_prog_ops = {
10963	};
10964
10965	const struct bpf_verifier_ops cg_sock_addr_verifier_ops = {
10966	.get_func_proto = sock_addr_func_proto,
10967	.is_valid_access = sock_addr_is_valid_access,
10968	.convert_ctx_access = sock_addr_convert_ctx_access,
10969	};
10970
10971	const struct bpf_prog_ops cg_sock_addr_prog_ops = {
10972	};
10973
10974	const struct bpf_verifier_ops sock_ops_verifier_ops = {
10975	.get_func_proto = sock_ops_func_proto,
10976	.is_valid_access = sock_ops_is_valid_access,
10977	.convert_ctx_access = sock_ops_convert_ctx_access,
10978	};
10979
10980	const struct bpf_prog_ops sock_ops_prog_ops = {
10981	};
10982
10983	const struct bpf_verifier_ops sk_skb_verifier_ops = {
10984	.get_func_proto = sk_skb_func_proto,
10985	.is_valid_access = sk_skb_is_valid_access,
10986	.convert_ctx_access = sk_skb_convert_ctx_access,
10987	.gen_prologue = sk_skb_prologue,
10988	};
10989
10990	const struct bpf_prog_ops sk_skb_prog_ops = {
10991	};
10992
10993	const struct bpf_verifier_ops sk_msg_verifier_ops = {
10994	.get_func_proto = sk_msg_func_proto,
10995	.is_valid_access = sk_msg_is_valid_access,
10996	.convert_ctx_access = sk_msg_convert_ctx_access,
10997	.gen_prologue = bpf_noop_prologue,
10998	};
10999
11000	const struct bpf_prog_ops sk_msg_prog_ops = {
11001	};
11002
11003	const struct bpf_verifier_ops flow_dissector_verifier_ops = {
11004	.get_func_proto = flow_dissector_func_proto,
11005	.is_valid_access = flow_dissector_is_valid_access,
11006	.convert_ctx_access = flow_dissector_convert_ctx_access,
11007	};
11008
11009	const struct bpf_prog_ops flow_dissector_prog_ops = {
11010	.test_run = bpf_prog_test_run_flow_dissector,
11011	};
11012
11013	int sk_detach_filter(struct sock *sk)
11014	{
11015	int ret = -ENOENT;
11016	struct sk_filter *filter;
11017
11018	if (sock_flag(sk, flag: SOCK_FILTER_LOCKED))
11019	return -EPERM;
11020
11021	filter = rcu_dereference_protected(sk->sk_filter,
11022	lockdep_sock_is_held(sk));
11023	if (filter) {
11024	RCU_INIT_POINTER(sk->sk_filter, NULL);
11025	sk_filter_uncharge(sk, fp: filter);
11026	ret = `0`;
11027	}
11028
11029	return ret;
11030	}
11031	EXPORT_SYMBOL_GPL(sk_detach_filter);
11032
11033	int sk_get_filter(struct sock sk, sockptr_t optval, unsigned* int len)
11034	{
11035	struct sock_fprog_kern *fprog;
11036	struct sk_filter *filter;
11037	int ret = `0`;
11038
11039	sockopt_lock_sock(sk);
11040	filter = rcu_dereference_protected(sk->sk_filter,
11041	lockdep_sock_is_held(sk));
11042	if (!filter)
11043	goto out;
11044
11045	/ We're copying the filter that has been originally attached,*
11046	* so no conversion/decode needed anymore. eBPF programs that
11047	* have no original program cannot be dumped through this.
11048	*/
11049	ret = -EACCES;
11050	fprog = filter->prog->orig_prog;
11051	if (!fprog)
11052	goto out;
11053
11054	ret = fprog->len;
11055	if (!len)
11056	/ User space only enquires number of filter blocks. /
11057	goto out;
11058
11059	ret = -EINVAL;
11060	if (len < fprog->len)
11061	goto out;
11062
11063	ret = -EFAULT;
11064	if (copy_to_sockptr(dst: optval, src: fprog->filter, bpf_classic_proglen(fprog)))
11065	goto out;
11066
11067	/ Instead of bytes, the API requests to return the number*
11068	* of filter blocks.
11069	*/
11070	ret = fprog->len;
11071	out:
11072	sockopt_release_sock(sk);
11073	return ret;
11074	}
11075
11076	#ifdef CONFIG_INET
11077	static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
11078	struct sock_reuseport *reuse,
11079	struct sock sk, struct* sk_buff *skb,
11080	struct sock *migrating_sk,
11081	u32 hash)
11082	{
11083	reuse_kern->skb = skb;
11084	reuse_kern->sk = sk;
11085	reuse_kern->selected_sk = NULL;
11086	reuse_kern->migrating_sk = migrating_sk;
11087	reuse_kern->data_end = skb->data + skb_headlen(skb);
11088	reuse_kern->hash = hash;
11089	reuse_kern->reuseport_id = reuse->reuseport_id;
11090	reuse_kern->bind_inany = reuse->bind_inany;
11091	}
11092
11093	struct sock bpf_run_sk_reuseport(struct* sock_reuseport reuse, struct* sock *sk,
11094	struct bpf_prog prog, struct* sk_buff *skb,
11095	struct sock *migrating_sk,
11096	u32 hash)
11097	{
11098	struct sk_reuseport_kern reuse_kern;
11099	enum sk_action action;
11100
11101	bpf_init_reuseport_kern(reuse_kern: &reuse_kern, reuse, sk, skb, migrating_sk, hash);
11102	action = bpf_prog_run(prog, ctx: &reuse_kern);
11103
11104	if (action == SK_PASS)
11105	return reuse_kern.selected_sk;
11106	else
11107	return ERR_PTR(error: -ECONNREFUSED);
11108	}
11109
11110	BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
11111	struct bpf_map , map, void* *, key, u32, flags)
11112	{
11113	bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
11114	struct sock_reuseport *reuse;
11115	struct sock *selected_sk;
11116
11117	selected_sk = map->ops->map_lookup_elem(map, key);
11118	if (!selected_sk)
11119	return -ENOENT;
11120
11121	reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
11122	if (!reuse) {
11123	/ Lookup in sock_map can return TCP ESTABLISHED sockets. /
11124	if (sk_is_refcounted(sk: selected_sk))
11125	sock_put(sk: selected_sk);
11126
11127	/ reuseport_array has only sk with non NULL sk_reuseport_cb.*
11128	* The only (!reuse) case here is - the sk has already been
11129	* unhashed (e.g. by close()), so treat it as -ENOENT.
11130	*
11131	* Other maps (e.g. sock_map) do not provide this guarantee and
11132	* the sk may never be in the reuseport group to begin with.
11133	*/
11134	return is_sockarray ? -ENOENT : -EINVAL;
11135	}
11136
11137	if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
11138	struct sock *sk = reuse_kern->sk;
11139
11140	if (sk->sk_protocol != selected_sk->sk_protocol)
11141	return -EPROTOTYPE;
11142	else if (sk->sk_family != selected_sk->sk_family)
11143	return -EAFNOSUPPORT;
11144
11145	/ Catch all. Likely bound to a different sockaddr. /
11146	return -EBADFD;
11147	}
11148
11149	reuse_kern->selected_sk = selected_sk;
11150
11151	return `0`;
11152	}
11153
11154	static const struct bpf_func_proto sk_select_reuseport_proto = {
11155	.func = sk_select_reuseport,
11156	.gpl_only = false,
11157	.ret_type = RET_INTEGER,
11158	.arg1_type = ARG_PTR_TO_CTX,
11159	.arg2_type = ARG_CONST_MAP_PTR,
11160	.arg3_type = ARG_PTR_TO_MAP_KEY,
11161	.arg4_type = ARG_ANYTHING,
11162	};
11163
11164	BPF_CALL_4(sk_reuseport_load_bytes,
11165	const struct sk_reuseport_kern *, reuse_kern, u32, offset,
11166	void *, to, u32, len)
11167	{
11168	return ____bpf_skb_load_bytes(skb: reuse_kern->skb, offset, to, len);
11169	}
11170
11171	static const struct bpf_func_proto sk_reuseport_load_bytes_proto = {
11172	.func = sk_reuseport_load_bytes,
11173	.gpl_only = false,
11174	.ret_type = RET_INTEGER,
11175	.arg1_type = ARG_PTR_TO_CTX,
11176	.arg2_type = ARG_ANYTHING,
11177	.arg3_type = ARG_PTR_TO_UNINIT_MEM,
11178	.arg4_type = ARG_CONST_SIZE,
11179	};
11180
11181	BPF_CALL_5(sk_reuseport_load_bytes_relative,
11182	const struct sk_reuseport_kern *, reuse_kern, u32, offset,
11183	void *, to, u32, len, u32, start_header)
11184	{
11185	return ____bpf_skb_load_bytes_relative(skb: reuse_kern->skb, offset, to,
11186	len, start_header);
11187	}
11188
11189	static const struct bpf_func_proto sk_reuseport_load_bytes_relative_proto = {
11190	.func = sk_reuseport_load_bytes_relative,
11191	.gpl_only = false,
11192	.ret_type = RET_INTEGER,
11193	.arg1_type = ARG_PTR_TO_CTX,
11194	.arg2_type = ARG_ANYTHING,
11195	.arg3_type = ARG_PTR_TO_UNINIT_MEM,
11196	.arg4_type = ARG_CONST_SIZE,
11197	.arg5_type = ARG_ANYTHING,
11198	};
11199
11200	static const struct bpf_func_proto *
11201	sk_reuseport_func_proto(enum bpf_func_id func_id,
11202	const struct bpf_prog *prog)
11203	{
11204	switch (func_id) {
11205	case BPF_FUNC_sk_select_reuseport:
11206	return &sk_select_reuseport_proto;
11207	case BPF_FUNC_skb_load_bytes:
11208	return &sk_reuseport_load_bytes_proto;
11209	case BPF_FUNC_skb_load_bytes_relative:
11210	return &sk_reuseport_load_bytes_relative_proto;
11211	case BPF_FUNC_get_socket_cookie:
11212	return &bpf_get_socket_ptr_cookie_proto;
11213	case BPF_FUNC_ktime_get_coarse_ns:
11214	return &bpf_ktime_get_coarse_ns_proto;
11215	default:
11216	return bpf_base_func_proto(func_id);
11217	}
11218	}
11219
11220	static bool
11221	sk_reuseport_is_valid_access(int off, int size,
11222	enum bpf_access_type type,
11223	const struct bpf_prog *prog,
11224	struct bpf_insn_access_aux *info)
11225	{
11226	const u32 size_default = sizeof(__u32);
11227
11228	if (off < `0` \|\| off >= sizeof(struct sk_reuseport_md) \|\|
11229	off % size \|\| type != BPF_READ)
11230	return false;
11231
11232	switch (off) {
11233	case offsetof(struct sk_reuseport_md, data):
11234	info->reg_type = PTR_TO_PACKET;
11235	return size == sizeof(__u64);
11236
11237	case offsetof(struct sk_reuseport_md, data_end):
11238	info->reg_type = PTR_TO_PACKET_END;
11239	return size == sizeof(__u64);
11240
11241	case offsetof(struct sk_reuseport_md, hash):
11242	return size == size_default;
11243
11244	case offsetof(struct sk_reuseport_md, sk):
11245	info->reg_type = PTR_TO_SOCKET;
11246	return size == sizeof(__u64);
11247
11248	case offsetof(struct sk_reuseport_md, migrating_sk):
11249	info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
11250	return size == sizeof(__u64);
11251
11252	/ Fields that allow narrowing /
11253	case bpf_ctx_range(struct sk_reuseport_md, eth_protocol):
11254	if (size < sizeof_field(struct sk_buff, protocol))
11255	return false;
11256	fallthrough;
11257	case bpf_ctx_range(struct sk_reuseport_md, ip_protocol):
11258	case bpf_ctx_range(struct sk_reuseport_md, bind_inany):
11259	case bpf_ctx_range(struct sk_reuseport_md, len):
11260	bpf_ctx_record_field_size(aux: info, size: size_default);
11261	return bpf_ctx_narrow_access_ok(off, size, size_default);
11262
11263	default:
11264	return false;
11265	}
11266	}
11267
11268	#define SK_REUSEPORT_LOAD_FIELD(F) ({ \
11269	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \
11270	si->dst_reg, si->src_reg, \
11271	bpf_target_off(struct sk_reuseport_kern, F, \
11272	sizeof_field(struct sk_reuseport_kern, F), \
11273	target_size)); \
11274	})
11275
11276	#define SK_REUSEPORT_LOAD_SKB_FIELD(SKB_FIELD) \
11277	SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern, \
11278	struct sk_buff, \
11279	skb, \
11280	SKB_FIELD)
11281
11282	#define SK_REUSEPORT_LOAD_SK_FIELD(SK_FIELD) \
11283	SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern, \
11284	struct sock, \
11285	sk, \
11286	SK_FIELD)
11287
11288	static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
11289	const struct bpf_insn *si,
11290	struct bpf_insn *insn_buf,
11291	struct bpf_prog *prog,
11292	u32 *target_size)
11293	{
11294	struct bpf_insn *insn = insn_buf;
11295
11296	switch (si->off) {
11297	case offsetof(struct sk_reuseport_md, data):
11298	SK_REUSEPORT_LOAD_SKB_FIELD(data);
11299	break;
11300
11301	case offsetof(struct sk_reuseport_md, len):
11302	SK_REUSEPORT_LOAD_SKB_FIELD(len);
11303	break;
11304
11305	case offsetof(struct sk_reuseport_md, eth_protocol):
11306	SK_REUSEPORT_LOAD_SKB_FIELD(protocol);
11307	break;
11308
11309	case offsetof(struct sk_reuseport_md, ip_protocol):
11310	SK_REUSEPORT_LOAD_SK_FIELD(sk_protocol);
11311	break;
11312
11313	case offsetof(struct sk_reuseport_md, data_end):
11314	SK_REUSEPORT_LOAD_FIELD(data_end);
11315	break;
11316
11317	case offsetof(struct sk_reuseport_md, hash):
11318	SK_REUSEPORT_LOAD_FIELD(hash);
11319	break;
11320
11321	case offsetof(struct sk_reuseport_md, bind_inany):
11322	SK_REUSEPORT_LOAD_FIELD(bind_inany);
11323	break;
11324
11325	case offsetof(struct sk_reuseport_md, sk):
11326	SK_REUSEPORT_LOAD_FIELD(sk);
11327	break;
11328
11329	case offsetof(struct sk_reuseport_md, migrating_sk):
11330	SK_REUSEPORT_LOAD_FIELD(migrating_sk);
11331	break;
11332	}
11333
11334	return insn - insn_buf;
11335	}
11336
11337	const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
11338	.get_func_proto = sk_reuseport_func_proto,
11339	.is_valid_access = sk_reuseport_is_valid_access,
11340	.convert_ctx_access = sk_reuseport_convert_ctx_access,
11341	};
11342
11343	const struct bpf_prog_ops sk_reuseport_prog_ops = {
11344	};
11345
11346	DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled);
11347	EXPORT_SYMBOL(bpf_sk_lookup_enabled);
11348
11349	BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx,
11350	struct sock *, sk, u64, flags)
11351	{
11352	if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE \|
11353	BPF_SK_LOOKUP_F_NO_REUSEPORT)))
11354	return -EINVAL;
11355	if (unlikely(sk && sk_is_refcounted(sk)))
11356	return -ESOCKTNOSUPPORT; / reject non-RCU freed sockets /
11357	if (unlikely(sk && sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN))
11358	return -ESOCKTNOSUPPORT; / only accept TCP socket in LISTEN /
11359	if (unlikely(sk && sk_is_udp(sk) && sk->sk_state != TCP_CLOSE))
11360	return -ESOCKTNOSUPPORT; / only accept UDP socket in CLOSE /
11361
11362	/ Check if socket is suitable for packet L3/L4 protocol /
11363	if (sk && sk->sk_protocol != ctx->protocol)
11364	return -EPROTOTYPE;
11365	if (sk && sk->sk_family != ctx->family &&
11366	(sk->sk_family == AF_INET \|\| ipv6_only_sock(sk)))
11367	return -EAFNOSUPPORT;
11368
11369	if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE))
11370	return -EEXIST;
11371
11372	/ Select socket as lookup result /
11373	ctx->selected_sk = sk;
11374	ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT;
11375	return `0`;
11376	}
11377
11378	static const struct bpf_func_proto bpf_sk_lookup_assign_proto = {
11379	.func = bpf_sk_lookup_assign,
11380	.gpl_only = false,
11381	.ret_type = RET_INTEGER,
11382	.arg1_type = ARG_PTR_TO_CTX,
11383	.arg2_type = ARG_PTR_TO_SOCKET_OR_NULL,
11384	.arg3_type = ARG_ANYTHING,
11385	};
11386
11387	static const struct bpf_func_proto *
11388	sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
11389	{
11390	switch (func_id) {
11391	case BPF_FUNC_perf_event_output:
11392	return &bpf_event_output_data_proto;
11393	case BPF_FUNC_sk_assign:
11394	return &bpf_sk_lookup_assign_proto;
11395	case BPF_FUNC_sk_release:
11396	return &bpf_sk_release_proto;
11397	default:
11398	return bpf_sk_base_func_proto(func_id);
11399	}
11400	}
11401
11402	static bool sk_lookup_is_valid_access(int off, int size,
11403	enum bpf_access_type type,
11404	const struct bpf_prog *prog,
11405	struct bpf_insn_access_aux *info)
11406	{
11407	if (off < `0` \|\| off >= sizeof(struct bpf_sk_lookup))
11408	return false;
11409	if (off % size != `0`)
11410	return false;
11411	if (type != BPF_READ)
11412	return false;
11413
11414	switch (off) {
11415	case offsetof(struct bpf_sk_lookup, sk):
11416	info->reg_type = PTR_TO_SOCKET_OR_NULL;
11417	return size == sizeof(__u64);
11418
11419	case bpf_ctx_range(struct bpf_sk_lookup, family):
11420	case bpf_ctx_range(struct bpf_sk_lookup, protocol):
11421	case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4):
11422	case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
11423	case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[`0`], remote_ip6[`3`]):
11424	case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[`0`], local_ip6[`3`]):
11425	case bpf_ctx_range(struct bpf_sk_lookup, local_port):
11426	case bpf_ctx_range(struct bpf_sk_lookup, ingress_ifindex):
11427	bpf_ctx_record_field_size(aux: info, size: sizeof(__u32));
11428	return bpf_ctx_narrow_access_ok(off, size, size_default: sizeof(__u32));
11429
11430	case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
11431	/ Allow 4-byte access to 2-byte field for backward compatibility /
11432	if (size == sizeof(__u32))
11433	return true;
11434	bpf_ctx_record_field_size(aux: info, size: sizeof(__be16));
11435	return bpf_ctx_narrow_access_ok(off, size, size_default: sizeof(__be16));
11436
11437	case offsetofend(struct bpf_sk_lookup, remote_port) ...
11438	offsetof(struct bpf_sk_lookup, local_ip4) - `1`:
11439	/ Allow access to zero padding for backward compatibility /
11440	bpf_ctx_record_field_size(aux: info, size: sizeof(__u16));
11441	return bpf_ctx_narrow_access_ok(off, size, size_default: sizeof(__u16));
11442
11443	default:
11444	return false;
11445	}
11446	}
11447
11448	static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
11449	const struct bpf_insn *si,
11450	struct bpf_insn *insn_buf,
11451	struct bpf_prog *prog,
11452	u32 *target_size)
11453	{
11454	struct bpf_insn *insn = insn_buf;
11455
11456	switch (si->off) {
11457	case offsetof(struct bpf_sk_lookup, sk):
11458	insn++ = BPF_LDX_MEM(BPF_SIZEOF(void* *), si->dst_reg, si->src_reg,
11459	offsetof(struct bpf_sk_lookup_kern, selected_sk));
11460	break;
11461
11462	case offsetof(struct bpf_sk_lookup, family):
11463	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
11464	bpf_target_off(struct bpf_sk_lookup_kern,
11465	family, `2`, target_size));
11466	break;
11467
11468	case offsetof(struct bpf_sk_lookup, protocol):
11469	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
11470	bpf_target_off(struct bpf_sk_lookup_kern,
11471	protocol, `2`, target_size));
11472	break;
11473
11474	case offsetof(struct bpf_sk_lookup, remote_ip4):
11475	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
11476	bpf_target_off(struct bpf_sk_lookup_kern,
11477	v4.saddr, `4`, target_size));
11478	break;
11479
11480	case offsetof(struct bpf_sk_lookup, local_ip4):
11481	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
11482	bpf_target_off(struct bpf_sk_lookup_kern,
11483	v4.daddr, `4`, target_size));
11484	break;
11485
11486	case bpf_ctx_range_till(struct bpf_sk_lookup,
11487	remote_ip6[`0`], remote_ip6[`3`]): {
11488	#if IS_ENABLED(CONFIG_IPV6)
11489	int off = si->off;
11490
11491	off -= offsetof(struct bpf_sk_lookup, remote_ip6[`0`]);
11492	off += bpf_target_off(struct in6_addr, s6_addr32[`0`], `4`, target_size);
11493	insn++ = BPF_LDX_MEM(BPF_SIZEOF(void* *), si->dst_reg, si->src_reg,
11494	offsetof(struct bpf_sk_lookup_kern, v6.saddr));
11495	*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, `0`, `1`);
11496	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
11497	#else
11498	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
11499	#endif
11500	break;
11501	}
11502	case bpf_ctx_range_till(struct bpf_sk_lookup,
11503	local_ip6[`0`], local_ip6[`3`]): {
11504	#if IS_ENABLED(CONFIG_IPV6)
11505	int off = si->off;
11506
11507	off -= offsetof(struct bpf_sk_lookup, local_ip6[`0`]);
11508	off += bpf_target_off(struct in6_addr, s6_addr32[`0`], `4`, target_size);
11509	insn++ = BPF_LDX_MEM(BPF_SIZEOF(void* *), si->dst_reg, si->src_reg,
11510	offsetof(struct bpf_sk_lookup_kern, v6.daddr));
11511	*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, `0`, `1`);
11512	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
11513	#else
11514	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
11515	#endif
11516	break;
11517	}
11518	case offsetof(struct bpf_sk_lookup, remote_port):
11519	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
11520	bpf_target_off(struct bpf_sk_lookup_kern,
11521	sport, `2`, target_size));
11522	break;
11523
11524	case offsetofend(struct bpf_sk_lookup, remote_port):
11525	*target_size = `2`;
11526	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
11527	break;
11528
11529	case offsetof(struct bpf_sk_lookup, local_port):
11530	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
11531	bpf_target_off(struct bpf_sk_lookup_kern,
11532	dport, `2`, target_size));
11533	break;
11534
11535	case offsetof(struct bpf_sk_lookup, ingress_ifindex):
11536	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
11537	bpf_target_off(struct bpf_sk_lookup_kern,
11538	ingress_ifindex, `4`, target_size));
11539	break;
11540	}
11541
11542	return insn - insn_buf;
11543	}
11544
11545	const struct bpf_prog_ops sk_lookup_prog_ops = {
11546	.test_run = bpf_prog_test_run_sk_lookup,
11547	};
11548
11549	const struct bpf_verifier_ops sk_lookup_verifier_ops = {
11550	.get_func_proto = sk_lookup_func_proto,
11551	.is_valid_access = sk_lookup_is_valid_access,
11552	.convert_ctx_access = sk_lookup_convert_ctx_access,
11553	};
11554
11555	#endif /* CONFIG_INET */
11556
11557	DEFINE_BPF_DISPATCHER(xdp)
11558
11559	void bpf_prog_change_xdp(struct bpf_prog prev_prog, struct* bpf_prog *prog)
11560	{
11561	bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), from: prev_prog, to: prog);
11562	}
11563
11564	BTF_ID_LIST_GLOBAL(btf_sock_ids, MAX_BTF_SOCK_TYPE)
11565	#define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type)
11566	BTF_SOCK_TYPE_xxx
11567	#undef BTF_SOCK_TYPE
11568
11569	BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
11570	{
11571	/ tcp6_sock type is not generated in dwarf and hence btf,*
11572	* trigger an explicit type generation here.
11573	*/
11574	BTF_TYPE_EMIT(struct tcp6_sock);
11575	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
11576	sk->sk_family == AF_INET6)
11577	return (unsigned long)sk;
11578
11579	return (unsigned long)NULL;
11580	}
11581
11582	const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
11583	.func = bpf_skc_to_tcp6_sock,
11584	.gpl_only = false,
11585	.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
11586	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
11587	.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP6],
11588	};
11589
11590	BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk)
11591	{
11592	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
11593	return (unsigned long)sk;
11594
11595	return (unsigned long)NULL;
11596	}
11597
11598	const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = {
11599	.func = bpf_skc_to_tcp_sock,
11600	.gpl_only = false,
11601	.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
11602	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
11603	.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
11604	};
11605
11606	BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk)
11607	{
11608	/ BTF types for tcp_timewait_sock and inet_timewait_sock are not*
11609	* generated if CONFIG_INET=n. Trigger an explicit generation here.
11610	*/
11611	BTF_TYPE_EMIT(struct inet_timewait_sock);
11612	BTF_TYPE_EMIT(struct tcp_timewait_sock);
11613
11614	#ifdef CONFIG_INET
11615	if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT)
11616	return (unsigned long)sk;
11617	#endif
11618
11619	#if IS_BUILTIN(CONFIG_IPV6)
11620	if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT)
11621	return (unsigned long)sk;
11622	#endif
11623
11624	return (unsigned long)NULL;
11625	}
11626
11627	const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = {
11628	.func = bpf_skc_to_tcp_timewait_sock,
11629	.gpl_only = false,
11630	.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
11631	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
11632	.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW],
11633	};
11634
11635	BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk)
11636	{
11637	#ifdef CONFIG_INET
11638	if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV)
11639	return (unsigned long)sk;
11640	#endif
11641
11642	#if IS_BUILTIN(CONFIG_IPV6)
11643	if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV)
11644	return (unsigned long)sk;
11645	#endif
11646
11647	return (unsigned long)NULL;
11648	}
11649
11650	const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
11651	.func = bpf_skc_to_tcp_request_sock,
11652	.gpl_only = false,
11653	.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
11654	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
11655	.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
11656	};
11657
11658	BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk)
11659	{
11660	/ udp6_sock type is not generated in dwarf and hence btf,*
11661	* trigger an explicit type generation here.
11662	*/
11663	BTF_TYPE_EMIT(struct udp6_sock);
11664	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
11665	sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6)
11666	return (unsigned long)sk;
11667
11668	return (unsigned long)NULL;
11669	}
11670
11671	const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
11672	.func = bpf_skc_to_udp6_sock,
11673	.gpl_only = false,
11674	.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
11675	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
11676	.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
11677	};
11678
11679	BPF_CALL_1(bpf_skc_to_unix_sock, struct sock *, sk)
11680	{
11681	/ unix_sock type is not generated in dwarf and hence btf,*
11682	* trigger an explicit type generation here.
11683	*/
11684	BTF_TYPE_EMIT(struct unix_sock);
11685	if (sk && sk_fullsock(sk) && sk->sk_family == AF_UNIX)
11686	return (unsigned long)sk;
11687
11688	return (unsigned long)NULL;
11689	}
11690
11691	const struct bpf_func_proto bpf_skc_to_unix_sock_proto = {
11692	.func = bpf_skc_to_unix_sock,
11693	.gpl_only = false,
11694	.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
11695	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
11696	.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UNIX],
11697	};
11698
11699	BPF_CALL_1(bpf_skc_to_mptcp_sock, struct sock *, sk)
11700	{
11701	BTF_TYPE_EMIT(struct mptcp_sock);
11702	return (unsigned long)bpf_mptcp_sock_from_subflow(sk);
11703	}
11704
11705	const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = {
11706	.func = bpf_skc_to_mptcp_sock,
11707	.gpl_only = false,
11708	.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
11709	.arg1_type = ARG_PTR_TO_SOCK_COMMON,
11710	.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_MPTCP],
11711	};
11712
11713	BPF_CALL_1(bpf_sock_from_file, struct file *, file)
11714	{
11715	return (unsigned long)sock_from_file(file);
11716	}
11717
11718	BTF_ID_LIST(bpf_sock_from_file_btf_ids)
11719	BTF_ID(struct, socket)
11720	BTF_ID(struct, file)
11721
11722	const struct bpf_func_proto bpf_sock_from_file_proto = {
11723	.func = bpf_sock_from_file,
11724	.gpl_only = false,
11725	.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
11726	.ret_btf_id = &bpf_sock_from_file_btf_ids[`0`],
11727	.arg1_type = ARG_PTR_TO_BTF_ID,
11728	.arg1_btf_id = &bpf_sock_from_file_btf_ids[`1`],
11729	};
11730
11731	static const struct bpf_func_proto *
11732	bpf_sk_base_func_proto(enum bpf_func_id func_id)
11733	{
11734	const struct bpf_func_proto *func;
11735
11736	switch (func_id) {
11737	case BPF_FUNC_skc_to_tcp6_sock:
11738	func = &bpf_skc_to_tcp6_sock_proto;
11739	break;
11740	case BPF_FUNC_skc_to_tcp_sock:
11741	func = &bpf_skc_to_tcp_sock_proto;
11742	break;
11743	case BPF_FUNC_skc_to_tcp_timewait_sock:
11744	func = &bpf_skc_to_tcp_timewait_sock_proto;
11745	break;
11746	case BPF_FUNC_skc_to_tcp_request_sock:
11747	func = &bpf_skc_to_tcp_request_sock_proto;
11748	break;
11749	case BPF_FUNC_skc_to_udp6_sock:
11750	func = &bpf_skc_to_udp6_sock_proto;
11751	break;
11752	case BPF_FUNC_skc_to_unix_sock:
11753	func = &bpf_skc_to_unix_sock_proto;
11754	break;
11755	case BPF_FUNC_skc_to_mptcp_sock:
11756	func = &bpf_skc_to_mptcp_sock_proto;
11757	break;
11758	case BPF_FUNC_ktime_get_coarse_ns:
11759	return &bpf_ktime_get_coarse_ns_proto;
11760	default:
11761	return bpf_base_func_proto(func_id);
11762	}
11763
11764	if (!perfmon_capable())
11765	return NULL;
11766
11767	return func;
11768	}
11769
11770	__diag_push();
11771	__diag_ignore_all("-Wmissing-prototypes",
11772	"Global functions as their definitions will be in vmlinux BTF");
11773	__bpf_kfunc int bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags,
11774	struct bpf_dynptr_kern *ptr__uninit)
11775	{
11776	if (flags) {
11777	bpf_dynptr_set_null(ptr: ptr__uninit);
11778	return -EINVAL;
11779	}
11780
11781	bpf_dynptr_init(ptr: ptr__uninit, data: skb, type: BPF_DYNPTR_TYPE_SKB, offset: `0`, size: skb->len);
11782
11783	return `0`;
11784	}
11785
11786	__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags,
11787	struct bpf_dynptr_kern *ptr__uninit)
11788	{
11789	if (flags) {
11790	bpf_dynptr_set_null(ptr: ptr__uninit);
11791	return -EINVAL;
11792	}
11793
11794	bpf_dynptr_init(ptr: ptr__uninit, data: xdp, type: BPF_DYNPTR_TYPE_XDP, offset: `0`, size: xdp_get_buff_len(xdp));
11795
11796	return `0`;
11797	}
11798
11799	__bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern,
11800	const u8 *sun_path, u32 sun_path__sz)
11801	{
11802	struct sockaddr_un *un;
11803
11804	if (sa_kern->sk->sk_family != AF_UNIX)
11805	return -EINVAL;
11806
11807	/ We do not allow changing the address to unnamed or larger than the*
11808	* maximum allowed address size for a unix sockaddr.
11809	*/
11810	if (sun_path__sz == `0` \|\| sun_path__sz > UNIX_PATH_MAX)
11811	return -EINVAL;
11812
11813	un = (struct sockaddr_un *)sa_kern->uaddr;
11814	memcpy(un->sun_path, sun_path, sun_path__sz);
11815	sa_kern->uaddrlen = offsetof(struct sockaddr_un, sun_path) + sun_path__sz;
11816
11817	return `0`;
11818	}
11819	__diag_pop();
11820
11821	int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
11822	struct bpf_dynptr_kern *ptr__uninit)
11823	{
11824	int err;
11825
11826	err = bpf_dynptr_from_skb(skb, flags, ptr__uninit);
11827	if (err)
11828	return err;
11829
11830	bpf_dynptr_set_rdonly(ptr: ptr__uninit);
11831
11832	return `0`;
11833	}
11834
11835	BTF_SET8_START(bpf_kfunc_check_set_skb)
11836	BTF_ID_FLAGS(func, bpf_dynptr_from_skb)
11837	BTF_SET8_END(bpf_kfunc_check_set_skb)
11838
11839	BTF_SET8_START(bpf_kfunc_check_set_xdp)
11840	BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
11841	BTF_SET8_END(bpf_kfunc_check_set_xdp)
11842
11843	BTF_SET8_START(bpf_kfunc_check_set_sock_addr)
11844	BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path)
11845	BTF_SET8_END(bpf_kfunc_check_set_sock_addr)
11846
11847	static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
11848	.owner = THIS_MODULE,
11849	.set = &bpf_kfunc_check_set_skb,
11850	};
11851
11852	static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = {
11853	.owner = THIS_MODULE,
11854	.set = &bpf_kfunc_check_set_xdp,
11855	};
11856
11857	static const struct btf_kfunc_id_set bpf_kfunc_set_sock_addr = {
11858	.owner = THIS_MODULE,
11859	.set = &bpf_kfunc_check_set_sock_addr,
11860	};
11861
11862	static int __init bpf_kfunc_init(void)
11863	{
11864	int ret;
11865
11866	ret = register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_SCHED_CLS, s: &bpf_kfunc_set_skb);
11867	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_SCHED_ACT, s: &bpf_kfunc_set_skb);
11868	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_SK_SKB, s: &bpf_kfunc_set_skb);
11869	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_SOCKET_FILTER, s: &bpf_kfunc_set_skb);
11870	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_CGROUP_SKB, s: &bpf_kfunc_set_skb);
11871	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_LWT_OUT, s: &bpf_kfunc_set_skb);
11872	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_LWT_IN, s: &bpf_kfunc_set_skb);
11873	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_LWT_XMIT, s: &bpf_kfunc_set_skb);
11874	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_LWT_SEG6LOCAL, s: &bpf_kfunc_set_skb);
11875	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_NETFILTER, s: &bpf_kfunc_set_skb);
11876	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_XDP, s: &bpf_kfunc_set_xdp);
11877	return ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
11878	s: &bpf_kfunc_set_sock_addr);
11879	}
11880	late_initcall(bpf_kfunc_init);
11881
11882	/ Disables missing prototype warnings /
11883	__diag_push();
11884	__diag_ignore_all("-Wmissing-prototypes",
11885	"Global functions as their definitions will be in vmlinux BTF");
11886
11887	/ bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code.*
11888	*
11889	* The function expects a non-NULL pointer to a socket, and invokes the
11890	* protocol specific socket destroy handlers.
11891	*
11892	* The helper can only be called from BPF contexts that have acquired the socket
11893	* locks.
11894	*
11895	* Parameters:
11896	* @sock: Pointer to socket to be destroyed
11897	*
11898	* Return:
11899	* On error, may return EPROTONOSUPPORT, EINVAL.
11900	* EPROTONOSUPPORT if protocol specific destroy handler is not supported.
11901	* 0 otherwise
11902	*/
11903	__bpf_kfunc int bpf_sock_destroy(struct sock_common *sock)
11904	{
11905	struct sock sk = (struct* sock *)sock;
11906
11907	/ The locking semantics that allow for synchronous execution of the*
11908	* destroy handlers are only supported for TCP and UDP.
11909	* Supporting protocols will need to acquire sock lock in the BPF context
11910	* prior to invoking this kfunc.
11911	*/
11912	if (!sk->sk_prot->diag_destroy \|\| (sk->sk_protocol != IPPROTO_TCP &&
11913	sk->sk_protocol != IPPROTO_UDP))
11914	return -EOPNOTSUPP;
11915
11916	return sk->sk_prot->diag_destroy(sk, ECONNABORTED);
11917	}
11918
11919	__diag_pop()
11920
11921	BTF_SET8_START(bpf_sk_iter_kfunc_ids)
11922	BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS)
11923	BTF_SET8_END(bpf_sk_iter_kfunc_ids)
11924
11925	static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id)
11926	{
11927	if (btf_id_set8_contains(set: &bpf_sk_iter_kfunc_ids, id: kfunc_id) &&
11928	prog->expected_attach_type != BPF_TRACE_ITER)
11929	return -EACCES;
11930	return `0`;
11931	}
11932
11933	static const struct btf_kfunc_id_set bpf_sk_iter_kfunc_set = {
11934	.owner = THIS_MODULE,
11935	.set = &bpf_sk_iter_kfunc_ids,
11936	.filter = tracing_iter_filter,
11937	};
11938
11939	static int init_subsystem(void)
11940	{
11941	return register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_TRACING, s: &bpf_sk_iter_kfunc_set);
11942	}
11943	late_initcall(init_subsystem);
11944

source code of linux/net/core/filter.c