1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2019 Facebook */
3
4#include <linux/bpf.h>
5#include <netinet/in.h>
6#include <stdbool.h>
7
8#include <bpf/bpf_helpers.h>
9#include <bpf/bpf_endian.h>
10#include "bpf_tcp_helpers.h"
11
/* Slots of linum_map: each program in this file logs into its own slot. */
enum bpf_linum_array_idx {
	EGRESS_LINUM_IDX = 0,		/* egress_read_sock_fields() */
	INGRESS_LINUM_IDX = 1,		/* ingress_read_sock_fields() */
	READ_SK_DST_PORT_LINUM_IDX = 2,	/* read_sk_dst_port() */
	__NR_BPF_LINUM_ARRAY_IDX,	/* number of slots; keep last */
};
18
/* Array map with one __u32 value per bpf_linum_array_idx slot.
 * RET_LOG() stores the source line of the bail-out point here.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX);
	__type(key, __u32);
	__type(value, __u32);
} linum_map SEC(".maps");
25
/* Value type of the two sk-storage maps below: a counter plus the
 * spin lock that may be used to protect it.
 */
struct bpf_spinlock_cnt {
	struct bpf_spin_lock lock;
	__u32 cnt;
};
30
/* Per-socket storage holding a packet counter.
 * egress_read_sock_fields() adds 1 to it per packet without taking the lock.
 */
struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct bpf_spinlock_cnt);
} sk_pkt_out_cnt SEC(".maps");
37
/* Per-socket storage holding a second packet counter.
 * egress_read_sock_fields() adds 10 to it per packet while holding the lock.
 */
struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct bpf_spinlock_cnt);
} sk_pkt_out_cnt10 SEC(".maps");
44
/* Zero-initialised globals used to pass data out of the programs below.
 *
 * listen_sk/listen_tp are filled by ingress_read_sock_fields();
 * srv_sk/srv_tp and cli_sk/cli_tp are filled by egress_read_sock_fields().
 */
struct bpf_tcp_sock listen_tp = {};
/* Server address. This file only reads it (sin6_port is compared against
 * socket ports); presumably it is set by the userspace side of the test
 * before traffic is generated -- confirm against the loader.
 */
struct sockaddr_in6 srv_sa6 = {};
struct bpf_tcp_sock cli_tp = {};
struct bpf_tcp_sock srv_tp = {};
struct bpf_sock listen_sk = {};
struct bpf_sock srv_sk = {};
struct bpf_sock cli_sk = {};
/* The three values below are written by egress_read_sock_fields() when it
 * handles the server socket.
 */
__u64 parent_cg_id = 0;
__u64 child_cg_id = 0;
__u64 lsndtime = 0;
55
56static bool is_loopback6(__u32 *a6)
57{
58 return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
59}
60
/* Copy the bpf_sock fields listed below, one at a time, from @src to @dst.
 *
 * NOTE(review): the fields are presumably copied individually (and the
 * address words with constant indices) instead of using a struct
 * assignment or a loop because @src is a BPF socket pointer whose field
 * accesses are checked one by one -- confirm before restructuring.
 */
static void skcpy(struct bpf_sock *dst,
		  const struct bpf_sock *src)
{
	dst->bound_dev_if = src->bound_dev_if;
	dst->family = src->family;
	dst->type = src->type;
	dst->protocol = src->protocol;
	dst->mark = src->mark;
	dst->priority = src->priority;
	/* Source address (IPv4 word, then the four IPv6 words) and port */
	dst->src_ip4 = src->src_ip4;
	dst->src_ip6[0] = src->src_ip6[0];
	dst->src_ip6[1] = src->src_ip6[1];
	dst->src_ip6[2] = src->src_ip6[2];
	dst->src_ip6[3] = src->src_ip6[3];
	dst->src_port = src->src_port;
	/* Destination address and port */
	dst->dst_ip4 = src->dst_ip4;
	dst->dst_ip6[0] = src->dst_ip6[0];
	dst->dst_ip6[1] = src->dst_ip6[1];
	dst->dst_ip6[2] = src->dst_ip6[2];
	dst->dst_ip6[3] = src->dst_ip6[3];
	dst->dst_port = src->dst_port;
	dst->state = src->state;
}
84
/* Copy the bpf_tcp_sock fields listed below, one at a time, from @src
 * to @dst.
 *
 * NOTE(review): as with skcpy(), the per-field copy is presumably required
 * because @src is a BPF-provided pointer -- confirm before replacing it
 * with a struct assignment.
 */
static void tpcpy(struct bpf_tcp_sock *dst,
		  const struct bpf_tcp_sock *src)
{
	dst->snd_cwnd = src->snd_cwnd;
	dst->srtt_us = src->srtt_us;
	dst->rtt_min = src->rtt_min;
	dst->snd_ssthresh = src->snd_ssthresh;
	dst->rcv_nxt = src->rcv_nxt;
	dst->snd_nxt = src->snd_nxt;
	dst->snd_una = src->snd_una;
	dst->mss_cache = src->mss_cache;
	dst->ecn_flags = src->ecn_flags;
	dst->rate_delivered = src->rate_delivered;
	dst->rate_interval_us = src->rate_interval_us;
	dst->packets_out = src->packets_out;
	dst->retrans_out = src->retrans_out;
	dst->total_retrans = src->total_retrans;
	dst->segs_in = src->segs_in;
	dst->data_segs_in = src->data_segs_in;
	dst->segs_out = src->segs_out;
	dst->data_segs_out = src->data_segs_out;
	dst->lost_out = src->lost_out;
	dst->sacked_out = src->sacked_out;
	dst->bytes_received = src->bytes_received;
	dst->bytes_acked = src->bytes_acked;
}
111
/* Verdict returned by every program in this file: always CG_OK, so that
 * no packet is filtered out.
 */
#define CG_OK 1

/* Store the current source line in linum_map[linum_idx] and return CG_OK
 * from the enclosing function. The call site must have __u32 locals named
 * `linum` and `linum_idx` in scope.
 */
#define RET_LOG() ({						\
	linum = __LINE__;					\
	bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_ANY);	\
	return CG_OK;						\
})
120
121SEC("cgroup_skb/egress")
122int egress_read_sock_fields(struct __sk_buff *skb)
123{
124 struct bpf_spinlock_cnt cli_cnt_init = { .lock = {}, .cnt = 0xeB9F };
125 struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10;
126 struct bpf_tcp_sock *tp, *tp_ret;
127 struct bpf_sock *sk, *sk_ret;
128 __u32 linum, linum_idx;
129 struct tcp_sock *ktp;
130
131 linum_idx = EGRESS_LINUM_IDX;
132
133 sk = skb->sk;
134 if (!sk)
135 RET_LOG();
136
137 /* Not testing the egress traffic or the listening socket,
138 * which are covered by the cgroup_skb/ingress test program.
139 */
140 if (sk->family != AF_INET6 || !is_loopback6(a6: sk->src_ip6) ||
141 sk->state == BPF_TCP_LISTEN)
142 return CG_OK;
143
144 if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) {
145 /* Server socket */
146 sk_ret = &srv_sk;
147 tp_ret = &srv_tp;
148 } else if (sk->dst_port == srv_sa6.sin6_port) {
149 /* Client socket */
150 sk_ret = &cli_sk;
151 tp_ret = &cli_tp;
152 } else {
153 /* Not the testing egress traffic */
154 return CG_OK;
155 }
156
157 /* It must be a fullsock for cgroup_skb/egress prog */
158 sk = bpf_sk_fullsock(sk);
159 if (!sk)
160 RET_LOG();
161
162 /* Not the testing egress traffic */
163 if (sk->protocol != IPPROTO_TCP)
164 return CG_OK;
165
166 tp = bpf_tcp_sock(sk);
167 if (!tp)
168 RET_LOG();
169
170 skcpy(dst: sk_ret, src: sk);
171 tpcpy(dst: tp_ret, src: tp);
172
173 if (sk_ret == &srv_sk) {
174 ktp = bpf_skc_to_tcp_sock(sk);
175
176 if (!ktp)
177 RET_LOG();
178
179 lsndtime = ktp->lsndtime;
180
181 child_cg_id = bpf_sk_cgroup_id(ktp);
182 if (!child_cg_id)
183 RET_LOG();
184
185 parent_cg_id = bpf_sk_ancestor_cgroup_id(ktp, 2);
186 if (!parent_cg_id)
187 RET_LOG();
188
189 /* The userspace has created it for srv sk */
190 pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, ktp, 0, 0);
191 pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, ktp,
192 0, 0);
193 } else {
194 pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk,
195 &cli_cnt_init,
196 BPF_SK_STORAGE_GET_F_CREATE);
197 pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10,
198 sk, &cli_cnt_init,
199 BPF_SK_STORAGE_GET_F_CREATE);
200 }
201
202 if (!pkt_out_cnt || !pkt_out_cnt10)
203 RET_LOG();
204
205 /* Even both cnt and cnt10 have lock defined in their BTF,
206 * intentionally one cnt takes lock while one does not
207 * as a test for the spinlock support in BPF_MAP_TYPE_SK_STORAGE.
208 */
209 pkt_out_cnt->cnt += 1;
210 bpf_spin_lock(&pkt_out_cnt10->lock);
211 pkt_out_cnt10->cnt += 10;
212 bpf_spin_unlock(&pkt_out_cnt10->lock);
213
214 return CG_OK;
215}
216
217SEC("cgroup_skb/ingress")
218int ingress_read_sock_fields(struct __sk_buff *skb)
219{
220 struct bpf_tcp_sock *tp;
221 __u32 linum, linum_idx;
222 struct bpf_sock *sk;
223
224 linum_idx = INGRESS_LINUM_IDX;
225
226 sk = skb->sk;
227 if (!sk)
228 RET_LOG();
229
230 /* Not the testing ingress traffic to the server */
231 if (sk->family != AF_INET6 || !is_loopback6(a6: sk->src_ip6) ||
232 sk->src_port != bpf_ntohs(srv_sa6.sin6_port))
233 return CG_OK;
234
235 /* Only interested in the listening socket */
236 if (sk->state != BPF_TCP_LISTEN)
237 return CG_OK;
238
239 /* It must be a fullsock for cgroup_skb/ingress prog */
240 sk = bpf_sk_fullsock(sk);
241 if (!sk)
242 RET_LOG();
243
244 tp = bpf_tcp_sock(sk);
245 if (!tp)
246 RET_LOG();
247
248 skcpy(dst: &listen_sk, src: sk);
249 tpcpy(dst: &listen_tp, src: tp);
250
251 return CG_OK;
252}
253
/*
 * Check dst_port through a 4-byte load.
 *
 * NOTE: 4-byte load from bpf_sock at dst_port offset is quirky. It
 * gets rewritten by the access converter to a 2-byte load for
 * backward compatibility. Treating the load result as a be16 value
 * makes the code portable across little- and big-endian platforms.
 *
 * Returns true when the loaded word equals bpf_htons(0xcafe).
 */
static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk)
{
	__u32 *word = (__u32 *)&sk->dst_port;
	return word[0] == bpf_htons(0xcafe);
}
265
/* Check dst_port through a 2-byte load.
 * Returns true when the loaded half-word equals bpf_htons(0xcafe).
 */
static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk)
{
	__u16 *half;

	/* NOTE(review): empty volatile asm statement; presumably placed here
	 * to influence code generation for the load below -- confirm
	 * before moving or removing it.
	 */
	asm volatile ("");
	half = (__u16 *)&sk->dst_port;
	return half[0] == bpf_htons(0xcafe);
}
274
/* Check dst_port through two 1-byte loads.
 * Returns true when the first byte is 0xca and the second is 0xfe,
 * i.e. the bytes of 0xcafe in the order they are stored.
 */
static __noinline bool sk_dst_port__load_byte(struct bpf_sock *sk)
{
	__u8 *byte = (__u8 *)&sk->dst_port;
	return byte[0] == 0xca && byte[1] == 0xfe;
}
280
/*
 * cgroup_skb/egress program: for a socket in SYN_SENT state, check that
 * dst_port reads as 0xcafe through 4-byte, 2-byte and 1-byte loads.
 *
 * Always returns CG_OK. Each failing check exits through its own
 * RET_LOG(), so linum_map[READ_SK_DST_PORT_LINUM_IDX] identifies which
 * load width failed.
 */
SEC("cgroup_skb/egress")
int read_sk_dst_port(struct __sk_buff *skb)
{
	__u32 linum, linum_idx;
	struct bpf_sock *sk;

	linum_idx = READ_SK_DST_PORT_LINUM_IDX;

	sk = skb->sk;
	if (!sk)
		RET_LOG();

	/* Ignore everything but the SYN from the client socket */
	if (sk->state != BPF_TCP_SYN_SENT)
		return CG_OK;

	/* Keep the checks separate: each RET_LOG() records its own line */
	if (!sk_dst_port__load_word(sk))
		RET_LOG();
	if (!sk_dst_port__load_half(sk))
		RET_LOG();
	if (!sk_dst_port__load_byte(sk))
		RET_LOG();

	return CG_OK;
}
306
/* License string, emitted into the "license" section of the object file. */
char _license[] SEC("license") = "GPL";
308

/* Source: linux/tools/testing/selftests/bpf/progs/test_sock_fields.c */