1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #include <string.h> |
3 | #include <linux/tcp.h> |
4 | #include <linux/bpf.h> |
5 | #include <netinet/in.h> |
6 | #include <bpf/bpf_helpers.h> |
7 | |
8 | char _license[] SEC("license" ) = "GPL" ; |
9 | |
/* Page size used by the programs below to validate the size of the
 * visible optval window and of optlen. Starts at zero; userspace
 * should set it.
 */
int page_size = 0;
11 | |
/* Fall back to IPPROTO_TCP when the included headers do not define SOL_TCP. */
#ifndef SOL_TCP
#define SOL_TCP IPPROTO_TCP
#endif

/* Socket option level handled by the BPF programs in this file rather
 * than by the kernel.
 */
#define SOL_CUSTOM 0xdeadbeef
17 | |
18 | struct sockopt_sk { |
19 | __u8 val; |
20 | }; |
21 | |
22 | struct { |
23 | __uint(type, BPF_MAP_TYPE_SK_STORAGE); |
24 | __uint(map_flags, BPF_F_NO_PREALLOC); |
25 | __type(key, int); |
26 | __type(value, struct sockopt_sk); |
27 | } socket_storage_map SEC(".maps" ); |
28 | |
29 | SEC("cgroup/getsockopt" ) |
30 | int _getsockopt(struct bpf_sockopt *ctx) |
31 | { |
32 | __u8 *optval_end = ctx->optval_end; |
33 | __u8 *optval = ctx->optval; |
34 | struct sockopt_sk *storage; |
35 | struct bpf_sock *sk; |
36 | |
37 | /* Bypass AF_NETLINK. */ |
38 | sk = ctx->sk; |
39 | if (sk && sk->family == AF_NETLINK) |
40 | goto out; |
41 | |
42 | /* Make sure bpf_get_netns_cookie is callable. |
43 | */ |
44 | if (bpf_get_netns_cookie(NULL) == 0) |
45 | return 0; |
46 | |
47 | if (bpf_get_netns_cookie(ctx) == 0) |
48 | return 0; |
49 | |
50 | if (ctx->level == SOL_IP && ctx->optname == IP_TOS) { |
51 | /* Not interested in SOL_IP:IP_TOS; |
52 | * let next BPF program in the cgroup chain or kernel |
53 | * handle it. |
54 | */ |
55 | goto out; |
56 | } |
57 | |
58 | if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) { |
59 | /* Not interested in SOL_SOCKET:SO_SNDBUF; |
60 | * let next BPF program in the cgroup chain or kernel |
61 | * handle it. |
62 | */ |
63 | goto out; |
64 | } |
65 | |
66 | if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) { |
67 | /* Not interested in SOL_TCP:TCP_CONGESTION; |
68 | * let next BPF program in the cgroup chain or kernel |
69 | * handle it. |
70 | */ |
71 | goto out; |
72 | } |
73 | |
74 | if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) { |
75 | /* Verify that TCP_ZEROCOPY_RECEIVE triggers. |
76 | * It has a custom implementation for performance |
77 | * reasons. |
78 | */ |
79 | |
80 | /* Check that optval contains address (__u64) */ |
81 | if (optval + sizeof(__u64) > optval_end) |
82 | return 0; /* bounds check */ |
83 | |
84 | if (((struct tcp_zerocopy_receive *)optval)->address != 0) |
85 | return 0; /* unexpected data */ |
86 | |
87 | goto out; |
88 | } |
89 | |
90 | if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { |
91 | if (optval + 1 > optval_end) |
92 | return 0; /* bounds check */ |
93 | |
94 | ctx->retval = 0; /* Reset system call return value to zero */ |
95 | |
96 | /* Always export 0x55 */ |
97 | optval[0] = 0x55; |
98 | ctx->optlen = 1; |
99 | |
100 | /* Userspace buffer is PAGE_SIZE * 2, but BPF |
101 | * program can only see the first PAGE_SIZE |
102 | * bytes of data. |
103 | */ |
104 | if (optval_end - optval != page_size) |
105 | return 0; /* unexpected data size */ |
106 | |
107 | return 1; |
108 | } |
109 | |
110 | if (ctx->level != SOL_CUSTOM) |
111 | return 0; /* deny everything except custom level */ |
112 | |
113 | if (optval + 1 > optval_end) |
114 | return 0; /* bounds check */ |
115 | |
116 | storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0, |
117 | BPF_SK_STORAGE_GET_F_CREATE); |
118 | if (!storage) |
119 | return 0; /* couldn't get sk storage */ |
120 | |
121 | if (!ctx->retval) |
122 | return 0; /* kernel should not have handled |
123 | * SOL_CUSTOM, something is wrong! |
124 | */ |
125 | ctx->retval = 0; /* Reset system call return value to zero */ |
126 | |
127 | optval[0] = storage->val; |
128 | ctx->optlen = 1; |
129 | |
130 | return 1; |
131 | |
132 | out: |
133 | /* optval larger than PAGE_SIZE use kernel's buffer. */ |
134 | if (ctx->optlen > page_size) |
135 | ctx->optlen = 0; |
136 | return 1; |
137 | } |
138 | |
139 | SEC("cgroup/setsockopt" ) |
140 | int _setsockopt(struct bpf_sockopt *ctx) |
141 | { |
142 | __u8 *optval_end = ctx->optval_end; |
143 | __u8 *optval = ctx->optval; |
144 | struct sockopt_sk *storage; |
145 | struct bpf_sock *sk; |
146 | |
147 | /* Bypass AF_NETLINK. */ |
148 | sk = ctx->sk; |
149 | if (sk && sk->family == AF_NETLINK) |
150 | goto out; |
151 | |
152 | /* Make sure bpf_get_netns_cookie is callable. |
153 | */ |
154 | if (bpf_get_netns_cookie(NULL) == 0) |
155 | return 0; |
156 | |
157 | if (bpf_get_netns_cookie(ctx) == 0) |
158 | return 0; |
159 | |
160 | if (ctx->level == SOL_IP && ctx->optname == IP_TOS) { |
161 | /* Not interested in SOL_IP:IP_TOS; |
162 | * let next BPF program in the cgroup chain or kernel |
163 | * handle it. |
164 | */ |
165 | ctx->optlen = 0; /* bypass optval>PAGE_SIZE */ |
166 | return 1; |
167 | } |
168 | |
169 | if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) { |
170 | /* Overwrite SO_SNDBUF value */ |
171 | |
172 | if (optval + sizeof(__u32) > optval_end) |
173 | return 0; /* bounds check */ |
174 | |
175 | *(__u32 *)optval = 0x55AA; |
176 | ctx->optlen = 4; |
177 | |
178 | return 1; |
179 | } |
180 | |
181 | if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) { |
182 | /* Always use cubic */ |
183 | |
184 | if (optval + 5 > optval_end) |
185 | return 0; /* bounds check */ |
186 | |
187 | memcpy(optval, "cubic" , 5); |
188 | ctx->optlen = 5; |
189 | |
190 | return 1; |
191 | } |
192 | |
193 | if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { |
194 | /* Original optlen is larger than PAGE_SIZE. */ |
195 | if (ctx->optlen != page_size * 2) |
196 | return 0; /* unexpected data size */ |
197 | |
198 | if (optval + 1 > optval_end) |
199 | return 0; /* bounds check */ |
200 | |
201 | /* Make sure we can trim the buffer. */ |
202 | optval[0] = 0; |
203 | ctx->optlen = 1; |
204 | |
205 | /* Usepace buffer is PAGE_SIZE * 2, but BPF |
206 | * program can only see the first PAGE_SIZE |
207 | * bytes of data. |
208 | */ |
209 | if (optval_end - optval != page_size) |
210 | return 0; /* unexpected data size */ |
211 | |
212 | return 1; |
213 | } |
214 | |
215 | if (ctx->level != SOL_CUSTOM) |
216 | return 0; /* deny everything except custom level */ |
217 | |
218 | if (optval + 1 > optval_end) |
219 | return 0; /* bounds check */ |
220 | |
221 | storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0, |
222 | BPF_SK_STORAGE_GET_F_CREATE); |
223 | if (!storage) |
224 | return 0; /* couldn't get sk storage */ |
225 | |
226 | storage->val = optval[0]; |
227 | ctx->optlen = -1; /* BPF has consumed this option, don't call kernel |
228 | * setsockopt handler. |
229 | */ |
230 | |
231 | return 1; |
232 | |
233 | out: |
234 | /* optval larger than PAGE_SIZE use kernel's buffer. */ |
235 | if (ctx->optlen > page_size) |
236 | ctx->optlen = 0; |
237 | return 1; |
238 | } |
239 | |