1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Copyright (c) 2020 Facebook */ |
3 | |
4 | #include <stddef.h> |
5 | #include <errno.h> |
6 | #include <stdbool.h> |
7 | #include <sys/types.h> |
8 | #include <sys/socket.h> |
9 | #include <linux/tcp.h> |
10 | #include <linux/socket.h> |
11 | #include <linux/bpf.h> |
12 | #include <linux/types.h> |
13 | #include <bpf/bpf_helpers.h> |
14 | #include <bpf/bpf_endian.h> |
15 | #define BPF_PROG_TEST_TCP_HDR_OPTIONS |
16 | #include "test_tcp_hdr_options.h" |
17 | |
/* Size in bytes of MEMBER inside TYPE, evaluated without an instance.
 * Only defined here when an included header has not already provided it.
 */
#ifndef sizeof_field
#define sizeof_field(TYPE, MEMBER) sizeof(((TYPE *)0)->MEMBER)
#endif
21 | |
22 | __u8 test_kind = TCPOPT_EXP; |
23 | __u16 test_magic = 0xeB9F; |
24 | __u32 inherit_cb_flags = 0; |
25 | |
26 | struct bpf_test_option passive_synack_out = {}; |
27 | struct bpf_test_option passive_fin_out = {}; |
28 | |
29 | struct bpf_test_option passive_estab_in = {}; |
30 | struct bpf_test_option passive_fin_in = {}; |
31 | |
32 | struct bpf_test_option active_syn_out = {}; |
33 | struct bpf_test_option active_fin_out = {}; |
34 | |
35 | struct bpf_test_option active_estab_in = {}; |
36 | struct bpf_test_option active_fin_in = {}; |
37 | |
38 | struct { |
39 | __uint(type, BPF_MAP_TYPE_SK_STORAGE); |
40 | __uint(map_flags, BPF_F_NO_PREALLOC); |
41 | __type(key, int); |
42 | __type(value, struct hdr_stg); |
43 | } hdr_stg_map SEC(".maps" ); |
44 | |
45 | static bool skops_want_cookie(const struct bpf_sock_ops *skops) |
46 | { |
47 | return skops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE; |
48 | } |
49 | |
50 | static bool skops_current_mss(const struct bpf_sock_ops *skops) |
51 | { |
52 | return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS; |
53 | } |
54 | |
55 | static __u8 option_total_len(__u8 flags) |
56 | { |
57 | __u8 i, len = 1; /* +1 for flags */ |
58 | |
59 | if (!flags) |
60 | return 0; |
61 | |
62 | /* RESEND bit does not use a byte */ |
63 | for (i = OPTION_RESEND + 1; i < __NR_OPTION_FLAGS; i++) |
64 | len += !!TEST_OPTION_FLAGS(flags, i); |
65 | |
66 | if (test_kind == TCPOPT_EXP) |
67 | return len + TCP_BPF_EXPOPT_BASE_LEN; |
68 | else |
69 | return len + 2; /* +1 kind, +1 kind-len */ |
70 | } |
71 | |
72 | static void write_test_option(const struct bpf_test_option *test_opt, |
73 | __u8 *data) |
74 | { |
75 | __u8 offset = 0; |
76 | |
77 | data[offset++] = test_opt->flags; |
78 | if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS)) |
79 | data[offset++] = test_opt->max_delack_ms; |
80 | |
81 | if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND)) |
82 | data[offset++] = test_opt->rand; |
83 | } |
84 | |
85 | static int store_option(struct bpf_sock_ops *skops, |
86 | const struct bpf_test_option *test_opt) |
87 | { |
88 | union { |
89 | struct tcp_exprm_opt exprm; |
90 | struct tcp_opt regular; |
91 | } write_opt; |
92 | int err; |
93 | |
94 | if (test_kind == TCPOPT_EXP) { |
95 | write_opt.exprm.kind = TCPOPT_EXP; |
96 | write_opt.exprm.len = option_total_len(flags: test_opt->flags); |
97 | write_opt.exprm.magic = __bpf_htons(test_magic); |
98 | write_opt.exprm.data32 = 0; |
99 | write_test_option(test_opt, data: write_opt.exprm.data); |
100 | err = bpf_store_hdr_opt(skops, &write_opt.exprm, |
101 | sizeof(write_opt.exprm), 0); |
102 | } else { |
103 | write_opt.regular.kind = test_kind; |
104 | write_opt.regular.len = option_total_len(flags: test_opt->flags); |
105 | write_opt.regular.data32 = 0; |
106 | write_test_option(test_opt, data: write_opt.regular.data); |
107 | err = bpf_store_hdr_opt(skops, &write_opt.regular, |
108 | sizeof(write_opt.regular), 0); |
109 | } |
110 | |
111 | if (err) |
112 | RET_CG_ERR(err); |
113 | |
114 | return CG_OK; |
115 | } |
116 | |
117 | static int parse_test_option(struct bpf_test_option *opt, const __u8 *start) |
118 | { |
119 | opt->flags = *start++; |
120 | |
121 | if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS)) |
122 | opt->max_delack_ms = *start++; |
123 | |
124 | if (TEST_OPTION_FLAGS(opt->flags, OPTION_RAND)) |
125 | opt->rand = *start++; |
126 | |
127 | return 0; |
128 | } |
129 | |
130 | static int load_option(struct bpf_sock_ops *skops, |
131 | struct bpf_test_option *test_opt, bool from_syn) |
132 | { |
133 | union { |
134 | struct tcp_exprm_opt exprm; |
135 | struct tcp_opt regular; |
136 | } search_opt; |
137 | int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0; |
138 | |
139 | if (test_kind == TCPOPT_EXP) { |
140 | search_opt.exprm.kind = TCPOPT_EXP; |
141 | search_opt.exprm.len = 4; |
142 | search_opt.exprm.magic = __bpf_htons(test_magic); |
143 | search_opt.exprm.data32 = 0; |
144 | ret = bpf_load_hdr_opt(skops, &search_opt.exprm, |
145 | sizeof(search_opt.exprm), load_flags); |
146 | if (ret < 0) |
147 | return ret; |
148 | return parse_test_option(opt: test_opt, start: search_opt.exprm.data); |
149 | } else { |
150 | search_opt.regular.kind = test_kind; |
151 | search_opt.regular.len = 0; |
152 | search_opt.regular.data32 = 0; |
153 | ret = bpf_load_hdr_opt(skops, &search_opt.regular, |
154 | sizeof(search_opt.regular), load_flags); |
155 | if (ret < 0) |
156 | return ret; |
157 | return parse_test_option(opt: test_opt, start: search_opt.regular.data); |
158 | } |
159 | } |
160 | |
161 | static int synack_opt_len(struct bpf_sock_ops *skops) |
162 | { |
163 | struct bpf_test_option test_opt = {}; |
164 | __u8 optlen; |
165 | int err; |
166 | |
167 | if (!passive_synack_out.flags) |
168 | return CG_OK; |
169 | |
170 | err = load_option(skops, test_opt: &test_opt, from_syn: true); |
171 | |
172 | /* bpf_test_option is not found */ |
173 | if (err == -ENOMSG) |
174 | return CG_OK; |
175 | |
176 | if (err) |
177 | RET_CG_ERR(err); |
178 | |
179 | optlen = option_total_len(flags: passive_synack_out.flags); |
180 | if (optlen) { |
181 | err = bpf_reserve_hdr_opt(skops, optlen, 0); |
182 | if (err) |
183 | RET_CG_ERR(err); |
184 | } |
185 | |
186 | return CG_OK; |
187 | } |
188 | |
189 | static int write_synack_opt(struct bpf_sock_ops *skops) |
190 | { |
191 | struct bpf_test_option opt; |
192 | |
193 | if (!passive_synack_out.flags) |
194 | /* We should not even be called since no header |
195 | * space has been reserved. |
196 | */ |
197 | RET_CG_ERR(0); |
198 | |
199 | opt = passive_synack_out; |
200 | if (skops_want_cookie(skops)) |
201 | SET_OPTION_FLAGS(opt.flags, OPTION_RESEND); |
202 | |
203 | return store_option(skops, test_opt: &opt); |
204 | } |
205 | |
206 | static int syn_opt_len(struct bpf_sock_ops *skops) |
207 | { |
208 | __u8 optlen; |
209 | int err; |
210 | |
211 | if (!active_syn_out.flags) |
212 | return CG_OK; |
213 | |
214 | optlen = option_total_len(flags: active_syn_out.flags); |
215 | if (optlen) { |
216 | err = bpf_reserve_hdr_opt(skops, optlen, 0); |
217 | if (err) |
218 | RET_CG_ERR(err); |
219 | } |
220 | |
221 | return CG_OK; |
222 | } |
223 | |
224 | static int write_syn_opt(struct bpf_sock_ops *skops) |
225 | { |
226 | if (!active_syn_out.flags) |
227 | RET_CG_ERR(0); |
228 | |
229 | return store_option(skops, test_opt: &active_syn_out); |
230 | } |
231 | |
232 | static int fin_opt_len(struct bpf_sock_ops *skops) |
233 | { |
234 | struct bpf_test_option *opt; |
235 | struct hdr_stg *hdr_stg; |
236 | __u8 optlen; |
237 | int err; |
238 | |
239 | if (!skops->sk) |
240 | RET_CG_ERR(0); |
241 | |
242 | hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); |
243 | if (!hdr_stg) |
244 | RET_CG_ERR(0); |
245 | |
246 | if (hdr_stg->active) |
247 | opt = &active_fin_out; |
248 | else |
249 | opt = &passive_fin_out; |
250 | |
251 | optlen = option_total_len(flags: opt->flags); |
252 | if (optlen) { |
253 | err = bpf_reserve_hdr_opt(skops, optlen, 0); |
254 | if (err) |
255 | RET_CG_ERR(err); |
256 | } |
257 | |
258 | return CG_OK; |
259 | } |
260 | |
261 | static int write_fin_opt(struct bpf_sock_ops *skops) |
262 | { |
263 | struct bpf_test_option *opt; |
264 | struct hdr_stg *hdr_stg; |
265 | |
266 | if (!skops->sk) |
267 | RET_CG_ERR(0); |
268 | |
269 | hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); |
270 | if (!hdr_stg) |
271 | RET_CG_ERR(0); |
272 | |
273 | if (hdr_stg->active) |
274 | opt = &active_fin_out; |
275 | else |
276 | opt = &passive_fin_out; |
277 | |
278 | if (!opt->flags) |
279 | RET_CG_ERR(0); |
280 | |
281 | return store_option(skops, test_opt: opt); |
282 | } |
283 | |
284 | static int resend_in_ack(struct bpf_sock_ops *skops) |
285 | { |
286 | struct hdr_stg *hdr_stg; |
287 | |
288 | if (!skops->sk) |
289 | return -1; |
290 | |
291 | hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); |
292 | if (!hdr_stg) |
293 | return -1; |
294 | |
295 | return !!hdr_stg->resend_syn; |
296 | } |
297 | |
298 | static int nodata_opt_len(struct bpf_sock_ops *skops) |
299 | { |
300 | int resend; |
301 | |
302 | resend = resend_in_ack(skops); |
303 | if (resend < 0) |
304 | RET_CG_ERR(0); |
305 | |
306 | if (resend) |
307 | return syn_opt_len(skops); |
308 | |
309 | return CG_OK; |
310 | } |
311 | |
312 | static int write_nodata_opt(struct bpf_sock_ops *skops) |
313 | { |
314 | int resend; |
315 | |
316 | resend = resend_in_ack(skops); |
317 | if (resend < 0) |
318 | RET_CG_ERR(0); |
319 | |
320 | if (resend) |
321 | return write_syn_opt(skops); |
322 | |
323 | return CG_OK; |
324 | } |
325 | |
/* Length handler for packets carrying data.  It does exactly what the
 * no-data handler does; it is kept separate mostly so the caller can
 * show an example of branching on skops->skb_len.
 */
static int data_opt_len(struct bpf_sock_ops *skops)
{
	int ret;

	ret = nodata_opt_len(skops);
	return ret;
}
333 | |
/* Write handler for packets carrying data; defers to the no-data one. */
static int write_data_opt(struct bpf_sock_ops *skops)
{
	int ret;

	ret = write_nodata_opt(skops);
	return ret;
}
338 | |
339 | static int current_mss_opt_len(struct bpf_sock_ops *skops) |
340 | { |
341 | /* Reserve maximum that may be needed */ |
342 | int err; |
343 | |
344 | err = bpf_reserve_hdr_opt(skops, option_total_len(flags: OPTION_MASK), 0); |
345 | if (err) |
346 | RET_CG_ERR(err); |
347 | |
348 | return CG_OK; |
349 | } |
350 | |
351 | static int handle_hdr_opt_len(struct bpf_sock_ops *skops) |
352 | { |
353 | __u8 tcp_flags = skops_tcp_flags(skops); |
354 | |
355 | if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK) |
356 | return synack_opt_len(skops); |
357 | |
358 | if (tcp_flags & TCPHDR_SYN) |
359 | return syn_opt_len(skops); |
360 | |
361 | if (tcp_flags & TCPHDR_FIN) |
362 | return fin_opt_len(skops); |
363 | |
364 | if (skops_current_mss(skops)) |
365 | /* The kernel is calculating the MSS */ |
366 | return current_mss_opt_len(skops); |
367 | |
368 | if (skops->skb_len) |
369 | return data_opt_len(skops); |
370 | |
371 | return nodata_opt_len(skops); |
372 | } |
373 | |
374 | static int handle_write_hdr_opt(struct bpf_sock_ops *skops) |
375 | { |
376 | __u8 tcp_flags = skops_tcp_flags(skops); |
377 | struct tcphdr *th; |
378 | |
379 | if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK) |
380 | return write_synack_opt(skops); |
381 | |
382 | if (tcp_flags & TCPHDR_SYN) |
383 | return write_syn_opt(skops); |
384 | |
385 | if (tcp_flags & TCPHDR_FIN) |
386 | return write_fin_opt(skops); |
387 | |
388 | th = skops->skb_data; |
389 | if (th + 1 > skops->skb_data_end) |
390 | RET_CG_ERR(0); |
391 | |
392 | if (skops->skb_len > tcp_hdrlen(skb: th)) |
393 | return write_data_opt(skops); |
394 | |
395 | return write_nodata_opt(skops); |
396 | } |
397 | |
398 | static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms) |
399 | { |
400 | __u32 max_delack_us = max_delack_ms * 1000; |
401 | |
402 | return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_DELACK_MAX, |
403 | &max_delack_us, sizeof(max_delack_us)); |
404 | } |
405 | |
406 | static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms) |
407 | { |
408 | __u32 min_rto_us = peer_max_delack_ms * 1000; |
409 | |
410 | return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN, &min_rto_us, |
411 | sizeof(min_rto_us)); |
412 | } |
413 | |
414 | static int handle_active_estab(struct bpf_sock_ops *skops) |
415 | { |
416 | struct hdr_stg init_stg = { |
417 | .active = true, |
418 | }; |
419 | int err; |
420 | |
421 | err = load_option(skops, test_opt: &active_estab_in, from_syn: false); |
422 | if (err && err != -ENOMSG) |
423 | RET_CG_ERR(err); |
424 | |
425 | init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags, |
426 | OPTION_RESEND); |
427 | if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk, |
428 | &init_stg, |
429 | BPF_SK_STORAGE_GET_F_CREATE)) |
430 | RET_CG_ERR(0); |
431 | |
432 | if (init_stg.resend_syn) |
433 | /* Don't clear the write_hdr cb now because |
434 | * the ACK may get lost and retransmit may |
435 | * be needed. |
436 | * |
437 | * PARSE_ALL_HDR cb flag is set to learn if this |
438 | * resend_syn option has received by the peer. |
439 | * |
440 | * The header option will be resent until a valid |
441 | * packet is received at handle_parse_hdr() |
442 | * and all hdr cb flags will be cleared in |
443 | * handle_parse_hdr(). |
444 | */ |
445 | set_parse_all_hdr_cb_flags(skops); |
446 | else if (!active_fin_out.flags) |
447 | /* No options will be written from now */ |
448 | clear_hdr_cb_flags(skops); |
449 | |
450 | if (active_syn_out.max_delack_ms) { |
451 | err = set_delack_max(skops, max_delack_ms: active_syn_out.max_delack_ms); |
452 | if (err) |
453 | RET_CG_ERR(err); |
454 | } |
455 | |
456 | if (active_estab_in.max_delack_ms) { |
457 | err = set_rto_min(skops, peer_max_delack_ms: active_estab_in.max_delack_ms); |
458 | if (err) |
459 | RET_CG_ERR(err); |
460 | } |
461 | |
462 | return CG_OK; |
463 | } |
464 | |
465 | static int handle_passive_estab(struct bpf_sock_ops *skops) |
466 | { |
467 | struct hdr_stg init_stg = {}; |
468 | struct tcphdr *th; |
469 | int err; |
470 | |
471 | inherit_cb_flags = skops->bpf_sock_ops_cb_flags; |
472 | |
473 | err = load_option(skops, test_opt: &passive_estab_in, from_syn: true); |
474 | if (err == -ENOENT) { |
475 | /* saved_syn is not found. It was in syncookie mode. |
476 | * We have asked the active side to resend the options |
477 | * in ACK, so try to find the bpf_test_option from ACK now. |
478 | */ |
479 | err = load_option(skops, test_opt: &passive_estab_in, from_syn: false); |
480 | init_stg.syncookie = true; |
481 | } |
482 | |
483 | /* ENOMSG: The bpf_test_option is not found which is fine. |
484 | * Bail out now for all other errors. |
485 | */ |
486 | if (err && err != -ENOMSG) |
487 | RET_CG_ERR(err); |
488 | |
489 | th = skops->skb_data; |
490 | if (th + 1 > skops->skb_data_end) |
491 | RET_CG_ERR(0); |
492 | |
493 | if (th->syn) { |
494 | /* Fastopen */ |
495 | |
496 | /* Cannot clear cb_flags to stop write_hdr cb. |
497 | * synack is not sent yet for fast open. |
498 | * Even it was, the synack may need to be retransmitted. |
499 | * |
500 | * PARSE_ALL_HDR cb flag is set to learn |
501 | * if synack has reached the peer. |
502 | * All cb_flags will be cleared in handle_parse_hdr(). |
503 | */ |
504 | set_parse_all_hdr_cb_flags(skops); |
505 | init_stg.fastopen = true; |
506 | } else if (!passive_fin_out.flags) { |
507 | /* No options will be written from now */ |
508 | clear_hdr_cb_flags(skops); |
509 | } |
510 | |
511 | if (!skops->sk || |
512 | !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg, |
513 | BPF_SK_STORAGE_GET_F_CREATE)) |
514 | RET_CG_ERR(0); |
515 | |
516 | if (passive_synack_out.max_delack_ms) { |
517 | err = set_delack_max(skops, max_delack_ms: passive_synack_out.max_delack_ms); |
518 | if (err) |
519 | RET_CG_ERR(err); |
520 | } |
521 | |
522 | if (passive_estab_in.max_delack_ms) { |
523 | err = set_rto_min(skops, peer_max_delack_ms: passive_estab_in.max_delack_ms); |
524 | if (err) |
525 | RET_CG_ERR(err); |
526 | } |
527 | |
528 | return CG_OK; |
529 | } |
530 | |
531 | static int handle_parse_hdr(struct bpf_sock_ops *skops) |
532 | { |
533 | struct hdr_stg *hdr_stg; |
534 | struct tcphdr *th; |
535 | |
536 | if (!skops->sk) |
537 | RET_CG_ERR(0); |
538 | |
539 | th = skops->skb_data; |
540 | if (th + 1 > skops->skb_data_end) |
541 | RET_CG_ERR(0); |
542 | |
543 | hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); |
544 | if (!hdr_stg) |
545 | RET_CG_ERR(0); |
546 | |
547 | if (hdr_stg->resend_syn || hdr_stg->fastopen) |
548 | /* The PARSE_ALL_HDR cb flag was turned on |
549 | * to ensure that the previously written |
550 | * options have reached the peer. |
551 | * Those previously written option includes: |
552 | * - Active side: resend_syn in ACK during syncookie |
553 | * or |
554 | * - Passive side: SYNACK during fastopen |
555 | * |
556 | * A valid packet has been received here after |
557 | * the 3WHS, so the PARSE_ALL_HDR cb flag |
558 | * can be cleared now. |
559 | */ |
560 | clear_parse_all_hdr_cb_flags(skops); |
561 | |
562 | if (hdr_stg->resend_syn && !active_fin_out.flags) |
563 | /* Active side resent the syn option in ACK |
564 | * because the server was in syncookie mode. |
565 | * A valid packet has been received, so |
566 | * clear header cb flags if there is no |
567 | * more option to send. |
568 | */ |
569 | clear_hdr_cb_flags(skops); |
570 | |
571 | if (hdr_stg->fastopen && !passive_fin_out.flags) |
572 | /* Passive side was in fastopen. |
573 | * A valid packet has been received, so |
574 | * the SYNACK has reached the peer. |
575 | * Clear header cb flags if there is no more |
576 | * option to send. |
577 | */ |
578 | clear_hdr_cb_flags(skops); |
579 | |
580 | if (th->fin) { |
581 | struct bpf_test_option *fin_opt; |
582 | int err; |
583 | |
584 | if (hdr_stg->active) |
585 | fin_opt = &active_fin_in; |
586 | else |
587 | fin_opt = &passive_fin_in; |
588 | |
589 | err = load_option(skops, test_opt: fin_opt, from_syn: false); |
590 | if (err && err != -ENOMSG) |
591 | RET_CG_ERR(err); |
592 | } |
593 | |
594 | return CG_OK; |
595 | } |
596 | |
597 | SEC("sockops" ) |
598 | int estab(struct bpf_sock_ops *skops) |
599 | { |
600 | int true_val = 1; |
601 | |
602 | switch (skops->op) { |
603 | case BPF_SOCK_OPS_TCP_LISTEN_CB: |
604 | bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN, |
605 | &true_val, sizeof(true_val)); |
606 | set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG); |
607 | break; |
608 | case BPF_SOCK_OPS_TCP_CONNECT_CB: |
609 | set_hdr_cb_flags(skops, 0); |
610 | break; |
611 | case BPF_SOCK_OPS_PARSE_HDR_OPT_CB: |
612 | return handle_parse_hdr(skops); |
613 | case BPF_SOCK_OPS_HDR_OPT_LEN_CB: |
614 | return handle_hdr_opt_len(skops); |
615 | case BPF_SOCK_OPS_WRITE_HDR_OPT_CB: |
616 | return handle_write_hdr_opt(skops); |
617 | case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: |
618 | return handle_passive_estab(skops); |
619 | case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: |
620 | return handle_active_estab(skops); |
621 | } |
622 | |
623 | return CG_OK; |
624 | } |
625 | |
626 | char _license[] SEC("license" ) = "GPL" ; |
627 | |