1 | // SPDX-License-Identifier: GPL-2.0 |
2 | // Copyright (c) 2022 Meta |
3 | |
4 | #include <stddef.h> |
5 | #include <stdint.h> |
6 | #include <stdbool.h> |
7 | #include <linux/bpf.h> |
8 | #include <linux/stddef.h> |
9 | #include <linux/pkt_cls.h> |
10 | #include <linux/if_ether.h> |
11 | #include <linux/in.h> |
12 | #include <linux/ip.h> |
13 | #include <linux/ipv6.h> |
14 | #include <linux/tcp.h> |
15 | #include <linux/udp.h> |
16 | #include <bpf/bpf_helpers.h> |
17 | #include <bpf/bpf_endian.h> |
18 | |
/* veth_src --- veth_src_fwd --- veth_dst_fwd --- veth_dst
 *           |                                 |
 *  ns_src   |              ns_fwd             |   ns_dst
 *
 * ns_src and ns_dst: ENDHOST namespace
 *            ns_fwd: Forwarding namespace
 */
26 | |
/* Turn an integer-typed context field (skb->data, skb->data_end) into a
 * pointer.  The expansion is fully parenthesized so that operators at the
 * use site (e.g. sizeof, postfix operators) apply to the resulting pointer
 * and not to a fragment of the cast expression.
 */
#define ctx_ptr(field)		((void *)(long)(field))

/* IPv4 addresses, in network byte order, that skb_get_type() compares with
 * the packet's source address: ip4_src marks a sender in SRC_NS, ip4_dst a
 * sender in DST_NS.
 */
#define ip4_src			__bpf_htonl(0xac100164) /* 172.16.1.100 */
#define ip4_dst			__bpf_htonl(0xac100264) /* 172.16.2.100 */

/* IPv6 counterparts of ip4_src/ip4_dst, written as the 16 address bytes of
 * a brace initializer: ::1:dead:beef:cafe and ::2:dead:beef:cafe.
 */
#define ip6_src			{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
				  0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
#define ip6_dst			{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
				  0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }

/* Compare two objects that expose an s6_addr32[4] member, one 32-bit word
 * at a time.  Both arguments are evaluated four times, so they must be free
 * of side effects.  Arguments are parenthesized so that expressions such as
 * *ptr can be passed safely.
 */
#define v6_equal(a, b)		((a).s6_addr32[0] == (b).s6_addr32[0] && \
				 (a).s6_addr32[1] == (b).s6_addr32[1] && \
				 (a).s6_addr32[2] == (b).s6_addr32[2] && \
				 (a).s6_addr32[3] == (b).s6_addr32[3])
41 | |
/* Interface indexes handed to bpf_redirect_neigh() by
 * ingress_fwdns_prio101(): traffic sent from SRC_NS is redirected to
 * IFINDEX_DST and traffic sent from DST_NS to IFINDEX_SRC.
 * NOTE(review): nothing in this file assigns them; presumably they are
 * filled in by the user-space loader before load -- confirm.
 */
volatile const __u32 IFINDEX_SRC;
volatile const __u32 IFINDEX_DST;

/* Marker values the programs below store in skb->tstamp, one per hook, so
 * that a later hook can tell which program wrote the timestamp it sees.
 */
#define EGRESS_ENDHOST_MAGIC 0x0b9fbeef
#define INGRESS_FWDNS_MAGIC 0x1b9fbeef
#define EGRESS_FWDNS_MAGIC 0x2b9fbeef
48 | |
/* Second index of dtimes[][] and errs[][]: one counter per hook point,
 * plus SET_DTIME for unexpected bpf_skb_set_tstamp() results.
 * __MAX_CNT is the number of counters, not a counter itself.
 */
enum {
	INGRESS_FWDNS_P100	= 0,
	INGRESS_FWDNS_P101	= 1,
	EGRESS_FWDNS_P100	= 2,
	EGRESS_FWDNS_P101	= 3,
	INGRESS_ENDHOST		= 4,
	EGRESS_ENDHOST		= 5,
	SET_DTIME		= 6,
	__MAX_CNT		= 7,
};
59 | |
/* Test ids; the current one is held in the global `test` and is the first
 * index of dtimes[][] and errs[][].
 *
 * The numeric order is significant:
 *  - skb_get_type() derives the test's port as 50000 + id;
 *  - bpf_fwd() is true only for ids below TCP_IP4_RT_FWD;
 *  - UKN_TEST collects the counters of any id >= __NR_TESTS.
 */
enum {
	TCP_IP6_CLEAR_DTIME	= 0,
	TCP_IP4			= 1,
	TCP_IP6			= 2,
	UDP_IP4			= 3,
	UDP_IP6			= 4,
	TCP_IP4_RT_FWD		= 5,
	TCP_IP6_RT_FWD		= 6,
	UDP_IP4_RT_FWD		= 7,
	UDP_IP6_RT_FWD		= 8,
	UKN_TEST		= 9,
	__NR_TESTS		= 10,
};
73 | |
/* Sender netns as encoded in the second byte of skb_get_type()'s return
 * value.  Zero is deliberately unused: it means "no known sender".
 */
enum {
	SRC_NS = 1,
	DST_NS = 2,
};
78 | |
/* Result counters, indexed [test id][counter id].  inc_dtimes() bumps
 * dtimes[] when a hook sees the timestamp it expects, inc_errs() bumps
 * errs[] when it does not.
 */
__u32 dtimes[__NR_TESTS][__MAX_CNT] = {};
__u32 errs[__NR_TESTS][__MAX_CNT] = {};
/* Id of the test currently running.
 * NOTE(review): never written in this file; presumably set from user
 * space between test runs -- confirm.
 */
__u32 test = 0;
82 | |
83 | static void inc_dtimes(__u32 idx) |
84 | { |
85 | if (test < __NR_TESTS) |
86 | dtimes[test][idx]++; |
87 | else |
88 | dtimes[UKN_TEST][idx]++; |
89 | } |
90 | |
91 | static void inc_errs(__u32 idx) |
92 | { |
93 | if (test < __NR_TESTS) |
94 | errs[test][idx]++; |
95 | else |
96 | errs[UKN_TEST][idx]++; |
97 | } |
98 | |
/* Extract the inet protocol (lowest byte) from a skb_get_type() value. */
static int skb_proto(int type)
{
	const int proto_mask = 0xff;

	return type & proto_mask;
}
103 | |
/* Extract the sender netns (second byte) from a skb_get_type() value. */
static int skb_ns(int type)
{
	int upper = type >> 8;

	return upper & 0xff;
}
108 | |
109 | static bool fwdns_clear_dtime(void) |
110 | { |
111 | return test == TCP_IP6_CLEAR_DTIME; |
112 | } |
113 | |
114 | static bool bpf_fwd(void) |
115 | { |
116 | return test < TCP_IP4_RT_FWD; |
117 | } |
118 | |
119 | static __u8 get_proto(void) |
120 | { |
121 | switch (test) { |
122 | case UDP_IP4: |
123 | case UDP_IP6: |
124 | case UDP_IP4_RT_FWD: |
125 | case UDP_IP6_RT_FWD: |
126 | return IPPROTO_UDP; |
127 | default: |
128 | return IPPROTO_TCP; |
129 | } |
130 | } |
131 | |
132 | /* -1: parse error: TC_ACT_SHOT |
133 | * 0: not testing traffic: TC_ACT_OK |
134 | * >0: first byte is the inet_proto, second byte has the netns |
135 | * of the sender |
136 | */ |
137 | static int skb_get_type(struct __sk_buff *skb) |
138 | { |
139 | __u16 dst_ns_port = __bpf_htons(50000 + test); |
140 | void *data_end = ctx_ptr(skb->data_end); |
141 | void *data = ctx_ptr(skb->data); |
142 | __u8 inet_proto = 0, ns = 0; |
143 | struct ipv6hdr *ip6h; |
144 | __u16 sport, dport; |
145 | struct iphdr *iph; |
146 | struct tcphdr *th; |
147 | struct udphdr *uh; |
148 | void *trans; |
149 | |
150 | switch (skb->protocol) { |
151 | case __bpf_htons(ETH_P_IP): |
152 | iph = data + sizeof(struct ethhdr); |
153 | if (iph + 1 > data_end) |
154 | return -1; |
155 | if (iph->saddr == ip4_src) |
156 | ns = SRC_NS; |
157 | else if (iph->saddr == ip4_dst) |
158 | ns = DST_NS; |
159 | inet_proto = iph->protocol; |
160 | trans = iph + 1; |
161 | break; |
162 | case __bpf_htons(ETH_P_IPV6): |
163 | ip6h = data + sizeof(struct ethhdr); |
164 | if (ip6h + 1 > data_end) |
165 | return -1; |
166 | if (v6_equal(ip6h->saddr, (struct in6_addr){{ip6_src}})) |
167 | ns = SRC_NS; |
168 | else if (v6_equal(ip6h->saddr, (struct in6_addr){{ip6_dst}})) |
169 | ns = DST_NS; |
170 | inet_proto = ip6h->nexthdr; |
171 | trans = ip6h + 1; |
172 | break; |
173 | default: |
174 | return 0; |
175 | } |
176 | |
177 | /* skb is not from src_ns or dst_ns. |
178 | * skb is not the testing IPPROTO. |
179 | */ |
180 | if (!ns || inet_proto != get_proto()) |
181 | return 0; |
182 | |
183 | switch (inet_proto) { |
184 | case IPPROTO_TCP: |
185 | th = trans; |
186 | if (th + 1 > data_end) |
187 | return -1; |
188 | sport = th->source; |
189 | dport = th->dest; |
190 | break; |
191 | case IPPROTO_UDP: |
192 | uh = trans; |
193 | if (uh + 1 > data_end) |
194 | return -1; |
195 | sport = uh->source; |
196 | dport = uh->dest; |
197 | break; |
198 | default: |
199 | return 0; |
200 | } |
201 | |
202 | /* The skb is the testing traffic */ |
203 | if ((ns == SRC_NS && dport == dst_ns_port) || |
204 | (ns == DST_NS && sport == dst_ns_port)) |
205 | return (ns << 8 | inet_proto); |
206 | |
207 | return 0; |
208 | } |
209 | |
210 | /* format: direction@iface@netns |
211 | * egress@veth_(src|dst)@ns_(src|dst) |
212 | */ |
213 | SEC("tc" ) |
214 | int egress_host(struct __sk_buff *skb) |
215 | { |
216 | int skb_type; |
217 | |
218 | skb_type = skb_get_type(skb); |
219 | if (skb_type == -1) |
220 | return TC_ACT_SHOT; |
221 | if (!skb_type) |
222 | return TC_ACT_OK; |
223 | |
224 | if (skb_proto(type: skb_type) == IPPROTO_TCP) { |
225 | if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO && |
226 | skb->tstamp) |
227 | inc_dtimes(idx: EGRESS_ENDHOST); |
228 | else |
229 | inc_errs(idx: EGRESS_ENDHOST); |
230 | } else { |
231 | if (skb->tstamp_type == BPF_SKB_TSTAMP_UNSPEC && |
232 | skb->tstamp) |
233 | inc_dtimes(idx: EGRESS_ENDHOST); |
234 | else |
235 | inc_errs(idx: EGRESS_ENDHOST); |
236 | } |
237 | |
238 | skb->tstamp = EGRESS_ENDHOST_MAGIC; |
239 | |
240 | return TC_ACT_OK; |
241 | } |
242 | |
243 | /* ingress@veth_(src|dst)@ns_(src|dst) */ |
244 | SEC("tc" ) |
245 | int ingress_host(struct __sk_buff *skb) |
246 | { |
247 | int skb_type; |
248 | |
249 | skb_type = skb_get_type(skb); |
250 | if (skb_type == -1) |
251 | return TC_ACT_SHOT; |
252 | if (!skb_type) |
253 | return TC_ACT_OK; |
254 | |
255 | if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO && |
256 | skb->tstamp == EGRESS_FWDNS_MAGIC) |
257 | inc_dtimes(idx: INGRESS_ENDHOST); |
258 | else |
259 | inc_errs(idx: INGRESS_ENDHOST); |
260 | |
261 | return TC_ACT_OK; |
262 | } |
263 | |
264 | /* ingress@veth_(src|dst)_fwd@ns_fwd priority 100 */ |
265 | SEC("tc" ) |
266 | int ingress_fwdns_prio100(struct __sk_buff *skb) |
267 | { |
268 | int skb_type; |
269 | |
270 | skb_type = skb_get_type(skb); |
271 | if (skb_type == -1) |
272 | return TC_ACT_SHOT; |
273 | if (!skb_type) |
274 | return TC_ACT_OK; |
275 | |
276 | /* delivery_time is only available to the ingress |
277 | * if the tc-bpf checks the skb->tstamp_type. |
278 | */ |
279 | if (skb->tstamp == EGRESS_ENDHOST_MAGIC) |
280 | inc_errs(idx: INGRESS_FWDNS_P100); |
281 | |
282 | if (fwdns_clear_dtime()) |
283 | skb->tstamp = 0; |
284 | |
285 | return TC_ACT_UNSPEC; |
286 | } |
287 | |
288 | /* egress@veth_(src|dst)_fwd@ns_fwd priority 100 */ |
289 | SEC("tc" ) |
290 | int egress_fwdns_prio100(struct __sk_buff *skb) |
291 | { |
292 | int skb_type; |
293 | |
294 | skb_type = skb_get_type(skb); |
295 | if (skb_type == -1) |
296 | return TC_ACT_SHOT; |
297 | if (!skb_type) |
298 | return TC_ACT_OK; |
299 | |
300 | /* delivery_time is always available to egress even |
301 | * the tc-bpf did not use the tstamp_type. |
302 | */ |
303 | if (skb->tstamp == INGRESS_FWDNS_MAGIC) |
304 | inc_dtimes(idx: EGRESS_FWDNS_P100); |
305 | else |
306 | inc_errs(idx: EGRESS_FWDNS_P100); |
307 | |
308 | if (fwdns_clear_dtime()) |
309 | skb->tstamp = 0; |
310 | |
311 | return TC_ACT_UNSPEC; |
312 | } |
313 | |
314 | /* ingress@veth_(src|dst)_fwd@ns_fwd priority 101 */ |
315 | SEC("tc" ) |
316 | int ingress_fwdns_prio101(struct __sk_buff *skb) |
317 | { |
318 | __u64 expected_dtime = EGRESS_ENDHOST_MAGIC; |
319 | int skb_type; |
320 | |
321 | skb_type = skb_get_type(skb); |
322 | if (skb_type == -1 || !skb_type) |
323 | /* Should have handled in prio100 */ |
324 | return TC_ACT_SHOT; |
325 | |
326 | if (skb_proto(type: skb_type) == IPPROTO_UDP) |
327 | expected_dtime = 0; |
328 | |
329 | if (skb->tstamp_type) { |
330 | if (fwdns_clear_dtime() || |
331 | skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO || |
332 | skb->tstamp != expected_dtime) |
333 | inc_errs(idx: INGRESS_FWDNS_P101); |
334 | else |
335 | inc_dtimes(idx: INGRESS_FWDNS_P101); |
336 | } else { |
337 | if (!fwdns_clear_dtime() && expected_dtime) |
338 | inc_errs(idx: INGRESS_FWDNS_P101); |
339 | } |
340 | |
341 | if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) { |
342 | skb->tstamp = INGRESS_FWDNS_MAGIC; |
343 | } else { |
344 | if (bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, |
345 | BPF_SKB_TSTAMP_DELIVERY_MONO)) |
346 | inc_errs(idx: SET_DTIME); |
347 | if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, |
348 | BPF_SKB_TSTAMP_UNSPEC)) |
349 | inc_errs(idx: SET_DTIME); |
350 | } |
351 | |
352 | if (skb_ns(type: skb_type) == SRC_NS) |
353 | return bpf_fwd() ? |
354 | bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0) : TC_ACT_OK; |
355 | else |
356 | return bpf_fwd() ? |
357 | bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0) : TC_ACT_OK; |
358 | } |
359 | |
360 | /* egress@veth_(src|dst)_fwd@ns_fwd priority 101 */ |
361 | SEC("tc" ) |
362 | int egress_fwdns_prio101(struct __sk_buff *skb) |
363 | { |
364 | int skb_type; |
365 | |
366 | skb_type = skb_get_type(skb); |
367 | if (skb_type == -1 || !skb_type) |
368 | /* Should have handled in prio100 */ |
369 | return TC_ACT_SHOT; |
370 | |
371 | if (skb->tstamp_type) { |
372 | if (fwdns_clear_dtime() || |
373 | skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO || |
374 | skb->tstamp != INGRESS_FWDNS_MAGIC) |
375 | inc_errs(idx: EGRESS_FWDNS_P101); |
376 | else |
377 | inc_dtimes(idx: EGRESS_FWDNS_P101); |
378 | } else { |
379 | if (!fwdns_clear_dtime()) |
380 | inc_errs(idx: EGRESS_FWDNS_P101); |
381 | } |
382 | |
383 | if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) { |
384 | skb->tstamp = EGRESS_FWDNS_MAGIC; |
385 | } else { |
386 | if (bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC, |
387 | BPF_SKB_TSTAMP_DELIVERY_MONO)) |
388 | inc_errs(idx: SET_DTIME); |
389 | if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, |
390 | BPF_SKB_TSTAMP_UNSPEC)) |
391 | inc_errs(idx: SET_DTIME); |
392 | } |
393 | |
394 | return TC_ACT_OK; |
395 | } |
396 | |
/* License string emitted into the object's "license" section. */
char __license[] SEC("license" ) = "GPL" ;
398 | |