1 | // SPDX-License-Identifier: GPL-2.0 |
2 | // Copyright (c) 2017 Facebook |
3 | #include <stddef.h> |
4 | #include <stdbool.h> |
5 | #include <string.h> |
6 | #include <linux/pkt_cls.h> |
7 | #include <linux/bpf.h> |
8 | #include <linux/in.h> |
9 | #include <linux/if_ether.h> |
10 | #include <linux/ip.h> |
11 | #include <linux/ipv6.h> |
12 | #include <linux/icmp.h> |
13 | #include <linux/icmpv6.h> |
14 | #include <linux/tcp.h> |
15 | #include <linux/udp.h> |
16 | #include <bpf/bpf_helpers.h> |
17 | #include <bpf/bpf_endian.h> |
18 | #include "bpf_compiler.h" |
19 | |
20 | static __always_inline __u32 rol32(__u32 word, unsigned int shift) |
21 | { |
22 | return (word << shift) | (word >> ((-shift) & 31)); |
23 | } |
24 | |
25 | /* copy paste of jhash from kernel sources to make sure llvm |
26 | * can compile it into valid sequence of bpf instructions |
27 | */ |
/* Mix the three 32-bit lvalues a, b and c in place (six rotate/xor rounds). */
#define __jhash_mix(a, b, c)		\
{					\
	a -= c;				\
	a ^= rol32(c, 4);		\
	c += b;				\
	b -= a;				\
	b ^= rol32(a, 6);		\
	a += c;				\
	c -= b;				\
	c ^= rol32(b, 8);		\
	b += a;				\
	a -= c;				\
	a ^= rol32(c, 16);		\
	c += b;				\
	b -= a;				\
	b ^= rol32(a, 19);		\
	a += c;				\
	c -= b;				\
	c ^= rol32(b, 4);		\
	b += a;				\
}
37 | |
/* Final scrambling of the lvalues a, b and c; the hash ends up in c. */
#define __jhash_final(a, b, c)		\
{					\
	c ^= b;				\
	c -= rol32(b, 14);		\
	a ^= c;				\
	a -= rol32(c, 11);		\
	b ^= a;				\
	b -= rol32(a, 25);		\
	c ^= b;				\
	c -= rol32(b, 16);		\
	a ^= c;				\
	a -= rol32(c, 4);		\
	b ^= a;				\
	b -= rol32(a, 14);		\
	c ^= b;				\
	c -= rol32(b, 24);		\
}
48 | |
/* Constant added into the initial state of every hash computed here. */
#define JHASH_INITVAL		0xdeadbeef

/* Short alias for a 32-bit unsigned value, used by the hash helpers. */
typedef unsigned int u32;
52 | |
53 | static __noinline |
54 | u32 jhash(const void *key, u32 length, u32 initval) |
55 | { |
56 | u32 a, b, c; |
57 | const unsigned char *k = key; |
58 | |
59 | a = b = c = JHASH_INITVAL + length + initval; |
60 | |
61 | while (length > 12) { |
62 | a += *(u32 *)(k); |
63 | b += *(u32 *)(k + 4); |
64 | c += *(u32 *)(k + 8); |
65 | __jhash_mix(a, b, c); |
66 | length -= 12; |
67 | k += 12; |
68 | } |
69 | switch (length) { |
70 | case 12: c += (u32)k[11]<<24; |
71 | case 11: c += (u32)k[10]<<16; |
72 | case 10: c += (u32)k[9]<<8; |
73 | case 9: c += k[8]; |
74 | case 8: b += (u32)k[7]<<24; |
75 | case 7: b += (u32)k[6]<<16; |
76 | case 6: b += (u32)k[5]<<8; |
77 | case 5: b += k[4]; |
78 | case 4: a += (u32)k[3]<<24; |
79 | case 3: a += (u32)k[2]<<16; |
80 | case 2: a += (u32)k[1]<<8; |
81 | case 1: a += k[0]; |
82 | __jhash_final(a, b, c); |
83 | case 0: /* Nothing left to add */ |
84 | break; |
85 | } |
86 | |
87 | return c; |
88 | } |
89 | |
90 | __noinline |
91 | u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) |
92 | { |
93 | a += initval; |
94 | b += initval; |
95 | c += initval; |
96 | __jhash_final(a, b, c); |
97 | return c; |
98 | } |
99 | |
100 | __noinline |
101 | u32 jhash_2words(u32 a, u32 b, u32 initval) |
102 | { |
103 | return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); |
104 | } |
105 | |
106 | struct flow_key { |
107 | union { |
108 | __be32 src; |
109 | __be32 srcv6[4]; |
110 | }; |
111 | union { |
112 | __be32 dst; |
113 | __be32 dstv6[4]; |
114 | }; |
115 | union { |
116 | __u32 ports; |
117 | __u16 port16[2]; |
118 | }; |
119 | __u8 proto; |
120 | }; |
121 | |
122 | struct packet_description { |
123 | struct flow_key flow; |
124 | __u8 flags; |
125 | }; |
126 | |
127 | struct ctl_value { |
128 | union { |
129 | __u64 value; |
130 | __u32 ifindex; |
131 | __u8 mac[6]; |
132 | }; |
133 | }; |
134 | |
135 | struct vip_definition { |
136 | union { |
137 | __be32 vip; |
138 | __be32 vipv6[4]; |
139 | }; |
140 | __u16 port; |
141 | __u16 family; |
142 | __u8 proto; |
143 | }; |
144 | |
145 | struct vip_meta { |
146 | __u32 flags; |
147 | __u32 vip_num; |
148 | }; |
149 | |
150 | struct real_pos_lru { |
151 | __u32 pos; |
152 | __u64 atime; |
153 | }; |
154 | |
155 | struct real_definition { |
156 | union { |
157 | __be32 dst; |
158 | __be32 dstv6[4]; |
159 | }; |
160 | __u8 flags; |
161 | }; |
162 | |
163 | struct lb_stats { |
164 | __u64 v2; |
165 | __u64 v1; |
166 | }; |
167 | |
168 | struct { |
169 | __uint(type, BPF_MAP_TYPE_HASH); |
170 | __uint(max_entries, 512); |
171 | __type(key, struct vip_definition); |
172 | __type(value, struct vip_meta); |
173 | } vip_map SEC(".maps" ); |
174 | |
175 | struct { |
176 | __uint(type, BPF_MAP_TYPE_LRU_HASH); |
177 | __uint(max_entries, 300); |
178 | __uint(map_flags, 1U << 1); |
179 | __type(key, struct flow_key); |
180 | __type(value, struct real_pos_lru); |
181 | } lru_cache SEC(".maps" ); |
182 | |
183 | struct { |
184 | __uint(type, BPF_MAP_TYPE_ARRAY); |
185 | __uint(max_entries, 12 * 655); |
186 | __type(key, __u32); |
187 | __type(value, __u32); |
188 | } ch_rings SEC(".maps" ); |
189 | |
190 | struct { |
191 | __uint(type, BPF_MAP_TYPE_ARRAY); |
192 | __uint(max_entries, 40); |
193 | __type(key, __u32); |
194 | __type(value, struct real_definition); |
195 | } reals SEC(".maps" ); |
196 | |
197 | struct { |
198 | __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); |
199 | __uint(max_entries, 515); |
200 | __type(key, __u32); |
201 | __type(value, struct lb_stats); |
202 | } stats SEC(".maps" ); |
203 | |
204 | struct { |
205 | __uint(type, BPF_MAP_TYPE_ARRAY); |
206 | __uint(max_entries, 16); |
207 | __type(key, __u32); |
208 | __type(value, struct ctl_value); |
209 | } ctl_array SEC(".maps" ); |
210 | |
/* Ethernet header layout: destination MAC, source MAC, protocol field. */
struct eth_hdr {
	unsigned char eth_dest[6];
	unsigned char eth_source[6];
	unsigned short eth_proto;
};
216 | |
217 | static __noinline __u64 calc_offset(bool is_ipv6, bool is_icmp) |
218 | { |
219 | __u64 off = sizeof(struct eth_hdr); |
220 | if (is_ipv6) { |
221 | off += sizeof(struct ipv6hdr); |
222 | if (is_icmp) |
223 | off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr); |
224 | } else { |
225 | off += sizeof(struct iphdr); |
226 | if (is_icmp) |
227 | off += sizeof(struct icmphdr) + sizeof(struct iphdr); |
228 | } |
229 | return off; |
230 | } |
231 | |
232 | static __attribute__ ((noinline)) |
233 | bool parse_udp(void *data, void *data_end, |
234 | bool is_ipv6, struct packet_description *pckt) |
235 | { |
236 | |
237 | bool is_icmp = !((pckt->flags & (1 << 0)) == 0); |
238 | __u64 off = calc_offset(is_ipv6, is_icmp); |
239 | struct udphdr *udp; |
240 | udp = data + off; |
241 | |
242 | if (udp + 1 > data_end) |
243 | return false; |
244 | if (!is_icmp) { |
245 | pckt->flow.port16[0] = udp->source; |
246 | pckt->flow.port16[1] = udp->dest; |
247 | } else { |
248 | pckt->flow.port16[0] = udp->dest; |
249 | pckt->flow.port16[1] = udp->source; |
250 | } |
251 | return true; |
252 | } |
253 | |
254 | static __attribute__ ((noinline)) |
255 | bool parse_tcp(void *data, void *data_end, |
256 | bool is_ipv6, struct packet_description *pckt) |
257 | { |
258 | |
259 | bool is_icmp = !((pckt->flags & (1 << 0)) == 0); |
260 | __u64 off = calc_offset(is_ipv6, is_icmp); |
261 | struct tcphdr *tcp; |
262 | |
263 | tcp = data + off; |
264 | if (tcp + 1 > data_end) |
265 | return false; |
266 | if (tcp->syn) |
267 | pckt->flags |= (1 << 1); |
268 | if (!is_icmp) { |
269 | pckt->flow.port16[0] = tcp->source; |
270 | pckt->flow.port16[1] = tcp->dest; |
271 | } else { |
272 | pckt->flow.port16[0] = tcp->dest; |
273 | pckt->flow.port16[1] = tcp->source; |
274 | } |
275 | return true; |
276 | } |
277 | |
278 | static __attribute__ ((noinline)) |
279 | bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, |
280 | struct packet_description *pckt, |
281 | struct real_definition *dst, __u32 pkt_bytes) |
282 | { |
283 | struct eth_hdr *new_eth; |
284 | struct eth_hdr *old_eth; |
285 | struct ipv6hdr *ip6h; |
286 | __u32 ip_suffix; |
287 | void *data_end; |
288 | void *data; |
289 | |
290 | if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) |
291 | return false; |
292 | data = (void *)(long)xdp->data; |
293 | data_end = (void *)(long)xdp->data_end; |
294 | new_eth = data; |
295 | ip6h = data + sizeof(struct eth_hdr); |
296 | old_eth = data + sizeof(struct ipv6hdr); |
297 | if (new_eth + 1 > data_end || |
298 | old_eth + 1 > data_end || ip6h + 1 > data_end) |
299 | return false; |
300 | memcpy(new_eth->eth_dest, cval->mac, 6); |
301 | memcpy(new_eth->eth_source, old_eth->eth_dest, 6); |
302 | new_eth->eth_proto = 56710; |
303 | ip6h->version = 6; |
304 | ip6h->priority = 0; |
305 | memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl)); |
306 | |
307 | ip6h->nexthdr = IPPROTO_IPV6; |
308 | ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0]; |
309 | ip6h->payload_len = |
310 | bpf_htons(pkt_bytes + sizeof(struct ipv6hdr)); |
311 | ip6h->hop_limit = 4; |
312 | |
313 | ip6h->saddr.in6_u.u6_addr32[0] = 1; |
314 | ip6h->saddr.in6_u.u6_addr32[1] = 2; |
315 | ip6h->saddr.in6_u.u6_addr32[2] = 3; |
316 | ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix; |
317 | memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16); |
318 | return true; |
319 | } |
320 | |
321 | static __attribute__ ((noinline)) |
322 | bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, |
323 | struct packet_description *pckt, |
324 | struct real_definition *dst, __u32 pkt_bytes) |
325 | { |
326 | |
327 | __u32 ip_suffix = bpf_ntohs(pckt->flow.port16[0]); |
328 | struct eth_hdr *new_eth; |
329 | struct eth_hdr *old_eth; |
330 | __u16 *next_iph_u16; |
331 | struct iphdr *iph; |
332 | __u32 csum = 0; |
333 | void *data_end; |
334 | void *data; |
335 | |
336 | ip_suffix <<= 15; |
337 | ip_suffix ^= pckt->flow.src; |
338 | if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) |
339 | return false; |
340 | data = (void *)(long)xdp->data; |
341 | data_end = (void *)(long)xdp->data_end; |
342 | new_eth = data; |
343 | iph = data + sizeof(struct eth_hdr); |
344 | old_eth = data + sizeof(struct iphdr); |
345 | if (new_eth + 1 > data_end || |
346 | old_eth + 1 > data_end || iph + 1 > data_end) |
347 | return false; |
348 | memcpy(new_eth->eth_dest, cval->mac, 6); |
349 | memcpy(new_eth->eth_source, old_eth->eth_dest, 6); |
350 | new_eth->eth_proto = 8; |
351 | iph->version = 4; |
352 | iph->ihl = 5; |
353 | iph->frag_off = 0; |
354 | iph->protocol = IPPROTO_IPIP; |
355 | iph->check = 0; |
356 | iph->tos = 1; |
357 | iph->tot_len = bpf_htons(pkt_bytes + sizeof(struct iphdr)); |
358 | /* don't update iph->daddr, since it will overwrite old eth_proto |
359 | * and multiple iterations of bpf_prog_run() will fail |
360 | */ |
361 | |
362 | iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst; |
363 | iph->ttl = 4; |
364 | |
365 | next_iph_u16 = (__u16 *) iph; |
366 | __pragma_loop_unroll_full |
367 | for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) |
368 | csum += *next_iph_u16++; |
369 | iph->check = ~((csum & 0xffff) + (csum >> 16)); |
370 | if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) |
371 | return false; |
372 | return true; |
373 | } |
374 | |
375 | static __attribute__ ((noinline)) |
376 | int swap_mac_and_send(void *data, void *data_end) |
377 | { |
378 | unsigned char tmp_mac[6]; |
379 | struct eth_hdr *eth; |
380 | |
381 | eth = data; |
382 | memcpy(tmp_mac, eth->eth_source, 6); |
383 | memcpy(eth->eth_source, eth->eth_dest, 6); |
384 | memcpy(eth->eth_dest, tmp_mac, 6); |
385 | return XDP_TX; |
386 | } |
387 | |
388 | static __attribute__ ((noinline)) |
389 | int send_icmp_reply(void *data, void *data_end) |
390 | { |
391 | struct icmphdr *icmp_hdr; |
392 | __u16 *next_iph_u16; |
393 | __u32 tmp_addr = 0; |
394 | struct iphdr *iph; |
395 | __u32 csum = 0; |
396 | __u64 off = 0; |
397 | |
398 | if (data + sizeof(struct eth_hdr) |
399 | + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end) |
400 | return XDP_DROP; |
401 | off += sizeof(struct eth_hdr); |
402 | iph = data + off; |
403 | off += sizeof(struct iphdr); |
404 | icmp_hdr = data + off; |
405 | icmp_hdr->type = 0; |
406 | icmp_hdr->checksum += 0x0007; |
407 | iph->ttl = 4; |
408 | tmp_addr = iph->daddr; |
409 | iph->daddr = iph->saddr; |
410 | iph->saddr = tmp_addr; |
411 | iph->check = 0; |
412 | next_iph_u16 = (__u16 *) iph; |
413 | __pragma_loop_unroll_full |
414 | for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) |
415 | csum += *next_iph_u16++; |
416 | iph->check = ~((csum & 0xffff) + (csum >> 16)); |
417 | return swap_mac_and_send(data, data_end); |
418 | } |
419 | |
420 | static __attribute__ ((noinline)) |
421 | int send_icmp6_reply(void *data, void *data_end) |
422 | { |
423 | struct icmp6hdr *icmp_hdr; |
424 | struct ipv6hdr *ip6h; |
425 | __be32 tmp_addr[4]; |
426 | __u64 off = 0; |
427 | |
428 | if (data + sizeof(struct eth_hdr) |
429 | + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end) |
430 | return XDP_DROP; |
431 | off += sizeof(struct eth_hdr); |
432 | ip6h = data + off; |
433 | off += sizeof(struct ipv6hdr); |
434 | icmp_hdr = data + off; |
435 | icmp_hdr->icmp6_type = 129; |
436 | icmp_hdr->icmp6_cksum -= 0x0001; |
437 | ip6h->hop_limit = 4; |
438 | memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16); |
439 | memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16); |
440 | memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16); |
441 | return swap_mac_and_send(data, data_end); |
442 | } |
443 | |
444 | static __attribute__ ((noinline)) |
445 | int parse_icmpv6(void *data, void *data_end, __u64 off, |
446 | struct packet_description *pckt) |
447 | { |
448 | struct icmp6hdr *icmp_hdr; |
449 | struct ipv6hdr *ip6h; |
450 | |
451 | icmp_hdr = data + off; |
452 | if (icmp_hdr + 1 > data_end) |
453 | return XDP_DROP; |
454 | if (icmp_hdr->icmp6_type == 128) |
455 | return send_icmp6_reply(data, data_end); |
456 | if (icmp_hdr->icmp6_type != 3) |
457 | return XDP_PASS; |
458 | off += sizeof(struct icmp6hdr); |
459 | ip6h = data + off; |
460 | if (ip6h + 1 > data_end) |
461 | return XDP_DROP; |
462 | pckt->flow.proto = ip6h->nexthdr; |
463 | pckt->flags |= (1 << 0); |
464 | memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16); |
465 | memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16); |
466 | return -1; |
467 | } |
468 | |
469 | static __attribute__ ((noinline)) |
470 | int parse_icmp(void *data, void *data_end, __u64 off, |
471 | struct packet_description *pckt) |
472 | { |
473 | struct icmphdr *icmp_hdr; |
474 | struct iphdr *iph; |
475 | |
476 | icmp_hdr = data + off; |
477 | if (icmp_hdr + 1 > data_end) |
478 | return XDP_DROP; |
479 | if (icmp_hdr->type == 8) |
480 | return send_icmp_reply(data, data_end); |
481 | if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4)) |
482 | return XDP_PASS; |
483 | off += sizeof(struct icmphdr); |
484 | iph = data + off; |
485 | if (iph + 1 > data_end) |
486 | return XDP_DROP; |
487 | if (iph->ihl != 5) |
488 | return XDP_DROP; |
489 | pckt->flow.proto = iph->protocol; |
490 | pckt->flags |= (1 << 0); |
491 | pckt->flow.src = iph->daddr; |
492 | pckt->flow.dst = iph->saddr; |
493 | return -1; |
494 | } |
495 | |
496 | static __attribute__ ((noinline)) |
497 | __u32 get_packet_hash(struct packet_description *pckt, |
498 | bool hash_16bytes) |
499 | { |
500 | if (hash_16bytes) |
501 | return jhash_2words(a: jhash(key: pckt->flow.srcv6, length: 16, initval: 12), |
502 | b: pckt->flow.ports, initval: 24); |
503 | else |
504 | return jhash_2words(a: pckt->flow.src, b: pckt->flow.ports, |
505 | initval: 24); |
506 | } |
507 | |
508 | __attribute__ ((noinline)) |
509 | static bool get_packet_dst(struct real_definition **real, |
510 | struct packet_description *pckt, |
511 | struct vip_meta *vip_info, |
512 | bool is_ipv6, void *lru_map) |
513 | { |
514 | struct real_pos_lru new_dst_lru = { }; |
515 | bool hash_16bytes = is_ipv6; |
516 | __u32 *real_pos, hash, key; |
517 | __u64 cur_time; |
518 | |
519 | if (vip_info->flags & (1 << 2)) |
520 | hash_16bytes = 1; |
521 | if (vip_info->flags & (1 << 3)) { |
522 | pckt->flow.port16[0] = pckt->flow.port16[1]; |
523 | memset(pckt->flow.srcv6, 0, 16); |
524 | } |
525 | hash = get_packet_hash(pckt, hash_16bytes); |
526 | if (hash != 0x358459b7 /* jhash of ipv4 packet */ && |
527 | hash != 0x2f4bc6bb /* jhash of ipv6 packet */) |
528 | return false; |
529 | key = 2 * vip_info->vip_num + hash % 2; |
530 | real_pos = bpf_map_lookup_elem(&ch_rings, &key); |
531 | if (!real_pos) |
532 | return false; |
533 | key = *real_pos; |
534 | *real = bpf_map_lookup_elem(&reals, &key); |
535 | if (!(*real)) |
536 | return false; |
537 | if (!(vip_info->flags & (1 << 1))) { |
538 | __u32 conn_rate_key = 512 + 2; |
539 | struct lb_stats *conn_rate_stats = |
540 | bpf_map_lookup_elem(&stats, &conn_rate_key); |
541 | |
542 | if (!conn_rate_stats) |
543 | return true; |
544 | cur_time = bpf_ktime_get_ns(); |
545 | if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) { |
546 | conn_rate_stats->v1 = 1; |
547 | conn_rate_stats->v2 = cur_time; |
548 | } else { |
549 | conn_rate_stats->v1 += 1; |
550 | if (conn_rate_stats->v1 >= 1) |
551 | return true; |
552 | } |
553 | if (pckt->flow.proto == IPPROTO_UDP) |
554 | new_dst_lru.atime = cur_time; |
555 | new_dst_lru.pos = key; |
556 | bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0); |
557 | } |
558 | return true; |
559 | } |
560 | |
561 | __attribute__ ((noinline)) |
562 | static void connection_table_lookup(struct real_definition **real, |
563 | struct packet_description *pckt, |
564 | void *lru_map) |
565 | { |
566 | |
567 | struct real_pos_lru *dst_lru; |
568 | __u64 cur_time; |
569 | __u32 key; |
570 | |
571 | dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow); |
572 | if (!dst_lru) |
573 | return; |
574 | if (pckt->flow.proto == IPPROTO_UDP) { |
575 | cur_time = bpf_ktime_get_ns(); |
576 | if (cur_time - dst_lru->atime > 300000) |
577 | return; |
578 | dst_lru->atime = cur_time; |
579 | } |
580 | key = dst_lru->pos; |
581 | *real = bpf_map_lookup_elem(&reals, &key); |
582 | } |
583 | |
584 | /* don't believe your eyes! |
585 | * below function has 6 arguments whereas bpf and llvm allow maximum of 5 |
586 | * but since it's _static_ llvm can optimize one argument away |
587 | */ |
588 | __attribute__ ((noinline)) |
589 | static int (struct packet_description *pckt, |
590 | __u8 *protocol, __u64 off, |
591 | __u16 *pkt_bytes, void *data, |
592 | void *data_end) |
593 | { |
594 | struct ipv6hdr *ip6h; |
595 | __u64 iph_len; |
596 | int action; |
597 | |
598 | ip6h = data + off; |
599 | if (ip6h + 1 > data_end) |
600 | return XDP_DROP; |
601 | iph_len = sizeof(struct ipv6hdr); |
602 | *protocol = ip6h->nexthdr; |
603 | pckt->flow.proto = *protocol; |
604 | *pkt_bytes = bpf_ntohs(ip6h->payload_len); |
605 | off += iph_len; |
606 | if (*protocol == 45) { |
607 | return XDP_DROP; |
608 | } else if (*protocol == 59) { |
609 | action = parse_icmpv6(data, data_end, off, pckt); |
610 | if (action >= 0) |
611 | return action; |
612 | } else { |
613 | memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16); |
614 | memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16); |
615 | } |
616 | return -1; |
617 | } |
618 | |
619 | __attribute__ ((noinline)) |
620 | static int (struct packet_description *pckt, |
621 | __u8 *protocol, __u64 off, |
622 | __u16 *pkt_bytes, void *data, |
623 | void *data_end) |
624 | { |
625 | struct iphdr *iph; |
626 | int action; |
627 | |
628 | iph = data + off; |
629 | if (iph + 1 > data_end) |
630 | return XDP_DROP; |
631 | if (iph->ihl != 5) |
632 | return XDP_DROP; |
633 | *protocol = iph->protocol; |
634 | pckt->flow.proto = *protocol; |
635 | *pkt_bytes = bpf_ntohs(iph->tot_len); |
636 | off += 20; |
637 | if (iph->frag_off & 65343) |
638 | return XDP_DROP; |
639 | if (*protocol == IPPROTO_ICMP) { |
640 | action = parse_icmp(data, data_end, off, pckt); |
641 | if (action >= 0) |
642 | return action; |
643 | } else { |
644 | pckt->flow.src = iph->saddr; |
645 | pckt->flow.dst = iph->daddr; |
646 | } |
647 | return -1; |
648 | } |
649 | |
650 | __attribute__ ((noinline)) |
651 | static int process_packet(void *data, __u64 off, void *data_end, |
652 | bool is_ipv6, struct xdp_md *xdp) |
653 | { |
654 | |
655 | struct real_definition *dst = NULL; |
656 | struct packet_description pckt = { }; |
657 | struct vip_definition vip = { }; |
658 | struct lb_stats *data_stats; |
659 | void *lru_map = &lru_cache; |
660 | struct vip_meta *vip_info; |
661 | __u32 lru_stats_key = 513; |
662 | __u32 mac_addr_pos = 0; |
663 | __u32 stats_key = 512; |
664 | struct ctl_value *cval; |
665 | __u16 pkt_bytes; |
666 | __u8 protocol; |
667 | __u32 vip_num; |
668 | int action; |
669 | |
670 | if (is_ipv6) |
671 | action = process_l3_headers_v6(pckt: &pckt, protocol: &protocol, off, |
672 | pkt_bytes: &pkt_bytes, data, data_end); |
673 | else |
674 | action = process_l3_headers_v4(pckt: &pckt, protocol: &protocol, off, |
675 | pkt_bytes: &pkt_bytes, data, data_end); |
676 | if (action >= 0) |
677 | return action; |
678 | protocol = pckt.flow.proto; |
679 | if (protocol == IPPROTO_TCP) { |
680 | if (!parse_tcp(data, data_end, is_ipv6, pckt: &pckt)) |
681 | return XDP_DROP; |
682 | } else if (protocol == IPPROTO_UDP) { |
683 | if (!parse_udp(data, data_end, is_ipv6, pckt: &pckt)) |
684 | return XDP_DROP; |
685 | } else { |
686 | return XDP_TX; |
687 | } |
688 | |
689 | if (is_ipv6) |
690 | memcpy(vip.vipv6, pckt.flow.dstv6, 16); |
691 | else |
692 | vip.vip = pckt.flow.dst; |
693 | vip.port = pckt.flow.port16[1]; |
694 | vip.proto = pckt.flow.proto; |
695 | vip_info = bpf_map_lookup_elem(&vip_map, &vip); |
696 | if (!vip_info) { |
697 | vip.port = 0; |
698 | vip_info = bpf_map_lookup_elem(&vip_map, &vip); |
699 | if (!vip_info) |
700 | return XDP_PASS; |
701 | if (!(vip_info->flags & (1 << 4))) |
702 | pckt.flow.port16[1] = 0; |
703 | } |
704 | if (data_end - data > 1400) |
705 | return XDP_DROP; |
706 | data_stats = bpf_map_lookup_elem(&stats, &stats_key); |
707 | if (!data_stats) |
708 | return XDP_DROP; |
709 | data_stats->v1 += 1; |
710 | if (!dst) { |
711 | if (vip_info->flags & (1 << 0)) |
712 | pckt.flow.port16[0] = 0; |
713 | if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1))) |
714 | connection_table_lookup(real: &dst, pckt: &pckt, lru_map); |
715 | if (dst) |
716 | goto out; |
717 | if (pckt.flow.proto == IPPROTO_TCP) { |
718 | struct lb_stats *lru_stats = |
719 | bpf_map_lookup_elem(&stats, &lru_stats_key); |
720 | |
721 | if (!lru_stats) |
722 | return XDP_DROP; |
723 | if (pckt.flags & (1 << 1)) |
724 | lru_stats->v1 += 1; |
725 | else |
726 | lru_stats->v2 += 1; |
727 | } |
728 | if (!get_packet_dst(real: &dst, pckt: &pckt, vip_info, is_ipv6, lru_map)) |
729 | return XDP_DROP; |
730 | data_stats->v2 += 1; |
731 | } |
732 | out: |
733 | cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos); |
734 | if (!cval) |
735 | return XDP_DROP; |
736 | if (dst->flags & (1 << 0)) { |
737 | if (!encap_v6(xdp, cval, pckt: &pckt, dst, pkt_bytes)) |
738 | return XDP_DROP; |
739 | } else { |
740 | if (!encap_v4(xdp, cval, pckt: &pckt, dst, pkt_bytes)) |
741 | return XDP_DROP; |
742 | } |
743 | vip_num = vip_info->vip_num; |
744 | data_stats = bpf_map_lookup_elem(&stats, &vip_num); |
745 | if (!data_stats) |
746 | return XDP_DROP; |
747 | data_stats->v1 += 1; |
748 | data_stats->v2 += pkt_bytes; |
749 | |
750 | data = (void *)(long)xdp->data; |
751 | data_end = (void *)(long)xdp->data_end; |
752 | if (data + 4 > data_end) |
753 | return XDP_DROP; |
754 | *(u32 *)data = dst->dst; |
755 | return XDP_DROP; |
756 | } |
757 | |
758 | SEC("xdp" ) |
759 | int balancer_ingress_v4(struct xdp_md *ctx) |
760 | { |
761 | void *data = (void *)(long)ctx->data; |
762 | void *data_end = (void *)(long)ctx->data_end; |
763 | struct eth_hdr *eth = data; |
764 | __u32 eth_proto; |
765 | __u32 nh_off; |
766 | |
767 | nh_off = sizeof(struct eth_hdr); |
768 | if (data + nh_off > data_end) |
769 | return XDP_DROP; |
770 | eth_proto = bpf_ntohs(eth->eth_proto); |
771 | if (eth_proto == ETH_P_IP) |
772 | return process_packet(data, off: nh_off, data_end, is_ipv6: 0, xdp: ctx); |
773 | else |
774 | return XDP_DROP; |
775 | } |
776 | |
777 | SEC("xdp" ) |
778 | int balancer_ingress_v6(struct xdp_md *ctx) |
779 | { |
780 | void *data = (void *)(long)ctx->data; |
781 | void *data_end = (void *)(long)ctx->data_end; |
782 | struct eth_hdr *eth = data; |
783 | __u32 eth_proto; |
784 | __u32 nh_off; |
785 | |
786 | nh_off = sizeof(struct eth_hdr); |
787 | if (data + nh_off > data_end) |
788 | return XDP_DROP; |
789 | eth_proto = bpf_ntohs(eth->eth_proto); |
790 | if (eth_proto == ETH_P_IPV6) |
791 | return process_packet(data, off: nh_off, data_end, is_ipv6: 1, xdp: ctx); |
792 | else |
793 | return XDP_DROP; |
794 | } |
795 | |
796 | char _license[] SEC("license" ) = "GPL" ; |
797 | |