1 | /* SPDX-License-Identifier: GPL-2.0 |
2 | * |
3 | * Copyright (c) 2019 Facebook |
4 | * |
5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of version 2 of the GNU General Public |
7 | * License as published by the Free Software Foundation. |
8 | * |
9 | * Include file for sample Host Bandwidth Manager (HBM) BPF programs |
10 | */ |
11 | #define KBUILD_MODNAME "foo" |
12 | #include <uapi/linux/bpf.h> |
13 | #include <uapi/linux/if_ether.h> |
14 | #include <uapi/linux/if_packet.h> |
15 | #include <uapi/linux/ip.h> |
16 | #include <uapi/linux/ipv6.h> |
17 | #include <uapi/linux/in.h> |
18 | #include <uapi/linux/tcp.h> |
19 | #include <uapi/linux/filter.h> |
20 | #include <uapi/linux/pkt_cls.h> |
21 | #include <net/ipv6.h> |
22 | #include <net/inet_ecn.h> |
23 | #include <bpf/bpf_endian.h> |
24 | #include <bpf/bpf_helpers.h> |
25 | #include "hbm.h" |
26 | |
/* Verdict values. hbm_update_stats() starts from ALLOW_PKT, switches to
 * DROP_PKT when drop_flag is set, and ORs in 2 when cwr_flag is set.
 */
#define DROP_PKT 0
#define ALLOW_PKT 1
// NOTE(review): TCP_ECN_OK is not referenced in this header; presumably used by includers
#define TCP_ECN_OK 1
// Same value (2) that hbm_update_stats() ORs into its verdict for cwr_flag
#define CWR 2

#ifndef HBM_DEBUG // Define HBM_DEBUG to enable debugging
// Without HBM_DEBUG every bpf_printk() call expands to nothing
#undef bpf_printk
#define bpf_printk(fmt, ...)
#endif
36 | |
/* Credit-based accounting constants, built as multiples of
 * MAX_BYTES_PER_PACKET. Only INIT_CREDIT is consumed in this header (by
 * hbm_init_vqueue()); the thresholds are presumably compared against the
 * queue credit by the including programs -- confirm against callers.
 */
#define INITIAL_CREDIT_PACKETS 100
#define MAX_BYTES_PER_PACKET 1500
#define MARK_THRESH (40 * MAX_BYTES_PER_PACKET) // 60000
#define DROP_THRESH (80 * 5 * MAX_BYTES_PER_PACKET) // 600000
#define LARGE_PKT_DROP_THRESH (DROP_THRESH - (15 * MAX_BYTES_PER_PACKET)) // 577500
#define MARK_REGION_SIZE (LARGE_PKT_DROP_THRESH - MARK_THRESH) // 517500
// presumably the byte length separating "small" from "large" packets
#define LARGE_PKT_THRESH 120
#define MAX_CREDIT (100 * MAX_BYTES_PER_PACKET) // 150000
#define INIT_CREDIT (INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET) // 150000
46 | |
// Time-based accounting for fq's EDT (all *_NS values are in nanoseconds)
#define BURST_SIZE_NS 100000 // 100us
#define MARK_THRESH_NS 50000 // 50us
#define DROP_THRESH_NS 500000 // 500us
// Reserve 20us of queuing for small packets (less than 120 bytes)
#define LARGE_PKT_DROP_THRESH_NS (DROP_THRESH_NS - 20000) // 480us
#define MARK_REGION_SIZE_NS (LARGE_PKT_DROP_THRESH_NS - MARK_THRESH_NS) // 430us

// rate in bytes per ns << 20
// CREDIT_PER_NS and BYTES_PER_NS expand to the same expression:
// delta is widened to u64, multiplied by rate, then shifted right by 20.
#define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
#define BYTES_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
// Inverse conversion: (bytes << 20) / rate. A zero rate is not guarded here.
#define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate))
59 | |
/* Cgroup-storage map keyed by struct bpf_cgroup_storage_key whose value is a
 * struct hbm_vqueue (the structure initialized by hbm_init_vqueue() and
 * hbm_init_edt_vqueue() below).
 */
struct {
	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
	__type(key, struct bpf_cgroup_storage_key);
	__type(value, struct hbm_vqueue);
} queue_state SEC(".maps");
65 | |
/* Single-entry array map (u32 key, max_entries 1) holding one
 * struct hbm_queue_stats; presumably looked up by the including programs and
 * passed to hbm_update_stats() -- confirm against callers.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, struct hbm_queue_stats);
} queue_stats SEC(".maps");
72 | |
/* Per-packet facts gathered by hbm_get_pkt_info() / get_tcp_info(). */
struct hbm_pkt_info {
	int cwnd;		/* tp->snd_cwnd, or 0 when no TCP socket is found */
	int rtt;		/* tp->srtt_us >> 3, or 0 (presumably microseconds) */
	int packets_out;	/* tp->packets_out, or 0 when no TCP socket is found */
	bool is_ip;		/* header version field is 4 or 6 */
	bool is_tcp;		/* IPv4 protocol / IPv6 nexthdr equals 6 */
	short ecn;		/* header ECN bits, masked with INET_ECN_MASK */
};
81 | |
82 | static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti) |
83 | { |
84 | struct bpf_sock *sk; |
85 | struct bpf_tcp_sock *tp; |
86 | |
87 | sk = skb->sk; |
88 | if (sk) { |
89 | sk = bpf_sk_fullsock(sk); |
90 | if (sk) { |
91 | if (sk->protocol == IPPROTO_TCP) { |
92 | tp = bpf_tcp_sock(sk); |
93 | if (tp) { |
94 | pkti->cwnd = tp->snd_cwnd; |
95 | pkti->rtt = tp->srtt_us >> 3; |
96 | pkti->packets_out = tp->packets_out; |
97 | return 0; |
98 | } |
99 | } |
100 | } |
101 | } |
102 | pkti->cwnd = 0; |
103 | pkti->rtt = 0; |
104 | pkti->packets_out = 0; |
105 | return 1; |
106 | } |
107 | |
108 | static void hbm_get_pkt_info(struct __sk_buff *skb, |
109 | struct hbm_pkt_info *pkti) |
110 | { |
111 | struct iphdr iph; |
112 | struct ipv6hdr *ip6h; |
113 | |
114 | pkti->cwnd = 0; |
115 | pkti->rtt = 0; |
116 | bpf_skb_load_bytes(skb, 0, &iph, 12); |
117 | if (iph.version == 6) { |
118 | ip6h = (struct ipv6hdr *)&iph; |
119 | pkti->is_ip = true; |
120 | pkti->is_tcp = (ip6h->nexthdr == 6); |
121 | pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK; |
122 | } else if (iph.version == 4) { |
123 | pkti->is_ip = true; |
124 | pkti->is_tcp = (iph.protocol == 6); |
125 | pkti->ecn = iph.tos & INET_ECN_MASK; |
126 | } else { |
127 | pkti->is_ip = false; |
128 | pkti->is_tcp = false; |
129 | pkti->ecn = 0; |
130 | } |
131 | if (pkti->is_tcp) |
132 | get_tcp_info(skb, pkti); |
133 | } |
134 | |
135 | static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate) |
136 | { |
137 | bpf_printk("Initializing queue_state, rate:%d\n" , rate * 128); |
138 | qdp->lasttime = bpf_ktime_get_ns(); |
139 | qdp->credit = INIT_CREDIT; |
140 | qdp->rate = rate * 128; |
141 | } |
142 | |
143 | static __always_inline void hbm_init_edt_vqueue(struct hbm_vqueue *qdp, |
144 | int rate) |
145 | { |
146 | unsigned long long curtime; |
147 | |
148 | curtime = bpf_ktime_get_ns(); |
149 | bpf_printk("Initializing queue_state, rate:%d\n" , rate * 128); |
150 | qdp->lasttime = curtime - BURST_SIZE_NS; // support initial burst |
151 | qdp->credit = 0; // not used |
152 | qdp->rate = rate * 128; |
153 | } |
154 | |
155 | static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp, |
156 | int len, |
157 | unsigned long long curtime, |
158 | bool congestion_flag, |
159 | bool drop_flag, |
160 | bool cwr_flag, |
161 | bool ecn_ce_flag, |
162 | struct hbm_pkt_info *pkti, |
163 | int credit) |
164 | { |
165 | int rv = ALLOW_PKT; |
166 | |
167 | if (qsp != NULL) { |
168 | // Following is needed for work conserving |
169 | __sync_add_and_fetch(&(qsp->bytes_total), len); |
170 | if (qsp->stats) { |
171 | // Optionally update statistics |
172 | if (qsp->firstPacketTime == 0) |
173 | qsp->firstPacketTime = curtime; |
174 | qsp->lastPacketTime = curtime; |
175 | __sync_add_and_fetch(&(qsp->pkts_total), 1); |
176 | if (congestion_flag) { |
177 | __sync_add_and_fetch(&(qsp->pkts_marked), 1); |
178 | __sync_add_and_fetch(&(qsp->bytes_marked), len); |
179 | } |
180 | if (drop_flag) { |
181 | __sync_add_and_fetch(&(qsp->pkts_dropped), 1); |
182 | __sync_add_and_fetch(&(qsp->bytes_dropped), |
183 | len); |
184 | } |
185 | if (ecn_ce_flag) |
186 | __sync_add_and_fetch(&(qsp->pkts_ecn_ce), 1); |
187 | if (pkti->cwnd) { |
188 | __sync_add_and_fetch(&(qsp->sum_cwnd), |
189 | pkti->cwnd); |
190 | __sync_add_and_fetch(&(qsp->sum_cwnd_cnt), 1); |
191 | } |
192 | if (pkti->rtt) |
193 | __sync_add_and_fetch(&(qsp->sum_rtt), |
194 | pkti->rtt); |
195 | __sync_add_and_fetch(&(qsp->sum_credit), credit); |
196 | |
197 | if (drop_flag) |
198 | rv = DROP_PKT; |
199 | if (cwr_flag) |
200 | rv |= 2; |
201 | if (rv == DROP_PKT) |
202 | __sync_add_and_fetch(&(qsp->returnValCount[0]), |
203 | 1); |
204 | else if (rv == ALLOW_PKT) |
205 | __sync_add_and_fetch(&(qsp->returnValCount[1]), |
206 | 1); |
207 | else if (rv == 2) |
208 | __sync_add_and_fetch(&(qsp->returnValCount[2]), |
209 | 1); |
210 | else if (rv == 3) |
211 | __sync_add_and_fetch(&(qsp->returnValCount[3]), |
212 | 1); |
213 | } |
214 | } |
215 | } |
216 | |