tcp_vegas.c source code [linux/net/ipv4/tcp_vegas.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	/*
3	* TCP Vegas congestion control
4	*
5	* This is based on the congestion detection/avoidance scheme described in
6	* Lawrence S. Brakmo and Larry L. Peterson.
7	* "TCP Vegas: End to end congestion avoidance on a global internet."
8	* IEEE Journal on Selected Areas in Communication, 13(8):1465--1480,
9	* October 1995. Available from:
10	* ftp://ftp.cs.arizona.edu/xkernel/Papers/jsac.ps
11	*
12	* See http://www.cs.arizona.edu/xkernel/ for their implementation.
13	* The main aspects that distinguish this implementation from the
14	* Arizona Vegas implementation are:
15	* o We do not change the loss detection or recovery mechanisms of
16	* Linux in any way. Linux already recovers from losses quite well,
17	* using fine-grained timers, NewReno, and FACK.
18	* o To avoid the performance penalty imposed by increasing cwnd
19	* only every-other RTT during slow start, we increase during
20	* every RTT during slow start, just like Reno.
21	* o Largely to allow continuous cwnd growth during slow start,
22	* we use the rate at which ACKs come back as the "actual"
23	* rate, rather than the rate at which data is sent.
24	* o To speed convergence to the right rate, we set the cwnd
25	* to achieve the right ("actual") rate when we exit slow start.
26	* o To filter out the noise caused by delayed ACKs, we use the
27	* minimum RTT sample observed during the last RTT to calculate
28	* the actual rate.
29	* o When the sender re-starts from idle, it waits until it has
30	* received ACKs for an entire flight of new data before making
31	* a cwnd adjustment decision. The original Vegas implementation
32	* assumed senders never went idle.
33	*/
34
35	#include <linux/mm.h>
36	#include <linux/module.h>
37	#include <linux/skbuff.h>
38	#include <linux/inet_diag.h>
39
40	#include <net/tcp.h>
41
42	#include "tcp_vegas.h"
43
44	static int alpha = `2`;
45	static int beta = `4`;
46	static int gamma = `1`;
47
48	module_param(alpha, int, `0644`);
49	MODULE_PARM_DESC(alpha, "lower bound of packets in network");
50	module_param(beta, int, `0644`);
51	MODULE_PARM_DESC(beta, "upper bound of packets in network");
52	module_param(gamma, int, `0644`);
53	MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)");
54
55	/ There are several situations when we must "re-start" Vegas:*
56	*
57	* o when a connection is established
58	* o after an RTO
59	* o after fast recovery
60	* o when we send a packet and there is no outstanding
61	* unacknowledged data (restarting an idle connection)
62	*
63	* In these circumstances we cannot do a Vegas calculation at the
64	* end of the first RTT, because any calculation we do is using
65	* stale info -- both the saved cwnd and congestion feedback are
66	* stale.
67	*
68	* Instead we must wait until the completion of an RTT during
69	* which we actually receive ACKs.
70	*/
71	static void vegas_enable(struct sock *sk)
72	{
73	const struct tcp_sock *tp = tcp_sk(sk);
74	struct vegas *vegas = inet_csk_ca(sk);
75
76	/ Begin taking Vegas samples next time we send something. /
77	vegas->doing_vegas_now = `1`;
78
79	/ Set the beginning of the next send window. /
80	vegas->beg_snd_nxt = tp->snd_nxt;
81
82	vegas->cntRTT = `0`;
83	vegas->minRTT = `0x7fffffff`;
84	}
85
86	/ Stop taking Vegas samples for now. /
87	static inline void vegas_disable(struct sock *sk)
88	{
89	struct vegas *vegas = inet_csk_ca(sk);
90
91	vegas->doing_vegas_now = `0`;
92	}
93
94	void tcp_vegas_init(struct sock *sk)
95	{
96	struct vegas *vegas = inet_csk_ca(sk);
97
98	vegas->baseRTT = `0x7fffffff`;
99	vegas_enable(sk);
100	}
101	EXPORT_SYMBOL_GPL(tcp_vegas_init);
102
103	/ Do RTT sampling needed for Vegas.*
104	* Basically we:
105	* o min-filter RTT samples from within an RTT to get the current
106	* propagation delay + queuing delay (we are min-filtering to try to
107	* avoid the effects of delayed ACKs)
108	* o min-filter RTT samples from a much longer window (forever for now)
109	* to find the propagation delay (baseRTT)
110	*/
111	void tcp_vegas_pkts_acked(struct sock sk, const* struct ack_sample *sample)
112	{
113	struct vegas *vegas = inet_csk_ca(sk);
114	u32 vrtt;
115
116	if (sample->rtt_us < `0`)
117	return;
118
119	/ Never allow zero rtt or baseRTT /
120	vrtt = sample->rtt_us + `1`;
121
122	/ Filter to find propagation delay: /
123	if (vrtt < vegas->baseRTT)
124	vegas->baseRTT = vrtt;
125
126	/ Find the min RTT during the last RTT to find*
127	* the current prop. delay + queuing delay:
128	*/
129	vegas->minRTT = min(vegas->minRTT, vrtt);
130	vegas->cntRTT++;
131	}
132	EXPORT_SYMBOL_GPL(tcp_vegas_pkts_acked);
133
134	void tcp_vegas_state(struct sock *sk, u8 ca_state)
135	{
136	if (ca_state == TCP_CA_Open)
137	vegas_enable(sk);
138	else
139	vegas_disable(sk);
140	}
141	EXPORT_SYMBOL_GPL(tcp_vegas_state);
142
143	/*
144	* If the connection is idle and we are restarting,
145	* then we don't want to do any Vegas calculations
146	* until we get fresh RTT samples. So when we
147	* restart, we reset our Vegas state to a clean
148	* slate. After we get acks for this flight of
149	* packets, _then_ we can make Vegas calculations
150	* again.
151	*/
152	void tcp_vegas_cwnd_event(struct sock sk, enum* tcp_ca_event event)
153	{
154	if (event == CA_EVENT_CWND_RESTART \|\|
155	event == CA_EVENT_TX_START)
156	tcp_vegas_init(sk);
157	}
158	EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
159
160	static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
161	{
162	return min(tp->snd_ssthresh, tcp_snd_cwnd(tp));
163	}
164
165	static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
166	{
167	struct tcp_sock *tp = tcp_sk(sk);
168	struct vegas *vegas = inet_csk_ca(sk);
169
170	if (!vegas->doing_vegas_now) {
171	tcp_reno_cong_avoid(sk, ack, acked);
172	return;
173	}
174
175	if (after(ack, vegas->beg_snd_nxt)) {
176	/ Do the Vegas once-per-RTT cwnd adjustment. /
177
178	/ Save the extent of the current window so we can use this*
179	* at the end of the next RTT.
180	*/
181	vegas->beg_snd_nxt = tp->snd_nxt;
182
183	/ We do the Vegas calculations only if we got enough RTT*
184	* samples that we can be reasonably sure that we got
185	* at least one RTT sample that wasn't from a delayed ACK.
186	* If we only had 2 samples total,
187	* then that means we're getting only 1 ACK per RTT, which
188	* means they're almost certainly delayed ACKs.
189	* If we have 3 samples, we should be OK.
190	*/
191
192	if (vegas->cntRTT <= `2`) {
193	/ We don't have enough RTT samples to do the Vegas*
194	* calculation, so we'll behave like Reno.
195	*/
196	tcp_reno_cong_avoid(sk, ack, acked);
197	} else {
198	u32 rtt, diff;
199	u64 target_cwnd;
200
201	/ We have enough RTT samples, so, using the Vegas*
202	* algorithm, we determine if we should increase or
203	* decrease cwnd, and by how much.
204	*/
205
206	/ Pluck out the RTT we are using for the Vegas*
207	* calculations. This is the min RTT seen during the
208	* last RTT. Taking the min filters out the effects
209	* of delayed ACKs, at the cost of noticing congestion
210	* a bit later.
211	*/
212	rtt = vegas->minRTT;
213
214	/ Calculate the cwnd we should have, if we weren't*
215	* going too fast.
216	*
217	* This is:
218	* (actual rate in segments) * baseRTT
219	*/
220	target_cwnd = (u64)tcp_snd_cwnd(tp) * vegas->baseRTT;
221	do_div(target_cwnd, rtt);
222
223	/ Calculate the difference between the window we had,*
224	* and the window we would like to have. This quantity
225	* is the "Diff" from the Arizona Vegas papers.
226	*/
227	diff = tcp_snd_cwnd(tp) * (rtt-vegas->baseRTT) / vegas->baseRTT;
228
229	if (diff > gamma && tcp_in_slow_start(tp)) {
230	/ Going too fast. Time to slow down*
231	* and switch to congestion avoidance.
232	*/
233
234	/ Set cwnd to match the actual rate*
235	* exactly:
236	* cwnd = (actual rate) * baseRTT
237	* Then we add 1 because the integer
238	* truncation robs us of full link
239	* utilization.
240	*/
241	tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp),
242	(u32)target_cwnd + `1`));
243	tp->snd_ssthresh = tcp_vegas_ssthresh(tp);
244
245	} else if (tcp_in_slow_start(tp)) {
246	/ Slow start. /
247	tcp_slow_start(tp, acked);
248	} else {
249	/ Congestion avoidance. /
250
251	/ Figure out where we would like cwnd*
252	* to be.
253	*/
254	if (diff > beta) {
255	/ The old window was too fast, so*
256	* we slow down.
257	*/
258	tcp_snd_cwnd_set(tp, val: tcp_snd_cwnd(tp) - `1`);
259	tp->snd_ssthresh
260	= tcp_vegas_ssthresh(tp);
261	} else if (diff < alpha) {
262	/ We don't have enough extra packets*
263	* in the network, so speed up.
264	*/
265	tcp_snd_cwnd_set(tp, val: tcp_snd_cwnd(tp) + `1`);
266	} else {
267	/ Sending just as fast as we*
268	* should be.
269	*/
270	}
271	}
272
273	if (tcp_snd_cwnd(tp) < `2`)
274	tcp_snd_cwnd_set(tp, val: `2`);
275	else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp)
276	tcp_snd_cwnd_set(tp, val: tp->snd_cwnd_clamp);
277
278	tp->snd_ssthresh = tcp_current_ssthresh(sk);
279	}
280
281	/ Wipe the slate clean for the next RTT. /
282	vegas->cntRTT = `0`;
283	vegas->minRTT = `0x7fffffff`;
284	}
285	/ Use normal slow start /
286	else if (tcp_in_slow_start(tp))
287	tcp_slow_start(tp, acked);
288	}
289
290	/ Extract info for Tcp socket info provided via netlink. /
291	size_t tcp_vegas_get_info(struct sock sk, u32 ext, int* *attr,
292	union tcp_cc_info *info)
293	{
294	const struct vegas *ca = inet_csk_ca(sk);
295
296	if (ext & (`1` << (INET_DIAG_VEGASINFO - `1`))) {
297	info->vegas.tcpv_enabled = ca->doing_vegas_now;
298	info->vegas.tcpv_rttcnt = ca->cntRTT;
299	info->vegas.tcpv_rtt = ca->baseRTT;
300	info->vegas.tcpv_minrtt = ca->minRTT;
301
302	*attr = INET_DIAG_VEGASINFO;
303	return sizeof(struct tcpvegas_info);
304	}
305	return `0`;
306	}
307	EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
308
309	static struct tcp_congestion_ops tcp_vegas __read_mostly = {
310	.init = tcp_vegas_init,
311	.ssthresh = tcp_reno_ssthresh,
312	.undo_cwnd = tcp_reno_undo_cwnd,
313	.cong_avoid = tcp_vegas_cong_avoid,
314	.pkts_acked = tcp_vegas_pkts_acked,
315	.set_state = tcp_vegas_state,
316	.cwnd_event = tcp_vegas_cwnd_event,
317	.get_info = tcp_vegas_get_info,
318
319	.owner = THIS_MODULE,
320	.name = "vegas",
321	};
322
323	static int __init tcp_vegas_register(void)
324	{
325	BUILD_BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE);
326	tcp_register_congestion_control(type: &tcp_vegas);
327	return `0`;
328	}
329
330	static void __exit tcp_vegas_unregister(void)
331	{
332	tcp_unregister_congestion_control(type: &tcp_vegas);
333	}
334
335	module_init(tcp_vegas_register);
336	module_exit(tcp_vegas_unregister);
337
338	MODULE_AUTHOR("Stephen Hemminger");
339	MODULE_LICENSE("GPL");
340	MODULE_DESCRIPTION("TCP Vegas");
341

source code of linux/net/ipv4/tcp_vegas.c