1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* RTT/RTO calculation. |
3 | * |
4 | * Adapted from TCP for AF_RXRPC by David Howells (dhowells@redhat.com) |
5 | * |
6 | * https://tools.ietf.org/html/rfc6298 |
7 | * https://tools.ietf.org/html/rfc1122#section-4.2.3.1 |
8 | * http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf |
9 | */ |
10 | |
11 | #include <linux/net.h> |
12 | #include "ar-internal.h" |
13 | |
14 | #define RXRPC_RTO_MAX (120 * USEC_PER_SEC) |
15 | #define RXRPC_TIMEOUT_INIT ((unsigned int)(1 * MSEC_PER_SEC)) /* RFC6298 2.1 initial RTO value */ |
16 | #define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */ |
17 | |
18 | static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer) |
19 | { |
20 | return 200; |
21 | } |
22 | |
23 | static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer) |
24 | { |
25 | return (peer->srtt_us >> 3) + peer->rttvar_us; |
26 | } |
27 | |
28 | static u32 rxrpc_bound_rto(u32 rto) |
29 | { |
30 | return min(rto, RXRPC_RTO_MAX); |
31 | } |
32 | |
33 | /* |
34 | * Called to compute a smoothed rtt estimate. The data fed to this |
35 | * routine either comes from timestamps, or from segments that were |
36 | * known _not_ to have been retransmitted [see Karn/Partridge |
37 | * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88 |
38 | * piece by Van Jacobson. |
39 | * NOTE: the next three routines used to be one big routine. |
40 | * To save cycles in the RFC 1323 implementation it was better to break |
41 | * it up into three procedures. -- erics |
42 | */ |
43 | static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us) |
44 | { |
45 | long m = sample_rtt_us; /* RTT */ |
46 | u32 srtt = peer->srtt_us; |
47 | |
48 | /* The following amusing code comes from Jacobson's |
49 | * article in SIGCOMM '88. Note that rtt and mdev |
50 | * are scaled versions of rtt and mean deviation. |
51 | * This is designed to be as fast as possible |
52 | * m stands for "measurement". |
53 | * |
54 | * On a 1990 paper the rto value is changed to: |
55 | * RTO = rtt + 4 * mdev |
56 | * |
57 | * Funny. This algorithm seems to be very broken. |
58 | * These formulae increase RTO, when it should be decreased, increase |
59 | * too slowly, when it should be increased quickly, decrease too quickly |
60 | * etc. I guess in BSD RTO takes ONE value, so that it is absolutely |
61 | * does not matter how to _calculate_ it. Seems, it was trap |
62 | * that VJ failed to avoid. 8) |
63 | */ |
64 | if (srtt != 0) { |
65 | m -= (srtt >> 3); /* m is now error in rtt est */ |
66 | srtt += m; /* rtt = 7/8 rtt + 1/8 new */ |
67 | if (m < 0) { |
68 | m = -m; /* m is now abs(error) */ |
69 | m -= (peer->mdev_us >> 2); /* similar update on mdev */ |
70 | /* This is similar to one of Eifel findings. |
71 | * Eifel blocks mdev updates when rtt decreases. |
72 | * This solution is a bit different: we use finer gain |
73 | * for mdev in this case (alpha*beta). |
74 | * Like Eifel it also prevents growth of rto, |
75 | * but also it limits too fast rto decreases, |
76 | * happening in pure Eifel. |
77 | */ |
78 | if (m > 0) |
79 | m >>= 3; |
80 | } else { |
81 | m -= (peer->mdev_us >> 2); /* similar update on mdev */ |
82 | } |
83 | |
84 | peer->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */ |
85 | if (peer->mdev_us > peer->mdev_max_us) { |
86 | peer->mdev_max_us = peer->mdev_us; |
87 | if (peer->mdev_max_us > peer->rttvar_us) |
88 | peer->rttvar_us = peer->mdev_max_us; |
89 | } |
90 | } else { |
91 | /* no previous measure. */ |
92 | srtt = m << 3; /* take the measured time to be rtt */ |
93 | peer->mdev_us = m << 1; /* make sure rto = 3*rtt */ |
94 | peer->rttvar_us = max(peer->mdev_us, rxrpc_rto_min_us(peer)); |
95 | peer->mdev_max_us = peer->rttvar_us; |
96 | } |
97 | |
98 | peer->srtt_us = max(1U, srtt); |
99 | } |
100 | |
101 | /* |
102 | * Calculate rto without backoff. This is the second half of Van Jacobson's |
103 | * routine referred to above. |
104 | */ |
105 | static void rxrpc_set_rto(struct rxrpc_peer *peer) |
106 | { |
107 | u32 rto; |
108 | |
109 | /* 1. If rtt variance happened to be less 50msec, it is hallucination. |
110 | * It cannot be less due to utterly erratic ACK generation made |
111 | * at least by solaris and freebsd. "Erratic ACKs" has _nothing_ |
112 | * to do with delayed acks, because at cwnd>2 true delack timeout |
113 | * is invisible. Actually, Linux-2.4 also generates erratic |
114 | * ACKs in some circumstances. |
115 | */ |
116 | rto = __rxrpc_set_rto(peer); |
117 | |
118 | /* 2. Fixups made earlier cannot be right. |
119 | * If we do not estimate RTO correctly without them, |
120 | * all the algo is pure shit and should be replaced |
121 | * with correct one. It is exactly, which we pretend to do. |
122 | */ |
123 | |
124 | /* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo |
125 | * guarantees that rto is higher. |
126 | */ |
127 | peer->rto_us = rxrpc_bound_rto(rto); |
128 | } |
129 | |
130 | static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us) |
131 | { |
132 | if (rtt_us < 0) |
133 | return; |
134 | |
135 | //rxrpc_update_rtt_min(peer, rtt_us); |
136 | rxrpc_rtt_estimator(peer, sample_rtt_us: rtt_us); |
137 | rxrpc_set_rto(peer); |
138 | |
139 | /* RFC6298: only reset backoff on valid RTT measurement. */ |
140 | peer->backoff = 0; |
141 | } |
142 | |
143 | /* |
144 | * Add RTT information to cache. This is called in softirq mode and has |
145 | * exclusive access to the peer RTT data. |
146 | */ |
147 | void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, |
148 | int rtt_slot, |
149 | rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, |
150 | ktime_t send_time, ktime_t resp_time) |
151 | { |
152 | struct rxrpc_peer *peer = call->peer; |
153 | s64 rtt_us; |
154 | |
155 | rtt_us = ktime_to_us(ktime_sub(resp_time, send_time)); |
156 | if (rtt_us < 0) |
157 | return; |
158 | |
159 | spin_lock(lock: &peer->rtt_input_lock); |
160 | rxrpc_ack_update_rtt(peer, rtt_us); |
161 | if (peer->rtt_count < 3) |
162 | peer->rtt_count++; |
163 | spin_unlock(lock: &peer->rtt_input_lock); |
164 | |
165 | trace_rxrpc_rtt_rx(call, why, slot: rtt_slot, send_serial, resp_serial, |
166 | rtt: peer->srtt_us >> 3, rto: peer->rto_us); |
167 | } |
168 | |
169 | /* |
170 | * Get the retransmission timeout to set in nanoseconds, backing it off each |
171 | * time we retransmit. |
172 | */ |
173 | ktime_t rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans) |
174 | { |
175 | u64 timo_us; |
176 | u32 backoff = READ_ONCE(peer->backoff); |
177 | |
178 | timo_us = peer->rto_us; |
179 | timo_us <<= backoff; |
180 | if (retrans && timo_us * 2 <= RXRPC_RTO_MAX) |
181 | WRITE_ONCE(peer->backoff, backoff + 1); |
182 | |
183 | if (timo_us < 1) |
184 | timo_us = 1; |
185 | |
186 | return ns_to_ktime(ns: timo_us * NSEC_PER_USEC); |
187 | } |
188 | |
189 | void rxrpc_peer_init_rtt(struct rxrpc_peer *peer) |
190 | { |
191 | peer->rto_us = RXRPC_TIMEOUT_INIT; |
192 | peer->mdev_us = RXRPC_TIMEOUT_INIT; |
193 | peer->backoff = 0; |
194 | //minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U); |
195 | } |
196 | |