1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* Handle vlserver selection and rotation. |
3 | * |
4 | * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. |
5 | * Written by David Howells (dhowells@redhat.com) |
6 | */ |
7 | |
8 | #include <linux/kernel.h> |
9 | #include <linux/sched.h> |
10 | #include <linux/sched/signal.h> |
11 | #include "internal.h" |
12 | #include "afs_vl.h" |
13 | |
14 | /* |
15 | * Begin an operation on a volume location server. |
16 | */ |
17 | bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell, |
18 | struct key *key) |
19 | { |
20 | memset(vc, 0, sizeof(*vc)); |
21 | vc->cell = cell; |
22 | vc->key = key; |
23 | vc->error = -EDESTADDRREQ; |
24 | vc->ac.error = SHRT_MAX; |
25 | |
26 | if (signal_pending(current)) { |
27 | vc->error = -EINTR; |
28 | vc->flags |= AFS_VL_CURSOR_STOP; |
29 | return false; |
30 | } |
31 | |
32 | return true; |
33 | } |
34 | |
35 | /* |
36 | * Begin iteration through a server list, starting with the last used server if |
37 | * possible, or the last recorded good server if not. |
38 | */ |
39 | static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) |
40 | { |
41 | struct afs_cell *cell = vc->cell; |
42 | unsigned int dns_lookup_count; |
43 | |
44 | if (cell->dns_source == DNS_RECORD_UNAVAILABLE || |
45 | cell->dns_expiry <= ktime_get_real_seconds()) { |
46 | dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count); |
47 | set_bit(AFS_CELL_FL_DO_LOOKUP, addr: &cell->flags); |
48 | afs_queue_cell(cell, afs_cell_trace_get_queue_dns); |
49 | |
50 | if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { |
51 | if (wait_var_event_interruptible( |
52 | &cell->dns_lookup_count, |
53 | smp_load_acquire(&cell->dns_lookup_count) |
54 | != dns_lookup_count) < 0) { |
55 | vc->error = -ERESTARTSYS; |
56 | return false; |
57 | } |
58 | } |
59 | |
60 | /* Status load is ordered after lookup counter load */ |
61 | if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { |
62 | vc->error = -EDESTADDRREQ; |
63 | return false; |
64 | } |
65 | } |
66 | |
67 | read_lock(&cell->vl_servers_lock); |
68 | vc->server_list = afs_get_vlserverlist( |
69 | rcu_dereference_protected(cell->vl_servers, |
70 | lockdep_is_held(&cell->vl_servers_lock))); |
71 | read_unlock(&cell->vl_servers_lock); |
72 | if (!vc->server_list->nr_servers) |
73 | return false; |
74 | |
75 | vc->untried = (1UL << vc->server_list->nr_servers) - 1; |
76 | vc->index = -1; |
77 | return true; |
78 | } |
79 | |
80 | /* |
81 | * Select the vlserver to use. May be called multiple times to rotate |
82 | * through the vlservers. |
83 | */ |
84 | bool afs_select_vlserver(struct afs_vl_cursor *vc) |
85 | { |
86 | struct afs_addr_list *alist; |
87 | struct afs_vlserver *vlserver; |
88 | struct afs_error e; |
89 | u32 rtt; |
90 | int error = vc->ac.error, i; |
91 | |
92 | _enter("%lx[%d],%lx[%d],%d,%d" , |
93 | vc->untried, vc->index, |
94 | vc->ac.tried, vc->ac.index, |
95 | error, vc->ac.abort_code); |
96 | |
97 | if (vc->flags & AFS_VL_CURSOR_STOP) { |
98 | _leave(" = f [stopped]" ); |
99 | return false; |
100 | } |
101 | |
102 | vc->nr_iterations++; |
103 | |
104 | /* Evaluate the result of the previous operation, if there was one. */ |
105 | switch (error) { |
106 | case SHRT_MAX: |
107 | goto start; |
108 | |
109 | default: |
110 | case 0: |
111 | /* Success or local failure. Stop. */ |
112 | vc->error = error; |
113 | vc->flags |= AFS_VL_CURSOR_STOP; |
114 | _leave(" = f [okay/local %d]" , vc->ac.error); |
115 | return false; |
116 | |
117 | case -ECONNABORTED: |
118 | /* The far side rejected the operation on some grounds. This |
119 | * might involve the server being busy or the volume having been moved. |
120 | */ |
121 | switch (vc->ac.abort_code) { |
122 | case AFSVL_IO: |
123 | case AFSVL_BADVOLOPER: |
124 | case AFSVL_NOMEM: |
125 | /* The server went weird. */ |
126 | vc->error = -EREMOTEIO; |
127 | //write_lock(&vc->cell->vl_servers_lock); |
128 | //vc->server_list->weird_mask |= 1 << vc->index; |
129 | //write_unlock(&vc->cell->vl_servers_lock); |
130 | goto next_server; |
131 | |
132 | default: |
133 | vc->error = afs_abort_to_error(vc->ac.abort_code); |
134 | goto failed; |
135 | } |
136 | |
137 | case -ERFKILL: |
138 | case -EADDRNOTAVAIL: |
139 | case -ENETUNREACH: |
140 | case -EHOSTUNREACH: |
141 | case -EHOSTDOWN: |
142 | case -ECONNREFUSED: |
143 | case -ETIMEDOUT: |
144 | case -ETIME: |
145 | _debug("no conn %d" , error); |
146 | vc->error = error; |
147 | goto iterate_address; |
148 | |
149 | case -ECONNRESET: |
150 | _debug("call reset" ); |
151 | vc->error = error; |
152 | vc->flags |= AFS_VL_CURSOR_RETRY; |
153 | goto next_server; |
154 | |
155 | case -EOPNOTSUPP: |
156 | _debug("notsupp" ); |
157 | goto next_server; |
158 | } |
159 | |
160 | restart_from_beginning: |
161 | _debug("restart" ); |
162 | afs_end_cursor(&vc->ac); |
163 | afs_put_vlserverlist(vc->cell->net, vc->server_list); |
164 | vc->server_list = NULL; |
165 | if (vc->flags & AFS_VL_CURSOR_RETRIED) |
166 | goto failed; |
167 | vc->flags |= AFS_VL_CURSOR_RETRIED; |
168 | start: |
169 | _debug("start" ); |
170 | |
171 | if (!afs_start_vl_iteration(vc)) |
172 | goto failed; |
173 | |
174 | error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list); |
175 | if (error < 0) |
176 | goto failed_set_error; |
177 | |
178 | pick_server: |
179 | _debug("pick [%lx]" , vc->untried); |
180 | |
181 | error = afs_wait_for_vl_probes(vc->server_list, vc->untried); |
182 | if (error < 0) |
183 | goto failed_set_error; |
184 | |
185 | /* Pick the untried server with the lowest RTT. */ |
186 | vc->index = vc->server_list->preferred; |
187 | if (test_bit(vc->index, &vc->untried)) |
188 | goto selected_server; |
189 | |
190 | vc->index = -1; |
191 | rtt = U32_MAX; |
192 | for (i = 0; i < vc->server_list->nr_servers; i++) { |
193 | struct afs_vlserver *s = vc->server_list->servers[i].server; |
194 | |
195 | if (!test_bit(i, &vc->untried) || |
196 | !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags)) |
197 | continue; |
198 | if (s->probe.rtt < rtt) { |
199 | vc->index = i; |
200 | rtt = s->probe.rtt; |
201 | } |
202 | } |
203 | |
204 | if (vc->index == -1) |
205 | goto no_more_servers; |
206 | |
207 | selected_server: |
208 | _debug("use %d" , vc->index); |
209 | __clear_bit(vc->index, &vc->untried); |
210 | |
211 | /* We're starting on a different vlserver from the list. We need to |
212 | * check it, find its address list and probe its capabilities before we |
213 | * use it. |
214 | */ |
215 | ASSERTCMP(vc->ac.alist, ==, NULL); |
216 | vlserver = vc->server_list->servers[vc->index].server; |
217 | vc->server = vlserver; |
218 | |
219 | _debug("USING VLSERVER: %s" , vlserver->name); |
220 | |
221 | read_lock(&vlserver->lock); |
222 | alist = rcu_dereference_protected(vlserver->addresses, |
223 | lockdep_is_held(&vlserver->lock)); |
224 | afs_get_addrlist(alist); |
225 | read_unlock(&vlserver->lock); |
226 | |
227 | memset(&vc->ac, 0, sizeof(vc->ac)); |
228 | |
229 | if (!vc->ac.alist) |
230 | vc->ac.alist = alist; |
231 | else |
232 | afs_put_addrlist(alist); |
233 | |
234 | vc->ac.index = -1; |
235 | |
236 | iterate_address: |
237 | ASSERT(vc->ac.alist); |
238 | /* Iterate over the current server's address list to try and find an |
239 | * address on which it will respond to us. |
240 | */ |
241 | if (!afs_iterate_addresses(&vc->ac)) |
242 | goto next_server; |
243 | |
244 | _debug("VL address %d/%d" , vc->ac.index, vc->ac.alist->nr_addrs); |
245 | |
246 | _leave(" = t %pISpc" , &vc->ac.alist->addrs[vc->ac.index].transport); |
247 | return true; |
248 | |
249 | next_server: |
250 | _debug("next" ); |
251 | afs_end_cursor(&vc->ac); |
252 | goto pick_server; |
253 | |
254 | no_more_servers: |
255 | /* That's all the servers poked to no good effect. Try again if some |
256 | * of them were busy. |
257 | */ |
258 | if (vc->flags & AFS_VL_CURSOR_RETRY) |
259 | goto restart_from_beginning; |
260 | |
261 | e.error = -EDESTADDRREQ; |
262 | e.responded = false; |
263 | for (i = 0; i < vc->server_list->nr_servers; i++) { |
264 | struct afs_vlserver *s = vc->server_list->servers[i].server; |
265 | |
266 | if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags)) |
267 | e.responded = true; |
268 | afs_prioritise_error(&e, READ_ONCE(s->probe.error), |
269 | s->probe.abort_code); |
270 | } |
271 | |
272 | error = e.error; |
273 | |
274 | failed_set_error: |
275 | vc->error = error; |
276 | failed: |
277 | vc->flags |= AFS_VL_CURSOR_STOP; |
278 | afs_end_cursor(&vc->ac); |
279 | _leave(" = f [failed %d]" , vc->error); |
280 | return false; |
281 | } |
282 | |
283 | /* |
284 | * Dump cursor state in the case of the error being EDESTADDRREQ. |
285 | */ |
286 | static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) |
287 | { |
288 | static int count; |
289 | int i; |
290 | |
291 | if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3) |
292 | return; |
293 | count++; |
294 | |
295 | rcu_read_lock(); |
296 | pr_notice("EDESTADDR occurred\n" ); |
297 | pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n" , |
298 | vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error); |
299 | |
300 | if (vc->server_list) { |
301 | const struct afs_vlserver_list *sl = vc->server_list; |
302 | pr_notice("VC: SL nr=%u ix=%u\n" , |
303 | sl->nr_servers, sl->index); |
304 | for (i = 0; i < sl->nr_servers; i++) { |
305 | const struct afs_vlserver *s = sl->servers[i].server; |
306 | pr_notice("VC: server %s+%hu fl=%lx E=%hd\n" , |
307 | s->name, s->port, s->flags, s->probe.error); |
308 | if (s->addresses) { |
309 | const struct afs_addr_list *a = |
310 | rcu_dereference(s->addresses); |
311 | pr_notice("VC: - nr=%u/%u/%u pf=%u\n" , |
312 | a->nr_ipv4, a->nr_addrs, a->max_addrs, |
313 | a->preferred); |
314 | pr_notice("VC: - R=%lx F=%lx\n" , |
315 | a->responded, a->failed); |
316 | if (a == vc->ac.alist) |
317 | pr_notice("VC: - current\n" ); |
318 | } |
319 | } |
320 | } |
321 | |
322 | pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n" , |
323 | vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error, |
324 | vc->ac.responded, vc->ac.nr_iterations); |
325 | rcu_read_unlock(); |
326 | } |
327 | |
328 | /* |
329 | * Tidy up a volume location server cursor and unlock the vnode. |
330 | */ |
331 | int afs_end_vlserver_operation(struct afs_vl_cursor *vc) |
332 | { |
333 | struct afs_net *net = vc->cell->net; |
334 | |
335 | if (vc->error == -EDESTADDRREQ || |
336 | vc->error == -EADDRNOTAVAIL || |
337 | vc->error == -ENETUNREACH || |
338 | vc->error == -EHOSTUNREACH) |
339 | afs_vl_dump_edestaddrreq(vc); |
340 | |
341 | afs_end_cursor(&vc->ac); |
342 | afs_put_vlserverlist(net, vc->server_list); |
343 | |
344 | if (vc->error == -ECONNABORTED) |
345 | vc->error = afs_abort_to_error(vc->ac.abort_code); |
346 | |
347 | return vc->error; |
348 | } |
349 | |