1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* AFS volume management |
3 | * |
4 | * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. |
5 | * Written by David Howells (dhowells@redhat.com) |
6 | */ |
7 | |
8 | #include <linux/kernel.h> |
9 | #include <linux/slab.h> |
10 | #include "internal.h" |
11 | |
12 | static unsigned __read_mostly afs_volume_record_life = 60 * 60; |
13 | |
14 | /* |
15 | * Insert a volume into a cell. If there's an existing volume record, that is |
16 | * returned instead with a ref held. |
17 | */ |
18 | static struct afs_volume *afs_insert_volume_into_cell(struct afs_cell *cell, |
19 | struct afs_volume *volume) |
20 | { |
21 | struct afs_volume *p; |
22 | struct rb_node *parent = NULL, **pp; |
23 | |
24 | write_seqlock(sl: &cell->volume_lock); |
25 | |
26 | pp = &cell->volumes.rb_node; |
27 | while (*pp) { |
28 | parent = *pp; |
29 | p = rb_entry(parent, struct afs_volume, cell_node); |
30 | if (p->vid < volume->vid) { |
31 | pp = &(*pp)->rb_left; |
32 | } else if (p->vid > volume->vid) { |
33 | pp = &(*pp)->rb_right; |
34 | } else { |
35 | volume = afs_get_volume(p, afs_volume_trace_get_cell_insert); |
36 | goto found; |
37 | } |
38 | } |
39 | |
40 | rb_link_node_rcu(node: &volume->cell_node, parent, rb_link: pp); |
41 | rb_insert_color(&volume->cell_node, &cell->volumes); |
42 | hlist_add_head_rcu(n: &volume->proc_link, h: &cell->proc_volumes); |
43 | |
44 | found: |
45 | write_sequnlock(sl: &cell->volume_lock); |
46 | return volume; |
47 | |
48 | } |
49 | |
50 | static void afs_remove_volume_from_cell(struct afs_volume *volume) |
51 | { |
52 | struct afs_cell *cell = volume->cell; |
53 | |
54 | if (!hlist_unhashed(h: &volume->proc_link)) { |
55 | trace_afs_volume(vid: volume->vid, ref: refcount_read(r: &cell->ref), |
56 | reason: afs_volume_trace_remove); |
57 | write_seqlock(sl: &cell->volume_lock); |
58 | hlist_del_rcu(n: &volume->proc_link); |
59 | rb_erase(&volume->cell_node, &cell->volumes); |
60 | write_sequnlock(sl: &cell->volume_lock); |
61 | } |
62 | } |
63 | |
64 | /* |
65 | * Allocate a volume record and load it up from a vldb record. |
66 | */ |
67 | static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, |
68 | struct afs_vldb_entry *vldb, |
69 | unsigned long type_mask) |
70 | { |
71 | struct afs_server_list *slist; |
72 | struct afs_volume *volume; |
73 | int ret = -ENOMEM; |
74 | |
75 | volume = kzalloc(size: sizeof(struct afs_volume), GFP_KERNEL); |
76 | if (!volume) |
77 | goto error_0; |
78 | |
79 | volume->vid = vldb->vid[params->type]; |
80 | volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; |
81 | volume->cell = afs_get_cell(params->cell, afs_cell_trace_get_vol); |
82 | volume->type = params->type; |
83 | volume->type_force = params->force; |
84 | volume->name_len = vldb->name_len; |
85 | |
86 | refcount_set(r: &volume->ref, n: 1); |
87 | INIT_HLIST_NODE(h: &volume->proc_link); |
88 | rwlock_init(&volume->servers_lock); |
89 | rwlock_init(&volume->cb_v_break_lock); |
90 | memcpy(volume->name, vldb->name, vldb->name_len + 1); |
91 | |
92 | slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask); |
93 | if (IS_ERR(ptr: slist)) { |
94 | ret = PTR_ERR(ptr: slist); |
95 | goto error_1; |
96 | } |
97 | |
98 | refcount_set(r: &slist->usage, n: 1); |
99 | rcu_assign_pointer(volume->servers, slist); |
100 | trace_afs_volume(vid: volume->vid, ref: 1, reason: afs_volume_trace_alloc); |
101 | return volume; |
102 | |
103 | error_1: |
104 | afs_put_cell(volume->cell, afs_cell_trace_put_vol); |
105 | kfree(objp: volume); |
106 | error_0: |
107 | return ERR_PTR(error: ret); |
108 | } |
109 | |
110 | /* |
111 | * Look up or allocate a volume record. |
112 | */ |
113 | static struct afs_volume *afs_lookup_volume(struct afs_fs_context *params, |
114 | struct afs_vldb_entry *vldb, |
115 | unsigned long type_mask) |
116 | { |
117 | struct afs_volume *candidate, *volume; |
118 | |
119 | candidate = afs_alloc_volume(params, vldb, type_mask); |
120 | if (IS_ERR(ptr: candidate)) |
121 | return candidate; |
122 | |
123 | volume = afs_insert_volume_into_cell(cell: params->cell, volume: candidate); |
124 | if (volume != candidate) |
125 | afs_put_volume(params->net, candidate, afs_volume_trace_put_cell_dup); |
126 | return volume; |
127 | } |
128 | |
129 | /* |
130 | * Look up a VLDB record for a volume. |
131 | */ |
132 | static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, |
133 | struct key *key, |
134 | const char *volname, |
135 | size_t volnamesz) |
136 | { |
137 | struct afs_vldb_entry *vldb = ERR_PTR(error: -EDESTADDRREQ); |
138 | struct afs_vl_cursor vc; |
139 | int ret; |
140 | |
141 | if (!afs_begin_vlserver_operation(&vc, cell, key)) |
142 | return ERR_PTR(error: -ERESTARTSYS); |
143 | |
144 | while (afs_select_vlserver(&vc)) { |
145 | vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz); |
146 | } |
147 | |
148 | ret = afs_end_vlserver_operation(&vc); |
149 | return ret < 0 ? ERR_PTR(error: ret) : vldb; |
150 | } |
151 | |
152 | /* |
153 | * Look up a volume in the VL server and create a candidate volume record for |
154 | * it. |
155 | * |
156 | * The volume name can be one of the following: |
157 | * "%[cell:]volume[.]" R/W volume |
158 | * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0), |
159 | * or R/W (rwparent=1) volume |
160 | * "%[cell:]volume.readonly" R/O volume |
161 | * "#[cell:]volume.readonly" R/O volume |
162 | * "%[cell:]volume.backup" Backup volume |
163 | * "#[cell:]volume.backup" Backup volume |
164 | * |
165 | * The cell name is optional, and defaults to the current cell. |
166 | * |
167 | * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin |
168 | * Guide |
169 | * - Rule 1: Explicit type suffix forces access of that type or nothing |
170 | * (no suffix, then use Rule 2 & 3) |
171 | * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W |
172 | * if not available |
173 | * - Rule 3: If parent volume is R/W, then only mount R/W volume unless |
174 | * explicitly told otherwise |
175 | */ |
176 | struct afs_volume *afs_create_volume(struct afs_fs_context *params) |
177 | { |
178 | struct afs_vldb_entry *vldb; |
179 | struct afs_volume *volume; |
180 | unsigned long type_mask = 1UL << params->type; |
181 | |
182 | vldb = afs_vl_lookup_vldb(cell: params->cell, key: params->key, |
183 | volname: params->volname, volnamesz: params->volnamesz); |
184 | if (IS_ERR(ptr: vldb)) |
185 | return ERR_CAST(ptr: vldb); |
186 | |
187 | if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) { |
188 | volume = ERR_PTR(error: vldb->error); |
189 | goto error; |
190 | } |
191 | |
192 | /* Make the final decision on the type we want */ |
193 | volume = ERR_PTR(error: -ENOMEDIUM); |
194 | if (params->force) { |
195 | if (!(vldb->flags & type_mask)) |
196 | goto error; |
197 | } else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) { |
198 | params->type = AFSVL_ROVOL; |
199 | } else if (test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) { |
200 | params->type = AFSVL_RWVOL; |
201 | } else { |
202 | goto error; |
203 | } |
204 | |
205 | type_mask = 1UL << params->type; |
206 | volume = afs_lookup_volume(params, vldb, type_mask); |
207 | |
208 | error: |
209 | kfree(objp: vldb); |
210 | return volume; |
211 | } |
212 | |
213 | /* |
214 | * Destroy a volume record |
215 | */ |
216 | static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume) |
217 | { |
218 | _enter("%p" , volume); |
219 | |
220 | #ifdef CONFIG_AFS_FSCACHE |
221 | ASSERTCMP(volume->cache, ==, NULL); |
222 | #endif |
223 | |
224 | afs_remove_volume_from_cell(volume); |
225 | afs_put_serverlist(net, rcu_access_pointer(volume->servers)); |
226 | afs_put_cell(volume->cell, afs_cell_trace_put_vol); |
227 | trace_afs_volume(vid: volume->vid, ref: refcount_read(r: &volume->ref), |
228 | reason: afs_volume_trace_free); |
229 | kfree_rcu(volume, rcu); |
230 | |
231 | _leave(" [destroyed]" ); |
232 | } |
233 | |
234 | /* |
235 | * Get a reference on a volume record. |
236 | */ |
237 | struct afs_volume *afs_get_volume(struct afs_volume *volume, |
238 | enum afs_volume_trace reason) |
239 | { |
240 | if (volume) { |
241 | int r; |
242 | |
243 | __refcount_inc(r: &volume->ref, oldp: &r); |
244 | trace_afs_volume(vid: volume->vid, ref: r + 1, reason); |
245 | } |
246 | return volume; |
247 | } |
248 | |
249 | |
250 | /* |
251 | * Drop a reference on a volume record. |
252 | */ |
253 | void afs_put_volume(struct afs_net *net, struct afs_volume *volume, |
254 | enum afs_volume_trace reason) |
255 | { |
256 | if (volume) { |
257 | afs_volid_t vid = volume->vid; |
258 | bool zero; |
259 | int r; |
260 | |
261 | zero = __refcount_dec_and_test(r: &volume->ref, oldp: &r); |
262 | trace_afs_volume(vid, ref: r - 1, reason); |
263 | if (zero) |
264 | afs_destroy_volume(net, volume); |
265 | } |
266 | } |
267 | |
/*
 * Activate a volume.
 *
 * With CONFIG_AFS_FSCACHE, an fscache volume cookie keyed on
 * "afs,<cell name>,<vid in hex>" is acquired and stored in volume->cache.
 * If the key is already in use (-EBUSY), an error is logged and the volume
 * is activated with no cache cookie.  Without CONFIG_AFS_FSCACHE this does
 * nothing.
 *
 * Returns 0 on success, -ENOMEM if the key can't be allocated, or any other
 * error reported by fscache_acquire_volume().
 */
int afs_activate_volume(struct afs_volume *volume)
{
#ifdef CONFIG_AFS_FSCACHE
	struct fscache_volume *vcookie;
	char *name;
	int ret = 0;

	name = kasprintf(GFP_KERNEL, "afs,%s,%llx",
			 volume->cell->name, volume->vid);
	if (!name)
		return -ENOMEM;

	vcookie = fscache_acquire_volume(name, NULL, NULL, 0);
	if (!IS_ERR(vcookie)) {
		volume->cache = vcookie;
	} else if (vcookie == ERR_PTR(-EBUSY)) {
		/* Key collision: carry on uncached rather than fail. */
		pr_err("AFS: Cache volume key already in use (%s)\n", name);
		volume->cache = NULL;
	} else {
		ret = PTR_ERR(vcookie);
	}

	kfree(name);
	return ret;
#else
	return 0;
#endif
}
296 | |
297 | /* |
298 | * Deactivate a volume. |
299 | */ |
300 | void afs_deactivate_volume(struct afs_volume *volume) |
301 | { |
302 | _enter("%s" , volume->name); |
303 | |
304 | #ifdef CONFIG_AFS_FSCACHE |
305 | fscache_relinquish_volume(volume: volume->cache, NULL, |
306 | test_bit(AFS_VOLUME_DELETED, &volume->flags)); |
307 | volume->cache = NULL; |
308 | #endif |
309 | |
310 | _leave("" ); |
311 | } |
312 | |
313 | /* |
314 | * Query the VL service to update the volume status. |
315 | */ |
316 | static int afs_update_volume_status(struct afs_volume *volume, struct key *key) |
317 | { |
318 | struct afs_server_list *new, *old, *discard; |
319 | struct afs_vldb_entry *vldb; |
320 | char idbuf[16]; |
321 | int ret, idsz; |
322 | |
323 | _enter("" ); |
324 | |
325 | /* We look up an ID by passing it as a decimal string in the |
326 | * operation's name parameter. |
327 | */ |
328 | idsz = sprintf(buf: idbuf, fmt: "%llu" , volume->vid); |
329 | |
330 | vldb = afs_vl_lookup_vldb(cell: volume->cell, key, volname: idbuf, volnamesz: idsz); |
331 | if (IS_ERR(ptr: vldb)) { |
332 | ret = PTR_ERR(ptr: vldb); |
333 | goto error; |
334 | } |
335 | |
336 | /* See if the volume got renamed. */ |
337 | if (vldb->name_len != volume->name_len || |
338 | memcmp(p: vldb->name, q: volume->name, size: vldb->name_len) != 0) { |
339 | /* TODO: Use RCU'd string. */ |
340 | memcpy(volume->name, vldb->name, AFS_MAXVOLNAME); |
341 | volume->name_len = vldb->name_len; |
342 | } |
343 | |
344 | /* See if the volume's server list got updated. */ |
345 | new = afs_alloc_server_list(volume->cell, key, |
346 | vldb, (1 << volume->type)); |
347 | if (IS_ERR(ptr: new)) { |
348 | ret = PTR_ERR(ptr: new); |
349 | goto error_vldb; |
350 | } |
351 | |
352 | write_lock(&volume->servers_lock); |
353 | |
354 | discard = new; |
355 | old = rcu_dereference_protected(volume->servers, |
356 | lockdep_is_held(&volume->servers_lock)); |
357 | if (afs_annotate_server_list(new, old)) { |
358 | new->seq = volume->servers_seq + 1; |
359 | rcu_assign_pointer(volume->servers, new); |
360 | smp_wmb(); |
361 | volume->servers_seq++; |
362 | discard = old; |
363 | } |
364 | |
365 | volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; |
366 | write_unlock(&volume->servers_lock); |
367 | ret = 0; |
368 | |
369 | afs_put_serverlist(volume->cell->net, discard); |
370 | error_vldb: |
371 | kfree(objp: vldb); |
372 | error: |
373 | _leave(" = %d" , ret); |
374 | return ret; |
375 | } |
376 | |
377 | /* |
378 | * Make sure the volume record is up to date. |
379 | */ |
380 | int afs_check_volume_status(struct afs_volume *volume, struct afs_operation *op) |
381 | { |
382 | int ret, retries = 0; |
383 | |
384 | _enter("" ); |
385 | |
386 | retry: |
387 | if (test_bit(AFS_VOLUME_WAIT, &volume->flags)) |
388 | goto wait; |
389 | if (volume->update_at <= ktime_get_real_seconds() || |
390 | test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags)) |
391 | goto update; |
392 | _leave(" = 0" ); |
393 | return 0; |
394 | |
395 | update: |
396 | if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, addr: &volume->flags)) { |
397 | clear_bit(AFS_VOLUME_NEEDS_UPDATE, addr: &volume->flags); |
398 | ret = afs_update_volume_status(volume, key: op->key); |
399 | if (ret < 0) |
400 | set_bit(AFS_VOLUME_NEEDS_UPDATE, addr: &volume->flags); |
401 | clear_bit_unlock(AFS_VOLUME_WAIT, addr: &volume->flags); |
402 | clear_bit_unlock(AFS_VOLUME_UPDATING, addr: &volume->flags); |
403 | wake_up_bit(word: &volume->flags, AFS_VOLUME_WAIT); |
404 | _leave(" = %d" , ret); |
405 | return ret; |
406 | } |
407 | |
408 | wait: |
409 | if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) { |
410 | _leave(" = 0 [no wait]" ); |
411 | return 0; |
412 | } |
413 | |
414 | ret = wait_on_bit(word: &volume->flags, AFS_VOLUME_WAIT, |
415 | mode: (op->flags & AFS_OPERATION_UNINTR) ? |
416 | TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE); |
417 | if (ret == -ERESTARTSYS) { |
418 | _leave(" = %d" , ret); |
419 | return ret; |
420 | } |
421 | |
422 | retries++; |
423 | if (retries == 4) { |
424 | _leave(" = -ESTALE" ); |
425 | return -ESTALE; |
426 | } |
427 | goto retry; |
428 | } |
429 | |