1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
4 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
5 | */ |
6 | |
7 | #include <linux/spinlock.h> |
8 | #include <linux/completion.h> |
9 | #include <linux/buffer_head.h> |
10 | #include <linux/gfs2_ondisk.h> |
11 | #include <linux/bio.h> |
12 | #include <linux/posix_acl.h> |
13 | #include <linux/security.h> |
14 | |
15 | #include "gfs2.h" |
16 | #include "incore.h" |
17 | #include "bmap.h" |
18 | #include "glock.h" |
19 | #include "glops.h" |
20 | #include "inode.h" |
21 | #include "log.h" |
22 | #include "meta_io.h" |
23 | #include "recovery.h" |
24 | #include "rgrp.h" |
25 | #include "util.h" |
26 | #include "trans.h" |
27 | #include "dir.h" |
28 | #include "lops.h" |
29 | |
/* Declared here only; nondisk_go_callback() queues sd_control_work on it. */
extern struct workqueue_struct *gfs2_control_wq;

/* Workqueue on which freeze_go_callback() queues sd_freeze_work. */
struct workqueue_struct *gfs2_freeze_wq;
33 | |
34 | static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) |
35 | { |
36 | struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; |
37 | |
38 | fs_err(sdp, |
39 | "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page " |
40 | "state 0x%lx\n" , |
41 | bh, (unsigned long long)bh->b_blocknr, bh->b_state, |
42 | bh->b_folio->mapping, bh->b_folio->flags); |
43 | fs_err(sdp, "AIL glock %u:%llu mapping %p\n" , |
44 | gl->gl_name.ln_type, gl->gl_name.ln_number, |
45 | gfs2_glock2aspace(gl)); |
46 | gfs2_lm(sdp, fmt: "AIL error\n" ); |
47 | gfs2_withdraw_delayed(sdp); |
48 | } |
49 | |
50 | /** |
51 | * __gfs2_ail_flush - remove all buffers for a given lock from the AIL |
52 | * @gl: the glock |
53 | * @fsync: set when called from fsync (not all buffers will be clean) |
54 | * @nr_revokes: Number of buffers to revoke |
55 | * |
56 | * None of the buffers should be dirty, locked, or pinned. |
57 | */ |
58 | |
59 | static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync, |
60 | unsigned int nr_revokes) |
61 | { |
62 | struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; |
63 | struct list_head *head = &gl->gl_ail_list; |
64 | struct gfs2_bufdata *bd, *tmp; |
65 | struct buffer_head *bh; |
66 | const unsigned long b_state = (1UL << BH_Dirty)|(1UL << BH_Pinned)|(1UL << BH_Lock); |
67 | |
68 | gfs2_log_lock(sdp); |
69 | spin_lock(lock: &sdp->sd_ail_lock); |
70 | list_for_each_entry_safe_reverse(bd, tmp, head, bd_ail_gl_list) { |
71 | if (nr_revokes == 0) |
72 | break; |
73 | bh = bd->bd_bh; |
74 | if (bh->b_state & b_state) { |
75 | if (fsync) |
76 | continue; |
77 | gfs2_ail_error(gl, bh); |
78 | } |
79 | gfs2_trans_add_revoke(sdp, bd); |
80 | nr_revokes--; |
81 | } |
82 | GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); |
83 | spin_unlock(lock: &sdp->sd_ail_lock); |
84 | gfs2_log_unlock(sdp); |
85 | } |
86 | |
87 | |
88 | static int gfs2_ail_empty_gl(struct gfs2_glock *gl) |
89 | { |
90 | struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; |
91 | struct gfs2_trans tr; |
92 | unsigned int revokes; |
93 | int ret = 0; |
94 | |
95 | revokes = atomic_read(v: &gl->gl_ail_count); |
96 | |
97 | if (!revokes) { |
98 | bool have_revokes; |
99 | bool log_in_flight; |
100 | |
101 | /* |
102 | * We have nothing on the ail, but there could be revokes on |
103 | * the sdp revoke queue, in which case, we still want to flush |
104 | * the log and wait for it to finish. |
105 | * |
106 | * If the sdp revoke list is empty too, we might still have an |
107 | * io outstanding for writing revokes, so we should wait for |
108 | * it before returning. |
109 | * |
110 | * If none of these conditions are true, our revokes are all |
111 | * flushed and we can return. |
112 | */ |
113 | gfs2_log_lock(sdp); |
114 | have_revokes = !list_empty(head: &sdp->sd_log_revokes); |
115 | log_in_flight = atomic_read(v: &sdp->sd_log_in_flight); |
116 | gfs2_log_unlock(sdp); |
117 | if (have_revokes) |
118 | goto flush; |
119 | if (log_in_flight) |
120 | log_flush_wait(sdp); |
121 | return 0; |
122 | } |
123 | |
124 | memset(&tr, 0, sizeof(tr)); |
125 | set_bit(nr: TR_ONSTACK, addr: &tr.tr_flags); |
126 | ret = __gfs2_trans_begin(tr: &tr, sdp, blocks: 0, revokes, _RET_IP_); |
127 | if (ret) { |
128 | fs_err(sdp, "Transaction error %d: Unable to write revokes." , ret); |
129 | goto flush; |
130 | } |
131 | __gfs2_ail_flush(gl, fsync: 0, nr_revokes: revokes); |
132 | gfs2_trans_end(sdp); |
133 | |
134 | flush: |
135 | if (!ret) |
136 | gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | |
137 | GFS2_LFC_AIL_EMPTY_GL); |
138 | return ret; |
139 | } |
140 | |
141 | void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) |
142 | { |
143 | struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; |
144 | unsigned int revokes = atomic_read(v: &gl->gl_ail_count); |
145 | int ret; |
146 | |
147 | if (!revokes) |
148 | return; |
149 | |
150 | ret = gfs2_trans_begin(sdp, blocks: 0, revokes); |
151 | if (ret) |
152 | return; |
153 | __gfs2_ail_flush(gl, fsync, nr_revokes: revokes); |
154 | gfs2_trans_end(sdp); |
155 | gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | |
156 | GFS2_LFC_AIL_FLUSH); |
157 | } |
158 | |
159 | /** |
160 | * gfs2_rgrp_metasync - sync out the metadata of a resource group |
161 | * @gl: the glock protecting the resource group |
162 | * |
163 | */ |
164 | |
165 | static int gfs2_rgrp_metasync(struct gfs2_glock *gl) |
166 | { |
167 | struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; |
168 | struct address_space *metamapping = &sdp->sd_aspace; |
169 | struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl); |
170 | const unsigned bsize = sdp->sd_sb.sb_bsize; |
171 | loff_t start = (rgd->rd_addr * bsize) & PAGE_MASK; |
172 | loff_t end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1; |
173 | int error; |
174 | |
175 | filemap_fdatawrite_range(mapping: metamapping, start, end); |
176 | error = filemap_fdatawait_range(metamapping, lstart: start, lend: end); |
177 | WARN_ON_ONCE(error && !gfs2_withdrawing_or_withdrawn(sdp)); |
178 | mapping_set_error(mapping: metamapping, error); |
179 | if (error) |
180 | gfs2_io_error(sdp); |
181 | return error; |
182 | } |
183 | |
184 | /** |
185 | * rgrp_go_sync - sync out the metadata for this glock |
186 | * @gl: the glock |
187 | * |
188 | * Called when demoting or unlocking an EX glock. We must flush |
189 | * to disk all dirty buffers/pages relating to this glock, and must not |
190 | * return to caller to demote/unlock the glock until I/O is complete. |
191 | */ |
192 | |
193 | static int rgrp_go_sync(struct gfs2_glock *gl) |
194 | { |
195 | struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; |
196 | struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl); |
197 | int error; |
198 | |
199 | if (!rgd || !test_and_clear_bit(nr: GLF_DIRTY, addr: &gl->gl_flags)) |
200 | return 0; |
201 | GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); |
202 | |
203 | gfs2_log_flush(sdp, gl, GFS2_LOG_HEAD_FLUSH_NORMAL | |
204 | GFS2_LFC_RGRP_GO_SYNC); |
205 | error = gfs2_rgrp_metasync(gl); |
206 | if (!error) |
207 | error = gfs2_ail_empty_gl(gl); |
208 | gfs2_free_clones(rgd); |
209 | return error; |
210 | } |
211 | |
212 | /** |
213 | * rgrp_go_inval - invalidate the metadata for this glock |
214 | * @gl: the glock |
215 | * @flags: |
216 | * |
217 | * We never used LM_ST_DEFERRED with resource groups, so that we |
218 | * should always see the metadata flag set here. |
219 | * |
220 | */ |
221 | |
222 | static void rgrp_go_inval(struct gfs2_glock *gl, int flags) |
223 | { |
224 | struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; |
225 | struct address_space *mapping = &sdp->sd_aspace; |
226 | struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl); |
227 | const unsigned bsize = sdp->sd_sb.sb_bsize; |
228 | loff_t start, end; |
229 | |
230 | if (!rgd) |
231 | return; |
232 | start = (rgd->rd_addr * bsize) & PAGE_MASK; |
233 | end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1; |
234 | gfs2_rgrp_brelse(rgd); |
235 | WARN_ON_ONCE(!(flags & DIO_METADATA)); |
236 | truncate_inode_pages_range(mapping, lstart: start, lend: end); |
237 | } |
238 | |
239 | static void gfs2_rgrp_go_dump(struct seq_file *seq, const struct gfs2_glock *gl, |
240 | const char *fs_id_buf) |
241 | { |
242 | struct gfs2_rgrpd *rgd = gl->gl_object; |
243 | |
244 | if (rgd) |
245 | gfs2_rgrp_dump(seq, rgd, fs_id_buf); |
246 | } |
247 | |
248 | static struct gfs2_inode *gfs2_glock2inode(struct gfs2_glock *gl) |
249 | { |
250 | struct gfs2_inode *ip; |
251 | |
252 | spin_lock(lock: &gl->gl_lockref.lock); |
253 | ip = gl->gl_object; |
254 | if (ip) |
255 | set_bit(nr: GIF_GLOP_PENDING, addr: &ip->i_flags); |
256 | spin_unlock(lock: &gl->gl_lockref.lock); |
257 | return ip; |
258 | } |
259 | |
260 | struct gfs2_rgrpd *gfs2_glock2rgrp(struct gfs2_glock *gl) |
261 | { |
262 | struct gfs2_rgrpd *rgd; |
263 | |
264 | spin_lock(lock: &gl->gl_lockref.lock); |
265 | rgd = gl->gl_object; |
266 | spin_unlock(lock: &gl->gl_lockref.lock); |
267 | |
268 | return rgd; |
269 | } |
270 | |
271 | static void gfs2_clear_glop_pending(struct gfs2_inode *ip) |
272 | { |
273 | if (!ip) |
274 | return; |
275 | |
276 | clear_bit_unlock(nr: GIF_GLOP_PENDING, addr: &ip->i_flags); |
277 | wake_up_bit(word: &ip->i_flags, bit: GIF_GLOP_PENDING); |
278 | } |
279 | |
280 | /** |
281 | * gfs2_inode_metasync - sync out the metadata of an inode |
282 | * @gl: the glock protecting the inode |
283 | * |
284 | */ |
285 | int gfs2_inode_metasync(struct gfs2_glock *gl) |
286 | { |
287 | struct address_space *metamapping = gfs2_glock2aspace(gl); |
288 | int error; |
289 | |
290 | filemap_fdatawrite(metamapping); |
291 | error = filemap_fdatawait(mapping: metamapping); |
292 | if (error) |
293 | gfs2_io_error(gl->gl_name.ln_sbd); |
294 | return error; |
295 | } |
296 | |
297 | /** |
298 | * inode_go_sync - Sync the dirty metadata of an inode |
299 | * @gl: the glock protecting the inode |
300 | * |
301 | */ |
302 | |
303 | static int inode_go_sync(struct gfs2_glock *gl) |
304 | { |
305 | struct gfs2_inode *ip = gfs2_glock2inode(gl); |
306 | int isreg = ip && S_ISREG(ip->i_inode.i_mode); |
307 | struct address_space *metamapping = gfs2_glock2aspace(gl); |
308 | int error = 0, ret; |
309 | |
310 | if (isreg) { |
311 | if (test_and_clear_bit(nr: GIF_SW_PAGED, addr: &ip->i_flags)) |
312 | unmap_shared_mapping_range(mapping: ip->i_inode.i_mapping, holebegin: 0, holelen: 0); |
313 | inode_dio_wait(inode: &ip->i_inode); |
314 | } |
315 | if (!test_and_clear_bit(nr: GLF_DIRTY, addr: &gl->gl_flags)) |
316 | goto out; |
317 | |
318 | GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); |
319 | |
320 | gfs2_log_flush(sdp: gl->gl_name.ln_sbd, gl, GFS2_LOG_HEAD_FLUSH_NORMAL | |
321 | GFS2_LFC_INODE_GO_SYNC); |
322 | filemap_fdatawrite(metamapping); |
323 | if (isreg) { |
324 | struct address_space *mapping = ip->i_inode.i_mapping; |
325 | filemap_fdatawrite(mapping); |
326 | error = filemap_fdatawait(mapping); |
327 | mapping_set_error(mapping, error); |
328 | } |
329 | ret = gfs2_inode_metasync(gl); |
330 | if (!error) |
331 | error = ret; |
332 | ret = gfs2_ail_empty_gl(gl); |
333 | if (!error) |
334 | error = ret; |
335 | /* |
336 | * Writeback of the data mapping may cause the dirty flag to be set |
337 | * so we have to clear it again here. |
338 | */ |
339 | smp_mb__before_atomic(); |
340 | clear_bit(nr: GLF_DIRTY, addr: &gl->gl_flags); |
341 | |
342 | out: |
343 | gfs2_clear_glop_pending(ip); |
344 | return error; |
345 | } |
346 | |
347 | /** |
348 | * inode_go_inval - prepare a inode glock to be released |
349 | * @gl: the glock |
350 | * @flags: |
351 | * |
352 | * Normally we invalidate everything, but if we are moving into |
353 | * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we |
354 | * can keep hold of the metadata, since it won't have changed. |
355 | * |
356 | */ |
357 | |
358 | static void inode_go_inval(struct gfs2_glock *gl, int flags) |
359 | { |
360 | struct gfs2_inode *ip = gfs2_glock2inode(gl); |
361 | |
362 | if (flags & DIO_METADATA) { |
363 | struct address_space *mapping = gfs2_glock2aspace(gl); |
364 | truncate_inode_pages(mapping, 0); |
365 | if (ip) { |
366 | set_bit(nr: GLF_INSTANTIATE_NEEDED, addr: &gl->gl_flags); |
367 | forget_all_cached_acls(inode: &ip->i_inode); |
368 | security_inode_invalidate_secctx(inode: &ip->i_inode); |
369 | gfs2_dir_hash_inval(ip); |
370 | } |
371 | } |
372 | |
373 | if (ip == GFS2_I(inode: gl->gl_name.ln_sbd->sd_rindex)) { |
374 | gfs2_log_flush(sdp: gl->gl_name.ln_sbd, NULL, |
375 | GFS2_LOG_HEAD_FLUSH_NORMAL | |
376 | GFS2_LFC_INODE_GO_INVAL); |
377 | gl->gl_name.ln_sbd->sd_rindex_uptodate = 0; |
378 | } |
379 | if (ip && S_ISREG(ip->i_inode.i_mode)) |
380 | truncate_inode_pages(ip->i_inode.i_mapping, 0); |
381 | |
382 | gfs2_clear_glop_pending(ip); |
383 | } |
384 | |
385 | /** |
386 | * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock |
387 | * @gl: the glock |
388 | * |
389 | * Returns: 1 if it's ok |
390 | */ |
391 | |
392 | static int inode_go_demote_ok(const struct gfs2_glock *gl) |
393 | { |
394 | struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; |
395 | |
396 | if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object) |
397 | return 0; |
398 | |
399 | return 1; |
400 | } |
401 | |
402 | static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) |
403 | { |
404 | struct gfs2_sbd *sdp = GFS2_SB(inode: &ip->i_inode); |
405 | const struct gfs2_dinode *str = buf; |
406 | struct timespec64 atime, iatime; |
407 | u16 height, depth; |
408 | umode_t mode = be32_to_cpu(str->di_mode); |
409 | struct inode *inode = &ip->i_inode; |
410 | bool is_new = inode->i_state & I_NEW; |
411 | |
412 | if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) |
413 | goto corrupt; |
414 | if (unlikely(!is_new && inode_wrong_type(inode, mode))) |
415 | goto corrupt; |
416 | ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); |
417 | inode->i_mode = mode; |
418 | if (is_new) { |
419 | inode->i_rdev = 0; |
420 | switch (mode & S_IFMT) { |
421 | case S_IFBLK: |
422 | case S_IFCHR: |
423 | inode->i_rdev = MKDEV(be32_to_cpu(str->di_major), |
424 | be32_to_cpu(str->di_minor)); |
425 | break; |
426 | } |
427 | } |
428 | |
429 | i_uid_write(inode, be32_to_cpu(str->di_uid)); |
430 | i_gid_write(inode, be32_to_cpu(str->di_gid)); |
431 | set_nlink(inode, be32_to_cpu(str->di_nlink)); |
432 | i_size_write(inode, be64_to_cpu(str->di_size)); |
433 | gfs2_set_inode_blocks(inode, be64_to_cpu(str->di_blocks)); |
434 | atime.tv_sec = be64_to_cpu(str->di_atime); |
435 | atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); |
436 | iatime = inode_get_atime(inode); |
437 | if (timespec64_compare(lhs: &iatime, rhs: &atime) < 0) |
438 | inode_set_atime_to_ts(inode, ts: atime); |
439 | inode_set_mtime(inode, be64_to_cpu(str->di_mtime), |
440 | be32_to_cpu(str->di_mtime_nsec)); |
441 | inode_set_ctime(inode, be64_to_cpu(str->di_ctime), |
442 | be32_to_cpu(str->di_ctime_nsec)); |
443 | |
444 | ip->i_goal = be64_to_cpu(str->di_goal_meta); |
445 | ip->i_generation = be64_to_cpu(str->di_generation); |
446 | |
447 | ip->i_diskflags = be32_to_cpu(str->di_flags); |
448 | ip->i_eattr = be64_to_cpu(str->di_eattr); |
449 | /* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */ |
450 | gfs2_set_inode_flags(inode); |
451 | height = be16_to_cpu(str->di_height); |
452 | if (unlikely(height > sdp->sd_max_height)) |
453 | goto corrupt; |
454 | ip->i_height = (u8)height; |
455 | |
456 | depth = be16_to_cpu(str->di_depth); |
457 | if (unlikely(depth > GFS2_DIR_MAX_DEPTH)) |
458 | goto corrupt; |
459 | ip->i_depth = (u8)depth; |
460 | ip->i_entries = be32_to_cpu(str->di_entries); |
461 | |
462 | if (gfs2_is_stuffed(ip) && inode->i_size > gfs2_max_stuffed_size(ip)) |
463 | goto corrupt; |
464 | |
465 | if (S_ISREG(inode->i_mode)) |
466 | gfs2_set_aops(inode); |
467 | |
468 | return 0; |
469 | corrupt: |
470 | gfs2_consist_inode(ip); |
471 | return -EIO; |
472 | } |
473 | |
474 | /** |
475 | * gfs2_inode_refresh - Refresh the incore copy of the dinode |
476 | * @ip: The GFS2 inode |
477 | * |
478 | * Returns: errno |
479 | */ |
480 | |
481 | int gfs2_inode_refresh(struct gfs2_inode *ip) |
482 | { |
483 | struct buffer_head *dibh; |
484 | int error; |
485 | |
486 | error = gfs2_meta_inode_buffer(ip, bhp: &dibh); |
487 | if (error) |
488 | return error; |
489 | |
490 | error = gfs2_dinode_in(ip, buf: dibh->b_data); |
491 | brelse(bh: dibh); |
492 | return error; |
493 | } |
494 | |
495 | /** |
496 | * inode_go_instantiate - read in an inode if necessary |
497 | * @gl: The glock |
498 | * |
499 | * Returns: errno |
500 | */ |
501 | |
502 | static int inode_go_instantiate(struct gfs2_glock *gl) |
503 | { |
504 | struct gfs2_inode *ip = gl->gl_object; |
505 | |
506 | if (!ip) /* no inode to populate - read it in later */ |
507 | return 0; |
508 | |
509 | return gfs2_inode_refresh(ip); |
510 | } |
511 | |
512 | static int inode_go_held(struct gfs2_holder *gh) |
513 | { |
514 | struct gfs2_glock *gl = gh->gh_gl; |
515 | struct gfs2_inode *ip = gl->gl_object; |
516 | int error = 0; |
517 | |
518 | if (!ip) /* no inode to populate - read it in later */ |
519 | return 0; |
520 | |
521 | if (gh->gh_state != LM_ST_DEFERRED) |
522 | inode_dio_wait(inode: &ip->i_inode); |
523 | |
524 | if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) && |
525 | (gl->gl_state == LM_ST_EXCLUSIVE) && |
526 | (gh->gh_state == LM_ST_EXCLUSIVE)) |
527 | error = gfs2_truncatei_resume(ip); |
528 | |
529 | return error; |
530 | } |
531 | |
532 | /** |
533 | * inode_go_dump - print information about an inode |
534 | * @seq: The iterator |
535 | * @gl: The glock |
536 | * @fs_id_buf: file system id (may be empty) |
537 | * |
538 | */ |
539 | |
540 | static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl, |
541 | const char *fs_id_buf) |
542 | { |
543 | struct gfs2_inode *ip = gl->gl_object; |
544 | const struct inode *inode = &ip->i_inode; |
545 | |
546 | if (ip == NULL) |
547 | return; |
548 | |
549 | gfs2_print_dbg(seq, fmt: "%s I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu " |
550 | "p:%lu\n" , fs_id_buf, |
551 | (unsigned long long)ip->i_no_formal_ino, |
552 | (unsigned long long)ip->i_no_addr, |
553 | IF2DT(inode->i_mode), ip->i_flags, |
554 | (unsigned int)ip->i_diskflags, |
555 | (unsigned long long)i_size_read(inode), |
556 | inode->i_data.nrpages); |
557 | } |
558 | |
559 | /** |
560 | * freeze_go_callback - A cluster node is requesting a freeze |
561 | * @gl: the glock |
562 | * @remote: true if this came from a different cluster node |
563 | */ |
564 | |
565 | static void freeze_go_callback(struct gfs2_glock *gl, bool remote) |
566 | { |
567 | struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; |
568 | struct super_block *sb = sdp->sd_vfs; |
569 | |
570 | if (!remote || |
571 | (gl->gl_state != LM_ST_SHARED && |
572 | gl->gl_state != LM_ST_UNLOCKED) || |
573 | gl->gl_demote_state != LM_ST_UNLOCKED) |
574 | return; |
575 | |
576 | /* |
577 | * Try to get an active super block reference to prevent racing with |
578 | * unmount (see super_trylock_shared()). But note that unmount isn't |
579 | * the only place where a write lock on s_umount is taken, and we can |
580 | * fail here because of things like remount as well. |
581 | */ |
582 | if (down_read_trylock(sem: &sb->s_umount)) { |
583 | atomic_inc(v: &sb->s_active); |
584 | up_read(sem: &sb->s_umount); |
585 | if (!queue_work(wq: gfs2_freeze_wq, work: &sdp->sd_freeze_work)) |
586 | deactivate_super(sb); |
587 | } |
588 | } |
589 | |
590 | /** |
591 | * freeze_go_xmote_bh - After promoting/demoting the freeze glock |
592 | * @gl: the glock |
593 | */ |
594 | static int freeze_go_xmote_bh(struct gfs2_glock *gl) |
595 | { |
596 | struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; |
597 | struct gfs2_inode *ip = GFS2_I(inode: sdp->sd_jdesc->jd_inode); |
598 | struct gfs2_glock *j_gl = ip->i_gl; |
599 | struct gfs2_log_header_host head; |
600 | int error; |
601 | |
602 | if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { |
603 | j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); |
604 | |
605 | error = gfs2_find_jhead(jd: sdp->sd_jdesc, head: &head, keep_cache: false); |
606 | if (gfs2_assert_withdraw_delayed(sdp, !error)) |
607 | return error; |
608 | if (gfs2_assert_withdraw_delayed(sdp, head.lh_flags & |
609 | GFS2_LOG_HEAD_UNMOUNT)) |
610 | return -EIO; |
611 | sdp->sd_log_sequence = head.lh_sequence + 1; |
612 | gfs2_log_pointers_init(sdp, value: head.lh_blkno); |
613 | } |
614 | return 0; |
615 | } |
616 | |
617 | /** |
618 | * iopen_go_callback - schedule the dcache entry for the inode to be deleted |
619 | * @gl: the glock |
620 | * @remote: true if this came from a different cluster node |
621 | * |
622 | * gl_lockref.lock lock is held while calling this |
623 | */ |
624 | static void iopen_go_callback(struct gfs2_glock *gl, bool remote) |
625 | { |
626 | struct gfs2_inode *ip = gl->gl_object; |
627 | struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; |
628 | |
629 | if (!remote || sb_rdonly(sb: sdp->sd_vfs) || |
630 | test_bit(SDF_KILL, &sdp->sd_flags)) |
631 | return; |
632 | |
633 | if (gl->gl_demote_state == LM_ST_UNLOCKED && |
634 | gl->gl_state == LM_ST_SHARED && ip) { |
635 | gl->gl_lockref.count++; |
636 | if (!gfs2_queue_try_to_evict(gl)) |
637 | gl->gl_lockref.count--; |
638 | } |
639 | } |
640 | |
641 | /** |
642 | * inode_go_free - wake up anyone waiting for dlm's unlock ast to free it |
643 | * @gl: glock being freed |
644 | * |
645 | * For now, this is only used for the journal inode glock. In withdraw |
646 | * situations, we need to wait for the glock to be freed so that we know |
647 | * other nodes may proceed with recovery / journal replay. |
648 | */ |
649 | static void inode_go_free(struct gfs2_glock *gl) |
650 | { |
651 | /* Note that we cannot reference gl_object because it's already set |
652 | * to NULL by this point in its lifecycle. */ |
653 | if (!test_bit(GLF_FREEING, &gl->gl_flags)) |
654 | return; |
655 | clear_bit_unlock(nr: GLF_FREEING, addr: &gl->gl_flags); |
656 | wake_up_bit(word: &gl->gl_flags, bit: GLF_FREEING); |
657 | } |
658 | |
659 | /** |
660 | * nondisk_go_callback - used to signal when a node did a withdraw |
661 | * @gl: the nondisk glock |
662 | * @remote: true if this came from a different cluster node |
663 | * |
664 | */ |
665 | static void nondisk_go_callback(struct gfs2_glock *gl, bool remote) |
666 | { |
667 | struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; |
668 | |
669 | /* Ignore the callback unless it's from another node, and it's the |
670 | live lock. */ |
671 | if (!remote || gl->gl_name.ln_number != GFS2_LIVE_LOCK) |
672 | return; |
673 | |
674 | /* First order of business is to cancel the demote request. We don't |
675 | * really want to demote a nondisk glock. At best it's just to inform |
676 | * us of another node's withdraw. We'll keep it in SH mode. */ |
677 | clear_bit(nr: GLF_DEMOTE, addr: &gl->gl_flags); |
678 | clear_bit(nr: GLF_PENDING_DEMOTE, addr: &gl->gl_flags); |
679 | |
680 | /* Ignore the unlock if we're withdrawn, unmounting, or in recovery. */ |
681 | if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || |
682 | test_bit(SDF_WITHDRAWN, &sdp->sd_flags) || |
683 | test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags)) |
684 | return; |
685 | |
686 | /* We only care when a node wants us to unlock, because that means |
687 | * they want a journal recovered. */ |
688 | if (gl->gl_demote_state != LM_ST_UNLOCKED) |
689 | return; |
690 | |
691 | if (sdp->sd_args.ar_spectator) { |
692 | fs_warn(sdp, "Spectator node cannot recover journals.\n" ); |
693 | return; |
694 | } |
695 | |
696 | fs_warn(sdp, "Some node has withdrawn; checking for recovery.\n" ); |
697 | set_bit(nr: SDF_REMOTE_WITHDRAW, addr: &sdp->sd_flags); |
698 | /* |
699 | * We can't call remote_withdraw directly here or gfs2_recover_journal |
700 | * because this is called from the glock unlock function and the |
701 | * remote_withdraw needs to enqueue and dequeue the same "live" glock |
702 | * we were called from. So we queue it to the control work queue in |
703 | * lock_dlm. |
704 | */ |
705 | queue_delayed_work(wq: gfs2_control_wq, dwork: &sdp->sd_control_work, delay: 0); |
706 | } |
707 | |
708 | const struct gfs2_glock_operations gfs2_meta_glops = { |
709 | .go_type = LM_TYPE_META, |
710 | .go_flags = GLOF_NONDISK, |
711 | }; |
712 | |
713 | const struct gfs2_glock_operations gfs2_inode_glops = { |
714 | .go_sync = inode_go_sync, |
715 | .go_inval = inode_go_inval, |
716 | .go_demote_ok = inode_go_demote_ok, |
717 | .go_instantiate = inode_go_instantiate, |
718 | .go_held = inode_go_held, |
719 | .go_dump = inode_go_dump, |
720 | .go_type = LM_TYPE_INODE, |
721 | .go_flags = GLOF_ASPACE | GLOF_LRU | GLOF_LVB, |
722 | .go_free = inode_go_free, |
723 | }; |
724 | |
725 | const struct gfs2_glock_operations gfs2_rgrp_glops = { |
726 | .go_sync = rgrp_go_sync, |
727 | .go_inval = rgrp_go_inval, |
728 | .go_instantiate = gfs2_rgrp_go_instantiate, |
729 | .go_dump = gfs2_rgrp_go_dump, |
730 | .go_type = LM_TYPE_RGRP, |
731 | .go_flags = GLOF_LVB, |
732 | }; |
733 | |
734 | const struct gfs2_glock_operations gfs2_freeze_glops = { |
735 | .go_xmote_bh = freeze_go_xmote_bh, |
736 | .go_callback = freeze_go_callback, |
737 | .go_type = LM_TYPE_NONDISK, |
738 | .go_flags = GLOF_NONDISK, |
739 | }; |
740 | |
741 | const struct gfs2_glock_operations gfs2_iopen_glops = { |
742 | .go_type = LM_TYPE_IOPEN, |
743 | .go_callback = iopen_go_callback, |
744 | .go_dump = inode_go_dump, |
745 | .go_flags = GLOF_LRU | GLOF_NONDISK, |
746 | .go_subclass = 1, |
747 | }; |
748 | |
749 | const struct gfs2_glock_operations gfs2_flock_glops = { |
750 | .go_type = LM_TYPE_FLOCK, |
751 | .go_flags = GLOF_LRU | GLOF_NONDISK, |
752 | }; |
753 | |
754 | const struct gfs2_glock_operations gfs2_nondisk_glops = { |
755 | .go_type = LM_TYPE_NONDISK, |
756 | .go_flags = GLOF_NONDISK, |
757 | .go_callback = nondisk_go_callback, |
758 | }; |
759 | |
760 | const struct gfs2_glock_operations gfs2_quota_glops = { |
761 | .go_type = LM_TYPE_QUOTA, |
762 | .go_flags = GLOF_LVB | GLOF_LRU | GLOF_NONDISK, |
763 | }; |
764 | |
765 | const struct gfs2_glock_operations gfs2_journal_glops = { |
766 | .go_type = LM_TYPE_JOURNAL, |
767 | .go_flags = GLOF_NONDISK, |
768 | }; |
769 | |
770 | const struct gfs2_glock_operations *gfs2_glops_list[] = { |
771 | [LM_TYPE_META] = &gfs2_meta_glops, |
772 | [LM_TYPE_INODE] = &gfs2_inode_glops, |
773 | [LM_TYPE_RGRP] = &gfs2_rgrp_glops, |
774 | [LM_TYPE_IOPEN] = &gfs2_iopen_glops, |
775 | [LM_TYPE_FLOCK] = &gfs2_flock_glops, |
776 | [LM_TYPE_NONDISK] = &gfs2_nondisk_glops, |
777 | [LM_TYPE_QUOTA] = &gfs2_quota_glops, |
778 | [LM_TYPE_JOURNAL] = &gfs2_journal_glops, |
779 | }; |
780 | |
781 | |