// SPDX-License-Identifier: GPL-2.0+
/*
 * Buffer/page management specific to NILFS
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * Written by Ryusuke Konishi and Seiji Kihara.
 */

#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/bitops.h>
#include <linux/page-flags.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include <linux/gfp.h>
#include "nilfs.h"
#include "page.h"
#include "mdt.h"

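/*
 * Buffer state bits that a buffer copy inherits from its original;
 * see nilfs_copy_buffer() and nilfs_copy_folio().
 */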
#define NILFS_BUFFER_INHERENT_BITS					\
	(BIT(BH_Uptodate) | BIT(BH_Mapped) | BIT(BH_NILFS_Node) |	\
	 BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Checked))

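/*
 * __nilfs_get_folio_block - get the buffer head for a block within a folio
 *
 * Attaches empty buffers to @folio if it has none yet, then looks up the
 * buffer head covering @block, takes a reference on it, touches it, and
 * waits for any I/O in flight on it to complete.
 */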
static struct buffer_head *__nilfs_get_folio_block(struct folio *folio,
		unsigned long block, pgoff_t index, int blkbits,
		unsigned long b_state)
{
	unsigned long first_block;
	struct buffer_head *bh = folio_buffers(folio);

	if (!bh)
		bh = create_empty_buffers(folio, 1 << blkbits, b_state);

	first_block = (unsigned long)index << (PAGE_SHIFT - blkbits);
	bh = get_nth_bh(bh, block - first_block);

	touch_buffer(bh);
	wait_on_buffer(bh);
	return bh;
}

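/**
 * nilfs_grab_buffer - get a buffer head for a block in the page cache
 * @inode: inode owning the block
 * @mapping: page cache in which the block resides
 * @blkoff: block offset (in units of the inode's block size)
 * @b_state: initial buffer state bits for newly created buffers
 *
 * Grabs (creating it if necessary) the folio covering @blkoff in @mapping
 * and returns a referenced buffer head for that block.  On success the
 * folio is left locked; the caller must unlock it and drop the references.
 * Returns NULL on failure.
 */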
struct buffer_head *nilfs_grab_buffer(struct inode *inode,
				      struct address_space *mapping,
				      unsigned long blkoff,
				      unsigned long b_state)
{
	int blkbits = inode->i_blkbits;
	pgoff_t index = blkoff >> (PAGE_SHIFT - blkbits);
	struct folio *folio;
	struct buffer_head *bh;

	folio = filemap_grab_folio(mapping, index);
	if (IS_ERR(folio))
		return NULL;

	bh = __nilfs_get_folio_block(folio, blkoff, index, blkbits, b_state);
	if (unlikely(!bh)) {
		folio_unlock(folio);
		folio_put(folio);
		return NULL;
	}
	return bh;
}

/**
 * nilfs_forget_buffer - discard dirty state
 * @bh: buffer head of the buffer to be discarded
 */
void nilfs_forget_buffer(struct buffer_head *bh)
{
	struct page *page = bh->b_page;
	const unsigned long clear_bits =
		(BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) |
		 BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) |
		 BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected));

	lock_buffer(bh);
	set_mask_bits(&bh->b_state, clear_bits, 0);
	if (nilfs_page_buffers_clean(page))
		__nilfs_clear_page_dirty(page);

	bh->b_blocknr = -1;
	ClearPageUptodate(page);
	ClearPageMappedToDisk(page);
	unlock_buffer(bh);
	brelse(bh);
}

/**
 * nilfs_copy_buffer - copy buffer data and flags
 * @dbh: destination buffer
 * @sbh: source buffer
 */
void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
{
	void *kaddr0, *kaddr1;
	unsigned long bits;
	struct page *spage = sbh->b_page, *dpage = dbh->b_page;
	struct buffer_head *bh;

	kaddr0 = kmap_atomic(spage);
	kaddr1 = kmap_atomic(dpage);
	memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
	kunmap_atomic(kaddr1);
	kunmap_atomic(kaddr0);

	dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
	dbh->b_blocknr = sbh->b_blocknr;
	dbh->b_bdev = sbh->b_bdev;

	bh = dbh;
	bits = sbh->b_state & (BIT(BH_Uptodate) | BIT(BH_Mapped));
	while ((bh = bh->b_this_page) != dbh) {
		lock_buffer(bh);
		bits &= bh->b_state;
		unlock_buffer(bh);
	}
	if (bits & BIT(BH_Uptodate))
		SetPageUptodate(dpage);
	else
		ClearPageUptodate(dpage);
	if (bits & BIT(BH_Mapped))
		SetPageMappedToDisk(dpage);
	else
		ClearPageMappedToDisk(dpage);
}

/**
 * nilfs_page_buffers_clean - check if a page has dirty buffers or not.
 * @page: page to be checked
 *
 * nilfs_page_buffers_clean() returns zero if the page has dirty buffers.
 * Otherwise, it returns a non-zero value.
 */
int nilfs_page_buffers_clean(struct page *page)
{
	struct buffer_head *bh, *head;

	bh = head = page_buffers(page);
	do {
		if (buffer_dirty(bh))
			return 0;
		bh = bh->b_this_page;
	} while (bh != head);
	return 1;
}

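/*
 * nilfs_page_bug - dump diagnostic information about a broken page
 *
 * Prints the page's reference count, index, flags, and owner inode, plus
 * the block number and state of every buffer head attached to it.
 */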
void nilfs_page_bug(struct page *page)
{
	struct address_space *m;
	unsigned long ino;

	if (unlikely(!page)) {
		printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
		return;
	}

	m = page->mapping;
	ino = m ? m->host->i_ino : 0;

	printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
	       "mapping=%p ino=%lu\n",
	       page, page_ref_count(page),
	       (unsigned long long)page->index, page->flags, m, ino);

	if (page_has_buffers(page)) {
		struct buffer_head *bh, *head;
		int i = 0;

		bh = head = page_buffers(page);
		do {
			printk(KERN_CRIT
			       " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
			       i++, bh, atomic_read(&bh->b_count),
			       (unsigned long long)bh->b_blocknr, bh->b_state);
			bh = bh->b_this_page;
		} while (bh != head);
	}
}

/**
 * nilfs_copy_folio - copy the folio with buffers
 * @dst: destination folio
 * @src: source folio
 * @copy_dirty: flag whether to copy dirty states on the folio's buffer heads.
 *
 * This function is for both data folios and btnode folios.  The dirty flag
 * must be handled by the caller.  The folio must not be under I/O.
 * Both @src and @dst folios must be locked.
 */
static void nilfs_copy_folio(struct folio *dst, struct folio *src,
			     bool copy_dirty)
{
	struct buffer_head *dbh, *dbufs, *sbh;
	unsigned long mask = NILFS_BUFFER_INHERENT_BITS;

	BUG_ON(folio_test_writeback(dst));

	sbh = folio_buffers(src);
	dbh = folio_buffers(dst);
	if (!dbh)
		dbh = create_empty_buffers(dst, sbh->b_size, 0);

	if (copy_dirty)
		mask |= BIT(BH_Dirty);

	dbufs = dbh;
	do {
		lock_buffer(sbh);
		lock_buffer(dbh);
		dbh->b_state = sbh->b_state & mask;
		dbh->b_blocknr = sbh->b_blocknr;
		dbh->b_bdev = sbh->b_bdev;
		sbh = sbh->b_this_page;
		dbh = dbh->b_this_page;
	} while (dbh != dbufs);

	folio_copy(dst, src);

	if (folio_test_uptodate(src) && !folio_test_uptodate(dst))
		folio_mark_uptodate(dst);
	else if (!folio_test_uptodate(src) && folio_test_uptodate(dst))
		folio_clear_uptodate(dst);
	if (folio_test_mappedtodisk(src) && !folio_test_mappedtodisk(dst))
		folio_set_mappedtodisk(dst);
	else if (!folio_test_mappedtodisk(src) && folio_test_mappedtodisk(dst))
		folio_clear_mappedtodisk(dst);

	do {
		unlock_buffer(sbh);
		unlock_buffer(dbh);
		sbh = sbh->b_this_page;
		dbh = dbh->b_this_page;
	} while (dbh != dbufs);
}

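/**
 * nilfs_copy_dirty_pages - copy dirty pages from one page cache to another
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * Copies every folio tagged dirty in @smap into a freshly grabbed folio
 * at the same index in @dmap, and marks the copy dirty as well.
 *
 * Return: 0 on success, or a negative error code if grabbing a
 * destination folio fails.
 */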
int nilfs_copy_dirty_pages(struct address_space *dmap,
			   struct address_space *smap)
{
	struct folio_batch fbatch;
	unsigned int i;
	pgoff_t index = 0;
	int err = 0;

	folio_batch_init(&fbatch);
repeat:
	if (!filemap_get_folios_tag(smap, &index, (pgoff_t)-1,
				    PAGECACHE_TAG_DIRTY, &fbatch))
		return 0;

	for (i = 0; i < folio_batch_count(&fbatch); i++) {
		struct folio *folio = fbatch.folios[i], *dfolio;

		folio_lock(folio);
		if (unlikely(!folio_test_dirty(folio)))
			NILFS_PAGE_BUG(&folio->page, "inconsistent dirty state");

		dfolio = filemap_grab_folio(dmap, folio->index);
		if (unlikely(IS_ERR(dfolio))) {
			/* No empty page is added to the page cache */
			folio_unlock(folio);
			err = PTR_ERR(dfolio);
			break;
		}
		if (unlikely(!folio_buffers(folio)))
			NILFS_PAGE_BUG(&folio->page,
				       "found empty page in dat page cache");

		nilfs_copy_folio(dfolio, folio, true);
		filemap_dirty_folio(folio_mapping(dfolio), dfolio);

		folio_unlock(dfolio);
		folio_put(dfolio);
		folio_unlock(folio);
	}
	folio_batch_release(&fbatch);
	cond_resched();

	if (likely(!err))
		goto repeat;
	return err;
}

/**
 * nilfs_copy_back_pages - copy back pages to original cache from shadow cache
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * No pages must be added to the cache during this process.
 * This must be ensured by the caller.
 */
void nilfs_copy_back_pages(struct address_space *dmap,
			   struct address_space *smap)
{
	struct folio_batch fbatch;
	unsigned int i, n;
	pgoff_t start = 0;

	folio_batch_init(&fbatch);
repeat:
	n = filemap_get_folios(smap, &start, ~0UL, &fbatch);
	if (!n)
		return;

	for (i = 0; i < folio_batch_count(&fbatch); i++) {
		struct folio *folio = fbatch.folios[i], *dfolio;
		pgoff_t index = folio->index;

		folio_lock(folio);
		dfolio = filemap_lock_folio(dmap, index);
		if (!IS_ERR(dfolio)) {
			/* overwrite existing folio in the destination cache */
			WARN_ON(folio_test_dirty(dfolio));
			nilfs_copy_folio(dfolio, folio, false);
			folio_unlock(dfolio);
			folio_put(dfolio);
			/* Do we not need to remove folio from smap here? */
		} else {
			struct folio *f;

			/* move the folio to the destination cache */
			xa_lock_irq(&smap->i_pages);
			f = __xa_erase(&smap->i_pages, index);
			WARN_ON(folio != f);
			smap->nrpages--;
			xa_unlock_irq(&smap->i_pages);

			xa_lock_irq(&dmap->i_pages);
			f = __xa_store(&dmap->i_pages, index, folio, GFP_NOFS);
			if (unlikely(f)) {
				/* Probably -ENOMEM */
				folio->mapping = NULL;
				folio_put(folio);
			} else {
				folio->mapping = dmap;
				dmap->nrpages++;
				if (folio_test_dirty(folio))
					__xa_set_mark(&dmap->i_pages, index,
						      PAGECACHE_TAG_DIRTY);
			}
			xa_unlock_irq(&dmap->i_pages);
		}
		folio_unlock(folio);
	}
	folio_batch_release(&fbatch);
	cond_resched();

	goto repeat;
}

/**
 * nilfs_clear_dirty_pages - discard dirty pages in address space
 * @mapping: address space with dirty pages for discarding
 * @silent: suppress [true] or print [false] warning messages
 */
void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent)
{
	struct folio_batch fbatch;
	unsigned int i;
	pgoff_t index = 0;

	folio_batch_init(&fbatch);

	while (filemap_get_folios_tag(mapping, &index, (pgoff_t)-1,
				      PAGECACHE_TAG_DIRTY, &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			folio_lock(folio);

			/*
			 * This folio may have been removed from the address
			 * space by truncation or invalidation when the lock
			 * was acquired.  Skip processing in that case.
			 */
			if (likely(folio->mapping == mapping))
				nilfs_clear_dirty_page(&folio->page, silent);

			folio_unlock(folio);
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}
}

/**
 * nilfs_clear_dirty_page - discard dirty page
 * @page: dirty page that will be discarded
 * @silent: suppress [true] or print [false] warning messages
 */
void nilfs_clear_dirty_page(struct page *page, bool silent)
{
	struct inode *inode = page->mapping->host;
	struct super_block *sb = inode->i_sb;

	BUG_ON(!PageLocked(page));

	if (!silent)
		nilfs_warn(sb, "discard dirty page: offset=%lld, ino=%lu",
			   page_offset(page), inode->i_ino);

	ClearPageUptodate(page);
	ClearPageMappedToDisk(page);

	if (page_has_buffers(page)) {
		struct buffer_head *bh, *head;
		const unsigned long clear_bits =
			(BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) |
			 BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) |
			 BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected));

		bh = head = page_buffers(page);
		do {
			lock_buffer(bh);
			if (!silent)
				nilfs_warn(sb,
					   "discard dirty block: blocknr=%llu, size=%zu",
					   (u64)bh->b_blocknr, bh->b_size);

			set_mask_bits(&bh->b_state, clear_bits, 0);
			unlock_buffer(bh);
		} while (bh = bh->b_this_page, bh != head);
	}

	__nilfs_clear_page_dirty(page);
}

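/**
 * nilfs_page_count_clean_buffers - count clean buffers in a byte range
 * @page: page whose buffers are examined
 * @from: start offset within the page (in bytes)
 * @to: end offset within the page (in bytes)
 *
 * Return: the number of buffer heads intersecting the range [@from, @to)
 * that are not dirty.
 */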
unsigned int nilfs_page_count_clean_buffers(struct page *page,
					    unsigned int from, unsigned int to)
{
	unsigned int block_start, block_end;
	struct buffer_head *bh, *head;
	unsigned int nc = 0;

	for (bh = head = page_buffers(page), block_start = 0;
	     bh != head || !block_start;
	     block_start = block_end, bh = bh->b_this_page) {
		block_end = block_start + bh->b_size;
		if (block_end > from && block_start < to && !buffer_dirty(bh))
			nc++;
	}
	return nc;
}

/*
 * NILFS2 needs clear_page_dirty() in the following two cases:
 *
 * 1) For B-tree node pages and data pages of DAT file, NILFS2 clears dirty
 *    flag of pages when it copies back pages from shadow cache to the
 *    original cache.
 *
 * 2) Some B-tree operations like insertion or deletion may dispose buffers
 *    in dirty state, and this needs to cancel the dirty state of their
 *    pages.
 */
int __nilfs_clear_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;

	if (mapping) {
		xa_lock_irq(&mapping->i_pages);
		if (test_bit(PG_dirty, &page->flags)) {
			__xa_clear_mark(&mapping->i_pages, page_index(page),
					PAGECACHE_TAG_DIRTY);
			xa_unlock_irq(&mapping->i_pages);
			return clear_page_dirty_for_io(page);
		}
		xa_unlock_irq(&mapping->i_pages);
		return 0;
	}
	return TestClearPageDirty(page);
}

/**
 * nilfs_find_uncommitted_extent - find extent of uncommitted data
 * @inode: inode
 * @start_blk: start block offset (in)
 * @blkoff: start offset of the found extent (out)
 *
 * This function searches an extent of buffers marked "delayed" which
 * starts from a block offset equal to or larger than @start_blk.  If
 * such an extent was found, this will store the start offset in
 * @blkoff and return its length in blocks.  Otherwise, zero is
 * returned.
 */
unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
					    sector_t start_blk,
					    sector_t *blkoff)
{
	unsigned int i, nr_folios;
	pgoff_t index;
	unsigned long length = 0;
	struct folio_batch fbatch;
	struct folio *folio;

	if (inode->i_mapping->nrpages == 0)
		return 0;

	index = start_blk >> (PAGE_SHIFT - inode->i_blkbits);

	folio_batch_init(&fbatch);

repeat:
	nr_folios = filemap_get_folios_contig(inode->i_mapping, &index,
					      ULONG_MAX, &fbatch);
	if (nr_folios == 0)
		return length;

	i = 0;
	do {
		folio = fbatch.folios[i];

		folio_lock(folio);
		if (folio_buffers(folio)) {
			struct buffer_head *bh, *head;
			sector_t b;

			b = folio->index << (PAGE_SHIFT - inode->i_blkbits);
			bh = head = folio_buffers(folio);
			do {
				if (b < start_blk)
					continue;
				if (buffer_delay(bh)) {
					if (length == 0)
						*blkoff = b;
					length++;
				} else if (length > 0) {
					goto out_locked;
				}
			} while (++b, bh = bh->b_this_page, bh != head);
		} else {
			if (length > 0)
				goto out_locked;
		}
		folio_unlock(folio);

	} while (++i < nr_folios);

	folio_batch_release(&fbatch);
	cond_resched();
	goto repeat;

out_locked:
	folio_unlock(folio);
	folio_batch_release(&fbatch);
	return length;
}