1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | #include <linux/slab.h> |
4 | #include "messages.h" |
5 | #include "ctree.h" |
6 | #include "subpage.h" |
7 | #include "btrfs_inode.h" |
8 | |
9 | /* |
10 | * Subpage (sectorsize < PAGE_SIZE) support overview: |
11 | * |
12 | * Limitations: |
13 | * |
14 | * - Only support 64K page size for now |
15 | * This is to make metadata handling easier, as 64K page would ensure |
16 | * all nodesize would fit inside one page, thus we don't need to handle |
17 | * cases where a tree block crosses several pages. |
18 | * |
19 | * - Only metadata read-write for now |
20 | * The data read-write part is in development. |
21 | * |
22 | * - Metadata can't cross 64K page boundary |
23 | * btrfs-progs and kernel have done that for a while, thus only ancient |
24 | * filesystems could have such problem. For such case, do a graceful |
25 | * rejection. |
26 | * |
27 | * Special behavior: |
28 | * |
29 | * - Metadata |
30 | * Metadata read is fully supported. |
31 | * Meaning when reading one tree block will only trigger the read for the |
32 | * needed range, other unrelated range in the same page will not be touched. |
33 | * |
34 | * Metadata write support is partial. |
35 | * The writeback is still for the full page, but we will only submit |
36 | * the dirty extent buffers in the page. |
37 | * |
38 | * This means, if we have a metadata page like this: |
39 | * |
40 | * Page offset |
41 | * 0 16K 32K 48K 64K |
42 | * |/////////| |///////////| |
43 | * \- Tree block A \- Tree block B |
44 | * |
45 | * Even if we just want to writeback tree block A, we will also writeback |
46 | * tree block B if it's also dirty. |
47 | * |
 * This may cause extra metadata writeback which results in more COW.
49 | * |
50 | * Implementation: |
51 | * |
52 | * - Common |
53 | * Both metadata and data will use a new structure, btrfs_subpage, to |
54 | * record the status of each sector inside a page. This provides the extra |
55 | * granularity needed. |
56 | * |
57 | * - Metadata |
58 | * Since we have multiple tree blocks inside one page, we can't rely on page |
59 | * locking anymore, or we will have greatly reduced concurrency or even |
60 | * deadlocks (hold one tree lock while trying to lock another tree lock in |
61 | * the same page). |
62 | * |
63 | * Thus for metadata locking, subpage support relies on io_tree locking only. |
64 | * This means a slightly higher tree locking latency. |
65 | */ |
66 | |
67 | bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, struct address_space *mapping) |
68 | { |
69 | if (fs_info->sectorsize >= PAGE_SIZE) |
70 | return false; |
71 | |
72 | /* |
73 | * Only data pages (either through DIO or compression) can have no |
74 | * mapping. And if page->mapping->host is data inode, it's subpage. |
75 | * As we have ruled our sectorsize >= PAGE_SIZE case already. |
76 | */ |
77 | if (!mapping || !mapping->host || is_data_inode(inode: mapping->host)) |
78 | return true; |
79 | |
80 | /* |
81 | * Now the only remaining case is metadata, which we only go subpage |
82 | * routine if nodesize < PAGE_SIZE. |
83 | */ |
84 | if (fs_info->nodesize < PAGE_SIZE) |
85 | return true; |
86 | return false; |
87 | } |
88 | |
89 | void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sectorsize) |
90 | { |
91 | unsigned int cur = 0; |
92 | unsigned int nr_bits; |
93 | |
94 | ASSERT(IS_ALIGNED(PAGE_SIZE, sectorsize)); |
95 | |
96 | nr_bits = PAGE_SIZE / sectorsize; |
97 | subpage_info->bitmap_nr_bits = nr_bits; |
98 | |
99 | subpage_info->uptodate_offset = cur; |
100 | cur += nr_bits; |
101 | |
102 | subpage_info->dirty_offset = cur; |
103 | cur += nr_bits; |
104 | |
105 | subpage_info->writeback_offset = cur; |
106 | cur += nr_bits; |
107 | |
108 | subpage_info->ordered_offset = cur; |
109 | cur += nr_bits; |
110 | |
111 | subpage_info->checked_offset = cur; |
112 | cur += nr_bits; |
113 | |
114 | subpage_info->locked_offset = cur; |
115 | cur += nr_bits; |
116 | |
117 | subpage_info->total_nr_bits = cur; |
118 | } |
119 | |
120 | int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info, |
121 | struct folio *folio, enum btrfs_subpage_type type) |
122 | { |
123 | struct btrfs_subpage *subpage; |
124 | |
125 | /* |
126 | * We have cases like a dummy extent buffer page, which is not mapped |
127 | * and doesn't need to be locked. |
128 | */ |
129 | if (folio->mapping) |
130 | ASSERT(folio_test_locked(folio)); |
131 | |
132 | /* Either not subpage, or the folio already has private attached. */ |
133 | if (!btrfs_is_subpage(fs_info, mapping: folio->mapping) || folio_test_private(folio)) |
134 | return 0; |
135 | |
136 | subpage = btrfs_alloc_subpage(fs_info, type); |
137 | if (IS_ERR(ptr: subpage)) |
138 | return PTR_ERR(ptr: subpage); |
139 | |
140 | folio_attach_private(folio, data: subpage); |
141 | return 0; |
142 | } |
143 | |
144 | void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *folio) |
145 | { |
146 | struct btrfs_subpage *subpage; |
147 | |
148 | /* Either not subpage, or the folio already has private attached. */ |
149 | if (!btrfs_is_subpage(fs_info, mapping: folio->mapping) || !folio_test_private(folio)) |
150 | return; |
151 | |
152 | subpage = folio_detach_private(folio); |
153 | ASSERT(subpage); |
154 | btrfs_free_subpage(subpage); |
155 | } |
156 | |
157 | struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info, |
158 | enum btrfs_subpage_type type) |
159 | { |
160 | struct btrfs_subpage *ret; |
161 | unsigned int real_size; |
162 | |
163 | ASSERT(fs_info->sectorsize < PAGE_SIZE); |
164 | |
165 | real_size = struct_size(ret, bitmaps, |
166 | BITS_TO_LONGS(fs_info->subpage_info->total_nr_bits)); |
167 | ret = kzalloc(size: real_size, GFP_NOFS); |
168 | if (!ret) |
169 | return ERR_PTR(error: -ENOMEM); |
170 | |
171 | spin_lock_init(&ret->lock); |
172 | if (type == BTRFS_SUBPAGE_METADATA) { |
173 | atomic_set(v: &ret->eb_refs, i: 0); |
174 | } else { |
175 | atomic_set(v: &ret->readers, i: 0); |
176 | atomic_set(v: &ret->writers, i: 0); |
177 | } |
178 | return ret; |
179 | } |
180 | |
/* Free a btrfs_subpage structure; kfree(NULL) is a no-op. */
void btrfs_free_subpage(struct btrfs_subpage *subpage)
{
	kfree(subpage);
}
185 | |
186 | /* |
187 | * Increase the eb_refs of current subpage. |
188 | * |
189 | * This is important for eb allocation, to prevent race with last eb freeing |
190 | * of the same page. |
191 | * With the eb_refs increased before the eb inserted into radix tree, |
192 | * detach_extent_buffer_page() won't detach the folio private while we're still |
193 | * allocating the extent buffer. |
194 | */ |
195 | void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio) |
196 | { |
197 | struct btrfs_subpage *subpage; |
198 | |
199 | if (!btrfs_is_subpage(fs_info, mapping: folio->mapping)) |
200 | return; |
201 | |
202 | ASSERT(folio_test_private(folio) && folio->mapping); |
203 | lockdep_assert_held(&folio->mapping->i_private_lock); |
204 | |
205 | subpage = folio_get_private(folio); |
206 | atomic_inc(v: &subpage->eb_refs); |
207 | } |
208 | |
209 | void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio) |
210 | { |
211 | struct btrfs_subpage *subpage; |
212 | |
213 | if (!btrfs_is_subpage(fs_info, mapping: folio->mapping)) |
214 | return; |
215 | |
216 | ASSERT(folio_test_private(folio) && folio->mapping); |
217 | lockdep_assert_held(&folio->mapping->i_private_lock); |
218 | |
219 | subpage = folio_get_private(folio); |
220 | ASSERT(atomic_read(&subpage->eb_refs)); |
221 | atomic_dec(v: &subpage->eb_refs); |
222 | } |
223 | |
/*
 * Sanity checks for a subpage operation on the range [@start, @start + @len):
 * the folio must be a single page with subpage private data attached, and
 * the range must be sectorsize aligned and (for mapped folios) fully inside
 * the folio.
 */
static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
				 struct folio *folio, u64 start, u32 len)
{
	/* For subpage support, the folio must be single page. */
	ASSERT(folio_order(folio) == 0);

	/* Basic checks */
	ASSERT(folio_test_private(folio) && folio_get_private(folio));
	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
	       IS_ALIGNED(len, fs_info->sectorsize));
	/*
	 * The range check only works for mapped page, we can still have
	 * unmapped page like dummy extent buffer pages.
	 */
	if (folio->mapping)
		ASSERT(folio_pos(folio) <= start &&
		       start + len <= folio_pos(folio) + PAGE_SIZE);
}
242 | |
/*
 * Compute the starting bit index into subpage::bitmaps for the @name bitmap
 * slice covering the range starting at @start.  Also runs
 * btrfs_subpage_assert() on the range as a side effect.
 */
#define subpage_calc_start_bit(fs_info, folio, name, start, len)	\
({									\
	unsigned int start_bit;						\
									\
	btrfs_subpage_assert(fs_info, folio, start, len);		\
	start_bit = offset_in_page(start) >> fs_info->sectorsize_bits;	\
	start_bit += fs_info->subpage_info->name##_offset;		\
	start_bit;							\
})
252 | |
253 | void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info, |
254 | struct folio *folio, u64 start, u32 len) |
255 | { |
256 | struct btrfs_subpage *subpage = folio_get_private(folio); |
257 | const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); |
258 | const int nbits = len >> fs_info->sectorsize_bits; |
259 | unsigned long flags; |
260 | |
261 | |
262 | btrfs_subpage_assert(fs_info, folio, start, len); |
263 | |
264 | spin_lock_irqsave(&subpage->lock, flags); |
265 | /* |
266 | * Even though it's just for reading the page, no one should have |
267 | * locked the subpage range. |
268 | */ |
269 | ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); |
270 | bitmap_set(map: subpage->bitmaps, start: start_bit, nbits); |
271 | atomic_add(i: nbits, v: &subpage->readers); |
272 | spin_unlock_irqrestore(lock: &subpage->lock, flags); |
273 | } |
274 | |
275 | void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info, |
276 | struct folio *folio, u64 start, u32 len) |
277 | { |
278 | struct btrfs_subpage *subpage = folio_get_private(folio); |
279 | const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); |
280 | const int nbits = len >> fs_info->sectorsize_bits; |
281 | unsigned long flags; |
282 | bool is_data; |
283 | bool last; |
284 | |
285 | btrfs_subpage_assert(fs_info, folio, start, len); |
286 | is_data = is_data_inode(inode: folio->mapping->host); |
287 | |
288 | spin_lock_irqsave(&subpage->lock, flags); |
289 | |
290 | /* The range should have already been locked. */ |
291 | ASSERT(bitmap_test_range_all_set(subpage->bitmaps, start_bit, nbits)); |
292 | ASSERT(atomic_read(&subpage->readers) >= nbits); |
293 | |
294 | bitmap_clear(map: subpage->bitmaps, start: start_bit, nbits); |
295 | last = atomic_sub_and_test(i: nbits, v: &subpage->readers); |
296 | |
297 | /* |
298 | * For data we need to unlock the page if the last read has finished. |
299 | * |
300 | * And please don't replace @last with atomic_sub_and_test() call |
301 | * inside if () condition. |
302 | * As we want the atomic_sub_and_test() to be always executed. |
303 | */ |
304 | if (is_data && last) |
305 | folio_unlock(folio); |
306 | spin_unlock_irqrestore(lock: &subpage->lock, flags); |
307 | } |
308 | |
309 | static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len) |
310 | { |
311 | u64 orig_start = *start; |
312 | u32 orig_len = *len; |
313 | |
314 | *start = max_t(u64, folio_pos(folio), orig_start); |
315 | /* |
316 | * For certain call sites like btrfs_drop_pages(), we may have pages |
317 | * beyond the target range. In that case, just set @len to 0, subpage |
318 | * helpers can handle @len == 0 without any problem. |
319 | */ |
320 | if (folio_pos(folio) >= orig_start + orig_len) |
321 | *len = 0; |
322 | else |
323 | *len = min_t(u64, folio_pos(folio) + PAGE_SIZE, |
324 | orig_start + orig_len) - *start; |
325 | } |
326 | |
327 | static void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info, |
328 | struct folio *folio, u64 start, u32 len) |
329 | { |
330 | struct btrfs_subpage *subpage = folio_get_private(folio); |
331 | const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); |
332 | const int nbits = (len >> fs_info->sectorsize_bits); |
333 | unsigned long flags; |
334 | int ret; |
335 | |
336 | btrfs_subpage_assert(fs_info, folio, start, len); |
337 | |
338 | spin_lock_irqsave(&subpage->lock, flags); |
339 | ASSERT(atomic_read(&subpage->readers) == 0); |
340 | ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); |
341 | bitmap_set(map: subpage->bitmaps, start: start_bit, nbits); |
342 | ret = atomic_add_return(i: nbits, v: &subpage->writers); |
343 | ASSERT(ret == nbits); |
344 | spin_unlock_irqrestore(lock: &subpage->lock, flags); |
345 | } |
346 | |
347 | static bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info, |
348 | struct folio *folio, u64 start, u32 len) |
349 | { |
350 | struct btrfs_subpage *subpage = folio_get_private(folio); |
351 | const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); |
352 | const int nbits = (len >> fs_info->sectorsize_bits); |
353 | unsigned long flags; |
354 | bool last; |
355 | |
356 | btrfs_subpage_assert(fs_info, folio, start, len); |
357 | |
358 | spin_lock_irqsave(&subpage->lock, flags); |
359 | /* |
360 | * We have call sites passing @lock_page into |
361 | * extent_clear_unlock_delalloc() for compression path. |
362 | * |
363 | * This @locked_page is locked by plain lock_page(), thus its |
364 | * subpage::writers is 0. Handle them in a special way. |
365 | */ |
366 | if (atomic_read(v: &subpage->writers) == 0) { |
367 | spin_unlock_irqrestore(lock: &subpage->lock, flags); |
368 | return true; |
369 | } |
370 | |
371 | ASSERT(atomic_read(&subpage->writers) >= nbits); |
372 | /* The target range should have been locked. */ |
373 | ASSERT(bitmap_test_range_all_set(subpage->bitmaps, start_bit, nbits)); |
374 | bitmap_clear(map: subpage->bitmaps, start: start_bit, nbits); |
375 | last = atomic_sub_and_test(i: nbits, v: &subpage->writers); |
376 | spin_unlock_irqrestore(lock: &subpage->lock, flags); |
377 | return last; |
378 | } |
379 | |
380 | /* |
381 | * Lock a folio for delalloc page writeback. |
382 | * |
383 | * Return -EAGAIN if the page is not properly initialized. |
384 | * Return 0 with the page locked, and writer counter updated. |
385 | * |
386 | * Even with 0 returned, the page still need extra check to make sure |
387 | * it's really the correct page, as the caller is using |
388 | * filemap_get_folios_contig(), which can race with page invalidating. |
389 | */ |
390 | int btrfs_folio_start_writer_lock(const struct btrfs_fs_info *fs_info, |
391 | struct folio *folio, u64 start, u32 len) |
392 | { |
393 | if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, mapping: folio->mapping)) { |
394 | folio_lock(folio); |
395 | return 0; |
396 | } |
397 | folio_lock(folio); |
398 | if (!folio_test_private(folio) || !folio_get_private(folio)) { |
399 | folio_unlock(folio); |
400 | return -EAGAIN; |
401 | } |
402 | btrfs_subpage_clamp_range(folio, start: &start, len: &len); |
403 | btrfs_subpage_start_writer(fs_info, folio, start, len); |
404 | return 0; |
405 | } |
406 | |
407 | void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info, |
408 | struct folio *folio, u64 start, u32 len) |
409 | { |
410 | if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, mapping: folio->mapping)) { |
411 | folio_unlock(folio); |
412 | return; |
413 | } |
414 | btrfs_subpage_clamp_range(folio, start: &start, len: &len); |
415 | if (btrfs_subpage_end_and_test_writer(fs_info, folio, start, len)) |
416 | folio_unlock(folio); |
417 | } |
418 | |
/* True if every bit of the @name bitmap slice in @subpage is set. */
#define subpage_test_bitmap_all_set(fs_info, subpage, name)		\
	bitmap_test_range_all_set(subpage->bitmaps,			\
			fs_info->subpage_info->name##_offset,		\
			fs_info->subpage_info->bitmap_nr_bits)

/* True if every bit of the @name bitmap slice in @subpage is clear. */
#define subpage_test_bitmap_all_zero(fs_info, subpage, name)		\
	bitmap_test_range_all_zero(subpage->bitmaps,			\
			fs_info->subpage_info->name##_offset,		\
			fs_info->subpage_info->bitmap_nr_bits)
428 | |
429 | void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info, |
430 | struct folio *folio, u64 start, u32 len) |
431 | { |
432 | struct btrfs_subpage *subpage = folio_get_private(folio); |
433 | unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, |
434 | uptodate, start, len); |
435 | unsigned long flags; |
436 | |
437 | spin_lock_irqsave(&subpage->lock, flags); |
438 | bitmap_set(map: subpage->bitmaps, start: start_bit, nbits: len >> fs_info->sectorsize_bits); |
439 | if (subpage_test_bitmap_all_set(fs_info, subpage, uptodate)) |
440 | folio_mark_uptodate(folio); |
441 | spin_unlock_irqrestore(lock: &subpage->lock, flags); |
442 | } |
443 | |
444 | void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info, |
445 | struct folio *folio, u64 start, u32 len) |
446 | { |
447 | struct btrfs_subpage *subpage = folio_get_private(folio); |
448 | unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, |
449 | uptodate, start, len); |
450 | unsigned long flags; |
451 | |
452 | spin_lock_irqsave(&subpage->lock, flags); |
453 | bitmap_clear(map: subpage->bitmaps, start: start_bit, nbits: len >> fs_info->sectorsize_bits); |
454 | folio_clear_uptodate(folio); |
455 | spin_unlock_irqrestore(lock: &subpage->lock, flags); |
456 | } |
457 | |
458 | void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info, |
459 | struct folio *folio, u64 start, u32 len) |
460 | { |
461 | struct btrfs_subpage *subpage = folio_get_private(folio); |
462 | unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, |
463 | dirty, start, len); |
464 | unsigned long flags; |
465 | |
466 | spin_lock_irqsave(&subpage->lock, flags); |
467 | bitmap_set(map: subpage->bitmaps, start: start_bit, nbits: len >> fs_info->sectorsize_bits); |
468 | spin_unlock_irqrestore(lock: &subpage->lock, flags); |
469 | folio_mark_dirty(folio); |
470 | } |
471 | |
472 | /* |
473 | * Extra clear_and_test function for subpage dirty bitmap. |
474 | * |
475 | * Return true if we're the last bits in the dirty_bitmap and clear the |
476 | * dirty_bitmap. |
477 | * Return false otherwise. |
478 | * |
479 | * NOTE: Callers should manually clear page dirty for true case, as we have |
480 | * extra handling for tree blocks. |
481 | */ |
482 | bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info, |
483 | struct folio *folio, u64 start, u32 len) |
484 | { |
485 | struct btrfs_subpage *subpage = folio_get_private(folio); |
486 | unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, |
487 | dirty, start, len); |
488 | unsigned long flags; |
489 | bool last = false; |
490 | |
491 | spin_lock_irqsave(&subpage->lock, flags); |
492 | bitmap_clear(map: subpage->bitmaps, start: start_bit, nbits: len >> fs_info->sectorsize_bits); |
493 | if (subpage_test_bitmap_all_zero(fs_info, subpage, dirty)) |
494 | last = true; |
495 | spin_unlock_irqrestore(lock: &subpage->lock, flags); |
496 | return last; |
497 | } |
498 | |
499 | void btrfs_subpage_clear_dirty(const struct btrfs_fs_info *fs_info, |
500 | struct folio *folio, u64 start, u32 len) |
501 | { |
502 | bool last; |
503 | |
504 | last = btrfs_subpage_clear_and_test_dirty(fs_info, folio, start, len); |
505 | if (last) |
506 | folio_clear_dirty_for_io(folio); |
507 | } |
508 | |
509 | void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info, |
510 | struct folio *folio, u64 start, u32 len) |
511 | { |
512 | struct btrfs_subpage *subpage = folio_get_private(folio); |
513 | unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, |
514 | writeback, start, len); |
515 | unsigned long flags; |
516 | |
517 | spin_lock_irqsave(&subpage->lock, flags); |
518 | bitmap_set(map: subpage->bitmaps, start: start_bit, nbits: len >> fs_info->sectorsize_bits); |
519 | if (!folio_test_writeback(folio)) |
520 | folio_start_writeback(folio); |
521 | spin_unlock_irqrestore(lock: &subpage->lock, flags); |
522 | } |
523 | |
524 | void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info, |
525 | struct folio *folio, u64 start, u32 len) |
526 | { |
527 | struct btrfs_subpage *subpage = folio_get_private(folio); |
528 | unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, |
529 | writeback, start, len); |
530 | unsigned long flags; |
531 | |
532 | spin_lock_irqsave(&subpage->lock, flags); |
533 | bitmap_clear(map: subpage->bitmaps, start: start_bit, nbits: len >> fs_info->sectorsize_bits); |
534 | if (subpage_test_bitmap_all_zero(fs_info, subpage, writeback)) { |
535 | ASSERT(folio_test_writeback(folio)); |
536 | folio_end_writeback(folio); |
537 | } |
538 | spin_unlock_irqrestore(lock: &subpage->lock, flags); |
539 | } |
540 | |
541 | void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info, |
542 | struct folio *folio, u64 start, u32 len) |
543 | { |
544 | struct btrfs_subpage *subpage = folio_get_private(folio); |
545 | unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, |
546 | ordered, start, len); |
547 | unsigned long flags; |
548 | |
549 | spin_lock_irqsave(&subpage->lock, flags); |
550 | bitmap_set(map: subpage->bitmaps, start: start_bit, nbits: len >> fs_info->sectorsize_bits); |
551 | folio_set_ordered(folio); |
552 | spin_unlock_irqrestore(lock: &subpage->lock, flags); |
553 | } |
554 | |
555 | void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info, |
556 | struct folio *folio, u64 start, u32 len) |
557 | { |
558 | struct btrfs_subpage *subpage = folio_get_private(folio); |
559 | unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, |
560 | ordered, start, len); |
561 | unsigned long flags; |
562 | |
563 | spin_lock_irqsave(&subpage->lock, flags); |
564 | bitmap_clear(map: subpage->bitmaps, start: start_bit, nbits: len >> fs_info->sectorsize_bits); |
565 | if (subpage_test_bitmap_all_zero(fs_info, subpage, ordered)) |
566 | folio_clear_ordered(folio); |
567 | spin_unlock_irqrestore(lock: &subpage->lock, flags); |
568 | } |
569 | |
570 | void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info, |
571 | struct folio *folio, u64 start, u32 len) |
572 | { |
573 | struct btrfs_subpage *subpage = folio_get_private(folio); |
574 | unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, |
575 | checked, start, len); |
576 | unsigned long flags; |
577 | |
578 | spin_lock_irqsave(&subpage->lock, flags); |
579 | bitmap_set(map: subpage->bitmaps, start: start_bit, nbits: len >> fs_info->sectorsize_bits); |
580 | if (subpage_test_bitmap_all_set(fs_info, subpage, checked)) |
581 | folio_set_checked(folio); |
582 | spin_unlock_irqrestore(lock: &subpage->lock, flags); |
583 | } |
584 | |
585 | void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info, |
586 | struct folio *folio, u64 start, u32 len) |
587 | { |
588 | struct btrfs_subpage *subpage = folio_get_private(folio); |
589 | unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, |
590 | checked, start, len); |
591 | unsigned long flags; |
592 | |
593 | spin_lock_irqsave(&subpage->lock, flags); |
594 | bitmap_clear(map: subpage->bitmaps, start: start_bit, nbits: len >> fs_info->sectorsize_bits); |
595 | folio_clear_checked(folio); |
596 | spin_unlock_irqrestore(lock: &subpage->lock, flags); |
597 | } |
598 | |
599 | /* |
600 | * Unlike set/clear which is dependent on each page status, for test all bits |
601 | * are tested in the same way. |
602 | */ |
#define IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(name)				\
bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info,	\
			       struct folio *folio, u64 start, u32 len)	\
{									\
	struct btrfs_subpage *subpage = folio_get_private(folio);	\
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,	\
						name, start, len);	\
	unsigned long flags;						\
	bool ret;							\
									\
	spin_lock_irqsave(&subpage->lock, flags);			\
	ret = bitmap_test_range_all_set(subpage->bitmaps, start_bit,	\
				len >> fs_info->sectorsize_bits);	\
	spin_unlock_irqrestore(&subpage->lock, flags);			\
	return ret;							\
}
/*
 * btrfs_subpage_test_{uptodate,dirty,writeback,ordered,checked}():
 * return true only if ALL sectors in the range have the bit set.
 */
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(checked);
624 | |
625 | /* |
626 | * Note that, in selftests (extent-io-tests), we can have empty fs_info passed |
627 | * in. We only test sectorsize == PAGE_SIZE cases so far, thus we can fall |
628 | * back to regular sectorsize branch. |
629 | */ |
#define IMPLEMENT_BTRFS_PAGE_OPS(name, folio_set_func,			\
				 folio_clear_func, folio_test_func)	\
void btrfs_folio_set_##name(const struct btrfs_fs_info *fs_info,	\
			    struct folio *folio, u64 start, u32 len)	\
{									\
	if (unlikely(!fs_info) ||					\
	    !btrfs_is_subpage(fs_info, folio->mapping)) {		\
		folio_set_func(folio);					\
		return;							\
	}								\
	btrfs_subpage_set_##name(fs_info, folio, start, len);		\
}									\
void btrfs_folio_clear_##name(const struct btrfs_fs_info *fs_info,	\
			      struct folio *folio, u64 start, u32 len)	\
{									\
	if (unlikely(!fs_info) ||					\
	    !btrfs_is_subpage(fs_info, folio->mapping)) {		\
		folio_clear_func(folio);				\
		return;							\
	}								\
	btrfs_subpage_clear_##name(fs_info, folio, start, len);		\
}									\
bool btrfs_folio_test_##name(const struct btrfs_fs_info *fs_info,	\
			     struct folio *folio, u64 start, u32 len)	\
{									\
	if (unlikely(!fs_info) ||					\
	    !btrfs_is_subpage(fs_info, folio->mapping))			\
		return folio_test_func(folio);				\
	return btrfs_subpage_test_##name(fs_info, folio, start, len);	\
}									\
void btrfs_folio_clamp_set_##name(const struct btrfs_fs_info *fs_info,	\
				  struct folio *folio, u64 start, u32 len) \
{									\
	if (unlikely(!fs_info) ||					\
	    !btrfs_is_subpage(fs_info, folio->mapping)) {		\
		folio_set_func(folio);					\
		return;							\
	}								\
	btrfs_subpage_clamp_range(folio, &start, &len);			\
	btrfs_subpage_set_##name(fs_info, folio, start, len);		\
}									\
void btrfs_folio_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \
				    struct folio *folio, u64 start, u32 len) \
{									\
	if (unlikely(!fs_info) ||					\
	    !btrfs_is_subpage(fs_info, folio->mapping)) {		\
		folio_clear_func(folio);				\
		return;							\
	}								\
	btrfs_subpage_clamp_range(folio, &start, &len);			\
	btrfs_subpage_clear_##name(fs_info, folio, start, len);		\
}									\
bool btrfs_folio_clamp_test_##name(const struct btrfs_fs_info *fs_info,	\
				   struct folio *folio, u64 start, u32 len) \
{									\
	if (unlikely(!fs_info) ||					\
	    !btrfs_is_subpage(fs_info, folio->mapping))			\
		return folio_test_func(folio);				\
	btrfs_subpage_clamp_range(folio, &start, &len);			\
	return btrfs_subpage_test_##name(fs_info, folio, start, len);	\
}
/*
 * Per-flag wrappers: set/clear/test that fall back to the whole-folio
 * operation for non-subpage filesystems, plus "clamp" variants that first
 * clamp the range to the folio boundaries.
 */
IMPLEMENT_BTRFS_PAGE_OPS(uptodate, folio_mark_uptodate, folio_clear_uptodate,
			 folio_test_uptodate);
IMPLEMENT_BTRFS_PAGE_OPS(dirty, folio_mark_dirty, folio_clear_dirty_for_io,
			 folio_test_dirty);
IMPLEMENT_BTRFS_PAGE_OPS(writeback, folio_start_writeback, folio_end_writeback,
			 folio_test_writeback);
IMPLEMENT_BTRFS_PAGE_OPS(ordered, folio_set_ordered, folio_clear_ordered,
			 folio_test_ordered);
IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked,
			 folio_test_checked);
701 | |
702 | /* |
703 | * Make sure not only the page dirty bit is cleared, but also subpage dirty bit |
704 | * is cleared. |
705 | */ |
706 | void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info, struct folio *folio) |
707 | { |
708 | struct btrfs_subpage *subpage = folio_get_private(folio); |
709 | |
710 | if (!IS_ENABLED(CONFIG_BTRFS_ASSERT)) |
711 | return; |
712 | |
713 | ASSERT(!folio_test_dirty(folio)); |
714 | if (!btrfs_is_subpage(fs_info, mapping: folio->mapping)) |
715 | return; |
716 | |
717 | ASSERT(folio_test_private(folio) && folio_get_private(folio)); |
718 | ASSERT(subpage_test_bitmap_all_zero(fs_info, subpage, dirty)); |
719 | } |
720 | |
721 | /* |
722 | * Handle different locked pages with different page sizes: |
723 | * |
724 | * - Page locked by plain lock_page() |
725 | * It should not have any subpage::writers count. |
726 | * Can be unlocked by unlock_page(). |
727 | * This is the most common locked page for __extent_writepage() called |
728 | * inside extent_write_cache_pages(). |
729 | * Rarer cases include the @locked_page from extent_write_locked_range(). |
730 | * |
731 | * - Page locked by lock_delalloc_pages() |
732 | * There is only one caller, all pages except @locked_page for |
733 | * extent_write_locked_range(). |
734 | * In this case, we have to call subpage helper to handle the case. |
735 | */ |
736 | void btrfs_folio_unlock_writer(struct btrfs_fs_info *fs_info, |
737 | struct folio *folio, u64 start, u32 len) |
738 | { |
739 | struct btrfs_subpage *subpage; |
740 | |
741 | ASSERT(folio_test_locked(folio)); |
742 | /* For non-subpage case, we just unlock the page */ |
743 | if (!btrfs_is_subpage(fs_info, mapping: folio->mapping)) { |
744 | folio_unlock(folio); |
745 | return; |
746 | } |
747 | |
748 | ASSERT(folio_test_private(folio) && folio_get_private(folio)); |
749 | subpage = folio_get_private(folio); |
750 | |
751 | /* |
752 | * For subpage case, there are two types of locked page. With or |
753 | * without writers number. |
754 | * |
755 | * Since we own the page lock, no one else could touch subpage::writers |
756 | * and we are safe to do several atomic operations without spinlock. |
757 | */ |
758 | if (atomic_read(v: &subpage->writers) == 0) { |
759 | /* No writers, locked by plain lock_page() */ |
760 | folio_unlock(folio); |
761 | return; |
762 | } |
763 | |
764 | /* Have writers, use proper subpage helper to end it */ |
765 | btrfs_folio_end_writer_lock(fs_info, folio, start, len); |
766 | } |
767 | |
/*
 * Copy the @name bitmap slice out of the packed subpage bitmap into @dst so
 * it can be printed with the %*pbl format.
 */
#define GET_SUBPAGE_BITMAP(subpage, subpage_info, name, dst)		\
	bitmap_cut(dst, subpage->bitmaps, 0,				\
		   subpage_info->name##_offset, subpage_info->bitmap_nr_bits)
771 | |
772 | void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info, |
773 | struct folio *folio, u64 start, u32 len) |
774 | { |
775 | struct btrfs_subpage_info *subpage_info = fs_info->subpage_info; |
776 | struct btrfs_subpage *subpage; |
777 | unsigned long uptodate_bitmap; |
778 | unsigned long error_bitmap; |
779 | unsigned long dirty_bitmap; |
780 | unsigned long writeback_bitmap; |
781 | unsigned long ordered_bitmap; |
782 | unsigned long checked_bitmap; |
783 | unsigned long flags; |
784 | |
785 | ASSERT(folio_test_private(folio) && folio_get_private(folio)); |
786 | ASSERT(subpage_info); |
787 | subpage = folio_get_private(folio); |
788 | |
789 | spin_lock_irqsave(&subpage->lock, flags); |
790 | GET_SUBPAGE_BITMAP(subpage, subpage_info, uptodate, &uptodate_bitmap); |
791 | GET_SUBPAGE_BITMAP(subpage, subpage_info, dirty, &dirty_bitmap); |
792 | GET_SUBPAGE_BITMAP(subpage, subpage_info, writeback, &writeback_bitmap); |
793 | GET_SUBPAGE_BITMAP(subpage, subpage_info, ordered, &ordered_bitmap); |
794 | GET_SUBPAGE_BITMAP(subpage, subpage_info, checked, &checked_bitmap); |
795 | GET_SUBPAGE_BITMAP(subpage, subpage_info, locked, &checked_bitmap); |
796 | spin_unlock_irqrestore(lock: &subpage->lock, flags); |
797 | |
798 | dump_page(folio_page(folio, 0), reason: "btrfs subpage dump" ); |
799 | btrfs_warn(fs_info, |
800 | "start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl error=%*pbl dirty=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl" , |
801 | start, len, folio_pos(folio), |
802 | subpage_info->bitmap_nr_bits, &uptodate_bitmap, |
803 | subpage_info->bitmap_nr_bits, &error_bitmap, |
804 | subpage_info->bitmap_nr_bits, &dirty_bitmap, |
805 | subpage_info->bitmap_nr_bits, &writeback_bitmap, |
806 | subpage_info->bitmap_nr_bits, &ordered_bitmap, |
807 | subpage_info->bitmap_nr_bits, &checked_bitmap); |
808 | } |
809 | |