1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * include/linux/buffer_head.h |
4 | * |
5 | * Everything to do with buffer_heads. |
6 | */ |
7 | |
8 | #ifndef _LINUX_BUFFER_HEAD_H |
9 | #define _LINUX_BUFFER_HEAD_H |
10 | |
11 | #include <linux/types.h> |
12 | #include <linux/blk_types.h> |
13 | #include <linux/fs.h> |
14 | #include <linux/linkage.h> |
15 | #include <linux/pagemap.h> |
16 | #include <linux/wait.h> |
17 | #include <linux/atomic.h> |
18 | |
19 | enum bh_state_bits { |
20 | BH_Uptodate, /* Contains valid data */ |
21 | BH_Dirty, /* Is dirty */ |
22 | BH_Lock, /* Is locked */ |
23 | BH_Req, /* Has been submitted for I/O */ |
24 | |
25 | BH_Mapped, /* Has a disk mapping */ |
26 | BH_New, /* Disk mapping was newly created by get_block */ |
27 | BH_Async_Read, /* Is under end_buffer_async_read I/O */ |
28 | BH_Async_Write, /* Is under end_buffer_async_write I/O */ |
29 | BH_Delay, /* Buffer is not yet allocated on disk */ |
30 | BH_Boundary, /* Block is followed by a discontiguity */ |
31 | BH_Write_EIO, /* I/O error on write */ |
32 | BH_Unwritten, /* Buffer is allocated on disk but not written */ |
	BH_Quiet,	/* Buffer Error Printks to be quiet */
34 | BH_Meta, /* Buffer contains metadata */ |
35 | BH_Prio, /* Buffer should be submitted with REQ_PRIO */ |
36 | BH_Defer_Completion, /* Defer AIO completion to workqueue */ |
37 | |
38 | BH_PrivateStart,/* not a state bit, but the first bit available |
39 | * for private allocation by other entities |
40 | */ |
41 | }; |
42 | |
43 | #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512) |
44 | |
45 | struct page; |
46 | struct buffer_head; |
47 | struct address_space; |
48 | typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); |
49 | |
50 | /* |
51 | * Historically, a buffer_head was used to map a single block |
52 | * within a page, and of course as the unit of I/O through the |
53 | * filesystem and block layers. Nowadays the basic I/O unit |
54 | * is the bio, and buffer_heads are used for extracting block |
55 | * mappings (via a get_block_t call), for tracking state within |
 * a page (via a page_mapping call) and for wrapping bio submission
57 | * for backward compatibility reasons (e.g. submit_bh). |
58 | */ |
59 | struct buffer_head { |
60 | unsigned long b_state; /* buffer state bitmap (see above) */ |
61 | struct buffer_head *b_this_page;/* circular list of page's buffers */ |
62 | union { |
63 | struct page *b_page; /* the page this bh is mapped to */ |
64 | struct folio *b_folio; /* the folio this bh is mapped to */ |
65 | }; |
66 | |
67 | sector_t b_blocknr; /* start block number */ |
68 | size_t b_size; /* size of mapping */ |
69 | char *b_data; /* pointer to data within the page */ |
70 | |
71 | struct block_device *b_bdev; |
72 | bh_end_io_t *b_end_io; /* I/O completion */ |
73 | void *b_private; /* reserved for b_end_io */ |
74 | struct list_head b_assoc_buffers; /* associated with another mapping */ |
75 | struct address_space *b_assoc_map; /* mapping this buffer is |
76 | associated with */ |
77 | atomic_t b_count; /* users using this buffer_head */ |
78 | spinlock_t b_uptodate_lock; /* Used by the first bh in a page, to |
79 | * serialise IO completion of other |
80 | * buffers in the page */ |
81 | }; |
82 | |
83 | /* |
84 | * macro tricks to expand the set_buffer_foo(), clear_buffer_foo() |
85 | * and buffer_foo() functions. |
 * To avoid resetting buffer flags that are already set, which would cause
 * a costly cache-line transition, the set function checks the flag first.
88 | */ |
89 | #define BUFFER_FNS(bit, name) \ |
90 | static __always_inline void set_buffer_##name(struct buffer_head *bh) \ |
91 | { \ |
92 | if (!test_bit(BH_##bit, &(bh)->b_state)) \ |
93 | set_bit(BH_##bit, &(bh)->b_state); \ |
94 | } \ |
95 | static __always_inline void clear_buffer_##name(struct buffer_head *bh) \ |
96 | { \ |
97 | clear_bit(BH_##bit, &(bh)->b_state); \ |
98 | } \ |
99 | static __always_inline int buffer_##name(const struct buffer_head *bh) \ |
100 | { \ |
101 | return test_bit(BH_##bit, &(bh)->b_state); \ |
102 | } |
103 | |
104 | /* |
105 | * test_set_buffer_foo() and test_clear_buffer_foo() |
106 | */ |
107 | #define TAS_BUFFER_FNS(bit, name) \ |
108 | static __always_inline int test_set_buffer_##name(struct buffer_head *bh) \ |
109 | { \ |
110 | return test_and_set_bit(BH_##bit, &(bh)->b_state); \ |
111 | } \ |
112 | static __always_inline int test_clear_buffer_##name(struct buffer_head *bh) \ |
113 | { \ |
114 | return test_and_clear_bit(BH_##bit, &(bh)->b_state); \ |
115 | } \ |
116 | |
117 | /* |
118 | * Emit the buffer bitops functions. Note that there are also functions |
119 | * of the form "mark_buffer_foo()". These are higher-level functions which |
120 | * do something in addition to setting a b_state bit. |
121 | */ |
122 | BUFFER_FNS(Dirty, dirty) |
123 | TAS_BUFFER_FNS(Dirty, dirty) |
124 | BUFFER_FNS(Lock, locked) |
125 | BUFFER_FNS(Req, req) |
126 | TAS_BUFFER_FNS(Req, req) |
127 | BUFFER_FNS(Mapped, mapped) |
128 | BUFFER_FNS(New, new) |
129 | BUFFER_FNS(Async_Read, async_read) |
130 | BUFFER_FNS(Async_Write, async_write) |
131 | BUFFER_FNS(Delay, delay) |
132 | BUFFER_FNS(Boundary, boundary) |
133 | BUFFER_FNS(Write_EIO, write_io_error) |
134 | BUFFER_FNS(Unwritten, unwritten) |
135 | BUFFER_FNS(Meta, meta) |
136 | BUFFER_FNS(Prio, prio) |
137 | BUFFER_FNS(Defer_Completion, defer_completion) |
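
/*
 * Example (illustrative): the helpers generated above are plain atomic bit
 * operations on bh->b_state.  Contrast them with the higher-level
 * mark_buffer_dirty(), which also dirties the buffer's folio and backing
 * inode so that writeback will pick the buffer up:
 *
 *	if (buffer_mapped(bh) && !buffer_dirty(bh))
 *		mark_buffer_dirty(bh);
 */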
138 | |
139 | static __always_inline void set_buffer_uptodate(struct buffer_head *bh) |
140 | { |
141 | /* |
142 | * If somebody else already set this uptodate, they will |
143 | * have done the memory barrier, and a reader will thus |
144 | * see *some* valid buffer state. |
145 | * |
146 | * Any other serialization (with IO errors or whatever that |
147 | * might clear the bit) has to come from other state (eg BH_Lock). |
148 | */ |
149 | if (test_bit(BH_Uptodate, &bh->b_state)) |
150 | return; |
151 | |
152 | /* |
153 | * make it consistent with folio_mark_uptodate |
154 | * pairs with smp_load_acquire in buffer_uptodate |
155 | */ |
156 | smp_mb__before_atomic(); |
	set_bit(BH_Uptodate, &bh->b_state);
158 | } |
159 | |
160 | static __always_inline void clear_buffer_uptodate(struct buffer_head *bh) |
161 | { |
	clear_bit(BH_Uptodate, &bh->b_state);
163 | } |
164 | |
165 | static __always_inline int buffer_uptodate(const struct buffer_head *bh) |
166 | { |
167 | /* |
168 | * make it consistent with folio_test_uptodate |
169 | * pairs with smp_mb__before_atomic in set_buffer_uptodate |
170 | */ |
171 | return test_bit_acquire(BH_Uptodate, &bh->b_state); |
172 | } |
173 | |
174 | static inline unsigned long bh_offset(const struct buffer_head *bh) |
175 | { |
	return (unsigned long)(bh)->b_data & (page_size(bh->b_page) - 1);
177 | } |
178 | |
179 | /* If we *know* page->private refers to buffer_heads */ |
180 | #define page_buffers(page) \ |
181 | ({ \ |
182 | BUG_ON(!PagePrivate(page)); \ |
183 | ((struct buffer_head *)page_private(page)); \ |
184 | }) |
185 | #define page_has_buffers(page) PagePrivate(page) |
186 | #define folio_buffers(folio) folio_get_private(folio) |
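
/*
 * Example (illustrative): a folio's buffers form a circular, singly linked
 * list through b_this_page, with the head buffer stored in the folio's
 * private data.  A typical walk over every buffer in a folio:
 *
 *	struct buffer_head *head = folio_buffers(folio);
 *	struct buffer_head *bh = head;
 *	unsigned int nr_dirty = 0;
 *
 *	if (head) {
 *		do {
 *			if (buffer_dirty(bh))
 *				nr_dirty++;
 *		} while ((bh = bh->b_this_page) != head);
 *	}
 */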
187 | |
188 | void buffer_check_dirty_writeback(struct folio *folio, |
189 | bool *dirty, bool *writeback); |
190 | |
191 | /* |
192 | * Declarations |
193 | */ |
194 | |
195 | void mark_buffer_dirty(struct buffer_head *bh); |
196 | void mark_buffer_write_io_error(struct buffer_head *bh); |
197 | void touch_buffer(struct buffer_head *bh); |
198 | void folio_set_bh(struct buffer_head *bh, struct folio *folio, |
199 | unsigned long offset); |
200 | struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size, |
201 | gfp_t gfp); |
202 | struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, |
203 | bool retry); |
204 | struct buffer_head *create_empty_buffers(struct folio *folio, |
205 | unsigned long blocksize, unsigned long b_state); |
206 | void end_buffer_read_sync(struct buffer_head *bh, int uptodate); |
207 | void end_buffer_write_sync(struct buffer_head *bh, int uptodate); |
208 | |
209 | /* Things to do with buffers at mapping->private_list */ |
210 | void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode); |
211 | int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end, |
212 | bool datasync); |
213 | int generic_buffers_fsync(struct file *file, loff_t start, loff_t end, |
214 | bool datasync); |
215 | void clean_bdev_aliases(struct block_device *bdev, sector_t block, |
216 | sector_t len); |
217 | static inline void clean_bdev_bh_alias(struct buffer_head *bh) |
218 | { |
	clean_bdev_aliases(bh->b_bdev, bh->b_blocknr, 1);
220 | } |
221 | |
222 | void mark_buffer_async_write(struct buffer_head *bh); |
223 | void __wait_on_buffer(struct buffer_head *); |
224 | wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); |
225 | struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, |
226 | unsigned size); |
227 | struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, |
228 | unsigned size, gfp_t gfp); |
229 | void __brelse(struct buffer_head *); |
230 | void __bforget(struct buffer_head *); |
231 | void __breadahead(struct block_device *, sector_t block, unsigned int size); |
232 | struct buffer_head *__bread_gfp(struct block_device *, |
233 | sector_t block, unsigned size, gfp_t gfp); |
234 | struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); |
235 | void free_buffer_head(struct buffer_head * bh); |
236 | void unlock_buffer(struct buffer_head *bh); |
237 | void __lock_buffer(struct buffer_head *bh); |
238 | int sync_dirty_buffer(struct buffer_head *bh); |
239 | int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); |
240 | void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); |
241 | void submit_bh(blk_opf_t, struct buffer_head *); |
242 | void write_boundary_block(struct block_device *bdev, |
243 | sector_t bblock, unsigned blocksize); |
244 | int bh_uptodate_or_lock(struct buffer_head *bh); |
245 | int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait); |
246 | void __bh_read_batch(int nr, struct buffer_head *bhs[], |
247 | blk_opf_t op_flags, bool force_lock); |
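
/*
 * Example (illustrative): submitting a single locked buffer for read and
 * waiting for completion, roughly the sequence __bh_read() performs
 * internally.  end_buffer_read_sync() consumes the reference taken by
 * get_bh() and unlocks the buffer when the I/O finishes:
 *
 *	lock_buffer(bh);
 *	get_bh(bh);
 *	bh->b_end_io = end_buffer_read_sync;
 *	submit_bh(REQ_OP_READ, bh);
 *	wait_on_buffer(bh);
 *	if (!buffer_uptodate(bh))
 *		return -EIO;
 */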
248 | |
249 | /* |
250 | * Generic address_space_operations implementations for buffer_head-backed |
251 | * address_spaces. |
252 | */ |
253 | void block_invalidate_folio(struct folio *folio, size_t offset, size_t length); |
254 | int block_write_full_folio(struct folio *folio, struct writeback_control *wbc, |
255 | void *get_block); |
256 | int __block_write_full_folio(struct inode *inode, struct folio *folio, |
257 | get_block_t *get_block, struct writeback_control *wbc); |
258 | int block_read_full_folio(struct folio *, get_block_t *); |
259 | bool block_is_partially_uptodate(struct folio *, size_t from, size_t count); |
260 | int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, |
261 | struct page **pagep, get_block_t *get_block); |
262 | int __block_write_begin(struct page *page, loff_t pos, unsigned len, |
263 | get_block_t *get_block); |
264 | int block_write_end(struct file *, struct address_space *, |
265 | loff_t, unsigned, unsigned, |
266 | struct page *, void *); |
267 | int generic_write_end(struct file *, struct address_space *, |
268 | loff_t, unsigned, unsigned, |
269 | struct page *, void *); |
270 | void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to); |
271 | int cont_write_begin(struct file *, struct address_space *, loff_t, |
272 | unsigned, struct page **, void **, |
273 | get_block_t *, loff_t *); |
274 | int generic_cont_expand_simple(struct inode *inode, loff_t size); |
275 | void block_commit_write(struct page *page, unsigned int from, unsigned int to); |
276 | int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, |
277 | get_block_t get_block); |
278 | sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); |
279 | int block_truncate_page(struct address_space *, loff_t, get_block_t *); |
280 | |
281 | #ifdef CONFIG_MIGRATION |
282 | extern int buffer_migrate_folio(struct address_space *, |
283 | struct folio *dst, struct folio *src, enum migrate_mode); |
284 | extern int buffer_migrate_folio_norefs(struct address_space *, |
285 | struct folio *dst, struct folio *src, enum migrate_mode); |
286 | #else |
287 | #define buffer_migrate_folio NULL |
288 | #define buffer_migrate_folio_norefs NULL |
289 | #endif |
290 | |
291 | /* |
292 | * inline definitions |
293 | */ |
294 | |
295 | static inline void get_bh(struct buffer_head *bh) |
296 | { |
	atomic_inc(&bh->b_count);
298 | } |
299 | |
300 | static inline void put_bh(struct buffer_head *bh) |
301 | { |
302 | smp_mb__before_atomic(); |
	atomic_dec(&bh->b_count);
304 | } |
305 | |
306 | static inline void brelse(struct buffer_head *bh) |
307 | { |
308 | if (bh) |
309 | __brelse(bh); |
310 | } |
311 | |
312 | static inline void bforget(struct buffer_head *bh) |
313 | { |
314 | if (bh) |
315 | __bforget(bh); |
316 | } |
317 | |
318 | static inline struct buffer_head * |
319 | sb_bread(struct super_block *sb, sector_t block) |
320 | { |
	return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE);
322 | } |
323 | |
324 | static inline struct buffer_head * |
325 | sb_bread_unmovable(struct super_block *sb, sector_t block) |
326 | { |
	return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, 0);
328 | } |
329 | |
330 | static inline void |
331 | sb_breadahead(struct super_block *sb, sector_t block) |
332 | { |
	__breadahead(sb->s_bdev, block, sb->s_blocksize);
334 | } |
335 | |
336 | static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, |
337 | sector_t block, unsigned size) |
338 | { |
339 | gfp_t gfp; |
340 | |
	gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
342 | gfp |= __GFP_NOFAIL; |
343 | |
344 | return bdev_getblk(bdev, block, size, gfp); |
345 | } |
346 | |
347 | static inline struct buffer_head *__getblk(struct block_device *bdev, |
348 | sector_t block, unsigned size) |
349 | { |
350 | gfp_t gfp; |
351 | |
	gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
353 | gfp |= __GFP_MOVABLE | __GFP_NOFAIL; |
354 | |
355 | return bdev_getblk(bdev, block, size, gfp); |
356 | } |
357 | |
358 | static inline struct buffer_head *sb_getblk(struct super_block *sb, |
359 | sector_t block) |
360 | { |
	return __getblk(sb->s_bdev, block, sb->s_blocksize);
362 | } |
363 | |
364 | static inline struct buffer_head *sb_getblk_gfp(struct super_block *sb, |
365 | sector_t block, gfp_t gfp) |
366 | { |
	return bdev_getblk(sb->s_bdev, block, sb->s_blocksize, gfp);
368 | } |
369 | |
370 | static inline struct buffer_head * |
371 | sb_find_get_block(struct super_block *sb, sector_t block) |
372 | { |
	return __find_get_block(sb->s_bdev, block, sb->s_blocksize);
374 | } |
375 | |
376 | static inline void |
377 | map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block) |
378 | { |
379 | set_buffer_mapped(bh); |
380 | bh->b_bdev = sb->s_bdev; |
381 | bh->b_blocknr = block; |
382 | bh->b_size = sb->s_blocksize; |
383 | } |
384 | |
385 | static inline void wait_on_buffer(struct buffer_head *bh) |
386 | { |
387 | might_sleep(); |
388 | if (buffer_locked(bh)) |
389 | __wait_on_buffer(bh); |
390 | } |
391 | |
392 | static inline int trylock_buffer(struct buffer_head *bh) |
393 | { |
394 | return likely(!test_and_set_bit_lock(BH_Lock, &bh->b_state)); |
395 | } |
396 | |
397 | static inline void lock_buffer(struct buffer_head *bh) |
398 | { |
399 | might_sleep(); |
400 | if (!trylock_buffer(bh)) |
401 | __lock_buffer(bh); |
402 | } |
403 | |
404 | static inline void bh_readahead(struct buffer_head *bh, blk_opf_t op_flags) |
405 | { |
406 | if (!buffer_uptodate(bh) && trylock_buffer(bh)) { |
407 | if (!buffer_uptodate(bh)) |
			__bh_read(bh, op_flags, false);
409 | else |
410 | unlock_buffer(bh); |
411 | } |
412 | } |
413 | |
414 | static inline void bh_read_nowait(struct buffer_head *bh, blk_opf_t op_flags) |
415 | { |
416 | if (!bh_uptodate_or_lock(bh)) |
		__bh_read(bh, op_flags, false);
418 | } |
419 | |
/* Returns 1 if the buffer is already uptodate, 0 on success, and -EIO on error. */
421 | static inline int bh_read(struct buffer_head *bh, blk_opf_t op_flags) |
422 | { |
423 | if (bh_uptodate_or_lock(bh)) |
424 | return 1; |
	return __bh_read(bh, op_flags, true);
426 | } |
427 | |
428 | static inline void bh_read_batch(int nr, struct buffer_head *bhs[]) |
429 | { |
	__bh_read_batch(nr, bhs, 0, true);
431 | } |
432 | |
433 | static inline void bh_readahead_batch(int nr, struct buffer_head *bhs[], |
434 | blk_opf_t op_flags) |
435 | { |
	__bh_read_batch(nr, bhs, op_flags, false);
437 | } |
438 | |
439 | /** |
440 | * __bread() - reads a specified block and returns the bh |
441 | * @bdev: the block_device to read from |
442 | * @block: number of block |
443 | * @size: size (in bytes) to read |
444 | * |
445 | * Reads a specified block, and returns buffer head that contains it. |
446 | * The page cache is allocated from movable area so that it can be migrated. |
447 | * It returns NULL if the block was unreadable. |
448 | */ |
449 | static inline struct buffer_head * |
450 | __bread(struct block_device *bdev, sector_t block, unsigned size) |
451 | { |
452 | return __bread_gfp(bdev, block, size, __GFP_MOVABLE); |
453 | } |
454 | |
455 | /** |
456 | * get_nth_bh - Get a reference on the n'th buffer after this one. |
457 | * @bh: The buffer to start counting from. |
458 | * @count: How many buffers to skip. |
459 | * |
460 | * This is primarily useful for finding the nth buffer in a folio; in |
461 | * that case you pass the head buffer and the byte offset in the folio |
462 | * divided by the block size. It can be used for other purposes, but |
463 | * it will wrap at the end of the folio rather than returning NULL or |
464 | * proceeding to the next folio for you. |
465 | * |
466 | * Return: The requested buffer with an elevated refcount. |
467 | */ |
468 | static inline __must_check |
469 | struct buffer_head *get_nth_bh(struct buffer_head *bh, unsigned int count) |
470 | { |
471 | while (count--) |
472 | bh = bh->b_this_page; |
473 | get_bh(bh); |
474 | return bh; |
475 | } |
476 | |
477 | bool block_dirty_folio(struct address_space *mapping, struct folio *folio); |
478 | |
479 | #ifdef CONFIG_BUFFER_HEAD |
480 | |
481 | void buffer_init(void); |
482 | bool try_to_free_buffers(struct folio *folio); |
483 | int inode_has_buffers(struct inode *inode); |
484 | void invalidate_inode_buffers(struct inode *inode); |
485 | int remove_inode_buffers(struct inode *inode); |
486 | int sync_mapping_buffers(struct address_space *mapping); |
487 | void invalidate_bh_lrus(void); |
488 | void invalidate_bh_lrus_cpu(void); |
489 | bool has_bh_in_lru(int cpu, void *dummy); |
490 | extern int buffer_heads_over_limit; |
491 | |
492 | #else /* CONFIG_BUFFER_HEAD */ |
493 | |
494 | static inline void buffer_init(void) {} |
495 | static inline bool try_to_free_buffers(struct folio *folio) { return true; } |
496 | static inline int inode_has_buffers(struct inode *inode) { return 0; } |
497 | static inline void invalidate_inode_buffers(struct inode *inode) {} |
498 | static inline int remove_inode_buffers(struct inode *inode) { return 1; } |
499 | static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } |
500 | static inline void invalidate_bh_lrus(void) {} |
501 | static inline void invalidate_bh_lrus_cpu(void) {} |
502 | static inline bool has_bh_in_lru(int cpu, void *dummy) { return false; } |
503 | #define buffer_heads_over_limit 0 |
504 | |
505 | #endif /* CONFIG_BUFFER_HEAD */ |
506 | #endif /* _LINUX_BUFFER_HEAD_H */ |
507 | |