// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * aops.c - NTFS kernel address space operations and page cache handling.
 *
 * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc.
 * Copyright (c) 2002 Richard Russon
 */

#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/bit_spinlock.h>
#include <linux/bio.h>

#include "aops.h"
#include "attrib.h"
#include "debug.h"
#include "inode.h"
#include "mft.h"
#include "runlist.h"
#include "types.h"
#include "ntfs.h"
/**
 * ntfs_end_buffer_async_read - async io completion for reading attributes
 * @bh: buffer head on which io is completed
 * @uptodate: whether @bh is now uptodate or not
 *
 * Asynchronous I/O completion handler for reading pages belonging to the
 * attribute address space of an inode. The inodes can either be files or
 * directories or they can be fake inodes describing some attribute.
 *
 * If NInoMstProtected(), perform the post read mst fixups when all IO on the
 * page has been completed and mark the page uptodate or set the error bit on
 * the page. To determine the size of the records that need fixing up, we
 * cheat a little bit by setting the index_block_size in ntfs_inode to the ntfs
 * record size, and index_block_size_bits, to the log(base 2) of the ntfs
 * record size.
 */
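/*
 * For example, on a typical volume with 1024-byte mft records and 4096-byte
 * pages, index_block_size is set to 1024 and index_block_size_bits to 10,
 * so the fixup loop below walks PAGE_SIZE / rec_size = 4096 / 1024 = 4
 * records per page.
 */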
static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
	unsigned long flags;
	struct buffer_head *first, *tmp;
	struct page *page;
	struct inode *vi;
	ntfs_inode *ni;
	int page_uptodate = 1;

	page = bh->b_page;
	vi = page->mapping->host;
	ni = NTFS_I(vi);

	if (likely(uptodate)) {
		loff_t i_size;
		s64 file_ofs, init_size;

		set_buffer_uptodate(bh);

		file_ofs = ((s64)page->index << PAGE_SHIFT) +
				bh_offset(bh);
		read_lock_irqsave(&ni->size_lock, flags);
		init_size = ni->initialized_size;
		i_size = i_size_read(vi);
		read_unlock_irqrestore(&ni->size_lock, flags);
		if (unlikely(init_size > i_size)) {
			/* Race with shrinking truncate. */
			init_size = i_size;
		}
		/* Check for the current buffer head overflowing. */
		if (unlikely(file_ofs + bh->b_size > init_size)) {
			int ofs;
			void *kaddr;

			ofs = 0;
			if (file_ofs < init_size)
				ofs = init_size - file_ofs;
			kaddr = kmap_atomic(page);
			memset(kaddr + bh_offset(bh) + ofs, 0,
					bh->b_size - ofs);
			flush_dcache_page(page);
			kunmap_atomic(kaddr);
		}
	} else {
		clear_buffer_uptodate(bh);
		SetPageError(page);
		ntfs_error(ni->vol->sb, "Buffer I/O error, logical block "
				"0x%llx.", (unsigned long long)bh->b_blocknr);
	}
	first = page_buffers(page);
	spin_lock_irqsave(&first->b_uptodate_lock, flags);
	clear_buffer_async_read(bh);
	unlock_buffer(bh);
	tmp = bh;
	do {
		if (!buffer_uptodate(tmp))
			page_uptodate = 0;
		if (buffer_async_read(tmp)) {
			if (likely(buffer_locked(tmp)))
				goto still_busy;
			/* Async buffers must be locked. */
			BUG();
		}
		tmp = tmp->b_this_page;
	} while (tmp != bh);
	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
	/*
	 * If none of the buffers had errors then we can set the page uptodate,
	 * but we first have to perform the post read mst fixups, if the
	 * attribute is mst protected, i.e. if NInoMstProtected(ni) is true.
	 * Note we ignore fixup errors as those are detected when
	 * map_mft_record() is called which gives us per record granularity
	 * rather than per page granularity.
	 */
	if (!NInoMstProtected(ni)) {
		if (likely(page_uptodate && !PageError(page)))
			SetPageUptodate(page);
	} else {
		u8 *kaddr;
		unsigned int i, recs;
		u32 rec_size;

		rec_size = ni->itype.index.block_size;
		recs = PAGE_SIZE / rec_size;
		/* Should have been verified before we got here... */
		BUG_ON(!recs);
		kaddr = kmap_atomic(page);
		for (i = 0; i < recs; i++)
			post_read_mst_fixup((NTFS_RECORD*)(kaddr +
					i * rec_size), rec_size);
		kunmap_atomic(kaddr);
		flush_dcache_page(page);
		if (likely(page_uptodate && !PageError(page)))
			SetPageUptodate(page);
	}
	unlock_page(page);
	return;
still_busy:
	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
	return;
}

/**
 * ntfs_read_block - fill a @folio of an address space with data
 * @folio: page cache folio to fill with data
 *
 * We read each buffer asynchronously and when all buffers are read in, our io
 * completion handler ntfs_end_buffer_async_read(), if required, automatically
 * applies the mst fixups to the folio before finally marking it uptodate and
 * unlocking it.
 *
 * We only enforce allocated_size limit because i_size is checked for in
 * generic_file_read().
 *
 * Return 0 on success and -errno on error.
 *
 * Contains an adapted version of fs/buffer.c::block_read_full_folio().
 */
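/*
 * A worked example of the block to vcn conversion done below, assuming
 * 512-byte blocks (blocksize_bits = 9) and 4096-byte clusters
 * (cluster_size_bits = 12):
 *
 *	iblock  = 21
 *	vcn     = (21 << 9) >> 12   = 0x2a00 >> 12   = 2
 *	vcn_ofs = (21 << 9) & 0xfff = 0x2a00 & 0xfff = 0xa00
 */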
static int ntfs_read_block(struct folio *folio)
{
	loff_t i_size;
	VCN vcn;
	LCN lcn;
	s64 init_size;
	struct inode *vi;
	ntfs_inode *ni;
	ntfs_volume *vol;
	runlist_element *rl;
	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
	sector_t iblock, lblock, zblock;
	unsigned long flags;
	unsigned int blocksize, vcn_ofs;
	int i, nr;
	unsigned char blocksize_bits;

	vi = folio->mapping->host;
	ni = NTFS_I(vi);
	vol = ni->vol;

	/* $MFT/$DATA must have its complete runlist in memory at all times. */
	BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni));

	blocksize = vol->sb->s_blocksize;
	blocksize_bits = vol->sb->s_blocksize_bits;

	head = folio_buffers(folio);
	if (!head)
		head = create_empty_buffers(folio, blocksize, 0);
	bh = head;

	/*
	 * We may be racing with truncate. To avoid some of the problems we
	 * now take a snapshot of the various sizes and use those for the whole
	 * of the function. In case of an extending truncate it just means we
	 * may leave some buffers unmapped which are now allocated. This is
	 * not a problem since these buffers will just get mapped when a write
	 * occurs. In case of a shrinking truncate, we will detect this later
	 * on due to the runlist being incomplete and if the folio is being
	 * fully truncated, truncate will throw it away as soon as we unlock
	 * it so no need to worry what we do with it.
	 */
	iblock = (s64)folio->index << (PAGE_SHIFT - blocksize_bits);
	read_lock_irqsave(&ni->size_lock, flags);
	lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
	init_size = ni->initialized_size;
	i_size = i_size_read(vi);
	read_unlock_irqrestore(&ni->size_lock, flags);
	if (unlikely(init_size > i_size)) {
		/* Race with shrinking truncate. */
		init_size = i_size;
	}
	zblock = (init_size + blocksize - 1) >> blocksize_bits;

	/* Loop through all the buffers in the folio. */
	rl = NULL;
	nr = i = 0;
	do {
		int err = 0;

		if (unlikely(buffer_uptodate(bh)))
			continue;
		if (unlikely(buffer_mapped(bh))) {
			arr[nr++] = bh;
			continue;
		}
		bh->b_bdev = vol->sb->s_bdev;
		/* Is the block within the allowed limits? */
		if (iblock < lblock) {
			bool is_retry = false;

			/* Convert iblock into corresponding vcn and offset. */
			vcn = (VCN)iblock << blocksize_bits >>
					vol->cluster_size_bits;
			vcn_ofs = ((VCN)iblock << blocksize_bits) &
					vol->cluster_size_mask;
			if (!rl) {
lock_retry_remap:
				down_read(&ni->runlist.lock);
				rl = ni->runlist.rl;
			}
			if (likely(rl != NULL)) {
				/* Seek to element containing target vcn. */
				while (rl->length && rl[1].vcn <= vcn)
					rl++;
				lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
			} else
				lcn = LCN_RL_NOT_MAPPED;
			/* Successful remap. */
			if (lcn >= 0) {
				/* Setup buffer head to correct block. */
				bh->b_blocknr = ((lcn << vol->cluster_size_bits)
						+ vcn_ofs) >> blocksize_bits;
				set_buffer_mapped(bh);
				/* Only read initialized data blocks. */
				if (iblock < zblock) {
					arr[nr++] = bh;
					continue;
				}
				/* Fully non-initialized data block, zero it. */
				goto handle_zblock;
			}
			/* It is a hole, need to zero it. */
			if (lcn == LCN_HOLE)
				goto handle_hole;
			/* If first try and runlist unmapped, map and retry. */
			if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
				is_retry = true;
				/*
				 * Attempt to map runlist, dropping lock for
				 * the duration.
				 */
				up_read(&ni->runlist.lock);
				err = ntfs_map_runlist(ni, vcn);
				if (likely(!err))
					goto lock_retry_remap;
				rl = NULL;
			} else if (!rl)
				up_read(&ni->runlist.lock);
			/*
			 * If buffer is outside the runlist, treat it as a
			 * hole. This can happen due to concurrent truncate
			 * for example.
			 */
			if (err == -ENOENT || lcn == LCN_ENOENT) {
				err = 0;
				goto handle_hole;
			}
			/* Hard error, zero out region. */
			if (!err)
				err = -EIO;
			bh->b_blocknr = -1;
			folio_set_error(folio);
			ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
					"attribute type 0x%x, vcn 0x%llx, "
					"offset 0x%x because its location on "
					"disk could not be determined%s "
					"(error code %i).", ni->mft_no,
					ni->type, (unsigned long long)vcn,
					vcn_ofs, is_retry ? " even after "
					"retrying" : "", err);
		}
		/*
		 * Either iblock was outside lblock limits or
		 * ntfs_rl_vcn_to_lcn() returned error. Just zero that portion
		 * of the folio and set the buffer uptodate.
		 */
handle_hole:
		bh->b_blocknr = -1UL;
		clear_buffer_mapped(bh);
handle_zblock:
		folio_zero_range(folio, i * blocksize, blocksize);
		if (likely(!err))
			set_buffer_uptodate(bh);
	} while (i++, iblock++, (bh = bh->b_this_page) != head);

	/* Release the lock if we took it. */
	if (rl)
		up_read(&ni->runlist.lock);

	/* Check we have at least one buffer ready for i/o. */
	if (nr) {
		struct buffer_head *tbh;

		/* Lock the buffers. */
		for (i = 0; i < nr; i++) {
			tbh = arr[i];
			lock_buffer(tbh);
			tbh->b_end_io = ntfs_end_buffer_async_read;
			set_buffer_async_read(tbh);
		}
		/* Finally, start i/o on the buffers. */
		for (i = 0; i < nr; i++) {
			tbh = arr[i];
			if (likely(!buffer_uptodate(tbh)))
				submit_bh(REQ_OP_READ, tbh);
			else
				ntfs_end_buffer_async_read(tbh, 1);
		}
		return 0;
	}
	/* No i/o was scheduled on any of the buffers. */
	if (likely(!folio_test_error(folio)))
		folio_mark_uptodate(folio);
	else /* Signal synchronous i/o error. */
		nr = -EIO;
	folio_unlock(folio);
	return nr;
}

/**
 * ntfs_read_folio - fill a @folio of a @file with data from the device
 * @file: open file to which the folio @folio belongs or NULL
 * @folio: page cache folio to fill with data
 *
 * For non-resident attributes, ntfs_read_folio() fills the @folio of the open
 * file @file by calling the ntfs version of the generic block_read_full_folio()
 * function, ntfs_read_block(), which in turn creates and reads in the buffers
 * associated with the folio asynchronously.
 *
 * For resident attributes, OTOH, ntfs_read_folio() fills @folio by copying the
 * data from the mft record (which at this stage is most likely in memory) and
 * fills the remainder with zeroes. Thus, in this case, I/O is synchronous, as
 * even if the mft record is not cached at this point in time, we need to wait
 * for it to be read in before we can do the copy.
 *
 * Return 0 on success and -errno on error.
 */
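/*
 * For example, a resident unnamed $DATA attribute with value_length 300
 * takes the resident path below on a system with 4096-byte pages: 300 bytes
 * are copied from the attribute value inside the mft record into the page
 * and the remaining 4096 - 300 = 3796 bytes are zeroed.
 */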
static int ntfs_read_folio(struct file *file, struct folio *folio)
{
	struct page *page = &folio->page;
	loff_t i_size;
	struct inode *vi;
	ntfs_inode *ni, *base_ni;
	u8 *addr;
	ntfs_attr_search_ctx *ctx;
	MFT_RECORD *mrec;
	unsigned long flags;
	u32 attr_len;
	int err = 0;

retry_readpage:
	BUG_ON(!PageLocked(page));
	vi = page->mapping->host;
	i_size = i_size_read(vi);
	/* Is the page fully outside i_size? (truncate in progress) */
	if (unlikely(page->index >= (i_size + PAGE_SIZE - 1) >>
			PAGE_SHIFT)) {
		zero_user(page, 0, PAGE_SIZE);
		ntfs_debug("Read outside i_size - truncated?");
		goto done;
	}
	/*
	 * This can potentially happen because we clear PageUptodate() during
	 * ntfs_writepage() of MstProtected() attributes.
	 */
	if (PageUptodate(page)) {
		unlock_page(page);
		return 0;
	}
	ni = NTFS_I(vi);
	/*
	 * Only $DATA attributes can be encrypted and only unnamed $DATA
	 * attributes can be compressed. Index root can have the flags set but
	 * this means to create compressed/encrypted files, not that the
	 * attribute is compressed/encrypted. Note we need to check for
	 * AT_INDEX_ALLOCATION since this is the type of both directory and
	 * index inodes.
	 */
	if (ni->type != AT_INDEX_ALLOCATION) {
		/* If attribute is encrypted, deny access, just like NT4. */
		if (NInoEncrypted(ni)) {
			BUG_ON(ni->type != AT_DATA);
			err = -EACCES;
			goto err_out;
		}
		/* Compressed data streams are handled in compress.c. */
		if (NInoNonResident(ni) && NInoCompressed(ni)) {
			BUG_ON(ni->type != AT_DATA);
			BUG_ON(ni->name_len);
			return ntfs_read_compressed_block(page);
		}
	}
	/* NInoNonResident() == NInoIndexAllocPresent() */
	if (NInoNonResident(ni)) {
		/* Normal, non-resident data stream. */
		return ntfs_read_block(folio);
	}
	/*
	 * Attribute is resident, implying it is not compressed or encrypted.
	 * This also means the attribute is smaller than an mft record and
	 * hence smaller than a page, so can simply zero out any pages with
	 * index above 0. Note the attribute can actually be marked compressed
	 * but if it is resident the actual data is not compressed so we are
	 * ok to ignore the compressed flag here.
	 */
	if (unlikely(page->index > 0)) {
		zero_user(page, 0, PAGE_SIZE);
		goto done;
	}
	if (!NInoAttr(ni))
		base_ni = ni;
	else
		base_ni = ni->ext.base_ntfs_ino;
	/* Map, pin, and lock the mft record. */
	mrec = map_mft_record(base_ni);
	if (IS_ERR(mrec)) {
		err = PTR_ERR(mrec);
		goto err_out;
	}
	/*
	 * If a parallel write made the attribute non-resident, drop the mft
	 * record and retry the read_folio.
	 */
	if (unlikely(NInoNonResident(ni))) {
		unmap_mft_record(base_ni);
		goto retry_readpage;
	}
	ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
	if (unlikely(!ctx)) {
		err = -ENOMEM;
		goto unm_err_out;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
			CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(err))
		goto put_unm_err_out;
	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
	read_lock_irqsave(&ni->size_lock, flags);
	if (unlikely(attr_len > ni->initialized_size))
		attr_len = ni->initialized_size;
	i_size = i_size_read(vi);
	read_unlock_irqrestore(&ni->size_lock, flags);
	if (unlikely(attr_len > i_size)) {
		/* Race with shrinking truncate. */
		attr_len = i_size;
	}
	addr = kmap_atomic(page);
	/* Copy the data to the page. */
	memcpy(addr, (u8*)ctx->attr +
			le16_to_cpu(ctx->attr->data.resident.value_offset),
			attr_len);
	/* Zero the remainder of the page. */
	memset(addr + attr_len, 0, PAGE_SIZE - attr_len);
	flush_dcache_page(page);
	kunmap_atomic(addr);
put_unm_err_out:
	ntfs_attr_put_search_ctx(ctx);
unm_err_out:
	unmap_mft_record(base_ni);
done:
	SetPageUptodate(page);
err_out:
	unlock_page(page);
	return err;
}

#ifdef NTFS_RW

/**
 * ntfs_write_block - write a @folio to the backing store
 * @folio: page cache folio to write out
 * @wbc: writeback control structure
 *
 * This function is for writing folios belonging to non-resident, non-mst
 * protected attributes to their backing store.
 *
 * For a folio with buffers, map and write the dirty buffers asynchronously
 * under folio writeback. For a folio without buffers, create buffers for the
 * folio, then proceed as above.
 *
 * If a folio doesn't have buffers the folio dirty state is definitive. If
 * a folio does have buffers, the folio dirty state is just a hint,
 * and the buffer dirty state is definitive. (A hint which has rules:
 * dirty buffers against a clean folio is illegal. Other combinations are
 * legal and need to be handled. In particular a dirty folio containing
 * clean buffers for example.)
 *
 * Return 0 on success and -errno on error.
 *
 * Based on ntfs_read_block() and __block_write_full_folio().
 */
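/*
 * To illustrate the block bounds computed below, assume 512-byte blocks
 * with i_size = 3000 and initialized_size = 2048: dblock = (3000 + 511)
 * >> 9 = 6 is the first out of bounds block, and a dirty block at or above
 * iblock = 2048 >> 9 = 4 takes the (still unimplemented) beyond
 * initialized size path.
 */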
static int ntfs_write_block(struct folio *folio, struct writeback_control *wbc)
{
	VCN vcn;
	LCN lcn;
	s64 initialized_size;
	loff_t i_size;
	sector_t block, dblock, iblock;
	struct inode *vi;
	ntfs_inode *ni;
	ntfs_volume *vol;
	runlist_element *rl;
	struct buffer_head *bh, *head;
	unsigned long flags;
	unsigned int blocksize, vcn_ofs;
	int err;
	bool need_end_writeback;
	unsigned char blocksize_bits;

	vi = folio->mapping->host;
	ni = NTFS_I(vi);
	vol = ni->vol;

	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
			"0x%lx.", ni->mft_no, ni->type, folio->index);

	BUG_ON(!NInoNonResident(ni));
	BUG_ON(NInoMstProtected(ni));
	blocksize = vol->sb->s_blocksize;
	blocksize_bits = vol->sb->s_blocksize_bits;
	head = folio_buffers(folio);
	if (!head) {
		BUG_ON(!folio_test_uptodate(folio));
		head = create_empty_buffers(folio, blocksize,
				(1 << BH_Uptodate) | (1 << BH_Dirty));
	}
	bh = head;

	/* NOTE: Different naming scheme to ntfs_read_block()! */

	/* The first block in the folio. */
	block = (s64)folio->index << (PAGE_SHIFT - blocksize_bits);

	read_lock_irqsave(&ni->size_lock, flags);
	i_size = i_size_read(vi);
	initialized_size = ni->initialized_size;
	read_unlock_irqrestore(&ni->size_lock, flags);

	/* The first out of bounds block for the data size. */
	dblock = (i_size + blocksize - 1) >> blocksize_bits;

	/* The last (fully or partially) initialized block. */
	iblock = initialized_size >> blocksize_bits;

	/*
	 * Be very careful. We have no exclusion from block_dirty_folio
	 * here, and the (potentially unmapped) buffers may become dirty at
	 * any time. If a buffer becomes dirty here after we've inspected it
	 * then we just miss that fact, and the folio stays dirty.
	 *
	 * Buffers outside i_size may be dirtied by block_dirty_folio;
	 * handle that here by just cleaning them.
	 */

	/*
	 * Loop through all the buffers in the folio, mapping all the dirty
	 * buffers to disk addresses and handling any aliases from the
	 * underlying block device's mapping.
	 */
	rl = NULL;
	err = 0;
	do {
		bool is_retry = false;

		if (unlikely(block >= dblock)) {
			/*
			 * Mapped buffers outside i_size will occur, because
			 * this folio can be outside i_size when there is a
			 * truncate in progress. The contents of such buffers
			 * were zeroed by ntfs_writepage().
			 *
			 * FIXME: What about the small race window where
			 * ntfs_writepage() has not done any clearing because
			 * the folio was within i_size but before we get here,
			 * vmtruncate() modifies i_size?
			 */
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
			continue;
		}

		/* Clean buffers are not written out, so no need to map them. */
		if (!buffer_dirty(bh))
			continue;

		/* Make sure we have enough initialized size. */
		if (unlikely((block >= iblock) &&
				(initialized_size < i_size))) {
			/*
			 * If this folio is fully outside initialized
			 * size, zero out all folios between the current
			 * initialized size and the current folio. Just
			 * use ntfs_read_folio() to do the zeroing
			 * transparently.
			 */
			if (block > iblock) {
				// TODO:
				// For each folio do:
				// - read_cache_folio()
				// Again for each folio do:
				// - wait_on_folio_locked()
				// - Check (folio_test_uptodate(folio) &&
				//	!folio_test_error(folio))
				// Update initialized size in the attribute and
				// in the inode.
				// Again, for each folio do:
				//	block_dirty_folio();
				// folio_put()
				// We don't need to wait on the writes.
				// Update iblock.
			}
			/*
			 * The current folio straddles initialized size. Zero
			 * all non-uptodate buffers and set them uptodate (and
			 * dirty?). Note, there aren't any non-uptodate buffers
			 * if the folio is uptodate.
			 * FIXME: For an uptodate folio, the buffers may need to
			 * be written out because they were not initialized on
			 * disk before.
			 */
			if (!folio_test_uptodate(folio)) {
				// TODO:
				// Zero any non-uptodate buffers up to i_size.
				// Set them uptodate and dirty.
			}
			// TODO:
			// Update initialized size in the attribute and in the
			// inode (up to i_size).
			// Update iblock.
			// FIXME: This is inefficient. Try to batch the two
			// size changes to happen in one go.
			ntfs_error(vol->sb, "Writing beyond initialized size "
					"is not supported yet. Sorry.");
			err = -EOPNOTSUPP;
			break;
			// Do NOT set_buffer_new() BUT DO clear buffer range
			// outside write request range.
			// set_buffer_uptodate() on complete buffers as well as
			// set_buffer_dirty().
		}

		/* No need to map buffers that are already mapped. */
		if (buffer_mapped(bh))
			continue;

		/* Unmapped, dirty buffer. Need to map it. */
		bh->b_bdev = vol->sb->s_bdev;

		/* Convert block into corresponding vcn and offset. */
		vcn = (VCN)block << blocksize_bits;
		vcn_ofs = vcn & vol->cluster_size_mask;
		vcn >>= vol->cluster_size_bits;
		if (!rl) {
lock_retry_remap:
			down_read(&ni->runlist.lock);
			rl = ni->runlist.rl;
		}
		if (likely(rl != NULL)) {
			/* Seek to element containing target vcn. */
			while (rl->length && rl[1].vcn <= vcn)
				rl++;
			lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
		} else
			lcn = LCN_RL_NOT_MAPPED;
		/* Successful remap. */
		if (lcn >= 0) {
			/* Setup buffer head to point to correct block. */
			bh->b_blocknr = ((lcn << vol->cluster_size_bits) +
					vcn_ofs) >> blocksize_bits;
			set_buffer_mapped(bh);
			continue;
		}
		/* It is a hole, need to instantiate it. */
		if (lcn == LCN_HOLE) {
			u8 *kaddr;
			unsigned long *bpos, *bend;

			/* Check if the buffer is zero. */
			kaddr = kmap_local_folio(folio, bh_offset(bh));
			bpos = (unsigned long *)kaddr;
			bend = (unsigned long *)(kaddr + blocksize);
			do {
				if (unlikely(*bpos))
					break;
			} while (likely(++bpos < bend));
			kunmap_local(kaddr);
			if (bpos == bend) {
				/*
				 * Buffer is zero and sparse, no need to write
				 * it.
				 */
				bh->b_blocknr = -1;
				clear_buffer_dirty(bh);
				continue;
			}
			// TODO: Instantiate the hole.
			// clear_buffer_new(bh);
			// clean_bdev_bh_alias(bh);
			ntfs_error(vol->sb, "Writing into sparse regions is "
					"not supported yet. Sorry.");
			err = -EOPNOTSUPP;
			break;
		}
		/* If first try and runlist unmapped, map and retry. */
		if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
			is_retry = true;
			/*
			 * Attempt to map runlist, dropping lock for
			 * the duration.
			 */
			up_read(&ni->runlist.lock);
			err = ntfs_map_runlist(ni, vcn);
			if (likely(!err))
				goto lock_retry_remap;
			rl = NULL;
		} else if (!rl)
			up_read(&ni->runlist.lock);
		/*
		 * If buffer is outside the runlist, truncate has cut it out
		 * of the runlist. Just clean and clear the buffer and set it
		 * uptodate so it can get discarded by the VM.
		 */
		if (err == -ENOENT || lcn == LCN_ENOENT) {
			bh->b_blocknr = -1;
			clear_buffer_dirty(bh);
			folio_zero_range(folio, bh_offset(bh), blocksize);
			set_buffer_uptodate(bh);
			err = 0;
			continue;
		}
		/* Failed to map the buffer, even after retrying. */
		if (!err)
			err = -EIO;
		bh->b_blocknr = -1;
		ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
				"attribute type 0x%x, vcn 0x%llx, offset 0x%x "
				"because its location on disk could not be "
				"determined%s (error code %i).", ni->mft_no,
				ni->type, (unsigned long long)vcn,
				vcn_ofs, is_retry ? " even after "
				"retrying" : "", err);
		break;
	} while (block++, (bh = bh->b_this_page) != head);

	/* Release the lock if we took it. */
	if (rl)
		up_read(&ni->runlist.lock);

	/* For the error case, need to reset bh to the beginning. */
	bh = head;

	/* Just an optimization, so ->read_folio() is not called later. */
	if (unlikely(!folio_test_uptodate(folio))) {
		int uptodate = 1;
		do {
			if (!buffer_uptodate(bh)) {
				uptodate = 0;
				bh = head;
				break;
			}
		} while ((bh = bh->b_this_page) != head);
		if (uptodate)
			folio_mark_uptodate(folio);
	}

	/* Setup all mapped, dirty buffers for async write i/o. */
	do {
		if (buffer_mapped(bh) && buffer_dirty(bh)) {
			lock_buffer(bh);
			if (test_clear_buffer_dirty(bh)) {
				BUG_ON(!buffer_uptodate(bh));
				mark_buffer_async_write(bh);
			} else
				unlock_buffer(bh);
		} else if (unlikely(err)) {
			/*
			 * For the error case. The buffer may have been set
			 * dirty during attachment to a dirty folio.
			 */
			if (err != -ENOMEM)
				clear_buffer_dirty(bh);
		}
	} while ((bh = bh->b_this_page) != head);

	if (unlikely(err)) {
		// TODO: Remove the -EOPNOTSUPP check later on...
		if (unlikely(err == -EOPNOTSUPP))
			err = 0;
		else if (err == -ENOMEM) {
			ntfs_warning(vol->sb, "Error allocating memory. "
					"Redirtying folio so we try again "
					"later.");
			/*
			 * Put the folio back on mapping->dirty_pages, but
			 * leave its buffers' dirty state as-is.
			 */
			folio_redirty_for_writepage(wbc, folio);
			err = 0;
		} else
			folio_set_error(folio);
	}

	BUG_ON(folio_test_writeback(folio));
	folio_start_writeback(folio);	/* Keeps try_to_free_buffers() away. */

	/* Submit the prepared buffers for i/o. */
	need_end_writeback = true;
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh(REQ_OP_WRITE, bh);
			need_end_writeback = false;
		}
		bh = next;
	} while (bh != head);
	folio_unlock(folio);

	/* If no i/o was started, need to end writeback here. */
	if (unlikely(need_end_writeback))
		folio_end_writeback(folio);

	ntfs_debug("Done.");
	return err;
}

/**
 * ntfs_write_mst_block - write a @page to the backing store
 * @page: page cache page to write out
 * @wbc: writeback control structure
 *
 * This function is for writing pages belonging to non-resident, mst protected
 * attributes to their backing store. The only supported attributes are index
 * allocation and $MFT/$DATA. Both directory inodes and index inodes are
 * supported for the index allocation case.
 *
 * The page must remain locked for the duration of the write because we apply
 * the mst fixups, write, and then undo the fixups, so if we were to unlock the
 * page before undoing the fixups, any other user of the page will see the
 * page contents as corrupt.
 *
 * We clear the page uptodate flag for the duration of the function to ensure
 * exclusion for the $MFT/$DATA case against someone mapping an mft record we
 * are about to apply the mst fixups to.
 *
 * Return 0 on success and -errno on error.
 *
 * Based on ntfs_write_block(), ntfs_mft_writepage(), and
 * write_mft_record_nolock().
 */
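/*
 * For example, with 1024-byte ntfs records (rec_size_bits = 10) and
 * 512-byte buffer heads (bh_size_bits = 9), bhs_per_rec = 1024 >> 9 = 2,
 * so the loop below advances rec_block two buffers at a time and the mst
 * fixups are applied once per buffer pair.
 */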
static int ntfs_write_mst_block(struct page *page,
		struct writeback_control *wbc)
{
	sector_t block, dblock, rec_block;
	struct inode *vi = page->mapping->host;
	ntfs_inode *ni = NTFS_I(vi);
	ntfs_volume *vol = ni->vol;
	u8 *kaddr;
	unsigned int rec_size = ni->itype.index.block_size;
	ntfs_inode *locked_nis[PAGE_SIZE / NTFS_BLOCK_SIZE];
	struct buffer_head *bh, *head, *tbh, *rec_start_bh;
	struct buffer_head *bhs[MAX_BUF_PER_PAGE];
	runlist_element *rl;
	int i, nr_locked_nis, nr_recs, nr_bhs, max_bhs, bhs_per_rec, err, err2;
	unsigned bh_size, rec_size_bits;
	bool sync, is_mft, page_is_dirty, rec_is_dirty;
	unsigned char bh_size_bits;

	if (WARN_ON(rec_size < NTFS_BLOCK_SIZE))
		return -EINVAL;

	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
			"0x%lx.", vi->i_ino, ni->type, page->index);
	BUG_ON(!NInoNonResident(ni));
	BUG_ON(!NInoMstProtected(ni));
	is_mft = (S_ISREG(vi->i_mode) && !vi->i_ino);
	/*
	 * NOTE: ntfs_write_mst_block() would be called for $MFTMirr if a page
	 * in its page cache were to be marked dirty. However this should
	 * never happen with the current driver and considering we do not
	 * handle this case here we do want to BUG(), at least for now.
	 */
	BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) ||
			(NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
	bh_size = vol->sb->s_blocksize;
	bh_size_bits = vol->sb->s_blocksize_bits;
	max_bhs = PAGE_SIZE / bh_size;
	BUG_ON(!max_bhs);
	BUG_ON(max_bhs > MAX_BUF_PER_PAGE);

	/* Were we called for sync purposes? */
	sync = (wbc->sync_mode == WB_SYNC_ALL);

	/* Make sure we have mapped buffers. */
	bh = head = page_buffers(page);
	BUG_ON(!bh);

	rec_size_bits = ni->itype.index.block_size_bits;
	BUG_ON(!(PAGE_SIZE >> rec_size_bits));
	bhs_per_rec = rec_size >> bh_size_bits;
	BUG_ON(!bhs_per_rec);

	/* The first block in the page. */
	rec_block = block = (sector_t)page->index <<
			(PAGE_SHIFT - bh_size_bits);

	/* The first out of bounds block for the data size. */
	dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits;

	rl = NULL;
	err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
	page_is_dirty = rec_is_dirty = false;
	rec_start_bh = NULL;
	do {
		bool is_retry = false;

		if (likely(block < rec_block)) {
			if (unlikely(block >= dblock)) {
				clear_buffer_dirty(bh);
				set_buffer_uptodate(bh);
				continue;
			}
			/*
			 * This block is not the first one in the record. We
			 * ignore the buffer's dirty state because we could
			 * have raced with a parallel mark_ntfs_record_dirty().
			 */
			if (!rec_is_dirty)
				continue;
			if (unlikely(err2)) {
				if (err2 != -ENOMEM)
					clear_buffer_dirty(bh);
				continue;
			}
		} else /* if (block == rec_block) */ {
			BUG_ON(block > rec_block);
			/* This block is the first one in the record. */
			rec_block += bhs_per_rec;
			err2 = 0;
			if (unlikely(block >= dblock)) {
				clear_buffer_dirty(bh);
				continue;
			}
			if (!buffer_dirty(bh)) {
				/* Clean records are not written out. */
				rec_is_dirty = false;
				continue;
			}
			rec_is_dirty = true;
			rec_start_bh = bh;
		}
		/* Need to map the buffer if it is not mapped already. */
		if (unlikely(!buffer_mapped(bh))) {
			VCN vcn;
			LCN lcn;
			unsigned int vcn_ofs;

			bh->b_bdev = vol->sb->s_bdev;
			/* Obtain the vcn and offset of the current block. */
			vcn = (VCN)block << bh_size_bits;
			vcn_ofs = vcn & vol->cluster_size_mask;
			vcn >>= vol->cluster_size_bits;
			if (!rl) {
lock_retry_remap:
				down_read(&ni->runlist.lock);
				rl = ni->runlist.rl;
			}
			if (likely(rl != NULL)) {
				/* Seek to element containing target vcn. */
				while (rl->length && rl[1].vcn <= vcn)
					rl++;
				lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
			} else
				lcn = LCN_RL_NOT_MAPPED;
			/* Successful remap. */
			if (likely(lcn >= 0)) {
				/* Setup buffer head to correct block. */
				bh->b_blocknr = ((lcn <<
						vol->cluster_size_bits) +
						vcn_ofs) >> bh_size_bits;
				set_buffer_mapped(bh);
			} else {
				/*
				 * Remap failed. Retry to map the runlist once
				 * unless we are working on $MFT which always
				 * has the whole of its runlist in memory.
				 */
				if (!is_mft && !is_retry &&
						lcn == LCN_RL_NOT_MAPPED) {
					is_retry = true;
					/*
					 * Attempt to map runlist, dropping
					 * lock for the duration.
					 */
					up_read(&ni->runlist.lock);
					err2 = ntfs_map_runlist(ni, vcn);
					if (likely(!err2))
						goto lock_retry_remap;
					if (err2 == -ENOMEM)
						page_is_dirty = true;
					lcn = err2;
				} else {
					err2 = -EIO;
					if (!rl)
						up_read(&ni->runlist.lock);
				}
				/* Hard error. Abort writing this record. */
				if (!err || err == -ENOMEM)
					err = err2;
				bh->b_blocknr = -1;
				ntfs_error(vol->sb, "Cannot write ntfs record "
						"0x%llx (inode 0x%lx, "
						"attribute type 0x%x) because "
						"its location on disk could "
						"not be determined (error "
						"code %lli).",
						(long long)block <<
						bh_size_bits >>
						vol->mft_record_size_bits,
						ni->mft_no, ni->type,
						(long long)lcn);
				/*
				 * If this is not the first buffer, remove the
				 * buffers in this record from the list of
				 * buffers to write and clear their dirty bit
				 * if not error -ENOMEM.
				 */
				if (rec_start_bh != bh) {
					while (bhs[--nr_bhs] != rec_start_bh)
						;
					if (err2 != -ENOMEM) {
						do {
							clear_buffer_dirty(
								rec_start_bh);
						} while ((rec_start_bh =
								rec_start_bh->
								b_this_page) !=
								bh);
					}
				}
				continue;
			}
		}
		BUG_ON(!buffer_uptodate(bh));
		BUG_ON(nr_bhs >= max_bhs);
		bhs[nr_bhs++] = bh;
	} while (block++, (bh = bh->b_this_page) != head);
	if (unlikely(rl))
		up_read(&ni->runlist.lock);
	/* If there were no dirty buffers, we are done. */
	if (!nr_bhs)
		goto done;
	/* Map the page so we can access its contents. */
	kaddr = kmap(page);
	/* Clear the page uptodate flag whilst the mst fixups are applied. */
	BUG_ON(!PageUptodate(page));
	ClearPageUptodate(page);
	for (i = 0; i < nr_bhs; i++) {
		unsigned int ofs;

		/* Skip buffers which are not at the beginning of records. */
		if (i % bhs_per_rec)
			continue;
		tbh = bhs[i];
		ofs = bh_offset(tbh);
		if (is_mft) {
			ntfs_inode *tni;
			unsigned long mft_no;

			/* Get the mft record number. */
			mft_no = (((s64)page->index << PAGE_SHIFT) + ofs)
					>> rec_size_bits;
			/* Check whether to write this mft record. */
			tni = NULL;
			if (!ntfs_may_write_mft_record(vol, mft_no,
					(MFT_RECORD*)(kaddr + ofs), &tni)) {
				/*
				 * The record should not be written. This
				 * means we need to redirty the page before
				 * returning.
				 */
				page_is_dirty = true;
				/*
				 * Remove the buffers in this mft record from
				 * the list of buffers to write.
				 */
				do {
					bhs[i] = NULL;
				} while (++i % bhs_per_rec);
				continue;
			}
			/*
			 * The record should be written. If a locked ntfs
			 * inode was returned, add it to the array of locked
			 * ntfs inodes.
			 */
			if (tni)
				locked_nis[nr_locked_nis++] = tni;
		}
		/* Apply the mst protection fixups. */
		err2 = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs),
				rec_size);
		if (unlikely(err2)) {
			if (!err || err == -ENOMEM)
				err = -EIO;
			ntfs_error(vol->sb, "Failed to apply mst fixups "
					"(inode 0x%lx, attribute type 0x%x, "
					"page index 0x%lx, page offset 0x%x)!"
					" Unmount and run chkdsk.", vi->i_ino,
					ni->type, page->index, ofs);
			/*
			 * Mark all the buffers in this record clean as we do
			 * not want to write corrupt data to disk.
			 */
			do {
				clear_buffer_dirty(bhs[i]);
				bhs[i] = NULL;
			} while (++i % bhs_per_rec);
			continue;
		}
		nr_recs++;
	}
	/* If no records are to be written out, we are done. */
	if (!nr_recs)
		goto unm_done;
	flush_dcache_page(page);
	/* Lock buffers and start synchronous write i/o on them. */
	for (i = 0; i < nr_bhs; i++) {
		tbh = bhs[i];
		if (!tbh)
			continue;
		if (!trylock_buffer(tbh))
			BUG();
		/* The buffer dirty state is now irrelevant, just clean it. */
		clear_buffer_dirty(tbh);
		BUG_ON(!buffer_uptodate(tbh));
		BUG_ON(!buffer_mapped(tbh));
		get_bh(tbh);
		tbh->b_end_io = end_buffer_write_sync;
		submit_bh(REQ_OP_WRITE, tbh);
	}
	/* Synchronize the mft mirror now if not @sync. */
	if (is_mft && !sync)
		goto do_mirror;
do_wait:
	/* Wait on i/o completion of buffers. */
	for (i = 0; i < nr_bhs; i++) {
		tbh = bhs[i];
		if (!tbh)
			continue;
		wait_on_buffer(tbh);
		if (unlikely(!buffer_uptodate(tbh))) {
			ntfs_error(vol->sb, "I/O error while writing ntfs "
					"record buffer (inode 0x%lx, "
					"attribute type 0x%x, page index "
					"0x%lx, page offset 0x%lx)! Unmount "
					"and run chkdsk.", vi->i_ino, ni->type,
					page->index, bh_offset(tbh));
			if (!err || err == -ENOMEM)
				err = -EIO;
			/*
			 * Set the buffer uptodate so the page and buffer
			 * states do not become out of sync.
			 */
			set_buffer_uptodate(tbh);
		}
	}
	/* If @sync, now synchronize the mft mirror. */
	if (is_mft && sync) {
do_mirror:
		for (i = 0; i < nr_bhs; i++) {
			unsigned long mft_no;
			unsigned int ofs;

			/*
			 * Skip buffers which are not at the beginning of
			 * records.
			 */
			if (i % bhs_per_rec)
				continue;
			tbh = bhs[i];
			/* Skip removed buffers (and hence records). */
			if (!tbh)
				continue;
			ofs = bh_offset(tbh);
			/* Get the mft record number. */
			mft_no = (((s64)page->index << PAGE_SHIFT) + ofs)
					>> rec_size_bits;
			if (mft_no < vol->mftmirr_size)
				ntfs_sync_mft_mirror(vol, mft_no,
						(MFT_RECORD*)(kaddr + ofs),
						sync);
		}
		if (!sync)
			goto do_wait;
	}
	/* Remove the mst protection fixups again. */
	for (i = 0; i < nr_bhs; i++) {
		if (!(i % bhs_per_rec)) {
			tbh = bhs[i];
			if (!tbh)
				continue;
			post_write_mst_fixup((NTFS_RECORD*)(kaddr +
					bh_offset(tbh)));
		}
	}
	flush_dcache_page(page);
unm_done:
	/* Unlock any locked inodes. */
	while (nr_locked_nis-- > 0) {
		ntfs_inode *tni, *base_tni;

		tni = locked_nis[nr_locked_nis];
		/* Get the base inode. */
		mutex_lock(&tni->extent_lock);
		if (tni->nr_extents >= 0)
			base_tni = tni;
		else {
			base_tni = tni->ext.base_ntfs_ino;
			BUG_ON(!base_tni);
		}
		mutex_unlock(&tni->extent_lock);
		ntfs_debug("Unlocking %s inode 0x%lx.",
				tni == base_tni ? "base" : "extent",
				tni->mft_no);
		mutex_unlock(&tni->mrec_lock);
		atomic_dec(&tni->count);
		iput(VFS_I(base_tni));
	}
	SetPageUptodate(page);
	kunmap(page);
done:
	if (unlikely(err && err != -ENOMEM)) {
		/*
		 * Set page error if there is only one ntfs record in the page.
		 * Otherwise we would lose per-record granularity.
		 */
		if (ni->itype.index.block_size == PAGE_SIZE)
			SetPageError(page);
		NVolSetErrors(vol);
	}
	if (page_is_dirty) {
		ntfs_debug("Page still contains one or more dirty ntfs "
				"records. Redirtying the page starting at "
				"record 0x%lx.", page->index <<
				(PAGE_SHIFT - rec_size_bits));
		redirty_page_for_writepage(wbc, page);
		unlock_page(page);
	} else {
		/*
		 * Keep the VM happy. This must be done otherwise the
		 * radix-tree tag PAGECACHE_TAG_DIRTY remains set even though
		 * the page is clean.
		 */
		BUG_ON(PageWriteback(page));
		set_page_writeback(page);
		unlock_page(page);
		end_page_writeback(page);
	}
	if (likely(!err))
		ntfs_debug("Done.");
	return err;
}

/**
 * ntfs_writepage - write a @page to the backing store
 * @page: page cache page to write out
 * @wbc: writeback control structure
 *
 * This is called from the VM when it wants to have a dirty ntfs page cache
 * page cleaned. The VM has already locked the page and marked it clean.
 *
 * For non-resident attributes, ntfs_writepage() writes the @page by calling
 * the ntfs version of the generic block_write_full_page() function,
 * ntfs_write_block(), which in turn if necessary creates and writes the
 * buffers associated with the page asynchronously.
 *
 * For resident attributes, OTOH, ntfs_writepage() writes the @page by copying
 * the data to the mft record (which at this stage is most likely in memory).
 * The mft record is then marked dirty and written out asynchronously via the
 * vfs inode dirty code path for the inode the mft record belongs to or via the
 * vm page dirty code path for the page the mft record is in.
 *
 * Based on ntfs_read_folio() and fs/buffer.c::block_write_full_page().
 *
 * Return 0 on success and -errno on error.
 */
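/*
 * As an example of the zeroing done below when a folio straddles i_size,
 * assume 4096-byte folios and i_size = 5000: folio index 1 covers bytes
 * 4096-8191, so ofs = 5000 & 4095 = 904 and bytes 904 to 4095 within the
 * folio are zeroed before the write is handed on.
 */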
static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
{
	struct folio *folio = page_folio(page);
	loff_t i_size;
	struct inode *vi = folio->mapping->host;
	ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
	char *addr;
	ntfs_attr_search_ctx *ctx = NULL;
	MFT_RECORD *m = NULL;
	u32 attr_len;
	int err;

retry_writepage:
	BUG_ON(!folio_test_locked(folio));
	i_size = i_size_read(vi);
	/* Is the folio fully outside i_size? (truncate in progress) */
	if (unlikely(folio->index >= (i_size + PAGE_SIZE - 1) >>
			PAGE_SHIFT)) {
		/*
		 * The folio may have dirty, unmapped buffers. Make them
		 * freeable here, so the page does not leak.
		 */
		block_invalidate_folio(folio, 0, folio_size(folio));
		folio_unlock(folio);
		ntfs_debug("Write outside i_size - truncated?");
		return 0;
	}
	/*
	 * Only $DATA attributes can be encrypted and only unnamed $DATA
	 * attributes can be compressed. Index root can have the flags set but
	 * this means to create compressed/encrypted files, not that the
	 * attribute is compressed/encrypted. Note we need to check for
	 * AT_INDEX_ALLOCATION since this is the type of both directory and
	 * index inodes.
	 */
	if (ni->type != AT_INDEX_ALLOCATION) {
		/* If file is encrypted, deny access, just like NT4. */
		if (NInoEncrypted(ni)) {
			folio_unlock(folio);
			BUG_ON(ni->type != AT_DATA);
			ntfs_debug("Denying write access to encrypted file.");
			return -EACCES;
		}
		/* Compressed data streams are handled in compress.c. */
		if (NInoNonResident(ni) && NInoCompressed(ni)) {
			BUG_ON(ni->type != AT_DATA);
			BUG_ON(ni->name_len);
			// TODO: Implement and replace this with
			// return ntfs_write_compressed_block(page);
			folio_unlock(folio);
			ntfs_error(vi->i_sb, "Writing to compressed files is "
					"not supported yet. Sorry.");
			return -EOPNOTSUPP;
		}
		// TODO: Implement and remove this check.
		if (NInoNonResident(ni) && NInoSparse(ni)) {
			folio_unlock(folio);
			ntfs_error(vi->i_sb, "Writing to sparse files is not "
					"supported yet. Sorry.");
			return -EOPNOTSUPP;
		}
	}
	/* NInoNonResident() == NInoIndexAllocPresent() */
	if (NInoNonResident(ni)) {
		/* We have to zero every time due to mmap-at-end-of-file. */
		if (folio->index >= (i_size >> PAGE_SHIFT)) {
			/* The folio straddles i_size. */
			unsigned int ofs = i_size & (folio_size(folio) - 1);
			folio_zero_segment(folio, ofs, folio_size(folio));
		}
		/* Handle mst protected attributes. */
		if (NInoMstProtected(ni))
			return ntfs_write_mst_block(page, wbc);
		/* Normal, non-resident data stream. */
		return ntfs_write_block(folio, wbc);
	}
	/*
	 * Attribute is resident, implying it is not compressed, encrypted, or
	 * mst protected. This also means the attribute is smaller than an mft
	 * record and hence smaller than a folio, so can simply return error on
	 * any folios with index above 0. Note the attribute can actually be
	 * marked compressed but if it is resident the actual data is not
	 * compressed so we are ok to ignore the compressed flag here.
	 */
	BUG_ON(folio_buffers(folio));
	BUG_ON(!folio_test_uptodate(folio));
	if (unlikely(folio->index > 0)) {
		ntfs_error(vi->i_sb, "BUG()! folio->index (0x%lx) > 0. "
				"Aborting write.", folio->index);
		BUG_ON(folio_test_writeback(folio));
		folio_start_writeback(folio);
		folio_unlock(folio);
		folio_end_writeback(folio);
		return -EIO;
	}
	if (!NInoAttr(ni))
		base_ni = ni;
	else
		base_ni = ni->ext.base_ntfs_ino;
	/* Map, pin, and lock the mft record. */
	m = map_mft_record(base_ni);
	if (IS_ERR(m)) {
		err = PTR_ERR(m);
		m = NULL;
		ctx = NULL;
		goto err_out;
	}
	/*
	 * If a parallel write made the attribute non-resident, drop the mft
	 * record and retry the writepage.
	 */
	if (unlikely(NInoNonResident(ni))) {
		unmap_mft_record(base_ni);
		goto retry_writepage;
	}
	ctx = ntfs_attr_get_search_ctx(base_ni, m);
	if (unlikely(!ctx)) {
		err = -ENOMEM;
		goto err_out;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
			CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(err))
		goto err_out;
	/*
	 * Keep the VM happy. This must be done otherwise
	 * PAGECACHE_TAG_DIRTY remains set even though the folio is clean.
	 */
	BUG_ON(folio_test_writeback(folio));
	folio_start_writeback(folio);
	folio_unlock(folio);
	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
	i_size = i_size_read(vi);
	if (unlikely(attr_len > i_size)) {
		/* Race with shrinking truncate or a failed truncate. */
		attr_len = i_size;
		/*
		 * If the truncate failed, fix it up now. If a concurrent
		 * truncate, we do its job, so it does not have to do anything.
		 */
		err = ntfs_resident_attr_value_resize(ctx->mrec, ctx->attr,
				attr_len);
		/* Shrinking cannot fail. */
		BUG_ON(err);
	}
	addr = kmap_local_folio(folio, 0);
	/* Copy the data from the folio to the mft record. */
	memcpy((u8*)ctx->attr +
			le16_to_cpu(ctx->attr->data.resident.value_offset),
			addr, attr_len);
	/* Zero out of bounds area in the page cache folio. */
	memset(addr + attr_len, 0, folio_size(folio) - attr_len);
	kunmap_local(addr);
	flush_dcache_folio(folio);
	flush_dcache_mft_record_page(ctx->ntfs_ino);
	/* We are done with the folio. */
	folio_end_writeback(folio);
	/* Finally, mark the mft record dirty, so it gets written back. */
	mark_mft_record_dirty(ctx->ntfs_ino);
	ntfs_attr_put_search_ctx(ctx);
	unmap_mft_record(base_ni);
	return 0;
err_out:
	if (err == -ENOMEM) {
		ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying "
				"page so we try again later.");
		/*
		 * Put the folio back on mapping->dirty_pages, but leave its
		 * buffers' dirty state as-is.
		 */
		folio_redirty_for_writepage(wbc, folio);
		err = 0;
	} else {
		ntfs_error(vi->i_sb, "Resident attribute write failed with "
				"error %i.", err);
		folio_set_error(folio);
		NVolSetErrors(ni->vol);
	}
	folio_unlock(folio);
	if (ctx)
		ntfs_attr_put_search_ctx(ctx);
	if (m)
		unmap_mft_record(base_ni);
	return err;
}

#endif /* NTFS_RW */

/**
 * ntfs_bmap - map logical file block to physical device block
 * @mapping: address space mapping to which the block to be mapped belongs
 * @block: logical block to map to its physical device block
 *
 * For regular, non-resident files (i.e. not compressed and not encrypted), map
 * the logical @block belonging to the file described by the address space
 * mapping @mapping to its physical device block.
 *
 * The size of the block is equal to the @s_blocksize field of the super block
 * of the mounted file system which is guaranteed to be smaller than or equal
 * to the cluster size thus the block is guaranteed to fit entirely inside the
 * cluster which means we do not need to care how many contiguous bytes are
 * available after the beginning of the block.
 *
 * Return the physical device block if the mapping succeeded or 0 if the block
 * is sparse or there was an error.
 *
 * Note: This is a problem if someone tries to run bmap() on $Boot system file
 * as that really is in block zero but there is nothing we can do. bmap() is
 * just broken in that respect (just like it cannot distinguish sparse from
 * not available or error).
 */
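/*
 * A worked example of the conversion done below, assuming 512-byte blocks
 * and 4096-byte clusters: logical block 9 corresponds to byte offset
 * 9 << 9 = 0x1200, i.e. vcn 1 with delta = 0x200 into the cluster. If vcn 1
 * maps to lcn 100, the returned device block is
 * ((100 << 12) + 0x200) >> 9 = 801.
 */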
1532 | static sector_t ntfs_bmap(struct address_space *mapping, sector_t block) |
1533 | { |
1534 | s64 ofs, size; |
1535 | loff_t i_size; |
1536 | LCN lcn; |
1537 | unsigned long blocksize, flags; |
1538 | ntfs_inode *ni = NTFS_I(inode: mapping->host); |
1539 | ntfs_volume *vol = ni->vol; |
1540 | unsigned delta; |
1541 | unsigned char blocksize_bits, cluster_size_shift; |
1542 | |
1543 | ntfs_debug("Entering for mft_no 0x%lx, logical block 0x%llx." , |
1544 | ni->mft_no, (unsigned long long)block); |
1545 | if (ni->type != AT_DATA || !NInoNonResident(ni) || NInoEncrypted(ni)) { |
1546 | ntfs_error(vol->sb, "BMAP does not make sense for %s " |
1547 | "attributes, returning 0." , |
1548 | (ni->type != AT_DATA) ? "non-data" : |
1549 | (!NInoNonResident(ni) ? "resident" : |
1550 | "encrypted" )); |
1551 | return 0; |
1552 | } |
1553 | /* None of these can happen. */ |
1554 | BUG_ON(NInoCompressed(ni)); |
1555 | BUG_ON(NInoMstProtected(ni)); |
1556 | blocksize = vol->sb->s_blocksize; |
1557 | blocksize_bits = vol->sb->s_blocksize_bits; |
1558 | ofs = (s64)block << blocksize_bits; |
1559 | read_lock_irqsave(&ni->size_lock, flags); |
1560 | size = ni->initialized_size; |
i_size = i_size_read(VFS_I(ni));
1562 | read_unlock_irqrestore(&ni->size_lock, flags); |
1563 | /* |
1564 | * If the offset is outside the initialized size or the block straddles |
1565 | * the initialized size then pretend it is a hole unless the |
1566 | * initialized size equals the file size. |
1567 | */ |
1568 | if (unlikely(ofs >= size || (ofs + blocksize > size && size < i_size))) |
1569 | goto hole; |
1570 | cluster_size_shift = vol->cluster_size_bits; |
down_read(&ni->runlist.lock);
lcn = ntfs_attr_vcn_to_lcn_nolock(ni, ofs >> cluster_size_shift, false);
up_read(&ni->runlist.lock);
1574 | if (unlikely(lcn < LCN_HOLE)) { |
1575 | /* |
1576 | * Step down to an integer to avoid gcc doing a long long |
* comparison in the switch when we know @lcn is between
1578 | * LCN_HOLE and LCN_EIO (i.e. -1 to -5). |
1579 | * |
1580 | * Otherwise older gcc (at least on some architectures) will |
1581 | * try to use __cmpdi2() which is of course not available in |
1582 | * the kernel. |
1583 | */ |
1584 | switch ((int)lcn) { |
1585 | case LCN_ENOENT: |
1586 | /* |
1587 | * If the offset is out of bounds then pretend it is a |
1588 | * hole. |
1589 | */ |
1590 | goto hole; |
1591 | case LCN_ENOMEM: |
ntfs_error(vol->sb, "Not enough memory to complete "
"mapping for inode 0x%lx. "
"Returning 0.", ni->mft_no);
1595 | break; |
1596 | default: |
ntfs_error(vol->sb, "Failed to complete mapping for "
"inode 0x%lx. Run chkdsk. "
"Returning 0.", ni->mft_no);
1600 | break; |
1601 | } |
1602 | return 0; |
1603 | } |
1604 | if (lcn < 0) { |
1605 | /* It is a hole. */ |
1606 | hole: |
ntfs_debug("Done (returning hole).");
1608 | return 0; |
1609 | } |
1610 | /* |
* The block is really allocated and fulfils all our criteria.
1612 | * Convert the cluster to units of block size and return the result. |
1613 | */ |
1614 | delta = ofs & vol->cluster_size_mask; |
1615 | if (unlikely(sizeof(block) < sizeof(lcn))) { |
1616 | block = lcn = ((lcn << cluster_size_shift) + delta) >> |
1617 | blocksize_bits; |
1618 | /* If the block number was truncated return 0. */ |
1619 | if (unlikely(block != lcn)) { |
ntfs_error(vol->sb, "Physical block 0x%llx is too "
"large to be returned, returning 0.",
(long long)lcn);
1623 | return 0; |
1624 | } |
1625 | } else |
1626 | block = ((lcn << cluster_size_shift) + delta) >> |
1627 | blocksize_bits; |
ntfs_debug("Done (returning block 0x%llx).", (unsigned long long)block);
1629 | return block; |
1630 | } |
1631 | |
1632 | /* |
1633 | * ntfs_normal_aops - address space operations for normal inodes and attributes |
1634 | * |
1635 | * Note these are not used for compressed or mst protected inodes and |
1636 | * attributes. |
1637 | */ |
1638 | const struct address_space_operations ntfs_normal_aops = { |
1639 | .read_folio = ntfs_read_folio, |
1640 | #ifdef NTFS_RW |
1641 | .writepage = ntfs_writepage, |
1642 | .dirty_folio = block_dirty_folio, |
1643 | #endif /* NTFS_RW */ |
1644 | .bmap = ntfs_bmap, |
1645 | .migrate_folio = buffer_migrate_folio, |
1646 | .is_partially_uptodate = block_is_partially_uptodate, |
1647 | .error_remove_page = generic_error_remove_page, |
1648 | }; |
1649 | |
1650 | /* |
1651 | * ntfs_compressed_aops - address space operations for compressed inodes |
1652 | */ |
1653 | const struct address_space_operations ntfs_compressed_aops = { |
1654 | .read_folio = ntfs_read_folio, |
1655 | #ifdef NTFS_RW |
1656 | .writepage = ntfs_writepage, |
1657 | .dirty_folio = block_dirty_folio, |
1658 | #endif /* NTFS_RW */ |
1659 | .migrate_folio = buffer_migrate_folio, |
1660 | .is_partially_uptodate = block_is_partially_uptodate, |
1661 | .error_remove_page = generic_error_remove_page, |
1662 | }; |
1663 | |
1664 | /* |
* ntfs_mst_aops - general address space operations for mst protected inodes
1666 | * and attributes |
1667 | */ |
1668 | const struct address_space_operations ntfs_mst_aops = { |
1669 | .read_folio = ntfs_read_folio, /* Fill page with data. */ |
1670 | #ifdef NTFS_RW |
1671 | .writepage = ntfs_writepage, /* Write dirty page to disk. */ |
1672 | .dirty_folio = filemap_dirty_folio, |
1673 | #endif /* NTFS_RW */ |
1674 | .migrate_folio = buffer_migrate_folio, |
1675 | .is_partially_uptodate = block_is_partially_uptodate, |
1676 | .error_remove_page = generic_error_remove_page, |
1677 | }; |
1678 | |
1679 | #ifdef NTFS_RW |
1680 | |
1681 | /** |
1682 | * mark_ntfs_record_dirty - mark an ntfs record dirty |
1683 | * @page: page containing the ntfs record to mark dirty |
1684 | * @ofs: byte offset within @page at which the ntfs record begins |
1685 | * |
1686 | * Set the buffers and the page in which the ntfs record is located dirty. |
1687 | * |
1688 | * The latter also marks the vfs inode the ntfs record belongs to dirty |
1689 | * (I_DIRTY_PAGES only). |
1690 | * |
1691 | * If the page does not have buffers, we create them and set them uptodate. |
* The page may not be locked, which is why we need to handle the buffers
* under the mapping->private_lock. Once the buffers are marked dirty we no
* longer need the lock since try_to_free_buffers() does not free dirty
* buffers.
1695 | */ |
void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs)
{
struct address_space *mapping = page->mapping;
ntfs_inode *ni = NTFS_I(mapping->host);
1699 | struct buffer_head *bh, *head, *buffers_to_free = NULL; |
1700 | unsigned int end, bh_size, bh_ofs; |
1701 | |
1702 | BUG_ON(!PageUptodate(page)); |
1703 | end = ofs + ni->itype.index.block_size; |
1704 | bh_size = VFS_I(ni)->i_sb->s_blocksize; |
spin_lock(&mapping->private_lock);
if (unlikely(!page_has_buffers(page))) {
spin_unlock(&mapping->private_lock);
bh = head = alloc_page_buffers(page, bh_size, true);
spin_lock(&mapping->private_lock);
1710 | if (likely(!page_has_buffers(page))) { |
1711 | struct buffer_head *tail; |
1712 | |
1713 | do { |
1714 | set_buffer_uptodate(bh); |
1715 | tail = bh; |
1716 | bh = bh->b_this_page; |
1717 | } while (bh); |
1718 | tail->b_this_page = head; |
attach_page_private(page, head);
1720 | } else |
1721 | buffers_to_free = bh; |
1722 | } |
1723 | bh = head = page_buffers(page); |
1724 | BUG_ON(!bh); |
1725 | do { |
1726 | bh_ofs = bh_offset(bh); |
1727 | if (bh_ofs + bh_size <= ofs) |
1728 | continue; |
1729 | if (unlikely(bh_ofs >= end)) |
1730 | break; |
1731 | set_buffer_dirty(bh); |
1732 | } while ((bh = bh->b_this_page) != head); |
spin_unlock(&mapping->private_lock);
1734 | filemap_dirty_folio(mapping, page_folio(page)); |
1735 | if (unlikely(buffers_to_free)) { |
1736 | do { |
1737 | bh = buffers_to_free->b_this_page; |
free_buffer_head(buffers_to_free);
1739 | buffers_to_free = bh; |
1740 | } while (buffers_to_free); |
1741 | } |
1742 | } |
1743 | |
1744 | #endif /* NTFS_RW */ |
1745 | |