1 | // SPDX-License-Identifier: GPL-2.0+ |
2 | /* |
3 | * NILFS disk address translation. |
4 | * |
5 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. |
6 | * |
7 | * Written by Koji Sato. |
8 | */ |
9 | |
10 | #include <linux/types.h> |
11 | #include <linux/buffer_head.h> |
12 | #include <linux/string.h> |
13 | #include <linux/errno.h> |
14 | #include "nilfs.h" |
15 | #include "mdt.h" |
16 | #include "alloc.h" |
17 | #include "dat.h" |
18 | |
19 | |
/*
 * Checkpoint-number constants stored in the lifetime fields of a DAT entry.
 * NILFS_CNO_MIN is written to de_start of a freshly allocated entry and to
 * both de_start and de_end of a freed one; NILFS_CNO_MAX (all 64 bits set)
 * is written to de_end of a freshly allocated entry.
 */
#define NILFS_CNO_MIN	((__u64)1)
#define NILFS_CNO_MAX	((__u64)~0ULL)
22 | |
23 | /** |
24 | * struct nilfs_dat_info - on-memory private data of DAT file |
25 | * @mi: on-memory private data of metadata file |
26 | * @palloc_cache: persistent object allocator cache of DAT file |
27 | * @shadow: shadow map of DAT file |
28 | */ |
29 | struct nilfs_dat_info { |
30 | struct nilfs_mdt_info mi; |
31 | struct nilfs_palloc_cache palloc_cache; |
32 | struct nilfs_shadow_map shadow; |
33 | }; |
34 | |
/**
 * NILFS_DAT_I - get the DAT-specific private data of a DAT inode
 * @dat: DAT file inode
 *
 * Return: the pointer yielded by NILFS_MDT() for @dat, viewed as a pointer
 * to &struct nilfs_dat_info.
 */
static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat)
{
	void *mdt_private = NILFS_MDT(dat);

	return mdt_private;
}
39 | |
40 | static int nilfs_dat_prepare_entry(struct inode *dat, |
41 | struct nilfs_palloc_req *req, int create) |
42 | { |
43 | int ret; |
44 | |
45 | ret = nilfs_palloc_get_entry_block(dat, req->pr_entry_nr, |
46 | create, &req->pr_entry_bh); |
47 | if (unlikely(ret == -ENOENT)) { |
48 | nilfs_err(dat->i_sb, |
49 | "DAT doesn't have a block to manage vblocknr = %llu" , |
50 | (unsigned long long)req->pr_entry_nr); |
51 | /* |
52 | * Return internal code -EINVAL to notify bmap layer of |
53 | * metadata corruption. |
54 | */ |
55 | ret = -EINVAL; |
56 | } |
57 | return ret; |
58 | } |
59 | |
60 | static void nilfs_dat_commit_entry(struct inode *dat, |
61 | struct nilfs_palloc_req *req) |
62 | { |
63 | mark_buffer_dirty(bh: req->pr_entry_bh); |
64 | nilfs_mdt_mark_dirty(inode: dat); |
65 | brelse(bh: req->pr_entry_bh); |
66 | } |
67 | |
68 | static void nilfs_dat_abort_entry(struct inode *dat, |
69 | struct nilfs_palloc_req *req) |
70 | { |
71 | brelse(bh: req->pr_entry_bh); |
72 | } |
73 | |
/**
 * nilfs_dat_prepare_alloc - prepare to allocate a new DAT entry
 * @dat: DAT file inode
 * @req: allocation request
 *
 * Prepares the allocation through the persistent object allocator and then
 * gets the entry block with creation enabled. If the second step fails, the
 * first one is aborted again.
 *
 * Return: 0 or a non-negative value from the helpers on success, a negative
 * error code on failure.
 */
int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
	int err;

	err = nilfs_palloc_prepare_alloc_entry(dat, req);
	if (err < 0)
		return err;

	err = nilfs_dat_prepare_entry(dat, req, 1);
	if (err >= 0)
		return err;

	/* Could not get the entry block: undo the allocator preparation. */
	nilfs_palloc_abort_alloc_entry(dat, req);
	return err;
}
88 | |
89 | void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req) |
90 | { |
91 | struct nilfs_dat_entry *entry; |
92 | void *kaddr; |
93 | |
94 | kaddr = kmap_atomic(page: req->pr_entry_bh->b_page); |
95 | entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, |
96 | req->pr_entry_bh, kaddr); |
97 | entry->de_start = cpu_to_le64(NILFS_CNO_MIN); |
98 | entry->de_end = cpu_to_le64(NILFS_CNO_MAX); |
99 | entry->de_blocknr = cpu_to_le64(0); |
100 | kunmap_atomic(kaddr); |
101 | |
102 | nilfs_palloc_commit_alloc_entry(dat, req); |
103 | nilfs_dat_commit_entry(dat, req); |
104 | } |
105 | |
/**
 * nilfs_dat_abort_alloc - cancel a prepared DAT entry allocation
 * @dat: DAT file inode
 * @req: request prepared by nilfs_dat_prepare_alloc()
 *
 * Releases the entry block first and then aborts the allocator request,
 * i.e. the reverse of the order used by nilfs_dat_prepare_alloc().
 */
void nilfs_dat_abort_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
	/* Undo nilfs_dat_prepare_entry(). */
	nilfs_dat_abort_entry(dat, req);
	/* Undo nilfs_palloc_prepare_alloc_entry(). */
	nilfs_palloc_abort_alloc_entry(dat, req);
}
111 | |
112 | static void nilfs_dat_commit_free(struct inode *dat, |
113 | struct nilfs_palloc_req *req) |
114 | { |
115 | struct nilfs_dat_entry *entry; |
116 | void *kaddr; |
117 | |
118 | kaddr = kmap_atomic(page: req->pr_entry_bh->b_page); |
119 | entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, |
120 | req->pr_entry_bh, kaddr); |
121 | entry->de_start = cpu_to_le64(NILFS_CNO_MIN); |
122 | entry->de_end = cpu_to_le64(NILFS_CNO_MIN); |
123 | entry->de_blocknr = cpu_to_le64(0); |
124 | kunmap_atomic(kaddr); |
125 | |
126 | nilfs_dat_commit_entry(dat, req); |
127 | |
128 | if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) { |
129 | nilfs_error(dat->i_sb, |
130 | "state inconsistency probably due to duplicate use of vblocknr = %llu" , |
131 | (unsigned long long)req->pr_entry_nr); |
132 | return; |
133 | } |
134 | nilfs_palloc_commit_free_entry(dat, req); |
135 | } |
136 | |
/**
 * nilfs_dat_prepare_start - prepare to start the lifetime of a DAT entry
 * @dat: DAT file inode
 * @req: request identifying the entry
 *
 * Gets the existing entry block for @req without creating it.
 *
 * Return: the result of nilfs_dat_prepare_entry().
 */
int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req)
{
	int err = nilfs_dat_prepare_entry(dat, req, 0);

	return err;
}
141 | |
142 | void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, |
143 | sector_t blocknr) |
144 | { |
145 | struct nilfs_dat_entry *entry; |
146 | void *kaddr; |
147 | |
148 | kaddr = kmap_atomic(page: req->pr_entry_bh->b_page); |
149 | entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, |
150 | req->pr_entry_bh, kaddr); |
151 | entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat)); |
152 | entry->de_blocknr = cpu_to_le64(blocknr); |
153 | kunmap_atomic(kaddr); |
154 | |
155 | nilfs_dat_commit_entry(dat, req); |
156 | } |
157 | |
158 | int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) |
159 | { |
160 | struct nilfs_dat_entry *entry; |
161 | __u64 start; |
162 | sector_t blocknr; |
163 | void *kaddr; |
164 | int ret; |
165 | |
166 | ret = nilfs_dat_prepare_entry(dat, req, create: 0); |
167 | if (ret < 0) |
168 | return ret; |
169 | |
170 | kaddr = kmap_atomic(page: req->pr_entry_bh->b_page); |
171 | entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, |
172 | req->pr_entry_bh, kaddr); |
173 | start = le64_to_cpu(entry->de_start); |
174 | blocknr = le64_to_cpu(entry->de_blocknr); |
175 | kunmap_atomic(kaddr); |
176 | |
177 | if (blocknr == 0) { |
178 | ret = nilfs_palloc_prepare_free_entry(dat, req); |
179 | if (ret < 0) { |
180 | nilfs_dat_abort_entry(dat, req); |
181 | return ret; |
182 | } |
183 | } |
184 | if (unlikely(start > nilfs_mdt_cno(dat))) { |
185 | nilfs_err(dat->i_sb, |
186 | "vblocknr = %llu has abnormal lifetime: start cno (= %llu) > current cno (= %llu)" , |
187 | (unsigned long long)req->pr_entry_nr, |
188 | (unsigned long long)start, |
189 | (unsigned long long)nilfs_mdt_cno(dat)); |
190 | nilfs_dat_abort_entry(dat, req); |
191 | return -EINVAL; |
192 | } |
193 | |
194 | return 0; |
195 | } |
196 | |
197 | void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, |
198 | int dead) |
199 | { |
200 | struct nilfs_dat_entry *entry; |
201 | __u64 start, end; |
202 | sector_t blocknr; |
203 | void *kaddr; |
204 | |
205 | kaddr = kmap_atomic(page: req->pr_entry_bh->b_page); |
206 | entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, |
207 | req->pr_entry_bh, kaddr); |
208 | end = start = le64_to_cpu(entry->de_start); |
209 | if (!dead) { |
210 | end = nilfs_mdt_cno(inode: dat); |
211 | WARN_ON(start > end); |
212 | } |
213 | entry->de_end = cpu_to_le64(end); |
214 | blocknr = le64_to_cpu(entry->de_blocknr); |
215 | kunmap_atomic(kaddr); |
216 | |
217 | if (blocknr == 0) |
218 | nilfs_dat_commit_free(dat, req); |
219 | else |
220 | nilfs_dat_commit_entry(dat, req); |
221 | } |
222 | |
223 | void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req) |
224 | { |
225 | struct nilfs_dat_entry *entry; |
226 | __u64 start; |
227 | sector_t blocknr; |
228 | void *kaddr; |
229 | |
230 | kaddr = kmap_atomic(page: req->pr_entry_bh->b_page); |
231 | entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, |
232 | req->pr_entry_bh, kaddr); |
233 | start = le64_to_cpu(entry->de_start); |
234 | blocknr = le64_to_cpu(entry->de_blocknr); |
235 | kunmap_atomic(kaddr); |
236 | |
237 | if (start == nilfs_mdt_cno(inode: dat) && blocknr == 0) |
238 | nilfs_palloc_abort_free_entry(dat, req); |
239 | nilfs_dat_abort_entry(dat, req); |
240 | } |
241 | |
/**
 * nilfs_dat_prepare_update - prepare to replace one DAT entry by another
 * @dat: DAT file inode
 * @oldreq: request for the entry whose lifetime ends
 * @newreq: request for the entry to allocate
 *
 * Prepares the end of @oldreq and then the allocation of @newreq. If the
 * allocation cannot be prepared, the end operation is aborted again.
 *
 * Return: 0 on success, or the nonzero result of the failing step.
 */
int nilfs_dat_prepare_update(struct inode *dat,
			     struct nilfs_palloc_req *oldreq,
			     struct nilfs_palloc_req *newreq)
{
	int err;

	err = nilfs_dat_prepare_end(dat, oldreq);
	if (err)
		return err;

	err = nilfs_dat_prepare_alloc(dat, newreq);
	if (err < 0)
		nilfs_dat_abort_end(dat, oldreq);

	return err;
}
256 | |
/**
 * nilfs_dat_commit_update - commit the replacement of a DAT entry
 * @dat: DAT file inode
 * @oldreq: request for the entry whose lifetime ends
 * @newreq: request for the newly allocated entry
 * @dead: forwarded to nilfs_dat_commit_end() for @oldreq
 */
void nilfs_dat_commit_update(struct inode *dat,
			     struct nilfs_palloc_req *oldreq,
			     struct nilfs_palloc_req *newreq, int dead)
{
	/* End the old entry first, then bring the new one to life. */
	nilfs_dat_commit_end(dat, oldreq, dead);
	nilfs_dat_commit_alloc(dat, newreq);
}
264 | |
/**
 * nilfs_dat_abort_update - cancel a prepared replacement of a DAT entry
 * @dat: DAT file inode
 * @oldreq: request for the entry whose lifetime was to end
 * @newreq: request for the entry that was to be allocated
 */
void nilfs_dat_abort_update(struct inode *dat,
			    struct nilfs_palloc_req *oldreq,
			    struct nilfs_palloc_req *newreq)
{
	/* Same order as the original preparation: end first, then alloc. */
	nilfs_dat_abort_end(dat, oldreq);
	nilfs_dat_abort_alloc(dat, newreq);
}
272 | |
273 | /** |
274 | * nilfs_dat_mark_dirty - |
275 | * @dat: DAT file inode |
276 | * @vblocknr: virtual block number |
277 | * |
278 | * Description: |
279 | * |
280 | * Return Value: On success, 0 is returned. On error, one of the following |
281 | * negative error codes is returned. |
282 | * |
283 | * %-EIO - I/O error. |
284 | * |
285 | * %-ENOMEM - Insufficient amount of memory available. |
286 | */ |
287 | int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr) |
288 | { |
289 | struct nilfs_palloc_req req; |
290 | int ret; |
291 | |
292 | req.pr_entry_nr = vblocknr; |
293 | ret = nilfs_dat_prepare_entry(dat, req: &req, create: 0); |
294 | if (ret == 0) |
295 | nilfs_dat_commit_entry(dat, req: &req); |
296 | return ret; |
297 | } |
298 | |
299 | /** |
300 | * nilfs_dat_freev - free virtual block numbers |
301 | * @dat: DAT file inode |
302 | * @vblocknrs: array of virtual block numbers |
303 | * @nitems: number of virtual block numbers |
304 | * |
305 | * Description: nilfs_dat_freev() frees the virtual block numbers specified by |
306 | * @vblocknrs and @nitems. |
307 | * |
308 | * Return Value: On success, 0 is returned. On error, one of the following |
309 | * negative error codes is returned. |
310 | * |
311 | * %-EIO - I/O error. |
312 | * |
313 | * %-ENOMEM - Insufficient amount of memory available. |
314 | * |
315 | * %-ENOENT - The virtual block number have not been allocated. |
316 | */ |
317 | int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems) |
318 | { |
319 | return nilfs_palloc_freev(dat, vblocknrs, nitems); |
320 | } |
321 | |
322 | /** |
323 | * nilfs_dat_move - change a block number |
324 | * @dat: DAT file inode |
325 | * @vblocknr: virtual block number |
326 | * @blocknr: block number |
327 | * |
328 | * Description: nilfs_dat_move() changes the block number associated with |
329 | * @vblocknr to @blocknr. |
330 | * |
331 | * Return Value: On success, 0 is returned. On error, one of the following |
332 | * negative error codes is returned. |
333 | * |
334 | * %-EIO - I/O error. |
335 | * |
336 | * %-ENOMEM - Insufficient amount of memory available. |
337 | */ |
338 | int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) |
339 | { |
340 | struct buffer_head *entry_bh; |
341 | struct nilfs_dat_entry *entry; |
342 | void *kaddr; |
343 | int ret; |
344 | |
345 | ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); |
346 | if (ret < 0) |
347 | return ret; |
348 | |
349 | /* |
350 | * The given disk block number (blocknr) is not yet written to |
351 | * the device at this point. |
352 | * |
353 | * To prevent nilfs_dat_translate() from returning the |
354 | * uncommitted block number, this makes a copy of the entry |
355 | * buffer and redirects nilfs_dat_translate() to the copy. |
356 | */ |
357 | if (!buffer_nilfs_redirected(bh: entry_bh)) { |
358 | ret = nilfs_mdt_freeze_buffer(inode: dat, bh: entry_bh); |
359 | if (ret) { |
360 | brelse(bh: entry_bh); |
361 | return ret; |
362 | } |
363 | } |
364 | |
365 | kaddr = kmap_atomic(page: entry_bh->b_page); |
366 | entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); |
367 | if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { |
368 | nilfs_crit(dat->i_sb, |
369 | "%s: invalid vblocknr = %llu, [%llu, %llu)" , |
370 | __func__, (unsigned long long)vblocknr, |
371 | (unsigned long long)le64_to_cpu(entry->de_start), |
372 | (unsigned long long)le64_to_cpu(entry->de_end)); |
373 | kunmap_atomic(kaddr); |
374 | brelse(bh: entry_bh); |
375 | return -EINVAL; |
376 | } |
377 | WARN_ON(blocknr == 0); |
378 | entry->de_blocknr = cpu_to_le64(blocknr); |
379 | kunmap_atomic(kaddr); |
380 | |
381 | mark_buffer_dirty(bh: entry_bh); |
382 | nilfs_mdt_mark_dirty(inode: dat); |
383 | |
384 | brelse(bh: entry_bh); |
385 | |
386 | return 0; |
387 | } |
388 | |
389 | /** |
390 | * nilfs_dat_translate - translate a virtual block number to a block number |
391 | * @dat: DAT file inode |
392 | * @vblocknr: virtual block number |
393 | * @blocknrp: pointer to a block number |
394 | * |
395 | * Description: nilfs_dat_translate() maps the virtual block number @vblocknr |
396 | * to the corresponding block number. |
397 | * |
398 | * Return Value: On success, 0 is returned and the block number associated |
399 | * with @vblocknr is stored in the place pointed by @blocknrp. On error, one |
400 | * of the following negative error codes is returned. |
401 | * |
402 | * %-EIO - I/O error. |
403 | * |
404 | * %-ENOMEM - Insufficient amount of memory available. |
405 | * |
406 | * %-ENOENT - A block number associated with @vblocknr does not exist. |
407 | */ |
408 | int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) |
409 | { |
410 | struct buffer_head *entry_bh, *bh; |
411 | struct nilfs_dat_entry *entry; |
412 | sector_t blocknr; |
413 | void *kaddr; |
414 | int ret; |
415 | |
416 | ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); |
417 | if (ret < 0) |
418 | return ret; |
419 | |
420 | if (!nilfs_doing_gc() && buffer_nilfs_redirected(bh: entry_bh)) { |
421 | bh = nilfs_mdt_get_frozen_buffer(inode: dat, bh: entry_bh); |
422 | if (bh) { |
423 | WARN_ON(!buffer_uptodate(bh)); |
424 | brelse(bh: entry_bh); |
425 | entry_bh = bh; |
426 | } |
427 | } |
428 | |
429 | kaddr = kmap_atomic(page: entry_bh->b_page); |
430 | entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); |
431 | blocknr = le64_to_cpu(entry->de_blocknr); |
432 | if (blocknr == 0) { |
433 | ret = -ENOENT; |
434 | goto out; |
435 | } |
436 | *blocknrp = blocknr; |
437 | |
438 | out: |
439 | kunmap_atomic(kaddr); |
440 | brelse(bh: entry_bh); |
441 | return ret; |
442 | } |
443 | |
444 | ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz, |
445 | size_t nvi) |
446 | { |
447 | struct buffer_head *entry_bh; |
448 | struct nilfs_dat_entry *entry; |
449 | struct nilfs_vinfo *vinfo = buf; |
450 | __u64 first, last; |
451 | void *kaddr; |
452 | unsigned long entries_per_block = NILFS_MDT(inode: dat)->mi_entries_per_block; |
453 | int i, j, n, ret; |
454 | |
455 | for (i = 0; i < nvi; i += n) { |
456 | ret = nilfs_palloc_get_entry_block(dat, vinfo->vi_vblocknr, |
457 | 0, &entry_bh); |
458 | if (ret < 0) |
459 | return ret; |
460 | kaddr = kmap_atomic(page: entry_bh->b_page); |
461 | /* last virtual block number in this block */ |
462 | first = vinfo->vi_vblocknr; |
463 | do_div(first, entries_per_block); |
464 | first *= entries_per_block; |
465 | last = first + entries_per_block - 1; |
466 | for (j = i, n = 0; |
467 | j < nvi && vinfo->vi_vblocknr >= first && |
468 | vinfo->vi_vblocknr <= last; |
469 | j++, n++, vinfo = (void *)vinfo + visz) { |
470 | entry = nilfs_palloc_block_get_entry( |
471 | dat, vinfo->vi_vblocknr, entry_bh, kaddr); |
472 | vinfo->vi_start = le64_to_cpu(entry->de_start); |
473 | vinfo->vi_end = le64_to_cpu(entry->de_end); |
474 | vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr); |
475 | } |
476 | kunmap_atomic(kaddr); |
477 | brelse(bh: entry_bh); |
478 | } |
479 | |
480 | return nvi; |
481 | } |
482 | |
483 | /** |
484 | * nilfs_dat_read - read or get dat inode |
485 | * @sb: super block instance |
486 | * @entry_size: size of a dat entry |
487 | * @raw_inode: on-disk dat inode |
488 | * @inodep: buffer to store the inode |
489 | */ |
490 | int nilfs_dat_read(struct super_block *sb, size_t entry_size, |
491 | struct nilfs_inode *raw_inode, struct inode **inodep) |
492 | { |
493 | static struct lock_class_key dat_lock_key; |
494 | struct inode *dat; |
495 | struct nilfs_dat_info *di; |
496 | int err; |
497 | |
498 | if (entry_size > sb->s_blocksize) { |
499 | nilfs_err(sb, "too large DAT entry size: %zu bytes" , |
500 | entry_size); |
501 | return -EINVAL; |
502 | } else if (entry_size < NILFS_MIN_DAT_ENTRY_SIZE) { |
503 | nilfs_err(sb, "too small DAT entry size: %zu bytes" , |
504 | entry_size); |
505 | return -EINVAL; |
506 | } |
507 | |
508 | dat = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO); |
509 | if (unlikely(!dat)) |
510 | return -ENOMEM; |
511 | if (!(dat->i_state & I_NEW)) |
512 | goto out; |
513 | |
514 | err = nilfs_mdt_init(inode: dat, NILFS_MDT_GFP, objsz: sizeof(*di)); |
515 | if (err) |
516 | goto failed; |
517 | |
518 | err = nilfs_palloc_init_blockgroup(dat, entry_size); |
519 | if (err) |
520 | goto failed; |
521 | |
522 | di = NILFS_DAT_I(dat); |
523 | lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); |
524 | nilfs_palloc_setup_cache(inode: dat, cache: &di->palloc_cache); |
525 | err = nilfs_mdt_setup_shadow_map(inode: dat, shadow: &di->shadow); |
526 | if (err) |
527 | goto failed; |
528 | |
529 | err = nilfs_read_inode_common(dat, raw_inode); |
530 | if (err) |
531 | goto failed; |
532 | |
533 | unlock_new_inode(dat); |
534 | out: |
535 | *inodep = dat; |
536 | return 0; |
537 | failed: |
538 | iget_failed(dat); |
539 | return err; |
540 | } |
541 | |