1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * This file is part of UBIFS. |
4 | * |
5 | * Copyright (C) 2006-2008 Nokia Corporation. |
6 | * |
7 | * Author: Adrian Hunter |
8 | */ |
9 | |
10 | #include "ubifs.h" |
11 | |
12 | /* |
13 | * An orphan is an inode number whose inode node has been committed to the index |
14 | * with a link count of zero. That happens when an open file is deleted |
15 | * (unlinked) and then a commit is run. In the normal course of events the inode |
16 | * would be deleted when the file is closed. However in the case of an unclean |
17 | * unmount, orphans need to be accounted for. After an unclean unmount, the |
18 | * orphans' inodes must be deleted which means either scanning the entire index |
19 | * looking for them, or keeping a list on flash somewhere. This unit implements |
20 | * the latter approach. |
21 | * |
22 | * The orphan area is a fixed number of LEBs situated between the LPT area and |
23 | * the main area. The number of orphan area LEBs is specified when the file |
24 | * system is created. The minimum number is 1. The size of the orphan area |
25 | * should be so that it can hold the maximum number of orphans that are expected |
26 | * to ever exist at one time. |
27 | * |
28 | * The number of orphans that can fit in a LEB is: |
29 | * |
30 | * (c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64) |
31 | * |
32 | * For example: a 15872 byte LEB can fit 1980 orphans so 1 LEB may be enough. |
33 | * |
34 | * Orphans are accumulated in a rb-tree. When an inode's link count drops to |
35 | * zero, the inode number is added to the rb-tree. It is removed from the tree |
36 | * when the inode is deleted. Any new orphans that are in the orphan tree when |
37 | * the commit is run, are written to the orphan area in 1 or more orphan nodes. |
38 | * If the orphan area is full, it is consolidated to make space. There is |
39 | * always enough space because validation prevents the user from creating more |
40 | * than the maximum number of orphans allowed. |
41 | */ |
42 | |
43 | static int dbg_check_orphans(struct ubifs_info *c); |
44 | |
45 | static struct ubifs_orphan *orphan_add(struct ubifs_info *c, ino_t inum, |
46 | struct ubifs_orphan *parent_orphan) |
47 | { |
48 | struct ubifs_orphan *orphan, *o; |
49 | struct rb_node **p, *parent = NULL; |
50 | |
51 | orphan = kzalloc(size: sizeof(struct ubifs_orphan), GFP_NOFS); |
52 | if (!orphan) |
53 | return ERR_PTR(error: -ENOMEM); |
54 | orphan->inum = inum; |
55 | orphan->new = 1; |
56 | INIT_LIST_HEAD(list: &orphan->child_list); |
57 | |
58 | spin_lock(lock: &c->orphan_lock); |
59 | if (c->tot_orphans >= c->max_orphans) { |
60 | spin_unlock(lock: &c->orphan_lock); |
61 | kfree(objp: orphan); |
62 | return ERR_PTR(error: -ENFILE); |
63 | } |
64 | p = &c->orph_tree.rb_node; |
65 | while (*p) { |
66 | parent = *p; |
67 | o = rb_entry(parent, struct ubifs_orphan, rb); |
68 | if (inum < o->inum) |
69 | p = &(*p)->rb_left; |
70 | else if (inum > o->inum) |
71 | p = &(*p)->rb_right; |
72 | else { |
73 | ubifs_err(c, fmt: "orphaned twice" ); |
74 | spin_unlock(lock: &c->orphan_lock); |
75 | kfree(objp: orphan); |
76 | return ERR_PTR(error: -EINVAL); |
77 | } |
78 | } |
79 | c->tot_orphans += 1; |
80 | c->new_orphans += 1; |
81 | rb_link_node(node: &orphan->rb, parent, rb_link: p); |
82 | rb_insert_color(&orphan->rb, &c->orph_tree); |
83 | list_add_tail(new: &orphan->list, head: &c->orph_list); |
84 | list_add_tail(new: &orphan->new_list, head: &c->orph_new); |
85 | |
86 | if (parent_orphan) { |
87 | list_add_tail(new: &orphan->child_list, |
88 | head: &parent_orphan->child_list); |
89 | } |
90 | |
91 | spin_unlock(lock: &c->orphan_lock); |
92 | dbg_gen("ino %lu" , (unsigned long)inum); |
93 | return orphan; |
94 | } |
95 | |
96 | static struct ubifs_orphan *lookup_orphan(struct ubifs_info *c, ino_t inum) |
97 | { |
98 | struct ubifs_orphan *o; |
99 | struct rb_node *p; |
100 | |
101 | p = c->orph_tree.rb_node; |
102 | while (p) { |
103 | o = rb_entry(p, struct ubifs_orphan, rb); |
104 | if (inum < o->inum) |
105 | p = p->rb_left; |
106 | else if (inum > o->inum) |
107 | p = p->rb_right; |
108 | else { |
109 | return o; |
110 | } |
111 | } |
112 | return NULL; |
113 | } |
114 | |
115 | static void __orphan_drop(struct ubifs_info *c, struct ubifs_orphan *o) |
116 | { |
117 | rb_erase(&o->rb, &c->orph_tree); |
118 | list_del(entry: &o->list); |
119 | c->tot_orphans -= 1; |
120 | |
121 | if (o->new) { |
122 | list_del(entry: &o->new_list); |
123 | c->new_orphans -= 1; |
124 | } |
125 | |
126 | kfree(objp: o); |
127 | } |
128 | |
129 | static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph) |
130 | { |
131 | if (orph->del) { |
132 | dbg_gen("deleted twice ino %lu" , (unsigned long)orph->inum); |
133 | return; |
134 | } |
135 | |
136 | if (orph->cmt) { |
137 | orph->del = 1; |
138 | orph->dnext = c->orph_dnext; |
139 | c->orph_dnext = orph; |
140 | dbg_gen("delete later ino %lu" , (unsigned long)orph->inum); |
141 | return; |
142 | } |
143 | |
144 | __orphan_drop(c, o: orph); |
145 | } |
146 | |
147 | /** |
148 | * ubifs_add_orphan - add an orphan. |
149 | * @c: UBIFS file-system description object |
150 | * @inum: orphan inode number |
151 | * |
152 | * Add an orphan. This function is called when an inodes link count drops to |
153 | * zero. |
154 | */ |
155 | int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) |
156 | { |
157 | int err = 0; |
158 | ino_t xattr_inum; |
159 | union ubifs_key key; |
160 | struct ubifs_dent_node *xent, *pxent = NULL; |
161 | struct fscrypt_name nm = {0}; |
162 | struct ubifs_orphan *xattr_orphan; |
163 | struct ubifs_orphan *orphan; |
164 | |
165 | orphan = orphan_add(c, inum, NULL); |
166 | if (IS_ERR(ptr: orphan)) |
167 | return PTR_ERR(ptr: orphan); |
168 | |
169 | lowest_xent_key(c, key: &key, inum); |
170 | while (1) { |
171 | xent = ubifs_tnc_next_ent(c, key: &key, nm: &nm); |
172 | if (IS_ERR(ptr: xent)) { |
173 | err = PTR_ERR(ptr: xent); |
174 | if (err == -ENOENT) |
175 | break; |
176 | kfree(objp: pxent); |
177 | return err; |
178 | } |
179 | |
180 | fname_name(&nm) = xent->name; |
181 | fname_len(&nm) = le16_to_cpu(xent->nlen); |
182 | xattr_inum = le64_to_cpu(xent->inum); |
183 | |
184 | xattr_orphan = orphan_add(c, inum: xattr_inum, parent_orphan: orphan); |
185 | if (IS_ERR(ptr: xattr_orphan)) { |
186 | kfree(objp: pxent); |
187 | kfree(objp: xent); |
188 | return PTR_ERR(ptr: xattr_orphan); |
189 | } |
190 | |
191 | kfree(objp: pxent); |
192 | pxent = xent; |
193 | key_read(c, from: &xent->key, to: &key); |
194 | } |
195 | kfree(objp: pxent); |
196 | |
197 | return 0; |
198 | } |
199 | |
200 | /** |
201 | * ubifs_delete_orphan - delete an orphan. |
202 | * @c: UBIFS file-system description object |
203 | * @inum: orphan inode number |
204 | * |
205 | * Delete an orphan. This function is called when an inode is deleted. |
206 | */ |
207 | void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) |
208 | { |
209 | struct ubifs_orphan *orph, *child_orph, *tmp_o; |
210 | |
211 | spin_lock(lock: &c->orphan_lock); |
212 | |
213 | orph = lookup_orphan(c, inum); |
214 | if (!orph) { |
215 | spin_unlock(lock: &c->orphan_lock); |
216 | ubifs_err(c, fmt: "missing orphan ino %lu" , (unsigned long)inum); |
217 | dump_stack(); |
218 | |
219 | return; |
220 | } |
221 | |
222 | list_for_each_entry_safe(child_orph, tmp_o, &orph->child_list, child_list) { |
223 | list_del(entry: &child_orph->child_list); |
224 | orphan_delete(c, orph: child_orph); |
225 | } |
226 | |
227 | orphan_delete(c, orph); |
228 | |
229 | spin_unlock(lock: &c->orphan_lock); |
230 | } |
231 | |
232 | /** |
233 | * ubifs_orphan_start_commit - start commit of orphans. |
234 | * @c: UBIFS file-system description object |
235 | * |
236 | * Start commit of orphans. |
237 | */ |
238 | int ubifs_orphan_start_commit(struct ubifs_info *c) |
239 | { |
240 | struct ubifs_orphan *orphan, **last; |
241 | |
242 | spin_lock(lock: &c->orphan_lock); |
243 | last = &c->orph_cnext; |
244 | list_for_each_entry(orphan, &c->orph_new, new_list) { |
245 | ubifs_assert(c, orphan->new); |
246 | ubifs_assert(c, !orphan->cmt); |
247 | orphan->new = 0; |
248 | orphan->cmt = 1; |
249 | *last = orphan; |
250 | last = &orphan->cnext; |
251 | } |
252 | *last = NULL; |
253 | c->cmt_orphans = c->new_orphans; |
254 | c->new_orphans = 0; |
255 | dbg_cmt("%d orphans to commit" , c->cmt_orphans); |
256 | INIT_LIST_HEAD(list: &c->orph_new); |
257 | if (c->tot_orphans == 0) |
258 | c->no_orphs = 1; |
259 | else |
260 | c->no_orphs = 0; |
261 | spin_unlock(lock: &c->orphan_lock); |
262 | return 0; |
263 | } |
264 | |
265 | /** |
266 | * avail_orphs - calculate available space. |
267 | * @c: UBIFS file-system description object |
268 | * |
269 | * This function returns the number of orphans that can be written in the |
270 | * available space. |
271 | */ |
272 | static int avail_orphs(struct ubifs_info *c) |
273 | { |
274 | int avail_lebs, avail, gap; |
275 | |
276 | avail_lebs = c->orph_lebs - (c->ohead_lnum - c->orph_first) - 1; |
277 | avail = avail_lebs * |
278 | ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64)); |
279 | gap = c->leb_size - c->ohead_offs; |
280 | if (gap >= UBIFS_ORPH_NODE_SZ + sizeof(__le64)) |
281 | avail += (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64); |
282 | return avail; |
283 | } |
284 | |
285 | /** |
286 | * tot_avail_orphs - calculate total space. |
287 | * @c: UBIFS file-system description object |
288 | * |
289 | * This function returns the number of orphans that can be written in half |
290 | * the total space. That leaves half the space for adding new orphans. |
291 | */ |
292 | static int tot_avail_orphs(struct ubifs_info *c) |
293 | { |
294 | int avail_lebs, avail; |
295 | |
296 | avail_lebs = c->orph_lebs; |
297 | avail = avail_lebs * |
298 | ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64)); |
299 | return avail / 2; |
300 | } |
301 | |
302 | /** |
303 | * do_write_orph_node - write a node to the orphan head. |
304 | * @c: UBIFS file-system description object |
305 | * @len: length of node |
306 | * @atomic: write atomically |
307 | * |
308 | * This function writes a node to the orphan head from the orphan buffer. If |
309 | * %atomic is not zero, then the write is done atomically. On success, %0 is |
310 | * returned, otherwise a negative error code is returned. |
311 | */ |
312 | static int do_write_orph_node(struct ubifs_info *c, int len, int atomic) |
313 | { |
314 | int err = 0; |
315 | |
316 | if (atomic) { |
317 | ubifs_assert(c, c->ohead_offs == 0); |
318 | ubifs_prepare_node(c, buf: c->orph_buf, len, pad: 1); |
319 | len = ALIGN(len, c->min_io_size); |
320 | err = ubifs_leb_change(c, lnum: c->ohead_lnum, buf: c->orph_buf, len); |
321 | } else { |
322 | if (c->ohead_offs == 0) { |
323 | /* Ensure LEB has been unmapped */ |
324 | err = ubifs_leb_unmap(c, lnum: c->ohead_lnum); |
325 | if (err) |
326 | return err; |
327 | } |
328 | err = ubifs_write_node(c, node: c->orph_buf, len, lnum: c->ohead_lnum, |
329 | offs: c->ohead_offs); |
330 | } |
331 | return err; |
332 | } |
333 | |
334 | /** |
335 | * write_orph_node - write an orphan node. |
336 | * @c: UBIFS file-system description object |
337 | * @atomic: write atomically |
338 | * |
339 | * This function builds an orphan node from the cnext list and writes it to the |
340 | * orphan head. On success, %0 is returned, otherwise a negative error code |
341 | * is returned. |
342 | */ |
343 | static int write_orph_node(struct ubifs_info *c, int atomic) |
344 | { |
345 | struct ubifs_orphan *orphan, *cnext; |
346 | struct ubifs_orph_node *orph; |
347 | int gap, err, len, cnt, i; |
348 | |
349 | ubifs_assert(c, c->cmt_orphans > 0); |
350 | gap = c->leb_size - c->ohead_offs; |
351 | if (gap < UBIFS_ORPH_NODE_SZ + sizeof(__le64)) { |
352 | c->ohead_lnum += 1; |
353 | c->ohead_offs = 0; |
354 | gap = c->leb_size; |
355 | if (c->ohead_lnum > c->orph_last) { |
356 | /* |
357 | * We limit the number of orphans so that this should |
358 | * never happen. |
359 | */ |
360 | ubifs_err(c, fmt: "out of space in orphan area" ); |
361 | return -EINVAL; |
362 | } |
363 | } |
364 | cnt = (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64); |
365 | if (cnt > c->cmt_orphans) |
366 | cnt = c->cmt_orphans; |
367 | len = UBIFS_ORPH_NODE_SZ + cnt * sizeof(__le64); |
368 | ubifs_assert(c, c->orph_buf); |
369 | orph = c->orph_buf; |
370 | orph->ch.node_type = UBIFS_ORPH_NODE; |
371 | spin_lock(lock: &c->orphan_lock); |
372 | cnext = c->orph_cnext; |
373 | for (i = 0; i < cnt; i++) { |
374 | orphan = cnext; |
375 | ubifs_assert(c, orphan->cmt); |
376 | orph->inos[i] = cpu_to_le64(orphan->inum); |
377 | orphan->cmt = 0; |
378 | cnext = orphan->cnext; |
379 | orphan->cnext = NULL; |
380 | } |
381 | c->orph_cnext = cnext; |
382 | c->cmt_orphans -= cnt; |
383 | spin_unlock(lock: &c->orphan_lock); |
384 | if (c->cmt_orphans) |
385 | orph->cmt_no = cpu_to_le64(c->cmt_no); |
386 | else |
387 | /* Mark the last node of the commit */ |
388 | orph->cmt_no = cpu_to_le64((c->cmt_no) | (1ULL << 63)); |
389 | ubifs_assert(c, c->ohead_offs + len <= c->leb_size); |
390 | ubifs_assert(c, c->ohead_lnum >= c->orph_first); |
391 | ubifs_assert(c, c->ohead_lnum <= c->orph_last); |
392 | err = do_write_orph_node(c, len, atomic); |
393 | c->ohead_offs += ALIGN(len, c->min_io_size); |
394 | c->ohead_offs = ALIGN(c->ohead_offs, 8); |
395 | return err; |
396 | } |
397 | |
398 | /** |
399 | * write_orph_nodes - write orphan nodes until there are no more to commit. |
400 | * @c: UBIFS file-system description object |
401 | * @atomic: write atomically |
402 | * |
403 | * This function writes orphan nodes for all the orphans to commit. On success, |
404 | * %0 is returned, otherwise a negative error code is returned. |
405 | */ |
406 | static int write_orph_nodes(struct ubifs_info *c, int atomic) |
407 | { |
408 | int err; |
409 | |
410 | while (c->cmt_orphans > 0) { |
411 | err = write_orph_node(c, atomic); |
412 | if (err) |
413 | return err; |
414 | } |
415 | if (atomic) { |
416 | int lnum; |
417 | |
418 | /* Unmap any unused LEBs after consolidation */ |
419 | for (lnum = c->ohead_lnum + 1; lnum <= c->orph_last; lnum++) { |
420 | err = ubifs_leb_unmap(c, lnum); |
421 | if (err) |
422 | return err; |
423 | } |
424 | } |
425 | return 0; |
426 | } |
427 | |
428 | /** |
429 | * consolidate - consolidate the orphan area. |
430 | * @c: UBIFS file-system description object |
431 | * |
432 | * This function enables consolidation by putting all the orphans into the list |
433 | * to commit. The list is in the order that the orphans were added, and the |
434 | * LEBs are written atomically in order, so at no time can orphans be lost by |
435 | * an unclean unmount. |
436 | * |
437 | * This function returns %0 on success and a negative error code on failure. |
438 | */ |
439 | static int consolidate(struct ubifs_info *c) |
440 | { |
441 | int tot_avail = tot_avail_orphs(c), err = 0; |
442 | |
443 | spin_lock(lock: &c->orphan_lock); |
444 | dbg_cmt("there is space for %d orphans and there are %d" , |
445 | tot_avail, c->tot_orphans); |
446 | if (c->tot_orphans - c->new_orphans <= tot_avail) { |
447 | struct ubifs_orphan *orphan, **last; |
448 | int cnt = 0; |
449 | |
450 | /* Change the cnext list to include all non-new orphans */ |
451 | last = &c->orph_cnext; |
452 | list_for_each_entry(orphan, &c->orph_list, list) { |
453 | if (orphan->new) |
454 | continue; |
455 | orphan->cmt = 1; |
456 | *last = orphan; |
457 | last = &orphan->cnext; |
458 | cnt += 1; |
459 | } |
460 | *last = NULL; |
461 | ubifs_assert(c, cnt == c->tot_orphans - c->new_orphans); |
462 | c->cmt_orphans = cnt; |
463 | c->ohead_lnum = c->orph_first; |
464 | c->ohead_offs = 0; |
465 | } else { |
466 | /* |
467 | * We limit the number of orphans so that this should |
468 | * never happen. |
469 | */ |
470 | ubifs_err(c, fmt: "out of space in orphan area" ); |
471 | err = -EINVAL; |
472 | } |
473 | spin_unlock(lock: &c->orphan_lock); |
474 | return err; |
475 | } |
476 | |
477 | /** |
478 | * commit_orphans - commit orphans. |
479 | * @c: UBIFS file-system description object |
480 | * |
481 | * This function commits orphans to flash. On success, %0 is returned, |
482 | * otherwise a negative error code is returned. |
483 | */ |
484 | static int commit_orphans(struct ubifs_info *c) |
485 | { |
486 | int avail, atomic = 0, err; |
487 | |
488 | ubifs_assert(c, c->cmt_orphans > 0); |
489 | avail = avail_orphs(c); |
490 | if (avail < c->cmt_orphans) { |
491 | /* Not enough space to write new orphans, so consolidate */ |
492 | err = consolidate(c); |
493 | if (err) |
494 | return err; |
495 | atomic = 1; |
496 | } |
497 | err = write_orph_nodes(c, atomic); |
498 | return err; |
499 | } |
500 | |
501 | /** |
502 | * erase_deleted - erase the orphans marked for deletion. |
503 | * @c: UBIFS file-system description object |
504 | * |
505 | * During commit, the orphans being committed cannot be deleted, so they are |
506 | * marked for deletion and deleted by this function. Also, the recovery |
507 | * adds killed orphans to the deletion list, and therefore they are deleted |
508 | * here too. |
509 | */ |
510 | static void erase_deleted(struct ubifs_info *c) |
511 | { |
512 | struct ubifs_orphan *orphan, *dnext; |
513 | |
514 | spin_lock(lock: &c->orphan_lock); |
515 | dnext = c->orph_dnext; |
516 | while (dnext) { |
517 | orphan = dnext; |
518 | dnext = orphan->dnext; |
519 | ubifs_assert(c, !orphan->new); |
520 | ubifs_assert(c, orphan->del); |
521 | rb_erase(&orphan->rb, &c->orph_tree); |
522 | list_del(entry: &orphan->list); |
523 | c->tot_orphans -= 1; |
524 | dbg_gen("deleting orphan ino %lu" , (unsigned long)orphan->inum); |
525 | kfree(objp: orphan); |
526 | } |
527 | c->orph_dnext = NULL; |
528 | spin_unlock(lock: &c->orphan_lock); |
529 | } |
530 | |
531 | /** |
532 | * ubifs_orphan_end_commit - end commit of orphans. |
533 | * @c: UBIFS file-system description object |
534 | * |
535 | * End commit of orphans. |
536 | */ |
537 | int ubifs_orphan_end_commit(struct ubifs_info *c) |
538 | { |
539 | int err; |
540 | |
541 | if (c->cmt_orphans != 0) { |
542 | err = commit_orphans(c); |
543 | if (err) |
544 | return err; |
545 | } |
546 | erase_deleted(c); |
547 | err = dbg_check_orphans(c); |
548 | return err; |
549 | } |
550 | |
551 | /** |
552 | * ubifs_clear_orphans - erase all LEBs used for orphans. |
553 | * @c: UBIFS file-system description object |
554 | * |
555 | * If recovery is not required, then the orphans from the previous session |
556 | * are not needed. This function locates the LEBs used to record |
557 | * orphans, and un-maps them. |
558 | */ |
559 | int ubifs_clear_orphans(struct ubifs_info *c) |
560 | { |
561 | int lnum, err; |
562 | |
563 | for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { |
564 | err = ubifs_leb_unmap(c, lnum); |
565 | if (err) |
566 | return err; |
567 | } |
568 | c->ohead_lnum = c->orph_first; |
569 | c->ohead_offs = 0; |
570 | return 0; |
571 | } |
572 | |
573 | /** |
574 | * insert_dead_orphan - insert an orphan. |
575 | * @c: UBIFS file-system description object |
576 | * @inum: orphan inode number |
577 | * |
578 | * This function is a helper to the 'do_kill_orphans()' function. The orphan |
579 | * must be kept until the next commit, so it is added to the rb-tree and the |
580 | * deletion list. |
581 | */ |
582 | static int insert_dead_orphan(struct ubifs_info *c, ino_t inum) |
583 | { |
584 | struct ubifs_orphan *orphan, *o; |
585 | struct rb_node **p, *parent = NULL; |
586 | |
587 | orphan = kzalloc(size: sizeof(struct ubifs_orphan), GFP_KERNEL); |
588 | if (!orphan) |
589 | return -ENOMEM; |
590 | orphan->inum = inum; |
591 | |
592 | p = &c->orph_tree.rb_node; |
593 | while (*p) { |
594 | parent = *p; |
595 | o = rb_entry(parent, struct ubifs_orphan, rb); |
596 | if (inum < o->inum) |
597 | p = &(*p)->rb_left; |
598 | else if (inum > o->inum) |
599 | p = &(*p)->rb_right; |
600 | else { |
601 | /* Already added - no problem */ |
602 | kfree(objp: orphan); |
603 | return 0; |
604 | } |
605 | } |
606 | c->tot_orphans += 1; |
607 | rb_link_node(node: &orphan->rb, parent, rb_link: p); |
608 | rb_insert_color(&orphan->rb, &c->orph_tree); |
609 | list_add_tail(new: &orphan->list, head: &c->orph_list); |
610 | orphan->del = 1; |
611 | orphan->dnext = c->orph_dnext; |
612 | c->orph_dnext = orphan; |
613 | dbg_mnt("ino %lu, new %d, tot %d" , (unsigned long)inum, |
614 | c->new_orphans, c->tot_orphans); |
615 | return 0; |
616 | } |
617 | |
618 | /** |
619 | * do_kill_orphans - remove orphan inodes from the index. |
620 | * @c: UBIFS file-system description object |
621 | * @sleb: scanned LEB |
622 | * @last_cmt_no: cmt_no of last orphan node read is passed and returned here |
623 | * @outofdate: whether the LEB is out of date is returned here |
624 | * @last_flagged: whether the end orphan node is encountered |
625 | * |
626 | * This function is a helper to the 'kill_orphans()' function. It goes through |
627 | * every orphan node in a LEB and for every inode number recorded, removes |
628 | * all keys for that inode from the TNC. |
629 | */ |
630 | static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, |
631 | unsigned long long *last_cmt_no, int *outofdate, |
632 | int *last_flagged) |
633 | { |
634 | struct ubifs_scan_node *snod; |
635 | struct ubifs_orph_node *orph; |
636 | struct ubifs_ino_node *ino = NULL; |
637 | unsigned long long cmt_no; |
638 | ino_t inum; |
639 | int i, n, err, first = 1; |
640 | |
641 | ino = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS); |
642 | if (!ino) |
643 | return -ENOMEM; |
644 | |
645 | list_for_each_entry(snod, &sleb->nodes, list) { |
646 | if (snod->type != UBIFS_ORPH_NODE) { |
647 | ubifs_err(c, fmt: "invalid node type %d in orphan area at %d:%d" , |
648 | snod->type, sleb->lnum, snod->offs); |
649 | ubifs_dump_node(c, node: snod->node, |
650 | node_len: c->leb_size - snod->offs); |
651 | err = -EINVAL; |
652 | goto out_free; |
653 | } |
654 | |
655 | orph = snod->node; |
656 | |
657 | /* Check commit number */ |
658 | cmt_no = le64_to_cpu(orph->cmt_no) & LLONG_MAX; |
659 | /* |
660 | * The commit number on the master node may be less, because |
661 | * of a failed commit. If there are several failed commits in a |
662 | * row, the commit number written on orphan nodes will continue |
663 | * to increase (because the commit number is adjusted here) even |
664 | * though the commit number on the master node stays the same |
665 | * because the master node has not been re-written. |
666 | */ |
667 | if (cmt_no > c->cmt_no) |
668 | c->cmt_no = cmt_no; |
669 | if (cmt_no < *last_cmt_no && *last_flagged) { |
670 | /* |
671 | * The last orphan node had a higher commit number and |
672 | * was flagged as the last written for that commit |
673 | * number. That makes this orphan node, out of date. |
674 | */ |
675 | if (!first) { |
676 | ubifs_err(c, fmt: "out of order commit number %llu in orphan node at %d:%d" , |
677 | cmt_no, sleb->lnum, snod->offs); |
678 | ubifs_dump_node(c, node: snod->node, |
679 | node_len: c->leb_size - snod->offs); |
680 | err = -EINVAL; |
681 | goto out_free; |
682 | } |
683 | dbg_rcvry("out of date LEB %d" , sleb->lnum); |
684 | *outofdate = 1; |
685 | err = 0; |
686 | goto out_free; |
687 | } |
688 | |
689 | if (first) |
690 | first = 0; |
691 | |
692 | n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; |
693 | for (i = 0; i < n; i++) { |
694 | union ubifs_key key1, key2; |
695 | |
696 | inum = le64_to_cpu(orph->inos[i]); |
697 | |
698 | ino_key_init(c, key: &key1, inum); |
699 | err = ubifs_tnc_lookup(c, key: &key1, node: ino); |
700 | if (err && err != -ENOENT) |
701 | goto out_free; |
702 | |
703 | /* |
704 | * Check whether an inode can really get deleted. |
705 | * linkat() with O_TMPFILE allows rebirth of an inode. |
706 | */ |
707 | if (err == 0 && ino->nlink == 0) { |
708 | dbg_rcvry("deleting orphaned inode %lu" , |
709 | (unsigned long)inum); |
710 | |
711 | lowest_ino_key(c, key: &key1, inum); |
712 | highest_ino_key(c, key: &key2, inum); |
713 | |
714 | err = ubifs_tnc_remove_range(c, from_key: &key1, to_key: &key2); |
715 | if (err) |
716 | goto out_ro; |
717 | } |
718 | |
719 | err = insert_dead_orphan(c, inum); |
720 | if (err) |
721 | goto out_free; |
722 | } |
723 | |
724 | *last_cmt_no = cmt_no; |
725 | if (le64_to_cpu(orph->cmt_no) & (1ULL << 63)) { |
726 | dbg_rcvry("last orph node for commit %llu at %d:%d" , |
727 | cmt_no, sleb->lnum, snod->offs); |
728 | *last_flagged = 1; |
729 | } else |
730 | *last_flagged = 0; |
731 | } |
732 | |
733 | err = 0; |
734 | out_free: |
735 | kfree(objp: ino); |
736 | return err; |
737 | |
738 | out_ro: |
739 | ubifs_ro_mode(c, err); |
740 | kfree(objp: ino); |
741 | return err; |
742 | } |
743 | |
744 | /** |
745 | * kill_orphans - remove all orphan inodes from the index. |
746 | * @c: UBIFS file-system description object |
747 | * |
748 | * If recovery is required, then orphan inodes recorded during the previous |
749 | * session (which ended with an unclean unmount) must be deleted from the index. |
750 | * This is done by updating the TNC, but since the index is not updated until |
751 | * the next commit, the LEBs where the orphan information is recorded are not |
752 | * erased until the next commit. |
753 | */ |
754 | static int kill_orphans(struct ubifs_info *c) |
755 | { |
756 | unsigned long long last_cmt_no = 0; |
757 | int lnum, err = 0, outofdate = 0, last_flagged = 0; |
758 | |
759 | c->ohead_lnum = c->orph_first; |
760 | c->ohead_offs = 0; |
761 | /* Check no-orphans flag and skip this if no orphans */ |
762 | if (c->no_orphs) { |
763 | dbg_rcvry("no orphans" ); |
764 | return 0; |
765 | } |
766 | /* |
767 | * Orph nodes always start at c->orph_first and are written to each |
768 | * successive LEB in turn. Generally unused LEBs will have been unmapped |
769 | * but may contain out of date orphan nodes if the unmap didn't go |
770 | * through. In addition, the last orphan node written for each commit is |
771 | * marked (top bit of orph->cmt_no is set to 1). It is possible that |
772 | * there are orphan nodes from the next commit (i.e. the commit did not |
773 | * complete successfully). In that case, no orphans will have been lost |
774 | * due to the way that orphans are written, and any orphans added will |
775 | * be valid orphans anyway and so can be deleted. |
776 | */ |
777 | for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { |
778 | struct ubifs_scan_leb *sleb; |
779 | |
780 | dbg_rcvry("LEB %d" , lnum); |
781 | sleb = ubifs_scan(c, lnum, offs: 0, sbuf: c->sbuf, quiet: 1); |
782 | if (IS_ERR(ptr: sleb)) { |
783 | if (PTR_ERR(ptr: sleb) == -EUCLEAN) |
784 | sleb = ubifs_recover_leb(c, lnum, offs: 0, |
785 | sbuf: c->sbuf, jhead: -1); |
786 | if (IS_ERR(ptr: sleb)) { |
787 | err = PTR_ERR(ptr: sleb); |
788 | break; |
789 | } |
790 | } |
791 | err = do_kill_orphans(c, sleb, last_cmt_no: &last_cmt_no, outofdate: &outofdate, |
792 | last_flagged: &last_flagged); |
793 | if (err || outofdate) { |
794 | ubifs_scan_destroy(sleb); |
795 | break; |
796 | } |
797 | if (sleb->endpt) { |
798 | c->ohead_lnum = lnum; |
799 | c->ohead_offs = sleb->endpt; |
800 | } |
801 | ubifs_scan_destroy(sleb); |
802 | } |
803 | return err; |
804 | } |
805 | |
806 | /** |
807 | * ubifs_mount_orphans - delete orphan inodes and erase LEBs that recorded them. |
808 | * @c: UBIFS file-system description object |
809 | * @unclean: indicates recovery from unclean unmount |
810 | * @read_only: indicates read only mount |
811 | * |
812 | * This function is called when mounting to erase orphans from the previous |
813 | * session. If UBIFS was not unmounted cleanly, then the inodes recorded as |
814 | * orphans are deleted. |
815 | */ |
816 | int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only) |
817 | { |
818 | int err = 0; |
819 | |
820 | c->max_orphans = tot_avail_orphs(c); |
821 | |
822 | if (!read_only) { |
823 | c->orph_buf = vmalloc(size: c->leb_size); |
824 | if (!c->orph_buf) |
825 | return -ENOMEM; |
826 | } |
827 | |
828 | if (unclean) |
829 | err = kill_orphans(c); |
830 | else if (!read_only) |
831 | err = ubifs_clear_orphans(c); |
832 | |
833 | return err; |
834 | } |
835 | |
836 | /* |
837 | * Everything below is related to debugging. |
838 | */ |
839 | |
840 | struct check_orphan { |
841 | struct rb_node rb; |
842 | ino_t inum; |
843 | }; |
844 | |
845 | struct check_info { |
846 | unsigned long last_ino; |
847 | unsigned long tot_inos; |
848 | unsigned long missing; |
849 | unsigned long long leaf_cnt; |
850 | struct ubifs_ino_node *node; |
851 | struct rb_root root; |
852 | }; |
853 | |
854 | static bool dbg_find_orphan(struct ubifs_info *c, ino_t inum) |
855 | { |
856 | bool found = false; |
857 | |
858 | spin_lock(lock: &c->orphan_lock); |
859 | found = !!lookup_orphan(c, inum); |
860 | spin_unlock(lock: &c->orphan_lock); |
861 | |
862 | return found; |
863 | } |
864 | |
865 | static int dbg_ins_check_orphan(struct rb_root *root, ino_t inum) |
866 | { |
867 | struct check_orphan *orphan, *o; |
868 | struct rb_node **p, *parent = NULL; |
869 | |
870 | orphan = kzalloc(size: sizeof(struct check_orphan), GFP_NOFS); |
871 | if (!orphan) |
872 | return -ENOMEM; |
873 | orphan->inum = inum; |
874 | |
875 | p = &root->rb_node; |
876 | while (*p) { |
877 | parent = *p; |
878 | o = rb_entry(parent, struct check_orphan, rb); |
879 | if (inum < o->inum) |
880 | p = &(*p)->rb_left; |
881 | else if (inum > o->inum) |
882 | p = &(*p)->rb_right; |
883 | else { |
884 | kfree(objp: orphan); |
885 | return 0; |
886 | } |
887 | } |
888 | rb_link_node(node: &orphan->rb, parent, rb_link: p); |
889 | rb_insert_color(&orphan->rb, root); |
890 | return 0; |
891 | } |
892 | |
893 | static int dbg_find_check_orphan(struct rb_root *root, ino_t inum) |
894 | { |
895 | struct check_orphan *o; |
896 | struct rb_node *p; |
897 | |
898 | p = root->rb_node; |
899 | while (p) { |
900 | o = rb_entry(p, struct check_orphan, rb); |
901 | if (inum < o->inum) |
902 | p = p->rb_left; |
903 | else if (inum > o->inum) |
904 | p = p->rb_right; |
905 | else |
906 | return 1; |
907 | } |
908 | return 0; |
909 | } |
910 | |
911 | static void dbg_free_check_tree(struct rb_root *root) |
912 | { |
913 | struct check_orphan *o, *n; |
914 | |
915 | rbtree_postorder_for_each_entry_safe(o, n, root, rb) |
916 | kfree(objp: o); |
917 | } |
918 | |
919 | static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr, |
920 | void *priv) |
921 | { |
922 | struct check_info *ci = priv; |
923 | ino_t inum; |
924 | int err; |
925 | |
926 | inum = key_inum(c, k: &zbr->key); |
927 | if (inum != ci->last_ino) { |
928 | /* Lowest node type is the inode node, so it comes first */ |
929 | if (key_type(c, key: &zbr->key) != UBIFS_INO_KEY) |
930 | ubifs_err(c, fmt: "found orphan node ino %lu, type %d" , |
931 | (unsigned long)inum, key_type(c, key: &zbr->key)); |
932 | ci->last_ino = inum; |
933 | ci->tot_inos += 1; |
934 | err = ubifs_tnc_read_node(c, zbr, node: ci->node); |
935 | if (err) { |
936 | ubifs_err(c, fmt: "node read failed, error %d" , err); |
937 | return err; |
938 | } |
939 | if (ci->node->nlink == 0) |
940 | /* Must be recorded as an orphan */ |
941 | if (!dbg_find_check_orphan(root: &ci->root, inum) && |
942 | !dbg_find_orphan(c, inum)) { |
943 | ubifs_err(c, fmt: "missing orphan, ino %lu" , |
944 | (unsigned long)inum); |
945 | ci->missing += 1; |
946 | } |
947 | } |
948 | ci->leaf_cnt += 1; |
949 | return 0; |
950 | } |
951 | |
952 | static int dbg_read_orphans(struct check_info *ci, struct ubifs_scan_leb *sleb) |
953 | { |
954 | struct ubifs_scan_node *snod; |
955 | struct ubifs_orph_node *orph; |
956 | ino_t inum; |
957 | int i, n, err; |
958 | |
959 | list_for_each_entry(snod, &sleb->nodes, list) { |
960 | cond_resched(); |
961 | if (snod->type != UBIFS_ORPH_NODE) |
962 | continue; |
963 | orph = snod->node; |
964 | n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; |
965 | for (i = 0; i < n; i++) { |
966 | inum = le64_to_cpu(orph->inos[i]); |
967 | err = dbg_ins_check_orphan(root: &ci->root, inum); |
968 | if (err) |
969 | return err; |
970 | } |
971 | } |
972 | return 0; |
973 | } |
974 | |
975 | static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) |
976 | { |
977 | int lnum, err = 0; |
978 | void *buf; |
979 | |
980 | /* Check no-orphans flag and skip this if no orphans */ |
981 | if (c->no_orphs) |
982 | return 0; |
983 | |
984 | buf = __vmalloc(size: c->leb_size, GFP_NOFS); |
985 | if (!buf) { |
986 | ubifs_err(c, fmt: "cannot allocate memory to check orphans" ); |
987 | return 0; |
988 | } |
989 | |
990 | for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { |
991 | struct ubifs_scan_leb *sleb; |
992 | |
993 | sleb = ubifs_scan(c, lnum, offs: 0, sbuf: buf, quiet: 0); |
994 | if (IS_ERR(ptr: sleb)) { |
995 | err = PTR_ERR(ptr: sleb); |
996 | break; |
997 | } |
998 | |
999 | err = dbg_read_orphans(ci, sleb); |
1000 | ubifs_scan_destroy(sleb); |
1001 | if (err) |
1002 | break; |
1003 | } |
1004 | |
1005 | vfree(addr: buf); |
1006 | return err; |
1007 | } |
1008 | |
1009 | static int dbg_check_orphans(struct ubifs_info *c) |
1010 | { |
1011 | struct check_info ci; |
1012 | int err; |
1013 | |
1014 | if (!dbg_is_chk_orph(c)) |
1015 | return 0; |
1016 | |
1017 | ci.last_ino = 0; |
1018 | ci.tot_inos = 0; |
1019 | ci.missing = 0; |
1020 | ci.leaf_cnt = 0; |
1021 | ci.root = RB_ROOT; |
1022 | ci.node = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS); |
1023 | if (!ci.node) { |
1024 | ubifs_err(c, fmt: "out of memory" ); |
1025 | return -ENOMEM; |
1026 | } |
1027 | |
1028 | err = dbg_scan_orphans(c, ci: &ci); |
1029 | if (err) |
1030 | goto out; |
1031 | |
1032 | err = dbg_walk_index(c, leaf_cb: &dbg_orphan_check, NULL, priv: &ci); |
1033 | if (err) { |
1034 | ubifs_err(c, fmt: "cannot scan TNC, error %d" , err); |
1035 | goto out; |
1036 | } |
1037 | |
1038 | if (ci.missing) { |
1039 | ubifs_err(c, fmt: "%lu missing orphan(s)" , ci.missing); |
1040 | err = -EINVAL; |
1041 | goto out; |
1042 | } |
1043 | |
1044 | dbg_cmt("last inode number is %lu" , ci.last_ino); |
1045 | dbg_cmt("total number of inodes is %lu" , ci.tot_inos); |
1046 | dbg_cmt("total number of leaf nodes is %llu" , ci.leaf_cnt); |
1047 | |
1048 | out: |
1049 | dbg_free_check_tree(root: &ci.root); |
1050 | kfree(objp: ci.node); |
1051 | return err; |
1052 | } |
1053 | |