1 | /* |
2 | * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README |
3 | */ |
4 | /* Reiserfs block (de)allocator, bitmap-based. */ |
5 | |
6 | #include <linux/time.h> |
7 | #include "reiserfs.h" |
8 | #include <linux/errno.h> |
9 | #include <linux/buffer_head.h> |
10 | #include <linux/kernel.h> |
11 | #include <linux/pagemap.h> |
12 | #include <linux/vmalloc.h> |
13 | #include <linux/quotaops.h> |
14 | #include <linux/seq_file.h> |
15 | |
16 | #define PREALLOCATION_SIZE 9 |
17 | |
18 | /* different reiserfs block allocator options */ |
19 | |
20 | #define SB_ALLOC_OPTS(s) (REISERFS_SB(s)->s_alloc_options.bits) |
21 | |
22 | #define _ALLOC_concentrating_formatted_nodes 0 |
23 | #define _ALLOC_displacing_large_files 1 |
24 | #define _ALLOC_displacing_new_packing_localities 2 |
25 | #define _ALLOC_old_hashed_relocation 3 |
26 | #define _ALLOC_new_hashed_relocation 4 |
27 | #define _ALLOC_skip_busy 5 |
28 | #define _ALLOC_displace_based_on_dirid 6 |
29 | #define _ALLOC_hashed_formatted_nodes 7 |
30 | #define _ALLOC_old_way 8 |
31 | #define _ALLOC_hundredth_slices 9 |
32 | #define _ALLOC_dirid_groups 10 |
33 | #define _ALLOC_oid_groups 11 |
34 | #define _ALLOC_packing_groups 12 |
35 | |
36 | #define concentrating_formatted_nodes(s) test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s)) |
37 | #define displacing_large_files(s) test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s)) |
38 | #define displacing_new_packing_localities(s) test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s)) |
39 | |
40 | #define SET_OPTION(optname) \ |
41 | do { \ |
42 | reiserfs_info(s, "block allocator option \"%s\" is set", #optname); \ |
43 | set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \ |
44 | } while(0) |
45 | #define TEST_OPTION(optname, s) \ |
46 | test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)) |
47 | |
48 | static inline void get_bit_address(struct super_block *s, |
49 | b_blocknr_t block, |
50 | unsigned int *bmap_nr, |
51 | unsigned int *offset) |
52 | { |
53 | /* |
54 | * It is in the bitmap block number equal to the block |
55 | * number divided by the number of bits in a block. |
56 | */ |
57 | *bmap_nr = block >> (s->s_blocksize_bits + 3); |
58 | /* Within that bitmap block it is located at bit offset *offset. */ |
59 | *offset = block & ((s->s_blocksize << 3) - 1); |
60 | } |
61 | |
62 | int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value) |
63 | { |
64 | unsigned int bmap, offset; |
65 | unsigned int bmap_count = reiserfs_bmap_count(sb: s); |
66 | |
67 | if (block == 0 || block >= SB_BLOCK_COUNT(s)) { |
68 | reiserfs_error(s, "vs-4010" , |
69 | "block number is out of range %lu (%u)" , |
70 | block, SB_BLOCK_COUNT(s)); |
71 | return 0; |
72 | } |
73 | |
74 | get_bit_address(s, block, bmap_nr: &bmap, offset: &offset); |
75 | |
76 | /* |
77 | * Old format filesystem? Unlikely, but the bitmaps are all |
78 | * up front so we need to account for it. |
79 | */ |
80 | if (unlikely(test_bit(REISERFS_OLD_FORMAT, |
81 | &REISERFS_SB(s)->s_properties))) { |
82 | b_blocknr_t bmap1 = REISERFS_SB(sb: s)->s_sbh->b_blocknr + 1; |
83 | if (block >= bmap1 && |
84 | block <= bmap1 + bmap_count) { |
85 | reiserfs_error(s, "vs-4019" , "bitmap block %lu(%u) " |
86 | "can't be freed or reused" , |
87 | block, bmap_count); |
88 | return 0; |
89 | } |
90 | } else { |
91 | if (offset == 0) { |
92 | reiserfs_error(s, "vs-4020" , "bitmap block %lu(%u) " |
93 | "can't be freed or reused" , |
94 | block, bmap_count); |
95 | return 0; |
96 | } |
97 | } |
98 | |
99 | if (bmap >= bmap_count) { |
100 | reiserfs_error(s, "vs-4030" , "bitmap for requested block " |
101 | "is out of range: block=%lu, bitmap_nr=%u" , |
102 | block, bmap); |
103 | return 0; |
104 | } |
105 | |
106 | if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) { |
107 | reiserfs_error(s, "vs-4050" , "this is root block (%u), " |
108 | "it must be busy" , SB_ROOT_BLOCK(s)); |
109 | return 0; |
110 | } |
111 | |
112 | return 1; |
113 | } |
114 | |
115 | /* |
116 | * Searches in journal structures for a given block number (bmap, off). |
117 | * If block is found in reiserfs journal it suggests next free block |
118 | * candidate to test. |
119 | */ |
120 | static inline int is_block_in_journal(struct super_block *s, unsigned int bmap, |
121 | int off, int *next) |
122 | { |
123 | b_blocknr_t tmp; |
124 | |
125 | if (reiserfs_in_journal(sb: s, bmap_nr: bmap, bit_nr: off, searchall: 1, next: &tmp)) { |
126 | if (tmp) { /* hint supplied */ |
127 | *next = tmp; |
128 | PROC_INFO_INC(s, scan_bitmap.in_journal_hint); |
129 | } else { |
130 | (*next) = off + 1; /* inc offset to avoid looping. */ |
131 | PROC_INFO_INC(s, scan_bitmap.in_journal_nohint); |
132 | } |
133 | PROC_INFO_INC(s, scan_bitmap.retry); |
134 | return 1; |
135 | } |
136 | return 0; |
137 | } |
138 | |
139 | /* |
140 | * Searches for a window of zero bits with given minimum and maximum |
141 | * lengths in one bitmap block |
142 | */ |
143 | static int scan_bitmap_block(struct reiserfs_transaction_handle *th, |
144 | unsigned int bmap_n, int *beg, int boundary, |
145 | int min, int max, int unfm) |
146 | { |
147 | struct super_block *s = th->t_super; |
148 | struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n]; |
149 | struct buffer_head *bh; |
150 | int end, next; |
151 | int org = *beg; |
152 | |
153 | BUG_ON(!th->t_trans_id); |
154 | RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of " |
155 | "range (0..%u)" , bmap_n, reiserfs_bmap_count(s) - 1); |
156 | PROC_INFO_INC(s, scan_bitmap.bmap); |
157 | |
158 | if (!bi) { |
159 | reiserfs_error(s, "jdm-4055" , "NULL bitmap info pointer " |
160 | "for bitmap %d" , bmap_n); |
161 | return 0; |
162 | } |
163 | |
164 | bh = reiserfs_read_bitmap_block(sb: s, bitmap: bmap_n); |
165 | if (bh == NULL) |
166 | return 0; |
167 | |
168 | while (1) { |
169 | cont: |
170 | if (bi->free_count < min) { |
171 | brelse(bh); |
172 | return 0; /* No free blocks in this bitmap */ |
173 | } |
174 | |
175 | /* search for a first zero bit -- beginning of a window */ |
176 | *beg = reiserfs_find_next_zero_le_bit |
177 | (addr: (unsigned long *)(bh->b_data), size: boundary, offset: *beg); |
178 | |
179 | /* |
180 | * search for a zero bit fails or the rest of bitmap block |
181 | * cannot contain a zero window of minimum size |
182 | */ |
183 | if (*beg + min > boundary) { |
184 | brelse(bh); |
185 | return 0; |
186 | } |
187 | |
188 | if (unfm && is_block_in_journal(s, bmap: bmap_n, off: *beg, next: beg)) |
189 | continue; |
190 | /* first zero bit found; we check next bits */ |
191 | for (end = *beg + 1;; end++) { |
192 | if (end >= *beg + max || end >= boundary |
193 | || reiserfs_test_le_bit(nr: end, addr: bh->b_data)) { |
194 | next = end; |
195 | break; |
196 | } |
197 | |
198 | /* |
199 | * finding the other end of zero bit window requires |
200 | * looking into journal structures (in case of |
201 | * searching for free blocks for unformatted nodes) |
202 | */ |
203 | if (unfm && is_block_in_journal(s, bmap: bmap_n, off: end, next: &next)) |
204 | break; |
205 | } |
206 | |
207 | /* |
208 | * now (*beg) points to beginning of zero bits window, |
209 | * (end) points to one bit after the window end |
210 | */ |
211 | |
212 | /* found window of proper size */ |
213 | if (end - *beg >= min) { |
214 | int i; |
215 | reiserfs_prepare_for_journal(s, bh, wait: 1); |
216 | /* |
217 | * try to set all blocks used checking are |
218 | * they still free |
219 | */ |
220 | for (i = *beg; i < end; i++) { |
221 | /* Don't check in journal again. */ |
222 | if (reiserfs_test_and_set_le_bit |
223 | (nr: i, addr: bh->b_data)) { |
224 | /* |
225 | * bit was set by another process while |
226 | * we slept in prepare_for_journal() |
227 | */ |
228 | PROC_INFO_INC(s, scan_bitmap.stolen); |
229 | |
230 | /* |
231 | * we can continue with smaller set |
232 | * of allocated blocks, if length of |
233 | * this set is more or equal to `min' |
234 | */ |
235 | if (i >= *beg + min) { |
236 | end = i; |
237 | break; |
238 | } |
239 | |
240 | /* |
241 | * otherwise we clear all bit |
242 | * were set ... |
243 | */ |
244 | while (--i >= *beg) |
245 | reiserfs_clear_le_bit |
246 | (nr: i, addr: bh->b_data); |
247 | reiserfs_restore_prepared_buffer(s, bh); |
248 | *beg = org; |
249 | |
250 | /* |
251 | * Search again in current block |
252 | * from beginning |
253 | */ |
254 | goto cont; |
255 | } |
256 | } |
257 | bi->free_count -= (end - *beg); |
258 | journal_mark_dirty(th, bh); |
259 | brelse(bh); |
260 | |
261 | /* free block count calculation */ |
262 | reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), |
263 | wait: 1); |
264 | PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg)); |
265 | journal_mark_dirty(th, SB_BUFFER_WITH_SB(s)); |
266 | |
267 | return end - (*beg); |
268 | } else { |
269 | *beg = next; |
270 | } |
271 | } |
272 | } |
273 | |
274 | static int bmap_hash_id(struct super_block *s, u32 id) |
275 | { |
276 | char *hash_in = NULL; |
277 | unsigned long hash; |
278 | unsigned bm; |
279 | |
280 | if (id <= 2) { |
281 | bm = 1; |
282 | } else { |
283 | hash_in = (char *)(&id); |
284 | hash = keyed_hash(msg: hash_in, len: 4); |
285 | bm = hash % reiserfs_bmap_count(sb: s); |
286 | if (!bm) |
287 | bm = 1; |
288 | } |
289 | /* this can only be true when SB_BMAP_NR = 1 */ |
290 | if (bm >= reiserfs_bmap_count(sb: s)) |
291 | bm = 0; |
292 | return bm; |
293 | } |
294 | |
295 | /* |
296 | * hashes the id and then returns > 0 if the block group for the |
297 | * corresponding hash is full |
298 | */ |
299 | static inline int block_group_used(struct super_block *s, u32 id) |
300 | { |
301 | int bm = bmap_hash_id(s, id); |
302 | struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm]; |
303 | |
304 | /* |
305 | * If we don't have cached information on this bitmap block, we're |
306 | * going to have to load it later anyway. Loading it here allows us |
307 | * to make a better decision. This favors long-term performance gain |
308 | * with a better on-disk layout vs. a short term gain of skipping the |
309 | * read and potentially having a bad placement. |
310 | */ |
311 | if (info->free_count == UINT_MAX) { |
312 | struct buffer_head *bh = reiserfs_read_bitmap_block(sb: s, bitmap: bm); |
313 | brelse(bh); |
314 | } |
315 | |
316 | if (info->free_count > ((s->s_blocksize << 3) * 60 / 100)) { |
317 | return 0; |
318 | } |
319 | return 1; |
320 | } |
321 | |
322 | /* |
323 | * the packing is returned in disk byte order |
324 | */ |
325 | __le32 reiserfs_choose_packing(struct inode * dir) |
326 | { |
327 | __le32 packing; |
328 | if (TEST_OPTION(packing_groups, dir->i_sb)) { |
329 | u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id); |
330 | /* |
331 | * some versions of reiserfsck expect packing locality 1 to be |
332 | * special |
333 | */ |
334 | if (parent_dir == 1 || block_group_used(s: dir->i_sb, id: parent_dir)) |
335 | packing = INODE_PKEY(dir)->k_objectid; |
336 | else |
337 | packing = INODE_PKEY(dir)->k_dir_id; |
338 | } else |
339 | packing = INODE_PKEY(dir)->k_objectid; |
340 | return packing; |
341 | } |
342 | |
343 | /* |
344 | * Tries to find contiguous zero bit window (given size) in given region of |
345 | * bitmap and place new blocks there. Returns number of allocated blocks. |
346 | */ |
347 | static int scan_bitmap(struct reiserfs_transaction_handle *th, |
348 | b_blocknr_t * start, b_blocknr_t finish, |
349 | int min, int max, int unfm, sector_t file_block) |
350 | { |
351 | int nr_allocated = 0; |
352 | struct super_block *s = th->t_super; |
353 | unsigned int bm, off; |
354 | unsigned int end_bm, end_off; |
355 | unsigned int off_max = s->s_blocksize << 3; |
356 | |
357 | BUG_ON(!th->t_trans_id); |
358 | PROC_INFO_INC(s, scan_bitmap.call); |
359 | |
360 | /* No point in looking for more free blocks */ |
361 | if (SB_FREE_BLOCKS(s) <= 0) |
362 | return 0; |
363 | |
364 | get_bit_address(s, block: *start, bmap_nr: &bm, offset: &off); |
365 | get_bit_address(s, block: finish, bmap_nr: &end_bm, offset: &end_off); |
366 | if (bm > reiserfs_bmap_count(sb: s)) |
367 | return 0; |
368 | if (end_bm > reiserfs_bmap_count(sb: s)) |
369 | end_bm = reiserfs_bmap_count(sb: s); |
370 | |
371 | /* |
372 | * When the bitmap is more than 10% free, anyone can allocate. |
373 | * When it's less than 10% free, only files that already use the |
374 | * bitmap are allowed. Once we pass 80% full, this restriction |
375 | * is lifted. |
376 | * |
377 | * We do this so that files that grow later still have space close to |
378 | * their original allocation. This improves locality, and presumably |
379 | * performance as a result. |
380 | * |
381 | * This is only an allocation policy and does not make up for getting a |
382 | * bad hint. Decent hinting must be implemented for this to work well. |
383 | */ |
384 | if (TEST_OPTION(skip_busy, s) |
385 | && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s) / 20) { |
386 | for (; bm < end_bm; bm++, off = 0) { |
387 | if ((off && (!unfm || (file_block != 0))) |
388 | || SB_AP_BITMAP(s)[bm].free_count > |
389 | (s->s_blocksize << 3) / 10) |
390 | nr_allocated = |
391 | scan_bitmap_block(th, bmap_n: bm, beg: &off, boundary: off_max, |
392 | min, max, unfm); |
393 | if (nr_allocated) |
394 | goto ret; |
395 | } |
396 | /* we know from above that start is a reasonable number */ |
397 | get_bit_address(s, block: *start, bmap_nr: &bm, offset: &off); |
398 | } |
399 | |
400 | for (; bm < end_bm; bm++, off = 0) { |
401 | nr_allocated = |
402 | scan_bitmap_block(th, bmap_n: bm, beg: &off, boundary: off_max, min, max, unfm); |
403 | if (nr_allocated) |
404 | goto ret; |
405 | } |
406 | |
407 | nr_allocated = |
408 | scan_bitmap_block(th, bmap_n: bm, beg: &off, boundary: end_off + 1, min, max, unfm); |
409 | |
410 | ret: |
411 | *start = bm * off_max + off; |
412 | return nr_allocated; |
413 | |
414 | } |
415 | |
416 | static void _reiserfs_free_block(struct reiserfs_transaction_handle *th, |
417 | struct inode *inode, b_blocknr_t block, |
418 | int for_unformatted) |
419 | { |
420 | struct super_block *s = th->t_super; |
421 | struct reiserfs_super_block *rs; |
422 | struct buffer_head *sbh, *bmbh; |
423 | struct reiserfs_bitmap_info *apbi; |
424 | unsigned int nr, offset; |
425 | |
426 | BUG_ON(!th->t_trans_id); |
427 | PROC_INFO_INC(s, free_block); |
428 | rs = SB_DISK_SUPER_BLOCK(s); |
429 | sbh = SB_BUFFER_WITH_SB(s); |
430 | apbi = SB_AP_BITMAP(s); |
431 | |
432 | get_bit_address(s, block, bmap_nr: &nr, offset: &offset); |
433 | |
434 | if (nr >= reiserfs_bmap_count(sb: s)) { |
435 | reiserfs_error(s, "vs-4075" , "block %lu is out of range" , |
436 | block); |
437 | return; |
438 | } |
439 | |
440 | bmbh = reiserfs_read_bitmap_block(sb: s, bitmap: nr); |
441 | if (!bmbh) |
442 | return; |
443 | |
444 | reiserfs_prepare_for_journal(s, bh: bmbh, wait: 1); |
445 | |
446 | /* clear bit for the given block in bit map */ |
447 | if (!reiserfs_test_and_clear_le_bit(nr: offset, addr: bmbh->b_data)) { |
448 | reiserfs_error(s, "vs-4080" , |
449 | "block %lu: bit already cleared" , block); |
450 | } |
451 | apbi[nr].free_count++; |
452 | journal_mark_dirty(th, bh: bmbh); |
453 | brelse(bh: bmbh); |
454 | |
455 | reiserfs_prepare_for_journal(s, bh: sbh, wait: 1); |
456 | /* update super block */ |
457 | set_sb_free_blocks(rs, sb_free_blocks(rs) + 1); |
458 | |
459 | journal_mark_dirty(th, bh: sbh); |
460 | if (for_unformatted) { |
461 | int depth = reiserfs_write_unlock_nested(s); |
462 | dquot_free_block_nodirty(inode, nr: 1); |
463 | reiserfs_write_lock_nested(s, depth); |
464 | } |
465 | } |
466 | |
467 | void reiserfs_free_block(struct reiserfs_transaction_handle *th, |
468 | struct inode *inode, b_blocknr_t block, |
469 | int for_unformatted) |
470 | { |
471 | struct super_block *s = th->t_super; |
472 | |
473 | BUG_ON(!th->t_trans_id); |
474 | RFALSE(!s, "vs-4061: trying to free block on nonexistent device" ); |
475 | if (!is_reusable(s, block, bit_value: 1)) |
476 | return; |
477 | |
478 | if (block > sb_block_count(REISERFS_SB(s)->s_rs)) { |
479 | reiserfs_error(th->t_super, "bitmap-4072" , |
480 | "Trying to free block outside file system " |
481 | "boundaries (%lu > %lu)" , |
482 | block, sb_block_count(REISERFS_SB(s)->s_rs)); |
483 | return; |
484 | } |
485 | /* mark it before we clear it, just in case */ |
486 | journal_mark_freed(th, s, blocknr: block); |
487 | _reiserfs_free_block(th, inode, block, for_unformatted); |
488 | } |
489 | |
490 | /* preallocated blocks don't need to be run through journal_mark_freed */ |
491 | static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th, |
492 | struct inode *inode, b_blocknr_t block) |
493 | { |
494 | BUG_ON(!th->t_trans_id); |
495 | RFALSE(!th->t_super, |
496 | "vs-4060: trying to free block on nonexistent device" ); |
497 | if (!is_reusable(s: th->t_super, block, bit_value: 1)) |
498 | return; |
499 | _reiserfs_free_block(th, inode, block, for_unformatted: 1); |
500 | } |
501 | |
502 | static void __discard_prealloc(struct reiserfs_transaction_handle *th, |
503 | struct reiserfs_inode_info *ei) |
504 | { |
505 | unsigned long save = ei->i_prealloc_block; |
506 | int dirty = 0; |
507 | struct inode *inode = &ei->vfs_inode; |
508 | |
509 | BUG_ON(!th->t_trans_id); |
510 | #ifdef CONFIG_REISERFS_CHECK |
511 | if (ei->i_prealloc_count < 0) |
512 | reiserfs_error(th->t_super, "zam-4001" , |
513 | "inode has negative prealloc blocks count." ); |
514 | #endif |
515 | while (ei->i_prealloc_count > 0) { |
516 | b_blocknr_t block_to_free; |
517 | |
518 | /* |
519 | * reiserfs_free_prealloc_block can drop the write lock, |
520 | * which could allow another caller to free the same block. |
521 | * We can protect against it by modifying the prealloc |
522 | * state before calling it. |
523 | */ |
524 | block_to_free = ei->i_prealloc_block++; |
525 | ei->i_prealloc_count--; |
526 | reiserfs_free_prealloc_block(th, inode, block: block_to_free); |
527 | dirty = 1; |
528 | } |
529 | if (dirty) |
530 | reiserfs_update_sd(th, inode); |
531 | ei->i_prealloc_block = save; |
532 | list_del_init(entry: &ei->i_prealloc_list); |
533 | } |
534 | |
535 | /* FIXME: It should be inline function */ |
536 | void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th, |
537 | struct inode *inode) |
538 | { |
539 | struct reiserfs_inode_info *ei = REISERFS_I(inode); |
540 | |
541 | BUG_ON(!th->t_trans_id); |
542 | if (ei->i_prealloc_count) |
543 | __discard_prealloc(th, ei); |
544 | } |
545 | |
546 | void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th) |
547 | { |
548 | struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list; |
549 | |
550 | BUG_ON(!th->t_trans_id); |
551 | while (!list_empty(head: plist)) { |
552 | struct reiserfs_inode_info *ei; |
553 | ei = list_entry(plist->next, struct reiserfs_inode_info, |
554 | i_prealloc_list); |
555 | #ifdef CONFIG_REISERFS_CHECK |
556 | if (!ei->i_prealloc_count) { |
557 | reiserfs_error(th->t_super, "zam-4001" , |
558 | "inode is in prealloc list but has " |
559 | "no preallocated blocks." ); |
560 | } |
561 | #endif |
562 | __discard_prealloc(th, ei); |
563 | } |
564 | } |
565 | |
566 | void reiserfs_init_alloc_options(struct super_block *s) |
567 | { |
568 | set_bit(_ALLOC_skip_busy, addr: &SB_ALLOC_OPTS(s)); |
569 | set_bit(_ALLOC_dirid_groups, addr: &SB_ALLOC_OPTS(s)); |
570 | set_bit(_ALLOC_packing_groups, addr: &SB_ALLOC_OPTS(s)); |
571 | } |
572 | |
573 | /* block allocator related options are parsed here */ |
574 | int reiserfs_parse_alloc_options(struct super_block *s, char *options) |
575 | { |
576 | char *this_char, *value; |
577 | |
578 | /* clear default settings */ |
579 | REISERFS_SB(sb: s)->s_alloc_options.bits = 0; |
580 | |
581 | while ((this_char = strsep(&options, ":" )) != NULL) { |
582 | if ((value = strchr(this_char, '=')) != NULL) |
583 | *value++ = 0; |
584 | |
585 | if (!strcmp(this_char, "concentrating_formatted_nodes" )) { |
586 | int temp; |
587 | SET_OPTION(concentrating_formatted_nodes); |
588 | temp = (value |
589 | && *value) ? simple_strtoul(value, &value, |
590 | 0) : 10; |
591 | if (temp <= 0 || temp > 100) { |
592 | REISERFS_SB(sb: s)->s_alloc_options.border = 10; |
593 | } else { |
594 | REISERFS_SB(sb: s)->s_alloc_options.border = |
595 | 100 / temp; |
596 | } |
597 | continue; |
598 | } |
599 | if (!strcmp(this_char, "displacing_large_files" )) { |
600 | SET_OPTION(displacing_large_files); |
601 | REISERFS_SB(sb: s)->s_alloc_options.large_file_size = |
602 | (value |
603 | && *value) ? simple_strtoul(value, &value, 0) : 16; |
604 | continue; |
605 | } |
606 | if (!strcmp(this_char, "displacing_new_packing_localities" )) { |
607 | SET_OPTION(displacing_new_packing_localities); |
608 | continue; |
609 | } |
610 | |
611 | if (!strcmp(this_char, "old_hashed_relocation" )) { |
612 | SET_OPTION(old_hashed_relocation); |
613 | continue; |
614 | } |
615 | |
616 | if (!strcmp(this_char, "new_hashed_relocation" )) { |
617 | SET_OPTION(new_hashed_relocation); |
618 | continue; |
619 | } |
620 | |
621 | if (!strcmp(this_char, "dirid_groups" )) { |
622 | SET_OPTION(dirid_groups); |
623 | continue; |
624 | } |
625 | if (!strcmp(this_char, "oid_groups" )) { |
626 | SET_OPTION(oid_groups); |
627 | continue; |
628 | } |
629 | if (!strcmp(this_char, "packing_groups" )) { |
630 | SET_OPTION(packing_groups); |
631 | continue; |
632 | } |
633 | if (!strcmp(this_char, "hashed_formatted_nodes" )) { |
634 | SET_OPTION(hashed_formatted_nodes); |
635 | continue; |
636 | } |
637 | |
638 | if (!strcmp(this_char, "skip_busy" )) { |
639 | SET_OPTION(skip_busy); |
640 | continue; |
641 | } |
642 | |
643 | if (!strcmp(this_char, "hundredth_slices" )) { |
644 | SET_OPTION(hundredth_slices); |
645 | continue; |
646 | } |
647 | |
648 | if (!strcmp(this_char, "old_way" )) { |
649 | SET_OPTION(old_way); |
650 | continue; |
651 | } |
652 | |
653 | if (!strcmp(this_char, "displace_based_on_dirid" )) { |
654 | SET_OPTION(displace_based_on_dirid); |
655 | continue; |
656 | } |
657 | |
658 | if (!strcmp(this_char, "preallocmin" )) { |
659 | REISERFS_SB(sb: s)->s_alloc_options.preallocmin = |
660 | (value |
661 | && *value) ? simple_strtoul(value, &value, 0) : 4; |
662 | continue; |
663 | } |
664 | |
665 | if (!strcmp(this_char, "preallocsize" )) { |
666 | REISERFS_SB(sb: s)->s_alloc_options.preallocsize = |
667 | (value |
668 | && *value) ? simple_strtoul(value, &value, |
669 | 0) : |
670 | PREALLOCATION_SIZE; |
671 | continue; |
672 | } |
673 | |
674 | reiserfs_warning(s, "zam-4001" , "unknown option - %s" , |
675 | this_char); |
676 | return 1; |
677 | } |
678 | |
679 | reiserfs_info(s, fmt: "allocator options = [%08x]\n" , SB_ALLOC_OPTS(s)); |
680 | return 0; |
681 | } |
682 | |
683 | static void print_sep(struct seq_file *seq, int *first) |
684 | { |
685 | if (!*first) |
686 | seq_puts(m: seq, s: ":" ); |
687 | else |
688 | *first = 0; |
689 | } |
690 | |
691 | void show_alloc_options(struct seq_file *seq, struct super_block *s) |
692 | { |
693 | int first = 1; |
694 | |
695 | if (SB_ALLOC_OPTS(s) == ((1 << _ALLOC_skip_busy) | |
696 | (1 << _ALLOC_dirid_groups) | (1 << _ALLOC_packing_groups))) |
697 | return; |
698 | |
699 | seq_puts(m: seq, s: ",alloc=" ); |
700 | |
701 | if (TEST_OPTION(concentrating_formatted_nodes, s)) { |
702 | print_sep(seq, first: &first); |
703 | if (REISERFS_SB(sb: s)->s_alloc_options.border != 10) { |
704 | seq_printf(m: seq, fmt: "concentrating_formatted_nodes=%d" , |
705 | 100 / REISERFS_SB(sb: s)->s_alloc_options.border); |
706 | } else |
707 | seq_puts(m: seq, s: "concentrating_formatted_nodes" ); |
708 | } |
709 | if (TEST_OPTION(displacing_large_files, s)) { |
710 | print_sep(seq, first: &first); |
711 | if (REISERFS_SB(sb: s)->s_alloc_options.large_file_size != 16) { |
712 | seq_printf(m: seq, fmt: "displacing_large_files=%lu" , |
713 | REISERFS_SB(sb: s)->s_alloc_options.large_file_size); |
714 | } else |
715 | seq_puts(m: seq, s: "displacing_large_files" ); |
716 | } |
717 | if (TEST_OPTION(displacing_new_packing_localities, s)) { |
718 | print_sep(seq, first: &first); |
719 | seq_puts(m: seq, s: "displacing_new_packing_localities" ); |
720 | } |
721 | if (TEST_OPTION(old_hashed_relocation, s)) { |
722 | print_sep(seq, first: &first); |
723 | seq_puts(m: seq, s: "old_hashed_relocation" ); |
724 | } |
725 | if (TEST_OPTION(new_hashed_relocation, s)) { |
726 | print_sep(seq, first: &first); |
727 | seq_puts(m: seq, s: "new_hashed_relocation" ); |
728 | } |
729 | if (TEST_OPTION(dirid_groups, s)) { |
730 | print_sep(seq, first: &first); |
731 | seq_puts(m: seq, s: "dirid_groups" ); |
732 | } |
733 | if (TEST_OPTION(oid_groups, s)) { |
734 | print_sep(seq, first: &first); |
735 | seq_puts(m: seq, s: "oid_groups" ); |
736 | } |
737 | if (TEST_OPTION(packing_groups, s)) { |
738 | print_sep(seq, first: &first); |
739 | seq_puts(m: seq, s: "packing_groups" ); |
740 | } |
741 | if (TEST_OPTION(hashed_formatted_nodes, s)) { |
742 | print_sep(seq, first: &first); |
743 | seq_puts(m: seq, s: "hashed_formatted_nodes" ); |
744 | } |
745 | if (TEST_OPTION(skip_busy, s)) { |
746 | print_sep(seq, first: &first); |
747 | seq_puts(m: seq, s: "skip_busy" ); |
748 | } |
749 | if (TEST_OPTION(hundredth_slices, s)) { |
750 | print_sep(seq, first: &first); |
751 | seq_puts(m: seq, s: "hundredth_slices" ); |
752 | } |
753 | if (TEST_OPTION(old_way, s)) { |
754 | print_sep(seq, first: &first); |
755 | seq_puts(m: seq, s: "old_way" ); |
756 | } |
757 | if (TEST_OPTION(displace_based_on_dirid, s)) { |
758 | print_sep(seq, first: &first); |
759 | seq_puts(m: seq, s: "displace_based_on_dirid" ); |
760 | } |
761 | if (REISERFS_SB(sb: s)->s_alloc_options.preallocmin != 0) { |
762 | print_sep(seq, first: &first); |
763 | seq_printf(m: seq, fmt: "preallocmin=%d" , |
764 | REISERFS_SB(sb: s)->s_alloc_options.preallocmin); |
765 | } |
766 | if (REISERFS_SB(sb: s)->s_alloc_options.preallocsize != 17) { |
767 | print_sep(seq, first: &first); |
768 | seq_printf(m: seq, fmt: "preallocsize=%d" , |
769 | REISERFS_SB(sb: s)->s_alloc_options.preallocsize); |
770 | } |
771 | } |
772 | |
773 | static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint) |
774 | { |
775 | char *hash_in; |
776 | |
777 | if (hint->formatted_node) { |
778 | hash_in = (char *)&hint->key.k_dir_id; |
779 | } else { |
780 | if (!hint->inode) { |
781 | /*hint->search_start = hint->beg;*/ |
782 | hash_in = (char *)&hint->key.k_dir_id; |
783 | } else |
784 | if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) |
785 | hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id); |
786 | else |
787 | hash_in = |
788 | (char *)(&INODE_PKEY(hint->inode)->k_objectid); |
789 | } |
790 | |
791 | hint->search_start = |
792 | hint->beg + keyed_hash(msg: hash_in, len: 4) % (hint->end - hint->beg); |
793 | } |
794 | |
795 | /* |
796 | * Relocation based on dirid, hashing them into a given bitmap block |
797 | * files. Formatted nodes are unaffected, a separate policy covers them |
798 | */ |
799 | static void dirid_groups(reiserfs_blocknr_hint_t * hint) |
800 | { |
801 | unsigned long hash; |
802 | __u32 dirid = 0; |
803 | int bm = 0; |
804 | struct super_block *sb = hint->th->t_super; |
805 | |
806 | if (hint->inode) |
807 | dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id); |
808 | else if (hint->formatted_node) |
809 | dirid = hint->key.k_dir_id; |
810 | |
811 | if (dirid) { |
812 | bm = bmap_hash_id(s: sb, id: dirid); |
813 | hash = bm * (sb->s_blocksize << 3); |
814 | /* give a portion of the block group to metadata */ |
815 | if (hint->inode) |
816 | hash += sb->s_blocksize / 2; |
817 | hint->search_start = hash; |
818 | } |
819 | } |
820 | |
821 | /* |
822 | * Relocation based on oid, hashing them into a given bitmap block |
823 | * files. Formatted nodes are unaffected, a separate policy covers them |
824 | */ |
825 | static void oid_groups(reiserfs_blocknr_hint_t * hint) |
826 | { |
827 | if (hint->inode) { |
828 | unsigned long hash; |
829 | __u32 oid; |
830 | __u32 dirid; |
831 | int bm; |
832 | |
833 | dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id); |
834 | |
835 | /* |
836 | * keep the root dir and it's first set of subdirs close to |
837 | * the start of the disk |
838 | */ |
839 | if (dirid <= 2) |
840 | hash = (hint->inode->i_sb->s_blocksize << 3); |
841 | else { |
842 | oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid); |
843 | bm = bmap_hash_id(s: hint->inode->i_sb, id: oid); |
844 | hash = bm * (hint->inode->i_sb->s_blocksize << 3); |
845 | } |
846 | hint->search_start = hash; |
847 | } |
848 | } |
849 | |
850 | /* |
851 | * returns 1 if it finds an indirect item and gets valid hint info |
852 | * from it, otherwise 0 |
853 | */ |
854 | static int get_left_neighbor(reiserfs_blocknr_hint_t * hint) |
855 | { |
856 | struct treepath *path; |
857 | struct buffer_head *bh; |
858 | struct item_head *ih; |
859 | int pos_in_item; |
860 | __le32 *item; |
861 | int ret = 0; |
862 | |
863 | /* |
864 | * reiserfs code can call this function w/o pointer to path |
865 | * structure supplied; then we rely on supplied search_start |
866 | */ |
867 | if (!hint->path) |
868 | return 0; |
869 | |
870 | path = hint->path; |
871 | bh = get_last_bh(path); |
872 | RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor" ); |
873 | ih = tp_item_head(path); |
874 | pos_in_item = path->pos_in_item; |
875 | item = tp_item_body(path); |
876 | |
877 | hint->search_start = bh->b_blocknr; |
878 | |
879 | /* |
880 | * for indirect item: go to left and look for the first non-hole entry |
881 | * in the indirect item |
882 | */ |
883 | if (!hint->formatted_node && is_indirect_le_ih(ih)) { |
884 | if (pos_in_item == I_UNFM_NUM(ih)) |
885 | pos_in_item--; |
886 | while (pos_in_item >= 0) { |
887 | int t = get_block_num(item, pos_in_item); |
888 | if (t) { |
889 | hint->search_start = t; |
890 | ret = 1; |
891 | break; |
892 | } |
893 | pos_in_item--; |
894 | } |
895 | } |
896 | |
897 | /* does result value fit into specified region? */ |
898 | return ret; |
899 | } |
900 | |
901 | /* |
902 | * should be, if formatted node, then try to put on first part of the device |
903 | * specified as number of percent with mount option device, else try to put |
904 | * on last of device. This is not to say it is good code to do so, |
905 | * but the effect should be measured. |
906 | */ |
907 | static inline void set_border_in_hint(struct super_block *s, |
908 | reiserfs_blocknr_hint_t * hint) |
909 | { |
910 | b_blocknr_t border = |
911 | SB_BLOCK_COUNT(s) / REISERFS_SB(sb: s)->s_alloc_options.border; |
912 | |
913 | if (hint->formatted_node) |
914 | hint->end = border - 1; |
915 | else |
916 | hint->beg = border; |
917 | } |
918 | |
919 | static inline void displace_large_file(reiserfs_blocknr_hint_t * hint) |
920 | { |
921 | if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) |
922 | hint->search_start = |
923 | hint->beg + |
924 | keyed_hash(msg: (char *)(&INODE_PKEY(hint->inode)->k_dir_id), |
925 | len: 4) % (hint->end - hint->beg); |
926 | else |
927 | hint->search_start = |
928 | hint->beg + |
929 | keyed_hash(msg: (char *)(&INODE_PKEY(hint->inode)->k_objectid), |
930 | len: 4) % (hint->end - hint->beg); |
931 | } |
932 | |
933 | static inline void hash_formatted_node(reiserfs_blocknr_hint_t * hint) |
934 | { |
935 | char *hash_in; |
936 | |
937 | if (!hint->inode) |
938 | hash_in = (char *)&hint->key.k_dir_id; |
939 | else if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) |
940 | hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id); |
941 | else |
942 | hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid); |
943 | |
944 | hint->search_start = |
945 | hint->beg + keyed_hash(msg: hash_in, len: 4) % (hint->end - hint->beg); |
946 | } |
947 | |
948 | static inline int |
949 | this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t * |
950 | hint) |
951 | { |
952 | return hint->block == |
953 | REISERFS_SB(sb: hint->th->t_super)->s_alloc_options.large_file_size; |
954 | } |
955 | |
956 | #ifdef DISPLACE_NEW_PACKING_LOCALITIES |
957 | static inline void displace_new_packing_locality(reiserfs_blocknr_hint_t * hint) |
958 | { |
959 | struct in_core_key *key = &hint->key; |
960 | |
961 | hint->th->displace_new_blocks = 0; |
962 | hint->search_start = |
963 | hint->beg + keyed_hash(msg: (char *)(&key->k_objectid), |
964 | len: 4) % (hint->end - hint->beg); |
965 | } |
966 | #endif |
967 | |
968 | static inline int old_hashed_relocation(reiserfs_blocknr_hint_t * hint) |
969 | { |
970 | b_blocknr_t border; |
971 | u32 hash_in; |
972 | |
973 | if (hint->formatted_node || hint->inode == NULL) { |
974 | return 0; |
975 | } |
976 | |
977 | hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id); |
978 | border = |
979 | hint->beg + (u32) keyed_hash(msg: ((char *)(&hash_in)), |
980 | len: 4) % (hint->end - hint->beg - 1); |
981 | if (border > hint->search_start) |
982 | hint->search_start = border; |
983 | |
984 | return 1; |
985 | } |
986 | |
987 | static inline int old_way(reiserfs_blocknr_hint_t * hint) |
988 | { |
989 | b_blocknr_t border; |
990 | |
991 | if (hint->formatted_node || hint->inode == NULL) { |
992 | return 0; |
993 | } |
994 | |
995 | border = |
996 | hint->beg + |
997 | le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end - |
998 | hint->beg); |
999 | if (border > hint->search_start) |
1000 | hint->search_start = border; |
1001 | |
1002 | return 1; |
1003 | } |
1004 | |
1005 | static inline void hundredth_slices(reiserfs_blocknr_hint_t * hint) |
1006 | { |
1007 | struct in_core_key *key = &hint->key; |
1008 | b_blocknr_t slice_start; |
1009 | |
1010 | slice_start = |
1011 | (keyed_hash(msg: (char *)(&key->k_dir_id), len: 4) % 100) * (hint->end / 100); |
1012 | if (slice_start > hint->search_start |
1013 | || slice_start + (hint->end / 100) <= hint->search_start) { |
1014 | hint->search_start = slice_start; |
1015 | } |
1016 | } |
1017 | |
1018 | static void determine_search_start(reiserfs_blocknr_hint_t * hint, |
1019 | int amount_needed) |
1020 | { |
1021 | struct super_block *s = hint->th->t_super; |
1022 | int unfm_hint; |
1023 | |
1024 | hint->beg = 0; |
1025 | hint->end = SB_BLOCK_COUNT(s) - 1; |
1026 | |
1027 | /* This is former border algorithm. Now with tunable border offset */ |
1028 | if (concentrating_formatted_nodes(s)) |
1029 | set_border_in_hint(s, hint); |
1030 | |
1031 | #ifdef DISPLACE_NEW_PACKING_LOCALITIES |
1032 | /* |
1033 | * whenever we create a new directory, we displace it. At first |
1034 | * we will hash for location, later we might look for a moderately |
1035 | * empty place for it |
1036 | */ |
1037 | if (displacing_new_packing_localities(s) |
1038 | && hint->th->displace_new_blocks) { |
1039 | displace_new_packing_locality(hint); |
1040 | |
1041 | /* |
1042 | * we do not continue determine_search_start, |
1043 | * if new packing locality is being displaced |
1044 | */ |
1045 | return; |
1046 | } |
1047 | #endif |
1048 | |
1049 | /* |
1050 | * all persons should feel encouraged to add more special cases |
1051 | * here and test them |
1052 | */ |
1053 | |
1054 | if (displacing_large_files(s) && !hint->formatted_node |
1055 | && this_blocknr_allocation_would_make_it_a_large_file(hint)) { |
1056 | displace_large_file(hint); |
1057 | return; |
1058 | } |
1059 | |
1060 | /* |
1061 | * if none of our special cases is relevant, use the left |
1062 | * neighbor in the tree order of the new node we are allocating for |
1063 | */ |
1064 | if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) { |
1065 | hash_formatted_node(hint); |
1066 | return; |
1067 | } |
1068 | |
1069 | unfm_hint = get_left_neighbor(hint); |
1070 | |
1071 | /* |
1072 | * Mimic old block allocator behaviour, that is if VFS allowed for |
1073 | * preallocation, new blocks are displaced based on directory ID. |
1074 | * Also, if suggested search_start is less than last preallocated |
1075 | * block, we start searching from it, assuming that HDD dataflow |
1076 | * is faster in forward direction |
1077 | */ |
1078 | if (TEST_OPTION(old_way, s)) { |
1079 | if (!hint->formatted_node) { |
1080 | if (!reiserfs_hashed_relocation(s)) |
1081 | old_way(hint); |
1082 | else if (!reiserfs_no_unhashed_relocation(s)) |
1083 | old_hashed_relocation(hint); |
1084 | |
1085 | if (hint->inode |
1086 | && hint->search_start < |
1087 | REISERFS_I(inode: hint->inode)->i_prealloc_block) |
1088 | hint->search_start = |
1089 | REISERFS_I(inode: hint->inode)->i_prealloc_block; |
1090 | } |
1091 | return; |
1092 | } |
1093 | |
1094 | /* This is an approach proposed by Hans */ |
1095 | if (TEST_OPTION(hundredth_slices, s) |
1096 | && !(displacing_large_files(s) && !hint->formatted_node)) { |
1097 | hundredth_slices(hint); |
1098 | return; |
1099 | } |
1100 | |
1101 | /* old_hashed_relocation only works on unformatted */ |
1102 | if (!unfm_hint && !hint->formatted_node && |
1103 | TEST_OPTION(old_hashed_relocation, s)) { |
1104 | old_hashed_relocation(hint); |
1105 | } |
1106 | |
1107 | /* new_hashed_relocation works with both formatted/unformatted nodes */ |
1108 | if ((!unfm_hint || hint->formatted_node) && |
1109 | TEST_OPTION(new_hashed_relocation, s)) { |
1110 | new_hashed_relocation(hint); |
1111 | } |
1112 | |
1113 | /* dirid grouping works only on unformatted nodes */ |
1114 | if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) { |
1115 | dirid_groups(hint); |
1116 | } |
1117 | #ifdef DISPLACE_NEW_PACKING_LOCALITIES |
1118 | if (hint->formatted_node && TEST_OPTION(dirid_groups, s)) { |
1119 | dirid_groups(hint); |
1120 | } |
1121 | #endif |
1122 | |
1123 | /* oid grouping works only on unformatted nodes */ |
1124 | if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups, s)) { |
1125 | oid_groups(hint); |
1126 | } |
1127 | return; |
1128 | } |
1129 | |
1130 | static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint) |
1131 | { |
1132 | /* make minimum size a mount option and benchmark both ways */ |
1133 | /* we preallocate blocks only for regular files, specific size */ |
1134 | /* benchmark preallocating always and see what happens */ |
1135 | |
1136 | hint->prealloc_size = 0; |
1137 | |
1138 | if (!hint->formatted_node && hint->preallocate) { |
1139 | if (S_ISREG(hint->inode->i_mode) && !IS_PRIVATE(hint->inode) |
1140 | && hint->inode->i_size >= |
1141 | REISERFS_SB(sb: hint->th->t_super)->s_alloc_options. |
1142 | preallocmin * hint->inode->i_sb->s_blocksize) |
1143 | hint->prealloc_size = |
1144 | REISERFS_SB(sb: hint->th->t_super)->s_alloc_options. |
1145 | preallocsize - 1; |
1146 | } |
1147 | return CARRY_ON; |
1148 | } |
1149 | |
1150 | static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint, |
1151 | b_blocknr_t * new_blocknrs, |
1152 | b_blocknr_t start, |
1153 | b_blocknr_t finish, int min, |
1154 | int amount_needed, |
1155 | int prealloc_size) |
1156 | { |
1157 | int rest = amount_needed; |
1158 | int nr_allocated; |
1159 | |
1160 | while (rest > 0 && start <= finish) { |
1161 | nr_allocated = scan_bitmap(th: hint->th, start: &start, finish, min, |
1162 | max: rest + prealloc_size, |
1163 | unfm: !hint->formatted_node, file_block: hint->block); |
1164 | |
1165 | if (nr_allocated == 0) /* no new blocks allocated, return */ |
1166 | break; |
1167 | |
1168 | /* fill free_blocknrs array first */ |
1169 | while (rest > 0 && nr_allocated > 0) { |
1170 | *new_blocknrs++ = start++; |
1171 | rest--; |
1172 | nr_allocated--; |
1173 | } |
1174 | |
1175 | /* do we have something to fill prealloc. array also ? */ |
1176 | if (nr_allocated > 0) { |
1177 | /* |
1178 | * it means prealloc_size was greater that 0 and |
1179 | * we do preallocation |
1180 | */ |
1181 | list_add(new: &REISERFS_I(inode: hint->inode)->i_prealloc_list, |
1182 | head: &SB_JOURNAL(hint->th->t_super)-> |
1183 | j_prealloc_list); |
1184 | REISERFS_I(inode: hint->inode)->i_prealloc_block = start; |
1185 | REISERFS_I(inode: hint->inode)->i_prealloc_count = |
1186 | nr_allocated; |
1187 | break; |
1188 | } |
1189 | } |
1190 | |
1191 | return (amount_needed - rest); |
1192 | } |
1193 | |
1194 | static inline int blocknrs_and_prealloc_arrays_from_search_start |
1195 | (reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs, |
1196 | int amount_needed) { |
1197 | struct super_block *s = hint->th->t_super; |
1198 | b_blocknr_t start = hint->search_start; |
1199 | b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1; |
1200 | int passno = 0; |
1201 | int nr_allocated = 0; |
1202 | int depth; |
1203 | |
1204 | determine_prealloc_size(hint); |
1205 | if (!hint->formatted_node) { |
1206 | int quota_ret; |
1207 | #ifdef REISERQUOTA_DEBUG |
1208 | reiserfs_debug(s, REISERFS_DEBUG_CODE, |
1209 | "reiserquota: allocating %d blocks id=%u" , |
1210 | amount_needed, hint->inode->i_uid); |
1211 | #endif |
1212 | depth = reiserfs_write_unlock_nested(s); |
1213 | quota_ret = |
1214 | dquot_alloc_block_nodirty(inode: hint->inode, nr: amount_needed); |
1215 | if (quota_ret) { /* Quota exceeded? */ |
1216 | reiserfs_write_lock_nested(s, depth); |
1217 | return QUOTA_EXCEEDED; |
1218 | } |
1219 | if (hint->preallocate && hint->prealloc_size) { |
1220 | #ifdef REISERQUOTA_DEBUG |
1221 | reiserfs_debug(s, REISERFS_DEBUG_CODE, |
1222 | "reiserquota: allocating (prealloc) %d blocks id=%u" , |
1223 | hint->prealloc_size, hint->inode->i_uid); |
1224 | #endif |
1225 | quota_ret = dquot_prealloc_block_nodirty(inode: hint->inode, |
1226 | nr: hint->prealloc_size); |
1227 | if (quota_ret) |
1228 | hint->preallocate = hint->prealloc_size = 0; |
1229 | } |
1230 | /* for unformatted nodes, force large allocations */ |
1231 | reiserfs_write_lock_nested(s, depth); |
1232 | } |
1233 | |
1234 | do { |
1235 | switch (passno++) { |
1236 | case 0: /* Search from hint->search_start to end of disk */ |
1237 | start = hint->search_start; |
1238 | finish = SB_BLOCK_COUNT(s) - 1; |
1239 | break; |
1240 | case 1: /* Search from hint->beg to hint->search_start */ |
1241 | start = hint->beg; |
1242 | finish = hint->search_start; |
1243 | break; |
1244 | case 2: /* Last chance: Search from 0 to hint->beg */ |
1245 | start = 0; |
1246 | finish = hint->beg; |
1247 | break; |
1248 | default: |
1249 | /* We've tried searching everywhere, not enough space */ |
1250 | /* Free the blocks */ |
1251 | if (!hint->formatted_node) { |
1252 | #ifdef REISERQUOTA_DEBUG |
1253 | reiserfs_debug(s, REISERFS_DEBUG_CODE, |
1254 | "reiserquota: freeing (nospace) %d blocks id=%u" , |
1255 | amount_needed + |
1256 | hint->prealloc_size - |
1257 | nr_allocated, |
1258 | hint->inode->i_uid); |
1259 | #endif |
1260 | /* Free not allocated blocks */ |
1261 | depth = reiserfs_write_unlock_nested(s); |
1262 | dquot_free_block_nodirty(inode: hint->inode, |
1263 | nr: amount_needed + hint->prealloc_size - |
1264 | nr_allocated); |
1265 | reiserfs_write_lock_nested(s, depth); |
1266 | } |
1267 | while (nr_allocated--) |
1268 | reiserfs_free_block(th: hint->th, inode: hint->inode, |
1269 | block: new_blocknrs[nr_allocated], |
1270 | for_unformatted: !hint->formatted_node); |
1271 | |
1272 | return NO_DISK_SPACE; |
1273 | } |
1274 | } while ((nr_allocated += allocate_without_wrapping_disk(hint, |
1275 | new_blocknrs: new_blocknrs + |
1276 | nr_allocated, |
1277 | start, finish, |
1278 | min: 1, |
1279 | amount_needed: amount_needed - |
1280 | nr_allocated, |
1281 | prealloc_size: hint-> |
1282 | prealloc_size)) |
1283 | < amount_needed); |
1284 | if (!hint->formatted_node && |
1285 | amount_needed + hint->prealloc_size > |
1286 | nr_allocated + REISERFS_I(inode: hint->inode)->i_prealloc_count) { |
1287 | /* Some of preallocation blocks were not allocated */ |
1288 | #ifdef REISERQUOTA_DEBUG |
1289 | reiserfs_debug(s, REISERFS_DEBUG_CODE, |
1290 | "reiserquota: freeing (failed prealloc) %d blocks id=%u" , |
1291 | amount_needed + hint->prealloc_size - |
1292 | nr_allocated - |
1293 | REISERFS_I(hint->inode)->i_prealloc_count, |
1294 | hint->inode->i_uid); |
1295 | #endif |
1296 | |
1297 | depth = reiserfs_write_unlock_nested(s); |
1298 | dquot_free_block_nodirty(inode: hint->inode, nr: amount_needed + |
1299 | hint->prealloc_size - nr_allocated - |
1300 | REISERFS_I(inode: hint->inode)-> |
1301 | i_prealloc_count); |
1302 | reiserfs_write_lock_nested(s, depth); |
1303 | } |
1304 | |
1305 | return CARRY_ON; |
1306 | } |
1307 | |
1308 | /* grab new blocknrs from preallocated list */ |
1309 | /* return amount still needed after using them */ |
1310 | static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint, |
1311 | b_blocknr_t * new_blocknrs, |
1312 | int amount_needed) |
1313 | { |
1314 | struct inode *inode = hint->inode; |
1315 | |
1316 | if (REISERFS_I(inode)->i_prealloc_count > 0) { |
1317 | while (amount_needed) { |
1318 | |
1319 | *new_blocknrs++ = REISERFS_I(inode)->i_prealloc_block++; |
1320 | REISERFS_I(inode)->i_prealloc_count--; |
1321 | |
1322 | amount_needed--; |
1323 | |
1324 | if (REISERFS_I(inode)->i_prealloc_count <= 0) { |
1325 | list_del(entry: &REISERFS_I(inode)->i_prealloc_list); |
1326 | break; |
1327 | } |
1328 | } |
1329 | } |
1330 | /* return amount still needed after using preallocated blocks */ |
1331 | return amount_needed; |
1332 | } |
1333 | |
1334 | int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint, |
1335 | b_blocknr_t *new_blocknrs, |
1336 | int amount_needed, |
1337 | /* Amount of blocks we have already reserved */ |
1338 | int reserved_by_us) |
1339 | { |
1340 | int initial_amount_needed = amount_needed; |
1341 | int ret; |
1342 | struct super_block *s = hint->th->t_super; |
1343 | |
1344 | /* Check if there is enough space, taking into account reserved space */ |
1345 | if (SB_FREE_BLOCKS(s) - REISERFS_SB(sb: s)->reserved_blocks < |
1346 | amount_needed - reserved_by_us) |
1347 | return NO_DISK_SPACE; |
1348 | /* should this be if !hint->inode && hint->preallocate? */ |
1349 | /* do you mean hint->formatted_node can be removed ? - Zam */ |
1350 | /* |
1351 | * hint->formatted_node cannot be removed because we try to access |
1352 | * inode information here, and there is often no inode associated with |
1353 | * metadata allocations - green |
1354 | */ |
1355 | |
1356 | if (!hint->formatted_node && hint->preallocate) { |
1357 | amount_needed = use_preallocated_list_if_available |
1358 | (hint, new_blocknrs, amount_needed); |
1359 | |
1360 | /* |
1361 | * We have all the block numbers we need from the |
1362 | * prealloc list |
1363 | */ |
1364 | if (amount_needed == 0) |
1365 | return CARRY_ON; |
1366 | new_blocknrs += (initial_amount_needed - amount_needed); |
1367 | } |
1368 | |
1369 | /* find search start and save it in hint structure */ |
1370 | determine_search_start(hint, amount_needed); |
1371 | if (hint->search_start >= SB_BLOCK_COUNT(s)) |
1372 | hint->search_start = SB_BLOCK_COUNT(s) - 1; |
1373 | |
1374 | /* allocation itself; fill new_blocknrs and preallocation arrays */ |
1375 | ret = blocknrs_and_prealloc_arrays_from_search_start |
1376 | (hint, new_blocknrs, amount_needed); |
1377 | |
1378 | /* |
1379 | * We used prealloc. list to fill (partially) new_blocknrs array. |
1380 | * If final allocation fails we need to return blocks back to |
1381 | * prealloc. list or just free them. -- Zam (I chose second |
1382 | * variant) |
1383 | */ |
1384 | if (ret != CARRY_ON) { |
1385 | while (amount_needed++ < initial_amount_needed) { |
1386 | reiserfs_free_block(th: hint->th, inode: hint->inode, |
1387 | block: *(--new_blocknrs), for_unformatted: 1); |
1388 | } |
1389 | } |
1390 | return ret; |
1391 | } |
1392 | |
1393 | void reiserfs_cache_bitmap_metadata(struct super_block *sb, |
1394 | struct buffer_head *bh, |
1395 | struct reiserfs_bitmap_info *info) |
1396 | { |
1397 | unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size); |
1398 | |
1399 | /* The first bit must ALWAYS be 1 */ |
1400 | if (!reiserfs_test_le_bit(nr: 0, addr: (unsigned long *)bh->b_data)) |
1401 | reiserfs_error(sb, "reiserfs-2025" , "bitmap block %lu is " |
1402 | "corrupted: first bit must be 1" , bh->b_blocknr); |
1403 | |
1404 | info->free_count = 0; |
1405 | |
1406 | while (--cur >= (unsigned long *)bh->b_data) { |
1407 | /* 0 and ~0 are special, we can optimize for them */ |
1408 | if (*cur == 0) |
1409 | info->free_count += BITS_PER_LONG; |
1410 | else if (*cur != ~0L) /* A mix, investigate */ |
1411 | info->free_count += BITS_PER_LONG - hweight_long(w: *cur); |
1412 | } |
1413 | } |
1414 | |
1415 | struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, |
1416 | unsigned int bitmap) |
1417 | { |
1418 | b_blocknr_t block = (sb->s_blocksize << 3) * bitmap; |
1419 | struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap; |
1420 | struct buffer_head *bh; |
1421 | |
1422 | /* |
1423 | * Way old format filesystems had the bitmaps packed up front. |
1424 | * I doubt there are any of these left, but just in case... |
1425 | */ |
1426 | if (unlikely(test_bit(REISERFS_OLD_FORMAT, |
1427 | &REISERFS_SB(sb)->s_properties))) |
1428 | block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap; |
1429 | else if (bitmap == 0) |
1430 | block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1; |
1431 | |
1432 | bh = sb_bread(sb, block); |
1433 | if (bh == NULL) |
1434 | reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) " |
1435 | "reading failed" , __func__, block); |
1436 | else { |
1437 | if (buffer_locked(bh)) { |
1438 | int depth; |
1439 | PROC_INFO_INC(sb, scan_bitmap.wait); |
1440 | depth = reiserfs_write_unlock_nested(s: sb); |
1441 | __wait_on_buffer(bh); |
1442 | reiserfs_write_lock_nested(s: sb, depth); |
1443 | } |
1444 | BUG_ON(!buffer_uptodate(bh)); |
1445 | BUG_ON(atomic_read(&bh->b_count) == 0); |
1446 | |
1447 | if (info->free_count == UINT_MAX) |
1448 | reiserfs_cache_bitmap_metadata(sb, bh, info); |
1449 | } |
1450 | |
1451 | return bh; |
1452 | } |
1453 | |
1454 | int reiserfs_init_bitmap_cache(struct super_block *sb) |
1455 | { |
1456 | struct reiserfs_bitmap_info *bitmap; |
1457 | unsigned int bmap_nr = reiserfs_bmap_count(sb); |
1458 | |
1459 | bitmap = vmalloc(array_size(bmap_nr, sizeof(*bitmap))); |
1460 | if (bitmap == NULL) |
1461 | return -ENOMEM; |
1462 | |
1463 | memset(bitmap, 0xff, sizeof(*bitmap) * bmap_nr); |
1464 | |
1465 | SB_AP_BITMAP(sb) = bitmap; |
1466 | |
1467 | return 0; |
1468 | } |
1469 | |
1470 | void reiserfs_free_bitmap_cache(struct super_block *sb) |
1471 | { |
1472 | if (SB_AP_BITMAP(sb)) { |
1473 | vfree(SB_AP_BITMAP(sb)); |
1474 | SB_AP_BITMAP(sb) = NULL; |
1475 | } |
1476 | } |
1477 | |