// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include <linux/dm-dirty-log.h>
#include <linux/dm-region-hash.h>

#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include "dm.h"

#define DM_MSG_PREFIX "region hash"

/*
 *------------------------------------------------------------------
 * Region hash
 *
 * The mirror splits itself up into discrete regions. Each
 * region can be in one of three states: clean, dirty,
 * nosync. There is no need to put clean regions in the hash.
 *
 * In addition to being present in the hash table a region _may_
 * be present on one of three lists.
 *
 * clean_regions: Regions on this list have no io pending to
 * them; they are in sync, we are no longer interested in them,
 * they are dull. dm_rh_update_states() will remove them from the
 * hash table.
 *
 * quiesced_regions: These regions have been spun down, ready
 * for recovery. dm_rh_recovery_start() will remove regions from
 * this list and hand them to kmirrord, which will schedule the
 * recovery io with kcopyd.
 *
 * recovered_regions: Regions that kcopyd has successfully
 * recovered. dm_rh_update_states() will now schedule any delayed
 * io, up the recovery_count, and remove the region from the
 * hash.
 *
 * There are 2 locks:
 * An rw spin lock 'hash_lock' protects just the hash table;
 * it is never held in write mode from interrupt context, which
 * means we only have to disable irqs when taking the write lock.
 *
 * An ordinary spin lock 'region_lock' protects the three
 * lists in the region_hash, along with the 'state', 'list' and
 * 'delayed_bios' fields of the regions. This lock is used from irq
 * context, so all other users must disable local irqs.
 *------------------------------------------------------------------
 */
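
/*
 * Informal summary of the region states used below (reconstructed from
 * this file, not an authoritative state machine):
 *
 *   DM_RH_CLEAN      in sync, no writes pending; parked on clean_regions
 *                    until dm_rh_update_states() drops it from the hash.
 *   DM_RH_DIRTY      writes in flight; dm_rh_dec() moves the region back
 *                    to DM_RH_CLEAN when the last pending write completes.
 *   DM_RH_NOSYNC     a write or flush failed; the region stays out of sync
 *                    until it is recovered (or the map is reloaded).
 *   DM_RH_RECOVERING quiesced and handed to the recovery machinery via
 *                    dm_rh_recovery_start()/dm_rh_recovery_end().
 */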
struct dm_region_hash {
	uint32_t region_size;
	unsigned int region_shift;

	/* holds persistent region state */
	struct dm_dirty_log *log;

	/* hash table */
	rwlock_t hash_lock;
	unsigned int mask;
	unsigned int nr_buckets;
	unsigned int prime;
	unsigned int shift;
	struct list_head *buckets;

	/*
	 * If there was a flush failure no regions can be marked clean.
	 */
	int flush_failure;

	unsigned int max_recovery; /* Max # of regions to recover in parallel */

	spinlock_t region_lock;
	atomic_t recovery_in_flight;
	struct list_head clean_regions;
	struct list_head quiesced_regions;
	struct list_head recovered_regions;
	struct list_head failed_recovered_regions;
	struct semaphore recovery_count;

	mempool_t region_pool;

	void *context;
	sector_t target_begin;

	/* Callback function to schedule bio writes */
	void (*dispatch_bios)(void *context, struct bio_list *bios);

	/* Callback function to wake up the caller's worker thread. */
	void (*wakeup_workers)(void *context);

	/* Callback function to wake up the caller's recovery waiters. */
	void (*wakeup_all_recovery_waiters)(void *context);
};

struct dm_region {
	struct dm_region_hash *rh;	/* FIXME: can we get rid of this ? */
	region_t key;
	int state;

	struct list_head hash_list;
	struct list_head list;

	atomic_t pending;
	struct bio_list delayed_bios;
};

/*
 * Conversion fns
 */
static region_t dm_rh_sector_to_region(struct dm_region_hash *rh, sector_t sector)
{
	return sector >> rh->region_shift;
}

sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region)
{
	return region << rh->region_shift;
}
EXPORT_SYMBOL_GPL(dm_rh_region_to_sector);

region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio)
{
	return dm_rh_sector_to_region(rh, bio->bi_iter.bi_sector -
				      rh->target_begin);
}
EXPORT_SYMBOL_GPL(dm_rh_bio_to_region);
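
/*
 * Worked example (illustrative values only): with region_size = 1024
 * sectors, region_shift is 10, so a bio whose offset into the target
 * (bi_sector - target_begin) is 5000 maps to region 4, and
 * dm_rh_region_to_sector(rh, 4) returns that region's starting
 * offset, 4096.
 */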

void *dm_rh_region_context(struct dm_region *reg)
{
	return reg->rh->context;
}
EXPORT_SYMBOL_GPL(dm_rh_region_context);

region_t dm_rh_get_region_key(struct dm_region *reg)
{
	return reg->key;
}
EXPORT_SYMBOL_GPL(dm_rh_get_region_key);

sector_t dm_rh_get_region_size(struct dm_region_hash *rh)
{
	return rh->region_size;
}
EXPORT_SYMBOL_GPL(dm_rh_get_region_size);

/*
 * FIXME: shall we pass in a structure instead of all these args to
 * dm_region_hash_create()????
 */
#define RH_HASH_MULT 2654435387U
#define RH_HASH_SHIFT 12

#define MIN_REGIONS 64
struct dm_region_hash *dm_region_hash_create(
		void *context, void (*dispatch_bios)(void *context,
						     struct bio_list *bios),
		void (*wakeup_workers)(void *context),
		void (*wakeup_all_recovery_waiters)(void *context),
		sector_t target_begin, unsigned int max_recovery,
		struct dm_dirty_log *log, uint32_t region_size,
		region_t nr_regions)
{
	struct dm_region_hash *rh;
	unsigned int nr_buckets, max_buckets;
	size_t i;
	int ret;

	/*
	 * Calculate a suitable number of buckets for our hash
	 * table.
	 */
	max_buckets = nr_regions >> 6;
	for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1)
		;
	nr_buckets >>= 1;

	rh = kzalloc(sizeof(*rh), GFP_KERNEL);
	if (!rh) {
		DMERR("unable to allocate region hash memory");
		return ERR_PTR(-ENOMEM);
	}

	rh->context = context;
	rh->dispatch_bios = dispatch_bios;
	rh->wakeup_workers = wakeup_workers;
	rh->wakeup_all_recovery_waiters = wakeup_all_recovery_waiters;
	rh->target_begin = target_begin;
	rh->max_recovery = max_recovery;
	rh->log = log;
	rh->region_size = region_size;
	rh->region_shift = __ffs(region_size);
	rwlock_init(&rh->hash_lock);
	rh->mask = nr_buckets - 1;
	rh->nr_buckets = nr_buckets;

	rh->shift = RH_HASH_SHIFT;
	rh->prime = RH_HASH_MULT;

	rh->buckets = vmalloc(array_size(nr_buckets, sizeof(*rh->buckets)));
	if (!rh->buckets) {
		DMERR("unable to allocate region hash bucket memory");
		kfree(rh);
		return ERR_PTR(-ENOMEM);
	}

	for (i = 0; i < nr_buckets; i++)
		INIT_LIST_HEAD(rh->buckets + i);

	spin_lock_init(&rh->region_lock);
	sema_init(&rh->recovery_count, 0);
	atomic_set(&rh->recovery_in_flight, 0);
	INIT_LIST_HEAD(&rh->clean_regions);
	INIT_LIST_HEAD(&rh->quiesced_regions);
	INIT_LIST_HEAD(&rh->recovered_regions);
	INIT_LIST_HEAD(&rh->failed_recovered_regions);
	rh->flush_failure = 0;

	ret = mempool_init_kmalloc_pool(&rh->region_pool, MIN_REGIONS,
					sizeof(struct dm_region));
	if (ret) {
		vfree(rh->buckets);
		kfree(rh);
		rh = ERR_PTR(-ENOMEM);
	}

	return rh;
}
EXPORT_SYMBOL_GPL(dm_region_hash_create);
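
/*
 * Hedged usage sketch: roughly how a mirror-style target might create and
 * tear down a region hash. The callback names, MAX_RECOVERY and the error
 * handling below are illustrative placeholders, not copied from any target:
 *
 *	rh = dm_region_hash_create(ms, dispatch_bios, wakeup_mirrord,
 *				   wakeup_all_recovery_waiters, ti->begin,
 *				   MAX_RECOVERY, dirty_log, region_size,
 *				   nr_regions);
 *	if (IS_ERR(rh))
 *		return PTR_ERR(rh);
 *	...
 *	dm_region_hash_destroy(rh);	-- also destroys the dirty log
 */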

void dm_region_hash_destroy(struct dm_region_hash *rh)
{
	unsigned int h;
	struct dm_region *reg, *nreg;

	BUG_ON(!list_empty(&rh->quiesced_regions));
	for (h = 0; h < rh->nr_buckets; h++) {
		list_for_each_entry_safe(reg, nreg, rh->buckets + h,
					 hash_list) {
			BUG_ON(atomic_read(&reg->pending));
			mempool_free(reg, &rh->region_pool);
		}
	}

	if (rh->log)
		dm_dirty_log_destroy(rh->log);

	mempool_exit(&rh->region_pool);
	vfree(rh->buckets);
	kfree(rh);
}
EXPORT_SYMBOL_GPL(dm_region_hash_destroy);

struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh)
{
	return rh->log;
}
EXPORT_SYMBOL_GPL(dm_rh_dirty_log);

static unsigned int rh_hash(struct dm_region_hash *rh, region_t region)
{
	return (unsigned int) ((region * rh->prime) >> rh->shift) & rh->mask;
}
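
/*
 * rh_hash() is a multiplicative hash: the region number is multiplied by a
 * large odd constant (RH_HASH_MULT), the product is shifted right by
 * RH_HASH_SHIFT to pull in higher-order bits, and the result is masked down
 * to the bucket count, which is always a power of two.
 */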

static struct dm_region *__rh_lookup(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;
	struct list_head *bucket = rh->buckets + rh_hash(rh, region);

	list_for_each_entry(reg, bucket, hash_list)
		if (reg->key == region)
			return reg;

	return NULL;
}

static void __rh_insert(struct dm_region_hash *rh, struct dm_region *reg)
{
	list_add(&reg->hash_list, rh->buckets + rh_hash(rh, reg->key));
}

static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg, *nreg;

	nreg = mempool_alloc(&rh->region_pool, GFP_ATOMIC);
	if (unlikely(!nreg))
		nreg = kmalloc(sizeof(*nreg), GFP_NOIO | __GFP_NOFAIL);

	nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
		      DM_RH_CLEAN : DM_RH_NOSYNC;
	nreg->rh = rh;
	nreg->key = region;
	INIT_LIST_HEAD(&nreg->list);
	atomic_set(&nreg->pending, 0);
	bio_list_init(&nreg->delayed_bios);

	write_lock_irq(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	if (reg)
		/* We lost the race. */
		mempool_free(nreg, &rh->region_pool);
	else {
		__rh_insert(rh, nreg);
		if (nreg->state == DM_RH_CLEAN) {
			spin_lock(&rh->region_lock);
			list_add(&nreg->list, &rh->clean_regions);
			spin_unlock(&rh->region_lock);
		}

		reg = nreg;
	}
	write_unlock_irq(&rh->hash_lock);

	return reg;
}

static struct dm_region *__rh_find(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;

	reg = __rh_lookup(rh, region);
	if (!reg) {
		read_unlock(&rh->hash_lock);
		reg = __rh_alloc(rh, region);
		read_lock(&rh->hash_lock);
	}

	return reg;
}

int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block)
{
	int r;
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	if (reg)
		return reg->state;

	/*
	 * The region wasn't in the hash, so we fall back to the
	 * dirty log.
	 */
	r = rh->log->type->in_sync(rh->log, region, may_block);

	/*
	 * Any error from the dirty log (eg. -EWOULDBLOCK) gets
	 * taken as a DM_RH_NOSYNC
	 */
	return r == 1 ? DM_RH_CLEAN : DM_RH_NOSYNC;
}
EXPORT_SYMBOL_GPL(dm_rh_get_state);

static void complete_resync_work(struct dm_region *reg, int success)
{
	struct dm_region_hash *rh = reg->rh;

	rh->log->type->set_region_sync(rh->log, reg->key, success);

	/*
	 * Dispatch the bios before we call 'wake_up_all'.
	 * This is important because if we are suspending,
	 * we want to know that recovery is complete and
	 * the work queue is flushed. If we wake_up_all
	 * before we dispatch_bios (queue bios and call wake()),
	 * then we risk suspending before the work queue
	 * has been properly flushed.
	 */
	rh->dispatch_bios(rh->context, &reg->delayed_bios);
	if (atomic_dec_and_test(&rh->recovery_in_flight))
		rh->wakeup_all_recovery_waiters(rh->context);
	up(&rh->recovery_count);
}

/* dm_rh_mark_nosync
 * @rh
 * @bio
 *
 * The bio was written on some mirror(s) but failed on other mirror(s).
 * We can successfully endio the bio but should avoid the region being
 * marked clean by setting the state DM_RH_NOSYNC.
 *
 * This function is _not_ safe in interrupt context!
 */
void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
{
	unsigned long flags;
	struct dm_dirty_log *log = rh->log;
	struct dm_region *reg;
	region_t region = dm_rh_bio_to_region(rh, bio);
	int recovering = 0;

	if (bio->bi_opf & REQ_PREFLUSH) {
		rh->flush_failure = 1;
		return;
	}

	if (bio_op(bio) == REQ_OP_DISCARD)
		return;

	/* We must inform the log that the sync count has changed. */
	log->type->set_region_sync(log, region, 0);

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);
	read_unlock(&rh->hash_lock);

	/* region hash entry should exist because write was in-flight */
	BUG_ON(!reg);
	BUG_ON(!list_empty(&reg->list));

	spin_lock_irqsave(&rh->region_lock, flags);
	/*
	 * Possible cases:
	 *   1) DM_RH_DIRTY
	 *   2) DM_RH_NOSYNC: was dirty, other preceding writes failed
	 *   3) DM_RH_RECOVERING: flushing pending writes
	 * In any of these cases, the region should not be on a list.
	 */
	recovering = (reg->state == DM_RH_RECOVERING);
	reg->state = DM_RH_NOSYNC;
	BUG_ON(!list_empty(&reg->list));
	spin_unlock_irqrestore(&rh->region_lock, flags);

	if (recovering)
		complete_resync_work(reg, 0);
}
EXPORT_SYMBOL_GPL(dm_rh_mark_nosync);

void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled)
{
	struct dm_region *reg, *next;

	LIST_HEAD(clean);
	LIST_HEAD(recovered);
	LIST_HEAD(failed_recovered);

	/*
	 * Quickly grab the lists.
	 */
	write_lock_irq(&rh->hash_lock);
	spin_lock(&rh->region_lock);
	if (!list_empty(&rh->clean_regions)) {
		list_splice_init(&rh->clean_regions, &clean);

		list_for_each_entry(reg, &clean, list)
			list_del(&reg->hash_list);
	}

	if (!list_empty(&rh->recovered_regions)) {
		list_splice_init(&rh->recovered_regions, &recovered);

		list_for_each_entry(reg, &recovered, list)
			list_del(&reg->hash_list);
	}

	if (!list_empty(&rh->failed_recovered_regions)) {
		list_splice_init(&rh->failed_recovered_regions,
				 &failed_recovered);

		list_for_each_entry(reg, &failed_recovered, list)
			list_del(&reg->hash_list);
	}

	spin_unlock(&rh->region_lock);
	write_unlock_irq(&rh->hash_lock);

	/*
	 * All the regions on the recovered and clean lists have
	 * now been pulled out of the system, so no need to do
	 * any more locking.
	 */
	list_for_each_entry_safe(reg, next, &recovered, list) {
		rh->log->type->clear_region(rh->log, reg->key);
		complete_resync_work(reg, 1);
		mempool_free(reg, &rh->region_pool);
	}

	list_for_each_entry_safe(reg, next, &failed_recovered, list) {
		complete_resync_work(reg, errors_handled ? 0 : 1);
		mempool_free(reg, &rh->region_pool);
	}

	list_for_each_entry_safe(reg, next, &clean, list) {
		rh->log->type->clear_region(rh->log, reg->key);
		mempool_free(reg, &rh->region_pool);
	}

	rh->log->type->flush(rh->log);
}
EXPORT_SYMBOL_GPL(dm_rh_update_states);

static void rh_inc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);

	spin_lock_irq(&rh->region_lock);
	atomic_inc(&reg->pending);

	if (reg->state == DM_RH_CLEAN) {
		reg->state = DM_RH_DIRTY;
		list_del_init(&reg->list);	/* take off the clean list */
		spin_unlock_irq(&rh->region_lock);

		rh->log->type->mark_region(rh->log, reg->key);
	} else
		spin_unlock_irq(&rh->region_lock);

	read_unlock(&rh->hash_lock);
}

void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
{
	struct bio *bio;

	for (bio = bios->head; bio; bio = bio->bi_next) {
		if (bio->bi_opf & REQ_PREFLUSH || bio_op(bio) == REQ_OP_DISCARD)
			continue;
		rh_inc(rh, dm_rh_bio_to_region(rh, bio));
	}
}
EXPORT_SYMBOL_GPL(dm_rh_inc_pending);
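
/*
 * Hedged sketch of the write-side calling sequence (illustrative only, not
 * copied from any particular target):
 *
 *	dm_rh_inc_pending(rh, &writes);		  -- before submitting writes
 *	... submit the bios ...
 *	dm_rh_dec(rh, region);			  -- from each write's endio path
 *	dm_rh_update_states(rh, errors_handled);  -- later, from the worker
 *
 * dm_rh_dec() only moves regions between the internal lists; clearing the
 * dirty log and flushing it happen later in dm_rh_update_states().
 */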

void dm_rh_dec(struct dm_region_hash *rh, region_t region)
{
	unsigned long flags;
	struct dm_region *reg;
	int should_wake = 0;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	spin_lock_irqsave(&rh->region_lock, flags);
	if (atomic_dec_and_test(&reg->pending)) {
		/*
		 * There is no pending I/O for this region.
		 * We can move the region to the corresponding list for the
		 * next action.  At this point, the region is not yet
		 * connected to any list.
		 *
		 * If the state is DM_RH_NOSYNC, the region is kept off the
		 * clean list.  The hash entry for DM_RH_NOSYNC will remain
		 * in memory until the region is recovered or the map is
		 * reloaded.
		 */

		/* do nothing for DM_RH_NOSYNC */
		if (unlikely(rh->flush_failure)) {
			/*
			 * If a write flush failed some time ago, we
			 * don't know whether or not this write made it
			 * to the disk, so we must resync the device.
			 */
			reg->state = DM_RH_NOSYNC;
		} else if (reg->state == DM_RH_RECOVERING) {
			list_add_tail(&reg->list, &rh->quiesced_regions);
		} else if (reg->state == DM_RH_DIRTY) {
			reg->state = DM_RH_CLEAN;
			list_add(&reg->list, &rh->clean_regions);
		}
		should_wake = 1;
	}
	spin_unlock_irqrestore(&rh->region_lock, flags);

	if (should_wake)
		rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_dec);

/*
 * Starts quiescing a region in preparation for recovery.
 */
static int __rh_recovery_prepare(struct dm_region_hash *rh)
{
	int r;
	region_t region;
	struct dm_region *reg;

	/*
	 * Ask the dirty log what's next.
	 */
	r = rh->log->type->get_resync_work(rh->log, &region);
	if (r <= 0)
		return r;

	/*
	 * Get this region, and start it quiescing by setting the
	 * recovering flag.
	 */
	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);
	read_unlock(&rh->hash_lock);

	spin_lock_irq(&rh->region_lock);
	reg->state = DM_RH_RECOVERING;

	/* Already quiesced ? */
	if (atomic_read(&reg->pending))
		list_del_init(&reg->list);
	else
		list_move(&reg->list, &rh->quiesced_regions);

	spin_unlock_irq(&rh->region_lock);

	return 1;
}

void dm_rh_recovery_prepare(struct dm_region_hash *rh)
{
	/* Extra reference to avoid race with dm_rh_stop_recovery */
	atomic_inc(&rh->recovery_in_flight);

	while (!down_trylock(&rh->recovery_count)) {
		atomic_inc(&rh->recovery_in_flight);
		if (__rh_recovery_prepare(rh) <= 0) {
			atomic_dec(&rh->recovery_in_flight);
			up(&rh->recovery_count);
			break;
		}
	}

	/* Drop the extra reference */
	if (atomic_dec_and_test(&rh->recovery_in_flight))
		rh->wakeup_all_recovery_waiters(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_prepare);

/*
 * Returns any quiesced regions.
 */
struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh)
{
	struct dm_region *reg = NULL;

	spin_lock_irq(&rh->region_lock);
	if (!list_empty(&rh->quiesced_regions)) {
		reg = list_entry(rh->quiesced_regions.next,
				 struct dm_region, list);
		list_del_init(&reg->list);	/* remove from the quiesced list */
	}
	spin_unlock_irq(&rh->region_lock);

	return reg;
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_start);

void dm_rh_recovery_end(struct dm_region *reg, int success)
{
	struct dm_region_hash *rh = reg->rh;

	spin_lock_irq(&rh->region_lock);
	if (success)
		list_add(&reg->list, &reg->rh->recovered_regions);
	else
		list_add(&reg->list, &reg->rh->failed_recovered_regions);

	spin_unlock_irq(&rh->region_lock);

	rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_end);
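
/*
 * Hedged sketch of a full recovery cycle as a caller's worker thread might
 * drive it (ordering reconstructed from this file; the kcopyd step lives in
 * the caller, not here):
 *
 *	dm_rh_recovery_prepare(rh);		-- quiesce up to max_recovery regions
 *	while ((reg = dm_rh_recovery_start(rh)))
 *		... kick off a kcopyd copy for reg ...
 *	(on copy completion)
 *	dm_rh_recovery_end(reg, success);	-- queue as (failed_)recovered
 *	dm_rh_update_states(rh, errors_handled); -- complete the resync work
 */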

/* Return recovery in flight count. */
int dm_rh_recovery_in_flight(struct dm_region_hash *rh)
{
	return atomic_read(&rh->recovery_in_flight);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_in_flight);

int dm_rh_flush(struct dm_region_hash *rh)
{
	return rh->log->type->flush(rh->log);
}
EXPORT_SYMBOL_GPL(dm_rh_flush);

void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio)
{
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, dm_rh_bio_to_region(rh, bio));
	bio_list_add(&reg->delayed_bios, bio);
	read_unlock(&rh->hash_lock);
}
EXPORT_SYMBOL_GPL(dm_rh_delay);

void dm_rh_stop_recovery(struct dm_region_hash *rh)
{
	int i;

	/* wait for any recovering regions */
	for (i = 0; i < rh->max_recovery; i++)
		down(&rh->recovery_count);
}
EXPORT_SYMBOL_GPL(dm_rh_stop_recovery);

void dm_rh_start_recovery(struct dm_region_hash *rh)
{
	int i;

	for (i = 0; i < rh->max_recovery; i++)
		up(&rh->recovery_count);

	rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_start_recovery);

MODULE_DESCRIPTION(DM_NAME " region hash");
MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");