dm-snap.c source code [linux/drivers/md/dm-snap.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	/*
3	* Copyright (C) 2001-2002 Sistina Software (UK) Limited.
4	*
5	* This file is released under the GPL.
6	*/
7
8	#include <linux/blkdev.h>
9	#include <linux/device-mapper.h>
10	#include <linux/delay.h>
11	#include <linux/fs.h>
12	#include <linux/init.h>
13	#include <linux/kdev_t.h>
14	#include <linux/list.h>
15	#include <linux/list_bl.h>
16	#include <linux/mempool.h>
17	#include <linux/module.h>
18	#include <linux/slab.h>
19	#include <linux/vmalloc.h>
20	#include <linux/log2.h>
21	#include <linux/dm-kcopyd.h>
22
23	#include "dm.h"
24
25	#include "dm-exception-store.h"
26
27	#define DM_MSG_PREFIX "snapshots"
28
/* Name under which the snapshot-merge target type is identified. */
static const char dm_snapshot_merge_target_name[] = "snapshot-merge";

/*
 * True when @ti is a snapshot-merge target.  The check compares the
 * address of the type's name with dm_snapshot_merge_target_name, not the
 * string contents, so the type must have been set up with this very array.
 */
#define dm_target_is_snapshot_merge(ti) \
	((ti)->type->name == dm_snapshot_merge_target_name)
33
/*
 * The size of the mempool used to track chunks in use.
 */
#define MIN_IOS 256

/*
 * Bucket count of the tracked-chunk hash.  It must remain a power of two
 * because DM_TRACKED_CHUNK_HASH() selects a bucket by masking with
 * (DM_TRACKED_CHUNK_HASH_SIZE - 1).
 */
#define DM_TRACKED_CHUNK_HASH_SIZE 16
#define DM_TRACKED_CHUNK_HASH(x)	((unsigned long)(x) & \
					 (DM_TRACKED_CHUNK_HASH_SIZE - 1))
42
43	struct dm_exception_table {
44	uint32_t hash_mask;
45	unsigned int hash_shift;
46	struct hlist_bl_head *table;
47	};
48
49	struct dm_snapshot {
50	struct rw_semaphore lock;
51
52	struct dm_dev *origin;
53	struct dm_dev *cow;
54
55	struct dm_target *ti;
56
57	/ List of snapshots per Origin /
58	struct list_head list;
59
60	/*
61	* You can't use a snapshot if this is 0 (e.g. if full).
62	* A snapshot-merge target never clears this.
63	*/
64	int valid;
65
66	/*
67	* The snapshot overflowed because of a write to the snapshot device.
68	* We don't have to invalidate the snapshot in this case, but we need
69	* to prevent further writes.
70	*/
71	int snapshot_overflowed;
72
73	/ Origin writes don't trigger exceptions until this is set /
74	int active;
75
76	atomic_t pending_exceptions_count;
77
78	spinlock_t pe_allocation_lock;
79
80	/ Protected by "pe_allocation_lock" /
81	sector_t exception_start_sequence;
82
83	/ Protected by kcopyd single-threaded callback /
84	sector_t exception_complete_sequence;
85
86	/*
87	* A list of pending exceptions that completed out of order.
88	* Protected by kcopyd single-threaded callback.
89	*/
90	struct rb_root out_of_order_tree;
91
92	mempool_t pending_pool;
93
94	struct dm_exception_table pending;
95	struct dm_exception_table complete;
96
97	/*
98	* pe_lock protects all pending_exception operations and access
99	* as well as the snapshot_bios list.
100	*/
101	spinlock_t pe_lock;
102
103	/ Chunks with outstanding reads /
104	spinlock_t tracked_chunk_lock;
105	struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
106
107	/ The on disk metadata handler /
108	struct dm_exception_store *store;
109
110	unsigned int in_progress;
111	struct wait_queue_head in_progress_wait;
112
113	struct dm_kcopyd_client *kcopyd_client;
114
115	/ Wait for events based on state_bits /
116	unsigned long state_bits;
117
118	/ Range of chunks currently being merged. /
119	chunk_t first_merging_chunk;
120	int num_merging_chunks;
121
122	/*
123	* The merge operation failed if this flag is set.
124	* Failure modes are handled as follows:
125	* - I/O error reading the header
126	* => don't load the target; abort.
127	* - Header does not have "valid" flag set
128	* => use the origin; forget about the snapshot.
129	* - I/O error when reading exceptions
130	* => don't load the target; abort.
131	* (We can't use the intermediate origin state.)
132	* - I/O error while merging
133	* => stop merging; set merge_failed; process I/O normally.
134	*/
135	bool merge_failed:`1`;
136
137	bool discard_zeroes_cow:`1`;
138	bool discard_passdown_origin:`1`;
139
140	/*
141	* Incoming bios that overlap with chunks being merged must wait
142	* for them to be committed.
143	*/
144	struct bio_list bios_queued_during_merge;
145	};
146
/*
 * Bit numbers within dm_snapshot.state_bits:
 *   RUNNING_MERGE  - Merge operation is in progress.
 *   SHUTDOWN_MERGE - Set to signal that merge needs to be stopped;
 *                    cleared afterwards.
 */
#define RUNNING_MERGE          0
#define SHUTDOWN_MERGE         1
155
156	/*
157	* Maximum number of chunks being copied on write.
158	*
159	* The value was decided experimentally as a trade-off between memory
160	* consumption, stalling the kernel's workqueues and maintaining a high enough
161	* throughput.
162	*/
163	#define DEFAULT_COW_THRESHOLD 2048
164
165	static unsigned int cow_threshold = DEFAULT_COW_THRESHOLD;
166	module_param_named(snapshot_cow_threshold, cow_threshold, uint, `0644`);
167	MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write");
168
169	DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
170	"A percentage of time allocated for copy on write");
171
172	struct dm_dev dm_snap_origin(struct* dm_snapshot *s)
173	{
174	return s->origin;
175	}
176	EXPORT_SYMBOL(dm_snap_origin);
177
178	struct dm_dev dm_snap_cow(struct* dm_snapshot *s)
179	{
180	return s->cow;
181	}
182	EXPORT_SYMBOL(dm_snap_cow);
183
184	static sector_t chunk_to_sector(struct dm_exception_store *store,
185	chunk_t chunk)
186	{
187	return chunk << store->chunk_shift;
188	}
189
/* Return non-zero when @lhs and @rhs refer to the same block device. */
static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
{
	/*
	 * There is only ever one instance of a particular block
	 * device so we can compare pointers safely.
	 */
	return lhs == rhs;
}
198
199	struct dm_snap_pending_exception {
200	struct dm_exception e;
201
202	/*
203	* Origin buffers waiting for this to complete are held
204	* in a bio list
205	*/
206	struct bio_list origin_bios;
207	struct bio_list snapshot_bios;
208
209	/ Pointer back to snapshot context /
210	struct dm_snapshot *snap;
211
212	/*
213	* 1 indicates the exception has already been sent to
214	* kcopyd.
215	*/
216	int started;
217
218	/ There was copying error. /
219	int copy_error;
220
221	/ A sequence number, it is used for in-order completion. /
222	sector_t exception_sequence;
223
224	struct rb_node out_of_order_node;
225
226	/*
227	* For writing a complete chunk, bypassing the copy.
228	*/
229	struct bio *full_bio;
230	bio_end_io_t *full_bio_end_io;
231	};
232
/*
 * Slab caches.  exception_cache supplies the struct dm_exception objects
 * handed out by alloc_completed_exception(); pending_cache presumably backs
 * pending exceptions (its users are not visible here - TODO confirm).
 *
 * The hash table mapping origin volumes to lists of snapshots, and the lock
 * protecting it, are the _origins / _origins_lock variables further below.
 */
static struct kmem_cache *exception_cache;
static struct kmem_cache *pending_cache;
239
240	struct dm_snap_tracked_chunk {
241	struct hlist_node node;
242	chunk_t chunk;
243	};
244
245	static void init_tracked_chunk(struct bio *bio)
246	{
247	struct dm_snap_tracked_chunk c = dm_per_bio_data(bio, data_size: sizeof(struct* dm_snap_tracked_chunk));
248
249	INIT_HLIST_NODE(h: &c->node);
250	}
251
252	static bool is_bio_tracked(struct bio *bio)
253	{
254	struct dm_snap_tracked_chunk c = dm_per_bio_data(bio, data_size: sizeof(struct* dm_snap_tracked_chunk));
255
256	return !hlist_unhashed(h: &c->node);
257	}
258
259	static void track_chunk(struct dm_snapshot s, struct* bio *bio, chunk_t chunk)
260	{
261	struct dm_snap_tracked_chunk c = dm_per_bio_data(bio, data_size: sizeof(struct* dm_snap_tracked_chunk));
262
263	c->chunk = chunk;
264
265	spin_lock_irq(lock: &s->tracked_chunk_lock);
266	hlist_add_head(n: &c->node,
267	h: &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]);
268	spin_unlock_irq(lock: &s->tracked_chunk_lock);
269	}
270
271	static void stop_tracking_chunk(struct dm_snapshot s, struct* bio *bio)
272	{
273	struct dm_snap_tracked_chunk c = dm_per_bio_data(bio, data_size: sizeof(struct* dm_snap_tracked_chunk));
274	unsigned long flags;
275
276	spin_lock_irqsave(&s->tracked_chunk_lock, flags);
277	hlist_del(n: &c->node);
278	spin_unlock_irqrestore(lock: &s->tracked_chunk_lock, flags);
279	}
280
281	static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk)
282	{
283	struct dm_snap_tracked_chunk *c;
284	int found = `0`;
285
286	spin_lock_irq(lock: &s->tracked_chunk_lock);
287
288	hlist_for_each_entry(c,
289	&s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) {
290	if (c->chunk == chunk) {
291	found = `1`;
292	break;
293	}
294	}
295
296	spin_unlock_irq(lock: &s->tracked_chunk_lock);
297
298	return found;
299	}
300
301	/*
302	* This conflicting I/O is extremely improbable in the caller,
303	* so fsleep(1000) is sufficient and there is no need for a wait queue.
304	*/
305	static void __check_for_conflicting_io(struct dm_snapshot *s, chunk_t chunk)
306	{
307	while (__chunk_is_tracked(s, chunk))
308	fsleep(usecs: `1000`);
309	}
310
311	/*
312	* One of these per registered origin, held in the snapshot_origins hash
313	*/
314	struct origin {
315	/ The origin device /
316	struct block_device *bdev;
317
318	struct list_head hash_list;
319
320	/ List of snapshots for this origin /
321	struct list_head snapshots;
322	};
323
324	/*
325	* This structure is allocated for each origin target
326	*/
327	struct dm_origin {
328	struct dm_dev *dev;
329	struct dm_target *ti;
330	unsigned int split_boundary;
331	struct list_head hash_list;
332	};
333
/*
 * Size of the hash table for origin volumes. If we make this
 * the size of the minors list then it should be nearly perfect
 *
 * ORIGIN_MASK must equal ORIGIN_HASH_SIZE - 1, since origin_hash()
 * picks a bucket by masking the device number with it.
 */
#define ORIGIN_HASH_SIZE 256
#define ORIGIN_MASK      0xFF
340	static struct list_head *_origins;
341	static struct list_head *_dm_origins;
342	static struct rw_semaphore _origins_lock;
343
344	static DECLARE_WAIT_QUEUE_HEAD(_pending_exceptions_done);
345	static DEFINE_SPINLOCK(_pending_exceptions_done_spinlock);
346	static uint64_t _pending_exceptions_done_count;
347
348	static int init_origin_hash(void)
349	{
350	int i;
351
352	_origins = kmalloc_array(ORIGIN_HASH_SIZE, size: sizeof(struct list_head),
353	GFP_KERNEL);
354	if (!_origins) {
355	DMERR("unable to allocate memory for _origins");
356	return -ENOMEM;
357	}
358	for (i = `0`; i < ORIGIN_HASH_SIZE; i++)
359	INIT_LIST_HEAD(list: _origins + i);
360
361	_dm_origins = kmalloc_array(ORIGIN_HASH_SIZE,
362	size: sizeof(struct list_head),
363	GFP_KERNEL);
364	if (!_dm_origins) {
365	DMERR("unable to allocate memory for _dm_origins");
366	kfree(objp: _origins);
367	return -ENOMEM;
368	}
369	for (i = `0`; i < ORIGIN_HASH_SIZE; i++)
370	INIT_LIST_HEAD(list: _dm_origins + i);
371
372	init_rwsem(&_origins_lock);
373
374	return `0`;
375	}
376
377	static void exit_origin_hash(void)
378	{
379	kfree(objp: _origins);
380	kfree(objp: _dm_origins);
381	}
382
383	static unsigned int origin_hash(struct block_device *bdev)
384	{
385	return bdev->bd_dev & ORIGIN_MASK;
386	}
387
388	static struct origin __lookup_origin(struct* block_device *origin)
389	{
390	struct list_head *ol;
391	struct origin *o;
392
393	ol = &_origins[origin_hash(bdev: origin)];
394	list_for_each_entry(o, ol, hash_list)
395	if (bdev_equal(lhs: o->bdev, rhs: origin))
396	return o;
397
398	return NULL;
399	}
400
401	static void __insert_origin(struct origin *o)
402	{
403	struct list_head *sl = &_origins[origin_hash(bdev: o->bdev)];
404
405	list_add_tail(new: &o->hash_list, head: sl);
406	}
407
408	static struct dm_origin __lookup_dm_origin(struct* block_device *origin)
409	{
410	struct list_head *ol;
411	struct dm_origin *o;
412
413	ol = &_dm_origins[origin_hash(bdev: origin)];
414	list_for_each_entry(o, ol, hash_list)
415	if (bdev_equal(lhs: o->dev->bdev, rhs: origin))
416	return o;
417
418	return NULL;
419	}
420
421	static void __insert_dm_origin(struct dm_origin *o)
422	{
423	struct list_head *sl = &_dm_origins[origin_hash(bdev: o->dev->bdev)];
424
425	list_add_tail(new: &o->hash_list, head: sl);
426	}
427
428	static void __remove_dm_origin(struct dm_origin *o)
429	{
430	list_del(entry: &o->hash_list);
431	}
432
433	/*
434	* _origins_lock must be held when calling this function.
435	* Returns number of snapshots registered using the supplied cow device, plus:
436	* snap_src - a snapshot suitable for use as a source of exception handover
437	* snap_dest - a snapshot capable of receiving exception handover.
438	* snap_merge - an existing snapshot-merge target linked to the same origin.
439	* There can be at most one snapshot-merge target. The parameter is optional.
440	*
441	* Possible return values and states of snap_src and snap_dest.
442	* 0: NULL, NULL - first new snapshot
443	* 1: snap_src, NULL - normal snapshot
444	* 2: snap_src, snap_dest - waiting for handover
445	* 2: snap_src, NULL - handed over, waiting for old to be deleted
446	* 1: NULL, snap_dest - source got destroyed without handover
447	*/
448	static int __find_snapshots_sharing_cow(struct dm_snapshot *snap,
449	struct dm_snapshot **snap_src,
450	struct dm_snapshot **snap_dest,
451	struct dm_snapshot **snap_merge)
452	{
453	struct dm_snapshot *s;
454	struct origin *o;
455	int count = `0`;
456	int active;
457
458	o = __lookup_origin(origin: snap->origin->bdev);
459	if (!o)
460	goto out;
461
462	list_for_each_entry(s, &o->snapshots, list) {
463	if (dm_target_is_snapshot_merge(s->ti) && snap_merge)
464	*snap_merge = s;
465	if (!bdev_equal(lhs: s->cow->bdev, rhs: snap->cow->bdev))
466	continue;
467
468	down_read(sem: &s->lock);
469	active = s->active;
470	up_read(sem: &s->lock);
471
472	if (active) {
473	if (snap_src)
474	*snap_src = s;
475	} else if (snap_dest)
476	*snap_dest = s;
477
478	count++;
479	}
480
481	out:
482	return count;
483	}
484
485	/*
486	* On success, returns 1 if this snapshot is a handover destination,
487	* otherwise returns 0.
488	*/
489	static int __validate_exception_handover(struct dm_snapshot *snap)
490	{
491	struct dm_snapshot snap_src = NULL, snap_dest = NULL;
492	struct dm_snapshot *snap_merge = NULL;
493
494	/ Does snapshot need exceptions handed over to it? /
495	if ((__find_snapshots_sharing_cow(snap, snap_src: &snap_src, snap_dest: &snap_dest,
496	snap_merge: &snap_merge) == `2`) \|\|
497	snap_dest) {
498	snap->ti->error = "Snapshot cow pairing for exception table handover failed";
499	return -EINVAL;
500	}
501
502	/*
503	* If no snap_src was found, snap cannot become a handover
504	* destination.
505	*/
506	if (!snap_src)
507	return `0`;
508
509	/*
510	* Non-snapshot-merge handover?
511	*/
512	if (!dm_target_is_snapshot_merge(snap->ti))
513	return `1`;
514
515	/*
516	* Do not allow more than one merging snapshot.
517	*/
518	if (snap_merge) {
519	snap->ti->error = "A snapshot is already merging.";
520	return -EINVAL;
521	}
522
523	if (!snap_src->store->type->prepare_merge \|\|
524	!snap_src->store->type->commit_merge) {
525	snap->ti->error = "Snapshot exception store does not support snapshot-merge.";
526	return -EINVAL;
527	}
528
529	return `1`;
530	}
531
532	static void __insert_snapshot(struct origin o, struct* dm_snapshot *s)
533	{
534	struct dm_snapshot *l;
535
536	/ Sort the list according to chunk size, largest-first smallest-last /
537	list_for_each_entry(l, &o->snapshots, list)
538	if (l->store->chunk_size < s->store->chunk_size)
539	break;
540	list_add_tail(new: &s->list, head: &l->list);
541	}
542
543	/*
544	* Make a note of the snapshot and its origin so we can look it
545	* up when the origin has a write on it.
546	*
547	* Also validate snapshot exception store handovers.
548	* On success, returns 1 if this registration is a handover destination,
549	* otherwise returns 0.
550	*/
551	static int register_snapshot(struct dm_snapshot *snap)
552	{
553	struct origin o, new_o = NULL;
554	struct block_device *bdev = snap->origin->bdev;
555	int r = `0`;
556
557	new_o = kmalloc(size: sizeof(*new_o), GFP_KERNEL);
558	if (!new_o)
559	return -ENOMEM;
560
561	down_write(sem: &_origins_lock);
562
563	r = __validate_exception_handover(snap);
564	if (r < `0`) {
565	kfree(objp: new_o);
566	goto out;
567	}
568
569	o = __lookup_origin(origin: bdev);
570	if (o)
571	kfree(objp: new_o);
572	else {
573	/ New origin /
574	o = new_o;
575
576	/ Initialise the struct /
577	INIT_LIST_HEAD(list: &o->snapshots);
578	o->bdev = bdev;
579
580	__insert_origin(o);
581	}
582
583	__insert_snapshot(o, s: snap);
584
585	out:
586	up_write(sem: &_origins_lock);
587
588	return r;
589	}
590
591	/*
592	* Move snapshot to correct place in list according to chunk size.
593	*/
594	static void reregister_snapshot(struct dm_snapshot *s)
595	{
596	struct block_device *bdev = s->origin->bdev;
597
598	down_write(sem: &_origins_lock);
599
600	list_del(entry: &s->list);
601	__insert_snapshot(o: __lookup_origin(origin: bdev), s);
602
603	up_write(sem: &_origins_lock);
604	}
605
606	static void unregister_snapshot(struct dm_snapshot *s)
607	{
608	struct origin *o;
609
610	down_write(sem: &_origins_lock);
611	o = __lookup_origin(origin: s->origin->bdev);
612
613	list_del(entry: &s->list);
614	if (o && list_empty(head: &o->snapshots)) {
615	list_del(entry: &o->hash_list);
616	kfree(objp: o);
617	}
618
619	up_write(sem: &_origins_lock);
620	}
621
622	/*
623	* Implementation of the exception hash tables.
624	* The lowest hash_shift bits of the chunk number are ignored, allowing
625	* some consecutive chunks to be grouped together.
626	*/
627	static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk);
628
/*
 * Lock to protect access to the completed and pending exception hash tables.
 * It is the pair of hash buckets that a given chunk maps to in each table.
 */
struct dm_exception_table_lock {
	struct hlist_bl_head *complete_slot;
	struct hlist_bl_head *pending_slot;
};
634
635	static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk,
636	struct dm_exception_table_lock *lock)
637	{
638	struct dm_exception_table *complete = &s->complete;
639	struct dm_exception_table *pending = &s->pending;
640
641	lock->complete_slot = &complete->table[exception_hash(et: complete, chunk)];
642	lock->pending_slot = &pending->table[exception_hash(et: pending, chunk)];
643	}
644
645	static void dm_exception_table_lock(struct dm_exception_table_lock *lock)
646	{
647	hlist_bl_lock(b: lock->complete_slot);
648	hlist_bl_lock(b: lock->pending_slot);
649	}
650
651	static void dm_exception_table_unlock(struct dm_exception_table_lock *lock)
652	{
653	hlist_bl_unlock(b: lock->pending_slot);
654	hlist_bl_unlock(b: lock->complete_slot);
655	}
656
657	static int dm_exception_table_init(struct dm_exception_table *et,
658	uint32_t size, unsigned int hash_shift)
659	{
660	unsigned int i;
661
662	et->hash_shift = hash_shift;
663	et->hash_mask = size - `1`;
664	et->table = kvmalloc_array(n: size, size: sizeof(struct hlist_bl_head),
665	GFP_KERNEL);
666	if (!et->table)
667	return -ENOMEM;
668
669	for (i = `0`; i < size; i++)
670	INIT_HLIST_BL_HEAD(et->table + i);
671
672	return `0`;
673	}
674
675	static void dm_exception_table_exit(struct dm_exception_table *et,
676	struct kmem_cache *mem)
677	{
678	struct hlist_bl_head *slot;
679	struct dm_exception *ex;
680	struct hlist_bl_node pos, n;
681	int i, size;
682
683	size = et->hash_mask + `1`;
684	for (i = `0`; i < size; i++) {
685	slot = et->table + i;
686
687	hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list) {
688	kmem_cache_free(s: mem, objp: ex);
689	cond_resched();
690	}
691	}
692
693	kvfree(addr: et->table);
694	}
695
696	static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk)
697	{
698	return (chunk >> et->hash_shift) & et->hash_mask;
699	}
700
701	static void dm_remove_exception(struct dm_exception *e)
702	{
703	hlist_bl_del(n: &e->hash_list);
704	}
705
706	/*
707	* Return the exception data for a sector, or NULL if not
708	* remapped.
709	*/
710	static struct dm_exception dm_lookup_exception(struct* dm_exception_table *et,
711	chunk_t chunk)
712	{
713	struct hlist_bl_head *slot;
714	struct hlist_bl_node *pos;
715	struct dm_exception *e;
716
717	slot = &et->table[exception_hash(et, chunk)];
718	hlist_bl_for_each_entry(e, pos, slot, hash_list)
719	if (chunk >= e->old_chunk &&
720	chunk <= e->old_chunk + dm_consecutive_chunk_count(e))
721	return e;
722
723	return NULL;
724	}
725
726	static struct dm_exception *alloc_completed_exception(gfp_t gfp)
727	{
728	struct dm_exception *e;
729
730	e = kmem_cache_alloc(cachep: exception_cache, flags: gfp);
731	if (!e && gfp == GFP_NOIO)
732	e = kmem_cache_alloc(cachep: exception_cache, GFP_ATOMIC);
733
734	return e;
735	}
736
737	static void free_completed_exception(struct dm_exception *e)
738	{
739	kmem_cache_free(s: exception_cache, objp: e);
740	}
741
742	static struct dm_snap_pending_exception alloc_pending_exception(struct* dm_snapshot *s)
743	{
744	struct dm_snap_pending_exception *pe = mempool_alloc(pool: &s->pending_pool,
745	GFP_NOIO);
746
747	atomic_inc(v: &s->pending_exceptions_count);
748	pe->snap = s;
749
750	return pe;
751	}
752
753	static void free_pending_exception(struct dm_snap_pending_exception *pe)
754	{
755	struct dm_snapshot *s = pe->snap;
756
757	mempool_free(element: pe, pool: &s->pending_pool);
758	smp_mb__before_atomic();
759	atomic_dec(v: &s->pending_exceptions_count);
760	}
761
762	static void dm_insert_exception(struct dm_exception_table *eh,
763	struct dm_exception *new_e)
764	{
765	struct hlist_bl_head *l;
766	struct hlist_bl_node *pos;
767	struct dm_exception *e = NULL;
768
769	l = &eh->table[exception_hash(et: eh, chunk: new_e->old_chunk)];
770
771	/ Add immediately if this table doesn't support consecutive chunks /
772	if (!eh->hash_shift)
773	goto out;
774
775	/ List is ordered by old_chunk /
776	hlist_bl_for_each_entry(e, pos, l, hash_list) {
777	/ Insert after an existing chunk? /
778	if (new_e->old_chunk == (e->old_chunk +
779	dm_consecutive_chunk_count(e) + `1`) &&
780	new_e->new_chunk == (dm_chunk_number(chunk: e->new_chunk) +
781	dm_consecutive_chunk_count(e) + `1`)) {
782	dm_consecutive_chunk_count_inc(e);
783	free_completed_exception(e: new_e);
784	return;
785	}
786
787	/ Insert before an existing chunk? /
788	if (new_e->old_chunk == (e->old_chunk - `1`) &&
789	new_e->new_chunk == (dm_chunk_number(chunk: e->new_chunk) - `1`)) {
790	dm_consecutive_chunk_count_inc(e);
791	e->old_chunk--;
792	e->new_chunk--;
793	free_completed_exception(e: new_e);
794	return;
795	}
796
797	if (new_e->old_chunk < e->old_chunk)
798	break;
799	}
800
801	out:
802	if (!e) {
803	/*
804	* Either the table doesn't support consecutive chunks or slot
805	* l is empty.
806	*/
807	hlist_bl_add_head(n: &new_e->hash_list, h: l);
808	} else if (new_e->old_chunk < e->old_chunk) {
809	/ Add before an existing exception /
810	hlist_bl_add_before(n: &new_e->hash_list, next: &e->hash_list);
811	} else {
812	/ Add to l's tail: e is the last exception in this slot /
813	hlist_bl_add_behind(n: &new_e->hash_list, prev: &e->hash_list);
814	}
815	}
816
817	/*
818	* Callback used by the exception stores to load exceptions when
819	* initialising.
820	*/
821	static int dm_add_exception(void *context, chunk_t old, chunk_t new)
822	{
823	struct dm_exception_table_lock lock;
824	struct dm_snapshot *s = context;
825	struct dm_exception *e;
826
827	e = alloc_completed_exception(GFP_KERNEL);
828	if (!e)
829	return -ENOMEM;
830
831	e->old_chunk = old;
832
833	/ Consecutive_count is implicitly initialised to zero /
834	e->new_chunk = new;
835
836	/*
837	* Although there is no need to lock access to the exception tables
838	* here, if we don't then hlist_bl_add_head(), called by
839	* dm_insert_exception(), will complain about accessing the
840	* corresponding list without locking it first.
841	*/
842	dm_exception_table_lock_init(s, chunk: old, lock: &lock);
843
844	dm_exception_table_lock(lock: &lock);
845	dm_insert_exception(eh: &s->complete, new_e: e);
846	dm_exception_table_unlock(lock: &lock);
847
848	return `0`;
849	}
850
851	/*
852	* Return a minimum chunk size of all snapshots that have the specified origin.
853	* Return zero if the origin has no snapshots.
854	*/
855	static uint32_t __minimum_chunk_size(struct origin *o)
856	{
857	struct dm_snapshot *snap;
858	unsigned int chunk_size = rounddown_pow_of_two(UINT_MAX);
859
860	if (o)
861	list_for_each_entry(snap, &o->snapshots, list)
862	chunk_size = min_not_zero(chunk_size,
863	snap->store->chunk_size);
864
865	return (uint32_t) chunk_size;
866	}
867
868	/*
869	* Hard coded magic.
870	*/
871	static int calc_max_buckets(void)
872	{
873	/ use a fixed size of 2MB /
874	unsigned long mem = `2` * `1024` * `1024`;
875
876	mem /= sizeof(struct hlist_bl_head);
877
878	return mem;
879	}
880
881	/*
882	* Allocate room for a suitable hash table.
883	*/
884	static int init_hash_tables(struct dm_snapshot *s)
885	{
886	sector_t hash_size, cow_dev_size, max_buckets;
887
888	/*
889	* Calculate based on the size of the original volume or
890	* the COW volume...
891	*/
892	cow_dev_size = get_dev_size(bdev: s->cow->bdev);
893	max_buckets = calc_max_buckets();
894
895	hash_size = cow_dev_size >> s->store->chunk_shift;
896	hash_size = min(hash_size, max_buckets);
897
898	if (hash_size < `64`)
899	hash_size = `64`;
900	hash_size = rounddown_pow_of_two(hash_size);
901	if (dm_exception_table_init(et: &s->complete, size: hash_size,
902	DM_CHUNK_CONSECUTIVE_BITS))
903	return -ENOMEM;
904
905	/*
906	* Allocate hash table for in-flight exceptions
907	* Make this smaller than the real hash table
908	*/
909	hash_size >>= `3`;
910	if (hash_size < `64`)
911	hash_size = `64`;
912
913	if (dm_exception_table_init(et: &s->pending, size: hash_size, hash_shift: `0`)) {
914	dm_exception_table_exit(et: &s->complete, mem: exception_cache);
915	return -ENOMEM;
916	}
917
918	return `0`;
919	}
920
921	static void merge_shutdown(struct dm_snapshot *s)
922	{
923	clear_bit_unlock(RUNNING_MERGE, addr: &s->state_bits);
924	smp_mb__after_atomic();
925	wake_up_bit(word: &s->state_bits, RUNNING_MERGE);
926	}
927
928	static struct bio __release_queued_bios_after_merge(struct* dm_snapshot *s)
929	{
930	s->first_merging_chunk = `0`;
931	s->num_merging_chunks = `0`;
932
933	return bio_list_get(bl: &s->bios_queued_during_merge);
934	}
935
936	/*
937	* Remove one chunk from the index of completed exceptions.
938	*/
939	static int __remove_single_exception_chunk(struct dm_snapshot *s,
940	chunk_t old_chunk)
941	{
942	struct dm_exception *e;
943
944	e = dm_lookup_exception(et: &s->complete, chunk: old_chunk);
945	if (!e) {
946	DMERR("Corruption detected: exception for block %llu is on disk but not in memory",
947	(unsigned long long)old_chunk);
948	return -EINVAL;
949	}
950
951	/*
952	* If this is the only chunk using this exception, remove exception.
953	*/
954	if (!dm_consecutive_chunk_count(e)) {
955	dm_remove_exception(e);
956	free_completed_exception(e);
957	return `0`;
958	}
959
960	/*
961	* The chunk may be either at the beginning or the end of a
962	* group of consecutive chunks - never in the middle. We are
963	* removing chunks in the opposite order to that in which they
964	* were added, so this should always be true.
965	* Decrement the consecutive chunk counter and adjust the
966	* starting point if necessary.
967	*/
968	if (old_chunk == e->old_chunk) {
969	e->old_chunk++;
970	e->new_chunk++;
971	} else if (old_chunk != e->old_chunk +
972	dm_consecutive_chunk_count(e)) {
973	DMERR("Attempt to merge block %llu from the middle of a chunk range [%llu - %llu]",
974	(unsigned long long)old_chunk,
975	(unsigned long long)e->old_chunk,
976	(unsigned long long)
977	e->old_chunk + dm_consecutive_chunk_count(e));
978	return -EINVAL;
979	}
980
981	dm_consecutive_chunk_count_dec(e);
982
983	return `0`;
984	}
985
986	static void flush_bios(struct bio *bio);
987
988	static int remove_single_exception_chunk(struct dm_snapshot *s)
989	{
990	struct bio *b = NULL;
991	int r;
992	chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - `1`;
993
994	down_write(sem: &s->lock);
995
996	/*
997	* Process chunks (and associated exceptions) in reverse order
998	* so that dm_consecutive_chunk_count_dec() accounting works.
999	*/
1000	do {
1001	r = __remove_single_exception_chunk(s, old_chunk);
1002	if (r)
1003	goto out;
1004	} while (old_chunk-- > s->first_merging_chunk);
1005
1006	b = __release_queued_bios_after_merge(s);
1007
1008	out:
1009	up_write(sem: &s->lock);
1010	if (b)
1011	flush_bios(bio: b);
1012
1013	return r;
1014	}
1015
1016	static int origin_write_extent(struct dm_snapshot *merging_snap,
1017	sector_t sector, unsigned int chunk_size);
1018
1019	static void merge_callback(int read_err, unsigned long write_err,
1020	void *context);
1021
1022	static uint64_t read_pending_exceptions_done_count(void)
1023	{
1024	uint64_t pending_exceptions_done;
1025
1026	spin_lock(lock: &_pending_exceptions_done_spinlock);
1027	pending_exceptions_done = _pending_exceptions_done_count;
1028	spin_unlock(lock: &_pending_exceptions_done_spinlock);
1029
1030	return pending_exceptions_done;
1031	}
1032
1033	static void increment_pending_exceptions_done_count(void)
1034	{
1035	spin_lock(lock: &_pending_exceptions_done_spinlock);
1036	_pending_exceptions_done_count++;
1037	spin_unlock(lock: &_pending_exceptions_done_spinlock);
1038
1039	wake_up_all(&_pending_exceptions_done);
1040	}
1041
1042	static void snapshot_merge_next_chunks(struct dm_snapshot *s)
1043	{
1044	int i, linear_chunks;
1045	chunk_t old_chunk, new_chunk;
1046	struct dm_io_region src, dest;
1047	sector_t io_size;
1048	uint64_t previous_count;
1049
1050	BUG_ON(!test_bit(RUNNING_MERGE, &s->state_bits));
1051	if (unlikely(test_bit(SHUTDOWN_MERGE, &s->state_bits)))
1052	goto shut;
1053
1054	/*
1055	* valid flag never changes during merge, so no lock required.
1056	*/
1057	if (!s->valid) {
1058	DMERR("Snapshot is invalid: can't merge");
1059	goto shut;
1060	}
1061
1062	linear_chunks = s->store->type->prepare_merge(s->store, &old_chunk,
1063	&new_chunk);
1064	if (linear_chunks <= `0`) {
1065	if (linear_chunks < `0`) {
1066	DMERR("Read error in exception store: shutting down merge");
1067	down_write(sem: &s->lock);
1068	s->merge_failed = true;
1069	up_write(sem: &s->lock);
1070	}
1071	goto shut;
1072	}
1073
1074	/ Adjust old_chunk and new_chunk to reflect start of linear region /
1075	old_chunk = old_chunk + `1` - linear_chunks;
1076	new_chunk = new_chunk + `1` - linear_chunks;
1077
1078	/*
1079	* Use one (potentially large) I/O to copy all 'linear_chunks'
1080	* from the exception store to the origin
1081	*/
1082	io_size = linear_chunks * s->store->chunk_size;
1083
1084	dest.bdev = s->origin->bdev;
1085	dest.sector = chunk_to_sector(store: s->store, chunk: old_chunk);
1086	dest.count = min(io_size, get_dev_size(dest.bdev) - dest.sector);
1087
1088	src.bdev = s->cow->bdev;
1089	src.sector = chunk_to_sector(store: s->store, chunk: new_chunk);
1090	src.count = dest.count;
1091
1092	/*
1093	* Reallocate any exceptions needed in other snapshots then
1094	* wait for the pending exceptions to complete.
1095	* Each time any pending exception (globally on the system)
1096	* completes we are woken and repeat the process to find out
1097	* if we can proceed. While this may not seem a particularly
1098	* efficient algorithm, it is not expected to have any
1099	* significant impact on performance.
1100	*/
1101	previous_count = read_pending_exceptions_done_count();
1102	while (origin_write_extent(merging_snap: s, sector: dest.sector, chunk_size: io_size)) {
1103	wait_event(_pending_exceptions_done,
1104	(read_pending_exceptions_done_count() !=
1105	previous_count));
1106	/ Retry after the wait, until all exceptions are done. /
1107	previous_count = read_pending_exceptions_done_count();
1108	}
1109
1110	down_write(sem: &s->lock);
1111	s->first_merging_chunk = old_chunk;
1112	s->num_merging_chunks = linear_chunks;
1113	up_write(sem: &s->lock);
1114
1115	/ Wait until writes to all 'linear_chunks' drain /
1116	for (i = `0`; i < linear_chunks; i++)
1117	__check_for_conflicting_io(s, chunk: old_chunk + i);
1118
1119	dm_kcopyd_copy(kc: s->kcopyd_client, from: &src, num_dests: `1`, dests: &dest, flags: `0`, fn: merge_callback, context: s);
1120	return;
1121
1122	shut:
1123	merge_shutdown(s);
1124	}
1125
1126	static void error_bios(struct bio *bio);
1127
1128	static void merge_callback(int read_err, unsigned long write_err, void *context)
1129	{
1130	struct dm_snapshot *s = context;
1131	struct bio *b = NULL;
1132
1133	if (read_err \|\| write_err) {
1134	if (read_err)
1135	DMERR("Read error: shutting down merge.");
1136	else
1137	DMERR("Write error: shutting down merge.");
1138	goto shut;
1139	}
1140
1141	if (blkdev_issue_flush(bdev: s->origin->bdev) < `0`) {
1142	DMERR("Flush after merge failed: shutting down merge");
1143	goto shut;
1144	}
1145
1146	if (s->store->type->commit_merge(s->store,
1147	s->num_merging_chunks) < `0`) {
1148	DMERR("Write error in exception store: shutting down merge");
1149	goto shut;
1150	}
1151
1152	if (remove_single_exception_chunk(s) < `0`)
1153	goto shut;
1154
1155	snapshot_merge_next_chunks(s);
1156
1157	return;
1158
1159	shut:
1160	down_write(sem: &s->lock);
1161	s->merge_failed = true;
1162	b = __release_queued_bios_after_merge(s);
1163	up_write(sem: &s->lock);
1164	error_bios(bio: b);
1165
1166	merge_shutdown(s);
1167	}
1168
1169	static void start_merge(struct dm_snapshot *s)
1170	{
1171	if (!test_and_set_bit(RUNNING_MERGE, addr: &s->state_bits))
1172	snapshot_merge_next_chunks(s);
1173	}
1174
1175	/*
1176	* Stop the merging process and wait until it finishes.
1177	*/
1178	static void stop_merge(struct dm_snapshot *s)
1179	{
1180	set_bit(SHUTDOWN_MERGE, addr: &s->state_bits);
1181	wait_on_bit(word: &s->state_bits, RUNNING_MERGE, TASK_UNINTERRUPTIBLE);
1182	clear_bit(SHUTDOWN_MERGE, addr: &s->state_bits);
1183	}
1184
1185	static int parse_snapshot_features(struct dm_arg_set as, struct* dm_snapshot *s,
1186	struct dm_target *ti)
1187	{
1188	int r;
1189	unsigned int argc;
1190	const char *arg_name;
1191
1192	static const struct dm_arg _args[] = {
1193	{`0`, `2`, "Invalid number of feature arguments"},
1194	};
1195
1196	/*
1197	* No feature arguments supplied.
1198	*/
1199	if (!as->argc)
1200	return `0`;
1201
1202	r = dm_read_arg_group(arg: _args, arg_set: as, num_args: &argc, error: &ti->error);
1203	if (r)
1204	return -EINVAL;
1205
1206	while (argc && !r) {
1207	arg_name = dm_shift_arg(as);
1208	argc--;
1209
1210	if (!strcasecmp(s1: arg_name, s2: "discard_zeroes_cow"))
1211	s->discard_zeroes_cow = true;
1212
1213	else if (!strcasecmp(s1: arg_name, s2: "discard_passdown_origin"))
1214	s->discard_passdown_origin = true;
1215
1216	else {
1217	ti->error = "Unrecognised feature requested";
1218	r = -EINVAL;
1219	break;
1220	}
1221	}
1222
1223	if (!s->discard_zeroes_cow && s->discard_passdown_origin) {
1224	/*
1225	* TODO: really these are disjoint.. but ti->num_discard_bios
1226	* and dm_bio_get_target_bio_nr() require rigid constraints.
1227	*/
1228	ti->error = "discard_passdown_origin feature depends on discard_zeroes_cow";
1229	r = -EINVAL;
1230	}
1231
1232	return r;
1233	}
1234
1235	/*
1236	* Construct a snapshot mapping:
1237	* <origin_dev> <COW-dev> <p\|po\|n> <chunk-size> [<# feature args> [<arg>]*]
1238	*/
1239	static int snapshot_ctr(struct dm_target ti, unsigned* int argc, char **argv)
1240	{
1241	struct dm_snapshot *s;
1242	struct dm_arg_set as;
1243	int i;
1244	int r = -EINVAL;
1245	char origin_path, cow_path;
1246	unsigned int args_used, num_flush_bios = `1`;
1247	blk_mode_t origin_mode = BLK_OPEN_READ;
1248
1249	if (argc < `4`) {
1250	ti->error = "requires 4 or more arguments";
1251	r = -EINVAL;
1252	goto bad;
1253	}
1254
1255	if (dm_target_is_snapshot_merge(ti)) {
1256	num_flush_bios = `2`;
1257	origin_mode = BLK_OPEN_WRITE;
1258	}
1259
1260	s = kzalloc(size: sizeof(*s), GFP_KERNEL);
1261	if (!s) {
1262	ti->error = "Cannot allocate private snapshot structure";
1263	r = -ENOMEM;
1264	goto bad;
1265	}
1266
1267	as.argc = argc;
1268	as.argv = argv;
1269	dm_consume_args(as: &as, num_args: `4`);
1270	r = parse_snapshot_features(as: &as, s, ti);
1271	if (r)
1272	goto bad_features;
1273
1274	origin_path = argv[`0`];
1275	argv++;
1276	argc--;
1277
1278	r = dm_get_device(ti, path: origin_path, mode: origin_mode, result: &s->origin);
1279	if (r) {
1280	ti->error = "Cannot get origin device";
1281	goto bad_origin;
1282	}
1283
1284	cow_path = argv[`0`];
1285	argv++;
1286	argc--;
1287
1288	r = dm_get_device(ti, path: cow_path, mode: dm_table_get_mode(t: ti->table), result: &s->cow);
1289	if (r) {
1290	ti->error = "Cannot get COW device";
1291	goto bad_cow;
1292	}
1293	if (s->cow->bdev && s->cow->bdev == s->origin->bdev) {
1294	ti->error = "COW device cannot be the same as origin device";
1295	r = -EINVAL;
1296	goto bad_store;
1297	}
1298
1299	r = dm_exception_store_create(ti, argc, argv, snap: s, args_used: &args_used, store: &s->store);
1300	if (r) {
1301	ti->error = "Couldn't create exception store";
1302	r = -EINVAL;
1303	goto bad_store;
1304	}
1305
1306	argv += args_used;
1307	argc -= args_used;
1308
1309	s->ti = ti;
1310	s->valid = `1`;
1311	s->snapshot_overflowed = `0`;
1312	s->active = `0`;
1313	atomic_set(v: &s->pending_exceptions_count, i: `0`);
1314	spin_lock_init(&s->pe_allocation_lock);
1315	s->exception_start_sequence = `0`;
1316	s->exception_complete_sequence = `0`;
1317	s->out_of_order_tree = RB_ROOT;
1318	init_rwsem(&s->lock);
1319	INIT_LIST_HEAD(list: &s->list);
1320	spin_lock_init(&s->pe_lock);
1321	s->state_bits = `0`;
1322	s->merge_failed = false;
1323	s->first_merging_chunk = `0`;
1324	s->num_merging_chunks = `0`;
1325	bio_list_init(bl: &s->bios_queued_during_merge);
1326
1327	/ Allocate hash table for COW data /
1328	if (init_hash_tables(s)) {
1329	ti->error = "Unable to allocate hash table space";
1330	r = -ENOMEM;
1331	goto bad_hash_tables;
1332	}
1333
1334	init_waitqueue_head(&s->in_progress_wait);
1335
1336	s->kcopyd_client = dm_kcopyd_client_create(throttle: &dm_kcopyd_throttle);
1337	if (IS_ERR(ptr: s->kcopyd_client)) {
1338	r = PTR_ERR(ptr: s->kcopyd_client);
1339	ti->error = "Could not create kcopyd client";
1340	goto bad_kcopyd;
1341	}
1342
1343	r = mempool_init_slab_pool(pool: &s->pending_pool, MIN_IOS, kc: pending_cache);
1344	if (r) {
1345	ti->error = "Could not allocate mempool for pending exceptions";
1346	goto bad_pending_pool;
1347	}
1348
1349	for (i = `0`; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
1350	INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]);
1351
1352	spin_lock_init(&s->tracked_chunk_lock);
1353
1354	ti->private = s;
1355	ti->num_flush_bios = num_flush_bios;
1356	if (s->discard_zeroes_cow)
1357	ti->num_discard_bios = (s->discard_passdown_origin ? `2` : `1`);
1358	ti->per_io_data_size = sizeof(struct dm_snap_tracked_chunk);
1359
1360	/ Add snapshot to the list of snapshots for this origin /
1361	/ Exceptions aren't triggered till snapshot_resume() is called /
1362	r = register_snapshot(snap: s);
1363	if (r == -ENOMEM) {
1364	ti->error = "Snapshot origin struct allocation failed";
1365	goto bad_load_and_register;
1366	} else if (r < `0`) {
1367	/ invalid handover, register_snapshot has set ti->error /
1368	goto bad_load_and_register;
1369	}
1370
1371	/*
1372	* Metadata must only be loaded into one table at once, so skip this
1373	* if metadata will be handed over during resume.
1374	* Chunk size will be set during the handover - set it to zero to
1375	* ensure it's ignored.
1376	*/
1377	if (r > `0`) {
1378	s->store->chunk_size = `0`;
1379	return `0`;
1380	}
1381
1382	r = s->store->type->read_metadata(s->store, dm_add_exception,
1383	(void *)s);
1384	if (r < `0`) {
1385	ti->error = "Failed to read snapshot metadata";
1386	goto bad_read_metadata;
1387	} else if (r > `0`) {
1388	s->valid = `0`;
1389	DMWARN("Snapshot is marked invalid.");
1390	}
1391
1392	if (!s->store->chunk_size) {
1393	ti->error = "Chunk size not set";
1394	r = -EINVAL;
1395	goto bad_read_metadata;
1396	}
1397
1398	r = dm_set_target_max_io_len(ti, len: s->store->chunk_size);
1399	if (r)
1400	goto bad_read_metadata;
1401
1402	return `0`;
1403
1404	bad_read_metadata:
1405	unregister_snapshot(s);
1406	bad_load_and_register:
1407	mempool_exit(pool: &s->pending_pool);
1408	bad_pending_pool:
1409	dm_kcopyd_client_destroy(kc: s->kcopyd_client);
1410	bad_kcopyd:
1411	dm_exception_table_exit(et: &s->pending, mem: pending_cache);
1412	dm_exception_table_exit(et: &s->complete, mem: exception_cache);
1413	bad_hash_tables:
1414	dm_exception_store_destroy(store: s->store);
1415	bad_store:
1416	dm_put_device(ti, d: s->cow);
1417	bad_cow:
1418	dm_put_device(ti, d: s->origin);
1419	bad_origin:
1420	bad_features:
1421	kfree(objp: s);
1422	bad:
1423	return r;
1424	}
1425
1426	static void __free_exceptions(struct dm_snapshot *s)
1427	{
1428	dm_kcopyd_client_destroy(kc: s->kcopyd_client);
1429	s->kcopyd_client = NULL;
1430
1431	dm_exception_table_exit(et: &s->pending, mem: pending_cache);
1432	dm_exception_table_exit(et: &s->complete, mem: exception_cache);
1433	}
1434
1435	static void __handover_exceptions(struct dm_snapshot *snap_src,
1436	struct dm_snapshot *snap_dest)
1437	{
1438	union {
1439	struct dm_exception_table table_swap;
1440	struct dm_exception_store *store_swap;
1441	} u;
1442
1443	/*
1444	* Swap all snapshot context information between the two instances.
1445	*/
1446	u.table_swap = snap_dest->complete;
1447	snap_dest->complete = snap_src->complete;
1448	snap_src->complete = u.table_swap;
1449
1450	u.store_swap = snap_dest->store;
1451	snap_dest->store = snap_src->store;
1452	snap_dest->store->userspace_supports_overflow = u.store_swap->userspace_supports_overflow;
1453	snap_src->store = u.store_swap;
1454
1455	snap_dest->store->snap = snap_dest;
1456	snap_src->store->snap = snap_src;
1457
1458	snap_dest->ti->max_io_len = snap_dest->store->chunk_size;
1459	snap_dest->valid = snap_src->valid;
1460	snap_dest->snapshot_overflowed = snap_src->snapshot_overflowed;
1461
1462	/*
1463	* Set source invalid to ensure it receives no further I/O.
1464	*/
1465	snap_src->valid = `0`;
1466	}
1467
1468	static void snapshot_dtr(struct dm_target *ti)
1469	{
1470	#ifdef CONFIG_DM_DEBUG
1471	int i;
1472	#endif
1473	struct dm_snapshot *s = ti->private;
1474	struct dm_snapshot snap_src = NULL, snap_dest = NULL;
1475
1476	down_read(sem: &_origins_lock);
1477	/ Check whether exception handover must be cancelled /
1478	(void) __find_snapshots_sharing_cow(snap: s, snap_src: &snap_src, snap_dest: &snap_dest, NULL);
1479	if (snap_src && snap_dest && (s == snap_src)) {
1480	down_write(sem: &snap_dest->lock);
1481	snap_dest->valid = `0`;
1482	up_write(sem: &snap_dest->lock);
1483	DMERR("Cancelling snapshot handover.");
1484	}
1485	up_read(sem: &_origins_lock);
1486
1487	if (dm_target_is_snapshot_merge(ti))
1488	stop_merge(s);
1489
1490	/ Prevent further origin writes from using this snapshot. /
1491	/ After this returns there can be no new kcopyd jobs. /
1492	unregister_snapshot(s);
1493
1494	while (atomic_read(v: &s->pending_exceptions_count))
1495	fsleep(usecs: `1000`);
1496	/*
1497	* Ensure instructions in mempool_exit aren't reordered
1498	* before atomic_read.
1499	*/
1500	smp_mb();
1501
1502	#ifdef CONFIG_DM_DEBUG
1503	for (i = `0`; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
1504	BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i]));
1505	#endif
1506
1507	__free_exceptions(s);
1508
1509	mempool_exit(pool: &s->pending_pool);
1510
1511	dm_exception_store_destroy(store: s->store);
1512
1513	dm_put_device(ti, d: s->cow);
1514
1515	dm_put_device(ti, d: s->origin);
1516
1517	WARN_ON(s->in_progress);
1518
1519	kfree(objp: s);
1520	}
1521
1522	static void account_start_copy(struct dm_snapshot *s)
1523	{
1524	spin_lock(lock: &s->in_progress_wait.lock);
1525	s->in_progress++;
1526	spin_unlock(lock: &s->in_progress_wait.lock);
1527	}
1528
1529	static void account_end_copy(struct dm_snapshot *s)
1530	{
1531	spin_lock(lock: &s->in_progress_wait.lock);
1532	BUG_ON(!s->in_progress);
1533	s->in_progress--;
1534	if (likely(s->in_progress <= cow_threshold) &&
1535	unlikely(waitqueue_active(&s->in_progress_wait)))
1536	wake_up_locked(&s->in_progress_wait);
1537	spin_unlock(lock: &s->in_progress_wait.lock);
1538	}
1539
1540	static bool wait_for_in_progress(struct dm_snapshot *s, bool unlock_origins)
1541	{
1542	if (unlikely(s->in_progress > cow_threshold)) {
1543	spin_lock(lock: &s->in_progress_wait.lock);
1544	if (likely(s->in_progress > cow_threshold)) {
1545	/*
1546	* NOTE: this throttle doesn't account for whether
1547	* the caller is servicing an IO that will trigger a COW
1548	* so excess throttling may result for chunks not required
1549	* to be COW'd. But if cow_threshold was reached, extra
1550	* throttling is unlikely to negatively impact performance.
1551	*/
1552	DECLARE_WAITQUEUE(wait, current);
1553
1554	__add_wait_queue(wq_head: &s->in_progress_wait, wq_entry: &wait);
1555	__set_current_state(TASK_UNINTERRUPTIBLE);
1556	spin_unlock(lock: &s->in_progress_wait.lock);
1557	if (unlock_origins)
1558	up_read(sem: &_origins_lock);
1559	io_schedule();
1560	remove_wait_queue(wq_head: &s->in_progress_wait, wq_entry: &wait);
1561	return false;
1562	}
1563	spin_unlock(lock: &s->in_progress_wait.lock);
1564	}
1565	return true;
1566	}
1567
1568	/*
1569	* Flush a list of buffers.
1570	*/
1571	static void flush_bios(struct bio *bio)
1572	{
1573	struct bio *n;
1574
1575	while (bio) {
1576	n = bio->bi_next;
1577	bio->bi_next = NULL;
1578	submit_bio_noacct(bio);
1579	bio = n;
1580	}
1581	}
1582
1583	static int do_origin(struct dm_dev origin, struct* bio *bio, bool limit);
1584
1585	/*
1586	* Flush a list of buffers.
1587	*/
1588	static void retry_origin_bios(struct dm_snapshot s, struct* bio *bio)
1589	{
1590	struct bio *n;
1591	int r;
1592
1593	while (bio) {
1594	n = bio->bi_next;
1595	bio->bi_next = NULL;
1596	r = do_origin(origin: s->origin, bio, limit: false);
1597	if (r == DM_MAPIO_REMAPPED)
1598	submit_bio_noacct(bio);
1599	bio = n;
1600	}
1601	}
1602
1603	/*
1604	* Error a list of buffers.
1605	*/
1606	static void error_bios(struct bio *bio)
1607	{
1608	struct bio *n;
1609
1610	while (bio) {
1611	n = bio->bi_next;
1612	bio->bi_next = NULL;
1613	bio_io_error(bio);
1614	bio = n;
1615	}
1616	}
1617
1618	static void __invalidate_snapshot(struct dm_snapshot s, int* err)
1619	{
1620	if (!s->valid)
1621	return;
1622
1623	if (err == -EIO)
1624	DMERR("Invalidating snapshot: Error reading/writing.");
1625	else if (err == -ENOMEM)
1626	DMERR("Invalidating snapshot: Unable to allocate exception.");
1627
1628	if (s->store->type->drop_snapshot)
1629	s->store->type->drop_snapshot(s->store);
1630
1631	s->valid = `0`;
1632
1633	dm_table_event(t: s->ti->table);
1634	}
1635
1636	static void invalidate_snapshot(struct dm_snapshot s, int* err)
1637	{
1638	down_write(sem: &s->lock);
1639	__invalidate_snapshot(s, err);
1640	up_write(sem: &s->lock);
1641	}
1642
1643	static void pending_complete(void context, int* success)
1644	{
1645	struct dm_snap_pending_exception *pe = context;
1646	struct dm_exception *e;
1647	struct dm_snapshot *s = pe->snap;
1648	struct bio *origin_bios = NULL;
1649	struct bio *snapshot_bios = NULL;
1650	struct bio *full_bio = NULL;
1651	struct dm_exception_table_lock lock;
1652	int error = `0`;
1653
1654	dm_exception_table_lock_init(s, chunk: pe->e.old_chunk, lock: &lock);
1655
1656	if (!success) {
1657	/ Read/write error - snapshot is unusable /
1658	invalidate_snapshot(s, err: -EIO);
1659	error = `1`;
1660
1661	dm_exception_table_lock(lock: &lock);
1662	goto out;
1663	}
1664
1665	e = alloc_completed_exception(GFP_NOIO);
1666	if (!e) {
1667	invalidate_snapshot(s, err: -ENOMEM);
1668	error = `1`;
1669
1670	dm_exception_table_lock(lock: &lock);
1671	goto out;
1672	}
1673	*e = pe->e;
1674
1675	down_read(sem: &s->lock);
1676	dm_exception_table_lock(lock: &lock);
1677	if (!s->valid) {
1678	up_read(sem: &s->lock);
1679	free_completed_exception(e);
1680	error = `1`;
1681
1682	goto out;
1683	}
1684
1685	/*
1686	* Add a proper exception. After inserting the completed exception all
1687	* subsequent snapshot reads to this chunk will be redirected to the
1688	* COW device. This ensures that we do not starve. Moreover, as long
1689	* as the pending exception exists, neither origin writes nor snapshot
1690	* merging can overwrite the chunk in origin.
1691	*/
1692	dm_insert_exception(eh: &s->complete, new_e: e);
1693	up_read(sem: &s->lock);
1694
1695	/ Wait for conflicting reads to drain /
1696	if (__chunk_is_tracked(s, chunk: pe->e.old_chunk)) {
1697	dm_exception_table_unlock(lock: &lock);
1698	__check_for_conflicting_io(s, chunk: pe->e.old_chunk);
1699	dm_exception_table_lock(lock: &lock);
1700	}
1701
1702	out:
1703	/ Remove the in-flight exception from the list /
1704	dm_remove_exception(e: &pe->e);
1705
1706	dm_exception_table_unlock(lock: &lock);
1707
1708	snapshot_bios = bio_list_get(bl: &pe->snapshot_bios);
1709	origin_bios = bio_list_get(bl: &pe->origin_bios);
1710	full_bio = pe->full_bio;
1711	if (full_bio)
1712	full_bio->bi_end_io = pe->full_bio_end_io;
1713	increment_pending_exceptions_done_count();
1714
1715	/ Submit any pending write bios /
1716	if (error) {
1717	if (full_bio)
1718	bio_io_error(bio: full_bio);
1719	error_bios(bio: snapshot_bios);
1720	} else {
1721	if (full_bio)
1722	bio_endio(full_bio);
1723	flush_bios(bio: snapshot_bios);
1724	}
1725
1726	retry_origin_bios(s, bio: origin_bios);
1727
1728	free_pending_exception(pe);
1729	}
1730
1731	static void complete_exception(struct dm_snap_pending_exception *pe)
1732	{
1733	struct dm_snapshot *s = pe->snap;
1734
1735	/ Update the metadata if we are persistent /
1736	s->store->type->commit_exception(s->store, &pe->e, !pe->copy_error,
1737	pending_complete, pe);
1738	}
1739
1740	/*
1741	* Called when the copy I/O has finished. kcopyd actually runs
1742	* this code so don't block.
1743	*/
1744	static void copy_callback(int read_err, unsigned long write_err, void *context)
1745	{
1746	struct dm_snap_pending_exception *pe = context;
1747	struct dm_snapshot *s = pe->snap;
1748
1749	pe->copy_error = read_err \|\| write_err;
1750
1751	if (pe->exception_sequence == s->exception_complete_sequence) {
1752	struct rb_node *next;
1753
1754	s->exception_complete_sequence++;
1755	complete_exception(pe);
1756
1757	next = rb_first(&s->out_of_order_tree);
1758	while (next) {
1759	pe = rb_entry(next, struct dm_snap_pending_exception,
1760	out_of_order_node);
1761	if (pe->exception_sequence != s->exception_complete_sequence)
1762	break;
1763	next = rb_next(next);
1764	s->exception_complete_sequence++;
1765	rb_erase(&pe->out_of_order_node, &s->out_of_order_tree);
1766	complete_exception(pe);
1767	cond_resched();
1768	}
1769	} else {
1770	struct rb_node *parent = NULL;
1771	struct rb_node **p = &s->out_of_order_tree.rb_node;
1772	struct dm_snap_pending_exception *pe2;
1773
1774	while (*p) {
1775	pe2 = rb_entry(p, struct* dm_snap_pending_exception, out_of_order_node);
1776	parent = *p;
1777
1778	BUG_ON(pe->exception_sequence == pe2->exception_sequence);
1779	if (pe->exception_sequence < pe2->exception_sequence)
1780	p = &((*p)->rb_left);
1781	else
1782	p = &((*p)->rb_right);
1783	}
1784
1785	rb_link_node(node: &pe->out_of_order_node, parent, rb_link: p);
1786	rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree);
1787	}
1788	account_end_copy(s);
1789	}
1790
1791	/*
1792	* Dispatches the copy operation to kcopyd.
1793	*/
1794	static void start_copy(struct dm_snap_pending_exception *pe)
1795	{
1796	struct dm_snapshot *s = pe->snap;
1797	struct dm_io_region src, dest;
1798	struct block_device *bdev = s->origin->bdev;
1799	sector_t dev_size;
1800
1801	dev_size = get_dev_size(bdev);
1802
1803	src.bdev = bdev;
1804	src.sector = chunk_to_sector(store: s->store, chunk: pe->e.old_chunk);
1805	src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector);
1806
1807	dest.bdev = s->cow->bdev;
1808	dest.sector = chunk_to_sector(store: s->store, chunk: pe->e.new_chunk);
1809	dest.count = src.count;
1810
1811	/ Hand over to kcopyd /
1812	account_start_copy(s);
1813	dm_kcopyd_copy(kc: s->kcopyd_client, from: &src, num_dests: `1`, dests: &dest, flags: `0`, fn: copy_callback, context: pe);
1814	}
1815
1816	static void full_bio_end_io(struct bio *bio)
1817	{
1818	void *callback_data = bio->bi_private;
1819
1820	dm_kcopyd_do_callback(job: callback_data, read_err: `0`, write_err: bio->bi_status ? `1` : `0`);
1821	}
1822
1823	static void start_full_bio(struct dm_snap_pending_exception *pe,
1824	struct bio *bio)
1825	{
1826	struct dm_snapshot *s = pe->snap;
1827	void *callback_data;
1828
1829	pe->full_bio = bio;
1830	pe->full_bio_end_io = bio->bi_end_io;
1831
1832	account_start_copy(s);
1833	callback_data = dm_kcopyd_prepare_callback(kc: s->kcopyd_client,
1834	fn: copy_callback, context: pe);
1835
1836	bio->bi_end_io = full_bio_end_io;
1837	bio->bi_private = callback_data;
1838
1839	submit_bio_noacct(bio);
1840	}
1841
1842	static struct dm_snap_pending_exception *
1843	__lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk)
1844	{
1845	struct dm_exception *e = dm_lookup_exception(et: &s->pending, chunk);
1846
1847	if (!e)
1848	return NULL;
1849
1850	return container_of(e, struct dm_snap_pending_exception, e);
1851	}
1852
1853	/*
1854	* Inserts a pending exception into the pending table.
1855	*
1856	* NOTE: a write lock must be held on the chunk's pending exception table slot
1857	* before calling this.
1858	*/
1859	static struct dm_snap_pending_exception *
1860	__insert_pending_exception(struct dm_snapshot *s,
1861	struct dm_snap_pending_exception *pe, chunk_t chunk)
1862	{
1863	pe->e.old_chunk = chunk;
1864	bio_list_init(bl: &pe->origin_bios);
1865	bio_list_init(bl: &pe->snapshot_bios);
1866	pe->started = `0`;
1867	pe->full_bio = NULL;
1868
1869	spin_lock(lock: &s->pe_allocation_lock);
1870	if (s->store->type->prepare_exception(s->store, &pe->e)) {
1871	spin_unlock(lock: &s->pe_allocation_lock);
1872	free_pending_exception(pe);
1873	return NULL;
1874	}
1875
1876	pe->exception_sequence = s->exception_start_sequence++;
1877	spin_unlock(lock: &s->pe_allocation_lock);
1878
1879	dm_insert_exception(eh: &s->pending, new_e: &pe->e);
1880
1881	return pe;
1882	}
1883
1884	/*
1885	* Looks to see if this snapshot already has a pending exception
1886	* for this chunk, otherwise it allocates a new one and inserts
1887	* it into the pending table.
1888	*
1889	* NOTE: a write lock must be held on the chunk's pending exception table slot
1890	* before calling this.
1891	*/
1892	static struct dm_snap_pending_exception *
1893	__find_pending_exception(struct dm_snapshot *s,
1894	struct dm_snap_pending_exception *pe, chunk_t chunk)
1895	{
1896	struct dm_snap_pending_exception *pe2;
1897
1898	pe2 = __lookup_pending_exception(s, chunk);
1899	if (pe2) {
1900	free_pending_exception(pe);
1901	return pe2;
1902	}
1903
1904	return __insert_pending_exception(s, pe, chunk);
1905	}
1906
1907	static void remap_exception(struct dm_snapshot s, struct* dm_exception *e,
1908	struct bio *bio, chunk_t chunk)
1909	{
1910	bio_set_dev(bio, bdev: s->cow->bdev);
1911	bio->bi_iter.bi_sector =
1912	chunk_to_sector(store: s->store, chunk: dm_chunk_number(chunk: e->new_chunk) +
1913	(chunk - e->old_chunk)) +
1914	(bio->bi_iter.bi_sector & s->store->chunk_mask);
1915	}
1916
1917	static void zero_callback(int read_err, unsigned long write_err, void *context)
1918	{
1919	struct bio *bio = context;
1920	struct dm_snapshot *s = bio->bi_private;
1921
1922	account_end_copy(s);
1923	bio->bi_status = write_err ? BLK_STS_IOERR : `0`;
1924	bio_endio(bio);
1925	}
1926
1927	static void zero_exception(struct dm_snapshot s, struct* dm_exception *e,
1928	struct bio *bio, chunk_t chunk)
1929	{
1930	struct dm_io_region dest;
1931
1932	dest.bdev = s->cow->bdev;
1933	dest.sector = bio->bi_iter.bi_sector;
1934	dest.count = s->store->chunk_size;
1935
1936	account_start_copy(s);
1937	WARN_ON_ONCE(bio->bi_private);
1938	bio->bi_private = s;
1939	dm_kcopyd_zero(kc: s->kcopyd_client, num_dests: `1`, dests: &dest, flags: `0`, fn: zero_callback, context: bio);
1940	}
1941
1942	static bool io_overlaps_chunk(struct dm_snapshot s, struct* bio *bio)
1943	{
1944	return bio->bi_iter.bi_size ==
1945	(s->store->chunk_size << SECTOR_SHIFT);
1946	}
1947
1948	static int snapshot_map(struct dm_target ti, struct* bio *bio)
1949	{
1950	struct dm_exception *e;
1951	struct dm_snapshot *s = ti->private;
1952	int r = DM_MAPIO_REMAPPED;
1953	chunk_t chunk;
1954	struct dm_snap_pending_exception *pe = NULL;
1955	struct dm_exception_table_lock lock;
1956
1957	init_tracked_chunk(bio);
1958
1959	if (bio->bi_opf & REQ_PREFLUSH) {
1960	bio_set_dev(bio, bdev: s->cow->bdev);
1961	return DM_MAPIO_REMAPPED;
1962	}
1963
1964	chunk = sector_to_chunk(store: s->store, sector: bio->bi_iter.bi_sector);
1965	dm_exception_table_lock_init(s, chunk, lock: &lock);
1966
1967	/ Full snapshots are not usable /
1968	/ To get here the table must be live so s->active is always set. /
1969	if (!s->valid)
1970	return DM_MAPIO_KILL;
1971
1972	if (bio_data_dir(bio) == WRITE) {
1973	while (unlikely(!wait_for_in_progress(s, false)))
1974	; / wait_for_in_progress() has slept /
1975	}
1976
1977	down_read(sem: &s->lock);
1978	dm_exception_table_lock(lock: &lock);
1979
1980	if (!s->valid \|\| (unlikely(s->snapshot_overflowed) &&
1981	bio_data_dir(bio) == WRITE)) {
1982	r = DM_MAPIO_KILL;
1983	goto out_unlock;
1984	}
1985
1986	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
1987	if (s->discard_passdown_origin && dm_bio_get_target_bio_nr(bio)) {
1988	/*
1989	* passdown discard to origin (without triggering
1990	* snapshot exceptions via do_origin; doing so would
1991	* defeat the goal of freeing space in origin that is
1992	* implied by the "discard_passdown_origin" feature)
1993	*/
1994	bio_set_dev(bio, bdev: s->origin->bdev);
1995	track_chunk(s, bio, chunk);
1996	goto out_unlock;
1997	}
1998	/ discard to snapshot (target_bio_nr == 0) zeroes exceptions /
1999	}
2000
2001	/ If the block is already remapped - use that, else remap it /
2002	e = dm_lookup_exception(et: &s->complete, chunk);
2003	if (e) {
2004	remap_exception(s, e, bio, chunk);
2005	if (unlikely(bio_op(bio) == REQ_OP_DISCARD) &&
2006	io_overlaps_chunk(s, bio)) {
2007	dm_exception_table_unlock(lock: &lock);
2008	up_read(sem: &s->lock);
2009	zero_exception(s, e, bio, chunk);
2010	r = DM_MAPIO_SUBMITTED; / discard is not issued /
2011	goto out;
2012	}
2013	goto out_unlock;
2014	}
2015
2016	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
2017	/*
2018	* If no exception exists, complete discard immediately
2019	* otherwise it'll trigger copy-out.
2020	*/
2021	bio_endio(bio);
2022	r = DM_MAPIO_SUBMITTED;
2023	goto out_unlock;
2024	}
2025
2026	/*
2027	* Write to snapshot - higher level takes care of RW/RO
2028	* flags so we should only get this if we are
2029	* writable.
2030	*/
2031	if (bio_data_dir(bio) == WRITE) {
2032	pe = __lookup_pending_exception(s, chunk);
2033	if (!pe) {
2034	dm_exception_table_unlock(lock: &lock);
2035	pe = alloc_pending_exception(s);
2036	dm_exception_table_lock(lock: &lock);
2037
2038	e = dm_lookup_exception(et: &s->complete, chunk);
2039	if (e) {
2040	free_pending_exception(pe);
2041	remap_exception(s, e, bio, chunk);
2042	goto out_unlock;
2043	}
2044
2045	pe = __find_pending_exception(s, pe, chunk);
2046	if (!pe) {
2047	dm_exception_table_unlock(lock: &lock);
2048	up_read(sem: &s->lock);
2049
2050	down_write(sem: &s->lock);
2051
2052	if (s->store->userspace_supports_overflow) {
2053	if (s->valid && !s->snapshot_overflowed) {
2054	s->snapshot_overflowed = `1`;
2055	DMERR("Snapshot overflowed: Unable to allocate exception.");
2056	}
2057	} else
2058	__invalidate_snapshot(s, err: -ENOMEM);
2059	up_write(sem: &s->lock);
2060
2061	r = DM_MAPIO_KILL;
2062	goto out;
2063	}
2064	}
2065
2066	remap_exception(s, e: &pe->e, bio, chunk);
2067
2068	r = DM_MAPIO_SUBMITTED;
2069
2070	if (!pe->started && io_overlaps_chunk(s, bio)) {
2071	pe->started = `1`;
2072
2073	dm_exception_table_unlock(lock: &lock);
2074	up_read(sem: &s->lock);
2075
2076	start_full_bio(pe, bio);
2077	goto out;
2078	}
2079
2080	bio_list_add(bl: &pe->snapshot_bios, bio);
2081
2082	if (!pe->started) {
2083	/ this is protected by the exception table lock /
2084	pe->started = `1`;
2085
2086	dm_exception_table_unlock(lock: &lock);
2087	up_read(sem: &s->lock);
2088
2089	start_copy(pe);
2090	goto out;
2091	}
2092	} else {
2093	bio_set_dev(bio, bdev: s->origin->bdev);
2094	track_chunk(s, bio, chunk);
2095	}
2096
2097	out_unlock:
2098	dm_exception_table_unlock(lock: &lock);
2099	up_read(sem: &s->lock);
2100	out:
2101	return r;
2102	}
2103
2104	/*
2105	* A snapshot-merge target behaves like a combination of a snapshot
2106	* target and a snapshot-origin target. It only generates new
2107	* exceptions in other snapshots and not in the one that is being
2108	* merged.
2109	*
2110	* For each chunk, if there is an existing exception, it is used to
2111	* redirect I/O to the cow device. Otherwise I/O is sent to the origin,
2112	* which in turn might generate exceptions in other snapshots.
2113	* If merging is currently taking place on the chunk in question, the
2114	* I/O is deferred by adding it to s->bios_queued_during_merge.
2115	*/
2116	static int snapshot_merge_map(struct dm_target ti, struct* bio *bio)
2117	{
2118	struct dm_exception *e;
2119	struct dm_snapshot *s = ti->private;
2120	int r = DM_MAPIO_REMAPPED;
2121	chunk_t chunk;
2122
2123	init_tracked_chunk(bio);
2124
2125	if (bio->bi_opf & REQ_PREFLUSH) {
2126	if (!dm_bio_get_target_bio_nr(bio))
2127	bio_set_dev(bio, bdev: s->origin->bdev);
2128	else
2129	bio_set_dev(bio, bdev: s->cow->bdev);
2130	return DM_MAPIO_REMAPPED;
2131	}
2132
2133	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
2134	/ Once merging, discards no longer effect change /
2135	bio_endio(bio);
2136	return DM_MAPIO_SUBMITTED;
2137	}
2138
2139	chunk = sector_to_chunk(store: s->store, sector: bio->bi_iter.bi_sector);
2140
2141	down_write(sem: &s->lock);
2142
2143	/ Full merging snapshots are redirected to the origin /
2144	if (!s->valid)
2145	goto redirect_to_origin;
2146
2147	/ If the block is already remapped - use that /
2148	e = dm_lookup_exception(et: &s->complete, chunk);
2149	if (e) {
2150	/ Queue writes overlapping with chunks being merged /
2151	if (bio_data_dir(bio) == WRITE &&
2152	chunk >= s->first_merging_chunk &&
2153	chunk < (s->first_merging_chunk +
2154	s->num_merging_chunks)) {
2155	bio_set_dev(bio, bdev: s->origin->bdev);
2156	bio_list_add(bl: &s->bios_queued_during_merge, bio);
2157	r = DM_MAPIO_SUBMITTED;
2158	goto out_unlock;
2159	}
2160
2161	remap_exception(s, e, bio, chunk);
2162
2163	if (bio_data_dir(bio) == WRITE)
2164	track_chunk(s, bio, chunk);
2165	goto out_unlock;
2166	}
2167
2168	redirect_to_origin:
2169	bio_set_dev(bio, bdev: s->origin->bdev);
2170
2171	if (bio_data_dir(bio) == WRITE) {
2172	up_write(sem: &s->lock);
2173	return do_origin(origin: s->origin, bio, limit: false);
2174	}
2175
2176	out_unlock:
2177	up_write(sem: &s->lock);
2178
2179	return r;
2180	}
2181
2182	static int snapshot_end_io(struct dm_target ti, struct* bio *bio,
2183	blk_status_t *error)
2184	{
2185	struct dm_snapshot *s = ti->private;
2186
2187	if (is_bio_tracked(bio))
2188	stop_tracking_chunk(s, bio);
2189
2190	return DM_ENDIO_DONE;
2191	}
2192
2193	static void snapshot_merge_presuspend(struct dm_target *ti)
2194	{
2195	struct dm_snapshot *s = ti->private;
2196
2197	stop_merge(s);
2198	}
2199
2200	static int snapshot_preresume(struct dm_target *ti)
2201	{
2202	int r = `0`;
2203	struct dm_snapshot *s = ti->private;
2204	struct dm_snapshot snap_src = NULL, snap_dest = NULL;
2205
2206	down_read(sem: &_origins_lock);
2207	(void) __find_snapshots_sharing_cow(snap: s, snap_src: &snap_src, snap_dest: &snap_dest, NULL);
2208	if (snap_src && snap_dest) {
2209	down_read(sem: &snap_src->lock);
2210	if (s == snap_src) {
2211	DMERR("Unable to resume snapshot source until handover completes.");
2212	r = -EINVAL;
2213	} else if (!dm_suspended(ti: snap_src->ti)) {
2214	DMERR("Unable to perform snapshot handover until source is suspended.");
2215	r = -EINVAL;
2216	}
2217	up_read(sem: &snap_src->lock);
2218	}
2219	up_read(sem: &_origins_lock);
2220
2221	return r;
2222	}
2223
2224	static void snapshot_resume(struct dm_target *ti)
2225	{
2226	struct dm_snapshot *s = ti->private;
2227	struct dm_snapshot snap_src = NULL, snap_dest = NULL, *snap_merging = NULL;
2228	struct dm_origin *o;
2229	struct mapped_device *origin_md = NULL;
2230	bool must_restart_merging = false;
2231
2232	down_read(sem: &_origins_lock);
2233
2234	o = __lookup_dm_origin(origin: s->origin->bdev);
2235	if (o)
2236	origin_md = dm_table_get_md(t: o->ti->table);
2237	if (!origin_md) {
2238	(void) __find_snapshots_sharing_cow(snap: s, NULL, NULL, snap_merge: &snap_merging);
2239	if (snap_merging)
2240	origin_md = dm_table_get_md(t: snap_merging->ti->table);
2241	}
2242	if (origin_md == dm_table_get_md(t: ti->table))
2243	origin_md = NULL;
2244	if (origin_md) {
2245	if (dm_hold(md: origin_md))
2246	origin_md = NULL;
2247	}
2248
2249	up_read(sem: &_origins_lock);
2250
2251	if (origin_md) {
2252	dm_internal_suspend_fast(md: origin_md);
2253	if (snap_merging && test_bit(RUNNING_MERGE, &snap_merging->state_bits)) {
2254	must_restart_merging = true;
2255	stop_merge(s: snap_merging);
2256	}
2257	}
2258
2259	down_read(sem: &_origins_lock);
2260
2261	(void) __find_snapshots_sharing_cow(snap: s, snap_src: &snap_src, snap_dest: &snap_dest, NULL);
2262	if (snap_src && snap_dest) {
2263	down_write(sem: &snap_src->lock);
2264	down_write_nested(sem: &snap_dest->lock, SINGLE_DEPTH_NESTING);
2265	__handover_exceptions(snap_src, snap_dest);
2266	up_write(sem: &snap_dest->lock);
2267	up_write(sem: &snap_src->lock);
2268	}
2269
2270	up_read(sem: &_origins_lock);
2271
2272	if (origin_md) {
2273	if (must_restart_merging)
2274	start_merge(s: snap_merging);
2275	dm_internal_resume_fast(md: origin_md);
2276	dm_put(md: origin_md);
2277	}
2278
2279	/ Now we have correct chunk size, reregister /
2280	reregister_snapshot(s);
2281
2282	down_write(sem: &s->lock);
2283	s->active = `1`;
2284	up_write(sem: &s->lock);
2285	}
2286
2287	static uint32_t get_origin_minimum_chunksize(struct block_device *bdev)
2288	{
2289	uint32_t min_chunksize;
2290
2291	down_read(sem: &_origins_lock);
2292	min_chunksize = __minimum_chunk_size(o: __lookup_origin(origin: bdev));
2293	up_read(sem: &_origins_lock);
2294
2295	return min_chunksize;
2296	}
2297
2298	static void snapshot_merge_resume(struct dm_target *ti)
2299	{
2300	struct dm_snapshot *s = ti->private;
2301
2302	/*
2303	* Handover exceptions from existing snapshot.
2304	*/
2305	snapshot_resume(ti);
2306
2307	/*
2308	* snapshot-merge acts as an origin, so set ti->max_io_len
2309	*/
2310	ti->max_io_len = get_origin_minimum_chunksize(bdev: s->origin->bdev);
2311
2312	start_merge(s);
2313	}
2314
2315	static void snapshot_status(struct dm_target *ti, status_type_t type,
2316	unsigned int status_flags, char result, unsigned* int maxlen)
2317	{
2318	unsigned int sz = `0`;
2319	struct dm_snapshot *snap = ti->private;
2320	unsigned int num_features;
2321
2322	switch (type) {
2323	case STATUSTYPE_INFO:
2324
2325	down_write(sem: &snap->lock);
2326
2327	if (!snap->valid)
2328	DMEMIT("Invalid");
2329	else if (snap->merge_failed)
2330	DMEMIT("Merge failed");
2331	else if (snap->snapshot_overflowed)
2332	DMEMIT("Overflow");
2333	else {
2334	if (snap->store->type->usage) {
2335	sector_t total_sectors, sectors_allocated,
2336	metadata_sectors;
2337	snap->store->type->usage(snap->store,
2338	&total_sectors,
2339	&sectors_allocated,
2340	&metadata_sectors);
2341	DMEMIT("%llu/%llu %llu",
2342	(unsigned long long)sectors_allocated,
2343	(unsigned long long)total_sectors,
2344	(unsigned long long)metadata_sectors);
2345	} else
2346	DMEMIT("Unknown");
2347	}
2348
2349	up_write(sem: &snap->lock);
2350
2351	break;
2352
2353	case STATUSTYPE_TABLE:
2354	/*
2355	* kdevname returns a static pointer so we need
2356	* to make private copies if the output is to
2357	* make sense.
2358	*/
2359	DMEMIT("%s %s", snap->origin->name, snap->cow->name);
2360	sz += snap->store->type->status(snap->store, type, result + sz,
2361	maxlen - sz);
2362	num_features = snap->discard_zeroes_cow + snap->discard_passdown_origin;
2363	if (num_features) {
2364	DMEMIT(" %u", num_features);
2365	if (snap->discard_zeroes_cow)
2366	DMEMIT(" discard_zeroes_cow");
2367	if (snap->discard_passdown_origin)
2368	DMEMIT(" discard_passdown_origin");
2369	}
2370	break;
2371
2372	case STATUSTYPE_IMA:
2373	DMEMIT_TARGET_NAME_VERSION(ti->type);
2374	DMEMIT(",snap_origin_name=%s", snap->origin->name);
2375	DMEMIT(",snap_cow_name=%s", snap->cow->name);
2376	DMEMIT(",snap_valid=%c", snap->valid ? `'y'` : `'n'`);
2377	DMEMIT(",snap_merge_failed=%c", snap->merge_failed ? `'y'` : `'n'`);
2378	DMEMIT(",snapshot_overflowed=%c", snap->snapshot_overflowed ? `'y'` : `'n'`);
2379	DMEMIT(";");
2380	break;
2381	}
2382	}
2383
2384	static int snapshot_iterate_devices(struct dm_target *ti,
2385	iterate_devices_callout_fn fn, void *data)
2386	{
2387	struct dm_snapshot *snap = ti->private;
2388	int r;
2389
2390	r = fn(ti, snap->origin, `0`, ti->len, data);
2391
2392	if (!r)
2393	r = fn(ti, snap->cow, `0`, get_dev_size(bdev: snap->cow->bdev), data);
2394
2395	return r;
2396	}
2397
2398	static void snapshot_io_hints(struct dm_target ti, struct* queue_limits *limits)
2399	{
2400	struct dm_snapshot *snap = ti->private;
2401
2402	if (snap->discard_zeroes_cow) {
2403	struct dm_snapshot snap_src = NULL, snap_dest = NULL;
2404
2405	down_read(sem: &_origins_lock);
2406
2407	(void) __find_snapshots_sharing_cow(snap, snap_src: &snap_src, snap_dest: &snap_dest, NULL);
2408	if (snap_src && snap_dest)
2409	snap = snap_src;
2410
2411	/ All discards are split on chunk_size boundary /
2412	limits->discard_granularity = snap->store->chunk_size;
2413	limits->max_discard_sectors = snap->store->chunk_size;
2414
2415	up_read(sem: &_origins_lock);
2416	}
2417	}
2418
2419	/*
2420	*---------------------------------------------------------------
2421	* Origin methods
2422	*---------------------------------------------------------------
2423	*/
2424	/*
2425	* If no exceptions need creating, DM_MAPIO_REMAPPED is returned and any
2426	* supplied bio was ignored. The caller may submit it immediately.
2427	* (No remapping actually occurs as the origin is always a direct linear
2428	* map.)
2429	*
2430	* If further exceptions are required, DM_MAPIO_SUBMITTED is returned
2431	* and any supplied bio is added to a list to be submitted once all
2432	* the necessary exceptions exist.
2433	*/
2434	static int __origin_write(struct list_head *snapshots, sector_t sector,
2435	struct bio *bio)
2436	{
2437	int r = DM_MAPIO_REMAPPED;
2438	struct dm_snapshot *snap;
2439	struct dm_exception *e;
2440	struct dm_snap_pending_exception pe, pe2;
2441	struct dm_snap_pending_exception *pe_to_start_now = NULL;
2442	struct dm_snap_pending_exception *pe_to_start_last = NULL;
2443	struct dm_exception_table_lock lock;
2444	chunk_t chunk;
2445
2446	/ Do all the snapshots on this origin /
2447	list_for_each_entry(snap, snapshots, list) {
2448	/*
2449	* Don't make new exceptions in a merging snapshot
2450	* because it has effectively been deleted
2451	*/
2452	if (dm_target_is_snapshot_merge(snap->ti))
2453	continue;
2454
2455	/ Nothing to do if writing beyond end of snapshot /
2456	if (sector >= dm_table_get_size(t: snap->ti->table))
2457	continue;
2458
2459	/*
2460	* Remember, different snapshots can have
2461	* different chunk sizes.
2462	*/
2463	chunk = sector_to_chunk(store: snap->store, sector);
2464	dm_exception_table_lock_init(s: snap, chunk, lock: &lock);
2465
2466	down_read(sem: &snap->lock);
2467	dm_exception_table_lock(lock: &lock);
2468
2469	/ Only deal with valid and active snapshots /
2470	if (!snap->valid \|\| !snap->active)
2471	goto next_snapshot;
2472
2473	pe = __lookup_pending_exception(s: snap, chunk);
2474	if (!pe) {
2475	/*
2476	* Check exception table to see if block is already
2477	* remapped in this snapshot and trigger an exception
2478	* if not.
2479	*/
2480	e = dm_lookup_exception(et: &snap->complete, chunk);
2481	if (e)
2482	goto next_snapshot;
2483
2484	dm_exception_table_unlock(lock: &lock);
2485	pe = alloc_pending_exception(s: snap);
2486	dm_exception_table_lock(lock: &lock);
2487
2488	pe2 = __lookup_pending_exception(s: snap, chunk);
2489
2490	if (!pe2) {
2491	e = dm_lookup_exception(et: &snap->complete, chunk);
2492	if (e) {
2493	free_pending_exception(pe);
2494	goto next_snapshot;
2495	}
2496
2497	pe = __insert_pending_exception(s: snap, pe, chunk);
2498	if (!pe) {
2499	dm_exception_table_unlock(lock: &lock);
2500	up_read(sem: &snap->lock);
2501
2502	invalidate_snapshot(s: snap, err: -ENOMEM);
2503	continue;
2504	}
2505	} else {
2506	free_pending_exception(pe);
2507	pe = pe2;
2508	}
2509	}
2510
2511	r = DM_MAPIO_SUBMITTED;
2512
2513	/*
2514	* If an origin bio was supplied, queue it to wait for the
2515	* completion of this exception, and start this one last,
2516	* at the end of the function.
2517	*/
2518	if (bio) {
2519	bio_list_add(bl: &pe->origin_bios, bio);
2520	bio = NULL;
2521
2522	if (!pe->started) {
2523	pe->started = `1`;
2524	pe_to_start_last = pe;
2525	}
2526	}
2527
2528	if (!pe->started) {
2529	pe->started = `1`;
2530	pe_to_start_now = pe;
2531	}
2532
2533	next_snapshot:
2534	dm_exception_table_unlock(lock: &lock);
2535	up_read(sem: &snap->lock);
2536
2537	if (pe_to_start_now) {
2538	start_copy(pe: pe_to_start_now);
2539	pe_to_start_now = NULL;
2540	}
2541	}
2542
2543	/*
2544	* Submit the exception against which the bio is queued last,
2545	* to give the other exceptions a head start.
2546	*/
2547	if (pe_to_start_last)
2548	start_copy(pe: pe_to_start_last);
2549
2550	return r;
2551	}
2552
2553	/*
2554	* Called on a write from the origin driver.
2555	*/
2556	static int do_origin(struct dm_dev origin, struct* bio *bio, bool limit)
2557	{
2558	struct origin *o;
2559	int r = DM_MAPIO_REMAPPED;
2560
2561	again:
2562	down_read(sem: &_origins_lock);
2563	o = __lookup_origin(origin: origin->bdev);
2564	if (o) {
2565	if (limit) {
2566	struct dm_snapshot *s;
2567
2568	list_for_each_entry(s, &o->snapshots, list)
2569	if (unlikely(!wait_for_in_progress(s, true)))
2570	goto again;
2571	}
2572
2573	r = __origin_write(snapshots: &o->snapshots, sector: bio->bi_iter.bi_sector, bio);
2574	}
2575	up_read(sem: &_origins_lock);
2576
2577	return r;
2578	}
2579
2580	/*
2581	* Trigger exceptions in all non-merging snapshots.
2582	*
2583	* The chunk size of the merging snapshot may be larger than the chunk
2584	* size of some other snapshot so we may need to reallocate multiple
2585	* chunks in other snapshots.
2586	*
2587	* We scan all the overlapping exceptions in the other snapshots.
2588	* Returns 1 if anything was reallocated and must be waited for,
2589	* otherwise returns 0.
2590	*
2591	* size must be a multiple of merging_snap's chunk_size.
2592	*/
2593	static int origin_write_extent(struct dm_snapshot *merging_snap,
2594	sector_t sector, unsigned int size)
2595	{
2596	int must_wait = `0`;
2597	sector_t n;
2598	struct origin *o;
2599
2600	/*
2601	* The origin's __minimum_chunk_size() got stored in max_io_len
2602	* by snapshot_merge_resume().
2603	*/
2604	down_read(sem: &_origins_lock);
2605	o = __lookup_origin(origin: merging_snap->origin->bdev);
2606	for (n = `0`; n < size; n += merging_snap->ti->max_io_len)
2607	if (__origin_write(snapshots: &o->snapshots, sector: sector + n, NULL) ==
2608	DM_MAPIO_SUBMITTED)
2609	must_wait = `1`;
2610	up_read(sem: &_origins_lock);
2611
2612	return must_wait;
2613	}
2614
2615	/*
2616	* Origin: maps a linear range of a device, with hooks for snapshotting.
2617	*/
2618
2619	/*
2620	* Construct an origin mapping: <dev_path>
2621	* The context for an origin is merely a 'struct dm_dev *'
2622	* pointing to the real device.
2623	*/
2624	static int origin_ctr(struct dm_target ti, unsigned* int argc, char **argv)
2625	{
2626	int r;
2627	struct dm_origin *o;
2628
2629	if (argc != `1`) {
2630	ti->error = "origin: incorrect number of arguments";
2631	return -EINVAL;
2632	}
2633
2634	o = kmalloc(size: sizeof(struct dm_origin), GFP_KERNEL);
2635	if (!o) {
2636	ti->error = "Cannot allocate private origin structure";
2637	r = -ENOMEM;
2638	goto bad_alloc;
2639	}
2640
2641	r = dm_get_device(ti, path: argv[`0`], mode: dm_table_get_mode(t: ti->table), result: &o->dev);
2642	if (r) {
2643	ti->error = "Cannot get target device";
2644	goto bad_open;
2645	}
2646
2647	o->ti = ti;
2648	ti->private = o;
2649	ti->num_flush_bios = `1`;
2650
2651	return `0`;
2652
2653	bad_open:
2654	kfree(objp: o);
2655	bad_alloc:
2656	return r;
2657	}
2658
2659	static void origin_dtr(struct dm_target *ti)
2660	{
2661	struct dm_origin *o = ti->private;
2662
2663	dm_put_device(ti, d: o->dev);
2664	kfree(objp: o);
2665	}
2666
2667	static int origin_map(struct dm_target ti, struct* bio *bio)
2668	{
2669	struct dm_origin *o = ti->private;
2670	unsigned int available_sectors;
2671
2672	bio_set_dev(bio, bdev: o->dev->bdev);
2673
2674	if (unlikely(bio->bi_opf & REQ_PREFLUSH))
2675	return DM_MAPIO_REMAPPED;
2676
2677	if (bio_data_dir(bio) != WRITE)
2678	return DM_MAPIO_REMAPPED;
2679
2680	available_sectors = o->split_boundary -
2681	((unsigned int)bio->bi_iter.bi_sector & (o->split_boundary - `1`));
2682
2683	if (bio_sectors(bio) > available_sectors)
2684	dm_accept_partial_bio(bio, n_sectors: available_sectors);
2685
2686	/ Only tell snapshots if this is a write /
2687	return do_origin(origin: o->dev, bio, limit: true);
2688	}
2689
2690	/*
2691	* Set the target "max_io_len" field to the minimum of all the snapshots'
2692	* chunk sizes.
2693	*/
2694	static void origin_resume(struct dm_target *ti)
2695	{
2696	struct dm_origin *o = ti->private;
2697
2698	o->split_boundary = get_origin_minimum_chunksize(bdev: o->dev->bdev);
2699
2700	down_write(sem: &_origins_lock);
2701	__insert_dm_origin(o);
2702	up_write(sem: &_origins_lock);
2703	}
2704
2705	static void origin_postsuspend(struct dm_target *ti)
2706	{
2707	struct dm_origin *o = ti->private;
2708
2709	down_write(sem: &_origins_lock);
2710	__remove_dm_origin(o);
2711	up_write(sem: &_origins_lock);
2712	}
2713
2714	static void origin_status(struct dm_target *ti, status_type_t type,
2715	unsigned int status_flags, char result, unsigned* int maxlen)
2716	{
2717	struct dm_origin *o = ti->private;
2718
2719	switch (type) {
2720	case STATUSTYPE_INFO:
2721	result[`0`] = `'\0'`;
2722	break;
2723
2724	case STATUSTYPE_TABLE:
2725	snprintf(buf: result, size: maxlen, fmt: "%s", o->dev->name);
2726	break;
2727	case STATUSTYPE_IMA:
2728	result[`0`] = `'\0'`;
2729	break;
2730	}
2731	}
2732
2733	static int origin_iterate_devices(struct dm_target *ti,
2734	iterate_devices_callout_fn fn, void *data)
2735	{
2736	struct dm_origin *o = ti->private;
2737
2738	return fn(ti, o->dev, `0`, ti->len, data);
2739	}
2740
2741	static struct target_type origin_target = {
2742	.name = "snapshot-origin",
2743	.version = {`1`, `9`, `0`},
2744	.module = THIS_MODULE,
2745	.ctr = origin_ctr,
2746	.dtr = origin_dtr,
2747	.map = origin_map,
2748	.resume = origin_resume,
2749	.postsuspend = origin_postsuspend,
2750	.status = origin_status,
2751	.iterate_devices = origin_iterate_devices,
2752	};
2753
2754	static struct target_type snapshot_target = {
2755	.name = "snapshot",
2756	.version = {`1`, `16`, `0`},
2757	.module = THIS_MODULE,
2758	.ctr = snapshot_ctr,
2759	.dtr = snapshot_dtr,
2760	.map = snapshot_map,
2761	.end_io = snapshot_end_io,
2762	.preresume = snapshot_preresume,
2763	.resume = snapshot_resume,
2764	.status = snapshot_status,
2765	.iterate_devices = snapshot_iterate_devices,
2766	.io_hints = snapshot_io_hints,
2767	};
2768
2769	static struct target_type merge_target = {
2770	.name = dm_snapshot_merge_target_name,
2771	.version = {`1`, `5`, `0`},
2772	.module = THIS_MODULE,
2773	.ctr = snapshot_ctr,
2774	.dtr = snapshot_dtr,
2775	.map = snapshot_merge_map,
2776	.end_io = snapshot_end_io,
2777	.presuspend = snapshot_merge_presuspend,
2778	.preresume = snapshot_preresume,
2779	.resume = snapshot_merge_resume,
2780	.status = snapshot_status,
2781	.iterate_devices = snapshot_iterate_devices,
2782	.io_hints = snapshot_io_hints,
2783	};
2784
2785	static int __init dm_snapshot_init(void)
2786	{
2787	int r;
2788
2789	r = dm_exception_store_init();
2790	if (r) {
2791	DMERR("Failed to initialize exception stores");
2792	return r;
2793	}
2794
2795	r = init_origin_hash();
2796	if (r) {
2797	DMERR("init_origin_hash failed.");
2798	goto bad_origin_hash;
2799	}
2800
2801	exception_cache = KMEM_CACHE(dm_exception, `0`);
2802	if (!exception_cache) {
2803	DMERR("Couldn't create exception cache.");
2804	r = -ENOMEM;
2805	goto bad_exception_cache;
2806	}
2807
2808	pending_cache = KMEM_CACHE(dm_snap_pending_exception, `0`);
2809	if (!pending_cache) {
2810	DMERR("Couldn't create pending cache.");
2811	r = -ENOMEM;
2812	goto bad_pending_cache;
2813	}
2814
2815	r = dm_register_target(t: &snapshot_target);
2816	if (r < `0`)
2817	goto bad_register_snapshot_target;
2818
2819	r = dm_register_target(t: &origin_target);
2820	if (r < `0`)
2821	goto bad_register_origin_target;
2822
2823	r = dm_register_target(t: &merge_target);
2824	if (r < `0`)
2825	goto bad_register_merge_target;
2826
2827	return `0`;
2828
2829	bad_register_merge_target:
2830	dm_unregister_target(t: &origin_target);
2831	bad_register_origin_target:
2832	dm_unregister_target(t: &snapshot_target);
2833	bad_register_snapshot_target:
2834	kmem_cache_destroy(s: pending_cache);
2835	bad_pending_cache:
2836	kmem_cache_destroy(s: exception_cache);
2837	bad_exception_cache:
2838	exit_origin_hash();
2839	bad_origin_hash:
2840	dm_exception_store_exit();
2841
2842	return r;
2843	}
2844
2845	static void __exit dm_snapshot_exit(void)
2846	{
2847	dm_unregister_target(t: &snapshot_target);
2848	dm_unregister_target(t: &origin_target);
2849	dm_unregister_target(t: &merge_target);
2850
2851	exit_origin_hash();
2852	kmem_cache_destroy(s: pending_cache);
2853	kmem_cache_destroy(s: exception_cache);
2854
2855	dm_exception_store_exit();
2856	}
2857
2858	/ Module hooks /
2859	module_init(dm_snapshot_init);
2860	module_exit(dm_snapshot_exit);
2861
2862	MODULE_DESCRIPTION(DM_NAME " snapshot target");
2863	MODULE_AUTHOR("Joe Thornber");
2864	MODULE_LICENSE("GPL");
2865	MODULE_ALIAS("dm-snapshot-origin");
2866	MODULE_ALIAS("dm-snapshot-merge");
2867

/* source code of linux/drivers/md/dm-snap.c */