// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2002 Sistina Software (UK) Limited.
 * Copyright (C) 2006 Red Hat GmbH
 *
 * This file is released under the GPL.
 *
 * Kcopyd provides a simple interface for copying an area of one
 * block-device to one or more other block-devices, with an asynchronous
 * completion notification.
 */

#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/device-mapper.h>
#include <linux/dm-kcopyd.h>

#include "dm-core.h"

#define SPLIT_COUNT 8
#define MIN_JOBS 8

#define DEFAULT_SUB_JOB_SIZE_KB 512
#define MAX_SUB_JOB_SIZE_KB 1024

static unsigned int kcopyd_subjob_size_kb = DEFAULT_SUB_JOB_SIZE_KB;

module_param(kcopyd_subjob_size_kb, uint, 0644);
MODULE_PARM_DESC(kcopyd_subjob_size_kb, "Sub-job size for dm-kcopyd clients");

static unsigned int dm_get_kcopyd_subjob_size(void)
{
	unsigned int sub_job_size_kb;

	sub_job_size_kb = __dm_get_module_param(&kcopyd_subjob_size_kb,
						DEFAULT_SUB_JOB_SIZE_KB,
						MAX_SUB_JOB_SIZE_KB);

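	/* The module parameter is in KiB; shifting by 1 converts it to 512-byte sectors. */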
	return sub_job_size_kb << 1;
}

/*
 *----------------------------------------------------------------
 * Each kcopyd client has its own little pool of preallocated
 * pages for kcopyd io.
 *---------------------------------------------------------------
 */
struct dm_kcopyd_client {
	struct page_list *pages;
	unsigned int nr_reserved_pages;
	unsigned int nr_free_pages;
	unsigned int sub_job_size;

	struct dm_io_client *io_client;

	wait_queue_head_t destroyq;

	mempool_t job_pool;

	struct workqueue_struct *kcopyd_wq;
	struct work_struct kcopyd_work;

	struct dm_kcopyd_throttle *throttle;

	atomic_t nr_jobs;

	/*
	 * We maintain four lists of jobs:
	 *
	 * i)   jobs waiting for pages
	 * ii)  jobs that have pages, and are waiting for the io to be issued.
	 * iii) jobs that don't need to do any IO and just run a callback
	 * iv)  jobs that have completed.
	 *
	 * All four of these are protected by job_lock.
	 */
	spinlock_t job_lock;
	struct list_head callback_jobs;
	struct list_head complete_jobs;
	struct list_head io_jobs;
	struct list_head pages_jobs;
};

static struct page_list zero_page_list;

static DEFINE_SPINLOCK(throttle_spinlock);

/*
 * IO/IDLE accounting slowly decays after (1 << ACCOUNT_INTERVAL_SHIFT) period.
 * When total_period >= (1 << ACCOUNT_INTERVAL_SHIFT) the counters are divided
 * by 2.
 */
#define ACCOUNT_INTERVAL_SHIFT SHIFT_HZ

/*
 * Sleep this number of microseconds.
 *
 * The value was decided experimentally.
 * Smaller values seem to cause an increased copy rate above the limit.
 * The reason for this is unknown but possibly due to jiffies rounding errors
 * or read/write cache inside the disk.
 */
#define SLEEP_USEC 100000

/*
 * Maximum number of sleep events. Without this limit there is a theoretical
 * livelock when multiple kcopyd clients do work simultaneously.
 */
#define MAX_SLEEPS 10

static void io_job_start(struct dm_kcopyd_throttle *t)
{
	unsigned int throttle, now, difference;
	int slept = 0, skew;

	if (unlikely(!t))
		return;

try_again:
	spin_lock_irq(&throttle_spinlock);

	throttle = READ_ONCE(t->throttle);

	if (likely(throttle >= 100))
		goto skip_limit;

	now = jiffies;
	difference = now - t->last_jiffies;
	t->last_jiffies = now;
	if (t->num_io_jobs)
		t->io_period += difference;
	t->total_period += difference;

	/*
	 * Maintain sane values if we got a temporary overflow.
	 */
	if (unlikely(t->io_period > t->total_period))
		t->io_period = t->total_period;

	if (unlikely(t->total_period >= (1 << ACCOUNT_INTERVAL_SHIFT))) {
		int shift = fls(t->total_period >> ACCOUNT_INTERVAL_SHIFT);

		t->total_period >>= shift;
		t->io_period >>= shift;
	}

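	/* skew > 0 means I/O time has exceeded "throttle" percent of the total elapsed time. */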
	skew = t->io_period - throttle * t->total_period / 100;

	if (unlikely(skew > 0) && slept < MAX_SLEEPS) {
		slept++;
		spin_unlock_irq(&throttle_spinlock);
		fsleep(SLEEP_USEC);
		goto try_again;
	}

skip_limit:
	t->num_io_jobs++;

	spin_unlock_irq(&throttle_spinlock);
}

static void io_job_finish(struct dm_kcopyd_throttle *t)
{
	unsigned long flags;

	if (unlikely(!t))
		return;

	spin_lock_irqsave(&throttle_spinlock, flags);

	t->num_io_jobs--;

	if (likely(READ_ONCE(t->throttle) >= 100))
		goto skip_limit;

	if (!t->num_io_jobs) {
		unsigned int now, difference;

		now = jiffies;
		difference = now - t->last_jiffies;
		t->last_jiffies = now;

		t->io_period += difference;
		t->total_period += difference;

		/*
		 * Maintain sane values if we got a temporary overflow.
		 */
		if (unlikely(t->io_period > t->total_period))
			t->io_period = t->total_period;
	}

skip_limit:
	spin_unlock_irqrestore(&throttle_spinlock, flags);
}


static void wake(struct dm_kcopyd_client *kc)
{
	queue_work(kc->kcopyd_wq, &kc->kcopyd_work);
}

/*
 * Obtain one page for the use of kcopyd.
 */
static struct page_list *alloc_pl(gfp_t gfp)
{
	struct page_list *pl;

	pl = kmalloc(sizeof(*pl), gfp);
	if (!pl)
		return NULL;

	pl->page = alloc_page(gfp | __GFP_HIGHMEM);
	if (!pl->page) {
		kfree(pl);
		return NULL;
	}

	return pl;
}

static void free_pl(struct page_list *pl)
{
	__free_page(pl->page);
	kfree(pl);
}

/*
 * Add the provided pages to a client's free page list, releasing
 * back to the system any beyond the reserved_pages limit.
 */
static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl)
{
	struct page_list *next;

	do {
		next = pl->next;

		if (kc->nr_free_pages >= kc->nr_reserved_pages)
			free_pl(pl);
		else {
			pl->next = kc->pages;
			kc->pages = pl;
			kc->nr_free_pages++;
		}

		pl = next;
	} while (pl);
}

static int kcopyd_get_pages(struct dm_kcopyd_client *kc,
			    unsigned int nr, struct page_list **pages)
{
	struct page_list *pl;

	*pages = NULL;

	do {
		pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY | __GFP_KSWAPD_RECLAIM);
		if (unlikely(!pl)) {
			/* Use reserved pages */
			pl = kc->pages;
			if (unlikely(!pl))
				goto out_of_memory;
			kc->pages = pl->next;
			kc->nr_free_pages--;
		}
		pl->next = *pages;
		*pages = pl;
	} while (--nr);

	return 0;

out_of_memory:
	if (*pages)
		kcopyd_put_pages(kc, *pages);
	return -ENOMEM;
}

/*
 * These three functions resize the page pool.
 */
static void drop_pages(struct page_list *pl)
{
	struct page_list *next;

	while (pl) {
		next = pl->next;
		free_pl(pl);
		pl = next;
	}
}

/*
 * Allocate and reserve nr_pages for the use of a specific client.
 */
static int client_reserve_pages(struct dm_kcopyd_client *kc, unsigned int nr_pages)
{
	unsigned int i;
	struct page_list *pl = NULL, *next;

	for (i = 0; i < nr_pages; i++) {
		next = alloc_pl(GFP_KERNEL);
		if (!next) {
			if (pl)
				drop_pages(pl);
			return -ENOMEM;
		}
		next->next = pl;
		pl = next;
	}

	kc->nr_reserved_pages += nr_pages;
	kcopyd_put_pages(kc, pl);

	return 0;
}

static void client_free_pages(struct dm_kcopyd_client *kc)
{
	BUG_ON(kc->nr_free_pages != kc->nr_reserved_pages);
	drop_pages(kc->pages);
	kc->pages = NULL;
	kc->nr_free_pages = kc->nr_reserved_pages = 0;
}

/*
 *---------------------------------------------------------------
 * kcopyd_jobs need to be allocated by the *clients* of kcopyd,
 * for this reason we use a mempool to prevent the client from
 * ever having to do io (which could cause a deadlock).
 *---------------------------------------------------------------
 */
struct kcopyd_job {
	struct dm_kcopyd_client *kc;
	struct list_head list;
	unsigned int flags;

	/*
	 * Error state of the job.
	 */
	int read_err;
	unsigned long write_err;

	/*
	 * REQ_OP_READ, REQ_OP_WRITE or REQ_OP_WRITE_ZEROES.
	 */
	enum req_op op;
	struct dm_io_region source;

	/*
	 * The destinations for the transfer.
	 */
	unsigned int num_dests;
	struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS];

	struct page_list *pages;

	/*
	 * Set this to ensure you are notified when the job has
	 * completed. 'context' is for callback to use.
	 */
	dm_kcopyd_notify_fn fn;
	void *context;

	/*
	 * These fields are only used if the job has been split
	 * into more manageable parts.
	 */
	struct mutex lock;
	atomic_t sub_jobs;
	sector_t progress;
	sector_t write_offset;

	struct kcopyd_job *master_job;
};

static struct kmem_cache *_job_cache;

int __init dm_kcopyd_init(void)
{
	_job_cache = kmem_cache_create("kcopyd_job",
				sizeof(struct kcopyd_job) * (SPLIT_COUNT + 1),
				__alignof__(struct kcopyd_job), 0, NULL);
	if (!_job_cache)
		return -ENOMEM;

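	/* A single-entry circular list that always hands out the shared zero page. */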
	zero_page_list.next = &zero_page_list;
	zero_page_list.page = ZERO_PAGE(0);

	return 0;
}

void dm_kcopyd_exit(void)
{
	kmem_cache_destroy(_job_cache);
	_job_cache = NULL;
}

/*
 * Functions to push and pop a job onto the head of a given job
 * list.
 */
static struct kcopyd_job *pop_io_job(struct list_head *jobs,
				     struct dm_kcopyd_client *kc)
{
	struct kcopyd_job *job;

	/*
	 * For I/O jobs, pop any read, any write without sequential write
	 * constraint and sequential writes that are at the right position.
	 */
	list_for_each_entry(job, jobs, list) {
		if (job->op == REQ_OP_READ ||
		    !(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) {
			list_del(&job->list);
			return job;
		}

		if (job->write_offset == job->master_job->write_offset) {
			job->master_job->write_offset += job->source.count;
			list_del(&job->list);
			return job;
		}
	}

	return NULL;
}

static struct kcopyd_job *pop(struct list_head *jobs,
			      struct dm_kcopyd_client *kc)
{
	struct kcopyd_job *job = NULL;

	spin_lock_irq(&kc->job_lock);

	if (!list_empty(jobs)) {
		if (jobs == &kc->io_jobs)
			job = pop_io_job(jobs, kc);
		else {
			job = list_entry(jobs->next, struct kcopyd_job, list);
			list_del(&job->list);
		}
	}
	spin_unlock_irq(&kc->job_lock);

	return job;
}

static void push(struct list_head *jobs, struct kcopyd_job *job)
{
	unsigned long flags;
	struct dm_kcopyd_client *kc = job->kc;

	spin_lock_irqsave(&kc->job_lock, flags);
	list_add_tail(&job->list, jobs);
	spin_unlock_irqrestore(&kc->job_lock, flags);
}


static void push_head(struct list_head *jobs, struct kcopyd_job *job)
{
	struct dm_kcopyd_client *kc = job->kc;

	spin_lock_irq(&kc->job_lock);
	list_add(&job->list, jobs);
	spin_unlock_irq(&kc->job_lock);
}

/*
 * These three functions process 1 item from the corresponding
 * job list.
 *
 * They return:
 * < 0: error
 *   0: success
 * > 0: can't process yet.
 */
static int run_complete_job(struct kcopyd_job *job)
{
	void *context = job->context;
	int read_err = job->read_err;
	unsigned long write_err = job->write_err;
	dm_kcopyd_notify_fn fn = job->fn;
	struct dm_kcopyd_client *kc = job->kc;

	if (job->pages && job->pages != &zero_page_list)
		kcopyd_put_pages(kc, job->pages);
	/*
	 * If this is the master job, the sub jobs have already
	 * completed so we can free everything.
	 */
	if (job->master_job == job) {
		mutex_destroy(&job->lock);
		mempool_free(job, &kc->job_pool);
	}
	fn(read_err, write_err, context);

	if (atomic_dec_and_test(&kc->nr_jobs))
		wake_up(&kc->destroyq);

	cond_resched();

	return 0;
}

static void complete_io(unsigned long error, void *context)
{
	struct kcopyd_job *job = context;
	struct dm_kcopyd_client *kc = job->kc;

	io_job_finish(kc->throttle);

	if (error) {
		if (op_is_write(job->op))
			job->write_err |= error;
		else
			job->read_err = 1;

		if (!(job->flags & BIT(DM_KCOPYD_IGNORE_ERROR))) {
			push(&kc->complete_jobs, job);
			wake(kc);
			return;
		}
	}

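	/* Writes are done; a completed read is resubmitted as a write to the destinations. */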
	if (op_is_write(job->op))
		push(&kc->complete_jobs, job);

	else {
		job->op = REQ_OP_WRITE;
		push(&kc->io_jobs, job);
	}

	wake(kc);
}

/*
 * Request io on as many buffer heads as we can currently get for
 * a particular job.
 */
static int run_io_job(struct kcopyd_job *job)
{
	int r;
	struct dm_io_request io_req = {
		.bi_opf = job->op,
		.mem.type = DM_IO_PAGE_LIST,
		.mem.ptr.pl = job->pages,
		.mem.offset = 0,
		.notify.fn = complete_io,
		.notify.context = job,
		.client = job->kc->io_client,
	};

	/*
	 * If we need to write sequentially and some reads or writes failed,
	 * no point in continuing.
	 */
	if (job->flags & BIT(DM_KCOPYD_WRITE_SEQ) &&
	    job->master_job->write_err) {
		job->write_err = job->master_job->write_err;
		return -EIO;
	}

	io_job_start(job->kc->throttle);

	if (job->op == REQ_OP_READ)
		r = dm_io(&io_req, 1, &job->source, NULL);
	else
		r = dm_io(&io_req, job->num_dests, job->dests, NULL);

	return r;
}

static int run_pages_job(struct kcopyd_job *job)
{
	int r;
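	/* dests[0].count is in 512-byte sectors; PAGE_SIZE >> 9 is the number of sectors per page. */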
	unsigned int nr_pages = dm_div_up(job->dests[0].count, PAGE_SIZE >> 9);

	r = kcopyd_get_pages(job->kc, nr_pages, &job->pages);
	if (!r) {
		/* this job is ready for io */
		push(&job->kc->io_jobs, job);
		return 0;
	}

	if (r == -ENOMEM)
		/* can't complete now */
		return 1;

	return r;
}

/*
 * Run through a list for as long as possible. Returns the count
 * of successful jobs.
 */
static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc,
			int (*fn)(struct kcopyd_job *))
{
	struct kcopyd_job *job;
	int r, count = 0;

	while ((job = pop(jobs, kc))) {

		r = fn(job);

		if (r < 0) {
			/* error this rogue job */
			if (op_is_write(job->op))
				job->write_err = (unsigned long) -1L;
			else
				job->read_err = 1;
			push(&kc->complete_jobs, job);
			wake(kc);
			break;
		}

		if (r > 0) {
			/*
			 * We couldn't service this job ATM, so
			 * push this job back onto the list.
			 */
			push_head(jobs, job);
			break;
		}

		count++;
	}

	return count;
}

/*
 * kcopyd does this every time it's woken up.
 */
static void do_work(struct work_struct *work)
{
	struct dm_kcopyd_client *kc = container_of(work,
					struct dm_kcopyd_client, kcopyd_work);
	struct blk_plug plug;

	/*
	 * The order that these are called is *very* important.
	 * complete jobs can free some pages for pages jobs.
	 * Pages jobs when successful will jump onto the io jobs
	 * list. io jobs call wake when they complete and it all
	 * starts again.
	 */
	spin_lock_irq(&kc->job_lock);
	list_splice_tail_init(&kc->callback_jobs, &kc->complete_jobs);
	spin_unlock_irq(&kc->job_lock);

	blk_start_plug(&plug);
	process_jobs(&kc->complete_jobs, kc, run_complete_job);
	process_jobs(&kc->pages_jobs, kc, run_pages_job);
	process_jobs(&kc->io_jobs, kc, run_io_job);
	blk_finish_plug(&plug);
}

/*
 * If we are copying a small region we just dispatch a single job
 * to do the copy, otherwise the io has to be split up into many
 * jobs.
 */
static void dispatch_job(struct kcopyd_job *job)
{
	struct dm_kcopyd_client *kc = job->kc;

	atomic_inc(&kc->nr_jobs);
	if (unlikely(!job->source.count))
		push(&kc->callback_jobs, job);
	else if (job->pages == &zero_page_list)
		push(&kc->io_jobs, job);
	else
		push(&kc->pages_jobs, job);
	wake(kc);
}

static void segment_complete(int read_err, unsigned long write_err,
			     void *context)
{
	/* FIXME: tidy this function */
	sector_t progress = 0;
	sector_t count = 0;
	struct kcopyd_job *sub_job = context;
	struct kcopyd_job *job = sub_job->master_job;
	struct dm_kcopyd_client *kc = job->kc;

	mutex_lock(&job->lock);

	/* update the error */
	if (read_err)
		job->read_err = 1;

	if (write_err)
		job->write_err |= write_err;

	/*
	 * Only dispatch more work if there hasn't been an error.
	 */
	if ((!job->read_err && !job->write_err) ||
	    job->flags & BIT(DM_KCOPYD_IGNORE_ERROR)) {
		/* get the next chunk of work */
		progress = job->progress;
		count = job->source.count - progress;
		if (count) {
			if (count > kc->sub_job_size)
				count = kc->sub_job_size;

			job->progress += count;
		}
	}
	mutex_unlock(&job->lock);

	if (count) {
		int i;

		*sub_job = *job;
		sub_job->write_offset = progress;
		sub_job->source.sector += progress;
		sub_job->source.count = count;

		for (i = 0; i < job->num_dests; i++) {
			sub_job->dests[i].sector += progress;
			sub_job->dests[i].count = count;
		}

		sub_job->fn = segment_complete;
		sub_job->context = sub_job;
		dispatch_job(sub_job);

	} else if (atomic_dec_and_test(&job->sub_jobs)) {

		/*
		 * Queue the completion callback to the kcopyd thread.
		 *
		 * Some callers assume that all the completions are called
		 * from a single thread and don't race with each other.
		 *
		 * We must not call the callback directly here because this
		 * code may not be executing in the thread.
		 */
		push(&kc->complete_jobs, job);
		wake(kc);
	}
}

/*
 * Create some sub jobs to share the work between them.
 */
static void split_job(struct kcopyd_job *master_job)
{
	int i;

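	/*
	 * The mempool allocation behind the master job holds SPLIT_COUNT + 1
	 * jobs laid out as an array (see dm_kcopyd_init()), so
	 * master_job[i + 1] addresses sub job i.
	 */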
	atomic_inc(&master_job->kc->nr_jobs);

	atomic_set(&master_job->sub_jobs, SPLIT_COUNT);
	for (i = 0; i < SPLIT_COUNT; i++) {
		master_job[i + 1].master_job = master_job;
		segment_complete(0, 0u, &master_job[i + 1]);
	}
}

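/*
 * Illustrative caller sketch (not taken from any particular target; the
 * names src_bdev, dst_bdev, copy_done and ctx are hypothetical):
 *
 *	struct dm_io_region from = {
 *		.bdev = src_bdev, .sector = 0, .count = nr_sectors,
 *	};
 *	struct dm_io_region to = {
 *		.bdev = dst_bdev, .sector = 0, .count = nr_sectors,
 *	};
 *
 *	dm_kcopyd_copy(kc, &from, 1, &to, 0, copy_done, ctx);
 *
 * copy_done() is later invoked from the kcopyd workqueue with the read and
 * write error state once all destinations have been processed.
 */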
void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
		    unsigned int num_dests, struct dm_io_region *dests,
		    unsigned int flags, dm_kcopyd_notify_fn fn, void *context)
{
	struct kcopyd_job *job;
	int i;

	/*
	 * Allocate an array of jobs consisting of one master job
	 * followed by SPLIT_COUNT sub jobs.
	 */
	job = mempool_alloc(&kc->job_pool, GFP_NOIO);
	mutex_init(&job->lock);

	/*
	 * set up for the read.
	 */
	job->kc = kc;
	job->flags = flags;
	job->read_err = 0;
	job->write_err = 0;

	job->num_dests = num_dests;
	memcpy(&job->dests, dests, sizeof(*dests) * num_dests);

	/*
	 * If one of the destinations is a host-managed zoned block device,
	 * we need to write sequentially. If one of the destinations is a
	 * host-aware device, then leave it to the caller to choose what to do.
	 */
	if (!(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) {
		for (i = 0; i < job->num_dests; i++) {
			if (bdev_zoned_model(dests[i].bdev) == BLK_ZONED_HM) {
				job->flags |= BIT(DM_KCOPYD_WRITE_SEQ);
				break;
			}
		}
	}

	/*
	 * If we need to write sequentially, errors cannot be ignored.
	 */
	if (job->flags & BIT(DM_KCOPYD_WRITE_SEQ) &&
	    job->flags & BIT(DM_KCOPYD_IGNORE_ERROR))
		job->flags &= ~BIT(DM_KCOPYD_IGNORE_ERROR);

	if (from) {
		job->source = *from;
		job->pages = NULL;
		job->op = REQ_OP_READ;
	} else {
		memset(&job->source, 0, sizeof(job->source));
		job->source.count = job->dests[0].count;
		job->pages = &zero_page_list;

		/*
		 * Use WRITE ZEROES to optimize zeroing if all dests support it.
		 */
		job->op = REQ_OP_WRITE_ZEROES;
		for (i = 0; i < job->num_dests; i++)
			if (!bdev_write_zeroes_sectors(job->dests[i].bdev)) {
				job->op = REQ_OP_WRITE;
				break;
			}
	}

	job->fn = fn;
	job->context = context;
	job->master_job = job;
	job->write_offset = 0;

	if (job->source.count <= kc->sub_job_size)
		dispatch_job(job);
	else {
		job->progress = 0;
		split_job(job);
	}
}
EXPORT_SYMBOL(dm_kcopyd_copy);

void dm_kcopyd_zero(struct dm_kcopyd_client *kc,
		    unsigned int num_dests, struct dm_io_region *dests,
		    unsigned int flags, dm_kcopyd_notify_fn fn, void *context)
{
	dm_kcopyd_copy(kc, NULL, num_dests, dests, flags, fn, context);
}
EXPORT_SYMBOL(dm_kcopyd_zero);

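/*
 * dm_kcopyd_prepare_callback() allocates a no-I/O job up front;
 * dm_kcopyd_do_callback() later queues it so that the notify function runs
 * from the kcopyd workqueue like any other completion. Illustrative sketch
 * (my_notify and my_context are hypothetical names):
 *
 *	void *cb = dm_kcopyd_prepare_callback(kc, my_notify, my_context);
 *	...
 *	dm_kcopyd_do_callback(cb, 0, 0);
 */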
void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc,
				 dm_kcopyd_notify_fn fn, void *context)
{
	struct kcopyd_job *job;

	job = mempool_alloc(&kc->job_pool, GFP_NOIO);

	memset(job, 0, sizeof(struct kcopyd_job));
	job->kc = kc;
	job->fn = fn;
	job->context = context;
	job->master_job = job;

	atomic_inc(&kc->nr_jobs);

	return job;
}
EXPORT_SYMBOL(dm_kcopyd_prepare_callback);

void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err)
{
	struct kcopyd_job *job = j;
	struct dm_kcopyd_client *kc = job->kc;

	job->read_err = read_err;
	job->write_err = write_err;

	push(&kc->callback_jobs, job);
	wake(kc);
}
EXPORT_SYMBOL(dm_kcopyd_do_callback);

/*
 * Cancels a kcopyd job, e.g. someone might be deactivating a
 * mirror.
 */
#if 0
int kcopyd_cancel(struct kcopyd_job *job, int block)
{
	/* FIXME: finish */
	return -1;
}
#endif /* 0 */

/*
 *---------------------------------------------------------------
 * Client setup
 *---------------------------------------------------------------
 */
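/*
 * Typical client lifecycle, as an illustrative sketch (error handling
 * trimmed; "throttle" may be NULL if no throttling is wanted):
 *
 *	kc = dm_kcopyd_client_create(throttle);
 *	if (IS_ERR(kc))
 *		return PTR_ERR(kc);
 *	...issue dm_kcopyd_copy() / dm_kcopyd_zero() requests...
 *	dm_kcopyd_client_destroy(kc);
 */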
struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle)
{
	int r;
	unsigned int reserve_pages;
	struct dm_kcopyd_client *kc;

	kc = kzalloc(sizeof(*kc), GFP_KERNEL);
	if (!kc)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&kc->job_lock);
	INIT_LIST_HEAD(&kc->callback_jobs);
	INIT_LIST_HEAD(&kc->complete_jobs);
	INIT_LIST_HEAD(&kc->io_jobs);
	INIT_LIST_HEAD(&kc->pages_jobs);
	kc->throttle = throttle;

	r = mempool_init_slab_pool(&kc->job_pool, MIN_JOBS, _job_cache);
	if (r)
		goto bad_slab;

	INIT_WORK(&kc->kcopyd_work, do_work);
	kc->kcopyd_wq = alloc_workqueue("kcopyd", WQ_MEM_RECLAIM, 0);
	if (!kc->kcopyd_wq) {
		r = -ENOMEM;
		goto bad_workqueue;
	}

	kc->sub_job_size = dm_get_kcopyd_subjob_size();
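	/* sub_job_size is in sectors; reserve enough whole pages to back one sub job. */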
	reserve_pages = DIV_ROUND_UP(kc->sub_job_size << SECTOR_SHIFT, PAGE_SIZE);

	kc->pages = NULL;
	kc->nr_reserved_pages = kc->nr_free_pages = 0;
	r = client_reserve_pages(kc, reserve_pages);
	if (r)
		goto bad_client_pages;

	kc->io_client = dm_io_client_create();
	if (IS_ERR(kc->io_client)) {
		r = PTR_ERR(kc->io_client);
		goto bad_io_client;
	}

	init_waitqueue_head(&kc->destroyq);
	atomic_set(&kc->nr_jobs, 0);

	return kc;

bad_io_client:
	client_free_pages(kc);
bad_client_pages:
	destroy_workqueue(kc->kcopyd_wq);
bad_workqueue:
	mempool_exit(&kc->job_pool);
bad_slab:
	kfree(kc);

	return ERR_PTR(r);
}
EXPORT_SYMBOL(dm_kcopyd_client_create);

void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc)
{
	/* Wait for completion of all jobs submitted by this client. */
	wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs));

	BUG_ON(!list_empty(&kc->callback_jobs));
	BUG_ON(!list_empty(&kc->complete_jobs));
	BUG_ON(!list_empty(&kc->io_jobs));
	BUG_ON(!list_empty(&kc->pages_jobs));
	destroy_workqueue(kc->kcopyd_wq);
	dm_io_client_destroy(kc->io_client);
	client_free_pages(kc);
	mempool_exit(&kc->job_pool);
	kfree(kc);
}
EXPORT_SYMBOL(dm_kcopyd_client_destroy);

void dm_kcopyd_client_flush(struct dm_kcopyd_client *kc)
{
	flush_workqueue(kc->kcopyd_wq);
}
EXPORT_SYMBOL(dm_kcopyd_client_flush);