// SPDX-License-Identifier: GPL-2.0
/*
 * padata.c - generic interface to process data streams in parallel
 *
 * See Documentation/core-api/padata.rst for more information.
 *
 * Copyright (C) 2008, 2009 secunet Security Networks AG
 * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
 *
 * Copyright (c) 2020 Oracle and/or its affiliates.
 * Author: Daniel Jordan <daniel.m.jordan@oracle.com>
 */

#include <linux/completion.h>
#include <linux/export.h>
#include <linux/cpumask.h>
#include <linux/err.h>
#include <linux/cpu.h>
#include <linux/padata.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/rcupdate.h>

#define PADATA_WORK_ONSTACK	1	/* Work's memory is on stack */

struct padata_work {
	struct work_struct	pw_work;
	struct list_head	pw_list;  /* padata_free_works linkage */
	void			*pw_data;
};

static DEFINE_SPINLOCK(padata_works_lock);
static struct padata_work *padata_works;
static LIST_HEAD(padata_free_works);

struct padata_mt_job_state {
	spinlock_t		lock;
	struct completion	completion;
	struct padata_mt_job	*job;
	int			nworks;
	int			nworks_fini;
	unsigned long		chunk_size;
};

static void padata_free_pd(struct parallel_data *pd);
static void __init padata_mt_helper(struct work_struct *work);

static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
{
	int cpu, target_cpu;

	target_cpu = cpumask_first(pd->cpumask.pcpu);
	for (cpu = 0; cpu < cpu_index; cpu++)
		target_cpu = cpumask_next(target_cpu, pd->cpumask.pcpu);

	return target_cpu;
}

static int padata_cpu_hash(struct parallel_data *pd, unsigned int seq_nr)
{
	/*
	 * Hash the sequence numbers to the cpus by taking
	 * seq_nr modulo the number of cpus in use.
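	 * For example, with four cpus in use, seq_nr 6 yields
	 * cpu_index 6 % 4 == 2, the third cpu set in pd->cpumask.pcpu.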
	 */
	int cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);

	return padata_index_to_cpu(pd, cpu_index);
}

static struct padata_work *padata_work_alloc(void)
{
	struct padata_work *pw;

	lockdep_assert_held(&padata_works_lock);

	if (list_empty(&padata_free_works))
		return NULL;	/* No more work items allowed to be queued. */

	pw = list_first_entry(&padata_free_works, struct padata_work, pw_list);
	list_del(&pw->pw_list);
	return pw;
}

/*
 * This function is marked __ref because it may be optimized in such a way
 * that it directly refers to work_fn's address, which causes modpost to
 * complain when work_fn is marked __init. This scenario was observed with
 * clang LTO, where padata_work_init() was optimized to refer directly to
 * padata_mt_helper() because the calls to padata_work_init() with other
 * work_fn values were eliminated or inlined.
 */
static void __ref padata_work_init(struct padata_work *pw, work_func_t work_fn,
				   void *data, int flags)
{
	if (flags & PADATA_WORK_ONSTACK)
		INIT_WORK_ONSTACK(&pw->pw_work, work_fn);
	else
		INIT_WORK(&pw->pw_work, work_fn);
	pw->pw_data = data;
}

static int __init padata_work_alloc_mt(int nworks, void *data,
				       struct list_head *head)
{
	int i;

	spin_lock(&padata_works_lock);
	/* Start at 1 because the current task participates in the job. */
	for (i = 1; i < nworks; ++i) {
		struct padata_work *pw = padata_work_alloc();

		if (!pw)
			break;
		padata_work_init(pw, padata_mt_helper, data, 0);
		list_add(&pw->pw_list, head);
	}
	spin_unlock(&padata_works_lock);

	return i;
}

static void padata_work_free(struct padata_work *pw)
{
	lockdep_assert_held(&padata_works_lock);
	list_add(&pw->pw_list, &padata_free_works);
}

static void __init padata_works_free(struct list_head *works)
{
	struct padata_work *cur, *next;

	if (list_empty(works))
		return;

	spin_lock(&padata_works_lock);
	list_for_each_entry_safe(cur, next, works, pw_list) {
		list_del(&cur->pw_list);
		padata_work_free(cur);
	}
	spin_unlock(&padata_works_lock);
}

static void padata_parallel_worker(struct work_struct *parallel_work)
{
	struct padata_work *pw = container_of(parallel_work, struct padata_work,
					      pw_work);
	struct padata_priv *padata = pw->pw_data;

	local_bh_disable();
	padata->parallel(padata);
	spin_lock(&padata_works_lock);
	padata_work_free(pw);
	spin_unlock(&padata_works_lock);
	local_bh_enable();
}

/**
 * padata_do_parallel - padata parallelization function
 *
 * @ps: padata shell
 * @padata: object to be parallelized
 * @cb_cpu: pointer to the CPU that the serialization callback function should
 *          run on. If it's not in the serial cpumask of @pinst
 *          (i.e. cpumask.cbcpu), this function selects a fallback CPU and if
 *          none found, returns -EINVAL.
 *
 * The parallelization callback function will run with BHs off.
 * Note: Every object which is parallelized by padata_do_parallel
 * must be seen by padata_do_serial.
 *
 * Return: 0 on success or else negative error code.
 */
int padata_do_parallel(struct padata_shell *ps,
		       struct padata_priv *padata, int *cb_cpu)
{
	struct padata_instance *pinst = ps->pinst;
	int i, cpu, cpu_index, err;
	struct parallel_data *pd;
	struct padata_work *pw;

	rcu_read_lock_bh();

	pd = rcu_dereference_bh(ps->pd);

	err = -EINVAL;
	if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID)
		goto out;

	if (!cpumask_test_cpu(*cb_cpu, pd->cpumask.cbcpu)) {
		if (cpumask_empty(pd->cpumask.cbcpu))
			goto out;

		/* Select an alternate fallback CPU and notify the caller. */
		cpu_index = *cb_cpu % cpumask_weight(pd->cpumask.cbcpu);

		cpu = cpumask_first(pd->cpumask.cbcpu);
		for (i = 0; i < cpu_index; i++)
			cpu = cpumask_next(cpu, pd->cpumask.cbcpu);

		*cb_cpu = cpu;
	}

	err = -EBUSY;
	if ((pinst->flags & PADATA_RESET))
		goto out;

	refcount_inc(&pd->refcnt);
	padata->pd = pd;
	padata->cb_cpu = *cb_cpu;

	spin_lock(&padata_works_lock);
	padata->seq_nr = ++pd->seq_nr;
	pw = padata_work_alloc();
	spin_unlock(&padata_works_lock);

	if (!pw) {
		/* Maximum works limit exceeded, run in the current task. */
		padata->parallel(padata);
	}

	rcu_read_unlock_bh();

	if (pw) {
		padata_work_init(pw, padata_parallel_worker, padata, 0);
		queue_work(pinst->parallel_wq, &pw->pw_work);
	}

	return 0;
out:
	rcu_read_unlock_bh();

	return err;
}
EXPORT_SYMBOL(padata_do_parallel);
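
/*
 * Submission-side sketch (illustrative only, not part of this file): a
 * caller typically embeds struct padata_priv in its own request and sets
 * ->parallel and ->serial before submitting. The names my_request,
 * my_parallel and my_serial are hypothetical; container_of() recovers the
 * enclosing request in the callbacks.
 *
 *	struct my_request {
 *		struct padata_priv padata;
 *		// ... caller-specific state ...
 *	};
 *
 *	struct my_request *req = kzalloc(sizeof(*req), GFP_ATOMIC);
 *	int cb_cpu = raw_smp_processor_id();
 *	int err;
 *
 *	req->padata.parallel = my_parallel;
 *	req->padata.serial = my_serial;
 *	err = padata_do_parallel(ps, &req->padata, &cb_cpu);
 *	// 0 means the job was accepted; -EBUSY/-EINVAL mean it was not.
 */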

/*
 * padata_find_next - Find the next object that needs serialization.
 *
 * Return:
 * * A pointer to the control struct of the next object that needs
 *   serialization, if present in one of the percpu reorder queues.
 * * NULL, if the next object that needs serialization will
 *   be parallel processed by another cpu and is not yet present in
 *   the cpu's reorder queue.
 */
static struct padata_priv *padata_find_next(struct parallel_data *pd,
					    bool remove_object)
{
	struct padata_priv *padata;
	struct padata_list *reorder;
	int cpu = pd->cpu;

	reorder = per_cpu_ptr(pd->reorder_list, cpu);

	spin_lock(&reorder->lock);
	if (list_empty(&reorder->list)) {
		spin_unlock(&reorder->lock);
		return NULL;
	}

	padata = list_entry(reorder->list.next, struct padata_priv, list);

	/*
	 * Checks the rare case where two or more parallel jobs have hashed to
	 * the same CPU and one of the later ones finishes first.
	 */
	if (padata->seq_nr != pd->processed) {
		spin_unlock(&reorder->lock);
		return NULL;
	}

	if (remove_object) {
		list_del_init(&padata->list);
		++pd->processed;
		pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false);
	}

	spin_unlock(&reorder->lock);
	return padata;
}

static void padata_reorder(struct parallel_data *pd)
{
	struct padata_instance *pinst = pd->ps->pinst;
	int cb_cpu;
	struct padata_priv *padata;
	struct padata_serial_queue *squeue;
	struct padata_list *reorder;

	/*
	 * We need to ensure that only one cpu can work on dequeueing of
	 * the reorder queue at a time. Calculating in which percpu reorder
	 * queue the next object will arrive takes some time. A spinlock
	 * would be highly contended. Also it is not clear in which order
	 * the objects arrive at the reorder queues. So a cpu could wait to
	 * get the lock just to notice that there is nothing to do at the
	 * moment. Therefore we use a trylock and let the holder of the lock
	 * care for all the objects enqueued during the holdtime of the lock.
	 */
	if (!spin_trylock_bh(&pd->lock))
		return;

	while (1) {
		padata = padata_find_next(pd, true);

		/*
		 * If the next object that needs serialization is parallel
		 * processed by another cpu and is still on its way to the
		 * cpu's reorder queue, nothing to do for now.
		 */
		if (!padata)
			break;

		cb_cpu = padata->cb_cpu;
		squeue = per_cpu_ptr(pd->squeue, cb_cpu);

		spin_lock(&squeue->serial.lock);
		list_add_tail(&padata->list, &squeue->serial.list);
		spin_unlock(&squeue->serial.lock);

		queue_work_on(cb_cpu, pinst->serial_wq, &squeue->work);
	}

	spin_unlock_bh(&pd->lock);

	/*
	 * The next object that needs serialization might have arrived at
	 * the reorder queues in the meantime.
	 *
	 * Ensure reorder queue is read after pd->lock is dropped so we see
	 * new objects from another task in padata_do_serial. Pairs with
	 * smp_mb in padata_do_serial.
	 */
	smp_mb();

	reorder = per_cpu_ptr(pd->reorder_list, pd->cpu);
	if (!list_empty(&reorder->list) && padata_find_next(pd, false))
		queue_work(pinst->serial_wq, &pd->reorder_work);
}

static void invoke_padata_reorder(struct work_struct *work)
{
	struct parallel_data *pd;

	local_bh_disable();
	pd = container_of(work, struct parallel_data, reorder_work);
	padata_reorder(pd);
	local_bh_enable();
}

static void padata_serial_worker(struct work_struct *serial_work)
{
	struct padata_serial_queue *squeue;
	struct parallel_data *pd;
	LIST_HEAD(local_list);
	int cnt;

	local_bh_disable();
	squeue = container_of(serial_work, struct padata_serial_queue, work);
	pd = squeue->pd;

	spin_lock(&squeue->serial.lock);
	list_replace_init(&squeue->serial.list, &local_list);
	spin_unlock(&squeue->serial.lock);

	cnt = 0;

	while (!list_empty(&local_list)) {
		struct padata_priv *padata;

		padata = list_entry(local_list.next,
				    struct padata_priv, list);

		list_del_init(&padata->list);

		padata->serial(padata);
		cnt++;
	}
	local_bh_enable();

	if (refcount_sub_and_test(cnt, &pd->refcnt))
		padata_free_pd(pd);
}

/**
 * padata_do_serial - padata serialization function
 *
 * @padata: object to be serialized.
 *
 * padata_do_serial must be called for every parallelized object.
 * The serialization callback function will run with BHs off.
 */
void padata_do_serial(struct padata_priv *padata)
{
	struct parallel_data *pd = padata->pd;
	int hashed_cpu = padata_cpu_hash(pd, padata->seq_nr);
	struct padata_list *reorder = per_cpu_ptr(pd->reorder_list, hashed_cpu);
	struct padata_priv *cur;
	struct list_head *pos;

	spin_lock(&reorder->lock);
	/* Sort in ascending order of sequence number. */
	list_for_each_prev(pos, &reorder->list) {
		cur = list_entry(pos, struct padata_priv, list);
		if (cur->seq_nr < padata->seq_nr)
			break;
	}
	list_add(&padata->list, pos);
	spin_unlock(&reorder->lock);

	/*
	 * Ensure the addition to the reorder list is ordered correctly
	 * with the trylock of pd->lock in padata_reorder. Pairs with smp_mb
	 * in padata_reorder.
	 */
	smp_mb();

	padata_reorder(pd);
}
EXPORT_SYMBOL(padata_do_serial);
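
/*
 * Completion-side sketch, continuing the hypothetical my_parallel/my_serial
 * callbacks shown after padata_do_parallel() above: every object submitted
 * with padata_do_parallel() must eventually pass through padata_do_serial(),
 * typically at the end of the parallel callback or of the asynchronous work
 * it started. do_expensive_work() and complete_request() are hypothetical.
 *
 *	static void my_parallel(struct padata_priv *padata)
 *	{
 *		struct my_request *req =
 *			container_of(padata, struct my_request, padata);
 *
 *		do_expensive_work(req);		// runs with BHs off
 *		padata_do_serial(padata);	// hand off for in-order completion
 *	}
 *
 *	static void my_serial(struct padata_priv *padata)
 *	{
 *		// runs on padata->cb_cpu, in submission order
 *		complete_request(container_of(padata, struct my_request, padata));
 *	}
 */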

static int padata_setup_cpumasks(struct padata_instance *pinst)
{
	struct workqueue_attrs *attrs;
	int err;

	attrs = alloc_workqueue_attrs();
	if (!attrs)
		return -ENOMEM;

	/* Restrict parallel_wq workers to pd->cpumask.pcpu. */
	cpumask_copy(attrs->cpumask, pinst->cpumask.pcpu);
	err = apply_workqueue_attrs(pinst->parallel_wq, attrs);
	free_workqueue_attrs(attrs);

	return err;
}

static void __init padata_mt_helper(struct work_struct *w)
{
	struct padata_work *pw = container_of(w, struct padata_work, pw_work);
	struct padata_mt_job_state *ps = pw->pw_data;
	struct padata_mt_job *job = ps->job;
	bool done;

	spin_lock(&ps->lock);

	while (job->size > 0) {
		unsigned long start, size, end;

		start = job->start;
		/* So end is chunk size aligned if enough work remains. */
		size = roundup(start + 1, ps->chunk_size) - start;
		size = min(size, job->size);
		end = start + size;

		job->start = end;
		job->size -= size;

		spin_unlock(&ps->lock);
		job->thread_fn(start, end, job->fn_arg);
		spin_lock(&ps->lock);
	}

	++ps->nworks_fini;
	done = (ps->nworks_fini == ps->nworks);
	spin_unlock(&ps->lock);

	if (done)
		complete(&ps->completion);
}

/**
 * padata_do_multithreaded - run a multithreaded job
 * @job: Description of the job.
 *
 * See the definition of struct padata_mt_job for more details.
 */
void __init padata_do_multithreaded(struct padata_mt_job *job)
{
	/* In case threads finish at different times. */
	static const unsigned long load_balance_factor = 4;
	struct padata_work my_work, *pw;
	struct padata_mt_job_state ps;
	LIST_HEAD(works);
	int nworks;

	if (job->size == 0)
		return;

	/* Ensure at least one thread when size < min_chunk. */
	nworks = max(job->size / max(job->min_chunk, job->align), 1ul);
	nworks = min(nworks, job->max_threads);

	if (nworks == 1) {
		/* Single thread, no coordination needed, cut to the chase. */
		job->thread_fn(job->start, job->start + job->size, job->fn_arg);
		return;
	}

	spin_lock_init(&ps.lock);
	init_completion(&ps.completion);
	ps.job = job;
	ps.nworks = padata_work_alloc_mt(nworks, &ps, &works);
	ps.nworks_fini = 0;

	/*
	 * Chunk size is the amount of work a helper does per call to the
	 * thread function. Load balance large jobs between threads by
	 * increasing the number of chunks, guarantee at least the minimum
	 * chunk size from the caller, and honor the caller's alignment.
	 */
	ps.chunk_size = job->size / (ps.nworks * load_balance_factor);
	ps.chunk_size = max(ps.chunk_size, job->min_chunk);
	ps.chunk_size = roundup(ps.chunk_size, job->align);
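
	/*
	 * For example, a job of 1,048,576 units with ps.nworks == 4 is
	 * divided into 4 * 4 == 16 chunks of 65,536 units each (before the
	 * min_chunk and alignment adjustments), so a helper that finishes
	 * its chunk early simply grabs another instead of idling.
	 */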

	list_for_each_entry(pw, &works, pw_list)
		queue_work(system_unbound_wq, &pw->pw_work);

	/* Use the current thread, which saves starting a workqueue worker. */
	padata_work_init(&my_work, padata_mt_helper, &ps, PADATA_WORK_ONSTACK);
	padata_mt_helper(&my_work.pw_work);

	/* Wait for all the helpers to finish. */
	wait_for_completion(&ps.completion);

	destroy_work_on_stack(&my_work.pw_work);
	padata_works_free(&works);
}
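
/*
 * Boot-time usage sketch (illustrative only): describe the range once and
 * let padata fan it out. init_range(), init_one_page(), first_pfn and
 * nr_pages are hypothetical; callers define what start/size/units mean.
 *
 *	static void __init init_range(unsigned long start, unsigned long end,
 *				      void *arg)
 *	{
 *		while (start < end)
 *			init_one_page(start++, arg);
 *	}
 *
 *	static int __init init_everything(void)
 *	{
 *		struct padata_mt_job job = {
 *			.thread_fn   = init_range,
 *			.fn_arg      = NULL,
 *			.start       = first_pfn,
 *			.size        = nr_pages,
 *			.align       = 1,
 *			.min_chunk   = 1024,
 *			.max_threads = num_online_cpus(),
 *		};
 *
 *		padata_do_multithreaded(&job);
 *		return 0;
 *	}
 */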

static void __padata_list_init(struct padata_list *pd_list)
{
	INIT_LIST_HEAD(&pd_list->list);
	spin_lock_init(&pd_list->lock);
}

/* Initialize all percpu queues used by serial workers */
static void padata_init_squeues(struct parallel_data *pd)
{
	int cpu;
	struct padata_serial_queue *squeue;

	for_each_cpu(cpu, pd->cpumask.cbcpu) {
		squeue = per_cpu_ptr(pd->squeue, cpu);
		squeue->pd = pd;
		__padata_list_init(&squeue->serial);
		INIT_WORK(&squeue->work, padata_serial_worker);
	}
}

/* Initialize per-CPU reorder lists */
static void padata_init_reorder_list(struct parallel_data *pd)
{
	int cpu;
	struct padata_list *list;

	for_each_cpu(cpu, pd->cpumask.pcpu) {
		list = per_cpu_ptr(pd->reorder_list, cpu);
		__padata_list_init(list);
	}
}

/* Allocate and initialize the internal cpumask-dependent resources. */
static struct parallel_data *padata_alloc_pd(struct padata_shell *ps)
{
	struct padata_instance *pinst = ps->pinst;
	struct parallel_data *pd;

	pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
	if (!pd)
		goto err;

	pd->reorder_list = alloc_percpu(struct padata_list);
	if (!pd->reorder_list)
		goto err_free_pd;

	pd->squeue = alloc_percpu(struct padata_serial_queue);
	if (!pd->squeue)
		goto err_free_reorder_list;

	pd->ps = ps;

	if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
		goto err_free_squeue;
	if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL))
		goto err_free_pcpu;

	cpumask_and(pd->cpumask.pcpu, pinst->cpumask.pcpu, cpu_online_mask);
	cpumask_and(pd->cpumask.cbcpu, pinst->cpumask.cbcpu, cpu_online_mask);

	padata_init_reorder_list(pd);
	padata_init_squeues(pd);
	pd->seq_nr = -1;
	refcount_set(&pd->refcnt, 1);
	spin_lock_init(&pd->lock);
	pd->cpu = cpumask_first(pd->cpumask.pcpu);
	INIT_WORK(&pd->reorder_work, invoke_padata_reorder);

	return pd;

err_free_pcpu:
	free_cpumask_var(pd->cpumask.pcpu);
err_free_squeue:
	free_percpu(pd->squeue);
err_free_reorder_list:
	free_percpu(pd->reorder_list);
err_free_pd:
	kfree(pd);
err:
	return NULL;
}

static void padata_free_pd(struct parallel_data *pd)
{
	free_cpumask_var(pd->cpumask.pcpu);
	free_cpumask_var(pd->cpumask.cbcpu);
	free_percpu(pd->reorder_list);
	free_percpu(pd->squeue);
	kfree(pd);
}

static void __padata_start(struct padata_instance *pinst)
{
	pinst->flags |= PADATA_INIT;
}

static void __padata_stop(struct padata_instance *pinst)
{
	if (!(pinst->flags & PADATA_INIT))
		return;

	pinst->flags &= ~PADATA_INIT;

	synchronize_rcu();
}

/* Replace the internal control structure with a new one. */
static int padata_replace_one(struct padata_shell *ps)
{
	struct parallel_data *pd_new;

	pd_new = padata_alloc_pd(ps);
	if (!pd_new)
		return -ENOMEM;

	ps->opd = rcu_dereference_protected(ps->pd, 1);
	rcu_assign_pointer(ps->pd, pd_new);

	return 0;
}

static int padata_replace(struct padata_instance *pinst)
{
	struct padata_shell *ps;
	int err = 0;

	pinst->flags |= PADATA_RESET;

	list_for_each_entry(ps, &pinst->pslist, list) {
		err = padata_replace_one(ps);
		if (err)
			break;
	}

	synchronize_rcu();

	list_for_each_entry_continue_reverse(ps, &pinst->pslist, list)
		if (refcount_dec_and_test(&ps->opd->refcnt))
			padata_free_pd(ps->opd);

	pinst->flags &= ~PADATA_RESET;

	return err;
}

/* If cpumask contains no active cpu, we mark the instance as invalid. */
static bool padata_validate_cpumask(struct padata_instance *pinst,
				    const struct cpumask *cpumask)
{
	if (!cpumask_intersects(cpumask, cpu_online_mask)) {
		pinst->flags |= PADATA_INVALID;
		return false;
	}

	pinst->flags &= ~PADATA_INVALID;
	return true;
}

static int __padata_set_cpumasks(struct padata_instance *pinst,
				 cpumask_var_t pcpumask,
				 cpumask_var_t cbcpumask)
{
	int valid;
	int err;

	valid = padata_validate_cpumask(pinst, pcpumask);
	if (!valid) {
		__padata_stop(pinst);
		goto out_replace;
	}

	valid = padata_validate_cpumask(pinst, cbcpumask);
	if (!valid)
		__padata_stop(pinst);

out_replace:
	cpumask_copy(pinst->cpumask.pcpu, pcpumask);
	cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);

	err = padata_setup_cpumasks(pinst) ?: padata_replace(pinst);

	if (valid)
		__padata_start(pinst);

	return err;
}

/**
 * padata_set_cpumask - Set the cpumask selected by @cpumask_type to the
 *                      value of @cpumask.
 * @pinst: padata instance
 * @cpumask_type: PADATA_CPU_SERIAL or PADATA_CPU_PARALLEL, corresponding
 *                to the serial and parallel cpumasks respectively.
 * @cpumask: the cpumask to use
 *
 * Return: 0 on success or negative error code
 */
int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
		       cpumask_var_t cpumask)
{
	struct cpumask *serial_mask, *parallel_mask;
	int err = -EINVAL;

	cpus_read_lock();
	mutex_lock(&pinst->lock);

	switch (cpumask_type) {
	case PADATA_CPU_PARALLEL:
		serial_mask = pinst->cpumask.cbcpu;
		parallel_mask = cpumask;
		break;
	case PADATA_CPU_SERIAL:
		parallel_mask = pinst->cpumask.pcpu;
		serial_mask = cpumask;
		break;
	default:
		goto out;
	}

	err = __padata_set_cpumasks(pinst, parallel_mask, serial_mask);

out:
	mutex_unlock(&pinst->lock);
	cpus_read_unlock();

	return err;
}
EXPORT_SYMBOL(padata_set_cpumask);
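
/*
 * Sketch of restricting parallel workers to CPUs 0-3 (illustrative only;
 * pinst is assumed to come from an earlier padata_alloc()). The mask is
 * copied internally, so the caller may free it afterwards:
 *
 *	cpumask_var_t mask;
 *	int err;
 *
 *	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
 *		return -ENOMEM;
 *	cpumask_clear(mask);
 *	cpumask_set_cpu(0, mask);
 *	cpumask_set_cpu(1, mask);
 *	cpumask_set_cpu(2, mask);
 *	cpumask_set_cpu(3, mask);
 *	err = padata_set_cpumask(pinst, PADATA_CPU_PARALLEL, mask);
 *	free_cpumask_var(mask);
 */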

#ifdef CONFIG_HOTPLUG_CPU

static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
{
	int err = 0;

	if (cpumask_test_cpu(cpu, cpu_online_mask)) {
		err = padata_replace(pinst);

		if (padata_validate_cpumask(pinst, pinst->cpumask.pcpu) &&
		    padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
			__padata_start(pinst);
	}

	return err;
}

static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
{
	int err = 0;

	if (!cpumask_test_cpu(cpu, cpu_online_mask)) {
		if (!padata_validate_cpumask(pinst, pinst->cpumask.pcpu) ||
		    !padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
			__padata_stop(pinst);

		err = padata_replace(pinst);
	}

	return err;
}

static inline int pinst_has_cpu(struct padata_instance *pinst, int cpu)
{
	return cpumask_test_cpu(cpu, pinst->cpumask.pcpu) ||
	       cpumask_test_cpu(cpu, pinst->cpumask.cbcpu);
}

static int padata_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct padata_instance *pinst;
	int ret;

	pinst = hlist_entry_safe(node, struct padata_instance, cpu_online_node);
	if (!pinst_has_cpu(pinst, cpu))
		return 0;

	mutex_lock(&pinst->lock);
	ret = __padata_add_cpu(pinst, cpu);
	mutex_unlock(&pinst->lock);
	return ret;
}

static int padata_cpu_dead(unsigned int cpu, struct hlist_node *node)
{
	struct padata_instance *pinst;
	int ret;

	pinst = hlist_entry_safe(node, struct padata_instance, cpu_dead_node);
	if (!pinst_has_cpu(pinst, cpu))
		return 0;

	mutex_lock(&pinst->lock);
	ret = __padata_remove_cpu(pinst, cpu);
	mutex_unlock(&pinst->lock);
	return ret;
}

static enum cpuhp_state hp_online;
#endif

static void __padata_free(struct padata_instance *pinst)
{
#ifdef CONFIG_HOTPLUG_CPU
	cpuhp_state_remove_instance_nocalls(CPUHP_PADATA_DEAD,
					    &pinst->cpu_dead_node);
	cpuhp_state_remove_instance_nocalls(hp_online, &pinst->cpu_online_node);
#endif

	WARN_ON(!list_empty(&pinst->pslist));

	free_cpumask_var(pinst->cpumask.pcpu);
	free_cpumask_var(pinst->cpumask.cbcpu);
	destroy_workqueue(pinst->serial_wq);
	destroy_workqueue(pinst->parallel_wq);
	kfree(pinst);
}

#define kobj2pinst(_kobj)					\
	container_of(_kobj, struct padata_instance, kobj)
#define attr2pentry(_attr)					\
	container_of(_attr, struct padata_sysfs_entry, attr)

static void padata_sysfs_release(struct kobject *kobj)
{
	struct padata_instance *pinst = kobj2pinst(kobj);
	__padata_free(pinst);
}

struct padata_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct padata_instance *, struct attribute *, char *);
	ssize_t (*store)(struct padata_instance *, struct attribute *,
			 const char *, size_t);
};

static ssize_t show_cpumask(struct padata_instance *pinst,
			    struct attribute *attr, char *buf)
{
	struct cpumask *cpumask;
	ssize_t len;

	mutex_lock(&pinst->lock);
	if (!strcmp(attr->name, "serial_cpumask"))
		cpumask = pinst->cpumask.cbcpu;
	else
		cpumask = pinst->cpumask.pcpu;

	len = snprintf(buf, PAGE_SIZE, "%*pb\n",
		       nr_cpu_ids, cpumask_bits(cpumask));
	mutex_unlock(&pinst->lock);
	return len < PAGE_SIZE ? len : -EINVAL;
}

static ssize_t store_cpumask(struct padata_instance *pinst,
			     struct attribute *attr,
			     const char *buf, size_t count)
{
	cpumask_var_t new_cpumask;
	ssize_t ret;
	int mask_type;

	if (!alloc_cpumask_var(&new_cpumask, GFP_KERNEL))
		return -ENOMEM;

	ret = bitmap_parse(buf, count, cpumask_bits(new_cpumask),
			   nr_cpumask_bits);
	if (ret < 0)
		goto out;

	mask_type = !strcmp(attr->name, "serial_cpumask") ?
		PADATA_CPU_SERIAL : PADATA_CPU_PARALLEL;
	ret = padata_set_cpumask(pinst, mask_type, new_cpumask);
	if (!ret)
		ret = count;

out:
	free_cpumask_var(new_cpumask);
	return ret;
}

#define PADATA_ATTR_RW(_name, _show_name, _store_name)		\
	static struct padata_sysfs_entry _name##_attr =		\
		__ATTR(_name, 0644, _show_name, _store_name)
#define PADATA_ATTR_RO(_name, _show_name)		\
	static struct padata_sysfs_entry _name##_attr = \
		__ATTR(_name, 0400, _show_name, NULL)

PADATA_ATTR_RW(serial_cpumask, show_cpumask, store_cpumask);
PADATA_ATTR_RW(parallel_cpumask, show_cpumask, store_cpumask);

/*
 * Padata sysfs provides the following objects:
 * serial_cpumask   [RW] - cpumask for serial workers
 * parallel_cpumask [RW] - cpumask for parallel workers
 */
static struct attribute *padata_default_attrs[] = {
	&serial_cpumask_attr.attr,
	&parallel_cpumask_attr.attr,
	NULL,
};
ATTRIBUTE_GROUPS(padata_default);

static ssize_t padata_sysfs_show(struct kobject *kobj,
				 struct attribute *attr, char *buf)
{
	struct padata_instance *pinst;
	struct padata_sysfs_entry *pentry;
	ssize_t ret = -EIO;

	pinst = kobj2pinst(kobj);
	pentry = attr2pentry(attr);
	if (pentry->show)
		ret = pentry->show(pinst, attr, buf);

	return ret;
}

static ssize_t padata_sysfs_store(struct kobject *kobj, struct attribute *attr,
				  const char *buf, size_t count)
{
	struct padata_instance *pinst;
	struct padata_sysfs_entry *pentry;
	ssize_t ret = -EIO;

	pinst = kobj2pinst(kobj);
	pentry = attr2pentry(attr);
	if (pentry->store)
		ret = pentry->store(pinst, attr, buf, count);

	return ret;
}

static const struct sysfs_ops padata_sysfs_ops = {
	.show = padata_sysfs_show,
	.store = padata_sysfs_store,
};

static const struct kobj_type padata_attr_type = {
	.sysfs_ops = &padata_sysfs_ops,
	.default_groups = padata_default_groups,
	.release = padata_sysfs_release,
};

/**
 * padata_alloc - allocate and initialize a padata instance
 * @name: used to identify the instance
 *
 * Return: new instance on success, NULL on error
 */
struct padata_instance *padata_alloc(const char *name)
{
	struct padata_instance *pinst;

	pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL);
	if (!pinst)
		goto err;

	pinst->parallel_wq = alloc_workqueue("%s_parallel", WQ_UNBOUND, 0,
					     name);
	if (!pinst->parallel_wq)
		goto err_free_inst;

	cpus_read_lock();

	pinst->serial_wq = alloc_workqueue("%s_serial", WQ_MEM_RECLAIM |
					   WQ_CPU_INTENSIVE, 1, name);
	if (!pinst->serial_wq)
		goto err_put_cpus;

	if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
		goto err_free_serial_wq;
	if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
		free_cpumask_var(pinst->cpumask.pcpu);
		goto err_free_serial_wq;
	}

	INIT_LIST_HEAD(&pinst->pslist);

	cpumask_copy(pinst->cpumask.pcpu, cpu_possible_mask);
	cpumask_copy(pinst->cpumask.cbcpu, cpu_possible_mask);

	if (padata_setup_cpumasks(pinst))
		goto err_free_masks;

	__padata_start(pinst);

	kobject_init(&pinst->kobj, &padata_attr_type);
	mutex_init(&pinst->lock);

#ifdef CONFIG_HOTPLUG_CPU
	cpuhp_state_add_instance_nocalls_cpuslocked(hp_online,
						    &pinst->cpu_online_node);
	cpuhp_state_add_instance_nocalls_cpuslocked(CPUHP_PADATA_DEAD,
						    &pinst->cpu_dead_node);
#endif

	cpus_read_unlock();

	return pinst;

err_free_masks:
	free_cpumask_var(pinst->cpumask.pcpu);
	free_cpumask_var(pinst->cpumask.cbcpu);
err_free_serial_wq:
	destroy_workqueue(pinst->serial_wq);
err_put_cpus:
	cpus_read_unlock();
	destroy_workqueue(pinst->parallel_wq);
err_free_inst:
	kfree(pinst);
err:
	return NULL;
}
EXPORT_SYMBOL(padata_alloc);

/**
 * padata_free - free a padata instance
 *
 * @pinst: padata instance to free
 */
void padata_free(struct padata_instance *pinst)
{
	kobject_put(&pinst->kobj);
}
EXPORT_SYMBOL(padata_free);

/**
 * padata_alloc_shell - Allocate and initialize padata shell.
 *
 * @pinst: Parent padata_instance object.
 *
 * Return: new shell on success, NULL on error
 */
struct padata_shell *padata_alloc_shell(struct padata_instance *pinst)
{
	struct parallel_data *pd;
	struct padata_shell *ps;

	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
	if (!ps)
		goto out;

	ps->pinst = pinst;

	cpus_read_lock();
	pd = padata_alloc_pd(ps);
	cpus_read_unlock();

	if (!pd)
		goto out_free_ps;

	mutex_lock(&pinst->lock);
	RCU_INIT_POINTER(ps->pd, pd);
	list_add(&ps->list, &pinst->pslist);
	mutex_unlock(&pinst->lock);

	return ps;

out_free_ps:
	kfree(ps);
out:
	return NULL;
}
EXPORT_SYMBOL(padata_alloc_shell);

/**
 * padata_free_shell - free a padata shell
 *
 * @ps: padata shell to free
 */
void padata_free_shell(struct padata_shell *ps)
{
	struct parallel_data *pd;

	if (!ps)
		return;

	mutex_lock(&ps->pinst->lock);
	list_del(&ps->list);
	pd = rcu_dereference_protected(ps->pd, 1);
	if (refcount_dec_and_test(&pd->refcnt))
		padata_free_pd(pd);
	mutex_unlock(&ps->pinst->lock);

	kfree(ps);
}
EXPORT_SYMBOL(padata_free_shell);
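
/*
 * Lifecycle sketch tying the API together (illustrative; error handling
 * abbreviated, instance name hypothetical). An instance owns the workqueues
 * and cpumasks; each user allocates a shell through which jobs are submitted:
 *
 *	struct padata_instance *pinst;
 *	struct padata_shell *ps;
 *
 *	pinst = padata_alloc("my_instance");
 *	if (!pinst)
 *		return -ENOMEM;
 *	ps = padata_alloc_shell(pinst);
 *	if (!ps) {
 *		padata_free(pinst);
 *		return -ENOMEM;
 *	}
 *
 *	// ... submit jobs with padata_do_parallel(ps, ...) ...
 *
 *	padata_free_shell(ps);
 *	padata_free(pinst);
 */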

void __init padata_init(void)
{
	unsigned int i, possible_cpus;
#ifdef CONFIG_HOTPLUG_CPU
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "padata:online",
				      padata_cpu_online, NULL);
	if (ret < 0)
		goto err;
	hp_online = ret;

	ret = cpuhp_setup_state_multi(CPUHP_PADATA_DEAD, "padata:dead",
				      NULL, padata_cpu_dead);
	if (ret < 0)
		goto remove_online_state;
#endif

	possible_cpus = num_possible_cpus();
	padata_works = kmalloc_array(possible_cpus, sizeof(struct padata_work),
				     GFP_KERNEL);
	if (!padata_works)
		goto remove_dead_state;

	for (i = 0; i < possible_cpus; ++i)
		list_add(&padata_works[i].pw_list, &padata_free_works);

	return;

remove_dead_state:
#ifdef CONFIG_HOTPLUG_CPU
	cpuhp_remove_multi_state(CPUHP_PADATA_DEAD);
remove_online_state:
	cpuhp_remove_multi_state(hp_online);
err:
#endif
	pr_warn("padata: initialization failed\n");
}