// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2018 Linaro Limited
 *
 * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
 *
 * The idle injection framework provides a way to force CPUs to enter idle
 * states for a specified fraction of time over a specified period.
 *
 * It relies on the smpboot kthreads feature providing common code for CPU
 * hotplug and thread [un]parking.
 *
 * All of the kthreads used for idle injection are created at init time.
 *
 * Next, the users of the idle injection framework provide a cpumask via
 * its register function. The kthreads will be synchronized with respect to
 * this cpumask.
 *
 * The idle + run duration is specified via separate helpers and that allows
 * idle injection to be started.
 *
 * The idle injection kthreads will call play_idle_precise() with the idle
 * duration and max allowed latency specified as per the above.
 *
 * After all of them have been woken up, a timer is set to start the next idle
 * injection cycle.
 *
 * The timer interrupt handler will wake up the idle injection kthreads for
 * all of the CPUs in the cpumask provided by the user.
 *
 * Idle injection is stopped synchronously and no leftover idle injection
 * kthread activity after its completion is guaranteed.
 *
 * It is up to the user of this framework to provide a lock for higher-level
 * synchronization to prevent race conditions like starting idle injection
 * while unregistering from the framework.
 */
#define pr_fmt(fmt) "ii_dev: " fmt

#include <linux/cpu.h>
#include <linux/hrtimer.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/smpboot.h>
#include <linux/idle_inject.h>

#include <uapi/linux/sched/types.h>
49 | |
/**
 * struct idle_inject_thread - task on/off switch structure
 * @tsk: task injecting the idle cycles
 * @should_run: whether or not to run the task (for the smpboot kthread API)
 */
struct idle_inject_thread {
	struct task_struct *tsk;
	int should_run;
};
59 | |
60 | /** |
61 | * struct idle_inject_device - idle injection data |
62 | * @timer: idle injection period timer |
63 | * @idle_duration_us: duration of CPU idle time to inject |
64 | * @run_duration_us: duration of CPU run time to allow |
65 | * @latency_us: max allowed latency |
66 | * @update: Optional callback deciding whether or not to skip idle |
67 | * injection in the given cycle. |
68 | * @cpumask: mask of CPUs affected by idle injection |
69 | * |
70 | * This structure is used to define per instance idle inject device data. Each |
71 | * instance has an idle duration, a run duration and mask of CPUs to inject |
72 | * idle. |
73 | * |
74 | * Actual CPU idle time is injected by calling kernel scheduler interface |
75 | * play_idle_precise(). There is one optional callback that can be registered |
76 | * by calling idle_inject_register_full(): |
77 | * |
78 | * update() - This callback is invoked just before waking up CPUs to inject |
79 | * idle. If it returns false, CPUs are not woken up to inject idle in the given |
80 | * cycle. It also allows the caller to readjust the idle and run duration by |
81 | * calling idle_inject_set_duration() for the next cycle. |
82 | */ |
83 | struct idle_inject_device { |
84 | struct hrtimer timer; |
85 | unsigned int idle_duration_us; |
86 | unsigned int run_duration_us; |
87 | unsigned int latency_us; |
88 | bool (*update)(void); |
89 | unsigned long cpumask[]; |
90 | }; |
91 | |
92 | static DEFINE_PER_CPU(struct idle_inject_thread, idle_inject_thread); |
93 | static DEFINE_PER_CPU(struct idle_inject_device *, idle_inject_device); |
94 | |
95 | /** |
96 | * idle_inject_wakeup - Wake up idle injection threads |
97 | * @ii_dev: target idle injection device |
98 | * |
99 | * Every idle injection task associated with the given idle injection device |
100 | * and running on an online CPU will be woken up. |
101 | */ |
102 | static void idle_inject_wakeup(struct idle_inject_device *ii_dev) |
103 | { |
104 | struct idle_inject_thread *iit; |
105 | unsigned int cpu; |
106 | |
107 | for_each_cpu_and(cpu, to_cpumask(ii_dev->cpumask), cpu_online_mask) { |
108 | iit = per_cpu_ptr(&idle_inject_thread, cpu); |
109 | iit->should_run = 1; |
110 | wake_up_process(tsk: iit->tsk); |
111 | } |
112 | } |
113 | |
114 | /** |
115 | * idle_inject_timer_fn - idle injection timer function |
116 | * @timer: idle injection hrtimer |
117 | * |
118 | * This function is called when the idle injection timer expires. It wakes up |
119 | * idle injection tasks associated with the timer and they, in turn, invoke |
120 | * play_idle_precise() to inject a specified amount of CPU idle time. |
121 | * |
122 | * Return: HRTIMER_RESTART. |
123 | */ |
124 | static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer) |
125 | { |
126 | unsigned int duration_us; |
127 | struct idle_inject_device *ii_dev = |
128 | container_of(timer, struct idle_inject_device, timer); |
129 | |
130 | if (!ii_dev->update || (ii_dev->update && ii_dev->update())) |
131 | idle_inject_wakeup(ii_dev); |
132 | |
133 | duration_us = READ_ONCE(ii_dev->run_duration_us); |
134 | duration_us += READ_ONCE(ii_dev->idle_duration_us); |
135 | |
136 | hrtimer_forward_now(timer, interval: ns_to_ktime(ns: duration_us * NSEC_PER_USEC)); |
137 | |
138 | return HRTIMER_RESTART; |
139 | } |
140 | |
141 | /** |
142 | * idle_inject_fn - idle injection work function |
143 | * @cpu: the CPU owning the task |
144 | * |
145 | * This function calls play_idle_precise() to inject a specified amount of CPU |
146 | * idle time. |
147 | */ |
148 | static void idle_inject_fn(unsigned int cpu) |
149 | { |
150 | struct idle_inject_device *ii_dev; |
151 | struct idle_inject_thread *iit; |
152 | |
153 | ii_dev = per_cpu(idle_inject_device, cpu); |
154 | iit = per_cpu_ptr(&idle_inject_thread, cpu); |
155 | |
156 | /* |
157 | * Let the smpboot main loop know that the task should not run again. |
158 | */ |
159 | iit->should_run = 0; |
160 | |
161 | play_idle_precise(READ_ONCE(ii_dev->idle_duration_us) * NSEC_PER_USEC, |
162 | READ_ONCE(ii_dev->latency_us) * NSEC_PER_USEC); |
163 | } |
164 | |
165 | /** |
166 | * idle_inject_set_duration - idle and run duration update helper |
167 | * @ii_dev: idle injection control device structure |
168 | * @run_duration_us: CPU run time to allow in microseconds |
169 | * @idle_duration_us: CPU idle time to inject in microseconds |
170 | */ |
171 | void idle_inject_set_duration(struct idle_inject_device *ii_dev, |
172 | unsigned int run_duration_us, |
173 | unsigned int idle_duration_us) |
174 | { |
175 | if (run_duration_us + idle_duration_us) { |
176 | WRITE_ONCE(ii_dev->run_duration_us, run_duration_us); |
177 | WRITE_ONCE(ii_dev->idle_duration_us, idle_duration_us); |
178 | } |
179 | if (!run_duration_us) |
180 | pr_debug("CPU is forced to 100 percent idle\n" ); |
181 | } |
182 | EXPORT_SYMBOL_NS_GPL(idle_inject_set_duration, IDLE_INJECT); |
183 | |
184 | /** |
185 | * idle_inject_get_duration - idle and run duration retrieval helper |
186 | * @ii_dev: idle injection control device structure |
187 | * @run_duration_us: memory location to store the current CPU run time |
188 | * @idle_duration_us: memory location to store the current CPU idle time |
189 | */ |
190 | void idle_inject_get_duration(struct idle_inject_device *ii_dev, |
191 | unsigned int *run_duration_us, |
192 | unsigned int *idle_duration_us) |
193 | { |
194 | *run_duration_us = READ_ONCE(ii_dev->run_duration_us); |
195 | *idle_duration_us = READ_ONCE(ii_dev->idle_duration_us); |
196 | } |
197 | EXPORT_SYMBOL_NS_GPL(idle_inject_get_duration, IDLE_INJECT); |
198 | |
199 | /** |
200 | * idle_inject_set_latency - set the maximum latency allowed |
201 | * @ii_dev: idle injection control device structure |
202 | * @latency_us: set the latency requirement for the idle state |
203 | */ |
204 | void idle_inject_set_latency(struct idle_inject_device *ii_dev, |
205 | unsigned int latency_us) |
206 | { |
207 | WRITE_ONCE(ii_dev->latency_us, latency_us); |
208 | } |
209 | EXPORT_SYMBOL_NS_GPL(idle_inject_set_latency, IDLE_INJECT); |
210 | |
211 | /** |
212 | * idle_inject_start - start idle injections |
213 | * @ii_dev: idle injection control device structure |
214 | * |
215 | * The function starts idle injection by first waking up all of the idle |
216 | * injection kthreads associated with @ii_dev to let them inject CPU idle time |
217 | * sets up a timer to start the next idle injection period. |
218 | * |
219 | * Return: -EINVAL if the CPU idle or CPU run time is not set or 0 on success. |
220 | */ |
221 | int idle_inject_start(struct idle_inject_device *ii_dev) |
222 | { |
223 | unsigned int idle_duration_us = READ_ONCE(ii_dev->idle_duration_us); |
224 | unsigned int run_duration_us = READ_ONCE(ii_dev->run_duration_us); |
225 | |
226 | if (!(idle_duration_us + run_duration_us)) |
227 | return -EINVAL; |
228 | |
229 | pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n" , |
230 | cpumask_pr_args(to_cpumask(ii_dev->cpumask))); |
231 | |
232 | idle_inject_wakeup(ii_dev); |
233 | |
234 | hrtimer_start(timer: &ii_dev->timer, |
235 | tim: ns_to_ktime(ns: (idle_duration_us + run_duration_us) * |
236 | NSEC_PER_USEC), |
237 | mode: HRTIMER_MODE_REL); |
238 | |
239 | return 0; |
240 | } |
241 | EXPORT_SYMBOL_NS_GPL(idle_inject_start, IDLE_INJECT); |
242 | |
243 | /** |
244 | * idle_inject_stop - stops idle injections |
245 | * @ii_dev: idle injection control device structure |
246 | * |
247 | * The function stops idle injection and waits for the threads to finish work. |
248 | * If CPU idle time is being injected when this function runs, then it will |
249 | * wait until the end of the cycle. |
250 | * |
251 | * When it returns, there is no more idle injection kthread activity. The |
252 | * kthreads are scheduled out and the periodic timer is off. |
253 | */ |
254 | void idle_inject_stop(struct idle_inject_device *ii_dev) |
255 | { |
256 | struct idle_inject_thread *iit; |
257 | unsigned int cpu; |
258 | |
259 | pr_debug("Stopping idle injection on CPUs '%*pbl'\n" , |
260 | cpumask_pr_args(to_cpumask(ii_dev->cpumask))); |
261 | |
262 | hrtimer_cancel(timer: &ii_dev->timer); |
263 | |
264 | /* |
265 | * Stopping idle injection requires all of the idle injection kthreads |
266 | * associated with the given cpumask to be parked and stay that way, so |
267 | * prevent CPUs from going online at this point. Any CPUs going online |
268 | * after the loop below will be covered by clearing the should_run flag |
269 | * that will cause the smpboot main loop to schedule them out. |
270 | */ |
271 | cpu_hotplug_disable(); |
272 | |
273 | /* |
274 | * Iterate over all (online + offline) CPUs here in case one of them |
275 | * goes offline with the should_run flag set so as to prevent its idle |
276 | * injection kthread from running when the CPU goes online again after |
277 | * the ii_dev has been freed. |
278 | */ |
279 | for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) { |
280 | iit = per_cpu_ptr(&idle_inject_thread, cpu); |
281 | iit->should_run = 0; |
282 | |
283 | wait_task_inactive(iit->tsk, TASK_ANY); |
284 | } |
285 | |
286 | cpu_hotplug_enable(); |
287 | } |
288 | EXPORT_SYMBOL_NS_GPL(idle_inject_stop, IDLE_INJECT); |
289 | |
290 | /** |
291 | * idle_inject_setup - prepare the current task for idle injection |
292 | * @cpu: not used |
293 | * |
294 | * Called once, this function is in charge of setting the current task's |
295 | * scheduler parameters to make it an RT task. |
296 | */ |
297 | static void idle_inject_setup(unsigned int cpu) |
298 | { |
299 | sched_set_fifo(current); |
300 | } |
301 | |
302 | /** |
303 | * idle_inject_should_run - function helper for the smpboot API |
304 | * @cpu: CPU the kthread is running on |
305 | * |
306 | * Return: whether or not the thread can run. |
307 | */ |
308 | static int idle_inject_should_run(unsigned int cpu) |
309 | { |
310 | struct idle_inject_thread *iit = |
311 | per_cpu_ptr(&idle_inject_thread, cpu); |
312 | |
313 | return iit->should_run; |
314 | } |
315 | |
316 | /** |
317 | * idle_inject_register_full - initialize idle injection on a set of CPUs |
318 | * @cpumask: CPUs to be affected by idle injection |
319 | * @update: This callback is called just before waking up CPUs to inject |
320 | * idle |
321 | * |
322 | * This function creates an idle injection control device structure for the |
323 | * given set of CPUs and initializes the timer associated with it. This |
324 | * function also allows to register update()callback. |
325 | * It does not start any injection cycles. |
326 | * |
327 | * Return: NULL if memory allocation fails, idle injection control device |
328 | * pointer on success. |
329 | */ |
330 | |
331 | struct idle_inject_device *idle_inject_register_full(struct cpumask *cpumask, |
332 | bool (*update)(void)) |
333 | { |
334 | struct idle_inject_device *ii_dev; |
335 | int cpu, cpu_rb; |
336 | |
337 | ii_dev = kzalloc(size: sizeof(*ii_dev) + cpumask_size(), GFP_KERNEL); |
338 | if (!ii_dev) |
339 | return NULL; |
340 | |
341 | cpumask_copy(to_cpumask(ii_dev->cpumask), srcp: cpumask); |
342 | hrtimer_init(timer: &ii_dev->timer, CLOCK_MONOTONIC, mode: HRTIMER_MODE_REL); |
343 | ii_dev->timer.function = idle_inject_timer_fn; |
344 | ii_dev->latency_us = UINT_MAX; |
345 | ii_dev->update = update; |
346 | |
347 | for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) { |
348 | |
349 | if (per_cpu(idle_inject_device, cpu)) { |
350 | pr_err("cpu%d is already registered\n" , cpu); |
351 | goto out_rollback; |
352 | } |
353 | |
354 | per_cpu(idle_inject_device, cpu) = ii_dev; |
355 | } |
356 | |
357 | return ii_dev; |
358 | |
359 | out_rollback: |
360 | for_each_cpu(cpu_rb, to_cpumask(ii_dev->cpumask)) { |
361 | if (cpu == cpu_rb) |
362 | break; |
363 | per_cpu(idle_inject_device, cpu_rb) = NULL; |
364 | } |
365 | |
366 | kfree(objp: ii_dev); |
367 | |
368 | return NULL; |
369 | } |
370 | EXPORT_SYMBOL_NS_GPL(idle_inject_register_full, IDLE_INJECT); |
371 | |
372 | /** |
373 | * idle_inject_register - initialize idle injection on a set of CPUs |
374 | * @cpumask: CPUs to be affected by idle injection |
375 | * |
376 | * This function creates an idle injection control device structure for the |
377 | * given set of CPUs and initializes the timer associated with it. It does not |
378 | * start any injection cycles. |
379 | * |
380 | * Return: NULL if memory allocation fails, idle injection control device |
381 | * pointer on success. |
382 | */ |
383 | struct idle_inject_device *idle_inject_register(struct cpumask *cpumask) |
384 | { |
385 | return idle_inject_register_full(cpumask, NULL); |
386 | } |
387 | EXPORT_SYMBOL_NS_GPL(idle_inject_register, IDLE_INJECT); |
388 | |
389 | /** |
390 | * idle_inject_unregister - unregister idle injection control device |
391 | * @ii_dev: idle injection control device to unregister |
392 | * |
393 | * The function stops idle injection for the given control device, |
394 | * unregisters its kthreads and frees memory allocated when that device was |
395 | * created. |
396 | */ |
397 | void idle_inject_unregister(struct idle_inject_device *ii_dev) |
398 | { |
399 | unsigned int cpu; |
400 | |
401 | idle_inject_stop(ii_dev); |
402 | |
403 | for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) |
404 | per_cpu(idle_inject_device, cpu) = NULL; |
405 | |
406 | kfree(objp: ii_dev); |
407 | } |
408 | EXPORT_SYMBOL_NS_GPL(idle_inject_unregister, IDLE_INJECT); |
409 | |
410 | static struct smp_hotplug_thread idle_inject_threads = { |
411 | .store = &idle_inject_thread.tsk, |
412 | .setup = idle_inject_setup, |
413 | .thread_fn = idle_inject_fn, |
414 | .thread_comm = "idle_inject/%u" , |
415 | .thread_should_run = idle_inject_should_run, |
416 | }; |
417 | |
418 | static int __init idle_inject_init(void) |
419 | { |
420 | return smpboot_register_percpu_thread(plug_thread: &idle_inject_threads); |
421 | } |
422 | early_initcall(idle_inject_init); |
423 | |