1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * coupled.c - helper functions to enter the same idle state on multiple cpus |
4 | * |
5 | * Copyright (c) 2011 Google, Inc. |
6 | * |
7 | * Author: Colin Cross <ccross@android.com> |
8 | */ |
9 | |
10 | #include <linux/kernel.h> |
11 | #include <linux/cpu.h> |
12 | #include <linux/cpuidle.h> |
13 | #include <linux/mutex.h> |
14 | #include <linux/sched.h> |
15 | #include <linux/slab.h> |
16 | #include <linux/spinlock.h> |
17 | |
18 | #include "cpuidle.h" |
19 | |
20 | /** |
21 | * DOC: Coupled cpuidle states |
22 | * |
23 | * On some ARM SMP SoCs (OMAP4460, Tegra 2, and probably more), the |
24 | * cpus cannot be independently powered down, either due to |
25 | * sequencing restrictions (on Tegra 2, cpu 0 must be the last to |
 * power down), or due to HW bugs (on OMAP4460, a cpu powering up
 * will corrupt the GIC state unless the other cpu runs a
 * workaround). Each cpu has a power state that it can enter without
29 | * coordinating with the other cpu (usually Wait For Interrupt, or |
30 | * WFI), and one or more "coupled" power states that affect blocks |
31 | * shared between the cpus (L2 cache, interrupt controller, and |
32 | * sometimes the whole SoC). Entering a coupled power state must |
33 | * be tightly controlled on both cpus. |
34 | * |
 * This file implements a solution where each cpu will wait in the
36 | * WFI state until all cpus are ready to enter a coupled state, at |
37 | * which point the coupled state function will be called on all |
38 | * cpus at approximately the same time. |
39 | * |
40 | * Once all cpus are ready to enter idle, they are woken by an smp |
41 | * cross call. At this point, there is a chance that one of the |
42 | * cpus will find work to do, and choose not to enter idle. A |
43 | * final pass is needed to guarantee that all cpus will call the |
44 | * power state enter function at the same time. During this pass, |
45 | * each cpu will increment the ready counter, and continue once the |
46 | * ready counter matches the number of online coupled cpus. If any |
47 | * cpu exits idle, the other cpus will decrement their counter and |
48 | * retry. |
49 | * |
50 | * requested_state stores the deepest coupled idle state each cpu |
51 | * is ready for. It is assumed that the states are indexed from |
52 | * shallowest (highest power, lowest exit latency) to deepest |
53 | * (lowest power, highest exit latency). The requested_state |
54 | * variable is not locked. It is only written from the cpu that |
55 | * it stores (or by the on/offlining cpu if that cpu is offline), |
 * and only read once all the cpus are ready for the coupled idle
 * state and are no longer updating it.
58 | * |
59 | * Three atomic counters are used. alive_count tracks the number |
60 | * of cpus in the coupled set that are currently or soon will be |
61 | * online. waiting_count tracks the number of cpus that are in |
62 | * the waiting loop, in the ready loop, or in the coupled idle state. |
63 | * ready_count tracks the number of cpus that are in the ready loop |
64 | * or in the coupled idle state. |
65 | * |
66 | * To use coupled cpuidle states, a cpuidle driver must: |
67 | * |
68 | * Set struct cpuidle_device.coupled_cpus to the mask of all |
69 | * coupled cpus, usually the same as cpu_possible_mask if all cpus |
70 | * are part of the same cluster. The coupled_cpus mask must be |
71 | * set in the struct cpuidle_device for each cpu. |
72 | * |
 * Set struct cpuidle_driver.safe_state_index to the index of a state
 * that is not a coupled state. This is usually WFI.
75 | * |
76 | * Set CPUIDLE_FLAG_COUPLED in struct cpuidle_state.flags for each |
77 | * state that affects multiple cpus. |
78 | * |
79 | * Provide a struct cpuidle_state.enter function for each state |
80 | * that affects multiple cpus. This function is guaranteed to be |
81 | * called on all cpus at approximately the same time. The driver |
82 | * should ensure that the cpus all abort together if any cpu tries |
83 | * to abort once the function is called. The function should return |
84 | * with interrupts still disabled. |
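 *
 * For illustration only (the driver name and state layout below are
 * hypothetical, not taken from any real driver), a minimal setup might
 * look like::
 *
 *	static struct cpuidle_driver foo_idle_driver = {
 *		.name = "foo_idle",
 *		.states = {
 *			[0] = {
 *				.enter = foo_enter_wfi,
 *				.name = "WFI",
 *			},
 *			[1] = {
 *				.enter = foo_enter_coupled,
 *				.flags = CPUIDLE_FLAG_COUPLED,
 *				.name = "C2-coupled",
 *			},
 *		},
 *		.state_count = 2,
 *		.safe_state_index = 0,
 *	};
 *
 * with each cpu's struct cpuidle_device also setting, e.g.::
 *
 *	cpumask_copy(&dev->coupled_cpus, cpu_possible_mask);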
85 | */ |
86 | |
87 | /** |
88 | * struct cpuidle_coupled - data for set of cpus that share a coupled idle state |
89 | * @coupled_cpus: mask of cpus that are part of the coupled set |
90 | * @requested_state: array of requested states for cpus in the coupled set |
91 | * @ready_waiting_counts: combined count of cpus in ready or waiting loops |
92 | * @abort_barrier: synchronisation point for abort cases |
93 | * @online_count: count of cpus that are online |
94 | * @refcnt: reference count of cpuidle devices that are using this struct |
95 | * @prevent: flag to prevent coupled idle while a cpu is hotplugging |
96 | */ |
97 | struct cpuidle_coupled { |
98 | cpumask_t coupled_cpus; |
99 | int requested_state[NR_CPUS]; |
100 | atomic_t ready_waiting_counts; |
101 | atomic_t abort_barrier; |
102 | int online_count; |
103 | int refcnt; |
104 | int prevent; |
105 | }; |
106 | |
107 | #define WAITING_BITS 16 |
108 | #define MAX_WAITING_CPUS (1 << WAITING_BITS) |
109 | #define WAITING_MASK (MAX_WAITING_CPUS - 1) |
110 | #define READY_MASK (~WAITING_MASK) |
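
/*
 * ready_waiting_counts packs both counters into one atomic_t: bits
 * 0..(WAITING_BITS-1) hold waiting_count and the upper bits hold
 * ready_count. A cpu in the ready loop is still counted as waiting,
 * so with four online coupled cpus all past cpuidle_coupled_set_waiting
 * and one of them also past cpuidle_coupled_set_ready, the value is
 * (1 << WAITING_BITS) | 4 == 0x00010004.
 */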
111 | |
112 | #define CPUIDLE_COUPLED_NOT_IDLE (-1) |
113 | |
114 | static DEFINE_PER_CPU(call_single_data_t, cpuidle_coupled_poke_cb); |
115 | |
116 | /* |
117 | * The cpuidle_coupled_poke_pending mask is used to avoid calling |
 * smp_call_function_single_async with the per cpu call_single_data_t struct
 * already in use. This prevents a deadlock where two cpus are waiting for
 * each other's call_single_data_t struct to be available.
121 | */ |
122 | static cpumask_t cpuidle_coupled_poke_pending; |
123 | |
124 | /* |
125 | * The cpuidle_coupled_poked mask is used to ensure that each cpu has been poked |
126 | * once to minimize entering the ready loop with a poke pending, which would |
127 | * require aborting and retrying. |
128 | */ |
129 | static cpumask_t cpuidle_coupled_poked; |
130 | |
131 | /** |
132 | * cpuidle_coupled_parallel_barrier - synchronize all online coupled cpus |
133 | * @dev: cpuidle_device of the calling cpu |
134 | * @a: atomic variable to hold the barrier |
135 | * |
136 | * No caller to this function will return from this function until all online |
137 | * cpus in the same coupled group have called this function. Once any caller |
138 | * has returned from this function, the barrier is immediately available for |
139 | * reuse. |
140 | * |
 * The atomic variable must be initialized to 0 before any cpu calls
 * this function, and will be reset to 0 before any cpu returns from it.
143 | * |
144 | * Must only be called from within a coupled idle state handler |
145 | * (state.enter when state.flags has CPUIDLE_FLAG_COUPLED set). |
146 | * |
147 | * Provides full smp barrier semantics before and after calling. |
148 | */ |
149 | void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a) |
150 | { |
151 | int n = dev->coupled->online_count; |
152 | |
153 | smp_mb__before_atomic(); |
	atomic_inc(a);

	while (atomic_read(a) < n)
		cpu_relax();

	if (atomic_inc_return(a) == n * 2) {
		atomic_set(a, 0);
		return;
	}

	while (atomic_read(a) > n)
165 | cpu_relax(); |
166 | } |
167 | |
168 | /** |
169 | * cpuidle_state_is_coupled - check if a state is part of a coupled set |
170 | * @drv: struct cpuidle_driver for the platform |
171 | * @state: index of the target state in drv->states |
172 | * |
173 | * Returns true if the target state is coupled with cpus besides this one |
174 | */ |
175 | bool cpuidle_state_is_coupled(struct cpuidle_driver *drv, int state) |
176 | { |
177 | return drv->states[state].flags & CPUIDLE_FLAG_COUPLED; |
178 | } |
179 | |
180 | /** |
181 | * cpuidle_coupled_state_verify - check if the coupled states are correctly set. |
182 | * @drv: struct cpuidle_driver for the platform |
183 | * |
184 | * Returns 0 for valid state values, a negative error code otherwise: |
 * * -EINVAL if any state is coupled but drv->safe_state_index is out of
 *   range or refers to a coupled state.
186 | */ |
187 | int cpuidle_coupled_state_verify(struct cpuidle_driver *drv) |
188 | { |
189 | int i; |
190 | |
191 | for (i = drv->state_count - 1; i >= 0; i--) { |
192 | if (cpuidle_state_is_coupled(drv, i) && |
193 | (drv->safe_state_index == i || |
194 | drv->safe_state_index < 0 || |
195 | drv->safe_state_index >= drv->state_count)) |
196 | return -EINVAL; |
197 | } |
198 | |
199 | return 0; |
200 | } |
201 | |
202 | /** |
203 | * cpuidle_coupled_set_ready - mark a cpu as ready |
204 | * @coupled: the struct coupled that contains the current cpu |
205 | */ |
206 | static inline void cpuidle_coupled_set_ready(struct cpuidle_coupled *coupled) |
207 | { |
	atomic_add(MAX_WAITING_CPUS, &coupled->ready_waiting_counts);
209 | } |
210 | |
211 | /** |
212 | * cpuidle_coupled_set_not_ready - mark a cpu as not ready |
213 | * @coupled: the struct coupled that contains the current cpu |
214 | * |
215 | * Decrements the ready counter, unless the ready (and thus the waiting) counter |
216 | * is equal to the number of online cpus. Prevents a race where one cpu |
217 | * decrements the waiting counter and then re-increments it just before another |
218 | * cpu has decremented its ready counter, leading to the ready counter going |
219 | * down from the number of online cpus without going through the coupled idle |
220 | * state. |
221 | * |
222 | * Returns 0 if the counter was decremented successfully, -EINVAL if the ready |
223 | * counter was equal to the number of online cpus. |
224 | */ |
static inline int cpuidle_coupled_set_not_ready(struct cpuidle_coupled *coupled)
227 | { |
228 | int all; |
229 | int ret; |
230 | |
231 | all = coupled->online_count | (coupled->online_count << WAITING_BITS); |
	ret = atomic_add_unless(&coupled->ready_waiting_counts,
				-MAX_WAITING_CPUS, all);
234 | |
235 | return ret ? 0 : -EINVAL; |
236 | } |
237 | |
238 | /** |
239 | * cpuidle_coupled_no_cpus_ready - check if no cpus in a coupled set are ready |
240 | * @coupled: the struct coupled that contains the current cpu |
241 | * |
242 | * Returns true if all of the cpus in a coupled set are out of the ready loop. |
243 | */ |
244 | static inline int cpuidle_coupled_no_cpus_ready(struct cpuidle_coupled *coupled) |
245 | { |
	int r = atomic_read(&coupled->ready_waiting_counts) >> WAITING_BITS;
247 | return r == 0; |
248 | } |
249 | |
250 | /** |
251 | * cpuidle_coupled_cpus_ready - check if all cpus in a coupled set are ready |
252 | * @coupled: the struct coupled that contains the current cpu |
253 | * |
254 | * Returns true if all cpus coupled to this target state are in the ready loop |
255 | */ |
256 | static inline bool cpuidle_coupled_cpus_ready(struct cpuidle_coupled *coupled) |
257 | { |
	int r = atomic_read(&coupled->ready_waiting_counts) >> WAITING_BITS;
259 | return r == coupled->online_count; |
260 | } |
261 | |
262 | /** |
263 | * cpuidle_coupled_cpus_waiting - check if all cpus in a coupled set are waiting |
264 | * @coupled: the struct coupled that contains the current cpu |
265 | * |
266 | * Returns true if all cpus coupled to this target state are in the wait loop |
267 | */ |
268 | static inline bool cpuidle_coupled_cpus_waiting(struct cpuidle_coupled *coupled) |
269 | { |
	int w = atomic_read(&coupled->ready_waiting_counts) & WAITING_MASK;
271 | return w == coupled->online_count; |
272 | } |
273 | |
274 | /** |
275 | * cpuidle_coupled_no_cpus_waiting - check if no cpus in coupled set are waiting |
276 | * @coupled: the struct coupled that contains the current cpu |
277 | * |
278 | * Returns true if all of the cpus in a coupled set are out of the waiting loop. |
279 | */ |
280 | static inline int cpuidle_coupled_no_cpus_waiting(struct cpuidle_coupled *coupled) |
281 | { |
	int w = atomic_read(&coupled->ready_waiting_counts) & WAITING_MASK;
283 | return w == 0; |
284 | } |
285 | |
286 | /** |
287 | * cpuidle_coupled_get_state - determine the deepest idle state |
288 | * @dev: struct cpuidle_device for this cpu |
289 | * @coupled: the struct coupled that contains the current cpu |
290 | * |
291 | * Returns the deepest idle state that all coupled cpus can enter |
292 | */ |
293 | static inline int cpuidle_coupled_get_state(struct cpuidle_device *dev, |
294 | struct cpuidle_coupled *coupled) |
295 | { |
296 | int i; |
297 | int state = INT_MAX; |
298 | |
299 | /* |
300 | * Read barrier ensures that read of requested_state is ordered after |
	 * reads of ready_count. Matches the write barrier in
	 * cpuidle_coupled_set_waiting.
303 | */ |
304 | smp_rmb(); |
305 | |
306 | for_each_cpu(i, &coupled->coupled_cpus) |
		if (cpu_online(i) && coupled->requested_state[i] < state)
308 | state = coupled->requested_state[i]; |
309 | |
310 | return state; |
311 | } |
312 | |
313 | static void cpuidle_coupled_handle_poke(void *info) |
314 | { |
	int cpu = (unsigned long)info;

	cpumask_set_cpu(cpu, &cpuidle_coupled_poked);
	cpumask_clear_cpu(cpu, &cpuidle_coupled_poke_pending);
318 | } |
319 | |
320 | /** |
321 | * cpuidle_coupled_poke - wake up a cpu that may be waiting |
322 | * @cpu: target cpu |
323 | * |
 * Ensures that the target cpu exits its waiting idle state (if it is in it)
 * and will see updates to waiting_count before it re-enters its waiting idle
 * state.
 *
 * If cpuidle_coupled_poke_pending is already set for the target cpu, that cpu
329 | * either has or will soon have a pending IPI that will wake it out of idle, |
330 | * or it is currently processing the IPI and is not in idle. |
331 | */ |
332 | static void cpuidle_coupled_poke(int cpu) |
333 | { |
334 | call_single_data_t *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu); |
335 | |
	if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending))
337 | smp_call_function_single_async(cpu, csd); |
338 | } |
339 | |
340 | /** |
341 | * cpuidle_coupled_poke_others - wake up all other cpus that may be waiting |
342 | * @this_cpu: target cpu |
343 | * @coupled: the struct coupled that contains the current cpu |
344 | * |
345 | * Calls cpuidle_coupled_poke on all other online cpus. |
346 | */ |
347 | static void cpuidle_coupled_poke_others(int this_cpu, |
348 | struct cpuidle_coupled *coupled) |
349 | { |
350 | int cpu; |
351 | |
352 | for_each_cpu(cpu, &coupled->coupled_cpus) |
353 | if (cpu != this_cpu && cpu_online(cpu)) |
354 | cpuidle_coupled_poke(cpu); |
355 | } |
356 | |
357 | /** |
358 | * cpuidle_coupled_set_waiting - mark this cpu as in the wait loop |
359 | * @cpu: target cpu |
360 | * @coupled: the struct coupled that contains the current cpu |
361 | * @next_state: the index in drv->states of the requested state for this cpu |
362 | * |
363 | * Updates the requested idle state for the specified cpuidle device. |
364 | * Returns the number of waiting cpus. |
365 | */ |
366 | static int cpuidle_coupled_set_waiting(int cpu, |
367 | struct cpuidle_coupled *coupled, int next_state) |
368 | { |
369 | coupled->requested_state[cpu] = next_state; |
370 | |
371 | /* |
372 | * The atomic_inc_return provides a write barrier to order the write |
373 | * to requested_state with the later write that increments ready_count. |
374 | */ |
	return atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK;
376 | } |
377 | |
378 | /** |
379 | * cpuidle_coupled_set_not_waiting - mark this cpu as leaving the wait loop |
380 | * @cpu: target cpu |
381 | * @coupled: the struct coupled that contains the current cpu |
382 | * |
383 | * Removes the requested idle state for the specified cpuidle device. |
384 | */ |
385 | static void cpuidle_coupled_set_not_waiting(int cpu, |
386 | struct cpuidle_coupled *coupled) |
387 | { |
388 | /* |
389 | * Decrementing waiting count can race with incrementing it in |
390 | * cpuidle_coupled_set_waiting, but that's OK. Worst case, some |
391 | * cpus will increment ready_count and then spin until they |
	 * notice that this cpu has cleared its requested_state.
393 | */ |
	atomic_dec(&coupled->ready_waiting_counts);
395 | |
396 | coupled->requested_state[cpu] = CPUIDLE_COUPLED_NOT_IDLE; |
397 | } |
398 | |
399 | /** |
400 | * cpuidle_coupled_set_done - mark this cpu as leaving the ready loop |
401 | * @cpu: the current cpu |
402 | * @coupled: the struct coupled that contains the current cpu |
403 | * |
404 | * Marks this cpu as no longer in the ready and waiting loops. Decrements |
405 | * the waiting count first to prevent another cpu looping back in and seeing |
406 | * this cpu as waiting just before it exits idle. |
407 | */ |
408 | static void cpuidle_coupled_set_done(int cpu, struct cpuidle_coupled *coupled) |
409 | { |
410 | cpuidle_coupled_set_not_waiting(cpu, coupled); |
	atomic_sub(MAX_WAITING_CPUS, &coupled->ready_waiting_counts);
412 | } |
413 | |
414 | /** |
415 | * cpuidle_coupled_clear_pokes - spin until the poke interrupt is processed |
416 | * @cpu: this cpu |
417 | * |
418 | * Turns on interrupts and spins until any outstanding poke interrupts have |
419 | * been processed and the poke bit has been cleared. |
420 | * |
421 | * Other interrupts may also be processed while interrupts are enabled, so |
422 | * need_resched() must be tested after this function returns to make sure |
423 | * the interrupt didn't schedule work that should take the cpu out of idle. |
424 | * |
425 | * Returns 0 if no poke was pending, 1 if a poke was cleared. |
426 | */ |
427 | static int cpuidle_coupled_clear_pokes(int cpu) |
428 | { |
	if (!cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending))
430 | return 0; |
431 | |
432 | local_irq_enable(); |
	while (cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending))
434 | cpu_relax(); |
435 | local_irq_disable(); |
436 | |
437 | return 1; |
438 | } |
439 | |
440 | static bool cpuidle_coupled_any_pokes_pending(struct cpuidle_coupled *coupled) |
441 | { |
442 | cpumask_t cpus; |
443 | int ret; |
444 | |
	cpumask_and(&cpus, cpu_online_mask, &coupled->coupled_cpus);
	ret = cpumask_and(&cpus, &cpuidle_coupled_poke_pending, &cpus);
447 | |
448 | return ret; |
449 | } |
450 | |
451 | /** |
452 | * cpuidle_enter_state_coupled - attempt to enter a state with coupled cpus |
453 | * @dev: struct cpuidle_device for the current cpu |
454 | * @drv: struct cpuidle_driver for the platform |
455 | * @next_state: index of the requested state in drv->states |
456 | * |
457 | * Coordinate with coupled cpus to enter the target state. This is a two |
458 | * stage process. In the first stage, the cpus are operating independently, |
459 | * and may call into cpuidle_enter_state_coupled at completely different times. |
460 | * To save as much power as possible, the first cpus to call this function will |
461 | * go to an intermediate state (the cpuidle_device's safe state), and wait for |
462 | * all the other cpus to call this function. Once all coupled cpus are idle, |
463 | * the second stage will start. Each coupled cpu will spin until all cpus have |
 * guaranteed that they will call the target state's enter function.
465 | * |
466 | * This function must be called with interrupts disabled. It may enable |
467 | * interrupts while preparing for idle, and it will always return with |
468 | * interrupts enabled. |
469 | */ |
470 | int cpuidle_enter_state_coupled(struct cpuidle_device *dev, |
471 | struct cpuidle_driver *drv, int next_state) |
472 | { |
473 | int entered_state = -1; |
474 | struct cpuidle_coupled *coupled = dev->coupled; |
475 | int w; |
476 | |
477 | if (!coupled) |
478 | return -EINVAL; |
479 | |
480 | while (coupled->prevent) { |
		cpuidle_coupled_clear_pokes(dev->cpu);
		if (need_resched()) {
			local_irq_enable();
			return entered_state;
		}
		entered_state = cpuidle_enter_state(dev, drv,
			drv->safe_state_index);
		local_irq_disable();
489 | } |
490 | |
491 | /* Read barrier ensures online_count is read after prevent is cleared */ |
492 | smp_rmb(); |
493 | |
494 | reset: |
	cpumask_clear_cpu(dev->cpu, &cpuidle_coupled_poked);
496 | |
	w = cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state);
498 | /* |
499 | * If this is the last cpu to enter the waiting state, poke |
500 | * all the other cpus out of their waiting state so they can |
501 | * enter a deeper state. This can race with one of the cpus |
502 | * exiting the waiting state due to an interrupt and |
503 | * decrementing waiting_count, see comment below. |
504 | */ |
505 | if (w == coupled->online_count) { |
		cpumask_set_cpu(dev->cpu, &cpuidle_coupled_poked);
		cpuidle_coupled_poke_others(dev->cpu, coupled);
508 | } |
509 | |
510 | retry: |
511 | /* |
512 | * Wait for all coupled cpus to be idle, using the deepest state |
513 | * allowed for a single cpu. If this was not the poking cpu, wait |
514 | * for at least one poke before leaving to avoid a race where |
515 | * two cpus could arrive at the waiting loop at the same time, |
516 | * but the first of the two to arrive could skip the loop without |
517 | * processing the pokes from the last to arrive. |
518 | */ |
519 | while (!cpuidle_coupled_cpus_waiting(coupled) || |
	       !cpumask_test_cpu(dev->cpu, &cpuidle_coupled_poked)) {
		if (cpuidle_coupled_clear_pokes(dev->cpu))
			continue;

		if (need_resched()) {
			cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
			goto out;
		}

		if (coupled->prevent) {
			cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
			goto out;
		}

		entered_state = cpuidle_enter_state(dev, drv,
			drv->safe_state_index);
		local_irq_disable();
	}

	cpuidle_coupled_clear_pokes(dev->cpu);
	if (need_resched()) {
		cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
542 | goto out; |
543 | } |
544 | |
545 | /* |
546 | * Make sure final poke status for this cpu is visible before setting |
547 | * cpu as ready. |
548 | */ |
549 | smp_wmb(); |
550 | |
551 | /* |
552 | * All coupled cpus are probably idle. There is a small chance that |
553 | * one of the other cpus just became active. Increment the ready count, |
554 | * and spin until all coupled cpus have incremented the counter. Once a |
555 | * cpu has incremented the ready counter, it cannot abort idle and must |
556 | * spin until either all cpus have incremented the ready counter, or |
557 | * another cpu leaves idle and decrements the waiting counter. |
558 | */ |
559 | |
560 | cpuidle_coupled_set_ready(coupled); |
561 | while (!cpuidle_coupled_cpus_ready(coupled)) { |
562 | /* Check if any other cpus bailed out of idle. */ |
563 | if (!cpuidle_coupled_cpus_waiting(coupled)) |
564 | if (!cpuidle_coupled_set_not_ready(coupled)) |
565 | goto retry; |
566 | |
567 | cpu_relax(); |
568 | } |
569 | |
570 | /* |
571 | * Make sure read of all cpus ready is done before reading pending pokes |
572 | */ |
573 | smp_rmb(); |
574 | |
575 | /* |
576 | * There is a small chance that a cpu left and reentered idle after this |
577 | * cpu saw that all cpus were waiting. The cpu that reentered idle will |
578 | * have sent this cpu a poke, which will still be pending after the |
579 | * ready loop. The pending interrupt may be lost by the interrupt |
580 | * controller when entering the deep idle state. It's not possible to |
581 | * clear a pending interrupt without turning interrupts on and handling |
582 | * it, and it's too late to turn on interrupts here, so reset the |
583 | * coupled idle state of all cpus and retry. |
584 | */ |
585 | if (cpuidle_coupled_any_pokes_pending(coupled)) { |
		cpuidle_coupled_set_done(dev->cpu, coupled);
587 | /* Wait for all cpus to see the pending pokes */ |
588 | cpuidle_coupled_parallel_barrier(dev, &coupled->abort_barrier); |
589 | goto reset; |
590 | } |
591 | |
592 | /* all cpus have acked the coupled state */ |
593 | next_state = cpuidle_coupled_get_state(dev, coupled); |
594 | |
595 | entered_state = cpuidle_enter_state(dev, drv, next_state); |
596 | |
	cpuidle_coupled_set_done(dev->cpu, coupled);
598 | |
599 | out: |
600 | /* |
601 | * Normal cpuidle states are expected to return with irqs enabled. |
602 | * That leads to an inefficiency where a cpu receiving an interrupt |
603 | * that brings it out of idle will process that interrupt before |
604 | * exiting the idle enter function and decrementing ready_count. All |
605 | * other cpus will need to spin waiting for the cpu that is processing |
606 | * the interrupt. If the driver returns with interrupts disabled, |
607 | * all other cpus will loop back into the safe idle state instead of |
608 | * spinning, saving power. |
609 | * |
610 | * Calling local_irq_enable here allows coupled states to return with |
611 | * interrupts disabled, but won't cause problems for drivers that |
612 | * exit with interrupts enabled. |
613 | */ |
614 | local_irq_enable(); |
615 | |
616 | /* |
617 | * Wait until all coupled cpus have exited idle. There is no risk that |
618 | * a cpu exits and re-enters the ready state because this cpu has |
619 | * already decremented its waiting_count. |
620 | */ |
621 | while (!cpuidle_coupled_no_cpus_ready(coupled)) |
622 | cpu_relax(); |
623 | |
624 | return entered_state; |
625 | } |
626 | |
627 | static void cpuidle_coupled_update_online_cpus(struct cpuidle_coupled *coupled) |
628 | { |
	cpumask_t cpus;

	cpumask_and(&cpus, cpu_online_mask, &coupled->coupled_cpus);
	coupled->online_count = cpumask_weight(&cpus);
632 | } |
633 | |
634 | /** |
635 | * cpuidle_coupled_register_device - register a coupled cpuidle device |
636 | * @dev: struct cpuidle_device for the current cpu |
637 | * |
638 | * Called from cpuidle_register_device to handle coupled idle init. Finds the |
639 | * cpuidle_coupled struct for this set of coupled cpus, or creates one if none |
640 | * exists yet. |
641 | */ |
642 | int cpuidle_coupled_register_device(struct cpuidle_device *dev) |
643 | { |
644 | int cpu; |
645 | struct cpuidle_device *other_dev; |
646 | call_single_data_t *csd; |
647 | struct cpuidle_coupled *coupled; |
648 | |
	if (cpumask_empty(&dev->coupled_cpus))
650 | return 0; |
651 | |
652 | for_each_cpu(cpu, &dev->coupled_cpus) { |
653 | other_dev = per_cpu(cpuidle_devices, cpu); |
654 | if (other_dev && other_dev->coupled) { |
655 | coupled = other_dev->coupled; |
656 | goto have_coupled; |
657 | } |
658 | } |
659 | |
660 | /* No existing coupled info found, create a new one */ |
	coupled = kzalloc(sizeof(struct cpuidle_coupled), GFP_KERNEL);
662 | if (!coupled) |
663 | return -ENOMEM; |
664 | |
665 | coupled->coupled_cpus = dev->coupled_cpus; |
666 | |
667 | have_coupled: |
668 | dev->coupled = coupled; |
669 | if (WARN_ON(!cpumask_equal(&dev->coupled_cpus, &coupled->coupled_cpus))) |
670 | coupled->prevent++; |
671 | |
672 | cpuidle_coupled_update_online_cpus(coupled); |
673 | |
674 | coupled->refcnt++; |
675 | |
676 | csd = &per_cpu(cpuidle_coupled_poke_cb, dev->cpu); |
677 | INIT_CSD(csd, cpuidle_coupled_handle_poke, (void *)(unsigned long)dev->cpu); |
678 | |
679 | return 0; |
680 | } |
681 | |
682 | /** |
683 | * cpuidle_coupled_unregister_device - unregister a coupled cpuidle device |
684 | * @dev: struct cpuidle_device for the current cpu |
685 | * |
686 | * Called from cpuidle_unregister_device to tear down coupled idle. Removes the |
 * cpu from the coupled idle set, and frees the cpuidle_coupled struct if
688 | * this was the last cpu in the set. |
689 | */ |
690 | void cpuidle_coupled_unregister_device(struct cpuidle_device *dev) |
691 | { |
692 | struct cpuidle_coupled *coupled = dev->coupled; |
693 | |
	if (cpumask_empty(&dev->coupled_cpus))
695 | return; |
696 | |
	if (!--coupled->refcnt)
		kfree(coupled);
699 | dev->coupled = NULL; |
700 | } |
701 | |
702 | /** |
703 | * cpuidle_coupled_prevent_idle - prevent cpus from entering a coupled state |
704 | * @coupled: the struct coupled that contains the cpu that is changing state |
705 | * |
706 | * Disables coupled cpuidle on a coupled set of cpus. Used to ensure that |
707 | * cpu_online_mask doesn't change while cpus are coordinating coupled idle. |
708 | */ |
709 | static void cpuidle_coupled_prevent_idle(struct cpuidle_coupled *coupled) |
710 | { |
711 | int cpu = get_cpu(); |
712 | |
713 | /* Force all cpus out of the waiting loop. */ |
714 | coupled->prevent++; |
	cpuidle_coupled_poke_others(cpu, coupled);
716 | put_cpu(); |
717 | while (!cpuidle_coupled_no_cpus_waiting(coupled)) |
718 | cpu_relax(); |
719 | } |
720 | |
721 | /** |
722 | * cpuidle_coupled_allow_idle - allows cpus to enter a coupled state |
723 | * @coupled: the struct coupled that contains the cpu that is changing state |
724 | * |
725 | * Enables coupled cpuidle on a coupled set of cpus. Used to ensure that |
726 | * cpu_online_mask doesn't change while cpus are coordinating coupled idle. |
727 | */ |
728 | static void cpuidle_coupled_allow_idle(struct cpuidle_coupled *coupled) |
729 | { |
730 | int cpu = get_cpu(); |
731 | |
732 | /* |
733 | * Write barrier ensures readers see the new online_count when they |
734 | * see prevent == 0. |
735 | */ |
736 | smp_wmb(); |
737 | coupled->prevent--; |
738 | /* Force cpus out of the prevent loop. */ |
	cpuidle_coupled_poke_others(cpu, coupled);
740 | put_cpu(); |
741 | } |
742 | |
743 | static int coupled_cpu_online(unsigned int cpu) |
744 | { |
745 | struct cpuidle_device *dev; |
746 | |
747 | mutex_lock(&cpuidle_lock); |
748 | |
749 | dev = per_cpu(cpuidle_devices, cpu); |
750 | if (dev && dev->coupled) { |
		cpuidle_coupled_update_online_cpus(dev->coupled);
		cpuidle_coupled_allow_idle(dev->coupled);
753 | } |
754 | |
	mutex_unlock(&cpuidle_lock);
756 | return 0; |
757 | } |
758 | |
759 | static int coupled_cpu_up_prepare(unsigned int cpu) |
760 | { |
761 | struct cpuidle_device *dev; |
762 | |
763 | mutex_lock(&cpuidle_lock); |
764 | |
765 | dev = per_cpu(cpuidle_devices, cpu); |
766 | if (dev && dev->coupled) |
		cpuidle_coupled_prevent_idle(dev->coupled);
768 | |
	mutex_unlock(&cpuidle_lock);
770 | return 0; |
771 | } |
772 | |
773 | static int __init cpuidle_coupled_init(void) |
774 | { |
775 | int ret; |
776 | |
	ret = cpuhp_setup_state_nocalls(CPUHP_CPUIDLE_COUPLED_PREPARE,
					"cpuidle/coupled:prepare",
					coupled_cpu_up_prepare,
					coupled_cpu_online);
	if (ret)
		return ret;
	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					"cpuidle/coupled:online",
					coupled_cpu_online,
					coupled_cpu_up_prepare);
	if (ret < 0)
		cpuhp_remove_state_nocalls(CPUHP_CPUIDLE_COUPLED_PREPARE);
789 | return ret; |
790 | } |
791 | core_initcall(cpuidle_coupled_init); |
792 | |