1 | // SPDX-License-Identifier: GPL-2.0 |
2 | // Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> |
3 | #define pr_fmt(fmt) "irq_timings: " fmt |
4 | |
5 | #include <linux/kernel.h> |
6 | #include <linux/percpu.h> |
7 | #include <linux/slab.h> |
8 | #include <linux/static_key.h> |
9 | #include <linux/init.h> |
10 | #include <linux/interrupt.h> |
11 | #include <linux/idr.h> |
12 | #include <linux/irq.h> |
13 | #include <linux/math64.h> |
14 | #include <linux/log2.h> |
15 | |
16 | #include <trace/events/irq.h> |
17 | |
18 | #include "internals.h" |
19 | |
20 | DEFINE_STATIC_KEY_FALSE(irq_timing_enabled); |
21 | |
22 | DEFINE_PER_CPU(struct irq_timings, irq_timings); |
23 | |
24 | static DEFINE_IDR(irqt_stats); |
25 | |
26 | void irq_timings_enable(void) |
27 | { |
28 | static_branch_enable(&irq_timing_enabled); |
29 | } |
30 | |
31 | void irq_timings_disable(void) |
32 | { |
33 | static_branch_disable(&irq_timing_enabled); |
34 | } |
35 | |
36 | /* |
37 | * The main goal of this algorithm is to predict the next interrupt |
38 | * occurrence on the current CPU. |
39 | * |
40 | * Currently, the interrupt timings are stored in a circular array |
41 | * buffer every time there is an interrupt, as a tuple: the interrupt |
42 | * number and the associated timestamp when the event occurred <irq, |
43 | * timestamp>. |
44 | * |
45 | * For every interrupt occurring in a short period of time, we can |
46 | * measure the elapsed time between the occurrences for the same |
47 | * interrupt and we end up with a suite of intervals. The experience |
48 | * showed the interrupts are often coming following a periodic |
49 | * pattern. |
50 | * |
51 | * The objective of the algorithm is to find out this periodic pattern |
52 | * in a fastest way and use its period to predict the next irq event. |
53 | * |
54 | * When the next interrupt event is requested, we are in the situation |
55 | * where the interrupts are disabled and the circular buffer |
56 | * containing the timings is filled with the events which happened |
57 | * after the previous next-interrupt-event request. |
58 | * |
59 | * At this point, we read the circular buffer and we fill the irq |
60 | * related statistics structure. After this step, the circular array |
61 | * containing the timings is empty because all the values are |
62 | * dispatched in their corresponding buffers. |
63 | * |
64 | * Now for each interrupt, we can predict the next event by using the |
65 | * suffix array, log interval and exponential moving average |
66 | * |
67 | * 1. Suffix array |
68 | * |
69 | * Suffix array is an array of all the suffixes of a string. It is |
70 | * widely used as a data structure for compression, text search, ... |
71 | * For instance for the word 'banana', the suffixes will be: 'banana' |
72 | * 'anana' 'nana' 'ana' 'na' 'a' |
73 | * |
74 | * Usually, the suffix array is sorted but for our purpose it is |
75 | * not necessary and won't provide any improvement in the context of |
76 | * the solved problem where we clearly define the boundaries of the |
77 | * search by a max period and min period. |
78 | * |
79 | * The suffix array will build a suite of intervals of different |
80 | * length and will look for the repetition of each suite. If the suite |
81 | * is repeating then we have the period because it is the length of |
82 | * the suite whatever its position in the buffer. |
83 | * |
84 | * 2. Log interval |
85 | * |
86 | * We saw the irq timings allow to compute the interval of the |
87 | * occurrences for a specific interrupt. We can reasonably assume the |
88 | * longer is the interval, the higher is the error for the next event |
89 | * and we can consider storing those interval values into an array |
90 | * where each slot in the array correspond to an interval at the power |
91 | * of 2 of the index. For example, index 12 will contain values |
92 | * between 2^11 and 2^12. |
93 | * |
94 | * At the end we have an array of values where at each index defines a |
95 | * [2^index - 1, 2 ^ index] interval values allowing to store a large |
96 | * number of values inside a small array. |
97 | * |
98 | * For example, if we have the value 1123, then we store it at |
99 | * ilog2(1123) = 10 index value. |
100 | * |
101 | * Storing those value at the specific index is done by computing an |
102 | * exponential moving average for this specific slot. For instance, |
103 | * for values 1800, 1123, 1453, ... fall under the same slot (10) and |
104 | * the exponential moving average is computed every time a new value |
105 | * is stored at this slot. |
106 | * |
107 | * 3. Exponential Moving Average |
108 | * |
109 | * The EMA is largely used to track a signal for stocks or as a low |
110 | * pass filter. The magic of the formula, is it is very simple and the |
111 | * reactivity of the average can be tuned with the factors called |
112 | * alpha. |
113 | * |
114 | * The higher the alphas are, the faster the average respond to the |
115 | * signal change. In our case, if a slot in the array is a big |
116 | * interval, we can have numbers with a big difference between |
117 | * them. The impact of those differences in the average computation |
118 | * can be tuned by changing the alpha value. |
119 | * |
120 | * |
121 | * -- The algorithm -- |
122 | * |
123 | * We saw the different processing above, now let's see how they are |
124 | * used together. |
125 | * |
126 | * For each interrupt: |
127 | * For each interval: |
128 | * Compute the index = ilog2(interval) |
129 | * Compute a new_ema(buffer[index], interval) |
130 | * Store the index in a circular buffer |
131 | * |
132 | * Compute the suffix array of the indexes |
133 | * |
134 | * For each suffix: |
135 | * If the suffix is reverse-found 3 times |
136 | * Return suffix |
137 | * |
138 | * Return Not found |
139 | * |
140 | * However we can not have endless suffix array to be build, it won't |
141 | * make sense and it will add an extra overhead, so we can restrict |
142 | * this to a maximum suffix length of 5 and a minimum suffix length of |
143 | * 2. The experience showed 5 is the majority of the maximum pattern |
144 | * period found for different devices. |
145 | * |
146 | * The result is a pattern finding less than 1us for an interrupt. |
147 | * |
148 | * Example based on real values: |
149 | * |
150 | * Example 1 : MMC write/read interrupt interval: |
151 | * |
152 | * 223947, 1240, 1384, 1386, 1386, |
153 | * 217416, 1236, 1384, 1386, 1387, |
154 | * 214719, 1241, 1386, 1387, 1384, |
155 | * 213696, 1234, 1384, 1386, 1388, |
156 | * 219904, 1240, 1385, 1389, 1385, |
157 | * 212240, 1240, 1386, 1386, 1386, |
158 | * 214415, 1236, 1384, 1386, 1387, |
159 | * 214276, 1234, 1384, 1388, ? |
160 | * |
161 | * For each element, apply ilog2(value) |
162 | * |
163 | * 15, 8, 8, 8, 8, |
164 | * 15, 8, 8, 8, 8, |
165 | * 15, 8, 8, 8, 8, |
166 | * 15, 8, 8, 8, 8, |
167 | * 15, 8, 8, 8, 8, |
168 | * 15, 8, 8, 8, 8, |
169 | * 15, 8, 8, 8, 8, |
170 | * 15, 8, 8, 8, ? |
171 | * |
172 | * Max period of 5, we take the last (max_period * 3) 15 elements as |
173 | * we can be confident if the pattern repeats itself three times it is |
174 | * a repeating pattern. |
175 | * |
176 | * 8, |
177 | * 15, 8, 8, 8, 8, |
178 | * 15, 8, 8, 8, 8, |
179 | * 15, 8, 8, 8, ? |
180 | * |
181 | * Suffixes are: |
182 | * |
183 | * 1) 8, 15, 8, 8, 8 <- max period |
184 | * 2) 8, 15, 8, 8 |
185 | * 3) 8, 15, 8 |
186 | * 4) 8, 15 <- min period |
187 | * |
188 | * From there we search the repeating pattern for each suffix. |
189 | * |
190 | * buffer: 8, 15, 8, 8, 8, 8, 15, 8, 8, 8, 8, 15, 8, 8, 8 |
191 | * | | | | | | | | | | | | | | | |
192 | * 8, 15, 8, 8, 8 | | | | | | | | | | |
193 | * 8, 15, 8, 8, 8 | | | | | |
194 | * 8, 15, 8, 8, 8 |
195 | * |
196 | * When moving the suffix, we found exactly 3 matches. |
197 | * |
198 | * The first suffix with period 5 is repeating. |
199 | * |
200 | * The next event is (3 * max_period) % suffix_period |
201 | * |
202 | * In this example, the result 0, so the next event is suffix[0] => 8 |
203 | * |
204 | * However, 8 is the index in the array of exponential moving average |
205 | * which was calculated on the fly when storing the values, so the |
206 | * interval is ema[8] = 1366 |
207 | * |
208 | * |
209 | * Example 2: |
210 | * |
211 | * 4, 3, 5, 100, |
212 | * 3, 3, 5, 117, |
213 | * 4, 4, 5, 112, |
214 | * 4, 3, 4, 110, |
215 | * 3, 5, 3, 117, |
216 | * 4, 4, 5, 112, |
217 | * 4, 3, 4, 110, |
218 | * 3, 4, 5, 112, |
219 | * 4, 3, 4, 110 |
220 | * |
221 | * ilog2 |
222 | * |
223 | * 0, 0, 0, 4, |
224 | * 0, 0, 0, 4, |
225 | * 0, 0, 0, 4, |
226 | * 0, 0, 0, 4, |
227 | * 0, 0, 0, 4, |
228 | * 0, 0, 0, 4, |
229 | * 0, 0, 0, 4, |
230 | * 0, 0, 0, 4, |
231 | * 0, 0, 0, 4 |
232 | * |
233 | * Max period 5: |
234 | * 0, 0, 4, |
235 | * 0, 0, 0, 4, |
236 | * 0, 0, 0, 4, |
237 | * 0, 0, 0, 4 |
238 | * |
239 | * Suffixes: |
240 | * |
241 | * 1) 0, 0, 4, 0, 0 |
242 | * 2) 0, 0, 4, 0 |
243 | * 3) 0, 0, 4 |
244 | * 4) 0, 0 |
245 | * |
246 | * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4 |
247 | * | | | | | | X |
248 | * 0, 0, 4, 0, 0, | X |
249 | * 0, 0 |
250 | * |
251 | * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4 |
252 | * | | | | | | | | | | | | | | | |
253 | * 0, 0, 4, 0, | | | | | | | | | | | |
254 | * 0, 0, 4, 0, | | | | | | | |
255 | * 0, 0, 4, 0, | | | |
256 | * 0 0 4 |
257 | * |
258 | * Pattern is found 3 times, the remaining is 1 which results from |
259 | * (max_period * 3) % suffix_period. This value is the index in the |
260 | * suffix arrays. The suffix array for a period 4 has the value 4 |
261 | * at index 1. |
262 | */ |
263 | #define EMA_ALPHA_VAL 64 |
264 | #define EMA_ALPHA_SHIFT 7 |
265 | |
266 | #define PREDICTION_PERIOD_MIN 3 |
267 | #define PREDICTION_PERIOD_MAX 5 |
268 | #define PREDICTION_FACTOR 4 |
269 | #define PREDICTION_MAX 10 /* 2 ^ PREDICTION_MAX useconds */ |
270 | #define PREDICTION_BUFFER_SIZE 16 /* slots for EMAs, hardly more than 16 */ |
271 | |
272 | /* |
273 | * Number of elements in the circular buffer: If it happens it was |
274 | * flushed before, then the number of elements could be smaller than |
275 | * IRQ_TIMINGS_SIZE, so the count is used, otherwise the array size is |
276 | * used as we wrapped. The index begins from zero when we did not |
277 | * wrap. That could be done in a nicer way with the proper circular |
278 | * array structure type but with the cost of extra computation in the |
279 | * interrupt handler hot path. We choose efficiency. |
280 | */ |
281 | #define for_each_irqts(i, irqts) \ |
282 | for (i = irqts->count < IRQ_TIMINGS_SIZE ? \ |
283 | 0 : irqts->count & IRQ_TIMINGS_MASK, \ |
284 | irqts->count = min(IRQ_TIMINGS_SIZE, \ |
285 | irqts->count); \ |
286 | irqts->count > 0; irqts->count--, \ |
287 | i = (i + 1) & IRQ_TIMINGS_MASK) |
288 | |
289 | struct irqt_stat { |
290 | u64 last_ts; |
291 | u64 ema_time[PREDICTION_BUFFER_SIZE]; |
292 | int timings[IRQ_TIMINGS_SIZE]; |
293 | int circ_timings[IRQ_TIMINGS_SIZE]; |
294 | int count; |
295 | }; |
296 | |
297 | /* |
298 | * Exponential moving average computation |
299 | */ |
300 | static u64 irq_timings_ema_new(u64 value, u64 ema_old) |
301 | { |
302 | s64 diff; |
303 | |
304 | if (unlikely(!ema_old)) |
305 | return value; |
306 | |
307 | diff = (value - ema_old) * EMA_ALPHA_VAL; |
308 | /* |
309 | * We can use a s64 type variable to be added with the u64 |
310 | * ema_old variable as this one will never have its topmost |
311 | * bit set, it will be always smaller than 2^63 nanosec |
312 | * interrupt interval (292 years). |
313 | */ |
314 | return ema_old + (diff >> EMA_ALPHA_SHIFT); |
315 | } |
316 | |
317 | static int irq_timings_next_event_index(int *buffer, size_t len, int period_max) |
318 | { |
319 | int period; |
320 | |
321 | /* |
322 | * Move the beginning pointer to the end minus the max period x 3. |
323 | * We are at the point we can begin searching the pattern |
324 | */ |
325 | buffer = &buffer[len - (period_max * 3)]; |
326 | |
327 | /* Adjust the length to the maximum allowed period x 3 */ |
328 | len = period_max * 3; |
329 | |
330 | /* |
331 | * The buffer contains the suite of intervals, in a ilog2 |
332 | * basis, we are looking for a repetition. We point the |
333 | * beginning of the search three times the length of the |
334 | * period beginning at the end of the buffer. We do that for |
335 | * each suffix. |
336 | */ |
337 | for (period = period_max; period >= PREDICTION_PERIOD_MIN; period--) { |
338 | |
339 | /* |
340 | * The first comparison always succeed because the |
341 | * suffix is deduced from the first n-period bytes of |
342 | * the buffer and we compare the initial suffix with |
343 | * itself, so we can skip the first iteration. |
344 | */ |
345 | int idx = period; |
346 | size_t size = period; |
347 | |
348 | /* |
349 | * We look if the suite with period 'i' repeat |
350 | * itself. If it is truncated at the end, as it |
351 | * repeats we can use the period to find out the next |
352 | * element with the modulo. |
353 | */ |
354 | while (!memcmp(p: buffer, q: &buffer[idx], size: size * sizeof(int))) { |
355 | |
356 | /* |
357 | * Move the index in a period basis |
358 | */ |
359 | idx += size; |
360 | |
361 | /* |
362 | * If this condition is reached, all previous |
363 | * memcmp were successful, so the period is |
364 | * found. |
365 | */ |
366 | if (idx == len) |
367 | return buffer[len % period]; |
368 | |
369 | /* |
370 | * If the remaining elements to compare are |
371 | * smaller than the period, readjust the size |
372 | * of the comparison for the last iteration. |
373 | */ |
374 | if (len - idx < period) |
375 | size = len - idx; |
376 | } |
377 | } |
378 | |
379 | return -1; |
380 | } |
381 | |
382 | static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now) |
383 | { |
384 | int index, i, period_max, count, start, min = INT_MAX; |
385 | |
386 | if ((now - irqs->last_ts) >= NSEC_PER_SEC) { |
387 | irqs->count = irqs->last_ts = 0; |
388 | return U64_MAX; |
389 | } |
390 | |
391 | /* |
392 | * As we want to find three times the repetition, we need a |
393 | * number of intervals greater or equal to three times the |
394 | * maximum period, otherwise we truncate the max period. |
395 | */ |
396 | period_max = irqs->count > (3 * PREDICTION_PERIOD_MAX) ? |
397 | PREDICTION_PERIOD_MAX : irqs->count / 3; |
398 | |
399 | /* |
400 | * If we don't have enough irq timings for this prediction, |
401 | * just bail out. |
402 | */ |
403 | if (period_max <= PREDICTION_PERIOD_MIN) |
404 | return U64_MAX; |
405 | |
406 | /* |
407 | * 'count' will depends if the circular buffer wrapped or not |
408 | */ |
409 | count = irqs->count < IRQ_TIMINGS_SIZE ? |
410 | irqs->count : IRQ_TIMINGS_SIZE; |
411 | |
412 | start = irqs->count < IRQ_TIMINGS_SIZE ? |
413 | 0 : (irqs->count & IRQ_TIMINGS_MASK); |
414 | |
415 | /* |
416 | * Copy the content of the circular buffer into another buffer |
417 | * in order to linearize the buffer instead of dealing with |
418 | * wrapping indexes and shifted array which will be prone to |
419 | * error and extremely difficult to debug. |
420 | */ |
421 | for (i = 0; i < count; i++) { |
422 | int index = (start + i) & IRQ_TIMINGS_MASK; |
423 | |
424 | irqs->timings[i] = irqs->circ_timings[index]; |
425 | min = min_t(int, irqs->timings[i], min); |
426 | } |
427 | |
428 | index = irq_timings_next_event_index(buffer: irqs->timings, len: count, period_max); |
429 | if (index < 0) |
430 | return irqs->last_ts + irqs->ema_time[min]; |
431 | |
432 | return irqs->last_ts + irqs->ema_time[index]; |
433 | } |
434 | |
435 | static __always_inline int irq_timings_interval_index(u64 interval) |
436 | { |
437 | /* |
438 | * The PREDICTION_FACTOR increase the interval size for the |
439 | * array of exponential average. |
440 | */ |
441 | u64 interval_us = (interval >> 10) / PREDICTION_FACTOR; |
442 | |
443 | return likely(interval_us) ? ilog2(interval_us) : 0; |
444 | } |
445 | |
446 | static __always_inline void __irq_timings_store(int irq, struct irqt_stat *irqs, |
447 | u64 interval) |
448 | { |
449 | int index; |
450 | |
451 | /* |
452 | * Get the index in the ema table for this interrupt. |
453 | */ |
454 | index = irq_timings_interval_index(interval); |
455 | |
456 | if (index > PREDICTION_BUFFER_SIZE - 1) { |
457 | irqs->count = 0; |
458 | return; |
459 | } |
460 | |
461 | /* |
462 | * Store the index as an element of the pattern in another |
463 | * circular array. |
464 | */ |
465 | irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index; |
466 | |
467 | irqs->ema_time[index] = irq_timings_ema_new(value: interval, |
468 | ema_old: irqs->ema_time[index]); |
469 | |
470 | irqs->count++; |
471 | } |
472 | |
473 | static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) |
474 | { |
475 | u64 old_ts = irqs->last_ts; |
476 | u64 interval; |
477 | |
478 | /* |
479 | * The timestamps are absolute time values, we need to compute |
480 | * the timing interval between two interrupts. |
481 | */ |
482 | irqs->last_ts = ts; |
483 | |
484 | /* |
485 | * The interval type is u64 in order to deal with the same |
486 | * type in our computation, that prevent mindfuck issues with |
487 | * overflow, sign and division. |
488 | */ |
489 | interval = ts - old_ts; |
490 | |
491 | /* |
492 | * The interrupt triggered more than one second apart, that |
493 | * ends the sequence as predictable for our purpose. In this |
494 | * case, assume we have the beginning of a sequence and the |
495 | * timestamp is the first value. As it is impossible to |
496 | * predict anything at this point, return. |
497 | * |
498 | * Note the first timestamp of the sequence will always fall |
499 | * in this test because the old_ts is zero. That is what we |
500 | * want as we need another timestamp to compute an interval. |
501 | */ |
502 | if (interval >= NSEC_PER_SEC) { |
503 | irqs->count = 0; |
504 | return; |
505 | } |
506 | |
507 | __irq_timings_store(irq, irqs, interval); |
508 | } |
509 | |
510 | /** |
511 | * irq_timings_next_event - Return when the next event is supposed to arrive |
512 | * |
513 | * During the last busy cycle, the number of interrupts is incremented |
514 | * and stored in the irq_timings structure. This information is |
515 | * necessary to: |
516 | * |
517 | * - know if the index in the table wrapped up: |
518 | * |
519 | * If more than the array size interrupts happened during the |
520 | * last busy/idle cycle, the index wrapped up and we have to |
521 | * begin with the next element in the array which is the last one |
522 | * in the sequence, otherwise it is at the index 0. |
523 | * |
524 | * - have an indication of the interrupts activity on this CPU |
525 | * (eg. irq/sec) |
526 | * |
527 | * The values are 'consumed' after inserting in the statistical model, |
528 | * thus the count is reinitialized. |
529 | * |
530 | * The array of values **must** be browsed in the time direction, the |
531 | * timestamp must increase between an element and the next one. |
532 | * |
533 | * Returns a nanosec time based estimation of the earliest interrupt, |
534 | * U64_MAX otherwise. |
535 | */ |
536 | u64 irq_timings_next_event(u64 now) |
537 | { |
538 | struct irq_timings *irqts = this_cpu_ptr(&irq_timings); |
539 | struct irqt_stat *irqs; |
540 | struct irqt_stat __percpu *s; |
541 | u64 ts, next_evt = U64_MAX; |
542 | int i, irq = 0; |
543 | |
544 | /* |
545 | * This function must be called with the local irq disabled in |
546 | * order to prevent the timings circular buffer to be updated |
547 | * while we are reading it. |
548 | */ |
549 | lockdep_assert_irqs_disabled(); |
550 | |
551 | if (!irqts->count) |
552 | return next_evt; |
553 | |
554 | /* |
555 | * Number of elements in the circular buffer: If it happens it |
556 | * was flushed before, then the number of elements could be |
557 | * smaller than IRQ_TIMINGS_SIZE, so the count is used, |
558 | * otherwise the array size is used as we wrapped. The index |
559 | * begins from zero when we did not wrap. That could be done |
560 | * in a nicer way with the proper circular array structure |
561 | * type but with the cost of extra computation in the |
562 | * interrupt handler hot path. We choose efficiency. |
563 | * |
564 | * Inject measured irq/timestamp to the pattern prediction |
565 | * model while decrementing the counter because we consume the |
566 | * data from our circular buffer. |
567 | */ |
568 | for_each_irqts(i, irqts) { |
569 | irq = irq_timing_decode(irqts->values[i], &ts); |
570 | s = idr_find(&irqt_stats, id: irq); |
571 | if (s) |
572 | irq_timings_store(irq, this_cpu_ptr(s), ts); |
573 | } |
574 | |
575 | /* |
576 | * Look in the list of interrupts' statistics, the earliest |
577 | * next event. |
578 | */ |
579 | idr_for_each_entry(&irqt_stats, s, i) { |
580 | |
581 | irqs = this_cpu_ptr(s); |
582 | |
583 | ts = __irq_timings_next_event(irqs, irq: i, now); |
584 | if (ts <= now) |
585 | return now; |
586 | |
587 | if (ts < next_evt) |
588 | next_evt = ts; |
589 | } |
590 | |
591 | return next_evt; |
592 | } |
593 | |
594 | void irq_timings_free(int irq) |
595 | { |
596 | struct irqt_stat __percpu *s; |
597 | |
598 | s = idr_find(&irqt_stats, id: irq); |
599 | if (s) { |
600 | free_percpu(pdata: s); |
601 | idr_remove(&irqt_stats, id: irq); |
602 | } |
603 | } |
604 | |
605 | int irq_timings_alloc(int irq) |
606 | { |
607 | struct irqt_stat __percpu *s; |
608 | int id; |
609 | |
610 | /* |
611 | * Some platforms can have the same private interrupt per cpu, |
612 | * so this function may be called several times with the |
613 | * same interrupt number. Just bail out in case the per cpu |
614 | * stat structure is already allocated. |
615 | */ |
616 | s = idr_find(&irqt_stats, id: irq); |
617 | if (s) |
618 | return 0; |
619 | |
620 | s = alloc_percpu(*s); |
621 | if (!s) |
622 | return -ENOMEM; |
623 | |
624 | idr_preload(GFP_KERNEL); |
625 | id = idr_alloc(&irqt_stats, ptr: s, start: irq, end: irq + 1, GFP_NOWAIT); |
626 | idr_preload_end(); |
627 | |
628 | if (id < 0) { |
629 | free_percpu(pdata: s); |
630 | return id; |
631 | } |
632 | |
633 | return 0; |
634 | } |
635 | |
636 | #ifdef CONFIG_TEST_IRQ_TIMINGS |
637 | struct timings_intervals { |
638 | u64 *intervals; |
639 | size_t count; |
640 | }; |
641 | |
642 | /* |
643 | * Intervals are given in nanosecond base |
644 | */ |
645 | static u64 intervals0[] __initdata = { |
646 | 10000, 50000, 200000, 500000, |
647 | 10000, 50000, 200000, 500000, |
648 | 10000, 50000, 200000, 500000, |
649 | 10000, 50000, 200000, 500000, |
650 | 10000, 50000, 200000, 500000, |
651 | 10000, 50000, 200000, 500000, |
652 | 10000, 50000, 200000, 500000, |
653 | 10000, 50000, 200000, 500000, |
654 | 10000, 50000, 200000, |
655 | }; |
656 | |
657 | static u64 intervals1[] __initdata = { |
658 | 223947000, 1240000, 1384000, 1386000, 1386000, |
659 | 217416000, 1236000, 1384000, 1386000, 1387000, |
660 | 214719000, 1241000, 1386000, 1387000, 1384000, |
661 | 213696000, 1234000, 1384000, 1386000, 1388000, |
662 | 219904000, 1240000, 1385000, 1389000, 1385000, |
663 | 212240000, 1240000, 1386000, 1386000, 1386000, |
664 | 214415000, 1236000, 1384000, 1386000, 1387000, |
665 | 214276000, 1234000, |
666 | }; |
667 | |
668 | static u64 intervals2[] __initdata = { |
669 | 4000, 3000, 5000, 100000, |
670 | 3000, 3000, 5000, 117000, |
671 | 4000, 4000, 5000, 112000, |
672 | 4000, 3000, 4000, 110000, |
673 | 3000, 5000, 3000, 117000, |
674 | 4000, 4000, 5000, 112000, |
675 | 4000, 3000, 4000, 110000, |
676 | 3000, 4000, 5000, 112000, |
677 | 4000, |
678 | }; |
679 | |
680 | static u64 intervals3[] __initdata = { |
681 | 1385000, 212240000, 1240000, |
682 | 1386000, 214415000, 1236000, |
683 | 1384000, 214276000, 1234000, |
684 | 1386000, 214415000, 1236000, |
685 | 1385000, 212240000, 1240000, |
686 | 1386000, 214415000, 1236000, |
687 | 1384000, 214276000, 1234000, |
688 | 1386000, 214415000, 1236000, |
689 | 1385000, 212240000, 1240000, |
690 | }; |
691 | |
692 | static u64 intervals4[] __initdata = { |
693 | 10000, 50000, 10000, 50000, |
694 | 10000, 50000, 10000, 50000, |
695 | 10000, 50000, 10000, 50000, |
696 | 10000, 50000, 10000, 50000, |
697 | 10000, 50000, 10000, 50000, |
698 | 10000, 50000, 10000, 50000, |
699 | 10000, 50000, 10000, 50000, |
700 | 10000, 50000, 10000, 50000, |
701 | 10000, |
702 | }; |
703 | |
704 | static struct timings_intervals tis[] __initdata = { |
705 | { intervals0, ARRAY_SIZE(intervals0) }, |
706 | { intervals1, ARRAY_SIZE(intervals1) }, |
707 | { intervals2, ARRAY_SIZE(intervals2) }, |
708 | { intervals3, ARRAY_SIZE(intervals3) }, |
709 | { intervals4, ARRAY_SIZE(intervals4) }, |
710 | }; |
711 | |
712 | static int __init irq_timings_test_next_index(struct timings_intervals *ti) |
713 | { |
714 | int _buffer[IRQ_TIMINGS_SIZE]; |
715 | int buffer[IRQ_TIMINGS_SIZE]; |
716 | int index, start, i, count, period_max; |
717 | |
718 | count = ti->count - 1; |
719 | |
720 | period_max = count > (3 * PREDICTION_PERIOD_MAX) ? |
721 | PREDICTION_PERIOD_MAX : count / 3; |
722 | |
723 | /* |
724 | * Inject all values except the last one which will be used |
725 | * to compare with the next index result. |
726 | */ |
727 | pr_debug("index suite: " ); |
728 | |
729 | for (i = 0; i < count; i++) { |
730 | index = irq_timings_interval_index(ti->intervals[i]); |
731 | _buffer[i & IRQ_TIMINGS_MASK] = index; |
732 | pr_cont("%d " , index); |
733 | } |
734 | |
735 | start = count < IRQ_TIMINGS_SIZE ? 0 : |
736 | count & IRQ_TIMINGS_MASK; |
737 | |
738 | count = min_t(int, count, IRQ_TIMINGS_SIZE); |
739 | |
740 | for (i = 0; i < count; i++) { |
741 | int index = (start + i) & IRQ_TIMINGS_MASK; |
742 | buffer[i] = _buffer[index]; |
743 | } |
744 | |
745 | index = irq_timings_next_event_index(buffer, count, period_max); |
746 | i = irq_timings_interval_index(ti->intervals[ti->count - 1]); |
747 | |
748 | if (index != i) { |
749 | pr_err("Expected (%d) and computed (%d) next indexes differ\n" , |
750 | i, index); |
751 | return -EINVAL; |
752 | } |
753 | |
754 | return 0; |
755 | } |
756 | |
757 | static int __init irq_timings_next_index_selftest(void) |
758 | { |
759 | int i, ret; |
760 | |
761 | for (i = 0; i < ARRAY_SIZE(tis); i++) { |
762 | |
763 | pr_info("---> Injecting intervals number #%d (count=%zd)\n" , |
764 | i, tis[i].count); |
765 | |
766 | ret = irq_timings_test_next_index(&tis[i]); |
767 | if (ret) |
768 | break; |
769 | } |
770 | |
771 | return ret; |
772 | } |
773 | |
774 | static int __init irq_timings_test_irqs(struct timings_intervals *ti) |
775 | { |
776 | struct irqt_stat __percpu *s; |
777 | struct irqt_stat *irqs; |
778 | int i, index, ret, irq = 0xACE5; |
779 | |
780 | ret = irq_timings_alloc(irq); |
781 | if (ret) { |
782 | pr_err("Failed to allocate irq timings\n" ); |
783 | return ret; |
784 | } |
785 | |
786 | s = idr_find(&irqt_stats, irq); |
787 | if (!s) { |
788 | ret = -EIDRM; |
789 | goto out; |
790 | } |
791 | |
792 | irqs = this_cpu_ptr(s); |
793 | |
794 | for (i = 0; i < ti->count; i++) { |
795 | |
796 | index = irq_timings_interval_index(ti->intervals[i]); |
797 | pr_debug("%d: interval=%llu ema_index=%d\n" , |
798 | i, ti->intervals[i], index); |
799 | |
800 | __irq_timings_store(irq, irqs, ti->intervals[i]); |
801 | if (irqs->circ_timings[i & IRQ_TIMINGS_MASK] != index) { |
802 | ret = -EBADSLT; |
803 | pr_err("Failed to store in the circular buffer\n" ); |
804 | goto out; |
805 | } |
806 | } |
807 | |
808 | if (irqs->count != ti->count) { |
809 | ret = -ERANGE; |
810 | pr_err("Count differs\n" ); |
811 | goto out; |
812 | } |
813 | |
814 | ret = 0; |
815 | out: |
816 | irq_timings_free(irq); |
817 | |
818 | return ret; |
819 | } |
820 | |
821 | static int __init irq_timings_irqs_selftest(void) |
822 | { |
823 | int i, ret; |
824 | |
825 | for (i = 0; i < ARRAY_SIZE(tis); i++) { |
826 | pr_info("---> Injecting intervals number #%d (count=%zd)\n" , |
827 | i, tis[i].count); |
828 | ret = irq_timings_test_irqs(&tis[i]); |
829 | if (ret) |
830 | break; |
831 | } |
832 | |
833 | return ret; |
834 | } |
835 | |
836 | static int __init irq_timings_test_irqts(struct irq_timings *irqts, |
837 | unsigned count) |
838 | { |
839 | int start = count >= IRQ_TIMINGS_SIZE ? count - IRQ_TIMINGS_SIZE : 0; |
840 | int i, irq, oirq = 0xBEEF; |
841 | u64 ots = 0xDEAD, ts; |
842 | |
843 | /* |
844 | * Fill the circular buffer by using the dedicated function. |
845 | */ |
846 | for (i = 0; i < count; i++) { |
847 | pr_debug("%d: index=%d, ts=%llX irq=%X\n" , |
848 | i, i & IRQ_TIMINGS_MASK, ots + i, oirq + i); |
849 | |
850 | irq_timings_push(ots + i, oirq + i); |
851 | } |
852 | |
853 | /* |
854 | * Compute the first elements values after the index wrapped |
855 | * up or not. |
856 | */ |
857 | ots += start; |
858 | oirq += start; |
859 | |
860 | /* |
861 | * Test the circular buffer count is correct. |
862 | */ |
863 | pr_debug("---> Checking timings array count (%d) is right\n" , count); |
864 | if (WARN_ON(irqts->count != count)) |
865 | return -EINVAL; |
866 | |
867 | /* |
868 | * Test the macro allowing to browse all the irqts. |
869 | */ |
870 | pr_debug("---> Checking the for_each_irqts() macro\n" ); |
871 | for_each_irqts(i, irqts) { |
872 | |
873 | irq = irq_timing_decode(irqts->values[i], &ts); |
874 | |
875 | pr_debug("index=%d, ts=%llX / %llX, irq=%X / %X\n" , |
876 | i, ts, ots, irq, oirq); |
877 | |
878 | if (WARN_ON(ts != ots || irq != oirq)) |
879 | return -EINVAL; |
880 | |
881 | ots++; oirq++; |
882 | } |
883 | |
884 | /* |
885 | * The circular buffer should have be flushed when browsed |
886 | * with for_each_irqts |
887 | */ |
888 | pr_debug("---> Checking timings array is empty after browsing it\n" ); |
889 | if (WARN_ON(irqts->count)) |
890 | return -EINVAL; |
891 | |
892 | return 0; |
893 | } |
894 | |
895 | static int __init irq_timings_irqts_selftest(void) |
896 | { |
897 | struct irq_timings *irqts = this_cpu_ptr(&irq_timings); |
898 | int i, ret; |
899 | |
900 | /* |
901 | * Test the circular buffer with different number of |
902 | * elements. The purpose is to test at the limits (empty, half |
903 | * full, full, wrapped with the cursor at the boundaries, |
904 | * wrapped several times, etc ... |
905 | */ |
906 | int count[] = { 0, |
907 | IRQ_TIMINGS_SIZE >> 1, |
908 | IRQ_TIMINGS_SIZE, |
909 | IRQ_TIMINGS_SIZE + (IRQ_TIMINGS_SIZE >> 1), |
910 | 2 * IRQ_TIMINGS_SIZE, |
911 | (2 * IRQ_TIMINGS_SIZE) + 3, |
912 | }; |
913 | |
914 | for (i = 0; i < ARRAY_SIZE(count); i++) { |
915 | |
916 | pr_info("---> Checking the timings with %d/%d values\n" , |
917 | count[i], IRQ_TIMINGS_SIZE); |
918 | |
919 | ret = irq_timings_test_irqts(irqts, count[i]); |
920 | if (ret) |
921 | break; |
922 | } |
923 | |
924 | return ret; |
925 | } |
926 | |
927 | static int __init irq_timings_selftest(void) |
928 | { |
929 | int ret; |
930 | |
931 | pr_info("------------------- selftest start -----------------\n" ); |
932 | |
933 | /* |
934 | * At this point, we don't except any subsystem to use the irq |
935 | * timings but us, so it should not be enabled. |
936 | */ |
937 | if (static_branch_unlikely(&irq_timing_enabled)) { |
938 | pr_warn("irq timings already initialized, skipping selftest\n" ); |
939 | return 0; |
940 | } |
941 | |
942 | ret = irq_timings_irqts_selftest(); |
943 | if (ret) |
944 | goto out; |
945 | |
946 | ret = irq_timings_irqs_selftest(); |
947 | if (ret) |
948 | goto out; |
949 | |
950 | ret = irq_timings_next_index_selftest(); |
951 | out: |
952 | pr_info("---------- selftest end with %s -----------\n" , |
953 | ret ? "failure" : "success" ); |
954 | |
955 | return ret; |
956 | } |
957 | early_initcall(irq_timings_selftest); |
958 | #endif |
959 | |