// SPDX-License-Identifier: GPL-2.0-only
/*
 * drivers/cpufreq/cpufreq_ondemand.c
 *
 * Copyright (C) 2001 Russell King
 *           (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                    Jun Nakajima <jun.nakajima@intel.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/cpu.h>
#include <linux/percpu-defs.h>
#include <linux/slab.h>
#include <linux/tick.h>
#include <linux/sched/cpufreq.h>

#include "cpufreq_ondemand.h"

/* On-demand governor macros */
#define DEF_FREQUENCY_UP_THRESHOLD		(80)
#define DEF_SAMPLING_DOWN_FACTOR		(1)
#define MAX_SAMPLING_DOWN_FACTOR		(100000)
#define MICRO_FREQUENCY_UP_THRESHOLD		(95)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE		(10000)
#define MIN_FREQUENCY_UP_THRESHOLD		(1)
#define MAX_FREQUENCY_UP_THRESHOLD		(100)

static struct od_ops od_ops;

static unsigned int default_powersave_bias;

/*
 * Not all CPUs want IO time to be accounted as busy; this depends on how
 * efficient idling at a higher frequency/voltage is.
 * Pavel Machek says this is not so for various generations of AMD and old
 * Intel systems.
 * Mike Chan (android.com) claims this is also not true for ARM.
 * Because of this, whitelist specific known (series of) CPUs by default, and
 * leave all others up to the user.
 */
static int should_io_be_busy(void)
{
#if defined(CONFIG_X86)
	/*
	 * For Intel, Core 2 (model 15) and later have an efficient idle.
	 */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
	    boot_cpu_data.x86 == 6 &&
	    boot_cpu_data.x86_model >= 15)
		return 1;
#endif
	return 0;
}

/*
 * Find the right frequency to be set now with powersave_bias on.
 * Returns the freq_hi to be used right now and sets freq_hi_delay_us,
 * freq_lo, and freq_lo_delay_us in the percpu area so that the two
 * frequencies average out over the sampling period.
 */
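/*
 * Worked example (illustrative numbers, not taken from any particular
 * platform): with powersave_bias = 100 (10%) and freq_next resolving to a
 * 2000000 kHz table entry, freq_reduc is 200000 kHz and freq_avg is
 * 1800000 kHz.  If the table brackets that as freq_lo = 1600000 kHz and
 * freq_hi = 2000000 kHz, the governor alternates between the two, spending
 * (1800000 - 1600000) / (2000000 - 1600000) = 50% of each sampling period
 * at freq_hi so that the average frequency comes out close to freq_avg.
 */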
static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy,
		unsigned int freq_next, unsigned int relation)
{
	unsigned int freq_req, freq_reduc, freq_avg;
	unsigned int freq_hi, freq_lo;
	unsigned int index;
	unsigned int delay_hi_us;
	struct policy_dbs_info *policy_dbs = policy->governor_data;
	struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs);
	struct dbs_data *dbs_data = policy_dbs->dbs_data;
	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
	struct cpufreq_frequency_table *freq_table = policy->freq_table;

	if (!freq_table) {
		dbs_info->freq_lo = 0;
		dbs_info->freq_lo_delay_us = 0;
		return freq_next;
	}

	index = cpufreq_frequency_table_target(policy, freq_next, relation);
	freq_req = freq_table[index].frequency;
	freq_reduc = freq_req * od_tuners->powersave_bias / 1000;
	freq_avg = freq_req - freq_reduc;

	/* Find freq bounds for freq_avg in freq_table */
	index = cpufreq_table_find_index_h(policy, freq_avg,
					   relation & CPUFREQ_RELATION_E);
	freq_lo = freq_table[index].frequency;
	index = cpufreq_table_find_index_l(policy, freq_avg,
					   relation & CPUFREQ_RELATION_E);
	freq_hi = freq_table[index].frequency;

	/* Find out how long we have to be in hi and lo freqs */
	if (freq_hi == freq_lo) {
		dbs_info->freq_lo = 0;
		dbs_info->freq_lo_delay_us = 0;
		return freq_lo;
	}
	delay_hi_us = (freq_avg - freq_lo) * dbs_data->sampling_rate;
	delay_hi_us += (freq_hi - freq_lo) / 2;
	delay_hi_us /= freq_hi - freq_lo;
	dbs_info->freq_hi_delay_us = delay_hi_us;
	dbs_info->freq_lo = freq_lo;
	dbs_info->freq_lo_delay_us = dbs_data->sampling_rate - delay_hi_us;
	return freq_hi;
}

static void ondemand_powersave_bias_init(struct cpufreq_policy *policy)
{
	struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data);

	dbs_info->freq_lo = 0;
}

static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq)
{
	struct policy_dbs_info *policy_dbs = policy->governor_data;
	struct dbs_data *dbs_data = policy_dbs->dbs_data;
	struct od_dbs_tuners *od_tuners = dbs_data->tuners;

	if (od_tuners->powersave_bias)
		freq = od_ops.powersave_bias_target(policy, freq,
				CPUFREQ_RELATION_HE);
	else if (policy->cur == policy->max)
		return;

	__cpufreq_driver_target(policy, freq, od_tuners->powersave_bias ?
			CPUFREQ_RELATION_LE : CPUFREQ_RELATION_HE);
}

/*
 * Every sampling_rate, we check whether the current idle time is less than
 * 20% (default). If it is, we try to increase the frequency. Otherwise, we
 * adjust the frequency proportionally to the load.
 */
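/*
 * Illustrative example (assumed numbers): with cpuinfo.min_freq = 800000 kHz,
 * cpuinfo.max_freq = 2400000 kHz and a measured load of 50, the proportional
 * path below requests 800000 + 50 * (2400000 - 800000) / 100 = 1600000 kHz.
 */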
static void od_update(struct cpufreq_policy *policy)
{
	struct policy_dbs_info *policy_dbs = policy->governor_data;
	struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs);
	struct dbs_data *dbs_data = policy_dbs->dbs_data;
	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
	unsigned int load = dbs_update(policy);

	dbs_info->freq_lo = 0;

	/* Check for frequency increase */
	if (load > dbs_data->up_threshold) {
		/* If switching to max speed, apply sampling_down_factor */
		if (policy->cur < policy->max)
			policy_dbs->rate_mult = dbs_data->sampling_down_factor;
		dbs_freq_increase(policy, policy->max);
	} else {
		/* Calculate the next frequency proportional to load */
		unsigned int freq_next, min_f, max_f;

		min_f = policy->cpuinfo.min_freq;
		max_f = policy->cpuinfo.max_freq;
		freq_next = min_f + load * (max_f - min_f) / 100;

		/* No longer fully busy, reset rate_mult */
		policy_dbs->rate_mult = 1;

		if (od_tuners->powersave_bias)
			freq_next = od_ops.powersave_bias_target(policy,
								 freq_next,
								 CPUFREQ_RELATION_LE);

		__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_CE);
	}
}

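/*
 * od_dbs_update - perform one governor sample.  With powersave_bias in
 * effect, od_update() may have set up a two-step pattern: run at freq_hi for
 * freq_hi_delay_us (an OD_SUB_SAMPLE follows), then drop to freq_lo for the
 * rest of the period.  The value returned is the delay, in microseconds,
 * until the next sample.
 */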
static unsigned int od_dbs_update(struct cpufreq_policy *policy)
{
	struct policy_dbs_info *policy_dbs = policy->governor_data;
	struct dbs_data *dbs_data = policy_dbs->dbs_data;
	struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs);
	int sample_type = dbs_info->sample_type;

	/* Common NORMAL_SAMPLE setup */
	dbs_info->sample_type = OD_NORMAL_SAMPLE;
	/*
	 * OD_SUB_SAMPLE doesn't make sense if sample_delay_ns is 0, so ignore
	 * it then.
	 */
	if (sample_type == OD_SUB_SAMPLE && policy_dbs->sample_delay_ns > 0) {
		__cpufreq_driver_target(policy, dbs_info->freq_lo,
					CPUFREQ_RELATION_HE);
		return dbs_info->freq_lo_delay_us;
	}

	od_update(policy);

	if (dbs_info->freq_lo) {
		/* Setup SUB_SAMPLE */
		dbs_info->sample_type = OD_SUB_SAMPLE;
		return dbs_info->freq_hi_delay_us;
	}

	return dbs_data->sampling_rate * policy_dbs->rate_mult;
}

/************************** sysfs interface ************************/
static struct dbs_governor od_dbs_gov;

static ssize_t io_is_busy_store(struct gov_attr_set *attr_set, const char *buf,
				size_t count)
{
	struct dbs_data *dbs_data = to_dbs_data(attr_set);
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;
	dbs_data->io_is_busy = !!input;

	/* we need to re-evaluate prev_cpu_idle */
	gov_update_cpu_data(dbs_data);

	return count;
}

static ssize_t up_threshold_store(struct gov_attr_set *attr_set,
				  const char *buf, size_t count)
{
	struct dbs_data *dbs_data = to_dbs_data(attr_set);
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
	    input < MIN_FREQUENCY_UP_THRESHOLD) {
		return -EINVAL;
	}

	dbs_data->up_threshold = input;
	return count;
}

static ssize_t sampling_down_factor_store(struct gov_attr_set *attr_set,
					  const char *buf, size_t count)
{
	struct dbs_data *dbs_data = to_dbs_data(attr_set);
	struct policy_dbs_info *policy_dbs;
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
		return -EINVAL;

	dbs_data->sampling_down_factor = input;

	/* Reset down sampling multiplier in case it was active */
	list_for_each_entry(policy_dbs, &attr_set->policy_list, list) {
		/*
		 * Doing this without locking might lead to using different
		 * rate_mult values in od_update() and od_dbs_update().
		 */
		mutex_lock(&policy_dbs->update_mutex);
		policy_dbs->rate_mult = 1;
		mutex_unlock(&policy_dbs->update_mutex);
	}

	return count;
}

static ssize_t ignore_nice_load_store(struct gov_attr_set *attr_set,
				      const char *buf, size_t count)
{
	struct dbs_data *dbs_data = to_dbs_data(attr_set);
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > 1)
		input = 1;

	if (input == dbs_data->ignore_nice_load) { /* nothing to do */
		return count;
	}
	dbs_data->ignore_nice_load = input;

	/* we need to re-evaluate prev_cpu_idle */
	gov_update_cpu_data(dbs_data);

	return count;
}

static ssize_t powersave_bias_store(struct gov_attr_set *attr_set,
				    const char *buf, size_t count)
{
	struct dbs_data *dbs_data = to_dbs_data(attr_set);
	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
	struct policy_dbs_info *policy_dbs;
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1)
		return -EINVAL;

	if (input > 1000)
		input = 1000;

	od_tuners->powersave_bias = input;

	list_for_each_entry(policy_dbs, &attr_set->policy_list, list)
		ondemand_powersave_bias_init(policy_dbs->policy);

	return count;
}

gov_show_one_common(sampling_rate);
gov_show_one_common(up_threshold);
gov_show_one_common(sampling_down_factor);
gov_show_one_common(ignore_nice_load);
gov_show_one_common(io_is_busy);
gov_show_one(od, powersave_bias);

gov_attr_rw(sampling_rate);
gov_attr_rw(io_is_busy);
gov_attr_rw(up_threshold);
gov_attr_rw(sampling_down_factor);
gov_attr_rw(ignore_nice_load);
gov_attr_rw(powersave_bias);

static struct attribute *od_attrs[] = {
	&sampling_rate.attr,
	&up_threshold.attr,
	&sampling_down_factor.attr,
	&ignore_nice_load.attr,
	&powersave_bias.attr,
	&io_is_busy.attr,
	NULL
};
ATTRIBUTE_GROUPS(od);

/************************** sysfs end ************************/
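
/*
 * Typical usage from userspace (sketch; the paths below assume the
 * governor's global tunable directory, while some kernel configurations
 * expose the same attributes per policy instead):
 *
 *   echo ondemand > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
 *   echo 95 > /sys/devices/system/cpu/cpufreq/ondemand/up_threshold
 *   echo 1 > /sys/devices/system/cpu/cpufreq/ondemand/io_is_busy
 */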

static struct policy_dbs_info *od_alloc(void)
{
	struct od_policy_dbs_info *dbs_info;

	dbs_info = kzalloc(sizeof(*dbs_info), GFP_KERNEL);
	return dbs_info ? &dbs_info->policy_dbs : NULL;
}

static void od_free(struct policy_dbs_info *policy_dbs)
{
	kfree(to_dbs_info(policy_dbs));
}

static int od_init(struct dbs_data *dbs_data)
{
	struct od_dbs_tuners *tuners;
	u64 idle_time;
	int cpu;

	tuners = kzalloc(sizeof(*tuners), GFP_KERNEL);
	if (!tuners)
		return -ENOMEM;

	cpu = get_cpu();
	idle_time = get_cpu_idle_time_us(cpu, NULL);
	put_cpu();
	if (idle_time != -1ULL) {
		/* Idle micro accounting is supported. Use finer thresholds */
		dbs_data->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
	} else {
		dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD;
	}

	dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR;
	dbs_data->ignore_nice_load = 0;
	tuners->powersave_bias = default_powersave_bias;
	dbs_data->io_is_busy = should_io_be_busy();

	dbs_data->tuners = tuners;
	return 0;
}

static void od_exit(struct dbs_data *dbs_data)
{
	kfree(dbs_data->tuners);
}

static void od_start(struct cpufreq_policy *policy)
{
	struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data);

	dbs_info->sample_type = OD_NORMAL_SAMPLE;
	ondemand_powersave_bias_init(policy);
}

static struct od_ops od_ops = {
	.powersave_bias_target = generic_powersave_bias_target,
};

static struct dbs_governor od_dbs_gov = {
	.gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("ondemand"),
	.kobj_type = { .default_groups = od_groups },
	.gov_dbs_update = od_dbs_update,
	.alloc = od_alloc,
	.free = od_free,
	.init = od_init,
	.exit = od_exit,
	.start = od_start,
};

#define CPU_FREQ_GOV_ONDEMAND	(od_dbs_gov.gov)

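/*
 * Propagate a new default powersave_bias value to every policy that is
 * currently managed by the ondemand governor; policies served by other
 * governors are skipped.
 */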
static void od_set_powersave_bias(unsigned int powersave_bias)
{
	unsigned int cpu;
	cpumask_var_t done;

	if (!alloc_cpumask_var(&done, GFP_KERNEL))
		return;

	default_powersave_bias = powersave_bias;
	cpumask_clear(done);

	cpus_read_lock();
	for_each_online_cpu(cpu) {
		struct cpufreq_policy *policy;
		struct policy_dbs_info *policy_dbs;
		struct dbs_data *dbs_data;
		struct od_dbs_tuners *od_tuners;

		if (cpumask_test_cpu(cpu, done))
			continue;

		policy = cpufreq_cpu_get_raw(cpu);
		if (!policy || policy->governor != &CPU_FREQ_GOV_ONDEMAND)
			continue;

		policy_dbs = policy->governor_data;
		if (!policy_dbs)
			continue;

		cpumask_or(done, done, policy->cpus);

		dbs_data = policy_dbs->dbs_data;
		od_tuners = dbs_data->tuners;
		od_tuners->powersave_bias = default_powersave_bias;
	}
	cpus_read_unlock();

	free_cpumask_var(done);
}

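/*
 * Allow a cpufreq driver to install its own powersave_bias target callback
 * and to set the default bias that will be applied to policies using this
 * governor.
 */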
void od_register_powersave_bias_handler(unsigned int (*f)
		(struct cpufreq_policy *, unsigned int, unsigned int),
		unsigned int powersave_bias)
{
	od_ops.powersave_bias_target = f;
	od_set_powersave_bias(powersave_bias);
}
EXPORT_SYMBOL_GPL(od_register_powersave_bias_handler);

void od_unregister_powersave_bias_handler(void)
{
	od_ops.powersave_bias_target = generic_powersave_bias_target;
	od_set_powersave_bias(0);
}
EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler);

MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
	"Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &CPU_FREQ_GOV_ONDEMAND;
}
#endif

cpufreq_governor_init(CPU_FREQ_GOV_ONDEMAND);
cpufreq_governor_exit(CPU_FREQ_GOV_ONDEMAND);