// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * amd-pstate.c - AMD Processor P-state Frequency Driver
 *
 * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Author: Huang Rui <ray.huang@amd.com>
 *
 * AMD P-State introduces a new CPU performance scaling design for AMD
 * processors using the ACPI Collaborative Performance and Power Control (CPPC)
 * feature, which works with the AMD SMU firmware to provide a finer-grained
 * frequency control range. It replaces the legacy ACPI P-States control and
 * provides a flexible, low-latency interface for the Linux kernel to
 * communicate performance hints directly to the hardware.
 *
 * AMD P-State is supported on recent AMD Zen-based CPU series, including some
 * Zen2 and Zen3 processors. _CPC needs to be present in the ACPI tables of an
 * AMD P-State supported system. There are two types of hardware implementations
 * for AMD P-State: 1) Full MSR Solution and 2) Shared Memory Solution.
 * The X86_FEATURE_CPPC CPU feature flag is used to distinguish between them.
 */
22
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>
#include <linux/static_call.h>
#include <linux/amd-pstate.h>
#include <linux/topology.h>

#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>

#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
#include "amd-pstate-trace.h"

#define AMD_PSTATE_TRANSITION_LATENCY	20000
#define AMD_PSTATE_TRANSITION_DELAY	1000
#define AMD_PSTATE_PREFCORE_THRESHOLD	166
54
/*
 * TODO: We need more time, together with the community, to fine tune
 * processors that use the shared memory solution.
 *
 * There are some performance drops on CPU benchmarks reported by SUSE.
 * We are working with them to fine tune the shared memory solution, so it is
 * disabled by default (falling back to acpi-cpufreq on those processors), and
 * a module parameter is provided to enable it manually for debugging.
 */
static struct cpufreq_driver *current_pstate_driver;
static struct cpufreq_driver amd_pstate_driver;
static struct cpufreq_driver amd_pstate_epp_driver;
static int cppc_state = AMD_PSTATE_UNDEFINED;
static bool cppc_enabled;
static bool amd_pstate_prefcore = true;
70
/*
 * AMD Energy Preference Performance (EPP)
 * The EPP is used in the CCLK DPM controller to drive
 * the frequency that a core is going to operate during
 * short periods of activity. EPP values will be utilized for
 * different OS profiles (balanced, performance, power savings).
 * The display strings below correspond to the EPP index in
 * energy_perf_strings[]:
 *	index		String
 *-------------------------------------
 *	0		default
 *	1		performance
 *	2		balance_performance
 *	3		balance_power
 *	4		power
 */
enum energy_perf_value_index {
	EPP_INDEX_DEFAULT = 0,
	EPP_INDEX_PERFORMANCE,
	EPP_INDEX_BALANCE_PERFORMANCE,
	EPP_INDEX_BALANCE_POWERSAVE,
	EPP_INDEX_POWERSAVE,
};

static const char * const energy_perf_strings[] = {
	[EPP_INDEX_DEFAULT] = "default",
	[EPP_INDEX_PERFORMANCE] = "performance",
	[EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
	[EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
	[EPP_INDEX_POWERSAVE] = "power",
	NULL
};

static unsigned int epp_values[] = {
	[EPP_INDEX_DEFAULT] = 0,
	[EPP_INDEX_PERFORMANCE] = AMD_CPPC_EPP_PERFORMANCE,
	[EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE,
	[EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE,
	[EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE,
};

typedef int (*cppc_mode_transition_fn)(int);

static inline int get_mode_idx_from_str(const char *str, size_t size)
{
	int i;

	for (i = 0; i < AMD_PSTATE_MAX; i++) {
		if (!strncmp(str, amd_pstate_mode_string[i], size))
			return i;
	}
	return -EINVAL;
}

static DEFINE_MUTEX(amd_pstate_limits_lock);
static DEFINE_MUTEX(amd_pstate_driver_lock);
127
128static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
129{
130 u64 epp;
131 int ret;
132
133 if (boot_cpu_has(X86_FEATURE_CPPC)) {
134 if (!cppc_req_cached) {
			epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
					    &cppc_req_cached);
137 if (epp)
138 return epp;
139 }
140 epp = (cppc_req_cached >> 24) & 0xFF;
141 } else {
		ret = cppc_get_epp_perf(cpudata->cpu, &epp);
143 if (ret < 0) {
144 pr_debug("Could not retrieve energy perf value (%d)\n", ret);
145 return -EIO;
146 }
147 }
148
149 return (s16)(epp & 0xff);
150}
151
152static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
153{
154 s16 epp;
155 int index = -EINVAL;
156
	epp = amd_pstate_get_epp(cpudata, 0);
158 if (epp < 0)
159 return epp;
160
161 switch (epp) {
162 case AMD_CPPC_EPP_PERFORMANCE:
163 index = EPP_INDEX_PERFORMANCE;
164 break;
165 case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
166 index = EPP_INDEX_BALANCE_PERFORMANCE;
167 break;
168 case AMD_CPPC_EPP_BALANCE_POWERSAVE:
169 index = EPP_INDEX_BALANCE_POWERSAVE;
170 break;
171 case AMD_CPPC_EPP_POWERSAVE:
172 index = EPP_INDEX_POWERSAVE;
173 break;
174 default:
175 break;
176 }
177
178 return index;
179}
180
181static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
182{
183 int ret;
184 struct cppc_perf_ctrls perf_ctrls;
185
186 if (boot_cpu_has(X86_FEATURE_CPPC)) {
187 u64 value = READ_ONCE(cpudata->cppc_req_cached);
188
189 value &= ~GENMASK_ULL(31, 24);
190 value |= (u64)epp << 24;
191 WRITE_ONCE(cpudata->cppc_req_cached, value);
192
		ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
194 if (!ret)
195 cpudata->epp_cached = epp;
196 } else {
197 perf_ctrls.energy_perf = epp;
		ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
199 if (ret) {
200 pr_debug("failed to set energy perf value (%d)\n", ret);
201 return ret;
202 }
203 cpudata->epp_cached = epp;
204 }
205
206 return ret;
207}
208
209static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
210 int pref_index)
211{
212 int epp = -EINVAL;
213 int ret;
214
215 if (!pref_index) {
216 pr_debug("EPP pref_index is invalid\n");
217 return -EINVAL;
218 }
219
220 if (epp == -EINVAL)
221 epp = epp_values[pref_index];
222
223 if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
224 pr_debug("EPP cannot be set under performance policy\n");
225 return -EBUSY;
226 }
227
228 ret = amd_pstate_set_epp(cpudata, epp);
229
230 return ret;
231}
232
233static inline int pstate_enable(bool enable)
234{
235 int ret, cpu;
236 unsigned long logical_proc_id_mask = 0;
237
238 if (enable == cppc_enabled)
239 return 0;
240
241 for_each_present_cpu(cpu) {
242 unsigned long logical_id = topology_logical_die_id(cpu);
243
244 if (test_bit(logical_id, &logical_proc_id_mask))
245 continue;
246
		set_bit(logical_id, &logical_proc_id_mask);
248
		ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE, enable);
251 if (ret)
252 return ret;
253 }
254
255 cppc_enabled = enable;
256 return 0;
257}
258
259static int cppc_enable(bool enable)
260{
261 int cpu, ret = 0;
262 struct cppc_perf_ctrls perf_ctrls;
263
264 if (enable == cppc_enabled)
265 return 0;
266
267 for_each_present_cpu(cpu) {
268 ret = cppc_set_enable(cpu, enable);
269 if (ret)
270 return ret;
271
272 /* Enable autonomous mode for EPP */
273 if (cppc_state == AMD_PSTATE_ACTIVE) {
274 /* Set desired perf as zero to allow EPP firmware control */
275 perf_ctrls.desired_perf = 0;
			ret = cppc_set_perf(cpu, &perf_ctrls);
277 if (ret)
278 return ret;
279 }
280 }
281
282 cppc_enabled = enable;
283 return ret;
284}
285
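/*
 * The amd_pstate_* wrappers below dispatch through static calls: the MSR-based
 * pstate_* helpers are the defaults, and amd_pstate_init() patches in the
 * shared-memory cppc_* helpers (which go through the ACPI CPPC library) when
 * X86_FEATURE_CPPC is not present.
 */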
286DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable);
287
288static inline int amd_pstate_enable(bool enable)
289{
290 return static_call(amd_pstate_enable)(enable);
291}
292
293static int pstate_init_perf(struct amd_cpudata *cpudata)
294{
295 u64 cap1;
296 u32 highest_perf;
297
	int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &cap1);
300 if (ret)
301 return ret;
302
	/*
	 * For platforms that do not support the preferred core feature, the
	 * highest_perf value may be configured as 166 or 255. To avoid the max
	 * frequency being calculated wrongly, we take the
	 * AMD_CPPC_HIGHEST_PERF(cap1) value as the default max perf.
	 */
308 if (cpudata->hw_prefcore)
309 highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD;
310 else
311 highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
312
313 WRITE_ONCE(cpudata->highest_perf, highest_perf);
314 WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
315 WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
316 WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
317 WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
318 WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
319 WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1));
320 return 0;
321}
322
323static int cppc_init_perf(struct amd_cpudata *cpudata)
324{
325 struct cppc_perf_caps cppc_perf;
326 u32 highest_perf;
327
	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
329 if (ret)
330 return ret;
331
332 if (cpudata->hw_prefcore)
333 highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD;
334 else
335 highest_perf = cppc_perf.highest_perf;
336
337 WRITE_ONCE(cpudata->highest_perf, highest_perf);
338 WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
339 WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
340 WRITE_ONCE(cpudata->lowest_nonlinear_perf,
341 cppc_perf.lowest_nonlinear_perf);
342 WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
343 WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
344 WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf);
345
346 if (cppc_state == AMD_PSTATE_ACTIVE)
347 return 0;
348
	ret = cppc_get_auto_sel_caps(cpudata->cpu, &cppc_perf);
350 if (ret) {
351 pr_warn("failed to get auto_sel, ret: %d\n", ret);
352 return 0;
353 }
354
	ret = cppc_set_auto_sel(cpudata->cpu,
			(cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);
357
358 if (ret)
359 pr_warn("failed to set auto_sel, ret: %d\n", ret);
360
361 return ret;
362}
363
364DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf);
365
366static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
367{
368 return static_call(amd_pstate_init_perf)(cpudata);
369}
370
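/*
 * Note: on the MSR path the min/des/max arguments are informational only;
 * amd_pstate_update() has already folded them into cppc_req_cached, and
 * pstate_update_perf() simply writes that cached request word to
 * MSR_AMD_CPPC_REQ, locally on a fast switch or remotely otherwise.
 */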
371static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
372 u32 des_perf, u32 max_perf, bool fast_switch)
373{
374 if (fast_switch)
375 wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached));
376 else
		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
			      READ_ONCE(cpudata->cppc_req_cached));
379}
380
381static void cppc_update_perf(struct amd_cpudata *cpudata,
382 u32 min_perf, u32 des_perf,
383 u32 max_perf, bool fast_switch)
384{
385 struct cppc_perf_ctrls perf_ctrls;
386
387 perf_ctrls.max_perf = max_perf;
388 perf_ctrls.min_perf = min_perf;
389 perf_ctrls.desired_perf = des_perf;
390
	cppc_set_perf(cpudata->cpu, &perf_ctrls);
392}
393
394DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf);
395
396static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
397 u32 min_perf, u32 des_perf,
398 u32 max_perf, bool fast_switch)
399{
400 static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
401 max_perf, fast_switch);
402}
403
404static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
405{
406 u64 aperf, mperf, tsc;
407 unsigned long flags;
408
409 local_irq_save(flags);
410 rdmsrl(MSR_IA32_APERF, aperf);
411 rdmsrl(MSR_IA32_MPERF, mperf);
412 tsc = rdtsc();
413
414 if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) {
415 local_irq_restore(flags);
416 return false;
417 }
418
419 local_irq_restore(flags);
420
421 cpudata->cur.aperf = aperf;
422 cpudata->cur.mperf = mperf;
423 cpudata->cur.tsc = tsc;
424 cpudata->cur.aperf -= cpudata->prev.aperf;
425 cpudata->cur.mperf -= cpudata->prev.mperf;
426 cpudata->cur.tsc -= cpudata->prev.tsc;
427
428 cpudata->prev.aperf = aperf;
429 cpudata->prev.mperf = mperf;
430 cpudata->prev.tsc = tsc;
431
	cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf);
433
434 return true;
435}
436
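/*
 * amd_pstate_update() is the common path for both the ->target() and
 * ->adjust_perf() callbacks: it clamps min/des/max perf to the policy limits,
 * packs them into the CPPC request word, emits a trace event, and only pushes
 * the update to hardware when the new request differs from the cached one.
 */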
437static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
438 u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags)
439{
440 u64 prev = READ_ONCE(cpudata->cppc_req_cached);
441 u64 value = prev;
442
443 min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
444 cpudata->max_limit_perf);
445 max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
446 cpudata->max_limit_perf);
447 des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
448
449 if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
450 min_perf = des_perf;
451 des_perf = 0;
452 }
453
454 value &= ~AMD_CPPC_MIN_PERF(~0L);
455 value |= AMD_CPPC_MIN_PERF(min_perf);
456
457 value &= ~AMD_CPPC_DES_PERF(~0L);
458 value |= AMD_CPPC_DES_PERF(des_perf);
459
460 value &= ~AMD_CPPC_MAX_PERF(~0L);
461 value |= AMD_CPPC_MAX_PERF(max_perf);
462
463 if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
		trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
			cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
				cpudata->cpu, (value != prev), fast_switch);
467 }
468
469 if (value == prev)
470 return;
471
472 WRITE_ONCE(cpudata->cppc_req_cached, value);
473
474 amd_pstate_update_perf(cpudata, min_perf, des_perf,
475 max_perf, fast_switch);
476}
477
478static int amd_pstate_verify(struct cpufreq_policy_data *policy)
479{
480 cpufreq_verify_within_cpu_limits(policy);
481
482 return 0;
483}
484
485static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
486{
487 u32 max_limit_perf, min_limit_perf, lowest_perf;
488 struct amd_cpudata *cpudata = policy->driver_data;
489
	max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
	min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
492
493 lowest_perf = READ_ONCE(cpudata->lowest_perf);
494 if (min_limit_perf < lowest_perf)
495 min_limit_perf = lowest_perf;
496
497 if (max_limit_perf < min_limit_perf)
498 max_limit_perf = min_limit_perf;
499
500 WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
501 WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
502 WRITE_ONCE(cpudata->max_limit_freq, policy->max);
503 WRITE_ONCE(cpudata->min_limit_freq, policy->min);
504
505 return 0;
506}
507
508static int amd_pstate_update_freq(struct cpufreq_policy *policy,
509 unsigned int target_freq, bool fast_switch)
510{
511 struct cpufreq_freqs freqs;
512 struct amd_cpudata *cpudata = policy->driver_data;
513 unsigned long max_perf, min_perf, des_perf, cap_perf;
514
515 if (!cpudata->max_freq)
516 return -ENODEV;
517
518 if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
519 amd_pstate_update_min_max_limit(policy);
520
521 cap_perf = READ_ONCE(cpudata->highest_perf);
522 min_perf = READ_ONCE(cpudata->lowest_perf);
523 max_perf = cap_perf;
524
525 freqs.old = policy->cur;
526 freqs.new = target_freq;
527
528 des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf,
529 cpudata->max_freq);
530
531 WARN_ON(fast_switch && !policy->fast_switch_enabled);
532 /*
533 * If fast_switch is desired, then there aren't any registered
534 * transition notifiers. See comment for
535 * cpufreq_enable_fast_switch().
536 */
537 if (!fast_switch)
		cpufreq_freq_transition_begin(policy, &freqs);
539
	amd_pstate_update(cpudata, min_perf, des_perf,
			  max_perf, fast_switch, policy->governor->flags);
542
543 if (!fast_switch)
		cpufreq_freq_transition_end(policy, &freqs, false);
545
546 return 0;
547}
548
549static int amd_pstate_target(struct cpufreq_policy *policy,
550 unsigned int target_freq,
551 unsigned int relation)
552{
	return amd_pstate_update_freq(policy, target_freq, false);
554}
555
556static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy,
557 unsigned int target_freq)
558{
	if (!amd_pstate_update_freq(policy, target_freq, true))
560 return target_freq;
561 return policy->cur;
562}
563
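/*
 * Fast path used by the schedutil governor: _min_perf, target_perf and
 * capacity are scheduler capacity units, so they are rescaled against the
 * CPU's highest perf before being handed to amd_pstate_update() with
 * fast_switch set.
 */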
564static void amd_pstate_adjust_perf(unsigned int cpu,
565 unsigned long _min_perf,
566 unsigned long target_perf,
567 unsigned long capacity)
568{
569 unsigned long max_perf, min_perf, des_perf,
570 cap_perf, lowest_nonlinear_perf, max_freq;
571 struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
572 struct amd_cpudata *cpudata = policy->driver_data;
573 unsigned int target_freq;
574
575 if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
576 amd_pstate_update_min_max_limit(policy);
577
578
579 cap_perf = READ_ONCE(cpudata->highest_perf);
580 lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
581 max_freq = READ_ONCE(cpudata->max_freq);
582
583 des_perf = cap_perf;
584 if (target_perf < capacity)
585 des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);
586
587 min_perf = READ_ONCE(cpudata->lowest_perf);
588 if (_min_perf < capacity)
589 min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);
590
591 if (min_perf < lowest_nonlinear_perf)
592 min_perf = lowest_nonlinear_perf;
593
594 max_perf = cap_perf;
595 if (max_perf < min_perf)
596 max_perf = min_perf;
597
598 des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
	target_freq = div_u64(des_perf * max_freq, max_perf);
600 policy->cur = target_freq;
601
	amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
			policy->governor->flags);
604 cpufreq_cpu_put(policy);
605}
606
607static int amd_get_min_freq(struct amd_cpudata *cpudata)
608{
609 struct cppc_perf_caps cppc_perf;
610
	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
612 if (ret)
613 return ret;
614
615 /* Switch to khz */
616 return cppc_perf.lowest_freq * 1000;
617}
618
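/*
 * The maximum frequency is the nominal frequency scaled by the ratio of
 * highest to nominal perf, computed with SCHED_CAPACITY_SHIFT fixed-point
 * math: max_freq = nominal_freq * highest_perf / nominal_perf. As a purely
 * hypothetical example, nominal_freq = 2800 MHz, nominal_perf = 120 and
 * highest_perf = 166 would give roughly 3.87 GHz before the kHz conversion.
 */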
619static int amd_get_max_freq(struct amd_cpudata *cpudata)
620{
621 struct cppc_perf_caps cppc_perf;
622 u32 max_perf, max_freq, nominal_freq, nominal_perf;
623 u64 boost_ratio;
624
	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
626 if (ret)
627 return ret;
628
629 nominal_freq = cppc_perf.nominal_freq;
630 nominal_perf = READ_ONCE(cpudata->nominal_perf);
631 max_perf = READ_ONCE(cpudata->highest_perf);
632
	boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT,
			      nominal_perf);
635
636 max_freq = nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT;
637
638 /* Switch to khz */
639 return max_freq * 1000;
640}
641
642static int amd_get_nominal_freq(struct amd_cpudata *cpudata)
643{
644 struct cppc_perf_caps cppc_perf;
645
	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
647 if (ret)
648 return ret;
649
650 /* Switch to khz */
651 return cppc_perf.nominal_freq * 1000;
652}
653
654static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata)
655{
656 struct cppc_perf_caps cppc_perf;
657 u32 lowest_nonlinear_freq, lowest_nonlinear_perf,
658 nominal_freq, nominal_perf;
659 u64 lowest_nonlinear_ratio;
660
	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
662 if (ret)
663 return ret;
664
665 nominal_freq = cppc_perf.nominal_freq;
666 nominal_perf = READ_ONCE(cpudata->nominal_perf);
667
668 lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf;
669
	lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
					 nominal_perf);
672
673 lowest_nonlinear_freq = nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT;
674
675 /* Switch to khz */
676 return lowest_nonlinear_freq * 1000;
677}
678
679static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
680{
681 struct amd_cpudata *cpudata = policy->driver_data;
682 int ret;
683
684 if (!cpudata->boost_supported) {
685 pr_err("Boost mode is not supported by this processor or SBIOS\n");
686 return -EINVAL;
687 }
688
689 if (state)
690 policy->cpuinfo.max_freq = cpudata->max_freq;
691 else
692 policy->cpuinfo.max_freq = cpudata->nominal_freq;
693
694 policy->max = policy->cpuinfo.max_freq;
695
	ret = freq_qos_update_request(&cpudata->req[1],
				      policy->cpuinfo.max_freq);
698 if (ret < 0)
699 return ret;
700
701 return 0;
702}
703
704static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
705{
706 u32 highest_perf, nominal_perf;
707
708 highest_perf = READ_ONCE(cpudata->highest_perf);
709 nominal_perf = READ_ONCE(cpudata->nominal_perf);
710
711 if (highest_perf <= nominal_perf)
712 return;
713
714 cpudata->boost_supported = true;
715 current_pstate_driver->boost_enabled = true;
716}
717
718static void amd_perf_ctl_reset(unsigned int cpu)
719{
	wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
721}
722
/*
 * Setting the amd-pstate preferred core enable can't be done directly from
 * cpufreq callbacks due to locking, so queue the work for later.
 */
727static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
728{
729 sched_set_itmt_support();
730}
731static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);
732
733/*
734 * Get the highest performance register value.
735 * @cpu: CPU from which to get highest performance.
736 * @highest_perf: Return address.
737 *
738 * Return: 0 for success, -EIO otherwise.
739 */
740static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf)
741{
742 int ret;
743
744 if (boot_cpu_has(X86_FEATURE_CPPC)) {
745 u64 cap1;
746
747 ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, q: &cap1);
748 if (ret)
749 return ret;
750 WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
751 } else {
752 u64 cppc_highest_perf;
753
754 ret = cppc_get_highest_perf(cpunum: cpu, highest_perf: &cppc_highest_perf);
755 if (ret)
756 return ret;
757 WRITE_ONCE(*highest_perf, cppc_highest_perf);
758 }
759
760 return (ret);
761}
762
763#define CPPC_MAX_PERF U8_MAX
764
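/*
 * A highest_perf value below CPPC_MAX_PERF (255) indicates that the platform
 * reports per-core rankings, i.e. the preferred core feature is available;
 * the value is then handed to the ITMT infrastructure as that core's
 * scheduling priority.
 */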
765static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
766{
767 int ret, prio;
768 u32 highest_perf;
769
	ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf);
771 if (ret)
772 return;
773
774 cpudata->hw_prefcore = true;
	/* check if CPPC preferred core feature is enabled */
776 if (highest_perf < CPPC_MAX_PERF)
777 prio = (int)highest_perf;
778 else {
779 pr_debug("AMD CPPC preferred core is unsupported!\n");
780 cpudata->hw_prefcore = false;
781 return;
782 }
783
784 if (!amd_pstate_prefcore)
785 return;
786
787 /*
788 * The priorities can be set regardless of whether or not
789 * sched_set_itmt_support(true) has been called and it is valid to
790 * update them at any time after it has been called.
791 */
	sched_set_itmt_core_prio(prio, cpudata->cpu);
793
	schedule_work(&sched_prefcore_work);
795}
796
797static void amd_pstate_update_limits(unsigned int cpu)
798{
799 struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
800 struct amd_cpudata *cpudata = policy->driver_data;
801 u32 prev_high = 0, cur_high = 0;
802 int ret;
803 bool highest_perf_changed = false;
804
805 mutex_lock(&amd_pstate_driver_lock);
806 if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore))
807 goto free_cpufreq_put;
808
	ret = amd_pstate_get_highest_perf(cpu, &cur_high);
810 if (ret)
811 goto free_cpufreq_put;
812
813 prev_high = READ_ONCE(cpudata->prefcore_ranking);
814 if (prev_high != cur_high) {
815 highest_perf_changed = true;
816 WRITE_ONCE(cpudata->prefcore_ranking, cur_high);
817
818 if (cur_high < CPPC_MAX_PERF)
			sched_set_itmt_core_prio((int)cur_high, cpu);
820 }
821
822free_cpufreq_put:
823 cpufreq_cpu_put(policy);
824
825 if (!highest_perf_changed)
826 cpufreq_update_policy(cpu);
827
	mutex_unlock(&amd_pstate_driver_lock);
829}
830
831static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
832{
833 int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
834 struct device *dev;
835 struct amd_cpudata *cpudata;
836
	/*
	 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
	 * which is ideal for the initialization process.
	 */
	amd_perf_ctl_reset(policy->cpu);
	dev = get_cpu_device(policy->cpu);
	if (!dev)
		return -ENODEV;

	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
847 if (!cpudata)
848 return -ENOMEM;
849
850 cpudata->cpu = policy->cpu;
851
852 amd_pstate_init_prefcore(cpudata);
853
854 ret = amd_pstate_init_perf(cpudata);
855 if (ret)
856 goto free_cpudata1;
857
858 min_freq = amd_get_min_freq(cpudata);
859 max_freq = amd_get_max_freq(cpudata);
860 nominal_freq = amd_get_nominal_freq(cpudata);
861 lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
862
863 if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
864 dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
865 min_freq, max_freq);
866 ret = -EINVAL;
867 goto free_cpudata1;
868 }
869
870 policy->cpuinfo.transition_latency = AMD_PSTATE_TRANSITION_LATENCY;
871 policy->transition_delay_us = AMD_PSTATE_TRANSITION_DELAY;
872
873 policy->min = min_freq;
874 policy->max = max_freq;
875
876 policy->cpuinfo.min_freq = min_freq;
877 policy->cpuinfo.max_freq = max_freq;
878
879 /* It will be updated by governor */
880 policy->cur = policy->cpuinfo.min_freq;
881
882 if (boot_cpu_has(X86_FEATURE_CPPC))
883 policy->fast_switch_possible = true;
884
	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0],
				   FREQ_QOS_MIN, policy->cpuinfo.min_freq);
887 if (ret < 0) {
888 dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
889 goto free_cpudata1;
890 }
891
	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1],
				   FREQ_QOS_MAX, policy->cpuinfo.max_freq);
894 if (ret < 0) {
895 dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
896 goto free_cpudata2;
897 }
898
899 /* Initial processor data capability frequencies */
900 cpudata->max_freq = max_freq;
901 cpudata->min_freq = min_freq;
902 cpudata->max_limit_freq = max_freq;
903 cpudata->min_limit_freq = min_freq;
904 cpudata->nominal_freq = nominal_freq;
905 cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
906
907 policy->driver_data = cpudata;
908
909 amd_pstate_boost_init(cpudata);
910 if (!current_pstate_driver->adjust_perf)
911 current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
912
913 return 0;
914
free_cpudata2:
	freq_qos_remove_request(&cpudata->req[0]);
free_cpudata1:
	kfree(cpudata);
919 return ret;
920}
921
922static int amd_pstate_cpu_exit(struct cpufreq_policy *policy)
923{
924 struct amd_cpudata *cpudata = policy->driver_data;
925
	freq_qos_remove_request(&cpudata->req[1]);
	freq_qos_remove_request(&cpudata->req[0]);
	policy->fast_switch_possible = false;
	kfree(cpudata);
930
931 return 0;
932}
933
934static int amd_pstate_cpu_resume(struct cpufreq_policy *policy)
935{
936 int ret;
937
	ret = amd_pstate_enable(true);
939 if (ret)
940 pr_err("failed to enable amd-pstate during resume, return %d\n", ret);
941
942 return ret;
943}
944
945static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy)
946{
947 int ret;
948
	ret = amd_pstate_enable(false);
950 if (ret)
951 pr_err("failed to disable amd-pstate during suspend, return %d\n", ret);
952
953 return ret;
954}
955
956/* Sysfs attributes */
957
/*
 * This frequency indicates the maximum hardware frequency.
 * If boost is not active but supported, the frequency will be larger than the
 * one in cpuinfo.
 */
963static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
964 char *buf)
965{
966 int max_freq;
967 struct amd_cpudata *cpudata = policy->driver_data;
968
969 max_freq = amd_get_max_freq(cpudata);
970 if (max_freq < 0)
971 return max_freq;
972
	return sysfs_emit(buf, "%u\n", max_freq);
974}
975
976static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
977 char *buf)
978{
979 int freq;
980 struct amd_cpudata *cpudata = policy->driver_data;
981
982 freq = amd_get_lowest_nonlinear_freq(cpudata);
983 if (freq < 0)
984 return freq;
985
	return sysfs_emit(buf, "%u\n", freq);
987}
988
/*
 * In some ASICs, the highest_perf is not the one in the _CPC table, so we
 * need to expose it to sysfs.
 */
993static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
994 char *buf)
995{
996 u32 perf;
997 struct amd_cpudata *cpudata = policy->driver_data;
998
999 perf = READ_ONCE(cpudata->highest_perf);
1000
	return sysfs_emit(buf, "%u\n", perf);
1002}
1003
1004static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy,
1005 char *buf)
1006{
1007 u32 perf;
1008 struct amd_cpudata *cpudata = policy->driver_data;
1009
1010 perf = READ_ONCE(cpudata->prefcore_ranking);
1011
	return sysfs_emit(buf, "%u\n", perf);
1013}
1014
1015static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy,
1016 char *buf)
1017{
1018 bool hw_prefcore;
1019 struct amd_cpudata *cpudata = policy->driver_data;
1020
1021 hw_prefcore = READ_ONCE(cpudata->hw_prefcore);
1022
	return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore));
1024}
1025
1026static ssize_t show_energy_performance_available_preferences(
1027 struct cpufreq_policy *policy, char *buf)
1028{
1029 int i = 0;
1030 int offset = 0;
1031 struct amd_cpudata *cpudata = policy->driver_data;
1032
1033 if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
		return sysfs_emit_at(buf, offset, "%s\n",
				energy_perf_strings[EPP_INDEX_PERFORMANCE]);

	while (energy_perf_strings[i] != NULL)
		offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]);

	offset += sysfs_emit_at(buf, offset, "\n");
1041
1042 return offset;
1043}
1044
1045static ssize_t store_energy_performance_preference(
1046 struct cpufreq_policy *policy, const char *buf, size_t count)
1047{
1048 struct amd_cpudata *cpudata = policy->driver_data;
1049 char str_preference[21];
1050 ssize_t ret;
1051
1052 ret = sscanf(buf, "%20s", str_preference);
1053 if (ret != 1)
1054 return -EINVAL;
1055
	ret = match_string(energy_perf_strings, -1, str_preference);
1057 if (ret < 0)
1058 return -EINVAL;
1059
1060 mutex_lock(&amd_pstate_limits_lock);
	ret = amd_pstate_set_energy_pref_index(cpudata, ret);
	mutex_unlock(&amd_pstate_limits_lock);
1063
1064 return ret ?: count;
1065}
1066
1067static ssize_t show_energy_performance_preference(
1068 struct cpufreq_policy *policy, char *buf)
1069{
1070 struct amd_cpudata *cpudata = policy->driver_data;
1071 int preference;
1072
1073 preference = amd_pstate_get_energy_pref_index(cpudata);
1074 if (preference < 0)
1075 return preference;
1076
	return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
1078}
1079
1080static void amd_pstate_driver_cleanup(void)
1081{
	amd_pstate_enable(false);
1083 cppc_state = AMD_PSTATE_DISABLE;
1084 current_pstate_driver = NULL;
1085}
1086
1087static int amd_pstate_register_driver(int mode)
1088{
1089 int ret;
1090
1091 if (mode == AMD_PSTATE_PASSIVE || mode == AMD_PSTATE_GUIDED)
1092 current_pstate_driver = &amd_pstate_driver;
1093 else if (mode == AMD_PSTATE_ACTIVE)
1094 current_pstate_driver = &amd_pstate_epp_driver;
1095 else
1096 return -EINVAL;
1097
1098 cppc_state = mode;
	ret = cpufreq_register_driver(current_pstate_driver);
1100 if (ret) {
1101 amd_pstate_driver_cleanup();
1102 return ret;
1103 }
1104 return 0;
1105}
1106
1107static int amd_pstate_unregister_driver(int dummy)
1108{
	cpufreq_unregister_driver(current_pstate_driver);
1110 amd_pstate_driver_cleanup();
1111 return 0;
1112}
1113
1114static int amd_pstate_change_mode_without_dvr_change(int mode)
1115{
1116 int cpu = 0;
1117
1118 cppc_state = mode;
1119
1120 if (boot_cpu_has(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE)
1121 return 0;
1122
1123 for_each_present_cpu(cpu) {
		cppc_set_auto_sel(cpu, (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);
1125 }
1126
1127 return 0;
1128}
1129
1130static int amd_pstate_change_driver_mode(int mode)
1131{
1132 int ret;
1133
	ret = amd_pstate_unregister_driver(0);
1135 if (ret)
1136 return ret;
1137
1138 ret = amd_pstate_register_driver(mode);
1139 if (ret)
1140 return ret;
1141
1142 return 0;
1143}
1144
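/*
 * mode_state_machine[current mode][requested mode] selects the transition
 * handler invoked when the global "status" attribute is written: NULL means
 * nothing to do, otherwise the handler registers/unregisters a driver, swaps
 * drivers, or only flips auto_sel without re-registering.
 */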
1145static cppc_mode_transition_fn mode_state_machine[AMD_PSTATE_MAX][AMD_PSTATE_MAX] = {
1146 [AMD_PSTATE_DISABLE] = {
1147 [AMD_PSTATE_DISABLE] = NULL,
1148 [AMD_PSTATE_PASSIVE] = amd_pstate_register_driver,
1149 [AMD_PSTATE_ACTIVE] = amd_pstate_register_driver,
1150 [AMD_PSTATE_GUIDED] = amd_pstate_register_driver,
1151 },
1152 [AMD_PSTATE_PASSIVE] = {
1153 [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver,
1154 [AMD_PSTATE_PASSIVE] = NULL,
1155 [AMD_PSTATE_ACTIVE] = amd_pstate_change_driver_mode,
1156 [AMD_PSTATE_GUIDED] = amd_pstate_change_mode_without_dvr_change,
1157 },
1158 [AMD_PSTATE_ACTIVE] = {
1159 [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver,
1160 [AMD_PSTATE_PASSIVE] = amd_pstate_change_driver_mode,
1161 [AMD_PSTATE_ACTIVE] = NULL,
1162 [AMD_PSTATE_GUIDED] = amd_pstate_change_driver_mode,
1163 },
1164 [AMD_PSTATE_GUIDED] = {
1165 [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver,
1166 [AMD_PSTATE_PASSIVE] = amd_pstate_change_mode_without_dvr_change,
1167 [AMD_PSTATE_ACTIVE] = amd_pstate_change_driver_mode,
1168 [AMD_PSTATE_GUIDED] = NULL,
1169 },
1170};
1171
1172static ssize_t amd_pstate_show_status(char *buf)
1173{
1174 if (!current_pstate_driver)
		return sysfs_emit(buf, "disable\n");
1176
	return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]);
1178}
1179
1180static int amd_pstate_update_status(const char *buf, size_t size)
1181{
1182 int mode_idx;
1183
1184 if (size > strlen("passive") || size < strlen("active"))
1185 return -EINVAL;
1186
	mode_idx = get_mode_idx_from_str(buf, size);
1188
1189 if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX)
1190 return -EINVAL;
1191
1192 if (mode_state_machine[cppc_state][mode_idx])
1193 return mode_state_machine[cppc_state][mode_idx](mode_idx);
1194
1195 return 0;
1196}
1197
1198static ssize_t status_show(struct device *dev,
1199 struct device_attribute *attr, char *buf)
1200{
1201 ssize_t ret;
1202
1203 mutex_lock(&amd_pstate_driver_lock);
1204 ret = amd_pstate_show_status(buf);
	mutex_unlock(&amd_pstate_driver_lock);
1206
1207 return ret;
1208}
1209
1210static ssize_t status_store(struct device *a, struct device_attribute *b,
1211 const char *buf, size_t count)
1212{
	char *p = memchr(buf, '\n', count);
1214 int ret;
1215
1216 mutex_lock(&amd_pstate_driver_lock);
	ret = amd_pstate_update_status(buf, p ? p - buf : count);
	mutex_unlock(&amd_pstate_driver_lock);
1219
1220 return ret < 0 ? ret : count;
1221}
1222
1223static ssize_t prefcore_show(struct device *dev,
1224 struct device_attribute *attr, char *buf)
1225{
	return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore));
1227}
1228
1229cpufreq_freq_attr_ro(amd_pstate_max_freq);
1230cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
1231
1232cpufreq_freq_attr_ro(amd_pstate_highest_perf);
1233cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking);
1234cpufreq_freq_attr_ro(amd_pstate_hw_prefcore);
1235cpufreq_freq_attr_rw(energy_performance_preference);
1236cpufreq_freq_attr_ro(energy_performance_available_preferences);
1237static DEVICE_ATTR_RW(status);
1238static DEVICE_ATTR_RO(prefcore);
1239
1240static struct freq_attr *amd_pstate_attr[] = {
1241 &amd_pstate_max_freq,
1242 &amd_pstate_lowest_nonlinear_freq,
1243 &amd_pstate_highest_perf,
1244 &amd_pstate_prefcore_ranking,
1245 &amd_pstate_hw_prefcore,
1246 NULL,
1247};
1248
1249static struct freq_attr *amd_pstate_epp_attr[] = {
1250 &amd_pstate_max_freq,
1251 &amd_pstate_lowest_nonlinear_freq,
1252 &amd_pstate_highest_perf,
1253 &amd_pstate_prefcore_ranking,
1254 &amd_pstate_hw_prefcore,
1255 &energy_performance_preference,
1256 &energy_performance_available_preferences,
1257 NULL,
1258};
1259
1260static struct attribute *pstate_global_attributes[] = {
1261 &dev_attr_status.attr,
1262 &dev_attr_prefcore.attr,
1263 NULL
1264};
1265
1266static const struct attribute_group amd_pstate_global_attr_group = {
1267 .name = "amd_pstate",
1268 .attrs = pstate_global_attributes,
1269};
1270
1271static bool amd_pstate_acpi_pm_profile_server(void)
1272{
1273 switch (acpi_gbl_FADT.preferred_profile) {
1274 case PM_ENTERPRISE_SERVER:
1275 case PM_SOHO_SERVER:
1276 case PM_PERFORMANCE_SERVER:
1277 return true;
1278 }
1279 return false;
1280}
1281
1282static bool amd_pstate_acpi_pm_profile_undefined(void)
1283{
1284 if (acpi_gbl_FADT.preferred_profile == PM_UNSPECIFIED)
1285 return true;
1286 if (acpi_gbl_FADT.preferred_profile >= NR_PM_PROFILES)
1287 return true;
1288 return false;
1289}
1290
1291static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
1292{
1293 int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
1294 struct amd_cpudata *cpudata;
1295 struct device *dev;
1296 u64 value;
1297
	/*
	 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
	 * which is ideal for the initialization process.
	 */
	amd_perf_ctl_reset(policy->cpu);
	dev = get_cpu_device(policy->cpu);
	if (!dev)
		return -ENODEV;

	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
1308 if (!cpudata)
1309 return -ENOMEM;
1310
1311 cpudata->cpu = policy->cpu;
1312 cpudata->epp_policy = 0;
1313
1314 amd_pstate_init_prefcore(cpudata);
1315
1316 ret = amd_pstate_init_perf(cpudata);
1317 if (ret)
1318 goto free_cpudata1;
1319
1320 min_freq = amd_get_min_freq(cpudata);
1321 max_freq = amd_get_max_freq(cpudata);
1322 nominal_freq = amd_get_nominal_freq(cpudata);
1323 lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
1324 if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
1325 dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
1326 min_freq, max_freq);
1327 ret = -EINVAL;
1328 goto free_cpudata1;
1329 }
1330
1331 policy->cpuinfo.min_freq = min_freq;
1332 policy->cpuinfo.max_freq = max_freq;
1333 /* It will be updated by governor */
1334 policy->cur = policy->cpuinfo.min_freq;
1335
1336 /* Initial processor data capability frequencies */
1337 cpudata->max_freq = max_freq;
1338 cpudata->min_freq = min_freq;
1339 cpudata->nominal_freq = nominal_freq;
1340 cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
1341
1342 policy->driver_data = cpudata;
1343
	cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0);
1345
1346 policy->min = policy->cpuinfo.min_freq;
1347 policy->max = policy->cpuinfo.max_freq;
1348
1349 /*
1350 * Set the policy to provide a valid fallback value in case
1351 * the default cpufreq governor is neither powersave nor performance.
1352 */
1353 if (amd_pstate_acpi_pm_profile_server() ||
1354 amd_pstate_acpi_pm_profile_undefined())
1355 policy->policy = CPUFREQ_POLICY_PERFORMANCE;
1356 else
1357 policy->policy = CPUFREQ_POLICY_POWERSAVE;
1358
1359 if (boot_cpu_has(X86_FEATURE_CPPC)) {
		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
1361 if (ret)
1362 return ret;
1363 WRITE_ONCE(cpudata->cppc_req_cached, value);
1364
		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
1366 if (ret)
1367 return ret;
1368 WRITE_ONCE(cpudata->cppc_cap1_cached, value);
1369 }
1370 amd_pstate_boost_init(cpudata);
1371
1372 return 0;
1373
1374free_cpudata1:
	kfree(cpudata);
1376 return ret;
1377}
1378
1379static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
1380{
1381 pr_debug("CPU %d exiting\n", policy->cpu);
1382 return 0;
1383}
1384
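/*
 * Recompute the perf limits from the current policy, then rebuild the CPPC
 * request with the desired perf field zeroed (autonomous EPP mode) and
 * reprogram the EPP value; a performance policy pins min perf to max perf
 * and forces EPP to 0.
 */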
1385static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
1386{
1387 struct amd_cpudata *cpudata = policy->driver_data;
1388 u32 max_perf, min_perf, min_limit_perf, max_limit_perf;
1389 u64 value;
1390 s16 epp;
1391
1392 max_perf = READ_ONCE(cpudata->highest_perf);
1393 min_perf = READ_ONCE(cpudata->lowest_perf);
	max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
	min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
1396
1397 if (min_limit_perf < min_perf)
1398 min_limit_perf = min_perf;
1399
1400 if (max_limit_perf < min_limit_perf)
1401 max_limit_perf = min_limit_perf;
1402
1403 WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
1404 WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
1405
1406 max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
1407 cpudata->max_limit_perf);
1408 min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
1409 cpudata->max_limit_perf);
1410 value = READ_ONCE(cpudata->cppc_req_cached);
1411
1412 if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
1413 min_perf = max_perf;
1414
1415 /* Initial min/max values for CPPC Performance Controls Register */
1416 value &= ~AMD_CPPC_MIN_PERF(~0L);
1417 value |= AMD_CPPC_MIN_PERF(min_perf);
1418
1419 value &= ~AMD_CPPC_MAX_PERF(~0L);
1420 value |= AMD_CPPC_MAX_PERF(max_perf);
1421
	/* The CPPC EPP feature requires the desired perf field to be set to zero */
1423 value &= ~AMD_CPPC_DES_PERF(~0L);
1424 value |= AMD_CPPC_DES_PERF(0);
1425
1426 cpudata->epp_policy = cpudata->policy;
1427
	/* Get BIOS pre-defined epp value */
	epp = amd_pstate_get_epp(cpudata, value);
	if (epp < 0) {
		/*
		 * This return value can only be negative for shared-memory
		 * systems where EPP register reads/writes are not supported.
		 */
		return;
	}
1437
1438 if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
1439 epp = 0;
1440
1441 /* Set initial EPP value */
1442 if (boot_cpu_has(X86_FEATURE_CPPC)) {
1443 value &= ~GENMASK_ULL(31, 24);
1444 value |= (u64)epp << 24;
1445 }
1446
1447 WRITE_ONCE(cpudata->cppc_req_cached, value);
1448 amd_pstate_set_epp(cpudata, epp);
1449}
1450
1451static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
1452{
1453 struct amd_cpudata *cpudata = policy->driver_data;
1454
1455 if (!policy->cpuinfo.max_freq)
1456 return -ENODEV;
1457
1458 pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
1459 policy->cpuinfo.max_freq, policy->max);
1460
1461 cpudata->policy = policy->policy;
1462
1463 amd_pstate_epp_update_limit(policy);
1464
1465 return 0;
1466}
1467
1468static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata)
1469{
1470 struct cppc_perf_ctrls perf_ctrls;
1471 u64 value, max_perf;
1472 int ret;
1473
	ret = amd_pstate_enable(true);
1475 if (ret)
1476 pr_err("failed to enable amd pstate during resume, return %d\n", ret);
1477
1478 value = READ_ONCE(cpudata->cppc_req_cached);
1479 max_perf = READ_ONCE(cpudata->highest_perf);
1480
1481 if (boot_cpu_has(X86_FEATURE_CPPC)) {
		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
1483 } else {
1484 perf_ctrls.max_perf = max_perf;
1485 perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached);
		cppc_set_perf(cpudata->cpu, &perf_ctrls);
1487 }
1488}
1489
1490static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
1491{
1492 struct amd_cpudata *cpudata = policy->driver_data;
1493
1494 pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);
1495
1496 if (cppc_state == AMD_PSTATE_ACTIVE) {
1497 amd_pstate_epp_reenable(cpudata);
1498 cpudata->suspended = false;
1499 }
1500
1501 return 0;
1502}
1503
1504static void amd_pstate_epp_offline(struct cpufreq_policy *policy)
1505{
1506 struct amd_cpudata *cpudata = policy->driver_data;
1507 struct cppc_perf_ctrls perf_ctrls;
1508 int min_perf;
1509 u64 value;
1510
1511 min_perf = READ_ONCE(cpudata->lowest_perf);
1512 value = READ_ONCE(cpudata->cppc_req_cached);
1513
1514 mutex_lock(&amd_pstate_limits_lock);
1515 if (boot_cpu_has(X86_FEATURE_CPPC)) {
1516 cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN;
1517
1518 /* Set max perf same as min perf */
1519 value &= ~AMD_CPPC_MAX_PERF(~0L);
1520 value |= AMD_CPPC_MAX_PERF(min_perf);
1521 value &= ~AMD_CPPC_MIN_PERF(~0L);
1522 value |= AMD_CPPC_MIN_PERF(min_perf);
		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
1524 } else {
1525 perf_ctrls.desired_perf = 0;
1526 perf_ctrls.max_perf = min_perf;
1527 perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE);
		cppc_set_perf(cpudata->cpu, &perf_ctrls);
1529 }
	mutex_unlock(&amd_pstate_limits_lock);
1531}
1532
1533static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
1534{
1535 struct amd_cpudata *cpudata = policy->driver_data;
1536
1537 pr_debug("AMD CPU Core %d going offline\n", cpudata->cpu);
1538
1539 if (cpudata->suspended)
1540 return 0;
1541
1542 if (cppc_state == AMD_PSTATE_ACTIVE)
1543 amd_pstate_epp_offline(policy);
1544
1545 return 0;
1546}
1547
1548static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy)
1549{
1550 cpufreq_verify_within_cpu_limits(policy);
1551 pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min);
1552 return 0;
1553}
1554
1555static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
1556{
1557 struct amd_cpudata *cpudata = policy->driver_data;
1558 int ret;
1559
1560 /* avoid suspending when EPP is not enabled */
1561 if (cppc_state != AMD_PSTATE_ACTIVE)
1562 return 0;
1563
	/* set this flag to avoid setting the core offline */
	cpudata->suspended = true;

	/* disable CPPC in low-level firmware */
	ret = amd_pstate_enable(false);
1569 if (ret)
1570 pr_err("failed to suspend, return %d\n", ret);
1571
1572 return 0;
1573}
1574
1575static int amd_pstate_epp_resume(struct cpufreq_policy *policy)
1576{
1577 struct amd_cpudata *cpudata = policy->driver_data;
1578
1579 if (cpudata->suspended) {
1580 mutex_lock(&amd_pstate_limits_lock);
1581
		/* enable amd pstate from suspend state */
		amd_pstate_epp_reenable(cpudata);

		mutex_unlock(&amd_pstate_limits_lock);
1586
1587 cpudata->suspended = false;
1588 }
1589
1590 return 0;
1591}
1592
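/*
 * Two cpufreq drivers are provided: amd_pstate_driver implements the
 * ->target()/->fast_switch() interface used in the passive and guided modes,
 * while amd_pstate_epp_driver is a ->setpolicy() driver for the active (EPP)
 * mode, where the platform firmware picks frequencies autonomously.
 */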
1593static struct cpufreq_driver amd_pstate_driver = {
1594 .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
1595 .verify = amd_pstate_verify,
1596 .target = amd_pstate_target,
1597 .fast_switch = amd_pstate_fast_switch,
1598 .init = amd_pstate_cpu_init,
1599 .exit = amd_pstate_cpu_exit,
1600 .suspend = amd_pstate_cpu_suspend,
1601 .resume = amd_pstate_cpu_resume,
1602 .set_boost = amd_pstate_set_boost,
1603 .update_limits = amd_pstate_update_limits,
1604 .name = "amd-pstate",
1605 .attr = amd_pstate_attr,
1606};
1607
1608static struct cpufreq_driver amd_pstate_epp_driver = {
1609 .flags = CPUFREQ_CONST_LOOPS,
1610 .verify = amd_pstate_epp_verify_policy,
1611 .setpolicy = amd_pstate_epp_set_policy,
1612 .init = amd_pstate_epp_cpu_init,
1613 .exit = amd_pstate_epp_cpu_exit,
1614 .offline = amd_pstate_epp_cpu_offline,
1615 .online = amd_pstate_epp_cpu_online,
1616 .suspend = amd_pstate_epp_suspend,
1617 .resume = amd_pstate_epp_resume,
1618 .update_limits = amd_pstate_update_limits,
1619 .name = "amd-pstate-epp",
1620 .attr = amd_pstate_epp_attr,
1621};
1622
1623static int __init amd_pstate_set_driver(int mode_idx)
1624{
1625 if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
1626 cppc_state = mode_idx;
1627 if (cppc_state == AMD_PSTATE_DISABLE)
1628 pr_info("driver is explicitly disabled\n");
1629
1630 if (cppc_state == AMD_PSTATE_ACTIVE)
1631 current_pstate_driver = &amd_pstate_epp_driver;
1632
1633 if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
1634 current_pstate_driver = &amd_pstate_driver;
1635
1636 return 0;
1637 }
1638
1639 return -EINVAL;
1640}
1641
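/*
 * Driver entry point: checks that the platform is suitable (AMD vendor, _CPC
 * present, no other cpufreq driver registered), selects the operating mode,
 * patches the static calls for the shared-memory case, enables CPPC in the
 * firmware and registers the chosen cpufreq driver and global sysfs
 * attributes.
 */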
1642static int __init amd_pstate_init(void)
1643{
1644 struct device *dev_root;
1645 int ret;
1646
1647 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
1648 return -ENODEV;
1649
1650 if (!acpi_cpc_valid()) {
1651 pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n");
1652 return -ENODEV;
1653 }
1654
1655 /* don't keep reloading if cpufreq_driver exists */
1656 if (cpufreq_get_current_driver())
1657 return -EEXIST;
1658
1659 switch (cppc_state) {
1660 case AMD_PSTATE_UNDEFINED:
1661 /* Disable on the following configs by default:
1662 * 1. Undefined platforms
1663 * 2. Server platforms
1664 * 3. Shared memory designs
1665 */
1666 if (amd_pstate_acpi_pm_profile_undefined() ||
1667 amd_pstate_acpi_pm_profile_server() ||
1668 !boot_cpu_has(X86_FEATURE_CPPC)) {
1669 pr_info("driver load is disabled, boot with specific mode to enable this\n");
1670 return -ENODEV;
1671 }
1672 ret = amd_pstate_set_driver(CONFIG_X86_AMD_PSTATE_DEFAULT_MODE);
1673 if (ret)
1674 return ret;
1675 break;
1676 case AMD_PSTATE_DISABLE:
1677 return -ENODEV;
1678 case AMD_PSTATE_PASSIVE:
1679 case AMD_PSTATE_ACTIVE:
1680 case AMD_PSTATE_GUIDED:
1681 break;
1682 default:
1683 return -EINVAL;
1684 }
1685
1686 /* capability check */
1687 if (boot_cpu_has(X86_FEATURE_CPPC)) {
1688 pr_debug("AMD CPPC MSR based functionality is supported\n");
1689 if (cppc_state != AMD_PSTATE_ACTIVE)
1690 current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
1691 } else {
1692 pr_debug("AMD CPPC shared memory based functionality is supported\n");
1693 static_call_update(amd_pstate_enable, cppc_enable);
1694 static_call_update(amd_pstate_init_perf, cppc_init_perf);
1695 static_call_update(amd_pstate_update_perf, cppc_update_perf);
1696 }
1697
1698 /* enable amd pstate feature */
	ret = amd_pstate_enable(true);
1700 if (ret) {
1701 pr_err("failed to enable with return %d\n", ret);
1702 return ret;
1703 }
1704
	ret = cpufreq_register_driver(current_pstate_driver);
1706 if (ret)
1707 pr_err("failed to register with return %d\n", ret);
1708
	dev_root = bus_get_dev_root(&cpu_subsys);
1710 if (dev_root) {
		ret = sysfs_create_group(&dev_root->kobj, &amd_pstate_global_attr_group);
		put_device(dev_root);
1713 if (ret) {
1714 pr_err("sysfs attribute export failed with error %d.\n", ret);
1715 goto global_attr_free;
1716 }
1717 }
1718
1719 return ret;
1720
1721global_attr_free:
	cpufreq_unregister_driver(current_pstate_driver);
1723 return ret;
1724}
1725device_initcall(amd_pstate_init);
1726
1727static int __init amd_pstate_param(char *str)
1728{
1729 size_t size;
1730 int mode_idx;
1731
1732 if (!str)
1733 return -EINVAL;
1734
1735 size = strlen(str);
1736 mode_idx = get_mode_idx_from_str(str, size);
1737
1738 return amd_pstate_set_driver(mode_idx);
1739}
1740
1741static int __init amd_prefcore_param(char *str)
1742{
1743 if (!strcmp(str, "disable"))
1744 amd_pstate_prefcore = false;
1745
1746 return 0;
1747}
1748
1749early_param("amd_pstate", amd_pstate_param);
1750early_param("amd_prefcore", amd_prefcore_param);
1751
1752MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
1753MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");
1754
