1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * itmt.c: Support Intel Turbo Boost Max Technology 3.0 |
4 | * |
5 | * (C) Copyright 2016 Intel Corporation |
6 | * Author: Tim Chen <tim.c.chen@linux.intel.com> |
7 | * |
 * On platforms supporting Intel Turbo Boost Max Technology 3.0 (ITMT),
 * the maximum turbo frequencies of some cores in a CPU package may be
 * higher than for the other cores in the same package.  In that case,
 * better performance can be achieved by making the scheduler prefer
 * to run tasks on the CPUs with higher max turbo frequencies.
13 | * |
 * This file provides functions and data structures for enabling the
 * scheduler to favor scheduling on cores that can be boosted to a higher
 * frequency under ITMT.
17 | */ |
18 | |
19 | #include <linux/sched.h> |
20 | #include <linux/cpumask.h> |
21 | #include <linux/cpuset.h> |
22 | #include <linux/mutex.h> |
23 | #include <linux/sysctl.h> |
24 | #include <linux/nodemask.h> |
25 | |
26 | static DEFINE_MUTEX(itmt_update_mutex); |
27 | DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority); |
28 | |
29 | /* Boolean to track if system has ITMT capabilities */ |
30 | static bool __read_mostly sched_itmt_capable; |
31 | |
32 | /* |
33 | * Boolean to control whether we want to move processes to cpu capable |
34 | * of higher turbo frequency for cpus supporting Intel Turbo Boost Max |
35 | * Technology 3.0. |
36 | * |
37 | * It can be set via /proc/sys/kernel/sched_itmt_enabled |
38 | */ |
39 | unsigned int __read_mostly sysctl_sched_itmt_enabled; |
40 | |
41 | static int sched_itmt_update_handler(struct ctl_table *table, int write, |
42 | void *buffer, size_t *lenp, loff_t *ppos) |
43 | { |
44 | unsigned int old_sysctl; |
45 | int ret; |
46 | |
47 | mutex_lock(&itmt_update_mutex); |
48 | |
49 | if (!sched_itmt_capable) { |
50 | mutex_unlock(lock: &itmt_update_mutex); |
51 | return -EINVAL; |
52 | } |
53 | |
54 | old_sysctl = sysctl_sched_itmt_enabled; |
55 | ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
56 | |
57 | if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) { |
58 | x86_topology_update = true; |
59 | rebuild_sched_domains(); |
60 | } |
61 | |
62 | mutex_unlock(lock: &itmt_update_mutex); |
63 | |
64 | return ret; |
65 | } |
66 | |
67 | static struct ctl_table itmt_kern_table[] = { |
68 | { |
69 | .procname = "sched_itmt_enabled" , |
70 | .data = &sysctl_sched_itmt_enabled, |
71 | .maxlen = sizeof(unsigned int), |
72 | .mode = 0644, |
73 | .proc_handler = sched_itmt_update_handler, |
74 | .extra1 = SYSCTL_ZERO, |
75 | .extra2 = SYSCTL_ONE, |
76 | }, |
77 | }; |
78 | |
/*
 * Handle returned by register_sysctl() for itmt_kern_table; NULL while the
 * table is not registered.
 */
static struct ctl_table_header *itmt_sysctl_header;
80 | |
81 | /** |
82 | * sched_set_itmt_support() - Indicate platform supports ITMT |
83 | * |
84 | * This function is used by the OS to indicate to scheduler that the platform |
85 | * is capable of supporting the ITMT feature. |
86 | * |
87 | * The current scheme has the pstate driver detects if the system |
88 | * is ITMT capable and call sched_set_itmt_support. |
89 | * |
90 | * This must be done only after sched_set_itmt_core_prio |
91 | * has been called to set the cpus' priorities. |
92 | * It must not be called with cpu hot plug lock |
93 | * held as we need to acquire the lock to rebuild sched domains |
94 | * later. |
95 | * |
96 | * Return: 0 on success |
97 | */ |
98 | int sched_set_itmt_support(void) |
99 | { |
100 | mutex_lock(&itmt_update_mutex); |
101 | |
102 | if (sched_itmt_capable) { |
103 | mutex_unlock(lock: &itmt_update_mutex); |
104 | return 0; |
105 | } |
106 | |
107 | itmt_sysctl_header = register_sysctl("kernel" , itmt_kern_table); |
108 | if (!itmt_sysctl_header) { |
109 | mutex_unlock(lock: &itmt_update_mutex); |
110 | return -ENOMEM; |
111 | } |
112 | |
113 | sched_itmt_capable = true; |
114 | |
115 | sysctl_sched_itmt_enabled = 1; |
116 | |
117 | x86_topology_update = true; |
118 | rebuild_sched_domains(); |
119 | |
120 | mutex_unlock(lock: &itmt_update_mutex); |
121 | |
122 | return 0; |
123 | } |
124 | |
125 | /** |
126 | * sched_clear_itmt_support() - Revoke platform's support of ITMT |
127 | * |
128 | * This function is used by the OS to indicate that it has |
129 | * revoked the platform's support of ITMT feature. |
130 | * |
131 | * It must not be called with cpu hot plug lock |
132 | * held as we need to acquire the lock to rebuild sched domains |
133 | * later. |
134 | */ |
135 | void sched_clear_itmt_support(void) |
136 | { |
137 | mutex_lock(&itmt_update_mutex); |
138 | |
139 | if (!sched_itmt_capable) { |
140 | mutex_unlock(lock: &itmt_update_mutex); |
141 | return; |
142 | } |
143 | sched_itmt_capable = false; |
144 | |
145 | if (itmt_sysctl_header) { |
146 | unregister_sysctl_table(table: itmt_sysctl_header); |
147 | itmt_sysctl_header = NULL; |
148 | } |
149 | |
150 | if (sysctl_sched_itmt_enabled) { |
151 | /* disable sched_itmt if we are no longer ITMT capable */ |
152 | sysctl_sched_itmt_enabled = 0; |
153 | x86_topology_update = true; |
154 | rebuild_sched_domains(); |
155 | } |
156 | |
157 | mutex_unlock(lock: &itmt_update_mutex); |
158 | } |
159 | |
160 | int arch_asym_cpu_priority(int cpu) |
161 | { |
162 | return per_cpu(sched_core_priority, cpu); |
163 | } |
164 | |
165 | /** |
166 | * sched_set_itmt_core_prio() - Set CPU priority based on ITMT |
167 | * @prio: Priority of @cpu |
168 | * @cpu: The CPU number |
169 | * |
170 | * The pstate driver will find out the max boost frequency |
171 | * and call this function to set a priority proportional |
172 | * to the max boost frequency. CPUs with higher boost |
173 | * frequency will receive higher priority. |
174 | * |
175 | * No need to rebuild sched domain after updating |
176 | * the CPU priorities. The sched domains have no |
177 | * dependency on CPU priorities. |
178 | */ |
179 | void sched_set_itmt_core_prio(int prio, int cpu) |
180 | { |
181 | per_cpu(sched_core_priority, cpu) = prio; |
182 | } |
183 | |