1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | /* |
4 | * Auto-group scheduling implementation: |
5 | */ |
6 | |
7 | unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1; |
8 | static struct autogroup autogroup_default; |
9 | static atomic_t autogroup_seq_nr; |
10 | |
#ifdef CONFIG_SYSCTL
/*
 * One writable (0644) integer entry for the autogroup on/off switch,
 * with SYSCTL_ZERO/SYSCTL_ONE supplied as the min/max bounds.
 */
static struct ctl_table sched_autogroup_sysctls[] = {
	{
		.procname	= "sched_autogroup_enabled",
		.mode		= 0644,
		.data		= &sysctl_sched_autogroup_enabled,
		.maxlen		= sizeof(sysctl_sched_autogroup_enabled),
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{ }	/* empty terminating entry */
};

/* Boot-time hook: register the table above under "kernel". */
static void __init sched_autogroup_sysctl_init(void)
{
	register_sysctl_init("kernel", sched_autogroup_sysctls);
}
#else /* !CONFIG_SYSCTL */
/* Without sysctl support the registration step compiles away. */
#define sched_autogroup_sysctl_init() do { } while (0)
#endif /* CONFIG_SYSCTL */
32 | |
33 | void __init autogroup_init(struct task_struct *init_task) |
34 | { |
35 | autogroup_default.tg = &root_task_group; |
36 | kref_init(&autogroup_default.kref); |
37 | init_rwsem(&autogroup_default.lock); |
38 | init_task->signal->autogroup = &autogroup_default; |
39 | sched_autogroup_sysctl_init(); |
40 | } |
41 | |
42 | void autogroup_free(struct task_group *tg) |
43 | { |
44 | kfree(tg->autogroup); |
45 | } |
46 | |
47 | static inline void autogroup_destroy(struct kref *kref) |
48 | { |
49 | struct autogroup *ag = container_of(kref, struct autogroup, kref); |
50 | |
51 | #ifdef CONFIG_RT_GROUP_SCHED |
52 | /* We've redirected RT tasks to the root task group... */ |
53 | ag->tg->rt_se = NULL; |
54 | ag->tg->rt_rq = NULL; |
55 | #endif |
56 | sched_release_group(ag->tg); |
57 | sched_destroy_group(ag->tg); |
58 | } |
59 | |
60 | static inline void autogroup_kref_put(struct autogroup *ag) |
61 | { |
62 | kref_put(&ag->kref, autogroup_destroy); |
63 | } |
64 | |
65 | static inline struct autogroup *autogroup_kref_get(struct autogroup *ag) |
66 | { |
67 | kref_get(&ag->kref); |
68 | return ag; |
69 | } |
70 | |
71 | static inline struct autogroup *autogroup_task_get(struct task_struct *p) |
72 | { |
73 | struct autogroup *ag; |
74 | unsigned long flags; |
75 | |
76 | if (!lock_task_sighand(p, &flags)) |
77 | return autogroup_kref_get(ag: &autogroup_default); |
78 | |
79 | ag = autogroup_kref_get(ag: p->signal->autogroup); |
80 | unlock_task_sighand(p, &flags); |
81 | |
82 | return ag; |
83 | } |
84 | |
85 | static inline struct autogroup *autogroup_create(void) |
86 | { |
87 | struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); |
88 | struct task_group *tg; |
89 | |
90 | if (!ag) |
91 | goto out_fail; |
92 | |
93 | tg = sched_create_group(&root_task_group); |
94 | if (IS_ERR(tg)) |
95 | goto out_free; |
96 | |
97 | kref_init(&ag->kref); |
98 | init_rwsem(&ag->lock); |
99 | ag->id = atomic_inc_return(&autogroup_seq_nr); |
100 | ag->tg = tg; |
101 | #ifdef CONFIG_RT_GROUP_SCHED |
102 | /* |
103 | * Autogroup RT tasks are redirected to the root task group |
104 | * so we don't have to move tasks around upon policy change, |
105 | * or flail around trying to allocate bandwidth on the fly. |
106 | * A bandwidth exception in __sched_setscheduler() allows |
107 | * the policy change to proceed. |
108 | */ |
109 | free_rt_sched_group(tg); |
110 | tg->rt_se = root_task_group.rt_se; |
111 | tg->rt_rq = root_task_group.rt_rq; |
112 | #endif |
113 | tg->autogroup = ag; |
114 | |
115 | sched_online_group(tg, &root_task_group); |
116 | return ag; |
117 | |
118 | out_free: |
119 | kfree(ag); |
120 | out_fail: |
121 | if (printk_ratelimit()) { |
122 | printk(KERN_WARNING "autogroup_create: %s failure.\n" , |
123 | ag ? "sched_create_group()" : "kzalloc()" ); |
124 | } |
125 | |
126 | return autogroup_kref_get(ag: &autogroup_default); |
127 | } |
128 | |
129 | bool task_wants_autogroup(struct task_struct *p, struct task_group *tg) |
130 | { |
131 | if (tg != &root_task_group) |
132 | return false; |
133 | /* |
134 | * If we race with autogroup_move_group() the caller can use the old |
135 | * value of signal->autogroup but in this case sched_move_task() will |
136 | * be called again before autogroup_kref_put(). |
137 | * |
138 | * However, there is no way sched_autogroup_exit_task() could tell us |
139 | * to avoid autogroup->tg, so we abuse PF_EXITING flag for this case. |
140 | */ |
141 | if (p->flags & PF_EXITING) |
142 | return false; |
143 | |
144 | return true; |
145 | } |
146 | |
/* Re-evaluate the task group of the exiting task @p. */
void sched_autogroup_exit_task(struct task_struct *p)
{
	/*
	 * exit_notify() is about to run, after which autogroup_move_group()
	 * can no longer see this thread, so signal->autogroup must not be
	 * used any more.  See the PF_EXITING check in task_wants_autogroup().
	 */
	sched_move_task(p);
}
156 | |
157 | static void |
158 | autogroup_move_group(struct task_struct *p, struct autogroup *ag) |
159 | { |
160 | struct autogroup *prev; |
161 | struct task_struct *t; |
162 | unsigned long flags; |
163 | |
164 | if (WARN_ON_ONCE(!lock_task_sighand(p, &flags))) |
165 | return; |
166 | |
167 | prev = p->signal->autogroup; |
168 | if (prev == ag) { |
169 | unlock_task_sighand(p, &flags); |
170 | return; |
171 | } |
172 | |
173 | p->signal->autogroup = autogroup_kref_get(ag); |
174 | /* |
175 | * We can't avoid sched_move_task() after we changed signal->autogroup, |
176 | * this process can already run with task_group() == prev->tg or we can |
177 | * race with cgroup code which can read autogroup = prev under rq->lock. |
178 | * In the latter case for_each_thread() can not miss a migrating thread, |
179 | * cpu_cgroup_attach() must not be possible after cgroup_exit() and it |
180 | * can't be removed from thread list, we hold ->siglock. |
181 | * |
182 | * If an exiting thread was already removed from thread list we rely on |
183 | * sched_autogroup_exit_task(). |
184 | */ |
185 | for_each_thread(p, t) |
186 | sched_move_task(t); |
187 | |
188 | unlock_task_sighand(p, &flags); |
189 | autogroup_kref_put(ag: prev); |
190 | } |
191 | |
/*
 * Create a fresh autogroup and move @p's process into it.
 *
 * Allocates GFP_KERNEL, cannot be called under any spinlock.
 */
void sched_autogroup_create_attach(struct task_struct *p)
{
	struct autogroup *new_ag;

	new_ag = autogroup_create();
	autogroup_move_group(p, new_ag);

	/* autogroup_create() returned with a reference held; release it. */
	autogroup_kref_put(new_ag);
}
EXPORT_SYMBOL(sched_autogroup_create_attach);
203 | |
204 | /* Cannot be called under siglock. Currently has no users: */ |
205 | void sched_autogroup_detach(struct task_struct *p) |
206 | { |
207 | autogroup_move_group(p, ag: &autogroup_default); |
208 | } |
209 | EXPORT_SYMBOL(sched_autogroup_detach); |
210 | |
211 | void sched_autogroup_fork(struct signal_struct *sig) |
212 | { |
213 | sig->autogroup = autogroup_task_get(p: current); |
214 | } |
215 | |
216 | void sched_autogroup_exit(struct signal_struct *sig) |
217 | { |
218 | autogroup_kref_put(ag: sig->autogroup); |
219 | } |
220 | |
221 | static int __init setup_autogroup(char *str) |
222 | { |
223 | sysctl_sched_autogroup_enabled = 0; |
224 | |
225 | return 1; |
226 | } |
227 | __setup("noautogroup" , setup_autogroup); |
228 | |
229 | #ifdef CONFIG_PROC_FS |
230 | |
231 | int proc_sched_autogroup_set_nice(struct task_struct *p, int nice) |
232 | { |
233 | static unsigned long next = INITIAL_JIFFIES; |
234 | struct autogroup *ag; |
235 | unsigned long shares; |
236 | int err, idx; |
237 | |
238 | if (nice < MIN_NICE || nice > MAX_NICE) |
239 | return -EINVAL; |
240 | |
241 | err = security_task_setnice(current, nice); |
242 | if (err) |
243 | return err; |
244 | |
245 | if (nice < 0 && !can_nice(current, nice)) |
246 | return -EPERM; |
247 | |
248 | /* This is a heavy operation, taking global locks.. */ |
249 | if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next)) |
250 | return -EAGAIN; |
251 | |
252 | next = HZ / 10 + jiffies; |
253 | ag = autogroup_task_get(p); |
254 | |
255 | idx = array_index_nospec(nice + 20, 40); |
256 | shares = scale_load(sched_prio_to_weight[idx]); |
257 | |
258 | down_write(&ag->lock); |
259 | err = sched_group_set_shares(ag->tg, shares); |
260 | if (!err) |
261 | ag->nice = nice; |
262 | up_write(&ag->lock); |
263 | |
264 | autogroup_kref_put(ag); |
265 | |
266 | return err; |
267 | } |
268 | |
269 | void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m) |
270 | { |
271 | struct autogroup *ag = autogroup_task_get(p); |
272 | |
273 | if (!task_group_is_autogroup(ag->tg)) |
274 | goto out; |
275 | |
276 | down_read(&ag->lock); |
277 | seq_printf(m, "/autogroup-%ld nice %d\n" , ag->id, ag->nice); |
278 | up_read(&ag->lock); |
279 | |
280 | out: |
281 | autogroup_kref_put(ag); |
282 | } |
283 | #endif /* CONFIG_PROC_FS */ |
284 | |
285 | int autogroup_path(struct task_group *tg, char *buf, int buflen) |
286 | { |
287 | if (!task_group_is_autogroup(tg)) |
288 | return 0; |
289 | |
290 | return snprintf(buf, buflen, "%s-%ld" , "/autogroup" , tg->autogroup->id); |
291 | } |
292 | |