1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * fs/ioprio.c |
4 | * |
5 | * Copyright (C) 2004 Jens Axboe <axboe@kernel.dk> |
6 | * |
7 | * Helper functions for setting/querying io priorities of processes. The |
8 | * system calls closely mimic getpriority/setpriority, see the man page for |
9 | * those. The prio argument is a composite of prio class and prio data, where |
10 | * the data argument has meaning within that class. The standard scheduling |
11 | * classes have 8 distinct prio levels, with 0 being the highest prio and 7 |
12 | * being the lowest. |
13 | * |
14 | * IOW, setting BE scheduling class with prio 2 is done ala: |
15 | * |
16 | * unsigned int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2; |
17 | * |
18 | * ioprio_set(PRIO_PROCESS, pid, prio); |
19 | * |
20 | * See also Documentation/block/ioprio.rst |
21 | * |
22 | */ |
23 | #include <linux/gfp.h> |
24 | #include <linux/kernel.h> |
25 | #include <linux/ioprio.h> |
26 | #include <linux/cred.h> |
27 | #include <linux/blkdev.h> |
28 | #include <linux/capability.h> |
29 | #include <linux/syscalls.h> |
30 | #include <linux/security.h> |
31 | #include <linux/pid_namespace.h> |
32 | |
33 | int ioprio_check_cap(int ioprio) |
34 | { |
35 | int class = IOPRIO_PRIO_CLASS(ioprio); |
36 | int level = IOPRIO_PRIO_LEVEL(ioprio); |
37 | |
38 | switch (class) { |
39 | case IOPRIO_CLASS_RT: |
40 | /* |
41 | * Originally this only checked for CAP_SYS_ADMIN, |
42 | * which was implicitly allowed for pid 0 by security |
43 | * modules such as SELinux. Make sure we check |
44 | * CAP_SYS_ADMIN first to avoid a denial/avc for |
45 | * possibly missing CAP_SYS_NICE permission. |
46 | */ |
47 | if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE)) |
48 | return -EPERM; |
49 | fallthrough; |
50 | /* rt has prio field too */ |
51 | case IOPRIO_CLASS_BE: |
52 | if (level >= IOPRIO_NR_LEVELS) |
53 | return -EINVAL; |
54 | break; |
55 | case IOPRIO_CLASS_IDLE: |
56 | break; |
57 | case IOPRIO_CLASS_NONE: |
58 | if (level) |
59 | return -EINVAL; |
60 | break; |
61 | case IOPRIO_CLASS_INVALID: |
62 | default: |
63 | return -EINVAL; |
64 | } |
65 | |
66 | return 0; |
67 | } |
68 | |
69 | SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio) |
70 | { |
71 | struct task_struct *p, *g; |
72 | struct user_struct *user; |
73 | struct pid *pgrp; |
74 | kuid_t uid; |
75 | int ret; |
76 | |
77 | ret = ioprio_check_cap(ioprio); |
78 | if (ret) |
79 | return ret; |
80 | |
81 | ret = -ESRCH; |
82 | rcu_read_lock(); |
83 | switch (which) { |
84 | case IOPRIO_WHO_PROCESS: |
85 | if (!who) |
86 | p = current; |
87 | else |
88 | p = find_task_by_vpid(nr: who); |
89 | if (p) |
90 | ret = set_task_ioprio(task: p, ioprio); |
91 | break; |
92 | case IOPRIO_WHO_PGRP: |
93 | if (!who) |
94 | pgrp = task_pgrp(current); |
95 | else |
96 | pgrp = find_vpid(nr: who); |
97 | |
98 | read_lock(&tasklist_lock); |
99 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { |
100 | ret = set_task_ioprio(task: p, ioprio); |
101 | if (ret) { |
102 | read_unlock(&tasklist_lock); |
103 | goto out; |
104 | } |
105 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); |
106 | read_unlock(&tasklist_lock); |
107 | |
108 | break; |
109 | case IOPRIO_WHO_USER: |
110 | uid = make_kuid(current_user_ns(), uid: who); |
111 | if (!uid_valid(uid)) |
112 | break; |
113 | if (!who) |
114 | user = current_user(); |
115 | else |
116 | user = find_user(uid); |
117 | |
118 | if (!user) |
119 | break; |
120 | |
121 | for_each_process_thread(g, p) { |
122 | if (!uid_eq(task_uid(p), right: uid) || |
123 | !task_pid_vnr(tsk: p)) |
124 | continue; |
125 | ret = set_task_ioprio(task: p, ioprio); |
126 | if (ret) |
127 | goto free_uid; |
128 | } |
129 | free_uid: |
130 | if (who) |
131 | free_uid(user); |
132 | break; |
133 | default: |
134 | ret = -EINVAL; |
135 | } |
136 | |
137 | out: |
138 | rcu_read_unlock(); |
139 | return ret; |
140 | } |
141 | |
142 | /* |
143 | * If the task has set an I/O priority, use that. Otherwise, return |
144 | * the default I/O priority. |
145 | * |
146 | * Expected to be called for current task or with task_lock() held to keep |
147 | * io_context stable. |
148 | */ |
149 | int __get_task_ioprio(struct task_struct *p) |
150 | { |
151 | struct io_context *ioc = p->io_context; |
152 | int prio; |
153 | |
154 | if (p != current) |
155 | lockdep_assert_held(&p->alloc_lock); |
156 | if (ioc) |
157 | prio = ioc->ioprio; |
158 | else |
159 | prio = IOPRIO_DEFAULT; |
160 | |
161 | if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE) |
162 | prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(p), |
163 | task_nice_ioprio(p)); |
164 | return prio; |
165 | } |
166 | EXPORT_SYMBOL_GPL(__get_task_ioprio); |
167 | |
/*
 * Return the effective I/O priority of @p as computed by
 * __get_task_ioprio(), or the non-zero result of security_task_getioprio()
 * if that hook rejects the query. The task lock is held around the read.
 */
static int get_task_ioprio(struct task_struct *p)
{
	int prio;
	int err = security_task_getioprio(p);

	if (err)
		return err;

	task_lock(p);
	prio = __get_task_ioprio(p);
	task_unlock(p);

	return prio;
}
181 | |
182 | /* |
183 | * Return raw IO priority value as set by userspace. We use this for |
184 | * ioprio_get(pid, IOPRIO_WHO_PROCESS) so that we keep historical behavior and |
185 | * also so that userspace can distinguish unset IO priority (which just gets |
186 | * overriden based on task's nice value) from IO priority set to some value. |
187 | */ |
188 | static int get_task_raw_ioprio(struct task_struct *p) |
189 | { |
190 | int ret; |
191 | |
192 | ret = security_task_getioprio(p); |
193 | if (ret) |
194 | goto out; |
195 | task_lock(p); |
196 | if (p->io_context) |
197 | ret = p->io_context->ioprio; |
198 | else |
199 | ret = IOPRIO_DEFAULT; |
200 | task_unlock(p); |
201 | out: |
202 | return ret; |
203 | } |
204 | |
/*
 * Pick the "best" of two composite I/O priority values, which here is simply
 * the numerically smaller one.
 */
static int ioprio_best(unsigned short aprio, unsigned short bprio)
{
	unsigned short best = min(aprio, bprio);

	return best;
}
209 | |
210 | SYSCALL_DEFINE2(ioprio_get, int, which, int, who) |
211 | { |
212 | struct task_struct *g, *p; |
213 | struct user_struct *user; |
214 | struct pid *pgrp; |
215 | kuid_t uid; |
216 | int ret = -ESRCH; |
217 | int tmpio; |
218 | |
219 | rcu_read_lock(); |
220 | switch (which) { |
221 | case IOPRIO_WHO_PROCESS: |
222 | if (!who) |
223 | p = current; |
224 | else |
225 | p = find_task_by_vpid(nr: who); |
226 | if (p) |
227 | ret = get_task_raw_ioprio(p); |
228 | break; |
229 | case IOPRIO_WHO_PGRP: |
230 | if (!who) |
231 | pgrp = task_pgrp(current); |
232 | else |
233 | pgrp = find_vpid(nr: who); |
234 | read_lock(&tasklist_lock); |
235 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { |
236 | tmpio = get_task_ioprio(p); |
237 | if (tmpio < 0) |
238 | continue; |
239 | if (ret == -ESRCH) |
240 | ret = tmpio; |
241 | else |
242 | ret = ioprio_best(aprio: ret, bprio: tmpio); |
243 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); |
244 | read_unlock(&tasklist_lock); |
245 | |
246 | break; |
247 | case IOPRIO_WHO_USER: |
248 | uid = make_kuid(current_user_ns(), uid: who); |
249 | if (!who) |
250 | user = current_user(); |
251 | else |
252 | user = find_user(uid); |
253 | |
254 | if (!user) |
255 | break; |
256 | |
257 | for_each_process_thread(g, p) { |
258 | if (!uid_eq(task_uid(p), right: user->uid) || |
259 | !task_pid_vnr(tsk: p)) |
260 | continue; |
261 | tmpio = get_task_ioprio(p); |
262 | if (tmpio < 0) |
263 | continue; |
264 | if (ret == -ESRCH) |
265 | ret = tmpio; |
266 | else |
267 | ret = ioprio_best(aprio: ret, bprio: tmpio); |
268 | } |
269 | |
270 | if (who) |
271 | free_uid(user); |
272 | break; |
273 | default: |
274 | ret = -EINVAL; |
275 | } |
276 | |
277 | rcu_read_unlock(); |
278 | return ret; |
279 | } |
280 | |