1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * fs/ioprio.c |
4 | * |
5 | * Copyright (C) 2004 Jens Axboe <axboe@kernel.dk> |
6 | * |
7 | * Helper functions for setting/querying io priorities of processes. The |
 * system calls closely mimic getpriority/setpriority, see the man page for
9 | * those. The prio argument is a composite of prio class and prio data, where |
10 | * the data argument has meaning within that class. The standard scheduling |
11 | * classes have 8 distinct prio levels, with 0 being the highest prio and 7 |
12 | * being the lowest. |
13 | * |
14 | * IOW, setting BE scheduling class with prio 2 is done ala: |
15 | * |
16 | * unsigned int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2; |
17 | * |
18 | * ioprio_set(PRIO_PROCESS, pid, prio); |
19 | * |
20 | * See also Documentation/block/ioprio.rst |
21 | * |
22 | */ |
23 | #include <linux/gfp.h> |
24 | #include <linux/kernel.h> |
25 | #include <linux/ioprio.h> |
26 | #include <linux/cred.h> |
27 | #include <linux/blkdev.h> |
28 | #include <linux/capability.h> |
29 | #include <linux/syscalls.h> |
30 | #include <linux/security.h> |
31 | #include <linux/pid_namespace.h> |
32 | |
33 | int ioprio_check_cap(int ioprio) |
34 | { |
35 | int class = IOPRIO_PRIO_CLASS(ioprio); |
36 | int level = IOPRIO_PRIO_LEVEL(ioprio); |
37 | |
38 | switch (class) { |
39 | case IOPRIO_CLASS_RT: |
40 | /* |
41 | * Originally this only checked for CAP_SYS_ADMIN, |
42 | * which was implicitly allowed for pid 0 by security |
43 | * modules such as SELinux. Make sure we check |
44 | * CAP_SYS_ADMIN first to avoid a denial/avc for |
45 | * possibly missing CAP_SYS_NICE permission. |
46 | */ |
47 | if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE)) |
48 | return -EPERM; |
49 | fallthrough; |
50 | /* rt has prio field too */ |
51 | case IOPRIO_CLASS_BE: |
52 | if (level >= IOPRIO_NR_LEVELS) |
53 | return -EINVAL; |
54 | break; |
55 | case IOPRIO_CLASS_IDLE: |
56 | break; |
57 | case IOPRIO_CLASS_NONE: |
58 | if (level) |
59 | return -EINVAL; |
60 | break; |
61 | case IOPRIO_CLASS_INVALID: |
62 | default: |
63 | return -EINVAL; |
64 | } |
65 | |
66 | return 0; |
67 | } |
68 | |
69 | SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio) |
70 | { |
71 | struct task_struct *p, *g; |
72 | struct user_struct *user; |
73 | struct pid *pgrp; |
74 | kuid_t uid; |
75 | int ret; |
76 | |
77 | ret = ioprio_check_cap(ioprio); |
78 | if (ret) |
79 | return ret; |
80 | |
81 | ret = -ESRCH; |
82 | rcu_read_lock(); |
83 | switch (which) { |
84 | case IOPRIO_WHO_PROCESS: |
85 | if (!who) |
86 | p = current; |
87 | else |
88 | p = find_task_by_vpid(nr: who); |
89 | if (p) |
90 | ret = set_task_ioprio(task: p, ioprio); |
91 | break; |
92 | case IOPRIO_WHO_PGRP: |
93 | if (!who) |
94 | pgrp = task_pgrp(current); |
95 | else |
96 | pgrp = find_vpid(nr: who); |
97 | |
98 | read_lock(&tasklist_lock); |
99 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { |
100 | ret = set_task_ioprio(task: p, ioprio); |
101 | if (ret) { |
102 | read_unlock(&tasklist_lock); |
103 | goto out; |
104 | } |
105 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); |
106 | read_unlock(&tasklist_lock); |
107 | |
108 | break; |
109 | case IOPRIO_WHO_USER: |
110 | uid = make_kuid(current_user_ns(), uid: who); |
111 | if (!uid_valid(uid)) |
112 | break; |
113 | if (!who) |
114 | user = current_user(); |
115 | else |
116 | user = find_user(uid); |
117 | |
118 | if (!user) |
119 | break; |
120 | |
121 | for_each_process_thread(g, p) { |
122 | if (!uid_eq(task_uid(p), right: uid) || |
123 | !task_pid_vnr(tsk: p)) |
124 | continue; |
125 | ret = set_task_ioprio(task: p, ioprio); |
126 | if (ret) |
127 | goto free_uid; |
128 | } |
129 | free_uid: |
130 | if (who) |
131 | free_uid(user); |
132 | break; |
133 | default: |
134 | ret = -EINVAL; |
135 | } |
136 | |
137 | out: |
138 | rcu_read_unlock(); |
139 | return ret; |
140 | } |
141 | |
/*
 * Return the I/O priority of @p as computed by __get_task_ioprio(), read
 * while holding task_lock(@p).
 *
 * If security_task_getioprio() rejects the query, its non-zero result is
 * returned instead and the task is not locked.
 */
static int get_task_ioprio(struct task_struct *p)
{
	int prio;
	int err = security_task_getioprio(p);

	if (err)
		return err;

	task_lock(p);
	prio = __get_task_ioprio(p);
	task_unlock(p);

	return prio;
}
155 | |
156 | /* |
157 | * Return raw IO priority value as set by userspace. We use this for |
158 | * ioprio_get(pid, IOPRIO_WHO_PROCESS) so that we keep historical behavior and |
159 | * also so that userspace can distinguish unset IO priority (which just gets |
160 | * overriden based on task's nice value) from IO priority set to some value. |
161 | */ |
162 | static int get_task_raw_ioprio(struct task_struct *p) |
163 | { |
164 | int ret; |
165 | |
166 | ret = security_task_getioprio(p); |
167 | if (ret) |
168 | goto out; |
169 | task_lock(p); |
170 | if (p->io_context) |
171 | ret = p->io_context->ioprio; |
172 | else |
173 | ret = IOPRIO_DEFAULT; |
174 | task_unlock(p); |
175 | out: |
176 | return ret; |
177 | } |
178 | |
/*
 * Pick the "best" of two I/O priority values, which here is simply the
 * numerically smaller of the two.
 */
static int ioprio_best(unsigned short aprio, unsigned short bprio)
{
	unsigned short best = min(aprio, bprio);

	return best;
}
183 | |
184 | SYSCALL_DEFINE2(ioprio_get, int, which, int, who) |
185 | { |
186 | struct task_struct *g, *p; |
187 | struct user_struct *user; |
188 | struct pid *pgrp; |
189 | kuid_t uid; |
190 | int ret = -ESRCH; |
191 | int tmpio; |
192 | |
193 | rcu_read_lock(); |
194 | switch (which) { |
195 | case IOPRIO_WHO_PROCESS: |
196 | if (!who) |
197 | p = current; |
198 | else |
199 | p = find_task_by_vpid(nr: who); |
200 | if (p) |
201 | ret = get_task_raw_ioprio(p); |
202 | break; |
203 | case IOPRIO_WHO_PGRP: |
204 | if (!who) |
205 | pgrp = task_pgrp(current); |
206 | else |
207 | pgrp = find_vpid(nr: who); |
208 | read_lock(&tasklist_lock); |
209 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { |
210 | tmpio = get_task_ioprio(p); |
211 | if (tmpio < 0) |
212 | continue; |
213 | if (ret == -ESRCH) |
214 | ret = tmpio; |
215 | else |
216 | ret = ioprio_best(aprio: ret, bprio: tmpio); |
217 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); |
218 | read_unlock(&tasklist_lock); |
219 | |
220 | break; |
221 | case IOPRIO_WHO_USER: |
222 | uid = make_kuid(current_user_ns(), uid: who); |
223 | if (!who) |
224 | user = current_user(); |
225 | else |
226 | user = find_user(uid); |
227 | |
228 | if (!user) |
229 | break; |
230 | |
231 | for_each_process_thread(g, p) { |
232 | if (!uid_eq(task_uid(p), right: user->uid) || |
233 | !task_pid_vnr(tsk: p)) |
234 | continue; |
235 | tmpio = get_task_ioprio(p); |
236 | if (tmpio < 0) |
237 | continue; |
238 | if (ret == -ESRCH) |
239 | ret = tmpio; |
240 | else |
241 | ret = ioprio_best(aprio: ret, bprio: tmpio); |
242 | } |
243 | |
244 | if (who) |
245 | free_uid(user); |
246 | break; |
247 | default: |
248 | ret = -EINVAL; |
249 | } |
250 | |
251 | rcu_read_unlock(); |
252 | return ret; |
253 | } |
254 | |