// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to io context handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/sched/task.h>

#include "blk.h"
#include "blk-mq-sched.h"

/*
 * For io context allocations
 */
static struct kmem_cache *iocontext_cachep;

#ifdef CONFIG_BLK_ICQ
/**
 * get_io_context - increment reference count to io_context
 * @ioc: io_context to get
 *
 * Increment reference count to @ioc.
 */
static void get_io_context(struct io_context *ioc)
{
        BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
        atomic_long_inc(&ioc->refcount);
}

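/* RCU callback that frees an icq once all RCU readers are done with it. */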
static void icq_free_icq_rcu(struct rcu_head *head)
{
        struct io_cq *icq = container_of(head, struct io_cq, __rcu_head);

        kmem_cache_free(icq->__rcu_icq_cache, icq);
}

/*
 * Exit an icq. Called with ioc locked for blk-mq, and with both ioc
 * and queue locked for legacy.
 */
static void ioc_exit_icq(struct io_cq *icq)
{
        struct elevator_type *et = icq->q->elevator->type;

        if (icq->flags & ICQ_EXITED)
                return;

        if (et->ops.exit_icq)
                et->ops.exit_icq(icq);

        icq->flags |= ICQ_EXITED;
}

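/* Exit all icqs linked to @ioc. Takes and releases ioc->lock. */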
static void ioc_exit_icqs(struct io_context *ioc)
{
        struct io_cq *icq;

        spin_lock_irq(&ioc->lock);
        hlist_for_each_entry(icq, &ioc->icq_list, ioc_node)
                ioc_exit_icq(icq);
        spin_unlock_irq(&ioc->lock);
}

/*
 * Release an icq. Called with ioc locked for blk-mq, and with both ioc
 * and queue locked for legacy.
 */
static void ioc_destroy_icq(struct io_cq *icq)
{
        struct io_context *ioc = icq->ioc;
        struct request_queue *q = icq->q;
        struct elevator_type *et = q->elevator->type;

        lockdep_assert_held(&ioc->lock);
        lockdep_assert_held(&q->queue_lock);

        if (icq->flags & ICQ_DESTROYED)
                return;

        radix_tree_delete(&ioc->icq_tree, icq->q->id);
        hlist_del_init(&icq->ioc_node);
        list_del_init(&icq->q_node);

        /*
         * Both setting lookup hint to and clearing it from @icq are done
         * under queue_lock. If it's not pointing to @icq now, it never
         * will. Hint assignment itself can race safely.
         */
        if (rcu_access_pointer(ioc->icq_hint) == icq)
                rcu_assign_pointer(ioc->icq_hint, NULL);

        ioc_exit_icq(icq);

        /*
         * @icq->q might have gone away by the time the RCU callback runs,
         * making it impossible to determine icq_cache. Record it in @icq.
         */
        icq->__rcu_icq_cache = et->icq_cache;
        icq->flags |= ICQ_DESTROYED;
        call_rcu(&icq->__rcu_head, icq_free_icq_rcu);
}

/*
 * Slow path for ioc release in put_io_context(). Performs double-lock
 * dancing to unlink all icq's and then frees ioc.
 */
static void ioc_release_fn(struct work_struct *work)
{
        struct io_context *ioc = container_of(work, struct io_context,
                                              release_work);

        spin_lock_irq(&ioc->lock);

        while (!hlist_empty(&ioc->icq_list)) {
                struct io_cq *icq = hlist_entry(ioc->icq_list.first,
                                                struct io_cq, ioc_node);
                struct request_queue *q = icq->q;

                if (spin_trylock(&q->queue_lock)) {
                        ioc_destroy_icq(icq);
                        spin_unlock(&q->queue_lock);
                } else {
                        /* Make sure q and icq cannot be freed. */
                        rcu_read_lock();

                        /* Re-acquire the locks in the correct order. */
                        spin_unlock(&ioc->lock);
                        spin_lock(&q->queue_lock);
                        spin_lock(&ioc->lock);

                        ioc_destroy_icq(icq);

                        spin_unlock(&q->queue_lock);
                        rcu_read_unlock();
                }
        }

        spin_unlock_irq(&ioc->lock);

        kmem_cache_free(iocontext_cachep, ioc);
}

/*
 * Releasing icqs requires reverse order double locking and we may already be
 * holding a queue_lock. Do it asynchronously from a workqueue.
 */
static bool ioc_delay_free(struct io_context *ioc)
{
        unsigned long flags;

        spin_lock_irqsave(&ioc->lock, flags);
        if (!hlist_empty(&ioc->icq_list)) {
                queue_work(system_power_efficient_wq, &ioc->release_work);
                spin_unlock_irqrestore(&ioc->lock, flags);
                return true;
        }
        spin_unlock_irqrestore(&ioc->lock, flags);
        return false;
}

/**
 * ioc_clear_queue - break any ioc association with the specified queue
 * @q: request_queue being cleared
 *
 * Walk @q->icq_list and exit all io_cq's.
 */
void ioc_clear_queue(struct request_queue *q)
{
        spin_lock_irq(&q->queue_lock);
        while (!list_empty(&q->icq_list)) {
                struct io_cq *icq =
                        list_first_entry(&q->icq_list, struct io_cq, q_node);

                /*
                 * Other contexts won't hold the ioc lock while waiting for
                 * the queue_lock, see details in ioc_release_fn().
                 */
                spin_lock(&icq->ioc->lock);
                ioc_destroy_icq(icq);
                spin_unlock(&icq->ioc->lock);
        }
        spin_unlock_irq(&q->queue_lock);
}
#else /* CONFIG_BLK_ICQ */
static inline void ioc_exit_icqs(struct io_context *ioc)
{
}
static inline bool ioc_delay_free(struct io_context *ioc)
{
        return false;
}
#endif /* CONFIG_BLK_ICQ */

/**
 * put_io_context - put a reference of io_context
 * @ioc: io_context to put
 *
 * Decrement reference count of @ioc and release it if the count reaches
 * zero.
 */
void put_io_context(struct io_context *ioc)
{
        BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
        if (atomic_long_dec_and_test(&ioc->refcount) && !ioc_delay_free(ioc))
                kmem_cache_free(iocontext_cachep, ioc);
}
EXPORT_SYMBOL_GPL(put_io_context);

/* Called by the exiting task */
void exit_io_context(struct task_struct *task)
{
        struct io_context *ioc;

        task_lock(task);
        ioc = task->io_context;
        task->io_context = NULL;
        task_unlock(task);

        if (atomic_dec_and_test(&ioc->active_ref)) {
                ioc_exit_icqs(ioc);
                put_io_context(ioc);
        }
}

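/*
 * Allocate and initialize a new io_context with a single reference and a
 * single active reference held by the caller. Returns NULL if the slab
 * allocation fails.
 */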
static struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
{
        struct io_context *ioc;

        ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
                                    node);
        if (unlikely(!ioc))
                return NULL;

        atomic_long_set(&ioc->refcount, 1);
        atomic_set(&ioc->active_ref, 1);
#ifdef CONFIG_BLK_ICQ
        spin_lock_init(&ioc->lock);
        INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC);
        INIT_HLIST_HEAD(&ioc->icq_list);
        INIT_WORK(&ioc->release_work, ioc_release_fn);
#endif
        ioc->ioprio = IOPRIO_DEFAULT;

        return ioc;
}

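/**
 * set_task_ioprio - set the I/O priority of a task
 * @task: task whose I/O priority is being set
 * @ioprio: new I/O priority value
 *
 * Allocates an io_context for @task if it doesn't have one yet. Returns
 * 0 on success, -EPERM if the caller isn't allowed to change @task's
 * priority, -ENOMEM on allocation failure, or an error from the security
 * hook.
 */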
int set_task_ioprio(struct task_struct *task, int ioprio)
{
        int err;
        const struct cred *cred = current_cred(), *tcred;

        rcu_read_lock();
        tcred = __task_cred(task);
        if (!uid_eq(tcred->uid, cred->euid) &&
            !uid_eq(tcred->uid, cred->uid) && !capable(CAP_SYS_NICE)) {
                rcu_read_unlock();
                return -EPERM;
        }
        rcu_read_unlock();

        err = security_task_setioprio(task, ioprio);
        if (err)
                return err;

        task_lock(task);
        if (unlikely(!task->io_context)) {
                struct io_context *ioc;

                task_unlock(task);

                ioc = alloc_io_context(GFP_ATOMIC, NUMA_NO_NODE);
                if (!ioc)
                        return -ENOMEM;

                task_lock(task);
                if (task->flags & PF_EXITING) {
                        kmem_cache_free(iocontext_cachep, ioc);
                        goto out;
                }
                if (task->io_context)
                        kmem_cache_free(iocontext_cachep, ioc);
                else
                        task->io_context = ioc;
        }
        task->io_context->ioprio = ioprio;
out:
        task_unlock(task);
        return 0;
}
EXPORT_SYMBOL_GPL(set_task_ioprio);

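/*
 * Set up @tsk's io_context at fork time: share the parent's io_context if
 * CLONE_IO is set, otherwise inherit just the parent's ioprio in a fresh
 * io_context. Returns 0 on success or -ENOMEM on allocation failure.
 */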
int __copy_io(unsigned long clone_flags, struct task_struct *tsk)
{
        struct io_context *ioc = current->io_context;

        /*
         * Share io context with parent, if CLONE_IO is set
         */
        if (clone_flags & CLONE_IO) {
                atomic_inc(&ioc->active_ref);
                tsk->io_context = ioc;
        } else if (ioprio_valid(ioc->ioprio)) {
                tsk->io_context = alloc_io_context(GFP_KERNEL, NUMA_NO_NODE);
                if (!tsk->io_context)
                        return -ENOMEM;
                tsk->io_context->ioprio = ioc->ioprio;
        }

        return 0;
}

#ifdef CONFIG_BLK_ICQ
/**
 * ioc_lookup_icq - lookup io_cq from ioc
 * @q: the associated request_queue
 *
 * Look up the io_cq associated with the current task's io_context and
 * @q. Must be called with @q->queue_lock held.
 */
struct io_cq *ioc_lookup_icq(struct request_queue *q)
{
        struct io_context *ioc = current->io_context;
        struct io_cq *icq;

        lockdep_assert_held(&q->queue_lock);

        /*
         * icq's are indexed from @ioc using radix tree and hint pointer,
         * both of which are protected with RCU. All removals are done
         * holding both q and ioc locks, and we're holding q lock - if we
         * find an icq which points to us, it's guaranteed to be valid.
         */
        rcu_read_lock();
        icq = rcu_dereference(ioc->icq_hint);
        if (icq && icq->q == q)
                goto out;

        icq = radix_tree_lookup(&ioc->icq_tree, q->id);
        if (icq && icq->q == q)
                rcu_assign_pointer(ioc->icq_hint, icq); /* allowed to race */
        else
                icq = NULL;
out:
        rcu_read_unlock();
        return icq;
}
EXPORT_SYMBOL(ioc_lookup_icq);

/**
 * ioc_create_icq - create and link io_cq
 * @q: request_queue of interest
 *
 * Make sure an io_cq linking the current task's io_context and @q exists.
 * If the icq doesn't exist, it is allocated with GFP_ATOMIC and linked.
 *
 * The caller is responsible for ensuring the io_context won't go away
 * and @q is alive and will stay alive until this function returns.
 */
static struct io_cq *ioc_create_icq(struct request_queue *q)
{
        struct io_context *ioc = current->io_context;
        struct elevator_type *et = q->elevator->type;
        struct io_cq *icq;

        /* allocate stuff */
        icq = kmem_cache_alloc_node(et->icq_cache, GFP_ATOMIC | __GFP_ZERO,
                                    q->node);
        if (!icq)
                return NULL;

        if (radix_tree_maybe_preload(GFP_ATOMIC) < 0) {
                kmem_cache_free(et->icq_cache, icq);
                return NULL;
        }

        icq->ioc = ioc;
        icq->q = q;
        INIT_LIST_HEAD(&icq->q_node);
        INIT_HLIST_NODE(&icq->ioc_node);

        /* lock both q and ioc and try to link @icq */
        spin_lock_irq(&q->queue_lock);
        spin_lock(&ioc->lock);

        if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
                hlist_add_head(&icq->ioc_node, &ioc->icq_list);
                list_add(&icq->q_node, &q->icq_list);
                if (et->ops.init_icq)
                        et->ops.init_icq(icq);
        } else {
                kmem_cache_free(et->icq_cache, icq);
                icq = ioc_lookup_icq(q);
                if (!icq)
                        printk(KERN_ERR "cfq: icq link failed!\n");
        }

        spin_unlock(&ioc->lock);
        spin_unlock_irq(&q->queue_lock);
        radix_tree_preload_end();
        return icq;
}

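/*
 * Get the io_cq linking the current task's io_context and @q, allocating
 * the io_context and/or the icq as needed. Takes a reference on the
 * io_context; returns NULL (with no reference held) on allocation failure.
 */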
struct io_cq *ioc_find_get_icq(struct request_queue *q)
{
        struct io_context *ioc = current->io_context;
        struct io_cq *icq = NULL;

        if (unlikely(!ioc)) {
                ioc = alloc_io_context(GFP_ATOMIC, q->node);
                if (!ioc)
                        return NULL;

                task_lock(current);
                if (current->io_context) {
                        kmem_cache_free(iocontext_cachep, ioc);
                        ioc = current->io_context;
                } else {
                        current->io_context = ioc;
                }

                get_io_context(ioc);
                task_unlock(current);
        } else {
                get_io_context(ioc);

                spin_lock_irq(&q->queue_lock);
                icq = ioc_lookup_icq(q);
                spin_unlock_irq(&q->queue_lock);
        }

        if (!icq) {
                icq = ioc_create_icq(q);
                if (!icq) {
                        put_io_context(ioc);
                        return NULL;
                }
        }
        return icq;
}
EXPORT_SYMBOL_GPL(ioc_find_get_icq);
#endif /* CONFIG_BLK_ICQ */

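/* Set up the slab cache for io_context allocations at boot. */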
static int __init blk_ioc_init(void)
{
        iocontext_cachep = kmem_cache_create("blkdev_ioc",
                        sizeof(struct io_context), 0, SLAB_PANIC, NULL);
        return 0;
}
subsys_initcall(blk_ioc_init);