/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BLK_CGROUP_PRIVATE_H
#define _BLK_CGROUP_PRIVATE_H
/*
 * block cgroup private header
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *	              Nauman Rafique <nauman@google.com>
 */

#include <linux/blk-cgroup.h>
#include <linux/cgroup.h>
#include <linux/kthread.h>
#include <linux/blk-mq.h>
#include <linux/llist.h>
#include "blk.h"

struct blkcg_gq;
struct blkg_policy_data;

/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH	(INT_MAX / 2)

#ifdef CONFIG_BLK_CGROUP

enum blkg_iostat_type {
	BLKG_IOSTAT_READ,
	BLKG_IOSTAT_WRITE,
	BLKG_IOSTAT_DISCARD,

	BLKG_IOSTAT_NR,
};

struct blkg_iostat {
	u64				bytes[BLKG_IOSTAT_NR];
	u64				ios[BLKG_IOSTAT_NR];
};

struct blkg_iostat_set {
	struct u64_stats_sync		sync;
	struct blkcg_gq		       *blkg;
	struct llist_node		lnode;
	int				lqueued;	/* queued in llist */
	struct blkg_iostat		cur;
	struct blkg_iostat		last;
};
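
/*
 * Illustrative only: one way to snapshot a blkg_iostat_set consistently
 * using the u64_stats seqcount, the standard pattern for 64-bit per-cpu
 * stats that may tear on 32-bit SMP. blkg_iostat_snapshot() is a sketch,
 * not a kernel API; it only shows how ->sync guards ->cur.
 */
static inline struct blkg_iostat blkg_iostat_snapshot(struct blkg_iostat_set *bis)
{
	struct blkg_iostat cur;
	unsigned int seq;

	do {
		seq = u64_stats_fetch_begin(&bis->sync);
		cur = bis->cur;
	} while (u64_stats_fetch_retry(&bis->sync, seq));

	return cur;
}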

/* association between a blk cgroup and a request queue */
struct blkcg_gq {
	/* Pointer to the associated request_queue */
	struct request_queue		*q;
	struct list_head		q_node;
	struct hlist_node		blkcg_node;
	struct blkcg			*blkcg;

	/* all non-root blkcg_gq's are guaranteed to have access to parent */
	struct blkcg_gq			*parent;

	/* reference count */
	struct percpu_ref		refcnt;

	/* is this blkg online? protected by both blkcg and q locks */
	bool				online;

	struct blkg_iostat_set __percpu	*iostat_cpu;
	struct blkg_iostat_set		iostat;

	struct blkg_policy_data		*pd[BLKCG_MAX_POLS];
#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
	spinlock_t			async_bio_lock;
	struct bio_list			async_bios;
#endif
	union {
		struct work_struct	async_bio_work;
		struct work_struct	free_work;
	};

	atomic_t			use_delay;
	atomic64_t			delay_nsec;
	atomic64_t			delay_start;
	u64				last_delay;
	int				last_use;

	struct rcu_head			rcu_head;
};

struct blkcg {
	struct cgroup_subsys_state	css;
	spinlock_t			lock;
	refcount_t			online_pin;

	struct radix_tree_root		blkg_tree;
	struct blkcg_gq	__rcu		*blkg_hint;
	struct hlist_head		blkg_list;

	struct blkcg_policy_data	*cpd[BLKCG_MAX_POLS];

	struct list_head		all_blkcgs_node;

	/*
	 * List of updated percpu blkg_iostat_set's since the last flush.
	 */
	struct llist_head __percpu	*lhead;

#ifdef CONFIG_BLK_CGROUP_FC_APPID
	char				fc_app_id[FC_APPID_LEN];
#endif
#ifdef CONFIG_CGROUP_WRITEBACK
	struct list_head		cgwb_list;
#endif
};

static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
	return css ? container_of(css, struct blkcg, css) : NULL;
}

/*
 * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a
 * request_queue (q). This is used by blkcg policies which need to track
 * information per blkcg - q pair.
 *
 * There can be multiple active blkcg policies and each blkg:policy pair is
 * represented by a blkg_policy_data which is allocated and freed by each
 * policy's pd_alloc/free_fn() methods. A policy can allocate a private data
 * area by allocating a larger data structure which embeds blkg_policy_data
 * at the beginning.
 */
struct blkg_policy_data {
	/* the blkg and policy id this per-policy data belongs to */
	struct blkcg_gq			*blkg;
	int				plid;
	bool				online;
};
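
/*
 * Illustrative only: how a policy typically embeds blkg_policy_data in a
 * larger per-blkg structure, as described above. "foo" is a hypothetical
 * policy, not an in-tree one.
 */
struct foo_blkg_data {
	struct blkg_policy_data	pd;	/* must come first */
	u64			budget;	/* policy-private state */
};

static inline struct foo_blkg_data *pd_to_foo(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct foo_blkg_data, pd) : NULL;
}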

/*
 * Policies that need to keep per-blkcg data which is independent from any
 * request_queue associated with it should implement cpd_alloc/free_fn()
 * methods. A policy can allocate a private data area by allocating a larger
 * data structure which embeds blkcg_policy_data at the beginning.
 * cpd_init() is invoked to let each policy handle per-blkcg data.
 */
struct blkcg_policy_data {
	/* the blkcg and policy id this per-policy data belongs to */
	struct blkcg			*blkcg;
	int				plid;
};
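
/*
 * Illustrative only: the same embedding pattern for per-blkcg data, again
 * with the hypothetical "foo" policy.
 */
struct foo_blkcg_data {
	struct blkcg_policy_data	cpd;	/* must come first */
	unsigned int			dfl_weight;
};

static inline struct foo_blkcg_data *cpd_to_foo(struct blkcg_policy_data *cpd)
{
	return cpd ? container_of(cpd, struct foo_blkcg_data, cpd) : NULL;
}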

typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(struct gendisk *disk,
		struct blkcg *blkcg, gfp_t gfp);
typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
		struct seq_file *s);

struct blkcg_policy {
	int				plid;
	/* cgroup files for the policy */
	struct cftype			*dfl_cftypes;
	struct cftype			*legacy_cftypes;

	/* operations */
	blkcg_pol_alloc_cpd_fn		*cpd_alloc_fn;
	blkcg_pol_free_cpd_fn		*cpd_free_fn;

	blkcg_pol_alloc_pd_fn		*pd_alloc_fn;
	blkcg_pol_init_pd_fn		*pd_init_fn;
	blkcg_pol_online_pd_fn		*pd_online_fn;
	blkcg_pol_offline_pd_fn		*pd_offline_fn;
	blkcg_pol_free_pd_fn		*pd_free_fn;
	blkcg_pol_reset_pd_stats_fn	*pd_reset_stats_fn;
	blkcg_pol_stat_pd_fn		*pd_stat_fn;
};

extern struct blkcg blkcg_root;
extern bool blkcg_debug_stats;

void blkg_init_queue(struct request_queue *q);
int blkcg_init_disk(struct gendisk *disk);
void blkcg_exit_disk(struct gendisk *disk);

/* Blkio controller policy registration */
int blkcg_policy_register(struct blkcg_policy *pol);
void blkcg_policy_unregister(struct blkcg_policy *pol);
int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol);
void blkcg_deactivate_policy(struct gendisk *disk,
			     const struct blkcg_policy *pol);
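
/*
 * Illustrative only: the usual shape of a policy definition and its
 * registration at init time. The foo_* ops and blkcg_policy_foo symbol
 * are hypothetical; real examples live in blk-iolatency.c and blk-iocost.c.
 */
#if 0	/* sketch, not compiled */
static struct blkcg_policy blkcg_policy_foo = {
	.pd_alloc_fn	= foo_pd_alloc,
	.pd_init_fn	= foo_pd_init,
	.pd_free_fn	= foo_pd_free,
};

static int __init foo_init(void)
{
	/* blkcg_policy_register() assigns ->plid on success */
	return blkcg_policy_register(&blkcg_policy_foo);
}
module_init(foo_init);
#endif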

const char *blkg_dev_name(struct blkcg_gq *blkg);
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
		       u64 (*prfill)(struct seq_file *,
				     struct blkg_policy_data *, int),
		       const struct blkcg_policy *pol, int data,
		       bool show_total);
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);

struct blkg_conf_ctx {
	char				*input;
	char				*body;
	struct block_device		*bdev;
	struct blkcg_gq			*blkg;
};

void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input);
int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx);
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
		   struct blkg_conf_ctx *ctx);
void blkg_conf_exit(struct blkg_conf_ctx *ctx);
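
/*
 * Illustrative only: the init/prep/exit lifecycle of a blkg_conf_ctx as a
 * cgroup file write handler would use it. foo_write() and blkcg_policy_foo
 * are hypothetical; compare iolatency_set_limit() for a real user.
 */
#if 0	/* sketch, not compiled */
static ssize_t foo_write(struct kernfs_open_file *of, char *buf,
			 size_t nbytes, loff_t off)
{
	struct blkcg *blkcg = css_to_blkcg(of_css(of));
	struct blkg_conf_ctx ctx;
	int ret;

	blkg_conf_init(&ctx, buf);

	/* parses "MAJ:MIN body", resolves the bdev and sets up ctx.blkg */
	ret = blkg_conf_prep(blkcg, &blkcg_policy_foo, &ctx);
	if (ret)
		goto out;

	/* apply the per-blkg setting from ctx.body to ctx.blkg here */

out:
	blkg_conf_exit(&ctx);	/* always pairs with blkg_conf_init() */
	return ret ?: nbytes;
}
#endif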

/**
 * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
 * @bio: the target bio
 *
 * Return: true if this bio needs to be submitted with the root blkg context.
 *
 * In order to avoid priority inversions we sometimes need to issue a bio as if
 * it were attached to the root blkg, and then backcharge to the actual owning
 * blkg. The idea is we do bio_blkcg_css() to look up the actual context for
 * the bio and attach the appropriate blkg to the bio. Then we call this helper
 * and if it is true run with the root blkg for that queue and then do any
 * backcharging to the originating cgroup once the io is complete.
 */
static inline bool bio_issue_as_root_blkg(struct bio *bio)
{
	return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
}
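
/*
 * Illustrative only: how an issue path might pick the blkg to run under,
 * per the backcharging scheme described above. example_issue_blkg() is a
 * hypothetical helper, not a kernel API.
 */
static inline struct blkcg_gq *example_issue_blkg(struct bio *bio,
						  struct request_queue *q)
{
	if (bio_issue_as_root_blkg(bio))
		return q->root_blkg;	/* issue as root, backcharge later */
	return bio->bi_blkg;		/* normal case: the owning blkg */
}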

/**
 * blkg_lookup - lookup blkg for the specified blkcg - q pair
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Lookup blkg for the @blkcg - @q pair.
 *
 * Must be called in an RCU critical section.
 */
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
					   struct request_queue *q)
{
	struct blkcg_gq *blkg;

	if (blkcg == &blkcg_root)
		return q->root_blkg;

	blkg = rcu_dereference_check(blkcg->blkg_hint,
				     lockdep_is_held(&q->queue_lock));
	if (blkg && blkg->q == q)
		return blkg;

	blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
	if (blkg && blkg->q != q)
		blkg = NULL;
	return blkg;
}

/**
 * blkg_to_pd - get policy private data
 * @blkg: blkg of interest
 * @pol: policy of interest
 *
 * Return pointer to private data associated with the @blkg-@pol pair.
 */
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol)
{
	return blkg ? blkg->pd[pol->plid] : NULL;
}

static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
						     struct blkcg_policy *pol)
{
	return blkcg ? blkcg->cpd[pol->plid] : NULL;
}

/**
 * pd_to_blkg - get blkg associated with policy private data
 * @pd: policy private data of interest
 *
 * @pd is policy private data. Determine the blkg it's associated with.
 */
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
{
	return pd ? pd->blkg : NULL;
}

static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
{
	return cpd ? cpd->blkcg : NULL;
}

/**
 * blkg_path - format cgroup path of blkg
 * @blkg: blkg of interest
 * @buf: target buffer
 * @buflen: target buffer length
 *
 * Format the path of the cgroup of @blkg into @buf.
 */
static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
{
	return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
}

/**
 * blkg_get - get a blkg reference
 * @blkg: blkg to get
 *
 * The caller should be holding an existing reference.
 */
static inline void blkg_get(struct blkcg_gq *blkg)
{
	percpu_ref_get(&blkg->refcnt);
}

/**
 * blkg_tryget - try and get a blkg reference
 * @blkg: blkg to get
 *
 * This is for use when doing an RCU lookup of the blkg. We may be in the midst
 * of freeing this blkg, so we can only use it if the refcnt is not zero.
 */
static inline bool blkg_tryget(struct blkcg_gq *blkg)
{
	return blkg && percpu_ref_tryget(&blkg->refcnt);
}
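
/*
 * Illustrative only: combining blkg_lookup() and blkg_tryget() to resolve
 * and pin a blkg under RCU, per the lookup rules above. A hypothetical
 * helper, not a kernel API; on success the caller owns the reference and
 * drops it with blkg_put().
 */
static inline struct blkcg_gq *
example_blkg_lookup_pinned(struct blkcg *blkcg, struct request_queue *q)
{
	struct blkcg_gq *blkg;

	rcu_read_lock();
	blkg = blkg_lookup(blkcg, q);
	if (blkg && !blkg_tryget(blkg))
		blkg = NULL;	/* racing with blkg destruction */
	rcu_read_unlock();

	return blkg;
}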

/**
 * blkg_put - put a blkg reference
 * @blkg: blkg to put
 */
static inline void blkg_put(struct blkcg_gq *blkg)
{
	percpu_ref_put(&blkg->refcnt);
}

/**
 * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Walk @d_blkg through the descendants of @p_blkg. Must be used with RCU
 * read locked. If called under either blkcg or queue lock, the iteration
 * is guaranteed to include all and only online blkgs. The caller may
 * update @pos_css by calling css_rightmost_descendant() to skip subtree.
 * @p_blkg is included in the iteration and the first node to be visited.
 */
#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css),	\
					    (p_blkg)->q)))

/**
 * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Similar to blkg_for_each_descendant_pre() but performs post-order
 * traversal instead. Synchronization rules are the same. @p_blkg is
 * included in the iteration and the last node to be visited.
 */
#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css),	\
					    (p_blkg)->q)))
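
/*
 * Illustrative only: walking a subtree with the pre-order iterator above.
 * example_count_online() is a hypothetical helper; it assumes the caller
 * holds rcu_read_lock() plus the blkcg or queue lock, per the iterator's
 * synchronization rules.
 */
static inline int example_count_online(struct blkcg_gq *p_blkg)
{
	struct cgroup_subsys_state *pos_css;
	struct blkcg_gq *blkg;
	int nr = 0;

	blkg_for_each_descendant_pre(blkg, pos_css, p_blkg)
		if (blkg->online)
			nr++;	/* visits p_blkg first, then descendants */

	return nr;
}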

static inline void blkcg_bio_issue_init(struct bio *bio)
{
	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
}

static inline void blkcg_use_delay(struct blkcg_gq *blkg)
{
	if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
		return;
	if (atomic_add_return(1, &blkg->use_delay) == 1)
		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
}

static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
{
	int old = atomic_read(&blkg->use_delay);

	if (WARN_ON_ONCE(old < 0))
		return 0;
	if (old == 0)
		return 0;

	/*
	 * We do this song and dance because we can race with somebody else
	 * adding or removing delay. If we just did an atomic_dec we'd end up
	 * negative and we'd already be in trouble. We need to subtract 1 and
	 * then check to see if we were the last delay so we can drop the
	 * congestion count on the cgroup.
	 */
	while (old && !atomic_try_cmpxchg(&blkg->use_delay, &old, old - 1))
		;

	if (old == 0)
		return 0;
	if (old == 1)
		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
	return 1;
}

/**
 * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount
 * @blkg: target blkg
 * @delay: delay duration in nsecs
 *
 * When enabled with this function, the delay is not decayed and must be
 * explicitly cleared with blkcg_clear_delay(). Must not be mixed with
 * blkcg_[un]use_delay() and blkcg_add_delay() usages.
 */
static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay)
{
	int old = atomic_read(&blkg->use_delay);

	/* We only want 1 person setting the congestion count for this blkg. */
	if (!old && atomic_try_cmpxchg(&blkg->use_delay, &old, -1))
		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);

	atomic64_set(&blkg->delay_nsec, delay);
}

/**
 * blkcg_clear_delay - Disable allocator delay mechanism
 * @blkg: target blkg
 *
 * Disable use_delay mechanism. See blkcg_set_delay().
 */
static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
{
	int old = atomic_read(&blkg->use_delay);

	/* We only want 1 person clearing the congestion count for this blkg. */
	if (old && atomic_try_cmpxchg(&blkg->use_delay, &old, 0))
		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
}
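
/*
 * Illustrative only: a throttler arming a fixed, non-decaying delay while
 * congested and clearing it once pressure subsides, per the pairing rules
 * above. example_throttle() and the 1ms value are hypothetical.
 */
static inline void example_throttle(struct blkcg_gq *blkg, bool congested)
{
	if (congested)
		blkcg_set_delay(blkg, NSEC_PER_MSEC);
	else
		blkcg_clear_delay(blkg);
}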

/**
 * blk_cgroup_mergeable - Determine whether to allow or disallow merges
 * @rq: request to merge into
 * @bio: bio to merge
 *
 * @bio and @rq should belong to the same cgroup and their issue_as_root should
 * match. The latter is necessary as we don't want to throttle e.g. a metadata
 * update because it happens to be next to a regular IO.
 */
static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio)
{
	return rq->bio->bi_blkg == bio->bi_blkg &&
		bio_issue_as_root_blkg(rq->bio) == bio_issue_as_root_blkg(bio);
}

void blk_cgroup_bio_start(struct bio *bio);
void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);

#else	/* CONFIG_BLK_CGROUP */

struct blkg_policy_data {
};

struct blkcg_policy_data {
};

struct blkcg_policy {
};

struct blkcg {
};

static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline void blkg_init_queue(struct request_queue *q) { }
static inline int blkcg_init_disk(struct gendisk *disk) { return 0; }
static inline void blkcg_exit_disk(struct gendisk *disk) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
static inline int blkcg_activate_policy(struct gendisk *disk,
					const struct blkcg_policy *pol) { return 0; }
static inline void blkcg_deactivate_policy(struct gendisk *disk,
					   const struct blkcg_policy *pol) { }

static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }
static inline void blkcg_bio_issue_init(struct bio *bio) { }
static inline void blk_cgroup_bio_start(struct bio *bio) { }
static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; }

#define blk_queue_for_each_rl(rl, q)	\
	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)

#endif	/* CONFIG_BLK_CGROUP */

#endif	/* _BLK_CGROUP_PRIVATE_H */