1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler, |
4 | * for the blk-mq scheduling framework |
5 | * |
6 | * Copyright (C) 2016 Jens Axboe <axboe@kernel.dk> |
7 | */ |
8 | #include <linux/kernel.h> |
9 | #include <linux/fs.h> |
10 | #include <linux/blkdev.h> |
11 | #include <linux/bio.h> |
12 | #include <linux/module.h> |
13 | #include <linux/slab.h> |
14 | #include <linux/init.h> |
15 | #include <linux/compiler.h> |
16 | #include <linux/rbtree.h> |
17 | #include <linux/sbitmap.h> |
18 | |
19 | #include <trace/events/block.h> |
20 | |
21 | #include "elevator.h" |
22 | #include "blk.h" |
23 | #include "blk-mq.h" |
24 | #include "blk-mq-debugfs.h" |
25 | #include "blk-mq-sched.h" |
26 | |
27 | /* |
28 | * See Documentation/block/deadline-iosched.rst |
29 | */ |
30 | static const int read_expire = HZ / 2; /* max time before a read is submitted. */ |
31 | static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ |
32 | /* |
33 | * Time after which to dispatch lower priority requests even if higher |
34 | * priority requests are pending. |
35 | */ |
36 | static const int prio_aging_expire = 10 * HZ; |
37 | static const int writes_starved = 2; /* max times reads can starve a write */ |
38 | static const int fifo_batch = 16; /* # of sequential requests treated as one |
39 | by the above parameters. For throughput. */ |
40 | |
41 | enum dd_data_dir { |
42 | DD_READ = READ, |
43 | DD_WRITE = WRITE, |
44 | }; |
45 | |
46 | enum { DD_DIR_COUNT = 2 }; |
47 | |
48 | enum dd_prio { |
49 | DD_RT_PRIO = 0, |
50 | DD_BE_PRIO = 1, |
51 | DD_IDLE_PRIO = 2, |
52 | DD_PRIO_MAX = 2, |
53 | }; |
54 | |
55 | enum { DD_PRIO_COUNT = 3 }; |
56 | |
57 | /* |
58 | * I/O statistics per I/O priority. It is fine if these counters overflow. |
59 | * What matters is that these counters are at least as wide as |
60 | * log2(max_outstanding_requests). |
61 | */ |
62 | struct io_stats_per_prio { |
63 | uint32_t inserted; |
64 | uint32_t merged; |
65 | uint32_t dispatched; |
66 | atomic_t completed; |
67 | }; |
68 | |
69 | /* |
70 | * Deadline scheduler data per I/O priority (enum dd_prio). Requests are |
71 | * present on both sort_list[] and fifo_list[]. |
72 | */ |
73 | struct dd_per_prio { |
74 | struct list_head dispatch; |
75 | struct rb_root sort_list[DD_DIR_COUNT]; |
76 | struct list_head fifo_list[DD_DIR_COUNT]; |
77 | /* Position of the most recently dispatched request. */ |
78 | sector_t latest_pos[DD_DIR_COUNT]; |
79 | struct io_stats_per_prio stats; |
80 | }; |
81 | |
82 | struct deadline_data { |
83 | /* |
84 | * run time data |
85 | */ |
86 | |
87 | struct dd_per_prio per_prio[DD_PRIO_COUNT]; |
88 | |
89 | /* Data direction of latest dispatched request. */ |
90 | enum dd_data_dir last_dir; |
91 | unsigned int batching; /* number of sequential requests made */ |
92 | unsigned int starved; /* times reads have starved writes */ |
93 | |
94 | /* |
95 | * settings that change how the i/o scheduler behaves |
96 | */ |
97 | int fifo_expire[DD_DIR_COUNT]; |
98 | int fifo_batch; |
99 | int writes_starved; |
100 | int front_merges; |
101 | u32 async_depth; |
102 | int prio_aging_expire; |
103 | |
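	/*
	 * ->lock protects the per-priority lists and statistics. ->zone_lock
	 * serializes zone write locking against deadline_fifo_request() and
	 * deadline_next_request() on zoned block devices.
	 */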
104 | spinlock_t lock; |
105 | spinlock_t zone_lock; |
106 | }; |
107 | |
108 | /* Maps an I/O priority class to a deadline scheduler priority. */ |
109 | static const enum dd_prio ioprio_class_to_prio[] = { |
110 | [IOPRIO_CLASS_NONE] = DD_BE_PRIO, |
111 | [IOPRIO_CLASS_RT] = DD_RT_PRIO, |
112 | [IOPRIO_CLASS_BE] = DD_BE_PRIO, |
113 | [IOPRIO_CLASS_IDLE] = DD_IDLE_PRIO, |
114 | }; |
115 | |
116 | static inline struct rb_root * |
117 | deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq) |
118 | { |
119 | return &per_prio->sort_list[rq_data_dir(rq)]; |
120 | } |
121 | |
122 | /* |
123 | * Returns the I/O priority class (IOPRIO_CLASS_*) that has been assigned to a |
124 | * request. |
125 | */ |
126 | static u8 dd_rq_ioclass(struct request *rq) |
127 | { |
128 | return IOPRIO_PRIO_CLASS(req_get_ioprio(rq)); |
129 | } |
130 | |
131 | /* |
132 | * get the request before `rq' in sector-sorted order |
133 | */ |
134 | static inline struct request * |
135 | deadline_earlier_request(struct request *rq) |
136 | { |
137 | struct rb_node *node = rb_prev(&rq->rb_node); |
138 | |
139 | if (node) |
140 | return rb_entry_rq(node); |
141 | |
142 | return NULL; |
143 | } |
144 | |
145 | /* |
146 | * get the request after `rq' in sector-sorted order |
147 | */ |
148 | static inline struct request * |
149 | deadline_latter_request(struct request *rq) |
150 | { |
151 | struct rb_node *node = rb_next(&rq->rb_node); |
152 | |
153 | if (node) |
154 | return rb_entry_rq(node); |
155 | |
156 | return NULL; |
157 | } |
158 | |
159 | /* |
160 | * Return the first request for which blk_rq_pos() >= @pos. For zoned devices, |
161 | * return the first request after the start of the zone containing @pos. |
162 | */ |
163 | static inline struct request *deadline_from_pos(struct dd_per_prio *per_prio, |
164 | enum dd_data_dir data_dir, sector_t pos) |
165 | { |
166 | struct rb_node *node = per_prio->sort_list[data_dir].rb_node; |
167 | struct request *rq, *res = NULL; |
168 | |
169 | if (!node) |
170 | return NULL; |
171 | |
172 | rq = rb_entry_rq(node); |
173 | /* |
174 | * A zoned write may have been requeued with a starting position that |
175 | * is below that of the most recently dispatched request. Hence, for |
176 | * zoned writes, start searching from the start of a zone. |
177 | */ |
178 | if (blk_rq_is_seq_zoned_write(rq)) |
179 | pos = round_down(pos, rq->q->limits.chunk_sectors); |
180 | |
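	/*
	 * Lower-bound search over the rbtree: remember the leftmost request
	 * at or beyond @pos while descending.
	 */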
181 | while (node) { |
182 | rq = rb_entry_rq(node); |
183 | if (blk_rq_pos(rq) >= pos) { |
184 | res = rq; |
185 | node = node->rb_left; |
186 | } else { |
187 | node = node->rb_right; |
188 | } |
189 | } |
190 | return res; |
191 | } |
192 | |
193 | static void |
194 | deadline_add_rq_rb(struct dd_per_prio *per_prio, struct request *rq) |
195 | { |
196 | struct rb_root *root = deadline_rb_root(per_prio, rq); |
197 | |
198 | elv_rb_add(root, rq); |
199 | } |
200 | |
201 | static inline void |
202 | deadline_del_rq_rb(struct dd_per_prio *per_prio, struct request *rq) |
203 | { |
204 | elv_rb_del(deadline_rb_root(per_prio, rq), rq); |
205 | } |
206 | |
207 | /* |
208 | * remove rq from rbtree and fifo. |
209 | */ |
210 | static void deadline_remove_request(struct request_queue *q, |
211 | struct dd_per_prio *per_prio, |
212 | struct request *rq) |
213 | { |
	list_del_init(&rq->queuelist);
215 | |
216 | /* |
217 | * We might not be on the rbtree, if we are doing an insert merge |
218 | */ |
219 | if (!RB_EMPTY_NODE(&rq->rb_node)) |
220 | deadline_del_rq_rb(per_prio, rq); |
221 | |
222 | elv_rqhash_del(q, rq); |
223 | if (q->last_merge == rq) |
224 | q->last_merge = NULL; |
225 | } |
226 | |
227 | static void dd_request_merged(struct request_queue *q, struct request *req, |
228 | enum elv_merge type) |
229 | { |
230 | struct deadline_data *dd = q->elevator->elevator_data; |
	const u8 ioprio_class = dd_rq_ioclass(req);
232 | const enum dd_prio prio = ioprio_class_to_prio[ioprio_class]; |
233 | struct dd_per_prio *per_prio = &dd->per_prio[prio]; |
234 | |
235 | /* |
236 | * if the merge was a front merge, we need to reposition request |
237 | */ |
238 | if (type == ELEVATOR_FRONT_MERGE) { |
		elv_rb_del(deadline_rb_root(per_prio, req), req);
		deadline_add_rq_rb(per_prio, req);
241 | } |
242 | } |
243 | |
244 | /* |
245 | * Callback function that is invoked after @next has been merged into @req. |
246 | */ |
247 | static void dd_merged_requests(struct request_queue *q, struct request *req, |
248 | struct request *next) |
249 | { |
250 | struct deadline_data *dd = q->elevator->elevator_data; |
	const u8 ioprio_class = dd_rq_ioclass(next);
252 | const enum dd_prio prio = ioprio_class_to_prio[ioprio_class]; |
253 | |
254 | lockdep_assert_held(&dd->lock); |
255 | |
256 | dd->per_prio[prio].stats.merged++; |
257 | |
258 | /* |
259 | * if next expires before rq, assign its expire time to rq |
260 | * and move into next position (next will be deleted) in fifo |
261 | */ |
	if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
263 | if (time_before((unsigned long)next->fifo_time, |
264 | (unsigned long)req->fifo_time)) { |
			list_move(&req->queuelist, &next->queuelist);
266 | req->fifo_time = next->fifo_time; |
267 | } |
268 | } |
269 | |
270 | /* |
271 | * kill knowledge of next, this one is a goner |
272 | */ |
	deadline_remove_request(q, &dd->per_prio[prio], next);
274 | } |
275 | |
276 | /* |
277 | * move an entry to dispatch queue |
278 | */ |
279 | static void |
280 | deadline_move_request(struct deadline_data *dd, struct dd_per_prio *per_prio, |
281 | struct request *rq) |
282 | { |
283 | /* |
284 | * take it off the sort and fifo list |
285 | */ |
	deadline_remove_request(rq->q, per_prio, rq);
287 | } |
288 | |
289 | /* Number of requests queued for a given priority level. */ |
290 | static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio) |
291 | { |
292 | const struct io_stats_per_prio *stats = &dd->per_prio[prio].stats; |
293 | |
294 | lockdep_assert_held(&dd->lock); |
295 | |
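	/*
	 * Unsigned wrap-around keeps this difference correct even after the
	 * inserted/completed counters overflow (see struct io_stats_per_prio).
	 */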
	return stats->inserted - atomic_read(&stats->completed);
297 | } |
298 | |
299 | /* |
300 | * deadline_check_fifo returns true if and only if there are expired requests |
 * in the FIFO list. Requires !list_empty(&per_prio->fifo_list[data_dir]).
302 | */ |
303 | static inline bool deadline_check_fifo(struct dd_per_prio *per_prio, |
304 | enum dd_data_dir data_dir) |
305 | { |
306 | struct request *rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next); |
307 | |
308 | return time_is_before_eq_jiffies((unsigned long)rq->fifo_time); |
309 | } |
310 | |
311 | /* |
312 | * Check if rq has a sequential request preceding it. |
313 | */ |
314 | static bool deadline_is_seq_write(struct deadline_data *dd, struct request *rq) |
315 | { |
316 | struct request *prev = deadline_earlier_request(rq); |
317 | |
318 | if (!prev) |
319 | return false; |
320 | |
	return blk_rq_pos(prev) + blk_rq_sectors(prev) == blk_rq_pos(rq);
322 | } |
323 | |
324 | /* |
325 | * Skip all write requests that are sequential from @rq, even if we cross |
326 | * a zone boundary. |
327 | */ |
328 | static struct request *deadline_skip_seq_writes(struct deadline_data *dd, |
329 | struct request *rq) |
330 | { |
331 | sector_t pos = blk_rq_pos(rq); |
332 | |
333 | do { |
334 | pos += blk_rq_sectors(rq); |
335 | rq = deadline_latter_request(rq); |
336 | } while (rq && blk_rq_pos(rq) == pos); |
337 | |
338 | return rq; |
339 | } |
340 | |
341 | /* |
342 | * For the specified data direction, return the next request to |
343 | * dispatch using arrival ordered lists. |
344 | */ |
345 | static struct request * |
346 | deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio, |
347 | enum dd_data_dir data_dir) |
348 | { |
349 | struct request *rq, *rb_rq, *next; |
350 | unsigned long flags; |
351 | |
	if (list_empty(&per_prio->fifo_list[data_dir]))
353 | return NULL; |
354 | |
355 | rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next); |
	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
357 | return rq; |
358 | |
359 | /* |
360 | * Look for a write request that can be dispatched, that is one with |
361 | * an unlocked target zone. For some HDDs, breaking a sequential |
362 | * write stream can lead to lower throughput, so make sure to preserve |
363 | * sequential write streams, even if that stream crosses into the next |
364 | * zones and these zones are unlocked. |
365 | */ |
366 | spin_lock_irqsave(&dd->zone_lock, flags); |
367 | list_for_each_entry_safe(rq, next, &per_prio->fifo_list[DD_WRITE], |
368 | queuelist) { |
369 | /* Check whether a prior request exists for the same zone. */ |
		rb_rq = deadline_from_pos(per_prio, data_dir, blk_rq_pos(rq));
		if (rb_rq && blk_rq_pos(rb_rq) < blk_rq_pos(rq))
372 | rq = rb_rq; |
373 | if (blk_req_can_dispatch_to_zone(rq) && |
374 | (blk_queue_nonrot(rq->q) || |
375 | !deadline_is_seq_write(dd, rq))) |
376 | goto out; |
377 | } |
378 | rq = NULL; |
379 | out: |
	spin_unlock_irqrestore(&dd->zone_lock, flags);
381 | |
382 | return rq; |
383 | } |
384 | |
385 | /* |
386 | * For the specified data direction, return the next request to |
387 | * dispatch using sector position sorted lists. |
388 | */ |
389 | static struct request * |
390 | deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio, |
391 | enum dd_data_dir data_dir) |
392 | { |
393 | struct request *rq; |
394 | unsigned long flags; |
395 | |
	rq = deadline_from_pos(per_prio, data_dir,
			       per_prio->latest_pos[data_dir]);
398 | if (!rq) |
399 | return NULL; |
400 | |
	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
402 | return rq; |
403 | |
404 | /* |
405 | * Look for a write request that can be dispatched, that is one with |
406 | * an unlocked target zone. For some HDDs, breaking a sequential |
407 | * write stream can lead to lower throughput, so make sure to preserve |
408 | * sequential write streams, even if that stream crosses into the next |
409 | * zones and these zones are unlocked. |
410 | */ |
411 | spin_lock_irqsave(&dd->zone_lock, flags); |
412 | while (rq) { |
413 | if (blk_req_can_dispatch_to_zone(rq)) |
414 | break; |
415 | if (blk_queue_nonrot(rq->q)) |
416 | rq = deadline_latter_request(rq); |
417 | else |
418 | rq = deadline_skip_seq_writes(dd, rq); |
419 | } |
	spin_unlock_irqrestore(&dd->zone_lock, flags);
421 | |
422 | return rq; |
423 | } |
424 | |
425 | /* |
426 | * Returns true if and only if @rq started after @latest_start where |
427 | * @latest_start is in jiffies. |
428 | */ |
429 | static bool started_after(struct deadline_data *dd, struct request *rq, |
430 | unsigned long latest_start) |
431 | { |
432 | unsigned long start_time = (unsigned long)rq->fifo_time; |
433 | |
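	/*
	 * ->fifo_time holds the expiry deadline; subtracting fifo_expire
	 * recovers the time at which the request was inserted.
	 */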
434 | start_time -= dd->fifo_expire[rq_data_dir(rq)]; |
435 | |
436 | return time_after(start_time, latest_start); |
437 | } |
438 | |
439 | /* |
440 | * deadline_dispatch_requests selects the best request according to |
441 | * read/write expire, fifo_batch, etc and with a start time <= @latest_start. |
442 | */ |
443 | static struct request *__dd_dispatch_request(struct deadline_data *dd, |
444 | struct dd_per_prio *per_prio, |
445 | unsigned long latest_start) |
446 | { |
447 | struct request *rq, *next_rq; |
448 | enum dd_data_dir data_dir; |
449 | enum dd_prio prio; |
450 | u8 ioprio_class; |
451 | |
452 | lockdep_assert_held(&dd->lock); |
453 | |
	if (!list_empty(&per_prio->dispatch)) {
455 | rq = list_first_entry(&per_prio->dispatch, struct request, |
456 | queuelist); |
457 | if (started_after(dd, rq, latest_start)) |
458 | return NULL; |
		list_del_init(&rq->queuelist);
460 | data_dir = rq_data_dir(rq); |
461 | goto done; |
462 | } |
463 | |
464 | /* |
465 | * batches are currently reads XOR writes |
466 | */ |
	rq = deadline_next_request(dd, per_prio, dd->last_dir);
468 | if (rq && dd->batching < dd->fifo_batch) { |
469 | /* we have a next request and are still entitled to batch */ |
470 | data_dir = rq_data_dir(rq); |
471 | goto dispatch_request; |
472 | } |
473 | |
474 | /* |
475 | * at this point we are not running a batch. select the appropriate |
476 | * data direction (read / write) |
477 | */ |
478 | |
	if (!list_empty(&per_prio->fifo_list[DD_READ])) {
480 | BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_READ])); |
481 | |
		if (deadline_fifo_request(dd, per_prio, DD_WRITE) &&
483 | (dd->starved++ >= dd->writes_starved)) |
484 | goto dispatch_writes; |
485 | |
486 | data_dir = DD_READ; |
487 | |
488 | goto dispatch_find_request; |
489 | } |
490 | |
491 | /* |
492 | * there are either no reads or writes have been starved |
493 | */ |
494 | |
	if (!list_empty(&per_prio->fifo_list[DD_WRITE])) {
496 | dispatch_writes: |
497 | BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_WRITE])); |
498 | |
499 | dd->starved = 0; |
500 | |
501 | data_dir = DD_WRITE; |
502 | |
503 | goto dispatch_find_request; |
504 | } |
505 | |
506 | return NULL; |
507 | |
508 | dispatch_find_request: |
509 | /* |
510 | * we are not running a batch, find best request for selected data_dir |
511 | */ |
512 | next_rq = deadline_next_request(dd, per_prio, data_dir); |
513 | if (deadline_check_fifo(per_prio, data_dir) || !next_rq) { |
514 | /* |
515 | * A deadline has expired, the last request was in the other |
516 | * direction, or we have run out of higher-sectored requests. |
517 | * Start again from the request with the earliest expiry time. |
518 | */ |
519 | rq = deadline_fifo_request(dd, per_prio, data_dir); |
520 | } else { |
521 | /* |
522 | * The last req was the same dir and we have a next request in |
523 | * sort order. No expired requests so continue on from here. |
524 | */ |
525 | rq = next_rq; |
526 | } |
527 | |
528 | /* |
529 | * For a zoned block device, if we only have writes queued and none of |
530 | * them can be dispatched, rq will be NULL. |
531 | */ |
532 | if (!rq) |
533 | return NULL; |
534 | |
535 | dd->last_dir = data_dir; |
536 | dd->batching = 0; |
537 | |
538 | dispatch_request: |
539 | if (started_after(dd, rq, latest_start)) |
540 | return NULL; |
541 | |
542 | /* |
543 | * rq is the selected appropriate request. |
544 | */ |
545 | dd->batching++; |
546 | deadline_move_request(dd, per_prio, rq); |
547 | done: |
548 | ioprio_class = dd_rq_ioclass(rq); |
549 | prio = ioprio_class_to_prio[ioprio_class]; |
550 | dd->per_prio[prio].latest_pos[data_dir] = blk_rq_pos(rq); |
551 | dd->per_prio[prio].stats.dispatched++; |
552 | /* |
553 | * If the request needs its target zone locked, do it. |
554 | */ |
555 | blk_req_zone_write_lock(rq); |
556 | rq->rq_flags |= RQF_STARTED; |
557 | return rq; |
558 | } |
559 | |
560 | /* |
561 | * Check whether there are any requests with priority other than DD_RT_PRIO |
562 | * that were inserted more than prio_aging_expire jiffies ago. |
563 | */ |
564 | static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd, |
565 | unsigned long now) |
566 | { |
567 | struct request *rq; |
568 | enum dd_prio prio; |
569 | int prio_cnt; |
570 | |
571 | lockdep_assert_held(&dd->lock); |
572 | |
	prio_cnt = !!dd_queued(dd, DD_RT_PRIO) + !!dd_queued(dd, DD_BE_PRIO) +
		   !!dd_queued(dd, DD_IDLE_PRIO);
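	/*
	 * Aging only matters if requests are pending at two or more priority
	 * levels.
	 */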
575 | if (prio_cnt < 2) |
576 | return NULL; |
577 | |
578 | for (prio = DD_BE_PRIO; prio <= DD_PRIO_MAX; prio++) { |
		rq = __dd_dispatch_request(dd, &dd->per_prio[prio],
					   now - dd->prio_aging_expire);
581 | if (rq) |
582 | return rq; |
583 | } |
584 | |
585 | return NULL; |
586 | } |
587 | |
588 | /* |
589 | * Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests(). |
590 | * |
591 | * One confusing aspect here is that we get called for a specific |
592 | * hardware queue, but we may return a request that is for a |
593 | * different hardware queue. This is because mq-deadline has shared |
594 | * state for all hardware queues, in terms of sorting, FIFOs, etc. |
595 | */ |
596 | static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) |
597 | { |
598 | struct deadline_data *dd = hctx->queue->elevator->elevator_data; |
599 | const unsigned long now = jiffies; |
600 | struct request *rq; |
601 | enum dd_prio prio; |
602 | |
	spin_lock(&dd->lock);
604 | rq = dd_dispatch_prio_aged_requests(dd, now); |
605 | if (rq) |
606 | goto unlock; |
607 | |
608 | /* |
609 | * Next, dispatch requests in priority order. Ignore lower priority |
610 | * requests if any higher priority requests are pending. |
611 | */ |
612 | for (prio = 0; prio <= DD_PRIO_MAX; prio++) { |
		rq = __dd_dispatch_request(dd, &dd->per_prio[prio], now);
614 | if (rq || dd_queued(dd, prio)) |
615 | break; |
616 | } |
617 | |
618 | unlock: |
	spin_unlock(&dd->lock);
620 | |
621 | return rq; |
622 | } |
623 | |
624 | /* |
625 | * Called by __blk_mq_alloc_request(). The shallow_depth value set by this |
626 | * function is used by __blk_mq_get_tag(). |
627 | */ |
628 | static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) |
629 | { |
630 | struct deadline_data *dd = data->q->elevator->elevator_data; |
631 | |
632 | /* Do not throttle synchronous reads. */ |
	if (op_is_sync(opf) && !op_is_write(opf))
634 | return; |
635 | |
636 | /* |
637 | * Throttle asynchronous requests and writes such that these requests |
638 | * do not block the allocation of synchronous requests. |
639 | */ |
640 | data->shallow_depth = dd->async_depth; |
641 | } |
642 | |
643 | /* Called by blk_mq_update_nr_requests(). */ |
644 | static void dd_depth_updated(struct blk_mq_hw_ctx *hctx) |
645 | { |
646 | struct request_queue *q = hctx->queue; |
647 | struct deadline_data *dd = q->elevator->elevator_data; |
648 | struct blk_mq_tags *tags = hctx->sched_tags; |
649 | |
650 | dd->async_depth = max(1UL, 3 * q->nr_requests / 4); |
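	/*
	 * Example: with nr_requests == 64 this limits asynchronous requests
	 * and writes to at most 48 of the 64 scheduler tags.
	 */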
651 | |
	sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, dd->async_depth);
653 | } |
654 | |
655 | /* Called by blk_mq_init_hctx() and blk_mq_init_sched(). */ |
656 | static int dd_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) |
657 | { |
658 | dd_depth_updated(hctx); |
659 | return 0; |
660 | } |
661 | |
662 | static void dd_exit_sched(struct elevator_queue *e) |
663 | { |
664 | struct deadline_data *dd = e->elevator_data; |
665 | enum dd_prio prio; |
666 | |
667 | for (prio = 0; prio <= DD_PRIO_MAX; prio++) { |
668 | struct dd_per_prio *per_prio = &dd->per_prio[prio]; |
669 | const struct io_stats_per_prio *stats = &per_prio->stats; |
670 | uint32_t queued; |
671 | |
672 | WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_READ])); |
673 | WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_WRITE])); |
674 | |
		spin_lock(&dd->lock);
		queued = dd_queued(dd, prio);
		spin_unlock(&dd->lock);
678 | |
679 | WARN_ONCE(queued != 0, |
			  "statistics for priority %d: i %u m %u d %u c %u\n",
681 | prio, stats->inserted, stats->merged, |
682 | stats->dispatched, atomic_read(&stats->completed)); |
683 | } |
684 | |
	kfree(dd);
686 | } |
687 | |
688 | /* |
689 | * initialize elevator private data (deadline_data). |
690 | */ |
691 | static int dd_init_sched(struct request_queue *q, struct elevator_type *e) |
692 | { |
693 | struct deadline_data *dd; |
694 | struct elevator_queue *eq; |
695 | enum dd_prio prio; |
696 | int ret = -ENOMEM; |
697 | |
698 | eq = elevator_alloc(q, e); |
699 | if (!eq) |
700 | return ret; |
701 | |
	dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
703 | if (!dd) |
704 | goto put_eq; |
705 | |
706 | eq->elevator_data = dd; |
707 | |
708 | for (prio = 0; prio <= DD_PRIO_MAX; prio++) { |
709 | struct dd_per_prio *per_prio = &dd->per_prio[prio]; |
710 | |
		INIT_LIST_HEAD(&per_prio->dispatch);
		INIT_LIST_HEAD(&per_prio->fifo_list[DD_READ]);
		INIT_LIST_HEAD(&per_prio->fifo_list[DD_WRITE]);
714 | per_prio->sort_list[DD_READ] = RB_ROOT; |
715 | per_prio->sort_list[DD_WRITE] = RB_ROOT; |
716 | } |
717 | dd->fifo_expire[DD_READ] = read_expire; |
718 | dd->fifo_expire[DD_WRITE] = write_expire; |
719 | dd->writes_starved = writes_starved; |
720 | dd->front_merges = 1; |
721 | dd->last_dir = DD_WRITE; |
722 | dd->fifo_batch = fifo_batch; |
723 | dd->prio_aging_expire = prio_aging_expire; |
724 | spin_lock_init(&dd->lock); |
725 | spin_lock_init(&dd->zone_lock); |
726 | |
727 | /* We dispatch from request queue wide instead of hw queue */ |
728 | blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q); |
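	/*
	 * With QUEUE_FLAG_SQ_SCHED set, blk-mq runs a single hardware queue
	 * at a time, which matches the single queue-wide state kept by this
	 * scheduler.
	 */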
729 | |
730 | q->elevator = eq; |
731 | return 0; |
732 | |
733 | put_eq: |
	kobject_put(&eq->kobj);
735 | return ret; |
736 | } |
737 | |
738 | /* |
739 | * Try to merge @bio into an existing request. If @bio has been merged into |
740 | * an existing request, store the pointer to that request into *@rq. |
741 | */ |
742 | static int dd_request_merge(struct request_queue *q, struct request **rq, |
743 | struct bio *bio) |
744 | { |
745 | struct deadline_data *dd = q->elevator->elevator_data; |
746 | const u8 ioprio_class = IOPRIO_PRIO_CLASS(bio->bi_ioprio); |
747 | const enum dd_prio prio = ioprio_class_to_prio[ioprio_class]; |
748 | struct dd_per_prio *per_prio = &dd->per_prio[prio]; |
749 | sector_t sector = bio_end_sector(bio); |
750 | struct request *__rq; |
751 | |
752 | if (!dd->front_merges) |
753 | return ELEVATOR_NO_MERGE; |
754 | |
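	/*
	 * A front merge candidate is a request that starts exactly where
	 * @bio ends.
	 */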
755 | __rq = elv_rb_find(&per_prio->sort_list[bio_data_dir(bio)], sector); |
756 | if (__rq) { |
757 | BUG_ON(sector != blk_rq_pos(__rq)); |
758 | |
759 | if (elv_bio_merge_ok(__rq, bio)) { |
760 | *rq = __rq; |
			if (blk_discard_mergable(__rq))
762 | return ELEVATOR_DISCARD_MERGE; |
763 | return ELEVATOR_FRONT_MERGE; |
764 | } |
765 | } |
766 | |
767 | return ELEVATOR_NO_MERGE; |
768 | } |
769 | |
770 | /* |
771 | * Attempt to merge a bio into an existing request. This function is called |
772 | * before @bio is associated with a request. |
773 | */ |
774 | static bool dd_bio_merge(struct request_queue *q, struct bio *bio, |
775 | unsigned int nr_segs) |
776 | { |
777 | struct deadline_data *dd = q->elevator->elevator_data; |
778 | struct request *free = NULL; |
779 | bool ret; |
780 | |
	spin_lock(&dd->lock);
	ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
	spin_unlock(&dd->lock);
784 | |
785 | if (free) |
		blk_mq_free_request(free);
787 | |
788 | return ret; |
789 | } |
790 | |
791 | /* |
792 | * add rq to rbtree and fifo |
793 | */ |
794 | static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, |
795 | blk_insert_t flags, struct list_head *free) |
796 | { |
797 | struct request_queue *q = hctx->queue; |
798 | struct deadline_data *dd = q->elevator->elevator_data; |
799 | const enum dd_data_dir data_dir = rq_data_dir(rq); |
	u16 ioprio = req_get_ioprio(rq);
801 | u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio); |
802 | struct dd_per_prio *per_prio; |
803 | enum dd_prio prio; |
804 | |
805 | lockdep_assert_held(&dd->lock); |
806 | |
807 | /* |
808 | * This may be a requeue of a write request that has locked its |
	 * target zone. If that is the case, release the zone lock.
810 | */ |
811 | blk_req_zone_write_unlock(rq); |
812 | |
813 | prio = ioprio_class_to_prio[ioprio_class]; |
814 | per_prio = &dd->per_prio[prio]; |
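	/*
	 * ->elv.priv[0] is cleared in dd_prepare_request() and set here so
	 * that dd_finish_request() only accounts requests that were inserted
	 * through this scheduler.
	 */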
815 | if (!rq->elv.priv[0]) { |
816 | per_prio->stats.inserted++; |
817 | rq->elv.priv[0] = (void *)(uintptr_t)1; |
818 | } |
819 | |
820 | if (blk_mq_sched_try_insert_merge(q, rq, free)) |
821 | return; |
822 | |
823 | trace_block_rq_insert(rq); |
824 | |
825 | if (flags & BLK_MQ_INSERT_AT_HEAD) { |
		list_add(&rq->queuelist, &per_prio->dispatch);
827 | rq->fifo_time = jiffies; |
828 | } else { |
829 | struct list_head *insert_before; |
830 | |
831 | deadline_add_rq_rb(per_prio, rq); |
832 | |
833 | if (rq_mergeable(rq)) { |
834 | elv_rqhash_add(q, rq); |
835 | if (!q->last_merge) |
836 | q->last_merge = rq; |
837 | } |
838 | |
839 | /* |
840 | * set expire time and add to fifo list |
841 | */ |
842 | rq->fifo_time = jiffies + dd->fifo_expire[data_dir]; |
843 | insert_before = &per_prio->fifo_list[data_dir]; |
844 | #ifdef CONFIG_BLK_DEV_ZONED |
845 | /* |
846 | * Insert zoned writes such that requests are sorted by |
847 | * position per zone. |
848 | */ |
849 | if (blk_rq_is_seq_zoned_write(rq)) { |
850 | struct request *rq2 = deadline_latter_request(rq); |
851 | |
			if (rq2 && blk_rq_zone_no(rq2) == blk_rq_zone_no(rq))
853 | insert_before = &rq2->queuelist; |
854 | } |
855 | #endif |
		list_add_tail(&rq->queuelist, insert_before);
857 | } |
858 | } |
859 | |
860 | /* |
861 | * Called from blk_mq_insert_request() or blk_mq_dispatch_plug_list(). |
862 | */ |
863 | static void dd_insert_requests(struct blk_mq_hw_ctx *hctx, |
864 | struct list_head *list, |
865 | blk_insert_t flags) |
866 | { |
867 | struct request_queue *q = hctx->queue; |
868 | struct deadline_data *dd = q->elevator->elevator_data; |
869 | LIST_HEAD(free); |
870 | |
	spin_lock(&dd->lock);
	while (!list_empty(list)) {
873 | struct request *rq; |
874 | |
875 | rq = list_first_entry(list, struct request, queuelist); |
		list_del_init(&rq->queuelist);
		dd_insert_request(hctx, rq, flags, &free);
878 | } |
	spin_unlock(&dd->lock);
880 | |
	blk_mq_free_requests(&free);
882 | } |
883 | |
884 | /* Callback from inside blk_mq_rq_ctx_init(). */ |
885 | static void dd_prepare_request(struct request *rq) |
886 | { |
887 | rq->elv.priv[0] = NULL; |
888 | } |
889 | |
890 | static bool dd_has_write_work(struct blk_mq_hw_ctx *hctx) |
891 | { |
892 | struct deadline_data *dd = hctx->queue->elevator->elevator_data; |
893 | enum dd_prio p; |
894 | |
895 | for (p = 0; p <= DD_PRIO_MAX; p++) |
		if (!list_empty_careful(&dd->per_prio[p].fifo_list[DD_WRITE]))
897 | return true; |
898 | |
899 | return false; |
900 | } |
901 | |
902 | /* |
903 | * Callback from inside blk_mq_free_request(). |
904 | * |
905 | * For zoned block devices, write unlock the target zone of |
906 | * completed write requests. Do this while holding the zone lock |
907 | * spinlock so that the zone is never unlocked while deadline_fifo_request() |
908 | * or deadline_next_request() are executing. This function is called for |
909 | * all requests, whether or not these requests complete successfully. |
910 | * |
911 | * For a zoned block device, __dd_dispatch_request() may have stopped |
912 | * dispatching requests if all the queued requests are write requests directed |
913 | * at zones that are already locked due to on-going write requests. To ensure |
914 | * write request dispatch progress in this case, mark the queue as needing a |
915 | * restart to ensure that the queue is run again after completion of the |
916 | * request and zones being unlocked. |
917 | */ |
918 | static void dd_finish_request(struct request *rq) |
919 | { |
920 | struct request_queue *q = rq->q; |
921 | struct deadline_data *dd = q->elevator->elevator_data; |
922 | const u8 ioprio_class = dd_rq_ioclass(rq); |
923 | const enum dd_prio prio = ioprio_class_to_prio[ioprio_class]; |
924 | struct dd_per_prio *per_prio = &dd->per_prio[prio]; |
925 | |
926 | /* |
927 | * The block layer core may call dd_finish_request() without having |
928 | * called dd_insert_requests(). Skip requests that bypassed I/O |
929 | * scheduling. See also blk_mq_request_bypass_insert(). |
930 | */ |
931 | if (!rq->elv.priv[0]) |
932 | return; |
933 | |
	atomic_inc(&per_prio->stats.completed);
935 | |
936 | if (blk_queue_is_zoned(q)) { |
937 | unsigned long flags; |
938 | |
939 | spin_lock_irqsave(&dd->zone_lock, flags); |
940 | blk_req_zone_write_unlock(rq); |
		spin_unlock_irqrestore(&dd->zone_lock, flags);
942 | |
		if (dd_has_write_work(rq->mq_hctx))
			blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
945 | } |
946 | } |
947 | |
948 | static bool dd_has_work_for_prio(struct dd_per_prio *per_prio) |
949 | { |
	return !list_empty_careful(&per_prio->dispatch) ||
		!list_empty_careful(&per_prio->fifo_list[DD_READ]) ||
		!list_empty_careful(&per_prio->fifo_list[DD_WRITE]);
953 | } |
954 | |
955 | static bool dd_has_work(struct blk_mq_hw_ctx *hctx) |
956 | { |
957 | struct deadline_data *dd = hctx->queue->elevator->elevator_data; |
958 | enum dd_prio prio; |
959 | |
960 | for (prio = 0; prio <= DD_PRIO_MAX; prio++) |
		if (dd_has_work_for_prio(&dd->per_prio[prio]))
962 | return true; |
963 | |
964 | return false; |
965 | } |
966 | |
967 | /* |
968 | * sysfs parts below |
969 | */ |
970 | #define SHOW_INT(__FUNC, __VAR) \ |
971 | static ssize_t __FUNC(struct elevator_queue *e, char *page) \ |
972 | { \ |
973 | struct deadline_data *dd = e->elevator_data; \ |
974 | \ |
975 | return sysfs_emit(page, "%d\n", __VAR); \ |
976 | } |
977 | #define SHOW_JIFFIES(__FUNC, __VAR) SHOW_INT(__FUNC, jiffies_to_msecs(__VAR)) |
978 | SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]); |
979 | SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]); |
980 | SHOW_JIFFIES(deadline_prio_aging_expire_show, dd->prio_aging_expire); |
981 | SHOW_INT(deadline_writes_starved_show, dd->writes_starved); |
982 | SHOW_INT(deadline_front_merges_show, dd->front_merges); |
983 | SHOW_INT(deadline_async_depth_show, dd->async_depth); |
984 | SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch); |
985 | #undef SHOW_INT |
986 | #undef SHOW_JIFFIES |
987 | |
988 | #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ |
989 | static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ |
990 | { \ |
991 | struct deadline_data *dd = e->elevator_data; \ |
992 | int __data, __ret; \ |
993 | \ |
994 | __ret = kstrtoint(page, 0, &__data); \ |
995 | if (__ret < 0) \ |
996 | return __ret; \ |
997 | if (__data < (MIN)) \ |
998 | __data = (MIN); \ |
999 | else if (__data > (MAX)) \ |
1000 | __data = (MAX); \ |
1001 | *(__PTR) = __CONV(__data); \ |
1002 | return count; \ |
1003 | } |
1004 | #define STORE_INT(__FUNC, __PTR, MIN, MAX) \ |
1005 | STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, ) |
1006 | #define STORE_JIFFIES(__FUNC, __PTR, MIN, MAX) \ |
1007 | STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, msecs_to_jiffies) |
1008 | STORE_JIFFIES(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX); |
1009 | STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX); |
1010 | STORE_JIFFIES(deadline_prio_aging_expire_store, &dd->prio_aging_expire, 0, INT_MAX); |
1011 | STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX); |
1012 | STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1); |
1013 | STORE_INT(deadline_async_depth_store, &dd->async_depth, 1, INT_MAX); |
1014 | STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX); |
1015 | #undef STORE_FUNCTION |
1016 | #undef STORE_INT |
1017 | #undef STORE_JIFFIES |
1018 | |
1019 | #define DD_ATTR(name) \ |
1020 | __ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store) |
1021 | |
1022 | static struct elv_fs_entry deadline_attrs[] = { |
1023 | DD_ATTR(read_expire), |
1024 | DD_ATTR(write_expire), |
1025 | DD_ATTR(writes_starved), |
1026 | DD_ATTR(front_merges), |
1027 | DD_ATTR(async_depth), |
1028 | DD_ATTR(fifo_batch), |
1029 | DD_ATTR(prio_aging_expire), |
1030 | __ATTR_NULL |
1031 | }; |
1032 | |
1033 | #ifdef CONFIG_BLK_DEBUG_FS |
1034 | #define DEADLINE_DEBUGFS_DDIR_ATTRS(prio, data_dir, name) \ |
1035 | static void *deadline_##name##_fifo_start(struct seq_file *m, \ |
1036 | loff_t *pos) \ |
1037 | __acquires(&dd->lock) \ |
1038 | { \ |
1039 | struct request_queue *q = m->private; \ |
1040 | struct deadline_data *dd = q->elevator->elevator_data; \ |
1041 | struct dd_per_prio *per_prio = &dd->per_prio[prio]; \ |
1042 | \ |
1043 | spin_lock(&dd->lock); \ |
1044 | return seq_list_start(&per_prio->fifo_list[data_dir], *pos); \ |
1045 | } \ |
1046 | \ |
1047 | static void *deadline_##name##_fifo_next(struct seq_file *m, void *v, \ |
1048 | loff_t *pos) \ |
1049 | { \ |
1050 | struct request_queue *q = m->private; \ |
1051 | struct deadline_data *dd = q->elevator->elevator_data; \ |
1052 | struct dd_per_prio *per_prio = &dd->per_prio[prio]; \ |
1053 | \ |
1054 | return seq_list_next(v, &per_prio->fifo_list[data_dir], pos); \ |
1055 | } \ |
1056 | \ |
1057 | static void deadline_##name##_fifo_stop(struct seq_file *m, void *v) \ |
1058 | __releases(&dd->lock) \ |
1059 | { \ |
1060 | struct request_queue *q = m->private; \ |
1061 | struct deadline_data *dd = q->elevator->elevator_data; \ |
1062 | \ |
1063 | spin_unlock(&dd->lock); \ |
1064 | } \ |
1065 | \ |
1066 | static const struct seq_operations deadline_##name##_fifo_seq_ops = { \ |
1067 | .start = deadline_##name##_fifo_start, \ |
1068 | .next = deadline_##name##_fifo_next, \ |
1069 | .stop = deadline_##name##_fifo_stop, \ |
1070 | .show = blk_mq_debugfs_rq_show, \ |
1071 | }; \ |
1072 | \ |
1073 | static int deadline_##name##_next_rq_show(void *data, \ |
1074 | struct seq_file *m) \ |
1075 | { \ |
1076 | struct request_queue *q = data; \ |
1077 | struct deadline_data *dd = q->elevator->elevator_data; \ |
1078 | struct dd_per_prio *per_prio = &dd->per_prio[prio]; \ |
1079 | struct request *rq; \ |
1080 | \ |
1081 | rq = deadline_from_pos(per_prio, data_dir, \ |
1082 | per_prio->latest_pos[data_dir]); \ |
1083 | if (rq) \ |
1084 | __blk_mq_debugfs_rq_show(m, rq); \ |
1085 | return 0; \ |
1086 | } |
1087 | |
1088 | DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_READ, read0); |
1089 | DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_WRITE, write0); |
1090 | DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_READ, read1); |
1091 | DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_WRITE, write1); |
1092 | DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_READ, read2); |
1093 | DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_WRITE, write2); |
1094 | #undef DEADLINE_DEBUGFS_DDIR_ATTRS |
1095 | |
1096 | static int deadline_batching_show(void *data, struct seq_file *m) |
1097 | { |
1098 | struct request_queue *q = data; |
1099 | struct deadline_data *dd = q->elevator->elevator_data; |
1100 | |
	seq_printf(m, "%u\n", dd->batching);
1102 | return 0; |
1103 | } |
1104 | |
1105 | static int deadline_starved_show(void *data, struct seq_file *m) |
1106 | { |
1107 | struct request_queue *q = data; |
1108 | struct deadline_data *dd = q->elevator->elevator_data; |
1109 | |
	seq_printf(m, "%u\n", dd->starved);
1111 | return 0; |
1112 | } |
1113 | |
1114 | static int dd_async_depth_show(void *data, struct seq_file *m) |
1115 | { |
1116 | struct request_queue *q = data; |
1117 | struct deadline_data *dd = q->elevator->elevator_data; |
1118 | |
	seq_printf(m, "%u\n", dd->async_depth);
1120 | return 0; |
1121 | } |
1122 | |
1123 | static int dd_queued_show(void *data, struct seq_file *m) |
1124 | { |
1125 | struct request_queue *q = data; |
1126 | struct deadline_data *dd = q->elevator->elevator_data; |
1127 | u32 rt, be, idle; |
1128 | |
	spin_lock(&dd->lock);
	rt = dd_queued(dd, DD_RT_PRIO);
	be = dd_queued(dd, DD_BE_PRIO);
	idle = dd_queued(dd, DD_IDLE_PRIO);
	spin_unlock(&dd->lock);

	seq_printf(m, "%u %u %u\n", rt, be, idle);
1136 | |
1137 | return 0; |
1138 | } |
1139 | |
1140 | /* Number of requests owned by the block driver for a given priority. */ |
1141 | static u32 dd_owned_by_driver(struct deadline_data *dd, enum dd_prio prio) |
1142 | { |
1143 | const struct io_stats_per_prio *stats = &dd->per_prio[prio].stats; |
1144 | |
1145 | lockdep_assert_held(&dd->lock); |
1146 | |
1147 | return stats->dispatched + stats->merged - |
		atomic_read(&stats->completed);
1149 | } |
1150 | |
1151 | static int dd_owned_by_driver_show(void *data, struct seq_file *m) |
1152 | { |
1153 | struct request_queue *q = data; |
1154 | struct deadline_data *dd = q->elevator->elevator_data; |
1155 | u32 rt, be, idle; |
1156 | |
	spin_lock(&dd->lock);
	rt = dd_owned_by_driver(dd, DD_RT_PRIO);
	be = dd_owned_by_driver(dd, DD_BE_PRIO);
	idle = dd_owned_by_driver(dd, DD_IDLE_PRIO);
	spin_unlock(&dd->lock);

	seq_printf(m, "%u %u %u\n", rt, be, idle);
1164 | |
1165 | return 0; |
1166 | } |
1167 | |
1168 | #define DEADLINE_DISPATCH_ATTR(prio) \ |
1169 | static void *deadline_dispatch##prio##_start(struct seq_file *m, \ |
1170 | loff_t *pos) \ |
1171 | __acquires(&dd->lock) \ |
1172 | { \ |
1173 | struct request_queue *q = m->private; \ |
1174 | struct deadline_data *dd = q->elevator->elevator_data; \ |
1175 | struct dd_per_prio *per_prio = &dd->per_prio[prio]; \ |
1176 | \ |
1177 | spin_lock(&dd->lock); \ |
1178 | return seq_list_start(&per_prio->dispatch, *pos); \ |
1179 | } \ |
1180 | \ |
1181 | static void *deadline_dispatch##prio##_next(struct seq_file *m, \ |
1182 | void *v, loff_t *pos) \ |
1183 | { \ |
1184 | struct request_queue *q = m->private; \ |
1185 | struct deadline_data *dd = q->elevator->elevator_data; \ |
1186 | struct dd_per_prio *per_prio = &dd->per_prio[prio]; \ |
1187 | \ |
1188 | return seq_list_next(v, &per_prio->dispatch, pos); \ |
1189 | } \ |
1190 | \ |
1191 | static void deadline_dispatch##prio##_stop(struct seq_file *m, void *v) \ |
1192 | __releases(&dd->lock) \ |
1193 | { \ |
1194 | struct request_queue *q = m->private; \ |
1195 | struct deadline_data *dd = q->elevator->elevator_data; \ |
1196 | \ |
1197 | spin_unlock(&dd->lock); \ |
1198 | } \ |
1199 | \ |
1200 | static const struct seq_operations deadline_dispatch##prio##_seq_ops = { \ |
1201 | .start = deadline_dispatch##prio##_start, \ |
1202 | .next = deadline_dispatch##prio##_next, \ |
1203 | .stop = deadline_dispatch##prio##_stop, \ |
1204 | .show = blk_mq_debugfs_rq_show, \ |
1205 | } |
1206 | |
1207 | DEADLINE_DISPATCH_ATTR(0); |
1208 | DEADLINE_DISPATCH_ATTR(1); |
1209 | DEADLINE_DISPATCH_ATTR(2); |
1210 | #undef DEADLINE_DISPATCH_ATTR |
1211 | |
1212 | #define DEADLINE_QUEUE_DDIR_ATTRS(name) \ |
1213 | {#name "_fifo_list", 0400, \ |
1214 | .seq_ops = &deadline_##name##_fifo_seq_ops} |
1215 | #define DEADLINE_NEXT_RQ_ATTR(name) \ |
1216 | {#name "_next_rq", 0400, deadline_##name##_next_rq_show} |
1217 | static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = { |
1218 | DEADLINE_QUEUE_DDIR_ATTRS(read0), |
1219 | DEADLINE_QUEUE_DDIR_ATTRS(write0), |
1220 | DEADLINE_QUEUE_DDIR_ATTRS(read1), |
1221 | DEADLINE_QUEUE_DDIR_ATTRS(write1), |
1222 | DEADLINE_QUEUE_DDIR_ATTRS(read2), |
1223 | DEADLINE_QUEUE_DDIR_ATTRS(write2), |
1224 | DEADLINE_NEXT_RQ_ATTR(read0), |
1225 | DEADLINE_NEXT_RQ_ATTR(write0), |
1226 | DEADLINE_NEXT_RQ_ATTR(read1), |
1227 | DEADLINE_NEXT_RQ_ATTR(write1), |
1228 | DEADLINE_NEXT_RQ_ATTR(read2), |
1229 | DEADLINE_NEXT_RQ_ATTR(write2), |
	{"batching", 0400, deadline_batching_show},
	{"starved", 0400, deadline_starved_show},
	{"async_depth", 0400, dd_async_depth_show},
	{"dispatch0", 0400, .seq_ops = &deadline_dispatch0_seq_ops},
	{"dispatch1", 0400, .seq_ops = &deadline_dispatch1_seq_ops},
	{"dispatch2", 0400, .seq_ops = &deadline_dispatch2_seq_ops},
	{"owned_by_driver", 0400, dd_owned_by_driver_show},
	{"queued", 0400, dd_queued_show},
1238 | {}, |
1239 | }; |
1240 | #undef DEADLINE_QUEUE_DDIR_ATTRS |
1241 | #endif |
1242 | |
1243 | static struct elevator_type mq_deadline = { |
1244 | .ops = { |
1245 | .depth_updated = dd_depth_updated, |
1246 | .limit_depth = dd_limit_depth, |
1247 | .insert_requests = dd_insert_requests, |
1248 | .dispatch_request = dd_dispatch_request, |
1249 | .prepare_request = dd_prepare_request, |
1250 | .finish_request = dd_finish_request, |
1251 | .next_request = elv_rb_latter_request, |
1252 | .former_request = elv_rb_former_request, |
1253 | .bio_merge = dd_bio_merge, |
1254 | .request_merge = dd_request_merge, |
1255 | .requests_merged = dd_merged_requests, |
1256 | .request_merged = dd_request_merged, |
1257 | .has_work = dd_has_work, |
1258 | .init_sched = dd_init_sched, |
1259 | .exit_sched = dd_exit_sched, |
1260 | .init_hctx = dd_init_hctx, |
1261 | }, |
1262 | |
1263 | #ifdef CONFIG_BLK_DEBUG_FS |
1264 | .queue_debugfs_attrs = deadline_queue_debugfs_attrs, |
1265 | #endif |
1266 | .elevator_attrs = deadline_attrs, |
	.elevator_name = "mq-deadline",
	.elevator_alias = "deadline",
1269 | .elevator_features = ELEVATOR_F_ZBD_SEQ_WRITE, |
1270 | .elevator_owner = THIS_MODULE, |
1271 | }; |
MODULE_ALIAS("mq-deadline-iosched");
1273 | |
1274 | static int __init deadline_init(void) |
1275 | { |
1276 | return elv_register(&mq_deadline); |
1277 | } |
1278 | |
1279 | static void __exit deadline_exit(void) |
1280 | { |
1281 | elv_unregister(&mq_deadline); |
1282 | } |
1283 | |
1284 | module_init(deadline_init); |
1285 | module_exit(deadline_exit); |
1286 | |
MODULE_AUTHOR("Jens Axboe, Damien Le Moal and Bart Van Assche");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MQ deadline IO scheduler");
1290 | |