/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Block data types and constants. Directly include this file only to
 * break an include dependency loop.
 */
#ifndef __LINUX_BLK_TYPES_H
#define __LINUX_BLK_TYPES_H

#include <linux/types.h>
#include <linux/bvec.h>
#include <linux/ktime.h>

struct bio_set;
struct bio;
struct bio_integrity_payload;
struct page;
struct block_device;
struct io_context;
struct cgroup_subsys_state;
typedef void (bio_end_io_t) (struct bio *);

/*
 * Block error status values. See block/blk-core.c:blk_errors for the details.
 * Alpha cannot write a byte atomically, so we need to use a 32-bit value.
 */
#if defined(CONFIG_ALPHA) && !defined(__alpha_bwx__)
typedef u32 __bitwise blk_status_t;
#else
typedef u8 __bitwise blk_status_t;
#endif
#define BLK_STS_OK 0
#define BLK_STS_NOTSUPP		((__force blk_status_t)1)
#define BLK_STS_TIMEOUT		((__force blk_status_t)2)
#define BLK_STS_NOSPC		((__force blk_status_t)3)
#define BLK_STS_TRANSPORT	((__force blk_status_t)4)
#define BLK_STS_TARGET		((__force blk_status_t)5)
#define BLK_STS_NEXUS		((__force blk_status_t)6)
#define BLK_STS_MEDIUM		((__force blk_status_t)7)
#define BLK_STS_PROTECTION	((__force blk_status_t)8)
#define BLK_STS_RESOURCE	((__force blk_status_t)9)
#define BLK_STS_IOERR		((__force blk_status_t)10)

/* hack for device mapper, don't use elsewhere: */
#define BLK_STS_DM_REQUEUE	((__force blk_status_t)11)

#define BLK_STS_AGAIN		((__force blk_status_t)12)

/*
 * BLK_STS_DEV_RESOURCE is returned from the driver to the block layer if
 * device related resources are unavailable, but the driver can guarantee
 * that the queue will be rerun in the future once resources become
 * available again. This is typically the case for device specific
 * resources that are consumed for IO. If the driver fails to allocate these
 * resources, we know that inflight (or pending) IO will free these
 * resources upon completion.
 *
 * This is different from BLK_STS_RESOURCE in that it explicitly references
 * a device specific resource. For resources of wider scope, allocation
 * failure can happen without having pending IO. This means that we can't
 * rely on request completions freeing these resources, as IO may not be in
 * flight. Examples of that are kernel memory allocations, DMA mappings, or
 * any other system wide resources.
 */
#define BLK_STS_DEV_RESOURCE	((__force blk_status_t)13)
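
/*
 * Illustrative sketch only (not part of this header's API): how a driver's
 * submission path might pick between the two resource errors above. The
 * boolean parameters and the helper name are hypothetical stand-ins for
 * whatever allocations a real driver performs.
 */
static inline blk_status_t example_resource_status(bool system_alloc_failed,
						   bool device_slots_exhausted)
{
	if (system_alloc_failed)
		return BLK_STS_RESOURCE;	/* system wide resource; rerun not guaranteed */
	if (device_slots_exhausted)
		return BLK_STS_DEV_RESOURCE;	/* per-device resource; completion reruns queue */
	return BLK_STS_OK;
}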

/**
 * blk_path_error - returns true if error may be path related
 * @error: status the request was completed with
 *
 * Description:
 *     This classifies block error status into non-retryable errors and ones
 *     that may be successful if retried on a failover path.
 *
 * Return:
 *     %false - retrying failover path will not help
 *     %true  - may succeed if retried
 */
static inline bool blk_path_error(blk_status_t error)
{
	switch (error) {
	case BLK_STS_NOTSUPP:
	case BLK_STS_NOSPC:
	case BLK_STS_TARGET:
	case BLK_STS_NEXUS:
	case BLK_STS_MEDIUM:
	case BLK_STS_PROTECTION:
		return false;
	}

	/* Anything else could be a path failure, so should be retried */
	return true;
}
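
/*
 * Minimal usage sketch (illustrative only, not an existing helper): a
 * multipath-aware completion path could consult blk_path_error() to decide
 * whether resubmitting the request on another path is worth trying.
 */
static inline bool example_retry_on_other_path(blk_status_t status)
{
	if (status == BLK_STS_OK)
		return false;			/* completed successfully */
	return blk_path_error(status);		/* only path errors may succeed elsewhere */
}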

/*
 * From most significant bit:
 * 1 bit: reserved for other usage, see below
 * 12 bits: original size of bio
 * 51 bits: issue time of bio
 */
#define BIO_ISSUE_RES_BITS	1
#define BIO_ISSUE_SIZE_BITS	12
#define BIO_ISSUE_RES_SHIFT	(64 - BIO_ISSUE_RES_BITS)
#define BIO_ISSUE_SIZE_SHIFT	(BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS)
#define BIO_ISSUE_TIME_MASK	((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1)
#define BIO_ISSUE_SIZE_MASK	\
	(((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT)
#define BIO_ISSUE_RES_MASK	(~((1ULL << BIO_ISSUE_RES_SHIFT) - 1))

/* Reserved bit for blk-throtl */
#define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63)

struct bio_issue {
	u64 value;
};

static inline u64 __bio_issue_time(u64 time)
{
	return time & BIO_ISSUE_TIME_MASK;
}

static inline u64 bio_issue_time(struct bio_issue *issue)
{
	return __bio_issue_time(issue->value);
}

static inline sector_t bio_issue_size(struct bio_issue *issue)
{
	return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT);
}

static inline void bio_issue_init(struct bio_issue *issue,
				  sector_t size)
{
	size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1;
	issue->value = ((issue->value & BIO_ISSUE_RES_MASK) |
			(ktime_get_ns() & BIO_ISSUE_TIME_MASK) |
			((u64)size << BIO_ISSUE_SIZE_SHIFT));
}
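
/*
 * Worked sketch of the layout above (illustrative only; the helper name is
 * hypothetical): record an issue time plus an original size in sectors and
 * read both back. Sizes wider than BIO_ISSUE_SIZE_BITS are truncated by
 * bio_issue_init() on purpose.
 */
static inline void example_bio_issue_roundtrip(void)
{
	struct bio_issue issue = { .value = 0 };

	bio_issue_init(&issue, 8);	/* 8 sectors, fits in the 12 size bits */

	(void)bio_issue_time(&issue);	/* low 51 bits: ktime_get_ns() at init */
	(void)bio_issue_size(&issue);	/* bits 51..62: returns 8 */
}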

/*
 * main unit of I/O for the block layer and lower layers (i.e. drivers and
 * stacking drivers)
 */
struct bio {
	struct bio		*bi_next;	/* request queue link */
	struct gendisk		*bi_disk;
	unsigned int		bi_opf;		/* bottom bits REQ_OP, top bits
						 * req flags. Use accessors.
						 */
	unsigned short		bi_flags;	/* status, etc and bvec pool number */
	unsigned short		bi_ioprio;
	unsigned short		bi_write_hint;
	blk_status_t		bi_status;
	u8			bi_partno;

	/* Number of segments in this BIO after
	 * physical address coalescing is performed.
	 */
	unsigned int		bi_phys_segments;

	/*
	 * To keep track of the max segment size, we account for the
	 * sizes of the first and last mergeable segments in this bio.
	 */
	unsigned int		bi_seg_front_size;
	unsigned int		bi_seg_back_size;

	struct bvec_iter	bi_iter;

	atomic_t		__bi_remaining;
	bio_end_io_t		*bi_end_io;

	void			*bi_private;
#ifdef CONFIG_BLK_CGROUP
	/*
	 * Represents the association of the css and request_queue for the bio.
	 * If a bio goes direct to device, it will not have a blkg as it will
	 * not have a request_queue associated with it. The reference is put
	 * on release of the bio.
	 */
	struct blkcg_gq		*bi_blkg;
	struct bio_issue	bi_issue;
#endif
	union {
#if defined(CONFIG_BLK_DEV_INTEGRITY)
		struct bio_integrity_payload *bi_integrity; /* data integrity */
#endif
	};

	unsigned short		bi_vcnt;	/* how many bio_vec's */

	/*
	 * Everything starting with bi_max_vecs will be preserved by bio_reset()
	 */

	unsigned short		bi_max_vecs;	/* max bvl_vecs we can hold */

	atomic_t		__bi_cnt;	/* pin count */

	struct bio_vec		*bi_io_vec;	/* the actual vec list */

	struct bio_set		*bi_pool;

	/*
	 * We can inline a number of vecs at the end of the bio, to avoid
	 * double allocations for a small number of bio_vecs. This member
	 * MUST obviously be kept at the very end of the bio.
	 */
	struct bio_vec		bi_inline_vecs[0];
};

#define BIO_RESET_BYTES		offsetof(struct bio, bi_max_vecs)
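
/*
 * Sketch of what BIO_RESET_BYTES is for (illustrative only; the real logic
 * lives in bio_reset() in block/bio.c, which also fixes up flags): fields
 * laid out before bi_max_vecs are wiped, everything from bi_max_vecs on is
 * preserved. __builtin_memset is used here just to keep the sketch free of
 * extra includes.
 */
static inline void example_bio_reset_prefix(struct bio *bio)
{
	__builtin_memset(bio, 0, BIO_RESET_BYTES);	/* clears bi_next..bi_vcnt */
}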

/*
 * bio flags
 */
#define BIO_NO_PAGE_REF	0	/* don't put the bvec pages on release */
#define BIO_SEG_VALID	1	/* bi_phys_segments valid */
#define BIO_CLONED	2	/* doesn't own data */
#define BIO_BOUNCED	3	/* bio is a bounce bio */
#define BIO_USER_MAPPED 4	/* contains user pages */
#define BIO_NULL_MAPPED 5	/* contains invalid user pages */
#define BIO_QUIET	6	/* Make BIO Quiet */
#define BIO_CHAIN	7	/* chained bio, ->bi_remaining in effect */
#define BIO_REFFED	8	/* bio has elevated ->bi_cnt */
#define BIO_THROTTLED	9	/* This bio has already been subjected to
				 * throttling rules. Don't do it again. */
#define BIO_TRACE_COMPLETION 10	/* bio_endio() should trace the final completion
				 * of this bio. */
#define BIO_QUEUE_ENTERED 11	/* can use blk_queue_enter_live() */
#define BIO_TRACKED 12		/* set if bio goes through the rq_qos path */

/* See BVEC_POOL_OFFSET below before adding new flags */
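
/*
 * The values above are bit numbers within bi_flags. A minimal sketch of how
 * they are tested (the real helpers are bio_flagged()/bio_set_flag() in
 * <linux/bio.h>; this local name is just for illustration):
 */
static inline bool example_bio_flag_test(const struct bio *bio, unsigned int bit)
{
	return (bio->bi_flags & (1U << bit)) != 0;	/* e.g. bit == BIO_CLONED */
}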

/*
 * We support 6 different bvec pools, the last one is magic in that it
 * is backed by a mempool.
 */
#define BVEC_POOL_NR		6
#define BVEC_POOL_MAX		(BVEC_POOL_NR - 1)

/*
 * Top 3 bits of bio flags indicate the pool the bvecs came from. We add
 * 1 to the actual index so that 0 indicates that there are no bvecs to be
 * freed.
 */
#define BVEC_POOL_BITS		(3)
#define BVEC_POOL_OFFSET	(16 - BVEC_POOL_BITS)
#define BVEC_POOL_IDX(bio)	((bio)->bi_flags >> BVEC_POOL_OFFSET)
#if (1 << BVEC_POOL_BITS) < (BVEC_POOL_NR + 1)
# error "BVEC_POOL_BITS is too small"
#endif
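
/*
 * Illustrative sketch of the encoding described above (hypothetical helper,
 * not a kernel API): pool index 'idx' in the range 0..BVEC_POOL_MAX is
 * stored as idx + 1 in the top BVEC_POOL_BITS of bi_flags, so that
 * BVEC_POOL_IDX() == 0 means "no bvecs to free".
 */
static inline void example_set_bvec_pool(struct bio *bio, unsigned int idx)
{
	bio->bi_flags &= (1U << BVEC_POOL_OFFSET) - 1;		/* clear old pool bits */
	bio->bi_flags |= (idx + 1) << BVEC_POOL_OFFSET;		/* store index + 1 */
}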

/*
 * Flags starting here get preserved by bio_reset() - this includes
 * only BVEC_POOL_IDX()
 */
#define BIO_RESET_BITS	BVEC_POOL_OFFSET

typedef __u32 __bitwise blk_mq_req_flags_t;

/*
 * Operations and flags common to the bio and request structures.
 * We use 8 bits for encoding the operation, and the remaining 24 for flags.
 *
 * The least significant bit of the operation number indicates the data
 * transfer direction:
 *
 *   - if the least significant bit is set transfers are TO the device
 *   - if the least significant bit is not set transfers are FROM the device
 *
 * If an operation does not transfer data the least significant bit has no
 * meaning.
 */
#define REQ_OP_BITS	8
#define REQ_OP_MASK	((1 << REQ_OP_BITS) - 1)
#define REQ_FLAG_BITS	24

enum req_opf {
	/* read sectors from the device */
	REQ_OP_READ		= 0,
	/* write sectors to the device */
	REQ_OP_WRITE		= 1,
	/* flush the volatile write cache */
	REQ_OP_FLUSH		= 2,
	/* discard sectors */
	REQ_OP_DISCARD		= 3,
	/* securely erase sectors */
	REQ_OP_SECURE_ERASE	= 5,
	/* reset a zone write pointer */
	REQ_OP_ZONE_RESET	= 6,
	/* write the same sector many times */
	REQ_OP_WRITE_SAME	= 7,
	/* write the zero filled sector many times */
	REQ_OP_WRITE_ZEROES	= 9,

	/* SCSI passthrough using struct scsi_request */
	REQ_OP_SCSI_IN		= 32,
	REQ_OP_SCSI_OUT		= 33,
	/* Driver private requests */
	REQ_OP_DRV_IN		= 34,
	REQ_OP_DRV_OUT		= 35,

	REQ_OP_LAST,
};

enum req_flag_bits {
	__REQ_FAILFAST_DEV =	/* no driver retries of device errors */
		REQ_OP_BITS,
	__REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
	__REQ_FAILFAST_DRIVER,	/* no driver retries of driver errors */
	__REQ_SYNC,		/* request is sync (sync write or read) */
	__REQ_META,		/* metadata io request */
	__REQ_PRIO,		/* boost priority in cfq */
	__REQ_NOMERGE,		/* don't touch this for merging */
	__REQ_IDLE,		/* anticipate more IO after this one */
	__REQ_INTEGRITY,	/* I/O includes block integrity payload */
	__REQ_FUA,		/* forced unit access */
	__REQ_PREFLUSH,		/* request for cache flush */
	__REQ_RAHEAD,		/* read ahead, can fail anytime */
	__REQ_BACKGROUND,	/* background IO */
	__REQ_NOWAIT,		/* Don't wait if request will block */

	/* command specific flags for REQ_OP_WRITE_ZEROES: */
	__REQ_NOUNMAP,		/* do not free blocks when zeroing */

	__REQ_HIPRI,		/* high priority request, used for polled IO */

	/* for driver use */
	__REQ_DRV,
	__REQ_SWAP,		/* swapping request. */
	__REQ_NR_BITS,		/* stops here */
};

#define REQ_FAILFAST_DEV	(1ULL << __REQ_FAILFAST_DEV)
#define REQ_FAILFAST_TRANSPORT	(1ULL << __REQ_FAILFAST_TRANSPORT)
#define REQ_FAILFAST_DRIVER	(1ULL << __REQ_FAILFAST_DRIVER)
#define REQ_SYNC		(1ULL << __REQ_SYNC)
#define REQ_META		(1ULL << __REQ_META)
#define REQ_PRIO		(1ULL << __REQ_PRIO)
#define REQ_NOMERGE		(1ULL << __REQ_NOMERGE)
#define REQ_IDLE		(1ULL << __REQ_IDLE)
#define REQ_INTEGRITY		(1ULL << __REQ_INTEGRITY)
#define REQ_FUA			(1ULL << __REQ_FUA)
#define REQ_PREFLUSH		(1ULL << __REQ_PREFLUSH)
#define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
#define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
#define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
#define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
#define REQ_HIPRI		(1ULL << __REQ_HIPRI)

#define REQ_DRV			(1ULL << __REQ_DRV)
#define REQ_SWAP		(1ULL << __REQ_SWAP)

#define REQ_FAILFAST_MASK \
	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)

#define REQ_NOMERGE_FLAGS \
	(REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA)

enum stat_group {
	STAT_READ,
	STAT_WRITE,
	STAT_DISCARD,

	NR_STAT_GROUPS
};

#define bio_op(bio) \
	((bio)->bi_opf & REQ_OP_MASK)
#define req_op(req) \
	((req)->cmd_flags & REQ_OP_MASK)
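
/*
 * Worked example of the op/flags encoding documented above REQ_OP_BITS
 * (illustrative only; the helper name is hypothetical): for a synchronous
 * metadata write with forced unit access, the low REQ_OP_BITS carry the
 * operation and the remaining bits carry the REQ_* flags.
 */
static inline void example_opf_split(struct bio *bio)
{
	bio->bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_FUA;

	/* bio_op(bio) == REQ_OP_WRITE; the flag bits sit above REQ_OP_MASK */
	(void)(bio->bi_opf & ~REQ_OP_MASK);	/* == REQ_META | REQ_SYNC | REQ_FUA */
}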

/* obsolete, don't use in new code */
static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
				    unsigned op_flags)
{
	bio->bi_opf = op | op_flags;
}

static inline bool op_is_write(unsigned int op)
{
	return (op & 1);
}

/*
 * Check if the bio or request is one that needs special treatment in the
 * flush state machine.
 */
static inline bool op_is_flush(unsigned int op)
{
	return op & (REQ_FUA | REQ_PREFLUSH);
}

/*
 * Reads are always treated as synchronous, as are requests with the FUA or
 * PREFLUSH flag. Other operations may be marked as synchronous using the
 * REQ_SYNC flag.
 */
static inline bool op_is_sync(unsigned int op)
{
	return (op & REQ_OP_MASK) == REQ_OP_READ ||
		(op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
}

static inline bool op_is_discard(unsigned int op)
{
	return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
}

static inline int op_stat_group(unsigned int op)
{
	if (op_is_discard(op))
		return STAT_DISCARD;
	return op_is_write(op);
}
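
/*
 * Usage sketch for the helpers above (illustrative only; the function and
 * the counters array are hypothetical): op_stat_group() maps an operation
 * to STAT_READ, STAT_WRITE or STAT_DISCARD, which is how per-disk I/O
 * accounting picks its bucket.
 */
static inline void example_account_op(unsigned long counters[NR_STAT_GROUPS],
				      unsigned int op)
{
	counters[op_stat_group(op)]++;
}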

typedef unsigned int blk_qc_t;
#define BLK_QC_T_NONE		-1U
#define BLK_QC_T_SHIFT		16
#define BLK_QC_T_INTERNAL	(1U << 31)

static inline bool blk_qc_t_valid(blk_qc_t cookie)
{
	return cookie != BLK_QC_T_NONE;
}

static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
{
	return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT;
}

static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
{
	return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
}

static inline bool blk_qc_t_is_internal(blk_qc_t cookie)
{
	return (cookie & BLK_QC_T_INTERNAL) != 0;
}
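
/*
 * Illustrative round trip of the polling cookie layout (a sketch, not the
 * canonical constructor, which lives in the blk-mq code): hardware queue
 * number above BLK_QC_T_SHIFT, tag in the low bits, bit 31 marking an
 * internal (scheduler) tag.
 */
static inline blk_qc_t example_make_qc_t(unsigned int queue_num,
					 unsigned int tag, bool internal)
{
	blk_qc_t cookie = (queue_num << BLK_QC_T_SHIFT) | tag;

	if (internal)
		cookie |= BLK_QC_T_INTERNAL;

	/* blk_qc_t_to_queue_num() and blk_qc_t_to_tag() invert this */
	return cookie;
}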

struct blk_rq_stat {
	u64 mean;
	u64 min;
	u64 max;
	u32 nr_samples;
	u64 batch;
};

#endif /* __LINUX_BLK_TYPES_H */