1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
 * Block data types and constants. Include this file directly only to
 * break an include dependency loop.
5 | */ |
6 | #ifndef __LINUX_BLK_TYPES_H |
7 | #define __LINUX_BLK_TYPES_H |
8 | |
9 | #include <linux/types.h> |
10 | #include <linux/bvec.h> |
11 | #include <linux/device.h> |
12 | #include <linux/ktime.h> |
13 | #include <linux/rw_hint.h> |
14 | |
15 | struct bio_set; |
16 | struct bio; |
17 | struct bio_integrity_payload; |
18 | struct page; |
19 | struct io_context; |
20 | struct cgroup_subsys_state; |
21 | typedef void (bio_end_io_t) (struct bio *); |
22 | struct bio_crypt_ctx; |
23 | |
24 | /* |
25 | * The basic unit of block I/O is a sector. It is used in a number of contexts |
26 | * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9 |
27 | * bytes. Variables of type sector_t represent an offset or size that is a |
28 | * multiple of 512 bytes. Hence these two constants. |
29 | */ |
30 | #ifndef SECTOR_SHIFT |
31 | #define SECTOR_SHIFT 9 |
32 | #endif |
33 | #ifndef SECTOR_SIZE |
34 | #define SECTOR_SIZE (1 << SECTOR_SHIFT) |
35 | #endif |
36 | |
37 | #define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) |
38 | #define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) |
39 | #define SECTOR_MASK (PAGE_SECTORS - 1) |
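
/*
 * Example (illustrative only): converting between byte counts and 512-byte
 * sectors with the constants above; the variable names are hypothetical.
 *
 *	sector_t sects = bytes >> SECTOR_SHIFT;		(4096 bytes -> 8 sectors)
 *	u64 count = (u64)sects << SECTOR_SHIFT;		(back to a byte count)
 *
 * With 4 KiB pages, PAGE_SECTORS_SHIFT is 3 (12 - 9) and PAGE_SECTORS is 8,
 * i.e. one page spans eight 512-byte sectors.
 */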
40 | |
41 | struct block_device { |
42 | sector_t bd_start_sect; |
43 | sector_t bd_nr_sectors; |
44 | struct gendisk * bd_disk; |
45 | struct request_queue * bd_queue; |
46 | struct disk_stats __percpu *bd_stats; |
47 | unsigned long bd_stamp; |
48 | bool bd_read_only; /* read-only policy */ |
49 | u8 bd_partno; |
50 | bool bd_write_holder; |
51 | bool bd_has_submit_bio; |
52 | dev_t bd_dev; |
53 | struct inode *bd_inode; /* will die */ |
54 | |
55 | atomic_t bd_openers; |
56 | spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ |
57 | void * bd_claiming; |
58 | void * bd_holder; |
59 | const struct blk_holder_ops *bd_holder_ops; |
60 | struct mutex bd_holder_lock; |
61 | int bd_holders; |
62 | struct kobject *bd_holder_dir; |
63 | |
64 | atomic_t bd_fsfreeze_count; /* number of freeze requests */ |
65 | struct mutex bd_fsfreeze_mutex; /* serialize freeze/thaw */ |
66 | |
67 | struct partition_meta_info *bd_meta_info; |
68 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
69 | bool bd_make_it_fail; |
70 | #endif |
71 | bool bd_ro_warned; |
72 | int bd_writers; |
73 | /* |
74 | * keep this out-of-line as it's both big and not needed in the fast |
75 | * path |
76 | */ |
77 | struct device bd_device; |
78 | } __randomize_layout; |
79 | |
80 | #define bdev_whole(_bdev) \ |
81 | ((_bdev)->bd_disk->part0) |
82 | |
83 | #define dev_to_bdev(device) \ |
84 | container_of((device), struct block_device, bd_device) |
85 | |
86 | #define bdev_kobj(_bdev) \ |
87 | (&((_bdev)->bd_device.kobj)) |
88 | |
89 | /* |
90 | * Block error status values. See block/blk-core:blk_errors for the details. |
 * Alpha cannot write a byte atomically, so we need to use a 32-bit value.
92 | */ |
93 | #if defined(CONFIG_ALPHA) && !defined(__alpha_bwx__) |
94 | typedef u32 __bitwise blk_status_t; |
95 | typedef u32 blk_short_t; |
96 | #else |
97 | typedef u8 __bitwise blk_status_t; |
98 | typedef u16 blk_short_t; |
99 | #endif |
100 | #define BLK_STS_OK 0 |
101 | #define BLK_STS_NOTSUPP ((__force blk_status_t)1) |
102 | #define BLK_STS_TIMEOUT ((__force blk_status_t)2) |
103 | #define BLK_STS_NOSPC ((__force blk_status_t)3) |
104 | #define BLK_STS_TRANSPORT ((__force blk_status_t)4) |
105 | #define BLK_STS_TARGET ((__force blk_status_t)5) |
106 | #define BLK_STS_RESV_CONFLICT ((__force blk_status_t)6) |
107 | #define BLK_STS_MEDIUM ((__force blk_status_t)7) |
108 | #define BLK_STS_PROTECTION ((__force blk_status_t)8) |
109 | #define BLK_STS_RESOURCE ((__force blk_status_t)9) |
110 | #define BLK_STS_IOERR ((__force blk_status_t)10) |
111 | |
112 | /* hack for device mapper, don't use elsewhere: */ |
113 | #define BLK_STS_DM_REQUEUE ((__force blk_status_t)11) |
114 | |
115 | /* |
 * BLK_STS_AGAIN should only be returned if REQ_NOWAIT is set
 * and the bio would block (cf. bio_wouldblock_error())
118 | */ |
119 | #define BLK_STS_AGAIN ((__force blk_status_t)12) |
120 | |
121 | /* |
122 | * BLK_STS_DEV_RESOURCE is returned from the driver to the block layer if |
123 | * device related resources are unavailable, but the driver can guarantee |
124 | * that the queue will be rerun in the future once resources become |
125 | * available again. This is typically the case for device specific |
 * resources that are consumed for IO. If the driver fails to allocate these
 * resources, we know that in-flight (or pending) IO will free these
 * resources upon completion.
129 | * |
130 | * This is different from BLK_STS_RESOURCE in that it explicitly references |
131 | * a device specific resource. For resources of wider scope, allocation |
132 | * failure can happen without having pending IO. This means that we can't |
133 | * rely on request completions freeing these resources, as IO may not be in |
134 | * flight. Examples of that are kernel memory allocations, DMA mappings, or |
135 | * any other system wide resources. |
136 | */ |
137 | #define BLK_STS_DEV_RESOURCE ((__force blk_status_t)13) |
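
/*
 * Example (hedged sketch, not a real driver): how a blk-mq ->queue_rq()
 * implementation might pick between the two resource errors.  All helper
 * names below are hypothetical.
 *
 *	if (!example_get_device_tag(dev))
 *		return BLK_STS_DEV_RESOURCE;
 *	if (!example_map_dma(dev, rq))
 *		return BLK_STS_RESOURCE;
 *	return BLK_STS_OK;
 *
 * A completion is guaranteed to free a device tag, so BLK_STS_DEV_RESOURCE is
 * safe there; the DMA mapping is a system-wide resource with no such
 * guarantee, so BLK_STS_RESOURCE lets the block layer rerun the queue itself.
 */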
138 | |
139 | /* |
140 | * BLK_STS_ZONE_RESOURCE is returned from the driver to the block layer if zone |
141 | * related resources are unavailable, but the driver can guarantee the queue |
142 | * will be rerun in the future once the resources become available again. |
143 | * |
144 | * This is different from BLK_STS_DEV_RESOURCE in that it explicitly references |
145 | * a zone specific resource and IO to a different zone on the same device could |
 * still be served. An example is a write-locked zone: writes to it must wait,
 * but reads to the same zone can still be served.
148 | */ |
149 | #define BLK_STS_ZONE_RESOURCE ((__force blk_status_t)14) |
150 | |
151 | /* |
152 | * BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion |
153 | * path if the device returns a status indicating that too many zone resources |
154 | * are currently open. The same command should be successful if resubmitted |
155 | * after the number of open zones decreases below the device's limits, which is |
156 | * reported in the request_queue's max_open_zones. |
157 | */ |
158 | #define BLK_STS_ZONE_OPEN_RESOURCE ((__force blk_status_t)15) |
159 | |
160 | /* |
161 | * BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion |
162 | * path if the device returns a status indicating that too many zone resources |
163 | * are currently active. The same command should be successful if resubmitted |
164 | * after the number of active zones decreases below the device's limits, which |
165 | * is reported in the request_queue's max_active_zones. |
166 | */ |
167 | #define BLK_STS_ZONE_ACTIVE_RESOURCE ((__force blk_status_t)16) |
168 | |
169 | /* |
170 | * BLK_STS_OFFLINE is returned from the driver when the target device is offline |
 * or is being taken offline. This helps differentiate a device that is
 * intentionally being shut down from a real I/O error.
173 | */ |
174 | #define BLK_STS_OFFLINE ((__force blk_status_t)17) |
175 | |
176 | /* |
177 | * BLK_STS_DURATION_LIMIT is returned from the driver when the target device |
178 | * aborted the command because it exceeded one of its Command Duration Limits. |
179 | */ |
180 | #define BLK_STS_DURATION_LIMIT ((__force blk_status_t)18) |
181 | |
182 | /** |
183 | * blk_path_error - returns true if error may be path related |
184 | * @error: status the request was completed with |
185 | * |
186 | * Description: |
187 | * This classifies block error status into non-retryable errors and ones |
188 | * that may be successful if retried on a failover path. |
189 | * |
190 | * Return: |
191 | * %false - retrying failover path will not help |
192 | * %true - may succeed if retried |
193 | */ |
194 | static inline bool blk_path_error(blk_status_t error) |
195 | { |
196 | switch (error) { |
197 | case BLK_STS_NOTSUPP: |
198 | case BLK_STS_NOSPC: |
199 | case BLK_STS_TARGET: |
200 | case BLK_STS_RESV_CONFLICT: |
201 | case BLK_STS_MEDIUM: |
202 | case BLK_STS_PROTECTION: |
203 | return false; |
204 | } |
205 | |
206 | /* Anything else could be a path failure, so should be retried */ |
207 | return true; |
208 | } |
209 | |
210 | struct bio_issue { |
211 | u64 value; |
212 | }; |
213 | |
214 | typedef __u32 __bitwise blk_opf_t; |
215 | |
216 | typedef unsigned int blk_qc_t; |
217 | #define BLK_QC_T_NONE -1U |
218 | |
219 | /* |
 * main unit of I/O for the block layer and lower layers (i.e. drivers and
 * stacking drivers)
222 | */ |
223 | struct bio { |
224 | struct bio *bi_next; /* request queue link */ |
225 | struct block_device *bi_bdev; |
226 | blk_opf_t bi_opf; /* bottom bits REQ_OP, top bits |
227 | * req_flags. |
228 | */ |
229 | unsigned short bi_flags; /* BIO_* below */ |
230 | unsigned short bi_ioprio; |
231 | enum rw_hint bi_write_hint; |
232 | blk_status_t bi_status; |
233 | atomic_t __bi_remaining; |
234 | |
235 | struct bvec_iter bi_iter; |
236 | |
237 | blk_qc_t bi_cookie; |
238 | bio_end_io_t *bi_end_io; |
239 | void *bi_private; |
240 | #ifdef CONFIG_BLK_CGROUP |
241 | /* |
242 | * Represents the association of the css and request_queue for the bio. |
	 * If a bio goes directly to the device, it will not have a blkg as it
	 * will not have a request_queue associated with it. The reference is put
245 | * on release of the bio. |
246 | */ |
247 | struct blkcg_gq *bi_blkg; |
248 | struct bio_issue bi_issue; |
249 | #ifdef CONFIG_BLK_CGROUP_IOCOST |
250 | u64 bi_iocost_cost; |
251 | #endif |
252 | #endif |
253 | |
254 | #ifdef CONFIG_BLK_INLINE_ENCRYPTION |
255 | struct bio_crypt_ctx *bi_crypt_context; |
256 | #endif |
257 | |
258 | union { |
259 | #if defined(CONFIG_BLK_DEV_INTEGRITY) |
260 | struct bio_integrity_payload *bi_integrity; /* data integrity */ |
261 | #endif |
262 | }; |
263 | |
264 | unsigned short bi_vcnt; /* how many bio_vec's */ |
265 | |
266 | /* |
267 | * Everything starting with bi_max_vecs will be preserved by bio_reset() |
268 | */ |
269 | |
270 | unsigned short bi_max_vecs; /* max bvl_vecs we can hold */ |
271 | |
272 | atomic_t __bi_cnt; /* pin count */ |
273 | |
274 | struct bio_vec *bi_io_vec; /* the actual vec list */ |
275 | |
276 | struct bio_set *bi_pool; |
277 | |
278 | /* |
279 | * We can inline a number of vecs at the end of the bio, to avoid |
280 | * double allocations for a small number of bio_vecs. This member |
281 | * MUST obviously be kept at the very end of the bio. |
282 | */ |
283 | struct bio_vec bi_inline_vecs[]; |
284 | }; |
285 | |
286 | #define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) |
287 | #define BIO_MAX_SECTORS (UINT_MAX >> SECTOR_SHIFT) |
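
/*
 * Illustrative note: BIO_RESET_BYTES is the number of leading bytes that
 * bio_reset() clears, roughly (simplified from block/bio.c):
 *
 *	memset(bio, 0, BIO_RESET_BYTES);
 *
 * so everything from bi_max_vecs onwards (the pin count, the vec array
 * pointer, the owning bio_set and the inline vecs) survives a reset.
 */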
288 | |
289 | /* |
290 | * bio flags |
291 | */ |
292 | enum { |
293 | BIO_PAGE_PINNED, /* Unpin pages in bio_release_pages() */ |
294 | BIO_CLONED, /* doesn't own data */ |
295 | BIO_BOUNCED, /* bio is a bounce bio */ |
296 | BIO_QUIET, /* Make BIO Quiet */ |
297 | BIO_CHAIN, /* chained bio, ->bi_remaining in effect */ |
298 | BIO_REFFED, /* bio has elevated ->bi_cnt */ |
299 | BIO_BPS_THROTTLED, /* This bio has already been subjected to |
300 | * throttling rules. Don't do it again. */ |
301 | BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion |
302 | * of this bio. */ |
303 | BIO_CGROUP_ACCT, /* has been accounted to a cgroup */ |
304 | BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */ |
305 | BIO_QOS_MERGED, /* but went through rq_qos merge path */ |
	BIO_REMAPPED,		/* partition offset already applied, don't remap again */
307 | BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */ |
308 | BIO_FLAG_LAST |
309 | }; |
310 | |
311 | typedef __u32 __bitwise blk_mq_req_flags_t; |
312 | |
313 | #define REQ_OP_BITS 8 |
314 | #define REQ_OP_MASK (__force blk_opf_t)((1 << REQ_OP_BITS) - 1) |
315 | #define REQ_FLAG_BITS 24 |
316 | |
317 | /** |
318 | * enum req_op - Operations common to the bio and request structures. |
319 | * We use 8 bits for encoding the operation, and the remaining 24 for flags. |
320 | * |
321 | * The least significant bit of the operation number indicates the data |
322 | * transfer direction: |
323 | * |
324 | * - if the least significant bit is set transfers are TO the device |
325 | * - if the least significant bit is not set transfers are FROM the device |
326 | * |
 * If an operation does not transfer data, the least significant bit has no
 * meaning.
329 | */ |
330 | enum req_op { |
331 | /* read sectors from the device */ |
332 | REQ_OP_READ = (__force blk_opf_t)0, |
333 | /* write sectors to the device */ |
334 | REQ_OP_WRITE = (__force blk_opf_t)1, |
335 | /* flush the volatile write cache */ |
336 | REQ_OP_FLUSH = (__force blk_opf_t)2, |
337 | /* discard sectors */ |
338 | REQ_OP_DISCARD = (__force blk_opf_t)3, |
339 | /* securely erase sectors */ |
340 | REQ_OP_SECURE_ERASE = (__force blk_opf_t)5, |
341 | /* write data at the current zone write pointer */ |
342 | REQ_OP_ZONE_APPEND = (__force blk_opf_t)7, |
	/* write zeroes to a range of sectors */
344 | REQ_OP_WRITE_ZEROES = (__force blk_opf_t)9, |
345 | /* Open a zone */ |
346 | REQ_OP_ZONE_OPEN = (__force blk_opf_t)10, |
347 | /* Close a zone */ |
348 | REQ_OP_ZONE_CLOSE = (__force blk_opf_t)11, |
349 | /* Transition a zone to full */ |
350 | REQ_OP_ZONE_FINISH = (__force blk_opf_t)12, |
351 | /* reset a zone write pointer */ |
352 | REQ_OP_ZONE_RESET = (__force blk_opf_t)13, |
	/* reset all the zones present on the device */
354 | REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)15, |
355 | |
356 | /* Driver private requests */ |
357 | REQ_OP_DRV_IN = (__force blk_opf_t)34, |
358 | REQ_OP_DRV_OUT = (__force blk_opf_t)35, |
359 | |
360 | REQ_OP_LAST = (__force blk_opf_t)36, |
361 | }; |
362 | |
363 | enum req_flag_bits { |
364 | __REQ_FAILFAST_DEV = /* no driver retries of device errors */ |
365 | REQ_OP_BITS, |
366 | __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ |
367 | __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ |
368 | __REQ_SYNC, /* request is sync (sync write or read) */ |
369 | __REQ_META, /* metadata io request */ |
	__REQ_PRIO,		/* boost priority in the I/O scheduler */
371 | __REQ_NOMERGE, /* don't touch this for merging */ |
372 | __REQ_IDLE, /* anticipate more IO after this one */ |
373 | __REQ_INTEGRITY, /* I/O includes block integrity payload */ |
374 | __REQ_FUA, /* forced unit access */ |
375 | __REQ_PREFLUSH, /* request for cache flush */ |
376 | __REQ_RAHEAD, /* read ahead, can fail anytime */ |
377 | __REQ_BACKGROUND, /* background IO */ |
378 | __REQ_NOWAIT, /* Don't wait if request will block */ |
379 | __REQ_POLLED, /* caller polls for completion using bio_poll */ |
380 | __REQ_ALLOC_CACHE, /* allocate IO from cache if available */ |
381 | __REQ_SWAP, /* swap I/O */ |
382 | __REQ_DRV, /* for driver use */ |
383 | __REQ_FS_PRIVATE, /* for file system (submitter) use */ |
384 | |
385 | /* |
386 | * Command specific flags, keep last: |
387 | */ |
388 | /* for REQ_OP_WRITE_ZEROES: */ |
389 | __REQ_NOUNMAP, /* do not free blocks when zeroing */ |
390 | |
391 | __REQ_NR_BITS, /* stops here */ |
392 | }; |
393 | |
394 | #define REQ_FAILFAST_DEV \ |
395 | (__force blk_opf_t)(1ULL << __REQ_FAILFAST_DEV) |
396 | #define REQ_FAILFAST_TRANSPORT \ |
397 | (__force blk_opf_t)(1ULL << __REQ_FAILFAST_TRANSPORT) |
398 | #define REQ_FAILFAST_DRIVER \ |
399 | (__force blk_opf_t)(1ULL << __REQ_FAILFAST_DRIVER) |
400 | #define REQ_SYNC (__force blk_opf_t)(1ULL << __REQ_SYNC) |
401 | #define REQ_META (__force blk_opf_t)(1ULL << __REQ_META) |
402 | #define REQ_PRIO (__force blk_opf_t)(1ULL << __REQ_PRIO) |
403 | #define REQ_NOMERGE (__force blk_opf_t)(1ULL << __REQ_NOMERGE) |
404 | #define REQ_IDLE (__force blk_opf_t)(1ULL << __REQ_IDLE) |
405 | #define REQ_INTEGRITY (__force blk_opf_t)(1ULL << __REQ_INTEGRITY) |
406 | #define REQ_FUA (__force blk_opf_t)(1ULL << __REQ_FUA) |
407 | #define REQ_PREFLUSH (__force blk_opf_t)(1ULL << __REQ_PREFLUSH) |
408 | #define REQ_RAHEAD (__force blk_opf_t)(1ULL << __REQ_RAHEAD) |
409 | #define REQ_BACKGROUND (__force blk_opf_t)(1ULL << __REQ_BACKGROUND) |
410 | #define REQ_NOWAIT (__force blk_opf_t)(1ULL << __REQ_NOWAIT) |
411 | #define REQ_POLLED (__force blk_opf_t)(1ULL << __REQ_POLLED) |
412 | #define REQ_ALLOC_CACHE (__force blk_opf_t)(1ULL << __REQ_ALLOC_CACHE) |
413 | #define REQ_SWAP (__force blk_opf_t)(1ULL << __REQ_SWAP) |
414 | #define REQ_DRV (__force blk_opf_t)(1ULL << __REQ_DRV) |
415 | #define REQ_FS_PRIVATE (__force blk_opf_t)(1ULL << __REQ_FS_PRIVATE) |
416 | |
417 | #define REQ_NOUNMAP (__force blk_opf_t)(1ULL << __REQ_NOUNMAP) |
418 | |
419 | #define REQ_FAILFAST_MASK \ |
420 | (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) |
421 | |
422 | #define REQ_NOMERGE_FLAGS \ |
423 | (REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA) |
424 | |
425 | enum stat_group { |
426 | STAT_READ, |
427 | STAT_WRITE, |
428 | STAT_DISCARD, |
429 | STAT_FLUSH, |
430 | |
431 | NR_STAT_GROUPS |
432 | }; |
433 | |
434 | static inline enum req_op bio_op(const struct bio *bio) |
435 | { |
436 | return bio->bi_opf & REQ_OP_MASK; |
437 | } |
438 | |
439 | static inline bool op_is_write(blk_opf_t op) |
440 | { |
441 | return !!(op & (__force blk_opf_t)1); |
442 | } |
443 | |
444 | /* |
445 | * Check if the bio or request is one that needs special treatment in the |
446 | * flush state machine. |
447 | */ |
448 | static inline bool op_is_flush(blk_opf_t op) |
449 | { |
450 | return op & (REQ_FUA | REQ_PREFLUSH); |
451 | } |
452 | |
453 | /* |
454 | * Reads are always treated as synchronous, as are requests with the FUA or |
455 | * PREFLUSH flag. Other operations may be marked as synchronous using the |
456 | * REQ_SYNC flag. |
457 | */ |
458 | static inline bool op_is_sync(blk_opf_t op) |
459 | { |
460 | return (op & REQ_OP_MASK) == REQ_OP_READ || |
461 | (op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH)); |
462 | } |
463 | |
464 | static inline bool op_is_discard(blk_opf_t op) |
465 | { |
466 | return (op & REQ_OP_MASK) == REQ_OP_DISCARD; |
467 | } |
468 | |
469 | /* |
470 | * Check if a bio or request operation is a zone management operation, with |
471 | * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case |
472 | * due to its different handling in the block layer and device response in |
473 | * case of command failure. |
474 | */ |
475 | static inline bool op_is_zone_mgmt(enum req_op op) |
476 | { |
477 | switch (op & REQ_OP_MASK) { |
478 | case REQ_OP_ZONE_RESET: |
479 | case REQ_OP_ZONE_OPEN: |
480 | case REQ_OP_ZONE_CLOSE: |
481 | case REQ_OP_ZONE_FINISH: |
482 | return true; |
483 | default: |
484 | return false; |
485 | } |
486 | } |
487 | |
488 | static inline int op_stat_group(enum req_op op) |
489 | { |
490 | if (op_is_discard(op)) |
491 | return STAT_DISCARD; |
492 | return op_is_write(op); |
493 | } |
494 | |
495 | struct blk_rq_stat { |
496 | u64 mean; |
497 | u64 min; |
498 | u64 max; |
499 | u32 nr_samples; |
500 | u64 batch; |
501 | }; |
502 | |
503 | #endif /* __LINUX_BLK_TYPES_H */ |
504 | |