1 | /* SPDX-License-Identifier: MIT */ |
2 | /****************************************************************************** |
3 | * blkif.h |
4 | * |
5 | * Unified block-device I/O interface for Xen guest OSes. |
6 | * |
7 | * Copyright (c) 2003-2004, Keir Fraser |
8 | */ |
9 | |
10 | #ifndef __XEN_PUBLIC_IO_BLKIF_H__ |
11 | #define __XEN_PUBLIC_IO_BLKIF_H__ |
12 | |
13 | #include <xen/interface/io/ring.h> |
14 | #include <xen/interface/grant_table.h> |
15 | |
16 | /* |
17 | * Front->back notifications: When enqueuing a new request, sending a |
18 | * notification can be made conditional on req_event (i.e., the generic |
19 | * hold-off mechanism provided by the ring macros). Backends must set |
20 | * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()). |
21 | * |
22 | * Back->front notifications: When enqueuing a new response, sending a |
23 | * notification can be made conditional on rsp_event (i.e., the generic |
24 | * hold-off mechanism provided by the ring macros). Frontends must set |
25 | * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()). |
26 | */ |
27 | |
/* Virtual block-device handle, as carried in the 'handle' field of requests. */
typedef uint16_t blkif_vdev_t;

/* Sector index on the virtual disk (type of the 'sector_number' fields). */
typedef uint64_t blkif_sector_t;
30 | |
31 | /* |
32 | * Multiple hardware queues/rings: |
33 | * If supported, the backend will write the key "multi-queue-max-queues" to |
34 | * the directory for that vbd, and set its value to the maximum supported |
35 | * number of queues. |
36 | * Frontends that are aware of this feature and wish to use it can write the |
37 | * key "multi-queue-num-queues" with the number they wish to use, which must be |
38 | * greater than zero, and no more than the value reported by the backend in |
39 | * "multi-queue-max-queues". |
40 | * |
41 | * For frontends requesting just one queue, the usual event-channel and |
42 | * ring-ref keys are written as before, simplifying the backend processing |
43 | * to avoid distinguishing between a frontend that doesn't understand the |
44 | * multi-queue feature, and one that does, but requested only one queue. |
45 | * |
46 | * Frontends requesting two or more queues must not write the toplevel |
47 | * event-channel and ring-ref keys, instead writing those keys under sub-keys |
48 | * having the name "queue-N" where N is the integer ID of the queue/ring for |
49 | * which those keys belong. Queues are indexed from zero. |
50 | * For example, a frontend with two queues must write the following set of |
51 | * queue-related keys: |
52 | * |
53 | * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" |
54 | * /local/domain/1/device/vbd/0/queue-0 = "" |
55 | * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>" |
56 | * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>" |
57 | * /local/domain/1/device/vbd/0/queue-1 = "" |
58 | * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>" |
59 | * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>" |
60 | * |
61 | * It is also possible to use multiple queues/rings together with |
62 | * the multi-page ring buffer feature. |
63 | * For example, a frontend that requests two queues/rings, each with a ring |
64 | * buffer of two pages, must write the following set of related keys: |
65 | * |
66 | * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" |
67 | * /local/domain/1/device/vbd/0/ring-page-order = "1" |
68 | * /local/domain/1/device/vbd/0/queue-0 = "" |
69 | * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>" |
70 | * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>" |
71 | * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>" |
72 | * /local/domain/1/device/vbd/0/queue-1 = "" |
73 | * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>" |
74 | * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>" |
75 | * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>" |
76 | * |
77 | */ |
78 | |
/*
 * REQUEST CODES.
 *
 * Values carried in the 'operation' field of struct blkif_request.
 */
#define BLKIF_OP_READ			0
#define BLKIF_OP_WRITE			1
/*
 * Write barrier request.
 *
 * Recognised only if "feature-barrier" is present in backend xenbus info.
 * The "feature-barrier" node contains a boolean indicating whether barrier
 * requests are likely to succeed or fail; it simply tells the frontend
 * whether or not attempting barrier requests is worthwhile. Either way, a
 * barrier request may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is
 * unsupported by the underlying block-device hardware.
 *
 * If a backend does not recognise BLKIF_OP_WRITE_BARRIER, it should *not*
 * create the "feature-barrier" node!
 */
#define BLKIF_OP_WRITE_BARRIER		2
95 | |
/*
 * Cache flush request.
 *
 * Recognised if "feature-flush-cache" is present in backend xenbus info.
 * A flush will ask the underlying storage hardware to flush its
 * non-volatile caches as appropriate. The "feature-flush-cache" node
 * contains a boolean indicating whether flush requests are likely to
 * succeed or fail; it simply tells the frontend whether or not attempting
 * flushes is worthwhile. Either way, a flush request may fail at any time
 * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying
 * block-device hardware.
 *
 * If a backend does not recognise BLKIF_OP_FLUSH_DISKCACHE, it should *not*
 * create the "feature-flush-cache" node!
 */
#define BLKIF_OP_FLUSH_DISKCACHE	3
109 | |
/*
 * Discard request.
 *
 * Recognised only if "feature-discard" is present in backend xenbus info.
 * The "feature-discard" node contains a boolean indicating whether trim
 * (ATA) or unmap (SCSI) requests - conveniently called discard requests -
 * are likely to succeed or fail. Either way, a discard request may fail at
 * any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying
 * block-device hardware. The boolean simply indicates whether or not it is
 * worthwhile for the frontend to attempt discard requests.
 * If a backend does not recognise BLKIF_OP_DISCARD, it should *not* create
 * the "feature-discard" node!
 *
 * A discard operation is a request for the underlying block device to mark
 * extents to be erased. However, discard does not guarantee that the blocks
 * will be erased from the device - it is just a hint to the device
 * controller that these blocks are no longer in use. What the device
 * controller does with that information is left to the controller.
 *
 * Discard operations are passed with sector_number as the sector index to
 * begin discard operations at and nr_sectors as the number of sectors to be
 * discarded. The specified sectors should be discarded if the underlying
 * block device supports trim (ATA) or unmap (SCSI) operations, or a
 * BLKIF_RSP_EOPNOTSUPP should be returned.
 *
 * More information about trim/unmap operations at:
 * http://t13.org/Documents/UploadedDocuments/docs2008/
 *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
 * http://www.seagate.com/staticfiles/support/disc/manuals/
 *     Interface%20manuals/100293068c.pdf
 *
 * The backend can optionally provide three extra XenBus attributes to
 * further optimize the discard functionality:
 *
 * 'discard-alignment' - Devices that support discard functionality may
 * internally allocate space in units that are bigger than the exported
 * logical block size. The discard-alignment parameter indicates how many
 * bytes the beginning of the partition is offset from the internal
 * allocation unit's natural alignment.
 *
 * 'discard-granularity' - Devices that support discard functionality may
 * internally allocate space using units that are bigger than the logical
 * block size. The discard-granularity parameter indicates the size of the
 * internal allocation unit in bytes if reported by the device. Otherwise the
 * discard-granularity will be set to match the device's physical block size.
 *
 * 'discard-secure' - All copies of the discarded sectors (potentially
 * created by garbage collection) must also be erased. To use this feature,
 * the flag BLKIF_DISCARD_SECURE must be set in the 'flag' field of
 * struct blkif_request_discard.
 */
#define BLKIF_OP_DISCARD		5
153 | |
/*
 * Indirect request.
 *
 * Recognized if "feature-max-indirect-segments" is present in the backend
 * xenbus info. The "feature-max-indirect-segments" node contains the maximum
 * number of segments allowed by the backend per request. If the node is
 * present, the frontend might use blkif_request_indirect structs in order to
 * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11)
 * segments. The maximum number of indirect segments is fixed by the backend,
 * but the frontend can issue requests with any number of indirect segments
 * as long as it's less than the number provided by the backend.
 *
 * The indirect_grefs field in blkif_request_indirect should be filled by the
 * frontend with the grant references of the pages that are holding the
 * indirect segments. These pages are filled with an array of
 * blkif_request_segment that hold the information about the segments.
 *
 * The number of indirect pages to use is determined by the number of
 * segments an indirect request contains. Every indirect page can contain a
 * maximum of (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so
 * to calculate the number of indirect pages to use we have to do
 * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
 *
 * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
 * create the "feature-max-indirect-segments" node!
 */
#define BLKIF_OP_INDIRECT		6
177 | |
/*
 * Maximum scatter/gather segments per request.
 * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
 * NB. This could be 12 if the ring indexes weren't stored in the same page.
 */
#define BLKIF_MAX_SEGMENTS_PER_REQUEST		11

/*
 * Number of entries in blkif_request_indirect.indirect_grefs[], i.e. the
 * maximum number of indirect pages a single indirect request can reference.
 */
#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST	8
186 | |
187 | struct blkif_request_segment { |
188 | grant_ref_t gref; /* reference to I/O buffer frame */ |
189 | /* @first_sect: first sector in frame to transfer (inclusive). */ |
190 | /* @last_sect: last sector in frame to transfer (inclusive). */ |
191 | uint8_t first_sect, last_sect; |
192 | }; |
193 | |
194 | struct blkif_request_rw { |
195 | uint8_t nr_segments; /* number of segments */ |
196 | blkif_vdev_t handle; /* only for read/write requests */ |
197 | #ifndef CONFIG_X86_32 |
198 | uint32_t _pad1; /* offsetof(blkif_request,u.rw.id) == 8 */ |
199 | #endif |
200 | uint64_t id; /* private guest value, echoed in resp */ |
201 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ |
202 | struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
203 | } __attribute__((__packed__)); |
204 | |
205 | struct blkif_request_discard { |
206 | uint8_t flag; /* BLKIF_DISCARD_SECURE or zero. */ |
207 | #define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */ |
208 | blkif_vdev_t _pad1; /* only for read/write requests */ |
209 | #ifndef CONFIG_X86_32 |
210 | uint32_t _pad2; /* offsetof(blkif_req..,u.discard.id)==8*/ |
211 | #endif |
212 | uint64_t id; /* private guest value, echoed in resp */ |
213 | blkif_sector_t sector_number; |
214 | uint64_t nr_sectors; |
215 | uint8_t _pad3; |
216 | } __attribute__((__packed__)); |
217 | |
218 | struct blkif_request_other { |
219 | uint8_t _pad1; |
220 | blkif_vdev_t _pad2; /* only for read/write requests */ |
221 | #ifndef CONFIG_X86_32 |
222 | uint32_t _pad3; /* offsetof(blkif_req..,u.other.id)==8*/ |
223 | #endif |
224 | uint64_t id; /* private guest value, echoed in resp */ |
225 | } __attribute__((__packed__)); |
226 | |
227 | struct blkif_request_indirect { |
228 | uint8_t indirect_op; |
229 | uint16_t nr_segments; |
230 | #ifndef CONFIG_X86_32 |
231 | uint32_t _pad1; /* offsetof(blkif_...,u.indirect.id) == 8 */ |
232 | #endif |
233 | uint64_t id; |
234 | blkif_sector_t sector_number; |
235 | blkif_vdev_t handle; |
236 | uint16_t _pad2; |
237 | grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; |
238 | #ifndef CONFIG_X86_32 |
239 | uint32_t _pad3; /* make it 64 byte aligned */ |
240 | #else |
241 | uint64_t _pad3; /* make it 64 byte aligned */ |
242 | #endif |
243 | } __attribute__((__packed__)); |
244 | |
/*
 * A request as placed on the ring: a one-byte operation code followed by an
 * operation-specific payload. The struct is packed, so the payload starts at
 * byte offset 1.
 */
struct blkif_request {
	uint8_t operation; /* BLKIF_OP_??? */
	union {
		struct blkif_request_rw rw;		/* read/write requests */
		struct blkif_request_discard discard;	/* discard requests */
		struct blkif_request_other other;	/* view exposing only 'id' */
		struct blkif_request_indirect indirect;	/* indirect requests */
	} u;
} __attribute__((__packed__));
254 | |
/*
 * A response as placed on the ring.
 *
 * Unlike struct blkif_request, this struct is not declared packed, so the
 * compiler's natural alignment and tail padding apply.
 */
struct blkif_response {
	uint64_t id;		/* copied from request */
	uint8_t operation;	/* copied from request */
	int16_t status;		/* BLKIF_RSP_??? */
};
260 | |
/*
 * STATUS RETURN CODES.
 *
 * Values for blkif_response.status. The negative values are parenthesised
 * so that the macros expand safely inside any expression.
 */
/*
 * Operation not supported. Barrier, flush and discard requests may all be
 * answered with this if the underlying block-device hardware does not
 * support them.
 */
#define BLKIF_RSP_EOPNOTSUPP	(-2)
/* Operation failed for some unspecified reason (-EIO). */
#define BLKIF_RSP_ERROR		(-1)
/* Operation completed successfully. */
#define BLKIF_RSP_OKAY		0
270 | |
/*
 * Generate blkif ring structures and types.
 *
 * The ring carries struct blkif_request entries from frontend to backend and
 * struct blkif_response entries in the other direction.
 * NOTE(review): DEFINE_RING_TYPES() is presumably provided by
 * <xen/interface/io/ring.h>; the exact names of the generated types are not
 * visible in this header - confirm there.
 */

DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
276 | |
/* Virtual-disk attribute flags; single-bit values that can be combined. */
#define VDISK_CDROM		0x1
#define VDISK_REMOVABLE		0x2
#define VDISK_READONLY		0x4
280 | |
/*
 * Xen-defined major numbers for virtual disks, they look strangely
 * familiar.
 */
#define XEN_IDE0_MAJOR		3
#define XEN_IDE1_MAJOR		22
#define XEN_SCSI_DISK0_MAJOR	8
#define XEN_SCSI_DISK1_MAJOR	65
#define XEN_SCSI_DISK2_MAJOR	66
#define XEN_SCSI_DISK3_MAJOR	67
#define XEN_SCSI_DISK4_MAJOR	68
#define XEN_SCSI_DISK5_MAJOR	69
#define XEN_SCSI_DISK6_MAJOR	70
#define XEN_SCSI_DISK7_MAJOR	71
#define XEN_SCSI_DISK8_MAJOR	128
#define XEN_SCSI_DISK9_MAJOR	129
#define XEN_SCSI_DISK10_MAJOR	130
#define XEN_SCSI_DISK11_MAJOR	131
#define XEN_SCSI_DISK12_MAJOR	132
#define XEN_SCSI_DISK13_MAJOR	133
#define XEN_SCSI_DISK14_MAJOR	134
#define XEN_SCSI_DISK15_MAJOR	135
301 | |
302 | #endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ |
303 | |