1 | /* |
2 | * This program is free software; you can redistribute it and/or |
3 | * modify it under the terms of the GNU General Public License version 2 |
4 | * as published by the Free Software Foundation; or, when distributed |
5 | * separately from the Linux kernel or incorporated into other |
6 | * software packages, subject to the following license: |
7 | * |
8 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
9 | * of this source file (the "Software"), to deal in the Software without |
10 | * restriction, including without limitation the rights to use, copy, modify, |
11 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, |
12 | * and to permit persons to whom the Software is furnished to do so, subject to |
13 | * the following conditions: |
14 | * |
15 | * The above copyright notice and this permission notice shall be included in |
16 | * all copies or substantial portions of the Software. |
17 | * |
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
21 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
24 | * IN THE SOFTWARE. |
25 | */ |
26 | |
27 | #ifndef __XEN_BLKIF__BACKEND__COMMON_H__ |
28 | #define __XEN_BLKIF__BACKEND__COMMON_H__ |
29 | |
30 | #include <linux/module.h> |
31 | #include <linux/interrupt.h> |
32 | #include <linux/slab.h> |
33 | #include <linux/blkdev.h> |
34 | #include <linux/vmalloc.h> |
35 | #include <linux/wait.h> |
36 | #include <linux/io.h> |
37 | #include <linux/rbtree.h> |
38 | #include <asm/setup.h> |
39 | #include <asm/hypervisor.h> |
40 | #include <xen/grant_table.h> |
41 | #include <xen/page.h> |
42 | #include <xen/xenbus.h> |
43 | #include <xen/interface/io/ring.h> |
44 | #include <xen/interface/io/blkif.h> |
45 | #include <xen/interface/io/protocols.h> |
46 | |
/*
 * Driver-wide limits.  They are only declared here; the definitions live in
 * another translation unit.
 * NOTE(review): judging by the names, these bound the shared-ring page order
 * and the number of queues per device -- confirm against the definitions.
 */
extern unsigned int	xen_blkif_max_ring_order;
extern unsigned int	xenblk_max_queues;
/*
 * Upper bound on the number of segments accepted in a single indirect
 * request.  The same value is passed on to the frontend.
 */
#define MAX_INDIRECT_SEGMENTS	256
54 | |
/*
 * Xen uses 4K pages, while the guest may use a different page size (4K or
 * 64K).  A segment spans PAGE_SIZE bytes, so this is the number of Xen pages
 * per segment.
 */
#define XEN_PAGES_PER_SEGMENT	(PAGE_SIZE / XEN_PAGE_SIZE)

/* How many struct blkif_request_segment entries fit in one Xen page. */
#define XEN_PAGES_PER_INDIRECT_FRAME \
	(XEN_PAGE_SIZE / sizeof(struct blkif_request_segment))

/* The per-frame count above, scaled down by the Xen pages in each segment. */
#define SEGS_PER_INDIRECT_FRAME \
	(XEN_PAGES_PER_INDIRECT_FRAME / XEN_PAGES_PER_SEGMENT)

/* Indirect frames needed to cover MAX_INDIRECT_SEGMENTS, rounded up. */
#define MAX_INDIRECT_PAGES \
	DIV_ROUND_UP(MAX_INDIRECT_SEGMENTS, SEGS_PER_INDIRECT_FRAME)

/* Indirect frames needed to hold _segs segment entries, rounded up. */
#define INDIRECT_PAGES(_segs) \
	DIV_ROUND_UP(_segs, XEN_PAGES_PER_INDIRECT_FRAME)
69 | |
/*
 * Not a real protocol.  It is used to generate ring structs that contain
 * only the elements common to all protocols.  That gives a compiler-checkable
 * way to use the common struct elements, so switch (protocol) can be avoided
 * in a number of places.
 */
struct blkif_common_request {
	char	dummy;
};
77 | |
78 | /* i386 protocol version */ |
79 | |
80 | struct blkif_x86_32_request_rw { |
81 | uint8_t nr_segments; /* number of segments */ |
82 | blkif_vdev_t handle; /* only for read/write requests */ |
83 | uint64_t id; /* private guest value, echoed in resp */ |
84 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ |
85 | struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
86 | } __attribute__((__packed__)); |
87 | |
88 | struct blkif_x86_32_request_discard { |
89 | uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */ |
90 | blkif_vdev_t _pad1; /* was "handle" for read/write requests */ |
91 | uint64_t id; /* private guest value, echoed in resp */ |
92 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ |
93 | uint64_t nr_sectors; |
94 | } __attribute__((__packed__)); |
95 | |
96 | struct blkif_x86_32_request_other { |
97 | uint8_t _pad1; |
98 | blkif_vdev_t _pad2; |
99 | uint64_t id; /* private guest value, echoed in resp */ |
100 | } __attribute__((__packed__)); |
101 | |
102 | struct blkif_x86_32_request_indirect { |
103 | uint8_t indirect_op; |
104 | uint16_t nr_segments; |
105 | uint64_t id; |
106 | blkif_sector_t sector_number; |
107 | blkif_vdev_t handle; |
108 | uint16_t _pad1; |
109 | grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; |
110 | /* |
111 | * The maximum number of indirect segments (and pages) that will |
112 | * be used is determined by MAX_INDIRECT_SEGMENTS, this value |
113 | * is also exported to the guest (via xenstore |
114 | * feature-max-indirect-segments entry), so the frontend knows how |
115 | * many indirect segments the backend supports. |
116 | */ |
117 | uint64_t _pad2; /* make it 64 byte aligned */ |
118 | } __attribute__((__packed__)); |
119 | |
120 | struct blkif_x86_32_request { |
121 | uint8_t operation; /* BLKIF_OP_??? */ |
122 | union { |
123 | struct blkif_x86_32_request_rw rw; |
124 | struct blkif_x86_32_request_discard discard; |
125 | struct blkif_x86_32_request_other other; |
126 | struct blkif_x86_32_request_indirect indirect; |
127 | } u; |
128 | } __attribute__((__packed__)); |
129 | |
130 | /* x86_64 protocol version */ |
131 | |
132 | struct blkif_x86_64_request_rw { |
133 | uint8_t nr_segments; /* number of segments */ |
134 | blkif_vdev_t handle; /* only for read/write requests */ |
135 | uint32_t _pad1; /* offsetof(blkif_reqest..,u.rw.id)==8 */ |
136 | uint64_t id; |
137 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ |
138 | struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
139 | } __attribute__((__packed__)); |
140 | |
141 | struct blkif_x86_64_request_discard { |
142 | uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */ |
143 | blkif_vdev_t _pad1; /* was "handle" for read/write requests */ |
144 | uint32_t _pad2; /* offsetof(blkif_..,u.discard.id)==8 */ |
145 | uint64_t id; |
146 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ |
147 | uint64_t nr_sectors; |
148 | } __attribute__((__packed__)); |
149 | |
150 | struct blkif_x86_64_request_other { |
151 | uint8_t _pad1; |
152 | blkif_vdev_t _pad2; |
153 | uint32_t _pad3; /* offsetof(blkif_..,u.discard.id)==8 */ |
154 | uint64_t id; /* private guest value, echoed in resp */ |
155 | } __attribute__((__packed__)); |
156 | |
157 | struct blkif_x86_64_request_indirect { |
158 | uint8_t indirect_op; |
159 | uint16_t nr_segments; |
160 | uint32_t _pad1; /* offsetof(blkif_..,u.indirect.id)==8 */ |
161 | uint64_t id; |
162 | blkif_sector_t sector_number; |
163 | blkif_vdev_t handle; |
164 | uint16_t _pad2; |
165 | grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; |
166 | /* |
167 | * The maximum number of indirect segments (and pages) that will |
168 | * be used is determined by MAX_INDIRECT_SEGMENTS, this value |
169 | * is also exported to the guest (via xenstore |
170 | * feature-max-indirect-segments entry), so the frontend knows how |
171 | * many indirect segments the backend supports. |
172 | */ |
173 | uint32_t _pad3; /* make it 64 byte aligned */ |
174 | } __attribute__((__packed__)); |
175 | |
176 | struct blkif_x86_64_request { |
177 | uint8_t operation; /* BLKIF_OP_??? */ |
178 | union { |
179 | struct blkif_x86_64_request_rw rw; |
180 | struct blkif_x86_64_request_discard discard; |
181 | struct blkif_x86_64_request_other other; |
182 | struct blkif_x86_64_request_indirect indirect; |
183 | } u; |
184 | } __attribute__((__packed__)); |
185 | |
186 | DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, |
187 | struct blkif_response); |
188 | DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, |
189 | struct blkif_response __packed); |
190 | DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, |
191 | struct blkif_response); |
192 | |
193 | union blkif_back_rings { |
194 | struct blkif_back_ring native; |
195 | struct blkif_common_back_ring common; |
196 | struct blkif_x86_32_back_ring x86_32; |
197 | struct blkif_x86_64_back_ring x86_64; |
198 | }; |
199 | |
/* Identifies which request layout a ring uses. */
enum blkif_protocol {
	BLKIF_PROTOCOL_NATIVE	= 1,
	BLKIF_PROTOCOL_X86_32	= 2,
	BLKIF_PROTOCOL_X86_64	= 3,
};
205 | |
/*
 * Protocol assumed when the frontend does not specify one: the 32-bit x86
 * layout on CONFIG_X86 builds, the native layout everywhere else.
 */
#if defined(CONFIG_X86)
#define BLKIF_PROTOCOL_DEFAULT	BLKIF_PROTOCOL_X86_32
#else
#define BLKIF_PROTOCOL_DEFAULT	BLKIF_PROTOCOL_NATIVE
#endif
214 | |
215 | struct xen_vbd { |
216 | /* What the domain refers to this vbd as. */ |
217 | blkif_vdev_t handle; |
218 | /* Non-zero -> read-only */ |
219 | unsigned char readonly; |
220 | /* VDISK_xxx */ |
221 | unsigned char type; |
222 | /* phys device that this vbd maps to. */ |
223 | u32 pdevice; |
224 | struct bdev_handle *bdev_handle; |
225 | /* Cached size parameter. */ |
226 | sector_t size; |
227 | unsigned int flush_support:1; |
228 | unsigned int discard_secure:1; |
229 | /* Connect-time cached feature_persistent parameter value */ |
230 | unsigned int feature_gnt_persistent_parm:1; |
231 | /* Persistent grants feature negotiation result */ |
232 | unsigned int feature_gnt_persistent:1; |
233 | unsigned int overflow_max_grants:1; |
234 | }; |
235 | |
/* Forward declaration: this header only uses pointers to backend_info. */
struct backend_info;
237 | |
/* How many requests fit in a ring. */
#define XEN_BLKIF_REQS_PER_PAGE	32
240 | |
241 | struct persistent_gnt { |
242 | struct page *page; |
243 | grant_ref_t gnt; |
244 | grant_handle_t handle; |
245 | unsigned long last_used; |
246 | bool active; |
247 | struct rb_node node; |
248 | struct list_head remove_node; |
249 | }; |
250 | |
251 | /* Per-ring information. */ |
252 | struct xen_blkif_ring { |
253 | /* Physical parameters of the comms window. */ |
254 | unsigned int irq; |
255 | union blkif_back_rings blk_rings; |
256 | void *blk_ring; |
257 | /* Private fields. */ |
258 | spinlock_t blk_ring_lock; |
259 | |
260 | wait_queue_head_t wq; |
261 | atomic_t inflight; |
262 | bool active; |
263 | /* One thread per blkif ring. */ |
264 | struct task_struct *xenblkd; |
265 | unsigned int waiting_reqs; |
266 | |
267 | /* List of all 'pending_req' available */ |
268 | struct list_head pending_free; |
269 | /* And its spinlock. */ |
270 | spinlock_t pending_free_lock; |
271 | wait_queue_head_t pending_free_wq; |
272 | |
273 | /* Tree to store persistent grants. */ |
274 | struct rb_root persistent_gnts; |
275 | unsigned int persistent_gnt_c; |
276 | atomic_t persistent_gnt_in_use; |
277 | unsigned long next_lru; |
278 | |
279 | /* Statistics. */ |
280 | unsigned long st_print; |
281 | unsigned long long st_rd_req; |
282 | unsigned long long st_wr_req; |
283 | unsigned long long st_oo_req; |
284 | unsigned long long st_f_req; |
285 | unsigned long long st_ds_req; |
286 | unsigned long long st_rd_sect; |
287 | unsigned long long st_wr_sect; |
288 | |
289 | /* Used by the kworker that offload work from the persistent purge. */ |
290 | struct list_head persistent_purge_list; |
291 | struct work_struct persistent_purge_work; |
292 | |
293 | /* Buffer of free pages to map grant refs. */ |
294 | struct gnttab_page_cache free_pages; |
295 | |
296 | struct work_struct free_work; |
297 | /* Thread shutdown wait queue. */ |
298 | wait_queue_head_t shutdown_wq; |
299 | struct xen_blkif *blkif; |
300 | }; |
301 | |
302 | struct xen_blkif { |
303 | /* Unique identifier for this interface. */ |
304 | domid_t domid; |
305 | unsigned int handle; |
306 | /* Comms information. */ |
307 | enum blkif_protocol blk_protocol; |
308 | /* The VBD attached to this interface. */ |
309 | struct xen_vbd vbd; |
310 | /* Back pointer to the backend_info. */ |
311 | struct backend_info *be; |
312 | atomic_t refcnt; |
313 | /* for barrier (drain) requests */ |
314 | struct completion drain_complete; |
315 | atomic_t drain; |
316 | |
317 | struct work_struct free_work; |
318 | unsigned int nr_ring_pages; |
319 | bool multi_ref; |
320 | /* All rings for this device. */ |
321 | struct xen_blkif_ring *rings; |
322 | unsigned int nr_rings; |
323 | unsigned long buffer_squeeze_end; |
324 | }; |
325 | |
/*
 * An offset / sector-count pair.
 * NOTE(review): presumably describes one segment's data within its page --
 * confirm against the code that fills it in.
 */
struct seg_buf {
	unsigned long	offset;
	unsigned int	nsec;
};
330 | |
331 | struct grant_page { |
332 | struct page *page; |
333 | struct persistent_gnt *persistent_gnt; |
334 | grant_handle_t handle; |
335 | grant_ref_t gref; |
336 | }; |
337 | |
338 | /* |
339 | * Each outstanding request that we've passed to the lower device layers has a |
340 | * 'pending_req' allocated to it. Each buffer_head that completes decrements |
341 | * the pendcnt towards zero. When it hits zero, the specified domain has a |
342 | * response queued for it, with the saved 'id' passed back. |
343 | */ |
344 | struct pending_req { |
345 | struct xen_blkif_ring *ring; |
346 | u64 id; |
347 | int nr_segs; |
348 | atomic_t pendcnt; |
349 | unsigned short operation; |
350 | int status; |
351 | struct list_head free_list; |
352 | struct grant_page *segments[MAX_INDIRECT_SEGMENTS]; |
353 | /* Indirect descriptors */ |
354 | struct grant_page *indirect_pages[MAX_INDIRECT_PAGES]; |
355 | struct seg_buf seg[MAX_INDIRECT_SEGMENTS]; |
356 | struct bio *biolist[MAX_INDIRECT_SEGMENTS]; |
357 | struct gnttab_unmap_grant_ref unmap[MAX_INDIRECT_SEGMENTS]; |
358 | struct page *unmap_pages[MAX_INDIRECT_SEGMENTS]; |
359 | struct gntab_unmap_queue_data gnttab_unmap_data; |
360 | }; |
361 | |
362 | |
/* Size in sectors of the block device behind the vbd pointed to by _vbd. */
#define vbd_sz(_vbd)	bdev_nr_sectors((_vbd)->bdev_handle->bdev)
364 | |
/* Take a reference on the interface pointed to by _blkif. */
#define xen_blkif_get(_blkif)	(atomic_inc(&(_blkif)->refcnt))

/*
 * Drop a reference on the interface pointed to by _blkif.  When the count
 * reaches zero, its free_work item is scheduled.
 */
#define xen_blkif_put(_blkif)					\
	do {							\
		if (atomic_dec_and_test(&(_blkif)->refcnt))	\
			schedule_work(&(_blkif)->free_work);	\
	} while (0)
371 | |
372 | struct phys_req { |
373 | unsigned short dev; |
374 | blkif_sector_t nr_sects; |
375 | struct block_device *bdev; |
376 | blkif_sector_t sector_number; |
377 | }; |
378 | |
379 | int xen_blkif_interface_init(void); |
380 | void xen_blkif_interface_fini(void); |
381 | |
382 | int xen_blkif_xenbus_init(void); |
383 | void xen_blkif_xenbus_fini(void); |
384 | |
385 | irqreturn_t xen_blkif_be_int(int irq, void *dev_id); |
386 | int xen_blkif_schedule(void *arg); |
387 | void xen_blkbk_free_caches(struct xen_blkif_ring *ring); |
388 | |
389 | int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, |
390 | struct backend_info *be, int state); |
391 | |
392 | int xen_blkbk_barrier(struct xenbus_transaction xbt, |
393 | struct backend_info *be, int state); |
394 | struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); |
395 | void xen_blkbk_unmap_purged_grants(struct work_struct *work); |
396 | |
397 | #endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */ |
398 | |