1 | #ifndef IO_URING_TYPES_H |
2 | #define IO_URING_TYPES_H |
3 | |
4 | #include <linux/blkdev.h> |
5 | #include <linux/hashtable.h> |
6 | #include <linux/task_work.h> |
7 | #include <linux/bitmap.h> |
8 | #include <linux/llist.h> |
9 | #include <uapi/linux/io_uring.h> |
10 | |
11 | enum { |
12 | /* |
	 * A hint to not wake the task right away, but to delay the wakeup
	 * until enough task_work items are queued to match the number of
	 * CQEs the task is waiting for.
15 | * |
16 | * Must not be used with requests generating more than one CQE. |
17 | * It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set. |
18 | */ |
19 | IOU_F_TWQ_LAZY_WAKE = 1, |
20 | }; |
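/*
 * Illustrative sketch, not part of this header: a completion path that
 * generates exactly one CQE could pass the hint when queueing its
 * task_work, assuming the __io_req_task_work_add() helper declared in
 * io_uring/io_uring.h:
 *
 *	__io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE);
 *
 * The waiter is then only woken once enough lazily queued items have
 * accumulated to satisfy the number of CQEs it is waiting for; without
 * IORING_SETUP_DEFER_TASKRUN the hint is simply ignored.
 */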
21 | |
22 | enum io_uring_cmd_flags { |
23 | IO_URING_F_COMPLETE_DEFER = 1, |
24 | IO_URING_F_UNLOCKED = 2, |
25 | /* the request is executed from poll, it should not be freed */ |
26 | IO_URING_F_MULTISHOT = 4, |
27 | /* executed by io-wq */ |
28 | IO_URING_F_IOWQ = 8, |
29 | /* int's last bit, sign checks are usually faster than a bit test */ |
30 | IO_URING_F_NONBLOCK = INT_MIN, |
31 | |
32 | /* ctx state flags, for URING_CMD */ |
33 | IO_URING_F_SQE128 = (1 << 8), |
34 | IO_URING_F_CQE32 = (1 << 9), |
35 | IO_URING_F_IOPOLL = (1 << 10), |
36 | |
37 | /* set when uring wants to cancel a previously issued command */ |
38 | IO_URING_F_CANCEL = (1 << 11), |
39 | IO_URING_F_COMPAT = (1 << 12), |
40 | }; |
41 | |
42 | struct io_wq_work_node { |
43 | struct io_wq_work_node *next; |
44 | }; |
45 | |
46 | struct io_wq_work_list { |
47 | struct io_wq_work_node *first; |
48 | struct io_wq_work_node *last; |
49 | }; |
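/*
 * Illustrative sketch, not part of this header: io_wq_work_list is a
 * singly linked tail queue of embedded io_wq_work_node entries. A
 * hypothetical append helper (the real ones live in io_uring/slist.h)
 * would look like:
 *
 *	static inline void example_wq_list_add_tail(struct io_wq_work_node *node,
 *						    struct io_wq_work_list *list)
 *	{
 *		node->next = NULL;
 *		if (!list->first)
 *			list->first = node;
 *		else
 *			list->last->next = node;
 *		list->last = node;
 *	}
 */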
50 | |
51 | struct io_wq_work { |
52 | struct io_wq_work_node list; |
53 | unsigned flags; |
54 | /* place it here instead of io_kiocb as it fills padding and saves 4B */ |
55 | int cancel_seq; |
56 | }; |
57 | |
58 | struct io_fixed_file { |
59 | /* file * with additional FFS_* flags */ |
60 | unsigned long file_ptr; |
61 | }; |
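/*
 * Illustrative sketch, not part of this header: file_ptr packs the FFS_*
 * state bits into the low, alignment-guaranteed bits of the struct file
 * pointer. Assuming a hypothetical EXAMPLE_FFS_FLAG_MASK covering those
 * low bits, unpacking would look like:
 *
 *	struct file *file = (struct file *)(f->file_ptr & ~EXAMPLE_FFS_FLAG_MASK);
 *	unsigned long flags = f->file_ptr & EXAMPLE_FFS_FLAG_MASK;
 */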
62 | |
63 | struct io_file_table { |
64 | struct io_fixed_file *files; |
65 | unsigned long *bitmap; |
66 | unsigned int alloc_hint; |
67 | }; |
68 | |
69 | struct io_hash_bucket { |
70 | spinlock_t lock; |
71 | struct hlist_head list; |
72 | } ____cacheline_aligned_in_smp; |
73 | |
74 | struct io_hash_table { |
75 | struct io_hash_bucket *hbs; |
76 | unsigned hash_bits; |
77 | }; |
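/*
 * Illustrative sketch, not part of this header: hash_bits fixes the table
 * at (1 << hash_bits) buckets, so a lookup hashes the key down to a
 * bucket index and takes only that bucket's lock:
 *
 *	struct io_hash_bucket *hb = &table->hbs[hash_long(key, table->hash_bits)];
 *
 *	spin_lock(&hb->lock);
 *	(walk hb->list)
 *	spin_unlock(&hb->lock);
 */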
78 | |
79 | /* |
80 | * Arbitrary limit, can be raised if need be |
81 | */ |
82 | #define IO_RINGFD_REG_MAX 16 |
83 | |
84 | struct io_uring_task { |
85 | /* submission side */ |
86 | int cached_refs; |
87 | const struct io_ring_ctx *last; |
88 | struct io_wq *io_wq; |
89 | struct file *registered_rings[IO_RINGFD_REG_MAX]; |
90 | |
91 | struct xarray xa; |
92 | struct wait_queue_head wait; |
93 | atomic_t in_cancel; |
94 | atomic_t inflight_tracked; |
95 | struct percpu_counter inflight; |
96 | |
97 | struct { /* task_work */ |
98 | struct llist_head task_list; |
99 | struct callback_head task_work; |
100 | } ____cacheline_aligned_in_smp; |
101 | }; |
102 | |
103 | struct io_uring { |
104 | u32 head; |
105 | u32 tail; |
106 | }; |
107 | |
108 | /* |
109 | * This data is shared with the application through the mmap at offsets |
110 | * IORING_OFF_SQ_RING and IORING_OFF_CQ_RING. |
111 | * |
112 | * The offsets to the member fields are published through struct |
113 | * io_sqring_offsets when calling io_uring_setup. |
114 | */ |
115 | struct io_rings { |
116 | /* |
117 | * Head and tail offsets into the ring; the offsets need to be |
118 | * masked to get valid indices. |
119 | * |
120 | * The kernel controls head of the sq ring and the tail of the cq ring, |
121 | * and the application controls tail of the sq ring and the head of the |
122 | * cq ring. |
123 | */ |
124 | struct io_uring sq, cq; |
125 | /* |
126 | * Bitmasks to apply to head and tail offsets (constant, equals |
127 | * ring_entries - 1) |
128 | */ |
129 | u32 sq_ring_mask, cq_ring_mask; |
130 | /* Ring sizes (constant, power of 2) */ |
131 | u32 sq_ring_entries, cq_ring_entries; |
132 | /* |
133 | * Number of invalid entries dropped by the kernel due to |
134 | * invalid index stored in array |
135 | * |
136 | * Written by the kernel, shouldn't be modified by the |
137 | * application (i.e. get number of "new events" by comparing to |
138 | * cached value). |
139 | * |
	 * After the application has read a new SQ head value, this
	 * counter includes all submissions that were dropped before
	 * reaching that head (and possibly more).
143 | */ |
144 | u32 sq_dropped; |
145 | /* |
146 | * Runtime SQ flags |
147 | * |
148 | * Written by the kernel, shouldn't be modified by the |
149 | * application. |
150 | * |
151 | * The application needs a full memory barrier before checking |
152 | * for IORING_SQ_NEED_WAKEUP after updating the sq tail. |
153 | */ |
154 | atomic_t sq_flags; |
155 | /* |
156 | * Runtime CQ flags |
157 | * |
158 | * Written by the application, shouldn't be modified by the |
159 | * kernel. |
160 | */ |
161 | u32 cq_flags; |
162 | /* |
163 | * Number of completion events lost because the queue was full; |
164 | * this should be avoided by the application by making sure |
165 | * there are not more requests pending than there is space in |
166 | * the completion queue. |
167 | * |
168 | * Written by the kernel, shouldn't be modified by the |
169 | * application (i.e. get number of "new events" by comparing to |
170 | * cached value). |
171 | * |
	 * As completion events come in out of order, this counter is not
	 * ordered with respect to any other data.
174 | */ |
175 | u32 cq_overflow; |
176 | /* |
177 | * Ring buffer of completion events. |
178 | * |
179 | * The kernel writes completion events fresh every time they are |
180 | * produced, so the application is allowed to modify pending |
181 | * entries. |
182 | */ |
183 | struct io_uring_cqe cqes[] ____cacheline_aligned_in_smp; |
184 | }; |
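/*
 * Illustrative userspace sketch, not part of this header: once the CQ
 * ring is mmapped, the pointers below are derived from the offsets
 * published in struct io_cqring_offsets, and completions are reaped by
 * advancing cq.head towards cq.tail with each index masked by
 * cq_ring_mask (liburing wraps all of this):
 *
 *	unsigned head = *cq_head;
 *	unsigned tail = __atomic_load_n(cq_tail, __ATOMIC_ACQUIRE);
 *
 *	while (head != tail) {
 *		struct io_uring_cqe *cqe = &cqes[head & *cq_mask];
 *		handle_completion(cqe);
 *		head++;
 *	}
 *	__atomic_store_n(cq_head, head, __ATOMIC_RELEASE);
 *
 * cq_head, cq_tail, cq_mask and cqes stand for the mmapped ring fields;
 * handle_completion() stands in for application code.
 */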
185 | |
186 | struct io_restriction { |
187 | DECLARE_BITMAP(register_op, IORING_REGISTER_LAST); |
188 | DECLARE_BITMAP(sqe_op, IORING_OP_LAST); |
189 | u8 sqe_flags_allowed; |
190 | u8 sqe_flags_required; |
191 | bool registered; |
192 | }; |
193 | |
194 | struct io_submit_link { |
195 | struct io_kiocb *head; |
196 | struct io_kiocb *last; |
197 | }; |
198 | |
199 | struct io_submit_state { |
200 | /* inline/task_work completion list, under ->uring_lock */ |
201 | struct io_wq_work_node free_list; |
202 | /* batch completion logic */ |
203 | struct io_wq_work_list compl_reqs; |
204 | struct io_submit_link link; |
205 | |
206 | bool plug_started; |
207 | bool need_plug; |
208 | unsigned short submit_nr; |
209 | unsigned int cqes_count; |
210 | struct blk_plug plug; |
211 | }; |
212 | |
213 | struct io_ev_fd { |
214 | struct eventfd_ctx *cq_ev_fd; |
215 | unsigned int eventfd_async: 1; |
216 | struct rcu_head rcu; |
217 | atomic_t refs; |
218 | atomic_t ops; |
219 | }; |
220 | |
221 | struct io_alloc_cache { |
222 | struct io_wq_work_node list; |
223 | unsigned int nr_cached; |
224 | unsigned int max_cached; |
225 | size_t elem_size; |
226 | }; |
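/*
 * Illustrative sketch, not part of this header: the cache is a bounded
 * free list threaded through an io_wq_work_node placed at the start of
 * each cached element. A hypothetical pop, called under ->uring_lock,
 * would look like (the real helpers live in io_uring/alloc_cache.h):
 *
 *	void *example_cache_get(struct io_alloc_cache *cache)
 *	{
 *		struct io_wq_work_node *node = cache->list.next;
 *
 *		if (!node)
 *			return NULL;
 *		cache->list.next = node->next;
 *		cache->nr_cached--;
 *		return node;
 *	}
 */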
227 | |
228 | struct io_ring_ctx { |
229 | /* const or read-mostly hot data */ |
230 | struct { |
231 | unsigned int flags; |
232 | unsigned int drain_next: 1; |
233 | unsigned int restricted: 1; |
234 | unsigned int off_timeout_used: 1; |
235 | unsigned int drain_active: 1; |
236 | unsigned int has_evfd: 1; |
237 | /* all CQEs should be posted only by the submitter task */ |
238 | unsigned int task_complete: 1; |
239 | unsigned int lockless_cq: 1; |
240 | unsigned int syscall_iopoll: 1; |
241 | unsigned int poll_activated: 1; |
242 | unsigned int drain_disabled: 1; |
243 | unsigned int compat: 1; |
244 | unsigned int iowq_limits_set : 1; |
245 | |
246 | struct task_struct *submitter_task; |
247 | struct io_rings *rings; |
248 | struct percpu_ref refs; |
249 | |
250 | enum task_work_notify_mode notify_method; |
251 | unsigned sq_thread_idle; |
252 | } ____cacheline_aligned_in_smp; |
253 | |
254 | /* submission data */ |
255 | struct { |
256 | struct mutex uring_lock; |
257 | |
258 | /* |
259 | * Ring buffer of indices into array of io_uring_sqe, which is |
260 | * mmapped by the application using the IORING_OFF_SQES offset. |
261 | * |
262 | * This indirection could e.g. be used to assign fixed |
263 | * io_uring_sqe entries to operations and only submit them to |
264 | * the queue when needed. |
265 | * |
266 | * The kernel modifies neither the indices array nor the entries |
267 | * array. |
268 | */ |
269 | u32 *sq_array; |
270 | struct io_uring_sqe *sq_sqes; |
271 | unsigned cached_sq_head; |
272 | unsigned sq_entries; |
273 | |
274 | /* |
275 | * Fixed resources fast path, should be accessed only under |
276 | * uring_lock, and updated through io_uring_register(2) |
277 | */ |
278 | struct io_rsrc_node *rsrc_node; |
279 | atomic_t cancel_seq; |
280 | |
281 | /* |
282 | * ->iopoll_list is protected by the ctx->uring_lock for |
283 | * io_uring instances that don't use IORING_SETUP_SQPOLL. |
284 | * For SQPOLL, only the single threaded io_sq_thread() will |
285 | * manipulate the list, hence no extra locking is needed there. |
286 | */ |
287 | bool poll_multi_queue; |
288 | struct io_wq_work_list iopoll_list; |
289 | |
290 | struct io_file_table file_table; |
291 | struct io_mapped_ubuf **user_bufs; |
292 | unsigned nr_user_files; |
293 | unsigned nr_user_bufs; |
294 | |
295 | struct io_submit_state submit_state; |
296 | |
297 | struct xarray io_bl_xa; |
298 | |
299 | struct io_hash_table cancel_table_locked; |
300 | struct io_alloc_cache apoll_cache; |
301 | struct io_alloc_cache netmsg_cache; |
302 | |
303 | /* |
304 | * Any cancelable uring_cmd is added to this list in |
		 * ->uring_cmd() by io_uring_cmd_mark_cancelable()
306 | */ |
307 | struct hlist_head cancelable_uring_cmd; |
308 | } ____cacheline_aligned_in_smp; |
309 | |
310 | struct { |
311 | /* |
312 | * We cache a range of free CQEs we can use, once exhausted it |
313 | * should go through a slower range setup, see __io_get_cqe() |
314 | */ |
315 | struct io_uring_cqe *cqe_cached; |
316 | struct io_uring_cqe *cqe_sentinel; |
317 | |
318 | unsigned cached_cq_tail; |
319 | unsigned cq_entries; |
320 | struct io_ev_fd __rcu *io_ev_fd; |
		unsigned cq_extra;
322 | } ____cacheline_aligned_in_smp; |
323 | |
324 | /* |
325 | * task_work and async notification delivery cacheline. Expected to |
326 | * regularly bounce b/w CPUs. |
327 | */ |
328 | struct { |
329 | struct llist_head work_llist; |
330 | unsigned long check_cq; |
331 | atomic_t cq_wait_nr; |
332 | atomic_t cq_timeouts; |
333 | struct wait_queue_head cq_wait; |
334 | } ____cacheline_aligned_in_smp; |
335 | |
336 | /* timeouts */ |
337 | struct { |
338 | spinlock_t timeout_lock; |
339 | struct list_head timeout_list; |
340 | struct list_head ltimeout_list; |
341 | unsigned cq_last_tm_flush; |
342 | } ____cacheline_aligned_in_smp; |
343 | |
344 | struct io_uring_cqe completion_cqes[16]; |
345 | |
346 | spinlock_t completion_lock; |
347 | |
348 | /* IRQ completion list, under ->completion_lock */ |
349 | unsigned int locked_free_nr; |
350 | struct io_wq_work_list locked_free_list; |
351 | |
352 | struct list_head io_buffers_comp; |
353 | struct list_head cq_overflow_list; |
354 | struct io_hash_table cancel_table; |
355 | |
356 | struct hlist_head waitid_list; |
357 | |
358 | #ifdef CONFIG_FUTEX |
359 | struct hlist_head futex_list; |
360 | struct io_alloc_cache futex_cache; |
361 | #endif |
362 | |
363 | const struct cred *sq_creds; /* cred used for __io_sq_thread() */ |
364 | struct io_sq_data *sq_data; /* if using sq thread polling */ |
365 | |
366 | struct wait_queue_head sqo_sq_wait; |
367 | struct list_head sqd_list; |
368 | |
369 | unsigned int file_alloc_start; |
370 | unsigned int file_alloc_end; |
371 | |
372 | struct list_head io_buffers_cache; |
373 | |
374 | /* deferred free list, protected by ->uring_lock */ |
375 | struct hlist_head io_buf_list; |
376 | |
377 | /* Keep this last, we don't need it for the fast path */ |
378 | struct wait_queue_head poll_wq; |
379 | struct io_restriction restrictions; |
380 | |
	/* slow path rsrc auxiliary data, used by update/register */
382 | struct io_mapped_ubuf *dummy_ubuf; |
383 | struct io_rsrc_data *file_data; |
384 | struct io_rsrc_data *buf_data; |
385 | |
386 | /* protected by ->uring_lock */ |
387 | struct list_head rsrc_ref_list; |
388 | struct io_alloc_cache rsrc_node_cache; |
389 | struct wait_queue_head rsrc_quiesce_wq; |
390 | unsigned rsrc_quiesce; |
391 | |
392 | u32 pers_next; |
393 | struct xarray personalities; |
394 | |
395 | /* hashed buffered write serialization */ |
396 | struct io_wq_hash *hash_map; |
397 | |
398 | /* Only used for accounting purposes */ |
399 | struct user_struct *user; |
400 | struct mm_struct *mm_account; |
401 | |
402 | /* ctx exit and cancelation */ |
403 | struct llist_head fallback_llist; |
404 | struct delayed_work fallback_work; |
405 | struct work_struct exit_work; |
406 | struct list_head tctx_list; |
407 | struct completion ref_comp; |
408 | |
409 | /* io-wq management, e.g. thread count */ |
410 | u32 iowq_limits[2]; |
411 | |
412 | struct callback_head poll_wq_task_work; |
413 | struct list_head defer_list; |
414 | |
415 | #ifdef CONFIG_NET_RX_BUSY_POLL |
416 | struct list_head napi_list; /* track busy poll napi_id */ |
417 | spinlock_t napi_lock; /* napi_list lock */ |
418 | |
419 | /* napi busy poll default timeout */ |
420 | unsigned int napi_busy_poll_to; |
421 | bool napi_prefer_busy_poll; |
422 | bool napi_enabled; |
423 | |
424 | DECLARE_HASHTABLE(napi_ht, 4); |
425 | #endif |
426 | |
427 | /* protected by ->completion_lock */ |
428 | unsigned evfd_last_cq_tail; |
429 | |
430 | /* |
431 | * If IORING_SETUP_NO_MMAP is used, then the below holds |
432 | * the gup'ed pages for the two rings, and the sqes. |
433 | */ |
434 | unsigned short n_ring_pages; |
435 | unsigned short n_sqe_pages; |
436 | struct page **ring_pages; |
437 | struct page **sqe_pages; |
438 | }; |
439 | |
440 | struct io_tw_state { |
441 | /* ->uring_lock is taken, callbacks can use io_tw_lock to lock it */ |
442 | bool locked; |
443 | }; |
444 | |
445 | enum { |
446 | REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT, |
447 | REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT, |
448 | REQ_F_LINK_BIT = IOSQE_IO_LINK_BIT, |
449 | REQ_F_HARDLINK_BIT = IOSQE_IO_HARDLINK_BIT, |
450 | REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT, |
451 | REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT, |
452 | REQ_F_CQE_SKIP_BIT = IOSQE_CQE_SKIP_SUCCESS_BIT, |
453 | |
	/* first byte is taken by user IOSQE_* flags, internal flags start at bit 8 */
455 | REQ_F_FAIL_BIT = 8, |
456 | REQ_F_INFLIGHT_BIT, |
457 | REQ_F_CUR_POS_BIT, |
458 | REQ_F_NOWAIT_BIT, |
459 | REQ_F_LINK_TIMEOUT_BIT, |
460 | REQ_F_NEED_CLEANUP_BIT, |
461 | REQ_F_POLLED_BIT, |
462 | REQ_F_BUFFER_SELECTED_BIT, |
463 | REQ_F_BUFFER_RING_BIT, |
464 | REQ_F_REISSUE_BIT, |
465 | REQ_F_CREDS_BIT, |
466 | REQ_F_REFCOUNT_BIT, |
467 | REQ_F_ARM_LTIMEOUT_BIT, |
468 | REQ_F_ASYNC_DATA_BIT, |
469 | REQ_F_SKIP_LINK_CQES_BIT, |
470 | REQ_F_SINGLE_POLL_BIT, |
471 | REQ_F_DOUBLE_POLL_BIT, |
472 | REQ_F_APOLL_MULTISHOT_BIT, |
473 | REQ_F_CLEAR_POLLIN_BIT, |
474 | REQ_F_HASH_LOCKED_BIT, |
475 | /* keep async read/write and isreg together and in order */ |
476 | REQ_F_SUPPORT_NOWAIT_BIT, |
477 | REQ_F_ISREG_BIT, |
478 | REQ_F_POLL_NO_LAZY_BIT, |
479 | REQ_F_CANCEL_SEQ_BIT, |
480 | REQ_F_CAN_POLL_BIT, |
481 | REQ_F_BL_EMPTY_BIT, |
482 | REQ_F_BL_NO_RECYCLE_BIT, |
483 | |
484 | /* not a real bit, just to check we're not overflowing the space */ |
485 | __REQ_F_LAST_BIT, |
486 | }; |
487 | |
488 | typedef u64 __bitwise io_req_flags_t; |
489 | #define IO_REQ_FLAG(bitno) ((__force io_req_flags_t) BIT_ULL((bitno))) |
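/*
 * Worked example: the first block of REQ_F_* bits reuses the IOSQE_*_BIT
 * values, so the low byte of io_req_flags_t mirrors the sqe_flags byte
 * submitted by the application, e.g.:
 *
 *	IO_REQ_FLAG(IOSQE_IO_LINK_BIT) == (io_req_flags_t)IOSQE_IO_LINK
 *
 * which is exactly what REQ_F_LINK below expands to.
 */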
490 | |
491 | enum { |
492 | /* ctx owns file */ |
493 | REQ_F_FIXED_FILE = IO_REQ_FLAG(REQ_F_FIXED_FILE_BIT), |
494 | /* drain existing IO first */ |
495 | REQ_F_IO_DRAIN = IO_REQ_FLAG(REQ_F_IO_DRAIN_BIT), |
496 | /* linked sqes */ |
497 | REQ_F_LINK = IO_REQ_FLAG(REQ_F_LINK_BIT), |
498 | /* doesn't sever on completion < 0 */ |
499 | REQ_F_HARDLINK = IO_REQ_FLAG(REQ_F_HARDLINK_BIT), |
500 | /* IOSQE_ASYNC */ |
501 | REQ_F_FORCE_ASYNC = IO_REQ_FLAG(REQ_F_FORCE_ASYNC_BIT), |
502 | /* IOSQE_BUFFER_SELECT */ |
503 | REQ_F_BUFFER_SELECT = IO_REQ_FLAG(REQ_F_BUFFER_SELECT_BIT), |
504 | /* IOSQE_CQE_SKIP_SUCCESS */ |
505 | REQ_F_CQE_SKIP = IO_REQ_FLAG(REQ_F_CQE_SKIP_BIT), |
506 | |
507 | /* fail rest of links */ |
508 | REQ_F_FAIL = IO_REQ_FLAG(REQ_F_FAIL_BIT), |
509 | /* on inflight list, should be cancelled and waited on exit reliably */ |
510 | REQ_F_INFLIGHT = IO_REQ_FLAG(REQ_F_INFLIGHT_BIT), |
511 | /* read/write uses file position */ |
512 | REQ_F_CUR_POS = IO_REQ_FLAG(REQ_F_CUR_POS_BIT), |
513 | /* must not punt to workers */ |
514 | REQ_F_NOWAIT = IO_REQ_FLAG(REQ_F_NOWAIT_BIT), |
515 | /* has or had linked timeout */ |
516 | REQ_F_LINK_TIMEOUT = IO_REQ_FLAG(REQ_F_LINK_TIMEOUT_BIT), |
517 | /* needs cleanup */ |
518 | REQ_F_NEED_CLEANUP = IO_REQ_FLAG(REQ_F_NEED_CLEANUP_BIT), |
519 | /* already went through poll handler */ |
520 | REQ_F_POLLED = IO_REQ_FLAG(REQ_F_POLLED_BIT), |
521 | /* buffer already selected */ |
522 | REQ_F_BUFFER_SELECTED = IO_REQ_FLAG(REQ_F_BUFFER_SELECTED_BIT), |
523 | /* buffer selected from ring, needs commit */ |
524 | REQ_F_BUFFER_RING = IO_REQ_FLAG(REQ_F_BUFFER_RING_BIT), |
525 | /* caller should reissue async */ |
526 | REQ_F_REISSUE = IO_REQ_FLAG(REQ_F_REISSUE_BIT), |
	/* file supports non-blocking (NOWAIT) reads/writes */
528 | REQ_F_SUPPORT_NOWAIT = IO_REQ_FLAG(REQ_F_SUPPORT_NOWAIT_BIT), |
529 | /* regular file */ |
530 | REQ_F_ISREG = IO_REQ_FLAG(REQ_F_ISREG_BIT), |
531 | /* has creds assigned */ |
532 | REQ_F_CREDS = IO_REQ_FLAG(REQ_F_CREDS_BIT), |
533 | /* skip refcounting if not set */ |
534 | REQ_F_REFCOUNT = IO_REQ_FLAG(REQ_F_REFCOUNT_BIT), |
535 | /* there is a linked timeout that has to be armed */ |
536 | REQ_F_ARM_LTIMEOUT = IO_REQ_FLAG(REQ_F_ARM_LTIMEOUT_BIT), |
537 | /* ->async_data allocated */ |
538 | REQ_F_ASYNC_DATA = IO_REQ_FLAG(REQ_F_ASYNC_DATA_BIT), |
539 | /* don't post CQEs while failing linked requests */ |
540 | REQ_F_SKIP_LINK_CQES = IO_REQ_FLAG(REQ_F_SKIP_LINK_CQES_BIT), |
541 | /* single poll may be active */ |
542 | REQ_F_SINGLE_POLL = IO_REQ_FLAG(REQ_F_SINGLE_POLL_BIT), |
	/* double poll may be active */
544 | REQ_F_DOUBLE_POLL = IO_REQ_FLAG(REQ_F_DOUBLE_POLL_BIT), |
545 | /* fast poll multishot mode */ |
546 | REQ_F_APOLL_MULTISHOT = IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT), |
547 | /* recvmsg special flag, clear EPOLLIN */ |
548 | REQ_F_CLEAR_POLLIN = IO_REQ_FLAG(REQ_F_CLEAR_POLLIN_BIT), |
	/* hashed into ->cancel_table_locked, protected by ->uring_lock */
550 | REQ_F_HASH_LOCKED = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT), |
551 | /* don't use lazy poll wake for this request */ |
552 | REQ_F_POLL_NO_LAZY = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT), |
553 | /* cancel sequence is set and valid */ |
554 | REQ_F_CANCEL_SEQ = IO_REQ_FLAG(REQ_F_CANCEL_SEQ_BIT), |
555 | /* file is pollable */ |
556 | REQ_F_CAN_POLL = IO_REQ_FLAG(REQ_F_CAN_POLL_BIT), |
557 | /* buffer list was empty after selection of buffer */ |
558 | REQ_F_BL_EMPTY = IO_REQ_FLAG(REQ_F_BL_EMPTY_BIT), |
559 | /* don't recycle provided buffers for this request */ |
560 | REQ_F_BL_NO_RECYCLE = IO_REQ_FLAG(REQ_F_BL_NO_RECYCLE_BIT), |
561 | }; |
562 | |
563 | typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts); |
564 | |
565 | struct io_task_work { |
566 | struct llist_node node; |
567 | io_req_tw_func_t func; |
568 | }; |
569 | |
570 | struct io_cqe { |
571 | __u64 user_data; |
572 | __s32 res; |
573 | /* fd initially, then cflags for completion */ |
574 | union { |
575 | __u32 flags; |
576 | int fd; |
577 | }; |
578 | }; |
579 | |
580 | /* |
581 | * Each request type overlays its private data structure on top of this one. |
582 | * They must not exceed this one in size. |
583 | */ |
584 | struct io_cmd_data { |
585 | struct file *file; |
586 | /* each command gets 56 bytes of data */ |
587 | __u8 data[56]; |
588 | }; |
589 | |
590 | static inline void io_kiocb_cmd_sz_check(size_t cmd_sz) |
591 | { |
592 | BUILD_BUG_ON(cmd_sz > sizeof(struct io_cmd_data)); |
593 | } |
594 | #define io_kiocb_to_cmd(req, cmd_type) ( \ |
595 | io_kiocb_cmd_sz_check(sizeof(cmd_type)) , \ |
596 | ((cmd_type *)&(req)->cmd) \ |
597 | ) |
598 | #define cmd_to_io_kiocb(ptr) ((struct io_kiocb *) ptr) |
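/*
 * Illustrative sketch, not part of this header: an opcode handler overlays
 * its private state on req->cmd via io_kiocb_to_cmd() and can map back
 * with cmd_to_io_kiocb(). With a hypothetical per-op struct (the file
 * pointer must stay the first member so it lines up with io_kiocb):
 *
 *	struct io_example_op {
 *		struct file *file;
 *		u32 len;
 *	};
 *
 *	int io_example_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 *	{
 *		struct io_example_op *op = io_kiocb_to_cmd(req, struct io_example_op);
 *
 *		op->len = READ_ONCE(sqe->len);
 *		return 0;
 *	}
 *
 * io_kiocb_cmd_sz_check() fails the build if the per-op struct ever grows
 * larger than struct io_cmd_data.
 */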
599 | |
600 | struct io_kiocb { |
601 | union { |
602 | /* |
603 | * NOTE! Each of the io_kiocb union members has the file pointer |
604 | * as the first entry in their struct definition. So you can |
605 | * access the file pointer through any of the sub-structs, |
606 | * or directly as just 'file' in this struct. |
607 | */ |
608 | struct file *file; |
609 | struct io_cmd_data cmd; |
610 | }; |
611 | |
612 | u8 opcode; |
613 | /* polled IO has completed */ |
614 | u8 iopoll_completed; |
615 | /* |
	 * Either a fixed buffer index, or used with provided buffers.
	 * For the latter, it holds the buffer group ID before issue and
	 * the selected buffer ID after buffer selection.
619 | */ |
620 | u16 buf_index; |
621 | |
622 | unsigned nr_tw; |
623 | |
624 | /* REQ_F_* flags */ |
625 | io_req_flags_t flags; |
626 | |
627 | struct io_cqe cqe; |
628 | |
629 | struct io_ring_ctx *ctx; |
630 | struct task_struct *task; |
631 | |
632 | union { |
633 | /* store used ubuf, so we can prevent reloading */ |
634 | struct io_mapped_ubuf *imu; |
635 | |
636 | /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */ |
637 | struct io_buffer *kbuf; |
638 | |
639 | /* |
640 | * stores buffer ID for ring provided buffers, valid IFF |
641 | * REQ_F_BUFFER_RING is set. |
642 | */ |
643 | struct io_buffer_list *buf_list; |
644 | }; |
645 | |
646 | union { |
647 | /* used by request caches, completion batching and iopoll */ |
648 | struct io_wq_work_node comp_list; |
649 | /* cache ->apoll->events */ |
650 | __poll_t apoll_events; |
651 | }; |
652 | |
653 | struct io_rsrc_node *rsrc_node; |
654 | |
655 | atomic_t refs; |
656 | atomic_t poll_refs; |
657 | struct io_task_work io_task_work; |
658 | /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */ |
659 | struct hlist_node hash_node; |
660 | /* internal polling, see IORING_FEAT_FAST_POLL */ |
661 | struct async_poll *apoll; |
	/* allocated by the opcode if it needs to store data for async deferral */
663 | void *async_data; |
664 | /* linked requests, IFF REQ_F_HARDLINK or REQ_F_LINK are set */ |
665 | struct io_kiocb *link; |
666 | /* custom credentials, valid IFF REQ_F_CREDS is set */ |
667 | const struct cred *creds; |
668 | struct io_wq_work work; |
669 | |
	/* extra CQE payload, only used with IORING_SETUP_CQE32 */
	struct {
		u64 extra1;
		u64 extra2;
673 | } big_cqe; |
674 | }; |
675 | |
676 | struct io_overflow_cqe { |
677 | struct list_head list; |
678 | struct io_uring_cqe cqe; |
679 | }; |
680 | |
681 | #endif |
682 | |