1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* XDP user-space ring structure |
3 | * Copyright(c) 2018 Intel Corporation. |
4 | */ |
5 | |
6 | #ifndef _LINUX_XSK_QUEUE_H |
7 | #define _LINUX_XSK_QUEUE_H |
8 | |
9 | #include <linux/types.h> |
10 | #include <linux/if_xdp.h> |
11 | #include <net/xdp_sock.h> |
12 | #include <net/xsk_buff_pool.h> |
13 | |
14 | #include "xsk.h" |
15 | |
16 | struct xdp_ring { |
17 | u32 producer ____cacheline_aligned_in_smp; |
18 | /* Hinder the adjacent cache line prefetcher from prefetching the consumer |
19 | * pointer when the producer pointer is touched, and vice versa. |
20 | */ |
21 | u32 pad1 ____cacheline_aligned_in_smp; |
22 | u32 consumer ____cacheline_aligned_in_smp; |
23 | u32 pad2 ____cacheline_aligned_in_smp; |
24 | u32 flags; |
25 | u32 pad3 ____cacheline_aligned_in_smp; |
26 | }; |
27 | |
28 | /* Used for the RX and TX queues for packets */ |
29 | struct xdp_rxtx_ring { |
30 | struct xdp_ring ptrs; |
31 | struct xdp_desc desc[] ____cacheline_aligned_in_smp; |
32 | }; |
33 | |
34 | /* Used for the fill and completion queues for buffers */ |
35 | struct xdp_umem_ring { |
36 | struct xdp_ring ptrs; |
37 | u64 desc[] ____cacheline_aligned_in_smp; |
38 | }; |
39 | |
40 | struct xsk_queue { |
41 | u32 ring_mask; |
42 | u32 nentries; |
43 | u32 cached_prod; |
44 | u32 cached_cons; |
45 | struct xdp_ring *ring; |
46 | u64 invalid_descs; |
47 | u64 queue_empty_descs; |
48 | size_t ring_vmalloc_size; |
49 | }; |
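
/* Indexing convention: nentries is expected to be a power of two and
 * ring_mask to equal nentries - 1, so a slot is selected by masking a
 * free-running 32-bit index. The index may wrap; the mask keeps it inside
 * the ring. A minimal sketch with made-up values:
 *
 *     nentries = 8, ring_mask = 7
 *     cached_cons = 0xfffffffe  ->  slot 6
 *     cached_cons = 0xffffffff  ->  slot 7
 *     cached_cons = 0x00000000  ->  slot 0 (wrap-around is harmless)
 *
 * The number of used entries is producer - consumer, which stays correct
 * in unsigned arithmetic across the wrap.
 */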
50 | |
51 | struct parsed_desc { |
52 | u32 mb; |
53 | u32 valid; |
54 | }; |
55 | |
56 | /* The structure of the shared state of the rings is a simple |
57 | * circular buffer, as outlined in |
58 | * Documentation/core-api/circular-buffers.rst. For the Rx and |
59 | * completion ring, the kernel is the producer and user space is the |
60 | * consumer. For the Tx and fill rings, the kernel is the consumer and |
61 | * user space is the producer. |
62 | * |
63 | * producer consumer |
64 | * |
65 | * if (LOAD ->consumer) { (A) LOAD.acq ->producer (C) |
66 | * STORE $data LOAD $data |
67 | * STORE.rel ->producer (B) STORE.rel ->consumer (D) |
68 | * } |
69 | * |
70 | * (A) pairs with (D), and (B) pairs with (C). |
71 | * |
72 | * Starting with (B), it protects the data from being written after |
73 | * the producer pointer. If this barrier was missing, the consumer |
74 | * could observe the producer pointer being set and thus load the data |
75 | * before the producer has written the new data. The consumer would in |
76 | * this case load the old data. |
77 | * |
78 | * (C) protects the consumer from speculatively loading the data before |
79 | * the producer pointer actually has been read. If we do not have this |
80 | * barrier, some architectures could load old data as speculative loads |
81 | * are not discarded as the CPU does not know there is a dependency |
82 | * between ->producer and data. |
83 | * |
84 | * (A) is a control dependency that separates the load of ->consumer |
85 | * from the stores of $data. In case ->consumer indicates there is no |
86 | * room in the buffer to store $data, we do not store it. The dependency |
87 | * will order both of the stores after the load, so no barrier is needed. |
88 | * |
89 | * (D) protects the load of the data to be observed to happen after the |
90 | * store of the consumer pointer. If we did not have this memory |
91 | * barrier, the producer could observe the consumer pointer being set |
92 | * and overwrite the data with a new value before the consumer got the |
93 | * chance to read the old value. The consumer would thus miss reading |
94 | * the old entry and very likely read the new entry twice, once right |
95 | * now and again after circling through the ring. |
96 | */ |
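
/* As a reading aid, in this file the letters above map onto the following
 * concrete primitives (a summary of the helpers below, not an extra rule):
 *
 *     (A) the READ_ONCE() of ->consumer in xskq_prod_nb_free() together
 *         with the branch in the xskq_prod_reserve*() helpers
 *     (B) the smp_store_release() of ->producer in __xskq_prod_submit()
 *     (C) the smp_load_acquire() of ->producer in __xskq_cons_peek()
 *     (D) the smp_store_release() of ->consumer in __xskq_cons_release()
 */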
97 | |
98 | /* The operations on the rings are the following: |
99 | * |
100 | * producer consumer |
101 | * |
102 | * RESERVE entries PEEK in the ring for entries |
103 | * WRITE data into the ring READ data from the ring |
104 | * SUBMIT entries RELEASE entries |
105 | * |
106 | * The producer reserves one or more entries in the ring. It can then |
107 | * fill in these entries and finally submit them so that they can be |
108 | * seen and read by the consumer. |
109 | * |
110 | * The consumer peeks into the ring to see if the producer has written |
111 | * any new entries. If so, the consumer can then read these entries |
112 | * and when it is done reading them release them back to the producer |
113 | * so that the producer can use these slots to fill in new entries. |
114 | * |
115 | * The function names below reflect these operations. |
116 | */ |
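
/* As an illustration, a kernel-side producer (e.g. filling the Rx ring) and
 * a kernel-side consumer (e.g. draining the Tx ring) roughly follow the
 * pattern below. This is a simplified sketch built from the helpers in this
 * header; batching and error handling are omitted:
 *
 *     producer:
 *         if (xskq_prod_reserve_desc(q, addr, len, 0))    RESERVE + WRITE
 *             return -ENOBUFS;
 *         ...
 *         xskq_prod_submit(q);                            SUBMIT
 *
 *     consumer:
 *         while (xskq_cons_peek_desc(q, &desc, pool)) {   PEEK
 *             ...read and process desc...                 READ
 *             xskq_cons_release(q);                       RELEASE (local)
 *         }
 *         __xskq_cons_release(q);                         publish ->consumer
 */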
117 | |
118 | /* Functions that read and validate content from consumer rings. */ |
119 | |
120 | static inline void __xskq_cons_read_addr_unchecked(struct xsk_queue *q, u32 cached_cons, u64 *addr) |
121 | { |
122 | struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; |
123 | u32 idx = cached_cons & q->ring_mask; |
124 | |
125 | *addr = ring->desc[idx]; |
126 | } |
127 | |
128 | static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr) |
129 | { |
130 | if (q->cached_cons != q->cached_prod) { |
131 | __xskq_cons_read_addr_unchecked(q, q->cached_cons, addr); |
132 | return true; |
133 | } |
134 | |
135 | return false; |
136 | } |
137 | |
138 | static inline bool xp_unused_options_set(u32 options) |
139 | { |
140 | return options & ~XDP_PKT_CONTD; |
141 | } |
142 | |
143 | static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, |
144 | struct xdp_desc *desc) |
145 | { |
146 | u64 offset = desc->addr & (pool->chunk_size - 1); |
147 | |
148 | if (!desc->len) |
149 | return false; |
150 | |
151 | if (offset + desc->len > pool->chunk_size) |
152 | return false; |
153 | |
154 | if (desc->addr >= pool->addrs_cnt) |
155 | return false; |
156 | |
157 | if (xp_unused_options_set(desc->options)) |
158 | return false; |
159 | return true; |
160 | } |
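
/* For example (illustrative numbers only): with chunk_size = 2048 and
 * addrs_cnt = 65536, a descriptor with addr = 6244 and len = 1500 passes the
 * checks above: offset = 6244 & 2047 = 100 and 100 + 1500 <= 2048, so the
 * data stays within one chunk. The same addr with len = 2000 is rejected,
 * since 100 + 2000 would cross the chunk boundary.
 */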
161 | |
162 | static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool, |
163 | struct xdp_desc *desc) |
164 | { |
165 | u64 addr = xp_unaligned_add_offset_to_addr(desc->addr); |
166 | |
167 | if (!desc->len) |
168 | return false; |
169 | |
170 | if (desc->len > pool->chunk_size) |
171 | return false; |
172 | |
173 | if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt || |
174 | xp_desc_crosses_non_contig_pg(pool, addr, desc->len)) |
175 | return false; |
176 | |
177 | if (xp_unused_options_set(options: desc->options)) |
178 | return false; |
179 | return true; |
180 | } |
181 | |
182 | static inline bool xp_validate_desc(struct xsk_buff_pool *pool, |
183 | struct xdp_desc *desc) |
184 | { |
185 | return pool->unaligned ? xp_unaligned_validate_desc(pool, desc) : |
186 | xp_aligned_validate_desc(pool, desc); |
187 | } |
188 | |
189 | static inline bool xskq_has_descs(struct xsk_queue *q) |
190 | { |
191 | return q->cached_cons != q->cached_prod; |
192 | } |
193 | |
194 | static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q, |
195 | struct xdp_desc *d, |
196 | struct xsk_buff_pool *pool) |
197 | { |
198 | if (!xp_validate_desc(pool, d)) { |
199 | q->invalid_descs++; |
200 | return false; |
201 | } |
202 | return true; |
203 | } |
204 | |
205 | static inline bool xskq_cons_read_desc(struct xsk_queue *q, |
206 | struct xdp_desc *desc, |
207 | struct xsk_buff_pool *pool) |
208 | { |
209 | if (q->cached_cons != q->cached_prod) { |
210 | struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; |
211 | u32 idx = q->cached_cons & q->ring_mask; |
212 | |
213 | *desc = ring->desc[idx]; |
214 | return xskq_cons_is_valid_desc(q, desc, pool); |
215 | } |
216 | |
217 | q->queue_empty_descs++; |
218 | return false; |
219 | } |
220 | |
221 | static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt) |
222 | { |
223 | q->cached_cons += cnt; |
224 | } |
225 | |
226 | static inline void parse_desc(struct xsk_queue *q, struct xsk_buff_pool *pool, |
227 | struct xdp_desc *desc, struct parsed_desc *parsed) |
228 | { |
229 | parsed->valid = xskq_cons_is_valid_desc(q, desc, pool); |
230 | parsed->mb = xp_mb_desc(desc); |
231 | } |
232 | |
233 | static inline |
234 | u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool, |
235 | u32 max) |
236 | { |
237 | u32 cached_cons = q->cached_cons, nb_entries = 0; |
238 | struct xdp_desc *descs = pool->tx_descs; |
239 | u32 total_descs = 0, nr_frags = 0; |
240 | |
241 | /* Track the first entry; if we stumble upon *any* invalid descriptor, |
242 | * rewind the current packet consisting of frags and stop processing. |
243 | */ |
244 | while (cached_cons != q->cached_prod && nb_entries < max) { |
245 | struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; |
246 | u32 idx = cached_cons & q->ring_mask; |
247 | struct parsed_desc parsed; |
248 | |
249 | descs[nb_entries] = ring->desc[idx]; |
250 | cached_cons++; |
251 | parse_desc(q, pool, &descs[nb_entries], &parsed); |
252 | if (unlikely(!parsed.valid)) |
253 | break; |
254 | |
255 | if (likely(!parsed.mb)) { |
256 | total_descs += (nr_frags + 1); |
257 | nr_frags = 0; |
258 | } else { |
259 | nr_frags++; |
260 | if (nr_frags == pool->netdev->xdp_zc_max_segs) { |
261 | nr_frags = 0; |
262 | break; |
263 | } |
264 | } |
265 | nb_entries++; |
266 | } |
267 | |
268 | cached_cons -= nr_frags; |
269 | /* Release valid plus any invalid entries */ |
270 | xskq_cons_release_n(q, cached_cons - q->cached_cons); |
271 | return total_descs; |
272 | } |
273 | |
274 | /* Functions for consumers */ |
275 | |
276 | static inline void __xskq_cons_release(struct xsk_queue *q) |
277 | { |
278 | smp_store_release(&q->ring->consumer, q->cached_cons); /* D, matches A */ |
279 | } |
280 | |
281 | static inline void __xskq_cons_peek(struct xsk_queue *q) |
282 | { |
283 | /* Refresh the local pointer */ |
284 | q->cached_prod = smp_load_acquire(&q->ring->producer); /* C, matches B */ |
285 | } |
286 | |
287 | static inline void xskq_cons_get_entries(struct xsk_queue *q) |
288 | { |
289 | __xskq_cons_release(q); |
290 | __xskq_cons_peek(q); |
291 | } |
292 | |
293 | static inline u32 xskq_cons_nb_entries(struct xsk_queue *q, u32 max) |
294 | { |
295 | u32 entries = q->cached_prod - q->cached_cons; |
296 | |
297 | if (entries >= max) |
298 | return max; |
299 | |
300 | __xskq_cons_peek(q); |
301 | entries = q->cached_prod - q->cached_cons; |
302 | |
303 | return entries >= max ? max : entries; |
304 | } |
305 | |
306 | static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt) |
307 | { |
308 | return xskq_cons_nb_entries(q, cnt) >= cnt; |
309 | } |
310 | |
311 | static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr) |
312 | { |
313 | if (q->cached_prod == q->cached_cons) |
314 | xskq_cons_get_entries(q); |
315 | return xskq_cons_read_addr_unchecked(q, addr); |
316 | } |
317 | |
318 | static inline bool xskq_cons_peek_desc(struct xsk_queue *q, |
319 | struct xdp_desc *desc, |
320 | struct xsk_buff_pool *pool) |
321 | { |
322 | if (q->cached_prod == q->cached_cons) |
323 | xskq_cons_get_entries(q); |
324 | return xskq_cons_read_desc(q, desc, pool); |
325 | } |
326 | |
327 | /* To improve performance in the xskq_cons_release functions, only update local state here. |
328 | * Reflect this to global state when we get new entries from the ring in |
329 | * xskq_cons_get_entries() and whenever Rx or Tx processing is completed in the NAPI loop. |
330 | */ |
331 | static inline void xskq_cons_release(struct xsk_queue *q) |
332 | { |
333 | q->cached_cons++; |
334 | } |
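
/* A small illustration of the local/global split, with made-up values for a
 * consumer that has peeked three entries and released two of them locally:
 *
 *     q->ring->consumer = 10    last value published to the producer
 *     q->cached_cons    = 12    two entries released locally since then
 *     q->cached_prod    = 13    snapshot of ->producer from the last peek
 *
 * Only when __xskq_cons_release() runs, e.g. from xskq_cons_get_entries() or
 * at the end of Rx/Tx processing, does ->consumer become 12 and the producer
 * see those two slots as free again.
 */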
335 | |
336 | static inline void xskq_cons_cancel_n(struct xsk_queue *q, u32 cnt) |
337 | { |
338 | q->cached_cons -= cnt; |
339 | } |
340 | |
341 | static inline u32 xskq_cons_present_entries(struct xsk_queue *q) |
342 | { |
343 | /* No barriers needed since data is not accessed */ |
344 | return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer); |
345 | } |
346 | |
347 | /* Functions for producers */ |
348 | |
349 | static inline u32 xskq_prod_nb_free(struct xsk_queue *q, u32 max) |
350 | { |
351 | u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons); |
352 | |
353 | if (free_entries >= max) |
354 | return max; |
355 | |
356 | /* Refresh the local tail pointer */ |
357 | q->cached_cons = READ_ONCE(q->ring->consumer); |
358 | free_entries = q->nentries - (q->cached_prod - q->cached_cons); |
359 | |
360 | return free_entries >= max ? max : free_entries; |
361 | } |
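
/* For example (illustrative numbers only): with nentries = 8, cached_prod = 14
 * and cached_cons = 9, the ring holds 14 - 9 = 5 entries, leaving 8 - 5 = 3
 * free slots. The same unsigned arithmetic stays correct across u32
 * wrap-around of the counters.
 */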
362 | |
363 | static inline bool xskq_prod_is_full(struct xsk_queue *q) |
364 | { |
365 | return xskq_prod_nb_free(q, 1) ? false : true; |
366 | } |
367 | |
368 | static inline void xskq_prod_cancel_n(struct xsk_queue *q, u32 cnt) |
369 | { |
370 | q->cached_prod -= cnt; |
371 | } |
372 | |
373 | static inline int xskq_prod_reserve(struct xsk_queue *q) |
374 | { |
375 | if (xskq_prod_is_full(q)) |
376 | return -ENOSPC; |
377 | |
378 | /* A, matches D */ |
379 | q->cached_prod++; |
380 | return 0; |
381 | } |
382 | |
383 | static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr) |
384 | { |
385 | struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; |
386 | |
387 | if (xskq_prod_is_full(q)) |
388 | return -ENOSPC; |
389 | |
390 | /* A, matches D */ |
391 | ring->desc[q->cached_prod++ & q->ring_mask] = addr; |
392 | return 0; |
393 | } |
394 | |
395 | static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_desc *descs, |
396 | u32 nb_entries) |
397 | { |
398 | struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; |
399 | u32 i, cached_prod; |
400 | |
401 | /* A, matches D */ |
402 | cached_prod = q->cached_prod; |
403 | for (i = 0; i < nb_entries; i++) |
404 | ring->desc[cached_prod++ & q->ring_mask] = descs[i].addr; |
405 | q->cached_prod = cached_prod; |
406 | } |
407 | |
408 | static inline int xskq_prod_reserve_desc(struct xsk_queue *q, |
409 | u64 addr, u32 len, u32 flags) |
410 | { |
411 | struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; |
412 | u32 idx; |
413 | |
414 | if (xskq_prod_is_full(q)) |
415 | return -ENOBUFS; |
416 | |
417 | /* A, matches D */ |
418 | idx = q->cached_prod++ & q->ring_mask; |
419 | ring->desc[idx].addr = addr; |
420 | ring->desc[idx].len = len; |
421 | ring->desc[idx].options = flags; |
422 | |
423 | return 0; |
424 | } |
425 | |
426 | static inline void __xskq_prod_submit(struct xsk_queue *q, u32 idx) |
427 | { |
428 | smp_store_release(&q->ring->producer, idx); /* B, matches C */ |
429 | } |
430 | |
431 | static inline void xskq_prod_submit(struct xsk_queue *q) |
432 | { |
433 | __xskq_prod_submit(q, q->cached_prod); |
434 | } |
435 | |
436 | static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries) |
437 | { |
438 | __xskq_prod_submit(q, q->ring->producer + nb_entries); |
439 | } |
440 | |
441 | static inline bool xskq_prod_is_empty(struct xsk_queue *q) |
442 | { |
443 | /* No barriers needed since data is not accessed */ |
444 | return READ_ONCE(q->ring->consumer) == READ_ONCE(q->ring->producer); |
445 | } |
446 | |
447 | /* For both producers and consumers */ |
448 | |
449 | static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q) |
450 | { |
451 | return q ? q->invalid_descs : 0; |
452 | } |
453 | |
454 | static inline u64 xskq_nb_queue_empty_descs(struct xsk_queue *q) |
455 | { |
456 | return q ? q->queue_empty_descs : 0; |
457 | } |
458 | |
459 | struct xsk_queue *xskq_create(u32 nentries, bool umem_queue); |
460 | void xskq_destroy(struct xsk_queue *q_ops); |
461 | |
462 | #endif /* _LINUX_XSK_QUEUE_H */ |
463 | |