1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Copyright(c) 2018 Intel Corporation. */ |
3 | |
4 | #include <linux/bpf_trace.h> |
5 | #include <net/xdp_sock_drv.h> |
6 | #include "i40e_txrx_common.h" |
7 | #include "i40e_xsk.h" |
8 | |
9 | void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring) |
10 | { |
11 | memset(rx_ring->rx_bi_zc, 0, |
12 | sizeof(*rx_ring->rx_bi_zc) * rx_ring->count); |
13 | } |
14 | |
15 | static struct xdp_buff **i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx) |
16 | { |
17 | return &rx_ring->rx_bi_zc[idx]; |
18 | } |
19 | |
20 | /** |
21 | * i40e_realloc_rx_xdp_bi - reallocate SW ring for either XSK or normal buffer |
22 | * @rx_ring: Current rx ring |
23 | * @pool_present: is pool for XSK present |
24 | * |
25 | * Try allocating memory and return ENOMEM, if failed to allocate. |
26 | * If allocation was successful, substitute buffer with allocated one. |
27 | * Returns 0 on success, negative on failure |
28 | */ |
29 | static int i40e_realloc_rx_xdp_bi(struct i40e_ring *rx_ring, bool pool_present) |
30 | { |
31 | size_t elem_size = pool_present ? sizeof(*rx_ring->rx_bi_zc) : |
32 | sizeof(*rx_ring->rx_bi); |
33 | void *sw_ring = kcalloc(n: rx_ring->count, size: elem_size, GFP_KERNEL); |
34 | |
35 | if (!sw_ring) |
36 | return -ENOMEM; |
37 | |
38 | if (pool_present) { |
39 | kfree(objp: rx_ring->rx_bi); |
40 | rx_ring->rx_bi = NULL; |
41 | rx_ring->rx_bi_zc = sw_ring; |
42 | } else { |
43 | kfree(objp: rx_ring->rx_bi_zc); |
44 | rx_ring->rx_bi_zc = NULL; |
45 | rx_ring->rx_bi = sw_ring; |
46 | } |
47 | return 0; |
48 | } |
49 | |
50 | /** |
51 | * i40e_realloc_rx_bi_zc - reallocate rx SW rings |
52 | * @vsi: Current VSI |
53 | * @zc: is zero copy set |
54 | * |
55 | * Reallocate buffer for rx_rings that might be used by XSK. |
56 | * XDP requires more memory, than rx_buf provides. |
57 | * Returns 0 on success, negative on failure |
58 | */ |
59 | int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc) |
60 | { |
61 | struct i40e_ring *rx_ring; |
62 | unsigned long q; |
63 | |
64 | for_each_set_bit(q, vsi->af_xdp_zc_qps, vsi->alloc_queue_pairs) { |
65 | rx_ring = vsi->rx_rings[q]; |
66 | if (i40e_realloc_rx_xdp_bi(rx_ring, pool_present: zc)) |
67 | return -ENOMEM; |
68 | } |
69 | return 0; |
70 | } |
71 | |
72 | /** |
73 | * i40e_xsk_pool_enable - Enable/associate an AF_XDP buffer pool to a |
74 | * certain ring/qid |
75 | * @vsi: Current VSI |
76 | * @pool: buffer pool |
77 | * @qid: Rx ring to associate buffer pool with |
78 | * |
79 | * Returns 0 on success, <0 on failure |
80 | **/ |
81 | static int i40e_xsk_pool_enable(struct i40e_vsi *vsi, |
82 | struct xsk_buff_pool *pool, |
83 | u16 qid) |
84 | { |
85 | struct net_device *netdev = vsi->netdev; |
86 | bool if_running; |
87 | int err; |
88 | |
89 | if (vsi->type != I40E_VSI_MAIN) |
90 | return -EINVAL; |
91 | |
92 | if (qid >= vsi->num_queue_pairs) |
93 | return -EINVAL; |
94 | |
95 | if (qid >= netdev->real_num_rx_queues || |
96 | qid >= netdev->real_num_tx_queues) |
97 | return -EINVAL; |
98 | |
99 | err = xsk_pool_dma_map(pool, dev: &vsi->back->pdev->dev, I40E_RX_DMA_ATTR); |
100 | if (err) |
101 | return err; |
102 | |
103 | set_bit(nr: qid, addr: vsi->af_xdp_zc_qps); |
104 | |
105 | if_running = netif_running(dev: vsi->netdev) && i40e_enabled_xdp_vsi(vsi); |
106 | |
107 | if (if_running) { |
108 | err = i40e_queue_pair_disable(vsi, queue_pair: qid); |
109 | if (err) |
110 | return err; |
111 | |
112 | err = i40e_realloc_rx_xdp_bi(rx_ring: vsi->rx_rings[qid], pool_present: true); |
113 | if (err) |
114 | return err; |
115 | |
116 | err = i40e_queue_pair_enable(vsi, queue_pair: qid); |
117 | if (err) |
118 | return err; |
119 | |
120 | /* Kick start the NAPI context so that receiving will start */ |
121 | err = i40e_xsk_wakeup(dev: vsi->netdev, queue_id: qid, XDP_WAKEUP_RX); |
122 | if (err) |
123 | return err; |
124 | } |
125 | |
126 | return 0; |
127 | } |
128 | |
129 | /** |
130 | * i40e_xsk_pool_disable - Disassociate an AF_XDP buffer pool from a |
131 | * certain ring/qid |
132 | * @vsi: Current VSI |
133 | * @qid: Rx ring to associate buffer pool with |
134 | * |
135 | * Returns 0 on success, <0 on failure |
136 | **/ |
137 | static int i40e_xsk_pool_disable(struct i40e_vsi *vsi, u16 qid) |
138 | { |
139 | struct net_device *netdev = vsi->netdev; |
140 | struct xsk_buff_pool *pool; |
141 | bool if_running; |
142 | int err; |
143 | |
144 | pool = xsk_get_pool_from_qid(dev: netdev, queue_id: qid); |
145 | if (!pool) |
146 | return -EINVAL; |
147 | |
148 | if_running = netif_running(dev: vsi->netdev) && i40e_enabled_xdp_vsi(vsi); |
149 | |
150 | if (if_running) { |
151 | err = i40e_queue_pair_disable(vsi, queue_pair: qid); |
152 | if (err) |
153 | return err; |
154 | } |
155 | |
156 | clear_bit(nr: qid, addr: vsi->af_xdp_zc_qps); |
157 | xsk_pool_dma_unmap(pool, I40E_RX_DMA_ATTR); |
158 | |
159 | if (if_running) { |
160 | err = i40e_realloc_rx_xdp_bi(rx_ring: vsi->rx_rings[qid], pool_present: false); |
161 | if (err) |
162 | return err; |
163 | err = i40e_queue_pair_enable(vsi, queue_pair: qid); |
164 | if (err) |
165 | return err; |
166 | } |
167 | |
168 | return 0; |
169 | } |
170 | |
171 | /** |
172 | * i40e_xsk_pool_setup - Enable/disassociate an AF_XDP buffer pool to/from |
173 | * a ring/qid |
174 | * @vsi: Current VSI |
175 | * @pool: Buffer pool to enable/associate to a ring, or NULL to disable |
176 | * @qid: Rx ring to (dis)associate buffer pool (from)to |
177 | * |
178 | * This function enables or disables a buffer pool to a certain ring. |
179 | * |
180 | * Returns 0 on success, <0 on failure |
181 | **/ |
182 | int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool, |
183 | u16 qid) |
184 | { |
185 | return pool ? i40e_xsk_pool_enable(vsi, pool, qid) : |
186 | i40e_xsk_pool_disable(vsi, qid); |
187 | } |
188 | |
189 | /** |
190 | * i40e_run_xdp_zc - Executes an XDP program on an xdp_buff |
191 | * @rx_ring: Rx ring |
192 | * @xdp: xdp_buff used as input to the XDP program |
193 | * @xdp_prog: XDP program to run |
194 | * |
195 | * Returns any of I40E_XDP_{PASS, CONSUMED, TX, REDIR} |
196 | **/ |
197 | static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp, |
198 | struct bpf_prog *xdp_prog) |
199 | { |
200 | int err, result = I40E_XDP_PASS; |
201 | struct i40e_ring *xdp_ring; |
202 | u32 act; |
203 | |
204 | act = bpf_prog_run_xdp(prog: xdp_prog, xdp); |
205 | |
206 | if (likely(act == XDP_REDIRECT)) { |
207 | err = xdp_do_redirect(dev: rx_ring->netdev, xdp, prog: xdp_prog); |
208 | if (!err) |
209 | return I40E_XDP_REDIR; |
210 | if (xsk_uses_need_wakeup(pool: rx_ring->xsk_pool) && err == -ENOBUFS) |
211 | result = I40E_XDP_EXIT; |
212 | else |
213 | result = I40E_XDP_CONSUMED; |
214 | goto out_failure; |
215 | } |
216 | |
217 | switch (act) { |
218 | case XDP_PASS: |
219 | break; |
220 | case XDP_TX: |
221 | xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index]; |
222 | result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring); |
223 | if (result == I40E_XDP_CONSUMED) |
224 | goto out_failure; |
225 | break; |
226 | case XDP_DROP: |
227 | result = I40E_XDP_CONSUMED; |
228 | break; |
229 | default: |
230 | bpf_warn_invalid_xdp_action(dev: rx_ring->netdev, prog: xdp_prog, act); |
231 | fallthrough; |
232 | case XDP_ABORTED: |
233 | result = I40E_XDP_CONSUMED; |
234 | out_failure: |
235 | trace_xdp_exception(dev: rx_ring->netdev, xdp: xdp_prog, act); |
236 | } |
237 | return result; |
238 | } |
239 | |
240 | bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count) |
241 | { |
242 | u16 ntu = rx_ring->next_to_use; |
243 | union i40e_rx_desc *rx_desc; |
244 | struct xdp_buff **xdp; |
245 | u32 nb_buffs, i; |
246 | dma_addr_t dma; |
247 | |
248 | rx_desc = I40E_RX_DESC(rx_ring, ntu); |
249 | xdp = i40e_rx_bi(rx_ring, idx: ntu); |
250 | |
251 | nb_buffs = min_t(u16, count, rx_ring->count - ntu); |
252 | nb_buffs = xsk_buff_alloc_batch(pool: rx_ring->xsk_pool, xdp, max: nb_buffs); |
253 | if (!nb_buffs) |
254 | return false; |
255 | |
256 | i = nb_buffs; |
257 | while (i--) { |
258 | dma = xsk_buff_xdp_get_dma(xdp: *xdp); |
259 | rx_desc->read.pkt_addr = cpu_to_le64(dma); |
260 | rx_desc->read.hdr_addr = 0; |
261 | |
262 | rx_desc++; |
263 | xdp++; |
264 | } |
265 | |
266 | ntu += nb_buffs; |
267 | if (ntu == rx_ring->count) { |
268 | rx_desc = I40E_RX_DESC(rx_ring, 0); |
269 | ntu = 0; |
270 | } |
271 | |
272 | /* clear the status bits for the next_to_use descriptor */ |
273 | rx_desc->wb.qword1.status_error_len = 0; |
274 | i40e_release_rx_desc(rx_ring, val: ntu); |
275 | |
276 | return count == nb_buffs; |
277 | } |
278 | |
279 | /** |
280 | * i40e_construct_skb_zc - Create skbuff from zero-copy Rx buffer |
281 | * @rx_ring: Rx ring |
282 | * @xdp: xdp_buff |
283 | * |
284 | * This functions allocates a new skb from a zero-copy Rx buffer. |
285 | * |
286 | * Returns the skb, or NULL on failure. |
287 | **/ |
288 | static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring, |
289 | struct xdp_buff *xdp) |
290 | { |
291 | unsigned int totalsize = xdp->data_end - xdp->data_meta; |
292 | unsigned int metasize = xdp->data - xdp->data_meta; |
293 | struct skb_shared_info *sinfo = NULL; |
294 | struct sk_buff *skb; |
295 | u32 nr_frags = 0; |
296 | |
297 | if (unlikely(xdp_buff_has_frags(xdp))) { |
298 | sinfo = xdp_get_shared_info_from_buff(xdp); |
299 | nr_frags = sinfo->nr_frags; |
300 | } |
301 | net_prefetch(p: xdp->data_meta); |
302 | |
303 | /* allocate a skb to store the frags */ |
304 | skb = __napi_alloc_skb(napi: &rx_ring->q_vector->napi, length: totalsize, |
305 | GFP_ATOMIC | __GFP_NOWARN); |
306 | if (unlikely(!skb)) |
307 | goto out; |
308 | |
309 | memcpy(__skb_put(skb, totalsize), xdp->data_meta, |
310 | ALIGN(totalsize, sizeof(long))); |
311 | |
312 | if (metasize) { |
313 | skb_metadata_set(skb, meta_len: metasize); |
314 | __skb_pull(skb, len: metasize); |
315 | } |
316 | |
317 | if (likely(!xdp_buff_has_frags(xdp))) |
318 | goto out; |
319 | |
320 | for (int i = 0; i < nr_frags; i++) { |
321 | struct skb_shared_info *skinfo = skb_shinfo(skb); |
322 | skb_frag_t *frag = &sinfo->frags[i]; |
323 | struct page *page; |
324 | void *addr; |
325 | |
326 | page = dev_alloc_page(); |
327 | if (!page) { |
328 | dev_kfree_skb(skb); |
329 | return NULL; |
330 | } |
331 | addr = page_to_virt(page); |
332 | |
333 | memcpy(addr, skb_frag_page(frag), skb_frag_size(frag)); |
334 | |
335 | __skb_fill_page_desc_noacc(shinfo: skinfo, i: skinfo->nr_frags++, |
336 | page: addr, off: 0, size: skb_frag_size(frag)); |
337 | } |
338 | |
339 | out: |
340 | xsk_buff_free(xdp); |
341 | return skb; |
342 | } |
343 | |
344 | static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring, |
345 | struct xdp_buff *xdp_buff, |
346 | union i40e_rx_desc *rx_desc, |
347 | unsigned int *rx_packets, |
348 | unsigned int *rx_bytes, |
349 | unsigned int xdp_res, |
350 | bool *failure) |
351 | { |
352 | struct sk_buff *skb; |
353 | |
354 | *rx_packets = 1; |
355 | *rx_bytes = xdp_get_buff_len(xdp: xdp_buff); |
356 | |
357 | if (likely(xdp_res == I40E_XDP_REDIR) || xdp_res == I40E_XDP_TX) |
358 | return; |
359 | |
360 | if (xdp_res == I40E_XDP_EXIT) { |
361 | *failure = true; |
362 | return; |
363 | } |
364 | |
365 | if (xdp_res == I40E_XDP_CONSUMED) { |
366 | xsk_buff_free(xdp: xdp_buff); |
367 | return; |
368 | } |
369 | if (xdp_res == I40E_XDP_PASS) { |
370 | /* NB! We are not checking for errors using |
371 | * i40e_test_staterr with |
372 | * BIT(I40E_RXD_QW1_ERROR_SHIFT). This is due to that |
373 | * SBP is *not* set in PRT_SBPVSI (default not set). |
374 | */ |
375 | skb = i40e_construct_skb_zc(rx_ring, xdp: xdp_buff); |
376 | if (!skb) { |
377 | rx_ring->rx_stats.alloc_buff_failed++; |
378 | *rx_packets = 0; |
379 | *rx_bytes = 0; |
380 | return; |
381 | } |
382 | |
383 | if (eth_skb_pad(skb)) { |
384 | *rx_packets = 0; |
385 | *rx_bytes = 0; |
386 | return; |
387 | } |
388 | |
389 | i40e_process_skb_fields(rx_ring, rx_desc, skb); |
390 | napi_gro_receive(napi: &rx_ring->q_vector->napi, skb); |
391 | return; |
392 | } |
393 | |
394 | /* Should never get here, as all valid cases have been handled already. |
395 | */ |
396 | WARN_ON_ONCE(1); |
397 | } |
398 | |
399 | static int |
400 | i40e_add_xsk_frag(struct i40e_ring *rx_ring, struct xdp_buff *first, |
401 | struct xdp_buff *xdp, const unsigned int size) |
402 | { |
403 | struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp: first); |
404 | |
405 | if (!xdp_buff_has_frags(xdp: first)) { |
406 | sinfo->nr_frags = 0; |
407 | sinfo->xdp_frags_size = 0; |
408 | xdp_buff_set_frags_flag(xdp: first); |
409 | } |
410 | |
411 | if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) { |
412 | xsk_buff_free(xdp: first); |
413 | return -ENOMEM; |
414 | } |
415 | |
416 | __skb_fill_page_desc_noacc(shinfo: sinfo, i: sinfo->nr_frags++, |
417 | virt_to_page(xdp->data_hard_start), |
418 | XDP_PACKET_HEADROOM, size); |
419 | sinfo->xdp_frags_size += size; |
420 | xsk_buff_add_frag(xdp); |
421 | |
422 | return 0; |
423 | } |
424 | |
425 | /** |
426 | * i40e_clean_rx_irq_zc - Consumes Rx packets from the hardware ring |
427 | * @rx_ring: Rx ring |
428 | * @budget: NAPI budget |
429 | * |
430 | * Returns amount of work completed |
431 | **/ |
432 | int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) |
433 | { |
434 | unsigned int total_rx_bytes = 0, total_rx_packets = 0; |
435 | u16 next_to_process = rx_ring->next_to_process; |
436 | u16 next_to_clean = rx_ring->next_to_clean; |
437 | unsigned int xdp_res, xdp_xmit = 0; |
438 | struct xdp_buff *first = NULL; |
439 | u32 count = rx_ring->count; |
440 | struct bpf_prog *xdp_prog; |
441 | u32 entries_to_alloc; |
442 | bool failure = false; |
443 | |
444 | if (next_to_process != next_to_clean) |
445 | first = *i40e_rx_bi(rx_ring, idx: next_to_clean); |
446 | |
447 | /* NB! xdp_prog will always be !NULL, due to the fact that |
448 | * this path is enabled by setting an XDP program. |
449 | */ |
450 | xdp_prog = READ_ONCE(rx_ring->xdp_prog); |
451 | |
452 | while (likely(total_rx_packets < (unsigned int)budget)) { |
453 | union i40e_rx_desc *rx_desc; |
454 | unsigned int rx_packets; |
455 | unsigned int rx_bytes; |
456 | struct xdp_buff *bi; |
457 | unsigned int size; |
458 | u64 qword; |
459 | |
460 | rx_desc = I40E_RX_DESC(rx_ring, next_to_process); |
461 | qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); |
462 | |
463 | /* This memory barrier is needed to keep us from reading |
464 | * any other fields out of the rx_desc until we have |
465 | * verified the descriptor has been written back. |
466 | */ |
467 | dma_rmb(); |
468 | |
469 | if (i40e_rx_is_programming_status(qword1: qword)) { |
470 | i40e_clean_programming_status(rx_ring, |
471 | qword0_raw: rx_desc->raw.qword[0], |
472 | qword1: qword); |
473 | bi = *i40e_rx_bi(rx_ring, idx: next_to_process); |
474 | xsk_buff_free(xdp: bi); |
475 | if (++next_to_process == count) |
476 | next_to_process = 0; |
477 | continue; |
478 | } |
479 | |
480 | size = FIELD_GET(I40E_RXD_QW1_LENGTH_PBUF_MASK, qword); |
481 | if (!size) |
482 | break; |
483 | |
484 | bi = *i40e_rx_bi(rx_ring, idx: next_to_process); |
485 | xsk_buff_set_size(xdp: bi, size); |
486 | xsk_buff_dma_sync_for_cpu(xdp: bi, pool: rx_ring->xsk_pool); |
487 | |
488 | if (!first) |
489 | first = bi; |
490 | else if (i40e_add_xsk_frag(rx_ring, first, xdp: bi, size)) |
491 | break; |
492 | |
493 | if (++next_to_process == count) |
494 | next_to_process = 0; |
495 | |
496 | if (i40e_is_non_eop(rx_ring, rx_desc)) |
497 | continue; |
498 | |
499 | xdp_res = i40e_run_xdp_zc(rx_ring, xdp: first, xdp_prog); |
500 | i40e_handle_xdp_result_zc(rx_ring, xdp_buff: first, rx_desc, rx_packets: &rx_packets, |
501 | rx_bytes: &rx_bytes, xdp_res, failure: &failure); |
502 | next_to_clean = next_to_process; |
503 | if (failure) |
504 | break; |
505 | total_rx_packets += rx_packets; |
506 | total_rx_bytes += rx_bytes; |
507 | xdp_xmit |= xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR); |
508 | first = NULL; |
509 | } |
510 | |
511 | rx_ring->next_to_clean = next_to_clean; |
512 | rx_ring->next_to_process = next_to_process; |
513 | |
514 | entries_to_alloc = I40E_DESC_UNUSED(rx_ring); |
515 | if (entries_to_alloc >= I40E_RX_BUFFER_WRITE) |
516 | failure |= !i40e_alloc_rx_buffers_zc(rx_ring, count: entries_to_alloc); |
517 | |
518 | i40e_finalize_xdp_rx(rx_ring, xdp_res: xdp_xmit); |
519 | i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets); |
520 | |
521 | if (xsk_uses_need_wakeup(pool: rx_ring->xsk_pool)) { |
522 | if (failure || next_to_clean == rx_ring->next_to_use) |
523 | xsk_set_rx_need_wakeup(pool: rx_ring->xsk_pool); |
524 | else |
525 | xsk_clear_rx_need_wakeup(pool: rx_ring->xsk_pool); |
526 | |
527 | return (int)total_rx_packets; |
528 | } |
529 | return failure ? budget : (int)total_rx_packets; |
530 | } |
531 | |
532 | static void i40e_xmit_pkt(struct i40e_ring *xdp_ring, struct xdp_desc *desc, |
533 | unsigned int *total_bytes) |
534 | { |
535 | u32 cmd = I40E_TX_DESC_CMD_ICRC | xsk_is_eop_desc(desc); |
536 | struct i40e_tx_desc *tx_desc; |
537 | dma_addr_t dma; |
538 | |
539 | dma = xsk_buff_raw_get_dma(pool: xdp_ring->xsk_pool, addr: desc->addr); |
540 | xsk_buff_raw_dma_sync_for_device(pool: xdp_ring->xsk_pool, dma, size: desc->len); |
541 | |
542 | tx_desc = I40E_TX_DESC(xdp_ring, xdp_ring->next_to_use++); |
543 | tx_desc->buffer_addr = cpu_to_le64(dma); |
544 | tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd: cmd, td_offset: 0, size: desc->len, td_tag: 0); |
545 | |
546 | *total_bytes += desc->len; |
547 | } |
548 | |
549 | static void i40e_xmit_pkt_batch(struct i40e_ring *xdp_ring, struct xdp_desc *desc, |
550 | unsigned int *total_bytes) |
551 | { |
552 | u16 ntu = xdp_ring->next_to_use; |
553 | struct i40e_tx_desc *tx_desc; |
554 | dma_addr_t dma; |
555 | u32 i; |
556 | |
557 | loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) { |
558 | u32 cmd = I40E_TX_DESC_CMD_ICRC | xsk_is_eop_desc(desc: &desc[i]); |
559 | |
560 | dma = xsk_buff_raw_get_dma(pool: xdp_ring->xsk_pool, addr: desc[i].addr); |
561 | xsk_buff_raw_dma_sync_for_device(pool: xdp_ring->xsk_pool, dma, size: desc[i].len); |
562 | |
563 | tx_desc = I40E_TX_DESC(xdp_ring, ntu++); |
564 | tx_desc->buffer_addr = cpu_to_le64(dma); |
565 | tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd: cmd, td_offset: 0, size: desc[i].len, td_tag: 0); |
566 | |
567 | *total_bytes += desc[i].len; |
568 | } |
569 | |
570 | xdp_ring->next_to_use = ntu; |
571 | } |
572 | |
573 | static void i40e_fill_tx_hw_ring(struct i40e_ring *xdp_ring, struct xdp_desc *descs, u32 nb_pkts, |
574 | unsigned int *total_bytes) |
575 | { |
576 | u32 batched, leftover, i; |
577 | |
578 | batched = nb_pkts & ~(PKTS_PER_BATCH - 1); |
579 | leftover = nb_pkts & (PKTS_PER_BATCH - 1); |
580 | for (i = 0; i < batched; i += PKTS_PER_BATCH) |
581 | i40e_xmit_pkt_batch(xdp_ring, desc: &descs[i], total_bytes); |
582 | for (i = batched; i < batched + leftover; i++) |
583 | i40e_xmit_pkt(xdp_ring, desc: &descs[i], total_bytes); |
584 | } |
585 | |
586 | static void i40e_set_rs_bit(struct i40e_ring *xdp_ring) |
587 | { |
588 | u16 ntu = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : xdp_ring->count - 1; |
589 | struct i40e_tx_desc *tx_desc; |
590 | |
591 | tx_desc = I40E_TX_DESC(xdp_ring, ntu); |
592 | tx_desc->cmd_type_offset_bsz |= cpu_to_le64(I40E_TX_DESC_CMD_RS << I40E_TXD_QW1_CMD_SHIFT); |
593 | } |
594 | |
595 | /** |
596 | * i40e_xmit_zc - Performs zero-copy Tx AF_XDP |
597 | * @xdp_ring: XDP Tx ring |
598 | * @budget: NAPI budget |
599 | * |
600 | * Returns true if the work is finished. |
601 | **/ |
602 | static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) |
603 | { |
604 | struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; |
605 | u32 nb_pkts, nb_processed = 0; |
606 | unsigned int total_bytes = 0; |
607 | |
608 | nb_pkts = xsk_tx_peek_release_desc_batch(pool: xdp_ring->xsk_pool, max: budget); |
609 | if (!nb_pkts) |
610 | return true; |
611 | |
612 | if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { |
613 | nb_processed = xdp_ring->count - xdp_ring->next_to_use; |
614 | i40e_fill_tx_hw_ring(xdp_ring, descs, nb_pkts: nb_processed, total_bytes: &total_bytes); |
615 | xdp_ring->next_to_use = 0; |
616 | } |
617 | |
618 | i40e_fill_tx_hw_ring(xdp_ring, descs: &descs[nb_processed], nb_pkts: nb_pkts - nb_processed, |
619 | total_bytes: &total_bytes); |
620 | |
621 | /* Request an interrupt for the last frame and bump tail ptr. */ |
622 | i40e_set_rs_bit(xdp_ring); |
623 | i40e_xdp_ring_update_tail(xdp_ring); |
624 | |
625 | i40e_update_tx_stats(tx_ring: xdp_ring, total_packets: nb_pkts, total_bytes); |
626 | |
627 | return nb_pkts < budget; |
628 | } |
629 | |
630 | /** |
631 | * i40e_clean_xdp_tx_buffer - Frees and unmaps an XDP Tx entry |
632 | * @tx_ring: XDP Tx ring |
633 | * @tx_bi: Tx buffer info to clean |
634 | **/ |
635 | static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring, |
636 | struct i40e_tx_buffer *tx_bi) |
637 | { |
638 | xdp_return_frame(xdpf: tx_bi->xdpf); |
639 | tx_ring->xdp_tx_active--; |
640 | dma_unmap_single(tx_ring->dev, |
641 | dma_unmap_addr(tx_bi, dma), |
642 | dma_unmap_len(tx_bi, len), DMA_TO_DEVICE); |
643 | dma_unmap_len_set(tx_bi, len, 0); |
644 | } |
645 | |
646 | /** |
647 | * i40e_clean_xdp_tx_irq - Completes AF_XDP entries, and cleans XDP entries |
648 | * @vsi: Current VSI |
649 | * @tx_ring: XDP Tx ring |
650 | * |
651 | * Returns true if cleanup/transmission is done. |
652 | **/ |
653 | bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring) |
654 | { |
655 | struct xsk_buff_pool *bp = tx_ring->xsk_pool; |
656 | u32 i, completed_frames, xsk_frames = 0; |
657 | u32 head_idx = i40e_get_head(tx_ring); |
658 | struct i40e_tx_buffer *tx_bi; |
659 | unsigned int ntc; |
660 | |
661 | if (head_idx < tx_ring->next_to_clean) |
662 | head_idx += tx_ring->count; |
663 | completed_frames = head_idx - tx_ring->next_to_clean; |
664 | |
665 | if (completed_frames == 0) |
666 | goto out_xmit; |
667 | |
668 | if (likely(!tx_ring->xdp_tx_active)) { |
669 | xsk_frames = completed_frames; |
670 | goto skip; |
671 | } |
672 | |
673 | ntc = tx_ring->next_to_clean; |
674 | |
675 | for (i = 0; i < completed_frames; i++) { |
676 | tx_bi = &tx_ring->tx_bi[ntc]; |
677 | |
678 | if (tx_bi->xdpf) { |
679 | i40e_clean_xdp_tx_buffer(tx_ring, tx_bi); |
680 | tx_bi->xdpf = NULL; |
681 | } else { |
682 | xsk_frames++; |
683 | } |
684 | |
685 | if (++ntc >= tx_ring->count) |
686 | ntc = 0; |
687 | } |
688 | |
689 | skip: |
690 | tx_ring->next_to_clean += completed_frames; |
691 | if (unlikely(tx_ring->next_to_clean >= tx_ring->count)) |
692 | tx_ring->next_to_clean -= tx_ring->count; |
693 | |
694 | if (xsk_frames) |
695 | xsk_tx_completed(pool: bp, nb_entries: xsk_frames); |
696 | |
697 | i40e_arm_wb(tx_ring, vsi, budget: completed_frames); |
698 | |
699 | out_xmit: |
700 | if (xsk_uses_need_wakeup(pool: tx_ring->xsk_pool)) |
701 | xsk_set_tx_need_wakeup(pool: tx_ring->xsk_pool); |
702 | |
703 | return i40e_xmit_zc(xdp_ring: tx_ring, I40E_DESC_UNUSED(tx_ring)); |
704 | } |
705 | |
706 | /** |
707 | * i40e_xsk_wakeup - Implements the ndo_xsk_wakeup |
708 | * @dev: the netdevice |
709 | * @queue_id: queue id to wake up |
710 | * @flags: ignored in our case since we have Rx and Tx in the same NAPI. |
711 | * |
712 | * Returns <0 for errors, 0 otherwise. |
713 | **/ |
714 | int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) |
715 | { |
716 | struct i40e_netdev_priv *np = netdev_priv(dev); |
717 | struct i40e_vsi *vsi = np->vsi; |
718 | struct i40e_pf *pf = vsi->back; |
719 | struct i40e_ring *ring; |
720 | |
721 | if (test_bit(__I40E_CONFIG_BUSY, pf->state)) |
722 | return -EAGAIN; |
723 | |
724 | if (test_bit(__I40E_VSI_DOWN, vsi->state)) |
725 | return -ENETDOWN; |
726 | |
727 | if (!i40e_enabled_xdp_vsi(vsi)) |
728 | return -EINVAL; |
729 | |
730 | if (queue_id >= vsi->num_queue_pairs) |
731 | return -EINVAL; |
732 | |
733 | if (!vsi->xdp_rings[queue_id]->xsk_pool) |
734 | return -EINVAL; |
735 | |
736 | ring = vsi->xdp_rings[queue_id]; |
737 | |
738 | /* The idea here is that if NAPI is running, mark a miss, so |
739 | * it will run again. If not, trigger an interrupt and |
740 | * schedule the NAPI from interrupt context. If NAPI would be |
741 | * scheduled here, the interrupt affinity would not be |
742 | * honored. |
743 | */ |
744 | if (!napi_if_scheduled_mark_missed(n: &ring->q_vector->napi)) |
745 | i40e_force_wb(vsi, q_vector: ring->q_vector); |
746 | |
747 | return 0; |
748 | } |
749 | |
750 | void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring) |
751 | { |
752 | u16 ntc = rx_ring->next_to_clean; |
753 | u16 ntu = rx_ring->next_to_use; |
754 | |
755 | while (ntc != ntu) { |
756 | struct xdp_buff *rx_bi = *i40e_rx_bi(rx_ring, idx: ntc); |
757 | |
758 | xsk_buff_free(xdp: rx_bi); |
759 | ntc++; |
760 | if (ntc >= rx_ring->count) |
761 | ntc = 0; |
762 | } |
763 | } |
764 | |
765 | /** |
766 | * i40e_xsk_clean_tx_ring - Clean the XDP Tx ring on shutdown |
767 | * @tx_ring: XDP Tx ring |
768 | **/ |
769 | void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring) |
770 | { |
771 | u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use; |
772 | struct xsk_buff_pool *bp = tx_ring->xsk_pool; |
773 | struct i40e_tx_buffer *tx_bi; |
774 | u32 xsk_frames = 0; |
775 | |
776 | while (ntc != ntu) { |
777 | tx_bi = &tx_ring->tx_bi[ntc]; |
778 | |
779 | if (tx_bi->xdpf) |
780 | i40e_clean_xdp_tx_buffer(tx_ring, tx_bi); |
781 | else |
782 | xsk_frames++; |
783 | |
784 | tx_bi->xdpf = NULL; |
785 | |
786 | ntc++; |
787 | if (ntc >= tx_ring->count) |
788 | ntc = 0; |
789 | } |
790 | |
791 | if (xsk_frames) |
792 | xsk_tx_completed(pool: bp, nb_entries: xsk_frames); |
793 | } |
794 | |
795 | /** |
796 | * i40e_xsk_any_rx_ring_enabled - Checks if Rx rings have an AF_XDP |
797 | * buffer pool attached |
798 | * @vsi: vsi |
799 | * |
800 | * Returns true if any of the Rx rings has an AF_XDP buffer pool attached |
801 | **/ |
802 | bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi) |
803 | { |
804 | struct net_device *netdev = vsi->netdev; |
805 | int i; |
806 | |
807 | for (i = 0; i < vsi->num_queue_pairs; i++) { |
808 | if (xsk_get_pool_from_qid(dev: netdev, queue_id: i)) |
809 | return true; |
810 | } |
811 | |
812 | return false; |
813 | } |
814 | |