// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2018 Intel Corporation. */

#include <linux/bpf_trace.h>
#include <net/xdp_sock_drv.h>
#include <net/xdp.h>

#include "ixgbe.h"
#include "ixgbe_txrx_common.h"

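/**
 * ixgbe_xsk_pool - retrieve the AF_XDP buffer pool bound to a ring
 * @adapter: board private structure
 * @ring: ring whose queue id is used for the lookup
 *
 * Returns the xsk_buff_pool registered for the ring's queue id, or NULL
 * if no XDP program is loaded or the queue is not in zero-copy mode.
 **/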
struct xsk_buff_pool *ixgbe_xsk_pool(struct ixgbe_adapter *adapter,
				     struct ixgbe_ring *ring)
{
	bool xdp_on = READ_ONCE(adapter->xdp_prog);
	int qid = ring->ring_idx;

	if (!xdp_on || !test_bit(qid, adapter->af_xdp_zc_qps))
		return NULL;

	return xsk_get_pool_from_qid(adapter->netdev, qid);
}

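/**
 * ixgbe_xsk_pool_enable - enable AF_XDP zero-copy on a queue pair
 * @adapter: board private structure
 * @pool: buffer pool supplied by the AF_XDP socket
 * @qid: queue id to enable zero-copy on
 *
 * DMA-maps the pool and marks the queue as zero-copy. If the interface
 * is running, the ring pair is quiesced around the change and NAPI is
 * kicked afterwards so that Rx processing starts immediately.
 *
 * Returns 0 on success, negative errno on failure.
 **/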
static int ixgbe_xsk_pool_enable(struct ixgbe_adapter *adapter,
				 struct xsk_buff_pool *pool,
				 u16 qid)
{
	struct net_device *netdev = adapter->netdev;
	bool if_running;
	int err;

	if (qid >= adapter->num_rx_queues)
		return -EINVAL;

	if (qid >= netdev->real_num_rx_queues ||
	    qid >= netdev->real_num_tx_queues)
		return -EINVAL;

	err = xsk_pool_dma_map(pool, &adapter->pdev->dev, IXGBE_RX_DMA_ATTR);
	if (err)
		return err;

	if_running = netif_running(adapter->netdev) &&
		     ixgbe_enabled_xdp_adapter(adapter);

	if (if_running)
		ixgbe_txrx_ring_disable(adapter, qid);

	set_bit(qid, adapter->af_xdp_zc_qps);

	if (if_running) {
		ixgbe_txrx_ring_enable(adapter, qid);

		/* Kick start the NAPI context so that receiving will start */
		err = ixgbe_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX);
		if (err) {
			clear_bit(qid, adapter->af_xdp_zc_qps);
			xsk_pool_dma_unmap(pool, IXGBE_RX_DMA_ATTR);
			return err;
		}
	}

	return 0;
}

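/**
 * ixgbe_xsk_pool_disable - disable AF_XDP zero-copy on a queue pair
 * @adapter: board private structure
 * @qid: queue id to disable zero-copy on
 *
 * Clears the zero-copy bit and DMA-unmaps the pool, quiescing the ring
 * pair around the change when the interface is running.
 *
 * Returns 0 on success, -EINVAL if no pool is bound to @qid.
 **/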
static int ixgbe_xsk_pool_disable(struct ixgbe_adapter *adapter, u16 qid)
{
	struct xsk_buff_pool *pool;
	bool if_running;

	pool = xsk_get_pool_from_qid(adapter->netdev, qid);
	if (!pool)
		return -EINVAL;

	if_running = netif_running(adapter->netdev) &&
		     ixgbe_enabled_xdp_adapter(adapter);

	if (if_running)
		ixgbe_txrx_ring_disable(adapter, qid);

	clear_bit(qid, adapter->af_xdp_zc_qps);
	xsk_pool_dma_unmap(pool, IXGBE_RX_DMA_ATTR);

	if (if_running)
		ixgbe_txrx_ring_enable(adapter, qid);

	return 0;
}

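/**
 * ixgbe_xsk_pool_setup - XDP_SETUP_XSK_POOL handler
 * @adapter: board private structure
 * @pool: buffer pool to attach, or NULL to detach the current pool
 * @qid: queue id the request applies to
 **/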
int ixgbe_xsk_pool_setup(struct ixgbe_adapter *adapter,
			 struct xsk_buff_pool *pool,
			 u16 qid)
{
	return pool ? ixgbe_xsk_pool_enable(adapter, pool, qid) :
		ixgbe_xsk_pool_disable(adapter, qid);
}

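/**
 * ixgbe_run_xdp_zc - run the attached XDP program on a zero-copy buffer
 * @adapter: board private structure
 * @rx_ring: Rx ring the buffer was received on
 * @xdp: buffer to run the program against
 *
 * XDP_REDIRECT is handled first as the expected fast path for AF_XDP.
 * Returns one of IXGBE_XDP_{PASS,CONSUMED,TX,REDIR,EXIT} telling the
 * caller how to dispose of the buffer.
 **/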
static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
			    struct ixgbe_ring *rx_ring,
			    struct xdp_buff *xdp)
{
	int err, result = IXGBE_XDP_PASS;
	struct bpf_prog *xdp_prog;
	struct ixgbe_ring *ring;
	struct xdp_frame *xdpf;
	u32 act;

	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
	act = bpf_prog_run_xdp(xdp_prog, xdp);

	if (likely(act == XDP_REDIRECT)) {
		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
		if (!err)
			return IXGBE_XDP_REDIR;
		if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS)
			result = IXGBE_XDP_EXIT;
		else
			result = IXGBE_XDP_CONSUMED;
		goto out_failure;
	}

	switch (act) {
	case XDP_PASS:
		break;
	case XDP_TX:
		xdpf = xdp_convert_buff_to_frame(xdp);
		if (unlikely(!xdpf))
			goto out_failure;
		ring = ixgbe_determine_xdp_ring(adapter);
		if (static_branch_unlikely(&ixgbe_xdp_locking_key))
			spin_lock(&ring->tx_lock);
		result = ixgbe_xmit_xdp_ring(ring, xdpf);
		if (static_branch_unlikely(&ixgbe_xdp_locking_key))
			spin_unlock(&ring->tx_lock);
		if (result == IXGBE_XDP_CONSUMED)
			goto out_failure;
		break;
	case XDP_DROP:
		result = IXGBE_XDP_CONSUMED;
		break;
	default:
		bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
		fallthrough;
	case XDP_ABORTED:
		result = IXGBE_XDP_CONSUMED;
out_failure:
		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
	}
	return result;
}

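/**
 * ixgbe_alloc_rx_buffers_zc - refill Rx descriptors from the XSK pool
 * @rx_ring: ring to place buffers on
 * @count: number of descriptors to refill
 *
 * Returns true if all @count buffers were allocated, false if the pool
 * ran out of buffers first. The tail register is only bumped when at
 * least one descriptor was written.
 **/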
bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count)
{
	union ixgbe_adv_rx_desc *rx_desc;
	struct ixgbe_rx_buffer *bi;
	u16 i = rx_ring->next_to_use;
	dma_addr_t dma;
	bool ok = true;

	/* nothing to do */
	if (!count)
		return true;

	rx_desc = IXGBE_RX_DESC(rx_ring, i);
	bi = &rx_ring->rx_buffer_info[i];
	i -= rx_ring->count;

	do {
		bi->xdp = xsk_buff_alloc(rx_ring->xsk_pool);
		if (!bi->xdp) {
			ok = false;
			break;
		}

		dma = xsk_buff_xdp_get_dma(bi->xdp);

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		rx_desc->read.pkt_addr = cpu_to_le64(dma);

		rx_desc++;
		bi++;
		i++;
		if (unlikely(!i)) {
			rx_desc = IXGBE_RX_DESC(rx_ring, 0);
			bi = rx_ring->rx_buffer_info;
			i -= rx_ring->count;
		}

		/* clear the length for the next_to_use descriptor */
		rx_desc->wb.upper.length = 0;

		count--;
	} while (count);

	i += rx_ring->count;

	if (rx_ring->next_to_use != i) {
		rx_ring->next_to_use = i;

		/* Force memory writes to complete before letting h/w
		 * know there are new descriptors to fetch. (Only
		 * applicable for weak-ordered memory model archs,
		 * such as IA-64).
		 */
		wmb();
		writel(i, rx_ring->tail);
	}

	return ok;
}

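/**
 * ixgbe_construct_skb_zc - create an skb from a zero-copy Rx buffer
 * @rx_ring: ring the buffer was received on
 * @xdp: buffer holding the frame and any metadata in front of it
 *
 * The frame, including XDP metadata, is copied out of the UMEM so that
 * the underlying buffer can be returned to the pool.
 **/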
static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring,
					      const struct xdp_buff *xdp)
{
	unsigned int totalsize = xdp->data_end - xdp->data_meta;
	unsigned int metasize = xdp->data - xdp->data_meta;
	struct sk_buff *skb;

	net_prefetch(xdp->data_meta);

	/* allocate a skb to store the frags */
	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
			       GFP_ATOMIC | __GFP_NOWARN);
	if (unlikely(!skb))
		return NULL;

	memcpy(__skb_put(skb, totalsize), xdp->data_meta,
	       ALIGN(totalsize, sizeof(long)));

	if (metasize) {
		skb_metadata_set(skb, metasize);
		__skb_pull(skb, metasize);
	}

	return skb;
}

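/* Advance next_to_clean, wrapping at the end of the ring, and prefetch
 * the descriptor that will be processed next.
 */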
static void ixgbe_inc_ntc(struct ixgbe_ring *rx_ring)
{
	u32 ntc = rx_ring->next_to_clean + 1;

	ntc = (ntc < rx_ring->count) ? ntc : 0;
	rx_ring->next_to_clean = ntc;
	prefetch(IXGBE_RX_DESC(rx_ring, ntc));
}

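/**
 * ixgbe_clean_rx_irq_zc - zero-copy variant of the Rx cleanup routine
 * @q_vector: structure containing interrupt and ring information
 * @rx_ring: Rx ring to clean
 * @budget: NAPI budget, i.e. the maximum number of packets to process
 *
 * Returns the number of packets processed or, when the pool does not
 * use the need_wakeup flag and a refill failed, the full @budget so
 * that NAPI keeps polling.
 **/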
int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
			  struct ixgbe_ring *rx_ring,
			  const int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
	struct ixgbe_adapter *adapter = q_vector->adapter;
	u16 cleaned_count = ixgbe_desc_unused(rx_ring);
	unsigned int xdp_res, xdp_xmit = 0;
	bool failure = false;
	struct sk_buff *skb;

	while (likely(total_rx_packets < budget)) {
		union ixgbe_adv_rx_desc *rx_desc;
		struct ixgbe_rx_buffer *bi;
		unsigned int size;

		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) {
			failure = failure ||
				  !ixgbe_alloc_rx_buffers_zc(rx_ring,
							     cleaned_count);
			cleaned_count = 0;
		}

		rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
		size = le16_to_cpu(rx_desc->wb.upper.length);
		if (!size)
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * descriptor has been written back
		 */
		dma_rmb();

		bi = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];

		if (unlikely(!ixgbe_test_staterr(rx_desc,
						 IXGBE_RXD_STAT_EOP))) {
			struct ixgbe_rx_buffer *next_bi;

			xsk_buff_free(bi->xdp);
			bi->xdp = NULL;
			ixgbe_inc_ntc(rx_ring);
			next_bi =
				&rx_ring->rx_buffer_info[rx_ring->next_to_clean];
			next_bi->discard = true;
			continue;
		}

		if (unlikely(bi->discard)) {
			xsk_buff_free(bi->xdp);
			bi->xdp = NULL;
			bi->discard = false;
			ixgbe_inc_ntc(rx_ring);
			continue;
		}

		bi->xdp->data_end = bi->xdp->data + size;
		xsk_buff_dma_sync_for_cpu(bi->xdp, rx_ring->xsk_pool);
		xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, bi->xdp);

		if (likely(xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR))) {
			xdp_xmit |= xdp_res;
		} else if (xdp_res == IXGBE_XDP_EXIT) {
			failure = true;
			break;
		} else if (xdp_res == IXGBE_XDP_CONSUMED) {
			xsk_buff_free(bi->xdp);
		} else if (xdp_res == IXGBE_XDP_PASS) {
			goto construct_skb;
		}

		bi->xdp = NULL;
		total_rx_packets++;
		total_rx_bytes += size;

		cleaned_count++;
		ixgbe_inc_ntc(rx_ring);
		continue;

construct_skb:
		/* XDP_PASS path */
		skb = ixgbe_construct_skb_zc(rx_ring, bi->xdp);
		if (!skb) {
			rx_ring->rx_stats.alloc_rx_buff_failed++;
			break;
		}

		xsk_buff_free(bi->xdp);
		bi->xdp = NULL;

		cleaned_count++;
		ixgbe_inc_ntc(rx_ring);

		if (eth_skb_pad(skb))
			continue;

		total_rx_bytes += skb->len;
		total_rx_packets++;

		ixgbe_process_skb_fields(rx_ring, rx_desc, skb);
		ixgbe_rx_skb(q_vector, skb);
	}

	if (xdp_xmit & IXGBE_XDP_REDIR)
		xdp_do_flush();

	if (xdp_xmit & IXGBE_XDP_TX) {
		struct ixgbe_ring *ring = ixgbe_determine_xdp_ring(adapter);

		ixgbe_xdp_ring_update_tail_locked(ring);
	}

	ixgbe_update_rx_ring_stats(rx_ring, q_vector, total_rx_packets,
				   total_rx_bytes);

	if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) {
		if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
			xsk_set_rx_need_wakeup(rx_ring->xsk_pool);
		else
			xsk_clear_rx_need_wakeup(rx_ring->xsk_pool);

		return (int)total_rx_packets;
	}
	return failure ? budget : (int)total_rx_packets;
}

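/* Release all XSK buffers still held by an Rx ring, e.g. on teardown. */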
void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring)
{
	struct ixgbe_rx_buffer *bi;
	u16 i;

	for (i = 0; i < rx_ring->count; i++) {
		bi = &rx_ring->rx_buffer_info[i];

		if (!bi->xdp)
			continue;

		xsk_buff_free(bi->xdp);
		bi->xdp = NULL;
	}
}

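/**
 * ixgbe_xmit_zc - transmit frames queued on the AF_XDP Tx ring
 * @xdp_ring: XDP Tx ring
 * @budget: maximum number of descriptors to post
 *
 * Peeks descriptors off the pool's Tx ring and posts them to hardware,
 * bumping the tail register once at the end. Returns true when there is
 * no more work to do, i.e. the budget was not exhausted and descriptor
 * space did not run out.
 **/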
static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
{
	struct xsk_buff_pool *pool = xdp_ring->xsk_pool;
	union ixgbe_adv_tx_desc *tx_desc = NULL;
	struct ixgbe_tx_buffer *tx_bi;
	bool work_done = true;
	struct xdp_desc desc;
	dma_addr_t dma;
	u32 cmd_type;

	while (budget-- > 0) {
		if (unlikely(!ixgbe_desc_unused(xdp_ring))) {
			work_done = false;
			break;
		}

		if (!netif_carrier_ok(xdp_ring->netdev))
			break;

		if (!xsk_tx_peek_desc(pool, &desc))
			break;

		dma = xsk_buff_raw_get_dma(pool, desc.addr);
		xsk_buff_raw_dma_sync_for_device(pool, dma, desc.len);

		tx_bi = &xdp_ring->tx_buffer_info[xdp_ring->next_to_use];
		tx_bi->bytecount = desc.len;
		tx_bi->xdpf = NULL;
		tx_bi->gso_segs = 1;

		tx_desc = IXGBE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
		tx_desc->read.buffer_addr = cpu_to_le64(dma);

		/* put descriptor type bits */
		cmd_type = IXGBE_ADVTXD_DTYP_DATA |
			   IXGBE_ADVTXD_DCMD_DEXT |
			   IXGBE_ADVTXD_DCMD_IFCS;
		cmd_type |= desc.len | IXGBE_TXD_CMD;
		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
		tx_desc->read.olinfo_status =
			cpu_to_le32(desc.len << IXGBE_ADVTXD_PAYLEN_SHIFT);

		xdp_ring->next_to_use++;
		if (xdp_ring->next_to_use == xdp_ring->count)
			xdp_ring->next_to_use = 0;
	}

	if (tx_desc) {
		ixgbe_xdp_ring_update_tail(xdp_ring);
		xsk_tx_release(pool);
	}

	return !!budget && work_done;
}

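/* Unmap and free a Tx buffer that carried an xdp_frame (XDP_TX path). */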
static void ixgbe_clean_xdp_tx_buffer(struct ixgbe_ring *tx_ring,
				      struct ixgbe_tx_buffer *tx_bi)
{
	xdp_return_frame(tx_bi->xdpf);
	dma_unmap_single(tx_ring->dev,
			 dma_unmap_addr(tx_bi, dma),
			 dma_unmap_len(tx_bi, len), DMA_TO_DEVICE);
	dma_unmap_len_set(tx_bi, len, 0);
}

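/**
 * ixgbe_clean_xdp_tx_irq - complete descriptors on the XDP Tx ring
 * @q_vector: structure containing interrupt and ring information
 * @tx_ring: XDP Tx ring to clean
 * @napi_budget: NAPI budget (not consumed here; completion is bounded
 *		 by next_to_use)
 *
 * Frames sent via XDP_TX are unmapped and freed; frames sent on behalf
 * of an AF_XDP socket are completed back to the pool. Finishes by
 * transmitting any frames the socket has queued in the meantime.
 *
 * Returns true if transmit work is complete.
 **/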
bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
			    struct ixgbe_ring *tx_ring, int napi_budget)
{
	u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
	unsigned int total_packets = 0, total_bytes = 0;
	struct xsk_buff_pool *pool = tx_ring->xsk_pool;
	union ixgbe_adv_tx_desc *tx_desc;
	struct ixgbe_tx_buffer *tx_bi;
	u32 xsk_frames = 0;

	tx_bi = &tx_ring->tx_buffer_info[ntc];
	tx_desc = IXGBE_TX_DESC(tx_ring, ntc);

	while (ntc != ntu) {
		if (!(tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
			break;

		total_bytes += tx_bi->bytecount;
		total_packets += tx_bi->gso_segs;

		if (tx_bi->xdpf)
			ixgbe_clean_xdp_tx_buffer(tx_ring, tx_bi);
		else
			xsk_frames++;

		tx_bi->xdpf = NULL;

		tx_bi++;
		tx_desc++;
		ntc++;
		if (unlikely(ntc == tx_ring->count)) {
			ntc = 0;
			tx_bi = tx_ring->tx_buffer_info;
			tx_desc = IXGBE_TX_DESC(tx_ring, 0);
		}

		/* issue prefetch for next Tx descriptor */
		prefetch(tx_desc);
	}

	tx_ring->next_to_clean = ntc;
	ixgbe_update_tx_ring_stats(tx_ring, q_vector, total_packets,
				   total_bytes);

	if (xsk_frames)
		xsk_tx_completed(pool, xsk_frames);

	if (xsk_uses_need_wakeup(pool))
		xsk_set_tx_need_wakeup(pool);

	return ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit);
}

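/**
 * ixgbe_xsk_wakeup - ndo_xsk_wakeup implementation
 * @dev: network interface device structure
 * @qid: XDP queue id to wake
 * @flags: XDP_WAKEUP_RX and/or XDP_WAKEUP_TX; not inspected, as Rx and
 *	   Tx for the queue are served by the same NAPI context
 *
 * If NAPI is already scheduled it is marked missed; otherwise a software
 * interrupt is triggered to rearm the queue's vector.
 **/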
int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
{
	struct ixgbe_adapter *adapter = netdev_priv(dev);
	struct ixgbe_ring *ring;

	if (test_bit(__IXGBE_DOWN, &adapter->state))
		return -ENETDOWN;

	if (!READ_ONCE(adapter->xdp_prog))
		return -EINVAL;

	if (qid >= adapter->num_xdp_queues)
		return -EINVAL;

	ring = adapter->xdp_ring[qid];

	if (test_bit(__IXGBE_TX_DISABLED, &ring->state))
		return -ENETDOWN;

	if (!ring->xsk_pool)
		return -EINVAL;

	if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
		u64 eics = BIT_ULL(ring->q_vector->v_idx);

		ixgbe_irq_rearm_queues(adapter, eics);
	}

	return 0;
}

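/* Return all outstanding descriptors on an XDP Tx ring at teardown:
 * xdp_frames are unmapped and freed, AF_XDP frames are completed back
 * to the pool.
 */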
void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring)
{
	u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
	struct xsk_buff_pool *pool = tx_ring->xsk_pool;
	struct ixgbe_tx_buffer *tx_bi;
	u32 xsk_frames = 0;

	while (ntc != ntu) {
		tx_bi = &tx_ring->tx_buffer_info[ntc];

		if (tx_bi->xdpf)
			ixgbe_clean_xdp_tx_buffer(tx_ring, tx_bi);
		else
			xsk_frames++;

		tx_bi->xdpf = NULL;

		ntc++;
		if (ntc == tx_ring->count)
			ntc = 0;
	}

	if (xsk_frames)
		xsk_tx_completed(pool, xsk_frames);
}