1 | // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) |
2 | /* Copyright (C) 2018 Netronome Systems, Inc */ |
3 | /* Copyright (C) 2021 Corigine, Inc */ |
4 | |
5 | #include <linux/bpf_trace.h> |
6 | #include <linux/netdevice.h> |
7 | |
8 | #include "../nfp_app.h" |
9 | #include "../nfp_net.h" |
10 | #include "../nfp_net_dp.h" |
11 | #include "../nfp_net_xsk.h" |
12 | #include "nfd3.h" |
13 | |
14 | static bool |
15 | nfp_nfd3_xsk_tx_xdp(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, |
16 | struct nfp_net_rx_ring *rx_ring, |
17 | struct nfp_net_tx_ring *tx_ring, |
18 | struct nfp_net_xsk_rx_buf *xrxbuf, unsigned int pkt_len, |
19 | int pkt_off) |
20 | { |
21 | struct xsk_buff_pool *pool = r_vec->xsk_pool; |
22 | struct nfp_nfd3_tx_buf *txbuf; |
23 | struct nfp_nfd3_tx_desc *txd; |
24 | unsigned int wr_idx; |
25 | |
26 | if (nfp_net_tx_space(tx_ring) < 1) |
27 | return false; |
28 | |
29 | xsk_buff_raw_dma_sync_for_device(pool, dma: xrxbuf->dma_addr + pkt_off, |
30 | size: pkt_len); |
31 | |
32 | wr_idx = D_IDX(tx_ring, tx_ring->wr_p); |
33 | |
34 | txbuf = &tx_ring->txbufs[wr_idx]; |
35 | txbuf->xdp = xrxbuf->xdp; |
36 | txbuf->real_len = pkt_len; |
37 | txbuf->is_xsk_tx = true; |
38 | |
39 | /* Build TX descriptor */ |
40 | txd = &tx_ring->txds[wr_idx]; |
41 | txd->offset_eop = NFD3_DESC_TX_EOP; |
42 | txd->dma_len = cpu_to_le16(pkt_len); |
43 | nfp_desc_set_dma_addr_40b(txd, xrxbuf->dma_addr + pkt_off); |
44 | txd->data_len = cpu_to_le16(pkt_len); |
45 | |
46 | txd->flags = 0; |
47 | txd->mss = 0; |
48 | txd->lso_hdrlen = 0; |
49 | |
50 | tx_ring->wr_ptr_add++; |
51 | tx_ring->wr_p++; |
52 | |
53 | return true; |
54 | } |
55 | |
56 | static void nfp_nfd3_xsk_rx_skb(struct nfp_net_rx_ring *rx_ring, |
57 | const struct nfp_net_rx_desc *rxd, |
58 | struct nfp_net_xsk_rx_buf *xrxbuf, |
59 | const struct nfp_meta_parsed *meta, |
60 | unsigned int pkt_len, |
61 | bool meta_xdp, |
62 | unsigned int *skbs_polled) |
63 | { |
64 | struct nfp_net_r_vector *r_vec = rx_ring->r_vec; |
65 | struct nfp_net_dp *dp = &r_vec->nfp_net->dp; |
66 | struct net_device *netdev; |
67 | struct sk_buff *skb; |
68 | |
69 | if (likely(!meta->portid)) { |
70 | netdev = dp->netdev; |
71 | } else { |
72 | struct nfp_net *nn = netdev_priv(dev: dp->netdev); |
73 | |
74 | netdev = nfp_app_dev_get(app: nn->app, id: meta->portid, NULL); |
75 | if (unlikely(!netdev)) { |
76 | nfp_net_xsk_rx_drop(r_vec, xrxbuf); |
77 | return; |
78 | } |
79 | nfp_repr_inc_rx_stats(netdev, len: pkt_len); |
80 | } |
81 | |
82 | skb = napi_alloc_skb(napi: &r_vec->napi, length: pkt_len); |
83 | if (!skb) { |
84 | nfp_net_xsk_rx_drop(r_vec, xrxbuf); |
85 | return; |
86 | } |
87 | skb_put_data(skb, data: xrxbuf->xdp->data, len: pkt_len); |
88 | |
89 | skb->mark = meta->mark; |
90 | skb_set_hash(skb, hash: meta->hash, type: meta->hash_type); |
91 | |
92 | skb_record_rx_queue(skb, rx_queue: rx_ring->idx); |
93 | skb->protocol = eth_type_trans(skb, dev: netdev); |
94 | |
95 | nfp_nfd3_rx_csum(dp, r_vec, rxd, meta, skb); |
96 | |
97 | if (unlikely(!nfp_net_vlan_strip(skb, rxd, meta))) { |
98 | dev_kfree_skb_any(skb); |
99 | nfp_net_xsk_rx_drop(r_vec, xrxbuf); |
100 | return; |
101 | } |
102 | |
103 | if (meta_xdp) |
104 | skb_metadata_set(skb, |
105 | meta_len: xrxbuf->xdp->data - xrxbuf->xdp->data_meta); |
106 | |
107 | napi_gro_receive(napi: &rx_ring->r_vec->napi, skb); |
108 | |
109 | nfp_net_xsk_rx_free(rxbuf: xrxbuf); |
110 | |
111 | (*skbs_polled)++; |
112 | } |
113 | |
114 | static unsigned int |
115 | nfp_nfd3_xsk_rx(struct nfp_net_rx_ring *rx_ring, int budget, |
116 | unsigned int *skbs_polled) |
117 | { |
118 | struct nfp_net_r_vector *r_vec = rx_ring->r_vec; |
119 | struct nfp_net_dp *dp = &r_vec->nfp_net->dp; |
120 | struct nfp_net_tx_ring *tx_ring; |
121 | struct bpf_prog *xdp_prog; |
122 | bool xdp_redir = false; |
123 | int pkts_polled = 0; |
124 | |
125 | xdp_prog = READ_ONCE(dp->xdp_prog); |
126 | tx_ring = r_vec->xdp_ring; |
127 | |
128 | while (pkts_polled < budget) { |
129 | unsigned int meta_len, data_len, pkt_len, pkt_off; |
130 | struct nfp_net_xsk_rx_buf *xrxbuf; |
131 | struct nfp_net_rx_desc *rxd; |
132 | struct nfp_meta_parsed meta; |
133 | int idx, act; |
134 | |
135 | idx = D_IDX(rx_ring, rx_ring->rd_p); |
136 | |
137 | rxd = &rx_ring->rxds[idx]; |
138 | if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) |
139 | break; |
140 | |
141 | rx_ring->rd_p++; |
142 | pkts_polled++; |
143 | |
144 | xrxbuf = &rx_ring->xsk_rxbufs[idx]; |
145 | |
146 | /* If starved of buffers "drop" it and scream. */ |
147 | if (rx_ring->rd_p >= rx_ring->wr_p) { |
148 | nn_dp_warn(dp, "Starved of RX buffers\n" ); |
149 | nfp_net_xsk_rx_drop(r_vec, xrxbuf); |
150 | break; |
151 | } |
152 | |
153 | /* Memory barrier to ensure that we won't do other reads |
154 | * before the DD bit. |
155 | */ |
156 | dma_rmb(); |
157 | |
158 | memset(&meta, 0, sizeof(meta)); |
159 | |
160 | /* Only supporting AF_XDP with dynamic metadata so buffer layout |
161 | * is always: |
162 | * |
163 | * --------------------------------------------------------- |
164 | * | off | metadata | packet | XXXX | |
165 | * --------------------------------------------------------- |
166 | */ |
167 | meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; |
168 | data_len = le16_to_cpu(rxd->rxd.data_len); |
169 | pkt_len = data_len - meta_len; |
170 | |
171 | if (unlikely(meta_len > NFP_NET_MAX_PREPEND)) { |
172 | nn_dp_warn(dp, "Oversized RX packet metadata %u\n" , |
173 | meta_len); |
174 | nfp_net_xsk_rx_drop(r_vec, xrxbuf); |
175 | continue; |
176 | } |
177 | |
178 | /* Stats update. */ |
179 | u64_stats_update_begin(syncp: &r_vec->rx_sync); |
180 | r_vec->rx_pkts++; |
181 | r_vec->rx_bytes += pkt_len; |
182 | u64_stats_update_end(syncp: &r_vec->rx_sync); |
183 | |
184 | xrxbuf->xdp->data += meta_len; |
185 | xrxbuf->xdp->data_end = xrxbuf->xdp->data + pkt_len; |
186 | xdp_set_data_meta_invalid(xdp: xrxbuf->xdp); |
187 | xsk_buff_dma_sync_for_cpu(xdp: xrxbuf->xdp, pool: r_vec->xsk_pool); |
188 | net_prefetch(p: xrxbuf->xdp->data); |
189 | |
190 | if (meta_len) { |
191 | if (unlikely(nfp_nfd3_parse_meta(dp->netdev, &meta, |
192 | xrxbuf->xdp->data - |
193 | meta_len, |
194 | xrxbuf->xdp->data, |
195 | pkt_len, meta_len))) { |
196 | nn_dp_warn(dp, "Invalid RX packet metadata\n" ); |
197 | nfp_net_xsk_rx_drop(r_vec, xrxbuf); |
198 | continue; |
199 | } |
200 | |
201 | if (unlikely(meta.portid)) { |
202 | struct nfp_net *nn = netdev_priv(dev: dp->netdev); |
203 | |
204 | if (meta.portid != NFP_META_PORT_ID_CTRL) { |
205 | nfp_nfd3_xsk_rx_skb(rx_ring, rxd, |
206 | xrxbuf, meta: &meta, |
207 | pkt_len, meta_xdp: false, |
208 | skbs_polled); |
209 | continue; |
210 | } |
211 | |
212 | nfp_app_ctrl_rx_raw(app: nn->app, data: xrxbuf->xdp->data, |
213 | len: pkt_len); |
214 | nfp_net_xsk_rx_free(rxbuf: xrxbuf); |
215 | continue; |
216 | } |
217 | } |
218 | |
219 | act = bpf_prog_run_xdp(prog: xdp_prog, xdp: xrxbuf->xdp); |
220 | |
221 | pkt_len = xrxbuf->xdp->data_end - xrxbuf->xdp->data; |
222 | pkt_off = xrxbuf->xdp->data - xrxbuf->xdp->data_hard_start; |
223 | |
224 | switch (act) { |
225 | case XDP_PASS: |
226 | nfp_nfd3_xsk_rx_skb(rx_ring, rxd, xrxbuf, meta: &meta, pkt_len, |
227 | meta_xdp: true, skbs_polled); |
228 | break; |
229 | case XDP_TX: |
230 | if (!nfp_nfd3_xsk_tx_xdp(dp, r_vec, rx_ring, tx_ring, |
231 | xrxbuf, pkt_len, pkt_off)) |
232 | nfp_net_xsk_rx_drop(r_vec, xrxbuf); |
233 | else |
234 | nfp_net_xsk_rx_unstash(rxbuf: xrxbuf); |
235 | break; |
236 | case XDP_REDIRECT: |
237 | if (xdp_do_redirect(dev: dp->netdev, xdp: xrxbuf->xdp, prog: xdp_prog)) { |
238 | nfp_net_xsk_rx_drop(r_vec, xrxbuf); |
239 | } else { |
240 | nfp_net_xsk_rx_unstash(rxbuf: xrxbuf); |
241 | xdp_redir = true; |
242 | } |
243 | break; |
244 | default: |
245 | bpf_warn_invalid_xdp_action(dev: dp->netdev, prog: xdp_prog, act); |
246 | fallthrough; |
247 | case XDP_ABORTED: |
248 | trace_xdp_exception(dev: dp->netdev, xdp: xdp_prog, act); |
249 | fallthrough; |
250 | case XDP_DROP: |
251 | nfp_net_xsk_rx_drop(r_vec, xrxbuf); |
252 | break; |
253 | } |
254 | } |
255 | |
256 | nfp_net_xsk_rx_ring_fill_freelist(rx_ring: r_vec->rx_ring); |
257 | |
258 | if (xdp_redir) |
259 | xdp_do_flush(); |
260 | |
261 | if (tx_ring->wr_ptr_add) |
262 | nfp_net_tx_xmit_more_flush(tx_ring); |
263 | |
264 | return pkts_polled; |
265 | } |
266 | |
267 | void nfp_nfd3_xsk_tx_free(struct nfp_nfd3_tx_buf *txbuf) |
268 | { |
269 | xsk_buff_free(xdp: txbuf->xdp); |
270 | |
271 | txbuf->dma_addr = 0; |
272 | txbuf->xdp = NULL; |
273 | } |
274 | |
275 | static bool nfp_nfd3_xsk_complete(struct nfp_net_tx_ring *tx_ring) |
276 | { |
277 | struct nfp_net_r_vector *r_vec = tx_ring->r_vec; |
278 | u32 done_pkts = 0, done_bytes = 0, reused = 0; |
279 | bool done_all; |
280 | int idx, todo; |
281 | u32 qcp_rd_p; |
282 | |
283 | if (tx_ring->wr_p == tx_ring->rd_p) |
284 | return true; |
285 | |
286 | /* Work out how many descriptors have been transmitted. */ |
287 | qcp_rd_p = nfp_qcp_rd_ptr_read(q: tx_ring->qcp_q); |
288 | |
289 | if (qcp_rd_p == tx_ring->qcp_rd_p) |
290 | return true; |
291 | |
292 | todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p); |
293 | |
294 | done_all = todo <= NFP_NET_XDP_MAX_COMPLETE; |
295 | todo = min(todo, NFP_NET_XDP_MAX_COMPLETE); |
296 | |
297 | tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + todo); |
298 | |
299 | done_pkts = todo; |
300 | while (todo--) { |
301 | struct nfp_nfd3_tx_buf *txbuf; |
302 | |
303 | idx = D_IDX(tx_ring, tx_ring->rd_p); |
304 | tx_ring->rd_p++; |
305 | |
306 | txbuf = &tx_ring->txbufs[idx]; |
307 | if (unlikely(!txbuf->real_len)) |
308 | continue; |
309 | |
310 | done_bytes += txbuf->real_len; |
311 | txbuf->real_len = 0; |
312 | |
313 | if (txbuf->is_xsk_tx) { |
314 | nfp_nfd3_xsk_tx_free(txbuf); |
315 | reused++; |
316 | } |
317 | } |
318 | |
319 | u64_stats_update_begin(syncp: &r_vec->tx_sync); |
320 | r_vec->tx_bytes += done_bytes; |
321 | r_vec->tx_pkts += done_pkts; |
322 | u64_stats_update_end(syncp: &r_vec->tx_sync); |
323 | |
324 | xsk_tx_completed(pool: r_vec->xsk_pool, nb_entries: done_pkts - reused); |
325 | |
326 | WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, |
327 | "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n" , |
328 | tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); |
329 | |
330 | return done_all; |
331 | } |
332 | |
333 | static void nfp_nfd3_xsk_tx(struct nfp_net_tx_ring *tx_ring) |
334 | { |
335 | struct nfp_net_r_vector *r_vec = tx_ring->r_vec; |
336 | struct xdp_desc desc[NFP_NET_XSK_TX_BATCH]; |
337 | struct xsk_buff_pool *xsk_pool; |
338 | struct nfp_nfd3_tx_desc *txd; |
339 | u32 pkts = 0, wr_idx; |
340 | u32 i, got; |
341 | |
342 | xsk_pool = r_vec->xsk_pool; |
343 | |
344 | while (nfp_net_tx_space(tx_ring) >= NFP_NET_XSK_TX_BATCH) { |
345 | for (i = 0; i < NFP_NET_XSK_TX_BATCH; i++) |
346 | if (!xsk_tx_peek_desc(pool: xsk_pool, desc: &desc[i])) |
347 | break; |
348 | got = i; |
349 | if (!got) |
350 | break; |
351 | |
352 | wr_idx = D_IDX(tx_ring, tx_ring->wr_p + i); |
353 | prefetchw(x: &tx_ring->txds[wr_idx]); |
354 | |
355 | for (i = 0; i < got; i++) |
356 | xsk_buff_raw_dma_sync_for_device(pool: xsk_pool, dma: desc[i].addr, |
357 | size: desc[i].len); |
358 | |
359 | for (i = 0; i < got; i++) { |
360 | wr_idx = D_IDX(tx_ring, tx_ring->wr_p + i); |
361 | |
362 | tx_ring->txbufs[wr_idx].real_len = desc[i].len; |
363 | tx_ring->txbufs[wr_idx].is_xsk_tx = false; |
364 | |
365 | /* Build TX descriptor. */ |
366 | txd = &tx_ring->txds[wr_idx]; |
367 | nfp_desc_set_dma_addr_40b(txd, |
368 | xsk_buff_raw_get_dma(xsk_pool, desc[i].addr)); |
369 | txd->offset_eop = NFD3_DESC_TX_EOP; |
370 | txd->dma_len = cpu_to_le16(desc[i].len); |
371 | txd->data_len = cpu_to_le16(desc[i].len); |
372 | } |
373 | |
374 | tx_ring->wr_p += got; |
375 | pkts += got; |
376 | } |
377 | |
378 | if (!pkts) |
379 | return; |
380 | |
381 | xsk_tx_release(pool: xsk_pool); |
382 | /* Ensure all records are visible before incrementing write counter. */ |
383 | wmb(); |
384 | nfp_qcp_wr_ptr_add(q: tx_ring->qcp_q, val: pkts); |
385 | } |
386 | |
387 | int nfp_nfd3_xsk_poll(struct napi_struct *napi, int budget) |
388 | { |
389 | struct nfp_net_r_vector *r_vec = |
390 | container_of(napi, struct nfp_net_r_vector, napi); |
391 | unsigned int pkts_polled, skbs = 0; |
392 | |
393 | pkts_polled = nfp_nfd3_xsk_rx(rx_ring: r_vec->rx_ring, budget, skbs_polled: &skbs); |
394 | |
395 | if (pkts_polled < budget) { |
396 | if (r_vec->tx_ring) |
397 | nfp_nfd3_tx_complete(tx_ring: r_vec->tx_ring, budget); |
398 | |
399 | if (!nfp_nfd3_xsk_complete(tx_ring: r_vec->xdp_ring)) |
400 | pkts_polled = budget; |
401 | |
402 | nfp_nfd3_xsk_tx(tx_ring: r_vec->xdp_ring); |
403 | |
404 | if (pkts_polled < budget && napi_complete_done(n: napi, work_done: skbs)) |
405 | nfp_net_irq_unmask(nn: r_vec->nfp_net, entry_nr: r_vec->irq_entry); |
406 | } |
407 | |
408 | return pkts_polled; |
409 | } |
410 | |