1 | // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) |
2 | /* Copyright (C) 2015-2019 Netronome Systems, Inc. */ |
3 | |
4 | #include <linux/bpf_trace.h> |
5 | #include <linux/netdevice.h> |
6 | #include <linux/bitfield.h> |
7 | #include <net/xfrm.h> |
8 | |
9 | #include "../nfp_app.h" |
10 | #include "../nfp_net.h" |
11 | #include "../nfp_net_dp.h" |
12 | #include "../nfp_net_xsk.h" |
13 | #include "../crypto/crypto.h" |
14 | #include "../crypto/fw.h" |
15 | #include "nfd3.h" |
16 | |
17 | /* Transmit processing |
18 | * |
19 | * One queue controller peripheral queue is used for transmit. The |
 * driver enqueues packets for transmit by advancing the write
 * pointer. The device indicates that packets have been transmitted by
22 | * advancing the read pointer. The driver maintains a local copy of |
23 | * the read and write pointer in @struct nfp_net_tx_ring. The driver |
24 | * keeps @wr_p in sync with the queue controller write pointer and can |
25 | * determine how many packets have been transmitted by comparing its |
26 | * copy of the read pointer @rd_p with the read pointer maintained by |
27 | * the queue controller peripheral. |
28 | */ |
29 | |
30 | /* Wrappers for deciding when to stop and restart TX queues */ |
31 | static int nfp_nfd3_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring) |
32 | { |
33 | return !nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS * 4); |
34 | } |
35 | |
36 | static int nfp_nfd3_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring) |
37 | { |
38 | return nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS + 1); |
39 | } |
40 | |
41 | /** |
42 | * nfp_nfd3_tx_ring_stop() - stop tx ring |
43 | * @nd_q: netdev queue |
44 | * @tx_ring: driver tx queue structure |
45 | * |
46 | * Safely stop TX ring. Remember that while we are running .start_xmit() |
47 | * someone else may be cleaning the TX ring completions so we need to be |
48 | * extra careful here. |
49 | */ |
50 | static void |
51 | nfp_nfd3_tx_ring_stop(struct netdev_queue *nd_q, |
52 | struct nfp_net_tx_ring *tx_ring) |
53 | { |
	netif_tx_stop_queue(nd_q);

	/* We can race with the TX completion out of NAPI so recheck */
	smp_mb();
	if (unlikely(nfp_nfd3_tx_ring_should_wake(tx_ring)))
		netif_tx_start_queue(nd_q);
60 | } |
61 | |
62 | /** |
63 | * nfp_nfd3_tx_tso() - Set up Tx descriptor for LSO |
64 | * @r_vec: per-ring structure |
65 | * @txbuf: Pointer to driver soft TX descriptor |
66 | * @txd: Pointer to HW TX descriptor |
67 | * @skb: Pointer to SKB |
68 | * @md_bytes: Prepend length |
69 | * |
70 | * Set up Tx descriptor for LSO, do nothing for non-LSO skbs. |
71 | * Return error on packet header greater than maximum supported LSO header size. |
72 | */ |
73 | static void |
74 | nfp_nfd3_tx_tso(struct nfp_net_r_vector *r_vec, struct nfp_nfd3_tx_buf *txbuf, |
75 | struct nfp_nfd3_tx_desc *txd, struct sk_buff *skb, u32 md_bytes) |
76 | { |
77 | u32 l3_offset, l4_offset, hdrlen, l4_hdrlen; |
78 | u16 mss; |
79 | |
80 | if (!skb_is_gso(skb)) |
81 | return; |
82 | |
83 | if (!skb->encapsulation) { |
84 | l3_offset = skb_network_offset(skb); |
85 | l4_offset = skb_transport_offset(skb); |
86 | l4_hdrlen = (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ? |
87 | sizeof(struct udphdr) : tcp_hdrlen(skb); |
88 | } else { |
89 | l3_offset = skb_inner_network_offset(skb); |
90 | l4_offset = skb_inner_transport_offset(skb); |
91 | l4_hdrlen = (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ? |
92 | sizeof(struct udphdr) : inner_tcp_hdrlen(skb); |
93 | } |
94 | |
95 | hdrlen = l4_offset + l4_hdrlen; |
96 | txbuf->pkt_cnt = skb_shinfo(skb)->gso_segs; |
97 | txbuf->real_len += hdrlen * (txbuf->pkt_cnt - 1); |
98 | |
99 | mss = skb_shinfo(skb)->gso_size & NFD3_DESC_TX_MSS_MASK; |
100 | txd->l3_offset = l3_offset - md_bytes; |
101 | txd->l4_offset = l4_offset - md_bytes; |
102 | txd->lso_hdrlen = hdrlen - md_bytes; |
103 | txd->mss = cpu_to_le16(mss); |
104 | txd->flags |= NFD3_DESC_TX_LSO; |
105 | |
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_lso++;
	u64_stats_update_end(&r_vec->tx_sync);
109 | } |
110 | |
111 | /** |
112 | * nfp_nfd3_tx_csum() - Set TX CSUM offload flags in TX descriptor |
113 | * @dp: NFP Net data path struct |
114 | * @r_vec: per-ring structure |
115 | * @txbuf: Pointer to driver soft TX descriptor |
116 | * @txd: Pointer to TX descriptor |
117 | * @skb: Pointer to SKB |
118 | * |
119 | * This function sets the TX checksum flags in the TX descriptor based |
120 | * on the configuration and the protocol of the packet to be transmitted. |
121 | */ |
122 | static void |
123 | nfp_nfd3_tx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, |
124 | struct nfp_nfd3_tx_buf *txbuf, struct nfp_nfd3_tx_desc *txd, |
125 | struct sk_buff *skb) |
126 | { |
127 | struct ipv6hdr *ipv6h; |
128 | struct iphdr *iph; |
129 | u8 l4_hdr; |
130 | |
131 | if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM)) |
132 | return; |
133 | |
134 | if (skb->ip_summed != CHECKSUM_PARTIAL) |
135 | return; |
136 | |
137 | txd->flags |= NFD3_DESC_TX_CSUM; |
138 | if (skb->encapsulation) |
139 | txd->flags |= NFD3_DESC_TX_ENCAP; |
140 | |
141 | iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); |
142 | ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); |
143 | |
144 | if (iph->version == 4) { |
145 | txd->flags |= NFD3_DESC_TX_IP4_CSUM; |
146 | l4_hdr = iph->protocol; |
147 | } else if (ipv6h->version == 6) { |
148 | l4_hdr = ipv6h->nexthdr; |
149 | } else { |
		nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version);
151 | return; |
152 | } |
153 | |
154 | switch (l4_hdr) { |
155 | case IPPROTO_TCP: |
156 | txd->flags |= NFD3_DESC_TX_TCP_CSUM; |
157 | break; |
158 | case IPPROTO_UDP: |
159 | txd->flags |= NFD3_DESC_TX_UDP_CSUM; |
160 | break; |
161 | default: |
		nn_dp_warn(dp, "partial checksum but l4 proto=%x!\n", l4_hdr);
163 | return; |
164 | } |
165 | |
	u64_stats_update_begin(&r_vec->tx_sync);
	if (skb->encapsulation)
		r_vec->hw_csum_tx_inner += txbuf->pkt_cnt;
	else
		r_vec->hw_csum_tx += txbuf->pkt_cnt;
	u64_stats_update_end(&r_vec->tx_sync);
172 | } |
173 | |
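/**
 * nfp_nfd3_prep_tx_meta() - Prepend TX metadata to the packet
 * @dp:		NFP Net data path struct
 * @skb:	Pointer to SKB
 * @tls_handle:	TLS connection handle, 0 if not a TLS offload packet
 * @ipsec:	Set to true if the packet requires IPsec offload
 *
 * Build the metadata prepend (port id, TLS connection handle, VLAN and
 * IPsec fields) in front of the packet data.
 *
 * Return: number of metadata bytes prepended, 0 if no metadata is
 * needed, or -ENOMEM if the headroom could not be expanded.
 */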
174 | static int nfp_nfd3_prep_tx_meta(struct nfp_net_dp *dp, struct sk_buff *skb, |
175 | u64 tls_handle, bool *ipsec) |
176 | { |
177 | struct metadata_dst *md_dst = skb_metadata_dst(skb); |
178 | struct nfp_ipsec_offload offload_info; |
179 | unsigned char *data; |
180 | bool vlan_insert; |
181 | u32 meta_id = 0; |
182 | int md_bytes; |
183 | |
184 | #ifdef CONFIG_NFP_NET_IPSEC |
185 | if (xfrm_offload(skb)) |
		*ipsec = nfp_net_ipsec_tx_prep(dp, skb, &offload_info);
187 | #endif |
188 | |
189 | if (unlikely(md_dst && md_dst->type != METADATA_HW_PORT_MUX)) |
190 | md_dst = NULL; |
191 | |
192 | vlan_insert = skb_vlan_tag_present(skb) && (dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN_V2); |
193 | |
194 | if (!(md_dst || tls_handle || vlan_insert || *ipsec)) |
195 | return 0; |
196 | |
197 | md_bytes = sizeof(meta_id) + |
198 | (!!md_dst ? NFP_NET_META_PORTID_SIZE : 0) + |
199 | (!!tls_handle ? NFP_NET_META_CONN_HANDLE_SIZE : 0) + |
200 | (vlan_insert ? NFP_NET_META_VLAN_SIZE : 0) + |
201 | (*ipsec ? NFP_NET_META_IPSEC_FIELD_SIZE : 0); |
202 | |
203 | if (unlikely(skb_cow_head(skb, md_bytes))) |
204 | return -ENOMEM; |
205 | |
	data = skb_push(skb, md_bytes) + md_bytes;
	if (md_dst) {
		data -= NFP_NET_META_PORTID_SIZE;
		put_unaligned_be32(md_dst->u.port_info.port_id, data);
210 | meta_id = NFP_NET_META_PORTID; |
211 | } |
212 | if (tls_handle) { |
213 | /* conn handle is opaque, we just use u64 to be able to quickly |
214 | * compare it to zero |
215 | */ |
216 | data -= NFP_NET_META_CONN_HANDLE_SIZE; |
217 | memcpy(data, &tls_handle, sizeof(tls_handle)); |
218 | meta_id <<= NFP_NET_META_FIELD_SIZE; |
219 | meta_id |= NFP_NET_META_CONN_HANDLE; |
220 | } |
221 | if (vlan_insert) { |
222 | data -= NFP_NET_META_VLAN_SIZE; |
		/* skb->vlan_proto is already __be16, so it can be copied
		 * into the metadata without put_unaligned_be16
		 */
		memcpy(data, &skb->vlan_proto, sizeof(skb->vlan_proto));
		put_unaligned_be16(skb_vlan_tag_get(skb), data + sizeof(skb->vlan_proto));
228 | meta_id <<= NFP_NET_META_FIELD_SIZE; |
229 | meta_id |= NFP_NET_META_VLAN; |
230 | } |
231 | if (*ipsec) { |
232 | data -= NFP_NET_META_IPSEC_SIZE; |
		put_unaligned_be32(offload_info.seq_hi, data);
		data -= NFP_NET_META_IPSEC_SIZE;
		put_unaligned_be32(offload_info.seq_low, data);
		data -= NFP_NET_META_IPSEC_SIZE;
		put_unaligned_be32(offload_info.handle - 1, data);
238 | meta_id <<= NFP_NET_META_IPSEC_FIELD_SIZE; |
239 | meta_id |= NFP_NET_META_IPSEC << 8 | NFP_NET_META_IPSEC << 4 | NFP_NET_META_IPSEC; |
240 | } |
241 | |
242 | data -= sizeof(meta_id); |
	put_unaligned_be32(meta_id, data);
244 | |
245 | return md_bytes; |
246 | } |
247 | |
248 | /** |
249 | * nfp_nfd3_tx() - Main transmit entry point |
250 | * @skb: SKB to transmit |
251 | * @netdev: netdev structure |
252 | * |
253 | * Return: NETDEV_TX_OK on success. |
254 | */ |
255 | netdev_tx_t nfp_nfd3_tx(struct sk_buff *skb, struct net_device *netdev) |
256 | { |
	struct nfp_net *nn = netdev_priv(netdev);
258 | int f, nr_frags, wr_idx, md_bytes; |
259 | struct nfp_net_tx_ring *tx_ring; |
260 | struct nfp_net_r_vector *r_vec; |
261 | struct nfp_nfd3_tx_buf *txbuf; |
262 | struct nfp_nfd3_tx_desc *txd; |
263 | struct netdev_queue *nd_q; |
264 | const skb_frag_t *frag; |
265 | struct nfp_net_dp *dp; |
266 | dma_addr_t dma_addr; |
267 | unsigned int fsize; |
268 | u64 tls_handle = 0; |
269 | bool ipsec = false; |
270 | u16 qidx; |
271 | |
272 | dp = &nn->dp; |
273 | qidx = skb_get_queue_mapping(skb); |
274 | tx_ring = &dp->tx_rings[qidx]; |
275 | r_vec = tx_ring->r_vec; |
276 | |
277 | nr_frags = skb_shinfo(skb)->nr_frags; |
278 | |
279 | if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) { |
		nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n",
			   qidx, tx_ring->wr_p, tx_ring->rd_p);
		nd_q = netdev_get_tx_queue(dp->netdev, qidx);
		netif_tx_stop_queue(nd_q);
		nfp_net_tx_xmit_more_flush(tx_ring);
		u64_stats_update_begin(&r_vec->tx_sync);
		r_vec->tx_busy++;
		u64_stats_update_end(&r_vec->tx_sync);
288 | return NETDEV_TX_BUSY; |
289 | } |
290 | |
	skb = nfp_net_tls_tx(dp, r_vec, skb, &tls_handle, &nr_frags);
292 | if (unlikely(!skb)) { |
293 | nfp_net_tx_xmit_more_flush(tx_ring); |
294 | return NETDEV_TX_OK; |
295 | } |
296 | |
	md_bytes = nfp_nfd3_prep_tx_meta(dp, skb, tls_handle, &ipsec);
298 | if (unlikely(md_bytes < 0)) |
299 | goto err_flush; |
300 | |
301 | /* Start with the head skbuf */ |
302 | dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb), |
303 | DMA_TO_DEVICE); |
	if (dma_mapping_error(dp->dev, dma_addr))
305 | goto err_dma_err; |
306 | |
307 | wr_idx = D_IDX(tx_ring, tx_ring->wr_p); |
308 | |
309 | /* Stash the soft descriptor of the head then initialize it */ |
310 | txbuf = &tx_ring->txbufs[wr_idx]; |
311 | txbuf->skb = skb; |
312 | txbuf->dma_addr = dma_addr; |
313 | txbuf->fidx = -1; |
314 | txbuf->pkt_cnt = 1; |
315 | txbuf->real_len = skb->len; |
316 | |
317 | /* Build TX descriptor */ |
318 | txd = &tx_ring->txds[wr_idx]; |
319 | txd->offset_eop = (nr_frags ? 0 : NFD3_DESC_TX_EOP) | md_bytes; |
320 | txd->dma_len = cpu_to_le16(skb_headlen(skb)); |
321 | nfp_desc_set_dma_addr_40b(txd, dma_addr); |
322 | txd->data_len = cpu_to_le16(skb->len); |
323 | |
324 | txd->flags = 0; |
325 | txd->mss = 0; |
326 | txd->lso_hdrlen = 0; |
327 | |
328 | /* Do not reorder - tso may adjust pkt cnt, vlan may override fields */ |
329 | nfp_nfd3_tx_tso(r_vec, txbuf, txd, skb, md_bytes); |
330 | if (ipsec) |
331 | nfp_nfd3_ipsec_tx(txd, skb); |
332 | else |
333 | nfp_nfd3_tx_csum(dp, r_vec, txbuf, txd, skb); |
334 | if (skb_vlan_tag_present(skb) && dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN) { |
335 | txd->flags |= NFD3_DESC_TX_VLAN; |
336 | txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb)); |
337 | } |
338 | |
339 | /* Gather DMA */ |
340 | if (nr_frags > 0) { |
341 | __le64 second_half; |
342 | |
		/* all descs must match except for the addr, length and eop */
344 | second_half = txd->vals8[1]; |
345 | |
346 | for (f = 0; f < nr_frags; f++) { |
347 | frag = &skb_shinfo(skb)->frags[f]; |
348 | fsize = skb_frag_size(frag); |
349 | |
			dma_addr = skb_frag_dma_map(dp->dev, frag, 0,
						    fsize, DMA_TO_DEVICE);
			if (dma_mapping_error(dp->dev, dma_addr))
353 | goto err_unmap; |
354 | |
355 | wr_idx = D_IDX(tx_ring, wr_idx + 1); |
356 | tx_ring->txbufs[wr_idx].skb = skb; |
357 | tx_ring->txbufs[wr_idx].dma_addr = dma_addr; |
358 | tx_ring->txbufs[wr_idx].fidx = f; |
359 | |
360 | txd = &tx_ring->txds[wr_idx]; |
361 | txd->dma_len = cpu_to_le16(fsize); |
362 | nfp_desc_set_dma_addr_40b(txd, dma_addr); |
363 | txd->offset_eop = md_bytes | |
364 | ((f == nr_frags - 1) ? NFD3_DESC_TX_EOP : 0); |
365 | txd->vals8[1] = second_half; |
366 | } |
367 | |
		u64_stats_update_begin(&r_vec->tx_sync);
		r_vec->tx_gather++;
		u64_stats_update_end(&r_vec->tx_sync);
371 | } |
372 | |
373 | skb_tx_timestamp(skb); |
374 | |
	nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
376 | |
377 | tx_ring->wr_p += nr_frags + 1; |
378 | if (nfp_nfd3_tx_ring_should_stop(tx_ring)) |
379 | nfp_nfd3_tx_ring_stop(nd_q, tx_ring); |
380 | |
381 | tx_ring->wr_ptr_add += nr_frags + 1; |
	if (__netdev_tx_sent_queue(nd_q, txbuf->real_len, netdev_xmit_more()))
383 | nfp_net_tx_xmit_more_flush(tx_ring); |
384 | |
385 | return NETDEV_TX_OK; |
386 | |
387 | err_unmap: |
388 | while (--f >= 0) { |
389 | frag = &skb_shinfo(skb)->frags[f]; |
390 | dma_unmap_page(dp->dev, tx_ring->txbufs[wr_idx].dma_addr, |
391 | skb_frag_size(frag), DMA_TO_DEVICE); |
392 | tx_ring->txbufs[wr_idx].skb = NULL; |
393 | tx_ring->txbufs[wr_idx].dma_addr = 0; |
394 | tx_ring->txbufs[wr_idx].fidx = -2; |
395 | wr_idx = wr_idx - 1; |
396 | if (wr_idx < 0) |
397 | wr_idx += tx_ring->cnt; |
398 | } |
399 | dma_unmap_single(dp->dev, tx_ring->txbufs[wr_idx].dma_addr, |
400 | skb_headlen(skb), DMA_TO_DEVICE); |
401 | tx_ring->txbufs[wr_idx].skb = NULL; |
402 | tx_ring->txbufs[wr_idx].dma_addr = 0; |
403 | tx_ring->txbufs[wr_idx].fidx = -2; |
404 | err_dma_err: |
	nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
406 | err_flush: |
407 | nfp_net_tx_xmit_more_flush(tx_ring); |
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_errors++;
	u64_stats_update_end(&r_vec->tx_sync);
411 | nfp_net_tls_tx_undo(skb, tls_handle); |
412 | dev_kfree_skb_any(skb); |
413 | return NETDEV_TX_OK; |
414 | } |
415 | |
416 | /** |
 * nfp_nfd3_tx_complete() - Handle completed TX packets
418 | * @tx_ring: TX ring structure |
419 | * @budget: NAPI budget (only used as bool to determine if in NAPI context) |
420 | */ |
421 | void nfp_nfd3_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget) |
422 | { |
423 | struct nfp_net_r_vector *r_vec = tx_ring->r_vec; |
424 | struct nfp_net_dp *dp = &r_vec->nfp_net->dp; |
425 | u32 done_pkts = 0, done_bytes = 0; |
426 | struct netdev_queue *nd_q; |
427 | u32 qcp_rd_p; |
428 | int todo; |
429 | |
430 | if (tx_ring->wr_p == tx_ring->rd_p) |
431 | return; |
432 | |
433 | /* Work out how many descriptors have been transmitted */ |
434 | qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp); |
435 | |
436 | if (qcp_rd_p == tx_ring->qcp_rd_p) |
437 | return; |
438 | |
439 | todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p); |
440 | |
441 | while (todo--) { |
442 | const skb_frag_t *frag; |
443 | struct nfp_nfd3_tx_buf *tx_buf; |
444 | struct sk_buff *skb; |
445 | int fidx, nr_frags; |
446 | int idx; |
447 | |
448 | idx = D_IDX(tx_ring, tx_ring->rd_p++); |
449 | tx_buf = &tx_ring->txbufs[idx]; |
450 | |
451 | skb = tx_buf->skb; |
452 | if (!skb) |
453 | continue; |
454 | |
455 | nr_frags = skb_shinfo(skb)->nr_frags; |
456 | fidx = tx_buf->fidx; |
457 | |
458 | if (fidx == -1) { |
459 | /* unmap head */ |
460 | dma_unmap_single(dp->dev, tx_buf->dma_addr, |
461 | skb_headlen(skb), DMA_TO_DEVICE); |
462 | |
463 | done_pkts += tx_buf->pkt_cnt; |
464 | done_bytes += tx_buf->real_len; |
465 | } else { |
466 | /* unmap fragment */ |
467 | frag = &skb_shinfo(skb)->frags[fidx]; |
468 | dma_unmap_page(dp->dev, tx_buf->dma_addr, |
469 | skb_frag_size(frag), DMA_TO_DEVICE); |
470 | } |
471 | |
472 | /* check for last gather fragment */ |
473 | if (fidx == nr_frags - 1) |
474 | napi_consume_skb(skb, budget); |
475 | |
476 | tx_buf->dma_addr = 0; |
477 | tx_buf->skb = NULL; |
478 | tx_buf->fidx = -2; |
479 | } |
480 | |
481 | tx_ring->qcp_rd_p = qcp_rd_p; |
482 | |
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_bytes += done_bytes;
	r_vec->tx_pkts += done_pkts;
	u64_stats_update_end(&r_vec->tx_sync);
487 | |
488 | if (!dp->netdev) |
489 | return; |
490 | |
	nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
	netdev_tx_completed_queue(nd_q, done_pkts, done_bytes);
493 | if (nfp_nfd3_tx_ring_should_wake(tx_ring)) { |
494 | /* Make sure TX thread will see updated tx_ring->rd_p */ |
495 | smp_mb(); |
496 | |
497 | if (unlikely(netif_tx_queue_stopped(nd_q))) |
			netif_tx_wake_queue(nd_q);
499 | } |
500 | |
501 | WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, |
		  "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
503 | tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); |
504 | } |
505 | |
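/* Complete XDP TX descriptors. At most NFP_NET_XDP_MAX_COMPLETE
 * descriptors are processed per call; returns true if the completion
 * queue was fully drained.
 */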
506 | static bool nfp_nfd3_xdp_complete(struct nfp_net_tx_ring *tx_ring) |
507 | { |
508 | struct nfp_net_r_vector *r_vec = tx_ring->r_vec; |
509 | struct nfp_net_dp *dp = &r_vec->nfp_net->dp; |
510 | u32 done_pkts = 0, done_bytes = 0; |
511 | bool done_all; |
512 | int idx, todo; |
513 | u32 qcp_rd_p; |
514 | |
515 | /* Work out how many descriptors have been transmitted */ |
516 | qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp); |
517 | |
518 | if (qcp_rd_p == tx_ring->qcp_rd_p) |
519 | return true; |
520 | |
521 | todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p); |
522 | |
523 | done_all = todo <= NFP_NET_XDP_MAX_COMPLETE; |
524 | todo = min(todo, NFP_NET_XDP_MAX_COMPLETE); |
525 | |
526 | tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + todo); |
527 | |
528 | done_pkts = todo; |
529 | while (todo--) { |
530 | idx = D_IDX(tx_ring, tx_ring->rd_p); |
531 | tx_ring->rd_p++; |
532 | |
533 | done_bytes += tx_ring->txbufs[idx].real_len; |
534 | } |
535 | |
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_bytes += done_bytes;
	r_vec->tx_pkts += done_pkts;
	u64_stats_update_end(&r_vec->tx_sync);
540 | |
541 | WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, |
		  "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
543 | tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); |
544 | |
545 | return done_all; |
546 | } |
547 | |
548 | /* Receive processing |
549 | */ |
550 | |
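/**
 * nfp_nfd3_napi_alloc_one() - Allocate and map a replacement RX buffer
 * @dp:		NFP Net data path struct
 * @dma_addr:	Output parameter for the DMA address of the new buffer
 *
 * Allocate a page fragment (or a full page when an XDP program is
 * attached) and DMA map it for the device.
 *
 * Return: buffer address, or NULL on allocation or mapping failure.
 */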
551 | static void * |
552 | nfp_nfd3_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) |
553 | { |
554 | void *frag; |
555 | |
556 | if (!dp->xdp_prog) { |
		frag = napi_alloc_frag(dp->fl_bufsz);
558 | if (unlikely(!frag)) |
559 | return NULL; |
560 | } else { |
561 | struct page *page; |
562 | |
563 | page = dev_alloc_page(); |
564 | if (unlikely(!page)) |
565 | return NULL; |
566 | frag = page_address(page); |
567 | } |
568 | |
569 | *dma_addr = nfp_net_dma_map_rx(dp, frag); |
	if (dma_mapping_error(dp->dev, *dma_addr)) {
		nfp_net_free_frag(frag, dp->xdp_prog);
		nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
573 | return NULL; |
574 | } |
575 | |
576 | return frag; |
577 | } |
578 | |
579 | /** |
580 | * nfp_nfd3_rx_give_one() - Put mapped skb on the software and hardware rings |
581 | * @dp: NFP Net data path struct |
582 | * @rx_ring: RX ring structure |
583 | * @frag: page fragment buffer |
584 | * @dma_addr: DMA address of skb mapping |
585 | */ |
586 | static void |
587 | nfp_nfd3_rx_give_one(const struct nfp_net_dp *dp, |
588 | struct nfp_net_rx_ring *rx_ring, |
589 | void *frag, dma_addr_t dma_addr) |
590 | { |
591 | unsigned int wr_idx; |
592 | |
593 | wr_idx = D_IDX(rx_ring, rx_ring->wr_p); |
594 | |
595 | nfp_net_dma_sync_dev_rx(dp, dma_addr); |
596 | |
597 | /* Stash SKB and DMA address away */ |
598 | rx_ring->rxbufs[wr_idx].frag = frag; |
599 | rx_ring->rxbufs[wr_idx].dma_addr = dma_addr; |
600 | |
601 | /* Fill freelist descriptor */ |
602 | rx_ring->rxds[wr_idx].fld.reserved = 0; |
603 | rx_ring->rxds[wr_idx].fld.meta_len_dd = 0; |
	/* DMA address is expanded to 48-bit width in the freelist for NFP3800,
	 * so the *_48b macro is used accordingly. It's also OK to fill
	 * a 40-bit address since the top 8 bits get set to 0.
	 */
608 | nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld, |
609 | dma_addr + dp->rx_dma_off); |
610 | |
611 | rx_ring->wr_p++; |
612 | if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) { |
613 | /* Update write pointer of the freelist queue. Make |
614 | * sure all writes are flushed before telling the hardware. |
615 | */ |
616 | wmb(); |
		nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH);
618 | } |
619 | } |
620 | |
621 | /** |
622 | * nfp_nfd3_rx_ring_fill_freelist() - Give buffers from the ring to FW |
623 | * @dp: NFP Net data path struct |
624 | * @rx_ring: RX ring to fill |
625 | */ |
626 | void nfp_nfd3_rx_ring_fill_freelist(struct nfp_net_dp *dp, |
627 | struct nfp_net_rx_ring *rx_ring) |
628 | { |
629 | unsigned int i; |
630 | |
	if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx))
		return nfp_net_xsk_rx_ring_fill_freelist(rx_ring);

	for (i = 0; i < rx_ring->cnt - 1; i++)
		nfp_nfd3_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag,
				     rx_ring->rxbufs[i].dma_addr);
637 | } |
638 | |
639 | /** |
640 | * nfp_nfd3_rx_csum_has_errors() - group check if rxd has any csum errors |
641 | * @flags: RX descriptor flags field in CPU byte order |
642 | */ |
643 | static int nfp_nfd3_rx_csum_has_errors(u16 flags) |
644 | { |
645 | u16 csum_all_checked, csum_all_ok; |
646 | |
647 | csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL; |
648 | csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK; |
649 | |
650 | return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT); |
651 | } |
652 | |
653 | /** |
654 | * nfp_nfd3_rx_csum() - set SKB checksum field based on RX descriptor flags |
655 | * @dp: NFP Net data path struct |
656 | * @r_vec: per-ring structure |
657 | * @rxd: Pointer to RX descriptor |
658 | * @meta: Parsed metadata prepend |
659 | * @skb: Pointer to SKB |
660 | */ |
661 | void |
662 | nfp_nfd3_rx_csum(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, |
663 | const struct nfp_net_rx_desc *rxd, |
664 | const struct nfp_meta_parsed *meta, struct sk_buff *skb) |
665 | { |
666 | skb_checksum_none_assert(skb); |
667 | |
668 | if (!(dp->netdev->features & NETIF_F_RXCSUM)) |
669 | return; |
670 | |
671 | if (meta->csum_type) { |
672 | skb->ip_summed = meta->csum_type; |
673 | skb->csum = meta->csum; |
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_complete++;
		u64_stats_update_end(&r_vec->rx_sync);
677 | return; |
678 | } |
679 | |
680 | if (nfp_nfd3_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) { |
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_error++;
		u64_stats_update_end(&r_vec->rx_sync);
684 | return; |
685 | } |
686 | |
687 | /* Assume that the firmware will never report inner CSUM_OK unless outer |
688 | * L4 headers were successfully parsed. FW will always report zero UDP |
689 | * checksum as CSUM_OK. |
690 | */ |
691 | if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK || |
692 | rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) { |
693 | __skb_incr_checksum_unnecessary(skb); |
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_ok++;
		u64_stats_update_end(&r_vec->rx_sync);
697 | } |
698 | |
699 | if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK || |
700 | rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) { |
701 | __skb_incr_checksum_unnecessary(skb); |
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_inner_ok++;
		u64_stats_update_end(&r_vec->rx_sync);
705 | } |
706 | } |
707 | |
708 | static void |
709 | nfp_nfd3_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta, |
710 | unsigned int type, __be32 *hash) |
711 | { |
712 | if (!(netdev->features & NETIF_F_RXHASH)) |
713 | return; |
714 | |
715 | switch (type) { |
716 | case NFP_NET_RSS_IPV4: |
717 | case NFP_NET_RSS_IPV6: |
718 | case NFP_NET_RSS_IPV6_EX: |
719 | meta->hash_type = PKT_HASH_TYPE_L3; |
720 | break; |
721 | default: |
722 | meta->hash_type = PKT_HASH_TYPE_L4; |
723 | break; |
724 | } |
725 | |
	meta->hash = get_unaligned_be32(hash);
727 | } |
728 | |
729 | static void |
730 | nfp_nfd3_set_hash_desc(struct net_device *netdev, struct nfp_meta_parsed *meta, |
731 | void *data, struct nfp_net_rx_desc *rxd) |
732 | { |
733 | struct nfp_net_rx_hash *rx_hash = data; |
734 | |
735 | if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS)) |
736 | return; |
737 | |
	nfp_nfd3_set_hash(netdev, meta, get_unaligned_be32(&rx_hash->hash_type),
			  &rx_hash->hash);
740 | } |
741 | |
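/**
 * nfp_nfd3_parse_meta() - Parse the chained metadata prepend
 * @netdev:	netdev structure
 * @meta:	Parsed metadata output
 * @data:	Pointer to the start of the metadata prepend
 * @pkt:	Pointer to the start of the packet data
 * @pkt_len:	Length of the packet data
 * @meta_len:	Length of the metadata prepend
 *
 * Walk the chained metadata fields placed in front of the packet by the
 * firmware and fill @meta accordingly.
 *
 * Return: true if the metadata could not be parsed and the packet
 * should be dropped.
 */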
742 | bool |
743 | nfp_nfd3_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, |
744 | void *data, void *pkt, unsigned int pkt_len, int meta_len) |
745 | { |
746 | u32 meta_info, vlan_info; |
747 | |
	meta_info = get_unaligned_be32(data);
749 | data += 4; |
750 | |
751 | while (meta_info) { |
752 | switch (meta_info & NFP_NET_META_FIELD_MASK) { |
753 | case NFP_NET_META_HASH: |
754 | meta_info >>= NFP_NET_META_FIELD_SIZE; |
			nfp_nfd3_set_hash(netdev, meta,
					  meta_info & NFP_NET_META_FIELD_MASK,
					  (__be32 *)data);
758 | data += 4; |
759 | break; |
760 | case NFP_NET_META_MARK: |
			meta->mark = get_unaligned_be32(data);
762 | data += 4; |
763 | break; |
764 | case NFP_NET_META_VLAN: |
			vlan_info = get_unaligned_be32(data);
766 | if (FIELD_GET(NFP_NET_META_VLAN_STRIP, vlan_info)) { |
767 | meta->vlan.stripped = true; |
768 | meta->vlan.tpid = FIELD_GET(NFP_NET_META_VLAN_TPID_MASK, |
769 | vlan_info); |
770 | meta->vlan.tci = FIELD_GET(NFP_NET_META_VLAN_TCI_MASK, |
771 | vlan_info); |
772 | } |
773 | data += 4; |
774 | break; |
775 | case NFP_NET_META_PORTID: |
			meta->portid = get_unaligned_be32(data);
777 | data += 4; |
778 | break; |
779 | case NFP_NET_META_CSUM: |
780 | meta->csum_type = CHECKSUM_COMPLETE; |
781 | meta->csum = |
				(__force __wsum)__get_unaligned_cpu32(data);
783 | data += 4; |
784 | break; |
785 | case NFP_NET_META_RESYNC_INFO: |
			if (nfp_net_tls_rx_resync_req(netdev, data, pkt,
787 | pkt_len)) |
788 | return false; |
789 | data += sizeof(struct nfp_net_tls_resync_req); |
790 | break; |
791 | #ifdef CONFIG_NFP_NET_IPSEC |
792 | case NFP_NET_META_IPSEC: |
			/* Note: IPsec packets have a zero saidx, so add 1
			 * to mark the packet as IPsec within the driver.
			 */
			meta->ipsec_saidx = get_unaligned_be32(data) + 1;
797 | data += 4; |
798 | break; |
799 | #endif |
800 | default: |
801 | return true; |
802 | } |
803 | |
804 | meta_info >>= NFP_NET_META_FIELD_SIZE; |
805 | } |
806 | |
807 | return data != pkt; |
808 | } |
809 | |
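/* Drop an RX packet: update drop statistics, recycle the RX buffer back
 * to the freelist if one was given, and free the skb if one was built.
 */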
810 | static void |
811 | nfp_nfd3_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, |
812 | struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf, |
813 | struct sk_buff *skb) |
814 | { |
	u64_stats_update_begin(&r_vec->rx_sync);
	r_vec->rx_drops++;
	/* If we have both skb and rxbuf the replacement buffer allocation
	 * must have failed, count this as an alloc failure.
	 */
	if (skb && rxbuf)
		r_vec->rx_replace_buf_alloc_fail++;
	u64_stats_update_end(&r_vec->rx_sync);
823 | |
	/* The skb is built around the frag and freeing the skb would also
	 * free the frag, so take an extra reference to be able to reuse it.
	 */
	if (skb && rxbuf && skb->head == rxbuf->frag)
		page_ref_inc(virt_to_head_page(rxbuf->frag));
	if (rxbuf)
		nfp_nfd3_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr);
831 | if (skb) |
832 | dev_kfree_skb_any(skb); |
833 | } |
834 | |
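/* Transmit a packet on the XDP TX ring for XDP_TX. The RX buffer is
 * handed over to the TX ring and the buffer previously stashed in the
 * TX slot is recycled back to the RX freelist. Returns false if the
 * ring is full or the packet was grown beyond the DMA mapped area.
 */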
835 | static bool |
836 | nfp_nfd3_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, |
837 | struct nfp_net_tx_ring *tx_ring, |
838 | struct nfp_net_rx_buf *rxbuf, unsigned int dma_off, |
839 | unsigned int pkt_len, bool *completed) |
840 | { |
841 | unsigned int dma_map_sz = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA; |
842 | struct nfp_nfd3_tx_buf *txbuf; |
843 | struct nfp_nfd3_tx_desc *txd; |
844 | int wr_idx; |
845 | |
	/* Reject if xdp_adjust_tail grew the packet beyond the DMA area */
847 | if (pkt_len + dma_off > dma_map_sz) |
848 | return false; |
849 | |
850 | if (unlikely(nfp_net_tx_full(tx_ring, 1))) { |
851 | if (!*completed) { |
852 | nfp_nfd3_xdp_complete(tx_ring); |
853 | *completed = true; |
854 | } |
855 | |
856 | if (unlikely(nfp_net_tx_full(tx_ring, 1))) { |
			nfp_nfd3_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf,
					 NULL);
859 | return false; |
860 | } |
861 | } |
862 | |
863 | wr_idx = D_IDX(tx_ring, tx_ring->wr_p); |
864 | |
865 | /* Stash the soft descriptor of the head then initialize it */ |
866 | txbuf = &tx_ring->txbufs[wr_idx]; |
867 | |
	nfp_nfd3_rx_give_one(dp, rx_ring, txbuf->frag, txbuf->dma_addr);
869 | |
870 | txbuf->frag = rxbuf->frag; |
871 | txbuf->dma_addr = rxbuf->dma_addr; |
872 | txbuf->fidx = -1; |
873 | txbuf->pkt_cnt = 1; |
874 | txbuf->real_len = pkt_len; |
875 | |
	dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off,
				   pkt_len, DMA_BIDIRECTIONAL);
878 | |
879 | /* Build TX descriptor */ |
880 | txd = &tx_ring->txds[wr_idx]; |
881 | txd->offset_eop = NFD3_DESC_TX_EOP; |
882 | txd->dma_len = cpu_to_le16(pkt_len); |
883 | nfp_desc_set_dma_addr_40b(txd, rxbuf->dma_addr + dma_off); |
884 | txd->data_len = cpu_to_le16(pkt_len); |
885 | |
886 | txd->flags = 0; |
887 | txd->mss = 0; |
888 | txd->lso_hdrlen = 0; |
889 | |
890 | tx_ring->wr_p++; |
891 | tx_ring->wr_ptr_add++; |
892 | return true; |
893 | } |
894 | |
895 | /** |
896 | * nfp_nfd3_rx() - receive up to @budget packets on @rx_ring |
897 | * @rx_ring: RX ring to receive from |
898 | * @budget: NAPI budget |
899 | * |
900 | * Note, this function is separated out from the napi poll function to |
901 | * more cleanly separate packet receive code from other bookkeeping |
902 | * functions performed in the napi poll function. |
903 | * |
904 | * Return: Number of packets received. |
905 | */ |
906 | static int nfp_nfd3_rx(struct nfp_net_rx_ring *rx_ring, int budget) |
907 | { |
908 | struct nfp_net_r_vector *r_vec = rx_ring->r_vec; |
909 | struct nfp_net_dp *dp = &r_vec->nfp_net->dp; |
910 | struct nfp_net_tx_ring *tx_ring; |
911 | struct bpf_prog *xdp_prog; |
912 | int idx, pkts_polled = 0; |
913 | bool xdp_tx_cmpl = false; |
914 | unsigned int true_bufsz; |
915 | struct sk_buff *skb; |
916 | struct xdp_buff xdp; |
917 | |
918 | xdp_prog = READ_ONCE(dp->xdp_prog); |
919 | true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz; |
	xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM,
		      &rx_ring->xdp_rxq);
922 | tx_ring = r_vec->xdp_ring; |
923 | |
924 | while (pkts_polled < budget) { |
925 | unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off; |
926 | struct nfp_net_rx_buf *rxbuf; |
927 | struct nfp_net_rx_desc *rxd; |
928 | struct nfp_meta_parsed meta; |
929 | bool redir_egress = false; |
930 | struct net_device *netdev; |
931 | dma_addr_t new_dma_addr; |
932 | u32 meta_len_xdp = 0; |
933 | void *new_frag; |
934 | |
935 | idx = D_IDX(rx_ring, rx_ring->rd_p); |
936 | |
937 | rxd = &rx_ring->rxds[idx]; |
938 | if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) |
939 | break; |
940 | |
941 | /* Memory barrier to ensure that we won't do other reads |
942 | * before the DD bit. |
943 | */ |
944 | dma_rmb(); |
945 | |
946 | memset(&meta, 0, sizeof(meta)); |
947 | |
948 | rx_ring->rd_p++; |
949 | pkts_polled++; |
950 | |
951 | rxbuf = &rx_ring->rxbufs[idx]; |
952 | /* < meta_len > |
953 | * <-- [rx_offset] --> |
954 | * --------------------------------------------------------- |
955 | * | [XX] | metadata | packet | XXXX | |
956 | * --------------------------------------------------------- |
957 | * <---------------- data_len ---------------> |
958 | * |
959 | * The rx_offset is fixed for all packets, the meta_len can vary |
960 | * on a packet by packet basis. If rx_offset is set to zero |
961 | * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the |
962 | * buffer and is immediately followed by the packet (no [XX]). |
963 | */ |
964 | meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; |
965 | data_len = le16_to_cpu(rxd->rxd.data_len); |
966 | pkt_len = data_len - meta_len; |
967 | |
968 | pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off; |
969 | if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) |
970 | pkt_off += meta_len; |
971 | else |
972 | pkt_off += dp->rx_offset; |
973 | meta_off = pkt_off - meta_len; |
974 | |
975 | /* Stats update */ |
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->rx_pkts++;
		r_vec->rx_bytes += pkt_len;
		u64_stats_update_end(&r_vec->rx_sync);
980 | |
981 | if (unlikely(meta_len > NFP_NET_MAX_PREPEND || |
982 | (dp->rx_offset && meta_len > dp->rx_offset))) { |
			nn_dp_warn(dp, "oversized RX packet metadata %u\n",
984 | meta_len); |
985 | nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); |
986 | continue; |
987 | } |
988 | |
		nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off,
					data_len);
991 | |
992 | if (!dp->chained_metadata_format) { |
			nfp_nfd3_set_hash_desc(dp->netdev, &meta,
					       rxbuf->frag + meta_off, rxd);
995 | } else if (meta_len) { |
996 | if (unlikely(nfp_nfd3_parse_meta(dp->netdev, &meta, |
997 | rxbuf->frag + meta_off, |
998 | rxbuf->frag + pkt_off, |
999 | pkt_len, meta_len))) { |
				nn_dp_warn(dp, "invalid RX packet metadata\n");
1001 | nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, |
1002 | NULL); |
1003 | continue; |
1004 | } |
1005 | } |
1006 | |
1007 | if (xdp_prog && !meta.portid) { |
1008 | void *orig_data = rxbuf->frag + pkt_off; |
1009 | unsigned int dma_off; |
1010 | int act; |
1011 | |
			xdp_prepare_buff(&xdp,
					 rxbuf->frag + NFP_NET_RX_BUF_HEADROOM,
					 pkt_off - NFP_NET_RX_BUF_HEADROOM,
					 pkt_len, true);

			act = bpf_prog_run_xdp(xdp_prog, &xdp);
1018 | |
1019 | pkt_len = xdp.data_end - xdp.data; |
1020 | pkt_off += xdp.data - orig_data; |
1021 | |
1022 | switch (act) { |
1023 | case XDP_PASS: |
1024 | meta_len_xdp = xdp.data - xdp.data_meta; |
1025 | break; |
1026 | case XDP_TX: |
1027 | dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM; |
1028 | if (unlikely(!nfp_nfd3_tx_xdp_buf(dp, rx_ring, |
1029 | tx_ring, |
1030 | rxbuf, |
1031 | dma_off, |
1032 | pkt_len, |
1033 | &xdp_tx_cmpl))) |
					trace_xdp_exception(dp->netdev,
							    xdp_prog, act);
1036 | continue; |
1037 | default: |
				bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act);
1039 | fallthrough; |
1040 | case XDP_ABORTED: |
				trace_xdp_exception(dp->netdev, xdp_prog, act);
1042 | fallthrough; |
1043 | case XDP_DROP: |
				nfp_nfd3_rx_give_one(dp, rx_ring, rxbuf->frag,
						     rxbuf->dma_addr);
1046 | continue; |
1047 | } |
1048 | } |
1049 | |
1050 | if (likely(!meta.portid)) { |
1051 | netdev = dp->netdev; |
1052 | } else if (meta.portid == NFP_META_PORT_ID_CTRL) { |
			struct nfp_net *nn = netdev_priv(dp->netdev);

			nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off,
					    pkt_len);
			nfp_nfd3_rx_give_one(dp, rx_ring, rxbuf->frag,
					     rxbuf->dma_addr);
1059 | continue; |
1060 | } else { |
1061 | struct nfp_net *nn; |
1062 | |
			nn = netdev_priv(dp->netdev);
			netdev = nfp_app_dev_get(nn->app, meta.portid,
						 &redir_egress);
1066 | if (unlikely(!netdev)) { |
1067 | nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, |
1068 | NULL); |
1069 | continue; |
1070 | } |
1071 | |
1072 | if (nfp_netdev_is_nfp_repr(netdev)) |
				nfp_repr_inc_rx_stats(netdev, pkt_len);
1074 | } |
1075 | |
		skb = napi_build_skb(rxbuf->frag, true_bufsz);
1077 | if (unlikely(!skb)) { |
1078 | nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); |
1079 | continue; |
1080 | } |
		new_frag = nfp_nfd3_napi_alloc_one(dp, &new_dma_addr);
1082 | if (unlikely(!new_frag)) { |
1083 | nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, skb); |
1084 | continue; |
1085 | } |
1086 | |
		nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);

		nfp_nfd3_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);

		skb_reserve(skb, pkt_off);
		skb_put(skb, pkt_len);

		skb->mark = meta.mark;
		skb_set_hash(skb, meta.hash, meta.hash_type);

		skb_record_rx_queue(skb, rx_ring->idx);
		skb->protocol = eth_type_trans(skb, netdev);

		nfp_nfd3_rx_csum(dp, r_vec, rxd, &meta, skb);
1101 | |
1102 | #ifdef CONFIG_TLS_DEVICE |
1103 | if (rxd->rxd.flags & PCIE_DESC_RX_DECRYPTED) { |
1104 | skb->decrypted = true; |
			u64_stats_update_begin(&r_vec->rx_sync);
			r_vec->hw_tls_rx++;
			u64_stats_update_end(&r_vec->rx_sync);
1108 | } |
1109 | #endif |
1110 | |
1111 | if (unlikely(!nfp_net_vlan_strip(skb, rxd, &meta))) { |
1112 | nfp_nfd3_rx_drop(dp, r_vec, rx_ring, NULL, skb); |
1113 | continue; |
1114 | } |
1115 | |
1116 | #ifdef CONFIG_NFP_NET_IPSEC |
1117 | if (meta.ipsec_saidx != 0 && unlikely(nfp_net_ipsec_rx(&meta, skb))) { |
1118 | nfp_nfd3_rx_drop(dp, r_vec, rx_ring, NULL, skb); |
1119 | continue; |
1120 | } |
1121 | #endif |
1122 | |
1123 | if (meta_len_xdp) |
			skb_metadata_set(skb, meta_len_xdp);
1125 | |
1126 | if (likely(!redir_egress)) { |
			napi_gro_receive(&rx_ring->r_vec->napi, skb);
1128 | } else { |
1129 | skb->dev = netdev; |
1130 | skb_reset_network_header(skb); |
1131 | __skb_push(skb, ETH_HLEN); |
1132 | dev_queue_xmit(skb); |
1133 | } |
1134 | } |
1135 | |
1136 | if (xdp_prog) { |
1137 | if (tx_ring->wr_ptr_add) |
1138 | nfp_net_tx_xmit_more_flush(tx_ring); |
1139 | else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) && |
1140 | !xdp_tx_cmpl) |
1141 | if (!nfp_nfd3_xdp_complete(tx_ring)) |
1142 | pkts_polled = budget; |
1143 | } |
1144 | |
1145 | return pkts_polled; |
1146 | } |
1147 | |
1148 | /** |
1149 | * nfp_nfd3_poll() - napi poll function |
1150 | * @napi: NAPI structure |
1151 | * @budget: NAPI budget |
1152 | * |
1153 | * Return: number of packets polled. |
1154 | */ |
1155 | int nfp_nfd3_poll(struct napi_struct *napi, int budget) |
1156 | { |
1157 | struct nfp_net_r_vector *r_vec = |
1158 | container_of(napi, struct nfp_net_r_vector, napi); |
1159 | unsigned int pkts_polled = 0; |
1160 | |
1161 | if (r_vec->tx_ring) |
		nfp_nfd3_tx_complete(r_vec->tx_ring, budget);
	if (r_vec->rx_ring)
		pkts_polled = nfp_nfd3_rx(r_vec->rx_ring, budget);

	if (pkts_polled < budget)
		if (napi_complete_done(napi, pkts_polled))
			nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
1169 | |
1170 | if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) { |
1171 | struct dim_sample dim_sample = {}; |
1172 | unsigned int start; |
1173 | u64 pkts, bytes; |
1174 | |
1175 | do { |
			start = u64_stats_fetch_begin(&r_vec->rx_sync);
			pkts = r_vec->rx_pkts;
			bytes = r_vec->rx_bytes;
		} while (u64_stats_fetch_retry(&r_vec->rx_sync, start));

		dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
		net_dim(&r_vec->rx_dim, dim_sample);
1183 | } |
1184 | |
1185 | if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) { |
1186 | struct dim_sample dim_sample = {}; |
1187 | unsigned int start; |
1188 | u64 pkts, bytes; |
1189 | |
1190 | do { |
			start = u64_stats_fetch_begin(&r_vec->tx_sync);
			pkts = r_vec->tx_pkts;
			bytes = r_vec->tx_bytes;
		} while (u64_stats_fetch_retry(&r_vec->tx_sync, start));

		dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
		net_dim(&r_vec->tx_dim, dim_sample);
1198 | } |
1199 | |
1200 | return pkts_polled; |
1201 | } |
1202 | |
1203 | /* Control device data path |
1204 | */ |
1205 | |
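/* Send a single control message on the control vNIC TX ring. If the
 * ring is full the skb is put back on the r_vector queue (tail for new
 * messages, head when requeuing) and true is returned so the caller
 * retries later; otherwise false is returned.
 */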
1206 | bool |
1207 | nfp_nfd3_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, |
1208 | struct sk_buff *skb, bool old) |
1209 | { |
1210 | unsigned int real_len = skb->len, meta_len = 0; |
1211 | struct nfp_net_tx_ring *tx_ring; |
1212 | struct nfp_nfd3_tx_buf *txbuf; |
1213 | struct nfp_nfd3_tx_desc *txd; |
1214 | struct nfp_net_dp *dp; |
1215 | dma_addr_t dma_addr; |
1216 | int wr_idx; |
1217 | |
1218 | dp = &r_vec->nfp_net->dp; |
1219 | tx_ring = r_vec->tx_ring; |
1220 | |
1221 | if (WARN_ON_ONCE(skb_shinfo(skb)->nr_frags)) { |
		nn_dp_warn(dp, "Driver's CTRL TX does not implement gather\n");
1223 | goto err_free; |
1224 | } |
1225 | |
1226 | if (unlikely(nfp_net_tx_full(tx_ring, 1))) { |
		u64_stats_update_begin(&r_vec->tx_sync);
		r_vec->tx_busy++;
		u64_stats_update_end(&r_vec->tx_sync);
		if (!old)
			__skb_queue_tail(&r_vec->queue, skb);
		else
			__skb_queue_head(&r_vec->queue, skb);
1234 | return true; |
1235 | } |
1236 | |
	if (nfp_app_ctrl_has_meta(nn->app)) {
		if (unlikely(skb_headroom(skb) < 8)) {
			nn_dp_warn(dp, "CTRL TX on skb without headroom\n");
			goto err_free;
		}
		meta_len = 8;
		put_unaligned_be32(NFP_META_PORT_ID_CTRL, skb_push(skb, 4));
		put_unaligned_be32(NFP_NET_META_PORTID, skb_push(skb, 4));
1245 | } |
1246 | |
1247 | /* Start with the head skbuf */ |
1248 | dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb), |
1249 | DMA_TO_DEVICE); |
	if (dma_mapping_error(dp->dev, dma_addr))
1251 | goto err_dma_warn; |
1252 | |
1253 | wr_idx = D_IDX(tx_ring, tx_ring->wr_p); |
1254 | |
1255 | /* Stash the soft descriptor of the head then initialize it */ |
1256 | txbuf = &tx_ring->txbufs[wr_idx]; |
1257 | txbuf->skb = skb; |
1258 | txbuf->dma_addr = dma_addr; |
1259 | txbuf->fidx = -1; |
1260 | txbuf->pkt_cnt = 1; |
1261 | txbuf->real_len = real_len; |
1262 | |
1263 | /* Build TX descriptor */ |
1264 | txd = &tx_ring->txds[wr_idx]; |
1265 | txd->offset_eop = meta_len | NFD3_DESC_TX_EOP; |
1266 | txd->dma_len = cpu_to_le16(skb_headlen(skb)); |
1267 | nfp_desc_set_dma_addr_40b(txd, dma_addr); |
1268 | txd->data_len = cpu_to_le16(skb->len); |
1269 | |
1270 | txd->flags = 0; |
1271 | txd->mss = 0; |
1272 | txd->lso_hdrlen = 0; |
1273 | |
1274 | tx_ring->wr_p++; |
1275 | tx_ring->wr_ptr_add++; |
1276 | nfp_net_tx_xmit_more_flush(tx_ring); |
1277 | |
1278 | return false; |
1279 | |
1280 | err_dma_warn: |
	nn_dp_warn(dp, "Failed to DMA map TX CTRL buffer\n");
err_free:
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_errors++;
	u64_stats_update_end(&r_vec->tx_sync);
1286 | dev_kfree_skb_any(skb); |
1287 | return false; |
1288 | } |
1289 | |
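/* Try to send the control messages queued while the TX ring was full,
 * stopping as soon as the ring fills up again.
 */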
1290 | static void __nfp_ctrl_tx_queued(struct nfp_net_r_vector *r_vec) |
1291 | { |
1292 | struct sk_buff *skb; |
1293 | |
	while ((skb = __skb_dequeue(&r_vec->queue)))
		if (nfp_nfd3_ctrl_tx_one(r_vec->nfp_net, r_vec, skb, true))
1296 | return; |
1297 | } |
1298 | |
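/* Validate the metadata prepend of a control packet: when the app uses
 * control message metadata it must be exactly 8 bytes carrying the
 * CTRL port id, otherwise no metadata is expected.
 */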
1299 | static bool |
1300 | nfp_ctrl_meta_ok(struct nfp_net *nn, void *data, unsigned int meta_len) |
1301 | { |
1302 | u32 meta_type, meta_tag; |
1303 | |
	if (!nfp_app_ctrl_has_meta(nn->app))
1305 | return !meta_len; |
1306 | |
1307 | if (meta_len != 8) |
1308 | return false; |
1309 | |
	meta_type = get_unaligned_be32(data);
	meta_tag = get_unaligned_be32(data + 4);
1312 | |
1313 | return (meta_type == NFP_NET_META_PORTID && |
1314 | meta_tag == NFP_META_PORT_ID_CTRL); |
1315 | } |
1316 | |
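/* Receive a single packet from the control vNIC RX ring and hand it to
 * the app. Returns false once the ring is empty.
 */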
1317 | static bool |
1318 | nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp, |
1319 | struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring) |
1320 | { |
1321 | unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off; |
1322 | struct nfp_net_rx_buf *rxbuf; |
1323 | struct nfp_net_rx_desc *rxd; |
1324 | dma_addr_t new_dma_addr; |
1325 | struct sk_buff *skb; |
1326 | void *new_frag; |
1327 | int idx; |
1328 | |
1329 | idx = D_IDX(rx_ring, rx_ring->rd_p); |
1330 | |
1331 | rxd = &rx_ring->rxds[idx]; |
1332 | if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) |
1333 | return false; |
1334 | |
1335 | /* Memory barrier to ensure that we won't do other reads |
1336 | * before the DD bit. |
1337 | */ |
1338 | dma_rmb(); |
1339 | |
1340 | rx_ring->rd_p++; |
1341 | |
1342 | rxbuf = &rx_ring->rxbufs[idx]; |
1343 | meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; |
1344 | data_len = le16_to_cpu(rxd->rxd.data_len); |
1345 | pkt_len = data_len - meta_len; |
1346 | |
1347 | pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off; |
1348 | if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) |
1349 | pkt_off += meta_len; |
1350 | else |
1351 | pkt_off += dp->rx_offset; |
1352 | meta_off = pkt_off - meta_len; |
1353 | |
1354 | /* Stats update */ |
	u64_stats_update_begin(&r_vec->rx_sync);
	r_vec->rx_pkts++;
	r_vec->rx_bytes += pkt_len;
	u64_stats_update_end(&r_vec->rx_sync);

	nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, data_len);

	if (unlikely(!nfp_ctrl_meta_ok(nn, rxbuf->frag + meta_off, meta_len))) {
		nn_dp_warn(dp, "incorrect metadata for ctrl packet (%d)\n",
1364 | meta_len); |
1365 | nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); |
1366 | return true; |
1367 | } |
1368 | |
	skb = build_skb(rxbuf->frag, dp->fl_bufsz);
1370 | if (unlikely(!skb)) { |
1371 | nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); |
1372 | return true; |
1373 | } |
	new_frag = nfp_nfd3_napi_alloc_one(dp, &new_dma_addr);
1375 | if (unlikely(!new_frag)) { |
1376 | nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, skb); |
1377 | return true; |
1378 | } |
1379 | |
	nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);

	nfp_nfd3_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);

	skb_reserve(skb, pkt_off);
	skb_put(skb, pkt_len);

	nfp_app_ctrl_rx(nn->app, skb);
1388 | |
1389 | return true; |
1390 | } |
1391 | |
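/* Poll the control vNIC RX ring with a budget of 512 packets. The
 * return value tells the caller whether it is safe to re-enable the
 * interrupt or whether the tasklet needs to be rescheduled.
 */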
1392 | static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) |
1393 | { |
1394 | struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring; |
1395 | struct nfp_net *nn = r_vec->nfp_net; |
1396 | struct nfp_net_dp *dp = &nn->dp; |
1397 | unsigned int budget = 512; |
1398 | |
1399 | while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--) |
1400 | continue; |
1401 | |
1402 | return budget; |
1403 | } |
1404 | |
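/* Control vNIC datapath poll, run from the r_vector tasklet: complete
 * TX, send any deferred control messages, then poll RX. The interrupt
 * is re-enabled when RX work completed within budget, otherwise the
 * tasklet is rescheduled.
 */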
1405 | void nfp_nfd3_ctrl_poll(struct tasklet_struct *t) |
1406 | { |
1407 | struct nfp_net_r_vector *r_vec = from_tasklet(r_vec, t, tasklet); |
1408 | |
	spin_lock(&r_vec->lock);
	nfp_nfd3_tx_complete(r_vec->tx_ring, 0);
	__nfp_ctrl_tx_queued(r_vec);
	spin_unlock(&r_vec->lock);

	if (nfp_ctrl_rx(r_vec)) {
		nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
	} else {
		tasklet_schedule(&r_vec->tasklet);
		nn_dp_warn(&r_vec->nfp_net->dp,
			   "control message budget exceeded!\n");
1420 | } |
1421 | } |
1422 | |