// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include "rx.h"
#include "en/xdp.h"
#include <net/xdp_sock_drv.h>
#include <linux/filter.h>

/* RX data path */

static struct mlx5e_xdp_buff *xsk_buff_to_mxbuf(struct xdp_buff *xdp)
{
	/* mlx5e_xdp_buff shares its layout with xdp_buff_xsk
	 * and private mlx5e_xdp_buff fields fall into xdp_buff_xsk->cb
	 */
	return (struct mlx5e_xdp_buff *)xdp;
}

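/* Build and post a UMR WQE on the ICOSQ that maps a full batch of XSK frames
 * for one multi-packet RX WQE. Returns 0 on success or -ENOMEM if not enough
 * frames could be allocated from the XSK buffer pool.
 */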
int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
	struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
	struct mlx5e_icosq *icosq = rq->icosq;
	struct mlx5_wq_cyc *wq = &icosq->wq;
	struct mlx5e_umr_wqe *umr_wqe;
	struct xdp_buff **xsk_buffs;
	int batch, i;
	u32 offset; /* 17-bit value with MTT. */
	u16 pi;

	if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe)))
		goto err;

	XSK_CHECK_PRIV_TYPE(struct mlx5e_xdp_buff);
	xsk_buffs = (struct xdp_buff **)wi->alloc_units.xsk_buffs;
	batch = xsk_buff_alloc_batch(rq->xsk_pool, xsk_buffs,
				     rq->mpwqe.pages_per_wqe);

	/* If batch < pages_per_wqe, either:
	 * 1. Some (or all) descriptors were invalid.
	 * 2. dma_need_sync is true, and it fell back to allocating one frame.
	 * In either case, try to continue allocating frames one by one, until
	 * the first error, which will mean there are no more valid descriptors.
	 */
	for (; batch < rq->mpwqe.pages_per_wqe; batch++) {
		xsk_buffs[batch] = xsk_buff_alloc(rq->xsk_pool);
		if (unlikely(!xsk_buffs[batch]))
			goto err_reuse_batch;
	}

	pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs);
	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
	memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));

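	/* Fill the UMR entries according to the RQ's UMR mode:
	 * - ALIGNED: one MTT per frame (direct page mapping).
	 * - UNALIGNED: one KSM per frame.
	 * - TRIPLE: four quarter-page KSMs per frame; the first three map the
	 *   frame and the fourth maps the shared overflow page.
	 * - OVERSIZED (the else branch): a KLM pair per frame, one for the
	 *   chunk itself and one mapping the rest of the page to the overflow
	 *   page.
	 */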
	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) {
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

			umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
				.ptag = cpu_to_be64(addr | MLX5_EN_WR),
			};
			mxbuf->rq = rq;
		}
	} else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) {
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

			umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
			};
			mxbuf->rq = rq;
		}
	} else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) {
		u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2);

		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

			umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
			};
			umr_wqe->inline_ksms[(i << 2) + 1] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr + mapping_size),
			};
			umr_wqe->inline_ksms[(i << 2) + 2] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr + mapping_size * 2),
			};
			umr_wqe->inline_ksms[(i << 2) + 3] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(rq->wqe_overflow.addr),
			};
			mxbuf->rq = rq;
		}
	} else {
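		/* The remaining mode, MLX5E_MPWRQ_UMR_MODE_OVERSIZED: the XSK
		 * chunk is smaller than the UMR page, so each frame gets one
		 * KLM for the chunk and one KLM mapping the padding up to the
		 * page size to the overflow page.
		 */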
		__be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) -
					      rq->xsk_pool->chunk_size);
		__be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size);

		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

			umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
				.bcount = frame_size,
			};
			umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(rq->wqe_overflow.addr),
				.bcount = pad_size,
			};
			mxbuf->rq = rq;
		}
	}

	bitmap_zero(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe);
	wi->consumed_strides = 0;

	umr_wqe->ctrl.opmod_idx_opcode =
		cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR);

	/* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */
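	/* The resulting xlt_offset is in octwords; each mode's UMR entry size
	 * and entries-per-frame count are folded into the multiplier before
	 * dividing by MLX5_OCTWORD.
	 */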
	offset = ix * rq->mpwqe.mtts_per_wqe;
	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
		offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
	else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED))
		offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD;
	else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE))
		offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD;
	umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);

	icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
		.wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
		.num_wqebbs = rq->mpwqe.umr_wqebbs,
		.umr.rq = rq,
	};

	icosq->pc += rq->mpwqe.umr_wqebbs;

	icosq->doorbell_cseg = &umr_wqe->ctrl;

	return 0;

err_reuse_batch:
	while (--batch >= 0)
		xsk_buff_free(xsk_buffs[batch]);

err:
	rq->stats->buff_alloc_err++;
	return -ENOMEM;
}

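/* Allocate XSK frames for wqe_bulk legacy RX WQEs starting at index ix, using
 * batch allocation from the pool and splitting the request in two when it
 * wraps around the end of the ring. Returns the number of WQEs actually
 * filled.
 */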
int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	struct xdp_buff **buffs;
	u32 contig, alloc;
	int i;

	/* Each rq->wqe.frags->xskp is 1:1 mapped to an element inside the
	 * rq->wqe.alloc_units->xsk_buffs array allocated here.
	 */
	buffs = rq->wqe.alloc_units->xsk_buffs;
	contig = mlx5_wq_cyc_get_size(wq) - ix;
	if (wqe_bulk <= contig) {
		alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk);
	} else {
		alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, contig);
		if (likely(alloc == contig))
			alloc += xsk_buff_alloc_batch(rq->xsk_pool, buffs, wqe_bulk - contig);
	}

	for (i = 0; i < alloc; i++) {
		int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
		struct mlx5e_wqe_frag_info *frag;
		struct mlx5e_rx_wqe_cyc *wqe;
		dma_addr_t addr;

		wqe = mlx5_wq_cyc_get_wqe(wq, j);
		/* Assumes log_num_frags == 0. */
		frag = &rq->wqe.frags[j];

		addr = xsk_buff_xdp_get_frame_dma(*frag->xskp);
		wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
		frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
	}

	return alloc;
}

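/* Non-batched variant: allocate one XSK frame per WQE, stopping at the first
 * allocation failure. Returns the number of WQEs that were filled.
 */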
int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	int i;

	for (i = 0; i < wqe_bulk; i++) {
		int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
		struct mlx5e_wqe_frag_info *frag;
		struct mlx5e_rx_wqe_cyc *wqe;
		dma_addr_t addr;

		wqe = mlx5_wq_cyc_get_wqe(wq, j);
		/* Assumes log_num_frags == 0. */
		frag = &rq->wqe.frags[j];

		*frag->xskp = xsk_buff_alloc(rq->xsk_pool);
		if (unlikely(!*frag->xskp))
			return i;

		addr = xsk_buff_xdp_get_frame_dma(*frag->xskp);
		wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
		frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
	}

	return wqe_bulk;
}

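/* XDP_PASS path: copy the frame contents, including any XDP metadata, out of
 * the UMEM into a freshly allocated SKB so that the XSK frame can be recycled.
 */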
static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_buff *xdp)
{
	u32 totallen = xdp->data_end - xdp->data_meta;
	u32 metalen = xdp->data - xdp->data_meta;
	struct sk_buff *skb;

	skb = napi_alloc_skb(rq->cq.napi, totallen);
	if (unlikely(!skb)) {
		rq->stats->buff_alloc_err++;
		return NULL;
	}

	skb_put_data(skb, xdp->data_meta, totallen);

	if (metalen) {
		skb_metadata_set(skb, metalen);
		__skb_pull(skb, metalen);
	}

	return skb;
}

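/* Handle a packet received into an XSK frame of a striding (MPW) RQ: run the
 * XDP program and, on XDP_PASS, copy the data into a new SKB. Returns NULL if
 * the frame was consumed by XDP, dropped, or the SKB allocation failed.
 */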
struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
						    struct mlx5e_mpw_info *wi,
						    struct mlx5_cqe64 *cqe,
						    u16 cqe_bcnt,
						    u32 head_offset,
						    u32 page_idx)
{
	struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units.xsk_buffs[page_idx]);
	struct bpf_prog *prog;

	/* Check packet size. Note LRO doesn't use linear SKB */
	if (unlikely(cqe_bcnt > rq->hw_mtu)) {
		rq->stats->oversize_pkts_sw_drop++;
		return NULL;
	}

	/* head_offset is not used in this function, because xdp->data and the
	 * DMA address point directly to the necessary place. Furthermore, in
	 * the current implementation, UMR pages are mapped to XSK frames, so
	 * head_offset should always be 0.
	 */
	WARN_ON_ONCE(head_offset);

	/* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */
	mxbuf->cqe = cqe;
	xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
	xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool);
	net_prefetch(mxbuf->xdp.data);

	/* Possible flows:
	 * - XDP_REDIRECT to XSKMAP:
	 *   The page is owned by the userspace from now on.
	 * - XDP_TX and other XDP_REDIRECTs:
	 *   The page was returned by ZCA and recycled.
	 * - XDP_DROP:
	 *   Recycle the page.
	 * - XDP_PASS:
	 *   Allocate an SKB, copy the data and recycle the page.
	 *
	 * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its
	 * size is the same as the Driver RX Ring's size, and pages for WQEs are
	 * allocated first from the Reuse Ring, so it has enough space.
	 */

	prog = rcu_dereference(rq->xdp_prog);
	if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) {
		if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
			__set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */
		return NULL; /* page/packet was consumed by XDP */
	}

	/* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
	 * frame. On SKB allocation failure, NULL is returned.
	 */
	return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp);
}

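/* Same as above, but for a packet received into an XSK frame of a legacy
 * (cyclic) RQ.
 */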
struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
					      struct mlx5e_wqe_frag_info *wi,
					      struct mlx5_cqe64 *cqe,
					      u32 cqe_bcnt)
{
	struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(*wi->xskp);
	struct bpf_prog *prog;

	/* wi->offset is not used in this function, because xdp->data and the
	 * DMA address point directly to the necessary place. Furthermore, the
	 * XSK allocator allocates frames per packet, instead of pages, so
	 * wi->offset should always be 0.
	 */
	WARN_ON_ONCE(wi->offset);

	/* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */
	mxbuf->cqe = cqe;
	xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
	xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool);
	net_prefetch(mxbuf->xdp.data);

	prog = rcu_dereference(rq->xdp_prog);
	if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) {
		if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
			wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
		return NULL; /* page/packet was consumed by XDP */
	}

	/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
	 * will be handled by mlx5e_free_rx_wqe.
	 * On SKB allocation failure, NULL is returned.
	 */
	return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp);
}