// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include "rx.h"
#include "en/xdp.h"
#include <net/xdp_sock_drv.h>
#include <linux/filter.h>

/* RX data path */

static struct mlx5e_xdp_buff *xsk_buff_to_mxbuf(struct xdp_buff *xdp)
{
	/* mlx5e_xdp_buff shares its layout with xdp_buff_xsk
	 * and private mlx5e_xdp_buff fields fall into xdp_buff_xsk->cb
	 */
	return (struct mlx5e_xdp_buff *)xdp;
}

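/* Build and post a UMR WQE on the ICOSQ that maps a full batch of XSK frames
 * for one multi-packet RX WQE. Returns 0 on success or -ENOMEM if not enough
 * frames could be allocated from the XSK buffer pool.
 */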
int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
	struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
	struct mlx5e_icosq *icosq = rq->icosq;
	struct mlx5_wq_cyc *wq = &icosq->wq;
	struct mlx5e_umr_wqe *umr_wqe;
	struct xdp_buff **xsk_buffs;
	int batch, i;
	u32 offset; /* 17-bit value with MTT. */
	u16 pi;

	if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe)))
		goto err;

	XSK_CHECK_PRIV_TYPE(struct mlx5e_xdp_buff);
	xsk_buffs = (struct xdp_buff **)wi->alloc_units.xsk_buffs;
	batch = xsk_buff_alloc_batch(rq->xsk_pool, xsk_buffs,
				     rq->mpwqe.pages_per_wqe);

	/* If batch < pages_per_wqe, either:
	 * 1. Some (or all) descriptors were invalid.
	 * 2. dma_need_sync is true, and it fell back to allocating one frame.
	 * In either case, try to continue allocating frames one by one, until
	 * the first error, which will mean there are no more valid descriptors.
	 */
	for (; batch < rq->mpwqe.pages_per_wqe; batch++) {
		xsk_buffs[batch] = xsk_buff_alloc(rq->xsk_pool);
		if (unlikely(!xsk_buffs[batch]))
			goto err_reuse_batch;
	}

	pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs);
	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
	memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));

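	/* Fill the UMR entries according to the RQ's UMR mode:
	 * - ALIGNED: one MTT per frame (direct page mapping).
	 * - UNALIGNED: one KSM per frame.
	 * - TRIPLE: four quarter-page KSMs per frame; the first three map the
	 *   frame and the fourth maps the shared overflow page.
	 * - OVERSIZED (the else branch): a KLM pair per frame, one for the
	 *   chunk itself and one mapping the rest of the page to the overflow
	 *   page.
	 */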
	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) {
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

			umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
				.ptag = cpu_to_be64(addr | MLX5_EN_WR),
			};
			mxbuf->rq = rq;
		}
	} else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) {
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

			umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
			};
			mxbuf->rq = rq;
		}
	} else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) {
		u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2);

		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

			umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
			};
			umr_wqe->inline_ksms[(i << 2) + 1] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr + mapping_size),
			};
			umr_wqe->inline_ksms[(i << 2) + 2] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr + mapping_size * 2),
			};
			umr_wqe->inline_ksms[(i << 2) + 3] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(rq->wqe_overflow.addr),
			};
			mxbuf->rq = rq;
		}
	} else {
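		/* The remaining mode, MLX5E_MPWRQ_UMR_MODE_OVERSIZED: the XSK
		 * chunk is smaller than the UMR page, so each frame gets one
		 * KLM for the chunk and one KLM mapping the padding up to the
		 * page size to the overflow page.
		 */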
		__be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) -
					      rq->xsk_pool->chunk_size);
		__be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size);

		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

			umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
				.bcount = frame_size,
			};
			umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(rq->wqe_overflow.addr),
				.bcount = pad_size,
			};
			mxbuf->rq = rq;
		}
	}

	bitmap_zero(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe);
	wi->consumed_strides = 0;

	umr_wqe->ctrl.opmod_idx_opcode =
		cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR);

	/* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */
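	/* The resulting xlt_offset is in octwords; each mode's UMR entry size
	 * and entries-per-frame count are folded into the multiplier before
	 * dividing by MLX5_OCTWORD.
	 */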
	offset = ix * rq->mpwqe.mtts_per_wqe;
	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
		offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
	else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED))
		offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD;
	else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE))
		offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD;
	umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);

	icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
		.wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
		.num_wqebbs = rq->mpwqe.umr_wqebbs,
		.umr.rq = rq,
	};

	icosq->pc += rq->mpwqe.umr_wqebbs;

	icosq->doorbell_cseg = &umr_wqe->ctrl;

	return 0;

err_reuse_batch:
	while (--batch >= 0)
		xsk_buff_free(xsk_buffs[batch]);

err:
	rq->stats->buff_alloc_err++;
	return -ENOMEM;
}

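/* Allocate XSK frames for wqe_bulk legacy RX WQEs starting at index ix, using
 * batch allocation from the pool and splitting the request in two when it
 * wraps around the end of the ring. Returns the number of WQEs actually
 * filled.
 */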
int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	struct xdp_buff **buffs;
	u32 contig, alloc;
	int i;

	/* Each rq->wqe.frags->xskp is 1:1 mapped to an element inside the
	 * rq->wqe.alloc_units->xsk_buffs array allocated here.
	 */
	buffs = rq->wqe.alloc_units->xsk_buffs;
	contig = mlx5_wq_cyc_get_size(wq) - ix;
	if (wqe_bulk <= contig) {
		alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk);
	} else {
		alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, contig);
		if (likely(alloc == contig))
			alloc += xsk_buff_alloc_batch(rq->xsk_pool, buffs, wqe_bulk - contig);
	}

	for (i = 0; i < alloc; i++) {
		int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
		struct mlx5e_wqe_frag_info *frag;
		struct mlx5e_rx_wqe_cyc *wqe;
		dma_addr_t addr;

		wqe = mlx5_wq_cyc_get_wqe(wq, j);
		/* Assumes log_num_frags == 0. */
		frag = &rq->wqe.frags[j];

		addr = xsk_buff_xdp_get_frame_dma(*frag->xskp);
		wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
		frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
	}

	return alloc;
}

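/* Non-batched variant: allocate one XSK frame per WQE, stopping at the first
 * allocation failure. Returns the number of WQEs that were filled.
 */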
int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	int i;

	for (i = 0; i < wqe_bulk; i++) {
		int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
		struct mlx5e_wqe_frag_info *frag;
		struct mlx5e_rx_wqe_cyc *wqe;
		dma_addr_t addr;

		wqe = mlx5_wq_cyc_get_wqe(wq, j);
		/* Assumes log_num_frags == 0. */
		frag = &rq->wqe.frags[j];

		*frag->xskp = xsk_buff_alloc(rq->xsk_pool);
		if (unlikely(!*frag->xskp))
			return i;

		addr = xsk_buff_xdp_get_frame_dma(*frag->xskp);
		wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
		frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
	}

	return wqe_bulk;
}

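/* XDP_PASS path: copy the frame contents, including any XDP metadata, out of
 * the UMEM into a freshly allocated SKB so that the XSK frame can be recycled.
 */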
static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_buff *xdp)
{
	u32 totallen = xdp->data_end - xdp->data_meta;
	u32 metalen = xdp->data - xdp->data_meta;
	struct sk_buff *skb;

	skb = napi_alloc_skb(rq->cq.napi, totallen);
	if (unlikely(!skb)) {
		rq->stats->buff_alloc_err++;
		return NULL;
	}

	skb_put_data(skb, xdp->data_meta, totallen);

	if (metalen) {
		skb_metadata_set(skb, metalen);
		__skb_pull(skb, metalen);
	}

	return skb;
}

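/* Handle a packet received into an XSK frame of a striding (MPW) RQ: run the
 * XDP program and, on XDP_PASS, copy the data into a new SKB. Returns NULL if
 * the frame was consumed by XDP, dropped, or the SKB allocation failed.
 */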
struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
						    struct mlx5e_mpw_info *wi,
						    struct mlx5_cqe64 *cqe,
						    u16 cqe_bcnt,
						    u32 head_offset,
						    u32 page_idx)
{
	struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units.xsk_buffs[page_idx]);
	struct bpf_prog *prog;

	/* Check packet size. Note LRO doesn't use linear SKB */
	if (unlikely(cqe_bcnt > rq->hw_mtu)) {
		rq->stats->oversize_pkts_sw_drop++;
		return NULL;
	}

	/* head_offset is not used in this function, because xdp->data and the
	 * DMA address point directly to the necessary place. Furthermore, in
	 * the current implementation, UMR pages are mapped to XSK frames, so
	 * head_offset should always be 0.
	 */
	WARN_ON_ONCE(head_offset);

	/* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */
	mxbuf->cqe = cqe;
	xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
	xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool);
	net_prefetch(mxbuf->xdp.data);

	/* Possible flows:
	 * - XDP_REDIRECT to XSKMAP:
	 *   The page is owned by the userspace from now on.
	 * - XDP_TX and other XDP_REDIRECTs:
	 *   The page was returned by ZCA and recycled.
	 * - XDP_DROP:
	 *   Recycle the page.
	 * - XDP_PASS:
	 *   Allocate an SKB, copy the data and recycle the page.
	 *
	 * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its
	 * size is the same as the Driver RX Ring's size, and pages for WQEs are
	 * allocated first from the Reuse Ring, so it has enough space.
	 */

	prog = rcu_dereference(rq->xdp_prog);
	if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) {
		if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
			__set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */
		return NULL; /* page/packet was consumed by XDP */
	}

	/* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
	 * frame. On SKB allocation failure, NULL is returned.
	 */
	return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp);
}

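/* Same as above, but for a packet received into an XSK frame of a legacy
 * (cyclic) RQ.
 */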
struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
					      struct mlx5e_wqe_frag_info *wi,
					      struct mlx5_cqe64 *cqe,
					      u32 cqe_bcnt)
{
	struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(*wi->xskp);
	struct bpf_prog *prog;

	/* wi->offset is not used in this function, because xdp->data and the
	 * DMA address point directly to the necessary place. Furthermore, the
	 * XSK allocator allocates frames per packet, instead of pages, so
	 * wi->offset should always be 0.
	 */
	WARN_ON_ONCE(wi->offset);

	/* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */
	mxbuf->cqe = cqe;
	xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
	xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool);
	net_prefetch(mxbuf->xdp.data);

	prog = rcu_dereference(rq->xdp_prog);
	if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) {
		if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
			wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
		return NULL; /* page/packet was consumed by XDP */
	}

	/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
	 * will be handled by mlx5e_free_rx_wqe.
	 * On SKB allocation failure, NULL is returned.
	 */
	return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp);
}