/*
 * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __MLX5_EN_XDP_H__
#define __MLX5_EN_XDP_H__

#include <linux/indirect_call_wrapper.h>
#include <net/xdp_sock.h>

#include "en.h"
#include "en/txrx.h"

#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)

#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT 16
#define MLX5E_XDP_INLINE_WQE_SZ_THRSD \
	(MLX5E_XDP_INLINE_WQE_MAX_DS_CNT * MLX5_SEND_WQE_DS - \
	 sizeof(struct mlx5_wqe_inline_seg))
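
/* For reference (a rough calculation, assuming MLX5_SEND_WQE_DS is 16 bytes
 * and struct mlx5_wqe_inline_seg is a 4-byte header): the threshold above
 * works out to 16 * 16 - 4 = 252 bytes, so packets up to that size are
 * candidates for inlining into the WQE.
 */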

struct mlx5e_xdp_buff {
	struct xdp_buff xdp;
	struct mlx5_cqe64 *cqe;
	struct mlx5e_rq *rq;
};

/* XDP packets can be transmitted in different ways. On completion, we need to
 * distinguish between them so that things can be cleaned up properly.
 */
enum mlx5e_xdp_xmit_mode {
	/* An xdp_frame was transmitted due to either XDP_REDIRECT from another
	 * device or XDP_TX from an XSK RQ. The frame has to be unmapped and
	 * returned.
	 */
	MLX5E_XDP_XMIT_MODE_FRAME,

	/* The xdp_frame was created in place as a result of XDP_TX from a
	 * regular RQ. No DMA remapping happened, and the page belongs to us.
	 */
	MLX5E_XDP_XMIT_MODE_PAGE,

	/* No xdp_frame was created at all, the transmit happened from a UMEM
	 * page. The UMEM Completion Ring producer pointer has to be increased.
	 */
	MLX5E_XDP_XMIT_MODE_XSK,
};
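
/* A minimal sketch (not the driver's actual completion path) of how a
 * completion routine is expected to branch on the mode popped from the fifo;
 * the real logic sits behind mlx5e_poll_xdpsq_cq()/mlx5e_free_xdpsq_descs()
 * declared below:
 *
 *	switch (xdpi.mode) {
 *	case MLX5E_XDP_XMIT_MODE_FRAME:
 *		// DMA-unmap the frame's buffers, then return the xdp_frame
 *		break;
 *	case MLX5E_XDP_XMIT_MODE_PAGE:
 *		// release the pages back to the RQ's page pool
 *		break;
 *	case MLX5E_XDP_XMIT_MODE_XSK:
 *		// advance the XSK (UMEM) completion ring producer
 *		break;
 *	}
 */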

/* An xmit_mode entry is pushed to the fifo for each packet, followed by
 * additional entries whose layout depends on the mode:
 *
 * MLX5E_XDP_XMIT_MODE_FRAME:
 *    xdpf, dma_addr_1, dma_addr_2, ... , dma_addr_num.
 *    'num' is derived from xdpf.
 *
 * MLX5E_XDP_XMIT_MODE_PAGE:
 *    num, page_1, page_2, ... , page_num.
 *
 * MLX5E_XDP_XMIT_MODE_XSK:
 *    frame.xsk_meta.
 */
#define MLX5E_XDP_FIFO_ENTRIES2DS_MAX_RATIO 4

union mlx5e_xdp_info {
	enum mlx5e_xdp_xmit_mode mode;
	union {
		struct xdp_frame *xdpf;
		dma_addr_t dma_addr;
	} frame;
	union {
		struct mlx5e_rq *rq;
		u8 num;
		struct page *page;
	} page;
	struct xsk_tx_metadata_compl xsk_meta;
};
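
/* Illustrative only: for MLX5E_XDP_XMIT_MODE_PAGE the per-packet push
 * sequence described above would look roughly like this (a sketch, not a
 * copy of the driver code):
 *
 *	mlx5e_xdpi_fifo_push(fifo, (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_PAGE });
 *	mlx5e_xdpi_fifo_push(fifo, (union mlx5e_xdp_info) { .page.num = num });
 *	for (i = 0; i < num; i++)
 *		mlx5e_xdpi_fifo_push(fifo, (union mlx5e_xdp_info) { .page.page = pages[i] });
 */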

struct mlx5e_xsk_param;
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
		      struct bpf_prog *prog, struct mlx5e_xdp_buff *mlctx);
void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw);
void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq);
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
		   u32 flags);

extern const struct xdp_metadata_ops mlx5e_xdp_metadata_ops;
extern const struct xsk_tx_metadata_ops mlx5e_xsk_tx_metadata_ops;

INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
							   struct mlx5e_xmit_data *xdptxd,
							   int check_result,
							   struct xsk_tx_metadata *meta));
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
						    struct mlx5e_xmit_data *xdptxd,
						    int check_result,
						    struct xsk_tx_metadata *meta));
INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq));
INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq));

static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv)
{
	set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);

	if (priv->channels.params.xdp_prog)
		set_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state);
}

static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv)
{
	if (priv->channels.params.xdp_prog)
		clear_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state);

	clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
	/* Let other devices' NAPI polls and XSK wakeups see our new state. */
	synchronize_net();
}
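
/* The two helpers above are expected to be used as a pair (an assumption
 * about the callers, not a statement of the exact call sites): the enable
 * side is called once the channels are ready to transmit, and the disable
 * side clears the bits first and only then calls synchronize_net(), so that
 * any concurrent NAPI poll or XSK wakeup on another CPU is guaranteed to
 * observe the cleared state before the XDP SQs are torn down.
 */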

static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)
{
	return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
}

static inline bool mlx5e_xdp_is_active(struct mlx5e_priv *priv)
{
	return test_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state);
}

static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
{
	if (sq->doorbell_cseg) {
		mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg);
		sq->doorbell_cseg = NULL;
	}
}
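
/* Typical batching pattern (a sketch of the expected usage; the actual
 * callers are the xmit/flush paths in en/xdp.c):
 *
 *	for each frame:
 *		mlx5e_xmit_xdp_frame(sq, ...);	// leaves sq->doorbell_cseg set
 *	mlx5e_xmit_xdp_doorbell(sq);		// ring the doorbell once per batch
 */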

/* Enable inline WQEs to shift some load from a congested HCA (HW) to
 * a less congested CPU (SW).
 */
static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur)
{
	u16 outstanding = sq->xdpi_fifo_pc - sq->xdpi_fifo_cc;

#define MLX5E_XDP_INLINE_WATERMARK_LOW 10
#define MLX5E_XDP_INLINE_WATERMARK_HIGH 128

	if (cur && outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW)
		return false;

	if (!cur && outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH)
		return true;

	return cur;
}
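
/* Example of the hysteresis above: with inlining currently off, it is turned
 * on once at least MLX5E_XDP_INLINE_WATERMARK_HIGH (128) descriptors are
 * outstanding (the HCA is falling behind); it then stays on until the backlog
 * drains to MLX5E_XDP_INLINE_WATERMARK_LOW (10) or fewer, at which point it
 * is turned off again.
 */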

static inline bool mlx5e_xdp_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs)
{
	if (session->inline_on)
		return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT >
		       max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS;

	return mlx5e_tx_mpwqe_is_full(session, max_sq_mpw_wqebbs);
}
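
/* Worked example (the numbers are assumptions, not fixed by this header):
 * if max_sq_mpw_wqebbs were 30 and MLX5_SEND_WQEBB_NUM_DS were 4, the session
 * budget would be 120 DS; with inlining on, the session is declared full as
 * soon as ds_count exceeds 120 - 16 = 104, i.e. when a worst-case inline
 * packet (MLX5E_XDP_INLINE_WQE_MAX_DS_CNT descriptors) might no longer fit.
 */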

struct mlx5e_xdp_wqe_info {
	u8 num_wqebbs;
	u8 num_pkts;
};

static inline void
mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
			 struct mlx5e_xmit_data *xdptxd,
			 struct mlx5e_xdpsq_stats *stats)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5_wqe_data_seg *dseg =
		(struct mlx5_wqe_data_seg *)session->wqe + session->ds_count;
	u32 dma_len = xdptxd->len;

	session->pkt_count++;
	session->bytes_count += dma_len;

	if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) {
		struct mlx5_wqe_inline_seg *inline_dseg =
			(struct mlx5_wqe_inline_seg *)dseg;
		u16 ds_len = sizeof(*inline_dseg) + dma_len;
		u16 ds_cnt = DIV_ROUND_UP(ds_len, MLX5_SEND_WQE_DS);

		inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG);
		memcpy(inline_dseg->data, xdptxd->data, dma_len);

		session->ds_count += ds_cnt;
		stats->inlnw++;
		return;
	}

	dseg->addr = cpu_to_be64(xdptxd->dma_addr);
	dseg->byte_count = cpu_to_be32(dma_len);
	dseg->lkey = sq->mkey_be;
	session->ds_count++;
}
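
/* Worked example of the inline branch above, assuming a 4-byte
 * mlx5_wqe_inline_seg header and 16-byte DS units: a 60-byte packet gives
 * ds_len = 4 + 60 = 64 and ds_cnt = DIV_ROUND_UP(64, 16) = 4, so the packet
 * consumes 4 data segments of the MPWQE session instead of one pointer DS.
 */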

static inline void
mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo,
		     union mlx5e_xdp_info xi)
{
	u32 i = (*fifo->pc)++ & fifo->mask;

	fifo->xi[i] = xi;
}

static inline union mlx5e_xdp_info
mlx5e_xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo)
{
	return fifo->xi[(*fifo->cc)++ & fifo->mask];
}
#endif