1 | /* |
2 | * Copyright (c) 2007 Mellanox Technologies. All rights reserved. |
3 | * |
4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU |
6 | * General Public License (GPL) Version 2, available from the file |
7 | * COPYING in the main directory of this source tree, or the |
8 | * OpenIB.org BSD license below: |
9 | * |
10 | * Redistribution and use in source and binary forms, with or |
11 | * without modification, are permitted provided that the following |
12 | * conditions are met: |
13 | * |
14 | * - Redistributions of source code must retain the above |
15 | * copyright notice, this list of conditions and the following |
16 | * disclaimer. |
17 | * |
18 | * - Redistributions in binary form must reproduce the above |
19 | * copyright notice, this list of conditions and the following |
20 | * disclaimer in the documentation and/or other materials |
21 | * provided with the distribution. |
22 | * |
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
30 | * SOFTWARE. |
31 | * |
32 | */ |
33 | |
34 | #include <linux/bpf.h> |
35 | #include <linux/bpf_trace.h> |
36 | #include <linux/mlx4/cq.h> |
37 | #include <linux/slab.h> |
38 | #include <linux/mlx4/qp.h> |
39 | #include <linux/skbuff.h> |
40 | #include <linux/rculist.h> |
41 | #include <linux/if_ether.h> |
42 | #include <linux/if_vlan.h> |
43 | #include <linux/vmalloc.h> |
44 | #include <linux/irq.h> |
45 | |
46 | #include <net/ip.h> |
47 | #if IS_ENABLED(CONFIG_IPV6) |
48 | #include <net/ip6_checksum.h> |
49 | #endif |
50 | |
51 | #include "mlx4_en.h" |
52 | |
53 | static int mlx4_alloc_page(struct mlx4_en_priv *priv, |
54 | struct mlx4_en_rx_alloc *frag, |
55 | gfp_t gfp) |
56 | { |
57 | struct page *page; |
58 | dma_addr_t dma; |
59 | |
60 | page = alloc_page(gfp); |
61 | if (unlikely(!page)) |
62 | return -ENOMEM; |
63 | dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE, priv->dma_dir); |
64 | if (unlikely(dma_mapping_error(priv->ddev, dma))) { |
65 | __free_page(page); |
66 | return -ENOMEM; |
67 | } |
68 | frag->page = page; |
69 | frag->dma = dma; |
70 | frag->page_offset = priv->rx_headroom; |
71 | return 0; |
72 | } |
73 | |
74 | static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, |
75 | struct mlx4_en_rx_ring *ring, |
76 | struct mlx4_en_rx_desc *rx_desc, |
77 | struct mlx4_en_rx_alloc *frags, |
78 | gfp_t gfp) |
79 | { |
80 | int i; |
81 | |
82 | for (i = 0; i < priv->num_frags; i++, frags++) { |
83 | if (!frags->page) { |
			if (mlx4_alloc_page(priv, frags, gfp))
85 | return -ENOMEM; |
86 | ring->rx_alloc_pages++; |
87 | } |
88 | rx_desc->data[i].addr = cpu_to_be64(frags->dma + |
89 | frags->page_offset); |
90 | } |
91 | return 0; |
92 | } |
93 | |
94 | static void mlx4_en_free_frag(const struct mlx4_en_priv *priv, |
95 | struct mlx4_en_rx_alloc *frag) |
96 | { |
97 | if (frag->page) { |
98 | dma_unmap_page(priv->ddev, frag->dma, |
99 | PAGE_SIZE, priv->dma_dir); |
100 | __free_page(frag->page); |
101 | } |
102 | /* We need to clear all fields, otherwise a change of priv->log_rx_info |
103 | * could lead to see garbage later in frag->page. |
104 | */ |
105 | memset(frag, 0, sizeof(*frag)); |
106 | } |
107 | |
108 | static void mlx4_en_init_rx_desc(const struct mlx4_en_priv *priv, |
109 | struct mlx4_en_rx_ring *ring, int index) |
110 | { |
111 | struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index; |
112 | int possible_frags; |
113 | int i; |
114 | |
115 | /* Set size and memtype fields */ |
116 | for (i = 0; i < priv->num_frags; i++) { |
117 | rx_desc->data[i].byte_count = |
118 | cpu_to_be32(priv->frag_info[i].frag_size); |
119 | rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key); |
120 | } |
121 | |
122 | /* If the number of used fragments does not fill up the ring stride, |
123 | * remaining (unused) fragments must be padded with null address/size |
124 | * and a special memory key */ |
125 | possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE; |
126 | for (i = priv->num_frags; i < possible_frags; i++) { |
127 | rx_desc->data[i].byte_count = 0; |
128 | rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD); |
129 | rx_desc->data[i].addr = 0; |
130 | } |
131 | } |
132 | |
133 | static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, |
134 | struct mlx4_en_rx_ring *ring, int index, |
135 | gfp_t gfp) |
136 | { |
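	/* Note: the descriptor stride and the per-descriptor frag-info block
	 * are both sized to powers of two, so a descriptor and its rx_info
	 * entry can be located with shifts instead of multiplications.
	 */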
137 | struct mlx4_en_rx_desc *rx_desc = ring->buf + |
138 | (index << ring->log_stride); |
139 | struct mlx4_en_rx_alloc *frags = ring->rx_info + |
140 | (index << priv->log_rx_info); |
141 | if (likely(ring->page_cache.index > 0)) { |
142 | /* XDP uses a single page per frame */ |
143 | if (!frags->page) { |
144 | ring->page_cache.index--; |
145 | frags->page = ring->page_cache.buf[ring->page_cache.index].page; |
146 | frags->dma = ring->page_cache.buf[ring->page_cache.index].dma; |
147 | } |
148 | frags->page_offset = XDP_PACKET_HEADROOM; |
149 | rx_desc->data[0].addr = cpu_to_be64(frags->dma + |
150 | XDP_PACKET_HEADROOM); |
151 | return 0; |
152 | } |
153 | |
154 | return mlx4_en_alloc_frags(priv, ring, rx_desc, frags, gfp); |
155 | } |
156 | |
157 | static bool mlx4_en_is_ring_empty(const struct mlx4_en_rx_ring *ring) |
158 | { |
159 | return ring->prod == ring->cons; |
160 | } |
161 | |
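/* Publish the ring's producer index (low 16 bits) in the doorbell record so
 * the HW knows how many receive descriptors have been posted.
 */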
162 | static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring) |
163 | { |
164 | *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff); |
165 | } |
166 | |
167 | /* slow path */ |
168 | static void mlx4_en_free_rx_desc(const struct mlx4_en_priv *priv, |
169 | struct mlx4_en_rx_ring *ring, |
170 | int index) |
171 | { |
172 | struct mlx4_en_rx_alloc *frags; |
173 | int nr; |
174 | |
175 | frags = ring->rx_info + (index << priv->log_rx_info); |
176 | for (nr = 0; nr < priv->num_frags; nr++) { |
		en_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
		mlx4_en_free_frag(priv, frags + nr);
179 | } |
180 | } |
181 | |
182 | /* Function not in fast-path */ |
183 | static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv) |
184 | { |
185 | struct mlx4_en_rx_ring *ring; |
186 | int ring_ind; |
187 | int buf_ind; |
188 | int new_size; |
189 | |
190 | for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) { |
191 | for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { |
192 | ring = priv->rx_ring[ring_ind]; |
193 | |
			if (mlx4_en_prepare_rx_desc(priv, ring,
						    ring->actual_size,
						    GFP_KERNEL)) {
				if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
					en_err(priv, "Failed to allocate enough rx buffers\n");
					return -ENOMEM;
				} else {
					new_size = rounddown_pow_of_two(ring->actual_size);
					en_warn(priv, "Only %d buffers allocated reducing ring size to %d\n",
203 | ring->actual_size, new_size); |
204 | goto reduce_rings; |
205 | } |
206 | } |
207 | ring->actual_size++; |
208 | ring->prod++; |
209 | } |
210 | } |
211 | return 0; |
212 | |
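	/* Shrink every ring to the same power-of-two size; the ring size must
	 * remain a power of two because completions index the ring with
	 * size_mask.
	 */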
213 | reduce_rings: |
214 | for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { |
215 | ring = priv->rx_ring[ring_ind]; |
216 | while (ring->actual_size > new_size) { |
217 | ring->actual_size--; |
218 | ring->prod--; |
			mlx4_en_free_rx_desc(priv, ring, ring->actual_size);
220 | } |
221 | } |
222 | |
223 | return 0; |
224 | } |
225 | |
226 | static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv, |
227 | struct mlx4_en_rx_ring *ring) |
228 | { |
229 | int index; |
230 | |
	en_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n",
232 | ring->cons, ring->prod); |
233 | |
234 | /* Unmap and free Rx buffers */ |
235 | for (index = 0; index < ring->size; index++) { |
		en_dbg(DRV, priv, "Processing descriptor:%d\n", index);
237 | mlx4_en_free_rx_desc(priv, ring, index); |
238 | } |
239 | ring->cons = 0; |
240 | ring->prod = 0; |
241 | } |
242 | |
243 | void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev) |
244 | { |
245 | int i; |
246 | int num_of_eqs; |
247 | int num_rx_rings; |
248 | struct mlx4_dev *dev = mdev->dev; |
249 | |
250 | mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { |
251 | num_of_eqs = max_t(int, MIN_RX_RINGS, |
252 | min_t(int, |
253 | mlx4_get_eqs_per_port(mdev->dev, i), |
254 | DEF_RX_RINGS)); |
255 | |
256 | num_rx_rings = mlx4_low_memory_profile() ? MIN_RX_RINGS : |
257 | min_t(int, num_of_eqs, num_online_cpus()); |
258 | mdev->profile.prof[i].rx_ring_num = |
259 | rounddown_pow_of_two(num_rx_rings); |
260 | } |
261 | } |
262 | |
263 | int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, |
264 | struct mlx4_en_rx_ring **pring, |
265 | u32 size, u16 stride, int node, int queue_index) |
266 | { |
267 | struct mlx4_en_dev *mdev = priv->mdev; |
268 | struct mlx4_en_rx_ring *ring; |
269 | int err = -ENOMEM; |
270 | int tmp; |
271 | |
	ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
	if (!ring) {
		en_err(priv, "Failed to allocate RX ring structure\n");
275 | return -ENOMEM; |
276 | } |
277 | |
278 | ring->prod = 0; |
279 | ring->cons = 0; |
280 | ring->size = size; |
281 | ring->size_mask = size - 1; |
282 | ring->stride = stride; |
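	/* stride is a power of two, so ffs(stride) - 1 == log2(stride) */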
283 | ring->log_stride = ffs(ring->stride) - 1; |
284 | ring->buf_size = ring->size * ring->stride + TXBB_SIZE; |
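	/* One extra TXBB is included so the RX area can be offset past the
	 * stamped send WQE when stride <= TXBB_SIZE; see
	 * mlx4_en_activate_rx_rings().
	 */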
285 | |
	if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index, 0) < 0)
287 | goto err_ring; |
288 | |
289 | tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS * |
290 | sizeof(struct mlx4_en_rx_alloc)); |
	ring->rx_info = kvzalloc_node(tmp, GFP_KERNEL, node);
292 | if (!ring->rx_info) { |
293 | err = -ENOMEM; |
294 | goto err_xdp_info; |
295 | } |
296 | |
	en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n",
298 | ring->rx_info, tmp); |
299 | |
300 | /* Allocate HW buffers on provided NUMA node */ |
	set_dev_node(&mdev->dev->persist->pdev->dev, node);
	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
	set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
304 | if (err) |
305 | goto err_info; |
306 | |
307 | ring->buf = ring->wqres.buf.direct.buf; |
308 | |
309 | ring->hwtstamp_rx_filter = priv->hwtstamp_config.rx_filter; |
310 | |
311 | *pring = ring; |
312 | return 0; |
313 | |
314 | err_info: |
	kvfree(ring->rx_info);
	ring->rx_info = NULL;
err_xdp_info:
	xdp_rxq_info_unreg(&ring->xdp_rxq);
err_ring:
	kfree(ring);
321 | *pring = NULL; |
322 | |
323 | return err; |
324 | } |
325 | |
326 | int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) |
327 | { |
328 | struct mlx4_en_rx_ring *ring; |
329 | int i; |
330 | int ring_ind; |
331 | int err; |
332 | int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + |
333 | DS_SIZE * priv->num_frags); |
334 | |
335 | for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { |
336 | ring = priv->rx_ring[ring_ind]; |
337 | |
338 | ring->prod = 0; |
339 | ring->cons = 0; |
340 | ring->actual_size = 0; |
341 | ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn; |
342 | |
343 | ring->stride = stride; |
344 | if (ring->stride <= TXBB_SIZE) { |
345 | /* Stamp first unused send wqe */ |
346 | __be32 *ptr = (__be32 *)ring->buf; |
347 | __be32 stamp = cpu_to_be32(1 << STAMP_SHIFT); |
348 | *ptr = stamp; |
349 | /* Move pointer to start of rx section */ |
350 | ring->buf += TXBB_SIZE; |
351 | } |
352 | |
353 | ring->log_stride = ffs(ring->stride) - 1; |
354 | ring->buf_size = ring->size * ring->stride; |
355 | |
356 | memset(ring->buf, 0, ring->buf_size); |
357 | mlx4_en_update_rx_prod_db(ring); |
358 | |
359 | /* Initialize all descriptors */ |
360 | for (i = 0; i < ring->size; i++) |
			mlx4_en_init_rx_desc(priv, ring, i);
362 | } |
363 | err = mlx4_en_fill_rx_buffers(priv); |
364 | if (err) |
365 | goto err_buffers; |
366 | |
367 | for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { |
368 | ring = priv->rx_ring[ring_ind]; |
369 | |
370 | ring->size_mask = ring->actual_size - 1; |
371 | mlx4_en_update_rx_prod_db(ring); |
372 | } |
373 | |
374 | return 0; |
375 | |
376 | err_buffers: |
377 | for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) |
		mlx4_en_free_rx_buf(priv, priv->rx_ring[ring_ind]);
379 | |
380 | ring_ind = priv->rx_ring_num - 1; |
381 | while (ring_ind >= 0) { |
382 | if (priv->rx_ring[ring_ind]->stride <= TXBB_SIZE) |
383 | priv->rx_ring[ring_ind]->buf -= TXBB_SIZE; |
384 | ring_ind--; |
385 | } |
386 | return err; |
387 | } |
388 | |
389 | /* We recover from out of memory by scheduling our napi poll |
390 | * function (mlx4_en_process_cq), which tries to allocate |
391 | * all missing RX buffers (call to mlx4_en_refill_rx_buffers). |
392 | */ |
393 | void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv) |
394 | { |
395 | int ring; |
396 | |
397 | if (!priv->port_up) |
398 | return; |
399 | |
400 | for (ring = 0; ring < priv->rx_ring_num; ring++) { |
		if (mlx4_en_is_ring_empty(priv->rx_ring[ring])) {
			local_bh_disable();
			napi_schedule(&priv->rx_cq[ring]->napi);
404 | local_bh_enable(); |
405 | } |
406 | } |
407 | } |
408 | |
409 | /* When the rx ring is running in page-per-packet mode, a released frame can go |
410 | * directly into a small cache, to avoid unmapping or touching the page |
411 | * allocator. In bpf prog performance scenarios, buffers are either forwarded |
412 | * or dropped, never converted to skbs, so every page can come directly from |
413 | * this cache when it is sized to be a multiple of the napi budget. |
414 | */ |
415 | bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, |
416 | struct mlx4_en_rx_alloc *frame) |
417 | { |
418 | struct mlx4_en_page_cache *cache = &ring->page_cache; |
419 | |
420 | if (cache->index >= MLX4_EN_CACHE_SIZE) |
421 | return false; |
422 | |
423 | cache->buf[cache->index].page = frame->page; |
424 | cache->buf[cache->index].dma = frame->dma; |
425 | cache->index++; |
426 | return true; |
427 | } |
428 | |
429 | void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, |
430 | struct mlx4_en_rx_ring **pring, |
431 | u32 size, u16 stride) |
432 | { |
433 | struct mlx4_en_dev *mdev = priv->mdev; |
434 | struct mlx4_en_rx_ring *ring = *pring; |
435 | struct bpf_prog *old_prog; |
436 | |
437 | old_prog = rcu_dereference_protected( |
438 | ring->xdp_prog, |
439 | lockdep_is_held(&mdev->state_lock)); |
440 | if (old_prog) |
		bpf_prog_put(old_prog);
	xdp_rxq_info_unreg(&ring->xdp_rxq);
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
	kvfree(ring->rx_info);
	ring->rx_info = NULL;
	kfree(ring);
447 | *pring = NULL; |
448 | } |
449 | |
450 | void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, |
451 | struct mlx4_en_rx_ring *ring) |
452 | { |
453 | int i; |
454 | |
455 | for (i = 0; i < ring->page_cache.index; i++) { |
456 | dma_unmap_page(priv->ddev, ring->page_cache.buf[i].dma, |
457 | PAGE_SIZE, priv->dma_dir); |
		put_page(ring->page_cache.buf[i].page);
459 | } |
460 | ring->page_cache.index = 0; |
461 | mlx4_en_free_rx_buf(priv, ring); |
462 | if (ring->stride <= TXBB_SIZE) |
463 | ring->buf -= TXBB_SIZE; |
464 | } |
465 | |
466 | |
467 | static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, |
468 | struct mlx4_en_rx_alloc *frags, |
469 | struct sk_buff *skb, |
470 | int length) |
471 | { |
472 | const struct mlx4_en_frag_info *frag_info = priv->frag_info; |
473 | unsigned int truesize = 0; |
474 | bool release = true; |
475 | int nr, frag_size; |
476 | struct page *page; |
477 | dma_addr_t dma; |
478 | |
479 | /* Collect used fragments while replacing them in the HW descriptors */ |
480 | for (nr = 0;; frags++) { |
481 | frag_size = min_t(int, length, frag_info->frag_size); |
482 | |
483 | page = frags->page; |
484 | if (unlikely(!page)) |
485 | goto fail; |
486 | |
487 | dma = frags->dma; |
		dma_sync_single_range_for_cpu(priv->ddev, dma, frags->page_offset,
					      frag_size, priv->dma_dir);

		__skb_fill_page_desc(skb, nr, page, frags->page_offset,
				     frag_size);
493 | |
494 | truesize += frag_info->frag_stride; |
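		/* Page re-use: with half-page strides, flip between the two
		 * halves of the page; otherwise advance the offset by the
		 * cache-aligned fragment size. The page is only kept when we
		 * hold the sole reference and it is node-local and not
		 * pfmemalloc.
		 */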
495 | if (frag_info->frag_stride == PAGE_SIZE / 2) { |
496 | frags->page_offset ^= PAGE_SIZE / 2; |
497 | release = page_count(page) != 1 || |
498 | page_is_pfmemalloc(page) || |
499 | page_to_nid(page) != numa_mem_id(); |
500 | } else if (!priv->rx_headroom) { |
501 | /* rx_headroom for non XDP setup is always 0. |
502 | * When XDP is set, the above condition will |
503 | * guarantee page is always released. |
504 | */ |
505 | u32 sz_align = ALIGN(frag_size, SMP_CACHE_BYTES); |
506 | |
507 | frags->page_offset += sz_align; |
508 | release = frags->page_offset + frag_info->frag_size > PAGE_SIZE; |
509 | } |
510 | if (release) { |
511 | dma_unmap_page(priv->ddev, dma, PAGE_SIZE, priv->dma_dir); |
512 | frags->page = NULL; |
513 | } else { |
514 | page_ref_inc(page); |
515 | } |
516 | |
517 | nr++; |
518 | length -= frag_size; |
519 | if (!length) |
520 | break; |
521 | frag_info++; |
522 | } |
523 | skb->truesize += truesize; |
524 | return nr; |
525 | |
526 | fail: |
527 | while (nr > 0) { |
528 | nr--; |
		__skb_frag_unref(skb_shinfo(skb)->frags + nr, false);
530 | } |
531 | return 0; |
532 | } |
533 | |
534 | static void validate_loopback(struct mlx4_en_priv *priv, void *va) |
535 | { |
536 | const unsigned char *data = va + ETH_HLEN; |
537 | int i; |
538 | |
539 | for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++) { |
540 | if (data[i] != (unsigned char)i) |
541 | return; |
542 | } |
543 | /* Loopback found */ |
544 | priv->loopback_ok = 1; |
545 | } |
546 | |
547 | static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, |
548 | struct mlx4_en_rx_ring *ring) |
549 | { |
550 | u32 missing = ring->actual_size - (ring->prod - ring->cons); |
551 | |
552 | /* Try to batch allocations, but not too much. */ |
553 | if (missing < 8) |
554 | return; |
555 | do { |
		if (mlx4_en_prepare_rx_desc(priv, ring,
					    ring->prod & ring->size_mask,
558 | GFP_ATOMIC | __GFP_MEMALLOC)) |
559 | break; |
560 | ring->prod++; |
561 | } while (likely(--missing)); |
562 | |
563 | mlx4_en_update_rx_prod_db(ring); |
564 | } |
565 | |
566 | /* When hardware doesn't strip the vlan, we need to calculate the checksum |
567 | * over it and add it to the hardware's checksum calculation |
568 | */ |
569 | static inline __wsum get_fixed_vlan_csum(__wsum hw_checksum, |
570 | struct vlan_hdr *vlanh) |
571 | { |
	return csum_add(hw_checksum, *(__wsum *)vlanh);
573 | } |
574 | |
575 | /* Although the stack expects checksum which doesn't include the pseudo |
576 | * header, the HW adds it. To address that, we are subtracting the pseudo |
577 | * header checksum from the checksum value provided by the HW. |
578 | */ |
579 | static int get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb, |
580 | struct iphdr *iph) |
581 | { |
582 | __u16 length_for_csum = 0; |
	__wsum csum_pseudo_header = 0;
584 | __u8 ipproto = iph->protocol; |
585 | |
586 | if (unlikely(ipproto == IPPROTO_SCTP)) |
587 | return -1; |
588 | |
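	/* L4 length = IPv4 total length minus the header length
	 * (ihl is in 32-bit words, hence the shift by 2).
	 */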
589 | length_for_csum = (be16_to_cpu(iph->tot_len) - (iph->ihl << 2)); |
	csum_pseudo_header = csum_tcpudp_nofold(iph->saddr, iph->daddr,
						length_for_csum, ipproto, 0);
	skb->csum = csum_sub(hw_checksum, csum_pseudo_header);
593 | return 0; |
594 | } |
595 | |
596 | #if IS_ENABLED(CONFIG_IPV6) |
597 | /* In IPv6 packets, hw_checksum lacks 6 bytes from IPv6 header: |
598 | * 4 first bytes : priority, version, flow_lbl |
599 | * and 2 additional bytes : nexthdr, hop_limit. |
600 | */ |
601 | static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, |
602 | struct ipv6hdr *ipv6h) |
603 | { |
604 | __u8 nexthdr = ipv6h->nexthdr; |
605 | __wsum temp; |
606 | |
607 | if (unlikely(nexthdr == IPPROTO_FRAGMENT || |
608 | nexthdr == IPPROTO_HOPOPTS || |
609 | nexthdr == IPPROTO_SCTP)) |
610 | return -1; |
611 | |
612 | /* priority, version, flow_lbl */ |
	temp = csum_add(hw_checksum, *(__wsum *)ipv6h);
614 | /* nexthdr and hop_limit */ |
	skb->csum = csum_add(temp, (__force __wsum)*(__be16 *)&ipv6h->nexthdr);
616 | return 0; |
617 | } |
618 | #endif |
619 | |
620 | #define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) |
621 | |
622 | /* We reach this function only after checking that any of |
623 | * the (IPv4 | IPv6) bits are set in cqe->status. |
624 | */ |
625 | static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, |
626 | netdev_features_t dev_features) |
627 | { |
628 | __wsum hw_checksum = 0; |
629 | void *hdr; |
630 | |
631 | /* CQE csum doesn't cover padding octets in short ethernet |
632 | * frames. And the pad field is appended prior to calculating |
633 | * and appending the FCS field. |
634 | * |
635 | * Detecting these padded frames requires to verify and parse |
636 | * IP headers, so we simply force all those small frames to skip |
637 | * checksum complete. |
638 | */ |
639 | if (short_frame(skb->len)) |
640 | return -EINVAL; |
641 | |
642 | hdr = (u8 *)va + sizeof(struct ethhdr); |
	hw_checksum = csum_unfold((__force __sum16)cqe->checksum);
644 | |
645 | if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK) && |
646 | !(dev_features & NETIF_F_HW_VLAN_CTAG_RX)) { |
		hw_checksum = get_fixed_vlan_csum(hw_checksum, hdr);
648 | hdr += sizeof(struct vlan_hdr); |
649 | } |
650 | |
651 | #if IS_ENABLED(CONFIG_IPV6) |
652 | if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) |
		return get_fixed_ipv6_csum(hw_checksum, skb, hdr);
654 | #endif |
	return get_fixed_ipv4_csum(hw_checksum, skb, hdr);
656 | } |
657 | |
658 | #if IS_ENABLED(CONFIG_IPV6) |
659 | #define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV6) |
660 | #else |
661 | #define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4) |
662 | #endif |
663 | |
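/* Wraps the xdp_buff handed to the bpf program together with the driver
 * context needed by the XDP RX metadata kfuncs below (CQE for timestamp and
 * hash, device, ring).
 */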
664 | struct mlx4_en_xdp_buff { |
665 | struct xdp_buff xdp; |
666 | struct mlx4_cqe *cqe; |
667 | struct mlx4_en_dev *mdev; |
668 | struct mlx4_en_rx_ring *ring; |
669 | struct net_device *dev; |
670 | }; |
671 | |
672 | int mlx4_en_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) |
673 | { |
674 | struct mlx4_en_xdp_buff *_ctx = (void *)ctx; |
675 | |
676 | if (unlikely(_ctx->ring->hwtstamp_rx_filter != HWTSTAMP_FILTER_ALL)) |
677 | return -ENODATA; |
678 | |
	*timestamp = mlx4_en_get_hwtstamp(_ctx->mdev,
					  mlx4_en_get_cqe_ts(_ctx->cqe));
681 | return 0; |
682 | } |
683 | |
684 | int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, |
			enum xdp_rss_hash_type *rss_type)
686 | { |
687 | struct mlx4_en_xdp_buff *_ctx = (void *)ctx; |
688 | struct mlx4_cqe *cqe = _ctx->cqe; |
689 | enum xdp_rss_hash_type xht = 0; |
690 | __be16 status; |
691 | |
692 | if (unlikely(!(_ctx->dev->features & NETIF_F_RXHASH))) |
693 | return -ENODATA; |
694 | |
695 | *hash = be32_to_cpu(cqe->immed_rss_invalid); |
696 | status = cqe->status; |
697 | if (status & cpu_to_be16(MLX4_CQE_STATUS_TCP)) |
698 | xht = XDP_RSS_L4_TCP; |
699 | if (status & cpu_to_be16(MLX4_CQE_STATUS_UDP)) |
700 | xht = XDP_RSS_L4_UDP; |
701 | if (status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV4F)) |
702 | xht |= XDP_RSS_L3_IPV4; |
703 | if (status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) { |
704 | xht |= XDP_RSS_L3_IPV6; |
705 | if (cqe->ipv6_ext_mask) |
706 | xht |= XDP_RSS_L3_DYNHDR; |
707 | } |
708 | *rss_type = xht; |
709 | |
710 | return 0; |
711 | } |
712 | |
713 | int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) |
714 | { |
715 | struct mlx4_en_priv *priv = netdev_priv(dev); |
716 | struct mlx4_en_xdp_buff mxbuf = {}; |
717 | int factor = priv->cqe_factor; |
718 | struct mlx4_en_rx_ring *ring; |
719 | struct bpf_prog *xdp_prog; |
720 | int cq_ring = cq->ring; |
721 | bool doorbell_pending; |
722 | bool xdp_redir_flush; |
723 | struct mlx4_cqe *cqe; |
724 | int polled = 0; |
725 | int index; |
726 | |
727 | if (unlikely(!priv->port_up || budget <= 0)) |
728 | return 0; |
729 | |
730 | ring = priv->rx_ring[cq_ring]; |
731 | |
732 | xdp_prog = rcu_dereference_bh(ring->xdp_prog); |
	xdp_init_buff(&mxbuf.xdp, priv->frag_info[0].frag_stride, &ring->xdp_rxq);
734 | doorbell_pending = false; |
735 | xdp_redir_flush = false; |
736 | |
737 | /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx |
738 | * descriptor offset can be deduced from the CQE index instead of |
739 | * reading 'cqe->index' */ |
740 | index = cq->mcq.cons_index & ring->size_mask; |
	cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;
742 | |
743 | /* Process all completed CQEs */ |
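	/* Ownership check: a CQE belongs to software when its ownership bit
	 * matches the parity of cons_index with respect to the CQ size; the
	 * bit flips on every CQ wrap, hence the XNOR below.
	 */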
744 | while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, |
745 | cq->mcq.cons_index & cq->size)) { |
746 | struct mlx4_en_rx_alloc *frags; |
747 | enum pkt_hash_types hash_type; |
748 | struct sk_buff *skb; |
749 | unsigned int length; |
750 | int ip_summed; |
751 | void *va; |
752 | int nr; |
753 | |
754 | frags = ring->rx_info + (index << priv->log_rx_info); |
755 | va = page_address(frags[0].page) + frags[0].page_offset; |
		net_prefetchw(va);
757 | /* |
758 | * make sure we read the CQE after we read the ownership bit |
759 | */ |
760 | dma_rmb(); |
761 | |
762 | /* Drop packet on bad receive or bad checksum */ |
763 | if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == |
764 | MLX4_CQE_OPCODE_ERROR)) { |
			en_err(priv, "CQE completed in error - vendor syndrome:%d syndrome:%d\n",
766 | ((struct mlx4_err_cqe *)cqe)->vendor_err_syndrome, |
767 | ((struct mlx4_err_cqe *)cqe)->syndrome); |
768 | goto next; |
769 | } |
770 | if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) { |
			en_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n");
772 | goto next; |
773 | } |
774 | |
775 | /* Check if we need to drop the packet if SRIOV is not enabled |
776 | * and not performing the selftest or flb disabled |
777 | */ |
778 | if (priv->flags & MLX4_EN_FLAG_RX_FILTER_NEEDED) { |
779 | const struct ethhdr *ethh = va; |
780 | dma_addr_t dma; |
781 | /* Get pointer to first fragment since we haven't |
782 | * skb yet and cast it to ethhdr struct |
783 | */ |
784 | dma = frags[0].dma + frags[0].page_offset; |
			dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh),
						DMA_FROM_DEVICE);
787 | |
			if (is_multicast_ether_addr(ethh->h_dest)) {
789 | struct mlx4_mac_entry *entry; |
790 | struct hlist_head *bucket; |
791 | unsigned int mac_hash; |
792 | |
793 | /* Drop the packet, since HW loopback-ed it */ |
794 | mac_hash = ethh->h_source[MLX4_EN_MAC_HASH_IDX]; |
795 | bucket = &priv->mac_hash[mac_hash]; |
796 | hlist_for_each_entry_rcu_bh(entry, bucket, hlist) { |
					if (ether_addr_equal_64bits(entry->mac,
								    ethh->h_source))
799 | goto next; |
800 | } |
801 | } |
802 | } |
803 | |
804 | if (unlikely(priv->validate_loopback)) { |
805 | validate_loopback(priv, va); |
806 | goto next; |
807 | } |
808 | |
809 | /* |
810 | * Packet is OK - process it. |
811 | */ |
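		/* fcs_del is ETH_FCS_LEN when FCS stripping was cancelled in
		 * the QP context but the user did not request NETIF_F_RXFCS
		 * (see mlx4_en_config_rss_qp()), zero otherwise.
		 */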
812 | length = be32_to_cpu(cqe->byte_cnt); |
813 | length -= ring->fcs_del; |
814 | |
815 | /* A bpf program gets first chance to drop the packet. It may |
816 | * read bytes but not past the end of the frag. |
817 | */ |
818 | if (xdp_prog) { |
819 | dma_addr_t dma; |
820 | void *orig_data; |
821 | u32 act; |
822 | |
823 | dma = frags[0].dma + frags[0].page_offset; |
			dma_sync_single_for_cpu(priv->ddev, dma,
						priv->frag_info[0].frag_size,
						DMA_FROM_DEVICE);

			xdp_prepare_buff(&mxbuf.xdp, va - frags[0].page_offset,
					 frags[0].page_offset, length, true);
830 | orig_data = mxbuf.xdp.data; |
831 | mxbuf.cqe = cqe; |
832 | mxbuf.mdev = priv->mdev; |
833 | mxbuf.ring = ring; |
834 | mxbuf.dev = dev; |
835 | |
			act = bpf_prog_run_xdp(xdp_prog, &mxbuf.xdp);
837 | |
838 | length = mxbuf.xdp.data_end - mxbuf.xdp.data; |
839 | if (mxbuf.xdp.data != orig_data) { |
840 | frags[0].page_offset = mxbuf.xdp.data - |
841 | mxbuf.xdp.data_hard_start; |
842 | va = mxbuf.xdp.data; |
843 | } |
844 | |
845 | switch (act) { |
846 | case XDP_PASS: |
847 | break; |
848 | case XDP_REDIRECT: |
849 | if (likely(!xdp_do_redirect(dev, &mxbuf.xdp, xdp_prog))) { |
850 | ring->xdp_redirect++; |
851 | xdp_redir_flush = true; |
852 | frags[0].page = NULL; |
853 | goto next; |
854 | } |
855 | ring->xdp_redirect_fail++; |
				trace_xdp_exception(dev, xdp_prog, act);
857 | goto xdp_drop_no_cnt; |
858 | case XDP_TX: |
859 | if (likely(!mlx4_en_xmit_frame(ring, frags, priv, |
860 | length, cq_ring, |
861 | &doorbell_pending))) { |
862 | frags[0].page = NULL; |
863 | goto next; |
864 | } |
				trace_xdp_exception(dev, xdp_prog, act);
				goto xdp_drop_no_cnt; /* Drop on xmit failure */
			default:
				bpf_warn_invalid_xdp_action(dev, xdp_prog, act);
				fallthrough;
			case XDP_ABORTED:
				trace_xdp_exception(dev, xdp_prog, act);
872 | fallthrough; |
873 | case XDP_DROP: |
874 | ring->xdp_drop++; |
875 | xdp_drop_no_cnt: |
876 | goto next; |
877 | } |
878 | } |
879 | |
880 | ring->bytes += length; |
881 | ring->packets++; |
882 | |
		skb = napi_get_frags(&cq->napi);
884 | if (unlikely(!skb)) |
885 | goto next; |
886 | |
887 | if (unlikely(ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL)) { |
888 | u64 timestamp = mlx4_en_get_cqe_ts(cqe); |
889 | |
			mlx4_en_fill_hwtstamps(priv->mdev, skb_hwtstamps(skb),
891 | timestamp); |
892 | } |
		skb_record_rx_queue(skb, cq_ring);
894 | |
895 | if (likely(dev->features & NETIF_F_RXCSUM)) { |
896 | /* TODO: For IP non TCP/UDP packets when csum complete is |
897 | * not an option (not supported or any other reason) we can |
898 | * actually check cqe IPOK status bit and report |
899 | * CHECKSUM_UNNECESSARY rather than CHECKSUM_NONE |
900 | */ |
901 | if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | |
902 | MLX4_CQE_STATUS_UDP)) && |
903 | (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && |
904 | cqe->checksum == cpu_to_be16(0xffff)) { |
905 | bool l2_tunnel; |
906 | |
907 | l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) && |
908 | (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL)); |
909 | ip_summed = CHECKSUM_UNNECESSARY; |
910 | hash_type = PKT_HASH_TYPE_L4; |
911 | if (l2_tunnel) |
912 | skb->csum_level = 1; |
913 | ring->csum_ok++; |
914 | } else { |
915 | if (!(priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP && |
916 | (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IP_ANY)))) |
917 | goto csum_none; |
				if (check_csum(cqe, skb, va, dev->features))
919 | goto csum_none; |
920 | ip_summed = CHECKSUM_COMPLETE; |
921 | hash_type = PKT_HASH_TYPE_L3; |
922 | ring->csum_complete++; |
923 | } |
924 | } else { |
925 | csum_none: |
926 | ip_summed = CHECKSUM_NONE; |
927 | hash_type = PKT_HASH_TYPE_L3; |
928 | ring->csum_none++; |
929 | } |
930 | skb->ip_summed = ip_summed; |
931 | if (dev->features & NETIF_F_RXHASH) |
932 | skb_set_hash(skb, |
933 | be32_to_cpu(cqe->immed_rss_invalid), |
				     hash_type);
935 | |
936 | if ((cqe->vlan_my_qpn & |
937 | cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK)) && |
938 | (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) |
939 | __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), |
940 | be16_to_cpu(cqe->sl_vid)); |
941 | else if ((cqe->vlan_my_qpn & |
942 | cpu_to_be32(MLX4_CQE_SVLAN_PRESENT_MASK)) && |
943 | (dev->features & NETIF_F_HW_VLAN_STAG_RX)) |
944 | __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD), |
945 | be16_to_cpu(cqe->sl_vid)); |
946 | |
947 | nr = mlx4_en_complete_rx_desc(priv, frags, skb, length); |
948 | if (likely(nr)) { |
949 | skb_shinfo(skb)->nr_frags = nr; |
950 | skb->len = length; |
951 | skb->data_len = length; |
			napi_gro_frags(&cq->napi);
953 | } else { |
954 | __vlan_hwaccel_clear_tag(skb); |
955 | skb_clear_hash(skb); |
956 | } |
957 | next: |
958 | ++cq->mcq.cons_index; |
959 | index = (cq->mcq.cons_index) & ring->size_mask; |
		cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;
961 | if (unlikely(++polled == budget)) |
962 | break; |
963 | } |
964 | |
965 | if (xdp_redir_flush) |
966 | xdp_do_flush(); |
967 | |
968 | if (likely(polled)) { |
969 | if (doorbell_pending) { |
970 | priv->tx_cq[TX_XDP][cq_ring]->xdp_busy = true; |
			mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq_ring]);
972 | } |
973 | |
		mlx4_cq_set_ci(&cq->mcq);
975 | wmb(); /* ensure HW sees CQ consumer before we post new buffers */ |
976 | ring->cons = cq->mcq.cons_index; |
977 | } |
978 | |
979 | mlx4_en_refill_rx_buffers(priv, ring); |
980 | |
981 | return polled; |
982 | } |
983 | |
984 | |
985 | void mlx4_en_rx_irq(struct mlx4_cq *mcq) |
986 | { |
987 | struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq); |
	struct mlx4_en_priv *priv = netdev_priv(cq->dev);
989 | |
990 | if (likely(priv->port_up)) |
		napi_schedule_irqoff(&cq->napi);
992 | else |
993 | mlx4_en_arm_cq(priv, cq); |
994 | } |
995 | |
996 | /* Rx CQ polling - called by NAPI */ |
997 | int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) |
998 | { |
999 | struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); |
1000 | struct net_device *dev = cq->dev; |
1001 | struct mlx4_en_priv *priv = netdev_priv(dev); |
1002 | struct mlx4_en_cq *xdp_tx_cq = NULL; |
1003 | bool clean_complete = true; |
1004 | int done; |
1005 | |
1006 | if (!budget) |
1007 | return 0; |
1008 | |
1009 | if (priv->tx_ring_num[TX_XDP]) { |
1010 | xdp_tx_cq = priv->tx_cq[TX_XDP][cq->ring]; |
1011 | if (xdp_tx_cq->xdp_busy) { |
			clean_complete = mlx4_en_process_tx_cq(dev, xdp_tx_cq,
							       budget) < budget;
1014 | xdp_tx_cq->xdp_busy = !clean_complete; |
1015 | } |
1016 | } |
1017 | |
1018 | done = mlx4_en_process_rx_cq(dev, cq, budget); |
1019 | |
1020 | /* If we used up all the quota - we're probably not done yet... */ |
1021 | if (done == budget || !clean_complete) { |
1022 | int cpu_curr; |
1023 | |
1024 | /* in case we got here because of !clean_complete */ |
1025 | done = budget; |
1026 | |
1027 | cpu_curr = smp_processor_id(); |
1028 | |
1029 | if (likely(cpumask_test_cpu(cpu_curr, cq->aff_mask))) |
1030 | return budget; |
1031 | |
1032 | /* Current cpu is not according to smp_irq_affinity - |
1033 | * probably affinity changed. Need to stop this NAPI |
1034 | * poll, and restart it on the right CPU. |
1035 | * Try to avoid returning a too small value (like 0), |
1036 | * to not fool net_rx_action() and its netdev_budget |
1037 | */ |
1038 | if (done) |
1039 | done--; |
1040 | } |
1041 | /* Done for now */ |
1042 | if (likely(napi_complete_done(napi, done))) |
1043 | mlx4_en_arm_cq(priv, cq); |
1044 | return done; |
1045 | } |
1046 | |
1047 | void mlx4_en_calc_rx_buf(struct net_device *dev) |
1048 | { |
1049 | struct mlx4_en_priv *priv = netdev_priv(dev); |
1050 | int eff_mtu = MLX4_EN_EFF_MTU(dev->mtu); |
1051 | int i = 0; |
1052 | |
1053 | /* bpf requires buffers to be set up as 1 packet per page. |
1054 | * This only works when num_frags == 1. |
1055 | */ |
1056 | if (priv->tx_ring_num[TX_XDP]) { |
1057 | priv->frag_info[0].frag_size = eff_mtu; |
1058 | /* This will gain efficient xdp frame recycling at the |
1059 | * expense of more costly truesize accounting |
1060 | */ |
1061 | priv->frag_info[0].frag_stride = PAGE_SIZE; |
1062 | priv->dma_dir = DMA_BIDIRECTIONAL; |
1063 | priv->rx_headroom = XDP_PACKET_HEADROOM; |
1064 | i = 1; |
1065 | } else { |
1066 | int frag_size_max = 2048, buf_size = 0; |
1067 | |
1068 | /* should not happen, right ? */ |
1069 | if (eff_mtu > PAGE_SIZE + (MLX4_EN_MAX_RX_FRAGS - 1) * 2048) |
1070 | frag_size_max = PAGE_SIZE; |
1071 | |
1072 | while (buf_size < eff_mtu) { |
1073 | int frag_stride, frag_size = eff_mtu - buf_size; |
1074 | int pad, nb; |
1075 | |
1076 | if (i < MLX4_EN_MAX_RX_FRAGS - 1) |
1077 | frag_size = min(frag_size, frag_size_max); |
1078 | |
1079 | priv->frag_info[i].frag_size = frag_size; |
1080 | frag_stride = ALIGN(frag_size, SMP_CACHE_BYTES); |
			/* We can only pack two 1536-byte frames on a 4K page,
			 * so each frame consumes more bytes (truesize) than
			 * its nominal size.
			 */
1084 | nb = PAGE_SIZE / frag_stride; |
1085 | pad = (PAGE_SIZE - nb * frag_stride) / nb; |
1086 | pad &= ~(SMP_CACHE_BYTES - 1); |
1087 | priv->frag_info[i].frag_stride = frag_stride + pad; |
1088 | |
1089 | buf_size += frag_size; |
1090 | i++; |
1091 | } |
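		/* Example, assuming 4K pages and 64-byte cache lines: a
		 * 1500-byte MTU yields an eff_mtu of 1522, rounded up to a
		 * 1536-byte fragment; the 1024 leftover bytes of the page are
		 * split as padding, so each frame ends up with a 2048-byte
		 * stride, i.e. two frames per page.
		 */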
1092 | priv->dma_dir = DMA_FROM_DEVICE; |
1093 | priv->rx_headroom = 0; |
1094 | } |
1095 | |
1096 | priv->num_frags = i; |
1097 | priv->rx_skb_size = eff_mtu; |
1098 | priv->log_rx_info = ROUNDUP_LOG2(i * sizeof(struct mlx4_en_rx_alloc)); |
1099 | |
	en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d num_frags:%d):\n",
1101 | eff_mtu, priv->num_frags); |
1102 | for (i = 0; i < priv->num_frags; i++) { |
1103 | en_dbg(DRV, |
1104 | priv, |
		       "  frag:%d - size:%d stride:%d\n",
1106 | i, |
1107 | priv->frag_info[i].frag_size, |
1108 | priv->frag_info[i].frag_stride); |
1109 | } |
1110 | } |
1111 | |
1112 | /* RSS related functions */ |
1113 | |
static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn,
1115 | struct mlx4_en_rx_ring *ring, |
1116 | enum mlx4_qp_state *state, |
1117 | struct mlx4_qp *qp) |
1118 | { |
1119 | struct mlx4_en_dev *mdev = priv->mdev; |
1120 | struct mlx4_qp_context *context; |
1121 | int err = 0; |
1122 | |
	context = kzalloc(sizeof(*context), GFP_KERNEL);
1124 | if (!context) |
1125 | return -ENOMEM; |
1126 | |
	err = mlx4_qp_alloc(mdev->dev, qpn, qp);
	if (err) {
		en_err(priv, "Failed to allocate qp #%x\n", qpn);
1130 | goto out; |
1131 | } |
1132 | qp->event = mlx4_en_sqp_event; |
1133 | |
	mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0,
				qpn, ring->cqn, -1, context);
1136 | context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma); |
1137 | |
1138 | /* Cancel FCS removal if FW allows */ |
1139 | if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) { |
1140 | context->param3 |= cpu_to_be32(1 << 29); |
1141 | if (priv->dev->features & NETIF_F_RXFCS) |
1142 | ring->fcs_del = 0; |
1143 | else |
1144 | ring->fcs_del = ETH_FCS_LEN; |
1145 | } else |
1146 | ring->fcs_del = 0; |
1147 | |
	err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, context, qp, state);
	if (err) {
		mlx4_qp_remove(mdev->dev, qp);
		mlx4_qp_free(mdev->dev, qp);
1152 | } |
1153 | mlx4_en_update_rx_prod_db(ring); |
1154 | out: |
	kfree(context);
1156 | return err; |
1157 | } |
1158 | |
1159 | int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv) |
1160 | { |
1161 | int err; |
1162 | u32 qpn; |
1163 | |
	err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn,
				    MLX4_RESERVE_A0_QP,
				    MLX4_RES_USAGE_DRIVER);
	if (err) {
		en_err(priv, "Failed reserving drop qpn\n");
1169 | return err; |
1170 | } |
	err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp);
	if (err) {
		en_err(priv, "Failed allocating drop qp\n");
		mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
1175 | return err; |
1176 | } |
1177 | |
1178 | return 0; |
1179 | } |
1180 | |
1181 | void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv) |
1182 | { |
1183 | u32 qpn; |
1184 | |
1185 | qpn = priv->drop_qp.qpn; |
	mlx4_qp_remove(priv->mdev->dev, &priv->drop_qp);
	mlx4_qp_free(priv->mdev->dev, &priv->drop_qp);
	mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
1189 | } |
1190 | |
1191 | /* Allocate rx qp's and configure them according to rss map */ |
int mlx4_en_config_rss_steering(struct mlx4_en_priv *priv)
1193 | { |
1194 | struct mlx4_en_dev *mdev = priv->mdev; |
	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
1196 | struct mlx4_qp_context context; |
	struct mlx4_rss_context *rss_context;
	int rss_rings;
	void *ptr;
	u8 rss_mask = (MLX4_RSS_IPV4 | MLX4_RSS_TCP_IPV4 | MLX4_RSS_IPV6 |
1201 | MLX4_RSS_TCP_IPV6); |
1202 | int i, qpn; |
1203 | int err = 0; |
1204 | int good_qps = 0; |
1205 | u8 flags; |
1206 | |
	en_dbg(DRV, priv, "Configuring rss steering\n");
1208 | |
1209 | flags = priv->rx_ring_num == 1 ? MLX4_RESERVE_A0_QP : 0; |
	err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num,
				    priv->rx_ring_num,
				    &rss_map->base_qpn, flags,
				    MLX4_RES_USAGE_DRIVER);
	if (err) {
		en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num);
1216 | return err; |
1217 | } |
1218 | |
1219 | for (i = 0; i < priv->rx_ring_num; i++) { |
1220 | qpn = rss_map->base_qpn + i; |
		err = mlx4_en_config_rss_qp(priv, qpn, priv->rx_ring[i],
					    &rss_map->state[i],
					    &rss_map->qps[i]);
1224 | if (err) |
1225 | goto rss_err; |
1226 | |
1227 | ++good_qps; |
1228 | } |
1229 | |
1230 | if (priv->rx_ring_num == 1) { |
1231 | rss_map->indir_qp = &rss_map->qps[0]; |
1232 | priv->base_qpn = rss_map->indir_qp->qpn; |
		en_info(priv, "Optimized Non-RSS steering\n");
1234 | return 0; |
1235 | } |
1236 | |
	rss_map->indir_qp = kzalloc(sizeof(*rss_map->indir_qp), GFP_KERNEL);
1238 | if (!rss_map->indir_qp) { |
1239 | err = -ENOMEM; |
1240 | goto rss_err; |
1241 | } |
1242 | |
1243 | /* Configure RSS indirection qp */ |
	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, rss_map->indir_qp);
	if (err) {
		en_err(priv, "Failed to allocate RSS indirection QP\n");
1247 | goto qp_alloc_err; |
1248 | } |
1249 | |
1250 | rss_map->indir_qp->event = mlx4_en_sqp_event; |
	mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn,
				priv->rx_ring[0]->cqn, -1, &context);
1253 | |
1254 | if (!priv->prof->rss_rings || priv->prof->rss_rings > priv->rx_ring_num) |
1255 | rss_rings = priv->rx_ring_num; |
1256 | else |
1257 | rss_rings = priv->prof->rss_rings; |
1258 | |
1259 | ptr = ((void *) &context) + offsetof(struct mlx4_qp_context, pri_path) |
1260 | + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH; |
1261 | rss_context = ptr; |
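	/* The top byte of base_qpn encodes log2 of the number of RSS rings;
	 * rss_rings is expected to be a power of two (the ring count is
	 * rounded down to a power of two when the profile is built).
	 */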
1262 | rss_context->base_qpn = cpu_to_be32(ilog2(rss_rings) << 24 | |
1263 | (rss_map->base_qpn)); |
1264 | rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn); |
1265 | if (priv->mdev->profile.udp_rss) { |
1266 | rss_mask |= MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6; |
1267 | rss_context->base_qpn_udp = rss_context->default_qpn; |
1268 | } |
1269 | |
1270 | if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { |
		en_info(priv, "Setting RSS context tunnel type to RSS on inner headers\n");
1272 | rss_mask |= MLX4_RSS_BY_INNER_HEADERS; |
1273 | } |
1274 | |
1275 | rss_context->flags = rss_mask; |
1276 | rss_context->hash_fn = MLX4_RSS_HASH_TOP; |
1277 | if (priv->rss_hash_fn == ETH_RSS_HASH_XOR) { |
1278 | rss_context->hash_fn = MLX4_RSS_HASH_XOR; |
1279 | } else if (priv->rss_hash_fn == ETH_RSS_HASH_TOP) { |
1280 | rss_context->hash_fn = MLX4_RSS_HASH_TOP; |
1281 | memcpy(rss_context->rss_key, priv->rss_key, |
1282 | MLX4_EN_RSS_KEY_SIZE); |
1283 | } else { |
		en_err(priv, "Unknown RSS hash function requested\n");
1285 | err = -EINVAL; |
1286 | goto indir_err; |
1287 | } |
1288 | |
	err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context,
			       rss_map->indir_qp, &rss_map->indir_state);
1291 | if (err) |
1292 | goto indir_err; |
1293 | |
1294 | return 0; |
1295 | |
1296 | indir_err: |
	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
		       MLX4_QP_STATE_RST, NULL, 0, 0, rss_map->indir_qp);
	mlx4_qp_remove(mdev->dev, rss_map->indir_qp);
	mlx4_qp_free(mdev->dev, rss_map->indir_qp);
qp_alloc_err:
	kfree(rss_map->indir_qp);
	rss_map->indir_qp = NULL;
rss_err:
1305 | for (i = 0; i < good_qps; i++) { |
		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
			       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
		mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
		mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
	}
	mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
1312 | return err; |
1313 | } |
1314 | |
void mlx4_en_release_rss_steering(struct mlx4_en_priv *priv)
1316 | { |
1317 | struct mlx4_en_dev *mdev = priv->mdev; |
	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
1319 | int i; |
1320 | |
1321 | if (priv->rx_ring_num > 1) { |
		mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
			       MLX4_QP_STATE_RST, NULL, 0, 0,
			       rss_map->indir_qp);
		mlx4_qp_remove(mdev->dev, rss_map->indir_qp);
		mlx4_qp_free(mdev->dev, rss_map->indir_qp);
		kfree(rss_map->indir_qp);
1328 | rss_map->indir_qp = NULL; |
1329 | } |
1330 | |
1331 | for (i = 0; i < priv->rx_ring_num; i++) { |
		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
			       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
		mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
		mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
	}
	mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
1338 | } |
1339 | |