1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /**************************************************************************** |
3 | * Driver for Solarflare network controllers and boards |
4 | * Copyright 2018 Solarflare Communications Inc. |
5 | * |
6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms of the GNU General Public License version 2 as published |
8 | * by the Free Software Foundation, incorporated herein by reference. |
9 | */ |
10 | |
11 | #include "net_driver.h" |
12 | #include <linux/module.h> |
13 | #include <linux/iommu.h> |
14 | #include <net/rps.h> |
15 | #include "efx.h" |
16 | #include "nic.h" |
17 | #include "rx_common.h" |
18 | |
19 | /* This is the percentage fill level below which new RX descriptors |
20 | * will be added to the RX descriptor ring. |
21 | */ |
22 | static unsigned int rx_refill_threshold; |
23 | module_param(rx_refill_threshold, uint, 0444); |
24 | MODULE_PARM_DESC(rx_refill_threshold, |
25 | "RX descriptor ring refill threshold (%)" ); |
26 | |
/* Maximum RX head room required.
 *
 * At least one descriptor is needed to prevent overflow, plus one
 * packet's worth of descriptors so that receives can be pipelined.
 */
#define EFX_RXD_HEAD_ROOM (EFX_RX_MAX_FRAGS + 1)
33 | |
34 | /* Check the RX page recycle ring for a page that can be reused. */ |
35 | static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue) |
36 | { |
37 | struct efx_nic *efx = rx_queue->efx; |
38 | struct efx_rx_page_state *state; |
39 | unsigned int index; |
40 | struct page *page; |
41 | |
42 | if (unlikely(!rx_queue->page_ring)) |
43 | return NULL; |
44 | index = rx_queue->page_remove & rx_queue->page_ptr_mask; |
45 | page = rx_queue->page_ring[index]; |
46 | if (page == NULL) |
47 | return NULL; |
48 | |
49 | rx_queue->page_ring[index] = NULL; |
50 | /* page_remove cannot exceed page_add. */ |
51 | if (rx_queue->page_remove != rx_queue->page_add) |
52 | ++rx_queue->page_remove; |
53 | |
54 | /* If page_count is 1 then we hold the only reference to this page. */ |
55 | if (page_count(page) == 1) { |
56 | ++rx_queue->page_recycle_count; |
57 | return page; |
58 | } else { |
59 | state = page_address(page); |
60 | dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, |
61 | PAGE_SIZE << efx->rx_buffer_order, |
62 | DMA_FROM_DEVICE); |
63 | put_page(page); |
64 | ++rx_queue->page_recycle_failed; |
65 | } |
66 | |
67 | return NULL; |
68 | } |
69 | |
70 | /* Attempt to recycle the page if there is an RX recycle ring; the page can |
71 | * only be added if this is the final RX buffer, to prevent pages being used in |
72 | * the descriptor ring and appearing in the recycle ring simultaneously. |
73 | */ |
74 | static void efx_recycle_rx_page(struct efx_channel *channel, |
75 | struct efx_rx_buffer *rx_buf) |
76 | { |
77 | struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); |
78 | struct efx_nic *efx = rx_queue->efx; |
79 | struct page *page = rx_buf->page; |
80 | unsigned int index; |
81 | |
82 | /* Only recycle the page after processing the final buffer. */ |
83 | if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE)) |
84 | return; |
85 | |
86 | index = rx_queue->page_add & rx_queue->page_ptr_mask; |
87 | if (rx_queue->page_ring[index] == NULL) { |
88 | unsigned int read_index = rx_queue->page_remove & |
89 | rx_queue->page_ptr_mask; |
90 | |
91 | /* The next slot in the recycle ring is available, but |
92 | * increment page_remove if the read pointer currently |
93 | * points here. |
94 | */ |
95 | if (read_index == index) |
96 | ++rx_queue->page_remove; |
97 | rx_queue->page_ring[index] = page; |
98 | ++rx_queue->page_add; |
99 | return; |
100 | } |
101 | ++rx_queue->page_recycle_full; |
102 | efx_unmap_rx_buffer(efx, rx_buf); |
103 | put_page(page: rx_buf->page); |
104 | } |
105 | |
106 | /* Recycle the pages that are used by buffers that have just been received. */ |
107 | void efx_recycle_rx_pages(struct efx_channel *channel, |
108 | struct efx_rx_buffer *rx_buf, |
109 | unsigned int n_frags) |
110 | { |
111 | struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); |
112 | |
113 | if (unlikely(!rx_queue->page_ring)) |
114 | return; |
115 | |
116 | do { |
117 | efx_recycle_rx_page(channel, rx_buf); |
118 | rx_buf = efx_rx_buf_next(rx_queue, rx_buf); |
119 | } while (--n_frags); |
120 | } |
121 | |
/* Discard a received packet made up of @n_frags buffers starting at
 * @rx_buf: first offer its pages to the recycle ring, then release the
 * buffers' page references.
 */
void efx_discard_rx_packet(struct efx_channel *channel,
			   struct efx_rx_buffer *rx_buf,
			   unsigned int n_frags)
{
	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);

	efx_recycle_rx_pages(channel, rx_buf, n_frags);

	efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
}
132 | |
133 | static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue) |
134 | { |
135 | unsigned int bufs_in_recycle_ring, page_ring_size; |
136 | struct efx_nic *efx = rx_queue->efx; |
137 | |
138 | bufs_in_recycle_ring = efx_rx_recycle_ring_size(efx); |
139 | page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring / |
140 | efx->rx_bufs_per_page); |
141 | rx_queue->page_ring = kcalloc(n: page_ring_size, |
142 | size: sizeof(*rx_queue->page_ring), GFP_KERNEL); |
143 | if (!rx_queue->page_ring) |
144 | rx_queue->page_ptr_mask = 0; |
145 | else |
146 | rx_queue->page_ptr_mask = page_ring_size - 1; |
147 | } |
148 | |
149 | static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue) |
150 | { |
151 | struct efx_nic *efx = rx_queue->efx; |
152 | int i; |
153 | |
154 | if (unlikely(!rx_queue->page_ring)) |
155 | return; |
156 | |
157 | /* Unmap and release the pages in the recycle ring. Remove the ring. */ |
158 | for (i = 0; i <= rx_queue->page_ptr_mask; i++) { |
159 | struct page *page = rx_queue->page_ring[i]; |
160 | struct efx_rx_page_state *state; |
161 | |
162 | if (page == NULL) |
163 | continue; |
164 | |
165 | state = page_address(page); |
166 | dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, |
167 | PAGE_SIZE << efx->rx_buffer_order, |
168 | DMA_FROM_DEVICE); |
169 | put_page(page); |
170 | } |
171 | kfree(objp: rx_queue->page_ring); |
172 | rx_queue->page_ring = NULL; |
173 | } |
174 | |
175 | static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, |
176 | struct efx_rx_buffer *rx_buf) |
177 | { |
178 | /* Release the page reference we hold for the buffer. */ |
179 | if (rx_buf->page) |
180 | put_page(page: rx_buf->page); |
181 | |
182 | /* If this is the last buffer in a page, unmap and free it. */ |
183 | if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) { |
184 | efx_unmap_rx_buffer(efx: rx_queue->efx, rx_buf); |
185 | efx_free_rx_buffers(rx_queue, rx_buf, num_bufs: 1); |
186 | } |
187 | rx_buf->page = NULL; |
188 | } |
189 | |
190 | int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) |
191 | { |
192 | struct efx_nic *efx = rx_queue->efx; |
193 | unsigned int entries; |
194 | int rc; |
195 | |
196 | /* Create the smallest power-of-two aligned ring */ |
197 | entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE); |
198 | EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE); |
199 | rx_queue->ptr_mask = entries - 1; |
200 | |
201 | netif_dbg(efx, probe, efx->net_dev, |
202 | "creating RX queue %d size %#x mask %#x\n" , |
203 | efx_rx_queue_index(rx_queue), efx->rxq_entries, |
204 | rx_queue->ptr_mask); |
205 | |
206 | /* Allocate RX buffers */ |
207 | rx_queue->buffer = kcalloc(n: entries, size: sizeof(*rx_queue->buffer), |
208 | GFP_KERNEL); |
209 | if (!rx_queue->buffer) |
210 | return -ENOMEM; |
211 | |
212 | rc = efx_nic_probe_rx(rx_queue); |
213 | if (rc) { |
214 | kfree(objp: rx_queue->buffer); |
215 | rx_queue->buffer = NULL; |
216 | } |
217 | |
218 | return rc; |
219 | } |
220 | |
221 | void efx_init_rx_queue(struct efx_rx_queue *rx_queue) |
222 | { |
223 | unsigned int max_fill, trigger, max_trigger; |
224 | struct efx_nic *efx = rx_queue->efx; |
225 | int rc = 0; |
226 | |
227 | netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, |
228 | "initialising RX queue %d\n" , efx_rx_queue_index(rx_queue)); |
229 | |
230 | /* Initialise ptr fields */ |
231 | rx_queue->added_count = 0; |
232 | rx_queue->notified_count = 0; |
233 | rx_queue->granted_count = 0; |
234 | rx_queue->removed_count = 0; |
235 | rx_queue->min_fill = -1U; |
236 | efx_init_rx_recycle_ring(rx_queue); |
237 | |
238 | rx_queue->page_remove = 0; |
239 | rx_queue->page_add = rx_queue->page_ptr_mask + 1; |
240 | rx_queue->page_recycle_count = 0; |
241 | rx_queue->page_recycle_failed = 0; |
242 | rx_queue->page_recycle_full = 0; |
243 | |
244 | /* Initialise limit fields */ |
245 | max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM; |
246 | max_trigger = |
247 | max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page; |
248 | if (rx_refill_threshold != 0) { |
249 | trigger = max_fill * min(rx_refill_threshold, 100U) / 100U; |
250 | if (trigger > max_trigger) |
251 | trigger = max_trigger; |
252 | } else { |
253 | trigger = max_trigger; |
254 | } |
255 | |
256 | rx_queue->max_fill = max_fill; |
257 | rx_queue->fast_fill_trigger = trigger; |
258 | rx_queue->refill_enabled = true; |
259 | |
260 | /* Initialise XDP queue information */ |
261 | rc = xdp_rxq_info_reg(xdp_rxq: &rx_queue->xdp_rxq_info, dev: efx->net_dev, |
262 | queue_index: rx_queue->core_index, napi_id: 0); |
263 | |
264 | if (rc) { |
265 | netif_err(efx, rx_err, efx->net_dev, |
266 | "Failure to initialise XDP queue information rc=%d\n" , |
267 | rc); |
268 | efx->xdp_rxq_info_failed = true; |
269 | } else { |
270 | rx_queue->xdp_rxq_info_valid = true; |
271 | } |
272 | |
273 | /* Set up RX descriptor ring */ |
274 | efx_nic_init_rx(rx_queue); |
275 | } |
276 | |
277 | void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) |
278 | { |
279 | struct efx_rx_buffer *rx_buf; |
280 | int i; |
281 | |
282 | netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, |
283 | "shutting down RX queue %d\n" , efx_rx_queue_index(rx_queue)); |
284 | |
285 | del_timer_sync(timer: &rx_queue->slow_fill); |
286 | if (rx_queue->grant_credits) |
287 | flush_work(work: &rx_queue->grant_work); |
288 | |
289 | /* Release RX buffers from the current read ptr to the write ptr */ |
290 | if (rx_queue->buffer) { |
291 | for (i = rx_queue->removed_count; i < rx_queue->added_count; |
292 | i++) { |
293 | unsigned int index = i & rx_queue->ptr_mask; |
294 | |
295 | rx_buf = efx_rx_buffer(rx_queue, index); |
296 | efx_fini_rx_buffer(rx_queue, rx_buf); |
297 | } |
298 | } |
299 | |
300 | efx_fini_rx_recycle_ring(rx_queue); |
301 | |
302 | if (rx_queue->xdp_rxq_info_valid) |
303 | xdp_rxq_info_unreg(xdp_rxq: &rx_queue->xdp_rxq_info); |
304 | |
305 | rx_queue->xdp_rxq_info_valid = false; |
306 | } |
307 | |
308 | void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) |
309 | { |
310 | netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, |
311 | "destroying RX queue %d\n" , efx_rx_queue_index(rx_queue)); |
312 | |
313 | efx_nic_remove_rx(rx_queue); |
314 | |
315 | kfree(objp: rx_queue->buffer); |
316 | rx_queue->buffer = NULL; |
317 | } |
318 | |
319 | /* Unmap a DMA-mapped page. This function is only called for the final RX |
320 | * buffer in a page. |
321 | */ |
322 | void efx_unmap_rx_buffer(struct efx_nic *efx, |
323 | struct efx_rx_buffer *rx_buf) |
324 | { |
325 | struct page *page = rx_buf->page; |
326 | |
327 | if (page) { |
328 | struct efx_rx_page_state *state = page_address(page); |
329 | |
330 | dma_unmap_page(&efx->pci_dev->dev, |
331 | state->dma_addr, |
332 | PAGE_SIZE << efx->rx_buffer_order, |
333 | DMA_FROM_DEVICE); |
334 | } |
335 | } |
336 | |
337 | void efx_free_rx_buffers(struct efx_rx_queue *rx_queue, |
338 | struct efx_rx_buffer *rx_buf, |
339 | unsigned int num_bufs) |
340 | { |
341 | do { |
342 | if (rx_buf->page) { |
343 | put_page(page: rx_buf->page); |
344 | rx_buf->page = NULL; |
345 | } |
346 | rx_buf = efx_rx_buf_next(rx_queue, rx_buf); |
347 | } while (--num_bufs); |
348 | } |
349 | |
350 | void efx_rx_slow_fill(struct timer_list *t) |
351 | { |
352 | struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill); |
353 | |
354 | /* Post an event to cause NAPI to run and refill the queue */ |
355 | efx_nic_generate_fill_event(rx_queue); |
356 | ++rx_queue->slow_fill_count; |
357 | } |
358 | |
359 | void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue) |
360 | { |
361 | mod_timer(timer: &rx_queue->slow_fill, expires: jiffies + msecs_to_jiffies(m: 10)); |
362 | } |
363 | |
364 | /* efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers |
365 | * |
366 | * @rx_queue: Efx RX queue |
367 | * |
368 | * This allocates a batch of pages, maps them for DMA, and populates |
369 | * struct efx_rx_buffers for each one. Return a negative error code or |
370 | * 0 on success. If a single page can be used for multiple buffers, |
371 | * then the page will either be inserted fully, or not at all. |
372 | */ |
373 | static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic) |
374 | { |
375 | unsigned int page_offset, index, count; |
376 | struct efx_nic *efx = rx_queue->efx; |
377 | struct efx_rx_page_state *state; |
378 | struct efx_rx_buffer *rx_buf; |
379 | dma_addr_t dma_addr; |
380 | struct page *page; |
381 | |
382 | count = 0; |
383 | do { |
384 | page = efx_reuse_page(rx_queue); |
385 | if (page == NULL) { |
386 | page = alloc_pages(__GFP_COMP | |
387 | (atomic ? GFP_ATOMIC : GFP_KERNEL), |
388 | order: efx->rx_buffer_order); |
389 | if (unlikely(page == NULL)) |
390 | return -ENOMEM; |
391 | dma_addr = |
392 | dma_map_page(&efx->pci_dev->dev, page, 0, |
393 | PAGE_SIZE << efx->rx_buffer_order, |
394 | DMA_FROM_DEVICE); |
395 | if (unlikely(dma_mapping_error(&efx->pci_dev->dev, |
396 | dma_addr))) { |
397 | __free_pages(page, order: efx->rx_buffer_order); |
398 | return -EIO; |
399 | } |
400 | state = page_address(page); |
401 | state->dma_addr = dma_addr; |
402 | } else { |
403 | state = page_address(page); |
404 | dma_addr = state->dma_addr; |
405 | } |
406 | |
407 | dma_addr += sizeof(struct efx_rx_page_state); |
408 | page_offset = sizeof(struct efx_rx_page_state); |
409 | |
410 | do { |
411 | index = rx_queue->added_count & rx_queue->ptr_mask; |
412 | rx_buf = efx_rx_buffer(rx_queue, index); |
413 | rx_buf->dma_addr = dma_addr + efx->rx_ip_align + |
414 | EFX_XDP_HEADROOM; |
415 | rx_buf->page = page; |
416 | rx_buf->page_offset = page_offset + efx->rx_ip_align + |
417 | EFX_XDP_HEADROOM; |
418 | rx_buf->len = efx->rx_dma_len; |
419 | rx_buf->flags = 0; |
420 | ++rx_queue->added_count; |
421 | get_page(page); |
422 | dma_addr += efx->rx_page_buf_step; |
423 | page_offset += efx->rx_page_buf_step; |
424 | } while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE); |
425 | |
426 | rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE; |
427 | } while (++count < efx->rx_pages_per_batch); |
428 | |
429 | return 0; |
430 | } |
431 | |
432 | void efx_rx_config_page_split(struct efx_nic *efx) |
433 | { |
434 | efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align + |
435 | EFX_XDP_HEADROOM + EFX_XDP_TAILROOM, |
436 | EFX_RX_BUF_ALIGNMENT); |
437 | efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 : |
438 | ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) / |
439 | efx->rx_page_buf_step); |
440 | efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) / |
441 | efx->rx_bufs_per_page; |
442 | efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH, |
443 | efx->rx_bufs_per_page); |
444 | } |
445 | |
446 | /* efx_fast_push_rx_descriptors - push new RX descriptors quickly |
447 | * @rx_queue: RX descriptor queue |
448 | * |
449 | * This will aim to fill the RX descriptor queue up to |
450 | * @rx_queue->@max_fill. If there is insufficient atomic |
451 | * memory to do so, a slow fill will be scheduled. |
452 | * |
453 | * The caller must provide serialisation (none is used here). In practise, |
454 | * this means this function must run from the NAPI handler, or be called |
455 | * when NAPI is disabled. |
456 | */ |
457 | void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic) |
458 | { |
459 | struct efx_nic *efx = rx_queue->efx; |
460 | unsigned int fill_level, batch_size; |
461 | int space, rc = 0; |
462 | |
463 | if (!rx_queue->refill_enabled) |
464 | return; |
465 | |
466 | /* Calculate current fill level, and exit if we don't need to fill */ |
467 | fill_level = (rx_queue->added_count - rx_queue->removed_count); |
468 | EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries); |
469 | if (fill_level >= rx_queue->fast_fill_trigger) |
470 | goto out; |
471 | |
472 | /* Record minimum fill level */ |
473 | if (unlikely(fill_level < rx_queue->min_fill)) { |
474 | if (fill_level) |
475 | rx_queue->min_fill = fill_level; |
476 | } |
477 | |
478 | batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page; |
479 | space = rx_queue->max_fill - fill_level; |
480 | EFX_WARN_ON_ONCE_PARANOID(space < batch_size); |
481 | |
482 | netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, |
483 | "RX queue %d fast-filling descriptor ring from" |
484 | " level %d to level %d\n" , |
485 | efx_rx_queue_index(rx_queue), fill_level, |
486 | rx_queue->max_fill); |
487 | |
488 | do { |
489 | rc = efx_init_rx_buffers(rx_queue, atomic); |
490 | if (unlikely(rc)) { |
491 | /* Ensure that we don't leave the rx queue empty */ |
492 | efx_schedule_slow_fill(rx_queue); |
493 | goto out; |
494 | } |
495 | } while ((space -= batch_size) >= batch_size); |
496 | |
497 | netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, |
498 | "RX queue %d fast-filled descriptor ring " |
499 | "to level %d\n" , efx_rx_queue_index(rx_queue), |
500 | rx_queue->added_count - rx_queue->removed_count); |
501 | |
502 | out: |
503 | if (rx_queue->notified_count != rx_queue->added_count) |
504 | efx_nic_notify_rx_desc(rx_queue); |
505 | } |
506 | |
507 | /* Pass a received packet up through GRO. GRO can handle pages |
508 | * regardless of checksum state and skbs with a good checksum. |
509 | */ |
510 | void |
511 | efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, |
512 | unsigned int n_frags, u8 *eh, __wsum csum) |
513 | { |
514 | struct napi_struct *napi = &channel->napi_str; |
515 | struct efx_nic *efx = channel->efx; |
516 | struct sk_buff *skb; |
517 | |
518 | skb = napi_get_frags(napi); |
519 | if (unlikely(!skb)) { |
520 | struct efx_rx_queue *rx_queue; |
521 | |
522 | rx_queue = efx_channel_get_rx_queue(channel); |
523 | efx_free_rx_buffers(rx_queue, rx_buf, num_bufs: n_frags); |
524 | return; |
525 | } |
526 | |
527 | if (efx->net_dev->features & NETIF_F_RXHASH && |
528 | efx_rx_buf_hash_valid(efx, prefix: eh)) |
529 | skb_set_hash(skb, hash: efx_rx_buf_hash(efx, eh), |
530 | type: PKT_HASH_TYPE_L3); |
531 | if (csum) { |
532 | skb->csum = csum; |
533 | skb->ip_summed = CHECKSUM_COMPLETE; |
534 | } else { |
535 | skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? |
536 | CHECKSUM_UNNECESSARY : CHECKSUM_NONE); |
537 | } |
538 | skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL); |
539 | |
540 | for (;;) { |
541 | skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, |
542 | page: rx_buf->page, off: rx_buf->page_offset, |
543 | size: rx_buf->len); |
544 | rx_buf->page = NULL; |
545 | skb->len += rx_buf->len; |
546 | if (skb_shinfo(skb)->nr_frags == n_frags) |
547 | break; |
548 | |
549 | rx_buf = efx_rx_buf_next(rx_queue: &channel->rx_queue, rx_buf); |
550 | } |
551 | |
552 | skb->data_len = skb->len; |
553 | skb->truesize += n_frags * efx->rx_buffer_truesize; |
554 | |
555 | skb_record_rx_queue(skb, rx_queue: channel->rx_queue.core_index); |
556 | |
557 | napi_gro_frags(napi); |
558 | } |
559 | |
560 | /* RSS contexts. We're using linked lists and crappy O(n) algorithms, because |
561 | * (a) this is an infrequent control-plane operation and (b) n is small (max 64) |
562 | */ |
563 | struct efx_rss_context *(struct efx_nic *efx) |
564 | { |
565 | struct list_head *head = &efx->rss_context.list; |
566 | struct efx_rss_context *ctx, *new; |
567 | u32 id = 1; /* Don't use zero, that refers to the master RSS context */ |
568 | |
569 | WARN_ON(!mutex_is_locked(&efx->rss_lock)); |
570 | |
571 | /* Search for first gap in the numbering */ |
572 | list_for_each_entry(ctx, head, list) { |
573 | if (ctx->user_id != id) |
574 | break; |
575 | id++; |
576 | /* Check for wrap. If this happens, we have nearly 2^32 |
577 | * allocated RSS contexts, which seems unlikely. |
578 | */ |
579 | if (WARN_ON_ONCE(!id)) |
580 | return NULL; |
581 | } |
582 | |
583 | /* Create the new entry */ |
584 | new = kmalloc(size: sizeof(*new), GFP_KERNEL); |
585 | if (!new) |
586 | return NULL; |
587 | new->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; |
588 | new->rx_hash_udp_4tuple = false; |
589 | |
590 | /* Insert the new entry into the gap */ |
591 | new->user_id = id; |
592 | list_add_tail(new: &new->list, head: &ctx->list); |
593 | return new; |
594 | } |
595 | |
596 | struct efx_rss_context *(struct efx_nic *efx, u32 id) |
597 | { |
598 | struct list_head *head = &efx->rss_context.list; |
599 | struct efx_rss_context *ctx; |
600 | |
601 | WARN_ON(!mutex_is_locked(&efx->rss_lock)); |
602 | |
603 | list_for_each_entry(ctx, head, list) |
604 | if (ctx->user_id == id) |
605 | return ctx; |
606 | return NULL; |
607 | } |
608 | |
609 | void (struct efx_rss_context *ctx) |
610 | { |
611 | list_del(entry: &ctx->list); |
612 | kfree(objp: ctx); |
613 | } |
614 | |
615 | void efx_set_default_rx_indir_table(struct efx_nic *efx, |
616 | struct efx_rss_context *ctx) |
617 | { |
618 | size_t i; |
619 | |
620 | for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++) |
621 | ctx->rx_indir_table[i] = |
622 | ethtool_rxfh_indir_default(index: i, n_rx_rings: efx->rss_spread); |
623 | } |
624 | |
625 | /** |
626 | * efx_filter_is_mc_recipient - test whether spec is a multicast recipient |
627 | * @spec: Specification to test |
628 | * |
629 | * Return: %true if the specification is a non-drop RX filter that |
630 | * matches a local MAC address I/G bit value of 1 or matches a local |
631 | * IPv4 or IPv6 address value in the respective multicast address |
632 | * range. Otherwise %false. |
633 | */ |
634 | bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec) |
635 | { |
636 | if (!(spec->flags & EFX_FILTER_FLAG_RX) || |
637 | spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP) |
638 | return false; |
639 | |
640 | if (spec->match_flags & |
641 | (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) && |
642 | is_multicast_ether_addr(addr: spec->loc_mac)) |
643 | return true; |
644 | |
645 | if ((spec->match_flags & |
646 | (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) == |
647 | (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) { |
648 | if (spec->ether_type == htons(ETH_P_IP) && |
649 | ipv4_is_multicast(addr: spec->loc_host[0])) |
650 | return true; |
651 | if (spec->ether_type == htons(ETH_P_IPV6) && |
652 | ((const u8 *)spec->loc_host)[0] == 0xff) |
653 | return true; |
654 | } |
655 | |
656 | return false; |
657 | } |
658 | |
659 | bool efx_filter_spec_equal(const struct efx_filter_spec *left, |
660 | const struct efx_filter_spec *right) |
661 | { |
662 | if ((left->match_flags ^ right->match_flags) | |
663 | ((left->flags ^ right->flags) & |
664 | (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))) |
665 | return false; |
666 | |
667 | return memcmp(p: &left->vport_id, q: &right->vport_id, |
668 | size: sizeof(struct efx_filter_spec) - |
669 | offsetof(struct efx_filter_spec, vport_id)) == 0; |
670 | } |
671 | |
672 | u32 efx_filter_spec_hash(const struct efx_filter_spec *spec) |
673 | { |
674 | BUILD_BUG_ON(offsetof(struct efx_filter_spec, vport_id) & 3); |
675 | return jhash2(k: (const u32 *)&spec->vport_id, |
676 | length: (sizeof(struct efx_filter_spec) - |
677 | offsetof(struct efx_filter_spec, vport_id)) / 4, |
678 | initval: 0); |
679 | } |
680 | |
681 | #ifdef CONFIG_RFS_ACCEL |
682 | bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, |
683 | bool *force) |
684 | { |
685 | if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) { |
686 | /* ARFS is currently updating this entry, leave it */ |
687 | return false; |
688 | } |
689 | if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) { |
690 | /* ARFS tried and failed to update this, so it's probably out |
691 | * of date. Remove the filter and the ARFS rule entry. |
692 | */ |
693 | rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; |
694 | *force = true; |
695 | return true; |
696 | } else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */ |
697 | /* ARFS has moved on, so old filter is not needed. Since we did |
698 | * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will |
699 | * not be removed by efx_rps_hash_del() subsequently. |
700 | */ |
701 | *force = true; |
702 | return true; |
703 | } |
704 | /* Remove it iff ARFS wants to. */ |
705 | return true; |
706 | } |
707 | |
708 | static |
709 | struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx, |
710 | const struct efx_filter_spec *spec) |
711 | { |
712 | u32 hash = efx_filter_spec_hash(spec); |
713 | |
714 | lockdep_assert_held(&efx->rps_hash_lock); |
715 | if (!efx->rps_hash_table) |
716 | return NULL; |
717 | return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE]; |
718 | } |
719 | |
720 | struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, |
721 | const struct efx_filter_spec *spec) |
722 | { |
723 | struct efx_arfs_rule *rule; |
724 | struct hlist_head *head; |
725 | struct hlist_node *node; |
726 | |
727 | head = efx_rps_hash_bucket(efx, spec); |
728 | if (!head) |
729 | return NULL; |
730 | hlist_for_each(node, head) { |
731 | rule = container_of(node, struct efx_arfs_rule, node); |
732 | if (efx_filter_spec_equal(left: spec, right: &rule->spec)) |
733 | return rule; |
734 | } |
735 | return NULL; |
736 | } |
737 | |
738 | struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, |
739 | const struct efx_filter_spec *spec, |
740 | bool *new) |
741 | { |
742 | struct efx_arfs_rule *rule; |
743 | struct hlist_head *head; |
744 | struct hlist_node *node; |
745 | |
746 | head = efx_rps_hash_bucket(efx, spec); |
747 | if (!head) |
748 | return NULL; |
749 | hlist_for_each(node, head) { |
750 | rule = container_of(node, struct efx_arfs_rule, node); |
751 | if (efx_filter_spec_equal(left: spec, right: &rule->spec)) { |
752 | *new = false; |
753 | return rule; |
754 | } |
755 | } |
756 | rule = kmalloc(size: sizeof(*rule), GFP_ATOMIC); |
757 | *new = true; |
758 | if (rule) { |
759 | memcpy(&rule->spec, spec, sizeof(rule->spec)); |
760 | hlist_add_head(n: &rule->node, h: head); |
761 | } |
762 | return rule; |
763 | } |
764 | |
765 | void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec) |
766 | { |
767 | struct efx_arfs_rule *rule; |
768 | struct hlist_head *head; |
769 | struct hlist_node *node; |
770 | |
771 | head = efx_rps_hash_bucket(efx, spec); |
772 | if (WARN_ON(!head)) |
773 | return; |
774 | hlist_for_each(node, head) { |
775 | rule = container_of(node, struct efx_arfs_rule, node); |
776 | if (efx_filter_spec_equal(left: spec, right: &rule->spec)) { |
777 | /* Someone already reused the entry. We know that if |
778 | * this check doesn't fire (i.e. filter_id == REMOVING) |
779 | * then the REMOVING mark was put there by our caller, |
780 | * because caller is holding a lock on filter table and |
781 | * only holders of that lock set REMOVING. |
782 | */ |
783 | if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING) |
784 | return; |
785 | hlist_del(n: node); |
786 | kfree(objp: rule); |
787 | return; |
788 | } |
789 | } |
790 | /* We didn't find it. */ |
791 | WARN_ON(1); |
792 | } |
793 | #endif |
794 | |
795 | int efx_probe_filters(struct efx_nic *efx) |
796 | { |
797 | int rc; |
798 | |
799 | mutex_lock(&efx->mac_lock); |
800 | rc = efx->type->filter_table_probe(efx); |
801 | if (rc) |
802 | goto out_unlock; |
803 | |
804 | #ifdef CONFIG_RFS_ACCEL |
805 | if (efx->type->offload_features & NETIF_F_NTUPLE) { |
806 | struct efx_channel *channel; |
807 | int i, success = 1; |
808 | |
809 | efx_for_each_channel(channel, efx) { |
810 | channel->rps_flow_id = |
811 | kcalloc(n: efx->type->max_rx_ip_filters, |
812 | size: sizeof(*channel->rps_flow_id), |
813 | GFP_KERNEL); |
814 | if (!channel->rps_flow_id) |
815 | success = 0; |
816 | else |
817 | for (i = 0; |
818 | i < efx->type->max_rx_ip_filters; |
819 | ++i) |
820 | channel->rps_flow_id[i] = |
821 | RPS_FLOW_ID_INVALID; |
822 | channel->rfs_expire_index = 0; |
823 | channel->rfs_filter_count = 0; |
824 | } |
825 | |
826 | if (!success) { |
827 | efx_for_each_channel(channel, efx) { |
828 | kfree(objp: channel->rps_flow_id); |
829 | channel->rps_flow_id = NULL; |
830 | } |
831 | efx->type->filter_table_remove(efx); |
832 | rc = -ENOMEM; |
833 | goto out_unlock; |
834 | } |
835 | } |
836 | #endif |
837 | out_unlock: |
838 | mutex_unlock(lock: &efx->mac_lock); |
839 | return rc; |
840 | } |
841 | |
842 | void efx_remove_filters(struct efx_nic *efx) |
843 | { |
844 | #ifdef CONFIG_RFS_ACCEL |
845 | struct efx_channel *channel; |
846 | |
847 | efx_for_each_channel(channel, efx) { |
848 | cancel_delayed_work_sync(dwork: &channel->filter_work); |
849 | kfree(objp: channel->rps_flow_id); |
850 | channel->rps_flow_id = NULL; |
851 | } |
852 | #endif |
853 | efx->type->filter_table_remove(efx); |
854 | } |
855 | |
856 | #ifdef CONFIG_RFS_ACCEL |
857 | |
858 | static void efx_filter_rfs_work(struct work_struct *data) |
859 | { |
860 | struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion, |
861 | work); |
862 | struct efx_nic *efx = efx_netdev_priv(dev: req->net_dev); |
863 | struct efx_channel *channel = efx_get_channel(efx, index: req->rxq_index); |
864 | int slot_idx = req - efx->rps_slot; |
865 | struct efx_arfs_rule *rule; |
866 | u16 arfs_id = 0; |
867 | int rc; |
868 | |
869 | rc = efx->type->filter_insert(efx, &req->spec, true); |
870 | if (rc >= 0) |
871 | /* Discard 'priority' part of EF10+ filter ID (mcdi_filters) */ |
872 | rc %= efx->type->max_rx_ip_filters; |
873 | if (efx->rps_hash_table) { |
874 | spin_lock_bh(lock: &efx->rps_hash_lock); |
875 | rule = efx_rps_hash_find(efx, spec: &req->spec); |
876 | /* The rule might have already gone, if someone else's request |
877 | * for the same spec was already worked and then expired before |
878 | * we got around to our work. In that case we have nothing |
879 | * tying us to an arfs_id, meaning that as soon as the filter |
880 | * is considered for expiry it will be removed. |
881 | */ |
882 | if (rule) { |
883 | if (rc < 0) |
884 | rule->filter_id = EFX_ARFS_FILTER_ID_ERROR; |
885 | else |
886 | rule->filter_id = rc; |
887 | arfs_id = rule->arfs_id; |
888 | } |
889 | spin_unlock_bh(lock: &efx->rps_hash_lock); |
890 | } |
891 | if (rc >= 0) { |
892 | /* Remember this so we can check whether to expire the filter |
893 | * later. |
894 | */ |
895 | mutex_lock(&efx->rps_mutex); |
896 | if (channel->rps_flow_id[rc] == RPS_FLOW_ID_INVALID) |
897 | channel->rfs_filter_count++; |
898 | channel->rps_flow_id[rc] = req->flow_id; |
899 | mutex_unlock(lock: &efx->rps_mutex); |
900 | |
901 | if (req->spec.ether_type == htons(ETH_P_IP)) |
902 | netif_info(efx, rx_status, efx->net_dev, |
903 | "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d id %u]\n" , |
904 | (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP" , |
905 | req->spec.rem_host, ntohs(req->spec.rem_port), |
906 | req->spec.loc_host, ntohs(req->spec.loc_port), |
907 | req->rxq_index, req->flow_id, rc, arfs_id); |
908 | else |
909 | netif_info(efx, rx_status, efx->net_dev, |
910 | "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d id %u]\n" , |
911 | (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP" , |
912 | req->spec.rem_host, ntohs(req->spec.rem_port), |
913 | req->spec.loc_host, ntohs(req->spec.loc_port), |
914 | req->rxq_index, req->flow_id, rc, arfs_id); |
915 | channel->n_rfs_succeeded++; |
916 | } else { |
917 | if (req->spec.ether_type == htons(ETH_P_IP)) |
918 | netif_dbg(efx, rx_status, efx->net_dev, |
919 | "failed to steer %s %pI4:%u:%pI4:%u to queue %u [flow %u rc %d id %u]\n" , |
920 | (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP" , |
921 | req->spec.rem_host, ntohs(req->spec.rem_port), |
922 | req->spec.loc_host, ntohs(req->spec.loc_port), |
923 | req->rxq_index, req->flow_id, rc, arfs_id); |
924 | else |
925 | netif_dbg(efx, rx_status, efx->net_dev, |
926 | "failed to steer %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u rc %d id %u]\n" , |
927 | (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP" , |
928 | req->spec.rem_host, ntohs(req->spec.rem_port), |
929 | req->spec.loc_host, ntohs(req->spec.loc_port), |
930 | req->rxq_index, req->flow_id, rc, arfs_id); |
931 | channel->n_rfs_failed++; |
932 | /* We're overloading the NIC's filter tables, so let's do a |
933 | * chunk of extra expiry work. |
934 | */ |
935 | __efx_filter_rfs_expire(channel, min(channel->rfs_filter_count, |
936 | 100u)); |
937 | } |
938 | |
939 | /* Release references */ |
940 | clear_bit(nr: slot_idx, addr: &efx->rps_slot_map); |
941 | dev_put(dev: req->net_dev); |
942 | } |
943 | |
944 | int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, |
945 | u16 rxq_index, u32 flow_id) |
946 | { |
947 | struct efx_nic *efx = efx_netdev_priv(dev: net_dev); |
948 | struct efx_async_filter_insertion *req; |
949 | struct efx_arfs_rule *rule; |
950 | struct flow_keys fk; |
951 | int slot_idx; |
952 | bool new; |
953 | int rc; |
954 | |
955 | /* find a free slot */ |
956 | for (slot_idx = 0; slot_idx < EFX_RPS_MAX_IN_FLIGHT; slot_idx++) |
957 | if (!test_and_set_bit(nr: slot_idx, addr: &efx->rps_slot_map)) |
958 | break; |
959 | if (slot_idx >= EFX_RPS_MAX_IN_FLIGHT) |
960 | return -EBUSY; |
961 | |
962 | if (flow_id == RPS_FLOW_ID_INVALID) { |
963 | rc = -EINVAL; |
964 | goto out_clear; |
965 | } |
966 | |
967 | if (!skb_flow_dissect_flow_keys(skb, flow: &fk, flags: 0)) { |
968 | rc = -EPROTONOSUPPORT; |
969 | goto out_clear; |
970 | } |
971 | |
972 | if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6)) { |
973 | rc = -EPROTONOSUPPORT; |
974 | goto out_clear; |
975 | } |
976 | if (fk.control.flags & FLOW_DIS_IS_FRAGMENT) { |
977 | rc = -EPROTONOSUPPORT; |
978 | goto out_clear; |
979 | } |
980 | |
981 | req = efx->rps_slot + slot_idx; |
982 | efx_filter_init_rx(spec: &req->spec, priority: EFX_FILTER_PRI_HINT, |
983 | flags: efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0, |
984 | rxq_id: rxq_index); |
985 | req->spec.match_flags = |
986 | EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO | |
987 | EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT | |
988 | EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT; |
989 | req->spec.ether_type = fk.basic.n_proto; |
990 | req->spec.ip_proto = fk.basic.ip_proto; |
991 | |
992 | if (fk.basic.n_proto == htons(ETH_P_IP)) { |
993 | req->spec.rem_host[0] = fk.addrs.v4addrs.src; |
994 | req->spec.loc_host[0] = fk.addrs.v4addrs.dst; |
995 | } else { |
996 | memcpy(req->spec.rem_host, &fk.addrs.v6addrs.src, |
997 | sizeof(struct in6_addr)); |
998 | memcpy(req->spec.loc_host, &fk.addrs.v6addrs.dst, |
999 | sizeof(struct in6_addr)); |
1000 | } |
1001 | |
1002 | req->spec.rem_port = fk.ports.src; |
1003 | req->spec.loc_port = fk.ports.dst; |
1004 | |
1005 | if (efx->rps_hash_table) { |
1006 | /* Add it to ARFS hash table */ |
1007 | spin_lock(lock: &efx->rps_hash_lock); |
1008 | rule = efx_rps_hash_add(efx, spec: &req->spec, new: &new); |
1009 | if (!rule) { |
1010 | rc = -ENOMEM; |
1011 | goto out_unlock; |
1012 | } |
1013 | if (new) |
1014 | rule->arfs_id = efx->rps_next_id++ % RPS_NO_FILTER; |
1015 | rc = rule->arfs_id; |
1016 | /* Skip if existing or pending filter already does the right thing */ |
1017 | if (!new && rule->rxq_index == rxq_index && |
1018 | rule->filter_id >= EFX_ARFS_FILTER_ID_PENDING) |
1019 | goto out_unlock; |
1020 | rule->rxq_index = rxq_index; |
1021 | rule->filter_id = EFX_ARFS_FILTER_ID_PENDING; |
1022 | spin_unlock(lock: &efx->rps_hash_lock); |
1023 | } else { |
1024 | /* Without an ARFS hash table, we just use arfs_id 0 for all |
1025 | * filters. This means if multiple flows hash to the same |
1026 | * flow_id, all but the most recently touched will be eligible |
1027 | * for expiry. |
1028 | */ |
1029 | rc = 0; |
1030 | } |
1031 | |
1032 | /* Queue the request */ |
1033 | dev_hold(dev: req->net_dev = net_dev); |
1034 | INIT_WORK(&req->work, efx_filter_rfs_work); |
1035 | req->rxq_index = rxq_index; |
1036 | req->flow_id = flow_id; |
1037 | schedule_work(work: &req->work); |
1038 | return rc; |
1039 | out_unlock: |
1040 | spin_unlock(lock: &efx->rps_hash_lock); |
1041 | out_clear: |
1042 | clear_bit(nr: slot_idx, addr: &efx->rps_slot_map); |
1043 | return rc; |
1044 | } |
1045 | |
1046 | bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota) |
1047 | { |
1048 | bool (*expire_one)(struct efx_nic *efx, u32 flow_id, unsigned int index); |
1049 | struct efx_nic *efx = channel->efx; |
1050 | unsigned int index, size, start; |
1051 | u32 flow_id; |
1052 | |
1053 | if (!mutex_trylock(lock: &efx->rps_mutex)) |
1054 | return false; |
1055 | expire_one = efx->type->filter_rfs_expire_one; |
1056 | index = channel->rfs_expire_index; |
1057 | start = index; |
1058 | size = efx->type->max_rx_ip_filters; |
1059 | while (quota) { |
1060 | flow_id = channel->rps_flow_id[index]; |
1061 | |
1062 | if (flow_id != RPS_FLOW_ID_INVALID) { |
1063 | quota--; |
1064 | if (expire_one(efx, flow_id, index)) { |
1065 | netif_info(efx, rx_status, efx->net_dev, |
1066 | "expired filter %d [channel %u flow %u]\n" , |
1067 | index, channel->channel, flow_id); |
1068 | channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID; |
1069 | channel->rfs_filter_count--; |
1070 | } |
1071 | } |
1072 | if (++index == size) |
1073 | index = 0; |
1074 | /* If we were called with a quota that exceeds the total number |
1075 | * of filters in the table (which shouldn't happen, but could |
1076 | * if two callers race), ensure that we don't loop forever - |
1077 | * stop when we've examined every row of the table. |
1078 | */ |
1079 | if (index == start) |
1080 | break; |
1081 | } |
1082 | |
1083 | channel->rfs_expire_index = index; |
1084 | mutex_unlock(lock: &efx->rps_mutex); |
1085 | return true; |
1086 | } |
1087 | |
1088 | #endif /* CONFIG_RFS_ACCEL */ |
1089 | |