1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /**************************************************************************** |
3 | * Driver for Solarflare network controllers and boards |
4 | * Copyright 2018 Solarflare Communications Inc. |
5 | * |
6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms of the GNU General Public License version 2 as published |
8 | * by the Free Software Foundation, incorporated herein by reference. |
9 | */ |
10 | |
11 | #include "net_driver.h" |
12 | #include <linux/module.h> |
13 | #include <linux/filter.h> |
14 | #include "efx_channels.h" |
15 | #include "efx.h" |
16 | #include "efx_common.h" |
17 | #include "tx_common.h" |
18 | #include "rx_common.h" |
19 | #include "nic.h" |
20 | #include "sriov.h" |
21 | #include "workarounds.h" |
22 | |
23 | /* This is the first interrupt mode to try out of: |
24 | * 0 => MSI-X |
25 | * 1 => MSI |
26 | * 2 => legacy |
27 | */ |
28 | unsigned int efx_interrupt_mode = EFX_INT_MODE_MSIX; |
29 | |
30 | /* This is the requested number of CPUs to use for Receive-Side Scaling (RSS), |
31 | * i.e. the number of CPUs among which we may distribute simultaneous |
32 | * interrupt handling. |
33 | * |
34 | * Cards without MSI-X will only target one CPU via legacy or MSI interrupt. |
35 | * The default (0) means to assign an interrupt to each core. |
36 | */ |
unsigned int rss_cpus;
38 | |
39 | static unsigned int irq_adapt_low_thresh = 8000; |
40 | module_param(irq_adapt_low_thresh, uint, 0644); |
41 | MODULE_PARM_DESC(irq_adapt_low_thresh, |
42 | "Threshold score for reducing IRQ moderation" ); |
43 | |
44 | static unsigned int irq_adapt_high_thresh = 16000; |
45 | module_param(irq_adapt_high_thresh, uint, 0644); |
46 | MODULE_PARM_DESC(irq_adapt_high_thresh, |
47 | "Threshold score for increasing IRQ moderation" ); |
48 | |
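/* Both thresholds are writable at runtime via sysfs because the parameters
 * are registered with mode 0644; for example (illustrative value, assuming
 * the driver module is loaded under the name "sfc"):
 *   echo 4000 > /sys/module/sfc/parameters/irq_adapt_low_thresh
 */
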
49 | static const struct efx_channel_type efx_default_channel_type; |
50 | |
51 | /************* |
52 | * INTERRUPTS |
53 | *************/ |
54 | |
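/* Count one online CPU per physical core (SMT siblings are filtered out),
 * optionally restricted to CPUs local to the NIC's PCI bus.  Falls back to
 * a count of 1 if the temporary cpumask cannot be allocated, so the caller
 * still ends up with a usable (single-queue) configuration.
 */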
55 | static unsigned int count_online_cores(struct efx_nic *efx, bool local_node) |
56 | { |
57 | cpumask_var_t filter_mask; |
58 | unsigned int count; |
59 | int cpu; |
60 | |
61 | if (unlikely(!zalloc_cpumask_var(&filter_mask, GFP_KERNEL))) { |
62 | netif_warn(efx, probe, efx->net_dev, |
63 | "RSS disabled due to allocation failure\n" ); |
64 | return 1; |
65 | } |
66 | |
	cpumask_copy(filter_mask, cpu_online_mask);
	if (local_node)
		cpumask_and(filter_mask, filter_mask,
			    cpumask_of_pcibus(efx->pci_dev->bus));
71 | |
72 | count = 0; |
73 | for_each_cpu(cpu, filter_mask) { |
74 | ++count; |
		cpumask_andnot(filter_mask, filter_mask, topology_sibling_cpumask(cpu));
76 | } |
77 | |
	free_cpumask_var(filter_mask);
79 | |
80 | return count; |
81 | } |
82 | |
83 | static unsigned int efx_wanted_parallelism(struct efx_nic *efx) |
84 | { |
85 | unsigned int count; |
86 | |
87 | if (rss_cpus) { |
88 | count = rss_cpus; |
89 | } else { |
		count = count_online_cores(efx, true);
91 | |
92 | /* If no online CPUs in local node, fallback to any online CPUs */ |
93 | if (count == 0) |
			count = count_online_cores(efx, false);
95 | } |
96 | |
97 | if (count > EFX_MAX_RX_QUEUES) { |
98 | netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn, |
99 | "Reducing number of rx queues from %u to %u.\n" , |
100 | count, EFX_MAX_RX_QUEUES); |
101 | count = EFX_MAX_RX_QUEUES; |
102 | } |
103 | |
104 | /* If RSS is requested for the PF *and* VFs then we can't write RSS |
105 | * table entries that are inaccessible to VFs |
106 | */ |
107 | #ifdef CONFIG_SFC_SRIOV |
108 | if (efx->type->sriov_wanted) { |
109 | if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 && |
110 | count > efx_vf_size(efx)) { |
111 | netif_warn(efx, probe, efx->net_dev, |
112 | "Reducing number of RSS channels from %u to %u for " |
113 | "VF support. Increase vf-msix-limit to use more " |
114 | "channels on the PF.\n" , |
115 | count, efx_vf_size(efx)); |
116 | count = efx_vf_size(efx); |
117 | } |
118 | } |
119 | #endif |
120 | |
121 | return count; |
122 | } |
123 | |
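/* Work out the MSI-X channel layout: how many channels to use for RX, TX
 * and XDP TX within the limits of the available MSI-X vectors, VIs and any
 * extra (e.g. PTP) channels.  Returns the total number of channels to
 * request, or a negative error code.
 */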
124 | static int efx_allocate_msix_channels(struct efx_nic *efx, |
125 | unsigned int max_channels, |
				      unsigned int extra_channels,
127 | unsigned int parallelism) |
128 | { |
129 | unsigned int n_channels = parallelism; |
130 | int vec_count; |
131 | int tx_per_ev; |
132 | int n_xdp_tx; |
133 | int n_xdp_ev; |
134 | |
135 | if (efx_separate_tx_channels) |
136 | n_channels *= 2; |
137 | n_channels += extra_channels; |
138 | |
139 | /* To allow XDP transmit to happen from arbitrary NAPI contexts |
140 | * we allocate a TX queue per CPU. We share event queues across |
141 | * multiple tx queues, assuming tx and ev queues are both |
142 | * maximum size. |
143 | */ |
144 | tx_per_ev = EFX_MAX_EVQ_SIZE / EFX_TXQ_MAX_ENT(efx); |
145 | tx_per_ev = min(tx_per_ev, EFX_MAX_TXQ_PER_CHANNEL); |
146 | n_xdp_tx = num_possible_cpus(); |
147 | n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, tx_per_ev); |
148 | |
	vec_count = pci_msix_vec_count(efx->pci_dev);
150 | if (vec_count < 0) |
151 | return vec_count; |
152 | |
153 | max_channels = min_t(unsigned int, vec_count, max_channels); |
154 | |
155 | /* Check resources. |
156 | * We need a channel per event queue, plus a VI per tx queue. |
157 | * This may be more pessimistic than it needs to be. |
158 | */ |
159 | if (n_channels >= max_channels) { |
160 | efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED; |
161 | netif_warn(efx, drv, efx->net_dev, |
162 | "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n" , |
163 | n_xdp_ev, n_channels, max_channels); |
164 | netif_warn(efx, drv, efx->net_dev, |
165 | "XDP_TX and XDP_REDIRECT might decrease device's performance\n" ); |
166 | } else if (n_channels + n_xdp_tx > efx->max_vis) { |
167 | efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED; |
168 | netif_warn(efx, drv, efx->net_dev, |
169 | "Insufficient resources for %d XDP TX queues (%d other channels, max VIs %d)\n" , |
170 | n_xdp_tx, n_channels, efx->max_vis); |
171 | netif_warn(efx, drv, efx->net_dev, |
172 | "XDP_TX and XDP_REDIRECT might decrease device's performance\n" ); |
173 | } else if (n_channels + n_xdp_ev > max_channels) { |
174 | efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_SHARED; |
175 | netif_warn(efx, drv, efx->net_dev, |
176 | "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n" , |
177 | n_xdp_ev, n_channels, max_channels); |
178 | |
179 | n_xdp_ev = max_channels - n_channels; |
180 | netif_warn(efx, drv, efx->net_dev, |
181 | "XDP_TX and XDP_REDIRECT will work with reduced performance (%d cpus/tx_queue)\n" , |
182 | DIV_ROUND_UP(n_xdp_tx, tx_per_ev * n_xdp_ev)); |
183 | } else { |
184 | efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_DEDICATED; |
185 | } |
186 | |
187 | if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_BORROWED) { |
188 | efx->n_xdp_channels = n_xdp_ev; |
189 | efx->xdp_tx_per_channel = tx_per_ev; |
190 | efx->xdp_tx_queue_count = n_xdp_tx; |
191 | n_channels += n_xdp_ev; |
192 | netif_dbg(efx, drv, efx->net_dev, |
193 | "Allocating %d TX and %d event queues for XDP\n" , |
194 | n_xdp_ev * tx_per_ev, n_xdp_ev); |
195 | } else { |
196 | efx->n_xdp_channels = 0; |
197 | efx->xdp_tx_per_channel = 0; |
198 | efx->xdp_tx_queue_count = n_xdp_tx; |
199 | } |
200 | |
201 | if (vec_count < n_channels) { |
202 | netif_err(efx, drv, efx->net_dev, |
203 | "WARNING: Insufficient MSI-X vectors available (%d < %u).\n" , |
204 | vec_count, n_channels); |
205 | netif_err(efx, drv, efx->net_dev, |
206 | "WARNING: Performance may be reduced.\n" ); |
207 | n_channels = vec_count; |
208 | } |
209 | |
210 | n_channels = min(n_channels, max_channels); |
211 | |
212 | efx->n_channels = n_channels; |
213 | |
214 | /* Ignore XDP tx channels when creating rx channels. */ |
215 | n_channels -= efx->n_xdp_channels; |
216 | |
217 | if (efx_separate_tx_channels) { |
218 | efx->n_tx_channels = |
219 | min(max(n_channels / 2, 1U), |
220 | efx->max_tx_channels); |
221 | efx->tx_channel_offset = |
222 | n_channels - efx->n_tx_channels; |
223 | efx->n_rx_channels = |
224 | max(n_channels - |
225 | efx->n_tx_channels, 1U); |
226 | } else { |
227 | efx->n_tx_channels = min(n_channels, efx->max_tx_channels); |
228 | efx->tx_channel_offset = 0; |
229 | efx->n_rx_channels = n_channels; |
230 | } |
231 | |
232 | efx->n_rx_channels = min(efx->n_rx_channels, parallelism); |
233 | efx->n_tx_channels = min(efx->n_tx_channels, parallelism); |
234 | |
235 | efx->xdp_channel_offset = n_channels; |
236 | |
237 | netif_dbg(efx, drv, efx->net_dev, |
238 | "Allocating %u RX channels\n" , |
239 | efx->n_rx_channels); |
240 | |
241 | return efx->n_channels; |
242 | } |
243 | |
244 | /* Probe the number and type of interrupts we are able to obtain, and |
245 | * the resulting numbers of channels and RX queues. |
246 | */ |
247 | int efx_probe_interrupts(struct efx_nic *efx) |
248 | { |
	unsigned int extra_channels = 0;
	unsigned int rss_spread;
251 | unsigned int i, j; |
252 | int rc; |
253 | |
254 | for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) |
255 | if (efx->extra_channel_type[i]) |
256 | ++extra_channels; |
257 | |
258 | if (efx->interrupt_mode == EFX_INT_MODE_MSIX) { |
259 | unsigned int parallelism = efx_wanted_parallelism(efx); |
260 | struct msix_entry xentries[EFX_MAX_CHANNELS]; |
261 | unsigned int n_channels; |
262 | |
		rc = efx_allocate_msix_channels(efx, efx->max_channels,
						extra_channels, parallelism);
265 | if (rc >= 0) { |
266 | n_channels = rc; |
267 | for (i = 0; i < n_channels; i++) |
268 | xentries[i].entry = i; |
			rc = pci_enable_msix_range(efx->pci_dev, xentries, 1,
						   n_channels);
271 | } |
272 | if (rc < 0) { |
273 | /* Fall back to single channel MSI */ |
274 | netif_err(efx, drv, efx->net_dev, |
275 | "could not enable MSI-X\n" ); |
276 | if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI) |
277 | efx->interrupt_mode = EFX_INT_MODE_MSI; |
278 | else |
279 | return rc; |
280 | } else if (rc < n_channels) { |
281 | netif_err(efx, drv, efx->net_dev, |
282 | "WARNING: Insufficient MSI-X vectors" |
283 | " available (%d < %u).\n" , rc, n_channels); |
284 | netif_err(efx, drv, efx->net_dev, |
285 | "WARNING: Performance may be reduced.\n" ); |
286 | n_channels = rc; |
287 | } |
288 | |
289 | if (rc > 0) { |
290 | for (i = 0; i < efx->n_channels; i++) |
				efx_get_channel(efx, i)->irq =
292 | xentries[i].vector; |
293 | } |
294 | } |
295 | |
296 | /* Try single interrupt MSI */ |
297 | if (efx->interrupt_mode == EFX_INT_MODE_MSI) { |
298 | efx->n_channels = 1; |
299 | efx->n_rx_channels = 1; |
300 | efx->n_tx_channels = 1; |
301 | efx->tx_channel_offset = 0; |
302 | efx->n_xdp_channels = 0; |
303 | efx->xdp_channel_offset = efx->n_channels; |
304 | efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED; |
		rc = pci_enable_msi(efx->pci_dev);
306 | if (rc == 0) { |
			efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
308 | } else { |
309 | netif_err(efx, drv, efx->net_dev, |
310 | "could not enable MSI\n" ); |
311 | if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY) |
312 | efx->interrupt_mode = EFX_INT_MODE_LEGACY; |
313 | else |
314 | return rc; |
315 | } |
316 | } |
317 | |
318 | /* Assume legacy interrupts */ |
319 | if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) { |
320 | efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0); |
321 | efx->n_rx_channels = 1; |
322 | efx->n_tx_channels = 1; |
323 | efx->tx_channel_offset = efx_separate_tx_channels ? 1 : 0; |
324 | efx->n_xdp_channels = 0; |
325 | efx->xdp_channel_offset = efx->n_channels; |
326 | efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED; |
327 | efx->legacy_irq = efx->pci_dev->irq; |
328 | } |
329 | |
330 | /* Assign extra channels if possible, before XDP channels */ |
331 | efx->n_extra_tx_channels = 0; |
332 | j = efx->xdp_channel_offset; |
333 | for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) { |
334 | if (!efx->extra_channel_type[i]) |
335 | continue; |
336 | if (j <= efx->tx_channel_offset + efx->n_tx_channels) { |
337 | efx->extra_channel_type[i]->handle_no_channel(efx); |
338 | } else { |
339 | --j; |
			efx_get_channel(efx, j)->type =
				efx->extra_channel_type[i];
			if (efx_channel_has_tx_queues(efx_get_channel(efx, j)))
343 | efx->n_extra_tx_channels++; |
344 | } |
345 | } |
346 | |
347 | rss_spread = efx->n_rx_channels; |
348 | /* RSS might be usable on VFs even if it is disabled on the PF */ |
349 | #ifdef CONFIG_SFC_SRIOV |
350 | if (efx->type->sriov_wanted) { |
351 | efx->rss_spread = ((rss_spread > 1 || |
352 | !efx->type->sriov_wanted(efx)) ? |
353 | rss_spread : efx_vf_size(efx)); |
354 | return 0; |
355 | } |
356 | #endif |
357 | efx->rss_spread = rss_spread; |
358 | |
359 | return 0; |
360 | } |
361 | |
362 | #if defined(CONFIG_SMP) |
363 | void efx_set_interrupt_affinity(struct efx_nic *efx) |
364 | { |
	const struct cpumask *numa_mask = cpumask_of_pcibus(efx->pci_dev->bus);
366 | struct efx_channel *channel; |
367 | unsigned int cpu; |
368 | |
369 | /* If no online CPUs in local node, fallback to any online CPU */ |
	if (cpumask_first_and(cpu_online_mask, numa_mask) >= nr_cpu_ids)
371 | numa_mask = cpu_online_mask; |
372 | |
373 | cpu = -1; |
374 | efx_for_each_channel(channel, efx) { |
		cpu = cpumask_next_and(cpu, cpu_online_mask, numa_mask);
		if (cpu >= nr_cpu_ids)
			cpu = cpumask_first_and(cpu_online_mask, numa_mask);
		irq_set_affinity_hint(channel->irq, cpumask_of(cpu));
379 | } |
380 | } |
381 | |
382 | void efx_clear_interrupt_affinity(struct efx_nic *efx) |
383 | { |
384 | struct efx_channel *channel; |
385 | |
386 | efx_for_each_channel(channel, efx) |
		irq_set_affinity_hint(channel->irq, NULL);
388 | } |
389 | #else |
390 | void |
391 | efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused))) |
392 | { |
393 | } |
394 | |
395 | void |
396 | efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused))) |
397 | { |
398 | } |
399 | #endif /* CONFIG_SMP */ |
400 | |
401 | void efx_remove_interrupts(struct efx_nic *efx) |
402 | { |
403 | struct efx_channel *channel; |
404 | |
405 | /* Remove MSI/MSI-X interrupts */ |
406 | efx_for_each_channel(channel, efx) |
407 | channel->irq = 0; |
	pci_disable_msi(efx->pci_dev);
	pci_disable_msix(efx->pci_dev);
410 | |
411 | /* Remove legacy interrupt */ |
412 | efx->legacy_irq = 0; |
413 | } |
414 | |
415 | /*************** |
416 | * EVENT QUEUES |
417 | ***************/ |
418 | |
419 | /* Create event queue |
420 | * Event queue memory allocations are done only once. If the channel |
421 | * is reset, the memory buffer will be reused; this guards against |
422 | * errors during channel reset and also simplifies interrupt handling. |
423 | */ |
424 | int efx_probe_eventq(struct efx_channel *channel) |
425 | { |
426 | struct efx_nic *efx = channel->efx; |
427 | unsigned long entries; |
428 | |
429 | netif_dbg(efx, probe, efx->net_dev, |
430 | "chan %d create event queue\n" , channel->channel); |
431 | |
432 | /* Build an event queue with room for one event per tx and rx buffer, |
433 | * plus some extra for link state events and MCDI completions. |
434 | */ |
435 | entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128); |
436 | EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE); |
437 | channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1; |
438 | |
439 | return efx_nic_probe_eventq(channel); |
440 | } |
441 | |
442 | /* Prepare channel's event queue */ |
443 | int efx_init_eventq(struct efx_channel *channel) |
444 | { |
445 | struct efx_nic *efx = channel->efx; |
446 | int rc; |
447 | |
448 | EFX_WARN_ON_PARANOID(channel->eventq_init); |
449 | |
450 | netif_dbg(efx, drv, efx->net_dev, |
451 | "chan %d init event queue\n" , channel->channel); |
452 | |
453 | rc = efx_nic_init_eventq(channel); |
454 | if (rc == 0) { |
455 | efx->type->push_irq_moderation(channel); |
456 | channel->eventq_read_ptr = 0; |
457 | channel->eventq_init = true; |
458 | } |
459 | return rc; |
460 | } |
461 | |
462 | /* Enable event queue processing and NAPI */ |
463 | void efx_start_eventq(struct efx_channel *channel) |
464 | { |
465 | netif_dbg(channel->efx, ifup, channel->efx->net_dev, |
466 | "chan %d start event queue\n" , channel->channel); |
467 | |
468 | /* Make sure the NAPI handler sees the enabled flag set */ |
469 | channel->enabled = true; |
470 | smp_wmb(); |
471 | |
	napi_enable(&channel->napi_str);
473 | efx_nic_eventq_read_ack(channel); |
474 | } |
475 | |
476 | /* Disable event queue processing and NAPI */ |
477 | void efx_stop_eventq(struct efx_channel *channel) |
478 | { |
479 | if (!channel->enabled) |
480 | return; |
481 | |
	napi_disable(&channel->napi_str);
483 | channel->enabled = false; |
484 | } |
485 | |
486 | void efx_fini_eventq(struct efx_channel *channel) |
487 | { |
488 | if (!channel->eventq_init) |
489 | return; |
490 | |
491 | netif_dbg(channel->efx, drv, channel->efx->net_dev, |
492 | "chan %d fini event queue\n" , channel->channel); |
493 | |
494 | efx_nic_fini_eventq(channel); |
495 | channel->eventq_init = false; |
496 | } |
497 | |
498 | void efx_remove_eventq(struct efx_channel *channel) |
499 | { |
500 | netif_dbg(channel->efx, drv, channel->efx->net_dev, |
501 | "chan %d remove event queue\n" , channel->channel); |
502 | |
503 | efx_nic_remove_eventq(channel); |
504 | } |
505 | |
506 | /************************************************************************** |
507 | * |
508 | * Channel handling |
509 | * |
510 | *************************************************************************/ |
511 | |
512 | #ifdef CONFIG_RFS_ACCEL |
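/* Periodic ARFS filter expiry.  The quota scales with the filter count and
 * the time since the last run, so that the whole filter table is checked
 * roughly once every 30 seconds.
 */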
513 | static void efx_filter_rfs_expire(struct work_struct *data) |
514 | { |
	struct delayed_work *dwork = to_delayed_work(data);
516 | struct efx_channel *channel; |
517 | unsigned int time, quota; |
518 | |
519 | channel = container_of(dwork, struct efx_channel, filter_work); |
520 | time = jiffies - channel->rfs_last_expiry; |
521 | quota = channel->rfs_filter_count * time / (30 * HZ); |
522 | if (quota >= 20 && __efx_filter_rfs_expire(channel, min(channel->rfs_filter_count, quota))) |
523 | channel->rfs_last_expiry += time; |
524 | /* Ensure we do more work eventually even if NAPI poll is not happening */ |
	schedule_delayed_work(dwork, 30 * HZ);
526 | } |
527 | #endif |
528 | |
529 | /* Allocate and initialise a channel structure. */ |
530 | static struct efx_channel *efx_alloc_channel(struct efx_nic *efx, int i) |
531 | { |
532 | struct efx_rx_queue *rx_queue; |
533 | struct efx_tx_queue *tx_queue; |
534 | struct efx_channel *channel; |
535 | int j; |
536 | |
	channel = kzalloc(sizeof(*channel), GFP_KERNEL);
538 | if (!channel) |
539 | return NULL; |
540 | |
541 | channel->efx = efx; |
542 | channel->channel = i; |
543 | channel->type = &efx_default_channel_type; |
544 | |
545 | for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) { |
546 | tx_queue = &channel->tx_queue[j]; |
547 | tx_queue->efx = efx; |
548 | tx_queue->queue = -1; |
549 | tx_queue->label = j; |
550 | tx_queue->channel = channel; |
551 | } |
552 | |
553 | #ifdef CONFIG_RFS_ACCEL |
554 | INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire); |
555 | #endif |
556 | |
557 | rx_queue = &channel->rx_queue; |
558 | rx_queue->efx = efx; |
559 | timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); |
560 | |
561 | return channel; |
562 | } |
563 | |
564 | int efx_init_channels(struct efx_nic *efx) |
565 | { |
566 | unsigned int i; |
567 | |
568 | for (i = 0; i < EFX_MAX_CHANNELS; i++) { |
569 | efx->channel[i] = efx_alloc_channel(efx, i); |
570 | if (!efx->channel[i]) |
571 | return -ENOMEM; |
572 | efx->msi_context[i].efx = efx; |
573 | efx->msi_context[i].index = i; |
574 | } |
575 | |
576 | /* Higher numbered interrupt modes are less capable! */ |
577 | efx->interrupt_mode = min(efx->type->min_interrupt_mode, |
578 | efx_interrupt_mode); |
579 | |
580 | efx->max_channels = EFX_MAX_CHANNELS; |
581 | efx->max_tx_channels = EFX_MAX_CHANNELS; |
582 | |
583 | return 0; |
584 | } |
585 | |
586 | void efx_fini_channels(struct efx_nic *efx) |
587 | { |
588 | unsigned int i; |
589 | |
590 | for (i = 0; i < EFX_MAX_CHANNELS; i++) |
591 | if (efx->channel[i]) { |
			kfree(efx->channel[i]);
593 | efx->channel[i] = NULL; |
594 | } |
595 | } |
596 | |
597 | /* Allocate and initialise a channel structure, copying parameters |
598 | * (but not resources) from an old channel structure. |
599 | */ |
600 | struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel) |
601 | { |
602 | struct efx_rx_queue *rx_queue; |
603 | struct efx_tx_queue *tx_queue; |
604 | struct efx_channel *channel; |
605 | int j; |
606 | |
	channel = kmalloc(sizeof(*channel), GFP_KERNEL);
608 | if (!channel) |
609 | return NULL; |
610 | |
611 | *channel = *old_channel; |
612 | |
613 | channel->napi_dev = NULL; |
	INIT_HLIST_NODE(&channel->napi_str.napi_hash_node);
615 | channel->napi_str.napi_id = 0; |
616 | channel->napi_str.state = 0; |
617 | memset(&channel->eventq, 0, sizeof(channel->eventq)); |
618 | |
619 | for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) { |
620 | tx_queue = &channel->tx_queue[j]; |
621 | if (tx_queue->channel) |
622 | tx_queue->channel = channel; |
623 | tx_queue->buffer = NULL; |
624 | tx_queue->cb_page = NULL; |
625 | memset(&tx_queue->txd, 0, sizeof(tx_queue->txd)); |
626 | } |
627 | |
628 | rx_queue = &channel->rx_queue; |
629 | rx_queue->buffer = NULL; |
630 | memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd)); |
631 | timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); |
632 | #ifdef CONFIG_RFS_ACCEL |
633 | INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire); |
634 | #endif |
635 | |
636 | return channel; |
637 | } |
638 | |
639 | static int efx_probe_channel(struct efx_channel *channel) |
640 | { |
641 | struct efx_tx_queue *tx_queue; |
642 | struct efx_rx_queue *rx_queue; |
643 | int rc; |
644 | |
645 | netif_dbg(channel->efx, probe, channel->efx->net_dev, |
646 | "creating channel %d\n" , channel->channel); |
647 | |
648 | rc = channel->type->pre_probe(channel); |
649 | if (rc) |
650 | goto fail; |
651 | |
652 | rc = efx_probe_eventq(channel); |
653 | if (rc) |
654 | goto fail; |
655 | |
656 | efx_for_each_channel_tx_queue(tx_queue, channel) { |
657 | rc = efx_probe_tx_queue(tx_queue); |
658 | if (rc) |
659 | goto fail; |
660 | } |
661 | |
662 | efx_for_each_channel_rx_queue(rx_queue, channel) { |
663 | rc = efx_probe_rx_queue(rx_queue); |
664 | if (rc) |
665 | goto fail; |
666 | } |
667 | |
668 | channel->rx_list = NULL; |
669 | |
670 | return 0; |
671 | |
672 | fail: |
673 | efx_remove_channel(channel); |
674 | return rc; |
675 | } |
676 | |
677 | static void efx_get_channel_name(struct efx_channel *channel, char *buf, |
678 | size_t len) |
679 | { |
680 | struct efx_nic *efx = channel->efx; |
681 | const char *type; |
682 | int number; |
683 | |
684 | number = channel->channel; |
685 | |
686 | if (number >= efx->xdp_channel_offset && |
687 | !WARN_ON_ONCE(!efx->n_xdp_channels)) { |
688 | type = "-xdp" ; |
689 | number -= efx->xdp_channel_offset; |
690 | } else if (efx->tx_channel_offset == 0) { |
691 | type = "" ; |
692 | } else if (number < efx->tx_channel_offset) { |
693 | type = "-rx" ; |
694 | } else { |
695 | type = "-tx" ; |
696 | number -= efx->tx_channel_offset; |
697 | } |
	snprintf(buf, len, "%s%s-%d", efx->name, type, number);
699 | } |
700 | |
701 | void efx_set_channel_names(struct efx_nic *efx) |
702 | { |
703 | struct efx_channel *channel; |
704 | |
705 | efx_for_each_channel(channel, efx) |
706 | channel->type->get_name(channel, |
707 | efx->msi_context[channel->channel].name, |
708 | sizeof(efx->msi_context[0].name)); |
709 | } |
710 | |
711 | int efx_probe_channels(struct efx_nic *efx) |
712 | { |
713 | struct efx_channel *channel; |
714 | int rc; |
715 | |
716 | /* Probe channels in reverse, so that any 'extra' channels |
717 | * use the start of the buffer table. This allows the traffic |
718 | * channels to be resized without moving them or wasting the |
719 | * entries before them. |
720 | */ |
721 | efx_for_each_channel_rev(channel, efx) { |
722 | rc = efx_probe_channel(channel); |
723 | if (rc) { |
724 | netif_err(efx, probe, efx->net_dev, |
725 | "failed to create channel %d\n" , |
726 | channel->channel); |
727 | goto fail; |
728 | } |
729 | } |
730 | efx_set_channel_names(efx); |
731 | |
732 | return 0; |
733 | |
734 | fail: |
735 | efx_remove_channels(efx); |
736 | return rc; |
737 | } |
738 | |
739 | void efx_remove_channel(struct efx_channel *channel) |
740 | { |
741 | struct efx_tx_queue *tx_queue; |
742 | struct efx_rx_queue *rx_queue; |
743 | |
744 | netif_dbg(channel->efx, drv, channel->efx->net_dev, |
745 | "destroy chan %d\n" , channel->channel); |
746 | |
747 | efx_for_each_channel_rx_queue(rx_queue, channel) |
748 | efx_remove_rx_queue(rx_queue); |
749 | efx_for_each_channel_tx_queue(tx_queue, channel) |
750 | efx_remove_tx_queue(tx_queue); |
751 | efx_remove_eventq(channel); |
752 | channel->type->post_remove(channel); |
753 | } |
754 | |
755 | void efx_remove_channels(struct efx_nic *efx) |
756 | { |
757 | struct efx_channel *channel; |
758 | |
759 | efx_for_each_channel(channel, efx) |
760 | efx_remove_channel(channel); |
761 | |
	kfree(efx->xdp_tx_queues);
763 | } |
764 | |
765 | static int efx_set_xdp_tx_queue(struct efx_nic *efx, int xdp_queue_number, |
766 | struct efx_tx_queue *tx_queue) |
767 | { |
768 | if (xdp_queue_number >= efx->xdp_tx_queue_count) |
769 | return -EINVAL; |
770 | |
771 | netif_dbg(efx, drv, efx->net_dev, |
772 | "Channel %u TXQ %u is XDP %u, HW %u\n" , |
773 | tx_queue->channel->channel, tx_queue->label, |
774 | xdp_queue_number, tx_queue->queue); |
775 | efx->xdp_tx_queues[xdp_queue_number] = tx_queue; |
776 | return 0; |
777 | } |
778 | |
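/* Assign hardware TX queue numbers and populate the XDP TX queue lookup
 * table.  Depending on xdp_txq_queues_mode the XDP TX queues are dedicated,
 * shared between several CPUs, or borrowed from the normal TX channels.
 */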
779 | static void efx_set_xdp_channels(struct efx_nic *efx) |
780 | { |
781 | struct efx_tx_queue *tx_queue; |
782 | struct efx_channel *channel; |
783 | unsigned int next_queue = 0; |
784 | int xdp_queue_number = 0; |
785 | int rc; |
786 | |
787 | /* We need to mark which channels really have RX and TX |
788 | * queues, and adjust the TX queue numbers if we have separate |
789 | * RX-only and TX-only channels. |
790 | */ |
791 | efx_for_each_channel(channel, efx) { |
792 | if (channel->channel < efx->tx_channel_offset) |
793 | continue; |
794 | |
795 | if (efx_channel_is_xdp_tx(channel)) { |
796 | efx_for_each_channel_tx_queue(tx_queue, channel) { |
797 | tx_queue->queue = next_queue++; |
798 | rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, |
799 | tx_queue); |
800 | if (rc == 0) |
801 | xdp_queue_number++; |
802 | } |
803 | } else { |
804 | efx_for_each_channel_tx_queue(tx_queue, channel) { |
805 | tx_queue->queue = next_queue++; |
806 | netif_dbg(efx, drv, efx->net_dev, |
807 | "Channel %u TXQ %u is HW %u\n" , |
808 | channel->channel, tx_queue->label, |
809 | tx_queue->queue); |
810 | } |
811 | |
812 | /* If XDP is borrowing queues from net stack, it must |
813 | * use the queue with no csum offload, which is the |
814 | * first one of the channel |
815 | * (note: tx_queue_by_type is not initialized yet) |
816 | */ |
817 | if (efx->xdp_txq_queues_mode == |
818 | EFX_XDP_TX_QUEUES_BORROWED) { |
819 | tx_queue = &channel->tx_queue[0]; |
820 | rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, |
821 | tx_queue); |
822 | if (rc == 0) |
823 | xdp_queue_number++; |
824 | } |
825 | } |
826 | } |
827 | WARN_ON(efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_DEDICATED && |
828 | xdp_queue_number != efx->xdp_tx_queue_count); |
829 | WARN_ON(efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED && |
830 | xdp_queue_number > efx->xdp_tx_queue_count); |
831 | |
832 | /* If we have more CPUs than assigned XDP TX queues, assign the already |
833 | * existing queues to the exceeding CPUs |
834 | */ |
835 | next_queue = 0; |
836 | while (xdp_queue_number < efx->xdp_tx_queue_count) { |
837 | tx_queue = efx->xdp_tx_queues[next_queue++]; |
838 | rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue); |
839 | if (rc == 0) |
840 | xdp_queue_number++; |
841 | } |
842 | } |
843 | |
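/* Re-create the channels with new RX/TX ring sizes.  Channels that provide
 * a ->copy method are cloned and re-probed; if anything fails, the old
 * channels and ring sizes are swapped back in before restarting.
 */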
844 | int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) |
845 | { |
846 | struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel, |
847 | *ptp_channel = efx_ptp_channel(efx); |
848 | struct efx_ptp_data *ptp_data = efx->ptp_data; |
849 | u32 old_rxq_entries, old_txq_entries; |
850 | unsigned int i; |
851 | int rc, rc2; |
852 | |
853 | rc = efx_check_disabled(efx); |
854 | if (rc) |
855 | return rc; |
856 | |
857 | efx_device_detach_sync(efx); |
858 | efx_stop_all(efx); |
859 | efx_soft_disable_interrupts(efx); |
860 | |
861 | /* Clone channels (where possible) */ |
862 | memset(other_channel, 0, sizeof(other_channel)); |
863 | for (i = 0; i < efx->n_channels; i++) { |
864 | channel = efx->channel[i]; |
865 | if (channel->type->copy) |
866 | channel = channel->type->copy(channel); |
867 | if (!channel) { |
868 | rc = -ENOMEM; |
869 | goto out; |
870 | } |
871 | other_channel[i] = channel; |
872 | } |
873 | |
874 | /* Swap entry counts and channel pointers */ |
875 | old_rxq_entries = efx->rxq_entries; |
876 | old_txq_entries = efx->txq_entries; |
877 | efx->rxq_entries = rxq_entries; |
878 | efx->txq_entries = txq_entries; |
879 | for (i = 0; i < efx->n_channels; i++) |
880 | swap(efx->channel[i], other_channel[i]); |
881 | |
882 | for (i = 0; i < efx->n_channels; i++) { |
883 | channel = efx->channel[i]; |
884 | if (!channel->type->copy) |
885 | continue; |
886 | rc = efx_probe_channel(channel); |
887 | if (rc) |
888 | goto rollback; |
		efx_init_napi_channel(efx->channel[i]);
890 | } |
891 | |
892 | efx_set_xdp_channels(efx); |
893 | out: |
894 | efx->ptp_data = NULL; |
895 | /* Destroy unused channel structures */ |
896 | for (i = 0; i < efx->n_channels; i++) { |
897 | channel = other_channel[i]; |
898 | if (channel && channel->type->copy) { |
899 | efx_fini_napi_channel(channel); |
900 | efx_remove_channel(channel); |
			kfree(channel);
902 | } |
903 | } |
904 | |
905 | efx->ptp_data = ptp_data; |
906 | rc2 = efx_soft_enable_interrupts(efx); |
907 | if (rc2) { |
908 | rc = rc ? rc : rc2; |
909 | netif_err(efx, drv, efx->net_dev, |
910 | "unable to restart interrupts on channel reallocation\n" ); |
		efx_schedule_reset(efx, RESET_TYPE_DISABLE);
912 | } else { |
913 | efx_start_all(efx); |
914 | efx_device_attach_if_not_resetting(efx); |
915 | } |
916 | return rc; |
917 | |
918 | rollback: |
919 | /* Swap back */ |
920 | efx->rxq_entries = old_rxq_entries; |
921 | efx->txq_entries = old_txq_entries; |
922 | for (i = 0; i < efx->n_channels; i++) |
923 | swap(efx->channel[i], other_channel[i]); |
	efx_ptp_update_channel(efx, ptp_channel);
925 | goto out; |
926 | } |
927 | |
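/* Assign RX queue core indices, allocate the XDP TX queue lookup table and
 * tell the network stack how many real TX and RX queues we have.
 */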
928 | int efx_set_channels(struct efx_nic *efx) |
929 | { |
930 | struct efx_channel *channel; |
931 | int rc; |
932 | |
933 | if (efx->xdp_tx_queue_count) { |
934 | EFX_WARN_ON_PARANOID(efx->xdp_tx_queues); |
935 | |
936 | /* Allocate array for XDP TX queue lookup. */ |
		efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count,
					     sizeof(*efx->xdp_tx_queues),
					     GFP_KERNEL);
940 | if (!efx->xdp_tx_queues) |
941 | return -ENOMEM; |
942 | } |
943 | |
944 | efx_for_each_channel(channel, efx) { |
945 | if (channel->channel < efx->n_rx_channels) |
946 | channel->rx_queue.core_index = channel->channel; |
947 | else |
948 | channel->rx_queue.core_index = -1; |
949 | } |
950 | |
951 | efx_set_xdp_channels(efx); |
952 | |
	rc = netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
954 | if (rc) |
955 | return rc; |
	return netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
957 | } |
958 | |
959 | static bool efx_default_channel_want_txqs(struct efx_channel *channel) |
960 | { |
961 | return channel->channel - channel->efx->tx_channel_offset < |
962 | channel->efx->n_tx_channels; |
963 | } |
964 | |
965 | /************* |
966 | * START/STOP |
967 | *************/ |
968 | |
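/* Bring up the event queues and NAPI on every channel and switch MCDI into
 * event-completion mode.  On failure, any channels that were already
 * started are torn back down.
 */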
969 | int efx_soft_enable_interrupts(struct efx_nic *efx) |
970 | { |
971 | struct efx_channel *channel, *end_channel; |
972 | int rc; |
973 | |
974 | BUG_ON(efx->state == STATE_DISABLED); |
975 | |
976 | efx->irq_soft_enabled = true; |
977 | smp_wmb(); |
978 | |
979 | efx_for_each_channel(channel, efx) { |
980 | if (!channel->type->keep_eventq) { |
981 | rc = efx_init_eventq(channel); |
982 | if (rc) |
983 | goto fail; |
984 | } |
985 | efx_start_eventq(channel); |
986 | } |
987 | |
988 | efx_mcdi_mode_event(efx); |
989 | |
990 | return 0; |
991 | fail: |
992 | end_channel = channel; |
993 | efx_for_each_channel(channel, efx) { |
994 | if (channel == end_channel) |
995 | break; |
996 | efx_stop_eventq(channel); |
997 | if (!channel->type->keep_eventq) |
998 | efx_fini_eventq(channel); |
999 | } |
1000 | |
1001 | return rc; |
1002 | } |
1003 | |
1004 | void efx_soft_disable_interrupts(struct efx_nic *efx) |
1005 | { |
1006 | struct efx_channel *channel; |
1007 | |
1008 | if (efx->state == STATE_DISABLED) |
1009 | return; |
1010 | |
1011 | efx_mcdi_mode_poll(efx); |
1012 | |
1013 | efx->irq_soft_enabled = false; |
1014 | smp_wmb(); |
1015 | |
1016 | if (efx->legacy_irq) |
		synchronize_irq(efx->legacy_irq);
1018 | |
1019 | efx_for_each_channel(channel, efx) { |
1020 | if (channel->irq) |
			synchronize_irq(channel->irq);
1022 | |
1023 | efx_stop_eventq(channel); |
1024 | if (!channel->type->keep_eventq) |
1025 | efx_fini_eventq(channel); |
1026 | } |
1027 | |
1028 | /* Flush the asynchronous MCDI request queue */ |
1029 | efx_mcdi_flush_async(efx); |
1030 | } |
1031 | |
1032 | int efx_enable_interrupts(struct efx_nic *efx) |
1033 | { |
1034 | struct efx_channel *channel, *end_channel; |
1035 | int rc; |
1036 | |
1037 | /* TODO: Is this really a bug? */ |
1038 | BUG_ON(efx->state == STATE_DISABLED); |
1039 | |
1040 | if (efx->eeh_disabled_legacy_irq) { |
		enable_irq(efx->legacy_irq);
1042 | efx->eeh_disabled_legacy_irq = false; |
1043 | } |
1044 | |
1045 | efx->type->irq_enable_master(efx); |
1046 | |
1047 | efx_for_each_channel(channel, efx) { |
1048 | if (channel->type->keep_eventq) { |
1049 | rc = efx_init_eventq(channel); |
1050 | if (rc) |
1051 | goto fail; |
1052 | } |
1053 | } |
1054 | |
1055 | rc = efx_soft_enable_interrupts(efx); |
1056 | if (rc) |
1057 | goto fail; |
1058 | |
1059 | return 0; |
1060 | |
1061 | fail: |
1062 | end_channel = channel; |
1063 | efx_for_each_channel(channel, efx) { |
1064 | if (channel == end_channel) |
1065 | break; |
1066 | if (channel->type->keep_eventq) |
1067 | efx_fini_eventq(channel); |
1068 | } |
1069 | |
1070 | efx->type->irq_disable_non_ev(efx); |
1071 | |
1072 | return rc; |
1073 | } |
1074 | |
1075 | void efx_disable_interrupts(struct efx_nic *efx) |
1076 | { |
1077 | struct efx_channel *channel; |
1078 | |
1079 | efx_soft_disable_interrupts(efx); |
1080 | |
1081 | efx_for_each_channel(channel, efx) { |
1082 | if (channel->type->keep_eventq) |
1083 | efx_fini_eventq(channel); |
1084 | } |
1085 | |
1086 | efx->type->irq_disable_non_ev(efx); |
1087 | } |
1088 | |
1089 | void efx_start_channels(struct efx_nic *efx) |
1090 | { |
1091 | struct efx_tx_queue *tx_queue; |
1092 | struct efx_rx_queue *rx_queue; |
1093 | struct efx_channel *channel; |
1094 | |
1095 | efx_for_each_channel_rev(channel, efx) { |
1096 | if (channel->type->start) |
1097 | channel->type->start(channel); |
1098 | efx_for_each_channel_tx_queue(tx_queue, channel) { |
1099 | efx_init_tx_queue(tx_queue); |
			atomic_inc(&efx->active_queues);
1101 | } |
1102 | |
1103 | efx_for_each_channel_rx_queue(rx_queue, channel) { |
1104 | efx_init_rx_queue(rx_queue); |
			atomic_inc(&efx->active_queues);
			efx_stop_eventq(channel);
			efx_fast_push_rx_descriptors(rx_queue, false);
1108 | efx_start_eventq(channel); |
1109 | } |
1110 | |
1111 | WARN_ON(channel->rx_pkt_n_frags); |
1112 | } |
1113 | } |
1114 | |
1115 | void efx_stop_channels(struct efx_nic *efx) |
1116 | { |
1117 | struct efx_tx_queue *tx_queue; |
1118 | struct efx_rx_queue *rx_queue; |
1119 | struct efx_channel *channel; |
1120 | int rc = 0; |
1121 | |
1122 | /* Stop special channels and RX refill. |
1123 | * The channel's stop has to be called first, since it might wait |
1124 | * for a sentinel RX to indicate the channel has fully drained. |
1125 | */ |
1126 | efx_for_each_channel(channel, efx) { |
1127 | if (channel->type->stop) |
1128 | channel->type->stop(channel); |
1129 | efx_for_each_channel_rx_queue(rx_queue, channel) |
1130 | rx_queue->refill_enabled = false; |
1131 | } |
1132 | |
1133 | efx_for_each_channel(channel, efx) { |
1134 | /* RX packet processing is pipelined, so wait for the |
1135 | * NAPI handler to complete. At least event queue 0 |
1136 | * might be kept active by non-data events, so don't |
1137 | * use napi_synchronize() but actually disable NAPI |
1138 | * temporarily. |
1139 | */ |
1140 | if (efx_channel_has_rx_queue(channel)) { |
1141 | efx_stop_eventq(channel); |
1142 | efx_start_eventq(channel); |
1143 | } |
1144 | } |
1145 | |
1146 | if (efx->type->fini_dmaq) |
1147 | rc = efx->type->fini_dmaq(efx); |
1148 | |
1149 | if (rc) { |
1150 | netif_err(efx, drv, efx->net_dev, "failed to flush queues\n" ); |
1151 | } else { |
1152 | netif_dbg(efx, drv, efx->net_dev, |
1153 | "successfully flushed all queues\n" ); |
1154 | } |
1155 | |
1156 | efx_for_each_channel(channel, efx) { |
1157 | efx_for_each_channel_rx_queue(rx_queue, channel) |
1158 | efx_fini_rx_queue(rx_queue); |
1159 | efx_for_each_channel_tx_queue(tx_queue, channel) |
1160 | efx_fini_tx_queue(tx_queue); |
1161 | } |
1162 | } |
1163 | |
1164 | /************************************************************************** |
1165 | * |
1166 | * NAPI interface |
1167 | * |
1168 | *************************************************************************/ |
1169 | |
1170 | /* Process channel's event queue |
1171 | * |
1172 | * This function is responsible for processing the event queue of a |
1173 | * single channel. The caller must guarantee that this function will |
1174 | * never be concurrently called more than once on the same channel, |
1175 | * though different channels may be being processed concurrently. |
1176 | */ |
1177 | static int efx_process_channel(struct efx_channel *channel, int budget) |
1178 | { |
1179 | struct efx_tx_queue *tx_queue; |
1180 | struct list_head rx_list; |
1181 | int spent; |
1182 | |
1183 | if (unlikely(!channel->enabled)) |
1184 | return 0; |
1185 | |
1186 | /* Prepare the batch receive list */ |
1187 | EFX_WARN_ON_PARANOID(channel->rx_list != NULL); |
	INIT_LIST_HEAD(&rx_list);
1189 | channel->rx_list = &rx_list; |
1190 | |
1191 | efx_for_each_channel_tx_queue(tx_queue, channel) { |
1192 | tx_queue->pkts_compl = 0; |
1193 | tx_queue->bytes_compl = 0; |
1194 | } |
1195 | |
	spent = efx_nic_process_eventq(channel, budget);
1197 | if (spent && efx_channel_has_rx_queue(channel)) { |
1198 | struct efx_rx_queue *rx_queue = |
1199 | efx_channel_get_rx_queue(channel); |
1200 | |
1201 | efx_rx_flush_packet(channel); |
		efx_fast_push_rx_descriptors(rx_queue, true);
1203 | } |
1204 | |
1205 | /* Update BQL */ |
1206 | efx_for_each_channel_tx_queue(tx_queue, channel) { |
1207 | if (tx_queue->bytes_compl) { |
			netdev_tx_completed_queue(tx_queue->core_txq,
						  tx_queue->pkts_compl,
						  tx_queue->bytes_compl);
1211 | } |
1212 | } |
1213 | |
1214 | /* Receive any packets we queued up */ |
	netif_receive_skb_list(channel->rx_list);
1216 | channel->rx_list = NULL; |
1217 | |
1218 | return spent; |
1219 | } |
1220 | |
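/* Adaptive IRQ moderation: decrease the moderation interval when the
 * channel's event score drops below irq_adapt_low_thresh, and increase it
 * (up to the configured RX moderation) when the score exceeds
 * irq_adapt_high_thresh.
 */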
1221 | static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel) |
1222 | { |
1223 | int step = efx->irq_mod_step_us; |
1224 | |
1225 | if (channel->irq_mod_score < irq_adapt_low_thresh) { |
1226 | if (channel->irq_moderation_us > step) { |
1227 | channel->irq_moderation_us -= step; |
1228 | efx->type->push_irq_moderation(channel); |
1229 | } |
1230 | } else if (channel->irq_mod_score > irq_adapt_high_thresh) { |
1231 | if (channel->irq_moderation_us < |
1232 | efx->irq_rx_moderation_us) { |
1233 | channel->irq_moderation_us += step; |
1234 | efx->type->push_irq_moderation(channel); |
1235 | } |
1236 | } |
1237 | |
1238 | channel->irq_count = 0; |
1239 | channel->irq_mod_score = 0; |
1240 | } |
1241 | |
1242 | /* NAPI poll handler |
1243 | * |
1244 | * NAPI guarantees serialisation of polls of the same device, which |
1245 | * provides the guarantee required by efx_process_channel(). |
1246 | */ |
1247 | static int efx_poll(struct napi_struct *napi, int budget) |
1248 | { |
1249 | struct efx_channel *channel = |
1250 | container_of(napi, struct efx_channel, napi_str); |
1251 | struct efx_nic *efx = channel->efx; |
1252 | #ifdef CONFIG_RFS_ACCEL |
1253 | unsigned int time; |
1254 | #endif |
1255 | int spent; |
1256 | |
1257 | netif_vdbg(efx, intr, efx->net_dev, |
1258 | "channel %d NAPI poll executing on CPU %d\n" , |
1259 | channel->channel, raw_smp_processor_id()); |
1260 | |
1261 | spent = efx_process_channel(channel, budget); |
1262 | |
1263 | xdp_do_flush(); |
1264 | |
1265 | if (spent < budget) { |
1266 | if (efx_channel_has_rx_queue(channel) && |
1267 | efx->irq_rx_adaptive && |
1268 | unlikely(++channel->irq_count == 1000)) { |
1269 | efx_update_irq_mod(efx, channel); |
1270 | } |
1271 | |
1272 | #ifdef CONFIG_RFS_ACCEL |
1273 | /* Perhaps expire some ARFS filters */ |
1274 | time = jiffies - channel->rfs_last_expiry; |
1275 | /* Would our quota be >= 20? */ |
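		/* i.e. would rfs_filter_count * time / (30 * HZ) be >= 20?
		 * Rearranged below to avoid the division.
		 */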
1276 | if (channel->rfs_filter_count * time >= 600 * HZ) |
			mod_delayed_work(system_wq, &channel->filter_work, 0);
1278 | #endif |
1279 | |
1280 | /* There is no race here; although napi_disable() will |
1281 | * only wait for napi_complete(), this isn't a problem |
1282 | * since efx_nic_eventq_read_ack() will have no effect if |
1283 | * interrupts have already been disabled. |
1284 | */ |
		if (napi_complete_done(napi, spent))
1286 | efx_nic_eventq_read_ack(channel); |
1287 | } |
1288 | |
1289 | return spent; |
1290 | } |
1291 | |
1292 | void efx_init_napi_channel(struct efx_channel *channel) |
1293 | { |
1294 | struct efx_nic *efx = channel->efx; |
1295 | |
1296 | channel->napi_dev = efx->net_dev; |
	netif_napi_add(channel->napi_dev, &channel->napi_str, efx_poll);
1298 | } |
1299 | |
1300 | void efx_init_napi(struct efx_nic *efx) |
1301 | { |
1302 | struct efx_channel *channel; |
1303 | |
1304 | efx_for_each_channel(channel, efx) |
1305 | efx_init_napi_channel(channel); |
1306 | } |
1307 | |
1308 | void efx_fini_napi_channel(struct efx_channel *channel) |
1309 | { |
1310 | if (channel->napi_dev) |
		netif_napi_del(&channel->napi_str);
1312 | |
1313 | channel->napi_dev = NULL; |
1314 | } |
1315 | |
1316 | void efx_fini_napi(struct efx_nic *efx) |
1317 | { |
1318 | struct efx_channel *channel; |
1319 | |
1320 | efx_for_each_channel(channel, efx) |
1321 | efx_fini_napi_channel(channel); |
1322 | } |
1323 | |
1324 | /*************** |
1325 | * Housekeeping |
1326 | ***************/ |
1327 | |
1328 | static int efx_channel_dummy_op_int(struct efx_channel *channel) |
1329 | { |
1330 | return 0; |
1331 | } |
1332 | |
1333 | void efx_channel_dummy_op_void(struct efx_channel *channel) |
1334 | { |
1335 | } |
1336 | |
1337 | static const struct efx_channel_type efx_default_channel_type = { |
1338 | .pre_probe = efx_channel_dummy_op_int, |
1339 | .post_remove = efx_channel_dummy_op_void, |
1340 | .get_name = efx_get_channel_name, |
1341 | .copy = efx_copy_channel, |
1342 | .want_txqs = efx_default_channel_want_txqs, |
1343 | .keep_eventq = false, |
1344 | .want_pio = true, |
1345 | }; |
1346 | |