ice_main.c source code [linux/drivers/net/ethernet/intel/ice/ice_main.c]

1	// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018-2023, Intel Corporation. */
3
/* Intel(R) Ethernet Connection E800 Series Linux Driver */
5
6	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8	#include <generated/utsrelease.h>
9	#include <linux/crash_dump.h>
10	#include "ice.h"
11	#include "ice_base.h"
12	#include "ice_lib.h"
13	#include "ice_fltr.h"
14	#include "ice_dcb_lib.h"
15	#include "ice_dcb_nl.h"
16	#include "ice_devlink.h"
17	#include "ice_hwmon.h"
/* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the
 * ice tracepoint functions. This must be done exactly once across the
 * ice driver.
 */
22	#define CREATE_TRACE_POINTS
23	#include "ice_trace.h"
24	#include "ice_eswitch.h"
25	#include "ice_tc_lib.h"
26	#include "ice_vsi_vlan_ops.h"
27	#include <net/xdp_sock_drv.h>
28
29	#define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver"
30	static const char ice_driver_string[] = DRV_SUMMARY;
31	static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation.";
32
/* DDP Package file located in firmware search paths (e.g. /lib/firmware/) */
34	#define ICE_DDP_PKG_PATH "intel/ice/ddp/"
35	#define ICE_DDP_PKG_FILE ICE_DDP_PKG_PATH "ice.pkg"
36
37	MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
38	MODULE_DESCRIPTION(DRV_SUMMARY);
39	MODULE_LICENSE("GPL v2");
40	MODULE_FIRMWARE(ICE_DDP_PKG_FILE);
41
42	static int debug = -`1`;
43	module_param(debug, int, `0644`);
44	#ifndef CONFIG_DYNAMIC_DEBUG
45	MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXXXXX)");
46	#else
47	MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
48	#endif /* !CONFIG_DYNAMIC_DEBUG */
49
50	DEFINE_STATIC_KEY_FALSE(ice_xdp_locking_key);
51	EXPORT_SYMBOL(ice_xdp_locking_key);
52
53	/**
54	* ice_hw_to_dev - Get device pointer from the hardware structure
55	* @hw: pointer to the device HW structure
56	*
57	* Used to access the device pointer from compilation units which can't easily
58	* include the definition of struct ice_pf without leading to circular header
59	* dependencies.
60	*/
61	struct device ice_hw_to_dev(struct* ice_hw *hw)
62	{
63	struct ice_pf pf = container_of(hw, struct* ice_pf, hw);
64
65	return &pf->pdev->dev;
66	}
67
68	static struct workqueue_struct *ice_wq;
69	struct workqueue_struct *ice_lag_wq;
70	static const struct net_device_ops ice_netdev_safe_mode_ops;
71	static const struct net_device_ops ice_netdev_ops;
72
73	static void ice_rebuild(struct ice_pf pf, enum* ice_reset_req reset_type);
74
75	static void ice_vsi_release_all(struct ice_pf *pf);
76
77	static int ice_rebuild_channels(struct ice_pf *pf);
78	static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_adv_fltr);
79
80	static int
81	ice_indr_setup_tc_cb(struct net_device netdev, struct* Qdisc *sch,
82	void cb_priv, enum* tc_setup_type type, void *type_data,
83	void *data,
84	void (cleanup)(struct* flow_block_cb *block_cb));
85
86	bool netif_is_ice(const struct net_device *dev)
87	{
88	return dev && (dev->netdev_ops == &ice_netdev_ops);
89	}
90
91	/**
92	* ice_get_tx_pending - returns number of Tx descriptors not processed
93	* @ring: the ring of descriptors
94	*/
95	static u16 ice_get_tx_pending(struct ice_tx_ring *ring)
96	{
97	u16 head, tail;
98
99	head = ring->next_to_clean;
100	tail = ring->next_to_use;
101
102	if (head != tail)
103	return (head < tail) ?
104	tail - head : (tail + ring->count - head);
105	return `0`;
106	}
107
108	/**
109	* ice_check_for_hang_subtask - check for and recover hung queues
110	* @pf: pointer to PF struct
111	*/
112	static void ice_check_for_hang_subtask(struct ice_pf *pf)
113	{
114	struct ice_vsi *vsi = NULL;
115	struct ice_hw *hw;
116	unsigned int i;
117	int packets;
118	u32 v;
119
120	ice_for_each_vsi(pf, v)
121	if (pf->vsi[v] && pf->vsi[v]->type == ICE_VSI_PF) {
122	vsi = pf->vsi[v];
123	break;
124	}
125
126	if (!vsi \|\| test_bit(ICE_VSI_DOWN, vsi->state))
127	return;
128
129	if (!(vsi->netdev && netif_carrier_ok(dev: vsi->netdev)))
130	return;
131
132	hw = &vsi->back->hw;
133
134	ice_for_each_txq(vsi, i) {
135	struct ice_tx_ring *tx_ring = vsi->tx_rings[i];
136	struct ice_ring_stats *ring_stats;
137
138	if (!tx_ring)
139	continue;
140	if (ice_ring_ch_enabled(ring: tx_ring))
141	continue;
142
143	ring_stats = tx_ring->ring_stats;
144	if (!ring_stats)
145	continue;
146
147	if (tx_ring->desc) {
148	/ If packet counter has not changed the queue is*
149	* likely stalled, so force an interrupt for this
150	* queue.
151	*
152	* prev_pkt would be negative if there was no
153	* pending work.
154	*/
155	packets = ring_stats->stats.pkts & INT_MAX;
156	if (ring_stats->tx_stats.prev_pkt == packets) {
157	/ Trigger sw interrupt to revive the queue /
158	ice_trigger_sw_intr(hw, q_vector: tx_ring->q_vector);
159	continue;
160	}
161
162	/ Memory barrier between read of packet count and call*
163	* to ice_get_tx_pending()
164	*/
165	smp_rmb();
166	ring_stats->tx_stats.prev_pkt =
167	ice_get_tx_pending(ring: tx_ring) ? packets : -`1`;
168	}
169	}
170	}
171
172	/**
173	* ice_init_mac_fltr - Set initial MAC filters
174	* @pf: board private structure
175	*
176	* Set initial set of MAC filters for PF VSI; configure filters for permanent
177	* address and broadcast address. If an error is encountered, netdevice will be
178	* unregistered.
179	*/
180	static int ice_init_mac_fltr(struct ice_pf *pf)
181	{
182	struct ice_vsi *vsi;
183	u8 *perm_addr;
184
185	vsi = ice_get_main_vsi(pf);
186	if (!vsi)
187	return -EINVAL;
188
189	perm_addr = vsi->port_info->mac.perm_addr;
190	return ice_fltr_add_mac_and_broadcast(vsi, mac: perm_addr, action: ICE_FWD_TO_VSI);
191	}
192
193	/**
194	* ice_add_mac_to_sync_list - creates list of MAC addresses to be synced
195	* @netdev: the net device on which the sync is happening
196	* @addr: MAC address to sync
197	*
198	* This is a callback function which is called by the in kernel device sync
199	* functions (like __dev_uc_sync, __dev_mc_sync, etc). This function only
200	* populates the tmp_sync_list, which is later used by ice_add_mac to add the
201	* MAC filters from the hardware.
202	*/
203	static int ice_add_mac_to_sync_list(struct net_device netdev, const* u8 *addr)
204	{
205	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
206	struct ice_vsi *vsi = np->vsi;
207
208	if (ice_fltr_add_mac_to_list(vsi, list: &vsi->tmp_sync_list, mac: addr,
209	action: ICE_FWD_TO_VSI))
210	return -EINVAL;
211
212	return `0`;
213	}
214
215	/**
216	* ice_add_mac_to_unsync_list - creates list of MAC addresses to be unsynced
217	* @netdev: the net device on which the unsync is happening
218	* @addr: MAC address to unsync
219	*
220	* This is a callback function which is called by the in kernel device unsync
221	* functions (like __dev_uc_unsync, __dev_mc_unsync, etc). This function only
222	* populates the tmp_unsync_list, which is later used by ice_remove_mac to
223	* delete the MAC filters from the hardware.
224	*/
225	static int ice_add_mac_to_unsync_list(struct net_device netdev, const* u8 *addr)
226	{
227	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
228	struct ice_vsi *vsi = np->vsi;
229
230	/ Under some circumstances, we might receive a request to delete our*
231	* own device address from our uc list. Because we store the device
232	* address in the VSI's MAC filter list, we need to ignore such
233	* requests and not delete our device address from this list.
234	*/
235	if (ether_addr_equal(addr1: addr, addr2: netdev->dev_addr))
236	return `0`;
237
238	if (ice_fltr_add_mac_to_list(vsi, list: &vsi->tmp_unsync_list, mac: addr,
239	action: ICE_FWD_TO_VSI))
240	return -EINVAL;
241
242	return `0`;
243	}
244
245	/**
246	* ice_vsi_fltr_changed - check if filter state changed
247	* @vsi: VSI to be checked
248	*
249	* returns true if filter state has changed, false otherwise.
250	*/
251	static bool ice_vsi_fltr_changed(struct ice_vsi *vsi)
252	{
253	return test_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state) \|\|
254	test_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
255	}
256
257	/**
258	* ice_set_promisc - Enable promiscuous mode for a given PF
259	* @vsi: the VSI being configured
260	* @promisc_m: mask of promiscuous config bits
261	*
262	*/
263	static int ice_set_promisc(struct ice_vsi *vsi, u8 promisc_m)
264	{
265	int status;
266
267	if (vsi->type != ICE_VSI_PF)
268	return `0`;
269
270	if (ice_vsi_has_non_zero_vlans(vsi)) {
271	promisc_m \|= (ICE_PROMISC_VLAN_RX \| ICE_PROMISC_VLAN_TX);
272	status = ice_fltr_set_vlan_vsi_promisc(hw: &vsi->back->hw, vsi,
273	promisc_mask: promisc_m);
274	} else {
275	status = ice_fltr_set_vsi_promisc(hw: &vsi->back->hw, vsi_handle: vsi->idx,
276	promisc_mask: promisc_m, vid: `0`);
277	}
278	if (status && status != -EEXIST)
279	return status;
280
281	netdev_dbg(vsi->netdev, "set promisc filter bits for VSI %i: 0x%x\n",
282	vsi->vsi_num, promisc_m);
283	return `0`;
284	}
285
286	/**
287	* ice_clear_promisc - Disable promiscuous mode for a given PF
288	* @vsi: the VSI being configured
289	* @promisc_m: mask of promiscuous config bits
290	*
291	*/
292	static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m)
293	{
294	int status;
295
296	if (vsi->type != ICE_VSI_PF)
297	return `0`;
298
299	if (ice_vsi_has_non_zero_vlans(vsi)) {
300	promisc_m \|= (ICE_PROMISC_VLAN_RX \| ICE_PROMISC_VLAN_TX);
301	status = ice_fltr_clear_vlan_vsi_promisc(hw: &vsi->back->hw, vsi,
302	promisc_mask: promisc_m);
303	} else {
304	status = ice_fltr_clear_vsi_promisc(hw: &vsi->back->hw, vsi_handle: vsi->idx,
305	promisc_mask: promisc_m, vid: `0`);
306	}
307
308	netdev_dbg(vsi->netdev, "clear promisc filter bits for VSI %i: 0x%x\n",
309	vsi->vsi_num, promisc_m);
310	return status;
311	}
312
313	/**
314	* ice_vsi_sync_fltr - Update the VSI filter list to the HW
315	* @vsi: ptr to the VSI
316	*
317	* Push any outstanding VSI filter changes through the AdminQ.
318	*/
319	static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
320	{
321	struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
322	struct device *dev = ice_pf_to_dev(vsi->back);
323	struct net_device *netdev = vsi->netdev;
324	bool promisc_forced_on = false;
325	struct ice_pf *pf = vsi->back;
326	struct ice_hw *hw = &pf->hw;
327	u32 changed_flags = `0`;
328	int err;
329
330	if (!vsi->netdev)
331	return -EINVAL;
332
333	while (test_and_set_bit(nr: ICE_CFG_BUSY, addr: vsi->state))
334	usleep_range(min: `1000`, max: `2000`);
335
336	changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
337	vsi->current_netdev_flags = vsi->netdev->flags;
338
339	INIT_LIST_HEAD(list: &vsi->tmp_sync_list);
340	INIT_LIST_HEAD(list: &vsi->tmp_unsync_list);
341
342	if (ice_vsi_fltr_changed(vsi)) {
343	clear_bit(nr: ICE_VSI_UMAC_FLTR_CHANGED, addr: vsi->state);
344	clear_bit(nr: ICE_VSI_MMAC_FLTR_CHANGED, addr: vsi->state);
345
346	/ grab the netdev's addr_list_lock /
347	netif_addr_lock_bh(dev: netdev);
348	__dev_uc_sync(dev: netdev, sync: ice_add_mac_to_sync_list,
349	unsync: ice_add_mac_to_unsync_list);
350	__dev_mc_sync(dev: netdev, sync: ice_add_mac_to_sync_list,
351	unsync: ice_add_mac_to_unsync_list);
352	/ our temp lists are populated. release lock /
353	netif_addr_unlock_bh(dev: netdev);
354	}
355
356	/ Remove MAC addresses in the unsync list /
357	err = ice_fltr_remove_mac_list(vsi, list: &vsi->tmp_unsync_list);
358	ice_fltr_free_list(dev, h: &vsi->tmp_unsync_list);
359	if (err) {
360	netdev_err(dev: netdev, format: "Failed to delete MAC filters\n");
361	/ if we failed because of alloc failures, just bail /
362	if (err == -ENOMEM)
363	goto out;
364	}
365
366	/ Add MAC addresses in the sync list /
367	err = ice_fltr_add_mac_list(vsi, list: &vsi->tmp_sync_list);
368	ice_fltr_free_list(dev, h: &vsi->tmp_sync_list);
369	/ If filter is added successfully or already exists, do not go into*
370	* 'if' condition and report it as error. Instead continue processing
371	* rest of the function.
372	*/
373	if (err && err != -EEXIST) {
374	netdev_err(dev: netdev, format: "Failed to add MAC filters\n");
375	/ If there is no more space for new umac filters, VSI*
376	* should go into promiscuous mode. There should be some
377	* space reserved for promiscuous filters.
378	*/
379	if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOSPC &&
380	!test_and_set_bit(nr: ICE_FLTR_OVERFLOW_PROMISC,
381	addr: vsi->state)) {
382	promisc_forced_on = true;
383	netdev_warn(dev: netdev, format: "Reached MAC filter limit, forcing promisc mode on VSI %d\n",
384	vsi->vsi_num);
385	} else {
386	goto out;
387	}
388	}
389	err = `0`;
390	/ check for changes in promiscuous modes /
391	if (changed_flags & IFF_ALLMULTI) {
392	if (vsi->current_netdev_flags & IFF_ALLMULTI) {
393	err = ice_set_promisc(vsi, ICE_MCAST_PROMISC_BITS);
394	if (err) {
395	vsi->current_netdev_flags &= ~IFF_ALLMULTI;
396	goto out_promisc;
397	}
398	} else {
399	/ !(vsi->current_netdev_flags & IFF_ALLMULTI) /
400	err = ice_clear_promisc(vsi, ICE_MCAST_PROMISC_BITS);
401	if (err) {
402	vsi->current_netdev_flags \|= IFF_ALLMULTI;
403	goto out_promisc;
404	}
405	}
406	}
407
408	if (((changed_flags & IFF_PROMISC) \|\| promisc_forced_on) \|\|
409	test_bit(ICE_VSI_PROMISC_CHANGED, vsi->state)) {
410	clear_bit(nr: ICE_VSI_PROMISC_CHANGED, addr: vsi->state);
411	if (vsi->current_netdev_flags & IFF_PROMISC) {
412	/ Apply Rx filter rule to get traffic from wire /
413	if (!ice_is_dflt_vsi_in_use(pi: vsi->port_info)) {
414	err = ice_set_dflt_vsi(vsi);
415	if (err && err != -EEXIST) {
416	netdev_err(dev: netdev, format: "Error %d setting default VSI %i Rx rule\n",
417	err, vsi->vsi_num);
418	vsi->current_netdev_flags &=
419	~IFF_PROMISC;
420	goto out_promisc;
421	}
422	err = `0`;
423	vlan_ops->dis_rx_filtering(vsi);
424
425	/ promiscuous mode implies allmulticast so*
426	* that VSIs that are in promiscuous mode are
427	* subscribed to multicast packets coming to
428	* the port
429	*/
430	err = ice_set_promisc(vsi,
431	ICE_MCAST_PROMISC_BITS);
432	if (err)
433	goto out_promisc;
434	}
435	} else {
436	/ Clear Rx filter to remove traffic from wire /
437	if (ice_is_vsi_dflt_vsi(vsi)) {
438	err = ice_clear_dflt_vsi(vsi);
439	if (err) {
440	netdev_err(dev: netdev, format: "Error %d clearing default VSI %i Rx rule\n",
441	err, vsi->vsi_num);
442	vsi->current_netdev_flags \|=
443	IFF_PROMISC;
444	goto out_promisc;
445	}
446	if (vsi->netdev->features &
447	NETIF_F_HW_VLAN_CTAG_FILTER)
448	vlan_ops->ena_rx_filtering(vsi);
449	}
450
451	/ disable allmulti here, but only if allmulti is not*
452	* still enabled for the netdev
453	*/
454	if (!(vsi->current_netdev_flags & IFF_ALLMULTI)) {
455	err = ice_clear_promisc(vsi,
456	ICE_MCAST_PROMISC_BITS);
457	if (err) {
458	netdev_err(dev: netdev, format: "Error %d clearing multicast promiscuous on VSI %i\n",
459	err, vsi->vsi_num);
460	}
461	}
462	}
463	}
464	goto exit;
465
466	out_promisc:
467	set_bit(nr: ICE_VSI_PROMISC_CHANGED, addr: vsi->state);
468	goto exit;
469	out:
470	/ if something went wrong then set the changed flag so we try again /
471	set_bit(nr: ICE_VSI_UMAC_FLTR_CHANGED, addr: vsi->state);
472	set_bit(nr: ICE_VSI_MMAC_FLTR_CHANGED, addr: vsi->state);
473	exit:
474	clear_bit(nr: ICE_CFG_BUSY, addr: vsi->state);
475	return err;
476	}
477
478	/**
479	* ice_sync_fltr_subtask - Sync the VSI filter list with HW
480	* @pf: board private structure
481	*/
482	static void ice_sync_fltr_subtask(struct ice_pf *pf)
483	{
484	int v;
485
486	if (!pf \|\| !(test_bit(ICE_FLAG_FLTR_SYNC, pf->flags)))
487	return;
488
489	clear_bit(nr: ICE_FLAG_FLTR_SYNC, addr: pf->flags);
490
491	ice_for_each_vsi(pf, v)
492	if (pf->vsi[v] && ice_vsi_fltr_changed(vsi: pf->vsi[v]) &&
493	ice_vsi_sync_fltr(vsi: pf->vsi[v])) {
494	/ come back and try again later /
495	set_bit(nr: ICE_FLAG_FLTR_SYNC, addr: pf->flags);
496	break;
497	}
498	}
499
500	/**
501	* ice_pf_dis_all_vsi - Pause all VSIs on a PF
502	* @pf: the PF
503	* @locked: is the rtnl_lock already held
504	*/
505	static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
506	{
507	int node;
508	int v;
509
510	ice_for_each_vsi(pf, v)
511	if (pf->vsi[v])
512	ice_dis_vsi(vsi: pf->vsi[v], locked);
513
514	for (node = `0`; node < ICE_MAX_PF_AGG_NODES; node++)
515	pf->pf_agg_node[node].num_vsis = `0`;
516
517	for (node = `0`; node < ICE_MAX_VF_AGG_NODES; node++)
518	pf->vf_agg_node[node].num_vsis = `0`;
519	}
520
521	/**
522	* ice_clear_sw_switch_recipes - clear switch recipes
523	* @pf: board private structure
524	*
525	* Mark switch recipes as not created in sw structures. There are cases where
526	* rules (especially advanced rules) need to be restored, either re-read from
527	* hardware or added again. For example after the reset. 'recp_created' flag
528	* prevents from doing that and need to be cleared upfront.
529	*/
530	static void ice_clear_sw_switch_recipes(struct ice_pf *pf)
531	{
532	struct ice_sw_recipe *recp;
533	u8 i;
534
535	recp = pf->hw.switch_info->recp_list;
536	for (i = `0`; i < ICE_MAX_NUM_RECIPES; i++)
537	recp[i].recp_created = false;
538	}
539
540	/**
541	* ice_prepare_for_reset - prep for reset
542	* @pf: board private structure
543	* @reset_type: reset type requested
544	*
545	* Inform or close all dependent features in prep for reset.
546	*/
547	static void
548	ice_prepare_for_reset(struct ice_pf pf, enum* ice_reset_req reset_type)
549	{
550	struct ice_hw *hw = &pf->hw;
551	struct ice_vsi *vsi;
552	struct ice_vf *vf;
553	unsigned int bkt;
554
555	dev_dbg(ice_pf_to_dev(pf), "reset_type=%d\n", reset_type);
556
557	/ already prepared for reset /
558	if (test_bit(ICE_PREPARED_FOR_RESET, pf->state))
559	return;
560
561	ice_unplug_aux_dev(pf);
562
563	/ Notify VFs of impending reset /
564	if (ice_check_sq_alive(hw, cq: &hw->mailboxq))
565	ice_vc_notify_reset(pf);
566
567	/ Disable VFs until reset is completed /
568	mutex_lock(&pf->vfs.table_lock);
569	ice_for_each_vf(pf, bkt, vf)
570	ice_set_vf_state_dis(vf);
571	mutex_unlock(lock: &pf->vfs.table_lock);
572
573	if (ice_is_eswitch_mode_switchdev(pf)) {
574	if (reset_type != ICE_RESET_PFR)
575	ice_clear_sw_switch_recipes(pf);
576	}
577
578	/ release ADQ specific HW and SW resources /
579	vsi = ice_get_main_vsi(pf);
580	if (!vsi)
581	goto skip;
582
583	/ to be on safe side, reset orig_rss_size so that normal flow*
584	* of deciding rss_size can take precedence
585	*/
586	vsi->orig_rss_size = `0`;
587
588	if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
589	if (reset_type == ICE_RESET_PFR) {
590	vsi->old_ena_tc = vsi->all_enatc;
591	vsi->old_numtc = vsi->all_numtc;
592	} else {
593	ice_remove_q_channels(vsi, rem_adv_fltr: true);
594
595	/ for other reset type, do not support channel rebuild*
596	* hence reset needed info
597	*/
598	vsi->old_ena_tc = `0`;
599	vsi->all_enatc = `0`;
600	vsi->old_numtc = `0`;
601	vsi->all_numtc = `0`;
602	vsi->req_txq = `0`;
603	vsi->req_rxq = `0`;
604	clear_bit(nr: ICE_FLAG_TC_MQPRIO, addr: pf->flags);
605	memset(&vsi->mqprio_qopt, `0`, sizeof(vsi->mqprio_qopt));
606	}
607	}
608	skip:
609
610	/ clear SW filtering DB /
611	ice_clear_hw_tbls(hw);
612	/ disable the VSIs and their queues that are not already DOWN /
613	ice_pf_dis_all_vsi(pf, locked: false);
614
615	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
616	ice_ptp_prepare_for_reset(pf, reset_type);
617
618	if (ice_is_feature_supported(pf, f: ICE_F_GNSS))
619	ice_gnss_exit(pf);
620
621	if (hw->port_info)
622	ice_sched_clear_port(pi: hw->port_info);
623
624	ice_shutdown_all_ctrlq(hw);
625
626	set_bit(nr: ICE_PREPARED_FOR_RESET, addr: pf->state);
627	}
628
629	/**
630	* ice_do_reset - Initiate one of many types of resets
631	* @pf: board private structure
632	* @reset_type: reset type requested before this function was called.
633	*/
634	static void ice_do_reset(struct ice_pf pf, enum* ice_reset_req reset_type)
635	{
636	struct device *dev = ice_pf_to_dev(pf);
637	struct ice_hw *hw = &pf->hw;
638
639	dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);
640
641	if (pf->lag && pf->lag->bonded && reset_type == ICE_RESET_PFR) {
642	dev_dbg(dev, "PFR on a bonded interface, promoting to CORER\n");
643	reset_type = ICE_RESET_CORER;
644	}
645
646	ice_prepare_for_reset(pf, reset_type);
647
648	/ trigger the reset /
649	if (ice_reset(hw, req: reset_type)) {
650	dev_err(dev, "reset %d failed\n", reset_type);
651	set_bit(nr: ICE_RESET_FAILED, addr: pf->state);
652	clear_bit(nr: ICE_RESET_OICR_RECV, addr: pf->state);
653	clear_bit(nr: ICE_PREPARED_FOR_RESET, addr: pf->state);
654	clear_bit(nr: ICE_PFR_REQ, addr: pf->state);
655	clear_bit(nr: ICE_CORER_REQ, addr: pf->state);
656	clear_bit(nr: ICE_GLOBR_REQ, addr: pf->state);
657	wake_up(&pf->reset_wait_queue);
658	return;
659	}
660
661	/ PFR is a bit of a special case because it doesn't result in an OICR*
662	* interrupt. So for PFR, rebuild after the reset and clear the reset-
663	* associated state bits.
664	*/
665	if (reset_type == ICE_RESET_PFR) {
666	pf->pfr_count++;
667	ice_rebuild(pf, reset_type);
668	clear_bit(nr: ICE_PREPARED_FOR_RESET, addr: pf->state);
669	clear_bit(nr: ICE_PFR_REQ, addr: pf->state);
670	wake_up(&pf->reset_wait_queue);
671	ice_reset_all_vfs(pf);
672	}
673	}
674
675	/**
676	* ice_reset_subtask - Set up for resetting the device and driver
677	* @pf: board private structure
678	*/
679	static void ice_reset_subtask(struct ice_pf *pf)
680	{
681	enum ice_reset_req reset_type = ICE_RESET_INVAL;
682
683	/ When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an*
684	* OICR interrupt. The OICR handler (ice_misc_intr) determines what type
685	* of reset is pending and sets bits in pf->state indicating the reset
686	* type and ICE_RESET_OICR_RECV. So, if the latter bit is set
687	* prepare for pending reset if not already (for PF software-initiated
688	* global resets the software should already be prepared for it as
689	* indicated by ICE_PREPARED_FOR_RESET; for global resets initiated
690	* by firmware or software on other PFs, that bit is not set so prepare
691	* for the reset now), poll for reset done, rebuild and return.
692	*/
693	if (test_bit(ICE_RESET_OICR_RECV, pf->state)) {
694	/ Perform the largest reset requested /
695	if (test_and_clear_bit(nr: ICE_CORER_RECV, addr: pf->state))
696	reset_type = ICE_RESET_CORER;
697	if (test_and_clear_bit(nr: ICE_GLOBR_RECV, addr: pf->state))
698	reset_type = ICE_RESET_GLOBR;
699	if (test_and_clear_bit(nr: ICE_EMPR_RECV, addr: pf->state))
700	reset_type = ICE_RESET_EMPR;
701	/ return if no valid reset type requested /
702	if (reset_type == ICE_RESET_INVAL)
703	return;
704	ice_prepare_for_reset(pf, reset_type);
705
706	/ make sure we are ready to rebuild /
707	if (ice_check_reset(hw: &pf->hw)) {
708	set_bit(nr: ICE_RESET_FAILED, addr: pf->state);
709	} else {
710	/ done with reset. start rebuild /
711	pf->hw.reset_ongoing = false;
712	ice_rebuild(pf, reset_type);
713	/ clear bit to resume normal operations, but*
714	* ICE_NEEDS_RESTART bit is set in case rebuild failed
715	*/
716	clear_bit(nr: ICE_RESET_OICR_RECV, addr: pf->state);
717	clear_bit(nr: ICE_PREPARED_FOR_RESET, addr: pf->state);
718	clear_bit(nr: ICE_PFR_REQ, addr: pf->state);
719	clear_bit(nr: ICE_CORER_REQ, addr: pf->state);
720	clear_bit(nr: ICE_GLOBR_REQ, addr: pf->state);
721	wake_up(&pf->reset_wait_queue);
722	ice_reset_all_vfs(pf);
723	}
724
725	return;
726	}
727
728	/ No pending resets to finish processing. Check for new resets /
729	if (test_bit(ICE_PFR_REQ, pf->state)) {
730	reset_type = ICE_RESET_PFR;
731	if (pf->lag && pf->lag->bonded) {
732	dev_dbg(ice_pf_to_dev(pf), "PFR on a bonded interface, promoting to CORER\n");
733	reset_type = ICE_RESET_CORER;
734	}
735	}
736	if (test_bit(ICE_CORER_REQ, pf->state))
737	reset_type = ICE_RESET_CORER;
738	if (test_bit(ICE_GLOBR_REQ, pf->state))
739	reset_type = ICE_RESET_GLOBR;
740	/ If no valid reset type requested just return /
741	if (reset_type == ICE_RESET_INVAL)
742	return;
743
744	/ reset if not already down or busy /
745	if (!test_bit(ICE_DOWN, pf->state) &&
746	!test_bit(ICE_CFG_BUSY, pf->state)) {
747	ice_do_reset(pf, reset_type);
748	}
749	}
750
751	/**
752	* ice_print_topo_conflict - print topology conflict message
753	* @vsi: the VSI whose topology status is being checked
754	*/
755	static void ice_print_topo_conflict(struct ice_vsi *vsi)
756	{
757	switch (vsi->port_info->phy.link_info.topo_media_conflict) {
758	case ICE_AQ_LINK_TOPO_CONFLICT:
759	case ICE_AQ_LINK_MEDIA_CONFLICT:
760	case ICE_AQ_LINK_TOPO_UNREACH_PRT:
761	case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT:
762	case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA:
763	netdev_info(dev: vsi->netdev, format: "Potential misconfiguration of the Ethernet port detected. If it was not intended, please use the Intel (R) Ethernet Port Configuration Tool to address the issue.\n");
764	break;
765	case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA:
766	if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, vsi->back->flags))
767	netdev_warn(dev: vsi->netdev, format: "An unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules\n");
768	else
769	netdev_err(dev: vsi->netdev, format: "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
770	break;
771	default:
772	break;
773	}
774	}
775
776	/**
777	* ice_print_link_msg - print link up or down message
778	* @vsi: the VSI whose link status is being queried
779	* @isup: boolean for if the link is now up or down
780	*/
781	void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
782	{
783	struct ice_aqc_get_phy_caps_data *caps;
784	const char *an_advertised;
785	const char *fec_req;
786	const char *speed;
787	const char *fec;
788	const char *fc;
789	const char *an;
790	int status;
791
792	if (!vsi)
793	return;
794
795	if (vsi->current_isup == isup)
796	return;
797
798	vsi->current_isup = isup;
799
800	if (!isup) {
801	netdev_info(dev: vsi->netdev, format: "NIC Link is Down\n");
802	return;
803	}
804
805	switch (vsi->port_info->phy.link_info.link_speed) {
806	case ICE_AQ_LINK_SPEED_100GB:
807	speed = "100 G";
808	break;
809	case ICE_AQ_LINK_SPEED_50GB:
810	speed = "50 G";
811	break;
812	case ICE_AQ_LINK_SPEED_40GB:
813	speed = "40 G";
814	break;
815	case ICE_AQ_LINK_SPEED_25GB:
816	speed = "25 G";
817	break;
818	case ICE_AQ_LINK_SPEED_20GB:
819	speed = "20 G";
820	break;
821	case ICE_AQ_LINK_SPEED_10GB:
822	speed = "10 G";
823	break;
824	case ICE_AQ_LINK_SPEED_5GB:
825	speed = "5 G";
826	break;
827	case ICE_AQ_LINK_SPEED_2500MB:
828	speed = "2.5 G";
829	break;
830	case ICE_AQ_LINK_SPEED_1000MB:
831	speed = "1 G";
832	break;
833	case ICE_AQ_LINK_SPEED_100MB:
834	speed = "100 M";
835	break;
836	default:
837	speed = "Unknown ";
838	break;
839	}
840
841	switch (vsi->port_info->fc.current_mode) {
842	case ICE_FC_FULL:
843	fc = "Rx/Tx";
844	break;
845	case ICE_FC_TX_PAUSE:
846	fc = "Tx";
847	break;
848	case ICE_FC_RX_PAUSE:
849	fc = "Rx";
850	break;
851	case ICE_FC_NONE:
852	fc = "None";
853	break;
854	default:
855	fc = "Unknown";
856	break;
857	}
858
859	/ Get FEC mode based on negotiated link info /
860	switch (vsi->port_info->phy.link_info.fec_info) {
861	case ICE_AQ_LINK_25G_RS_528_FEC_EN:
862	case ICE_AQ_LINK_25G_RS_544_FEC_EN:
863	fec = "RS-FEC";
864	break;
865	case ICE_AQ_LINK_25G_KR_FEC_EN:
866	fec = "FC-FEC/BASE-R";
867	break;
868	default:
869	fec = "NONE";
870	break;
871	}
872
873	/ check if autoneg completed, might be false due to not supported /
874	if (vsi->port_info->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)
875	an = "True";
876	else
877	an = "False";
878
879	/ Get FEC mode requested based on PHY caps last SW configuration /
880	caps = kzalloc(size: sizeof(*caps), GFP_KERNEL);
881	if (!caps) {
882	fec_req = "Unknown";
883	an_advertised = "Unknown";
884	goto done;
885	}
886
887	status = ice_aq_get_phy_caps(pi: vsi->port_info, qual_mods: false,
888	ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL);
889	if (status)
890	netdev_info(dev: vsi->netdev, format: "Get phy capability failed.\n");
891
892	an_advertised = ice_is_phy_caps_an_enabled(caps) ? "On" : "Off";
893
894	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ \|\|
895	caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
896	fec_req = "RS-FEC";
897	else if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ \|\|
898	caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
899	fec_req = "FC-FEC/BASE-R";
900	else
901	fec_req = "NONE";
902
903	kfree(objp: caps);
904
905	done:
906	netdev_info(dev: vsi->netdev, format: "NIC Link is up %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg Advertised: %s, Autoneg Negotiated: %s, Flow Control: %s\n",
907	speed, fec_req, fec, an_advertised, an, fc);
908	ice_print_topo_conflict(vsi);
909	}
910
911	/**
912	* ice_vsi_link_event - update the VSI's netdev
913	* @vsi: the VSI on which the link event occurred
914	* @link_up: whether or not the VSI needs to be set up or down
915	*/
916	static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up)
917	{
918	if (!vsi)
919	return;
920
921	if (test_bit(ICE_VSI_DOWN, vsi->state) \|\| !vsi->netdev)
922	return;
923
924	if (vsi->type == ICE_VSI_PF) {
925	if (link_up == netif_carrier_ok(dev: vsi->netdev))
926	return;
927
928	if (link_up) {
929	netif_carrier_on(dev: vsi->netdev);
930	netif_tx_wake_all_queues(dev: vsi->netdev);
931	} else {
932	netif_carrier_off(dev: vsi->netdev);
933	netif_tx_stop_all_queues(dev: vsi->netdev);
934	}
935	}
936	}
937
938	/**
939	* ice_set_dflt_mib - send a default config MIB to the FW
940	* @pf: private PF struct
941	*
942	* This function sends a default configuration MIB to the FW.
943	*
944	* If this function errors out at any point, the driver is still able to
945	* function. The main impact is that LFC may not operate as expected.
946	* Therefore an error state in this function should be treated with a DBG
947	* message and continue on with driver rebuild/reenable.
948	*/
949	static void ice_set_dflt_mib(struct ice_pf *pf)
950	{
951	struct device *dev = ice_pf_to_dev(pf);
952	u8 mib_type, buf, lldpmib = NULL;
953	u16 len, typelen, offset = `0`;
954	struct ice_lldp_org_tlv *tlv;
955	struct ice_hw *hw = &pf->hw;
956	u32 ouisubtype;
957
958	mib_type = SET_LOCAL_MIB_TYPE_LOCAL_MIB;
959	lldpmib = kzalloc(ICE_LLDPDU_SIZE, GFP_KERNEL);
960	if (!lldpmib) {
961	dev_dbg(dev, "%s Failed to allocate MIB memory\n",
962	__func__);
963	return;
964	}
965
966	/ Add ETS CFG TLV /
967	tlv = (struct ice_lldp_org_tlv *)lldpmib;
968	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) \|
969	ICE_IEEE_ETS_TLV_LEN);
970	tlv->typelen = htons(typelen);
971	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) \|
972	ICE_IEEE_SUBTYPE_ETS_CFG);
973	tlv->ouisubtype = htonl(ouisubtype);
974
975	buf = tlv->tlvinfo;
976	buf[`0`] = `0`;
977
978	/ ETS CFG all UPs map to TC 0. Next 4 (1 - 4) Octets = 0.*
979	* Octets 5 - 12 are BW values, set octet 5 to 100% BW.
980	* Octets 13 - 20 are TSA values - leave as zeros
981	*/
982	buf[`5`] = `0x64`;
983	len = FIELD_GET(ICE_LLDP_TLV_LEN_M, typelen);
984	offset += len + `2`;
985	tlv = (struct ice_lldp_org_tlv *)
986	((char )tlv + sizeof*(tlv->typelen) + len);
987
988	/ Add ETS REC TLV /
989	buf = tlv->tlvinfo;
990	tlv->typelen = htons(typelen);
991
992	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) \|
993	ICE_IEEE_SUBTYPE_ETS_REC);
994	tlv->ouisubtype = htonl(ouisubtype);
995
996	/ First octet of buf is reserved*
997	* Octets 1 - 4 map UP to TC - all UPs map to zero
998	* Octets 5 - 12 are BW values - set TC 0 to 100%.
999	* Octets 13 - 20 are TSA value - leave as zeros
1000	*/
1001	buf[`5`] = `0x64`;
1002	offset += len + `2`;
1003	tlv = (struct ice_lldp_org_tlv *)
1004	((char )tlv + sizeof*(tlv->typelen) + len);
1005
1006	/ Add PFC CFG TLV /
1007	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) \|
1008	ICE_IEEE_PFC_TLV_LEN);
1009	tlv->typelen = htons(typelen);
1010
1011	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) \|
1012	ICE_IEEE_SUBTYPE_PFC_CFG);
1013	tlv->ouisubtype = htonl(ouisubtype);
1014
1015	/ Octet 1 left as all zeros - PFC disabled /
1016	buf[`0`] = `0x08`;
1017	len = FIELD_GET(ICE_LLDP_TLV_LEN_M, typelen);
1018	offset += len + `2`;
1019
1020	if (ice_aq_set_lldp_mib(hw, mib_type, buf: (void *)lldpmib, buf_size: offset, NULL))
1021	dev_dbg(dev, "%s Failed to set default LLDP MIB\n", __func__);
1022
1023	kfree(objp: lldpmib);
1024	}
1025
1026	/**
1027	* ice_check_phy_fw_load - check if PHY FW load failed
1028	* @pf: pointer to PF struct
1029	* @link_cfg_err: bitmap from the link info structure
1030	*
1031	* check if external PHY FW load failed and print an error message if it did
1032	*/
1033	static void ice_check_phy_fw_load(struct ice_pf *pf, u8 link_cfg_err)
1034	{
1035	if (!(link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE)) {
1036	clear_bit(nr: ICE_FLAG_PHY_FW_LOAD_FAILED, addr: pf->flags);
1037	return;
1038	}
1039
1040	if (test_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags))
1041	return;
1042
1043	if (link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE) {
1044	dev_err(ice_pf_to_dev(pf), "Device failed to load the FW for the external PHY. Please download and install the latest NVM for your device and try again\n");
1045	set_bit(nr: ICE_FLAG_PHY_FW_LOAD_FAILED, addr: pf->flags);
1046	}
1047	}
1048
1049	/**
1050	* ice_check_module_power
1051	* @pf: pointer to PF struct
1052	* @link_cfg_err: bitmap from the link info structure
1053	*
1054	* check module power level returned by a previous call to aq_get_link_info
1055	* and print error messages if module power level is not supported
1056	*/
1057	static void ice_check_module_power(struct ice_pf *pf, u8 link_cfg_err)
1058	{
1059	/ if module power level is supported, clear the flag /
1060	if (!(link_cfg_err & (ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT \|
1061	ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED))) {
1062	clear_bit(nr: ICE_FLAG_MOD_POWER_UNSUPPORTED, addr: pf->flags);
1063	return;
1064	}
1065
1066	/ if ICE_FLAG_MOD_POWER_UNSUPPORTED was previously set and the*
1067	* above block didn't clear this bit, there's nothing to do
1068	*/
1069	if (test_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags))
1070	return;
1071
1072	if (link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT) {
1073	dev_err(ice_pf_to_dev(pf), "The installed module is incompatible with the device's NVM image. Cannot start link\n");
1074	set_bit(nr: ICE_FLAG_MOD_POWER_UNSUPPORTED, addr: pf->flags);
1075	} else if (link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED) {
1076	dev_err(ice_pf_to_dev(pf), "The module's power requirements exceed the device's power supply. Cannot start link\n");
1077	set_bit(nr: ICE_FLAG_MOD_POWER_UNSUPPORTED, addr: pf->flags);
1078	}
1079	}
1080
1081	/**
1082	* ice_check_link_cfg_err - check if link configuration failed
1083	* @pf: pointer to the PF struct
1084	* @link_cfg_err: bitmap from the link info structure
1085	*
1086	* print if any link configuration failure happens due to the value in the
1087	* link_cfg_err parameter in the link info structure
1088	*/
1089	static void ice_check_link_cfg_err(struct ice_pf *pf, u8 link_cfg_err)
1090	{
1091	ice_check_module_power(pf, link_cfg_err);
1092	ice_check_phy_fw_load(pf, link_cfg_err);
1093	}
1094
1095	/**
1096	* ice_link_event - process the link event
1097	* @pf: PF that the link event is associated with
1098	* @pi: port_info for the port that the link event is associated with
1099	* @link_up: true if the physical link is up and false if it is down
1100	* @link_speed: current link speed received from the link event
1101	*
1102	* Returns 0 on success and negative on failure
1103	*/
1104	static int
1105	ice_link_event(struct ice_pf pf, struct* ice_port_info *pi, bool link_up,
1106	u16 link_speed)
1107	{
1108	struct device *dev = ice_pf_to_dev(pf);
1109	struct ice_phy_info *phy_info;
1110	struct ice_vsi *vsi;
1111	u16 old_link_speed;
1112	bool old_link;
1113	int status;
1114
1115	phy_info = &pi->phy;
1116	phy_info->link_info_old = phy_info->link_info;
1117
1118	old_link = !!(phy_info->link_info_old.link_info & ICE_AQ_LINK_UP);
1119	old_link_speed = phy_info->link_info_old.link_speed;
1120
1121	/ update the link info structures and re-enable link events,*
1122	* don't bail on failure due to other book keeping needed
1123	*/
1124	status = ice_update_link_info(pi);
1125	if (status)
1126	dev_dbg(dev, "Failed to update link status on port %d, err %d aq_err %s\n",
1127	pi->lport, status,
1128	ice_aq_str(pi->hw->adminq.sq_last_status));
1129
1130	ice_check_link_cfg_err(pf, link_cfg_err: pi->phy.link_info.link_cfg_err);
1131
1132	/ Check if the link state is up after updating link info, and treat*
1133	* this event as an UP event since the link is actually UP now.
1134	*/
1135	if (phy_info->link_info.link_info & ICE_AQ_LINK_UP)
1136	link_up = true;
1137
1138	vsi = ice_get_main_vsi(pf);
1139	if (!vsi \|\| !vsi->port_info)
1140	return -EINVAL;
1141
1142	/ turn off PHY if media was removed /
1143	if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) &&
1144	!(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
1145	set_bit(nr: ICE_FLAG_NO_MEDIA, addr: pf->flags);
1146	ice_set_link(vsi, ena: false);
1147	}
1148
1149	/ if the old link up/down and speed is the same as the new /
1150	if (link_up == old_link && link_speed == old_link_speed)
1151	return `0`;
1152
1153	ice_ptp_link_change(pf, port: pf->hw.pf_id, linkup: link_up);
1154
1155	if (ice_is_dcb_active(pf)) {
1156	if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
1157	ice_dcb_rebuild(pf);
1158	} else {
1159	if (link_up)
1160	ice_set_dflt_mib(pf);
1161	}
1162	ice_vsi_link_event(vsi, link_up);
1163	ice_print_link_msg(vsi, isup: link_up);
1164
1165	ice_vc_notify_link_state(pf);
1166
1167	return `0`;
1168	}
1169
1170	/**
1171	* ice_watchdog_subtask - periodic tasks not using event driven scheduling
1172	* @pf: board private structure
1173	*/
1174	static void ice_watchdog_subtask(struct ice_pf *pf)
1175	{
1176	int i;
1177
1178	/ if interface is down do nothing /
1179	if (test_bit(ICE_DOWN, pf->state) \|\|
1180	test_bit(ICE_CFG_BUSY, pf->state))
1181	return;
1182
1183	/ make sure we don't do these things too often /
1184	if (time_before(jiffies,
1185	pf->serv_tmr_prev + pf->serv_tmr_period))
1186	return;
1187
1188	pf->serv_tmr_prev = jiffies;
1189
1190	/ Update the stats for active netdevs so the network stack*
1191	* can look at updated numbers whenever it cares to
1192	*/
1193	ice_update_pf_stats(pf);
1194	ice_for_each_vsi(pf, i)
1195	if (pf->vsi[i] && pf->vsi[i]->netdev)
1196	ice_update_vsi_stats(vsi: pf->vsi[i]);
1197	}
1198
1199	/**
1200	* ice_init_link_events - enable/initialize link events
1201	* @pi: pointer to the port_info instance
1202	*
1203	* Returns -EIO on failure, 0 on success
1204	*/
1205	static int ice_init_link_events(struct ice_port_info *pi)
1206	{
1207	u16 mask;
1208
1209	mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN \| ICE_AQ_LINK_EVENT_MEDIA_NA \|
1210	ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL \|
1211	ICE_AQ_LINK_EVENT_PHY_FW_LOAD_FAIL));
1212
1213	if (ice_aq_set_event_mask(hw: pi->hw, port_num: pi->lport, mask, NULL)) {
1214	dev_dbg(ice_hw_to_dev(pi->hw), "Failed to set link event mask for port %d\n",
1215	pi->lport);
1216	return -EIO;
1217	}
1218
1219	if (ice_aq_get_link_info(pi, ena_lse: true, NULL, NULL)) {
1220	dev_dbg(ice_hw_to_dev(pi->hw), "Failed to enable link events for port %d\n",
1221	pi->lport);
1222	return -EIO;
1223	}
1224
1225	return `0`;
1226	}
1227
1228	/**
1229	* ice_handle_link_event - handle link event via ARQ
1230	* @pf: PF that the link event is associated with
1231	* @event: event structure containing link status info
1232	*/
1233	static int
1234	ice_handle_link_event(struct ice_pf pf, struct* ice_rq_event_info *event)
1235	{
1236	struct ice_aqc_get_link_status_data *link_data;
1237	struct ice_port_info *port_info;
1238	int status;
1239
1240	link_data = (struct ice_aqc_get_link_status_data *)event->msg_buf;
1241	port_info = pf->hw.port_info;
1242	if (!port_info)
1243	return -EINVAL;
1244
1245	status = ice_link_event(pf, pi: port_info,
1246	link_up: !!(link_data->link_info & ICE_AQ_LINK_UP),
1247	le16_to_cpu(link_data->link_speed));
1248	if (status)
1249	dev_dbg(ice_pf_to_dev(pf), "Could not process link event, error %d\n",
1250	status);
1251
1252	return status;
1253	}
1254
1255	/**
1256	* ice_get_fwlog_data - copy the FW log data from ARQ event
1257	* @pf: PF that the FW log event is associated with
1258	* @event: event structure containing FW log data
1259	*/
1260	static void
1261	ice_get_fwlog_data(struct ice_pf pf, struct* ice_rq_event_info *event)
1262	{
1263	struct ice_fwlog_data *fwlog;
1264	struct ice_hw *hw = &pf->hw;
1265
1266	fwlog = &hw->fwlog_ring.rings[hw->fwlog_ring.tail];
1267
1268	memset(fwlog->data, `0`, PAGE_SIZE);
1269	fwlog->data_size = le16_to_cpu(event->desc.datalen);
1270
1271	memcpy(fwlog->data, event->msg_buf, fwlog->data_size);
1272	ice_fwlog_ring_increment(item: &hw->fwlog_ring.tail, size: hw->fwlog_ring.size);
1273
1274	if (ice_fwlog_ring_full(rings: &hw->fwlog_ring)) {
1275	/ the rings are full so bump the head to create room /
1276	ice_fwlog_ring_increment(item: &hw->fwlog_ring.head,
1277	size: hw->fwlog_ring.size);
1278	}
1279	}
1280
1281	/**
1282	* ice_aq_prep_for_event - Prepare to wait for an AdminQ event from firmware
1283	* @pf: pointer to the PF private structure
1284	* @task: intermediate helper storage and identifier for waiting
1285	* @opcode: the opcode to wait for
1286	*
1287	* Prepares to wait for a specific AdminQ completion event on the ARQ for
1288	* a given PF. Actual wait would be done by a call to ice_aq_wait_for_event().
1289	*
1290	* Calls are separated to allow caller registering for event before sending
1291	* the command, which mitigates a race between registering and FW responding.
1292	*
1293	* To obtain only the descriptor contents, pass an task->event with null
1294	* msg_buf. If the complete data buffer is desired, allocate the
1295	* task->event.msg_buf with enough space ahead of time.
1296	*/
1297	void ice_aq_prep_for_event(struct ice_pf pf, struct* ice_aq_task *task,
1298	u16 opcode)
1299	{
1300	INIT_HLIST_NODE(h: &task->entry);
1301	task->opcode = opcode;
1302	task->state = ICE_AQ_TASK_WAITING;
1303
1304	spin_lock_bh(lock: &pf->aq_wait_lock);
1305	hlist_add_head(n: &task->entry, h: &pf->aq_wait_list);
1306	spin_unlock_bh(lock: &pf->aq_wait_lock);
1307	}
1308
1309	/**
1310	* ice_aq_wait_for_event - Wait for an AdminQ event from firmware
1311	* @pf: pointer to the PF private structure
1312	* @task: ptr prepared by ice_aq_prep_for_event()
1313	* @timeout: how long to wait, in jiffies
1314	*
1315	* Waits for a specific AdminQ completion event on the ARQ for a given PF. The
1316	* current thread will be put to sleep until the specified event occurs or
1317	* until the given timeout is reached.
1318	*
1319	* Returns: zero on success, or a negative error code on failure.
1320	*/
1321	int ice_aq_wait_for_event(struct ice_pf pf, struct* ice_aq_task *task,
1322	unsigned long timeout)
1323	{
1324	enum ice_aq_task_state *state = &task->state;
1325	struct device *dev = ice_pf_to_dev(pf);
1326	unsigned long start = jiffies;
1327	long ret;
1328	int err;
1329
1330	ret = wait_event_interruptible_timeout(pf->aq_wait_queue,
1331	*state != ICE_AQ_TASK_WAITING,
1332	timeout);
1333	switch (*state) {
1334	case ICE_AQ_TASK_NOT_PREPARED:
1335	WARN(`1`, "call to %s without ice_aq_prep_for_event()", __func__);
1336	err = -EINVAL;
1337	break;
1338	case ICE_AQ_TASK_WAITING:
1339	err = ret < `0` ? ret : -ETIMEDOUT;
1340	break;
1341	case ICE_AQ_TASK_CANCELED:
1342	err = ret < `0` ? ret : -ECANCELED;
1343	break;
1344	case ICE_AQ_TASK_COMPLETE:
1345	err = ret < `0` ? ret : `0`;
1346	break;
1347	default:
1348	WARN(`1`, "Unexpected AdminQ wait task state %u", *state);
1349	err = -EINVAL;
1350	break;
1351	}
1352
1353	dev_dbg(dev, "Waited %u msecs (max %u msecs) for firmware response to op 0x%04x\n",
1354	jiffies_to_msecs(jiffies - start),
1355	jiffies_to_msecs(timeout),
1356	task->opcode);
1357
1358	spin_lock_bh(lock: &pf->aq_wait_lock);
1359	hlist_del(n: &task->entry);
1360	spin_unlock_bh(lock: &pf->aq_wait_lock);
1361
1362	return err;
1363	}
1364
1365	/**
1366	* ice_aq_check_events - Check if any thread is waiting for an AdminQ event
1367	* @pf: pointer to the PF private structure
1368	* @opcode: the opcode of the event
1369	* @event: the event to check
1370	*
1371	* Loops over the current list of pending threads waiting for an AdminQ event.
1372	* For each matching task, copy the contents of the event into the task
1373	* structure and wake up the thread.
1374	*
1375	* If multiple threads wait for the same opcode, they will all be woken up.
1376	*
1377	* Note that event->msg_buf will only be duplicated if the event has a buffer
1378	* with enough space already allocated. Otherwise, only the descriptor and
1379	* message length will be copied.
1380	*
1381	* Returns: true if an event was found, false otherwise
1382	*/
1383	static void ice_aq_check_events(struct ice_pf *pf, u16 opcode,
1384	struct ice_rq_event_info *event)
1385	{
1386	struct ice_rq_event_info *task_ev;
1387	struct ice_aq_task *task;
1388	bool found = false;
1389
1390	spin_lock_bh(lock: &pf->aq_wait_lock);
1391	hlist_for_each_entry(task, &pf->aq_wait_list, entry) {
1392	if (task->state != ICE_AQ_TASK_WAITING)
1393	continue;
1394	if (task->opcode != opcode)
1395	continue;
1396
1397	task_ev = &task->event;
1398	memcpy(&task_ev->desc, &event->desc, sizeof(event->desc));
1399	task_ev->msg_len = event->msg_len;
1400
1401	/ Only copy the data buffer if a destination was set /
1402	if (task_ev->msg_buf && task_ev->buf_len >= event->buf_len) {
1403	memcpy(task_ev->msg_buf, event->msg_buf,
1404	event->buf_len);
1405	task_ev->buf_len = event->buf_len;
1406	}
1407
1408	task->state = ICE_AQ_TASK_COMPLETE;
1409	found = true;
1410	}
1411	spin_unlock_bh(lock: &pf->aq_wait_lock);
1412
1413	if (found)
1414	wake_up(&pf->aq_wait_queue);
1415	}
1416
1417	/**
1418	* ice_aq_cancel_waiting_tasks - Immediately cancel all waiting tasks
1419	* @pf: the PF private structure
1420	*
1421	* Set all waiting tasks to ICE_AQ_TASK_CANCELED, and wake up their threads.
1422	* This will then cause ice_aq_wait_for_event to exit with -ECANCELED.
1423	*/
1424	static void ice_aq_cancel_waiting_tasks(struct ice_pf *pf)
1425	{
1426	struct ice_aq_task *task;
1427
1428	spin_lock_bh(lock: &pf->aq_wait_lock);
1429	hlist_for_each_entry(task, &pf->aq_wait_list, entry)
1430	task->state = ICE_AQ_TASK_CANCELED;
1431	spin_unlock_bh(lock: &pf->aq_wait_lock);
1432
1433	wake_up(&pf->aq_wait_queue);
1434	}
1435
1436	#define ICE_MBX_OVERFLOW_WATERMARK 64
1437
1438	/**
1439	* __ice_clean_ctrlq - helper function to clean controlq rings
1440	* @pf: ptr to struct ice_pf
1441	* @q_type: specific Control queue type
1442	*/
1443	static int __ice_clean_ctrlq(struct ice_pf pf, enum* ice_ctl_q q_type)
1444	{
1445	struct device *dev = ice_pf_to_dev(pf);
1446	struct ice_rq_event_info event;
1447	struct ice_hw *hw = &pf->hw;
1448	struct ice_ctl_q_info *cq;
1449	u16 pending, i = `0`;
1450	const char *qtype;
1451	u32 oldval, val;
1452
1453	/ Do not clean control queue if/when PF reset fails /
1454	if (test_bit(ICE_RESET_FAILED, pf->state))
1455	return `0`;
1456
1457	switch (q_type) {
1458	case ICE_CTL_Q_ADMIN:
1459	cq = &hw->adminq;
1460	qtype = "Admin";
1461	break;
1462	case ICE_CTL_Q_SB:
1463	cq = &hw->sbq;
1464	qtype = "Sideband";
1465	break;
1466	case ICE_CTL_Q_MAILBOX:
1467	cq = &hw->mailboxq;
1468	qtype = "Mailbox";
1469	/ we are going to try to detect a malicious VF, so set the*
1470	* state to begin detection
1471	*/
1472	hw->mbx_snapshot.mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
1473	break;
1474	default:
1475	dev_warn(dev, "Unknown control queue type 0x%x\n", q_type);
1476	return `0`;
1477	}
1478
1479	/ check for error indications - PF_xx_AxQLEN register layout for*
1480	* FW/MBX/SB are identical so just use defines for PF_FW_AxQLEN.
1481	*/
1482	val = rd32(hw, cq->rq.len);
1483	if (val & (PF_FW_ARQLEN_ARQVFE_M \| PF_FW_ARQLEN_ARQOVFL_M \|
1484	PF_FW_ARQLEN_ARQCRIT_M)) {
1485	oldval = val;
1486	if (val & PF_FW_ARQLEN_ARQVFE_M)
1487	dev_dbg(dev, "%s Receive Queue VF Error detected\n",
1488	qtype);
1489	if (val & PF_FW_ARQLEN_ARQOVFL_M) {
1490	dev_dbg(dev, "%s Receive Queue Overflow Error detected\n",
1491	qtype);
1492	}
1493	if (val & PF_FW_ARQLEN_ARQCRIT_M)
1494	dev_dbg(dev, "%s Receive Queue Critical Error detected\n",
1495	qtype);
1496	val &= ~(PF_FW_ARQLEN_ARQVFE_M \| PF_FW_ARQLEN_ARQOVFL_M \|
1497	PF_FW_ARQLEN_ARQCRIT_M);
1498	if (oldval != val)
1499	wr32(hw, cq->rq.len, val);
1500	}
1501
1502	val = rd32(hw, cq->sq.len);
1503	if (val & (PF_FW_ATQLEN_ATQVFE_M \| PF_FW_ATQLEN_ATQOVFL_M \|
1504	PF_FW_ATQLEN_ATQCRIT_M)) {
1505	oldval = val;
1506	if (val & PF_FW_ATQLEN_ATQVFE_M)
1507	dev_dbg(dev, "%s Send Queue VF Error detected\n",
1508	qtype);
1509	if (val & PF_FW_ATQLEN_ATQOVFL_M) {
1510	dev_dbg(dev, "%s Send Queue Overflow Error detected\n",
1511	qtype);
1512	}
1513	if (val & PF_FW_ATQLEN_ATQCRIT_M)
1514	dev_dbg(dev, "%s Send Queue Critical Error detected\n",
1515	qtype);
1516	val &= ~(PF_FW_ATQLEN_ATQVFE_M \| PF_FW_ATQLEN_ATQOVFL_M \|
1517	PF_FW_ATQLEN_ATQCRIT_M);
1518	if (oldval != val)
1519	wr32(hw, cq->sq.len, val);
1520	}
1521
1522	event.buf_len = cq->rq_buf_size;
1523	event.msg_buf = kzalloc(size: event.buf_len, GFP_KERNEL);
1524	if (!event.msg_buf)
1525	return `0`;
1526
1527	do {
1528	struct ice_mbx_data data = {};
1529	u16 opcode;
1530	int ret;
1531
1532	ret = ice_clean_rq_elem(hw, cq, e: &event, pending: &pending);
1533	if (ret == -EALREADY)
1534	break;
1535	if (ret) {
1536	dev_err(dev, "%s Receive Queue event error %d\n", qtype,
1537	ret);
1538	break;
1539	}
1540
1541	opcode = le16_to_cpu(event.desc.opcode);
1542
1543	/ Notify any thread that might be waiting for this event /
1544	ice_aq_check_events(pf, opcode, event: &event);
1545
1546	switch (opcode) {
1547	case ice_aqc_opc_get_link_status:
1548	if (ice_handle_link_event(pf, event: &event))
1549	dev_err(dev, "Could not handle link event\n");
1550	break;
1551	case ice_aqc_opc_event_lan_overflow:
1552	ice_vf_lan_overflow_event(pf, event: &event);
1553	break;
1554	case ice_mbx_opc_send_msg_to_pf:
1555	data.num_msg_proc = i;
1556	data.num_pending_arq = pending;
1557	data.max_num_msgs_mbx = hw->mailboxq.num_rq_entries;
1558	data.async_watermark_val = ICE_MBX_OVERFLOW_WATERMARK;
1559
1560	ice_vc_process_vf_msg(pf, event: &event, mbxdata: &data);
1561	break;
1562	case ice_aqc_opc_fw_logs_event:
1563	ice_get_fwlog_data(pf, event: &event);
1564	break;
1565	case ice_aqc_opc_lldp_set_mib_change:
1566	ice_dcb_process_lldp_set_mib_change(pf, event: &event);
1567	break;
1568	default:
1569	dev_dbg(dev, "%s Receive Queue unknown event 0x%04x ignored\n",
1570	qtype, opcode);
1571	break;
1572	}
1573	} while (pending && (i++ < ICE_DFLT_IRQ_WORK));
1574
1575	kfree(objp: event.msg_buf);
1576
1577	return pending && (i == ICE_DFLT_IRQ_WORK);
1578	}
1579
1580	/**
1581	* ice_ctrlq_pending - check if there is a difference between ntc and ntu
1582	* @hw: pointer to hardware info
1583	* @cq: control queue information
1584	*
1585	* returns true if there are pending messages in a queue, false if there aren't
1586	*/
1587	static bool ice_ctrlq_pending(struct ice_hw hw, struct* ice_ctl_q_info *cq)
1588	{
1589	u16 ntu;
1590
1591	ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
1592	return cq->rq.next_to_clean != ntu;
1593	}
1594
1595	/**
1596	* ice_clean_adminq_subtask - clean the AdminQ rings
1597	* @pf: board private structure
1598	*/
1599	static void ice_clean_adminq_subtask(struct ice_pf *pf)
1600	{
1601	struct ice_hw *hw = &pf->hw;
1602
1603	if (!test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
1604	return;
1605
1606	if (__ice_clean_ctrlq(pf, q_type: ICE_CTL_Q_ADMIN))
1607	return;
1608
1609	clear_bit(nr: ICE_ADMINQ_EVENT_PENDING, addr: pf->state);
1610
1611	/ There might be a situation where new messages arrive to a control*
1612	* queue between processing the last message and clearing the
1613	* EVENT_PENDING bit. So before exiting, check queue head again (using
1614	* ice_ctrlq_pending) and process new messages if any.
1615	*/
1616	if (ice_ctrlq_pending(hw, cq: &hw->adminq))
1617	__ice_clean_ctrlq(pf, q_type: ICE_CTL_Q_ADMIN);
1618
1619	ice_flush(hw);
1620	}
1621
1622	/**
1623	* ice_clean_mailboxq_subtask - clean the MailboxQ rings
1624	* @pf: board private structure
1625	*/
1626	static void ice_clean_mailboxq_subtask(struct ice_pf *pf)
1627	{
1628	struct ice_hw *hw = &pf->hw;
1629
1630	if (!test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state))
1631	return;
1632
1633	if (__ice_clean_ctrlq(pf, q_type: ICE_CTL_Q_MAILBOX))
1634	return;
1635
1636	clear_bit(nr: ICE_MAILBOXQ_EVENT_PENDING, addr: pf->state);
1637
1638	if (ice_ctrlq_pending(hw, cq: &hw->mailboxq))
1639	__ice_clean_ctrlq(pf, q_type: ICE_CTL_Q_MAILBOX);
1640
1641	ice_flush(hw);
1642	}
1643
1644	/**
1645	* ice_clean_sbq_subtask - clean the Sideband Queue rings
1646	* @pf: board private structure
1647	*/
1648	static void ice_clean_sbq_subtask(struct ice_pf *pf)
1649	{
1650	struct ice_hw *hw = &pf->hw;
1651
1652	/ if mac_type is not generic, sideband is not supported*
1653	* and there's nothing to do here
1654	*/
1655	if (!ice_is_generic_mac(hw)) {
1656	clear_bit(nr: ICE_SIDEBANDQ_EVENT_PENDING, addr: pf->state);
1657	return;
1658	}
1659
1660	if (!test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state))
1661	return;
1662
1663	if (__ice_clean_ctrlq(pf, q_type: ICE_CTL_Q_SB))
1664	return;
1665
1666	clear_bit(nr: ICE_SIDEBANDQ_EVENT_PENDING, addr: pf->state);
1667
1668	if (ice_ctrlq_pending(hw, cq: &hw->sbq))
1669	__ice_clean_ctrlq(pf, q_type: ICE_CTL_Q_SB);
1670
1671	ice_flush(hw);
1672	}
1673
1674	/**
1675	* ice_service_task_schedule - schedule the service task to wake up
1676	* @pf: board private structure
1677	*
1678	* If not already scheduled, this puts the task into the work queue.
1679	*/
1680	void ice_service_task_schedule(struct ice_pf *pf)
1681	{
1682	if (!test_bit(ICE_SERVICE_DIS, pf->state) &&
1683	!test_and_set_bit(nr: ICE_SERVICE_SCHED, addr: pf->state) &&
1684	!test_bit(ICE_NEEDS_RESTART, pf->state))
1685	queue_work(wq: ice_wq, work: &pf->serv_task);
1686	}
1687
1688	/**
1689	* ice_service_task_complete - finish up the service task
1690	* @pf: board private structure
1691	*/
1692	static void ice_service_task_complete(struct ice_pf *pf)
1693	{
1694	WARN_ON(!test_bit(ICE_SERVICE_SCHED, pf->state));
1695
1696	/ force memory (pf->state) to sync before next service task /
1697	smp_mb__before_atomic();
1698	clear_bit(nr: ICE_SERVICE_SCHED, addr: pf->state);
1699	}
1700
1701	/**
1702	* ice_service_task_stop - stop service task and cancel works
1703	* @pf: board private structure
1704	*
1705	* Return 0 if the ICE_SERVICE_DIS bit was not already set,
1706	* 1 otherwise.
1707	*/
1708	static int ice_service_task_stop(struct ice_pf *pf)
1709	{
1710	int ret;
1711
1712	ret = test_and_set_bit(nr: ICE_SERVICE_DIS, addr: pf->state);
1713
1714	if (pf->serv_tmr.function)
1715	del_timer_sync(timer: &pf->serv_tmr);
1716	if (pf->serv_task.func)
1717	cancel_work_sync(work: &pf->serv_task);
1718
1719	clear_bit(nr: ICE_SERVICE_SCHED, addr: pf->state);
1720	return ret;
1721	}
1722
1723	/**
1724	* ice_service_task_restart - restart service task and schedule works
1725	* @pf: board private structure
1726	*
1727	* This function is needed for suspend and resume works (e.g WoL scenario)
1728	*/
1729	static void ice_service_task_restart(struct ice_pf *pf)
1730	{
1731	clear_bit(nr: ICE_SERVICE_DIS, addr: pf->state);
1732	ice_service_task_schedule(pf);
1733	}
1734
1735	/**
1736	* ice_service_timer - timer callback to schedule service task
1737	* @t: pointer to timer_list
1738	*/
1739	static void ice_service_timer(struct timer_list *t)
1740	{
1741	struct ice_pf *pf = from_timer(pf, t, serv_tmr);
1742
1743	mod_timer(timer: &pf->serv_tmr, expires: round_jiffies(j: pf->serv_tmr_period + jiffies));
1744	ice_service_task_schedule(pf);
1745	}
1746
1747	/**
1748	* ice_handle_mdd_event - handle malicious driver detect event
1749	* @pf: pointer to the PF structure
1750	*
1751	* Called from service task. OICR interrupt handler indicates MDD event.
1752	* VF MDD logging is guarded by net_ratelimit. Additional PF and VF log
1753	* messages are wrapped by netif_msg_[rx\|tx]_err. Since VF Rx MDD events
1754	* disable the queue, the PF can be configured to reset the VF using ethtool
1755	* private flag mdd-auto-reset-vf.
1756	*/
1757	static void ice_handle_mdd_event(struct ice_pf *pf)
1758	{
1759	struct device *dev = ice_pf_to_dev(pf);
1760	struct ice_hw *hw = &pf->hw;
1761	struct ice_vf *vf;
1762	unsigned int bkt;
1763	u32 reg;
1764
1765	if (!test_and_clear_bit(nr: ICE_MDD_EVENT_PENDING, addr: pf->state)) {
1766	/ Since the VF MDD event logging is rate limited, check if*
1767	* there are pending MDD events.
1768	*/
1769	ice_print_vfs_mdd_events(pf);
1770	return;
1771	}
1772
1773	/ find what triggered an MDD event /
1774	reg = rd32(hw, GL_MDET_TX_PQM);
1775	if (reg & GL_MDET_TX_PQM_VALID_M) {
1776	u8 pf_num = FIELD_GET(GL_MDET_TX_PQM_PF_NUM_M, reg);
1777	u16 vf_num = FIELD_GET(GL_MDET_TX_PQM_VF_NUM_M, reg);
1778	u8 event = FIELD_GET(GL_MDET_TX_PQM_MAL_TYPE_M, reg);
1779	u16 queue = FIELD_GET(GL_MDET_TX_PQM_QNUM_M, reg);
1780
1781	if (netif_msg_tx_err(pf))
1782	dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
1783	event, queue, pf_num, vf_num);
1784	wr32(hw, GL_MDET_TX_PQM, `0xffffffff`);
1785	}
1786
1787	reg = rd32(hw, GL_MDET_TX_TCLAN_BY_MAC(hw));
1788	if (reg & GL_MDET_TX_TCLAN_VALID_M) {
1789	u8 pf_num = FIELD_GET(GL_MDET_TX_TCLAN_PF_NUM_M, reg);
1790	u16 vf_num = FIELD_GET(GL_MDET_TX_TCLAN_VF_NUM_M, reg);
1791	u8 event = FIELD_GET(GL_MDET_TX_TCLAN_MAL_TYPE_M, reg);
1792	u16 queue = FIELD_GET(GL_MDET_TX_TCLAN_QNUM_M, reg);
1793
1794	if (netif_msg_tx_err(pf))
1795	dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
1796	event, queue, pf_num, vf_num);
1797	wr32(hw, GL_MDET_TX_TCLAN_BY_MAC(hw), U32_MAX);
1798	}
1799
1800	reg = rd32(hw, GL_MDET_RX);
1801	if (reg & GL_MDET_RX_VALID_M) {
1802	u8 pf_num = FIELD_GET(GL_MDET_RX_PF_NUM_M, reg);
1803	u16 vf_num = FIELD_GET(GL_MDET_RX_VF_NUM_M, reg);
1804	u8 event = FIELD_GET(GL_MDET_RX_MAL_TYPE_M, reg);
1805	u16 queue = FIELD_GET(GL_MDET_RX_QNUM_M, reg);
1806
1807	if (netif_msg_rx_err(pf))
1808	dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n",
1809	event, queue, pf_num, vf_num);
1810	wr32(hw, GL_MDET_RX, `0xffffffff`);
1811	}
1812
1813	/ check to see if this PF caused an MDD event /
1814	reg = rd32(hw, PF_MDET_TX_PQM);
1815	if (reg & PF_MDET_TX_PQM_VALID_M) {
1816	wr32(hw, PF_MDET_TX_PQM, `0xFFFF`);
1817	if (netif_msg_tx_err(pf))
1818	dev_info(dev, "Malicious Driver Detection event TX_PQM detected on PF\n");
1819	}
1820
1821	reg = rd32(hw, PF_MDET_TX_TCLAN_BY_MAC(hw));
1822	if (reg & PF_MDET_TX_TCLAN_VALID_M) {
1823	wr32(hw, PF_MDET_TX_TCLAN_BY_MAC(hw), `0xffff`);
1824	if (netif_msg_tx_err(pf))
1825	dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on PF\n");
1826	}
1827
1828	reg = rd32(hw, PF_MDET_RX);
1829	if (reg & PF_MDET_RX_VALID_M) {
1830	wr32(hw, PF_MDET_RX, `0xFFFF`);
1831	if (netif_msg_rx_err(pf))
1832	dev_info(dev, "Malicious Driver Detection event RX detected on PF\n");
1833	}
1834
1835	/ Check to see if one of the VFs caused an MDD event, and then*
1836	* increment counters and set print pending
1837	*/
1838	mutex_lock(&pf->vfs.table_lock);
1839	ice_for_each_vf(pf, bkt, vf) {
1840	reg = rd32(hw, VP_MDET_TX_PQM(vf->vf_id));
1841	if (reg & VP_MDET_TX_PQM_VALID_M) {
1842	wr32(hw, VP_MDET_TX_PQM(vf->vf_id), `0xFFFF`);
1843	vf->mdd_tx_events.count++;
1844	set_bit(nr: ICE_MDD_VF_PRINT_PENDING, addr: pf->state);
1845	if (netif_msg_tx_err(pf))
1846	dev_info(dev, "Malicious Driver Detection event TX_PQM detected on VF %d\n",
1847	vf->vf_id);
1848	}
1849
1850	reg = rd32(hw, VP_MDET_TX_TCLAN(vf->vf_id));
1851	if (reg & VP_MDET_TX_TCLAN_VALID_M) {
1852	wr32(hw, VP_MDET_TX_TCLAN(vf->vf_id), `0xFFFF`);
1853	vf->mdd_tx_events.count++;
1854	set_bit(nr: ICE_MDD_VF_PRINT_PENDING, addr: pf->state);
1855	if (netif_msg_tx_err(pf))
1856	dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n",
1857	vf->vf_id);
1858	}
1859
1860	reg = rd32(hw, VP_MDET_TX_TDPU(vf->vf_id));
1861	if (reg & VP_MDET_TX_TDPU_VALID_M) {
1862	wr32(hw, VP_MDET_TX_TDPU(vf->vf_id), `0xFFFF`);
1863	vf->mdd_tx_events.count++;
1864	set_bit(nr: ICE_MDD_VF_PRINT_PENDING, addr: pf->state);
1865	if (netif_msg_tx_err(pf))
1866	dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n",
1867	vf->vf_id);
1868	}
1869
1870	reg = rd32(hw, VP_MDET_RX(vf->vf_id));
1871	if (reg & VP_MDET_RX_VALID_M) {
1872	wr32(hw, VP_MDET_RX(vf->vf_id), `0xFFFF`);
1873	vf->mdd_rx_events.count++;
1874	set_bit(nr: ICE_MDD_VF_PRINT_PENDING, addr: pf->state);
1875	if (netif_msg_rx_err(pf))
1876	dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n",
1877	vf->vf_id);
1878
1879	/ Since the queue is disabled on VF Rx MDD events, the*
1880	* PF can be configured to reset the VF through ethtool
1881	* private flag mdd-auto-reset-vf.
1882	*/
1883	if (test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
1884	/ VF MDD event counters will be cleared by*
1885	* reset, so print the event prior to reset.
1886	*/
1887	ice_print_vf_rx_mdd_event(vf);
1888	ice_reset_vf(vf, flags: ICE_VF_RESET_LOCK);
1889	}
1890	}
1891	}
1892	mutex_unlock(lock: &pf->vfs.table_lock);
1893
1894	ice_print_vfs_mdd_events(pf);
1895	}
1896
1897	/**
1898	* ice_force_phys_link_state - Force the physical link state
1899	* @vsi: VSI to force the physical link state to up/down
1900	* @link_up: true/false indicates to set the physical link to up/down
1901	*
1902	* Force the physical link state by getting the current PHY capabilities from
1903	* hardware and setting the PHY config based on the determined capabilities. If
1904	* link changes a link event will be triggered because both the Enable Automatic
1905	* Link Update and LESM Enable bits are set when setting the PHY capabilities.
1906	*
1907	* Returns 0 on success, negative on failure
1908	*/
1909	static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
1910	{
1911	struct ice_aqc_get_phy_caps_data *pcaps;
1912	struct ice_aqc_set_phy_cfg_data *cfg;
1913	struct ice_port_info *pi;
1914	struct device *dev;
1915	int retcode;
1916
1917	if (!vsi \|\| !vsi->port_info \|\| !vsi->back)
1918	return -EINVAL;
1919	if (vsi->type != ICE_VSI_PF)
1920	return `0`;
1921
1922	dev = ice_pf_to_dev(vsi->back);
1923
1924	pi = vsi->port_info;
1925
1926	pcaps = kzalloc(size: sizeof(*pcaps), GFP_KERNEL);
1927	if (!pcaps)
1928	return -ENOMEM;
1929
1930	retcode = ice_aq_get_phy_caps(pi, qual_mods: false, ICE_AQC_REPORT_ACTIVE_CFG, caps: pcaps,
1931	NULL);
1932	if (retcode) {
1933	dev_err(dev, "Failed to get phy capabilities, VSI %d error %d\n",
1934	vsi->vsi_num, retcode);
1935	retcode = -EIO;
1936	goto out;
1937	}
1938
1939	/ No change in link /
1940	if (link_up == !!(pcaps->caps & ICE_AQC_PHY_EN_LINK) &&
1941	link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP))
1942	goto out;
1943
1944	/ Use the current user PHY configuration. The current user PHY*
1945	* configuration is initialized during probe from PHY capabilities
1946	* software mode, and updated on set PHY configuration.
1947	*/
1948	cfg = kmemdup(p: &pi->phy.curr_user_phy_cfg, size: sizeof(*cfg), GFP_KERNEL);
1949	if (!cfg) {
1950	retcode = -ENOMEM;
1951	goto out;
1952	}
1953
1954	cfg->caps \|= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
1955	if (link_up)
1956	cfg->caps \|= ICE_AQ_PHY_ENA_LINK;
1957	else
1958	cfg->caps &= ~ICE_AQ_PHY_ENA_LINK;
1959
1960	retcode = ice_aq_set_phy_cfg(hw: &vsi->back->hw, pi, cfg, NULL);
1961	if (retcode) {
1962	dev_err(dev, "Failed to set phy config, VSI %d error %d\n",
1963	vsi->vsi_num, retcode);
1964	retcode = -EIO;
1965	}
1966
1967	kfree(objp: cfg);
1968	out:
1969	kfree(objp: pcaps);
1970	return retcode;
1971	}
1972
1973	/**
1974	* ice_init_nvm_phy_type - Initialize the NVM PHY type
1975	* @pi: port info structure
1976	*
1977	* Initialize nvm_phy_type_[low\|high] for link lenient mode support
1978	*/
1979	static int ice_init_nvm_phy_type(struct ice_port_info *pi)
1980	{
1981	struct ice_aqc_get_phy_caps_data *pcaps;
1982	struct ice_pf *pf = pi->hw->back;
1983	int err;
1984
1985	pcaps = kzalloc(size: sizeof(*pcaps), GFP_KERNEL);
1986	if (!pcaps)
1987	return -ENOMEM;
1988
1989	err = ice_aq_get_phy_caps(pi, qual_mods: false, ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA,
1990	caps: pcaps, NULL);
1991
1992	if (err) {
1993	dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
1994	goto out;
1995	}
1996
1997	pf->nvm_phy_type_hi = pcaps->phy_type_high;
1998	pf->nvm_phy_type_lo = pcaps->phy_type_low;
1999
2000	out:
2001	kfree(objp: pcaps);
2002	return err;
2003	}
2004
2005	/**
2006	* ice_init_link_dflt_override - Initialize link default override
2007	* @pi: port info structure
2008	*
2009	* Initialize link default override and PHY total port shutdown during probe
2010	*/
2011	static void ice_init_link_dflt_override(struct ice_port_info *pi)
2012	{
2013	struct ice_link_default_override_tlv *ldo;
2014	struct ice_pf *pf = pi->hw->back;
2015
2016	ldo = &pf->link_dflt_override;
2017	if (ice_get_link_default_override(ldo, pi))
2018	return;
2019
2020	if (!(ldo->options & ICE_LINK_OVERRIDE_PORT_DIS))
2021	return;
2022
2023	/ Enable Total Port Shutdown (override/replace link-down-on-close*
2024	* ethtool private flag) for ports with Port Disable bit set.
2025	*/
2026	set_bit(nr: ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, addr: pf->flags);
2027	set_bit(nr: ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, addr: pf->flags);
2028	}
2029
2030	/**
2031	* ice_init_phy_cfg_dflt_override - Initialize PHY cfg default override settings
2032	* @pi: port info structure
2033	*
2034	* If default override is enabled, initialize the user PHY cfg speed and FEC
2035	* settings using the default override mask from the NVM.
2036	*
2037	* The PHY should only be configured with the default override settings the
2038	* first time media is available. The ICE_LINK_DEFAULT_OVERRIDE_PENDING state
2039	* is used to indicate that the user PHY cfg default override is initialized
2040	* and the PHY has not been configured with the default override settings. The
2041	* state is set here, and cleared in ice_configure_phy the first time the PHY is
2042	* configured.
2043	*
2044	* This function should be called only if the FW doesn't support default
2045	* configuration mode, as reported by ice_fw_supports_report_dflt_cfg.
2046	*/
2047	static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi)
2048	{
2049	struct ice_link_default_override_tlv *ldo;
2050	struct ice_aqc_set_phy_cfg_data *cfg;
2051	struct ice_phy_info *phy = &pi->phy;
2052	struct ice_pf *pf = pi->hw->back;
2053
2054	ldo = &pf->link_dflt_override;
2055
2056	/ If link default override is enabled, use to mask NVM PHY capabilities*
2057	* for speed and FEC default configuration.
2058	*/
2059	cfg = &phy->curr_user_phy_cfg;
2060
2061	if (ldo->phy_type_low \|\| ldo->phy_type_high) {
2062	cfg->phy_type_low = pf->nvm_phy_type_lo &
2063	cpu_to_le64(ldo->phy_type_low);
2064	cfg->phy_type_high = pf->nvm_phy_type_hi &
2065	cpu_to_le64(ldo->phy_type_high);
2066	}
2067	cfg->link_fec_opt = ldo->fec_options;
2068	phy->curr_user_fec_req = ICE_FEC_AUTO;
2069
2070	set_bit(nr: ICE_LINK_DEFAULT_OVERRIDE_PENDING, addr: pf->state);
2071	}
2072
2073	/**
2074	* ice_init_phy_user_cfg - Initialize the PHY user configuration
2075	* @pi: port info structure
2076	*
2077	* Initialize the current user PHY configuration, speed, FEC, and FC requested
2078	* mode to default. The PHY defaults are from get PHY capabilities topology
2079	* with media so call when media is first available. An error is returned if
2080	* called when media is not available. The PHY initialization completed state is
2081	* set here.
2082	*
2083	* These configurations are used when setting PHY
2084	* configuration. The user PHY configuration is updated on set PHY
2085	* configuration. Returns 0 on success, negative on failure
2086	*/
2087	static int ice_init_phy_user_cfg(struct ice_port_info *pi)
2088	{
2089	struct ice_aqc_get_phy_caps_data *pcaps;
2090	struct ice_phy_info *phy = &pi->phy;
2091	struct ice_pf *pf = pi->hw->back;
2092	int err;
2093
2094	if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
2095	return -EIO;
2096
2097	pcaps = kzalloc(size: sizeof(*pcaps), GFP_KERNEL);
2098	if (!pcaps)
2099	return -ENOMEM;
2100
2101	if (ice_fw_supports_report_dflt_cfg(hw: pi->hw))
2102	err = ice_aq_get_phy_caps(pi, qual_mods: false, ICE_AQC_REPORT_DFLT_CFG,
2103	caps: pcaps, NULL);
2104	else
2105	err = ice_aq_get_phy_caps(pi, qual_mods: false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
2106	caps: pcaps, NULL);
2107	if (err) {
2108	dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
2109	goto err_out;
2110	}
2111
2112	ice_copy_phy_caps_to_cfg(pi, caps: pcaps, cfg: &pi->phy.curr_user_phy_cfg);
2113
2114	/ check if lenient mode is supported and enabled /
2115	if (ice_fw_supports_link_override(hw: pi->hw) &&
2116	!(pcaps->module_compliance_enforcement &
2117	ICE_AQC_MOD_ENFORCE_STRICT_MODE)) {
2118	set_bit(nr: ICE_FLAG_LINK_LENIENT_MODE_ENA, addr: pf->flags);
2119
2120	/ if the FW supports default PHY configuration mode, then the driver*
2121	* does not have to apply link override settings. If not,
2122	* initialize user PHY configuration with link override values
2123	*/
2124	if (!ice_fw_supports_report_dflt_cfg(hw: pi->hw) &&
2125	(pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN)) {
2126	ice_init_phy_cfg_dflt_override(pi);
2127	goto out;
2128	}
2129	}
2130
2131	/ if link default override is not enabled, set user flow control and*
2132	* FEC settings based on what get_phy_caps returned
2133	*/
2134	phy->curr_user_fec_req = ice_caps_to_fec_mode(caps: pcaps->caps,
2135	fec_options: pcaps->link_fec_options);
2136	phy->curr_user_fc_req = ice_caps_to_fc_mode(caps: pcaps->caps);
2137
2138	out:
2139	phy->curr_user_speed_req = ICE_AQ_LINK_SPEED_M;
2140	set_bit(nr: ICE_PHY_INIT_COMPLETE, addr: pf->state);
2141	err_out:
2142	kfree(objp: pcaps);
2143	return err;
2144	}
2145
2146	/**
2147	* ice_configure_phy - configure PHY
2148	* @vsi: VSI of PHY
2149	*
2150	* Set the PHY configuration. If the current PHY configuration is the same as
2151	* the curr_user_phy_cfg, then do nothing to avoid link flap. Otherwise
2152	* configure the based get PHY capabilities for topology with media.
2153	*/
2154	static int ice_configure_phy(struct ice_vsi *vsi)
2155	{
2156	struct device *dev = ice_pf_to_dev(vsi->back);
2157	struct ice_port_info *pi = vsi->port_info;
2158	struct ice_aqc_get_phy_caps_data *pcaps;
2159	struct ice_aqc_set_phy_cfg_data *cfg;
2160	struct ice_phy_info *phy = &pi->phy;
2161	struct ice_pf *pf = vsi->back;
2162	int err;
2163
2164	/ Ensure we have media as we cannot configure a medialess port /
2165	if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
2166	return -ENOMEDIUM;
2167
2168	ice_print_topo_conflict(vsi);
2169
2170	if (!test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags) &&
2171	phy->link_info.topo_media_conflict == ICE_AQ_LINK_TOPO_UNSUPP_MEDIA)
2172	return -EPERM;
2173
2174	if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags))
2175	return ice_force_phys_link_state(vsi, link_up: true);
2176
2177	pcaps = kzalloc(size: sizeof(*pcaps), GFP_KERNEL);
2178	if (!pcaps)
2179	return -ENOMEM;
2180
2181	/ Get current PHY config /
2182	err = ice_aq_get_phy_caps(pi, qual_mods: false, ICE_AQC_REPORT_ACTIVE_CFG, caps: pcaps,
2183	NULL);
2184	if (err) {
2185	dev_err(dev, "Failed to get PHY configuration, VSI %d error %d\n",
2186	vsi->vsi_num, err);
2187	goto done;
2188	}
2189
2190	/ If PHY enable link is configured and configuration has not changed,*
2191	* there's nothing to do
2192	*/
2193	if (pcaps->caps & ICE_AQC_PHY_EN_LINK &&
2194	ice_phy_caps_equals_cfg(caps: pcaps, cfg: &phy->curr_user_phy_cfg))
2195	goto done;
2196
2197	/ Use PHY topology as baseline for configuration /
2198	memset(pcaps, `0`, sizeof(*pcaps));
2199	if (ice_fw_supports_report_dflt_cfg(hw: pi->hw))
2200	err = ice_aq_get_phy_caps(pi, qual_mods: false, ICE_AQC_REPORT_DFLT_CFG,
2201	caps: pcaps, NULL);
2202	else
2203	err = ice_aq_get_phy_caps(pi, qual_mods: false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
2204	caps: pcaps, NULL);
2205	if (err) {
2206	dev_err(dev, "Failed to get PHY caps, VSI %d error %d\n",
2207	vsi->vsi_num, err);
2208	goto done;
2209	}
2210
2211	cfg = kzalloc(size: sizeof(*cfg), GFP_KERNEL);
2212	if (!cfg) {
2213	err = -ENOMEM;
2214	goto done;
2215	}
2216
2217	ice_copy_phy_caps_to_cfg(pi, caps: pcaps, cfg);
2218
2219	/ Speed - If default override pending, use curr_user_phy_cfg set in*
2220	* ice_init_phy_user_cfg_ldo.
2221	*/
2222	if (test_and_clear_bit(nr: ICE_LINK_DEFAULT_OVERRIDE_PENDING,
2223	addr: vsi->back->state)) {
2224	cfg->phy_type_low = phy->curr_user_phy_cfg.phy_type_low;
2225	cfg->phy_type_high = phy->curr_user_phy_cfg.phy_type_high;
2226	} else {
2227	u64 phy_low = `0`, phy_high = `0`;
2228
2229	ice_update_phy_type(phy_type_low: &phy_low, phy_type_high: &phy_high,
2230	link_speeds_bitmap: pi->phy.curr_user_speed_req);
2231	cfg->phy_type_low = pcaps->phy_type_low & cpu_to_le64(phy_low);
2232	cfg->phy_type_high = pcaps->phy_type_high &
2233	cpu_to_le64(phy_high);
2234	}
2235
2236	/ Can't provide what was requested; use PHY capabilities /
2237	if (!cfg->phy_type_low && !cfg->phy_type_high) {
2238	cfg->phy_type_low = pcaps->phy_type_low;
2239	cfg->phy_type_high = pcaps->phy_type_high;
2240	}
2241
2242	/ FEC /
2243	ice_cfg_phy_fec(pi, cfg, fec: phy->curr_user_fec_req);
2244
2245	/ Can't provide what was requested; use PHY capabilities /
2246	if (cfg->link_fec_opt !=
2247	(cfg->link_fec_opt & pcaps->link_fec_options)) {
2248	cfg->caps \|= pcaps->caps & ICE_AQC_PHY_EN_AUTO_FEC;
2249	cfg->link_fec_opt = pcaps->link_fec_options;
2250	}
2251
2252	/ Flow Control - always supported; no need to check against*
2253	* capabilities
2254	*/
2255	ice_cfg_phy_fc(pi, cfg, req_mode: phy->curr_user_fc_req);
2256
2257	/ Enable link and link update /
2258	cfg->caps \|= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT \| ICE_AQ_PHY_ENA_LINK;
2259
2260	err = ice_aq_set_phy_cfg(hw: &pf->hw, pi, cfg, NULL);
2261	if (err)
2262	dev_err(dev, "Failed to set phy config, VSI %d error %d\n",
2263	vsi->vsi_num, err);
2264
2265	kfree(objp: cfg);
2266	done:
2267	kfree(objp: pcaps);
2268	return err;
2269	}
2270
2271	/**
2272	* ice_check_media_subtask - Check for media
2273	* @pf: pointer to PF struct
2274	*
2275	* If media is available, then initialize PHY user configuration if it is not
2276	* been, and configure the PHY if the interface is up.
2277	*/
2278	static void ice_check_media_subtask(struct ice_pf *pf)
2279	{
2280	struct ice_port_info *pi;
2281	struct ice_vsi *vsi;
2282	int err;
2283
2284	/ No need to check for media if it's already present /
2285	if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags))
2286	return;
2287
2288	vsi = ice_get_main_vsi(pf);
2289	if (!vsi)
2290	return;
2291
2292	/ Refresh link info and check if media is present /
2293	pi = vsi->port_info;
2294	err = ice_update_link_info(pi);
2295	if (err)
2296	return;
2297
2298	ice_check_link_cfg_err(pf, link_cfg_err: pi->phy.link_info.link_cfg_err);
2299
2300	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
2301	if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state))
2302	ice_init_phy_user_cfg(pi);
2303
2304	/ PHY settings are reset on media insertion, reconfigure*
2305	* PHY to preserve settings.
2306	*/
2307	if (test_bit(ICE_VSI_DOWN, vsi->state) &&
2308	test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags))
2309	return;
2310
2311	err = ice_configure_phy(vsi);
2312	if (!err)
2313	clear_bit(nr: ICE_FLAG_NO_MEDIA, addr: pf->flags);
2314
2315	/ A Link Status Event will be generated; the event handler*
2316	* will complete bringing the interface up
2317	*/
2318	}
2319	}
2320
2321	/**
2322	* ice_service_task - manage and run subtasks
2323	* @work: pointer to work_struct contained by the PF struct
2324	*/
2325	static void ice_service_task(struct work_struct *work)
2326	{
2327	struct ice_pf pf = container_of(work, struct* ice_pf, serv_task);
2328	unsigned long start_time = jiffies;
2329
2330	/ subtasks /
2331
2332	/ process reset requests first /
2333	ice_reset_subtask(pf);
2334
2335	/ bail if a reset/recovery cycle is pending or rebuild failed /
2336	if (ice_is_reset_in_progress(state: pf->state) \|\|
2337	test_bit(ICE_SUSPENDED, pf->state) \|\|
2338	test_bit(ICE_NEEDS_RESTART, pf->state)) {
2339	ice_service_task_complete(pf);
2340	return;
2341	}
2342
2343	if (test_and_clear_bit(nr: ICE_AUX_ERR_PENDING, addr: pf->state)) {
2344	struct iidc_event *event;
2345
2346	event = kzalloc(size: sizeof(*event), GFP_KERNEL);
2347	if (event) {
2348	set_bit(nr: IIDC_EVENT_CRIT_ERR, addr: event->type);
2349	/ report the entire OICR value to AUX driver /
2350	swap(event->reg, pf->oicr_err_reg);
2351	ice_send_event_to_aux(pf, event);
2352	kfree(objp: event);
2353	}
2354	}
2355
2356	/ unplug aux dev per request, if an unplug request came in*
2357	* while processing a plug request, this will handle it
2358	*/
2359	if (test_and_clear_bit(nr: ICE_FLAG_UNPLUG_AUX_DEV, addr: pf->flags))
2360	ice_unplug_aux_dev(pf);
2361
2362	/ Plug aux device per request /
2363	if (test_and_clear_bit(nr: ICE_FLAG_PLUG_AUX_DEV, addr: pf->flags))
2364	ice_plug_aux_dev(pf);
2365
2366	if (test_and_clear_bit(nr: ICE_FLAG_MTU_CHANGED, addr: pf->flags)) {
2367	struct iidc_event *event;
2368
2369	event = kzalloc(size: sizeof(*event), GFP_KERNEL);
2370	if (event) {
2371	set_bit(nr: IIDC_EVENT_AFTER_MTU_CHANGE, addr: event->type);
2372	ice_send_event_to_aux(pf, event);
2373	kfree(objp: event);
2374	}
2375	}
2376
2377	ice_clean_adminq_subtask(pf);
2378	ice_check_media_subtask(pf);
2379	ice_check_for_hang_subtask(pf);
2380	ice_sync_fltr_subtask(pf);
2381	ice_handle_mdd_event(pf);
2382	ice_watchdog_subtask(pf);
2383
2384	if (ice_is_safe_mode(pf)) {
2385	ice_service_task_complete(pf);
2386	return;
2387	}
2388
2389	ice_process_vflr_event(pf);
2390	ice_clean_mailboxq_subtask(pf);
2391	ice_clean_sbq_subtask(pf);
2392	ice_sync_arfs_fltrs(pf);
2393	ice_flush_fdir_ctx(pf);
2394
2395	/ Clear ICE_SERVICE_SCHED flag to allow scheduling next event /
2396	ice_service_task_complete(pf);
2397
2398	/ If the tasks have taken longer than one service timer period*
2399	* or there is more work to be done, reset the service timer to
2400	* schedule the service task now.
2401	*/
2402	if (time_after(jiffies, (start_time + pf->serv_tmr_period)) \|\|
2403	test_bit(ICE_MDD_EVENT_PENDING, pf->state) \|\|
2404	test_bit(ICE_VFLR_EVENT_PENDING, pf->state) \|\|
2405	test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state) \|\|
2406	test_bit(ICE_FD_VF_FLUSH_CTX, pf->state) \|\|
2407	test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state) \|\|
2408	test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
2409	mod_timer(timer: &pf->serv_tmr, expires: jiffies);
2410	}
2411
2412	/**
2413	* ice_set_ctrlq_len - helper function to set controlq length
2414	* @hw: pointer to the HW instance
2415	*/
2416	static void ice_set_ctrlq_len(struct ice_hw *hw)
2417	{
2418	hw->adminq.num_rq_entries = ICE_AQ_LEN;
2419	hw->adminq.num_sq_entries = ICE_AQ_LEN;
2420	hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
2421	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
2422	hw->mailboxq.num_rq_entries = PF_MBX_ARQLEN_ARQLEN_M;
2423	hw->mailboxq.num_sq_entries = ICE_MBXSQ_LEN;
2424	hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
2425	hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
2426	hw->sbq.num_rq_entries = ICE_SBQ_LEN;
2427	hw->sbq.num_sq_entries = ICE_SBQ_LEN;
2428	hw->sbq.rq_buf_size = ICE_SBQ_MAX_BUF_LEN;
2429	hw->sbq.sq_buf_size = ICE_SBQ_MAX_BUF_LEN;
2430	}
2431
2432	/**
2433	* ice_schedule_reset - schedule a reset
2434	* @pf: board private structure
2435	* @reset: reset being requested
2436	*/
2437	int ice_schedule_reset(struct ice_pf pf, enum* ice_reset_req reset)
2438	{
2439	struct device *dev = ice_pf_to_dev(pf);
2440
2441	/ bail out if earlier reset has failed /
2442	if (test_bit(ICE_RESET_FAILED, pf->state)) {
2443	dev_dbg(dev, "earlier reset has failed\n");
2444	return -EIO;
2445	}
2446	/ bail if reset/recovery already in progress /
2447	if (ice_is_reset_in_progress(state: pf->state)) {
2448	dev_dbg(dev, "Reset already in progress\n");
2449	return -EBUSY;
2450	}
2451
2452	switch (reset) {
2453	case ICE_RESET_PFR:
2454	set_bit(nr: ICE_PFR_REQ, addr: pf->state);
2455	break;
2456	case ICE_RESET_CORER:
2457	set_bit(nr: ICE_CORER_REQ, addr: pf->state);
2458	break;
2459	case ICE_RESET_GLOBR:
2460	set_bit(nr: ICE_GLOBR_REQ, addr: pf->state);
2461	break;
2462	default:
2463	return -EINVAL;
2464	}
2465
2466	ice_service_task_schedule(pf);
2467	return `0`;
2468	}
2469
2470	/**
2471	* ice_irq_affinity_notify - Callback for affinity changes
2472	* @notify: context as to what irq was changed
2473	* @mask: the new affinity mask
2474	*
2475	* This is a callback function used by the irq_set_affinity_notifier function
2476	* so that we may register to receive changes to the irq affinity masks.
2477	*/
2478	static void
2479	ice_irq_affinity_notify(struct irq_affinity_notify *notify,
2480	const cpumask_t *mask)
2481	{
2482	struct ice_q_vector *q_vector =
2483	container_of(notify, struct ice_q_vector, affinity_notify);
2484
2485	cpumask_copy(dstp: &q_vector->affinity_mask, srcp: mask);
2486	}
2487
2488	/**
2489	* ice_irq_affinity_release - Callback for affinity notifier release
2490	* @ref: internal core kernel usage
2491	*
2492	* This is a callback function used by the irq_set_affinity_notifier function
2493	* to inform the current notification subscriber that they will no longer
2494	* receive notifications.
2495	*/
2496	static void ice_irq_affinity_release(struct kref __always_unused *ref) {}
2497
2498	/**
2499	* ice_vsi_ena_irq - Enable IRQ for the given VSI
2500	* @vsi: the VSI being configured
2501	*/
2502	static int ice_vsi_ena_irq(struct ice_vsi *vsi)
2503	{
2504	struct ice_hw *hw = &vsi->back->hw;
2505	int i;
2506
2507	ice_for_each_q_vector(vsi, i)
2508	ice_irq_dynamic_ena(hw, vsi, q_vector: vsi->q_vectors[i]);
2509
2510	ice_flush(hw);
2511	return `0`;
2512	}
2513
2514	/**
2515	* ice_vsi_req_irq_msix - get MSI-X vectors from the OS for the VSI
2516	* @vsi: the VSI being configured
2517	* @basename: name for the vector
2518	*/
2519	static int ice_vsi_req_irq_msix(struct ice_vsi vsi, char* *basename)
2520	{
2521	int q_vectors = vsi->num_q_vectors;
2522	struct ice_pf *pf = vsi->back;
2523	struct device *dev;
2524	int rx_int_idx = `0`;
2525	int tx_int_idx = `0`;
2526	int vector, err;
2527	int irq_num;
2528
2529	dev = ice_pf_to_dev(pf);
2530	for (vector = `0`; vector < q_vectors; vector++) {
2531	struct ice_q_vector *q_vector = vsi->q_vectors[vector];
2532
2533	irq_num = q_vector->irq.virq;
2534
2535	if (q_vector->tx.tx_ring && q_vector->rx.rx_ring) {
2536	snprintf(buf: q_vector->name, size: sizeof(q_vector->name) - `1`,
2537	fmt: "%s-%s-%d", basename, "TxRx", rx_int_idx++);
2538	tx_int_idx++;
2539	} else if (q_vector->rx.rx_ring) {
2540	snprintf(buf: q_vector->name, size: sizeof(q_vector->name) - `1`,
2541	fmt: "%s-%s-%d", basename, "rx", rx_int_idx++);
2542	} else if (q_vector->tx.tx_ring) {
2543	snprintf(buf: q_vector->name, size: sizeof(q_vector->name) - `1`,
2544	fmt: "%s-%s-%d", basename, "tx", tx_int_idx++);
2545	} else {
2546	/ skip this unused q_vector /
2547	continue;
2548	}
2549	if (vsi->type == ICE_VSI_CTRL && vsi->vf)
2550	err = devm_request_irq(dev, irq: irq_num, handler: vsi->irq_handler,
2551	IRQF_SHARED, devname: q_vector->name,
2552	dev_id: q_vector);
2553	else
2554	err = devm_request_irq(dev, irq: irq_num, handler: vsi->irq_handler,
2555	irqflags: `0`, devname: q_vector->name, dev_id: q_vector);
2556	if (err) {
2557	netdev_err(dev: vsi->netdev, format: "MSIX request_irq failed, error: %d\n",
2558	err);
2559	goto free_q_irqs;
2560	}
2561
2562	/ register for affinity change notifications /
2563	if (!IS_ENABLED(CONFIG_RFS_ACCEL)) {
2564	struct irq_affinity_notify *affinity_notify;
2565
2566	affinity_notify = &q_vector->affinity_notify;
2567	affinity_notify->notify = ice_irq_affinity_notify;
2568	affinity_notify->release = ice_irq_affinity_release;
2569	irq_set_affinity_notifier(irq: irq_num, notify: affinity_notify);
2570	}
2571
2572	/ assign the mask for this irq /
2573	irq_set_affinity_hint(irq: irq_num, m: &q_vector->affinity_mask);
2574	}
2575
2576	err = ice_set_cpu_rx_rmap(vsi);
2577	if (err) {
2578	netdev_err(dev: vsi->netdev, format: "Failed to setup CPU RMAP on VSI %u: %pe\n",
2579	vsi->vsi_num, ERR_PTR(error: err));
2580	goto free_q_irqs;
2581	}
2582
2583	vsi->irqs_ready = true;
2584	return `0`;
2585
2586	free_q_irqs:
2587	while (vector--) {
2588	irq_num = vsi->q_vectors[vector]->irq.virq;
2589	if (!IS_ENABLED(CONFIG_RFS_ACCEL))
2590	irq_set_affinity_notifier(irq: irq_num, NULL);
2591	irq_set_affinity_hint(irq: irq_num, NULL);
2592	devm_free_irq(dev, irq: irq_num, dev_id: &vsi->q_vectors[vector]);
2593	}
2594	return err;
2595	}
2596
2597	/**
2598	* ice_xdp_alloc_setup_rings - Allocate and setup Tx rings for XDP
2599	* @vsi: VSI to setup Tx rings used by XDP
2600	*
2601	* Return 0 on success and negative value on error
2602	*/
2603	static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
2604	{
2605	struct device *dev = ice_pf_to_dev(vsi->back);
2606	struct ice_tx_desc *tx_desc;
2607	int i, j;
2608
2609	ice_for_each_xdp_txq(vsi, i) {
2610	u16 xdp_q_idx = vsi->alloc_txq + i;
2611	struct ice_ring_stats *ring_stats;
2612	struct ice_tx_ring *xdp_ring;
2613
2614	xdp_ring = kzalloc(size: sizeof(*xdp_ring), GFP_KERNEL);
2615	if (!xdp_ring)
2616	goto free_xdp_rings;
2617
2618	ring_stats = kzalloc(size: sizeof(*ring_stats), GFP_KERNEL);
2619	if (!ring_stats) {
2620	ice_free_tx_ring(tx_ring: xdp_ring);
2621	goto free_xdp_rings;
2622	}
2623
2624	xdp_ring->ring_stats = ring_stats;
2625	xdp_ring->q_index = xdp_q_idx;
2626	xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx];
2627	xdp_ring->vsi = vsi;
2628	xdp_ring->netdev = NULL;
2629	xdp_ring->dev = dev;
2630	xdp_ring->count = vsi->num_tx_desc;
2631	WRITE_ONCE(vsi->xdp_rings[i], xdp_ring);
2632	if (ice_setup_tx_ring(tx_ring: xdp_ring))
2633	goto free_xdp_rings;
2634	ice_set_ring_xdp(ring: xdp_ring);
2635	spin_lock_init(&xdp_ring->tx_lock);
2636	for (j = `0`; j < xdp_ring->count; j++) {
2637	tx_desc = ICE_TX_DESC(xdp_ring, j);
2638	tx_desc->cmd_type_offset_bsz = `0`;
2639	}
2640	}
2641
2642	return `0`;
2643
2644	free_xdp_rings:
2645	for (; i >= `0`; i--) {
2646	if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc) {
2647	kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu);
2648	vsi->xdp_rings[i]->ring_stats = NULL;
2649	ice_free_tx_ring(tx_ring: vsi->xdp_rings[i]);
2650	}
2651	}
2652	return -ENOMEM;
2653	}
2654
2655	/**
2656	* ice_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI
2657	* @vsi: VSI to set the bpf prog on
2658	* @prog: the bpf prog pointer
2659	*/
2660	static void ice_vsi_assign_bpf_prog(struct ice_vsi vsi, struct* bpf_prog *prog)
2661	{
2662	struct bpf_prog *old_prog;
2663	int i;
2664
2665	old_prog = xchg(&vsi->xdp_prog, prog);
2666	ice_for_each_rxq(vsi, i)
2667	WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
2668
2669	if (old_prog)
2670	bpf_prog_put(prog: old_prog);
2671	}
2672
2673	/**
2674	* ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
2675	* @vsi: VSI to bring up Tx rings used by XDP
2676	* @prog: bpf program that will be assigned to VSI
2677	*
2678	* Return 0 on success and negative value on error
2679	*/
2680	int ice_prepare_xdp_rings(struct ice_vsi vsi, struct* bpf_prog *prog)
2681	{
2682	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { `0` };
2683	int xdp_rings_rem = vsi->num_xdp_txq;
2684	struct ice_pf *pf = vsi->back;
2685	struct ice_qs_cfg xdp_qs_cfg = {
2686	.qs_mutex = &pf->avail_q_mutex,
2687	.pf_map = pf->avail_txqs,
2688	.pf_map_size = pf->max_pf_txqs,
2689	.q_count = vsi->num_xdp_txq,
2690	.scatter_count = ICE_MAX_SCATTER_TXQS,
2691	.vsi_map = vsi->txq_map,
2692	.vsi_map_offset = vsi->alloc_txq,
2693	.mapping_mode = ICE_VSI_MAP_CONTIG
2694	};
2695	struct device *dev;
2696	int i, v_idx;
2697	int status;
2698
2699	dev = ice_pf_to_dev(pf);
2700	vsi->xdp_rings = devm_kcalloc(dev, n: vsi->num_xdp_txq,
2701	size: sizeof(*vsi->xdp_rings), GFP_KERNEL);
2702	if (!vsi->xdp_rings)
2703	return -ENOMEM;
2704
2705	vsi->xdp_mapping_mode = xdp_qs_cfg.mapping_mode;
2706	if (__ice_vsi_get_qs(qs_cfg: &xdp_qs_cfg))
2707	goto err_map_xdp;
2708
2709	if (static_key_enabled(&ice_xdp_locking_key))
2710	netdev_warn(dev: vsi->netdev,
2711	format: "Could not allocate one XDP Tx ring per CPU, XDP_TX/XDP_REDIRECT actions will be slower\n");
2712
2713	if (ice_xdp_alloc_setup_rings(vsi))
2714	goto clear_xdp_rings;
2715
2716	/ follow the logic from ice_vsi_map_rings_to_vectors /
2717	ice_for_each_q_vector(vsi, v_idx) {
2718	struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
2719	int xdp_rings_per_v, q_id, q_base;
2720
2721	xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
2722	vsi->num_q_vectors - v_idx);
2723	q_base = vsi->num_xdp_txq - xdp_rings_rem;
2724
2725	for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
2726	struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_id];
2727
2728	xdp_ring->q_vector = q_vector;
2729	xdp_ring->next = q_vector->tx.tx_ring;
2730	q_vector->tx.tx_ring = xdp_ring;
2731	}
2732	xdp_rings_rem -= xdp_rings_per_v;
2733	}
2734
2735	ice_for_each_rxq(vsi, i) {
2736	if (static_key_enabled(&ice_xdp_locking_key)) {
2737	vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq];
2738	} else {
2739	struct ice_q_vector *q_vector = vsi->rx_rings[i]->q_vector;
2740	struct ice_tx_ring *ring;
2741
2742	ice_for_each_tx_ring(ring, q_vector->tx) {
2743	if (ice_ring_is_xdp(ring)) {
2744	vsi->rx_rings[i]->xdp_ring = ring;
2745	break;
2746	}
2747	}
2748	}
2749	ice_tx_xsk_pool(vsi, qid: i);
2750	}
2751
2752	/ omit the scheduler update if in reset path; XDP queues will be*
2753	* taken into account at the end of ice_vsi_rebuild, where
2754	* ice_cfg_vsi_lan is being called
2755	*/
2756	if (ice_is_reset_in_progress(state: pf->state))
2757	return `0`;
2758
2759	/ tell the Tx scheduler that right now we have*
2760	* additional queues
2761	*/
2762	for (i = `0`; i < vsi->tc_cfg.numtc; i++)
2763	max_txqs[i] = vsi->num_txq + vsi->num_xdp_txq;
2764
2765	status = ice_cfg_vsi_lan(pi: vsi->port_info, vsi_handle: vsi->idx, tc_bitmap: vsi->tc_cfg.ena_tc,
2766	max_lanqs: max_txqs);
2767	if (status) {
2768	dev_err(dev, "Failed VSI LAN queue config for XDP, error: %d\n",
2769	status);
2770	goto clear_xdp_rings;
2771	}
2772
2773	/ assign the prog only when it's not already present on VSI;*
2774	* this flow is a subject of both ethtool -L and ndo_bpf flows;
2775	* VSI rebuild that happens under ethtool -L can expose us to
2776	* the bpf_prog refcount issues as we would be swapping same
2777	* bpf_prog pointers from vsi->xdp_prog and calling bpf_prog_put
2778	* on it as it would be treated as an 'old_prog'; for ndo_bpf
2779	* this is not harmful as dev_xdp_install bumps the refcount
2780	* before calling the op exposed by the driver;
2781	*/
2782	if (!ice_is_xdp_ena_vsi(vsi))
2783	ice_vsi_assign_bpf_prog(vsi, prog);
2784
2785	return `0`;
2786	clear_xdp_rings:
2787	ice_for_each_xdp_txq(vsi, i)
2788	if (vsi->xdp_rings[i]) {
2789	kfree_rcu(vsi->xdp_rings[i], rcu);
2790	vsi->xdp_rings[i] = NULL;
2791	}
2792
2793	err_map_xdp:
2794	mutex_lock(&pf->avail_q_mutex);
2795	ice_for_each_xdp_txq(vsi, i) {
2796	clear_bit(nr: vsi->txq_map[i + vsi->alloc_txq], addr: pf->avail_txqs);
2797	vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
2798	}
2799	mutex_unlock(lock: &pf->avail_q_mutex);
2800
2801	devm_kfree(dev, p: vsi->xdp_rings);
2802	return -ENOMEM;
2803	}
2804
2805	/**
2806	* ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings
2807	* @vsi: VSI to remove XDP rings
2808	*
2809	* Detach XDP rings from irq vectors, clean up the PF bitmap and free
2810	* resources
2811	*/
2812	int ice_destroy_xdp_rings(struct ice_vsi *vsi)
2813	{
2814	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { `0` };
2815	struct ice_pf *pf = vsi->back;
2816	int i, v_idx;
2817
2818	/ q_vectors are freed in reset path so there's no point in detaching*
2819	* rings; in case of rebuild being triggered not from reset bits
2820	* in pf->state won't be set, so additionally check first q_vector
2821	* against NULL
2822	*/
2823	if (ice_is_reset_in_progress(state: pf->state) \|\| !vsi->q_vectors[`0`])
2824	goto free_qmap;
2825
2826	ice_for_each_q_vector(vsi, v_idx) {
2827	struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
2828	struct ice_tx_ring *ring;
2829
2830	ice_for_each_tx_ring(ring, q_vector->tx)
2831	if (!ring->tx_buf \|\| !ice_ring_is_xdp(ring))
2832	break;
2833
2834	/ restore the value of last node prior to XDP setup /
2835	q_vector->tx.tx_ring = ring;
2836	}
2837
2838	free_qmap:
2839	mutex_lock(&pf->avail_q_mutex);
2840	ice_for_each_xdp_txq(vsi, i) {
2841	clear_bit(nr: vsi->txq_map[i + vsi->alloc_txq], addr: pf->avail_txqs);
2842	vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
2843	}
2844	mutex_unlock(lock: &pf->avail_q_mutex);
2845
2846	ice_for_each_xdp_txq(vsi, i)
2847	if (vsi->xdp_rings[i]) {
2848	if (vsi->xdp_rings[i]->desc) {
2849	synchronize_rcu();
2850	ice_free_tx_ring(tx_ring: vsi->xdp_rings[i]);
2851	}
2852	kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu);
2853	vsi->xdp_rings[i]->ring_stats = NULL;
2854	kfree_rcu(vsi->xdp_rings[i], rcu);
2855	vsi->xdp_rings[i] = NULL;
2856	}
2857
2858	devm_kfree(ice_pf_to_dev(pf), p: vsi->xdp_rings);
2859	vsi->xdp_rings = NULL;
2860
2861	if (static_key_enabled(&ice_xdp_locking_key))
2862	static_branch_dec(&ice_xdp_locking_key);
2863
2864	if (ice_is_reset_in_progress(state: pf->state) \|\| !vsi->q_vectors[`0`])
2865	return `0`;
2866
2867	ice_vsi_assign_bpf_prog(vsi, NULL);
2868
2869	/ notify Tx scheduler that we destroyed XDP queues and bring*
2870	* back the old number of child nodes
2871	*/
2872	for (i = `0`; i < vsi->tc_cfg.numtc; i++)
2873	max_txqs[i] = vsi->num_txq;
2874
2875	/ change number of XDP Tx queues to 0 /
2876	vsi->num_xdp_txq = `0`;
2877
2878	return ice_cfg_vsi_lan(pi: vsi->port_info, vsi_handle: vsi->idx, tc_bitmap: vsi->tc_cfg.ena_tc,
2879	max_lanqs: max_txqs);
2880	}
2881
2882	/**
2883	* ice_vsi_rx_napi_schedule - Schedule napi on RX queues from VSI
2884	* @vsi: VSI to schedule napi on
2885	*/
2886	static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi)
2887	{
2888	int i;
2889
2890	ice_for_each_rxq(vsi, i) {
2891	struct ice_rx_ring *rx_ring = vsi->rx_rings[i];
2892
2893	if (rx_ring->xsk_pool)
2894	napi_schedule(n: &rx_ring->q_vector->napi);
2895	}
2896	}
2897
2898	/**
2899	* ice_vsi_determine_xdp_res - figure out how many Tx qs can XDP have
2900	* @vsi: VSI to determine the count of XDP Tx qs
2901	*
2902	* returns 0 if Tx qs count is higher than at least half of CPU count,
2903	* -ENOMEM otherwise
2904	*/
2905	int ice_vsi_determine_xdp_res(struct ice_vsi *vsi)
2906	{
2907	u16 avail = ice_get_avail_txq_count(pf: vsi->back);
2908	u16 cpus = num_possible_cpus();
2909
2910	if (avail < cpus / `2`)
2911	return -ENOMEM;
2912
2913	vsi->num_xdp_txq = min_t(u16, avail, cpus);
2914
2915	if (vsi->num_xdp_txq < cpus)
2916	static_branch_inc(&ice_xdp_locking_key);
2917
2918	return `0`;
2919	}
2920
2921	/**
2922	* ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
2923	* @vsi: Pointer to VSI structure
2924	*/
2925	static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
2926	{
2927	if (test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
2928	return ICE_RXBUF_1664;
2929	else
2930	return ICE_RXBUF_3072;
2931	}
2932
2933	/**
2934	* ice_xdp_setup_prog - Add or remove XDP eBPF program
2935	* @vsi: VSI to setup XDP for
2936	* @prog: XDP program
2937	* @extack: netlink extended ack
2938	*/
2939	static int
2940	ice_xdp_setup_prog(struct ice_vsi vsi, struct* bpf_prog *prog,
2941	struct netlink_ext_ack *extack)
2942	{
2943	unsigned int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
2944	bool if_running = netif_running(dev: vsi->netdev);
2945	int ret = `0`, xdp_ring_err = `0`;
2946
2947	if (prog && !prog->aux->xdp_has_frags) {
2948	if (frame_size > ice_max_xdp_frame_size(vsi)) {
2949	NL_SET_ERR_MSG_MOD(extack,
2950	"MTU is too large for linear frames and XDP prog does not support frags");
2951	return -EOPNOTSUPP;
2952	}
2953	}
2954
2955	/ hot swap progs and avoid toggling link /
2956	if (ice_is_xdp_ena_vsi(vsi) == !!prog) {
2957	ice_vsi_assign_bpf_prog(vsi, prog);
2958	return `0`;
2959	}
2960
2961	/ need to stop netdev while setting up the program for Rx rings /
2962	if (if_running && !test_and_set_bit(nr: ICE_VSI_DOWN, addr: vsi->state)) {
2963	ret = ice_down(vsi);
2964	if (ret) {
2965	NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed");
2966	return ret;
2967	}
2968	}
2969
2970	if (!ice_is_xdp_ena_vsi(vsi) && prog) {
2971	xdp_ring_err = ice_vsi_determine_xdp_res(vsi);
2972	if (xdp_ring_err) {
2973	NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP");
2974	} else {
2975	xdp_ring_err = ice_prepare_xdp_rings(vsi, prog);
2976	if (xdp_ring_err)
2977	NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed");
2978	}
2979	xdp_features_set_redirect_target(dev: vsi->netdev, support_sg: true);
2980	/ reallocate Rx queues that are used for zero-copy /
2981	xdp_ring_err = ice_realloc_zc_buf(vsi, zc: true);
2982	if (xdp_ring_err)
2983	NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed");
2984	} else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
2985	xdp_features_clear_redirect_target(dev: vsi->netdev);
2986	xdp_ring_err = ice_destroy_xdp_rings(vsi);
2987	if (xdp_ring_err)
2988	NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed");
2989	/ reallocate Rx queues that were used for zero-copy /
2990	xdp_ring_err = ice_realloc_zc_buf(vsi, zc: false);
2991	if (xdp_ring_err)
2992	NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed");
2993	}
2994
2995	if (if_running)
2996	ret = ice_up(vsi);
2997
2998	if (!ret && prog)
2999	ice_vsi_rx_napi_schedule(vsi);
3000
3001	return (ret \|\| xdp_ring_err) ? -ENOMEM : `0`;
3002	}
3003
3004	/**
3005	* ice_xdp_safe_mode - XDP handler for safe mode
3006	* @dev: netdevice
3007	* @xdp: XDP command
3008	*/
3009	static int ice_xdp_safe_mode(struct net_device __always_unused *dev,
3010	struct netdev_bpf *xdp)
3011	{
3012	NL_SET_ERR_MSG_MOD(xdp->extack,
3013	"Please provide working DDP firmware package in order to use XDP\n"
3014	"Refer to Documentation/networking/device_drivers/ethernet/intel/ice.rst");
3015	return -EOPNOTSUPP;
3016	}
3017
3018	/**
3019	* ice_xdp - implements XDP handler
3020	* @dev: netdevice
3021	* @xdp: XDP command
3022	*/
3023	static int ice_xdp(struct net_device dev, struct* netdev_bpf *xdp)
3024	{
3025	struct ice_netdev_priv *np = netdev_priv(dev);
3026	struct ice_vsi *vsi = np->vsi;
3027
3028	if (vsi->type != ICE_VSI_PF) {
3029	NL_SET_ERR_MSG_MOD(xdp->extack, "XDP can be loaded only on PF VSI");
3030	return -EINVAL;
3031	}
3032
3033	switch (xdp->command) {
3034	case XDP_SETUP_PROG:
3035	return ice_xdp_setup_prog(vsi, prog: xdp->prog, extack: xdp->extack);
3036	case XDP_SETUP_XSK_POOL:
3037	return ice_xsk_pool_setup(vsi, pool: xdp->xsk.pool,
3038	qid: xdp->xsk.queue_id);
3039	default:
3040	return -EINVAL;
3041	}
3042	}
3043
3044	/**
3045	* ice_ena_misc_vector - enable the non-queue interrupts
3046	* @pf: board private structure
3047	*/
3048	static void ice_ena_misc_vector(struct ice_pf *pf)
3049	{
3050	struct ice_hw *hw = &pf->hw;
3051	u32 pf_intr_start_offset;
3052	u32 val;
3053
3054	/ Disable anti-spoof detection interrupt to prevent spurious event*
3055	* interrupts during a function reset. Anti-spoof functionally is
3056	* still supported.
3057	*/
3058	val = rd32(hw, GL_MDCK_TX_TDPU);
3059	val \|= GL_MDCK_TX_TDPU_RCU_ANTISPOOF_ITR_DIS_M;
3060	wr32(hw, GL_MDCK_TX_TDPU, val);
3061
3062	/ clear things first /
3063	wr32(hw, PFINT_OICR_ENA, `0`); / disable all /
3064	rd32(hw, PFINT_OICR); / read to clear /
3065
3066	val = (PFINT_OICR_ECC_ERR_M \|
3067	PFINT_OICR_MAL_DETECT_M \|
3068	PFINT_OICR_GRST_M \|
3069	PFINT_OICR_PCI_EXCEPTION_M \|
3070	PFINT_OICR_VFLR_M \|
3071	PFINT_OICR_HMC_ERR_M \|
3072	PFINT_OICR_PE_PUSH_M \|
3073	PFINT_OICR_PE_CRITERR_M);
3074
3075	wr32(hw, PFINT_OICR_ENA, val);
3076
3077	/ SW_ITR_IDX = 0, but don't change INTENA /
3078	wr32(hw, GLINT_DYN_CTL(pf->oicr_irq.index),
3079	GLINT_DYN_CTL_SW_ITR_INDX_M \| GLINT_DYN_CTL_INTENA_MSK_M);
3080
3081	if (!pf->hw.dev_caps.ts_dev_info.ts_ll_int_read)
3082	return;
3083	pf_intr_start_offset = rd32(hw, PFINT_ALLOC) & PFINT_ALLOC_FIRST;
3084	wr32(hw, GLINT_DYN_CTL(pf->ll_ts_irq.index + pf_intr_start_offset),
3085	GLINT_DYN_CTL_SW_ITR_INDX_M \| GLINT_DYN_CTL_INTENA_MSK_M);
3086	}
3087
3088	/**
3089	* ice_ll_ts_intr - ll_ts interrupt handler
3090	* @irq: interrupt number
3091	* @data: pointer to a q_vector
3092	*/
3093	static irqreturn_t ice_ll_ts_intr(int __always_unused irq, void *data)
3094	{
3095	struct ice_pf *pf = data;
3096	u32 pf_intr_start_offset;
3097	struct ice_ptp_tx *tx;
3098	unsigned long flags;
3099	struct ice_hw *hw;
3100	u32 val;
3101	u8 idx;
3102
3103	hw = &pf->hw;
3104	tx = &pf->ptp.port.tx;
3105	spin_lock_irqsave(&tx->lock, flags);
3106	ice_ptp_complete_tx_single_tstamp(tx);
3107
3108	idx = find_next_bit_wrap(addr: tx->in_use, size: tx->len,
3109	offset: tx->last_ll_ts_idx_read + `1`);
3110	if (idx != tx->len)
3111	ice_ptp_req_tx_single_tstamp(tx, idx);
3112	spin_unlock_irqrestore(lock: &tx->lock, flags);
3113
3114	val = GLINT_DYN_CTL_INTENA_M \| GLINT_DYN_CTL_CLEARPBA_M \|
3115	(ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S);
3116	pf_intr_start_offset = rd32(hw, PFINT_ALLOC) & PFINT_ALLOC_FIRST;
3117	wr32(hw, GLINT_DYN_CTL(pf->ll_ts_irq.index + pf_intr_start_offset),
3118	val);
3119
3120	return IRQ_HANDLED;
3121	}
3122
3123	/**
3124	* ice_misc_intr - misc interrupt handler
3125	* @irq: interrupt number
3126	* @data: pointer to a q_vector
3127	*/
3128	static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
3129	{
3130	struct ice_pf pf = (struct* ice_pf *)data;
3131	irqreturn_t ret = IRQ_HANDLED;
3132	struct ice_hw *hw = &pf->hw;
3133	struct device *dev;
3134	u32 oicr, ena_mask;
3135
3136	dev = ice_pf_to_dev(pf);
3137	set_bit(nr: ICE_ADMINQ_EVENT_PENDING, addr: pf->state);
3138	set_bit(nr: ICE_MAILBOXQ_EVENT_PENDING, addr: pf->state);
3139	set_bit(nr: ICE_SIDEBANDQ_EVENT_PENDING, addr: pf->state);
3140
3141	oicr = rd32(hw, PFINT_OICR);
3142	ena_mask = rd32(hw, PFINT_OICR_ENA);
3143
3144	if (oicr & PFINT_OICR_SWINT_M) {
3145	ena_mask &= ~PFINT_OICR_SWINT_M;
3146	pf->sw_int_count++;
3147	}
3148
3149	if (oicr & PFINT_OICR_MAL_DETECT_M) {
3150	ena_mask &= ~PFINT_OICR_MAL_DETECT_M;
3151	set_bit(nr: ICE_MDD_EVENT_PENDING, addr: pf->state);
3152	}
3153	if (oicr & PFINT_OICR_VFLR_M) {
3154	/ disable any further VFLR event notifications /
3155	if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) {
3156	u32 reg = rd32(hw, PFINT_OICR_ENA);
3157
3158	reg &= ~PFINT_OICR_VFLR_M;
3159	wr32(hw, PFINT_OICR_ENA, reg);
3160	} else {
3161	ena_mask &= ~PFINT_OICR_VFLR_M;
3162	set_bit(nr: ICE_VFLR_EVENT_PENDING, addr: pf->state);
3163	}
3164	}
3165
3166	if (oicr & PFINT_OICR_GRST_M) {
3167	u32 reset;
3168
3169	/ we have a reset warning /
3170	ena_mask &= ~PFINT_OICR_GRST_M;
3171	reset = FIELD_GET(GLGEN_RSTAT_RESET_TYPE_M,
3172	rd32(hw, GLGEN_RSTAT));
3173
3174	if (reset == ICE_RESET_CORER)
3175	pf->corer_count++;
3176	else if (reset == ICE_RESET_GLOBR)
3177	pf->globr_count++;
3178	else if (reset == ICE_RESET_EMPR)
3179	pf->empr_count++;
3180	else
3181	dev_dbg(dev, "Invalid reset type %d\n", reset);
3182
3183	/ If a reset cycle isn't already in progress, we set a bit in*
3184	* pf->state so that the service task can start a reset/rebuild.
3185	*/
3186	if (!test_and_set_bit(nr: ICE_RESET_OICR_RECV, addr: pf->state)) {
3187	if (reset == ICE_RESET_CORER)
3188	set_bit(nr: ICE_CORER_RECV, addr: pf->state);
3189	else if (reset == ICE_RESET_GLOBR)
3190	set_bit(nr: ICE_GLOBR_RECV, addr: pf->state);
3191	else
3192	set_bit(nr: ICE_EMPR_RECV, addr: pf->state);
3193
3194	/ There are couple of different bits at play here.*
3195	* hw->reset_ongoing indicates whether the hardware is
3196	* in reset. This is set to true when a reset interrupt
3197	* is received and set back to false after the driver
3198	* has determined that the hardware is out of reset.
3199	*
3200	* ICE_RESET_OICR_RECV in pf->state indicates
3201	* that a post reset rebuild is required before the
3202	* driver is operational again. This is set above.
3203	*
3204	* As this is the start of the reset/rebuild cycle, set
3205	* both to indicate that.
3206	*/
3207	hw->reset_ongoing = true;
3208	}
3209	}
3210
3211	if (oicr & PFINT_OICR_TSYN_TX_M) {
3212	ena_mask &= ~PFINT_OICR_TSYN_TX_M;
3213	if (ice_pf_state_is_nominal(pf) &&
3214	pf->hw.dev_caps.ts_dev_info.ts_ll_int_read) {
3215	struct ice_ptp_tx *tx = &pf->ptp.port.tx;
3216	unsigned long flags;
3217	u8 idx;
3218
3219	spin_lock_irqsave(&tx->lock, flags);
3220	idx = find_next_bit_wrap(addr: tx->in_use, size: tx->len,
3221	offset: tx->last_ll_ts_idx_read + `1`);
3222	if (idx != tx->len)
3223	ice_ptp_req_tx_single_tstamp(tx, idx);
3224	spin_unlock_irqrestore(lock: &tx->lock, flags);
3225	} else if (ice_ptp_pf_handles_tx_interrupt(pf)) {
3226	set_bit(nr: ICE_MISC_THREAD_TX_TSTAMP, addr: pf->misc_thread);
3227	ret = IRQ_WAKE_THREAD;
3228	}
3229	}
3230
3231	if (oicr & PFINT_OICR_TSYN_EVNT_M) {
3232	u8 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
3233	u32 gltsyn_stat = rd32(hw, GLTSYN_STAT(tmr_idx));
3234
3235	ena_mask &= ~PFINT_OICR_TSYN_EVNT_M;
3236
3237	if (ice_pf_src_tmr_owned(pf)) {
3238	/ Save EVENTs from GLTSYN register /
3239	pf->ptp.ext_ts_irq \|= gltsyn_stat &
3240	(GLTSYN_STAT_EVENT0_M \|
3241	GLTSYN_STAT_EVENT1_M \|
3242	GLTSYN_STAT_EVENT2_M);
3243
3244	ice_ptp_extts_event(pf);
3245	}
3246	}
3247
3248	#define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M \| PFINT_OICR_HMC_ERR_M \| PFINT_OICR_PE_PUSH_M)
3249	if (oicr & ICE_AUX_CRIT_ERR) {
3250	pf->oicr_err_reg \|= oicr;
3251	set_bit(nr: ICE_AUX_ERR_PENDING, addr: pf->state);
3252	ena_mask &= ~ICE_AUX_CRIT_ERR;
3253	}
3254
3255	/ Report any remaining unexpected interrupts /
3256	oicr &= ena_mask;
3257	if (oicr) {
3258	dev_dbg(dev, "unhandled interrupt oicr=0x%08x\n", oicr);
3259	/ If a critical error is pending there is no choice but to*
3260	* reset the device.
3261	*/
3262	if (oicr & (PFINT_OICR_PCI_EXCEPTION_M \|
3263	PFINT_OICR_ECC_ERR_M)) {
3264	set_bit(nr: ICE_PFR_REQ, addr: pf->state);
3265	}
3266	}
3267	ice_service_task_schedule(pf);
3268	if (ret == IRQ_HANDLED)
3269	ice_irq_dynamic_ena(hw, NULL, NULL);
3270
3271	return ret;
3272	}
3273
3274	/**
3275	* ice_misc_intr_thread_fn - misc interrupt thread function
3276	* @irq: interrupt number
3277	* @data: pointer to a q_vector
3278	*/
3279	static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data)
3280	{
3281	struct ice_pf *pf = data;
3282	struct ice_hw *hw;
3283
3284	hw = &pf->hw;
3285
3286	if (ice_is_reset_in_progress(state: pf->state))
3287	goto skip_irq;
3288
3289	if (test_and_clear_bit(nr: ICE_MISC_THREAD_TX_TSTAMP, addr: pf->misc_thread)) {
3290	/ Process outstanding Tx timestamps. If there is more work,*
3291	* re-arm the interrupt to trigger again.
3292	*/
3293	if (ice_ptp_process_ts(pf) == ICE_TX_TSTAMP_WORK_PENDING) {
3294	wr32(hw, PFINT_OICR, PFINT_OICR_TSYN_TX_M);
3295	ice_flush(hw);
3296	}
3297	}
3298
3299	skip_irq:
3300	ice_irq_dynamic_ena(hw, NULL, NULL);
3301
3302	return IRQ_HANDLED;
3303	}
3304
3305	/**
3306	* ice_dis_ctrlq_interrupts - disable control queue interrupts
3307	* @hw: pointer to HW structure
3308	*/
3309	static void ice_dis_ctrlq_interrupts(struct ice_hw *hw)
3310	{
3311	/ disable Admin queue Interrupt causes /
3312	wr32(hw, PFINT_FW_CTL,
3313	rd32(hw, PFINT_FW_CTL) & ~PFINT_FW_CTL_CAUSE_ENA_M);
3314
3315	/ disable Mailbox queue Interrupt causes /
3316	wr32(hw, PFINT_MBX_CTL,
3317	rd32(hw, PFINT_MBX_CTL) & ~PFINT_MBX_CTL_CAUSE_ENA_M);
3318
3319	wr32(hw, PFINT_SB_CTL,
3320	rd32(hw, PFINT_SB_CTL) & ~PFINT_SB_CTL_CAUSE_ENA_M);
3321
3322	/ disable Control queue Interrupt causes /
3323	wr32(hw, PFINT_OICR_CTL,
3324	rd32(hw, PFINT_OICR_CTL) & ~PFINT_OICR_CTL_CAUSE_ENA_M);
3325
3326	ice_flush(hw);
3327	}
3328
3329	/**
3330	* ice_free_irq_msix_ll_ts- Unroll ll_ts vector setup
3331	* @pf: board private structure
3332	*/
3333	static void ice_free_irq_msix_ll_ts(struct ice_pf *pf)
3334	{
3335	int irq_num = pf->ll_ts_irq.virq;
3336
3337	synchronize_irq(irq: irq_num);
3338	devm_free_irq(ice_pf_to_dev(pf), irq: irq_num, dev_id: pf);
3339
3340	ice_free_irq(pf, map: pf->ll_ts_irq);
3341	}
3342
3343	/**
3344	* ice_free_irq_msix_misc - Unroll misc vector setup
3345	* @pf: board private structure
3346	*/
3347	static void ice_free_irq_msix_misc(struct ice_pf *pf)
3348	{
3349	int misc_irq_num = pf->oicr_irq.virq;
3350	struct ice_hw *hw = &pf->hw;
3351
3352	ice_dis_ctrlq_interrupts(hw);
3353
3354	/ disable OICR interrupt /
3355	wr32(hw, PFINT_OICR_ENA, `0`);
3356	ice_flush(hw);
3357
3358	synchronize_irq(irq: misc_irq_num);
3359	devm_free_irq(ice_pf_to_dev(pf), irq: misc_irq_num, dev_id: pf);
3360
3361	ice_free_irq(pf, map: pf->oicr_irq);
3362	if (pf->hw.dev_caps.ts_dev_info.ts_ll_int_read)
3363	ice_free_irq_msix_ll_ts(pf);
3364	}
3365
3366	/**
3367	* ice_ena_ctrlq_interrupts - enable control queue interrupts
3368	* @hw: pointer to HW structure
3369	* @reg_idx: HW vector index to associate the control queue interrupts with
3370	*/
3371	static void ice_ena_ctrlq_interrupts(struct ice_hw *hw, u16 reg_idx)
3372	{
3373	u32 val;
3374
3375	val = ((reg_idx & PFINT_OICR_CTL_MSIX_INDX_M) \|
3376	PFINT_OICR_CTL_CAUSE_ENA_M);
3377	wr32(hw, PFINT_OICR_CTL, val);
3378
3379	/ enable Admin queue Interrupt causes /
3380	val = ((reg_idx & PFINT_FW_CTL_MSIX_INDX_M) \|
3381	PFINT_FW_CTL_CAUSE_ENA_M);
3382	wr32(hw, PFINT_FW_CTL, val);
3383
3384	/ enable Mailbox queue Interrupt causes /
3385	val = ((reg_idx & PFINT_MBX_CTL_MSIX_INDX_M) \|
3386	PFINT_MBX_CTL_CAUSE_ENA_M);
3387	wr32(hw, PFINT_MBX_CTL, val);
3388
3389	if (!hw->dev_caps.ts_dev_info.ts_ll_int_read) {
3390	/ enable Sideband queue Interrupt causes /
3391	val = ((reg_idx & PFINT_SB_CTL_MSIX_INDX_M) \|
3392	PFINT_SB_CTL_CAUSE_ENA_M);
3393	wr32(hw, PFINT_SB_CTL, val);
3394	}
3395
3396	ice_flush(hw);
3397	}
3398
3399	/**
3400	* ice_req_irq_msix_misc - Setup the misc vector to handle non queue events
3401	* @pf: board private structure
3402	*
3403	* This sets up the handler for MSIX 0, which is used to manage the
3404	* non-queue interrupts, e.g. AdminQ and errors. This is not used
3405	* when in MSI or Legacy interrupt mode.
3406	*/
3407	static int ice_req_irq_msix_misc(struct ice_pf *pf)
3408	{
3409	struct device *dev = ice_pf_to_dev(pf);
3410	struct ice_hw *hw = &pf->hw;
3411	u32 pf_intr_start_offset;
3412	struct msi_map irq;
3413	int err = `0`;
3414
3415	if (!pf->int_name[`0`])
3416	snprintf(buf: pf->int_name, size: sizeof(pf->int_name) - `1`, fmt: "%s-%s:misc",
3417	dev_driver_string(dev), dev_name(dev));
3418
3419	if (!pf->int_name_ll_ts[`0`])
3420	snprintf(buf: pf->int_name_ll_ts, size: sizeof(pf->int_name_ll_ts) - `1`,
3421	fmt: "%s-%s:ll_ts", dev_driver_string(dev), dev_name(dev));
3422	/ Do not request IRQ but do enable OICR interrupt since settings are*
3423	* lost during reset. Note that this function is called only during
3424	* rebuild path and not while reset is in progress.
3425	*/
3426	if (ice_is_reset_in_progress(state: pf->state))
3427	goto skip_req_irq;
3428
3429	/ reserve one vector in irq_tracker for misc interrupts /
3430	irq = ice_alloc_irq(pf, dyn_only: false);
3431	if (irq.index < `0`)
3432	return irq.index;
3433
3434	pf->oicr_irq = irq;
3435	err = devm_request_threaded_irq(dev, irq: pf->oicr_irq.virq, handler: ice_misc_intr,
3436	thread_fn: ice_misc_intr_thread_fn, irqflags: `0`,
3437	devname: pf->int_name, dev_id: pf);
3438	if (err) {
3439	dev_err(dev, "devm_request_threaded_irq for %s failed: %d\n",
3440	pf->int_name, err);
3441	ice_free_irq(pf, map: pf->oicr_irq);
3442	return err;
3443	}
3444
3445	/ reserve one vector in irq_tracker for ll_ts interrupt /
3446	if (!pf->hw.dev_caps.ts_dev_info.ts_ll_int_read)
3447	goto skip_req_irq;
3448
3449	irq = ice_alloc_irq(pf, dyn_only: false);
3450	if (irq.index < `0`)
3451	return irq.index;
3452
3453	pf->ll_ts_irq = irq;
3454	err = devm_request_irq(dev, irq: pf->ll_ts_irq.virq, handler: ice_ll_ts_intr, irqflags: `0`,
3455	devname: pf->int_name_ll_ts, dev_id: pf);
3456	if (err) {
3457	dev_err(dev, "devm_request_irq for %s failed: %d\n",
3458	pf->int_name_ll_ts, err);
3459	ice_free_irq(pf, map: pf->ll_ts_irq);
3460	return err;
3461	}
3462
3463	skip_req_irq:
3464	ice_ena_misc_vector(pf);
3465
3466	ice_ena_ctrlq_interrupts(hw, reg_idx: pf->oicr_irq.index);
3467	/ This enables LL TS interrupt /
3468	pf_intr_start_offset = rd32(hw, PFINT_ALLOC) & PFINT_ALLOC_FIRST;
3469	if (pf->hw.dev_caps.ts_dev_info.ts_ll_int_read)
3470	wr32(hw, PFINT_SB_CTL,
3471	((pf->ll_ts_irq.index + pf_intr_start_offset) &
3472	PFINT_SB_CTL_MSIX_INDX_M) \| PFINT_SB_CTL_CAUSE_ENA_M);
3473	wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_irq.index),
3474	ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S);
3475
3476	ice_flush(hw);
3477	ice_irq_dynamic_ena(hw, NULL, NULL);
3478
3479	return `0`;
3480	}
3481
3482	/**
3483	* ice_napi_add - register NAPI handler for the VSI
3484	* @vsi: VSI for which NAPI handler is to be registered
3485	*
3486	* This function is only called in the driver's load path. Registering the NAPI
3487	* handler is done in ice_vsi_alloc_q_vector() for all other cases (i.e. resume,
3488	* reset/rebuild, etc.)
3489	*/
3490	static void ice_napi_add(struct ice_vsi *vsi)
3491	{
3492	int v_idx;
3493
3494	if (!vsi->netdev)
3495	return;
3496
3497	ice_for_each_q_vector(vsi, v_idx) {
3498	netif_napi_add(dev: vsi->netdev, napi: &vsi->q_vectors[v_idx]->napi,
3499	poll: ice_napi_poll);
3500	__ice_q_vector_set_napi_queues(q_vector: vsi->q_vectors[v_idx], locked: false);
3501	}
3502	}
3503
3504	/**
3505	* ice_set_ops - set netdev and ethtools ops for the given netdev
3506	* @vsi: the VSI associated with the new netdev
3507	*/
3508	static void ice_set_ops(struct ice_vsi *vsi)
3509	{
3510	struct net_device *netdev = vsi->netdev;
3511	struct ice_pf *pf = ice_netdev_to_pf(netdev);
3512
3513	if (ice_is_safe_mode(pf)) {
3514	netdev->netdev_ops = &ice_netdev_safe_mode_ops;
3515	ice_set_ethtool_safe_mode_ops(netdev);
3516	return;
3517	}
3518
3519	netdev->netdev_ops = &ice_netdev_ops;
3520	netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic;
3521	netdev->xdp_metadata_ops = &ice_xdp_md_ops;
3522	ice_set_ethtool_ops(netdev);
3523
3524	if (vsi->type != ICE_VSI_PF)
3525	return;
3526
3527	netdev->xdp_features = NETDEV_XDP_ACT_BASIC \| NETDEV_XDP_ACT_REDIRECT \|
3528	NETDEV_XDP_ACT_XSK_ZEROCOPY \|
3529	NETDEV_XDP_ACT_RX_SG;
3530	netdev->xdp_zc_max_segs = ICE_MAX_BUF_TXD;
3531	}
3532
3533	/**
3534	* ice_set_netdev_features - set features for the given netdev
3535	* @netdev: netdev instance
3536	*/
3537	static void ice_set_netdev_features(struct net_device *netdev)
3538	{
3539	struct ice_pf *pf = ice_netdev_to_pf(netdev);
3540	bool is_dvm_ena = ice_is_dvm_ena(hw: &pf->hw);
3541	netdev_features_t csumo_features;
3542	netdev_features_t vlano_features;
3543	netdev_features_t dflt_features;
3544	netdev_features_t tso_features;
3545
3546	if (ice_is_safe_mode(pf)) {
3547	/ safe mode /
3548	netdev->features = NETIF_F_SG \| NETIF_F_HIGHDMA;
3549	netdev->hw_features = netdev->features;
3550	return;
3551	}
3552
3553	dflt_features = NETIF_F_SG \|
3554	NETIF_F_HIGHDMA \|
3555	NETIF_F_NTUPLE \|
3556	NETIF_F_RXHASH;
3557
3558	csumo_features = NETIF_F_RXCSUM \|
3559	NETIF_F_IP_CSUM \|
3560	NETIF_F_SCTP_CRC \|
3561	NETIF_F_IPV6_CSUM;
3562
3563	vlano_features = NETIF_F_HW_VLAN_CTAG_FILTER \|
3564	NETIF_F_HW_VLAN_CTAG_TX \|
3565	NETIF_F_HW_VLAN_CTAG_RX;
3566
3567	/ Enable CTAG/STAG filtering by default in Double VLAN Mode (DVM) /
3568	if (is_dvm_ena)
3569	vlano_features \|= NETIF_F_HW_VLAN_STAG_FILTER;
3570
3571	tso_features = NETIF_F_TSO \|
3572	NETIF_F_TSO_ECN \|
3573	NETIF_F_TSO6 \|
3574	NETIF_F_GSO_GRE \|
3575	NETIF_F_GSO_UDP_TUNNEL \|
3576	NETIF_F_GSO_GRE_CSUM \|
3577	NETIF_F_GSO_UDP_TUNNEL_CSUM \|
3578	NETIF_F_GSO_PARTIAL \|
3579	NETIF_F_GSO_IPXIP4 \|
3580	NETIF_F_GSO_IPXIP6 \|
3581	NETIF_F_GSO_UDP_L4;
3582
3583	netdev->gso_partial_features \|= NETIF_F_GSO_UDP_TUNNEL_CSUM \|
3584	NETIF_F_GSO_GRE_CSUM;
3585	/ set features that user can change /
3586	netdev->hw_features = dflt_features \| csumo_features \|
3587	vlano_features \| tso_features;
3588
3589	/ add support for HW_CSUM on packets with MPLS header /
3590	netdev->mpls_features = NETIF_F_HW_CSUM \|
3591	NETIF_F_TSO \|
3592	NETIF_F_TSO6;
3593
3594	/ enable features /
3595	netdev->features \|= netdev->hw_features;
3596
3597	netdev->hw_features \|= NETIF_F_HW_TC;
3598	netdev->hw_features \|= NETIF_F_LOOPBACK;
3599
3600	/ encap and VLAN devices inherit default, csumo and tso features /
3601	netdev->hw_enc_features \|= dflt_features \| csumo_features \|
3602	tso_features;
3603	netdev->vlan_features \|= dflt_features \| csumo_features \|
3604	tso_features;
3605
3606	/ advertise support but don't enable by default since only one type of*
3607	* VLAN offload can be enabled at a time (i.e. CTAG or STAG). When one
3608	* type turns on the other has to be turned off. This is enforced by the
3609	* ice_fix_features() ndo callback.
3610	*/
3611	if (is_dvm_ena)
3612	netdev->hw_features \|= NETIF_F_HW_VLAN_STAG_RX \|
3613	NETIF_F_HW_VLAN_STAG_TX;
3614
3615	/ Leave CRC / FCS stripping enabled by default, but allow the value to*
3616	* be changed at runtime
3617	*/
3618	netdev->hw_features \|= NETIF_F_RXFCS;
3619
3620	netif_set_tso_max_size(dev: netdev, ICE_MAX_TSO_SIZE);
3621	}
3622
/**
 * ice_fill_rss_lut - Fill the RSS lookup table with default values
 * @lut: Lookup table
 * @rss_table_size: Lookup table size
 * @rss_size: Range of queue number for hashing
 *
 * Entry i is set to i modulo @rss_size, spreading the entries round-robin
 * over the queues. A @rss_size of zero would divide by zero; in that case
 * every entry is pointed at queue 0 instead.
 */
void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size)
{
	u16 i;

	for (i = 0; i < rss_table_size; i++)
		lut[i] = rss_size ? i % rss_size : 0;
}
3636
3637	/**
3638	* ice_pf_vsi_setup - Set up a PF VSI
3639	* @pf: board private structure
3640	* @pi: pointer to the port_info instance
3641	*
3642	* Returns pointer to the successfully allocated VSI software struct
3643	* on success, otherwise returns NULL on failure.
3644	*/
3645	static struct ice_vsi *
3646	ice_pf_vsi_setup(struct ice_pf pf, struct* ice_port_info *pi)
3647	{
3648	struct ice_vsi_cfg_params params = {};
3649
3650	params.type = ICE_VSI_PF;
3651	params.pi = pi;
3652	params.flags = ICE_VSI_FLAG_INIT;
3653
3654	return ice_vsi_setup(pf, params: &params);
3655	}
3656
3657	static struct ice_vsi *
3658	ice_chnl_vsi_setup(struct ice_pf pf, struct* ice_port_info *pi,
3659	struct ice_channel *ch)
3660	{
3661	struct ice_vsi_cfg_params params = {};
3662
3663	params.type = ICE_VSI_CHNL;
3664	params.pi = pi;
3665	params.ch = ch;
3666	params.flags = ICE_VSI_FLAG_INIT;
3667
3668	return ice_vsi_setup(pf, params: &params);
3669	}
3670
3671	/**
3672	* ice_ctrl_vsi_setup - Set up a control VSI
3673	* @pf: board private structure
3674	* @pi: pointer to the port_info instance
3675	*
3676	* Returns pointer to the successfully allocated VSI software struct
3677	* on success, otherwise returns NULL on failure.
3678	*/
3679	static struct ice_vsi *
3680	ice_ctrl_vsi_setup(struct ice_pf pf, struct* ice_port_info *pi)
3681	{
3682	struct ice_vsi_cfg_params params = {};
3683
3684	params.type = ICE_VSI_CTRL;
3685	params.pi = pi;
3686	params.flags = ICE_VSI_FLAG_INIT;
3687
3688	return ice_vsi_setup(pf, params: &params);
3689	}
3690
3691	/**
3692	* ice_lb_vsi_setup - Set up a loopback VSI
3693	* @pf: board private structure
3694	* @pi: pointer to the port_info instance
3695	*
3696	* Returns pointer to the successfully allocated VSI software struct
3697	* on success, otherwise returns NULL on failure.
3698	*/
3699	struct ice_vsi *
3700	ice_lb_vsi_setup(struct ice_pf pf, struct* ice_port_info *pi)
3701	{
3702	struct ice_vsi_cfg_params params = {};
3703
3704	params.type = ICE_VSI_LB;
3705	params.pi = pi;
3706	params.flags = ICE_VSI_FLAG_INIT;
3707
3708	return ice_vsi_setup(pf, params: &params);
3709	}
3710
3711	/**
3712	* ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload
3713	* @netdev: network interface to be adjusted
3714	* @proto: VLAN TPID
3715	* @vid: VLAN ID to be added
3716	*
3717	* net_device_ops implementation for adding VLAN IDs
3718	*/
3719	static int
3720	ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
3721	{
3722	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
3723	struct ice_vsi_vlan_ops *vlan_ops;
3724	struct ice_vsi *vsi = np->vsi;
3725	struct ice_vlan vlan;
3726	int ret;
3727
3728	/ VLAN 0 is added by default during load/reset /
3729	if (!vid)
3730	return `0`;
3731
3732	while (test_and_set_bit(nr: ICE_CFG_BUSY, addr: vsi->state))
3733	usleep_range(min: `1000`, max: `2000`);
3734
3735	/ Add multicast promisc rule for the VLAN ID to be added if*
3736	* all-multicast is currently enabled.
3737	*/
3738	if (vsi->current_netdev_flags & IFF_ALLMULTI) {
3739	ret = ice_fltr_set_vsi_promisc(hw: &vsi->back->hw, vsi_handle: vsi->idx,
3740	ICE_MCAST_VLAN_PROMISC_BITS,
3741	vid);
3742	if (ret)
3743	goto finish;
3744	}
3745
3746	vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
3747
3748	/ Add a switch rule for this VLAN ID so its corresponding VLAN tagged*
3749	* packets aren't pruned by the device's internal switch on Rx
3750	*/
3751	vlan = ICE_VLAN(be16_to_cpu(proto), vid, `0`);
3752	ret = vlan_ops->add_vlan(vsi, &vlan);
3753	if (ret)
3754	goto finish;
3755
3756	/ If all-multicast is currently enabled and this VLAN ID is only one*
3757	* besides VLAN-0 we have to update look-up type of multicast promisc
3758	* rule for VLAN-0 from ICE_SW_LKUP_PROMISC to ICE_SW_LKUP_PROMISC_VLAN.
3759	*/
3760	if ((vsi->current_netdev_flags & IFF_ALLMULTI) &&
3761	ice_vsi_num_non_zero_vlans(vsi) == `1`) {
3762	ice_fltr_clear_vsi_promisc(hw: &vsi->back->hw, vsi_handle: vsi->idx,
3763	ICE_MCAST_PROMISC_BITS, vid: `0`);
3764	ice_fltr_set_vsi_promisc(hw: &vsi->back->hw, vsi_handle: vsi->idx,
3765	ICE_MCAST_VLAN_PROMISC_BITS, vid: `0`);
3766	}
3767
3768	finish:
3769	clear_bit(nr: ICE_CFG_BUSY, addr: vsi->state);
3770
3771	return ret;
3772	}
3773
3774	/**
3775	* ice_vlan_rx_kill_vid - Remove a VLAN ID filter from HW offload
3776	* @netdev: network interface to be adjusted
3777	* @proto: VLAN TPID
3778	* @vid: VLAN ID to be removed
3779	*
3780	* net_device_ops implementation for removing VLAN IDs
3781	*/
3782	static int
3783	ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
3784	{
3785	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
3786	struct ice_vsi_vlan_ops *vlan_ops;
3787	struct ice_vsi *vsi = np->vsi;
3788	struct ice_vlan vlan;
3789	int ret;
3790
3791	/ don't allow removal of VLAN 0 /
3792	if (!vid)
3793	return `0`;
3794
3795	while (test_and_set_bit(nr: ICE_CFG_BUSY, addr: vsi->state))
3796	usleep_range(min: `1000`, max: `2000`);
3797
3798	ret = ice_clear_vsi_promisc(hw: &vsi->back->hw, vsi_handle: vsi->idx,
3799	ICE_MCAST_VLAN_PROMISC_BITS, vid);
3800	if (ret) {
3801	netdev_err(dev: netdev, format: "Error clearing multicast promiscuous mode on VSI %i\n",
3802	vsi->vsi_num);
3803	vsi->current_netdev_flags \|= IFF_ALLMULTI;
3804	}
3805
3806	vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
3807
3808	/ Make sure VLAN delete is successful before updating VLAN*
3809	* information
3810	*/
3811	vlan = ICE_VLAN(be16_to_cpu(proto), vid, `0`);
3812	ret = vlan_ops->del_vlan(vsi, &vlan);
3813	if (ret)
3814	goto finish;
3815
3816	/ Remove multicast promisc rule for the removed VLAN ID if*
3817	* all-multicast is enabled.
3818	*/
3819	if (vsi->current_netdev_flags & IFF_ALLMULTI)
3820	ice_fltr_clear_vsi_promisc(hw: &vsi->back->hw, vsi_handle: vsi->idx,
3821	ICE_MCAST_VLAN_PROMISC_BITS, vid);
3822
3823	if (!ice_vsi_has_non_zero_vlans(vsi)) {
3824	/ Update look-up type of multicast promisc rule for VLAN 0*
3825	* from ICE_SW_LKUP_PROMISC_VLAN to ICE_SW_LKUP_PROMISC when
3826	* all-multicast is enabled and VLAN 0 is the only VLAN rule.
3827	*/
3828	if (vsi->current_netdev_flags & IFF_ALLMULTI) {
3829	ice_fltr_clear_vsi_promisc(hw: &vsi->back->hw, vsi_handle: vsi->idx,
3830	ICE_MCAST_VLAN_PROMISC_BITS,
3831	vid: `0`);
3832	ice_fltr_set_vsi_promisc(hw: &vsi->back->hw, vsi_handle: vsi->idx,
3833	ICE_MCAST_PROMISC_BITS, vid: `0`);
3834	}
3835	}
3836
3837	finish:
3838	clear_bit(nr: ICE_CFG_BUSY, addr: vsi->state);
3839
3840	return ret;
3841	}
3842
3843	/**
3844	* ice_rep_indr_tc_block_unbind
3845	* @cb_priv: indirection block private data
3846	*/
3847	static void ice_rep_indr_tc_block_unbind(void *cb_priv)
3848	{
3849	struct ice_indr_block_priv *indr_priv = cb_priv;
3850
3851	list_del(entry: &indr_priv->list);
3852	kfree(objp: indr_priv);
3853	}
3854
3855	/**
3856	* ice_tc_indir_block_unregister - Unregister TC indirect block notifications
3857	* @vsi: VSI struct which has the netdev
3858	*/
3859	static void ice_tc_indir_block_unregister(struct ice_vsi *vsi)
3860	{
3861	struct ice_netdev_priv *np = netdev_priv(dev: vsi->netdev);
3862
3863	flow_indr_dev_unregister(cb: ice_indr_setup_tc_cb, cb_priv: np,
3864	release: ice_rep_indr_tc_block_unbind);
3865	}
3866
3867	/**
3868	* ice_tc_indir_block_register - Register TC indirect block notifications
3869	* @vsi: VSI struct which has the netdev
3870	*
3871	* Returns 0 on success, negative value on failure
3872	*/
3873	static int ice_tc_indir_block_register(struct ice_vsi *vsi)
3874	{
3875	struct ice_netdev_priv *np;
3876
3877	if (!vsi \|\| !vsi->netdev)
3878	return -EINVAL;
3879
3880	np = netdev_priv(dev: vsi->netdev);
3881
3882	INIT_LIST_HEAD(list: &np->tc_indr_block_priv_list);
3883	return flow_indr_dev_register(cb: ice_indr_setup_tc_cb, cb_priv: np);
3884	}
3885
3886	/**
3887	* ice_get_avail_q_count - Get count of queues in use
3888	* @pf_qmap: bitmap to get queue use count from
3889	* @lock: pointer to a mutex that protects access to pf_qmap
3890	* @size: size of the bitmap
3891	*/
3892	static u16
3893	ice_get_avail_q_count(unsigned long pf_qmap, struct* mutex *lock, u16 size)
3894	{
3895	unsigned long bit;
3896	u16 count = `0`;
3897
3898	mutex_lock(lock);
3899	for_each_clear_bit(bit, pf_qmap, size)
3900	count++;
3901	mutex_unlock(lock);
3902
3903	return count;
3904	}
3905
3906	/**
3907	* ice_get_avail_txq_count - Get count of Tx queues in use
3908	* @pf: pointer to an ice_pf instance
3909	*/
3910	u16 ice_get_avail_txq_count(struct ice_pf *pf)
3911	{
3912	return ice_get_avail_q_count(pf_qmap: pf->avail_txqs, lock: &pf->avail_q_mutex,
3913	size: pf->max_pf_txqs);
3914	}
3915
3916	/**
3917	* ice_get_avail_rxq_count - Get count of Rx queues in use
3918	* @pf: pointer to an ice_pf instance
3919	*/
3920	u16 ice_get_avail_rxq_count(struct ice_pf *pf)
3921	{
3922	return ice_get_avail_q_count(pf_qmap: pf->avail_rxqs, lock: &pf->avail_q_mutex,
3923	size: pf->max_pf_rxqs);
3924	}
3925
3926	/**
3927	* ice_deinit_pf - Unrolls initialziations done by ice_init_pf
3928	* @pf: board private structure to initialize
3929	*/
3930	static void ice_deinit_pf(struct ice_pf *pf)
3931	{
3932	ice_service_task_stop(pf);
3933	mutex_destroy(lock: &pf->lag_mutex);
3934	mutex_destroy(lock: &pf->adev_mutex);
3935	mutex_destroy(lock: &pf->sw_mutex);
3936	mutex_destroy(lock: &pf->tc_mutex);
3937	mutex_destroy(lock: &pf->avail_q_mutex);
3938	mutex_destroy(lock: &pf->vfs.table_lock);
3939
3940	if (pf->avail_txqs) {
3941	bitmap_free(bitmap: pf->avail_txqs);
3942	pf->avail_txqs = NULL;
3943	}
3944
3945	if (pf->avail_rxqs) {
3946	bitmap_free(bitmap: pf->avail_rxqs);
3947	pf->avail_rxqs = NULL;
3948	}
3949
3950	if (pf->ptp.clock)
3951	ptp_clock_unregister(ptp: pf->ptp.clock);
3952	}
3953
3954	/**
3955	* ice_set_pf_caps - set PFs capability flags
3956	* @pf: pointer to the PF instance
3957	*/
3958	static void ice_set_pf_caps(struct ice_pf *pf)
3959	{
3960	struct ice_hw_func_caps *func_caps = &pf->hw.func_caps;
3961
3962	clear_bit(nr: ICE_FLAG_RDMA_ENA, addr: pf->flags);
3963	if (func_caps->common_cap.rdma)
3964	set_bit(nr: ICE_FLAG_RDMA_ENA, addr: pf->flags);
3965	clear_bit(nr: ICE_FLAG_DCB_CAPABLE, addr: pf->flags);
3966	if (func_caps->common_cap.dcb)
3967	set_bit(nr: ICE_FLAG_DCB_CAPABLE, addr: pf->flags);
3968	clear_bit(nr: ICE_FLAG_SRIOV_CAPABLE, addr: pf->flags);
3969	if (func_caps->common_cap.sr_iov_1_1) {
3970	set_bit(nr: ICE_FLAG_SRIOV_CAPABLE, addr: pf->flags);
3971	pf->vfs.num_supported = min_t(int, func_caps->num_allocd_vfs,
3972	ICE_MAX_SRIOV_VFS);
3973	}
3974	clear_bit(nr: ICE_FLAG_RSS_ENA, addr: pf->flags);
3975	if (func_caps->common_cap.rss_table_size)
3976	set_bit(nr: ICE_FLAG_RSS_ENA, addr: pf->flags);
3977
3978	clear_bit(nr: ICE_FLAG_FD_ENA, addr: pf->flags);
3979	if (func_caps->fd_fltr_guar > `0` \|\| func_caps->fd_fltr_best_effort > `0`) {
3980	u16 unused;
3981
3982	/ ctrl_vsi_idx will be set to a valid value when flow director*
3983	* is setup by ice_init_fdir
3984	*/
3985	pf->ctrl_vsi_idx = ICE_NO_VSI;
3986	set_bit(nr: ICE_FLAG_FD_ENA, addr: pf->flags);
3987	/ force guaranteed filter pool for PF /
3988	ice_alloc_fd_guar_item(hw: &pf->hw, cntr_id: &unused,
3989	num_fltr: func_caps->fd_fltr_guar);
3990	/ force shared filter pool for PF /
3991	ice_alloc_fd_shrd_item(hw: &pf->hw, cntr_id: &unused,
3992	num_fltr: func_caps->fd_fltr_best_effort);
3993	}
3994
3995	clear_bit(nr: ICE_FLAG_PTP_SUPPORTED, addr: pf->flags);
3996	if (func_caps->common_cap.ieee_1588 &&
3997	!(pf->hw.mac_type == ICE_MAC_E830))
3998	set_bit(nr: ICE_FLAG_PTP_SUPPORTED, addr: pf->flags);
3999
4000	pf->max_pf_txqs = func_caps->common_cap.num_txq;
4001	pf->max_pf_rxqs = func_caps->common_cap.num_rxq;
4002	}
4003
4004	/**
4005	* ice_init_pf - Initialize general software structures (struct ice_pf)
4006	* @pf: board private structure to initialize
4007	*/
4008	static int ice_init_pf(struct ice_pf *pf)
4009	{
4010	ice_set_pf_caps(pf);
4011
4012	mutex_init(&pf->sw_mutex);
4013	mutex_init(&pf->tc_mutex);
4014	mutex_init(&pf->adev_mutex);
4015	mutex_init(&pf->lag_mutex);
4016
4017	INIT_HLIST_HEAD(&pf->aq_wait_list);
4018	spin_lock_init(&pf->aq_wait_lock);
4019	init_waitqueue_head(&pf->aq_wait_queue);
4020
4021	init_waitqueue_head(&pf->reset_wait_queue);
4022
4023	/ setup service timer and periodic service task /
4024	timer_setup(&pf->serv_tmr, ice_service_timer, `0`);
4025	pf->serv_tmr_period = HZ;
4026	INIT_WORK(&pf->serv_task, ice_service_task);
4027	clear_bit(nr: ICE_SERVICE_SCHED, addr: pf->state);
4028
4029	mutex_init(&pf->avail_q_mutex);
4030	pf->avail_txqs = bitmap_zalloc(nbits: pf->max_pf_txqs, GFP_KERNEL);
4031	if (!pf->avail_txqs)
4032	return -ENOMEM;
4033
4034	pf->avail_rxqs = bitmap_zalloc(nbits: pf->max_pf_rxqs, GFP_KERNEL);
4035	if (!pf->avail_rxqs) {
4036	bitmap_free(bitmap: pf->avail_txqs);
4037	pf->avail_txqs = NULL;
4038	return -ENOMEM;
4039	}
4040
4041	mutex_init(&pf->vfs.table_lock);
4042	hash_init(pf->vfs.table);
4043	ice_mbx_init_snapshot(hw: &pf->hw);
4044
4045	return `0`;
4046	}
4047
4048	/**
4049	* ice_is_wol_supported - check if WoL is supported
4050	* @hw: pointer to hardware info
4051	*
4052	* Check if WoL is supported based on the HW configuration.
4053	* Returns true if NVM supports and enables WoL for this port, false otherwise
4054	*/
4055	bool ice_is_wol_supported(struct ice_hw *hw)
4056	{
4057	u16 wol_ctrl;
4058
4059	/ A bit set to 1 in the NVM Software Reserved Word 2 (WoL control*
4060	* word) indicates WoL is not supported on the corresponding PF ID.
4061	*/
4062	if (ice_read_sr_word(hw, ICE_SR_NVM_WOL_CFG, data: &wol_ctrl))
4063	return false;
4064
4065	return !(BIT(hw->port_info->lport) & wol_ctrl);
4066	}
4067
4068	/**
4069	* ice_vsi_recfg_qs - Change the number of queues on a VSI
4070	* @vsi: VSI being changed
4071	* @new_rx: new number of Rx queues
4072	* @new_tx: new number of Tx queues
4073	* @locked: is adev device_lock held
4074	*
4075	* Only change the number of queues if new_tx, or new_rx is non-0.
4076	*
4077	* Returns 0 on success.
4078	*/
4079	int ice_vsi_recfg_qs(struct ice_vsi vsi, int* new_rx, int new_tx, bool locked)
4080	{
4081	struct ice_pf *pf = vsi->back;
4082	int err = `0`, timeout = `50`;
4083
4084	if (!new_rx && !new_tx)
4085	return -EINVAL;
4086
4087	while (test_and_set_bit(nr: ICE_CFG_BUSY, addr: pf->state)) {
4088	timeout--;
4089	if (!timeout)
4090	return -EBUSY;
4091	usleep_range(min: `1000`, max: `2000`);
4092	}
4093
4094	if (new_tx)
4095	vsi->req_txq = (u16)new_tx;
4096	if (new_rx)
4097	vsi->req_rxq = (u16)new_rx;
4098
4099	/ set for the next time the netdev is started /
4100	if (!netif_running(dev: vsi->netdev)) {
4101	ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
4102	dev_dbg(ice_pf_to_dev(pf), "Link is down, queue count change happens when link is brought up\n");
4103	goto done;
4104	}
4105
4106	ice_vsi_close(vsi);
4107	ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
4108	ice_pf_dcb_recfg(pf, locked);
4109	ice_vsi_open(vsi);
4110	done:
4111	clear_bit(nr: ICE_CFG_BUSY, addr: pf->state);
4112	return err;
4113	}
4114
4115	/**
4116	* ice_set_safe_mode_vlan_cfg - configure PF VSI to allow all VLANs in safe mode
4117	* @pf: PF to configure
4118	*
4119	* No VLAN offloads/filtering are advertised in safe mode so make sure the PF
4120	* VSI can still Tx/Rx VLAN tagged packets.
4121	*/
4122	static void ice_set_safe_mode_vlan_cfg(struct ice_pf *pf)
4123	{
4124	struct ice_vsi *vsi = ice_get_main_vsi(pf);
4125	struct ice_vsi_ctx *ctxt;
4126	struct ice_hw *hw;
4127	int status;
4128
4129	if (!vsi)
4130	return;
4131
4132	ctxt = kzalloc(size: sizeof(*ctxt), GFP_KERNEL);
4133	if (!ctxt)
4134	return;
4135
4136	hw = &pf->hw;
4137	ctxt->info = vsi->info;
4138
4139	ctxt->info.valid_sections =
4140	cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID \|
4141	ICE_AQ_VSI_PROP_SECURITY_VALID \|
4142	ICE_AQ_VSI_PROP_SW_VALID);
4143
4144	/ disable VLAN anti-spoof /
4145	ctxt->info.sec_flags &= ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
4146	ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
4147
4148	/ disable VLAN pruning and keep all other settings /
4149	ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
4150
4151	/ allow all VLANs on Tx and don't strip on Rx /
4152	ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL \|
4153	ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING;
4154
4155	status = ice_update_vsi(hw, vsi_handle: vsi->idx, vsi_ctx: ctxt, NULL);
4156	if (status) {
4157	dev_err(ice_pf_to_dev(vsi->back), "Failed to update VSI for safe mode VLANs, err %d aq_err %s\n",
4158	status, ice_aq_str(hw->adminq.sq_last_status));
4159	} else {
4160	vsi->info.sec_flags = ctxt->info.sec_flags;
4161	vsi->info.sw_flags2 = ctxt->info.sw_flags2;
4162	vsi->info.inner_vlan_flags = ctxt->info.inner_vlan_flags;
4163	}
4164
4165	kfree(objp: ctxt);
4166	}
4167
4168	/**
4169	* ice_log_pkg_init - log result of DDP package load
4170	* @hw: pointer to hardware info
4171	* @state: state of package load
4172	*/
4173	static void ice_log_pkg_init(struct ice_hw hw, enum* ice_ddp_state state)
4174	{
4175	struct ice_pf *pf = hw->back;
4176	struct device *dev;
4177
4178	dev = ice_pf_to_dev(pf);
4179
4180	switch (state) {
4181	case ICE_DDP_PKG_SUCCESS:
4182	dev_info(dev, "The DDP package was successfully loaded: %s version %d.%d.%d.%d\n",
4183	hw->active_pkg_name,
4184	hw->active_pkg_ver.major,
4185	hw->active_pkg_ver.minor,
4186	hw->active_pkg_ver.update,
4187	hw->active_pkg_ver.draft);
4188	break;
4189	case ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED:
4190	dev_info(dev, "DDP package already present on device: %s version %d.%d.%d.%d\n",
4191	hw->active_pkg_name,
4192	hw->active_pkg_ver.major,
4193	hw->active_pkg_ver.minor,
4194	hw->active_pkg_ver.update,
4195	hw->active_pkg_ver.draft);
4196	break;
4197	case ICE_DDP_PKG_ALREADY_LOADED_NOT_SUPPORTED:
4198	dev_err(dev, "The device has a DDP package that is not supported by the driver. The device has package '%s' version %d.%d.x.x. The driver requires version %d.%d.x.x. Entering Safe Mode.\n",
4199	hw->active_pkg_name,
4200	hw->active_pkg_ver.major,
4201	hw->active_pkg_ver.minor,
4202	ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
4203	break;
4204	case ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED:
4205	dev_info(dev, "The driver could not load the DDP package file because a compatible DDP package is already present on the device. The device has package '%s' version %d.%d.%d.%d. The package file found by the driver: '%s' version %d.%d.%d.%d.\n",
4206	hw->active_pkg_name,
4207	hw->active_pkg_ver.major,
4208	hw->active_pkg_ver.minor,
4209	hw->active_pkg_ver.update,
4210	hw->active_pkg_ver.draft,
4211	hw->pkg_name,
4212	hw->pkg_ver.major,
4213	hw->pkg_ver.minor,
4214	hw->pkg_ver.update,
4215	hw->pkg_ver.draft);
4216	break;
4217	case ICE_DDP_PKG_FW_MISMATCH:
4218	dev_err(dev, "The firmware loaded on the device is not compatible with the DDP package. Please update the device's NVM. Entering safe mode.\n");
4219	break;
4220	case ICE_DDP_PKG_INVALID_FILE:
4221	dev_err(dev, "The DDP package file is invalid. Entering Safe Mode.\n");
4222	break;
4223	case ICE_DDP_PKG_FILE_VERSION_TOO_HIGH:
4224	dev_err(dev, "The DDP package file version is higher than the driver supports. Please use an updated driver. Entering Safe Mode.\n");
4225	break;
4226	case ICE_DDP_PKG_FILE_VERSION_TOO_LOW:
4227	dev_err(dev, "The DDP package file version is lower than the driver supports. The driver requires version %d.%d.x.x. Please use an updated DDP Package file. Entering Safe Mode.\n",
4228	ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
4229	break;
4230	case ICE_DDP_PKG_FILE_SIGNATURE_INVALID:
4231	dev_err(dev, "The DDP package could not be loaded because its signature is not valid. Please use a valid DDP Package. Entering Safe Mode.\n");
4232	break;
4233	case ICE_DDP_PKG_FILE_REVISION_TOO_LOW:
4234	dev_err(dev, "The DDP Package could not be loaded because its security revision is too low. Please use an updated DDP Package. Entering Safe Mode.\n");
4235	break;
4236	case ICE_DDP_PKG_LOAD_ERROR:
4237	dev_err(dev, "An error occurred on the device while loading the DDP package. The device will be reset.\n");
4238	/ poll for reset to complete /
4239	if (ice_check_reset(hw))
4240	dev_err(dev, "Error resetting device. Please reload the driver\n");
4241	break;
4242	case ICE_DDP_PKG_ERR:
4243	default:
4244	dev_err(dev, "An unknown error occurred when loading the DDP package. Entering Safe Mode.\n");
4245	break;
4246	}
4247	}
4248
4249	/**
4250	* ice_load_pkg - load/reload the DDP Package file
4251	* @firmware: firmware structure when firmware requested or NULL for reload
4252	* @pf: pointer to the PF instance
4253	*
4254	* Called on probe and post CORER/GLOBR rebuild to load DDP Package and
4255	* initialize HW tables.
4256	*/
4257	static void
4258	ice_load_pkg(const struct firmware firmware, struct* ice_pf *pf)
4259	{
4260	enum ice_ddp_state state = ICE_DDP_PKG_ERR;
4261	struct device *dev = ice_pf_to_dev(pf);
4262	struct ice_hw *hw = &pf->hw;
4263
4264	/ Load DDP Package /
4265	if (firmware && !hw->pkg_copy) {
4266	state = ice_copy_and_init_pkg(hw, buf: firmware->data,
4267	len: firmware->size);
4268	ice_log_pkg_init(hw, state);
4269	} else if (!firmware && hw->pkg_copy) {
4270	/ Reload package during rebuild after CORER/GLOBR reset /
4271	state = ice_init_pkg(hw, buff: hw->pkg_copy, len: hw->pkg_size);
4272	ice_log_pkg_init(hw, state);
4273	} else {
4274	dev_err(dev, "The DDP package file failed to load. Entering Safe Mode.\n");
4275	}
4276
4277	if (!ice_is_init_pkg_successful(state)) {
4278	/ Safe Mode /
4279	clear_bit(nr: ICE_FLAG_ADV_FEATURES, addr: pf->flags);
4280	return;
4281	}
4282
4283	/ Successful download package is the precondition for advanced*
4284	* features, hence setting the ICE_FLAG_ADV_FEATURES flag
4285	*/
4286	set_bit(nr: ICE_FLAG_ADV_FEATURES, addr: pf->flags);
4287	}
4288
4289	/**
4290	* ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines
4291	* @pf: pointer to the PF structure
4292	*
4293	* There is no error returned here because the driver should be able to handle
4294	* 128 Byte cache lines, so we only print a warning in case issues are seen,
4295	* specifically with Tx.
4296	*/
4297	static void ice_verify_cacheline_size(struct ice_pf *pf)
4298	{
4299	if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M)
4300	dev_warn(ice_pf_to_dev(pf), "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n",
4301	ICE_CACHE_LINE_BYTES);
4302	}
4303
4304	/**
4305	* ice_send_version - update firmware with driver version
4306	* @pf: PF struct
4307	*
4308	* Returns 0 on success, else error code
4309	*/
4310	static int ice_send_version(struct ice_pf *pf)
4311	{
4312	struct ice_driver_ver dv;
4313
4314	dv.major_ver = `0xff`;
4315	dv.minor_ver = `0xff`;
4316	dv.build_ver = `0xff`;
4317	dv.subbuild_ver = `0`;
4318	strscpy((char *)dv.driver_string, UTS_RELEASE,
4319	sizeof(dv.driver_string));
4320	return ice_aq_send_driver_ver(hw: &pf->hw, dv: &dv, NULL);
4321	}
4322
4323	/**
4324	* ice_init_fdir - Initialize flow director VSI and configuration
4325	* @pf: pointer to the PF instance
4326	*
4327	* returns 0 on success, negative on error
4328	*/
4329	static int ice_init_fdir(struct ice_pf *pf)
4330	{
4331	struct device *dev = ice_pf_to_dev(pf);
4332	struct ice_vsi *ctrl_vsi;
4333	int err;
4334
4335	/ Side Band Flow Director needs to have a control VSI.*
4336	* Allocate it and store it in the PF.
4337	*/
4338	ctrl_vsi = ice_ctrl_vsi_setup(pf, pi: pf->hw.port_info);
4339	if (!ctrl_vsi) {
4340	dev_dbg(dev, "could not create control VSI\n");
4341	return -ENOMEM;
4342	}
4343
4344	err = ice_vsi_open_ctrl(vsi: ctrl_vsi);
4345	if (err) {
4346	dev_dbg(dev, "could not open control VSI\n");
4347	goto err_vsi_open;
4348	}
4349
4350	mutex_init(&pf->hw.fdir_fltr_lock);
4351
4352	err = ice_fdir_create_dflt_rules(pf);
4353	if (err)
4354	goto err_fdir_rule;
4355
4356	return `0`;
4357
4358	err_fdir_rule:
4359	ice_fdir_release_flows(hw: &pf->hw);
4360	ice_vsi_close(vsi: ctrl_vsi);
4361	err_vsi_open:
4362	ice_vsi_release(vsi: ctrl_vsi);
4363	if (pf->ctrl_vsi_idx != ICE_NO_VSI) {
4364	pf->vsi[pf->ctrl_vsi_idx] = NULL;
4365	pf->ctrl_vsi_idx = ICE_NO_VSI;
4366	}
4367	return err;
4368	}
4369
4370	static void ice_deinit_fdir(struct ice_pf *pf)
4371	{
4372	struct ice_vsi *vsi = ice_get_ctrl_vsi(pf);
4373
4374	if (!vsi)
4375	return;
4376
4377	ice_vsi_manage_fdir(vsi, ena: false);
4378	ice_vsi_release(vsi);
4379	if (pf->ctrl_vsi_idx != ICE_NO_VSI) {
4380	pf->vsi[pf->ctrl_vsi_idx] = NULL;
4381	pf->ctrl_vsi_idx = ICE_NO_VSI;
4382	}
4383
4384	mutex_destroy(lock: &(&pf->hw)->fdir_fltr_lock);
4385	}
4386
4387	/**
4388	* ice_get_opt_fw_name - return optional firmware file name or NULL
4389	* @pf: pointer to the PF instance
4390	*/
4391	static char ice_get_opt_fw_name(struct* ice_pf *pf)
4392	{
4393	/ Optional firmware name same as default with additional dash*
4394	* followed by a EUI-64 identifier (PCIe Device Serial Number)
4395	*/
4396	struct pci_dev *pdev = pf->pdev;
4397	char *opt_fw_filename;
4398	u64 dsn;
4399
4400	/ Determine the name of the optional file using the DSN (two*
4401	* dwords following the start of the DSN Capability).
4402	*/
4403	dsn = pci_get_dsn(dev: pdev);
4404	if (!dsn)
4405	return NULL;
4406
4407	opt_fw_filename = kzalloc(NAME_MAX, GFP_KERNEL);
4408	if (!opt_fw_filename)
4409	return NULL;
4410
4411	snprintf(buf: opt_fw_filename, NAME_MAX, fmt: "%sice-%016llx.pkg",
4412	ICE_DDP_PKG_PATH, dsn);
4413
4414	return opt_fw_filename;
4415	}
4416
4417	/**
4418	* ice_request_fw - Device initialization routine
4419	* @pf: pointer to the PF instance
4420	*/
4421	static void ice_request_fw(struct ice_pf *pf)
4422	{
4423	char *opt_fw_filename = ice_get_opt_fw_name(pf);
4424	const struct firmware *firmware = NULL;
4425	struct device *dev = ice_pf_to_dev(pf);
4426	int err = `0`;
4427
4428	/ optional device-specific DDP (if present) overrides the default DDP*
4429	* package file. kernel logs a debug message if the file doesn't exist,
4430	* and warning messages for other errors.
4431	*/
4432	if (opt_fw_filename) {
4433	err = firmware_request_nowarn(fw: &firmware, name: opt_fw_filename, device: dev);
4434	if (err) {
4435	kfree(objp: opt_fw_filename);
4436	goto dflt_pkg_load;
4437	}
4438
4439	/ request for firmware was successful. Download to device /
4440	ice_load_pkg(firmware, pf);
4441	kfree(objp: opt_fw_filename);
4442	release_firmware(fw: firmware);
4443	return;
4444	}
4445
4446	dflt_pkg_load:
4447	err = request_firmware(fw: &firmware, ICE_DDP_PKG_FILE, device: dev);
4448	if (err) {
4449	dev_err(dev, "The DDP package file was not found or could not be read. Entering Safe Mode\n");
4450	return;
4451	}
4452
4453	/ request for firmware was successful. Download to device /
4454	ice_load_pkg(firmware, pf);
4455	release_firmware(fw: firmware);
4456	}
4457
4458	/**
4459	* ice_print_wake_reason - show the wake up cause in the log
4460	* @pf: pointer to the PF struct
4461	*/
4462	static void ice_print_wake_reason(struct ice_pf *pf)
4463	{
4464	u32 wus = pf->wakeup_reason;
4465	const char *wake_str;
4466
4467	/ if no wake event, nothing to print /
4468	if (!wus)
4469	return;
4470
4471	if (wus & PFPM_WUS_LNKC_M)
4472	wake_str = "Link\n";
4473	else if (wus & PFPM_WUS_MAG_M)
4474	wake_str = "Magic Packet\n";
4475	else if (wus & PFPM_WUS_MNG_M)
4476	wake_str = "Management\n";
4477	else if (wus & PFPM_WUS_FW_RST_WK_M)
4478	wake_str = "Firmware Reset\n";
4479	else
4480	wake_str = "Unknown\n";
4481
4482	dev_info(ice_pf_to_dev(pf), "Wake reason: %s", wake_str);
4483	}
4484
4485	/**
4486	* ice_pf_fwlog_update_module - update 1 module
4487	* @pf: pointer to the PF struct
4488	* @log_level: log_level to use for the @module
4489	* @module: module to update
4490	*/
4491	void ice_pf_fwlog_update_module(struct ice_pf pf, int* log_level, int module)
4492	{
4493	struct ice_hw *hw = &pf->hw;
4494
4495	hw->fwlog_cfg.module_entries[module].log_level = log_level;
4496	}
4497
4498	/**
4499	* ice_register_netdev - register netdev
4500	* @vsi: pointer to the VSI struct
4501	*/
4502	static int ice_register_netdev(struct ice_vsi *vsi)
4503	{
4504	int err;
4505
4506	if (!vsi \|\| !vsi->netdev)
4507	return -EIO;
4508
4509	err = register_netdev(dev: vsi->netdev);
4510	if (err)
4511	return err;
4512
4513	set_bit(nr: ICE_VSI_NETDEV_REGISTERED, addr: vsi->state);
4514	netif_carrier_off(dev: vsi->netdev);
4515	netif_tx_stop_all_queues(dev: vsi->netdev);
4516
4517	return `0`;
4518	}
4519
4520	static void ice_unregister_netdev(struct ice_vsi *vsi)
4521	{
4522	if (!vsi \|\| !vsi->netdev)
4523	return;
4524
4525	unregister_netdev(dev: vsi->netdev);
4526	clear_bit(nr: ICE_VSI_NETDEV_REGISTERED, addr: vsi->state);
4527	}
4528
4529	/**
4530	* ice_cfg_netdev - Allocate, configure and register a netdev
4531	* @vsi: the VSI associated with the new netdev
4532	*
4533	* Returns 0 on success, negative value on failure
4534	*/
4535	static int ice_cfg_netdev(struct ice_vsi *vsi)
4536	{
4537	struct ice_netdev_priv *np;
4538	struct net_device *netdev;
4539	u8 mac_addr[ETH_ALEN];
4540
4541	netdev = alloc_etherdev_mqs(sizeof_priv: sizeof(*np), txqs: vsi->alloc_txq,
4542	rxqs: vsi->alloc_rxq);
4543	if (!netdev)
4544	return -ENOMEM;
4545
4546	set_bit(nr: ICE_VSI_NETDEV_ALLOCD, addr: vsi->state);
4547	vsi->netdev = netdev;
4548	np = netdev_priv(dev: netdev);
4549	np->vsi = vsi;
4550
4551	ice_set_netdev_features(netdev);
4552	ice_set_ops(vsi);
4553
4554	if (vsi->type == ICE_VSI_PF) {
4555	SET_NETDEV_DEV(netdev, ice_pf_to_dev(vsi->back));
4556	ether_addr_copy(dst: mac_addr, src: vsi->port_info->mac.perm_addr);
4557	eth_hw_addr_set(dev: netdev, addr: mac_addr);
4558	}
4559
4560	netdev->priv_flags \|= IFF_UNICAST_FLT;
4561
4562	/ Setup netdev TC information /
4563	ice_vsi_cfg_netdev_tc(vsi, ena_tc: vsi->tc_cfg.ena_tc);
4564
4565	netdev->max_mtu = ICE_MAX_MTU;
4566
4567	return `0`;
4568	}
4569
4570	static void ice_decfg_netdev(struct ice_vsi *vsi)
4571	{
4572	clear_bit(nr: ICE_VSI_NETDEV_ALLOCD, addr: vsi->state);
4573	free_netdev(dev: vsi->netdev);
4574	vsi->netdev = NULL;
4575	}
4576
4577	/**
4578	* ice_wait_for_fw - wait for full FW readiness
4579	* @hw: pointer to the hardware structure
4580	* @timeout: milliseconds that can elapse before timing out
4581	*/
4582	static int ice_wait_for_fw(struct ice_hw *hw, u32 timeout)
4583	{
4584	int fw_loading;
4585	u32 elapsed = `0`;
4586
4587	while (elapsed <= timeout) {
4588	fw_loading = rd32(hw, GL_MNG_FWSM) & GL_MNG_FWSM_FW_LOADING_M;
4589
4590	/ firmware was not yet loaded, we have to wait more /
4591	if (fw_loading) {
4592	elapsed += `100`;
4593	msleep(msecs: `100`);
4594	continue;
4595	}
4596	return `0`;
4597	}
4598
4599	return -ETIMEDOUT;
4600	}
4601
4602	int ice_init_dev(struct ice_pf *pf)
4603	{
4604	struct device *dev = ice_pf_to_dev(pf);
4605	struct ice_hw *hw = &pf->hw;
4606	int err;
4607
4608	err = ice_init_hw(hw);
4609	if (err) {
4610	dev_err(dev, "ice_init_hw failed: %d\n", err);
4611	return err;
4612	}
4613
4614	/ Some cards require longer initialization times*
4615	* due to necessity of loading FW from an external source.
4616	* This can take even half a minute.
4617	*/
4618	if (ice_is_pf_c827(hw)) {
4619	err = ice_wait_for_fw(hw, timeout: `30000`);
4620	if (err) {
4621	dev_err(dev, "ice_wait_for_fw timed out");
4622	return err;
4623	}
4624	}
4625
4626	ice_init_feature_support(pf);
4627
4628	ice_request_fw(pf);
4629
4630	/ if ice_request_fw fails, ICE_FLAG_ADV_FEATURES bit won't be*
4631	* set in pf->state, which will cause ice_is_safe_mode to return
4632	* true
4633	*/
4634	if (ice_is_safe_mode(pf)) {
4635	/ we already got function/device capabilities but these don't*
4636	* reflect what the driver needs to do in safe mode. Instead of
4637	* adding conditional logic everywhere to ignore these
4638	* device/function capabilities, override them.
4639	*/
4640	ice_set_safe_mode_caps(hw);
4641	}
4642
4643	err = ice_init_pf(pf);
4644	if (err) {
4645	dev_err(dev, "ice_init_pf failed: %d\n", err);
4646	goto err_init_pf;
4647	}
4648
4649	pf->hw.udp_tunnel_nic.set_port = ice_udp_tunnel_set_port;
4650	pf->hw.udp_tunnel_nic.unset_port = ice_udp_tunnel_unset_port;
4651	pf->hw.udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
4652	pf->hw.udp_tunnel_nic.shared = &pf->hw.udp_tunnel_shared;
4653	if (pf->hw.tnl.valid_count[TNL_VXLAN]) {
4654	pf->hw.udp_tunnel_nic.tables[`0`].n_entries =
4655	pf->hw.tnl.valid_count[TNL_VXLAN];
4656	pf->hw.udp_tunnel_nic.tables[`0`].tunnel_types =
4657	UDP_TUNNEL_TYPE_VXLAN;
4658	}
4659	if (pf->hw.tnl.valid_count[TNL_GENEVE]) {
4660	pf->hw.udp_tunnel_nic.tables[`1`].n_entries =
4661	pf->hw.tnl.valid_count[TNL_GENEVE];
4662	pf->hw.udp_tunnel_nic.tables[`1`].tunnel_types =
4663	UDP_TUNNEL_TYPE_GENEVE;
4664	}
4665
4666	err = ice_init_interrupt_scheme(pf);
4667	if (err) {
4668	dev_err(dev, "ice_init_interrupt_scheme failed: %d\n", err);
4669	err = -EIO;
4670	goto err_init_interrupt_scheme;
4671	}
4672
4673	/ In case of MSIX we are going to setup the misc vector right here*
4674	* to handle admin queue events etc. In case of legacy and MSI
4675	* the misc functionality and queue processing is combined in
4676	* the same vector and that gets setup at open.
4677	*/
4678	err = ice_req_irq_msix_misc(pf);
4679	if (err) {
4680	dev_err(dev, "setup of misc vector failed: %d\n", err);
4681	goto err_req_irq_msix_misc;
4682	}
4683
4684	return `0`;
4685
4686	err_req_irq_msix_misc:
4687	ice_clear_interrupt_scheme(pf);
4688	err_init_interrupt_scheme:
4689	ice_deinit_pf(pf);
4690	err_init_pf:
4691	ice_deinit_hw(hw);
4692	return err;
4693	}
4694
4695	void ice_deinit_dev(struct ice_pf *pf)
4696	{
4697	ice_free_irq_msix_misc(pf);
4698	ice_deinit_pf(pf);
4699	ice_deinit_hw(hw: &pf->hw);
4700
4701	/ Service task is already stopped, so call reset directly. /
4702	ice_reset(hw: &pf->hw, req: ICE_RESET_PFR);
4703	pci_wait_for_pending_transaction(dev: pf->pdev);
4704	ice_clear_interrupt_scheme(pf);
4705	}
4706
4707	static void ice_init_features(struct ice_pf *pf)
4708	{
4709	struct device *dev = ice_pf_to_dev(pf);
4710
4711	if (ice_is_safe_mode(pf))
4712	return;
4713
4714	/ initialize DDP driven features /
4715	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
4716	ice_ptp_init(pf);
4717
4718	if (ice_is_feature_supported(pf, f: ICE_F_GNSS))
4719	ice_gnss_init(pf);
4720
4721	if (ice_is_feature_supported(pf, f: ICE_F_CGU) \|\|
4722	ice_is_feature_supported(pf, f: ICE_F_PHY_RCLK))
4723	ice_dpll_init(pf);
4724
4725	/ Note: Flow director init failure is non-fatal to load /
4726	if (ice_init_fdir(pf))
4727	dev_err(dev, "could not initialize flow director\n");
4728
4729	/ Note: DCB init failure is non-fatal to load /
4730	if (ice_init_pf_dcb(pf, locked: false)) {
4731	clear_bit(nr: ICE_FLAG_DCB_CAPABLE, addr: pf->flags);
4732	clear_bit(nr: ICE_FLAG_DCB_ENA, addr: pf->flags);
4733	} else {
4734	ice_cfg_lldp_mib_change(hw: &pf->hw, ena_mib: true);
4735	}
4736
4737	if (ice_init_lag(pf))
4738	dev_warn(dev, "Failed to init link aggregation support\n");
4739
4740	ice_hwmon_init(pf);
4741	}
4742
4743	static void ice_deinit_features(struct ice_pf *pf)
4744	{
4745	if (ice_is_safe_mode(pf))
4746	return;
4747
4748	ice_deinit_lag(pf);
4749	if (test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))
4750	ice_cfg_lldp_mib_change(hw: &pf->hw, ena_mib: false);
4751	ice_deinit_fdir(pf);
4752	if (ice_is_feature_supported(pf, f: ICE_F_GNSS))
4753	ice_gnss_exit(pf);
4754	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
4755	ice_ptp_release(pf);
4756	if (test_bit(ICE_FLAG_DPLL, pf->flags))
4757	ice_dpll_deinit(pf);
4758	if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
4759	xa_destroy(&pf->eswitch.reprs);
4760	}
4761
4762	static void ice_init_wakeup(struct ice_pf *pf)
4763	{
4764	/ Save wakeup reason register for later use /
4765	pf->wakeup_reason = rd32(&pf->hw, PFPM_WUS);
4766
4767	/ check for a power management event /
4768	ice_print_wake_reason(pf);
4769
4770	/ clear wake status, all bits /
4771	wr32(&pf->hw, PFPM_WUS, U32_MAX);
4772
4773	/ Disable WoL at init, wait for user to enable /
4774	device_set_wakeup_enable(ice_pf_to_dev(pf), enable: false);
4775	}
4776
4777	static int ice_init_link(struct ice_pf *pf)
4778	{
4779	struct device *dev = ice_pf_to_dev(pf);
4780	int err;
4781
4782	err = ice_init_link_events(pi: pf->hw.port_info);
4783	if (err) {
4784	dev_err(dev, "ice_init_link_events failed: %d\n", err);
4785	return err;
4786	}
4787
4788	/ not a fatal error if this fails /
4789	err = ice_init_nvm_phy_type(pi: pf->hw.port_info);
4790	if (err)
4791	dev_err(dev, "ice_init_nvm_phy_type failed: %d\n", err);
4792
4793	/ not a fatal error if this fails /
4794	err = ice_update_link_info(pi: pf->hw.port_info);
4795	if (err)
4796	dev_err(dev, "ice_update_link_info failed: %d\n", err);
4797
4798	ice_init_link_dflt_override(pi: pf->hw.port_info);
4799
4800	ice_check_link_cfg_err(pf,
4801	link_cfg_err: pf->hw.port_info->phy.link_info.link_cfg_err);
4802
4803	/ if media available, initialize PHY settings /
4804	if (pf->hw.port_info->phy.link_info.link_info &
4805	ICE_AQ_MEDIA_AVAILABLE) {
4806	/ not a fatal error if this fails /
4807	err = ice_init_phy_user_cfg(pi: pf->hw.port_info);
4808	if (err)
4809	dev_err(dev, "ice_init_phy_user_cfg failed: %d\n", err);
4810
4811	if (!test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) {
4812	struct ice_vsi *vsi = ice_get_main_vsi(pf);
4813
4814	if (vsi)
4815	ice_configure_phy(vsi);
4816	}
4817	} else {
4818	set_bit(nr: ICE_FLAG_NO_MEDIA, addr: pf->flags);
4819	}
4820
4821	return err;
4822	}
4823
4824	static int ice_init_pf_sw(struct ice_pf *pf)
4825	{
4826	bool dvm = ice_is_dvm_ena(hw: &pf->hw);
4827	struct ice_vsi *vsi;
4828	int err;
4829
4830	/ create switch struct for the switch element created by FW on boot /
4831	pf->first_sw = kzalloc(size: sizeof(*pf->first_sw), GFP_KERNEL);
4832	if (!pf->first_sw)
4833	return -ENOMEM;
4834
4835	if (pf->hw.evb_veb)
4836	pf->first_sw->bridge_mode = BRIDGE_MODE_VEB;
4837	else
4838	pf->first_sw->bridge_mode = BRIDGE_MODE_VEPA;
4839
4840	pf->first_sw->pf = pf;
4841
4842	/ record the sw_id available for later use /
4843	pf->first_sw->sw_id = pf->hw.port_info->sw_id;
4844
4845	err = ice_aq_set_port_params(pi: pf->hw.port_info, double_vlan: dvm, NULL);
4846	if (err)
4847	goto err_aq_set_port_params;
4848
4849	vsi = ice_pf_vsi_setup(pf, pi: pf->hw.port_info);
4850	if (!vsi) {
4851	err = -ENOMEM;
4852	goto err_pf_vsi_setup;
4853	}
4854
4855	return `0`;
4856
4857	err_pf_vsi_setup:
4858	err_aq_set_port_params:
4859	kfree(objp: pf->first_sw);
4860	return err;
4861	}
4862
4863	static void ice_deinit_pf_sw(struct ice_pf *pf)
4864	{
4865	struct ice_vsi *vsi = ice_get_main_vsi(pf);
4866
4867	if (!vsi)
4868	return;
4869
4870	ice_vsi_release(vsi);
4871	kfree(objp: pf->first_sw);
4872	}
4873
4874	static int ice_alloc_vsis(struct ice_pf *pf)
4875	{
4876	struct device *dev = ice_pf_to_dev(pf);
4877
4878	pf->num_alloc_vsi = pf->hw.func_caps.guar_num_vsi;
4879	if (!pf->num_alloc_vsi)
4880	return -EIO;
4881
4882	if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) {
4883	dev_warn(dev,
4884	"limiting the VSI count due to UDP tunnel limitation %d > %d\n",
4885	pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES);
4886	pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES;
4887	}
4888
4889	pf->vsi = devm_kcalloc(dev, n: pf->num_alloc_vsi, size: sizeof(*pf->vsi),
4890	GFP_KERNEL);
4891	if (!pf->vsi)
4892	return -ENOMEM;
4893
4894	pf->vsi_stats = devm_kcalloc(dev, n: pf->num_alloc_vsi,
4895	size: sizeof(*pf->vsi_stats), GFP_KERNEL);
4896	if (!pf->vsi_stats) {
4897	devm_kfree(dev, p: pf->vsi);
4898	return -ENOMEM;
4899	}
4900
4901	return `0`;
4902	}
4903
4904	static void ice_dealloc_vsis(struct ice_pf *pf)
4905	{
4906	devm_kfree(ice_pf_to_dev(pf), p: pf->vsi_stats);
4907	pf->vsi_stats = NULL;
4908
4909	pf->num_alloc_vsi = `0`;
4910	devm_kfree(ice_pf_to_dev(pf), p: pf->vsi);
4911	pf->vsi = NULL;
4912	}
4913
/**
 * ice_init_devlink - register devlink params, regions and the instance
 * @pf: pointer to the PF instance
 *
 * Return: 0 on success, or the error from ice_devlink_register_params().
 */
static int ice_init_devlink(struct ice_pf *pf)
{
	int ret = ice_devlink_register_params(pf);

	if (ret)
		return ret;

	ice_devlink_init_regions(pf);
	ice_devlink_register(pf);

	return 0;
}
4927
/**
 * ice_deinit_devlink - undo ice_init_devlink
 * @pf: pointer to the PF instance
 *
 * Unregisters the devlink instance, destroys the regions and unregisters
 * the params, i.e. the reverse of the order used by ice_init_devlink.
 */
static void ice_deinit_devlink(struct ice_pf *pf)
{
	ice_devlink_unregister(pf);
	ice_devlink_destroy_regions(pf);
	ice_devlink_unregister_params(pf);
}
4934
4935	static int ice_init(struct ice_pf *pf)
4936	{
4937	int err;
4938
4939	err = ice_init_dev(pf);
4940	if (err)
4941	return err;
4942
4943	err = ice_alloc_vsis(pf);
4944	if (err)
4945	goto err_alloc_vsis;
4946
4947	err = ice_init_pf_sw(pf);
4948	if (err)
4949	goto err_init_pf_sw;
4950
4951	ice_init_wakeup(pf);
4952
4953	err = ice_init_link(pf);
4954	if (err)
4955	goto err_init_link;
4956
4957	err = ice_send_version(pf);
4958	if (err)
4959	goto err_init_link;
4960
4961	ice_verify_cacheline_size(pf);
4962
4963	if (ice_is_safe_mode(pf))
4964	ice_set_safe_mode_vlan_cfg(pf);
4965	else
4966	/ print PCI link speed and width /
4967	pcie_print_link_status(dev: pf->pdev);
4968
4969	/ ready to go, so clear down state bit /
4970	clear_bit(nr: ICE_DOWN, addr: pf->state);
4971	clear_bit(nr: ICE_SERVICE_DIS, addr: pf->state);
4972
4973	/ since everything is good, start the service timer /
4974	mod_timer(timer: &pf->serv_tmr, expires: round_jiffies(j: jiffies + pf->serv_tmr_period));
4975
4976	return `0`;
4977
4978	err_init_link:
4979	ice_deinit_pf_sw(pf);
4980	err_init_pf_sw:
4981	ice_dealloc_vsis(pf);
4982	err_alloc_vsis:
4983	ice_deinit_dev(pf);
4984	return err;
4985	}
4986
4987	static void ice_deinit(struct ice_pf *pf)
4988	{
4989	set_bit(nr: ICE_SERVICE_DIS, addr: pf->state);
4990	set_bit(nr: ICE_DOWN, addr: pf->state);
4991
4992	ice_deinit_pf_sw(pf);
4993	ice_dealloc_vsis(pf);
4994	ice_deinit_dev(pf);
4995	}
4996
4997	/**
4998	* ice_load - load pf by init hw and starting VSI
4999	* @pf: pointer to the pf instance
5000	*
5001	* This function has to be called under devl_lock.
5002	*/
5003	int ice_load(struct ice_pf *pf)
5004	{
5005	struct ice_vsi *vsi;
5006	int err;
5007
5008	devl_assert_locked(devlink: priv_to_devlink(priv: pf));
5009
5010	vsi = ice_get_main_vsi(pf);
5011
5012	/ init channel list /
5013	INIT_LIST_HEAD(list: &vsi->ch_list);
5014
5015	err = ice_cfg_netdev(vsi);
5016	if (err)
5017	return err;
5018
5019	/ Setup DCB netlink interface /
5020	ice_dcbnl_setup(vsi);
5021
5022	err = ice_init_mac_fltr(pf);
5023	if (err)
5024	goto err_init_mac_fltr;
5025
5026	err = ice_devlink_create_pf_port(pf);
5027	if (err)
5028	goto err_devlink_create_pf_port;
5029
5030	SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port);
5031
5032	err = ice_register_netdev(vsi);
5033	if (err)
5034	goto err_register_netdev;
5035
5036	err = ice_tc_indir_block_register(vsi);
5037	if (err)
5038	goto err_tc_indir_block_register;
5039
5040	ice_napi_add(vsi);
5041
5042	err = ice_init_rdma(pf);
5043	if (err)
5044	goto err_init_rdma;
5045
5046	ice_init_features(pf);
5047	ice_service_task_restart(pf);
5048
5049	clear_bit(nr: ICE_DOWN, addr: pf->state);
5050
5051	return `0`;
5052
5053	err_init_rdma:
5054	ice_tc_indir_block_unregister(vsi);
5055	err_tc_indir_block_register:
5056	ice_unregister_netdev(vsi);
5057	err_register_netdev:
5058	ice_devlink_destroy_pf_port(pf);
5059	err_devlink_create_pf_port:
5060	err_init_mac_fltr:
5061	ice_decfg_netdev(vsi);
5062	return err;
5063	}
5064
/**
 * ice_unload - unload pf by stopping VSI and deinit hw
 * @pf: pointer to the pf instance
 *
 * This function has to be called under devl_lock.
 *
 * Releases, in order: features, RDMA, the TC indirect block, the netdev
 * registration, the devlink PF port and the netdev configuration of the
 * main VSI.
 */
void ice_unload(struct ice_pf *pf)
{
	struct ice_vsi *vsi = ice_get_main_vsi(pf);

	devl_assert_locked(priv_to_devlink(pf));

	ice_deinit_features(pf);
	ice_deinit_rdma(pf);
	ice_tc_indir_block_unregister(vsi);
	ice_unregister_netdev(vsi);
	ice_devlink_destroy_pf_port(pf);
	ice_decfg_netdev(vsi);
}
5084
5085	/**
5086	* ice_probe - Device initialization routine
5087	* @pdev: PCI device information struct
5088	* @ent: entry in ice_pci_tbl
5089	*
5090	* Returns 0 on success, negative on failure
5091	*/
5092	static int
5093	ice_probe(struct pci_dev pdev, const* struct pci_device_id __always_unused *ent)
5094	{
5095	struct device *dev = &pdev->dev;
5096	struct ice_pf *pf;
5097	struct ice_hw *hw;
5098	int err;
5099
5100	if (pdev->is_virtfn) {
5101	dev_err(dev, "can't probe a virtual function\n");
5102	return -EINVAL;
5103	}
5104
5105	/ when under a kdump kernel initiate a reset before enabling the*
5106	* device in order to clear out any pending DMA transactions. These
5107	* transactions can cause some systems to machine check when doing
5108	* the pcim_enable_device() below.
5109	*/
5110	if (is_kdump_kernel()) {
5111	pci_save_state(dev: pdev);
5112	pci_clear_master(dev: pdev);
5113	err = pcie_flr(dev: pdev);
5114	if (err)
5115	return err;
5116	pci_restore_state(dev: pdev);
5117	}
5118
5119	/ this driver uses devres, see*
5120	* Documentation/driver-api/driver-model/devres.rst
5121	*/
5122	err = pcim_enable_device(pdev);
5123	if (err)
5124	return err;
5125
5126	err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), name: dev_driver_string(dev));
5127	if (err) {
5128	dev_err(dev, "BAR0 I/O map error %d\n", err);
5129	return err;
5130	}
5131
5132	pf = ice_allocate_pf(dev);
5133	if (!pf)
5134	return -ENOMEM;
5135
5136	/ initialize Auxiliary index to invalid value /
5137	pf->aux_idx = -`1`;
5138
5139	/ set up for high or low DMA /
5140	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(`64`));
5141	if (err) {
5142	dev_err(dev, "DMA configuration failed: 0x%x\n", err);
5143	return err;
5144	}
5145
5146	pci_set_master(dev: pdev);
5147
5148	pf->pdev = pdev;
5149	pci_set_drvdata(pdev, data: pf);
5150	set_bit(nr: ICE_DOWN, addr: pf->state);
5151	/ Disable service task until DOWN bit is cleared /
5152	set_bit(nr: ICE_SERVICE_DIS, addr: pf->state);
5153
5154	hw = &pf->hw;
5155	hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0];
5156	pci_save_state(dev: pdev);
5157
5158	hw->back = pf;
5159	hw->port_info = NULL;
5160	hw->vendor_id = pdev->vendor;
5161	hw->device_id = pdev->device;
5162	pci_read_config_byte(dev: pdev, PCI_REVISION_ID, val: &hw->revision_id);
5163	hw->subsystem_vendor_id = pdev->subsystem_vendor;
5164	hw->subsystem_device_id = pdev->subsystem_device;
5165	hw->bus.device = PCI_SLOT(pdev->devfn);
5166	hw->bus.func = PCI_FUNC(pdev->devfn);
5167	ice_set_ctrlq_len(hw);
5168
5169	pf->msg_enable = netif_msg_init(debug_value: debug, ICE_DFLT_NETIF_M);
5170
5171	#ifndef CONFIG_DYNAMIC_DEBUG
5172	if (debug < -`1`)
5173	hw->debug_mask = debug;
5174	#endif
5175
5176	err = ice_init(pf);
5177	if (err)
5178	goto err_init;
5179
5180	devl_lock(devlink: priv_to_devlink(priv: pf));
5181	err = ice_load(pf);
5182	devl_unlock(devlink: priv_to_devlink(priv: pf));
5183	if (err)
5184	goto err_load;
5185
5186	err = ice_init_devlink(pf);
5187	if (err)
5188	goto err_init_devlink;
5189
5190	return `0`;
5191
5192	err_init_devlink:
5193	devl_lock(devlink: priv_to_devlink(priv: pf));
5194	ice_unload(pf);
5195	devl_unlock(devlink: priv_to_devlink(priv: pf));
5196	err_load:
5197	ice_deinit(pf);
5198	err_init:
5199	pci_disable_device(dev: pdev);
5200	return err;
5201	}
5202
5203	/**
5204	* ice_set_wake - enable or disable Wake on LAN
5205	* @pf: pointer to the PF struct
5206	*
5207	* Simple helper for WoL control
5208	*/
5209	static void ice_set_wake(struct ice_pf *pf)
5210	{
5211	struct ice_hw *hw = &pf->hw;
5212	bool wol = pf->wol_ena;
5213
5214	/ clear wake state, otherwise new wake events won't fire /
5215	wr32(hw, PFPM_WUS, U32_MAX);
5216
5217	/ enable / disable APM wake up, no RMW needed /
5218	wr32(hw, PFPM_APM, wol ? PFPM_APM_APME_M : `0`);
5219
5220	/ set magic packet filter enabled /
5221	wr32(hw, PFPM_WUFC, wol ? PFPM_WUFC_MAG_M : `0`);
5222	}
5223
5224	/**
5225	* ice_setup_mc_magic_wake - setup device to wake on multicast magic packet
5226	* @pf: pointer to the PF struct
5227	*
5228	* Issue firmware command to enable multicast magic wake, making
5229	* sure that any locally administered address (LAA) is used for
5230	* wake, and that PF reset doesn't undo the LAA.
5231	*/
5232	static void ice_setup_mc_magic_wake(struct ice_pf *pf)
5233	{
5234	struct device *dev = ice_pf_to_dev(pf);
5235	struct ice_hw *hw = &pf->hw;
5236	u8 mac_addr[ETH_ALEN];
5237	struct ice_vsi *vsi;
5238	int status;
5239	u8 flags;
5240
5241	if (!pf->wol_ena)
5242	return;
5243
5244	vsi = ice_get_main_vsi(pf);
5245	if (!vsi)
5246	return;
5247
5248	/ Get current MAC address in case it's an LAA /
5249	if (vsi->netdev)
5250	ether_addr_copy(dst: mac_addr, src: vsi->netdev->dev_addr);
5251	else
5252	ether_addr_copy(dst: mac_addr, src: vsi->port_info->mac.perm_addr);
5253
5254	flags = ICE_AQC_MAN_MAC_WR_MC_MAG_EN \|
5255	ICE_AQC_MAN_MAC_UPDATE_LAA_WOL \|
5256	ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP;
5257
5258	status = ice_aq_manage_mac_write(hw, mac_addr, flags, NULL);
5259	if (status)
5260	dev_err(dev, "Failed to enable Multicast Magic Packet wake, err %d aq_err %s\n",
5261	status, ice_aq_str(hw->adminq.sq_last_status));
5262	}
5263
5264	/**
5265	* ice_remove - Device removal routine
5266	* @pdev: PCI device information struct
5267	*/
5268	static void ice_remove(struct pci_dev *pdev)
5269	{
5270	struct ice_pf *pf = pci_get_drvdata(pdev);
5271	int i;
5272
5273	for (i = `0`; i < ICE_MAX_RESET_WAIT; i++) {
5274	if (!ice_is_reset_in_progress(state: pf->state))
5275	break;
5276	msleep(msecs: `100`);
5277	}
5278
5279	if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) {
5280	set_bit(nr: ICE_VF_RESETS_DISABLED, addr: pf->state);
5281	ice_free_vfs(pf);
5282	}
5283
5284	ice_hwmon_exit(pf);
5285
5286	ice_service_task_stop(pf);
5287	ice_aq_cancel_waiting_tasks(pf);
5288	set_bit(nr: ICE_DOWN, addr: pf->state);
5289
5290	if (!ice_is_safe_mode(pf))
5291	ice_remove_arfs(pf);
5292
5293	ice_deinit_devlink(pf);
5294
5295	devl_lock(devlink: priv_to_devlink(priv: pf));
5296	ice_unload(pf);
5297	devl_unlock(devlink: priv_to_devlink(priv: pf));
5298
5299	ice_deinit(pf);
5300	ice_vsi_release_all(pf);
5301
5302	ice_setup_mc_magic_wake(pf);
5303	ice_set_wake(pf);
5304
5305	pci_disable_device(dev: pdev);
5306	}
5307
5308	/**
5309	* ice_shutdown - PCI callback for shutting down device
5310	* @pdev: PCI device information struct
5311	*/
5312	static void ice_shutdown(struct pci_dev *pdev)
5313	{
5314	struct ice_pf *pf = pci_get_drvdata(pdev);
5315
5316	ice_remove(pdev);
5317
5318	if (system_state == SYSTEM_POWER_OFF) {
5319	pci_wake_from_d3(dev: pdev, enable: pf->wol_ena);
5320	pci_set_power_state(dev: pdev, PCI_D3hot);
5321	}
5322	}
5323
5324	#ifdef CONFIG_PM
5325	/**
5326	* ice_prepare_for_shutdown - prep for PCI shutdown
5327	* @pf: board private structure
5328	*
5329	* Inform or close all dependent features in prep for PCI device shutdown
5330	*/
5331	static void ice_prepare_for_shutdown(struct ice_pf *pf)
5332	{
5333	struct ice_hw *hw = &pf->hw;
5334	u32 v;
5335
5336	/ Notify VFs of impending reset /
5337	if (ice_check_sq_alive(hw, cq: &hw->mailboxq))
5338	ice_vc_notify_reset(pf);
5339
5340	dev_dbg(ice_pf_to_dev(pf), "Tearing down internal switch for shutdown\n");
5341
5342	/ disable the VSIs and their queues that are not already DOWN /
5343	ice_pf_dis_all_vsi(pf, locked: false);
5344
5345	ice_for_each_vsi(pf, v)
5346	if (pf->vsi[v])
5347	pf->vsi[v]->vsi_num = `0`;
5348
5349	ice_shutdown_all_ctrlq(hw);
5350	}
5351
5352	/**
5353	* ice_reinit_interrupt_scheme - Reinitialize interrupt scheme
5354	* @pf: board private structure to reinitialize
5355	*
5356	* This routine reinitialize interrupt scheme that was cleared during
5357	* power management suspend callback.
5358	*
5359	* This should be called during resume routine to re-allocate the q_vectors
5360	* and reacquire interrupts.
5361	*/
5362	static int ice_reinit_interrupt_scheme(struct ice_pf *pf)
5363	{
5364	struct device *dev = ice_pf_to_dev(pf);
5365	int ret, v;
5366
5367	/ Since we clear MSIX flag during suspend, we need to*
5368	* set it back during resume...
5369	*/
5370
5371	ret = ice_init_interrupt_scheme(pf);
5372	if (ret) {
5373	dev_err(dev, "Failed to re-initialize interrupt %d\n", ret);
5374	return ret;
5375	}
5376
5377	/ Remap vectors and rings, after successful re-init interrupts /
5378	ice_for_each_vsi(pf, v) {
5379	if (!pf->vsi[v])
5380	continue;
5381
5382	ret = ice_vsi_alloc_q_vectors(vsi: pf->vsi[v]);
5383	if (ret)
5384	goto err_reinit;
5385	ice_vsi_map_rings_to_vectors(vsi: pf->vsi[v]);
5386	ice_vsi_set_napi_queues(vsi: pf->vsi[v]);
5387	}
5388
5389	ret = ice_req_irq_msix_misc(pf);
5390	if (ret) {
5391	dev_err(dev, "Setting up misc vector failed after device suspend %d\n",
5392	ret);
5393	goto err_reinit;
5394	}
5395
5396	return `0`;
5397
5398	err_reinit:
5399	while (v--)
5400	if (pf->vsi[v])
5401	ice_vsi_free_q_vectors(vsi: pf->vsi[v]);
5402
5403	return ret;
5404	}
5405
5406	/**
5407	* ice_suspend
5408	* @dev: generic device information structure
5409	*
5410	* Power Management callback to quiesce the device and prepare
5411	* for D3 transition.
5412	*/
5413	static int __maybe_unused ice_suspend(struct device *dev)
5414	{
5415	struct pci_dev *pdev = to_pci_dev(dev);
5416	struct ice_pf *pf;
5417	int disabled, v;
5418
5419	pf = pci_get_drvdata(pdev);
5420
5421	if (!ice_pf_state_is_nominal(pf)) {
5422	dev_err(dev, "Device is not ready, no need to suspend it\n");
5423	return -EBUSY;
5424	}
5425
5426	/ Stop watchdog tasks until resume completion.*
5427	* Even though it is most likely that the service task is
5428	* disabled if the device is suspended or down, the service task's
5429	* state is controlled by a different state bit, and we should
5430	* store and honor whatever state that bit is in at this point.
5431	*/
5432	disabled = ice_service_task_stop(pf);
5433
5434	ice_unplug_aux_dev(pf);
5435
5436	/ Already suspended?, then there is nothing to do /
5437	if (test_and_set_bit(nr: ICE_SUSPENDED, addr: pf->state)) {
5438	if (!disabled)
5439	ice_service_task_restart(pf);
5440	return `0`;
5441	}
5442
5443	if (test_bit(ICE_DOWN, pf->state) \|\|
5444	ice_is_reset_in_progress(state: pf->state)) {
5445	dev_err(dev, "can't suspend device in reset or already down\n");
5446	if (!disabled)
5447	ice_service_task_restart(pf);
5448	return `0`;
5449	}
5450
5451	ice_setup_mc_magic_wake(pf);
5452
5453	ice_prepare_for_shutdown(pf);
5454
5455	ice_set_wake(pf);
5456
5457	/ Free vectors, clear the interrupt scheme and release IRQs*
5458	* for proper hibernation, especially with large number of CPUs.
5459	* Otherwise hibernation might fail when mapping all the vectors back
5460	* to CPU0.
5461	*/
5462	ice_free_irq_msix_misc(pf);
5463	ice_for_each_vsi(pf, v) {
5464	if (!pf->vsi[v])
5465	continue;
5466	ice_vsi_free_q_vectors(vsi: pf->vsi[v]);
5467	}
5468	ice_clear_interrupt_scheme(pf);
5469
5470	pci_save_state(dev: pdev);
5471	pci_wake_from_d3(dev: pdev, enable: pf->wol_ena);
5472	pci_set_power_state(dev: pdev, PCI_D3hot);
5473	return `0`;
5474	}
5475
5476	/**
5477	* ice_resume - PM callback for waking up from D3
5478	* @dev: generic device information structure
5479	*/
5480	static int __maybe_unused ice_resume(struct device *dev)
5481	{
5482	struct pci_dev *pdev = to_pci_dev(dev);
5483	enum ice_reset_req reset_type;
5484	struct ice_pf *pf;
5485	struct ice_hw *hw;
5486	int ret;
5487
5488	pci_set_power_state(dev: pdev, PCI_D0);
5489	pci_restore_state(dev: pdev);
5490	pci_save_state(dev: pdev);
5491
5492	if (!pci_device_is_present(pdev))
5493	return -ENODEV;
5494
5495	ret = pci_enable_device_mem(dev: pdev);
5496	if (ret) {
5497	dev_err(dev, "Cannot enable device after suspend\n");
5498	return ret;
5499	}
5500
5501	pf = pci_get_drvdata(pdev);
5502	hw = &pf->hw;
5503
5504	pf->wakeup_reason = rd32(hw, PFPM_WUS);
5505	ice_print_wake_reason(pf);
5506
5507	/ We cleared the interrupt scheme when we suspended, so we need to*
5508	* restore it now to resume device functionality.
5509	*/
5510	ret = ice_reinit_interrupt_scheme(pf);
5511	if (ret)
5512	dev_err(dev, "Cannot restore interrupt scheme: %d\n", ret);
5513
5514	clear_bit(nr: ICE_DOWN, addr: pf->state);
5515	/ Now perform PF reset and rebuild /
5516	reset_type = ICE_RESET_PFR;
5517	/ re-enable service task for reset, but allow reset to schedule it /
5518	clear_bit(nr: ICE_SERVICE_DIS, addr: pf->state);
5519
5520	if (ice_schedule_reset(pf, reset: reset_type))
5521	dev_err(dev, "Reset during resume failed.\n");
5522
5523	clear_bit(nr: ICE_SUSPENDED, addr: pf->state);
5524	ice_service_task_restart(pf);
5525
5526	/ Restart the service task /
5527	mod_timer(timer: &pf->serv_tmr, expires: round_jiffies(j: jiffies + pf->serv_tmr_period));
5528
5529	return `0`;
5530	}
5531	#endif /* CONFIG_PM */
5532
5533	/**
5534	* ice_pci_err_detected - warning that PCI error has been detected
5535	* @pdev: PCI device information struct
5536	* @err: the type of PCI error
5537	*
5538	* Called to warn that something happened on the PCI bus and the error handling
5539	* is in progress. Allows the driver to gracefully prepare/handle PCI errors.
5540	*/
5541	static pci_ers_result_t
5542	ice_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t err)
5543	{
5544	struct ice_pf *pf = pci_get_drvdata(pdev);
5545
5546	if (!pf) {
5547	dev_err(&pdev->dev, "%s: unrecoverable device error %d\n",
5548	__func__, err);
5549	return PCI_ERS_RESULT_DISCONNECT;
5550	}
5551
5552	if (!test_bit(ICE_SUSPENDED, pf->state)) {
5553	ice_service_task_stop(pf);
5554
5555	if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) {
5556	set_bit(nr: ICE_PFR_REQ, addr: pf->state);
5557	ice_prepare_for_reset(pf, reset_type: ICE_RESET_PFR);
5558	}
5559	}
5560
5561	return PCI_ERS_RESULT_NEED_RESET;
5562	}
5563
5564	/**
5565	* ice_pci_err_slot_reset - a PCI slot reset has just happened
5566	* @pdev: PCI device information struct
5567	*
5568	* Called to determine if the driver can recover from the PCI slot reset by
5569	* using a register read to determine if the device is recoverable.
5570	*/
5571	static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev)
5572	{
5573	struct ice_pf *pf = pci_get_drvdata(pdev);
5574	pci_ers_result_t result;
5575	int err;
5576	u32 reg;
5577
5578	err = pci_enable_device_mem(dev: pdev);
5579	if (err) {
5580	dev_err(&pdev->dev, "Cannot re-enable PCI device after reset, error %d\n",
5581	err);
5582	result = PCI_ERS_RESULT_DISCONNECT;
5583	} else {
5584	pci_set_master(dev: pdev);
5585	pci_restore_state(dev: pdev);
5586	pci_save_state(dev: pdev);
5587	pci_wake_from_d3(dev: pdev, enable: false);
5588
5589	/ Check for life /
5590	reg = rd32(&pf->hw, GLGEN_RTRIG);
5591	if (!reg)
5592	result = PCI_ERS_RESULT_RECOVERED;
5593	else
5594	result = PCI_ERS_RESULT_DISCONNECT;
5595	}
5596
5597	return result;
5598	}
5599
5600	/**
5601	* ice_pci_err_resume - restart operations after PCI error recovery
5602	* @pdev: PCI device information struct
5603	*
5604	* Called to allow the driver to bring things back up after PCI error and/or
5605	* reset recovery have finished
5606	*/
5607	static void ice_pci_err_resume(struct pci_dev *pdev)
5608	{
5609	struct ice_pf *pf = pci_get_drvdata(pdev);
5610
5611	if (!pf) {
5612	dev_err(&pdev->dev, "%s failed, device is unrecoverable\n",
5613	__func__);
5614	return;
5615	}
5616
5617	if (test_bit(ICE_SUSPENDED, pf->state)) {
5618	dev_dbg(&pdev->dev, "%s failed to resume normal operations!\n",
5619	__func__);
5620	return;
5621	}
5622
5623	ice_restore_all_vfs_msi_state(pf);
5624
5625	ice_do_reset(pf, reset_type: ICE_RESET_PFR);
5626	ice_service_task_restart(pf);
5627	mod_timer(timer: &pf->serv_tmr, expires: round_jiffies(j: jiffies + pf->serv_tmr_period));
5628	}
5629
5630	/**
5631	* ice_pci_err_reset_prepare - prepare device driver for PCI reset
5632	* @pdev: PCI device information struct
5633	*/
5634	static void ice_pci_err_reset_prepare(struct pci_dev *pdev)
5635	{
5636	struct ice_pf *pf = pci_get_drvdata(pdev);
5637
5638	if (!test_bit(ICE_SUSPENDED, pf->state)) {
5639	ice_service_task_stop(pf);
5640
5641	if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) {
5642	set_bit(nr: ICE_PFR_REQ, addr: pf->state);
5643	ice_prepare_for_reset(pf, reset_type: ICE_RESET_PFR);
5644	}
5645	}
5646	}
5647
/**
 * ice_pci_err_reset_done - PCI reset done, device driver reset can begin
 * @pdev: PCI device information struct
 *
 * Delegates to ice_pci_err_resume().
 */
static void ice_pci_err_reset_done(struct pci_dev *pdev)
{
	ice_pci_err_resume(pdev);
}
5656
5657	/ ice_pci_tbl - PCI Device ID Table*
5658	*
5659	* Wildcard entries (PCI_ANY_ID) should come last
5660	* Last entry must be all 0s
5661	*
5662	* { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
5663	* Class, Class Mask, private data (not used) }
5664	*/
5665	static const struct pci_device_id ice_pci_tbl[] = {
5666	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE) },
5667	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP) },
5668	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP) },
5669	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_BACKPLANE) },
5670	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_QSFP) },
5671	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_SFP) },
5672	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_BACKPLANE) },
5673	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_QSFP) },
5674	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SFP) },
5675	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_10G_BASE_T) },
5676	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SGMII) },
5677	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_BACKPLANE) },
5678	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_QSFP) },
5679	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SFP) },
5680	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_10G_BASE_T) },
5681	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SGMII) },
5682	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_BACKPLANE) },
5683	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SFP) },
5684	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_10G_BASE_T) },
5685	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SGMII) },
5686	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_BACKPLANE) },
5687	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_SFP) },
5688	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_10G_BASE_T) },
5689	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_1GBE) },
5690	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_QSFP) },
5691	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822_SI_DFLT) },
5692	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_BACKPLANE), },
5693	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_QSFP), },
5694	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_SFP), },
5695	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_SGMII), },
5696	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_BACKPLANE) },
5697	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_QSFP56) },
5698	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_SFP) },
5699	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_SFP_DD) },
5700	/ required last entry /
5701	{}
5702	};
5703	MODULE_DEVICE_TABLE(pci, ice_pci_tbl);
5704
5705	static __maybe_unused SIMPLE_DEV_PM_OPS(ice_pm_ops, ice_suspend, ice_resume);
5706
5707	static const struct pci_error_handlers ice_pci_err_handler = {
5708	.error_detected = ice_pci_err_detected,
5709	.slot_reset = ice_pci_err_slot_reset,
5710	.reset_prepare = ice_pci_err_reset_prepare,
5711	.reset_done = ice_pci_err_reset_done,
5712	.resume = ice_pci_err_resume
5713	};
5714
5715	static struct pci_driver ice_driver = {
5716	.name = KBUILD_MODNAME,
5717	.id_table = ice_pci_tbl,
5718	.probe = ice_probe,
5719	.remove = ice_remove,
5720	#ifdef CONFIG_PM
5721	.driver.pm = &ice_pm_ops,
5722	#endif /* CONFIG_PM */
5723	.shutdown = ice_shutdown,
5724	.sriov_configure = ice_sriov_configure,
5725	.sriov_get_vf_total_msix = ice_sriov_get_vf_total_msix,
5726	.sriov_set_msix_vec_count = ice_sriov_set_msix_vec_count,
5727	.err_handler = &ice_pci_err_handler
5728	};
5729
5730	/**
5731	* ice_module_init - Driver registration routine
5732	*
5733	* ice_module_init is the first routine called when the driver is
5734	* loaded. All it does is register with the PCI subsystem.
5735	*/
5736	static int __init ice_module_init(void)
5737	{
5738	int status = -ENOMEM;
5739
5740	pr_info("%s\n", ice_driver_string);
5741	pr_info("%s\n", ice_copyright);
5742
5743	ice_adv_lnk_speed_maps_init();
5744
5745	ice_wq = alloc_workqueue(fmt: "%s", flags: `0`, max_active: `0`, KBUILD_MODNAME);
5746	if (!ice_wq) {
5747	pr_err("Failed to create workqueue\n");
5748	return status;
5749	}
5750
5751	ice_lag_wq = alloc_ordered_workqueue("ice_lag_wq", `0`);
5752	if (!ice_lag_wq) {
5753	pr_err("Failed to create LAG workqueue\n");
5754	goto err_dest_wq;
5755	}
5756
5757	ice_debugfs_init();
5758
5759	status = pci_register_driver(&ice_driver);
5760	if (status) {
5761	pr_err("failed to register PCI driver, err %d\n", status);
5762	goto err_dest_lag_wq;
5763	}
5764
5765	return `0`;
5766
5767	err_dest_lag_wq:
5768	destroy_workqueue(wq: ice_lag_wq);
5769	ice_debugfs_exit();
5770	err_dest_wq:
5771	destroy_workqueue(wq: ice_wq);
5772	return status;
5773	}
5774	module_init(ice_module_init);
5775
5776	/**
5777	* ice_module_exit - Driver exit cleanup routine
5778	*
5779	* ice_module_exit is called just before the driver is removed
5780	* from memory.
5781	*/
5782	static void __exit ice_module_exit(void)
5783	{
5784	pci_unregister_driver(dev: &ice_driver);
5785	ice_debugfs_exit();
5786	destroy_workqueue(wq: ice_wq);
5787	destroy_workqueue(wq: ice_lag_wq);
5788	pr_info("module unloaded\n");
5789	}
5790	module_exit(ice_module_exit);
5791
5792	/**
5793	* ice_set_mac_address - NDO callback to set MAC address
5794	* @netdev: network interface device structure
5795	* @pi: pointer to an address structure
5796	*
5797	* Returns 0 on success, negative on failure
5798	*/
5799	static int ice_set_mac_address(struct net_device netdev, void* *pi)
5800	{
5801	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
5802	struct ice_vsi *vsi = np->vsi;
5803	struct ice_pf *pf = vsi->back;
5804	struct ice_hw *hw = &pf->hw;
5805	struct sockaddr *addr = pi;
5806	u8 old_mac[ETH_ALEN];
5807	u8 flags = `0`;
5808	u8 *mac;
5809	int err;
5810
5811	mac = (u8 *)addr->sa_data;
5812
5813	if (!is_valid_ether_addr(addr: mac))
5814	return -EADDRNOTAVAIL;
5815
5816	if (test_bit(ICE_DOWN, pf->state) \|\|
5817	ice_is_reset_in_progress(state: pf->state)) {
5818	netdev_err(dev: netdev, format: "can't set mac %pM. device not ready\n",
5819	mac);
5820	return -EBUSY;
5821	}
5822
5823	if (ice_chnl_dmac_fltr_cnt(pf)) {
5824	netdev_err(dev: netdev, format: "can't set mac %pM. Device has tc-flower filters, delete all of them and try again\n",
5825	mac);
5826	return -EAGAIN;
5827	}
5828
5829	netif_addr_lock_bh(dev: netdev);
5830	ether_addr_copy(dst: old_mac, src: netdev->dev_addr);
5831	/ change the netdev's MAC address /
5832	eth_hw_addr_set(dev: netdev, addr: mac);
5833	netif_addr_unlock_bh(dev: netdev);
5834
5835	/ Clean up old MAC filter. Not an error if old filter doesn't exist /
5836	err = ice_fltr_remove_mac(vsi, mac: old_mac, action: ICE_FWD_TO_VSI);
5837	if (err && err != -ENOENT) {
5838	err = -EADDRNOTAVAIL;
5839	goto err_update_filters;
5840	}
5841
5842	/ Add filter for new MAC. If filter exists, return success /
5843	err = ice_fltr_add_mac(vsi, mac, action: ICE_FWD_TO_VSI);
5844	if (err == -EEXIST) {
5845	/ Although this MAC filter is already present in hardware it's*
5846	* possible in some cases (e.g. bonding) that dev_addr was
5847	* modified outside of the driver and needs to be restored back
5848	* to this value.
5849	*/
5850	netdev_dbg(netdev, "filter for MAC %pM already exists\n", mac);
5851
5852	return `0`;
5853	} else if (err) {
5854	/ error if the new filter addition failed /
5855	err = -EADDRNOTAVAIL;
5856	}
5857
5858	err_update_filters:
5859	if (err) {
5860	netdev_err(dev: netdev, format: "can't set MAC %pM. filter update failed\n",
5861	mac);
5862	netif_addr_lock_bh(dev: netdev);
5863	eth_hw_addr_set(dev: netdev, addr: old_mac);
5864	netif_addr_unlock_bh(dev: netdev);
5865	return err;
5866	}
5867
5868	netdev_dbg(vsi->netdev, "updated MAC address to %pM\n",
5869	netdev->dev_addr);
5870
5871	/ write new MAC address to the firmware /
5872	flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL;
5873	err = ice_aq_manage_mac_write(hw, mac_addr: mac, flags, NULL);
5874	if (err) {
5875	netdev_err(dev: netdev, format: "can't set MAC %pM. write to firmware failed error %d\n",
5876	mac, err);
5877	}
5878	return `0`;
5879	}
5880
5881	/**
5882	* ice_set_rx_mode - NDO callback to set the netdev filters
5883	* @netdev: network interface device structure
5884	*/
5885	static void ice_set_rx_mode(struct net_device *netdev)
5886	{
5887	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
5888	struct ice_vsi *vsi = np->vsi;
5889
5890	if (!vsi \|\| ice_is_switchdev_running(pf: vsi->back))
5891	return;
5892
5893	/ Set the flags to synchronize filters*
5894	* ndo_set_rx_mode may be triggered even without a change in netdev
5895	* flags
5896	*/
5897	set_bit(nr: ICE_VSI_UMAC_FLTR_CHANGED, addr: vsi->state);
5898	set_bit(nr: ICE_VSI_MMAC_FLTR_CHANGED, addr: vsi->state);
5899	set_bit(nr: ICE_FLAG_FLTR_SYNC, addr: vsi->back->flags);
5900
5901	/ schedule our worker thread which will take care of*
5902	* applying the new filter changes
5903	*/
5904	ice_service_task_schedule(pf: vsi->back);
5905	}
5906
5907	/**
5908	* ice_set_tx_maxrate - NDO callback to set the maximum per-queue bitrate
5909	* @netdev: network interface device structure
5910	* @queue_index: Queue ID
5911	* @maxrate: maximum bandwidth in Mbps
5912	*/
5913	static int
5914	ice_set_tx_maxrate(struct net_device netdev, int* queue_index, u32 maxrate)
5915	{
5916	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
5917	struct ice_vsi *vsi = np->vsi;
5918	u16 q_handle;
5919	int status;
5920	u8 tc;
5921
5922	/ Validate maxrate requested is within permitted range /
5923	if (maxrate && (maxrate > (ICE_SCHED_MAX_BW / `1000`))) {
5924	netdev_err(dev: netdev, format: "Invalid max rate %d specified for the queue %d\n",
5925	maxrate, queue_index);
5926	return -EINVAL;
5927	}
5928
5929	q_handle = vsi->tx_rings[queue_index]->q_handle;
5930	tc = ice_dcb_get_tc(vsi, queue_index);
5931
5932	vsi = ice_locate_vsi_using_queue(vsi, queue: queue_index);
5933	if (!vsi) {
5934	netdev_err(dev: netdev, format: "Invalid VSI for given queue %d\n",
5935	queue_index);
5936	return -EINVAL;
5937	}
5938
5939	/ Set BW back to default, when user set maxrate to 0 /
5940	if (!maxrate)
5941	status = ice_cfg_q_bw_dflt_lmt(pi: vsi->port_info, vsi_handle: vsi->idx, tc,
5942	q_handle, rl_type: ICE_MAX_BW);
5943	else
5944	status = ice_cfg_q_bw_lmt(pi: vsi->port_info, vsi_handle: vsi->idx, tc,
5945	q_handle, rl_type: ICE_MAX_BW, bw: maxrate * `1000`);
5946	if (status)
5947	netdev_err(dev: netdev, format: "Unable to set Tx max rate, error %d\n",
5948	status);
5949
5950	return status;
5951	}
5952
5953	/**
5954	* ice_fdb_add - add an entry to the hardware database
5955	* @ndm: the input from the stack
5956	* @tb: pointer to array of nladdr (unused)
5957	* @dev: the net device pointer
5958	* @addr: the MAC address entry being added
5959	* @vid: VLAN ID
5960	* @flags: instructions from stack about fdb operation
5961	* @extack: netlink extended ack
5962	*/
5963	static int
5964	ice_fdb_add(struct ndmsg ndm, struct* nlattr __always_unused *tb[],
5965	struct net_device dev, const* unsigned char *addr, u16 vid,
5966	u16 flags, struct netlink_ext_ack __always_unused *extack)
5967	{
5968	int err;
5969
5970	if (vid) {
5971	netdev_err(dev, format: "VLANs aren't supported yet for dev_uc\|mc_add()\n");
5972	return -EINVAL;
5973	}
5974	if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
5975	netdev_err(dev, format: "FDB only supports static addresses\n");
5976	return -EINVAL;
5977	}
5978
5979	if (is_unicast_ether_addr(addr) \|\| is_link_local_ether_addr(addr))
5980	err = dev_uc_add_excl(dev, addr);
5981	else if (is_multicast_ether_addr(addr))
5982	err = dev_mc_add_excl(dev, addr);
5983	else
5984	err = -EINVAL;
5985
5986	/ Only return duplicate errors if NLM_F_EXCL is set /
5987	if (err == -EEXIST && !(flags & NLM_F_EXCL))
5988	err = `0`;
5989
5990	return err;
5991	}
5992
5993	/**
5994	* ice_fdb_del - delete an entry from the hardware database
5995	* @ndm: the input from the stack
5996	* @tb: pointer to array of nladdr (unused)
5997	* @dev: the net device pointer
5998	* @addr: the MAC address entry being added
5999	* @vid: VLAN ID
6000	* @extack: netlink extended ack
6001	*/
6002	static int
6003	ice_fdb_del(struct ndmsg ndm, __always_unused struct* nlattr *tb[],
6004	struct net_device dev, const* unsigned char *addr,
6005	__always_unused u16 vid, struct netlink_ext_ack *extack)
6006	{
6007	int err;
6008
6009	if (ndm->ndm_state & NUD_PERMANENT) {
6010	netdev_err(dev, format: "FDB only supports static addresses\n");
6011	return -EINVAL;
6012	}
6013
6014	if (is_unicast_ether_addr(addr))
6015	err = dev_uc_del(dev, addr);
6016	else if (is_multicast_ether_addr(addr))
6017	err = dev_mc_del(dev, addr);
6018	else
6019	err = -EINVAL;
6020
6021	return err;
6022	}
6023
/* CTAG/STAG VLAN stripping (Rx) and insertion (Tx) offload feature bits */
#define NETIF_VLAN_OFFLOAD_FEATURES	(NETIF_F_HW_VLAN_CTAG_RX | \
					 NETIF_F_HW_VLAN_CTAG_TX | \
					 NETIF_F_HW_VLAN_STAG_RX | \
					 NETIF_F_HW_VLAN_STAG_TX)

/* CTAG/STAG Rx feature bits only */
#define NETIF_VLAN_STRIPPING_FEATURES	(NETIF_F_HW_VLAN_CTAG_RX | \
					 NETIF_F_HW_VLAN_STAG_RX)

/* CTAG/STAG VLAN filtering feature bits */
#define NETIF_VLAN_FILTERING_FEATURES	(NETIF_F_HW_VLAN_CTAG_FILTER | \
					 NETIF_F_HW_VLAN_STAG_FILTER)
6034
6035	/**
6036	* ice_fix_features - fix the netdev features flags based on device limitations
6037	* @netdev: ptr to the netdev that flags are being fixed on
6038	* @features: features that need to be checked and possibly fixed
6039	*
6040	* Make sure any fixups are made to features in this callback. This enables the
6041	* driver to not have to check unsupported configurations throughout the driver
6042	* because that's the responsibility of this callback.
6043	*
6044	* Single VLAN Mode (SVM) Supported Features:
6045	* NETIF_F_HW_VLAN_CTAG_FILTER
6046	* NETIF_F_HW_VLAN_CTAG_RX
6047	* NETIF_F_HW_VLAN_CTAG_TX
6048	*
6049	* Double VLAN Mode (DVM) Supported Features:
6050	* NETIF_F_HW_VLAN_CTAG_FILTER
6051	* NETIF_F_HW_VLAN_CTAG_RX
6052	* NETIF_F_HW_VLAN_CTAG_TX
6053	*
6054	* NETIF_F_HW_VLAN_STAG_FILTER
6055	* NETIF_F_HW_VLAN_STAG_RX
6056	* NETIF_F_HW_VLAN_STAG_TX
6057	*
6058	* Features that need fixing:
6059	* Cannot simultaneously enable CTAG and STAG stripping and/or insertion.
6060	* These are mutually exclusive as the VSI context cannot support multiple
6061	* VLAN ethertypes simultaneously for stripping and/or insertion. If this
6062	* is not done, then default to clearing the requested STAG offload
6063	* settings.
6064	*
6065	* All supported filtering has to be enabled or disabled together. For
6066	* example, in DVM, CTAG and STAG filtering have to be enabled and disabled
6067	* together. If this is not done, then default to VLAN filtering disabled.
6068	* These are mutually exclusive as there is currently no way to
6069	* enable/disable VLAN filtering based on VLAN ethertype when using VLAN
6070	* prune rules.
6071	*/
6072	static netdev_features_t
6073	ice_fix_features(struct net_device *netdev, netdev_features_t features)
6074	{
6075	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
6076	netdev_features_t req_vlan_fltr, cur_vlan_fltr;
6077	bool cur_ctag, cur_stag, req_ctag, req_stag;
6078
6079	cur_vlan_fltr = netdev->features & NETIF_VLAN_FILTERING_FEATURES;
6080	cur_ctag = cur_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;
6081	cur_stag = cur_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;
6082
6083	req_vlan_fltr = features & NETIF_VLAN_FILTERING_FEATURES;
6084	req_ctag = req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;
6085	req_stag = req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;
6086
6087	if (req_vlan_fltr != cur_vlan_fltr) {
6088	if (ice_is_dvm_ena(hw: &np->vsi->back->hw)) {
6089	if (req_ctag && req_stag) {
6090	features \|= NETIF_VLAN_FILTERING_FEATURES;
6091	} else if (!req_ctag && !req_stag) {
6092	features &= ~NETIF_VLAN_FILTERING_FEATURES;
6093	} else if ((!cur_ctag && req_ctag && !cur_stag) \|\|
6094	(!cur_stag && req_stag && !cur_ctag)) {
6095	features \|= NETIF_VLAN_FILTERING_FEATURES;
6096	netdev_warn(dev: netdev, format: "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been enabled for both types.\n");
6097	} else if ((cur_ctag && !req_ctag && cur_stag) \|\|
6098	(cur_stag && !req_stag && cur_ctag)) {
6099	features &= ~NETIF_VLAN_FILTERING_FEATURES;
6100	netdev_warn(dev: netdev, format: "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been disabled for both types.\n");
6101	}
6102	} else {
6103	if (req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER)
6104	netdev_warn(dev: netdev, format: "cannot support requested 802.1ad filtering setting in SVM mode\n");
6105
6106	if (req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER)
6107	features \|= NETIF_F_HW_VLAN_CTAG_FILTER;
6108	}
6109	}
6110
6111	if ((features & (NETIF_F_HW_VLAN_CTAG_RX \| NETIF_F_HW_VLAN_CTAG_TX)) &&
6112	(features & (NETIF_F_HW_VLAN_STAG_RX \| NETIF_F_HW_VLAN_STAG_TX))) {
6113	netdev_warn(dev: netdev, format: "cannot support CTAG and STAG VLAN stripping and/or insertion simultaneously since CTAG and STAG offloads are mutually exclusive, clearing STAG offload settings\n");
6114	features &= ~(NETIF_F_HW_VLAN_STAG_RX \|
6115	NETIF_F_HW_VLAN_STAG_TX);
6116	}
6117
6118	if (!(netdev->features & NETIF_F_RXFCS) &&
6119	(features & NETIF_F_RXFCS) &&
6120	(features & NETIF_VLAN_STRIPPING_FEATURES) &&
6121	!ice_vsi_has_non_zero_vlans(vsi: np->vsi)) {
6122	netdev_warn(dev: netdev, format: "Disabling VLAN stripping as FCS/CRC stripping is also disabled and there is no VLAN configured\n");
6123	features &= ~NETIF_VLAN_STRIPPING_FEATURES;
6124	}
6125
6126	return features;
6127	}
6128
6129	/**
6130	* ice_set_rx_rings_vlan_proto - update rings with new stripped VLAN proto
6131	* @vsi: PF's VSI
6132	* @vlan_ethertype: VLAN ethertype (802.1Q or 802.1ad) in network byte order
6133	*
6134	* Store current stripped VLAN proto in ring packet context,
6135	* so it can be accessed more efficiently by packet processing code.
6136	*/
6137	static void
6138	ice_set_rx_rings_vlan_proto(struct ice_vsi *vsi, __be16 vlan_ethertype)
6139	{
6140	u16 i;
6141
6142	ice_for_each_alloc_rxq(vsi, i)
6143	vsi->rx_rings[i]->pkt_ctx.vlan_proto = vlan_ethertype;
6144	}
6145
6146	/**
6147	* ice_set_vlan_offload_features - set VLAN offload features for the PF VSI
6148	* @vsi: PF's VSI
6149	* @features: features used to determine VLAN offload settings
6150	*
6151	* First, determine the vlan_ethertype based on the VLAN offload bits in
6152	* features. Then determine if stripping and insertion should be enabled or
6153	* disabled. Finally enable or disable VLAN stripping and insertion.
6154	*/
6155	static int
6156	ice_set_vlan_offload_features(struct ice_vsi *vsi, netdev_features_t features)
6157	{
6158	bool enable_stripping = true, enable_insertion = true;
6159	struct ice_vsi_vlan_ops *vlan_ops;
6160	int strip_err = `0`, insert_err = `0`;
6161	u16 vlan_ethertype = `0`;
6162
6163	vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
6164
6165	if (features & (NETIF_F_HW_VLAN_STAG_RX \| NETIF_F_HW_VLAN_STAG_TX))
6166	vlan_ethertype = ETH_P_8021AD;
6167	else if (features & (NETIF_F_HW_VLAN_CTAG_RX \| NETIF_F_HW_VLAN_CTAG_TX))
6168	vlan_ethertype = ETH_P_8021Q;
6169
6170	if (!(features & (NETIF_F_HW_VLAN_STAG_RX \| NETIF_F_HW_VLAN_CTAG_RX)))
6171	enable_stripping = false;
6172	if (!(features & (NETIF_F_HW_VLAN_STAG_TX \| NETIF_F_HW_VLAN_CTAG_TX)))
6173	enable_insertion = false;
6174
6175	if (enable_stripping)
6176	strip_err = vlan_ops->ena_stripping(vsi, vlan_ethertype);
6177	else
6178	strip_err = vlan_ops->dis_stripping(vsi);
6179
6180	if (enable_insertion)
6181	insert_err = vlan_ops->ena_insertion(vsi, vlan_ethertype);
6182	else
6183	insert_err = vlan_ops->dis_insertion(vsi);
6184
6185	if (strip_err \|\| insert_err)
6186	return -EIO;
6187
6188	ice_set_rx_rings_vlan_proto(vsi, vlan_ethertype: enable_stripping ?
6189	htons(vlan_ethertype) : `0`);
6190
6191	return `0`;
6192	}
6193
6194	/**
6195	* ice_set_vlan_filtering_features - set VLAN filtering features for the PF VSI
6196	* @vsi: PF's VSI
6197	* @features: features used to determine VLAN filtering settings
6198	*
6199	* Enable or disable Rx VLAN filtering based on the VLAN filtering bits in the
6200	* features.
6201	*/
6202	static int
6203	ice_set_vlan_filtering_features(struct ice_vsi *vsi, netdev_features_t features)
6204	{
6205	struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
6206	int err = `0`;
6207
6208	/ support Single VLAN Mode (SVM) and Double VLAN Mode (DVM) by checking*
6209	* if either bit is set
6210	*/
6211	if (features &
6212	(NETIF_F_HW_VLAN_CTAG_FILTER \| NETIF_F_HW_VLAN_STAG_FILTER))
6213	err = vlan_ops->ena_rx_filtering(vsi);
6214	else
6215	err = vlan_ops->dis_rx_filtering(vsi);
6216
6217	return err;
6218	}
6219
6220	/**
6221	* ice_set_vlan_features - set VLAN settings based on suggested feature set
6222	* @netdev: ptr to the netdev being adjusted
6223	* @features: the feature set that the stack is suggesting
6224	*
6225	* Only update VLAN settings if the requested_vlan_features are different than
6226	* the current_vlan_features.
6227	*/
6228	static int
6229	ice_set_vlan_features(struct net_device *netdev, netdev_features_t features)
6230	{
6231	netdev_features_t current_vlan_features, requested_vlan_features;
6232	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
6233	struct ice_vsi *vsi = np->vsi;
6234	int err;
6235
6236	current_vlan_features = netdev->features & NETIF_VLAN_OFFLOAD_FEATURES;
6237	requested_vlan_features = features & NETIF_VLAN_OFFLOAD_FEATURES;
6238	if (current_vlan_features ^ requested_vlan_features) {
6239	if ((features & NETIF_F_RXFCS) &&
6240	(features & NETIF_VLAN_STRIPPING_FEATURES)) {
6241	dev_err(ice_pf_to_dev(vsi->back),
6242	"To enable VLAN stripping, you must first enable FCS/CRC stripping\n");
6243	return -EIO;
6244	}
6245
6246	err = ice_set_vlan_offload_features(vsi, features);
6247	if (err)
6248	return err;
6249	}
6250
6251	current_vlan_features = netdev->features &
6252	NETIF_VLAN_FILTERING_FEATURES;
6253	requested_vlan_features = features & NETIF_VLAN_FILTERING_FEATURES;
6254	if (current_vlan_features ^ requested_vlan_features) {
6255	err = ice_set_vlan_filtering_features(vsi, features);
6256	if (err)
6257	return err;
6258	}
6259
6260	return `0`;
6261	}
6262
6263	/**
6264	* ice_set_loopback - turn on/off loopback mode on underlying PF
6265	* @vsi: ptr to VSI
6266	* @ena: flag to indicate the on/off setting
6267	*/
6268	static int ice_set_loopback(struct ice_vsi *vsi, bool ena)
6269	{
6270	bool if_running = netif_running(dev: vsi->netdev);
6271	int ret;
6272
6273	if (if_running && !test_and_set_bit(nr: ICE_VSI_DOWN, addr: vsi->state)) {
6274	ret = ice_down(vsi);
6275	if (ret) {
6276	netdev_err(dev: vsi->netdev, format: "Preparing device to toggle loopback failed\n");
6277	return ret;
6278	}
6279	}
6280	ret = ice_aq_set_mac_loopback(hw: &vsi->back->hw, ena_lpbk: ena, NULL);
6281	if (ret)
6282	netdev_err(dev: vsi->netdev, format: "Failed to toggle loopback state\n");
6283	if (if_running)
6284	ret = ice_up(vsi);
6285
6286	return ret;
6287	}
6288
6289	/**
6290	* ice_set_features - set the netdev feature flags
6291	* @netdev: ptr to the netdev being adjusted
6292	* @features: the feature set that the stack is suggesting
6293	*/
6294	static int
6295	ice_set_features(struct net_device *netdev, netdev_features_t features)
6296	{
6297	netdev_features_t changed = netdev->features ^ features;
6298	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
6299	struct ice_vsi *vsi = np->vsi;
6300	struct ice_pf *pf = vsi->back;
6301	int ret = `0`;
6302
6303	/ Don't set any netdev advanced features with device in Safe Mode /
6304	if (ice_is_safe_mode(pf)) {
6305	dev_err(ice_pf_to_dev(pf),
6306	"Device is in Safe Mode - not enabling advanced netdev features\n");
6307	return ret;
6308	}
6309
6310	/ Do not change setting during reset /
6311	if (ice_is_reset_in_progress(state: pf->state)) {
6312	dev_err(ice_pf_to_dev(pf),
6313	"Device is resetting, changing advanced netdev features temporarily unavailable.\n");
6314	return -EBUSY;
6315	}
6316
6317	/ Multiple features can be changed in one call so keep features in*
6318	* separate if/else statements to guarantee each feature is checked
6319	*/
6320	if (changed & NETIF_F_RXHASH)
6321	ice_vsi_manage_rss_lut(vsi, ena: !!(features & NETIF_F_RXHASH));
6322
6323	ret = ice_set_vlan_features(netdev, features);
6324	if (ret)
6325	return ret;
6326
6327	/ Turn on receive of FCS aka CRC, and after setting this*
6328	* flag the packet data will have the 4 byte CRC appended
6329	*/
6330	if (changed & NETIF_F_RXFCS) {
6331	if ((features & NETIF_F_RXFCS) &&
6332	(features & NETIF_VLAN_STRIPPING_FEATURES)) {
6333	dev_err(ice_pf_to_dev(vsi->back),
6334	"To disable FCS/CRC stripping, you must first disable VLAN stripping\n");
6335	return -EIO;
6336	}
6337
6338	ice_vsi_cfg_crc_strip(vsi, disable: !!(features & NETIF_F_RXFCS));
6339	ret = ice_down_up(vsi);
6340	if (ret)
6341	return ret;
6342	}
6343
6344	if (changed & NETIF_F_NTUPLE) {
6345	bool ena = !!(features & NETIF_F_NTUPLE);
6346
6347	ice_vsi_manage_fdir(vsi, ena);
6348	ena ? ice_init_arfs(vsi) : ice_clear_arfs(vsi);
6349	}
6350
6351	/ don't turn off hw_tc_offload when ADQ is already enabled /
6352	if (!(features & NETIF_F_HW_TC) && ice_is_adq_active(pf)) {
6353	dev_err(ice_pf_to_dev(pf), "ADQ is active, can't turn hw_tc_offload off\n");
6354	return -EACCES;
6355	}
6356
6357	if (changed & NETIF_F_HW_TC) {
6358	bool ena = !!(features & NETIF_F_HW_TC);
6359
6360	ena ? set_bit(nr: ICE_FLAG_CLS_FLOWER, addr: pf->flags) :
6361	clear_bit(nr: ICE_FLAG_CLS_FLOWER, addr: pf->flags);
6362	}
6363
6364	if (changed & NETIF_F_LOOPBACK)
6365	ret = ice_set_loopback(vsi, ena: !!(features & NETIF_F_LOOPBACK));
6366
6367	return ret;
6368	}
6369
6370	/**
6371	* ice_vsi_vlan_setup - Setup VLAN offload properties on a PF VSI
6372	* @vsi: VSI to setup VLAN properties for
6373	*/
6374	static int ice_vsi_vlan_setup(struct ice_vsi *vsi)
6375	{
6376	int err;
6377
6378	err = ice_set_vlan_offload_features(vsi, features: vsi->netdev->features);
6379	if (err)
6380	return err;
6381
6382	err = ice_set_vlan_filtering_features(vsi, features: vsi->netdev->features);
6383	if (err)
6384	return err;
6385
6386	return ice_vsi_add_vlan_zero(vsi);
6387	}
6388
6389	/**
6390	* ice_vsi_cfg_lan - Setup the VSI lan related config
6391	* @vsi: the VSI being configured
6392	*
6393	* Return 0 on success and negative value on error
6394	*/
6395	int ice_vsi_cfg_lan(struct ice_vsi *vsi)
6396	{
6397	int err;
6398
6399	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
6400	ice_set_rx_mode(netdev: vsi->netdev);
6401
6402	err = ice_vsi_vlan_setup(vsi);
6403	if (err)
6404	return err;
6405	}
6406	ice_vsi_cfg_dcb_rings(vsi);
6407
6408	err = ice_vsi_cfg_lan_txqs(vsi);
6409	if (!err && ice_is_xdp_ena_vsi(vsi))
6410	err = ice_vsi_cfg_xdp_txqs(vsi);
6411	if (!err)
6412	err = ice_vsi_cfg_rxqs(vsi);
6413
6414	return err;
6415	}
6416
/* THEORY OF MODERATION:
 * The ice driver hardware works differently than the hardware that DIMLIB was
 * originally made for. ice hardware doesn't have packet count limits that
 * can trigger an interrupt, but it does have interrupt rate limit support,
 * which is hard-coded to a limit of 250,000 ints/second.
 * If not using dynamic moderation, the INTRL value can be modified
 * by ethtool rx-usecs-high.
 */
/* One entry of a dynamic interrupt moderation profile table; the tables
 * below are indexed by the DIM profile index.
 */
struct ice_dim {
	/* the throttle rate for interrupts, basically worst case delay before
	 * an initial interrupt fires, value is stored in microseconds.
	 */
	u16 itr;
};
6431
6432	/ Make a different profile for Rx that doesn't allow quite so aggressive*
6433	* moderation at the high end (it maxes out at 126us or about 8k interrupts a
6434	* second.
6435	*/
6436	static const struct ice_dim rx_profile[] = {
6437	{`2`}, / 500,000 ints/s, capped at 250K by INTRL /
6438	{`8`}, / 125,000 ints/s /
6439	{`16`}, / 62,500 ints/s /
6440	{`62`}, / 16,129 ints/s /
6441	{`126`} / 7,936 ints/s /
6442	};
6443
6444	/ The transmit profile, which has the same sorts of values*
6445	* as the previous struct
6446	*/
6447	static const struct ice_dim tx_profile[] = {
6448	{`2`}, / 500,000 ints/s, capped at 250K by INTRL /
6449	{`8`}, / 125,000 ints/s /
6450	{`40`}, / 16,125 ints/s /
6451	{`128`}, / 7,812 ints/s /
6452	{`256`} / 3,906 ints/s /
6453	};
6454
6455	static void ice_tx_dim_work(struct work_struct *work)
6456	{
6457	struct ice_ring_container *rc;
6458	struct dim *dim;
6459	u16 itr;
6460
6461	dim = container_of(work, struct dim, work);
6462	rc = dim->priv;
6463
6464	WARN_ON(dim->profile_ix >= ARRAY_SIZE(tx_profile));
6465
6466	/ look up the values in our local table /
6467	itr = tx_profile[dim->profile_ix].itr;
6468
6469	ice_trace(tx_dim_work, container_of(rc, struct ice_q_vector, tx), dim);
6470	ice_write_itr(rc, itr);
6471
6472	dim->state = DIM_START_MEASURE;
6473	}
6474
6475	static void ice_rx_dim_work(struct work_struct *work)
6476	{
6477	struct ice_ring_container *rc;
6478	struct dim *dim;
6479	u16 itr;
6480
6481	dim = container_of(work, struct dim, work);
6482	rc = dim->priv;
6483
6484	WARN_ON(dim->profile_ix >= ARRAY_SIZE(rx_profile));
6485
6486	/ look up the values in our local table /
6487	itr = rx_profile[dim->profile_ix].itr;
6488
6489	ice_trace(rx_dim_work, container_of(rc, struct ice_q_vector, rx), dim);
6490	ice_write_itr(rc, itr);
6491
6492	dim->state = DIM_START_MEASURE;
6493	}
6494
6495	#define ICE_DIM_DEFAULT_PROFILE_IX 1
6496
6497	/**
6498	* ice_init_moderation - set up interrupt moderation
6499	* @q_vector: the vector containing rings to be configured
6500	*
6501	* Set up interrupt moderation registers, with the intent to do the right thing
6502	* when called from reset or from probe, and whether or not dynamic moderation
6503	* is enabled or not. Take special care to write all the registers in both
6504	* dynamic moderation mode or not in order to make sure hardware is in a known
6505	* state.
6506	*/
6507	static void ice_init_moderation(struct ice_q_vector *q_vector)
6508	{
6509	struct ice_ring_container *rc;
6510	bool tx_dynamic, rx_dynamic;
6511
6512	rc = &q_vector->tx;
6513	INIT_WORK(&rc->dim.work, ice_tx_dim_work);
6514	rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
6515	rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX;
6516	rc->dim.priv = rc;
6517	tx_dynamic = ITR_IS_DYNAMIC(rc);
6518
6519	/ set the initial TX ITR to match the above /
6520	ice_write_itr(rc, itr: tx_dynamic ?
6521	tx_profile[rc->dim.profile_ix].itr : rc->itr_setting);
6522
6523	rc = &q_vector->rx;
6524	INIT_WORK(&rc->dim.work, ice_rx_dim_work);
6525	rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
6526	rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX;
6527	rc->dim.priv = rc;
6528	rx_dynamic = ITR_IS_DYNAMIC(rc);
6529
6530	/ set the initial RX ITR to match the above /
6531	ice_write_itr(rc, itr: rx_dynamic ? rx_profile[rc->dim.profile_ix].itr :
6532	rc->itr_setting);
6533
6534	ice_set_q_vector_intrl(q_vector);
6535	}
6536
6537	/**
6538	* ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI
6539	* @vsi: the VSI being configured
6540	*/
6541	static void ice_napi_enable_all(struct ice_vsi *vsi)
6542	{
6543	int q_idx;
6544
6545	if (!vsi->netdev)
6546	return;
6547
6548	ice_for_each_q_vector(vsi, q_idx) {
6549	struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
6550
6551	ice_init_moderation(q_vector);
6552
6553	if (q_vector->rx.rx_ring \|\| q_vector->tx.tx_ring)
6554	napi_enable(n: &q_vector->napi);
6555	}
6556	}
6557
6558	/**
6559	* ice_up_complete - Finish the last steps of bringing up a connection
6560	* @vsi: The VSI being configured
6561	*
6562	* Return 0 on success and negative value on error
6563	*/
6564	static int ice_up_complete(struct ice_vsi *vsi)
6565	{
6566	struct ice_pf *pf = vsi->back;
6567	int err;
6568
6569	ice_vsi_cfg_msix(vsi);
6570
6571	/ Enable only Rx rings, Tx rings were enabled by the FW when the*
6572	* Tx queue group list was configured and the context bits were
6573	* programmed using ice_vsi_cfg_txqs
6574	*/
6575	err = ice_vsi_start_all_rx_rings(vsi);
6576	if (err)
6577	return err;
6578
6579	clear_bit(nr: ICE_VSI_DOWN, addr: vsi->state);
6580	ice_napi_enable_all(vsi);
6581	ice_vsi_ena_irq(vsi);
6582
6583	if (vsi->port_info &&
6584	(vsi->port_info->phy.link_info.link_info & ICE_AQ_LINK_UP) &&
6585	vsi->netdev && vsi->type == ICE_VSI_PF) {
6586	ice_print_link_msg(vsi, isup: true);
6587	netif_tx_start_all_queues(dev: vsi->netdev);
6588	netif_carrier_on(dev: vsi->netdev);
6589	ice_ptp_link_change(pf, port: pf->hw.pf_id, linkup: true);
6590	}
6591
6592	/ Perform an initial read of the statistics registers now to*
6593	* set the baseline so counters are ready when interface is up
6594	*/
6595	ice_update_eth_stats(vsi);
6596
6597	if (vsi->type == ICE_VSI_PF)
6598	ice_service_task_schedule(pf);
6599
6600	return `0`;
6601	}
6602
/**
 * ice_up - Bring the connection back up after being down
 * @vsi: VSI being configured
 *
 * Return: 0 on success, or the error from LAN configuration or from the
 * final bring-up steps.
 */
int ice_up(struct ice_vsi *vsi)
{
	int ret = ice_vsi_cfg_lan(vsi);

	if (ret)
		return ret;

	return ice_up_complete(vsi);
}
6617
6618	/**
6619	* ice_fetch_u64_stats_per_ring - get packets and bytes stats per ring
6620	* @syncp: pointer to u64_stats_sync
6621	* @stats: stats that pkts and bytes count will be taken from
6622	* @pkts: packets stats counter
6623	* @bytes: bytes stats counter
6624	*
6625	* This function fetches stats from the ring considering the atomic operations
6626	* that needs to be performed to read u64 values in 32 bit machine.
6627	*/
6628	void
6629	ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp,
6630	struct ice_q_stats stats, u64 pkts, u64 bytes)
6631	{
6632	unsigned int start;
6633
6634	do {
6635	start = u64_stats_fetch_begin(syncp);
6636	*pkts = stats.pkts;
6637	*bytes = stats.bytes;
6638	} while (u64_stats_fetch_retry(syncp, start));
6639	}
6640
6641	/**
6642	* ice_update_vsi_tx_ring_stats - Update VSI Tx ring stats counters
6643	* @vsi: the VSI to be updated
6644	* @vsi_stats: the stats struct to be updated
6645	* @rings: rings to work on
6646	* @count: number of rings
6647	*/
6648	static void
6649	ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi,
6650	struct rtnl_link_stats64 *vsi_stats,
6651	struct ice_tx_ring **rings, u16 count)
6652	{
6653	u16 i;
6654
6655	for (i = `0`; i < count; i++) {
6656	struct ice_tx_ring *ring;
6657	u64 pkts = `0`, bytes = `0`;
6658
6659	ring = READ_ONCE(rings[i]);
6660	if (!ring \|\| !ring->ring_stats)
6661	continue;
6662	ice_fetch_u64_stats_per_ring(syncp: &ring->ring_stats->syncp,
6663	stats: ring->ring_stats->stats, pkts: &pkts,
6664	bytes: &bytes);
6665	vsi_stats->tx_packets += pkts;
6666	vsi_stats->tx_bytes += bytes;
6667	vsi->tx_restart += ring->ring_stats->tx_stats.restart_q;
6668	vsi->tx_busy += ring->ring_stats->tx_stats.tx_busy;
6669	vsi->tx_linearize += ring->ring_stats->tx_stats.tx_linearize;
6670	}
6671	}
6672
6673	/**
6674	* ice_update_vsi_ring_stats - Update VSI stats counters
6675	* @vsi: the VSI to be updated
6676	*/
6677	static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
6678	{
6679	struct rtnl_link_stats64 net_stats, stats_prev;
6680	struct rtnl_link_stats64 *vsi_stats;
6681	struct ice_pf *pf = vsi->back;
6682	u64 pkts, bytes;
6683	int i;
6684
6685	vsi_stats = kzalloc(size: sizeof(*vsi_stats), GFP_ATOMIC);
6686	if (!vsi_stats)
6687	return;
6688
6689	/ reset non-netdev (extended) stats /
6690	vsi->tx_restart = `0`;
6691	vsi->tx_busy = `0`;
6692	vsi->tx_linearize = `0`;
6693	vsi->rx_buf_failed = `0`;
6694	vsi->rx_page_failed = `0`;
6695
6696	rcu_read_lock();
6697
6698	/ update Tx rings counters /
6699	ice_update_vsi_tx_ring_stats(vsi, vsi_stats, rings: vsi->tx_rings,
6700	count: vsi->num_txq);
6701
6702	/ update Rx rings counters /
6703	ice_for_each_rxq(vsi, i) {
6704	struct ice_rx_ring *ring = READ_ONCE(vsi->rx_rings[i]);
6705	struct ice_ring_stats *ring_stats;
6706
6707	ring_stats = ring->ring_stats;
6708	ice_fetch_u64_stats_per_ring(syncp: &ring_stats->syncp,
6709	stats: ring_stats->stats, pkts: &pkts,
6710	bytes: &bytes);
6711	vsi_stats->rx_packets += pkts;
6712	vsi_stats->rx_bytes += bytes;
6713	vsi->rx_buf_failed += ring_stats->rx_stats.alloc_buf_failed;
6714	vsi->rx_page_failed += ring_stats->rx_stats.alloc_page_failed;
6715	}
6716
6717	/ update XDP Tx rings counters /
6718	if (ice_is_xdp_ena_vsi(vsi))
6719	ice_update_vsi_tx_ring_stats(vsi, vsi_stats, rings: vsi->xdp_rings,
6720	count: vsi->num_xdp_txq);
6721
6722	rcu_read_unlock();
6723
6724	net_stats = &vsi->net_stats;
6725	stats_prev = &vsi->net_stats_prev;
6726
6727	/ Update netdev counters, but keep in mind that values could start at*
6728	* random value after PF reset. And as we increase the reported stat by
6729	* diff of Prev-Cur, we need to be sure that Prev is valid. If it's not,
6730	* let's skip this round.
6731	*/
6732	if (likely(pf->stat_prev_loaded)) {
6733	net_stats->tx_packets += vsi_stats->tx_packets - stats_prev->tx_packets;
6734	net_stats->tx_bytes += vsi_stats->tx_bytes - stats_prev->tx_bytes;
6735	net_stats->rx_packets += vsi_stats->rx_packets - stats_prev->rx_packets;
6736	net_stats->rx_bytes += vsi_stats->rx_bytes - stats_prev->rx_bytes;
6737	}
6738
6739	stats_prev->tx_packets = vsi_stats->tx_packets;
6740	stats_prev->tx_bytes = vsi_stats->tx_bytes;
6741	stats_prev->rx_packets = vsi_stats->rx_packets;
6742	stats_prev->rx_bytes = vsi_stats->rx_bytes;
6743
6744	kfree(objp: vsi_stats);
6745	}
6746
6747	/**
6748	* ice_update_vsi_stats - Update VSI stats counters
6749	* @vsi: the VSI to be updated
6750	*/
6751	void ice_update_vsi_stats(struct ice_vsi *vsi)
6752	{
6753	struct rtnl_link_stats64 *cur_ns = &vsi->net_stats;
6754	struct ice_eth_stats *cur_es = &vsi->eth_stats;
6755	struct ice_pf *pf = vsi->back;
6756
6757	if (test_bit(ICE_VSI_DOWN, vsi->state) \|\|
6758	test_bit(ICE_CFG_BUSY, pf->state))
6759	return;
6760
6761	/ get stats as recorded by Tx/Rx rings /
6762	ice_update_vsi_ring_stats(vsi);
6763
6764	/ get VSI stats as recorded by the hardware /
6765	ice_update_eth_stats(vsi);
6766
6767	cur_ns->tx_errors = cur_es->tx_errors;
6768	cur_ns->rx_dropped = cur_es->rx_discards;
6769	cur_ns->tx_dropped = cur_es->tx_discards;
6770	cur_ns->multicast = cur_es->rx_multicast;
6771
6772	/ update some more netdev stats if this is main VSI /
6773	if (vsi->type == ICE_VSI_PF) {
6774	cur_ns->rx_crc_errors = pf->stats.crc_errors;
6775	cur_ns->rx_errors = pf->stats.crc_errors +
6776	pf->stats.illegal_bytes +
6777	pf->stats.rx_undersize +
6778	pf->hw_csum_rx_error +
6779	pf->stats.rx_jabber +
6780	pf->stats.rx_fragments +
6781	pf->stats.rx_oversize;
6782	/ record drops from the port level /
6783	cur_ns->rx_missed_errors = pf->stats.eth.rx_discards;
6784	}
6785	}
6786
6787	/**
6788	* ice_update_pf_stats - Update PF port stats counters
6789	* @pf: PF whose stats needs to be updated
6790	*/
6791	void ice_update_pf_stats(struct ice_pf *pf)
6792	{
6793	struct ice_hw_port_stats prev_ps, cur_ps;
6794	struct ice_hw *hw = &pf->hw;
6795	u16 fd_ctr_base;
6796	u8 port;
6797
6798	port = hw->port_info->lport;
6799	prev_ps = &pf->stats_prev;
6800	cur_ps = &pf->stats;
6801
6802	if (ice_is_reset_in_progress(state: pf->state))
6803	pf->stat_prev_loaded = false;
6804
6805	ice_stat_update40(hw, GLPRT_GORCL(port), prev_stat_loaded: pf->stat_prev_loaded,
6806	prev_stat: &prev_ps->eth.rx_bytes,
6807	cur_stat: &cur_ps->eth.rx_bytes);
6808
6809	ice_stat_update40(hw, GLPRT_UPRCL(port), prev_stat_loaded: pf->stat_prev_loaded,
6810	prev_stat: &prev_ps->eth.rx_unicast,
6811	cur_stat: &cur_ps->eth.rx_unicast);
6812
6813	ice_stat_update40(hw, GLPRT_MPRCL(port), prev_stat_loaded: pf->stat_prev_loaded,
6814	prev_stat: &prev_ps->eth.rx_multicast,
6815	cur_stat: &cur_ps->eth.rx_multicast);
6816
6817	ice_stat_update40(hw, GLPRT_BPRCL(port), prev_stat_loaded: pf->stat_prev_loaded,
6818	prev_stat: &prev_ps->eth.rx_broadcast,
6819	cur_stat: &cur_ps->eth.rx_broadcast);
6820
6821	ice_stat_update32(hw, PRTRPB_RDPC, prev_stat_loaded: pf->stat_prev_loaded,
6822	prev_stat: &prev_ps->eth.rx_discards,
6823	cur_stat: &cur_ps->eth.rx_discards);
6824
6825	ice_stat_update40(hw, GLPRT_GOTCL(port), prev_stat_loaded: pf->stat_prev_loaded,
6826	prev_stat: &prev_ps->eth.tx_bytes,
6827	cur_stat: &cur_ps->eth.tx_bytes);
6828
6829	ice_stat_update40(hw, GLPRT_UPTCL(port), prev_stat_loaded: pf->stat_prev_loaded,
6830	prev_stat: &prev_ps->eth.tx_unicast,
6831	cur_stat: &cur_ps->eth.tx_unicast);
6832
6833	ice_stat_update40(hw, GLPRT_MPTCL(port), prev_stat_loaded: pf->stat_prev_loaded,
6834	prev_stat: &prev_ps->eth.tx_multicast,
6835	cur_stat: &cur_ps->eth.tx_multicast);
6836
6837	ice_stat_update40(hw, GLPRT_BPTCL(port), prev_stat_loaded: pf->stat_prev_loaded,
6838	prev_stat: &prev_ps->eth.tx_broadcast,
6839	cur_stat: &cur_ps->eth.tx_broadcast);
6840
6841	ice_stat_update32(hw, GLPRT_TDOLD(port), prev_stat_loaded: pf->stat_prev_loaded,
6842	prev_stat: &prev_ps->tx_dropped_link_down,
6843	cur_stat: &cur_ps->tx_dropped_link_down);
6844
6845	ice_stat_update40(hw, GLPRT_PRC64L(port), prev_stat_loaded: pf->stat_prev_loaded,
6846	prev_stat: &prev_ps->rx_size_64, cur_stat: &cur_ps->rx_size_64);
6847
6848	ice_stat_update40(hw, GLPRT_PRC127L(port), prev_stat_loaded: pf->stat_prev_loaded,
6849	prev_stat: &prev_ps->rx_size_127, cur_stat: &cur_ps->rx_size_127);
6850
6851	ice_stat_update40(hw, GLPRT_PRC255L(port), prev_stat_loaded: pf->stat_prev_loaded,
6852	prev_stat: &prev_ps->rx_size_255, cur_stat: &cur_ps->rx_size_255);
6853
6854	ice_stat_update40(hw, GLPRT_PRC511L(port), prev_stat_loaded: pf->stat_prev_loaded,
6855	prev_stat: &prev_ps->rx_size_511, cur_stat: &cur_ps->rx_size_511);
6856
6857	ice_stat_update40(hw, GLPRT_PRC1023L(port), prev_stat_loaded: pf->stat_prev_loaded,
6858	prev_stat: &prev_ps->rx_size_1023, cur_stat: &cur_ps->rx_size_1023);
6859
6860	ice_stat_update40(hw, GLPRT_PRC1522L(port), prev_stat_loaded: pf->stat_prev_loaded,
6861	prev_stat: &prev_ps->rx_size_1522, cur_stat: &cur_ps->rx_size_1522);
6862
6863	ice_stat_update40(hw, GLPRT_PRC9522L(port), prev_stat_loaded: pf->stat_prev_loaded,
6864	prev_stat: &prev_ps->rx_size_big, cur_stat: &cur_ps->rx_size_big);
6865
6866	ice_stat_update40(hw, GLPRT_PTC64L(port), prev_stat_loaded: pf->stat_prev_loaded,
6867	prev_stat: &prev_ps->tx_size_64, cur_stat: &cur_ps->tx_size_64);
6868
6869	ice_stat_update40(hw, GLPRT_PTC127L(port), prev_stat_loaded: pf->stat_prev_loaded,
6870	prev_stat: &prev_ps->tx_size_127, cur_stat: &cur_ps->tx_size_127);
6871
6872	ice_stat_update40(hw, GLPRT_PTC255L(port), prev_stat_loaded: pf->stat_prev_loaded,
6873	prev_stat: &prev_ps->tx_size_255, cur_stat: &cur_ps->tx_size_255);
6874
6875	ice_stat_update40(hw, GLPRT_PTC511L(port), prev_stat_loaded: pf->stat_prev_loaded,
6876	prev_stat: &prev_ps->tx_size_511, cur_stat: &cur_ps->tx_size_511);
6877
6878	ice_stat_update40(hw, GLPRT_PTC1023L(port), prev_stat_loaded: pf->stat_prev_loaded,
6879	prev_stat: &prev_ps->tx_size_1023, cur_stat: &cur_ps->tx_size_1023);
6880
6881	ice_stat_update40(hw, GLPRT_PTC1522L(port), prev_stat_loaded: pf->stat_prev_loaded,
6882	prev_stat: &prev_ps->tx_size_1522, cur_stat: &cur_ps->tx_size_1522);
6883
6884	ice_stat_update40(hw, GLPRT_PTC9522L(port), prev_stat_loaded: pf->stat_prev_loaded,
6885	prev_stat: &prev_ps->tx_size_big, cur_stat: &cur_ps->tx_size_big);
6886
6887	fd_ctr_base = hw->fd_ctr_base;
6888
6889	ice_stat_update40(hw,
6890	GLSTAT_FD_CNT0L(ICE_FD_SB_STAT_IDX(fd_ctr_base)),
6891	prev_stat_loaded: pf->stat_prev_loaded, prev_stat: &prev_ps->fd_sb_match,
6892	cur_stat: &cur_ps->fd_sb_match);
6893	ice_stat_update32(hw, GLPRT_LXONRXC(port), prev_stat_loaded: pf->stat_prev_loaded,
6894	prev_stat: &prev_ps->link_xon_rx, cur_stat: &cur_ps->link_xon_rx);
6895
6896	ice_stat_update32(hw, GLPRT_LXOFFRXC(port), prev_stat_loaded: pf->stat_prev_loaded,
6897	prev_stat: &prev_ps->link_xoff_rx, cur_stat: &cur_ps->link_xoff_rx);
6898
6899	ice_stat_update32(hw, GLPRT_LXONTXC(port), prev_stat_loaded: pf->stat_prev_loaded,
6900	prev_stat: &prev_ps->link_xon_tx, cur_stat: &cur_ps->link_xon_tx);
6901
6902	ice_stat_update32(hw, GLPRT_LXOFFTXC(port), prev_stat_loaded: pf->stat_prev_loaded,
6903	prev_stat: &prev_ps->link_xoff_tx, cur_stat: &cur_ps->link_xoff_tx);
6904
6905	ice_update_dcb_stats(pf);
6906
6907	ice_stat_update32(hw, GLPRT_CRCERRS(port), prev_stat_loaded: pf->stat_prev_loaded,
6908	prev_stat: &prev_ps->crc_errors, cur_stat: &cur_ps->crc_errors);
6909
6910	ice_stat_update32(hw, GLPRT_ILLERRC(port), prev_stat_loaded: pf->stat_prev_loaded,
6911	prev_stat: &prev_ps->illegal_bytes, cur_stat: &cur_ps->illegal_bytes);
6912
6913	ice_stat_update32(hw, GLPRT_MLFC(port), prev_stat_loaded: pf->stat_prev_loaded,
6914	prev_stat: &prev_ps->mac_local_faults,
6915	cur_stat: &cur_ps->mac_local_faults);
6916
6917	ice_stat_update32(hw, GLPRT_MRFC(port), prev_stat_loaded: pf->stat_prev_loaded,
6918	prev_stat: &prev_ps->mac_remote_faults,
6919	cur_stat: &cur_ps->mac_remote_faults);
6920
6921	ice_stat_update32(hw, GLPRT_RUC(port), prev_stat_loaded: pf->stat_prev_loaded,
6922	prev_stat: &prev_ps->rx_undersize, cur_stat: &cur_ps->rx_undersize);
6923
6924	ice_stat_update32(hw, GLPRT_RFC(port), prev_stat_loaded: pf->stat_prev_loaded,
6925	prev_stat: &prev_ps->rx_fragments, cur_stat: &cur_ps->rx_fragments);
6926
6927	ice_stat_update32(hw, GLPRT_ROC(port), prev_stat_loaded: pf->stat_prev_loaded,
6928	prev_stat: &prev_ps->rx_oversize, cur_stat: &cur_ps->rx_oversize);
6929
6930	ice_stat_update32(hw, GLPRT_RJC(port), prev_stat_loaded: pf->stat_prev_loaded,
6931	prev_stat: &prev_ps->rx_jabber, cur_stat: &cur_ps->rx_jabber);
6932
6933	cur_ps->fd_sb_status = test_bit(ICE_FLAG_FD_ENA, pf->flags) ? `1` : `0`;
6934
6935	pf->stat_prev_loaded = true;
6936	}
6937
6938	/**
6939	* ice_get_stats64 - get statistics for network device structure
6940	* @netdev: network interface device structure
6941	* @stats: main device statistics structure
6942	*/
6943	static
6944	void ice_get_stats64(struct net_device netdev, struct* rtnl_link_stats64 *stats)
6945	{
6946	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
6947	struct rtnl_link_stats64 *vsi_stats;
6948	struct ice_vsi *vsi = np->vsi;
6949
6950	vsi_stats = &vsi->net_stats;
6951
6952	if (!vsi->num_txq \|\| !vsi->num_rxq)
6953	return;
6954
6955	/ netdev packet/byte stats come from ring counter. These are obtained*
6956	* by summing up ring counters (done by ice_update_vsi_ring_stats).
6957	* But, only call the update routine and read the registers if VSI is
6958	* not down.
6959	*/
6960	if (!test_bit(ICE_VSI_DOWN, vsi->state))
6961	ice_update_vsi_ring_stats(vsi);
6962	stats->tx_packets = vsi_stats->tx_packets;
6963	stats->tx_bytes = vsi_stats->tx_bytes;
6964	stats->rx_packets = vsi_stats->rx_packets;
6965	stats->rx_bytes = vsi_stats->rx_bytes;
6966
6967	/ The rest of the stats can be read from the hardware but instead we*
6968	* just return values that the watchdog task has already obtained from
6969	* the hardware.
6970	*/
6971	stats->multicast = vsi_stats->multicast;
6972	stats->tx_errors = vsi_stats->tx_errors;
6973	stats->tx_dropped = vsi_stats->tx_dropped;
6974	stats->rx_errors = vsi_stats->rx_errors;
6975	stats->rx_dropped = vsi_stats->rx_dropped;
6976	stats->rx_crc_errors = vsi_stats->rx_crc_errors;
6977	stats->rx_length_errors = vsi_stats->rx_length_errors;
6978	}
6979
6980	/**
6981	* ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI
6982	* @vsi: VSI having NAPI disabled
6983	*/
6984	static void ice_napi_disable_all(struct ice_vsi *vsi)
6985	{
6986	int q_idx;
6987
6988	if (!vsi->netdev)
6989	return;
6990
6991	ice_for_each_q_vector(vsi, q_idx) {
6992	struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
6993
6994	if (q_vector->rx.rx_ring \|\| q_vector->tx.tx_ring)
6995	napi_disable(n: &q_vector->napi);
6996
6997	cancel_work_sync(work: &q_vector->tx.dim.work);
6998	cancel_work_sync(work: &q_vector->rx.dim.work);
6999	}
7000	}
7001
7002	/**
7003	* ice_vsi_dis_irq - Mask off queue interrupt generation on the VSI
7004	* @vsi: the VSI being un-configured
7005	*/
7006	static void ice_vsi_dis_irq(struct ice_vsi *vsi)
7007	{
7008	struct ice_pf *pf = vsi->back;
7009	struct ice_hw *hw = &pf->hw;
7010	u32 val;
7011	int i;
7012
7013	/ disable interrupt causation from each Rx queue; Tx queues are*
7014	* handled in ice_vsi_stop_tx_ring()
7015	*/
7016	if (vsi->rx_rings) {
7017	ice_for_each_rxq(vsi, i) {
7018	if (vsi->rx_rings[i]) {
7019	u16 reg;
7020
7021	reg = vsi->rx_rings[i]->reg_idx;
7022	val = rd32(hw, QINT_RQCTL(reg));
7023	val &= ~QINT_RQCTL_CAUSE_ENA_M;
7024	wr32(hw, QINT_RQCTL(reg), val);
7025	}
7026	}
7027	}
7028
7029	/ disable each interrupt /
7030	ice_for_each_q_vector(vsi, i) {
7031	if (!vsi->q_vectors[i])
7032	continue;
7033	wr32(hw, GLINT_DYN_CTL(vsi->q_vectors[i]->reg_idx), `0`);
7034	}
7035
7036	ice_flush(hw);
7037
7038	/ don't call synchronize_irq() for VF's from the host /
7039	if (vsi->type == ICE_VSI_VF)
7040	return;
7041
7042	ice_for_each_q_vector(vsi, i)
7043	synchronize_irq(irq: vsi->q_vectors[i]->irq.virq);
7044	}
7045
7046	/**
7047	* ice_down - Shutdown the connection
7048	* @vsi: The VSI being stopped
7049	*
7050	* Caller of this function is expected to set the vsi->state ICE_DOWN bit
7051	*/
7052	int ice_down(struct ice_vsi *vsi)
7053	{
7054	int i, tx_err, rx_err, vlan_err = `0`;
7055
7056	WARN_ON(!test_bit(ICE_VSI_DOWN, vsi->state));
7057
7058	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
7059	vlan_err = ice_vsi_del_vlan_zero(vsi);
7060	ice_ptp_link_change(pf: vsi->back, port: vsi->back->hw.pf_id, linkup: false);
7061	netif_carrier_off(dev: vsi->netdev);
7062	netif_tx_disable(dev: vsi->netdev);
7063	} else if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) {
7064	ice_eswitch_stop_all_tx_queues(pf: vsi->back);
7065	}
7066
7067	ice_vsi_dis_irq(vsi);
7068
7069	tx_err = ice_vsi_stop_lan_tx_rings(vsi, rst_src: ICE_NO_RESET, rel_vmvf_num: `0`);
7070	if (tx_err)
7071	netdev_err(dev: vsi->netdev, format: "Failed stop Tx rings, VSI %d error %d\n",
7072	vsi->vsi_num, tx_err);
7073	if (!tx_err && ice_is_xdp_ena_vsi(vsi)) {
7074	tx_err = ice_vsi_stop_xdp_tx_rings(vsi);
7075	if (tx_err)
7076	netdev_err(dev: vsi->netdev, format: "Failed stop XDP rings, VSI %d error %d\n",
7077	vsi->vsi_num, tx_err);
7078	}
7079
7080	rx_err = ice_vsi_stop_all_rx_rings(vsi);
7081	if (rx_err)
7082	netdev_err(dev: vsi->netdev, format: "Failed stop Rx rings, VSI %d error %d\n",
7083	vsi->vsi_num, rx_err);
7084
7085	ice_napi_disable_all(vsi);
7086
7087	ice_for_each_txq(vsi, i)
7088	ice_clean_tx_ring(tx_ring: vsi->tx_rings[i]);
7089
7090	if (ice_is_xdp_ena_vsi(vsi))
7091	ice_for_each_xdp_txq(vsi, i)
7092	ice_clean_tx_ring(tx_ring: vsi->xdp_rings[i]);
7093
7094	ice_for_each_rxq(vsi, i)
7095	ice_clean_rx_ring(rx_ring: vsi->rx_rings[i]);
7096
7097	if (tx_err \|\| rx_err \|\| vlan_err) {
7098	netdev_err(dev: vsi->netdev, format: "Failed to close VSI 0x%04X on switch 0x%04X\n",
7099	vsi->vsi_num, vsi->vsw->sw_id);
7100	return -EIO;
7101	}
7102
7103	return `0`;
7104	}
7105
7106	/**
7107	* ice_down_up - shutdown the VSI connection and bring it up
7108	* @vsi: the VSI to be reconnected
7109	*/
7110	int ice_down_up(struct ice_vsi *vsi)
7111	{
7112	int ret;
7113
7114	/ if DOWN already set, nothing to do /
7115	if (test_and_set_bit(nr: ICE_VSI_DOWN, addr: vsi->state))
7116	return `0`;
7117
7118	ret = ice_down(vsi);
7119	if (ret)
7120	return ret;
7121
7122	ret = ice_up(vsi);
7123	if (ret) {
7124	netdev_err(dev: vsi->netdev, format: "reallocating resources failed during netdev features change, may need to reload driver\n");
7125	return ret;
7126	}
7127
7128	return `0`;
7129	}
7130
7131	/**
7132	* ice_vsi_setup_tx_rings - Allocate VSI Tx queue resources
7133	* @vsi: VSI having resources allocated
7134	*
7135	* Return 0 on success, negative on failure
7136	*/
7137	int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
7138	{
7139	int i, err = `0`;
7140
7141	if (!vsi->num_txq) {
7142	dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Tx queues\n",
7143	vsi->vsi_num);
7144	return -EINVAL;
7145	}
7146
7147	ice_for_each_txq(vsi, i) {
7148	struct ice_tx_ring *ring = vsi->tx_rings[i];
7149
7150	if (!ring)
7151	return -EINVAL;
7152
7153	if (vsi->netdev)
7154	ring->netdev = vsi->netdev;
7155	err = ice_setup_tx_ring(tx_ring: ring);
7156	if (err)
7157	break;
7158	}
7159
7160	return err;
7161	}
7162
7163	/**
7164	* ice_vsi_setup_rx_rings - Allocate VSI Rx queue resources
7165	* @vsi: VSI having resources allocated
7166	*
7167	* Return 0 on success, negative on failure
7168	*/
7169	int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
7170	{
7171	int i, err = `0`;
7172
7173	if (!vsi->num_rxq) {
7174	dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Rx queues\n",
7175	vsi->vsi_num);
7176	return -EINVAL;
7177	}
7178
7179	ice_for_each_rxq(vsi, i) {
7180	struct ice_rx_ring *ring = vsi->rx_rings[i];
7181
7182	if (!ring)
7183	return -EINVAL;
7184
7185	if (vsi->netdev)
7186	ring->netdev = vsi->netdev;
7187	err = ice_setup_rx_ring(rx_ring: ring);
7188	if (err)
7189	break;
7190	}
7191
7192	return err;
7193	}
7194
7195	/**
7196	* ice_vsi_open_ctrl - open control VSI for use
7197	* @vsi: the VSI to open
7198	*
7199	* Initialization of the Control VSI
7200	*
7201	* Returns 0 on success, negative value on error
7202	*/
7203	int ice_vsi_open_ctrl(struct ice_vsi *vsi)
7204	{
7205	char int_name[ICE_INT_NAME_STR_LEN];
7206	struct ice_pf *pf = vsi->back;
7207	struct device *dev;
7208	int err;
7209
7210	dev = ice_pf_to_dev(pf);
7211	/ allocate descriptors /
7212	err = ice_vsi_setup_tx_rings(vsi);
7213	if (err)
7214	goto err_setup_tx;
7215
7216	err = ice_vsi_setup_rx_rings(vsi);
7217	if (err)
7218	goto err_setup_rx;
7219
7220	err = ice_vsi_cfg_lan(vsi);
7221	if (err)
7222	goto err_setup_rx;
7223
7224	snprintf(buf: int_name, size: sizeof(int_name) - `1`, fmt: "%s-%s:ctrl",
7225	dev_driver_string(dev), dev_name(dev));
7226	err = ice_vsi_req_irq_msix(vsi, basename: int_name);
7227	if (err)
7228	goto err_setup_rx;
7229
7230	ice_vsi_cfg_msix(vsi);
7231
7232	err = ice_vsi_start_all_rx_rings(vsi);
7233	if (err)
7234	goto err_up_complete;
7235
7236	clear_bit(nr: ICE_VSI_DOWN, addr: vsi->state);
7237	ice_vsi_ena_irq(vsi);
7238
7239	return `0`;
7240
7241	err_up_complete:
7242	ice_down(vsi);
7243	err_setup_rx:
7244	ice_vsi_free_rx_rings(vsi);
7245	err_setup_tx:
7246	ice_vsi_free_tx_rings(vsi);
7247
7248	return err;
7249	}
7250
7251	/**
7252	* ice_vsi_open - Called when a network interface is made active
7253	* @vsi: the VSI to open
7254	*
7255	* Initialization of the VSI
7256	*
7257	* Returns 0 on success, negative value on error
7258	*/
7259	int ice_vsi_open(struct ice_vsi *vsi)
7260	{
7261	char int_name[ICE_INT_NAME_STR_LEN];
7262	struct ice_pf *pf = vsi->back;
7263	int err;
7264
7265	/ allocate descriptors /
7266	err = ice_vsi_setup_tx_rings(vsi);
7267	if (err)
7268	goto err_setup_tx;
7269
7270	err = ice_vsi_setup_rx_rings(vsi);
7271	if (err)
7272	goto err_setup_rx;
7273
7274	err = ice_vsi_cfg_lan(vsi);
7275	if (err)
7276	goto err_setup_rx;
7277
7278	snprintf(buf: int_name, size: sizeof(int_name) - `1`, fmt: "%s-%s",
7279	dev_driver_string(ice_pf_to_dev(pf)), vsi->netdev->name);
7280	err = ice_vsi_req_irq_msix(vsi, basename: int_name);
7281	if (err)
7282	goto err_setup_rx;
7283
7284	ice_vsi_cfg_netdev_tc(vsi, ena_tc: vsi->tc_cfg.ena_tc);
7285
7286	if (vsi->type == ICE_VSI_PF) {
7287	/ Notify the stack of the actual queue counts. /
7288	err = netif_set_real_num_tx_queues(dev: vsi->netdev, txq: vsi->num_txq);
7289	if (err)
7290	goto err_set_qs;
7291
7292	err = netif_set_real_num_rx_queues(dev: vsi->netdev, rxq: vsi->num_rxq);
7293	if (err)
7294	goto err_set_qs;
7295	}
7296
7297	err = ice_up_complete(vsi);
7298	if (err)
7299	goto err_up_complete;
7300
7301	return `0`;
7302
7303	err_up_complete:
7304	ice_down(vsi);
7305	err_set_qs:
7306	ice_vsi_free_irq(vsi);
7307	err_setup_rx:
7308	ice_vsi_free_rx_rings(vsi);
7309	err_setup_tx:
7310	ice_vsi_free_tx_rings(vsi);
7311
7312	return err;
7313	}
7314
7315	/**
7316	* ice_vsi_release_all - Delete all VSIs
7317	* @pf: PF from which all VSIs are being removed
7318	*/
7319	static void ice_vsi_release_all(struct ice_pf *pf)
7320	{
7321	int err, i;
7322
7323	if (!pf->vsi)
7324	return;
7325
7326	ice_for_each_vsi(pf, i) {
7327	if (!pf->vsi[i])
7328	continue;
7329
7330	if (pf->vsi[i]->type == ICE_VSI_CHNL)
7331	continue;
7332
7333	err = ice_vsi_release(vsi: pf->vsi[i]);
7334	if (err)
7335	dev_dbg(ice_pf_to_dev(pf), "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n",
7336	i, err, pf->vsi[i]->vsi_num);
7337	}
7338	}
7339
7340	/**
7341	* ice_vsi_rebuild_by_type - Rebuild VSI of a given type
7342	* @pf: pointer to the PF instance
7343	* @type: VSI type to rebuild
7344	*
7345	* Iterates through the pf->vsi array and rebuilds VSIs of the requested type
7346	*/
7347	static int ice_vsi_rebuild_by_type(struct ice_pf pf, enum* ice_vsi_type type)
7348	{
7349	struct device *dev = ice_pf_to_dev(pf);
7350	int i, err;
7351
7352	ice_for_each_vsi(pf, i) {
7353	struct ice_vsi *vsi = pf->vsi[i];
7354
7355	if (!vsi \|\| vsi->type != type)
7356	continue;
7357
7358	/ rebuild the VSI /
7359	err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_INIT);
7360	if (err) {
7361	dev_err(dev, "rebuild VSI failed, err %d, VSI index %d, type %s\n",
7362	err, vsi->idx, ice_vsi_type_str(type));
7363	return err;
7364	}
7365
7366	/ replay filters for the VSI /
7367	err = ice_replay_vsi(hw: &pf->hw, vsi_handle: vsi->idx);
7368	if (err) {
7369	dev_err(dev, "replay VSI failed, error %d, VSI index %d, type %s\n",
7370	err, vsi->idx, ice_vsi_type_str(type));
7371	return err;
7372	}
7373
7374	/ Re-map HW VSI number, using VSI handle that has been*
7375	* previously validated in ice_replay_vsi() call above
7376	*/
7377	vsi->vsi_num = ice_get_hw_vsi_num(hw: &pf->hw, vsi_handle: vsi->idx);
7378
7379	/ enable the VSI /
7380	err = ice_ena_vsi(vsi, locked: false);
7381	if (err) {
7382	dev_err(dev, "enable VSI failed, err %d, VSI index %d, type %s\n",
7383	err, vsi->idx, ice_vsi_type_str(type));
7384	return err;
7385	}
7386
7387	dev_info(dev, "VSI rebuilt. VSI index %d, type %s\n", vsi->idx,
7388	ice_vsi_type_str(type));
7389	}
7390
7391	return `0`;
7392	}
7393
7394	/**
7395	* ice_update_pf_netdev_link - Update PF netdev link status
7396	* @pf: pointer to the PF instance
7397	*/
7398	static void ice_update_pf_netdev_link(struct ice_pf *pf)
7399	{
7400	bool link_up;
7401	int i;
7402
7403	ice_for_each_vsi(pf, i) {
7404	struct ice_vsi *vsi = pf->vsi[i];
7405
7406	if (!vsi \|\| vsi->type != ICE_VSI_PF)
7407	return;
7408
7409	ice_get_link_status(pi: pf->vsi[i]->port_info, link_up: &link_up);
7410	if (link_up) {
7411	netif_carrier_on(dev: pf->vsi[i]->netdev);
7412	netif_tx_wake_all_queues(dev: pf->vsi[i]->netdev);
7413	} else {
7414	netif_carrier_off(dev: pf->vsi[i]->netdev);
7415	netif_tx_stop_all_queues(dev: pf->vsi[i]->netdev);
7416	}
7417	}
7418	}
7419
7420	/**
7421	* ice_rebuild - rebuild after reset
7422	* @pf: PF to rebuild
7423	* @reset_type: type of reset
7424	*
7425	* Do not rebuild VF VSI in this flow because that is already handled via
7426	* ice_reset_all_vfs(). This is because requirements for resetting a VF after a
7427	* PFR/CORER/GLOBER/etc. are different than the normal flow. Also, we don't want
7428	* to reset/rebuild all the VF VSI twice.
7429	*/
7430	static void ice_rebuild(struct ice_pf pf, enum* ice_reset_req reset_type)
7431	{
7432	struct device *dev = ice_pf_to_dev(pf);
7433	struct ice_hw *hw = &pf->hw;
7434	bool dvm;
7435	int err;
7436
7437	if (test_bit(ICE_DOWN, pf->state))
7438	goto clear_recovery;
7439
7440	dev_dbg(dev, "rebuilding PF after reset_type=%d\n", reset_type);
7441
7442	#define ICE_EMP_RESET_SLEEP_MS 5000
7443	if (reset_type == ICE_RESET_EMPR) {
7444	/ If an EMP reset has occurred, any previously pending flash*
7445	* update will have completed. We no longer know whether or
7446	* not the NVM update EMP reset is restricted.
7447	*/
7448	pf->fw_emp_reset_disabled = false;
7449
7450	msleep(ICE_EMP_RESET_SLEEP_MS);
7451	}
7452
7453	err = ice_init_all_ctrlq(hw);
7454	if (err) {
7455	dev_err(dev, "control queues init failed %d\n", err);
7456	goto err_init_ctrlq;
7457	}
7458
7459	/ if DDP was previously loaded successfully /
7460	if (!ice_is_safe_mode(pf)) {
7461	/ reload the SW DB of filter tables /
7462	if (reset_type == ICE_RESET_PFR)
7463	ice_fill_blk_tbls(hw);
7464	else
7465	/ Reload DDP Package after CORER/GLOBR reset /
7466	ice_load_pkg(NULL, pf);
7467	}
7468
7469	err = ice_clear_pf_cfg(hw);
7470	if (err) {
7471	dev_err(dev, "clear PF configuration failed %d\n", err);
7472	goto err_init_ctrlq;
7473	}
7474
7475	ice_clear_pxe_mode(hw);
7476
7477	err = ice_init_nvm(hw);
7478	if (err) {
7479	dev_err(dev, "ice_init_nvm failed %d\n", err);
7480	goto err_init_ctrlq;
7481	}
7482
7483	err = ice_get_caps(hw);
7484	if (err) {
7485	dev_err(dev, "ice_get_caps failed %d\n", err);
7486	goto err_init_ctrlq;
7487	}
7488
7489	err = ice_aq_set_mac_cfg(hw, ICE_AQ_SET_MAC_FRAME_SIZE_MAX, NULL);
7490	if (err) {
7491	dev_err(dev, "set_mac_cfg failed %d\n", err);
7492	goto err_init_ctrlq;
7493	}
7494
7495	dvm = ice_is_dvm_ena(hw);
7496
7497	err = ice_aq_set_port_params(pi: pf->hw.port_info, double_vlan: dvm, NULL);
7498	if (err)
7499	goto err_init_ctrlq;
7500
7501	err = ice_sched_init_port(pi: hw->port_info);
7502	if (err)
7503	goto err_sched_init_port;
7504
7505	/ start misc vector /
7506	err = ice_req_irq_msix_misc(pf);
7507	if (err) {
7508	dev_err(dev, "misc vector setup failed: %d\n", err);
7509	goto err_sched_init_port;
7510	}
7511
7512	if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
7513	wr32(hw, PFQF_FD_ENA, PFQF_FD_ENA_FD_ENA_M);
7514	if (!rd32(hw, PFQF_FD_SIZE)) {
7515	u16 unused, guar, b_effort;
7516
7517	guar = hw->func_caps.fd_fltr_guar;
7518	b_effort = hw->func_caps.fd_fltr_best_effort;
7519
7520	/ force guaranteed filter pool for PF /
7521	ice_alloc_fd_guar_item(hw, cntr_id: &unused, num_fltr: guar);
7522	/ force shared filter pool for PF /
7523	ice_alloc_fd_shrd_item(hw, cntr_id: &unused, num_fltr: b_effort);
7524	}
7525	}
7526
7527	if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
7528	ice_dcb_rebuild(pf);
7529
7530	/ If the PF previously had enabled PTP, PTP init needs to happen before*
7531	* the VSI rebuild. If not, this causes the PTP link status events to
7532	* fail.
7533	*/
7534	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
7535	ice_ptp_rebuild(pf, reset_type);
7536
7537	if (ice_is_feature_supported(pf, f: ICE_F_GNSS))
7538	ice_gnss_init(pf);
7539
7540	/ rebuild PF VSI /
7541	err = ice_vsi_rebuild_by_type(pf, type: ICE_VSI_PF);
7542	if (err) {
7543	dev_err(dev, "PF VSI rebuild failed: %d\n", err);
7544	goto err_vsi_rebuild;
7545	}
7546
7547	err = ice_eswitch_rebuild(pf);
7548	if (err) {
7549	dev_err(dev, "Switchdev rebuild failed: %d\n", err);
7550	goto err_vsi_rebuild;
7551	}
7552
7553	if (reset_type == ICE_RESET_PFR) {
7554	err = ice_rebuild_channels(pf);
7555	if (err) {
7556	dev_err(dev, "failed to rebuild and replay ADQ VSIs, err %d\n",
7557	err);
7558	goto err_vsi_rebuild;
7559	}
7560	}
7561
7562	/ If Flow Director is active /
7563	if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
7564	err = ice_vsi_rebuild_by_type(pf, type: ICE_VSI_CTRL);
7565	if (err) {
7566	dev_err(dev, "control VSI rebuild failed: %d\n", err);
7567	goto err_vsi_rebuild;
7568	}
7569
7570	/ replay HW Flow Director recipes /
7571	if (hw->fdir_prof)
7572	ice_fdir_replay_flows(hw);
7573
7574	/ replay Flow Director filters /
7575	ice_fdir_replay_fltrs(pf);
7576
7577	ice_rebuild_arfs(pf);
7578	}
7579
7580	ice_update_pf_netdev_link(pf);
7581
7582	/ tell the firmware we are up /
7583	err = ice_send_version(pf);
7584	if (err) {
7585	dev_err(dev, "Rebuild failed due to error sending driver version: %d\n",
7586	err);
7587	goto err_vsi_rebuild;
7588	}
7589
7590	ice_replay_post(hw);
7591
7592	/ if we get here, reset flow is successful /
7593	clear_bit(nr: ICE_RESET_FAILED, addr: pf->state);
7594
7595	ice_plug_aux_dev(pf);
7596	if (ice_is_feature_supported(pf, f: ICE_F_SRIOV_LAG))
7597	ice_lag_rebuild(pf);
7598
7599	/ Restore timestamp mode settings after VSI rebuild /
7600	ice_ptp_restore_timestamp_mode(pf);
7601	return;
7602
7603	err_vsi_rebuild:
7604	err_sched_init_port:
7605	ice_sched_cleanup_all(hw);
7606	err_init_ctrlq:
7607	ice_shutdown_all_ctrlq(hw);
7608	set_bit(nr: ICE_RESET_FAILED, addr: pf->state);
7609	clear_recovery:
7610	/ set this bit in PF state to control service task scheduling /
7611	set_bit(nr: ICE_NEEDS_RESTART, addr: pf->state);
7612	dev_err(dev, "Rebuild failed, unload and reload driver\n");
7613	}
7614
7615	/**
7616	* ice_change_mtu - NDO callback to change the MTU
7617	* @netdev: network interface device structure
7618	* @new_mtu: new value for maximum frame size
7619	*
7620	* Returns 0 on success, negative on failure
7621	*/
7622	static int ice_change_mtu(struct net_device netdev, int* new_mtu)
7623	{
7624	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
7625	struct ice_vsi *vsi = np->vsi;
7626	struct ice_pf *pf = vsi->back;
7627	struct bpf_prog *prog;
7628	u8 count = `0`;
7629	int err = `0`;
7630
7631	if (new_mtu == (int)netdev->mtu) {
7632	netdev_warn(dev: netdev, format: "MTU is already %u\n", netdev->mtu);
7633	return `0`;
7634	}
7635
7636	prog = vsi->xdp_prog;
7637	if (prog && !prog->aux->xdp_has_frags) {
7638	int frame_size = ice_max_xdp_frame_size(vsi);
7639
7640	if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) {
7641	netdev_err(dev: netdev, format: "max MTU for XDP usage is %d\n",
7642	frame_size - ICE_ETH_PKT_HDR_PAD);
7643	return -EINVAL;
7644	}
7645	} else if (test_bit(ICE_FLAG_LEGACY_RX, pf->flags)) {
7646	if (new_mtu + ICE_ETH_PKT_HDR_PAD > ICE_MAX_FRAME_LEGACY_RX) {
7647	netdev_err(dev: netdev, format: "Too big MTU for legacy-rx; Max is %d\n",
7648	ICE_MAX_FRAME_LEGACY_RX - ICE_ETH_PKT_HDR_PAD);
7649	return -EINVAL;
7650	}
7651	}
7652
7653	/ if a reset is in progress, wait for some time for it to complete /
7654	do {
7655	if (ice_is_reset_in_progress(state: pf->state)) {
7656	count++;
7657	usleep_range(min: `1000`, max: `2000`);
7658	} else {
7659	break;
7660	}
7661
7662	} while (count < `100`);
7663
7664	if (count == `100`) {
7665	netdev_err(dev: netdev, format: "can't change MTU. Device is busy\n");
7666	return -EBUSY;
7667	}
7668
7669	netdev->mtu = (unsigned int)new_mtu;
7670	err = ice_down_up(vsi);
7671	if (err)
7672	return err;
7673
7674	netdev_dbg(netdev, "changed MTU to %d\n", new_mtu);
7675	set_bit(nr: ICE_FLAG_MTU_CHANGED, addr: pf->flags);
7676
7677	return err;
7678	}
7679
7680	/**
7681	* ice_eth_ioctl - Access the hwtstamp interface
7682	* @netdev: network interface device structure
7683	* @ifr: interface request data
7684	* @cmd: ioctl command
7685	*/
7686	static int ice_eth_ioctl(struct net_device netdev, struct* ifreq ifr, int* cmd)
7687	{
7688	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
7689	struct ice_pf *pf = np->vsi->back;
7690
7691	switch (cmd) {
7692	case SIOCGHWTSTAMP:
7693	return ice_ptp_get_ts_config(pf, ifr);
7694	case SIOCSHWTSTAMP:
7695	return ice_ptp_set_ts_config(pf, ifr);
7696	default:
7697	return -EOPNOTSUPP;
7698	}
7699	}
7700
7701	/**
7702	* ice_aq_str - convert AQ err code to a string
7703	* @aq_err: the AQ error code to convert
7704	*/
7705	const char ice_aq_str(enum* ice_aq_err aq_err)
7706	{
7707	switch (aq_err) {
7708	case ICE_AQ_RC_OK:
7709	return "OK";
7710	case ICE_AQ_RC_EPERM:
7711	return "ICE_AQ_RC_EPERM";
7712	case ICE_AQ_RC_ENOENT:
7713	return "ICE_AQ_RC_ENOENT";
7714	case ICE_AQ_RC_ENOMEM:
7715	return "ICE_AQ_RC_ENOMEM";
7716	case ICE_AQ_RC_EBUSY:
7717	return "ICE_AQ_RC_EBUSY";
7718	case ICE_AQ_RC_EEXIST:
7719	return "ICE_AQ_RC_EEXIST";
7720	case ICE_AQ_RC_EINVAL:
7721	return "ICE_AQ_RC_EINVAL";
7722	case ICE_AQ_RC_ENOSPC:
7723	return "ICE_AQ_RC_ENOSPC";
7724	case ICE_AQ_RC_ENOSYS:
7725	return "ICE_AQ_RC_ENOSYS";
7726	case ICE_AQ_RC_EMODE:
7727	return "ICE_AQ_RC_EMODE";
7728	case ICE_AQ_RC_ENOSEC:
7729	return "ICE_AQ_RC_ENOSEC";
7730	case ICE_AQ_RC_EBADSIG:
7731	return "ICE_AQ_RC_EBADSIG";
7732	case ICE_AQ_RC_ESVN:
7733	return "ICE_AQ_RC_ESVN";
7734	case ICE_AQ_RC_EBADMAN:
7735	return "ICE_AQ_RC_EBADMAN";
7736	case ICE_AQ_RC_EBADBUF:
7737	return "ICE_AQ_RC_EBADBUF";
7738	}
7739
7740	return "ICE_AQ_RC_UNKNOWN";
7741	}
7742
7743	/**
7744	* ice_set_rss_lut - Set RSS LUT
7745	* @vsi: Pointer to VSI structure
7746	* @lut: Lookup table
7747	* @lut_size: Lookup table size
7748	*
7749	* Returns 0 on success, negative on failure
7750	*/
7751	int ice_set_rss_lut(struct ice_vsi vsi, u8 lut, u16 lut_size)
7752	{
7753	struct ice_aq_get_set_rss_lut_params params = {};
7754	struct ice_hw *hw = &vsi->back->hw;
7755	int status;
7756
7757	if (!lut)
7758	return -EINVAL;
7759
7760	params.vsi_handle = vsi->idx;
7761	params.lut_size = lut_size;
7762	params.lut_type = vsi->rss_lut_type;
7763	params.lut = lut;
7764
7765	status = ice_aq_set_rss_lut(hw, set_params: &params);
7766	if (status)
7767	dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS lut, err %d aq_err %s\n",
7768	status, ice_aq_str(hw->adminq.sq_last_status));
7769
7770	return status;
7771	}
7772
7773	/**
7774	* ice_set_rss_key - Set RSS key
7775	* @vsi: Pointer to the VSI structure
7776	* @seed: RSS hash seed
7777	*
7778	* Returns 0 on success, negative on failure
7779	*/
7780	int ice_set_rss_key(struct ice_vsi vsi, u8 seed)
7781	{
7782	struct ice_hw *hw = &vsi->back->hw;
7783	int status;
7784
7785	if (!seed)
7786	return -EINVAL;
7787
7788	status = ice_aq_set_rss_key(hw, vsi_handle: vsi->idx, keys: (struct ice_aqc_get_set_rss_keys *)seed);
7789	if (status)
7790	dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS key, err %d aq_err %s\n",
7791	status, ice_aq_str(hw->adminq.sq_last_status));
7792
7793	return status;
7794	}
7795
7796	/**
7797	* ice_get_rss_lut - Get RSS LUT
7798	* @vsi: Pointer to VSI structure
7799	* @lut: Buffer to store the lookup table entries
7800	* @lut_size: Size of buffer to store the lookup table entries
7801	*
7802	* Returns 0 on success, negative on failure
7803	*/
7804	int ice_get_rss_lut(struct ice_vsi vsi, u8 lut, u16 lut_size)
7805	{
7806	struct ice_aq_get_set_rss_lut_params params = {};
7807	struct ice_hw *hw = &vsi->back->hw;
7808	int status;
7809
7810	if (!lut)
7811	return -EINVAL;
7812
7813	params.vsi_handle = vsi->idx;
7814	params.lut_size = lut_size;
7815	params.lut_type = vsi->rss_lut_type;
7816	params.lut = lut;
7817
7818	status = ice_aq_get_rss_lut(hw, get_params: &params);
7819	if (status)
7820	dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS lut, err %d aq_err %s\n",
7821	status, ice_aq_str(hw->adminq.sq_last_status));
7822
7823	return status;
7824	}
7825
7826	/**
7827	* ice_get_rss_key - Get RSS key
7828	* @vsi: Pointer to VSI structure
7829	* @seed: Buffer to store the key in
7830	*
7831	* Returns 0 on success, negative on failure
7832	*/
7833	int ice_get_rss_key(struct ice_vsi vsi, u8 seed)
7834	{
7835	struct ice_hw *hw = &vsi->back->hw;
7836	int status;
7837
7838	if (!seed)
7839	return -EINVAL;
7840
7841	status = ice_aq_get_rss_key(hw, vsi_handle: vsi->idx, keys: (struct ice_aqc_get_set_rss_keys *)seed);
7842	if (status)
7843	dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS key, err %d aq_err %s\n",
7844	status, ice_aq_str(hw->adminq.sq_last_status));
7845
7846	return status;
7847	}
7848
7849	/**
7850	* ice_set_rss_hfunc - Set RSS HASH function
7851	* @vsi: Pointer to VSI structure
7852	* @hfunc: hash function (ICE_AQ_VSI_Q_OPT_RSS_*)
7853	*
7854	* Returns 0 on success, negative on failure
7855	*/
7856	int ice_set_rss_hfunc(struct ice_vsi *vsi, u8 hfunc)
7857	{
7858	struct ice_hw *hw = &vsi->back->hw;
7859	struct ice_vsi_ctx *ctx;
7860	bool symm;
7861	int err;
7862
7863	if (hfunc == vsi->rss_hfunc)
7864	return `0`;
7865
7866	if (hfunc != ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ &&
7867	hfunc != ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ)
7868	return -EOPNOTSUPP;
7869
7870	ctx = kzalloc(size: sizeof(*ctx), GFP_KERNEL);
7871	if (!ctx)
7872	return -ENOMEM;
7873
7874	ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID);
7875	ctx->info.q_opt_rss = vsi->info.q_opt_rss;
7876	ctx->info.q_opt_rss &= ~ICE_AQ_VSI_Q_OPT_RSS_HASH_M;
7877	ctx->info.q_opt_rss \|=
7878	FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_HASH_M, hfunc);
7879	ctx->info.q_opt_tc = vsi->info.q_opt_tc;
7880	ctx->info.q_opt_flags = vsi->info.q_opt_rss;
7881
7882	err = ice_update_vsi(hw, vsi_handle: vsi->idx, vsi_ctx: ctx, NULL);
7883	if (err) {
7884	dev_err(ice_pf_to_dev(vsi->back), "Failed to configure RSS hash for VSI %d, error %d\n",
7885	vsi->vsi_num, err);
7886	} else {
7887	vsi->info.q_opt_rss = ctx->info.q_opt_rss;
7888	vsi->rss_hfunc = hfunc;
7889	netdev_info(dev: vsi->netdev, format: "Hash function set to: %sToeplitz\n",
7890	hfunc == ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ ?
7891	"Symmetric " : "");
7892	}
7893	kfree(objp: ctx);
7894	if (err)
7895	return err;
7896
7897	/ Fix the symmetry setting for all existing RSS configurations /
7898	symm = !!(hfunc == ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ);
7899	return ice_set_rss_cfg_symm(hw, vsi, symm);
7900	}
7901
7902	/**
7903	* ice_bridge_getlink - Get the hardware bridge mode
7904	* @skb: skb buff
7905	* @pid: process ID
7906	* @seq: RTNL message seq
7907	* @dev: the netdev being configured
7908	* @filter_mask: filter mask passed in
7909	* @nlflags: netlink flags passed in
7910	*
7911	* Return the bridge mode (VEB/VEPA)
7912	*/
7913	static int
7914	ice_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
7915	struct net_device dev, u32 filter_mask, int* nlflags)
7916	{
7917	struct ice_netdev_priv *np = netdev_priv(dev);
7918	struct ice_vsi *vsi = np->vsi;
7919	struct ice_pf *pf = vsi->back;
7920	u16 bmode;
7921
7922	bmode = pf->first_sw->bridge_mode;
7923
7924	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode: bmode, flags: `0`, mask: `0`, nlflags,
7925	filter_mask, NULL);
7926	}
7927
7928	/**
7929	* ice_vsi_update_bridge_mode - Update VSI for switching bridge mode (VEB/VEPA)
7930	* @vsi: Pointer to VSI structure
7931	* @bmode: Hardware bridge mode (VEB/VEPA)
7932	*
7933	* Returns 0 on success, negative on failure
7934	*/
7935	static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
7936	{
7937	struct ice_aqc_vsi_props *vsi_props;
7938	struct ice_hw *hw = &vsi->back->hw;
7939	struct ice_vsi_ctx *ctxt;
7940	int ret;
7941
7942	vsi_props = &vsi->info;
7943
7944	ctxt = kzalloc(size: sizeof(*ctxt), GFP_KERNEL);
7945	if (!ctxt)
7946	return -ENOMEM;
7947
7948	ctxt->info = vsi->info;
7949
7950	if (bmode == BRIDGE_MODE_VEB)
7951	/ change from VEPA to VEB mode /
7952	ctxt->info.sw_flags \|= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
7953	else
7954	/ change from VEB to VEPA mode /
7955	ctxt->info.sw_flags &= ~ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
7956	ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID);
7957
7958	ret = ice_update_vsi(hw, vsi_handle: vsi->idx, vsi_ctx: ctxt, NULL);
7959	if (ret) {
7960	dev_err(ice_pf_to_dev(vsi->back), "update VSI for bridge mode failed, bmode = %d err %d aq_err %s\n",
7961	bmode, ret, ice_aq_str(hw->adminq.sq_last_status));
7962	goto out;
7963	}
7964	/ Update sw flags for book keeping /
7965	vsi_props->sw_flags = ctxt->info.sw_flags;
7966
7967	out:
7968	kfree(objp: ctxt);
7969	return ret;
7970	}
7971
7972	/**
7973	* ice_bridge_setlink - Set the hardware bridge mode
7974	* @dev: the netdev being configured
7975	* @nlh: RTNL message
7976	* @flags: bridge setlink flags
7977	* @extack: netlink extended ack
7978	*
7979	* Sets the bridge mode (VEB/VEPA) of the switch to which the netdev (VSI) is
7980	* hooked up to. Iterates through the PF VSI list and sets the loopback mode (if
7981	* not already set for all VSIs connected to this switch. And also update the
7982	* unicast switch filter rules for the corresponding switch of the netdev.
7983	*/
7984	static int
7985	ice_bridge_setlink(struct net_device dev, struct* nlmsghdr *nlh,
7986	u16 __always_unused flags,
7987	struct netlink_ext_ack __always_unused *extack)
7988	{
7989	struct ice_netdev_priv *np = netdev_priv(dev);
7990	struct ice_pf *pf = np->vsi->back;
7991	struct nlattr attr, br_spec;
7992	struct ice_hw *hw = &pf->hw;
7993	struct ice_sw *pf_sw;
7994	int rem, v, err = `0`;
7995
7996	pf_sw = pf->first_sw;
7997	/ find the attribute in the netlink message /
7998	br_spec = nlmsg_find_attr(nlh, hdrlen: sizeof(struct ifinfomsg), attrtype: IFLA_AF_SPEC);
7999	if (!br_spec)
8000	return -EINVAL;
8001
8002	nla_for_each_nested(attr, br_spec, rem) {
8003	__u16 mode;
8004
8005	if (nla_type(nla: attr) != IFLA_BRIDGE_MODE)
8006	continue;
8007	mode = nla_get_u16(nla: attr);
8008	if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
8009	return -EINVAL;
8010	/ Continue if bridge mode is not being flipped /
8011	if (mode == pf_sw->bridge_mode)
8012	continue;
8013	/ Iterates through the PF VSI list and update the loopback*
8014	* mode of the VSI
8015	*/
8016	ice_for_each_vsi(pf, v) {
8017	if (!pf->vsi[v])
8018	continue;
8019	err = ice_vsi_update_bridge_mode(vsi: pf->vsi[v], bmode: mode);
8020	if (err)
8021	return err;
8022	}
8023
8024	hw->evb_veb = (mode == BRIDGE_MODE_VEB);
8025	/ Update the unicast switch filter rules for the corresponding*
8026	* switch of the netdev
8027	*/
8028	err = ice_update_sw_rule_bridge_mode(hw);
8029	if (err) {
8030	netdev_err(dev, format: "switch rule update failed, mode = %d err %d aq_err %s\n",
8031	mode, err,
8032	ice_aq_str(aq_err: hw->adminq.sq_last_status));
8033	/ revert hw->evb_veb /
8034	hw->evb_veb = (pf_sw->bridge_mode == BRIDGE_MODE_VEB);
8035	return err;
8036	}
8037
8038	pf_sw->bridge_mode = mode;
8039	}
8040
8041	return `0`;
8042	}
8043
8044	/**
8045	* ice_tx_timeout - Respond to a Tx Hang
8046	* @netdev: network interface device structure
8047	* @txqueue: Tx queue
8048	*/
8049	static void ice_tx_timeout(struct net_device netdev, unsigned* int txqueue)
8050	{
8051	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
8052	struct ice_tx_ring *tx_ring = NULL;
8053	struct ice_vsi *vsi = np->vsi;
8054	struct ice_pf *pf = vsi->back;
8055	u32 i;
8056
8057	pf->tx_timeout_count++;
8058
8059	/ Check if PFC is enabled for the TC to which the queue belongs*
8060	* to. If yes then Tx timeout is not caused by a hung queue, no
8061	* need to reset and rebuild
8062	*/
8063	if (ice_is_pfc_causing_hung_q(pf, txqueue)) {
8064	dev_info(ice_pf_to_dev(pf), "Fake Tx hang detected on queue %u, timeout caused by PFC storm\n",
8065	txqueue);
8066	return;
8067	}
8068
8069	/ now that we have an index, find the tx_ring struct /
8070	ice_for_each_txq(vsi, i)
8071	if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
8072	if (txqueue == vsi->tx_rings[i]->q_index) {
8073	tx_ring = vsi->tx_rings[i];
8074	break;
8075	}
8076
8077	/ Reset recovery level if enough time has elapsed after last timeout.*
8078	* Also ensure no new reset action happens before next timeout period.
8079	*/
8080	if (time_after(jiffies, (pf->tx_timeout_last_recovery + HZ * `20`)))
8081	pf->tx_timeout_recovery_level = `1`;
8082	else if (time_before(jiffies, (pf->tx_timeout_last_recovery +
8083	netdev->watchdog_timeo)))
8084	return;
8085
8086	if (tx_ring) {
8087	struct ice_hw *hw = &pf->hw;
8088	u32 head, val = `0`;
8089
8090	head = FIELD_GET(QTX_COMM_HEAD_HEAD_M,
8091	rd32(hw, QTX_COMM_HEAD(vsi->txq_map[txqueue])));
8092	/ Read interrupt register /
8093	val = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx));
8094
8095	netdev_info(dev: netdev, format: "tx_timeout: VSI_num: %d, Q %u, NTC: 0x%x, HW_HEAD: 0x%x, NTU: 0x%x, INT: 0x%x\n",
8096	vsi->vsi_num, txqueue, tx_ring->next_to_clean,
8097	head, tx_ring->next_to_use, val);
8098	}
8099
8100	pf->tx_timeout_last_recovery = jiffies;
8101	netdev_info(dev: netdev, format: "tx_timeout recovery level %d, txqueue %u\n",
8102	pf->tx_timeout_recovery_level, txqueue);
8103
8104	switch (pf->tx_timeout_recovery_level) {
8105	case `1`:
8106	set_bit(nr: ICE_PFR_REQ, addr: pf->state);
8107	break;
8108	case `2`:
8109	set_bit(nr: ICE_CORER_REQ, addr: pf->state);
8110	break;
8111	case `3`:
8112	set_bit(nr: ICE_GLOBR_REQ, addr: pf->state);
8113	break;
8114	default:
8115	netdev_err(dev: netdev, format: "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n");
8116	set_bit(nr: ICE_DOWN, addr: pf->state);
8117	set_bit(nr: ICE_VSI_NEEDS_RESTART, addr: vsi->state);
8118	set_bit(nr: ICE_SERVICE_DIS, addr: pf->state);
8119	break;
8120	}
8121
8122	ice_service_task_schedule(pf);
8123	pf->tx_timeout_recovery_level++;
8124	}
8125
8126	/**
8127	* ice_setup_tc_cls_flower - flower classifier offloads
8128	* @np: net device to configure
8129	* @filter_dev: device on which filter is added
8130	* @cls_flower: offload data
8131	*/
8132	static int
8133	ice_setup_tc_cls_flower(struct ice_netdev_priv *np,
8134	struct net_device *filter_dev,
8135	struct flow_cls_offload *cls_flower)
8136	{
8137	struct ice_vsi *vsi = np->vsi;
8138
8139	if (cls_flower->common.chain_index)
8140	return -EOPNOTSUPP;
8141
8142	switch (cls_flower->command) {
8143	case FLOW_CLS_REPLACE:
8144	return ice_add_cls_flower(netdev: filter_dev, vsi, cls_flower);
8145	case FLOW_CLS_DESTROY:
8146	return ice_del_cls_flower(vsi, cls_flower);
8147	default:
8148	return -EINVAL;
8149	}
8150	}
8151
8152	/**
8153	* ice_setup_tc_block_cb - callback handler registered for TC block
8154	* @type: TC SETUP type
8155	* @type_data: TC flower offload data that contains user input
8156	* @cb_priv: netdev private data
8157	*/
8158	static int
8159	ice_setup_tc_block_cb(enum tc_setup_type type, void type_data, void* *cb_priv)
8160	{
8161	struct ice_netdev_priv *np = cb_priv;
8162
8163	switch (type) {
8164	case TC_SETUP_CLSFLOWER:
8165	return ice_setup_tc_cls_flower(np, filter_dev: np->vsi->netdev,
8166	cls_flower: type_data);
8167	default:
8168	return -EOPNOTSUPP;
8169	}
8170	}
8171
8172	/**
8173	* ice_validate_mqprio_qopt - Validate TCF input parameters
8174	* @vsi: Pointer to VSI
8175	* @mqprio_qopt: input parameters for mqprio queue configuration
8176	*
8177	* This function validates MQPRIO params, such as qcount (power of 2 wherever
8178	* needed), and make sure user doesn't specify qcount and BW rate limit
8179	* for TCs, which are more than "num_tc"
8180	*/
8181	static int
8182	ice_validate_mqprio_qopt(struct ice_vsi *vsi,
8183	struct tc_mqprio_qopt_offload *mqprio_qopt)
8184	{
8185	int non_power_of_2_qcount = `0`;
8186	struct ice_pf *pf = vsi->back;
8187	int max_rss_q_cnt = `0`;
8188	u64 sum_min_rate = `0`;
8189	struct device *dev;
8190	int i, speed;
8191	u8 num_tc;
8192
8193	if (vsi->type != ICE_VSI_PF)
8194	return -EINVAL;
8195
8196	if (mqprio_qopt->qopt.offset[`0`] != `0` \|\|
8197	mqprio_qopt->qopt.num_tc < `1` \|\|
8198	mqprio_qopt->qopt.num_tc > ICE_CHNL_MAX_TC)
8199	return -EINVAL;
8200
8201	dev = ice_pf_to_dev(pf);
8202	vsi->ch_rss_size = `0`;
8203	num_tc = mqprio_qopt->qopt.num_tc;
8204	speed = ice_get_link_speed_kbps(vsi);
8205
8206	for (i = `0`; num_tc; i++) {
8207	int qcount = mqprio_qopt->qopt.count[i];
8208	u64 max_rate, min_rate, rem;
8209
8210	if (!qcount)
8211	return -EINVAL;
8212
8213	if (is_power_of_2(n: qcount)) {
8214	if (non_power_of_2_qcount &&
8215	qcount > non_power_of_2_qcount) {
8216	dev_err(dev, "qcount[%d] cannot be greater than non power of 2 qcount[%d]\n",
8217	qcount, non_power_of_2_qcount);
8218	return -EINVAL;
8219	}
8220	if (qcount > max_rss_q_cnt)
8221	max_rss_q_cnt = qcount;
8222	} else {
8223	if (non_power_of_2_qcount &&
8224	qcount != non_power_of_2_qcount) {
8225	dev_err(dev, "Only one non power of 2 qcount allowed[%d,%d]\n",
8226	qcount, non_power_of_2_qcount);
8227	return -EINVAL;
8228	}
8229	if (qcount < max_rss_q_cnt) {
8230	dev_err(dev, "non power of 2 qcount[%d] cannot be less than other qcount[%d]\n",
8231	qcount, max_rss_q_cnt);
8232	return -EINVAL;
8233	}
8234	max_rss_q_cnt = qcount;
8235	non_power_of_2_qcount = qcount;
8236	}
8237
8238	/ TC command takes input in K/N/Gbps or K/M/Gbit etc but*
8239	* converts the bandwidth rate limit into Bytes/s when
8240	* passing it down to the driver. So convert input bandwidth
8241	* from Bytes/s to Kbps
8242	*/
8243	max_rate = mqprio_qopt->max_rate[i];
8244	max_rate = div_u64(dividend: max_rate, ICE_BW_KBPS_DIVISOR);
8245
8246	/ min_rate is minimum guaranteed rate and it can't be zero /
8247	min_rate = mqprio_qopt->min_rate[i];
8248	min_rate = div_u64(dividend: min_rate, ICE_BW_KBPS_DIVISOR);
8249	sum_min_rate += min_rate;
8250
8251	if (min_rate && min_rate < ICE_MIN_BW_LIMIT) {
8252	dev_err(dev, "TC%d: min_rate(%llu Kbps) < %u Kbps\n", i,
8253	min_rate, ICE_MIN_BW_LIMIT);
8254	return -EINVAL;
8255	}
8256
8257	if (max_rate && max_rate > speed) {
8258	dev_err(dev, "TC%d: max_rate(%llu Kbps) > link speed of %u Kbps\n",
8259	i, max_rate, speed);
8260	return -EINVAL;
8261	}
8262
8263	iter_div_u64_rem(dividend: min_rate, ICE_MIN_BW_LIMIT, remainder: &rem);
8264	if (rem) {
8265	dev_err(dev, "TC%d: Min Rate not multiple of %u Kbps",
8266	i, ICE_MIN_BW_LIMIT);
8267	return -EINVAL;
8268	}
8269
8270	iter_div_u64_rem(dividend: max_rate, ICE_MIN_BW_LIMIT, remainder: &rem);
8271	if (rem) {
8272	dev_err(dev, "TC%d: Max Rate not multiple of %u Kbps",
8273	i, ICE_MIN_BW_LIMIT);
8274	return -EINVAL;
8275	}
8276
8277	/ min_rate can't be more than max_rate, except when max_rate*
8278	* is zero (implies max_rate sought is max line rate). In such
8279	* a case min_rate can be more than max.
8280	*/
8281	if (max_rate && min_rate > max_rate) {
8282	dev_err(dev, "min_rate %llu Kbps can't be more than max_rate %llu Kbps\n",
8283	min_rate, max_rate);
8284	return -EINVAL;
8285	}
8286
8287	if (i >= mqprio_qopt->qopt.num_tc - `1`)
8288	break;
8289	if (mqprio_qopt->qopt.offset[i + `1`] !=
8290	(mqprio_qopt->qopt.offset[i] + qcount))
8291	return -EINVAL;
8292	}
8293	if (vsi->num_rxq <
8294	(mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
8295	return -EINVAL;
8296	if (vsi->num_txq <
8297	(mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
8298	return -EINVAL;
8299
8300	if (sum_min_rate && sum_min_rate > (u64)speed) {
8301	dev_err(dev, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n",
8302	sum_min_rate, speed);
8303	return -EINVAL;
8304	}
8305
8306	/ make sure vsi->ch_rss_size is set correctly based on TC's qcount /
8307	vsi->ch_rss_size = max_rss_q_cnt;
8308
8309	return `0`;
8310	}
8311
8312	/**
8313	* ice_add_vsi_to_fdir - add a VSI to the flow director group for PF
8314	* @pf: ptr to PF device
8315	* @vsi: ptr to VSI
8316	*/
8317	static int ice_add_vsi_to_fdir(struct ice_pf pf, struct* ice_vsi *vsi)
8318	{
8319	struct device *dev = ice_pf_to_dev(pf);
8320	bool added = false;
8321	struct ice_hw *hw;
8322	int flow;
8323
8324	if (!(vsi->num_gfltr \|\| vsi->num_bfltr))
8325	return -EINVAL;
8326
8327	hw = &pf->hw;
8328	for (flow = `0`; flow < ICE_FLTR_PTYPE_MAX; flow++) {
8329	struct ice_fd_hw_prof *prof;
8330	int tun, status;
8331	u64 entry_h;
8332
8333	if (!(hw->fdir_prof && hw->fdir_prof[flow] &&
8334	hw->fdir_prof[flow]->cnt))
8335	continue;
8336
8337	for (tun = `0`; tun < ICE_FD_HW_SEG_MAX; tun++) {
8338	enum ice_flow_priority prio;
8339
8340	/ add this VSI to FDir profile for this flow /
8341	prio = ICE_FLOW_PRIO_NORMAL;
8342	prof = hw->fdir_prof[flow];
8343	status = ice_flow_add_entry(hw, blk: ICE_BLK_FD,
8344	prof_id: prof->prof_id[tun],
8345	entry_id: prof->vsi_h[`0`], vsi: vsi->idx,
8346	prio, data: prof->fdir_seg[tun],
8347	entry_h: &entry_h);
8348	if (status) {
8349	dev_err(dev, "channel VSI idx %d, not able to add to group %d\n",
8350	vsi->idx, flow);
8351	continue;
8352	}
8353
8354	prof->entry_h[prof->cnt][tun] = entry_h;
8355	}
8356
8357	/ store VSI for filter replay and delete /
8358	prof->vsi_h[prof->cnt] = vsi->idx;
8359	prof->cnt++;
8360
8361	added = true;
8362	dev_dbg(dev, "VSI idx %d added to fdir group %d\n", vsi->idx,
8363	flow);
8364	}
8365
8366	if (!added)
8367	dev_dbg(dev, "VSI idx %d not added to fdir groups\n", vsi->idx);
8368
8369	return `0`;
8370	}
8371
8372	/**
8373	* ice_add_channel - add a channel by adding VSI
8374	* @pf: ptr to PF device
8375	* @sw_id: underlying HW switching element ID
8376	* @ch: ptr to channel structure
8377	*
8378	* Add a channel (VSI) using add_vsi and queue_map
8379	*/
8380	static int ice_add_channel(struct ice_pf pf, u16 sw_id, struct* ice_channel *ch)
8381	{
8382	struct device *dev = ice_pf_to_dev(pf);
8383	struct ice_vsi *vsi;
8384
8385	if (ch->type != ICE_VSI_CHNL) {
8386	dev_err(dev, "add new VSI failed, ch->type %d\n", ch->type);
8387	return -EINVAL;
8388	}
8389
8390	vsi = ice_chnl_vsi_setup(pf, pi: pf->hw.port_info, ch);
8391	if (!vsi \|\| vsi->type != ICE_VSI_CHNL) {
8392	dev_err(dev, "create chnl VSI failure\n");
8393	return -EINVAL;
8394	}
8395
8396	ice_add_vsi_to_fdir(pf, vsi);
8397
8398	ch->sw_id = sw_id;
8399	ch->vsi_num = vsi->vsi_num;
8400	ch->info.mapping_flags = vsi->info.mapping_flags;
8401	ch->ch_vsi = vsi;
8402	/ set the back pointer of channel for newly created VSI /
8403	vsi->ch = ch;
8404
8405	memcpy(&ch->info.q_mapping, &vsi->info.q_mapping,
8406	sizeof(vsi->info.q_mapping));
8407	memcpy(&ch->info.tc_mapping, vsi->info.tc_mapping,
8408	sizeof(vsi->info.tc_mapping));
8409
8410	return `0`;
8411	}
8412
8413	/**
8414	* ice_chnl_cfg_res
8415	* @vsi: the VSI being setup
8416	* @ch: ptr to channel structure
8417	*
8418	* Configure channel specific resources such as rings, vector.
8419	*/
8420	static void ice_chnl_cfg_res(struct ice_vsi vsi, struct* ice_channel *ch)
8421	{
8422	int i;
8423
8424	for (i = `0`; i < ch->num_txq; i++) {
8425	struct ice_q_vector tx_q_vector, rx_q_vector;
8426	struct ice_ring_container *rc;
8427	struct ice_tx_ring *tx_ring;
8428	struct ice_rx_ring *rx_ring;
8429
8430	tx_ring = vsi->tx_rings[ch->base_q + i];
8431	rx_ring = vsi->rx_rings[ch->base_q + i];
8432	if (!tx_ring \|\| !rx_ring)
8433	continue;
8434
8435	/ setup ring being channel enabled /
8436	tx_ring->ch = ch;
8437	rx_ring->ch = ch;
8438
8439	/ following code block sets up vector specific attributes /
8440	tx_q_vector = tx_ring->q_vector;
8441	rx_q_vector = rx_ring->q_vector;
8442	if (!tx_q_vector && !rx_q_vector)
8443	continue;
8444
8445	if (tx_q_vector) {
8446	tx_q_vector->ch = ch;
8447	/ setup Tx and Rx ITR setting if DIM is off /
8448	rc = &tx_q_vector->tx;
8449	if (!ITR_IS_DYNAMIC(rc))
8450	ice_write_itr(rc, itr: rc->itr_setting);
8451	}
8452	if (rx_q_vector) {
8453	rx_q_vector->ch = ch;
8454	/ setup Tx and Rx ITR setting if DIM is off /
8455	rc = &rx_q_vector->rx;
8456	if (!ITR_IS_DYNAMIC(rc))
8457	ice_write_itr(rc, itr: rc->itr_setting);
8458	}
8459	}
8460
8461	/ it is safe to assume that, if channel has non-zero num_t[r]xq, then*
8462	* GLINT_ITR register would have written to perform in-context
8463	* update, hence perform flush
8464	*/
8465	if (ch->num_txq \|\| ch->num_rxq)
8466	ice_flush(&vsi->back->hw);
8467	}
8468
/**
 * ice_cfg_chnl_all_res - configure channel resources
 * @vsi: ptr to main_vsi
 * @ch: ptr to channel structure
 *
 * Single entry point for configuring everything a channel (aka ADQ) needs.
 * At present that is only the ring, vector and ITR setup performed by
 * ice_chnl_cfg_res().
 */
static void
ice_cfg_chnl_all_res(struct ice_vsi *vsi, struct ice_channel *ch)
{
	/* queues, vectors and ITR settings of the channel's vectors */
	ice_chnl_cfg_res(vsi, ch);
}
8485
8486	/**
8487	* ice_setup_hw_channel - setup new channel
8488	* @pf: ptr to PF device
8489	* @vsi: the VSI being setup
8490	* @ch: ptr to channel structure
8491	* @sw_id: underlying HW switching element ID
8492	* @type: type of channel to be created (VMDq2/VF)
8493	*
8494	* Setup new channel (VSI) based on specified type (VMDq2/VF)
8495	* and configures Tx rings accordingly
8496	*/
8497	static int
8498	ice_setup_hw_channel(struct ice_pf pf, struct* ice_vsi *vsi,
8499	struct ice_channel *ch, u16 sw_id, u8 type)
8500	{
8501	struct device *dev = ice_pf_to_dev(pf);
8502	int ret;
8503
8504	ch->base_q = vsi->next_base_q;
8505	ch->type = type;
8506
8507	ret = ice_add_channel(pf, sw_id, ch);
8508	if (ret) {
8509	dev_err(dev, "failed to add_channel using sw_id %u\n", sw_id);
8510	return ret;
8511	}
8512
8513	/ configure/setup ADQ specific resources /
8514	ice_cfg_chnl_all_res(vsi, ch);
8515
8516	/ make sure to update the next_base_q so that subsequent channel's*
8517	* (aka ADQ) VSI queue map is correct
8518	*/
8519	vsi->next_base_q = vsi->next_base_q + ch->num_rxq;
8520	dev_dbg(dev, "added channel: vsi_num %u, num_rxq %u\n", ch->vsi_num,
8521	ch->num_rxq);
8522
8523	return `0`;
8524	}
8525
8526	/**
8527	* ice_setup_channel - setup new channel using uplink element
8528	* @pf: ptr to PF device
8529	* @vsi: the VSI being setup
8530	* @ch: ptr to channel structure
8531	*
8532	* Setup new channel (VSI) based on specified type (VMDq2/VF)
8533	* and uplink switching element
8534	*/
8535	static bool
8536	ice_setup_channel(struct ice_pf pf, struct* ice_vsi *vsi,
8537	struct ice_channel *ch)
8538	{
8539	struct device *dev = ice_pf_to_dev(pf);
8540	u16 sw_id;
8541	int ret;
8542
8543	if (vsi->type != ICE_VSI_PF) {
8544	dev_err(dev, "unsupported parent VSI type(%d)\n", vsi->type);
8545	return false;
8546	}
8547
8548	sw_id = pf->first_sw->sw_id;
8549
8550	/ create channel (VSI) /
8551	ret = ice_setup_hw_channel(pf, vsi, ch, sw_id, type: ICE_VSI_CHNL);
8552	if (ret) {
8553	dev_err(dev, "failed to setup hw_channel\n");
8554	return false;
8555	}
8556	dev_dbg(dev, "successfully created channel()\n");
8557
8558	return ch->ch_vsi ? true : false;
8559	}
8560
8561	/**
8562	* ice_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate
8563	* @vsi: VSI to be configured
8564	* @max_tx_rate: max Tx rate in Kbps to be configured as maximum BW limit
8565	* @min_tx_rate: min Tx rate in Kbps to be configured as minimum BW limit
8566	*/
8567	static int
8568	ice_set_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate, u64 min_tx_rate)
8569	{
8570	int err;
8571
8572	err = ice_set_min_bw_limit(vsi, min_tx_rate);
8573	if (err)
8574	return err;
8575
8576	return ice_set_max_bw_limit(vsi, max_tx_rate);
8577	}
8578
8579	/**
8580	* ice_create_q_channel - function to create channel
8581	* @vsi: VSI to be configured
8582	* @ch: ptr to channel (it contains channel specific params)
8583	*
8584	* This function creates channel (VSI) using num_queues specified by user,
8585	* reconfigs RSS if needed.
8586	*/
8587	static int ice_create_q_channel(struct ice_vsi vsi, struct* ice_channel *ch)
8588	{
8589	struct ice_pf *pf = vsi->back;
8590	struct device *dev;
8591
8592	if (!ch)
8593	return -EINVAL;
8594
8595	dev = ice_pf_to_dev(pf);
8596	if (!ch->num_txq \|\| !ch->num_rxq) {
8597	dev_err(dev, "Invalid num_queues requested: %d\n", ch->num_rxq);
8598	return -EINVAL;
8599	}
8600
8601	if (!vsi->cnt_q_avail \|\| vsi->cnt_q_avail < ch->num_txq) {
8602	dev_err(dev, "cnt_q_avail (%u) less than num_queues %d\n",
8603	vsi->cnt_q_avail, ch->num_txq);
8604	return -EINVAL;
8605	}
8606
8607	if (!ice_setup_channel(pf, vsi, ch)) {
8608	dev_info(dev, "Failed to setup channel\n");
8609	return -EINVAL;
8610	}
8611	/ configure BW rate limit /
8612	if (ch->ch_vsi && (ch->max_tx_rate \|\| ch->min_tx_rate)) {
8613	int ret;
8614
8615	ret = ice_set_bw_limit(vsi: ch->ch_vsi, max_tx_rate: ch->max_tx_rate,
8616	min_tx_rate: ch->min_tx_rate);
8617	if (ret)
8618	dev_err(dev, "failed to set Tx rate of %llu Kbps for VSI(%u)\n",
8619	ch->max_tx_rate, ch->ch_vsi->vsi_num);
8620	else
8621	dev_dbg(dev, "set Tx rate of %llu Kbps for VSI(%u)\n",
8622	ch->max_tx_rate, ch->ch_vsi->vsi_num);
8623	}
8624
8625	vsi->cnt_q_avail -= ch->num_txq;
8626
8627	return `0`;
8628	}
8629
8630	/**
8631	* ice_rem_all_chnl_fltrs - removes all channel filters
8632	* @pf: ptr to PF, TC-flower based filter are tracked at PF level
8633	*
8634	* Remove all advanced switch filters only if they are channel specific
8635	* tc-flower based filter
8636	*/
8637	static void ice_rem_all_chnl_fltrs(struct ice_pf *pf)
8638	{
8639	struct ice_tc_flower_fltr *fltr;
8640	struct hlist_node *node;
8641
8642	/ to remove all channel filters, iterate an ordered list of filters /
8643	hlist_for_each_entry_safe(fltr, node,
8644	&pf->tc_flower_fltr_list,
8645	tc_flower_node) {
8646	struct ice_rule_query_data rule;
8647	int status;
8648
8649	/ for now process only channel specific filters /
8650	if (!ice_is_chnl_fltr(f: fltr))
8651	continue;
8652
8653	rule.rid = fltr->rid;
8654	rule.rule_id = fltr->rule_id;
8655	rule.vsi_handle = fltr->dest_vsi_handle;
8656	status = ice_rem_adv_rule_by_id(hw: &pf->hw, remove_entry: &rule);
8657	if (status) {
8658	if (status == -ENOENT)
8659	dev_dbg(ice_pf_to_dev(pf), "TC flower filter (rule_id %u) does not exist\n",
8660	rule.rule_id);
8661	else
8662	dev_err(ice_pf_to_dev(pf), "failed to delete TC flower filter, status %d\n",
8663	status);
8664	} else if (fltr->dest_vsi) {
8665	/ update advanced switch filter count /
8666	if (fltr->dest_vsi->type == ICE_VSI_CHNL) {
8667	u32 flags = fltr->flags;
8668
8669	fltr->dest_vsi->num_chnl_fltr--;
8670	if (flags & (ICE_TC_FLWR_FIELD_DST_MAC \|
8671	ICE_TC_FLWR_FIELD_ENC_DST_MAC))
8672	pf->num_dmac_chnl_fltrs--;
8673	}
8674	}
8675
8676	hlist_del(n: &fltr->tc_flower_node);
8677	kfree(objp: fltr);
8678	}
8679	}
8680
8681	/**
8682	* ice_remove_q_channels - Remove queue channels for the TCs
8683	* @vsi: VSI to be configured
8684	* @rem_fltr: delete advanced switch filter or not
8685	*
8686	* Remove queue channels for the TCs
8687	*/
8688	static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_fltr)
8689	{
8690	struct ice_channel ch, ch_tmp;
8691	struct ice_pf *pf = vsi->back;
8692	int i;
8693
8694	/ remove all tc-flower based filter if they are channel filters only /
8695	if (rem_fltr)
8696	ice_rem_all_chnl_fltrs(pf);
8697
8698	/ remove ntuple filters since queue configuration is being changed /
8699	if (vsi->netdev->features & NETIF_F_NTUPLE) {
8700	struct ice_hw *hw = &pf->hw;
8701
8702	mutex_lock(&hw->fdir_fltr_lock);
8703	ice_fdir_del_all_fltrs(vsi);
8704	mutex_unlock(lock: &hw->fdir_fltr_lock);
8705	}
8706
8707	/ perform cleanup for channels if they exist /
8708	list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
8709	struct ice_vsi *ch_vsi;
8710
8711	list_del(entry: &ch->list);
8712	ch_vsi = ch->ch_vsi;
8713	if (!ch_vsi) {
8714	kfree(objp: ch);
8715	continue;
8716	}
8717
8718	/ Reset queue contexts /
8719	for (i = `0`; i < ch->num_rxq; i++) {
8720	struct ice_tx_ring *tx_ring;
8721	struct ice_rx_ring *rx_ring;
8722
8723	tx_ring = vsi->tx_rings[ch->base_q + i];
8724	rx_ring = vsi->rx_rings[ch->base_q + i];
8725	if (tx_ring) {
8726	tx_ring->ch = NULL;
8727	if (tx_ring->q_vector)
8728	tx_ring->q_vector->ch = NULL;
8729	}
8730	if (rx_ring) {
8731	rx_ring->ch = NULL;
8732	if (rx_ring->q_vector)
8733	rx_ring->q_vector->ch = NULL;
8734	}
8735	}
8736
8737	/ Release FD resources for the channel VSI /
8738	ice_fdir_rem_adq_chnl(hw: &pf->hw, vsi_idx: ch->ch_vsi->idx);
8739
8740	/ clear the VSI from scheduler tree /
8741	ice_rm_vsi_lan_cfg(pi: ch->ch_vsi->port_info, vsi_handle: ch->ch_vsi->idx);
8742
8743	/ Delete VSI from FW, PF and HW VSI arrays /
8744	ice_vsi_delete(vsi: ch->ch_vsi);
8745
8746	/ free the channel /
8747	kfree(objp: ch);
8748	}
8749
8750	/ clear the channel VSI map which is stored in main VSI /
8751	ice_for_each_chnl_tc(i)
8752	vsi->tc_map_vsi[i] = NULL;
8753
8754	/ reset main VSI's all TC information /
8755	vsi->all_enatc = `0`;
8756	vsi->all_numtc = `0`;
8757	}
8758
8759	/**
8760	* ice_rebuild_channels - rebuild channel
8761	* @pf: ptr to PF
8762	*
8763	* Recreate channel VSIs and replay filters
8764	*/
8765	static int ice_rebuild_channels(struct ice_pf *pf)
8766	{
8767	struct device *dev = ice_pf_to_dev(pf);
8768	struct ice_vsi *main_vsi;
8769	bool rem_adv_fltr = true;
8770	struct ice_channel *ch;
8771	struct ice_vsi *vsi;
8772	int tc_idx = `1`;
8773	int i, err;
8774
8775	main_vsi = ice_get_main_vsi(pf);
8776	if (!main_vsi)
8777	return `0`;
8778
8779	if (!test_bit(ICE_FLAG_TC_MQPRIO, pf->flags) \|\|
8780	main_vsi->old_numtc == `1`)
8781	return `0`; / nothing to be done /
8782
8783	/ reconfigure main VSI based on old value of TC and cached values*
8784	* for MQPRIO opts
8785	*/
8786	err = ice_vsi_cfg_tc(vsi: main_vsi, ena_tc: main_vsi->old_ena_tc);
8787	if (err) {
8788	dev_err(dev, "failed configuring TC(ena_tc:0x%02x) for HW VSI=%u\n",
8789	main_vsi->old_ena_tc, main_vsi->vsi_num);
8790	return err;
8791	}
8792
8793	/ rebuild ADQ VSIs /
8794	ice_for_each_vsi(pf, i) {
8795	enum ice_vsi_type type;
8796
8797	vsi = pf->vsi[i];
8798	if (!vsi \|\| vsi->type != ICE_VSI_CHNL)
8799	continue;
8800
8801	type = vsi->type;
8802
8803	/ rebuild ADQ VSI /
8804	err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_INIT);
8805	if (err) {
8806	dev_err(dev, "VSI (type:%s) at index %d rebuild failed, err %d\n",
8807	ice_vsi_type_str(type), vsi->idx, err);
8808	goto cleanup;
8809	}
8810
8811	/ Re-map HW VSI number, using VSI handle that has been*
8812	* previously validated in ice_replay_vsi() call above
8813	*/
8814	vsi->vsi_num = ice_get_hw_vsi_num(hw: &pf->hw, vsi_handle: vsi->idx);
8815
8816	/ replay filters for the VSI /
8817	err = ice_replay_vsi(hw: &pf->hw, vsi_handle: vsi->idx);
8818	if (err) {
8819	dev_err(dev, "VSI (type:%s) replay failed, err %d, VSI index %d\n",
8820	ice_vsi_type_str(type), err, vsi->idx);
8821	rem_adv_fltr = false;
8822	goto cleanup;
8823	}
8824	dev_info(dev, "VSI (type:%s) at index %d rebuilt successfully\n",
8825	ice_vsi_type_str(type), vsi->idx);
8826
8827	/ store ADQ VSI at correct TC index in main VSI's*
8828	* map of TC to VSI
8829	*/
8830	main_vsi->tc_map_vsi[tc_idx++] = vsi;
8831	}
8832
8833	/ ADQ VSI(s) has been rebuilt successfully, so setup*
8834	* channel for main VSI's Tx and Rx rings
8835	*/
8836	list_for_each_entry(ch, &main_vsi->ch_list, list) {
8837	struct ice_vsi *ch_vsi;
8838
8839	ch_vsi = ch->ch_vsi;
8840	if (!ch_vsi)
8841	continue;
8842
8843	/ reconfig channel resources /
8844	ice_cfg_chnl_all_res(vsi: main_vsi, ch);
8845
8846	/ replay BW rate limit if it is non-zero /
8847	if (!ch->max_tx_rate && !ch->min_tx_rate)
8848	continue;
8849
8850	err = ice_set_bw_limit(vsi: ch_vsi, max_tx_rate: ch->max_tx_rate,
8851	min_tx_rate: ch->min_tx_rate);
8852	if (err)
8853	dev_err(dev, "failed (err:%d) to rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n",
8854	err, ch->max_tx_rate, ch->min_tx_rate,
8855	ch_vsi->vsi_num);
8856	else
8857	dev_dbg(dev, "successfully rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n",
8858	ch->max_tx_rate, ch->min_tx_rate,
8859	ch_vsi->vsi_num);
8860	}
8861
8862	/ reconfig RSS for main VSI /
8863	if (main_vsi->ch_rss_size)
8864	ice_vsi_cfg_rss_lut_key(vsi: main_vsi);
8865
8866	return `0`;
8867
8868	cleanup:
8869	ice_remove_q_channels(vsi: main_vsi, rem_fltr: rem_adv_fltr);
8870	return err;
8871	}
8872
8873	/**
8874	* ice_create_q_channels - Add queue channel for the given TCs
8875	* @vsi: VSI to be configured
8876	*
8877	* Configures queue channel mapping to the given TCs
8878	*/
8879	static int ice_create_q_channels(struct ice_vsi *vsi)
8880	{
8881	struct ice_pf *pf = vsi->back;
8882	struct ice_channel *ch;
8883	int ret = `0`, i;
8884
8885	ice_for_each_chnl_tc(i) {
8886	if (!(vsi->all_enatc & BIT(i)))
8887	continue;
8888
8889	ch = kzalloc(size: sizeof(*ch), GFP_KERNEL);
8890	if (!ch) {
8891	ret = -ENOMEM;
8892	goto err_free;
8893	}
8894	INIT_LIST_HEAD(list: &ch->list);
8895	ch->num_rxq = vsi->mqprio_qopt.qopt.count[i];
8896	ch->num_txq = vsi->mqprio_qopt.qopt.count[i];
8897	ch->base_q = vsi->mqprio_qopt.qopt.offset[i];
8898	ch->max_tx_rate = vsi->mqprio_qopt.max_rate[i];
8899	ch->min_tx_rate = vsi->mqprio_qopt.min_rate[i];
8900
8901	/ convert to Kbits/s /
8902	if (ch->max_tx_rate)
8903	ch->max_tx_rate = div_u64(dividend: ch->max_tx_rate,
8904	ICE_BW_KBPS_DIVISOR);
8905	if (ch->min_tx_rate)
8906	ch->min_tx_rate = div_u64(dividend: ch->min_tx_rate,
8907	ICE_BW_KBPS_DIVISOR);
8908
8909	ret = ice_create_q_channel(vsi, ch);
8910	if (ret) {
8911	dev_err(ice_pf_to_dev(pf),
8912	"failed creating channel TC:%d\n", i);
8913	kfree(objp: ch);
8914	goto err_free;
8915	}
8916	list_add_tail(new: &ch->list, head: &vsi->ch_list);
8917	vsi->tc_map_vsi[i] = ch->ch_vsi;
8918	dev_dbg(ice_pf_to_dev(pf),
8919	"successfully created channel: VSI %pK\n", ch->ch_vsi);
8920	}
8921	return `0`;
8922
8923	err_free:
8924	ice_remove_q_channels(vsi, rem_fltr: false);
8925
8926	return ret;
8927	}
8928
8929	/**
8930	* ice_setup_tc_mqprio_qdisc - configure multiple traffic classes
8931	* @netdev: net device to configure
8932	* @type_data: TC offload data
8933	*/
8934	static int ice_setup_tc_mqprio_qdisc(struct net_device netdev, void* *type_data)
8935	{
8936	struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
8937	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
8938	struct ice_vsi *vsi = np->vsi;
8939	struct ice_pf *pf = vsi->back;
8940	u16 mode, ena_tc_qdisc = `0`;
8941	int cur_txq, cur_rxq;
8942	u8 hw = `0`, num_tcf;
8943	struct device *dev;
8944	int ret, i;
8945
8946	dev = ice_pf_to_dev(pf);
8947	num_tcf = mqprio_qopt->qopt.num_tc;
8948	hw = mqprio_qopt->qopt.hw;
8949	mode = mqprio_qopt->mode;
8950	if (!hw) {
8951	clear_bit(nr: ICE_FLAG_TC_MQPRIO, addr: pf->flags);
8952	vsi->ch_rss_size = `0`;
8953	memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
8954	goto config_tcf;
8955	}
8956
8957	/ Generate queue region map for number of TCF requested /
8958	for (i = `0`; i < num_tcf; i++)
8959	ena_tc_qdisc \|= BIT(i);
8960
8961	switch (mode) {
8962	case TC_MQPRIO_MODE_CHANNEL:
8963
8964	if (pf->hw.port_info->is_custom_tx_enabled) {
8965	dev_err(dev, "Custom Tx scheduler feature enabled, can't configure ADQ\n");
8966	return -EBUSY;
8967	}
8968	ice_tear_down_devlink_rate_tree(pf);
8969
8970	ret = ice_validate_mqprio_qopt(vsi, mqprio_qopt);
8971	if (ret) {
8972	netdev_err(dev: netdev, format: "failed to validate_mqprio_qopt(), ret %d\n",
8973	ret);
8974	return ret;
8975	}
8976	memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
8977	set_bit(nr: ICE_FLAG_TC_MQPRIO, addr: pf->flags);
8978	/ don't assume state of hw_tc_offload during driver load*
8979	* and set the flag for TC flower filter if hw_tc_offload
8980	* already ON
8981	*/
8982	if (vsi->netdev->features & NETIF_F_HW_TC)
8983	set_bit(nr: ICE_FLAG_CLS_FLOWER, addr: pf->flags);
8984	break;
8985	default:
8986	return -EINVAL;
8987	}
8988
8989	config_tcf:
8990
8991	/ Requesting same TCF configuration as already enabled /
8992	if (ena_tc_qdisc == vsi->tc_cfg.ena_tc &&
8993	mode != TC_MQPRIO_MODE_CHANNEL)
8994	return `0`;
8995
8996	/ Pause VSI queues /
8997	ice_dis_vsi(vsi, locked: true);
8998
8999	if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
9000	ice_remove_q_channels(vsi, rem_fltr: true);
9001
9002	if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
9003	vsi->req_txq = min_t(int, ice_get_avail_txq_count(pf),
9004	num_online_cpus());
9005	vsi->req_rxq = min_t(int, ice_get_avail_rxq_count(pf),
9006	num_online_cpus());
9007	} else {
9008	/ logic to rebuild VSI, same like ethtool -L /
9009	u16 offset = `0`, qcount_tx = `0`, qcount_rx = `0`;
9010
9011	for (i = `0`; i < num_tcf; i++) {
9012	if (!(ena_tc_qdisc & BIT(i)))
9013	continue;
9014
9015	offset = vsi->mqprio_qopt.qopt.offset[i];
9016	qcount_rx = vsi->mqprio_qopt.qopt.count[i];
9017	qcount_tx = vsi->mqprio_qopt.qopt.count[i];
9018	}
9019	vsi->req_txq = offset + qcount_tx;
9020	vsi->req_rxq = offset + qcount_rx;
9021
9022	/ store away original rss_size info, so that it gets reused*
9023	* form ice_vsi_rebuild during tc-qdisc delete stage - to
9024	* determine, what should be the rss_sizefor main VSI
9025	*/
9026	vsi->orig_rss_size = vsi->rss_size;
9027	}
9028
9029	/ save current values of Tx and Rx queues before calling VSI rebuild*
9030	* for fallback option
9031	*/
9032	cur_txq = vsi->num_txq;
9033	cur_rxq = vsi->num_rxq;
9034
9035	/ proceed with rebuild main VSI using correct number of queues /
9036	ret = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
9037	if (ret) {
9038	/ fallback to current number of queues /
9039	dev_info(dev, "Rebuild failed with new queues, try with current number of queues\n");
9040	vsi->req_txq = cur_txq;
9041	vsi->req_rxq = cur_rxq;
9042	clear_bit(nr: ICE_RESET_FAILED, addr: pf->state);
9043	if (ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT)) {
9044	dev_err(dev, "Rebuild of main VSI failed again\n");
9045	return ret;
9046	}
9047	}
9048
9049	vsi->all_numtc = num_tcf;
9050	vsi->all_enatc = ena_tc_qdisc;
9051	ret = ice_vsi_cfg_tc(vsi, ena_tc: ena_tc_qdisc);
9052	if (ret) {
9053	netdev_err(dev: netdev, format: "failed configuring TC for VSI id=%d\n",
9054	vsi->vsi_num);
9055	goto exit;
9056	}
9057
9058	if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
9059	u64 max_tx_rate = vsi->mqprio_qopt.max_rate[`0`];
9060	u64 min_tx_rate = vsi->mqprio_qopt.min_rate[`0`];
9061
9062	/ set TC0 rate limit if specified /
9063	if (max_tx_rate \|\| min_tx_rate) {
9064	/ convert to Kbits/s /
9065	if (max_tx_rate)
9066	max_tx_rate = div_u64(dividend: max_tx_rate, ICE_BW_KBPS_DIVISOR);
9067	if (min_tx_rate)
9068	min_tx_rate = div_u64(dividend: min_tx_rate, ICE_BW_KBPS_DIVISOR);
9069
9070	ret = ice_set_bw_limit(vsi, max_tx_rate, min_tx_rate);
9071	if (!ret) {
9072	dev_dbg(dev, "set Tx rate max %llu min %llu for VSI(%u)\n",
9073	max_tx_rate, min_tx_rate, vsi->vsi_num);
9074	} else {
9075	dev_err(dev, "failed to set Tx rate max %llu min %llu for VSI(%u)\n",
9076	max_tx_rate, min_tx_rate, vsi->vsi_num);
9077	goto exit;
9078	}
9079	}
9080	ret = ice_create_q_channels(vsi);
9081	if (ret) {
9082	netdev_err(dev: netdev, format: "failed configuring queue channels\n");
9083	goto exit;
9084	} else {
9085	netdev_dbg(netdev, "successfully configured channels\n");
9086	}
9087	}
9088
9089	if (vsi->ch_rss_size)
9090	ice_vsi_cfg_rss_lut_key(vsi);
9091
9092	exit:
9093	/ if error, reset the all_numtc and all_enatc /
9094	if (ret) {
9095	vsi->all_numtc = `0`;
9096	vsi->all_enatc = `0`;
9097	}
9098	/ resume VSI /
9099	ice_ena_vsi(vsi, locked: true);
9100
9101	return ret;
9102	}
9103
9104	static LIST_HEAD(ice_block_cb_list);
9105
9106	static int
9107	ice_setup_tc(struct net_device netdev, enum* tc_setup_type type,
9108	void *type_data)
9109	{
9110	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
9111	struct ice_pf *pf = np->vsi->back;
9112	bool locked = false;
9113	int err;
9114
9115	switch (type) {
9116	case TC_SETUP_BLOCK:
9117	return flow_block_cb_setup_simple(f: type_data,
9118	driver_list: &ice_block_cb_list,
9119	cb: ice_setup_tc_block_cb,
9120	cb_ident: np, cb_priv: np, ingress_only: true);
9121	case TC_SETUP_QDISC_MQPRIO:
9122	if (ice_is_eswitch_mode_switchdev(pf)) {
9123	netdev_err(dev: netdev, format: "TC MQPRIO offload not supported, switchdev is enabled\n");
9124	return -EOPNOTSUPP;
9125	}
9126
9127	if (pf->adev) {
9128	mutex_lock(&pf->adev_mutex);
9129	device_lock(dev: &pf->adev->dev);
9130	locked = true;
9131	if (pf->adev->dev.driver) {
9132	netdev_err(dev: netdev, format: "Cannot change qdisc when RDMA is active\n");
9133	err = -EBUSY;
9134	goto adev_unlock;
9135	}
9136	}
9137
9138	/ setup traffic classifier for receive side /
9139	mutex_lock(&pf->tc_mutex);
9140	err = ice_setup_tc_mqprio_qdisc(netdev, type_data);
9141	mutex_unlock(lock: &pf->tc_mutex);
9142
9143	adev_unlock:
9144	if (locked) {
9145	device_unlock(dev: &pf->adev->dev);
9146	mutex_unlock(lock: &pf->adev_mutex);
9147	}
9148	return err;
9149	default:
9150	return -EOPNOTSUPP;
9151	}
9152	return -EOPNOTSUPP;
9153	}
9154
9155	static struct ice_indr_block_priv *
9156	ice_indr_block_priv_lookup(struct ice_netdev_priv *np,
9157	struct net_device *netdev)
9158	{
9159	struct ice_indr_block_priv *cb_priv;
9160
9161	list_for_each_entry(cb_priv, &np->tc_indr_block_priv_list, list) {
9162	if (!cb_priv->netdev)
9163	return NULL;
9164	if (cb_priv->netdev == netdev)
9165	return cb_priv;
9166	}
9167	return NULL;
9168	}
9169
9170	static int
9171	ice_indr_setup_block_cb(enum tc_setup_type type, void *type_data,
9172	void *indr_priv)
9173	{
9174	struct ice_indr_block_priv *priv = indr_priv;
9175	struct ice_netdev_priv *np = priv->np;
9176
9177	switch (type) {
9178	case TC_SETUP_CLSFLOWER:
9179	return ice_setup_tc_cls_flower(np, filter_dev: priv->netdev,
9180	cls_flower: (struct flow_cls_offload *)
9181	type_data);
9182	default:
9183	return -EOPNOTSUPP;
9184	}
9185	}
9186
9187	static int
9188	ice_indr_setup_tc_block(struct net_device netdev, struct* Qdisc *sch,
9189	struct ice_netdev_priv *np,
9190	struct flow_block_offload f, void* *data,
9191	void (cleanup)(struct* flow_block_cb *block_cb))
9192	{
9193	struct ice_indr_block_priv *indr_priv;
9194	struct flow_block_cb *block_cb;
9195
9196	if (!ice_is_tunnel_supported(dev: netdev) &&
9197	!(is_vlan_dev(dev: netdev) &&
9198	vlan_dev_real_dev(dev: netdev) == np->vsi->netdev))
9199	return -EOPNOTSUPP;
9200
9201	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
9202	return -EOPNOTSUPP;
9203
9204	switch (f->command) {
9205	case FLOW_BLOCK_BIND:
9206	indr_priv = ice_indr_block_priv_lookup(np, netdev);
9207	if (indr_priv)
9208	return -EEXIST;
9209
9210	indr_priv = kzalloc(size: sizeof(*indr_priv), GFP_KERNEL);
9211	if (!indr_priv)
9212	return -ENOMEM;
9213
9214	indr_priv->netdev = netdev;
9215	indr_priv->np = np;
9216	list_add(new: &indr_priv->list, head: &np->tc_indr_block_priv_list);
9217
9218	block_cb =
9219	flow_indr_block_cb_alloc(cb: ice_indr_setup_block_cb,
9220	cb_ident: indr_priv, cb_priv: indr_priv,
9221	release: ice_rep_indr_tc_block_unbind,
9222	bo: f, dev: netdev, sch, data, indr_cb_priv: np,
9223	cleanup);
9224
9225	if (IS_ERR(ptr: block_cb)) {
9226	list_del(entry: &indr_priv->list);
9227	kfree(objp: indr_priv);
9228	return PTR_ERR(ptr: block_cb);
9229	}
9230	flow_block_cb_add(block_cb, offload: f);
9231	list_add_tail(new: &block_cb->driver_list, head: &ice_block_cb_list);
9232	break;
9233	case FLOW_BLOCK_UNBIND:
9234	indr_priv = ice_indr_block_priv_lookup(np, netdev);
9235	if (!indr_priv)
9236	return -ENOENT;
9237
9238	block_cb = flow_block_cb_lookup(block: f->block,
9239	cb: ice_indr_setup_block_cb,
9240	cb_ident: indr_priv);
9241	if (!block_cb)
9242	return -ENOENT;
9243
9244	flow_indr_block_cb_remove(block_cb, offload: f);
9245
9246	list_del(entry: &block_cb->driver_list);
9247	break;
9248	default:
9249	return -EOPNOTSUPP;
9250	}
9251	return `0`;
9252	}
9253
9254	static int
9255	ice_indr_setup_tc_cb(struct net_device netdev, struct* Qdisc *sch,
9256	void cb_priv, enum* tc_setup_type type, void *type_data,
9257	void *data,
9258	void (cleanup)(struct* flow_block_cb *block_cb))
9259	{
9260	switch (type) {
9261	case TC_SETUP_BLOCK:
9262	return ice_indr_setup_tc_block(netdev, sch, np: cb_priv, f: type_data,
9263	data, cleanup);
9264
9265	default:
9266	return -EOPNOTSUPP;
9267	}
9268	}
9269
9270	/**
9271	* ice_open - Called when a network interface becomes active
9272	* @netdev: network interface device structure
9273	*
9274	* The open entry point is called when a network interface is made
9275	* active by the system (IFF_UP). At this point all resources needed
9276	* for transmit and receive operations are allocated, the interrupt
9277	* handler is registered with the OS, the netdev watchdog is enabled,
9278	* and the stack is notified that the interface is ready.
9279	*
9280	* Returns 0 on success, negative value on failure
9281	*/
9282	int ice_open(struct net_device *netdev)
9283	{
9284	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
9285	struct ice_pf *pf = np->vsi->back;
9286
9287	if (ice_is_reset_in_progress(state: pf->state)) {
9288	netdev_err(dev: netdev, format: "can't open net device while reset is in progress");
9289	return -EBUSY;
9290	}
9291
9292	return ice_open_internal(netdev);
9293	}
9294
9295	/**
9296	* ice_open_internal - Called when a network interface becomes active
9297	* @netdev: network interface device structure
9298	*
9299	* Internal ice_open implementation. Should not be used directly except for ice_open and reset
9300	* handling routine
9301	*
9302	* Returns 0 on success, negative value on failure
9303	*/
9304	int ice_open_internal(struct net_device *netdev)
9305	{
9306	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
9307	struct ice_vsi *vsi = np->vsi;
9308	struct ice_pf *pf = vsi->back;
9309	struct ice_port_info *pi;
9310	int err;
9311
9312	if (test_bit(ICE_NEEDS_RESTART, pf->state)) {
9313	netdev_err(dev: netdev, format: "driver needs to be unloaded and reloaded\n");
9314	return -EIO;
9315	}
9316
9317	netif_carrier_off(dev: netdev);
9318
9319	pi = vsi->port_info;
9320	err = ice_update_link_info(pi);
9321	if (err) {
9322	netdev_err(dev: netdev, format: "Failed to get link info, error %d\n", err);
9323	return err;
9324	}
9325
9326	ice_check_link_cfg_err(pf, link_cfg_err: pi->phy.link_info.link_cfg_err);
9327
9328	/ Set PHY if there is media, otherwise, turn off PHY /
9329	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
9330	clear_bit(nr: ICE_FLAG_NO_MEDIA, addr: pf->flags);
9331	if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) {
9332	err = ice_init_phy_user_cfg(pi);
9333	if (err) {
9334	netdev_err(dev: netdev, format: "Failed to initialize PHY settings, error %d\n",
9335	err);
9336	return err;
9337	}
9338	}
9339
9340	err = ice_configure_phy(vsi);
9341	if (err) {
9342	netdev_err(dev: netdev, format: "Failed to set physical link up, error %d\n",
9343	err);
9344	return err;
9345	}
9346	} else {
9347	set_bit(nr: ICE_FLAG_NO_MEDIA, addr: pf->flags);
9348	ice_set_link(vsi, ena: false);
9349	}
9350
9351	err = ice_vsi_open(vsi);
9352	if (err)
9353	netdev_err(dev: netdev, format: "Failed to open VSI 0x%04X on switch 0x%04X\n",
9354	vsi->vsi_num, vsi->vsw->sw_id);
9355
9356	/ Update existing tunnels information /
9357	udp_tunnel_get_rx_info(dev: netdev);
9358
9359	return err;
9360	}
9361
9362	/**
9363	* ice_stop - Disables a network interface
9364	* @netdev: network interface device structure
9365	*
9366	* The stop entry point is called when an interface is de-activated by the OS,
9367	* and the netdevice enters the DOWN state. The hardware is still under the
9368	* driver's control, but the netdev interface is disabled.
9369	*
9370	* Returns success only - not allowed to fail
9371	*/
9372	int ice_stop(struct net_device *netdev)
9373	{
9374	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
9375	struct ice_vsi *vsi = np->vsi;
9376	struct ice_pf *pf = vsi->back;
9377
9378	if (ice_is_reset_in_progress(state: pf->state)) {
9379	netdev_err(dev: netdev, format: "can't stop net device while reset is in progress");
9380	return -EBUSY;
9381	}
9382
9383	if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) {
9384	int link_err = ice_force_phys_link_state(vsi, link_up: false);
9385
9386	if (link_err) {
9387	if (link_err == -ENOMEDIUM)
9388	netdev_info(dev: vsi->netdev, format: "Skipping link reconfig - no media attached, VSI %d\n",
9389	vsi->vsi_num);
9390	else
9391	netdev_err(dev: vsi->netdev, format: "Failed to set physical link down, VSI %d error %d\n",
9392	vsi->vsi_num, link_err);
9393
9394	ice_vsi_close(vsi);
9395	return -EIO;
9396	}
9397	}
9398
9399	ice_vsi_close(vsi);
9400
9401	return `0`;
9402	}
9403
9404	/**
9405	* ice_features_check - Validate encapsulated packet conforms to limits
9406	* @skb: skb buffer
9407	* @netdev: This port's netdev
9408	* @features: Offload features that the stack believes apply
9409	*/
9410	static netdev_features_t
9411	ice_features_check(struct sk_buff *skb,
9412	struct net_device __always_unused *netdev,
9413	netdev_features_t features)
9414	{
9415	bool gso = skb_is_gso(skb);
9416	size_t len;
9417
9418	/ No point in doing any of this if neither checksum nor GSO are*
9419	* being requested for this frame. We can rule out both by just
9420	* checking for CHECKSUM_PARTIAL
9421	*/
9422	if (skb->ip_summed != CHECKSUM_PARTIAL)
9423	return features;
9424
9425	/ We cannot support GSO if the MSS is going to be less than*
9426	* 64 bytes. If it is then we need to drop support for GSO.
9427	*/
9428	if (gso && (skb_shinfo(skb)->gso_size < ICE_TXD_CTX_MIN_MSS))
9429	features &= ~NETIF_F_GSO_MASK;
9430
9431	len = skb_network_offset(skb);
9432	if (len > ICE_TXD_MACLEN_MAX \|\| len & `0x1`)
9433	goto out_rm_features;
9434
9435	len = skb_network_header_len(skb);
9436	if (len > ICE_TXD_IPLEN_MAX \|\| len & `0x1`)
9437	goto out_rm_features;
9438
9439	if (skb->encapsulation) {
9440	/ this must work for VXLAN frames AND IPIP/SIT frames, and in*
9441	* the case of IPIP frames, the transport header pointer is
9442	* after the inner header! So check to make sure that this
9443	* is a GRE or UDP_TUNNEL frame before doing that math.
9444	*/
9445	if (gso && (skb_shinfo(skb)->gso_type &
9446	(SKB_GSO_GRE \| SKB_GSO_UDP_TUNNEL))) {
9447	len = skb_inner_network_header(skb) -
9448	skb_transport_header(skb);
9449	if (len > ICE_TXD_L4LEN_MAX \|\| len & `0x1`)
9450	goto out_rm_features;
9451	}
9452
9453	len = skb_inner_network_header_len(skb);
9454	if (len > ICE_TXD_IPLEN_MAX \|\| len & `0x1`)
9455	goto out_rm_features;
9456	}
9457
9458	return features;
9459	out_rm_features:
9460	return features & ~(NETIF_F_CSUM_MASK \| NETIF_F_GSO_MASK);
9461	}
9462
/* Reduced net_device_ops table: only open/stop, transmit, MAC address, MTU,
 * statistics and Tx timeout handling are provided, and ndo_bpf points at
 * ice_xdp_safe_mode rather than ice_xdp.
 * NOTE(review): presumably installed when the driver runs in safe mode -
 * confirm at the site where netdev_ops is assigned.
 */
static const struct net_device_ops ice_netdev_safe_mode_ops = {
	.ndo_open = ice_open,
	.ndo_stop = ice_stop,
	.ndo_start_xmit = ice_start_xmit,
	.ndo_set_mac_address = ice_set_mac_address,
	.ndo_validate_addr = eth_validate_addr,
	.ndo_change_mtu = ice_change_mtu,
	.ndo_get_stats64 = ice_get_stats64,
	.ndo_tx_timeout = ice_tx_timeout,
	.ndo_bpf = ice_xdp_safe_mode,
};
9474
/* Full net_device_ops table. In addition to the callbacks present in
 * ice_netdev_safe_mode_ops it wires up queue selection, feature handling,
 * Rx mode, VF management, VLAN filtering, TC setup, bridge/FDB operations
 * and the XDP/AF_XDP entry points.
 */
static const struct net_device_ops ice_netdev_ops = {
	.ndo_open = ice_open,
	.ndo_stop = ice_stop,
	.ndo_start_xmit = ice_start_xmit,
	.ndo_select_queue = ice_select_queue,
	.ndo_features_check = ice_features_check,
	.ndo_fix_features = ice_fix_features,
	.ndo_set_rx_mode = ice_set_rx_mode,
	.ndo_set_mac_address = ice_set_mac_address,
	.ndo_validate_addr = eth_validate_addr,
	.ndo_change_mtu = ice_change_mtu,
	.ndo_get_stats64 = ice_get_stats64,
	.ndo_set_tx_maxrate = ice_set_tx_maxrate,
	.ndo_eth_ioctl = ice_eth_ioctl,
	/* SR-IOV virtual function management */
	.ndo_set_vf_spoofchk = ice_set_vf_spoofchk,
	.ndo_set_vf_mac = ice_set_vf_mac,
	.ndo_get_vf_config = ice_get_vf_cfg,
	.ndo_set_vf_trust = ice_set_vf_trust,
	.ndo_set_vf_vlan = ice_set_vf_port_vlan,
	.ndo_set_vf_link_state = ice_set_vf_link_state,
	.ndo_get_vf_stats = ice_get_vf_stats,
	.ndo_set_vf_rate = ice_set_vf_bw,
	.ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid,
	.ndo_setup_tc = ice_setup_tc,
	.ndo_set_features = ice_set_features,
	.ndo_bridge_getlink = ice_bridge_getlink,
	.ndo_bridge_setlink = ice_bridge_setlink,
	.ndo_fdb_add = ice_fdb_add,
	.ndo_fdb_del = ice_fdb_del,
	/* flow steering callback is only built with CONFIG_RFS_ACCEL */
#ifdef CONFIG_RFS_ACCEL
	.ndo_rx_flow_steer = ice_rx_flow_steer,
#endif
	.ndo_tx_timeout = ice_tx_timeout,
	.ndo_bpf = ice_xdp,
	.ndo_xdp_xmit = ice_xdp_xmit,
	.ndo_xsk_wakeup = ice_xsk_wakeup,
};
9513

source code of linux/drivers/net/ethernet/intel/ice/ice_main.c