// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2018 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include "net_driver.h"
#include <linux/filter.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <net/gre.h>
#include "efx_common.h"
#include "efx_channels.h"
#include "efx.h"
#include "mcdi.h"
#include "selftest.h"
#include "rx_common.h"
#include "tx_common.h"
#include "nic.h"
#include "mcdi_port_common.h"
#include "io.h"
#include "mcdi_pcol.h"
#include "ef100_rep.h"

static unsigned int debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
			     NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
			     NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
			     NETIF_MSG_TX_ERR | NETIF_MSG_HW);
module_param(debug, uint, 0);
MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");
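/* The default above enables the categories most useful in the field.
 * Since this is a plain NETIF_MSG_* bitmap, e.g. loading with "debug=0"
 * silences all netif messages, while "debug=0xffff" enables every category.
 */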

/* This is the time (in jiffies) between invocations of the hardware
 * monitor.
 */
static unsigned int efx_monitor_interval = 1 * HZ;

/* How often and how many times to poll for a reset while waiting for a
 * BIST that another function started to complete.
 */
#define BIST_WAIT_DELAY_MS	100
#define BIST_WAIT_DELAY_COUNT	100
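/* Together these bound the wait at BIST_WAIT_DELAY_COUNT * BIST_WAIT_DELAY_MS
 * = 100 * 100 ms = 10 seconds before we give up on the other function's BIST.
 */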

/* Default stats update time */
#define STATS_PERIOD_MS_DEFAULT 1000

static const unsigned int efx_reset_type_max = RESET_TYPE_MAX;
static const char *const efx_reset_type_names[] = {
	[RESET_TYPE_INVISIBLE]          = "INVISIBLE",
	[RESET_TYPE_ALL]                = "ALL",
	[RESET_TYPE_RECOVER_OR_ALL]     = "RECOVER_OR_ALL",
	[RESET_TYPE_WORLD]              = "WORLD",
	[RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE",
	[RESET_TYPE_DATAPATH]           = "DATAPATH",
	[RESET_TYPE_MC_BIST]            = "MC_BIST",
	[RESET_TYPE_DISABLE]            = "DISABLE",
	[RESET_TYPE_TX_WATCHDOG]        = "TX_WATCHDOG",
	[RESET_TYPE_INT_ERROR]          = "INT_ERROR",
	[RESET_TYPE_DMA_ERROR]          = "DMA_ERROR",
	[RESET_TYPE_TX_SKIP]            = "TX_SKIP",
	[RESET_TYPE_MC_FAILURE]         = "MC_FAILURE",
	[RESET_TYPE_MCDI_TIMEOUT]       = "MCDI_TIMEOUT (FLR)",
};

#define RESET_TYPE(type) \
	STRING_TABLE_LOOKUP(type, efx_reset_type)
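/* STRING_TABLE_LOOKUP() (from net_driver.h) token-pastes the table name, so
 * RESET_TYPE(method) expands to roughly
 *   method < efx_reset_type_max ? efx_reset_type_names[method] : "(invalid)"
 * i.e. a bounds-checked lookup safe to use directly in log format arguments.
 */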

/* Loopback mode names (see LOOPBACK_MODE()) */
const unsigned int efx_loopback_mode_max = LOOPBACK_MAX;
const char *const efx_loopback_mode_names[] = {
	[LOOPBACK_NONE]         = "NONE",
	[LOOPBACK_DATA]         = "DATAPATH",
	[LOOPBACK_GMAC]         = "GMAC",
	[LOOPBACK_XGMII]        = "XGMII",
	[LOOPBACK_XGXS]         = "XGXS",
	[LOOPBACK_XAUI]         = "XAUI",
	[LOOPBACK_GMII]         = "GMII",
	[LOOPBACK_SGMII]        = "SGMII",
	[LOOPBACK_XGBR]         = "XGBR",
	[LOOPBACK_XFI]          = "XFI",
	[LOOPBACK_XAUI_FAR]     = "XAUI_FAR",
	[LOOPBACK_GMII_FAR]     = "GMII_FAR",
	[LOOPBACK_SGMII_FAR]    = "SGMII_FAR",
	[LOOPBACK_XFI_FAR]      = "XFI_FAR",
	[LOOPBACK_GPHY]         = "GPHY",
	[LOOPBACK_PHYXS]        = "PHYXS",
	[LOOPBACK_PCS]          = "PCS",
	[LOOPBACK_PMAPMD]       = "PMA/PMD",
	[LOOPBACK_XPORT]        = "XPORT",
	[LOOPBACK_XGMII_WS]     = "XGMII_WS",
	[LOOPBACK_XAUI_WS]      = "XAUI_WS",
	[LOOPBACK_XAUI_WS_FAR]  = "XAUI_WS_FAR",
	[LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR",
	[LOOPBACK_GMII_WS]      = "GMII_WS",
	[LOOPBACK_XFI_WS]       = "XFI_WS",
	[LOOPBACK_XFI_WS_FAR]   = "XFI_WS_FAR",
	[LOOPBACK_PHYXS_WS]     = "PHYXS_WS",
};

/* Reset workqueue. If any NIC has a hardware failure then a reset will be
 * queued onto this work queue. This is not a per-nic work queue, because
 * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised.
 */
static struct workqueue_struct *reset_workqueue;

int efx_create_reset_workqueue(void)
{
	reset_workqueue = create_singlethread_workqueue("sfc_reset");
	if (!reset_workqueue) {
		printk(KERN_ERR "Failed to create reset workqueue\n");
		return -ENOMEM;
	}

	return 0;
}

void efx_queue_reset_work(struct efx_nic *efx)
{
	queue_work(reset_workqueue, &efx->reset_work);
}

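/* Note that efx_flush_reset_workqueue() below relies on cancel_work_sync(),
 * which both cancels a queued-but-not-started reset and waits for a running
 * efx_reset_work() to finish, so on return no reset for this NIC is in
 * flight.
 */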
void efx_flush_reset_workqueue(struct efx_nic *efx)
{
	cancel_work_sync(&efx->reset_work);
}

void efx_destroy_reset_workqueue(void)
{
	if (reset_workqueue) {
		destroy_workqueue(reset_workqueue);
		reset_workqueue = NULL;
	}
}

/* We assume that efx->type->reconfigure_mac will always try to sync RX
 * filters and therefore needs to read-lock the filter table against freeing
 */
void efx_mac_reconfigure(struct efx_nic *efx, bool mtu_only)
{
	if (efx->type->reconfigure_mac) {
		down_read(&efx->filter_sem);
		efx->type->reconfigure_mac(efx, mtu_only);
		up_read(&efx->filter_sem);
	}
}

/* Asynchronous work item for changing MAC promiscuity and multicast
 * hash. Avoid a drain/rx_ingress enable by reconfiguring the current
 * MAC directly.
 */
static void efx_mac_work(struct work_struct *data)
{
	struct efx_nic *efx = container_of(data, struct efx_nic, mac_work);

	mutex_lock(&efx->mac_lock);
	if (efx->port_enabled)
		efx_mac_reconfigure(efx, false);
	mutex_unlock(&efx->mac_lock);
}

int efx_set_mac_address(struct net_device *net_dev, void *data)
{
	struct efx_nic *efx = efx_netdev_priv(net_dev);
	struct sockaddr *addr = data;
	u8 *new_addr = addr->sa_data;
	u8 old_addr[6];
	int rc;

	if (!is_valid_ether_addr(new_addr)) {
		netif_err(efx, drv, efx->net_dev,
			  "invalid ethernet MAC address requested: %pM\n",
			  new_addr);
		return -EADDRNOTAVAIL;
	}

	/* save old address */
	ether_addr_copy(old_addr, net_dev->dev_addr);
	eth_hw_addr_set(net_dev, new_addr);
	if (efx->type->set_mac_address) {
		rc = efx->type->set_mac_address(efx);
		if (rc) {
			eth_hw_addr_set(net_dev, old_addr);
			return rc;
		}
	}

	/* Reconfigure the MAC */
	mutex_lock(&efx->mac_lock);
	efx_mac_reconfigure(efx, false);
	mutex_unlock(&efx->mac_lock);

	return 0;
}

/* Context: netif_addr_lock held, BHs disabled. */
void efx_set_rx_mode(struct net_device *net_dev)
{
	struct efx_nic *efx = efx_netdev_priv(net_dev);

	if (efx->port_enabled)
		queue_work(efx->workqueue, &efx->mac_work);
	/* Otherwise efx_start_port() will do this */
}

int efx_set_features(struct net_device *net_dev, netdev_features_t data)
{
	struct efx_nic *efx = efx_netdev_priv(net_dev);
	int rc;

	/* If disabling RX n-tuple filtering, clear existing filters */
	if (net_dev->features & ~data & NETIF_F_NTUPLE) {
		rc = efx->type->filter_clear_rx(efx, EFX_FILTER_PRI_MANUAL);
		if (rc)
			return rc;
	}

	/* If Rx VLAN filter is changed, update filters via mac_reconfigure.
	 * If rx-fcs is changed, mac_reconfigure updates that too.
	 */
	if ((net_dev->features ^ data) & (NETIF_F_HW_VLAN_CTAG_FILTER |
					  NETIF_F_RXFCS)) {
		/* efx_set_rx_mode() will schedule MAC work to update filters
		 * when the new features are finally set in net_dev.
		 */
		efx_set_rx_mode(net_dev);
	}

	return 0;
}

/* This ensures that the kernel is kept informed (via
 * netif_carrier_on/off) of the link status, and also maintains the
 * stop of the port's TX queue while the link is down.
 */
void efx_link_status_changed(struct efx_nic *efx)
{
	struct efx_link_state *link_state = &efx->link_state;

	/* SFC Bug 5356: A net_dev notifier is registered, so we must ensure
	 * that no events are triggered between unregister_netdev() and the
	 * driver unloading. A more general condition is that NETDEV_CHANGE
	 * can only be generated between NETDEV_UP and NETDEV_DOWN
	 */
	if (!netif_running(efx->net_dev))
		return;

	if (link_state->up != netif_carrier_ok(efx->net_dev)) {
		efx->n_link_state_changes++;

		if (link_state->up)
			netif_carrier_on(efx->net_dev);
		else
			netif_carrier_off(efx->net_dev);
	}

	/* Status message for kernel log */
	if (link_state->up)
		netif_info(efx, link, efx->net_dev,
			   "link up at %uMbps %s-duplex (MTU %d)\n",
			   link_state->speed, link_state->fd ? "full" : "half",
			   efx->net_dev->mtu);
	else
		netif_info(efx, link, efx->net_dev, "link down\n");
}

unsigned int efx_xdp_max_mtu(struct efx_nic *efx)
{
	/* The maximum MTU that we can fit in a single page, allowing for
	 * framing, overhead and XDP headroom + tailroom.
	 */
	int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) +
		       efx->rx_prefix_size + efx->type->rx_buffer_padding +
		       efx->rx_ip_align + EFX_XDP_HEADROOM + EFX_XDP_TAILROOM;

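	/* As a rough worked example (illustrative; the macros above are
	 * authoritative): with 4 KiB pages, a ~1.5 KiB baseline frame
	 * allowance from EFX_MAX_FRAME_LEN(0), and a few hundred bytes of
	 * prefix, padding and XDP head/tailroom, the result lands somewhat
	 * below 3.5 KiB.
	 */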
	return PAGE_SIZE - overhead;
}

/* Context: process, rtnl_lock() held. */
int efx_change_mtu(struct net_device *net_dev, int new_mtu)
{
	struct efx_nic *efx = efx_netdev_priv(net_dev);
	int rc;

	rc = efx_check_disabled(efx);
	if (rc)
		return rc;

	if (rtnl_dereference(efx->xdp_prog) &&
	    new_mtu > efx_xdp_max_mtu(efx)) {
		netif_err(efx, drv, efx->net_dev,
			  "Requested MTU of %d too big for XDP (max: %d)\n",
			  new_mtu, efx_xdp_max_mtu(efx));
		return -EINVAL;
	}

	netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);

	efx_device_detach_sync(efx);
	efx_stop_all(efx);

	mutex_lock(&efx->mac_lock);
	net_dev->mtu = new_mtu;
	efx_mac_reconfigure(efx, true);
	mutex_unlock(&efx->mac_lock);

	efx_start_all(efx);
	efx_device_attach_if_not_resetting(efx);
	return 0;
}

/**************************************************************************
 *
 * Hardware monitor
 *
 **************************************************************************/

/* Run periodically off the general workqueue */
static void efx_monitor(struct work_struct *data)
{
	struct efx_nic *efx = container_of(data, struct efx_nic,
					   monitor_work.work);

	netif_vdbg(efx, timer, efx->net_dev,
		   "hardware monitor executing on CPU %d\n",
		   raw_smp_processor_id());
	BUG_ON(efx->type->monitor == NULL);

	/* If the mac_lock is already held then it is likely a port
	 * reconfiguration is already in place, which will likely do
	 * most of the work of monitor() anyway.
	 */
	if (mutex_trylock(&efx->mac_lock)) {
		if (efx->port_enabled && efx->type->monitor)
			efx->type->monitor(efx);
		mutex_unlock(&efx->mac_lock);
	}

	efx_start_monitor(efx);
}

void efx_start_monitor(struct efx_nic *efx)
{
	if (efx->type->monitor)
		queue_delayed_work(efx->workqueue, &efx->monitor_work,
				   efx_monitor_interval);
}

/**************************************************************************
 *
 * Event queue processing
 *
 *************************************************************************/

/* Channels are shutdown and reinitialised whilst the NIC is running
 * to propagate configuration changes (mtu, checksum offload), or
 * to clear hardware error conditions
 */
static void efx_start_datapath(struct efx_nic *efx)
{
	netdev_features_t old_features = efx->net_dev->features;
	bool old_rx_scatter = efx->rx_scatter;
	size_t rx_buf_len;

	/* Calculate the rx buffer allocation parameters required to
	 * support the current MTU, including padding for header
	 * alignment and overruns.
	 */
	efx->rx_dma_len = (efx->rx_prefix_size +
			   EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
			   efx->type->rx_buffer_padding);
	rx_buf_len = (sizeof(struct efx_rx_page_state) + EFX_XDP_HEADROOM +
		      efx->rx_ip_align + efx->rx_dma_len + EFX_XDP_TAILROOM);

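	/* Pick an allocation strategy: a buffer that fits in one page, NIC
	 * scatter across fixed-size buffers, or a higher-order allocation.
	 * (Illustratively, with a 1500-byte MTU and 4 KiB pages, rx_buf_len
	 * comes out around 2 KiB and the single-page branch is taken.)
	 */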
	if (rx_buf_len <= PAGE_SIZE) {
		efx->rx_scatter = efx->type->always_rx_scatter;
		efx->rx_buffer_order = 0;
	} else if (efx->type->can_rx_scatter) {
		BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES);
		BUILD_BUG_ON(sizeof(struct efx_rx_page_state) +
			     2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE,
				       EFX_RX_BUF_ALIGNMENT) >
			     PAGE_SIZE);
		efx->rx_scatter = true;
		efx->rx_dma_len = EFX_RX_USR_BUF_SIZE;
		efx->rx_buffer_order = 0;
	} else {
		efx->rx_scatter = false;
		efx->rx_buffer_order = get_order(rx_buf_len);
	}

	efx_rx_config_page_split(efx);
	if (efx->rx_buffer_order)
		netif_dbg(efx, drv, efx->net_dev,
			  "RX buf len=%u; page order=%u batch=%u\n",
			  efx->rx_dma_len, efx->rx_buffer_order,
			  efx->rx_pages_per_batch);
	else
		netif_dbg(efx, drv, efx->net_dev,
			  "RX buf len=%u step=%u bpp=%u; page batch=%u\n",
			  efx->rx_dma_len, efx->rx_page_buf_step,
			  efx->rx_bufs_per_page, efx->rx_pages_per_batch);

	/* Restore previously fixed features in hw_features and remove
	 * features which are fixed now
	 */
	efx->net_dev->hw_features |= efx->net_dev->features;
	efx->net_dev->hw_features &= ~efx->fixed_features;
	efx->net_dev->features |= efx->fixed_features;
	if (efx->net_dev->features != old_features)
		netdev_features_change(efx->net_dev);

	/* RX filters may also have scatter-enabled flags */
	if ((efx->rx_scatter != old_rx_scatter) &&
	    efx->type->filter_update_rx_scatter)
		efx->type->filter_update_rx_scatter(efx);

	/* We must keep at least one descriptor in a TX ring empty.
	 * We could avoid this when the queue size does not exactly
	 * match the hardware ring size, but it's not that important.
	 * Therefore we stop the queue when one more skb might fill
	 * the ring completely. We wake it when half way back to
	 * empty.
	 */
	efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx);
	efx->txq_wake_thresh = efx->txq_stop_thresh / 2;
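	/* For example (illustrative numbers only): with a 1024-entry TX ring
	 * and a worst-case skb needing N descriptors, the queue is stopped
	 * once 1024 - N descriptors are in use and woken again at half that
	 * fill level, giving simple hysteresis against queue thrash.
	 */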

	/* Initialise the channels */
	efx_start_channels(efx);

	efx_ptp_start_datapath(efx);

	if (netif_device_present(efx->net_dev))
		netif_tx_wake_all_queues(efx->net_dev);
}

static void efx_stop_datapath(struct efx_nic *efx)
{
	EFX_ASSERT_RESET_SERIALISED(efx);
	BUG_ON(efx->port_enabled);

	efx_ptp_stop_datapath(efx);

	efx_stop_channels(efx);
}

/**************************************************************************
 *
 * Port handling
 *
 **************************************************************************/

/* Equivalent to efx_link_set_advertising with all-zeroes, except does not
 * force the Autoneg bit on.
 */
void efx_link_clear_advertising(struct efx_nic *efx)
{
	bitmap_zero(efx->link_advertising, __ETHTOOL_LINK_MODE_MASK_NBITS);
	efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
}

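/* Flow control maps onto the standard pause advertising bits as in
 * IEEE 802.3 Annex 28B: RX+TX -> Pause, RX only -> Pause|Asym_Pause,
 * TX only -> Asym_Pause. The XOR below produces exactly this table from
 * the two EFX_FC_* flags.
 */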
void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc)
{
	efx->wanted_fc = wanted_fc;
	if (efx->link_advertising[0]) {
		if (wanted_fc & EFX_FC_RX)
			efx->link_advertising[0] |= (ADVERTISED_Pause |
						     ADVERTISED_Asym_Pause);
		else
			efx->link_advertising[0] &= ~(ADVERTISED_Pause |
						      ADVERTISED_Asym_Pause);
		if (wanted_fc & EFX_FC_TX)
			efx->link_advertising[0] ^= ADVERTISED_Asym_Pause;
	}
}

static void efx_start_port(struct efx_nic *efx)
{
	netif_dbg(efx, ifup, efx->net_dev, "start port\n");
	BUG_ON(efx->port_enabled);

	mutex_lock(&efx->mac_lock);
	efx->port_enabled = true;

	/* Ensure MAC ingress/egress is enabled */
	efx_mac_reconfigure(efx, false);

	mutex_unlock(&efx->mac_lock);
}

/* Cancel work for MAC reconfiguration, periodic hardware monitoring
 * and the async self-test, wait for them to finish and prevent them
 * being scheduled again. This doesn't cover online resets, which
 * should only be cancelled when removing the device.
 */
static void efx_stop_port(struct efx_nic *efx)
{
	netif_dbg(efx, ifdown, efx->net_dev, "stop port\n");

	EFX_ASSERT_RESET_SERIALISED(efx);

	mutex_lock(&efx->mac_lock);
	efx->port_enabled = false;
	mutex_unlock(&efx->mac_lock);

	/* Serialise against efx_set_multicast_list() */
	netif_addr_lock_bh(efx->net_dev);
	netif_addr_unlock_bh(efx->net_dev);
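	/* Taking and immediately releasing the address lock above ensures
	 * that any efx_set_rx_mode() running concurrently has finished and
	 * has observed port_enabled == false before we cancel the MAC work.
	 */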

	cancel_delayed_work_sync(&efx->monitor_work);
	efx_selftest_async_cancel(efx);
	cancel_work_sync(&efx->mac_work);
}

/* If the interface is supposed to be running but is not, start
 * the hardware and software data path, regular activity for the port
 * (MAC statistics, link polling, etc.) and schedule the port to be
 * reconfigured. Interrupts must already be enabled. This function
 * is safe to call multiple times, so long as the NIC is not disabled.
 * Requires the RTNL lock.
 */
void efx_start_all(struct efx_nic *efx)
{
	EFX_ASSERT_RESET_SERIALISED(efx);
	BUG_ON(efx->state == STATE_DISABLED);

	/* Check that it is appropriate to restart the interface. All
	 * of these flags are safe to read under just the rtnl lock
	 */
	if (efx->port_enabled || !netif_running(efx->net_dev) ||
	    efx->reset_pending)
		return;

	efx_start_port(efx);
	efx_start_datapath(efx);

	/* Start the hardware monitor if there is one */
	efx_start_monitor(efx);

	efx_selftest_async_start(efx);

	/* Link state detection is normally event-driven; we have
	 * to poll now because we could have missed a change
	 */
	mutex_lock(&efx->mac_lock);
	if (efx_mcdi_phy_poll(efx))
		efx_link_status_changed(efx);
	mutex_unlock(&efx->mac_lock);

	if (efx->type->start_stats) {
		efx->type->start_stats(efx);
		efx->type->pull_stats(efx);
		spin_lock_bh(&efx->stats_lock);
		efx->type->update_stats(efx, NULL, NULL);
		spin_unlock_bh(&efx->stats_lock);
	}
}

/* Quiesce the hardware and software data path, and regular activity
 * for the port without bringing the link down. Safe to call multiple
 * times with the NIC in almost any state, but interrupts should be
 * enabled. Requires the RTNL lock.
 */
void efx_stop_all(struct efx_nic *efx)
{
	EFX_ASSERT_RESET_SERIALISED(efx);

	/* port_enabled can be read safely under the rtnl lock */
	if (!efx->port_enabled)
		return;

	if (efx->type->update_stats) {
		/* update stats before we go down so we can accurately count
		 * rx_nodesc_drops
		 */
		efx->type->pull_stats(efx);
		spin_lock_bh(&efx->stats_lock);
		efx->type->update_stats(efx, NULL, NULL);
		spin_unlock_bh(&efx->stats_lock);
		efx->type->stop_stats(efx);
	}

	efx_stop_port(efx);

	/* Stop the kernel transmit interface. This is only valid if
	 * the device is stopped or detached; otherwise the watchdog
	 * may fire immediately.
	 */
	WARN_ON(netif_running(efx->net_dev) &&
		netif_device_present(efx->net_dev));
	netif_tx_disable(efx->net_dev);

	efx_stop_datapath(efx);
}

/* Context: process, rcu_read_lock or RTNL held, non-blocking. */
void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats)
{
	struct efx_nic *efx = efx_netdev_priv(net_dev);

	spin_lock_bh(&efx->stats_lock);
	efx_nic_update_stats_atomic(efx, NULL, stats);
	spin_unlock_bh(&efx->stats_lock);
}

/* Push loopback/power/transmit disable settings to the PHY, and reconfigure
 * the MAC appropriately. All other PHY configuration changes are pushed
 * through phy_op->set_settings(), and pushed asynchronously to the MAC
 * through efx_monitor().
 *
 * Callers must hold the mac_lock
 */
int __efx_reconfigure_port(struct efx_nic *efx)
{
	enum efx_phy_mode phy_mode;
	int rc = 0;

	WARN_ON(!mutex_is_locked(&efx->mac_lock));

	/* Disable PHY transmit in mac level loopbacks */
	phy_mode = efx->phy_mode;
	if (LOOPBACK_INTERNAL(efx))
		efx->phy_mode |= PHY_MODE_TX_DISABLED;
	else
		efx->phy_mode &= ~PHY_MODE_TX_DISABLED;

	if (efx->type->reconfigure_port)
		rc = efx->type->reconfigure_port(efx);

	if (rc)
		efx->phy_mode = phy_mode;

	return rc;
}

/* Reinitialise the MAC to pick up new PHY settings, even if the port is
 * disabled.
 */
int efx_reconfigure_port(struct efx_nic *efx)
{
	int rc;

	EFX_ASSERT_RESET_SERIALISED(efx);

	mutex_lock(&efx->mac_lock);
	rc = __efx_reconfigure_port(efx);
	mutex_unlock(&efx->mac_lock);

	return rc;
}

/**************************************************************************
 *
 * Device reset and suspend
 *
 **************************************************************************/

static void efx_wait_for_bist_end(struct efx_nic *efx)
{
	int i;

	for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) {
		if (efx_mcdi_poll_reboot(efx))
			goto out;
		msleep(BIST_WAIT_DELAY_MS);
	}

	netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n");
out:
	/* Either way unset the BIST flag. If we found no reboot we probably
	 * won't recover, but we should try.
	 */
	efx->mc_bist_for_other_fn = false;
}

/* Try recovery mechanisms.
 * For now only EEH is supported.
 * Returns 0 if the recovery mechanisms are unsuccessful.
 * Returns a non-zero value otherwise.
 */
int efx_try_recovery(struct efx_nic *efx)
{
#ifdef CONFIG_EEH
	/* A PCI error can occur and not be seen by EEH because nothing
	 * happens on the PCI bus. In this case the driver may fail and
	 * schedule a 'recover or reset', leading to this recovery handler.
	 * Manually call the eeh failure check function.
	 */
	struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev);

	if (eeh_dev_check_failure(eehdev)) {
		/* The EEH mechanisms will handle the error and reset the
		 * device if necessary.
		 */
		return 1;
	}
#endif
	return 0;
}

/* Tears down the entire software state and most of the hardware state
 * before reset.
 */
void efx_reset_down(struct efx_nic *efx, enum reset_type method)
{
	EFX_ASSERT_RESET_SERIALISED(efx);

	if (method == RESET_TYPE_MCDI_TIMEOUT)
		efx->type->prepare_flr(efx);

	efx_stop_all(efx);
	efx_disable_interrupts(efx);

	mutex_lock(&efx->mac_lock);
	down_write(&efx->filter_sem);
	mutex_lock(&efx->rss_lock);
	efx->type->fini(efx);
}

/* Context: netif_tx_lock held, BHs disabled. */
void efx_watchdog(struct net_device *net_dev, unsigned int txqueue)
{
	struct efx_nic *efx = efx_netdev_priv(net_dev);

	netif_err(efx, tx_err, efx->net_dev,
		  "TX stuck with port_enabled=%d: resetting channels\n",
		  efx->port_enabled);

	efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG);
}

/* This function will always ensure that the locks acquired in
 * efx_reset_down() are released. A failure return code indicates
 * that we were unable to reinitialise the hardware, and the
 * driver should be disabled. If ok is false, then the rx and tx
 * engines are not restarted, pending a RESET_DISABLE.
 */
int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
{
	int rc;

	EFX_ASSERT_RESET_SERIALISED(efx);

	if (method == RESET_TYPE_MCDI_TIMEOUT)
		efx->type->finish_flr(efx);

	/* Ensure that SRAM is initialised even if we're disabling the device */
	rc = efx->type->init(efx);
	if (rc) {
		netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n");
		goto fail;
	}

	if (!ok)
		goto fail;

	if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
	    method != RESET_TYPE_DATAPATH) {
		rc = efx_mcdi_port_reconfigure(efx);
		if (rc && rc != -EPERM)
			netif_err(efx, drv, efx->net_dev,
				  "could not restore PHY settings\n");
	}

	rc = efx_enable_interrupts(efx);
	if (rc)
		goto fail;

#ifdef CONFIG_SFC_SRIOV
	rc = efx->type->vswitching_restore(efx);
	if (rc) /* not fatal; the PF will still work fine */
		netif_warn(efx, probe, efx->net_dev,
			   "failed to restore vswitching rc=%d;"
			   " VFs may not function\n", rc);
#endif

	if (efx->type->rx_restore_rss_contexts)
		efx->type->rx_restore_rss_contexts(efx);
	mutex_unlock(&efx->rss_lock);
	efx->type->filter_table_restore(efx);
	up_write(&efx->filter_sem);

	mutex_unlock(&efx->mac_lock);

	efx_start_all(efx);

	if (efx->type->udp_tnl_push_ports)
		efx->type->udp_tnl_push_ports(efx);

	return 0;

fail:
	efx->port_initialized = false;

	mutex_unlock(&efx->rss_lock);
	up_write(&efx->filter_sem);
	mutex_unlock(&efx->mac_lock);

	return rc;
}

/* Reset the NIC using the specified method. Note that the reset may
 * fail, in which case the card will be left in an unusable state.
 *
 * Caller must hold the rtnl_lock.
 */
int efx_reset(struct efx_nic *efx, enum reset_type method)
{
	int rc, rc2 = 0;
	bool disabled;

	netif_info(efx, drv, efx->net_dev, "resetting (%s)\n",
		   RESET_TYPE(method));

	efx_device_detach_sync(efx);
	/* efx_reset_down() grabs locks that prevent recovery on EF100.
	 * EF100 reset is handled in the efx_nic_type callback below.
	 */
	if (efx_nic_rev(efx) != EFX_REV_EF100)
		efx_reset_down(efx, method);

	rc = efx->type->reset(efx, method);
	if (rc) {
		netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n");
		goto out;
	}

	/* Clear flags for the scopes we covered. We assume the NIC and
	 * driver are now quiescent so that there is no race here.
	 */
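	/* Note: -(1 << (method + 1)) == ~((1 << (method + 1)) - 1), so the
	 * first branch below clears pending bits 0..method in one step while
	 * leaving any higher-numbered reset requests pending.
	 */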
	if (method < RESET_TYPE_MAX_METHOD)
		efx->reset_pending &= -(1 << (method + 1));
	else /* it doesn't fit into the well-ordered scope hierarchy */
		__clear_bit(method, &efx->reset_pending);

	/* Reinitialise bus-mastering, which may have been turned off before
	 * the reset was scheduled. This is still appropriate, even in the
	 * RESET_TYPE_DISABLE since this driver generally assumes the hardware
	 * can respond to requests.
	 */
	pci_set_master(efx->pci_dev);

out:
	/* Leave device stopped if necessary */
	disabled = rc ||
		method == RESET_TYPE_DISABLE ||
		method == RESET_TYPE_RECOVER_OR_DISABLE;
	if (efx_nic_rev(efx) != EFX_REV_EF100)
		rc2 = efx_reset_up(efx, method, !disabled);
	if (rc2) {
		disabled = true;
		if (!rc)
			rc = rc2;
	}

	if (disabled) {
		dev_close(efx->net_dev);
		netif_err(efx, drv, efx->net_dev, "has been disabled\n");
		efx->state = STATE_DISABLED;
	} else {
		netif_dbg(efx, drv, efx->net_dev, "reset complete\n");
		efx_device_attach_if_not_resetting(efx);
	}
	return rc;
}

/* The worker thread exists so that code that cannot sleep can
 * schedule a reset for later.
 */
static void efx_reset_work(struct work_struct *data)
{
	struct efx_nic *efx = container_of(data, struct efx_nic, reset_work);
	unsigned long pending;
	enum reset_type method;

	pending = READ_ONCE(efx->reset_pending);
	method = fls(pending) - 1;
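	/* fls() returns the 1-based position of the highest set bit (0 if no
	 * bits are set), so we always service the highest-numbered - i.e.
	 * widest-scope - reset that has been requested.
	 */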

	if (method == RESET_TYPE_MC_BIST)
		efx_wait_for_bist_end(efx);

	if ((method == RESET_TYPE_RECOVER_OR_DISABLE ||
	     method == RESET_TYPE_RECOVER_OR_ALL) &&
	    efx_try_recovery(efx))
		return;

	if (!pending)
		return;

	rtnl_lock();

	/* We checked the state in efx_schedule_reset() but it may
	 * have changed by now. Now that we have the RTNL lock,
	 * it cannot change again.
	 */
	if (efx_net_active(efx->state))
		(void)efx_reset(efx, method);

	rtnl_unlock();
}

void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
{
	enum reset_type method;

	if (efx_recovering(efx->state)) {
		netif_dbg(efx, drv, efx->net_dev,
			  "recovering: skip scheduling %s reset\n",
			  RESET_TYPE(type));
		return;
	}

	switch (type) {
	case RESET_TYPE_INVISIBLE:
	case RESET_TYPE_ALL:
	case RESET_TYPE_RECOVER_OR_ALL:
	case RESET_TYPE_WORLD:
	case RESET_TYPE_DISABLE:
	case RESET_TYPE_RECOVER_OR_DISABLE:
	case RESET_TYPE_DATAPATH:
	case RESET_TYPE_MC_BIST:
	case RESET_TYPE_MCDI_TIMEOUT:
		method = type;
		netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
			  RESET_TYPE(method));
		break;
	default:
		method = efx->type->map_reset_reason(type);
		netif_dbg(efx, drv, efx->net_dev,
			  "scheduling %s reset for %s\n",
			  RESET_TYPE(method), RESET_TYPE(type));
		break;
	}

	set_bit(method, &efx->reset_pending);
	smp_mb(); /* ensure we change reset_pending before checking state */

	/* If we're not READY then just leave the flags set as the cue
	 * to abort probing or reschedule the reset later.
	 */
	if (!efx_net_active(READ_ONCE(efx->state)))
		return;

	/* efx_process_channel() will no longer read events once a
	 * reset is scheduled. So switch back to poll'd MCDI completions.
	 */
	efx_mcdi_mode_poll(efx);

	efx_queue_reset_work(efx);
}

/**************************************************************************
 *
 * Dummy NIC operations
 *
 * Can be used for some unimplemented operations
 * Needed so all function pointers are valid and do not have to be tested
 * before use
 *
 **************************************************************************/
int efx_port_dummy_op_int(struct efx_nic *efx)
{
	return 0;
}
void efx_port_dummy_op_void(struct efx_nic *efx) {}

/**************************************************************************
 *
 * Data housekeeping
 *
 **************************************************************************/

/* This zeroes out and then fills in the invariants in a struct
 * efx_nic (including all sub-structures).
 */
int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev)
{
	int rc = -ENOMEM;

	/* Initialise common structures */
	INIT_LIST_HEAD(&efx->node);
	INIT_LIST_HEAD(&efx->secondary_list);
	spin_lock_init(&efx->biu_lock);
#ifdef CONFIG_SFC_MTD
	INIT_LIST_HEAD(&efx->mtd_list);
#endif
	INIT_WORK(&efx->reset_work, efx_reset_work);
	INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor);
	efx_selftest_async_init(efx);
	efx->pci_dev = pci_dev;
	efx->msg_enable = debug;
	efx->state = STATE_UNINIT;
	strscpy(efx->name, pci_name(pci_dev), sizeof(efx->name));

	efx->rx_prefix_size = efx->type->rx_prefix_size;
	efx->rx_ip_align =
		NET_IP_ALIGN ? (efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0;
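	/* When NET_IP_ALIGN is non-zero, the pad above is chosen so that
	 * (hardware prefix + pad) preserves the IP header alignment the
	 * stack expects; e.g. a 14-byte prefix with NET_IP_ALIGN == 2
	 * gives (14 + 2) % 4 == 0, i.e. no extra pad needed.
	 */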
	efx->rx_packet_hash_offset =
		efx->type->rx_hash_offset - efx->type->rx_prefix_size;
	efx->rx_packet_ts_offset =
		efx->type->rx_ts_offset - efx->type->rx_prefix_size;
	INIT_LIST_HEAD(&efx->rss_context.list);
	efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
	mutex_init(&efx->rss_lock);
	efx->vport_id = EVB_PORT_ID_ASSIGNED;
	spin_lock_init(&efx->stats_lock);
	efx->vi_stride = EFX_DEFAULT_VI_STRIDE;
	efx->num_mac_stats = MC_CMD_MAC_NSTATS;
	BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END);
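	/* The assertion above encodes the assumption that the generation
	 * count is the last entry in the MAC stats DMA buffer; the stats
	 * code relies on re-reading it to detect a torn update.
	 */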
	mutex_init(&efx->mac_lock);
	init_rwsem(&efx->filter_sem);
#ifdef CONFIG_RFS_ACCEL
	mutex_init(&efx->rps_mutex);
	spin_lock_init(&efx->rps_hash_lock);
	/* Failure to allocate is not fatal, but may degrade ARFS performance */
	efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE,
				      sizeof(*efx->rps_hash_table), GFP_KERNEL);
#endif
	spin_lock_init(&efx->vf_reps_lock);
	INIT_LIST_HEAD(&efx->vf_reps);
	INIT_WORK(&efx->mac_work, efx_mac_work);
	init_waitqueue_head(&efx->flush_wq);

	efx->tx_queues_per_channel = 1;
	efx->rxq_entries = EFX_DEFAULT_DMAQ_SIZE;
	efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE;

	efx->mem_bar = UINT_MAX;

	rc = efx_init_channels(efx);
	if (rc)
		goto fail;

	/* Would be good to use the net_dev name, but we're too early */
	snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s",
		 pci_name(pci_dev));
	efx->workqueue = create_singlethread_workqueue(efx->workqueue_name);
	if (!efx->workqueue) {
		rc = -ENOMEM;
		goto fail;
	}

	return 0;

fail:
	efx_fini_struct(efx);
	return rc;
}

void efx_fini_struct(struct efx_nic *efx)
{
#ifdef CONFIG_RFS_ACCEL
	kfree(efx->rps_hash_table);
#endif

	efx_fini_channels(efx);

	kfree(efx->vpd_sn);

	if (efx->workqueue) {
		destroy_workqueue(efx->workqueue);
		efx->workqueue = NULL;
	}
}

/* This configures the PCI device to enable I/O and DMA. */
int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask,
		unsigned int mem_map_size)
{
	struct pci_dev *pci_dev = efx->pci_dev;
	int rc;

	efx->mem_bar = UINT_MAX;
	pci_dbg(pci_dev, "initialising I/O bar=%d\n", bar);

	rc = pci_enable_device(pci_dev);
	if (rc) {
		pci_err(pci_dev, "failed to enable PCI device\n");
		goto fail1;
	}

	pci_set_master(pci_dev);

	rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask);
	if (rc) {
		pci_err(efx->pci_dev, "could not find a suitable DMA mask\n");
		goto fail2;
	}
	pci_dbg(efx->pci_dev, "using DMA mask %llx\n", (unsigned long long)dma_mask);

	efx->membase_phys = pci_resource_start(efx->pci_dev, bar);
	if (!efx->membase_phys) {
		pci_err(efx->pci_dev,
			"ERROR: No BAR%d mapping from the BIOS. Try pci=realloc on the kernel command line\n",
			bar);
		rc = -ENODEV;
		goto fail3;
	}

	rc = pci_request_region(pci_dev, bar, "sfc");
	if (rc) {
		pci_err(efx->pci_dev,
			"request for memory BAR[%d] failed\n", bar);
		rc = -EIO;
		goto fail3;
	}
	efx->mem_bar = bar;
	efx->membase = ioremap(efx->membase_phys, mem_map_size);
	if (!efx->membase) {
		pci_err(efx->pci_dev,
			"could not map memory BAR[%d] at %llx+%x\n", bar,
			(unsigned long long)efx->membase_phys, mem_map_size);
		rc = -ENOMEM;
		goto fail4;
	}
	pci_dbg(efx->pci_dev,
		"memory BAR[%d] at %llx+%x (virtual %p)\n", bar,
		(unsigned long long)efx->membase_phys, mem_map_size,
		efx->membase);

	return 0;

fail4:
	pci_release_region(efx->pci_dev, bar);
fail3:
	efx->membase_phys = 0;
fail2:
	pci_disable_device(efx->pci_dev);
fail1:
	return rc;
}

void efx_fini_io(struct efx_nic *efx)
{
	pci_dbg(efx->pci_dev, "shutting down I/O\n");

	if (efx->membase) {
		iounmap(efx->membase);
		efx->membase = NULL;
	}

	if (efx->membase_phys) {
		pci_release_region(efx->pci_dev, efx->mem_bar);
		efx->membase_phys = 0;
		efx->mem_bar = UINT_MAX;
	}

	/* Don't disable bus-mastering if VFs are assigned */
	if (!pci_vfs_assigned(efx->pci_dev))
		pci_disable_device(efx->pci_dev);
}

#ifdef CONFIG_SFC_MCDI_LOGGING
static ssize_t mcdi_logging_show(struct device *dev,
				 struct device_attribute *attr,
				 char *buf)
{
	struct efx_nic *efx = dev_get_drvdata(dev);
	struct efx_mcdi_iface *mcdi = efx_mcdi(efx);

	return sysfs_emit(buf, "%d\n", mcdi->logging_enabled);
}

static ssize_t mcdi_logging_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t count)
{
	struct efx_nic *efx = dev_get_drvdata(dev);
	struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
	bool enable = count > 0 && *buf != '0';

	mcdi->logging_enabled = enable;
	return count;
}

static DEVICE_ATTR_RW(mcdi_logging);
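/* DEVICE_ATTR_RW() wires the two functions above to a file named
 * "mcdi_logging" under the PCI device's sysfs directory, so (path is
 * illustrative) "echo 1 > /sys/bus/pci/devices/<bdf>/mcdi_logging"
 * enables MCDI tracing and "echo 0" disables it.
 */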

void efx_init_mcdi_logging(struct efx_nic *efx)
{
	int rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);

	if (rc) {
		netif_warn(efx, drv, efx->net_dev,
			   "failed to init net dev attributes\n");
	}
}

void efx_fini_mcdi_logging(struct efx_nic *efx)
{
	device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
}
#endif

/* A PCI error affecting this device was detected.
 * At this point MMIO and DMA may be disabled.
 * Stop the software path and request a slot reset.
 */
static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
	struct efx_nic *efx = pci_get_drvdata(pdev);

	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	rtnl_lock();

	if (efx->state != STATE_DISABLED) {
		efx->state = efx_recover(efx->state);
		efx->reset_pending = 0;

		efx_device_detach_sync(efx);

		if (efx_net_active(efx->state)) {
			efx_stop_all(efx);
			efx_disable_interrupts(efx);
		}

		status = PCI_ERS_RESULT_NEED_RESET;
	} else {
		/* If the interface is disabled we don't want to do anything
		 * with it.
		 */
		status = PCI_ERS_RESULT_RECOVERED;
	}

	rtnl_unlock();

	pci_disable_device(pdev);

	return status;
}

/* Fake a successful reset, which will be performed later in efx_io_resume. */
static pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev)
{
	struct efx_nic *efx = pci_get_drvdata(pdev);
	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;

	if (pci_enable_device(pdev)) {
		netif_err(efx, hw, efx->net_dev,
			  "Cannot re-enable PCI device after reset.\n");
		status = PCI_ERS_RESULT_DISCONNECT;
	}

	return status;
}

/* Perform the actual reset and resume I/O operations. */
static void efx_io_resume(struct pci_dev *pdev)
{
	struct efx_nic *efx = pci_get_drvdata(pdev);
	int rc;

	rtnl_lock();

	if (efx->state == STATE_DISABLED)
		goto out;

	rc = efx_reset(efx, RESET_TYPE_ALL);
	if (rc) {
		netif_err(efx, hw, efx->net_dev,
			  "efx_reset failed after PCI error (%d)\n", rc);
	} else {
		efx->state = efx_recovered(efx->state);
		netif_dbg(efx, hw, efx->net_dev,
			  "Done resetting and resuming IO after PCI error.\n");
	}

out:
	rtnl_unlock();
}

/* For simplicity and reliability, we always require a slot reset and try to
 * reset the hardware when a pci error affecting the device is detected.
 * We leave both the link_reset and mmio_enabled callback unimplemented:
 * with our request for slot reset the mmio_enabled callback will never be
 * called, and the link_reset callback is not used by AER or EEH mechanisms.
 */
const struct pci_error_handlers efx_err_handlers = {
	.error_detected = efx_io_error_detected,
	.slot_reset	= efx_io_slot_reset,
	.resume		= efx_io_resume,
};

/* Determine whether the NIC will be able to handle TX offloads for a given
 * encapsulated packet.
 */
static bool efx_can_encap_offloads(struct efx_nic *efx, struct sk_buff *skb)
{
	struct gre_base_hdr *greh;
	__be16 dst_port;
	u8 ipproto;

	/* Does the NIC support encap offloads?
	 * If not, we should never get here, because we shouldn't have
	 * advertised encap offload feature flags in the first place.
	 */
	if (WARN_ON_ONCE(!efx->type->udp_tnl_has_port))
		return false;

	/* Determine encapsulation protocol in use */
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		ipproto = ip_hdr(skb)->protocol;
		break;
	case htons(ETH_P_IPV6):
		/* If there are extension headers, this will cause us to
		 * think we can't offload something that we maybe could have.
		 */
		ipproto = ipv6_hdr(skb)->nexthdr;
		break;
	default:
		/* Not IP, so can't offload it */
		return false;
	}
	switch (ipproto) {
	case IPPROTO_GRE:
		/* We support NVGRE but not IP over GRE or random gretaps.
		 * Specifically, the NIC will accept GRE as encapsulated if
		 * the inner protocol is Ethernet, but only handle it
		 * correctly if the GRE header is 8 bytes long. Moreover,
		 * it will not update the Checksum or Sequence Number fields
		 * if they are present. (The Routing Present flag,
		 * GRE_ROUTING, cannot be set else the header would be more
		 * than 8 bytes long; so we don't have to worry about it.)
		 */
		if (skb->inner_protocol_type != ENCAP_TYPE_ETHER)
			return false;
		if (ntohs(skb->inner_protocol) != ETH_P_TEB)
			return false;
		if (skb_inner_mac_header(skb) - skb_transport_header(skb) != 8)
			return false;
		greh = (struct gre_base_hdr *)skb_transport_header(skb);
		return !(greh->flags & (GRE_CSUM | GRE_SEQ));
	case IPPROTO_UDP:
		/* If the port is registered for a UDP tunnel, we assume the
		 * packet is for that tunnel, and the NIC will handle it as
		 * such. If not, the NIC won't know what to do with it.
		 */
		dst_port = udp_hdr(skb)->dest;
		return efx->type->udp_tnl_has_port(efx, dst_port);
	default:
		return false;
	}
}

netdev_features_t efx_features_check(struct sk_buff *skb, struct net_device *dev,
				     netdev_features_t features)
{
	struct efx_nic *efx = efx_netdev_priv(dev);

	if (skb->encapsulation) {
		if (features & NETIF_F_GSO_MASK)
			/* Hardware can only do TSO with at most 208 bytes
			 * of headers.
			 */
			if (skb_inner_transport_offset(skb) >
			    EFX_TSO2_MAX_HDRLEN)
				features &= ~(NETIF_F_GSO_MASK);
		if (features & (NETIF_F_GSO_MASK | NETIF_F_CSUM_MASK))
			if (!efx_can_encap_offloads(efx, skb))
				features &= ~(NETIF_F_GSO_MASK |
					      NETIF_F_CSUM_MASK);
	}
	return features;
}

int efx_get_phys_port_id(struct net_device *net_dev,
			 struct netdev_phys_item_id *ppid)
{
	struct efx_nic *efx = efx_netdev_priv(net_dev);

	if (efx->type->get_phys_port_id)
		return efx->type->get_phys_port_id(efx, ppid);
	else
		return -EOPNOTSUPP;
}

int efx_get_phys_port_name(struct net_device *net_dev, char *name, size_t len)
{
	struct efx_nic *efx = efx_netdev_priv(net_dev);

	if (snprintf(name, len, "p%u", efx->port_num) >= len)
		return -EINVAL;
	return 0;
}

void efx_detach_reps(struct efx_nic *efx)
{
	struct net_device *rep_dev;
	struct efx_rep *efv;

	ASSERT_RTNL();
	netif_dbg(efx, drv, efx->net_dev, "Detaching VF representors\n");
	list_for_each_entry(efv, &efx->vf_reps, list) {
		rep_dev = efv->net_dev;
		if (!rep_dev)
			continue;
		netif_carrier_off(rep_dev);
		/* See efx_device_detach_sync() */
		netif_tx_lock_bh(rep_dev);
		netif_tx_stop_all_queues(rep_dev);
		netif_tx_unlock_bh(rep_dev);
	}
}

void efx_attach_reps(struct efx_nic *efx)
{
	struct net_device *rep_dev;
	struct efx_rep *efv;

	ASSERT_RTNL();
	netif_dbg(efx, drv, efx->net_dev, "Attaching VF representors\n");
	list_for_each_entry(efv, &efx->vf_reps, list) {
		rep_dev = efv->net_dev;
		if (!rep_dev)
			continue;
		netif_tx_wake_all_queues(rep_dev);
		netif_carrier_on(rep_dev);
	}
}