1 | /* |
2 | * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. |
3 | * |
4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU |
6 | * General Public License (GPL) Version 2, available from the file |
7 | * COPYING in the main directory of this source tree, or the |
8 | * OpenIB.org BSD license below: |
9 | * |
10 | * Redistribution and use in source and binary forms, with or |
11 | * without modification, are permitted provided that the following |
12 | * conditions are met: |
13 | * |
14 | * - Redistributions of source code must retain the above |
15 | * copyright notice, this list of conditions and the following |
16 | * disclaimer. |
17 | * |
18 | * - Redistributions in binary form must reproduce the above |
19 | * copyright notice, this list of conditions and the following |
20 | * disclaimer in the documentation and/or other materials |
21 | * provided with the distribution. |
22 | * |
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
30 | * SOFTWARE. |
31 | */ |
32 | |
33 | #include <linux/kernel.h> |
34 | #include <linux/random.h> |
35 | #include <linux/vmalloc.h> |
36 | #include <linux/hardirq.h> |
37 | #include <linux/mlx5/driver.h> |
38 | #include <linux/kern_levels.h> |
39 | #include "mlx5_core.h" |
40 | #include "lib/eq.h" |
41 | #include "lib/mlx5.h" |
42 | #include "lib/events.h" |
43 | #include "lib/pci_vsc.h" |
44 | #include "lib/tout.h" |
45 | #include "diag/fw_tracer.h" |
46 | #include "diag/reporter_vnic.h" |
47 | |
/* Consecutive health-counter polls with no progress before the device is
 * declared compromised (see poll_health()).
 */
enum {
	MAX_MISSES = 3,
};

/* Bit in mlx5_core_health->flags: when set, health work items must bail
 * out (device is being drained/removed).
 */
enum {
	MLX5_DROP_HEALTH_WORK,
};

/* Readings returned by mlx5_health_check_fatal_sensors(), in the order
 * the sensors are probed (PCI failures first).
 */
enum {
	MLX5_SENSOR_NO_ERR = 0,
	MLX5_SENSOR_PCI_COMM_ERR = 1,
	MLX5_SENSOR_PCI_ERR = 2,
	MLX5_SENSOR_NIC_DISABLED = 3,
	MLX5_SENSOR_NIC_SW_RESET = 4,
	MLX5_SENSOR_FW_SYND_RFR = 5,
};

/* Layout of the health buffer rfr_severity byte: low 3 bits carry the
 * severity level, bit 3 indicates that severity value is valid.
 */
enum {
	MLX5_SEVERITY_MASK = 0x7,
	MLX5_SEVERITY_VALID_MASK = 0x8,
};
69 | |
70 | u8 mlx5_get_nic_state(struct mlx5_core_dev *dev) |
71 | { |
72 | return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7; |
73 | } |
74 | |
75 | void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) |
76 | { |
77 | u32 cur_cmdq_addr_l_sz; |
78 | |
79 | cur_cmdq_addr_l_sz = ioread32be(&dev->iseg->cmdq_addr_l_sz); |
80 | iowrite32be((cur_cmdq_addr_l_sz & 0xFFFFF000) | |
81 | state << MLX5_NIC_IFC_OFFSET, |
82 | &dev->iseg->cmdq_addr_l_sz); |
83 | } |
84 | |
85 | static bool sensor_pci_not_working(struct mlx5_core_dev *dev) |
86 | { |
87 | struct mlx5_core_health *health = &dev->priv.health; |
88 | struct health_buffer __iomem *h = health->health; |
89 | |
90 | /* Offline PCI reads return 0xffffffff */ |
91 | return (ioread32be(&h->fw_ver) == 0xffffffff); |
92 | } |
93 | |
/* Extract the RFR (recovery-flow-required) bit from the health buffer's
 * rfr_severity byte.
 */
static int mlx5_health_get_rfr(u8 rfr_severity)
{
	return rfr_severity >> MLX5_RFR_BIT_OFFSET;
}
98 | |
/* FW is asking the driver to reset it: true only when both the RFR bit
 * and a non-zero health syndrome are present in the health buffer.
 */
static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev)
{
	struct mlx5_core_health *health = &dev->priv.health;
	struct health_buffer __iomem *h = health->health;
	u8 synd = ioread8(&h->synd);
	u8 rfr;

	rfr = mlx5_health_get_rfr(rfr_severity: ioread8(&h->rfr_severity));

	if (rfr && synd)
		mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd);
	return rfr && synd;
}
112 | |
113 | u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev) |
114 | { |
115 | if (sensor_pci_not_working(dev)) |
116 | return MLX5_SENSOR_PCI_COMM_ERR; |
117 | if (pci_channel_offline(pdev: dev->pdev)) |
118 | return MLX5_SENSOR_PCI_ERR; |
119 | if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED) |
120 | return MLX5_SENSOR_NIC_DISABLED; |
121 | if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET) |
122 | return MLX5_SENSOR_NIC_SW_RESET; |
123 | if (sensor_fw_synd_rfr(dev)) |
124 | return MLX5_SENSOR_FW_SYND_RFR; |
125 | |
126 | return MLX5_SENSOR_NO_ERR; |
127 | } |
128 | |
/* Take (lock=true) or release (lock=false) the cross-function SW reset
 * semaphore via the vendor-specific capability (VSC) gateway.
 * Returns 0 on success, -EBUSY when another PF already owns the reset,
 * -EINVAL when GW access itself is contended, or a VSC access error.
 */
static int lock_sem_sw_reset(struct mlx5_core_dev *dev, bool lock)
{
	enum mlx5_vsc_state state;
	int ret;

	/* Only a PF may arbitrate the SW reset semaphore. */
	if (!mlx5_core_is_pf(dev))
		return -EBUSY;

	/* Try to lock GW access, this stage doesn't return
	 * EBUSY because locked GW does not mean that other PF
	 * already started the reset.
	 */
	ret = mlx5_vsc_gw_lock(dev);
	if (ret == -EBUSY)
		return -EINVAL;
	if (ret)
		return ret;

	state = lock ? MLX5_VSC_LOCK : MLX5_VSC_UNLOCK;
	/* At this stage, if the return status == EBUSY, then we know
	 * for sure that another PF started the reset, so don't allow
	 * another reset.
	 */
	ret = mlx5_vsc_sem_set_space(dev, space: MLX5_SEMAPHORE_SW_RESET, state);
	if (ret)
		mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n");

	/* Unlock GW access */
	mlx5_vsc_gw_unlock(dev);

	return ret;
}
161 | |
/* Issue a FW-level SW reset if the device supports it and a reset can
 * still help. Returns true when a reset was actually triggered.
 */
static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
{
	/* Capability bit in the initialization segment advertising FW reset
	 * support.
	 */
	bool supported = (ioread32be(&dev->iseg->initializing) >>
			  MLX5_FW_RESET_SUPPORTED_OFFSET) & 1;
	u32 fatal_error;

	if (!supported)
		return false;

	/* The reset only needs to be issued by one PF. The health buffer is
	 * shared between all functions, and will be cleared during a reset.
	 * Check again to avoid a redundant 2nd reset. If the fatal errors was
	 * PCI related a reset won't help.
	 */
	fatal_error = mlx5_health_check_fatal_sensors(dev);
	if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR ||
	    fatal_error == MLX5_SENSOR_NIC_DISABLED ||
	    fatal_error == MLX5_SENSOR_NIC_SW_RESET) {
		mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help.");
		return false;
	}

	mlx5_core_warn(dev, "Issuing FW Reset\n");
	/* Write the NIC interface field to initiate the reset, the command
	 * interface address also resides here, don't overwrite it.
	 */
	mlx5_set_nic_state(dev, state: MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET);

	return true;
}
192 | |
/* Move the device into internal-error state (caller holds
 * intf_state_mutex) and broadcast MLX5_DEV_EVENT_SYS_ERROR so consumers
 * can tear down. The state flip and command flush only happen when a
 * fatal sensor actually tripped or the caller forces it.
 */
static void enter_error_state(struct mlx5_core_dev *dev, bool force)
{
	if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */
		dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
		/* Fail all pending/future commands so nothing blocks on dead HW */
		mlx5_cmd_flush(dev);
	}

	mlx5_notifier_call_chain(events: dev->priv.events, event: MLX5_DEV_EVENT_SYS_ERROR, data: (void *)1);
}
202 | |
/* Public entry for entering the error state. Marks the device fatal
 * before taking intf_state_mutex so in-flight FW commands abort rather
 * than wait out their timeouts, then performs the full transition under
 * the lock unless a previous error is already being handled.
 */
void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
{
	bool err_detected = false;

	/* Mark the device as fatal in order to abort FW commands */
	if ((mlx5_health_check_fatal_sensors(dev) || force) &&
	    dev->state == MLX5_DEVICE_STATE_UP) {
		dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
		err_detected = true;
	}
	mutex_lock(&dev->intf_state_mutex);
	if (!err_detected && dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
		goto unlock;/* a previous error is still being handled */

	enter_error_state(dev, force);
unlock:
	mutex_unlock(lock: &dev->intf_state_mutex);
}
221 | |
/* Attempt a SW reset after a fatal error and wait for the NIC interface
 * to report DISABLED. If another PF already owns the reset semaphore,
 * just wait longer (FULL_CRDUMP timeout) for that PF's reset to finish.
 * No-op unless the device is already in internal-error state.
 */
void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
{
	unsigned long end, delay_ms = mlx5_tout_ms(dev, PCI_TOGGLE);
	/* -EBUSY means "we never took the semaphore"; 0 means we own it */
	int lock = -EBUSY;

	mutex_lock(&dev->intf_state_mutex);
	if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
		goto unlock;

	mlx5_core_err(dev, "start\n");

	if (mlx5_health_check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) {
		/* Get cr-dump and reset FW semaphore */
		lock = lock_sem_sw_reset(dev, lock: true);

		if (lock == -EBUSY) {
			delay_ms = mlx5_tout_ms(dev, FULL_CRDUMP);
			goto recover_from_sw_reset;
		}
		/* Execute SW reset */
		reset_fw_if_needed(dev);
	}

recover_from_sw_reset:
	/* Recover from SW reset */
	end = jiffies + msecs_to_jiffies(m: delay_ms);
	do {
		if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED)
			break;

		msleep(msecs: 20);
	} while (!time_after(jiffies, end));

	if (mlx5_get_nic_state(dev) != MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED) {
		dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
			mlx5_get_nic_state(dev), delay_ms);
	}

	/* Release FW semaphore if you are the lock owner */
	if (!lock)
		lock_sem_sw_reset(dev, lock: false);

	mlx5_core_err(dev, "end\n");

unlock:
	mutex_unlock(lock: &dev->intf_state_mutex);
}
269 | |
270 | static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) |
271 | { |
272 | u8 nic_interface = mlx5_get_nic_state(dev); |
273 | |
274 | switch (nic_interface) { |
275 | case MLX5_INITIAL_SEG_NIC_INTERFACE_FULL_DRIVER: |
276 | mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n" ); |
277 | break; |
278 | |
279 | case MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED: |
280 | mlx5_core_warn(dev, "starting teardown\n" ); |
281 | break; |
282 | |
283 | case MLX5_INITIAL_SEG_NIC_INTERFACE_NO_DRAM_NIC: |
284 | mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n" ); |
285 | break; |
286 | |
287 | case MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET: |
288 | /* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases: |
289 | * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded |
290 | * and this is a VF), this is not recoverable by SW reset. |
291 | * Logging of this is handled elsewhere. |
292 | * 2. FW reset has been issued by another function, driver can |
293 | * be reloaded to recover after the mode switches to |
294 | * MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED. |
295 | */ |
296 | if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR) |
297 | mlx5_core_warn(dev, "NIC SW reset in progress\n" ); |
298 | break; |
299 | |
300 | default: |
301 | mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n" , |
302 | nic_interface); |
303 | } |
304 | |
305 | mlx5_disable_device(dev); |
306 | } |
307 | |
/* Poll until PCI reads work again (fw_ver no longer reads all ones).
 * Returns 0 on success, -ETIMEDOUT after the FW_RESET timeout, or
 * -ENODEV when device removal is racing with the wait.
 */
int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev)
{
	unsigned long end;

	end = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FW_RESET));
	while (sensor_pci_not_working(dev)) {
		if (time_after(jiffies, end))
			return -ETIMEDOUT;
		/* Removal path sets this bit to abort long waits */
		if (test_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) {
			mlx5_core_warn(dev, "device is being removed, stop waiting for PCI\n");
			return -ENODEV;
		}
		msleep(msecs: 100);
	}
	return 0;
}
324 | |
/* Full recovery sequence: tear the device down, wait for PCI to come
 * back, then reload and re-check the fatal sensors. Returns 0 on a
 * clean recovery, -EIO otherwise.
 */
static int mlx5_health_try_recover(struct mlx5_core_dev *dev)
{
	mlx5_core_warn(dev, "handling bad device here\n");
	mlx5_handle_bad_state(dev);
	if (mlx5_health_wait_pci_up(dev)) {
		mlx5_core_err(dev, "health recovery flow aborted, PCI reads still not working\n");
		return -EIO;
	}
	mlx5_core_err(dev, "starting health recovery flow\n");
	/* Recovery fails either if reload fails or a sensor trips again */
	if (mlx5_recover_device(dev) || mlx5_health_check_fatal_sensors(dev)) {
		mlx5_core_err(dev, "health recovery failed\n");
		return -EIO;
	}

	mlx5_core_info(dev, "health recovery succeeded\n");
	return 0;
}
342 | |
343 | static const char *hsynd_str(u8 synd) |
344 | { |
345 | switch (synd) { |
346 | case MLX5_INITIAL_SEG_HEALTH_SYNDROME_FW_INTERNAL_ERR: |
347 | return "firmware internal error" ; |
348 | case MLX5_INITIAL_SEG_HEALTH_SYNDROME_DEAD_IRISC: |
349 | return "irisc not responding" ; |
350 | case MLX5_INITIAL_SEG_HEALTH_SYNDROME_HW_FATAL_ERR: |
351 | return "unrecoverable hardware error" ; |
352 | case MLX5_INITIAL_SEG_HEALTH_SYNDROME_FW_CRC_ERR: |
353 | return "firmware CRC error" ; |
354 | case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_FETCH_PCI_ERR: |
355 | return "ICM fetch PCI error" ; |
356 | case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PAGE_ERR: |
357 | return "HW fatal error\n" ; |
358 | case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ASYNCHRONOUS_EQ_BUF_OVERRUN: |
359 | return "async EQ buffer overrun" ; |
360 | case MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_IN_ERR: |
361 | return "EQ error" ; |
362 | case MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_INV: |
363 | return "Invalid EQ referenced" ; |
364 | case MLX5_INITIAL_SEG_HEALTH_SYNDROME_FFSER_ERR: |
365 | return "FFSER error" ; |
366 | case MLX5_INITIAL_SEG_HEALTH_SYNDROME_HIGH_TEMP_ERR: |
367 | return "High temperature" ; |
368 | case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PCI_POISONED_ERR: |
369 | return "ICM fetch PCI data poisoned error" ; |
370 | default: |
371 | return "unrecognized error" ; |
372 | } |
373 | } |
374 | |
375 | static const char *mlx5_loglevel_str(int level) |
376 | { |
377 | switch (level) { |
378 | case LOGLEVEL_EMERG: |
379 | return "EMERGENCY" ; |
380 | case LOGLEVEL_ALERT: |
381 | return "ALERT" ; |
382 | case LOGLEVEL_CRIT: |
383 | return "CRITICAL" ; |
384 | case LOGLEVEL_ERR: |
385 | return "ERROR" ; |
386 | case LOGLEVEL_WARNING: |
387 | return "WARNING" ; |
388 | case LOGLEVEL_NOTICE: |
389 | return "NOTICE" ; |
390 | case LOGLEVEL_INFO: |
391 | return "INFO" ; |
392 | case LOGLEVEL_DEBUG: |
393 | return "DEBUG" ; |
394 | } |
395 | return "Unknown log level" ; |
396 | } |
397 | |
398 | static int mlx5_health_get_severity(u8 rfr_severity) |
399 | { |
400 | return rfr_severity & MLX5_SEVERITY_VALID_MASK ? |
401 | rfr_severity & MLX5_SEVERITY_MASK : LOGLEVEL_ERR; |
402 | } |
403 | |
404 | static void print_health_info(struct mlx5_core_dev *dev) |
405 | { |
406 | struct mlx5_core_health *health = &dev->priv.health; |
407 | struct health_buffer __iomem *h = health->health; |
408 | u8 rfr_severity; |
409 | int severity; |
410 | int i; |
411 | |
412 | /* If the syndrome is 0, the device is OK and no need to print buffer */ |
413 | if (!ioread8(&h->synd)) |
414 | return; |
415 | |
416 | if (ioread32be(&h->fw_ver) == 0xFFFFFFFF) { |
417 | mlx5_log(dev, LOGLEVEL_ERR, "PCI slot is unavailable\n" ); |
418 | return; |
419 | } |
420 | |
421 | rfr_severity = ioread8(&h->rfr_severity); |
422 | severity = mlx5_health_get_severity(rfr_severity); |
423 | mlx5_log(dev, severity, "Health issue observed, %s, severity(%d) %s:\n" , |
424 | hsynd_str(ioread8(&h->synd)), severity, mlx5_loglevel_str(severity)); |
425 | |
426 | for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) |
427 | mlx5_log(dev, severity, "assert_var[%d] 0x%08x\n" , i, |
428 | ioread32be(h->assert_var + i)); |
429 | |
430 | mlx5_log(dev, severity, "assert_exit_ptr 0x%08x\n" , ioread32be(&h->assert_exit_ptr)); |
431 | mlx5_log(dev, severity, "assert_callra 0x%08x\n" , ioread32be(&h->assert_callra)); |
432 | mlx5_log(dev, severity, "fw_ver %d.%d.%d" , fw_rev_maj(dev), fw_rev_min(dev), |
433 | fw_rev_sub(dev)); |
434 | mlx5_log(dev, severity, "time %u\n" , ioread32be(&h->time)); |
435 | mlx5_log(dev, severity, "hw_id 0x%08x\n" , ioread32be(&h->hw_id)); |
436 | mlx5_log(dev, severity, "rfr %d\n" , mlx5_health_get_rfr(rfr_severity)); |
437 | mlx5_log(dev, severity, "severity %d (%s)\n" , severity, mlx5_loglevel_str(severity)); |
438 | mlx5_log(dev, severity, "irisc_index %d\n" , ioread8(&h->irisc_index)); |
439 | mlx5_log(dev, severity, "synd 0x%x: %s\n" , ioread8(&h->synd), |
440 | hsynd_str(ioread8(&h->synd))); |
441 | mlx5_log(dev, severity, "ext_synd 0x%04x\n" , ioread16be(&h->ext_synd)); |
442 | mlx5_log(dev, severity, "raw fw_ver 0x%08x\n" , ioread32be(&h->fw_ver)); |
443 | } |
444 | |
/* devlink "fw" reporter .diagnose callback: report the current health
 * syndrome (and its description when non-zero). Always returns 0.
 */
static int
mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
			  struct devlink_fmsg *fmsg,
			  struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
	struct mlx5_core_health *health = &dev->priv.health;
	struct health_buffer __iomem *h = health->health;
	u8 synd = ioread8(&h->synd);

	devlink_fmsg_u8_pair_put(fmsg, name: "Syndrome", value: synd);
	if (!synd)
		return 0;

	devlink_fmsg_string_pair_put(fmsg, name: "Description", value: hsynd_str(synd));

	return 0;
}
463 | |
/* Snapshot of the health state passed as priv_ctx to devlink health
 * reports (see mlx5_fw_reporter_err_work() and the fatal variant).
 */
struct mlx5_fw_reporter_ctx {
	u8 err_synd;      /* last health syndrome read by poll_health() */
	int miss_counter; /* consecutive health-counter polls with no progress */
};
468 | |
/* Emit the reporter context (syndrome + miss counter) into a devlink
 * formatted message.
 */
static void
mlx5_fw_reporter_ctx_pairs_put(struct devlink_fmsg *fmsg,
			       struct mlx5_fw_reporter_ctx *fw_reporter_ctx)
{
	devlink_fmsg_u8_pair_put(fmsg, name: "syndrome", value: fw_reporter_ctx->err_synd);
	devlink_fmsg_u32_pair_put(fmsg, name: "fw_miss_counter", value: fw_reporter_ctx->miss_counter);
}
476 | |
/* Serialize the raw FW health buffer into a devlink fmsg object.
 * Does nothing when no syndrome is set.
 * NOTE(review): function name says "heath" — long-standing typo for
 * "health"; renaming would require touching its caller.
 */
static void
mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev,
				       struct devlink_fmsg *fmsg)
{
	struct mlx5_core_health *health = &dev->priv.health;
	struct health_buffer __iomem *h = health->health;
	u8 rfr_severity;
	int i;

	if (!ioread8(&h->synd))
		return;

	devlink_fmsg_pair_nest_start(fmsg, name: "health buffer");
	devlink_fmsg_obj_nest_start(fmsg);
	devlink_fmsg_arr_pair_nest_start(fmsg, name: "assert_var");
	for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
		devlink_fmsg_u32_put(fmsg, value: ioread32be(h->assert_var + i));
	devlink_fmsg_arr_pair_nest_end(fmsg);
	devlink_fmsg_u32_pair_put(fmsg, name: "assert_exit_ptr",
				  value: ioread32be(&h->assert_exit_ptr));
	devlink_fmsg_u32_pair_put(fmsg, name: "assert_callra",
				  value: ioread32be(&h->assert_callra));
	devlink_fmsg_u32_pair_put(fmsg, name: "time", value: ioread32be(&h->time));
	devlink_fmsg_u32_pair_put(fmsg, name: "hw_id", value: ioread32be(&h->hw_id));
	rfr_severity = ioread8(&h->rfr_severity);
	devlink_fmsg_u8_pair_put(fmsg, name: "rfr", value: mlx5_health_get_rfr(rfr_severity));
	devlink_fmsg_u8_pair_put(fmsg, name: "severity", value: mlx5_health_get_severity(rfr_severity));
	devlink_fmsg_u8_pair_put(fmsg, name: "irisc_index", value: ioread8(&h->irisc_index));
	devlink_fmsg_u8_pair_put(fmsg, name: "synd", value: ioread8(&h->synd));
	devlink_fmsg_u32_pair_put(fmsg, name: "ext_synd", value: ioread16be(&h->ext_synd));
	devlink_fmsg_u32_pair_put(fmsg, name: "raw_fw_ver", value: ioread32be(&h->fw_ver));
	devlink_fmsg_obj_nest_end(fmsg);
	devlink_fmsg_pair_nest_end(fmsg);
}
511 | |
/* devlink "fw" reporter .dump callback: trigger a FW core dump, then
 * emit the reporter context (when invoked from a report), the health
 * buffer, and the saved FW tracer strings.
 */
static int
mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter,
		      struct devlink_fmsg *fmsg, void *priv_ctx,
		      struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
	int err;

	err = mlx5_fw_tracer_trigger_core_dump_general(dev);
	if (err)
		return err;

	/* priv_ctx is only set when the dump runs as part of a report */
	if (priv_ctx) {
		struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;

		mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
	}

	mlx5_fw_reporter_heath_buffer_data_put(dev, fmsg);

	return mlx5_fw_tracer_get_saved_traces_objects(tracer: dev->tracer, fmsg);
}
534 | |
/* Workqueue handler for non-fatal FW issues queued by poll_health():
 * report either a new syndrome or a stalled health counter through the
 * "fw" devlink reporter (syndrome takes precedence).
 */
static void mlx5_fw_reporter_err_work(struct work_struct *work)
{
	struct mlx5_fw_reporter_ctx fw_reporter_ctx;
	struct mlx5_core_health *health;

	health = container_of(work, struct mlx5_core_health, report_work);

	/* Reporter creation may have failed or been skipped */
	if (IS_ERR_OR_NULL(ptr: health->fw_reporter))
		return;

	fw_reporter_ctx.err_synd = health->synd;
	fw_reporter_ctx.miss_counter = health->miss_counter;
	if (fw_reporter_ctx.err_synd) {
		devlink_health_report(reporter: health->fw_reporter,
				      msg: "FW syndrome reported", priv_ctx: &fw_reporter_ctx);
		return;
	}
	if (fw_reporter_ctx.miss_counter)
		devlink_health_report(reporter: health->fw_reporter,
				      msg: "FW miss counter reported",
				      priv_ctx: &fw_reporter_ctx);
}
557 | |
/* "fw" reporter ops for PFs/ECPFs: these can trigger a core dump. */
static const struct devlink_health_reporter_ops mlx5_fw_reporter_pf_ops = {
	.name = "fw",
	.diagnose = mlx5_fw_reporter_diagnose,
	.dump = mlx5_fw_reporter_dump,
};

/* "fw" reporter ops for VFs/SFs: diagnose only, no dump capability. */
static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
	.name = "fw",
	.diagnose = mlx5_fw_reporter_diagnose,
};
568 | |
/* devlink "fw_fatal" reporter .recover callback: delegate to the full
 * teardown/reload recovery flow.
 */
static int
mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
			       void *priv_ctx,
			       struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);

	return mlx5_health_try_recover(dev);
}
578 | |
/* devlink "fw_fatal" reporter .dump callback: collect a crspace dump
 * (PF only) and attach it, plus the report context when present.
 * Returns -EPERM for non-PF, -ENOMEM on allocation failure, or the
 * crdump collection error.
 */
static int
mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
			    struct devlink_fmsg *fmsg, void *priv_ctx,
			    struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
	u32 crdump_size = dev->priv.health.crdump_size;
	u32 *cr_data;
	int err;

	if (!mlx5_core_is_pf(dev))
		return -EPERM;

	cr_data = kvmalloc(size: crdump_size, GFP_KERNEL);
	if (!cr_data)
		return -ENOMEM;
	err = mlx5_crdump_collect(dev, cr_data);
	if (err)
		goto free_data;

	/* priv_ctx is only set when dumping in response to a report */
	if (priv_ctx) {
		struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;

		mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
	}

	devlink_fmsg_binary_pair_put(fmsg, name: "crdump_data", value: cr_data, value_len: crdump_size);

free_data:
	kvfree(addr: cr_data);
	return err;
}
611 | |
/* Workqueue handler for fatal errors queued by mlx5_trigger_health_work():
 * enter the error state, then either recover directly (no reporter) or
 * hand off to devlink health. If devlink declines due to the grace
 * period, unload the driver instead of leaving it half-alive.
 */
static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
{
	struct mlx5_fw_reporter_ctx fw_reporter_ctx;
	struct mlx5_core_health *health;
	struct mlx5_core_dev *dev;
	struct devlink *devlink;
	struct mlx5_priv *priv;

	health = container_of(work, struct mlx5_core_health, fatal_report_work);
	priv = container_of(health, struct mlx5_priv, health);
	dev = container_of(priv, struct mlx5_core_dev, priv);
	devlink = priv_to_devlink(priv: dev);

	/* Drop flag is checked under intf_state_mutex so it can't race
	 * with the drain/teardown path setting it.
	 */
	mutex_lock(&dev->intf_state_mutex);
	if (test_bit(MLX5_DROP_HEALTH_WORK, &health->flags)) {
		mlx5_core_err(dev, "health works are not permitted at this stage\n");
		mutex_unlock(lock: &dev->intf_state_mutex);
		return;
	}
	mutex_unlock(lock: &dev->intf_state_mutex);
	enter_error_state(dev, force: false);
	if (IS_ERR_OR_NULL(ptr: health->fw_fatal_reporter)) {
		devl_lock(devlink);
		if (mlx5_health_try_recover(dev))
			mlx5_core_err(dev, "health recovery failed\n");
		devl_unlock(devlink);
		return;
	}
	fw_reporter_ctx.err_synd = health->synd;
	fw_reporter_ctx.miss_counter = health->miss_counter;
	if (devlink_health_report(reporter: health->fw_fatal_reporter,
				  msg: "FW fatal error reported", priv_ctx: &fw_reporter_ctx) == -ECANCELED) {
		/* If recovery wasn't performed, due to grace period,
		 * unload the driver. This ensures that the driver
		 * closes all its resources and it is not subjected to
		 * requests from the kernel.
		 */
		mlx5_core_err(dev, "Driver is in error state. Unloading\n");
		mlx5_unload_one(dev, suspend: false);
	}
}
653 | |
/* "fw_fatal" reporter ops for PFs/ECPFs: recovery plus crdump. */
static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_pf_ops = {
	.name = "fw_fatal",
	.recover = mlx5_fw_fatal_reporter_recover,
	.dump = mlx5_fw_fatal_reporter_dump,
};

/* "fw_fatal" reporter ops for VFs/SFs: recovery only (dump is PF-only). */
static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
	.name = "fw_fatal",
	.recover = mlx5_fw_fatal_reporter_recover,
};
664 | |
/* devlink auto-recovery grace periods (ms) per function type; larger
 * functions get longer windows between permitted recoveries.
 */
#define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD 180000
#define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD 60000
#define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000
#define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD
669 | |
/* Create the "fw" and "fw_fatal" devlink health reporters, picking the
 * ops variant and grace period for the function type (ECPF/PF vs
 * VF/SF). Creation failures are logged but not fatal; users of the
 * reporters check IS_ERR_OR_NULL before use.
 */
void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
{
	const struct devlink_health_reporter_ops *fw_fatal_ops;
	struct mlx5_core_health *health = &dev->priv.health;
	const struct devlink_health_reporter_ops *fw_ops;
	struct devlink *devlink = priv_to_devlink(priv: dev);
	u64 grace_period;

	fw_fatal_ops = &mlx5_fw_fatal_reporter_pf_ops;
	fw_ops = &mlx5_fw_reporter_pf_ops;
	if (mlx5_core_is_ecpf(dev)) {
		grace_period = MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD;
	} else if (mlx5_core_is_pf(dev)) {
		grace_period = MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD;
	} else {
		/* VF or SF */
		grace_period = MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD;
		fw_fatal_ops = &mlx5_fw_fatal_reporter_ops;
		fw_ops = &mlx5_fw_reporter_ops;
	}

	/* Non-fatal reporter has no auto-recovery, hence no grace period */
	health->fw_reporter =
		devl_health_reporter_create(devlink, ops: fw_ops, graceful_period: 0, priv: dev);
	if (IS_ERR(ptr: health->fw_reporter))
		mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n",
			       PTR_ERR(health->fw_reporter));

	health->fw_fatal_reporter =
		devl_health_reporter_create(devlink,
					    ops: fw_fatal_ops,
					    graceful_period: grace_period,
					    priv: dev);
	if (IS_ERR(ptr: health->fw_fatal_reporter))
		mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n",
			       PTR_ERR(health->fw_fatal_reporter));
}
706 | |
/* Destroy both devlink health reporters; safe to call even when
 * creation failed (pointers may be ERR_PTR or NULL).
 */
static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev)
{
	struct mlx5_core_health *health = &dev->priv.health;

	if (!IS_ERR_OR_NULL(ptr: health->fw_reporter))
		devlink_health_reporter_destroy(reporter: health->fw_reporter);

	if (!IS_ERR_OR_NULL(ptr: health->fw_fatal_reporter))
		devlink_health_reporter_destroy(reporter: health->fw_fatal_reporter);
}
717 | |
/* Next health-timer expiry: the configured poll interval plus up to one
 * second of random jitter, so multiple devices don't poll in lockstep.
 */
static unsigned long get_next_poll_jiffies(struct mlx5_core_dev *dev)
{
	unsigned long next;

	get_random_bytes(buf: &next, len: sizeof(next));
	next %= HZ;
	next += jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL));

	return next;
}
728 | |
/* Queue the fatal-error handling work, unless the device is a
 * lightweight (health-work-less) instance.
 */
void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
{
	struct mlx5_core_health *health = &dev->priv.health;

	if (!mlx5_dev_is_lightweight(dev))
		queue_work(wq: health->wq, work: &health->fatal_report_work);
}
736 | |
#define MLX5_MSEC_PER_HOUR (MSEC_PER_SEC * 60 * 60)
/* Hourly delayed work: push the host wall-clock time (microseconds) to
 * FW via the MRTC register so FW log timestamps stay correlated with
 * host time. Re-queues itself; the register write result is best-effort
 * and intentionally ignored.
 */
static void mlx5_health_log_ts_update(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	u32 out[MLX5_ST_SZ_DW(mrtc_reg)] = {};
	u32 in[MLX5_ST_SZ_DW(mrtc_reg)] = {};
	struct mlx5_core_health *health;
	struct mlx5_core_dev *dev;
	struct mlx5_priv *priv;
	u64 now_us;

	health = container_of(dwork, struct mlx5_core_health, update_fw_log_ts_work);
	priv = container_of(health, struct mlx5_priv, health);
	dev = container_of(priv, struct mlx5_core_dev, priv);

	now_us = ktime_to_us(kt: ktime_get_real());

	MLX5_SET(mrtc_reg, in, time_h, now_us >> 32);
	MLX5_SET(mrtc_reg, in, time_l, now_us & 0xFFFFFFFF);
	mlx5_core_access_reg(dev, data_in: in, size_in: sizeof(in), data_out: out, size_out: sizeof(out), reg_num: MLX5_REG_MRTC, arg: 0, write: 1);

	queue_delayed_work(wq: health->wq, dwork: &health->update_fw_log_ts_work,
			   delay: msecs_to_jiffies(MLX5_MSEC_PER_HOUR));
}
761 | |
/* Periodic health timer. Checks fatal sensors (on a new fatal error:
 * log, flip the device to internal-error, queue the fatal work, and
 * stop re-arming the timer), tracks the FW health counter for stalls,
 * and reports newly-appearing syndromes via the non-fatal work.
 */
static void poll_health(struct timer_list *t)
{
	struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer);
	struct mlx5_core_health *health = &dev->priv.health;
	struct health_buffer __iomem *h = health->health;
	u32 fatal_error;
	u8 prev_synd;
	u32 count;

	/* Error already being handled elsewhere; just keep the timer alive */
	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
		goto out;

	fatal_error = mlx5_health_check_fatal_sensors(dev);

	if (fatal_error && !health->fatal_error) {
		mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
		dev->priv.health.fatal_error = fatal_error;
		print_health_info(dev);
		dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
		mlx5_trigger_health_work(dev);
		/* Timer is not re-armed: the recovery flow owns the device now */
		return;
	}

	/* A stuck health counter means FW stopped making progress */
	count = ioread32be(health->health_counter);
	if (count == health->prev)
		++health->miss_counter;
	else
		health->miss_counter = 0;

	health->prev = count;
	if (health->miss_counter == MAX_MISSES) {
		mlx5_core_err(dev, "device's health compromised - reached miss count\n");
		print_health_info(dev);
		queue_work(wq: health->wq, work: &health->report_work);
	}

	/* Report a syndrome only on its first appearance */
	prev_synd = health->synd;
	health->synd = ioread8(&h->synd);
	if (health->synd && health->synd != prev_synd)
		queue_work(wq: health->wq, work: &health->report_work);

out:
	mod_timer(timer: &health->timer, expires: get_next_poll_jiffies(dev));
}
806 | |
/* Arm the periodic health poll timer and reset the poller's state
 * (fatal_error, drop flag, health buffer/counter pointers).
 */
void mlx5_start_health_poll(struct mlx5_core_dev *dev)
{
	u64 poll_interval_ms = mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL);
	struct mlx5_core_health *health = &dev->priv.health;

	timer_setup(&health->timer, poll_health, 0);
	health->fatal_error = MLX5_SENSOR_NO_ERR;
	clear_bit(nr: MLX5_DROP_HEALTH_WORK, addr: &health->flags);
	health->health = &dev->iseg->health;
	health->health_counter = &dev->iseg->health_counter;

	health->timer.expires = jiffies + msecs_to_jiffies(m: poll_interval_ms);
	add_timer(timer: &health->timer);
}
821 | |
/* Stop the health poll timer. When disable_health is set, also forbid
 * any further health work from running (teardown path).
 */
void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
{
	struct mlx5_core_health *health = &dev->priv.health;

	if (disable_health)
		set_bit(nr: MLX5_DROP_HEALTH_WORK, addr: &health->flags);

	/* Waits for a concurrently-running poll_health() to finish */
	del_timer_sync(timer: &health->timer);
}
831 | |
/* Kick off the hourly FW log timestamp sync, PF only and only when FW
 * advertises the MRTC register (MCAM bit).
 */
void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev)
{
	struct mlx5_core_health *health = &dev->priv.health;

	if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc))
		queue_delayed_work(wq: health->wq, dwork: &health->update_fw_log_ts_work, delay: 0);
}
839 | |
/* Forbid new health works and wait for any in-flight ones to finish.
 * The drop bit is set first so works that start after this cannot
 * requeue themselves or report.
 */
void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
{
	struct mlx5_core_health *health = &dev->priv.health;

	set_bit(nr: MLX5_DROP_HEALTH_WORK, addr: &health->flags);
	cancel_delayed_work_sync(dwork: &health->update_fw_log_ts_work);
	cancel_work_sync(work: &health->report_work);
	cancel_work_sync(work: &health->fatal_report_work);
}
849 | |
/* Tear down everything mlx5_health_init() created: the timestamp work,
 * the health workqueue, and the vnic/fw devlink reporters.
 */
void mlx5_health_cleanup(struct mlx5_core_dev *dev)
{
	struct mlx5_core_health *health = &dev->priv.health;

	cancel_delayed_work_sync(dwork: &health->update_fw_log_ts_work);
	destroy_workqueue(wq: health->wq);
	mlx5_reporter_vnic_destroy(dev);
	mlx5_fw_reporters_destroy(dev);
}
859 | |
860 | int mlx5_health_init(struct mlx5_core_dev *dev) |
861 | { |
862 | struct devlink *devlink = priv_to_devlink(priv: dev); |
863 | struct mlx5_core_health *health; |
864 | char *name; |
865 | |
866 | if (!mlx5_dev_is_lightweight(dev)) { |
867 | devl_lock(devlink); |
868 | mlx5_fw_reporters_create(dev); |
869 | devl_unlock(devlink); |
870 | } |
871 | mlx5_reporter_vnic_create(dev); |
872 | |
873 | health = &dev->priv.health; |
874 | name = kmalloc(size: 64, GFP_KERNEL); |
875 | if (!name) |
876 | goto out_err; |
877 | |
878 | strcpy(p: name, q: "mlx5_health" ); |
879 | strcat(p: name, q: dev_name(dev: dev->device)); |
880 | health->wq = create_singlethread_workqueue(name); |
881 | kfree(objp: name); |
882 | if (!health->wq) |
883 | goto out_err; |
884 | INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work); |
885 | INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work); |
886 | INIT_DELAYED_WORK(&health->update_fw_log_ts_work, mlx5_health_log_ts_update); |
887 | |
888 | return 0; |
889 | |
890 | out_err: |
891 | mlx5_reporter_vnic_destroy(dev); |
892 | mlx5_fw_reporters_destroy(dev); |
893 | return -ENOMEM; |
894 | } |
895 | |