/*
 * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
32
33#include <linux/etherdevice.h>
34#include <linux/mlx5/driver.h>
35
36#include "mlx5_core.h"
37#include "lib/mlx5.h"
38#include "lib/eq.h"
39#include "fpga/core.h"
40#include "fpga/conn.h"
41
/* Human-readable names for FPGA error syndromes, indexed by syndrome value. */
static const char *const mlx5_fpga_error_strings[] = {
	[0] = "Null Syndrome",
	[1] = "Corrupted DDR",
	[2] = "Flash Timeout",
	[3] = "Internal Link Error",
	[4] = "Watchdog HW Failure",
	[5] = "I2C Failure",
	[6] = "Image Changed",
	[7] = "Temperature Critical",
};
52
/* Human-readable names for FPGA QP error syndromes, indexed by syndrome value. */
static const char *const mlx5_fpga_qp_error_strings[] = {
	[0] = "Null Syndrome",
	[1] = "Retry Counter Expired",
	[2] = "RNR Expired",
};
58static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void)
59{
60 struct mlx5_fpga_device *fdev;
61
62 fdev = kzalloc(size: sizeof(*fdev), GFP_KERNEL);
63 if (!fdev)
64 return NULL;
65
66 spin_lock_init(&fdev->state_lock);
67 fdev->state = MLX5_FPGA_STATUS_NONE;
68 return fdev;
69}
70
71static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image)
72{
73 switch (image) {
74 case MLX5_FPGA_IMAGE_USER:
75 return "user";
76 case MLX5_FPGA_IMAGE_FACTORY:
77 return "factory";
78 default:
79 return "unknown";
80 }
81}
82
83static const char *mlx5_fpga_name(u32 fpga_id)
84{
85 static char ret[32];
86
87 switch (fpga_id) {
88 case MLX5_FPGA_NEWTON:
89 return "Newton";
90 case MLX5_FPGA_EDISON:
91 return "Edison";
92 case MLX5_FPGA_MORSE:
93 return "Morse";
94 case MLX5_FPGA_MORSEQ:
95 return "MorseQ";
96 }
97
98 snprintf(buf: ret, size: sizeof(ret), fmt: "Unknown %d", fpga_id);
99 return ret;
100}
101
102static int mlx5_is_fpga_lookaside(u32 fpga_id)
103{
104 return fpga_id != MLX5_FPGA_NEWTON && fpga_id != MLX5_FPGA_EDISON;
105}
106
107static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
108{
109 struct mlx5_fpga_query query;
110 int err;
111
112 err = mlx5_fpga_query(dev: fdev->mdev, query: &query);
113 if (err) {
114 mlx5_fpga_err(fdev, "Failed to query status: %d\n", err);
115 return err;
116 }
117
118 fdev->last_admin_image = query.admin_image;
119 fdev->last_oper_image = query.oper_image;
120
121 mlx5_fpga_info(fdev, "Status %u; Admin image %u; Oper image %u\n",
122 query.status, query.admin_image, query.oper_image);
123
124 /* for FPGA lookaside projects FPGA load status is not important */
125 if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id)))
126 return 0;
127
128 if (query.status != MLX5_FPGA_STATUS_SUCCESS) {
129 mlx5_fpga_err(fdev, "%s image failed to load; status %u\n",
130 mlx5_fpga_image_name(fdev->last_oper_image),
131 query.status);
132 return -EIO;
133 }
134
135 return 0;
136}
137
138static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
139{
140 int err;
141 struct mlx5_core_dev *mdev = fdev->mdev;
142
143 err = mlx5_fpga_ctrl_op(dev: mdev, op: MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
144 if (err) {
145 mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err);
146 return err;
147 }
148 err = mlx5_fpga_ctrl_op(dev: mdev, op: MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX);
149 if (err) {
150 mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err);
151 return err;
152 }
153 err = mlx5_fpga_ctrl_op(dev: mdev, op: MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF);
154 if (err) {
155 mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err);
156 return err;
157 }
158 return 0;
159}
160
/* Forward declaration: the notifier callbacks below call into the common handler. */
static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
			   unsigned long event, void *eqe);
162
163static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
164{
165 struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb);
166
167 return mlx5_fpga_event(fdev, event, eqe);
168}
169
170static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
171{
172 struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb);
173
174 return mlx5_fpga_event(fdev, event, eqe);
175}
176
177int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
178{
179 struct mlx5_fpga_device *fdev = mdev->fpga;
180 unsigned int max_num_qps;
181 unsigned long flags;
182 u32 fpga_id;
183 int err;
184
185 if (!fdev)
186 return 0;
187
188 err = mlx5_fpga_caps(dev: fdev->mdev);
189 if (err)
190 goto out;
191
192 err = mlx5_fpga_device_load_check(fdev);
193 if (err)
194 goto out;
195
196 fpga_id = MLX5_CAP_FPGA(fdev->mdev, fpga_id);
197 mlx5_fpga_info(fdev, "FPGA card %s:%u\n", mlx5_fpga_name(fpga_id), fpga_id);
198
199 /* No QPs if FPGA does not participate in net processing */
200 if (mlx5_is_fpga_lookaside(fpga_id))
201 goto out;
202
203 mlx5_fpga_info(fdev, "%s(%d): image, version %u; SBU %06x:%04x version %d\n",
204 mlx5_fpga_image_name(fdev->last_oper_image),
205 fdev->last_oper_image,
206 MLX5_CAP_FPGA(fdev->mdev, image_version),
207 MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id),
208 MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id),
209 MLX5_CAP_FPGA(fdev->mdev, sandbox_product_version));
210
211 max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
212 if (!max_num_qps) {
213 mlx5_fpga_err(fdev, "FPGA reports 0 QPs in SHELL_CAPS\n");
214 err = -ENOTSUPP;
215 goto out;
216 }
217
218 err = mlx5_core_reserve_gids(dev: mdev, count: max_num_qps);
219 if (err)
220 goto out;
221
222 MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR);
223 MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR);
224 mlx5_eq_notifier_register(dev: fdev->mdev, nb: &fdev->fpga_err_nb);
225 mlx5_eq_notifier_register(dev: fdev->mdev, nb: &fdev->fpga_qp_err_nb);
226
227 err = mlx5_fpga_conn_device_init(fdev);
228 if (err)
229 goto err_rsvd_gid;
230
231 if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
232 err = mlx5_fpga_device_brb(fdev);
233 if (err)
234 goto err_conn_init;
235 }
236
237 goto out;
238
239err_conn_init:
240 mlx5_fpga_conn_device_cleanup(fdev);
241
242err_rsvd_gid:
243 mlx5_eq_notifier_unregister(dev: fdev->mdev, nb: &fdev->fpga_err_nb);
244 mlx5_eq_notifier_unregister(dev: fdev->mdev, nb: &fdev->fpga_qp_err_nb);
245 mlx5_core_unreserve_gids(dev: mdev, count: max_num_qps);
246out:
247 spin_lock_irqsave(&fdev->state_lock, flags);
248 fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS;
249 spin_unlock_irqrestore(lock: &fdev->state_lock, flags);
250 return err;
251}
252
253int mlx5_fpga_init(struct mlx5_core_dev *mdev)
254{
255 struct mlx5_fpga_device *fdev;
256
257 if (!MLX5_CAP_GEN(mdev, fpga)) {
258 mlx5_core_dbg(mdev, "FPGA capability not present\n");
259 return 0;
260 }
261
262 mlx5_core_dbg(mdev, "Initializing FPGA\n");
263
264 fdev = mlx5_fpga_device_alloc();
265 if (!fdev)
266 return -ENOMEM;
267
268 fdev->mdev = mdev;
269 mdev->fpga = fdev;
270
271 return 0;
272}
273
274void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
275{
276 struct mlx5_fpga_device *fdev = mdev->fpga;
277 unsigned int max_num_qps;
278 unsigned long flags;
279 int err;
280
281 if (!fdev)
282 return;
283
284 if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id)))
285 return;
286
287 spin_lock_irqsave(&fdev->state_lock, flags);
288 if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) {
289 spin_unlock_irqrestore(lock: &fdev->state_lock, flags);
290 return;
291 }
292 fdev->state = MLX5_FPGA_STATUS_NONE;
293 spin_unlock_irqrestore(lock: &fdev->state_lock, flags);
294
295 if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
296 err = mlx5_fpga_ctrl_op(dev: mdev, op: MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
297 if (err)
298 mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n",
299 err);
300 }
301
302 mlx5_fpga_conn_device_cleanup(fdev);
303 mlx5_eq_notifier_unregister(dev: fdev->mdev, nb: &fdev->fpga_err_nb);
304 mlx5_eq_notifier_unregister(dev: fdev->mdev, nb: &fdev->fpga_qp_err_nb);
305
306 max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
307 mlx5_core_unreserve_gids(dev: mdev, count: max_num_qps);
308}
309
310void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
311{
312 struct mlx5_fpga_device *fdev = mdev->fpga;
313
314 mlx5_fpga_device_stop(mdev);
315 kfree(objp: fdev);
316 mdev->fpga = NULL;
317}
318
319static const char *mlx5_fpga_syndrome_to_string(u8 syndrome)
320{
321 if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings))
322 return mlx5_fpga_error_strings[syndrome];
323 return "Unknown";
324}
325
326static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
327{
328 if (syndrome < ARRAY_SIZE(mlx5_fpga_qp_error_strings))
329 return mlx5_fpga_qp_error_strings[syndrome];
330 return "Unknown";
331}
332
333static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
334 unsigned long event, void *eqe)
335{
336 void *data = ((struct mlx5_eqe *)eqe)->data.raw;
337 const char *event_name;
338 bool teardown = false;
339 unsigned long flags;
340 u8 syndrome;
341
342 switch (event) {
343 case MLX5_EVENT_TYPE_FPGA_ERROR:
344 syndrome = MLX5_GET(fpga_error_event, data, syndrome);
345 event_name = mlx5_fpga_syndrome_to_string(syndrome);
346 break;
347 case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
348 syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome);
349 event_name = mlx5_fpga_qp_syndrome_to_string(syndrome);
350 break;
351 default:
352 return NOTIFY_DONE;
353 }
354
355 spin_lock_irqsave(&fdev->state_lock, flags);
356 switch (fdev->state) {
357 case MLX5_FPGA_STATUS_SUCCESS:
358 mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name);
359 teardown = true;
360 break;
361 default:
362 mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n",
363 syndrome, event_name);
364 }
365 spin_unlock_irqrestore(lock: &fdev->state_lock, flags);
366 /* We tear-down the card's interfaces and functionality because
367 * the FPGA bump-on-the-wire is misbehaving and we lose ability
368 * to communicate with the network. User may still be able to
369 * recover by re-programming or debugging the FPGA
370 */
371 if (teardown)
372 mlx5_trigger_health_work(dev: fdev->mdev);
373
374 return NOTIFY_OK;
375}
376

/* Source: linux/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c */