// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)

#include <linux/bitmap.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/nvme.h>
#include <linux/pci.h>
#include <linux/wait.h>
#include <linux/sched/signal.h>

#include "fun_queue.h"
#include "fun_dev.h"

#define FUN_ADMIN_CMD_TO_MS 3000

enum {
	AQA_ASQS_SHIFT = 0,
	AQA_ACQS_SHIFT = 16,
	AQA_MIN_QUEUE_SIZE = 2,
	AQA_MAX_QUEUE_SIZE = 4096
};

/* context for admin commands */
struct fun_cmd_ctx {
	fun_admin_callback_t cb;  /* callback to invoke on completion */
	void *cb_data;            /* user data provided to callback */
	int cpu;                  /* CPU where the cmd's tag was allocated */
};

/* Context for synchronous admin commands. */
struct fun_sync_cmd_ctx {
	struct completion compl;
	u8 *rsp_buf;              /* caller provided response buffer */
	unsigned int rsp_len;     /* response buffer size */
	u8 rsp_status;            /* command response status */
};

/* Wait for the CSTS.RDY bit to match @enabled. */
static int fun_wait_ready(struct fun_dev *fdev, bool enabled)
{
	unsigned int cap_to = NVME_CAP_TIMEOUT(fdev->cap_reg);
	u32 bit = enabled ? NVME_CSTS_RDY : 0;
	unsigned long deadline;

	deadline = ((cap_to + 1) * HZ / 2) + jiffies; /* CAP.TO is in 500ms units */

	for (;;) {
		u32 csts = readl(fdev->bar + NVME_REG_CSTS);

		if (csts == ~0) {
			dev_err(fdev->dev, "CSTS register read %#x\n", csts);
			return -EIO;
		}

		if ((csts & NVME_CSTS_RDY) == bit)
			return 0;

		if (time_is_before_jiffies(deadline))
			break;

		msleep(100);
	}

	dev_err(fdev->dev,
		"Timed out waiting for device to indicate RDY %u; aborting %s\n",
		enabled, enabled ? "initialization" : "reset");
	return -ETIMEDOUT;
}

/* Check CSTS and return an error if it is unreadable or has unexpected
 * RDY value.
 */
static int fun_check_csts_rdy(struct fun_dev *fdev, unsigned int expected_rdy)
{
	u32 csts = readl(fdev->bar + NVME_REG_CSTS);
	u32 actual_rdy = csts & NVME_CSTS_RDY;

	if (csts == ~0) {
		dev_err(fdev->dev, "CSTS register read %#x\n", csts);
		return -EIO;
	}
	if (actual_rdy != expected_rdy) {
		dev_err(fdev->dev, "Unexpected CSTS RDY %u\n", actual_rdy);
		return -EINVAL;
	}
	return 0;
}

/* Check that CSTS RDY has the expected value. Then write a new value to the CC
 * register and wait for CSTS RDY to match the new CC ENABLE state.
 */
static int fun_update_cc_enable(struct fun_dev *fdev, unsigned int initial_rdy)
{
	int rc = fun_check_csts_rdy(fdev, initial_rdy);

	if (rc)
		return rc;
	writel(fdev->cc_reg, fdev->bar + NVME_REG_CC);
	return fun_wait_ready(fdev, !!(fdev->cc_reg & NVME_CC_ENABLE));
}

/* Disable the controller: clear CC.EN and wait for CSTS.RDY to clear. */
static int fun_disable_ctrl(struct fun_dev *fdev)
{
	fdev->cc_reg &= ~(NVME_CC_SHN_MASK | NVME_CC_ENABLE);
	return fun_update_cc_enable(fdev, 1);
}

/* Enable the controller with the given admin queue entry sizes and wait for
 * CSTS.RDY to assert.
 */
static int fun_enable_ctrl(struct fun_dev *fdev, u32 admin_cqesz_log2,
			   u32 admin_sqesz_log2)
{
	fdev->cc_reg = (admin_cqesz_log2 << NVME_CC_IOCQES_SHIFT) |
		       (admin_sqesz_log2 << NVME_CC_IOSQES_SHIFT) |
		       ((PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT) |
		       NVME_CC_ENABLE;

	return fun_update_cc_enable(fdev, 0);
}

/* Request the device's PCI memory resources and map BAR 0. */
static int fun_map_bars(struct fun_dev *fdev, const char *name)
{
	struct pci_dev *pdev = to_pci_dev(fdev->dev);
	int err;

	err = pci_request_mem_regions(pdev, name);
	if (err) {
		dev_err(&pdev->dev,
			"Couldn't get PCI memory resources, err %d\n", err);
		return err;
	}

	fdev->bar = pci_ioremap_bar(pdev, 0);
	if (!fdev->bar) {
		dev_err(&pdev->dev, "Couldn't map BAR 0\n");
		pci_release_mem_regions(pdev);
		return -ENOMEM;
	}

	return 0;
}

static void fun_unmap_bars(struct fun_dev *fdev)
{
	struct pci_dev *pdev = to_pci_dev(fdev->dev);

	if (fdev->bar) {
		iounmap(fdev->bar);
		fdev->bar = NULL;
		pci_release_mem_regions(pdev);
	}
}

static int fun_set_dma_masks(struct device *dev)
{
	int err;

	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
	if (err)
		dev_err(dev, "DMA mask configuration failed, err %d\n", err);
	return err;
}

/* IRQ handler for the admin queue's completion queue. */
static irqreturn_t fun_admin_irq(int irq, void *data)
{
	struct fun_queue *funq = data;

	return fun_process_cq(funq, 0) ? IRQ_HANDLED : IRQ_NONE;
}

/* Admin CQE handler: dispatch async events to the registered event callback
 * and route command responses to their issuers.
 */
static void fun_complete_admin_cmd(struct fun_queue *funq, void *data,
				   void *entry, const struct fun_cqe_info *info)
{
	const struct fun_admin_rsp_common *rsp_common = entry;
	struct fun_dev *fdev = funq->fdev;
	struct fun_cmd_ctx *cmd_ctx;
	int cpu;
	u16 cid;

	if (info->sqhd == cpu_to_be16(0xffff)) {
		dev_dbg(fdev->dev, "adminq event");
		if (fdev->adminq_cb)
			fdev->adminq_cb(fdev, entry);
		return;
	}

	cid = be16_to_cpu(rsp_common->cid);
	dev_dbg(fdev->dev, "admin CQE cid %u, op %u, ret %u\n", cid,
		rsp_common->op, rsp_common->ret);

	cmd_ctx = &fdev->cmd_ctx[cid];
	if (cmd_ctx->cpu < 0) {
		dev_err(fdev->dev,
			"admin CQE with CID=%u, op=%u does not match a pending command\n",
			cid, rsp_common->op);
		return;
	}

	if (cmd_ctx->cb)
		cmd_ctx->cb(fdev, entry, xchg(&cmd_ctx->cb_data, NULL));

	cpu = cmd_ctx->cpu;
	cmd_ctx->cpu = -1;
	sbitmap_queue_clear(&fdev->admin_sbq, cid, cpu);
}

/* Allocate the per-tag command contexts and mark them all free (cpu == -1). */
static int fun_init_cmd_ctx(struct fun_dev *fdev, unsigned int ntags)
{
	unsigned int i;

	fdev->cmd_ctx = kvcalloc(ntags, sizeof(*fdev->cmd_ctx), GFP_KERNEL);
	if (!fdev->cmd_ctx)
		return -ENOMEM;

	for (i = 0; i < ntags; i++)
		fdev->cmd_ctx[i].cpu = -1;

	return 0;
}

/* Allocate and enable an admin queue and assign it the first IRQ vector. */
static int fun_enable_admin_queue(struct fun_dev *fdev,
				  const struct fun_dev_params *areq)
{
	struct fun_queue_alloc_req qreq = {
		.cqe_size_log2 = areq->cqe_size_log2,
		.sqe_size_log2 = areq->sqe_size_log2,
		.cq_depth = areq->cq_depth,
		.sq_depth = areq->sq_depth,
		.rq_depth = areq->rq_depth,
	};
	unsigned int ntags = areq->sq_depth - 1;
	struct fun_queue *funq;
	int rc;

	if (fdev->admin_q)
		return -EEXIST;

	if (areq->sq_depth < AQA_MIN_QUEUE_SIZE ||
	    areq->sq_depth > AQA_MAX_QUEUE_SIZE ||
	    areq->cq_depth < AQA_MIN_QUEUE_SIZE ||
	    areq->cq_depth > AQA_MAX_QUEUE_SIZE)
		return -EINVAL;

	fdev->admin_q = fun_alloc_queue(fdev, 0, &qreq);
	if (!fdev->admin_q)
		return -ENOMEM;

	rc = fun_init_cmd_ctx(fdev, ntags);
	if (rc)
		goto free_q;

	rc = sbitmap_queue_init_node(&fdev->admin_sbq, ntags, -1, false,
				     GFP_KERNEL, dev_to_node(fdev->dev));
	if (rc)
		goto free_cmd_ctx;

	funq = fdev->admin_q;
	funq->cq_vector = 0;
	rc = fun_request_irq(funq, dev_name(fdev->dev), fun_admin_irq, funq);
	if (rc)
		goto free_sbq;

	fun_set_cq_callback(funq, fun_complete_admin_cmd, NULL);
	fdev->adminq_cb = areq->event_cb;

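	/* AQA takes 0-based queue sizes. Illustrative arithmetic: with
	 * sq_depth = cq_depth = 64, the write below programs
	 * AQA = (63 << AQA_ASQS_SHIFT) | (63 << AQA_ACQS_SHIFT) = 0x003f003f.
	 */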
	writel((funq->sq_depth - 1) << AQA_ASQS_SHIFT |
	       (funq->cq_depth - 1) << AQA_ACQS_SHIFT,
	       fdev->bar + NVME_REG_AQA);

	writeq(funq->sq_dma_addr, fdev->bar + NVME_REG_ASQ);
	writeq(funq->cq_dma_addr, fdev->bar + NVME_REG_ACQ);

	rc = fun_enable_ctrl(fdev, areq->cqe_size_log2, areq->sqe_size_log2);
	if (rc)
		goto free_irq;

	if (areq->rq_depth) {
		rc = fun_create_rq(funq);
		if (rc)
			goto disable_ctrl;

		funq_rq_post(funq);
	}

	return 0;

disable_ctrl:
	fun_disable_ctrl(fdev);
free_irq:
	fun_free_irq(funq);
free_sbq:
	sbitmap_queue_free(&fdev->admin_sbq);
free_cmd_ctx:
	kvfree(fdev->cmd_ctx);
	fdev->cmd_ctx = NULL;
free_q:
	fun_free_queue(fdev->admin_q);
	fdev->admin_q = NULL;
	return rc;
}

/* Disable the admin queue: quiesce the controller, drain any remaining CQEs,
 * and release the queue's resources.
 */
static void fun_disable_admin_queue(struct fun_dev *fdev)
{
	struct fun_queue *admq = fdev->admin_q;

	if (!admq)
		return;

	fun_disable_ctrl(fdev);

	fun_free_irq(admq);
	__fun_process_cq(admq, 0);

	sbitmap_queue_free(&fdev->admin_sbq);

	kvfree(fdev->cmd_ctx);
	fdev->cmd_ctx = NULL;

	fun_free_queue(admq);
	fdev->admin_q = NULL;
}

/* Return %true if the admin queue has stopped servicing commands as can be
 * detected through registers. This isn't exhaustive and may provide false
 * negatives.
 */
static bool fun_adminq_stopped(struct fun_dev *fdev)
{
	u32 csts = readl(fdev->bar + NVME_REG_CSTS);

	return (csts & (NVME_CSTS_CFS | NVME_CSTS_RDY)) != NVME_CSTS_RDY;
}

/* Sleep until an admin command tag becomes available or command submission is
 * shut down. Returns the tag, or -ESHUTDOWN if commands are suppressed.
 */
static int fun_wait_for_tag(struct fun_dev *fdev, int *cpup)
{
	struct sbitmap_queue *sbq = &fdev->admin_sbq;
	struct sbq_wait_state *ws = &sbq->ws[0];
	DEFINE_SBQ_WAIT(wait);
	int tag;

	for (;;) {
		sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_UNINTERRUPTIBLE);
		if (fdev->suppress_cmds) {
			tag = -ESHUTDOWN;
			break;
		}
		tag = sbitmap_queue_get(sbq, cpup);
		if (tag >= 0)
			break;
		schedule();
	}

	sbitmap_finish_wait(sbq, ws, &wait);
	return tag;
}

/* Submit an asynchronous admin command. Caller is responsible for implementing
 * any waiting or timeout. Upon command completion the callback @cb is called.
 */
int fun_submit_admin_cmd(struct fun_dev *fdev, struct fun_admin_req_common *cmd,
			 fun_admin_callback_t cb, void *cb_data, bool wait_ok)
{
	struct fun_queue *funq = fdev->admin_q;
	unsigned int cmdsize = cmd->len8 * 8;
	struct fun_cmd_ctx *cmd_ctx;
	int tag, cpu, rc = 0;

	if (WARN_ON(cmdsize > (1 << funq->sqe_size_log2)))
		return -EMSGSIZE;

	tag = sbitmap_queue_get(&fdev->admin_sbq, &cpu);
	if (tag < 0) {
		if (!wait_ok)
			return -EAGAIN;
		tag = fun_wait_for_tag(fdev, &cpu);
		if (tag < 0)
			return tag;
	}

	cmd->cid = cpu_to_be16(tag);

	cmd_ctx = &fdev->cmd_ctx[tag];
	cmd_ctx->cb = cb;
	cmd_ctx->cb_data = cb_data;

	spin_lock(&funq->sq_lock);

	if (unlikely(fdev->suppress_cmds)) {
		rc = -ESHUTDOWN;
		sbitmap_queue_clear(&fdev->admin_sbq, tag, cpu);
	} else {
		cmd_ctx->cpu = cpu;
		memcpy(fun_sqe_at(funq, funq->sq_tail), cmd, cmdsize);

		dev_dbg(fdev->dev, "admin cmd @ %u: %8ph\n", funq->sq_tail,
			cmd);

		if (++funq->sq_tail == funq->sq_depth)
			funq->sq_tail = 0;
		writel(funq->sq_tail, funq->sq_db);
	}
	spin_unlock(&funq->sq_lock);
	return rc;
}
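
/* Example (illustrative sketch, not taken from a caller in this driver):
 * issuing a command asynchronously with a completion callback. my_cb and
 * the elided request initialization are hypothetical.
 *
 *	static void my_cb(struct fun_dev *fd, void *rsp, void *cb_data)
 *	{
 *		complete(cb_data);	// or inspect rsp first
 *	}
 *
 *	struct fun_admin_req_common req = { ... };
 *	DECLARE_COMPLETION_ONSTACK(done);
 *
 *	if (!fun_submit_admin_cmd(fdev, &req, my_cb, &done, true))
 *		wait_for_completion(&done);
 */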

/* Abandon a pending admin command by clearing the issuer's callback data.
 * Failure indicates that the command either has already completed or its
 * completion is racing with this call.
 */
static bool fun_abandon_admin_cmd(struct fun_dev *fd,
				  const struct fun_admin_req_common *cmd,
				  void *cb_data)
{
	u16 cid = be16_to_cpu(cmd->cid);
	struct fun_cmd_ctx *cmd_ctx = &fd->cmd_ctx[cid];

	return cmpxchg(&cmd_ctx->cb_data, cb_data, NULL) == cb_data;
}

/* Stop submission of new admin commands and wake up any processes waiting for
 * tags. Already submitted commands are left to complete or time out.
 */
static void fun_admin_stop(struct fun_dev *fdev)
{
	spin_lock(&fdev->admin_q->sq_lock);
	fdev->suppress_cmds = true;
	spin_unlock(&fdev->admin_q->sq_lock);
	sbitmap_queue_wake_all(&fdev->admin_sbq);
}

/* The callback for synchronous execution of admin commands. It copies the
 * command response to the caller's buffer and signals completion.
 */
static void fun_admin_cmd_sync_cb(struct fun_dev *fd, void *rsp, void *cb_data)
{
	const struct fun_admin_rsp_common *rsp_common = rsp;
	struct fun_sync_cmd_ctx *ctx = cb_data;

	if (!ctx)
		return; /* command issuer timed out and left */
	if (ctx->rsp_buf) {
		unsigned int rsp_len = rsp_common->len8 * 8;

		if (unlikely(rsp_len > ctx->rsp_len)) {
			dev_err(fd->dev,
				"response for op %u is %uB > response buffer %uB\n",
				rsp_common->op, rsp_len, ctx->rsp_len);
			rsp_len = ctx->rsp_len;
		}
		memcpy(ctx->rsp_buf, rsp, rsp_len);
	}
	ctx->rsp_status = rsp_common->ret;
	complete(&ctx->compl);
}

/* Submit a synchronous admin command and wait up to @timeout ms (0 selects
 * the default FUN_ADMIN_CMD_TO_MS) for its response.
 */
int fun_submit_admin_sync_cmd(struct fun_dev *fdev,
			      struct fun_admin_req_common *cmd, void *rsp,
			      size_t rspsize, unsigned int timeout)
{
	struct fun_sync_cmd_ctx ctx = {
		.compl = COMPLETION_INITIALIZER_ONSTACK(ctx.compl),
		.rsp_buf = rsp,
		.rsp_len = rspsize,
	};
	unsigned int cmdlen = cmd->len8 * 8;
	unsigned long jiffies_left;
	int ret;

	ret = fun_submit_admin_cmd(fdev, cmd, fun_admin_cmd_sync_cb, &ctx,
				   true);
	if (ret)
		return ret;

	if (!timeout)
		timeout = FUN_ADMIN_CMD_TO_MS;

	jiffies_left = wait_for_completion_timeout(&ctx.compl,
						   msecs_to_jiffies(timeout));
	if (!jiffies_left) {
		/* The command timed out. Attempt to cancel it so we can return.
		 * But if the command is in the process of completing we'll
		 * wait for it.
		 */
		if (fun_abandon_admin_cmd(fdev, cmd, &ctx)) {
			dev_err(fdev->dev, "admin command timed out: %*ph\n",
				cmdlen, cmd);
			fun_admin_stop(fdev);
			/* see if the timeout was due to a queue failure */
			if (fun_adminq_stopped(fdev))
				dev_err(fdev->dev,
					"device does not accept admin commands\n");

			return -ETIMEDOUT;
		}
		wait_for_completion(&ctx.compl);
	}

	if (ctx.rsp_status) {
		dev_err(fdev->dev, "admin command failed, err %d: %*ph\n",
			ctx.rsp_status, cmdlen, cmd);
	}

	return -ctx.rsp_status;
}
EXPORT_SYMBOL_GPL(fun_submit_admin_sync_cmd);
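
/* Example (illustrative sketch): a caller that wants a per-command timeout
 * and a raw response buffer. The fun_admin_echo_req/rsp types and
 * FUN_ADMIN_OP_ECHO are hypothetical stand-ins for a real request/response
 * pair; fun_get_res_count() below is a real in-tree caller.
 *
 *	struct fun_admin_echo_rsp rsp;
 *	struct fun_admin_echo_req req = {
 *		.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_ECHO,
 *						     sizeof(req)),
 *	};
 *	int rc = fun_submit_admin_sync_cmd(fdev, &req.common, &rsp,
 *					   sizeof(rsp), 5000);
 *	if (rc)		// 0, a negative errno, or the negated device status
 *		return rc;
 */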

/* Return the number of device resources of the requested type. */
int fun_get_res_count(struct fun_dev *fdev, enum fun_admin_op res)
{
	union {
		struct fun_admin_res_count_req req;
		struct fun_admin_res_count_rsp rsp;
	} cmd;
	int rc;

	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(res, sizeof(cmd.req));
	cmd.req.count = FUN_ADMIN_SIMPLE_SUBOP_INIT(FUN_ADMIN_SUBOP_RES_COUNT,
						    0, 0);

	rc = fun_submit_admin_sync_cmd(fdev, &cmd.req.common, &cmd.rsp,
				       sizeof(cmd), 0);
	return rc ? rc : be32_to_cpu(cmd.rsp.count.data);
}
EXPORT_SYMBOL_GPL(fun_get_res_count);

/* Request that the instance of resource @res with the given id be deleted. */
int fun_res_destroy(struct fun_dev *fdev, enum fun_admin_op res,
		    unsigned int flags, u32 id)
{
	struct fun_admin_generic_destroy_req req = {
		.common = FUN_ADMIN_REQ_COMMON_INIT2(res, sizeof(req)),
		.destroy = FUN_ADMIN_SIMPLE_SUBOP_INIT(FUN_ADMIN_SUBOP_DESTROY,
						       flags, id)
	};

	return fun_submit_admin_sync_cmd(fdev, &req.common, NULL, 0, 0);
}
EXPORT_SYMBOL_GPL(fun_res_destroy);

/* Bind two entities of the given types and IDs. */
int fun_bind(struct fun_dev *fdev, enum fun_admin_bind_type type0,
	     unsigned int id0, enum fun_admin_bind_type type1,
	     unsigned int id1)
{
	struct {
		struct fun_admin_bind_req req;
		struct fun_admin_bind_entry entry[2];
	} cmd = {
		.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_BIND,
							 sizeof(cmd)),
		.entry[0] = FUN_ADMIN_BIND_ENTRY_INIT(type0, id0),
		.entry[1] = FUN_ADMIN_BIND_ENTRY_INIT(type1, id1),
	};

	return fun_submit_admin_sync_cmd(fdev, &cmd.req.common, NULL, 0, 0);
}
EXPORT_SYMBOL_GPL(fun_bind);
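
/* Example (illustrative, assuming EPSQ/EPCQ bind types exist in fun_hci.h):
 * binding an SQ to the CQ that reports its completions:
 *
 *	err = fun_bind(fdev, FUN_ADMIN_BIND_TYPE_EPSQ, sqid,
 *		       FUN_ADMIN_BIND_TYPE_EPCQ, cqid);
 */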

static int fun_get_dev_limits(struct fun_dev *fdev)
{
	struct pci_dev *pdev = to_pci_dev(fdev->dev);
	unsigned int cq_count, sq_count, num_dbs;
	int rc;

	rc = fun_get_res_count(fdev, FUN_ADMIN_OP_EPCQ);
	if (rc < 0)
		return rc;
	cq_count = rc;

	rc = fun_get_res_count(fdev, FUN_ADMIN_OP_EPSQ);
	if (rc < 0)
		return rc;
	sq_count = rc;

	/* The admin queue consumes 1 CQ and at least 1 SQ. To be usable the
	 * device must provide additional queues.
	 */
	if (cq_count < 2 || sq_count < 2 + !!fdev->admin_q->rq_depth)
		return -EINVAL;

	/* Calculate the max QID based on SQ/CQ/doorbell counts.
	 * SQ/CQ doorbells alternate.
	 */
	num_dbs = (pci_resource_len(pdev, 0) - NVME_REG_DBS) >>
		  (2 + NVME_CAP_STRIDE(fdev->cap_reg));
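	/* Illustrative arithmetic: a 128 KiB BAR 0 with CAP.DSTRD = 0 gives
	 * num_dbs = (0x20000 - 0x1000) >> 2 = 31744, i.e. 15872 SQ/CQ
	 * doorbell pairs.
	 */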
	fdev->max_qid = min3(cq_count, sq_count, num_dbs / 2) - 1;
	fdev->kern_end_qid = fdev->max_qid + 1;
	return 0;
}

/* Allocate all MSI-X vectors available on a function and at least @min_vecs. */
static int fun_alloc_irqs(struct pci_dev *pdev, unsigned int min_vecs)
{
	int vecs, num_msix = pci_msix_vec_count(pdev);

	if (num_msix < 0)
		return num_msix;
	if (min_vecs > num_msix)
		return -ERANGE;

	vecs = pci_alloc_irq_vectors(pdev, min_vecs, num_msix, PCI_IRQ_MSIX);
	if (vecs > 0) {
		dev_info(&pdev->dev,
			 "Allocated %d IRQ vectors of %d requested\n",
			 vecs, num_msix);
	} else {
		dev_err(&pdev->dev,
			"Unable to allocate at least %u IRQ vectors\n",
			min_vecs);
	}
	return vecs;
}

/* Allocate and initialize the IRQ manager state. */
static int fun_alloc_irq_mgr(struct fun_dev *fdev)
{
	fdev->irq_map = bitmap_zalloc(fdev->num_irqs, GFP_KERNEL);
	if (!fdev->irq_map)
		return -ENOMEM;

	spin_lock_init(&fdev->irqmgr_lock);
	/* mark IRQ 0 allocated, it is used by the admin queue */
	__set_bit(0, fdev->irq_map);
	fdev->irqs_avail = fdev->num_irqs - 1;
	return 0;
}

/* Reserve @nirqs of the currently available IRQs and return their indices. */
int fun_reserve_irqs(struct fun_dev *fdev, unsigned int nirqs, u16 *irq_indices)
{
	unsigned int b, n = 0;
	int err = -ENOSPC;

	if (!nirqs)
		return 0;

	spin_lock(&fdev->irqmgr_lock);
	if (nirqs > fdev->irqs_avail)
		goto unlock;

	for_each_clear_bit(b, fdev->irq_map, fdev->num_irqs) {
		__set_bit(b, fdev->irq_map);
		irq_indices[n++] = b;
		if (n >= nirqs)
			break;
	}

	WARN_ON(n < nirqs);
	fdev->irqs_avail -= n;
	err = n;
unlock:
	spin_unlock(&fdev->irqmgr_lock);
	return err;
}
EXPORT_SYMBOL(fun_reserve_irqs);

/* Release @nirqs previously allocated IRQs with the supplied indices. */
void fun_release_irqs(struct fun_dev *fdev, unsigned int nirqs,
		      u16 *irq_indices)
{
	unsigned int i;

	spin_lock(&fdev->irqmgr_lock);
	for (i = 0; i < nirqs; i++)
		__clear_bit(irq_indices[i], fdev->irq_map);
	fdev->irqs_avail += nirqs;
	spin_unlock(&fdev->irqmgr_lock);
}
EXPORT_SYMBOL(fun_release_irqs);
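
/* Example (illustrative sketch): a client reserving vectors for its queues
 * and releasing them on teardown. nqueues and irqs[] are hypothetical.
 *
 *	u16 irqs[MAX_QUEUES];
 *	int n = fun_reserve_irqs(fdev, nqueues, irqs);
 *
 *	if (n < 0)
 *		return n;	// -ENOSPC if not enough IRQs are available
 *	...
 *	fun_release_irqs(fdev, n, irqs);
 */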

/* Service task work handler: invoke the device's service callback unless
 * servicing is disabled.
 */
static void fun_serv_handler(struct work_struct *work)
{
	struct fun_dev *fd = container_of(work, struct fun_dev, service_task);

	if (test_bit(FUN_SERV_DISABLED, &fd->service_flags))
		return;
	if (fd->serv_cb)
		fd->serv_cb(fd);
}

void fun_serv_stop(struct fun_dev *fd)
{
	set_bit(FUN_SERV_DISABLED, &fd->service_flags);
	cancel_work_sync(&fd->service_task);
}
EXPORT_SYMBOL_GPL(fun_serv_stop);

void fun_serv_restart(struct fun_dev *fd)
{
	clear_bit(FUN_SERV_DISABLED, &fd->service_flags);
	if (fd->service_flags)
		schedule_work(&fd->service_task);
}
EXPORT_SYMBOL_GPL(fun_serv_restart);

void fun_serv_sched(struct fun_dev *fd)
{
	if (!test_bit(FUN_SERV_DISABLED, &fd->service_flags))
		schedule_work(&fd->service_task);
}
EXPORT_SYMBOL_GPL(fun_serv_sched);

/* Check and try to get the device into a proper state for initialization,
 * i.e., CSTS.RDY = CC.EN = 0.
 */
static int sanitize_dev(struct fun_dev *fdev)
{
	int rc;

	fdev->cap_reg = readq(fdev->bar + NVME_REG_CAP);
	fdev->cc_reg = readl(fdev->bar + NVME_REG_CC);

	/* First get RDY to agree with the current EN. Give RDY the opportunity
	 * to complete a potential recent EN change.
	 */
	rc = fun_wait_ready(fdev, fdev->cc_reg & NVME_CC_ENABLE);
	if (rc)
		return rc;

	/* Next, reset the device if EN is currently 1. */
	if (fdev->cc_reg & NVME_CC_ENABLE)
		rc = fun_disable_ctrl(fdev);

	return rc;
}

/* Undo the device initialization of fun_dev_enable(). */
void fun_dev_disable(struct fun_dev *fdev)
{
	struct pci_dev *pdev = to_pci_dev(fdev->dev);

	pci_set_drvdata(pdev, NULL);

	if (fdev->fw_handle != FUN_HCI_ID_INVALID) {
		fun_res_destroy(fdev, FUN_ADMIN_OP_SWUPGRADE, 0,
				fdev->fw_handle);
		fdev->fw_handle = FUN_HCI_ID_INVALID;
	}

	fun_disable_admin_queue(fdev);

	bitmap_free(fdev->irq_map);
	pci_free_irq_vectors(pdev);

	pci_disable_device(pdev);

	fun_unmap_bars(fdev);
}
EXPORT_SYMBOL(fun_dev_disable);

/* Perform basic initialization of a device, including
 * - PCI config space setup and BAR0 mapping
 * - interrupt management initialization
 * - 1 admin queue setup
 * - determination of some device limits, such as number of queues.
 */
int fun_dev_enable(struct fun_dev *fdev, struct pci_dev *pdev,
		   const struct fun_dev_params *areq, const char *name)
{
	int rc;

	fdev->dev = &pdev->dev;
	rc = fun_map_bars(fdev, name);
	if (rc)
		return rc;

	rc = fun_set_dma_masks(fdev->dev);
	if (rc)
		goto unmap;

	rc = pci_enable_device_mem(pdev);
	if (rc) {
		dev_err(&pdev->dev, "Couldn't enable device, err %d\n", rc);
		goto unmap;
	}

	rc = sanitize_dev(fdev);
	if (rc)
		goto disable_dev;

	fdev->fw_handle = FUN_HCI_ID_INVALID;
	fdev->q_depth = NVME_CAP_MQES(fdev->cap_reg) + 1;
	fdev->db_stride = 1 << NVME_CAP_STRIDE(fdev->cap_reg);
	fdev->dbs = fdev->bar + NVME_REG_DBS;

	INIT_WORK(&fdev->service_task, fun_serv_handler);
	fdev->service_flags = FUN_SERV_DISABLED;
	fdev->serv_cb = areq->serv_cb;

	rc = fun_alloc_irqs(pdev, areq->min_msix + 1); /* +1 for admin CQ */
	if (rc < 0)
		goto disable_dev;
	fdev->num_irqs = rc;

	rc = fun_alloc_irq_mgr(fdev);
	if (rc)
		goto free_irqs;

	pci_set_master(pdev);
	rc = fun_enable_admin_queue(fdev, areq);
	if (rc)
		goto free_irq_mgr;

	rc = fun_get_dev_limits(fdev);
	if (rc < 0)
		goto disable_admin;

	pci_save_state(pdev);
	pci_set_drvdata(pdev, fdev);
	pcie_print_link_status(pdev);
	dev_dbg(fdev->dev, "q_depth %u, db_stride %u, max qid %d kern_end_qid %d\n",
		fdev->q_depth, fdev->db_stride, fdev->max_qid,
		fdev->kern_end_qid);
	return 0;

disable_admin:
	fun_disable_admin_queue(fdev);
free_irq_mgr:
	bitmap_free(fdev->irq_map);
free_irqs:
	pci_free_irq_vectors(pdev);
disable_dev:
	pci_disable_device(pdev);
unmap:
	fun_unmap_bars(fdev);
	return rc;
}
EXPORT_SYMBOL(fun_dev_enable);

MODULE_AUTHOR("Dimitris Michailidis <dmichail@fungible.com>");
MODULE_DESCRIPTION("Core services driver for Fungible devices");
MODULE_LICENSE("Dual BSD/GPL");
837 | |