// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2016 Avago Technologies. All rights reserved.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/parser.h>
#include <uapi/scsi/fc/fc_fs.h>
#include <uapi/scsi/fc/fc_els.h>
#include <linux/delay.h>
#include <linux/overflow.h>
#include <linux/blk-cgroup.h>
#include "nvme.h"
#include "fabrics.h"
#include <linux/nvme-fc-driver.h>
#include <linux/nvme-fc.h>
#include "fc.h"
#include <scsi/scsi_transport_fc.h>
#include <linux/blk-mq-pci.h>

/* *************************** Data Structures/Defines ****************** */


enum nvme_fc_queue_flags {
	NVME_FC_Q_CONNECTED = 0,
	NVME_FC_Q_LIVE,
};

#define NVME_FC_DEFAULT_DEV_LOSS_TMO	60	/* seconds */
#define NVME_FC_DEFAULT_RECONNECT_TMO	2	/* delay between reconnects
						 * when connected and a
						 * connection failure.
						 */

struct nvme_fc_queue {
	struct nvme_fc_ctrl	*ctrl;
	struct device		*dev;
	struct blk_mq_hw_ctx	*hctx;
	void			*lldd_handle;
	size_t			cmnd_capsule_len;
	u32			qnum;
	u32			rqcnt;
	u32			seqno;

	u64			connection_id;
	atomic_t		csn;

	unsigned long		flags;
} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */

enum nvme_fcop_flags {
	FCOP_FLAGS_TERMIO	= (1 << 0),
	FCOP_FLAGS_AEN		= (1 << 1),
};

struct nvmefc_ls_req_op {
	struct nvmefc_ls_req	ls_req;

	struct nvme_fc_rport	*rport;
	struct nvme_fc_queue	*queue;
	struct request		*rq;
	u32			flags;

	int			ls_error;
	struct completion	ls_done;
	struct list_head	lsreq_list;	/* rport->ls_req_list */
	bool			req_queued;
};

struct nvmefc_ls_rcv_op {
	struct nvme_fc_rport		*rport;
	struct nvmefc_ls_rsp		*lsrsp;
	union nvmefc_ls_requests	*rqstbuf;
	union nvmefc_ls_responses	*rspbuf;
	u16				rqstdatalen;
	bool				handled;
	dma_addr_t			rspdma;
	struct list_head		lsrcv_list;	/* rport->ls_rcv_list */
} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */

enum nvme_fcpop_state {
	FCPOP_STATE_UNINIT	= 0,
	FCPOP_STATE_IDLE	= 1,
	FCPOP_STATE_ACTIVE	= 2,
	FCPOP_STATE_ABORTED	= 3,
	FCPOP_STATE_COMPLETE	= 4,
};

struct nvme_fc_fcp_op {
	struct nvme_request	nreq;		/*
						 * nvme/host/core.c
						 * requires this to be
						 * the 1st element in the
						 * private structure
						 * associated with the
						 * request.
						 */
	struct nvmefc_fcp_req	fcp_req;

	struct nvme_fc_ctrl	*ctrl;
	struct nvme_fc_queue	*queue;
	struct request		*rq;

	atomic_t		state;
	u32			flags;
	u32			rqno;
	u32			nents;

	struct nvme_fc_cmd_iu	cmd_iu;
	struct nvme_fc_ersp_iu	rsp_iu;
};

struct nvme_fcp_op_w_sgl {
	struct nvme_fc_fcp_op	op;
	struct scatterlist	sgl[NVME_INLINE_SG_CNT];
	uint8_t			priv[];
};

struct nvme_fc_lport {
	struct nvme_fc_local_port	localport;

	struct ida			endp_cnt;
	struct list_head		port_list;	/* nvme_fc_port_list */
	struct list_head		endp_list;
	struct device			*dev;	/* physical device for dma */
	struct nvme_fc_port_template	*ops;
	struct kref			ref;
	atomic_t			act_rport_cnt;
} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */

struct nvme_fc_rport {
	struct nvme_fc_remote_port	remoteport;

	struct list_head		endp_list; /* for lport->endp_list */
	struct list_head		ctrl_list;
	struct list_head		ls_req_list;
	struct list_head		ls_rcv_list;
	struct list_head		disc_list;
	struct device			*dev;	/* physical device for dma */
	struct nvme_fc_lport		*lport;
	spinlock_t			lock;
	struct kref			ref;
	atomic_t			act_ctrl_cnt;
	unsigned long			dev_loss_end;
	struct work_struct		lsrcv_work;
} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */

/* fc_ctrl flags values - specified as bit positions */
#define ASSOC_ACTIVE		0
#define ASSOC_FAILED		1
#define FCCTRL_TERMIO		2

struct nvme_fc_ctrl {
	spinlock_t		lock;
	struct nvme_fc_queue	*queues;
	struct device		*dev;
	struct nvme_fc_lport	*lport;
	struct nvme_fc_rport	*rport;
	u32			cnum;

	bool			ioq_live;
	u64			association_id;
	struct nvmefc_ls_rcv_op	*rcv_disconn;

	struct list_head	ctrl_list;	/* rport->ctrl_list */

	struct blk_mq_tag_set	admin_tag_set;
	struct blk_mq_tag_set	tag_set;

	struct work_struct	ioerr_work;
	struct delayed_work	connect_work;

	struct kref		ref;
	unsigned long		flags;
	u32			iocnt;
	wait_queue_head_t	ioabort_wait;

	struct nvme_fc_fcp_op	aen_ops[NVME_NR_AEN_COMMANDS];

	struct nvme_ctrl	ctrl;
};

static inline struct nvme_fc_ctrl *
to_fc_ctrl(struct nvme_ctrl *ctrl)
{
	return container_of(ctrl, struct nvme_fc_ctrl, ctrl);
}

static inline struct nvme_fc_lport *
localport_to_lport(struct nvme_fc_local_port *portptr)
{
	return container_of(portptr, struct nvme_fc_lport, localport);
}

static inline struct nvme_fc_rport *
remoteport_to_rport(struct nvme_fc_remote_port *portptr)
{
	return container_of(portptr, struct nvme_fc_rport, remoteport);
}

static inline struct nvmefc_ls_req_op *
ls_req_to_lsop(struct nvmefc_ls_req *lsreq)
{
	return container_of(lsreq, struct nvmefc_ls_req_op, ls_req);
}

static inline struct nvme_fc_fcp_op *
fcp_req_to_fcp_op(struct nvmefc_fcp_req *fcpreq)
{
	return container_of(fcpreq, struct nvme_fc_fcp_op, fcp_req);
}



/* *************************** Globals **************************** */


static DEFINE_SPINLOCK(nvme_fc_lock);

static LIST_HEAD(nvme_fc_lport_list);
static DEFINE_IDA(nvme_fc_local_port_cnt);
static DEFINE_IDA(nvme_fc_ctrl_cnt);

static struct workqueue_struct *nvme_fc_wq;

static bool nvme_fc_waiting_to_unload;
static DECLARE_COMPLETION(nvme_fc_unload_proceed);

/*
 * These items are short-term. They will eventually be moved into
 * a generic FC class. See comments in module init.
 */
static struct device *fc_udev_device;

static void nvme_fc_complete_rq(struct request *rq);

/* *********************** FC-NVME Port Management ************************ */

static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
			struct nvme_fc_queue *, unsigned int);

static void nvme_fc_handle_ls_rqst_work(struct work_struct *work);


static void
nvme_fc_free_lport(struct kref *ref)
{
	struct nvme_fc_lport *lport =
		container_of(ref, struct nvme_fc_lport, ref);
	unsigned long flags;

	WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED);
	WARN_ON(!list_empty(&lport->endp_list));

	/* remove from transport list */
	spin_lock_irqsave(&nvme_fc_lock, flags);
	list_del(&lport->port_list);
	if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list))
		complete(&nvme_fc_unload_proceed);
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	ida_free(&nvme_fc_local_port_cnt, lport->localport.port_num);
	ida_destroy(&lport->endp_cnt);

	put_device(lport->dev);

	kfree(lport);
}

static void
nvme_fc_lport_put(struct nvme_fc_lport *lport)
{
	kref_put(&lport->ref, nvme_fc_free_lport);
}

static int
nvme_fc_lport_get(struct nvme_fc_lport *lport)
{
	return kref_get_unless_zero(&lport->ref);
}

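/*
 * Look for a previously deregistered localport with the same WWNN/WWPN
 * that is still awaiting final release of its references. If found in
 * a sane state, take a reference and resume it. Returns the resumed
 * lport, NULL if there is no match (or its refcount already hit zero),
 * or an ERR_PTR if the match is in a conflicting state.
 */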
static struct nvme_fc_lport *
nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo,
			struct nvme_fc_port_template *ops,
			struct device *dev)
{
	struct nvme_fc_lport *lport;
	unsigned long flags;

	spin_lock_irqsave(&nvme_fc_lock, flags);

	list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
		if (lport->localport.node_name != pinfo->node_name ||
		    lport->localport.port_name != pinfo->port_name)
			continue;

		if (lport->dev != dev) {
			lport = ERR_PTR(-EXDEV);
			goto out_done;
		}

		if (lport->localport.port_state != FC_OBJSTATE_DELETED) {
			lport = ERR_PTR(-EEXIST);
			goto out_done;
		}

		if (!nvme_fc_lport_get(lport)) {
			/*
			 * fails if ref cnt already 0. If so,
			 * act as if lport already deleted
			 */
			lport = NULL;
			goto out_done;
		}

		/* resume the lport */

		lport->ops = ops;
		lport->localport.port_role = pinfo->port_role;
		lport->localport.port_id = pinfo->port_id;
		lport->localport.port_state = FC_OBJSTATE_ONLINE;

		spin_unlock_irqrestore(&nvme_fc_lock, flags);

		return lport;
	}

	lport = NULL;

out_done:
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	return lport;
}

/**
 * nvme_fc_register_localport - transport entry point called by an
 *                              LLDD to register the existence of a NVME
 *                              host FC port.
 * @pinfo:     pointer to information about the port to be registered
 * @template:  LLDD entrypoints and operational parameters for the port
 * @dev:       physical hardware device node port corresponds to. Will be
 *             used for DMA mappings
 * @portptr:   pointer to a local port pointer. Upon success, the routine
 *             will allocate a nvme_fc_local_port structure and place its
 *             address in the local port pointer. Upon failure, local port
 *             pointer will be set to 0.
 *
 * Returns:
 * a completion status. Must be 0 upon success; a negative errno
 * (ex: -ENXIO) upon failure.
 */
int
nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
			struct nvme_fc_port_template *template,
			struct device *dev,
			struct nvme_fc_local_port **portptr)
{
	struct nvme_fc_lport *newrec;
	unsigned long flags;
	int ret, idx;

	if (!template->localport_delete || !template->remoteport_delete ||
	    !template->ls_req || !template->fcp_io ||
	    !template->ls_abort || !template->fcp_abort ||
	    !template->max_hw_queues || !template->max_sgl_segments ||
	    !template->max_dif_sgl_segments || !template->dma_boundary) {
		ret = -EINVAL;
		goto out_reghost_failed;
	}

	/*
	 * look to see if there is already a localport that had been
	 * deregistered and in the process of waiting for all the
	 * references to fully be removed. If the references haven't
	 * expired, we can simply re-enable the localport. Remoteports
	 * and controller reconnections should resume naturally.
	 */
	newrec = nvme_fc_attach_to_unreg_lport(pinfo, template, dev);

	/* found an lport, but something about its state is bad */
	if (IS_ERR(newrec)) {
		ret = PTR_ERR(newrec);
		goto out_reghost_failed;

	/* found existing lport, which was resumed */
	} else if (newrec) {
		*portptr = &newrec->localport;
		return 0;
	}

	/* nothing found - allocate a new localport struct */

	newrec = kmalloc(sizeof(*newrec) + template->local_priv_sz,
			 GFP_KERNEL);
	if (!newrec) {
		ret = -ENOMEM;
		goto out_reghost_failed;
	}

	idx = ida_alloc(&nvme_fc_local_port_cnt, GFP_KERNEL);
	if (idx < 0) {
		ret = -ENOSPC;
		goto out_fail_kfree;
	}

	if (!get_device(dev) && dev) {
		ret = -ENODEV;
		goto out_ida_put;
	}

	INIT_LIST_HEAD(&newrec->port_list);
	INIT_LIST_HEAD(&newrec->endp_list);
	kref_init(&newrec->ref);
	atomic_set(&newrec->act_rport_cnt, 0);
	newrec->ops = template;
	newrec->dev = dev;
	ida_init(&newrec->endp_cnt);
	if (template->local_priv_sz)
		newrec->localport.private = &newrec[1];
	else
		newrec->localport.private = NULL;
	newrec->localport.node_name = pinfo->node_name;
	newrec->localport.port_name = pinfo->port_name;
	newrec->localport.port_role = pinfo->port_role;
	newrec->localport.port_id = pinfo->port_id;
	newrec->localport.port_state = FC_OBJSTATE_ONLINE;
	newrec->localport.port_num = idx;

	spin_lock_irqsave(&nvme_fc_lock, flags);
	list_add_tail(&newrec->port_list, &nvme_fc_lport_list);
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	if (dev)
		dma_set_seg_boundary(dev, template->dma_boundary);

	*portptr = &newrec->localport;
	return 0;

out_ida_put:
	ida_free(&nvme_fc_local_port_cnt, idx);
out_fail_kfree:
	kfree(newrec);
out_reghost_failed:
	*portptr = NULL;

	return ret;
}
EXPORT_SYMBOL_GPL(nvme_fc_register_localport);

/**
 * nvme_fc_unregister_localport - transport entry point called by an
 *                              LLDD to deregister/remove a previously
 *                              registered NVME host FC port.
 * @portptr: pointer to the (registered) local port that is to be deregistered.
 *
 * Returns:
 * a completion status. Must be 0 upon success; a negative errno
 * (ex: -ENXIO) upon failure.
 */
int
nvme_fc_unregister_localport(struct nvme_fc_local_port *portptr)
{
	struct nvme_fc_lport *lport = localport_to_lport(portptr);
	unsigned long flags;

	if (!portptr)
		return -EINVAL;

	spin_lock_irqsave(&nvme_fc_lock, flags);

	if (portptr->port_state != FC_OBJSTATE_ONLINE) {
		spin_unlock_irqrestore(&nvme_fc_lock, flags);
		return -EINVAL;
	}
	portptr->port_state = FC_OBJSTATE_DELETED;

	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	if (atomic_read(&lport->act_rport_cnt) == 0)
		lport->ops->localport_delete(&lport->localport);

	nvme_fc_lport_put(lport);

	return 0;
}
EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport);

/*
 * TRADDR strings, per FC-NVME are fixed format:
 *     "nn-0x<16hexdigits>:pn-0x<16hexdigits>" - 43 characters
 * udev event will only differ by prefix of what field is
 * being specified:
 *     "NVMEFC_HOST_TRADDR=" or "NVMEFC_TRADDR=" - 19 max characters
 * 19 + 43 + null_fudge = 64 characters
 */
#define FCNVME_TRADDR_LENGTH		64

static void
nvme_fc_signal_discovery_scan(struct nvme_fc_lport *lport,
		struct nvme_fc_rport *rport)
{
	char hostaddr[FCNVME_TRADDR_LENGTH];	/* NVMEFC_HOST_TRADDR=...*/
	char tgtaddr[FCNVME_TRADDR_LENGTH];	/* NVMEFC_TRADDR=...*/
	char *envp[4] = { "FC_EVENT=nvmediscovery", hostaddr, tgtaddr, NULL };

	if (!(rport->remoteport.port_role & FC_PORT_ROLE_NVME_DISCOVERY))
		return;

	snprintf(hostaddr, sizeof(hostaddr),
		"NVMEFC_HOST_TRADDR=nn-0x%016llx:pn-0x%016llx",
		lport->localport.node_name, lport->localport.port_name);
	snprintf(tgtaddr, sizeof(tgtaddr),
		"NVMEFC_TRADDR=nn-0x%016llx:pn-0x%016llx",
		rport->remoteport.node_name, rport->remoteport.port_name);
	kobject_uevent_env(&fc_udev_device->kobj, KOBJ_CHANGE, envp);
}

static void
nvme_fc_free_rport(struct kref *ref)
{
	struct nvme_fc_rport *rport =
		container_of(ref, struct nvme_fc_rport, ref);
	struct nvme_fc_lport *lport =
		localport_to_lport(rport->remoteport.localport);
	unsigned long flags;

	WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED);
	WARN_ON(!list_empty(&rport->ctrl_list));

	/* remove from lport list */
	spin_lock_irqsave(&nvme_fc_lock, flags);
	list_del(&rport->endp_list);
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	WARN_ON(!list_empty(&rport->disc_list));
	ida_free(&lport->endp_cnt, rport->remoteport.port_num);

	kfree(rport);

	nvme_fc_lport_put(lport);
}

static void
nvme_fc_rport_put(struct nvme_fc_rport *rport)
{
	kref_put(&rport->ref, nvme_fc_free_rport);
}

static int
nvme_fc_rport_get(struct nvme_fc_rport *rport)
{
	return kref_get_unless_zero(&rport->ref);
}

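/*
 * Called when connectivity to the remote port has been re-established.
 * Controllers that were holding off reconnect attempts are scheduled
 * for an immediate connect; controllers already resetting will
 * reconnect on their own once the reset completes.
 */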
static void
nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
{
	switch (ctrl->ctrl.state) {
	case NVME_CTRL_NEW:
	case NVME_CTRL_CONNECTING:
		/*
		 * As all reconnects were suppressed, schedule a
		 * connect.
		 */
		dev_info(ctrl->ctrl.device,
			"NVME-FC{%d}: connectivity re-established. "
			"Attempting reconnect\n", ctrl->cnum);

		queue_delayed_work(nvme_wq, &ctrl->connect_work, 0);
		break;

	case NVME_CTRL_RESETTING:
		/*
		 * Controller is already in the process of terminating the
		 * association. No need to do anything further. The reconnect
		 * step will naturally occur after the reset completes.
		 */
		break;

	default:
		/* no action to take - let it delete */
		break;
	}
}

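/*
 * Look for a remoteport with the same WWNN/WWPN that was deregistered
 * and is still within its dev_loss_tmo window. If one is found, take a
 * reference, bring it back online, and resume its controllers. Returns
 * the resumed rport, NULL if there is no match, or an ERR_PTR if the
 * match is in a bad state.
 */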
static struct nvme_fc_rport *
nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport,
				struct nvme_fc_port_info *pinfo)
{
	struct nvme_fc_rport *rport;
	struct nvme_fc_ctrl *ctrl;
	unsigned long flags;

	spin_lock_irqsave(&nvme_fc_lock, flags);

	list_for_each_entry(rport, &lport->endp_list, endp_list) {
		if (rport->remoteport.node_name != pinfo->node_name ||
		    rport->remoteport.port_name != pinfo->port_name)
			continue;

		if (!nvme_fc_rport_get(rport)) {
			rport = ERR_PTR(-ENOLCK);
			goto out_done;
		}

		spin_unlock_irqrestore(&nvme_fc_lock, flags);

		spin_lock_irqsave(&rport->lock, flags);

		/* has it been unregistered */
		if (rport->remoteport.port_state != FC_OBJSTATE_DELETED) {
			/* means lldd called us twice */
			spin_unlock_irqrestore(&rport->lock, flags);
			nvme_fc_rport_put(rport);
			return ERR_PTR(-ESTALE);
		}

		rport->remoteport.port_role = pinfo->port_role;
		rport->remoteport.port_id = pinfo->port_id;
		rport->remoteport.port_state = FC_OBJSTATE_ONLINE;
		rport->dev_loss_end = 0;

		/*
		 * kick off a reconnect attempt on all associations to the
		 * remote port. A successful reconnect will resume i/o.
		 */
		list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
			nvme_fc_resume_controller(ctrl);

		spin_unlock_irqrestore(&rport->lock, flags);

		return rport;
	}

	rport = NULL;

out_done:
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	return rport;
}

static inline void
__nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport,
			struct nvme_fc_port_info *pinfo)
{
	if (pinfo->dev_loss_tmo)
		rport->remoteport.dev_loss_tmo = pinfo->dev_loss_tmo;
	else
		rport->remoteport.dev_loss_tmo = NVME_FC_DEFAULT_DEV_LOSS_TMO;
}

/**
 * nvme_fc_register_remoteport - transport entry point called by an
 *                              LLDD to register the existence of a NVME
 *                              subsystem FC port on its fabric.
 * @localport: pointer to the (registered) local port that the remote
 *             subsystem port is connected to.
 * @pinfo:     pointer to information about the port to be registered
 * @portptr:   pointer to a remote port pointer. Upon success, the routine
 *             will allocate a nvme_fc_remote_port structure and place its
 *             address in the remote port pointer. Upon failure, remote port
 *             pointer will be set to 0.
 *
 * Returns:
 * a completion status. Must be 0 upon success; a negative errno
 * (ex: -ENXIO) upon failure.
 */
int
nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
				struct nvme_fc_port_info *pinfo,
				struct nvme_fc_remote_port **portptr)
{
	struct nvme_fc_lport *lport = localport_to_lport(localport);
	struct nvme_fc_rport *newrec;
	unsigned long flags;
	int ret, idx;

	if (!nvme_fc_lport_get(lport)) {
		ret = -ESHUTDOWN;
		goto out_reghost_failed;
	}

	/*
	 * look to see if there is already a remoteport that is waiting
	 * for a reconnect (within dev_loss_tmo) with the same WWN's.
	 * If so, transition to it and reconnect.
	 */
	newrec = nvme_fc_attach_to_suspended_rport(lport, pinfo);

	/* found an rport, but something about its state is bad */
	if (IS_ERR(newrec)) {
		ret = PTR_ERR(newrec);
		goto out_lport_put;

	/* found existing rport, which was resumed */
	} else if (newrec) {
		nvme_fc_lport_put(lport);
		__nvme_fc_set_dev_loss_tmo(newrec, pinfo);
		nvme_fc_signal_discovery_scan(lport, newrec);
		*portptr = &newrec->remoteport;
		return 0;
	}

	/* nothing found - allocate a new remoteport struct */

	newrec = kmalloc(sizeof(*newrec) + lport->ops->remote_priv_sz,
			 GFP_KERNEL);
	if (!newrec) {
		ret = -ENOMEM;
		goto out_lport_put;
	}

	idx = ida_alloc(&lport->endp_cnt, GFP_KERNEL);
	if (idx < 0) {
		ret = -ENOSPC;
		goto out_kfree_rport;
	}

	INIT_LIST_HEAD(&newrec->endp_list);
	INIT_LIST_HEAD(&newrec->ctrl_list);
	INIT_LIST_HEAD(&newrec->ls_req_list);
	INIT_LIST_HEAD(&newrec->disc_list);
	kref_init(&newrec->ref);
	atomic_set(&newrec->act_ctrl_cnt, 0);
	spin_lock_init(&newrec->lock);
	newrec->remoteport.localport = &lport->localport;
	INIT_LIST_HEAD(&newrec->ls_rcv_list);
	newrec->dev = lport->dev;
	newrec->lport = lport;
	if (lport->ops->remote_priv_sz)
		newrec->remoteport.private = &newrec[1];
	else
		newrec->remoteport.private = NULL;
	newrec->remoteport.port_role = pinfo->port_role;
	newrec->remoteport.node_name = pinfo->node_name;
	newrec->remoteport.port_name = pinfo->port_name;
	newrec->remoteport.port_id = pinfo->port_id;
	newrec->remoteport.port_state = FC_OBJSTATE_ONLINE;
	newrec->remoteport.port_num = idx;
	__nvme_fc_set_dev_loss_tmo(newrec, pinfo);
	INIT_WORK(&newrec->lsrcv_work, nvme_fc_handle_ls_rqst_work);

	spin_lock_irqsave(&nvme_fc_lock, flags);
	list_add_tail(&newrec->endp_list, &lport->endp_list);
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	nvme_fc_signal_discovery_scan(lport, newrec);

	*portptr = &newrec->remoteport;
	return 0;

out_kfree_rport:
	kfree(newrec);
out_lport_put:
	nvme_fc_lport_put(lport);
out_reghost_failed:
	*portptr = NULL;
	return ret;
}
EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport);

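/*
 * Abort all outstanding LS requests on an rport. The rport lock must
 * be dropped around the LLDD's ls_abort() call, so the list walk is
 * restarted from the head after each abort; FCOP_FLAGS_TERMIO marks
 * the entries already aborted so the loop terminates.
 */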
static int
nvme_fc_abort_lsops(struct nvme_fc_rport *rport)
{
	struct nvmefc_ls_req_op *lsop;
	unsigned long flags;

restart:
	spin_lock_irqsave(&rport->lock, flags);

	list_for_each_entry(lsop, &rport->ls_req_list, lsreq_list) {
		if (!(lsop->flags & FCOP_FLAGS_TERMIO)) {
			lsop->flags |= FCOP_FLAGS_TERMIO;
			spin_unlock_irqrestore(&rport->lock, flags);
			rport->lport->ops->ls_abort(&rport->lport->localport,
						&rport->remoteport,
						&lsop->ls_req);
			goto restart;
		}
	}
	spin_unlock_irqrestore(&rport->lock, flags);

	return 0;
}

static void
nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
{
	dev_info(ctrl->ctrl.device,
		"NVME-FC{%d}: controller connectivity lost. Awaiting "
		"Reconnect", ctrl->cnum);

	switch (ctrl->ctrl.state) {
	case NVME_CTRL_NEW:
	case NVME_CTRL_LIVE:
		/*
		 * Schedule a controller reset. The reset will terminate the
		 * association and schedule the reconnect timer. Reconnects
		 * will be attempted until either the ctlr_loss_tmo
		 * (max_retries * connect_delay) expires or the remoteport's
		 * dev_loss_tmo expires.
		 */
		if (nvme_reset_ctrl(&ctrl->ctrl)) {
			dev_warn(ctrl->ctrl.device,
				"NVME-FC{%d}: Couldn't schedule reset.\n",
				ctrl->cnum);
			nvme_delete_ctrl(&ctrl->ctrl);
		}
		break;

	case NVME_CTRL_CONNECTING:
		/*
		 * The association has already been terminated and the
		 * controller is attempting reconnects. No need to do anything
		 * further. Reconnects will be attempted until either the
		 * ctlr_loss_tmo (max_retries * connect_delay) expires or the
		 * remoteport's dev_loss_tmo expires.
		 */
		break;

	case NVME_CTRL_RESETTING:
		/*
		 * Controller is already in the process of terminating the
		 * association. No need to do anything further. The reconnect
		 * step will kick in naturally after the association is
		 * terminated.
		 */
		break;

	case NVME_CTRL_DELETING:
	case NVME_CTRL_DELETING_NOIO:
	default:
		/* no action to take - let it delete */
		break;
	}
}

/**
 * nvme_fc_unregister_remoteport - transport entry point called by an
 *                              LLDD to deregister/remove a previously
 *                              registered NVME subsystem FC port.
 * @portptr: pointer to the (registered) remote port that is to be
 *           deregistered.
 *
 * Returns:
 * a completion status. Must be 0 upon success; a negative errno
 * (ex: -ENXIO) upon failure.
 */
int
nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr)
{
	struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
	struct nvme_fc_ctrl *ctrl;
	unsigned long flags;

	if (!portptr)
		return -EINVAL;

	spin_lock_irqsave(&rport->lock, flags);

	if (portptr->port_state != FC_OBJSTATE_ONLINE) {
		spin_unlock_irqrestore(&rport->lock, flags);
		return -EINVAL;
	}
	portptr->port_state = FC_OBJSTATE_DELETED;

	rport->dev_loss_end = jiffies + (portptr->dev_loss_tmo * HZ);

	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
		/* if dev_loss_tmo==0, dev loss is immediate */
		if (!portptr->dev_loss_tmo) {
			dev_warn(ctrl->ctrl.device,
				"NVME-FC{%d}: controller connectivity lost.\n",
				ctrl->cnum);
			nvme_delete_ctrl(&ctrl->ctrl);
		} else
			nvme_fc_ctrl_connectivity_loss(ctrl);
	}

	spin_unlock_irqrestore(&rport->lock, flags);

	nvme_fc_abort_lsops(rport);

	if (atomic_read(&rport->act_ctrl_cnt) == 0)
		rport->lport->ops->remoteport_delete(portptr);

	/*
	 * release the reference, which will allow the rport to be
	 * fully torn down once all controllers have gone away, which
	 * should only occur after dev_loss_tmo expires.
	 */
	nvme_fc_rport_put(rport);

	return 0;
}
EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport);

/**
 * nvme_fc_rescan_remoteport - transport entry point called by an
 *                             LLDD to request a nvme device rescan.
 * @remoteport: pointer to the (registered) remote port that is to be
 *              rescanned.
 *
 * Returns: N/A
 */
void
nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport)
{
	struct nvme_fc_rport *rport = remoteport_to_rport(remoteport);

	nvme_fc_signal_discovery_scan(rport->lport, rport);
}
EXPORT_SYMBOL_GPL(nvme_fc_rescan_remoteport);

int
nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *portptr,
			u32 dev_loss_tmo)
{
	struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
	unsigned long flags;

	spin_lock_irqsave(&rport->lock, flags);

	if (portptr->port_state != FC_OBJSTATE_ONLINE) {
		spin_unlock_irqrestore(&rport->lock, flags);
		return -EINVAL;
	}

	/* a dev_loss_tmo of 0 (immediate) is allowed to be set */
	rport->remoteport.dev_loss_tmo = dev_loss_tmo;

	spin_unlock_irqrestore(&rport->lock, flags);

	return 0;
}
EXPORT_SYMBOL_GPL(nvme_fc_set_remoteport_devloss);


/* *********************** FC-NVME DMA Handling **************************** */

/*
 * The fcloop device passes in a NULL device pointer. Real LLD's will
 * pass in a valid device pointer. If NULL is passed to the dma mapping
 * routines, depending on the platform, it may or may not succeed, and
 * may crash.
 *
 * As such:
 * Wrap all the dma routines and check the dev pointer.
 *
 * For simple mappings (those that return just a dma address), we'll
 * noop them, returning a dma address of 0.
 *
 * On more complex mappings (dma_map_sg), a pseudo routine fills
 * in the scatter list, setting all dma addresses to 0.
 */

static inline dma_addr_t
fc_dma_map_single(struct device *dev, void *ptr, size_t size,
		enum dma_data_direction dir)
{
	return dev ? dma_map_single(dev, ptr, size, dir) : (dma_addr_t)0L;
}

static inline int
fc_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
	return dev ? dma_mapping_error(dev, dma_addr) : 0;
}

static inline void
fc_dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
	enum dma_data_direction dir)
{
	if (dev)
		dma_unmap_single(dev, addr, size, dir);
}

static inline void
fc_dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
		enum dma_data_direction dir)
{
	if (dev)
		dma_sync_single_for_cpu(dev, addr, size, dir);
}

static inline void
fc_dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size,
		enum dma_data_direction dir)
{
	if (dev)
		dma_sync_single_for_device(dev, addr, size, dir);
}

/* pseudo dma_map_sg call */
static int
fc_map_sg(struct scatterlist *sg, int nents)
{
	struct scatterlist *s;
	int i;

	WARN_ON(nents == 0 || sg[0].length == 0);

	for_each_sg(sg, s, nents, i) {
		s->dma_address = 0L;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
		s->dma_length = s->length;
#endif
	}
	return nents;
}

static inline int
fc_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
		enum dma_data_direction dir)
{
	return dev ? dma_map_sg(dev, sg, nents, dir) : fc_map_sg(sg, nents);
}

static inline void
fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
		enum dma_data_direction dir)
{
	if (dev)
		dma_unmap_sg(dev, sg, nents, dir);
}

/* *********************** FC-NVME LS Handling **************************** */

static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *);
static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *);

static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);

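/*
 * Common completion for an LS request: unlink the op from the rport's
 * ls_req_list, unmap the request/response DMA buffer, and drop the
 * rport reference that was taken when the request was sent.
 */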
static void
__nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop)
{
	struct nvme_fc_rport *rport = lsop->rport;
	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
	unsigned long flags;

	spin_lock_irqsave(&rport->lock, flags);

	if (!lsop->req_queued) {
		spin_unlock_irqrestore(&rport->lock, flags);
		return;
	}

	list_del(&lsop->lsreq_list);

	lsop->req_queued = false;

	spin_unlock_irqrestore(&rport->lock, flags);

	fc_dma_unmap_single(rport->dev, lsreq->rqstdma,
				  (lsreq->rqstlen + lsreq->rsplen),
				  DMA_BIDIRECTIONAL);

	nvme_fc_rport_put(rport);
}

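/*
 * Common send path for an LS request: map the contiguous
 * request+response buffer for DMA, queue the op on the rport, and hand
 * it to the LLDD's ls_req() entry point. The "done" callback is
 * invoked by the LLDD when the exchange completes.
 */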
static int
__nvme_fc_send_ls_req(struct nvme_fc_rport *rport,
		struct nvmefc_ls_req_op *lsop,
		void (*done)(struct nvmefc_ls_req *req, int status))
{
	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
	unsigned long flags;
	int ret = 0;

	if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
		return -ECONNREFUSED;

	if (!nvme_fc_rport_get(rport))
		return -ESHUTDOWN;

	lsreq->done = done;
	lsop->rport = rport;
	lsop->req_queued = false;
	INIT_LIST_HEAD(&lsop->lsreq_list);
	init_completion(&lsop->ls_done);

	lsreq->rqstdma = fc_dma_map_single(rport->dev, lsreq->rqstaddr,
				  lsreq->rqstlen + lsreq->rsplen,
				  DMA_BIDIRECTIONAL);
	if (fc_dma_mapping_error(rport->dev, lsreq->rqstdma)) {
		ret = -EFAULT;
		goto out_putrport;
	}
	lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen;

	spin_lock_irqsave(&rport->lock, flags);

	list_add_tail(&lsop->lsreq_list, &rport->ls_req_list);

	lsop->req_queued = true;

	spin_unlock_irqrestore(&rport->lock, flags);

	ret = rport->lport->ops->ls_req(&rport->lport->localport,
					&rport->remoteport, lsreq);
	if (ret)
		goto out_unlink;

	return 0;

out_unlink:
	lsop->ls_error = ret;
	spin_lock_irqsave(&rport->lock, flags);
	lsop->req_queued = false;
	list_del(&lsop->lsreq_list);
	spin_unlock_irqrestore(&rport->lock, flags);
	fc_dma_unmap_single(rport->dev, lsreq->rqstdma,
				  (lsreq->rqstlen + lsreq->rsplen),
				  DMA_BIDIRECTIONAL);
out_putrport:
	nvme_fc_rport_put(rport);

	return ret;
}

static void
nvme_fc_send_ls_req_done(struct nvmefc_ls_req *lsreq, int status)
{
	struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq);

	lsop->ls_error = status;
	complete(&lsop->ls_done);
}

static int
nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop)
{
	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
	struct fcnvme_ls_rjt *rjt = lsreq->rspaddr;
	int ret;

	ret = __nvme_fc_send_ls_req(rport, lsop, nvme_fc_send_ls_req_done);

	if (!ret) {
		/*
		 * No timeout/not interruptible as we need the struct
		 * to exist until the lldd calls us back. Thus mandate
		 * wait until the lldd calls back. The lldd is
		 * responsible for any timeout action.
		 */
		wait_for_completion(&lsop->ls_done);

		__nvme_fc_finish_ls_req(lsop);

		ret = lsop->ls_error;
	}

	if (ret)
		return ret;

	/* ACC or RJT payload ? */
	if (rjt->w0.ls_cmd == FCNVME_LS_RJT)
		return -ENXIO;

	return 0;
}

static int
nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport,
		struct nvmefc_ls_req_op *lsop,
		void (*done)(struct nvmefc_ls_req *req, int status))
{
	/* don't wait for completion */

	return __nvme_fc_send_ls_req(rport, lsop, done);
}

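/*
 * Build and send a Create Association LS for the admin queue, then
 * validate the ACC response. On success, record the association id and
 * the admin queue's connection id and mark the queue connected.
 */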
static int
nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
	struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio)
{
	struct nvmefc_ls_req_op *lsop;
	struct nvmefc_ls_req *lsreq;
	struct fcnvme_ls_cr_assoc_rqst *assoc_rqst;
	struct fcnvme_ls_cr_assoc_acc *assoc_acc;
	unsigned long flags;
	int ret, fcret = 0;

	lsop = kzalloc((sizeof(*lsop) +
			 sizeof(*assoc_rqst) + sizeof(*assoc_acc) +
			 ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
	if (!lsop) {
		dev_info(ctrl->ctrl.device,
			"NVME-FC{%d}: send Create Association failed: ENOMEM\n",
			ctrl->cnum);
		ret = -ENOMEM;
		goto out_no_memory;
	}

	assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)&lsop[1];
	assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[1];
	lsreq = &lsop->ls_req;
	if (ctrl->lport->ops->lsrqst_priv_sz)
		lsreq->private = &assoc_acc[1];
	else
		lsreq->private = NULL;

	assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION;
	assoc_rqst->desc_list_len =
			cpu_to_be32(sizeof(struct fcnvme_lsdesc_cr_assoc_cmd));

	assoc_rqst->assoc_cmd.desc_tag =
			cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD);
	assoc_rqst->assoc_cmd.desc_len =
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_cr_assoc_cmd));

	assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
	assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize - 1);
	/* Linux supports only Dynamic controllers */
	assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff);
	uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id);
	strncpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn,
		min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE));
	strncpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn,
		min(FCNVME_ASSOC_SUBNQN_LEN, NVMF_NQN_SIZE));

	lsop->queue = queue;
	lsreq->rqstaddr = assoc_rqst;
	lsreq->rqstlen = sizeof(*assoc_rqst);
	lsreq->rspaddr = assoc_acc;
	lsreq->rsplen = sizeof(*assoc_acc);
	lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;

	ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
	if (ret)
		goto out_free_buffer;

	/* process connect LS completion */

	/* validate the ACC response */
	if (assoc_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC)
		fcret = VERR_LSACC;
	else if (assoc_acc->hdr.desc_list_len !=
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_ls_cr_assoc_acc)))
		fcret = VERR_CR_ASSOC_ACC_LEN;
	else if (assoc_acc->hdr.rqst.desc_tag !=
			cpu_to_be32(FCNVME_LSDESC_RQST))
		fcret = VERR_LSDESC_RQST;
	else if (assoc_acc->hdr.rqst.desc_len !=
			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)))
		fcret = VERR_LSDESC_RQST_LEN;
	else if (assoc_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_ASSOCIATION)
		fcret = VERR_CR_ASSOC;
	else if (assoc_acc->associd.desc_tag !=
			cpu_to_be32(FCNVME_LSDESC_ASSOC_ID))
		fcret = VERR_ASSOC_ID;
	else if (assoc_acc->associd.desc_len !=
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_assoc_id)))
		fcret = VERR_ASSOC_ID_LEN;
	else if (assoc_acc->connectid.desc_tag !=
			cpu_to_be32(FCNVME_LSDESC_CONN_ID))
		fcret = VERR_CONN_ID;
	else if (assoc_acc->connectid.desc_len !=
			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id)))
		fcret = VERR_CONN_ID_LEN;

	if (fcret) {
		ret = -EBADF;
		dev_err(ctrl->dev,
			"q %d Create Association LS failed: %s\n",
			queue->qnum, validation_errors[fcret]);
	} else {
		spin_lock_irqsave(&ctrl->lock, flags);
		ctrl->association_id =
			be64_to_cpu(assoc_acc->associd.association_id);
		queue->connection_id =
			be64_to_cpu(assoc_acc->connectid.connection_id);
		set_bit(NVME_FC_Q_CONNECTED, &queue->flags);
		spin_unlock_irqrestore(&ctrl->lock, flags);
	}

out_free_buffer:
	kfree(lsop);
out_no_memory:
	if (ret)
		dev_err(ctrl->dev,
			"queue %d connect admin queue failed (%d).\n",
			queue->qnum, ret);
	return ret;
}

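/*
 * Build and send a Create Connection LS for an I/O queue on an
 * existing association, then validate the ACC response. On success,
 * record the queue's connection id and mark the queue connected.
 */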
static int
nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
			u16 qsize, u16 ersp_ratio)
{
	struct nvmefc_ls_req_op *lsop;
	struct nvmefc_ls_req *lsreq;
	struct fcnvme_ls_cr_conn_rqst *conn_rqst;
	struct fcnvme_ls_cr_conn_acc *conn_acc;
	int ret, fcret = 0;

	lsop = kzalloc((sizeof(*lsop) +
			 sizeof(*conn_rqst) + sizeof(*conn_acc) +
			 ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
	if (!lsop) {
		dev_info(ctrl->ctrl.device,
			"NVME-FC{%d}: send Create Connection failed: ENOMEM\n",
			ctrl->cnum);
		ret = -ENOMEM;
		goto out_no_memory;
	}

	conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)&lsop[1];
	conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1];
	lsreq = &lsop->ls_req;
	if (ctrl->lport->ops->lsrqst_priv_sz)
		lsreq->private = (void *)&conn_acc[1];
	else
		lsreq->private = NULL;

	conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION;
	conn_rqst->desc_list_len = cpu_to_be32(
				sizeof(struct fcnvme_lsdesc_assoc_id) +
				sizeof(struct fcnvme_lsdesc_cr_conn_cmd));

	conn_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID);
	conn_rqst->associd.desc_len =
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_assoc_id));
	conn_rqst->associd.association_id = cpu_to_be64(ctrl->association_id);
	conn_rqst->connect_cmd.desc_tag =
			cpu_to_be32(FCNVME_LSDESC_CREATE_CONN_CMD);
	conn_rqst->connect_cmd.desc_len =
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_cr_conn_cmd));
	conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
	conn_rqst->connect_cmd.qid = cpu_to_be16(queue->qnum);
	conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize - 1);

	lsop->queue = queue;
	lsreq->rqstaddr = conn_rqst;
	lsreq->rqstlen = sizeof(*conn_rqst);
	lsreq->rspaddr = conn_acc;
	lsreq->rsplen = sizeof(*conn_acc);
	lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;

	ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
	if (ret)
		goto out_free_buffer;

	/* process connect LS completion */

	/* validate the ACC response */
	if (conn_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC)
		fcret = VERR_LSACC;
	else if (conn_acc->hdr.desc_list_len !=
			fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc)))
		fcret = VERR_CR_CONN_ACC_LEN;
	else if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST))
		fcret = VERR_LSDESC_RQST;
	else if (conn_acc->hdr.rqst.desc_len !=
			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)))
		fcret = VERR_LSDESC_RQST_LEN;
	else if (conn_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_CONNECTION)
		fcret = VERR_CR_CONN;
	else if (conn_acc->connectid.desc_tag !=
			cpu_to_be32(FCNVME_LSDESC_CONN_ID))
		fcret = VERR_CONN_ID;
	else if (conn_acc->connectid.desc_len !=
			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id)))
		fcret = VERR_CONN_ID_LEN;

	if (fcret) {
		ret = -EBADF;
		dev_err(ctrl->dev,
			"q %d Create I/O Connection LS failed: %s\n",
			queue->qnum, validation_errors[fcret]);
	} else {
		queue->connection_id =
			be64_to_cpu(conn_acc->connectid.connection_id);
		set_bit(NVME_FC_Q_CONNECTED, &queue->flags);
	}

out_free_buffer:
	kfree(lsop);
out_no_memory:
	if (ret)
		dev_err(ctrl->dev,
			"queue %d connect I/O queue failed (%d).\n",
			queue->qnum, ret);
	return ret;
}

static void
nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status)
{
	struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq);

	__nvme_fc_finish_ls_req(lsop);

	/* fc-nvme initiator doesn't care about success or failure of cmd */

	kfree(lsop);
}

/*
 * This routine sends a FC-NVME LS to disconnect (aka terminate)
 * the FC-NVME Association. Terminating the association also
 * terminates the FC-NVME connections (per queue, both admin and io
 * queues) that are part of the association. E.g. things are torn
 * down, and the related FC-NVME Association ID and Connection IDs
 * become invalid.
 *
 * The behavior of the fc-nvme initiator is such that its
 * understanding of the association and connections will implicitly
 * be torn down. The action is implicit as it may be due to a loss of
 * connectivity with the fc-nvme target, so you may never get a
 * response even if you tried. As such, the action of this routine
 * is to asynchronously send the LS, ignore any results of the LS, and
 * continue on with terminating the association. If the fc-nvme target
 * is present and receives the LS, it too can tear down.
 */
static void
nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
{
	struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst;
	struct fcnvme_ls_disconnect_assoc_acc *discon_acc;
	struct nvmefc_ls_req_op *lsop;
	struct nvmefc_ls_req *lsreq;
	int ret;

	lsop = kzalloc((sizeof(*lsop) +
			sizeof(*discon_rqst) + sizeof(*discon_acc) +
			ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
	if (!lsop) {
		dev_info(ctrl->ctrl.device,
			"NVME-FC{%d}: send Disconnect Association "
			"failed: ENOMEM\n",
			ctrl->cnum);
		return;
	}

	discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)&lsop[1];
	discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1];
	lsreq = &lsop->ls_req;
	if (ctrl->lport->ops->lsrqst_priv_sz)
		lsreq->private = (void *)&discon_acc[1];
	else
		lsreq->private = NULL;

	nvmefc_fmt_lsreq_discon_assoc(lsreq, discon_rqst, discon_acc,
				ctrl->association_id);

	ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop,
				nvme_fc_disconnect_assoc_done);
	if (ret)
		kfree(lsop);
}

static void
nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp)
{
	struct nvmefc_ls_rcv_op *lsop = lsrsp->nvme_fc_private;
	struct nvme_fc_rport *rport = lsop->rport;
	struct nvme_fc_lport *lport = rport->lport;
	unsigned long flags;

	spin_lock_irqsave(&rport->lock, flags);
	list_del(&lsop->lsrcv_list);
	spin_unlock_irqrestore(&rport->lock, flags);

	fc_dma_sync_single_for_cpu(lport->dev, lsop->rspdma,
				sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
	fc_dma_unmap_single(lport->dev, lsop->rspdma,
			sizeof(*lsop->rspbuf), DMA_TO_DEVICE);

	kfree(lsop->rspbuf);
	kfree(lsop->rqstbuf);
	kfree(lsop);

	nvme_fc_rport_put(rport);
}

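/*
 * Transmit a response for a received LS request via the LLDD's
 * xmt_ls_rsp() entry point. If the LLDD rejects the transmit, run the
 * done handler locally so the op's resources are still released.
 */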
static void
nvme_fc_xmt_ls_rsp(struct nvmefc_ls_rcv_op *lsop)
{
	struct nvme_fc_rport *rport = lsop->rport;
	struct nvme_fc_lport *lport = rport->lport;
	struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0;
	int ret;

	fc_dma_sync_single_for_device(lport->dev, lsop->rspdma,
			sizeof(*lsop->rspbuf), DMA_TO_DEVICE);

	ret = lport->ops->xmt_ls_rsp(&lport->localport, &rport->remoteport,
				     lsop->lsrsp);
	if (ret) {
		dev_warn(lport->dev,
			"LLDD rejected LS RSP xmt: LS %d status %d\n",
			w0->ls_cmd, ret);
		nvme_fc_xmt_ls_rsp_done(lsop->lsrsp);
		return;
	}
}

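/*
 * Find the controller whose association id matches a received
 * Disconnect Association LS, stash the LS on it, and return the
 * controller with a reference held for the caller. If an earlier
 * Disconnect LS was already pending on the controller, reject the
 * older one.
 */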
1507 | static struct nvme_fc_ctrl * |
1508 | nvme_fc_match_disconn_ls(struct nvme_fc_rport *rport, |
1509 | struct nvmefc_ls_rcv_op *lsop) |
1510 | { |
1511 | struct fcnvme_ls_disconnect_assoc_rqst *rqst = |
1512 | &lsop->rqstbuf->rq_dis_assoc; |
1513 | struct nvme_fc_ctrl *ctrl, *ret = NULL; |
1514 | struct nvmefc_ls_rcv_op *oldls = NULL; |
1515 | u64 association_id = be64_to_cpu(rqst->associd.association_id); |
1516 | unsigned long flags; |
1517 | |
1518 | spin_lock_irqsave(&rport->lock, flags); |
1519 | |
1520 | list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) { |
1521 | if (!nvme_fc_ctrl_get(ctrl)) |
1522 | continue; |
1523 | spin_lock(lock: &ctrl->lock); |
1524 | if (association_id == ctrl->association_id) { |
1525 | oldls = ctrl->rcv_disconn; |
1526 | ctrl->rcv_disconn = lsop; |
1527 | ret = ctrl; |
1528 | } |
1529 | spin_unlock(lock: &ctrl->lock); |
1530 | if (ret) |
1531 | /* leave the ctrl get reference */ |
1532 | break; |
1533 | nvme_fc_ctrl_put(ctrl); |
1534 | } |
1535 | |
1536 | spin_unlock_irqrestore(lock: &rport->lock, flags); |
1537 | |
1538 | /* transmit a response for anything that was pending */ |
1539 | if (oldls) { |
1540 | dev_info(rport->lport->dev, |
1541 | "NVME-FC{%d}: Multiple Disconnect Association " |
1542 | "LS's received\n" , ctrl->cnum); |
1543 | /* overwrite good response with bogus failure */ |
1544 | oldls->lsrsp->rsplen = nvme_fc_format_rjt(buf: oldls->rspbuf, |
1545 | buflen: sizeof(*oldls->rspbuf), |
1546 | ls_cmd: rqst->w0.ls_cmd, |
1547 | reason: FCNVME_RJT_RC_UNAB, |
1548 | explanation: FCNVME_RJT_EXP_NONE, vendor: 0); |
1549 | nvme_fc_xmt_ls_rsp(lsop: oldls); |
1550 | } |
1551 | |
1552 | return ret; |
1553 | } |
1554 | |
1555 | /* |
1556 | * returns true to mean LS handled and ls_rsp can be sent |
1557 | * returns false to defer ls_rsp xmt (will be done as part of |
1558 | * association termination) |
1559 | */ |
1560 | static bool |
1561 | nvme_fc_ls_disconnect_assoc(struct nvmefc_ls_rcv_op *lsop) |
1562 | { |
1563 | struct nvme_fc_rport *rport = lsop->rport; |
1564 | struct fcnvme_ls_disconnect_assoc_rqst *rqst = |
1565 | &lsop->rqstbuf->rq_dis_assoc; |
1566 | struct fcnvme_ls_disconnect_assoc_acc *acc = |
1567 | &lsop->rspbuf->rsp_dis_assoc; |
1568 | struct nvme_fc_ctrl *ctrl = NULL; |
1569 | int ret = 0; |
1570 | |
1571 | memset(acc, 0, sizeof(*acc)); |
1572 | |
1573 | ret = nvmefc_vldt_lsreq_discon_assoc(rqstlen: lsop->rqstdatalen, rqst); |
1574 | if (!ret) { |
1575 | /* match an active association */ |
1576 | ctrl = nvme_fc_match_disconn_ls(rport, lsop); |
1577 | if (!ctrl) |
1578 | ret = VERR_NO_ASSOC; |
1579 | } |
1580 | |
1581 | if (ret) { |
1582 | dev_info(rport->lport->dev, |
1583 | "Disconnect LS failed: %s\n" , |
1584 | validation_errors[ret]); |
1585 | lsop->lsrsp->rsplen = nvme_fc_format_rjt(buf: acc, |
1586 | buflen: sizeof(*acc), ls_cmd: rqst->w0.ls_cmd, |
1587 | reason: (ret == VERR_NO_ASSOC) ? |
1588 | FCNVME_RJT_RC_INV_ASSOC : |
1589 | FCNVME_RJT_RC_LOGIC, |
1590 | explanation: FCNVME_RJT_EXP_NONE, vendor: 0); |
1591 | return true; |
1592 | } |
1593 | |
1594 | /* format an ACCept response */ |
1595 | |
1596 | lsop->lsrsp->rsplen = sizeof(*acc); |
1597 | |
1598 | nvme_fc_format_rsp_hdr(buf: acc, ls_cmd: FCNVME_LS_ACC, |
1599 | desc_len: fcnvme_lsdesc_len( |
1600 | sz: sizeof(struct fcnvme_ls_disconnect_assoc_acc)), |
1601 | rqst_ls_cmd: FCNVME_LS_DISCONNECT_ASSOC); |
1602 | |
1603 | /* |
1604 | * the transmit of the response will occur after the exchanges |
1605 | * for the association have been ABTS'd by |
1606 | * nvme_fc_delete_association(). |
1607 | */ |
1608 | |
1609 | /* fail the association */ |
1610 | nvme_fc_error_recovery(ctrl, errmsg: "Disconnect Association LS received" ); |
1611 | |
1612 | /* release the reference taken by nvme_fc_match_disconn_ls() */ |
1613 | nvme_fc_ctrl_put(ctrl); |
1614 | |
1615 | return false; |
1616 | } |
1617 | |
1618 | /* |
1619 | * Actual Processing routine for received FC-NVME LS Requests from the LLD |
1620 | * returns true if a response should be sent afterward, false if rsp will |
1621 | * be sent asynchronously. |
1622 | */ |
1623 | static bool |
1624 | nvme_fc_handle_ls_rqst(struct nvmefc_ls_rcv_op *lsop) |
1625 | { |
1626 | struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0; |
1627 | bool ret = true; |
1628 | |
1629 | lsop->lsrsp->nvme_fc_private = lsop; |
1630 | lsop->lsrsp->rspbuf = lsop->rspbuf; |
1631 | lsop->lsrsp->rspdma = lsop->rspdma; |
1632 | lsop->lsrsp->done = nvme_fc_xmt_ls_rsp_done; |
1633 | /* Be preventative. handlers will later set to valid length */ |
1634 | lsop->lsrsp->rsplen = 0; |
1635 | |
1636 | /* |
1637 | * handlers: |
1638 | * parse request input, execute the request, and format the |
1639 | * LS response |
1640 | */ |
1641 | switch (w0->ls_cmd) { |
1642 | case FCNVME_LS_DISCONNECT_ASSOC: |
1643 | ret = nvme_fc_ls_disconnect_assoc(lsop); |
1644 | break; |
1645 | case FCNVME_LS_DISCONNECT_CONN: |
		lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
				sizeof(*lsop->rspbuf), w0->ls_cmd,
				FCNVME_RJT_RC_UNSUP, FCNVME_RJT_EXP_NONE, 0);
		break;
	case FCNVME_LS_CREATE_ASSOCIATION:
	case FCNVME_LS_CREATE_CONNECTION:
		lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
				sizeof(*lsop->rspbuf), w0->ls_cmd,
				FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0);
		break;
	default:
		lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
				sizeof(*lsop->rspbuf), w0->ls_cmd,
				FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0);
1660 | break; |
1661 | } |
1662 | |
	return ret;
1664 | } |
1665 | |
1666 | static void |
1667 | nvme_fc_handle_ls_rqst_work(struct work_struct *work) |
1668 | { |
1669 | struct nvme_fc_rport *rport = |
1670 | container_of(work, struct nvme_fc_rport, lsrcv_work); |
1671 | struct fcnvme_ls_rqst_w0 *w0; |
1672 | struct nvmefc_ls_rcv_op *lsop; |
1673 | unsigned long flags; |
1674 | bool sendrsp; |
1675 | |
1676 | restart: |
1677 | sendrsp = true; |
1678 | spin_lock_irqsave(&rport->lock, flags); |
1679 | list_for_each_entry(lsop, &rport->ls_rcv_list, lsrcv_list) { |
1680 | if (lsop->handled) |
1681 | continue; |
1682 | |
1683 | lsop->handled = true; |
1684 | if (rport->remoteport.port_state == FC_OBJSTATE_ONLINE) { |
			spin_unlock_irqrestore(&rport->lock, flags);
			sendrsp = nvme_fc_handle_ls_rqst(lsop);
		} else {
			spin_unlock_irqrestore(&rport->lock, flags);
			w0 = &lsop->rqstbuf->w0;
			lsop->lsrsp->rsplen = nvme_fc_format_rjt(
					lsop->rspbuf,
					sizeof(*lsop->rspbuf),
					w0->ls_cmd,
					FCNVME_RJT_RC_UNAB,
					FCNVME_RJT_EXP_NONE, 0);
1696 | } |
1697 | if (sendrsp) |
1698 | nvme_fc_xmt_ls_rsp(lsop); |
1699 | goto restart; |
1700 | } |
	spin_unlock_irqrestore(&rport->lock, flags);
1702 | } |
1703 | |
1704 | static |
1705 | void nvme_fc_rcv_ls_req_err_msg(struct nvme_fc_lport *lport, |
1706 | struct fcnvme_ls_rqst_w0 *w0) |
1707 | { |
	dev_info(lport->dev, "RCV %s LS failed: No memory\n",
		(w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
			nvmefc_ls_names[w0->ls_cmd] : "");
1711 | } |
1712 | |
1713 | /** |
1714 | * nvme_fc_rcv_ls_req - transport entry point called by an LLDD |
1715 | * upon the reception of a NVME LS request. |
1716 | * |
1717 | * The nvme-fc layer will copy payload to an internal structure for |
1718 | * processing. As such, upon completion of the routine, the LLDD may |
1719 | * immediately free/reuse the LS request buffer passed in the call. |
1720 | * |
1721 | * If this routine returns error, the LLDD should abort the exchange. |
1722 | * |
1723 | * @portptr: pointer to the (registered) remote port that the LS |
1724 | * was received from. The remoteport is associated with |
1725 | * a specific localport. |
1726 | * @lsrsp: pointer to a nvmefc_ls_rsp response structure to be |
1727 | * used to reference the exchange corresponding to the LS |
1728 | * when issuing an ls response. |
1729 | * @lsreqbuf: pointer to the buffer containing the LS Request |
1730 | * @lsreqbuf_len: length, in bytes, of the received LS request |
1731 | */ |
1732 | int |
1733 | nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr, |
1734 | struct nvmefc_ls_rsp *lsrsp, |
1735 | void *lsreqbuf, u32 lsreqbuf_len) |
1736 | { |
1737 | struct nvme_fc_rport *rport = remoteport_to_rport(portptr); |
1738 | struct nvme_fc_lport *lport = rport->lport; |
1739 | struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)lsreqbuf; |
1740 | struct nvmefc_ls_rcv_op *lsop; |
1741 | unsigned long flags; |
1742 | int ret; |
1743 | |
1744 | nvme_fc_rport_get(rport); |
1745 | |
1746 | /* validate there's a routine to transmit a response */ |
1747 | if (!lport->ops->xmt_ls_rsp) { |
1748 | dev_info(lport->dev, |
1749 | "RCV %s LS failed: no LLDD xmt_ls_rsp\n" , |
1750 | (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? |
1751 | nvmefc_ls_names[w0->ls_cmd] : "" ); |
1752 | ret = -EINVAL; |
1753 | goto out_put; |
1754 | } |
1755 | |
1756 | if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) { |
1757 | dev_info(lport->dev, |
1758 | "RCV %s LS failed: payload too large\n" , |
1759 | (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? |
1760 | nvmefc_ls_names[w0->ls_cmd] : "" ); |
1761 | ret = -E2BIG; |
1762 | goto out_put; |
1763 | } |
1764 | |
	lsop = kzalloc(sizeof(*lsop), GFP_KERNEL);
	if (!lsop) {
		nvme_fc_rcv_ls_req_err_msg(lport, w0);
		ret = -ENOMEM;
		goto out_put;
	}

	lsop->rqstbuf = kzalloc(sizeof(*lsop->rqstbuf), GFP_KERNEL);
	lsop->rspbuf = kzalloc(sizeof(*lsop->rspbuf), GFP_KERNEL);
	if (!lsop->rqstbuf || !lsop->rspbuf) {
		nvme_fc_rcv_ls_req_err_msg(lport, w0);
		ret = -ENOMEM;
		goto out_free;
	}

	lsop->rspdma = fc_dma_map_single(lport->dev, lsop->rspbuf,
					 sizeof(*lsop->rspbuf),
					 DMA_TO_DEVICE);
	if (fc_dma_mapping_error(lport->dev, lsop->rspdma)) {
		dev_info(lport->dev,
			"RCV %s LS failed: DMA mapping failure\n",
			(w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
				nvmefc_ls_names[w0->ls_cmd] : "");
1788 | ret = -EFAULT; |
1789 | goto out_free; |
1790 | } |
1791 | |
1792 | lsop->rport = rport; |
1793 | lsop->lsrsp = lsrsp; |
1794 | |
1795 | memcpy(lsop->rqstbuf, lsreqbuf, lsreqbuf_len); |
1796 | lsop->rqstdatalen = lsreqbuf_len; |
1797 | |
	spin_lock_irqsave(&rport->lock, flags);
	if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) {
		spin_unlock_irqrestore(&rport->lock, flags);
		ret = -ENOTCONN;
		goto out_unmap;
	}
	list_add_tail(&lsop->lsrcv_list, &rport->ls_rcv_list);
	spin_unlock_irqrestore(&rport->lock, flags);

	schedule_work(&rport->lsrcv_work);

	return 0;

out_unmap:
	fc_dma_unmap_single(lport->dev, lsop->rspdma,
			sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
out_free:
	kfree(lsop->rspbuf);
	kfree(lsop->rqstbuf);
	kfree(lsop);
1818 | out_put: |
1819 | nvme_fc_rport_put(rport); |
1820 | return ret; |
1821 | } |
1822 | EXPORT_SYMBOL_GPL(nvme_fc_rcv_ls_req); |
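
/*
 * Illustrative sketch only (not part of the transport): an LLDD's LS
 * receive path would typically look like the below. The lldd_* names
 * and structures are hypothetical; only nvme_fc_rcv_ls_req() and the
 * abort-on-error contract come from the API documented above.
 *
 *	void lldd_recv_nvme_ls(struct lldd_rport *rp, void *buf, u32 len)
 *	{
 *		struct lldd_ls_ctx *ctx = lldd_get_ls_ctx(rp);
 *
 *		if (nvme_fc_rcv_ls_req(rp->nvme_remoteport, &ctx->lsrsp,
 *				       buf, len))
 *			lldd_abort_exchange(rp, ctx);
 *
 *		// buf may be freed/reused immediately on return; the
 *		// transport has copied the payload internally.
 *	}
 */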
1823 | |
1824 | |
1825 | /* *********************** NVME Ctrl Routines **************************** */ |
1826 | |
1827 | static void |
1828 | __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl, |
1829 | struct nvme_fc_fcp_op *op) |
1830 | { |
	fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.rspdma,
				sizeof(op->rsp_iu), DMA_FROM_DEVICE);
	fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.cmddma,
				sizeof(op->cmd_iu), DMA_TO_DEVICE);

	atomic_set(&op->state, FCPOP_STATE_UNINIT);
1837 | } |
1838 | |
1839 | static void |
1840 | nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq, |
1841 | unsigned int hctx_idx) |
1842 | { |
1843 | struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); |
1844 | |
	return __nvme_fc_exit_request(to_fc_ctrl(set->driver_data), op);
1846 | } |
1847 | |
1848 | static int |
1849 | __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) |
1850 | { |
1851 | unsigned long flags; |
1852 | int opstate; |
1853 | |
	spin_lock_irqsave(&ctrl->lock, flags);
	opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
	if (opstate != FCPOP_STATE_ACTIVE)
		atomic_set(&op->state, opstate);
	else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
		op->flags |= FCOP_FLAGS_TERMIO;
		ctrl->iocnt++;
	}
	spin_unlock_irqrestore(&ctrl->lock, flags);
1863 | |
1864 | if (opstate != FCPOP_STATE_ACTIVE) |
1865 | return -ECANCELED; |
1866 | |
1867 | ctrl->lport->ops->fcp_abort(&ctrl->lport->localport, |
1868 | &ctrl->rport->remoteport, |
1869 | op->queue->lldd_handle, |
1870 | &op->fcp_req); |
1871 | |
1872 | return 0; |
1873 | } |
1874 | |
1875 | static void |
1876 | nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl) |
1877 | { |
1878 | struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops; |
1879 | int i; |
1880 | |
1881 | /* ensure we've initialized the ops once */ |
1882 | if (!(aen_op->flags & FCOP_FLAGS_AEN)) |
1883 | return; |
1884 | |
1885 | for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) |
		__nvme_fc_abort_op(ctrl, aen_op);
1887 | } |
1888 | |
1889 | static inline void |
1890 | __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, |
1891 | struct nvme_fc_fcp_op *op, int opstate) |
1892 | { |
1893 | unsigned long flags; |
1894 | |
1895 | if (opstate == FCPOP_STATE_ABORTED) { |
1896 | spin_lock_irqsave(&ctrl->lock, flags); |
1897 | if (test_bit(FCCTRL_TERMIO, &ctrl->flags) && |
1898 | op->flags & FCOP_FLAGS_TERMIO) { |
1899 | if (!--ctrl->iocnt) |
1900 | wake_up(&ctrl->ioabort_wait); |
1901 | } |
		spin_unlock_irqrestore(&ctrl->lock, flags);
1903 | } |
1904 | } |
1905 | |
1906 | static void |
1907 | nvme_fc_ctrl_ioerr_work(struct work_struct *work) |
1908 | { |
1909 | struct nvme_fc_ctrl *ctrl = |
1910 | container_of(work, struct nvme_fc_ctrl, ioerr_work); |
1911 | |
	nvme_fc_error_recovery(ctrl, "transport detected io error");
1913 | } |
1914 | |
1915 | /* |
 * nvme_fc_io_getuuid - Routine called by the LLDD to get the appid
 * field associated with an I/O request
 * @req: IO request from nvme fc to driver
 * Returns: UUID if there is an appid associated with the VM, or
 * NULL if the user/libvirt has not set an appid for the VM
1921 | */ |
1922 | char *nvme_fc_io_getuuid(struct nvmefc_fcp_req *req) |
1923 | { |
	struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req);
1925 | struct request *rq = op->rq; |
1926 | |
1927 | if (!IS_ENABLED(CONFIG_BLK_CGROUP_FC_APPID) || !rq || !rq->bio) |
1928 | return NULL; |
	return blkcg_get_fc_appid(rq->bio);
1930 | } |
1931 | EXPORT_SYMBOL_GPL(nvme_fc_io_getuuid); |
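
/*
 * Illustrative sketch only: an LLDD could use nvme_fc_io_getuuid() in
 * its ->fcp_io handler to tag outgoing frames with the VM appid. The
 * lldd_* names below are hypothetical; the ->fcp_io signature matches
 * the nvme_fc_port_template contract.
 *
 *	static int lldd_fcp_io(struct nvme_fc_local_port *lport,
 *			       struct nvme_fc_remote_port *rport,
 *			       void *hw_queue_handle,
 *			       struct nvmefc_fcp_req *freq)
 *	{
 *		char *appid = nvme_fc_io_getuuid(freq);
 *
 *		if (appid)
 *			lldd_set_frame_appid(freq, appid);
 *		return lldd_send_fcp_cmd(hw_queue_handle, freq);
 *	}
 */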
1932 | |
1933 | static void |
1934 | nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) |
1935 | { |
	struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req);
1937 | struct request *rq = op->rq; |
1938 | struct nvmefc_fcp_req *freq = &op->fcp_req; |
1939 | struct nvme_fc_ctrl *ctrl = op->ctrl; |
1940 | struct nvme_fc_queue *queue = op->queue; |
1941 | struct nvme_completion *cqe = &op->rsp_iu.cqe; |
1942 | struct nvme_command *sqe = &op->cmd_iu.sqe; |
1943 | __le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1); |
1944 | union nvme_result result; |
1945 | bool terminate_assoc = true; |
1946 | int opstate; |
1947 | |
1948 | /* |
1949 | * WARNING: |
1950 | * The current linux implementation of a nvme controller |
1951 | * allocates a single tag set for all io queues and sizes |
1952 | * the io queues to fully hold all possible tags. Thus, the |
1953 | * implementation does not reference or care about the sqhd |
1954 | * value as it never needs to use the sqhd/sqtail pointers |
1955 | * for submission pacing. |
1956 | * |
1957 | * This affects the FC-NVME implementation in two ways: |
1958 | * 1) As the value doesn't matter, we don't need to waste |
1959 | * cycles extracting it from ERSPs and stamping it in the |
1960 | * cases where the transport fabricates CQEs on successful |
1961 | * completions. |
1962 | * 2) The FC-NVME implementation requires that delivery of |
1963 | * ERSP completions are to go back to the nvme layer in order |
1964 | * relative to the rsn, such that the sqhd value will always |
1965 | * be "in order" for the nvme layer. As the nvme layer in |
1966 | * linux doesn't care about sqhd, there's no need to return |
1967 | * them in order. |
1968 | * |
1969 | * Additionally: |
1970 | * As the core nvme layer in linux currently does not look at |
1971 | * every field in the cqe - in cases where the FC transport must |
1972 | * fabricate a CQE, the following fields will not be set as they |
1973 | * are not referenced: |
1974 | * cqe.sqid, cqe.sqhd, cqe.command_id |
1975 | * |
	 * Failure or error of an individual i/o, in a transport
	 * detected fashion unrelated to the nvme completion status,
	 * can potentially cause the initiator and target sides to get
	 * out of sync on SQ head/tail (aka outstanding io count allowed).
1980 | * Per FC-NVME spec, failure of an individual command requires |
1981 | * the connection to be terminated, which in turn requires the |
1982 | * association to be terminated. |
1983 | */ |
1984 | |
	opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);

	fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
				sizeof(op->rsp_iu), DMA_FROM_DEVICE);

	if (opstate == FCPOP_STATE_ABORTED)
		status = cpu_to_le16(NVME_SC_HOST_ABORTED_CMD << 1);
	else if (freq->status) {
		status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
		dev_info(ctrl->ctrl.device,
			"NVME-FC{%d}: io failed due to lldd error %d\n",
			ctrl->cnum, freq->status);
	}
1998 | |
1999 | /* |
	 * For the linux implementation, if we have an unsuccessful
	 * status, the blk-mq layer can typically be called with the
	 * non-zero status and the content of the cqe isn't important.
2003 | */ |
2004 | if (status) |
2005 | goto done; |
2006 | |
2007 | /* |
2008 | * command completed successfully relative to the wire |
2009 | * protocol. However, validate anything received and |
2010 | * extract the status and result from the cqe (create it |
2011 | * where necessary). |
2012 | */ |
2013 | |
2014 | switch (freq->rcv_rsplen) { |
2015 | |
2016 | case 0: |
2017 | case NVME_FC_SIZEOF_ZEROS_RSP: |
2018 | /* |
		 * A response with no payload, or with 12 bytes of
		 * payload that should be all zeros, is considered
		 * successful; the transport fabricates the CQE with
		 * no payload.
2022 | */ |
2023 | if (freq->transferred_length != |
2024 | be32_to_cpu(op->cmd_iu.data_len)) { |
2025 | status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1); |
2026 | dev_info(ctrl->ctrl.device, |
2027 | "NVME-FC{%d}: io failed due to bad transfer " |
2028 | "length: %d vs expected %d\n" , |
2029 | ctrl->cnum, freq->transferred_length, |
2030 | be32_to_cpu(op->cmd_iu.data_len)); |
2031 | goto done; |
2032 | } |
2033 | result.u64 = 0; |
2034 | break; |
2035 | |
2036 | case sizeof(struct nvme_fc_ersp_iu): |
2037 | /* |
2038 | * The ERSP IU contains a full completion with CQE. |
2039 | * Validate ERSP IU and look at cqe. |
2040 | */ |
2041 | if (unlikely(be16_to_cpu(op->rsp_iu.iu_len) != |
2042 | (freq->rcv_rsplen / 4) || |
2043 | be32_to_cpu(op->rsp_iu.xfrd_len) != |
2044 | freq->transferred_length || |
2045 | op->rsp_iu.ersp_result || |
2046 | sqe->common.command_id != cqe->command_id)) { |
2047 | status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1); |
2048 | dev_info(ctrl->ctrl.device, |
2049 | "NVME-FC{%d}: io failed due to bad NVMe_ERSP: " |
2050 | "iu len %d, xfr len %d vs %d, status code " |
2051 | "%d, cmdid %d vs %d\n" , |
2052 | ctrl->cnum, be16_to_cpu(op->rsp_iu.iu_len), |
2053 | be32_to_cpu(op->rsp_iu.xfrd_len), |
2054 | freq->transferred_length, |
2055 | op->rsp_iu.ersp_result, |
2056 | sqe->common.command_id, |
2057 | cqe->command_id); |
2058 | goto done; |
2059 | } |
2060 | result = cqe->result; |
2061 | status = cqe->status; |
2062 | break; |
2063 | |
2064 | default: |
2065 | status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1); |
2066 | dev_info(ctrl->ctrl.device, |
2067 | "NVME-FC{%d}: io failed due to odd NVMe_xRSP iu " |
2068 | "len %d\n" , |
2069 | ctrl->cnum, freq->rcv_rsplen); |
2070 | goto done; |
2071 | } |
2072 | |
2073 | terminate_assoc = false; |
2074 | |
2075 | done: |
2076 | if (op->flags & FCOP_FLAGS_AEN) { |
		nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
		__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
		atomic_set(&op->state, FCPOP_STATE_IDLE);
		op->flags = FCOP_FLAGS_AEN;	/* clear other flags */
		nvme_fc_ctrl_put(ctrl);
		goto check_error;
	}

	__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
	if (!nvme_try_complete_req(rq, status, result))
		nvme_fc_complete_rq(rq);

check_error:
	if (terminate_assoc && ctrl->ctrl.state != NVME_CTRL_RESETTING)
		queue_work(nvme_reset_wq, &ctrl->ioerr_work);
2092 | } |
2093 | |
2094 | static int |
2095 | __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, |
2096 | struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op, |
2097 | struct request *rq, u32 rqno) |
2098 | { |
2099 | struct nvme_fcp_op_w_sgl *op_w_sgl = |
2100 | container_of(op, typeof(*op_w_sgl), op); |
2101 | struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; |
2102 | int ret = 0; |
2103 | |
2104 | memset(op, 0, sizeof(*op)); |
2105 | op->fcp_req.cmdaddr = &op->cmd_iu; |
2106 | op->fcp_req.cmdlen = sizeof(op->cmd_iu); |
2107 | op->fcp_req.rspaddr = &op->rsp_iu; |
2108 | op->fcp_req.rsplen = sizeof(op->rsp_iu); |
2109 | op->fcp_req.done = nvme_fc_fcpio_done; |
2110 | op->ctrl = ctrl; |
2111 | op->queue = queue; |
2112 | op->rq = rq; |
2113 | op->rqno = rqno; |
2114 | |
2115 | cmdiu->format_id = NVME_CMD_FORMAT_ID; |
2116 | cmdiu->fc_id = NVME_CMD_FC_ID; |
2117 | cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32)); |
2118 | if (queue->qnum) |
		cmdiu->rsv_cat = fccmnd_set_cat_css(0,
					(NVME_CC_CSS_NVM >> NVME_CC_CSS_SHIFT));
	else
		cmdiu->rsv_cat = fccmnd_set_cat_admin(0);

	op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev,
		&op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE);
	if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.cmddma)) {
		dev_err(ctrl->dev,
			"FCP Op failed - cmdiu dma mapping failed.\n");
		ret = -EFAULT;
		goto out_on_error;
	}

	op->fcp_req.rspdma = fc_dma_map_single(ctrl->lport->dev,
				&op->rsp_iu, sizeof(op->rsp_iu),
				DMA_FROM_DEVICE);
	if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.rspdma)) {
		dev_err(ctrl->dev,
			"FCP Op failed - rspiu dma mapping failed.\n");
		ret = -EFAULT;
	}

	atomic_set(&op->state, FCPOP_STATE_IDLE);
2143 | out_on_error: |
2144 | return ret; |
2145 | } |
2146 | |
2147 | static int |
2148 | nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, |
2149 | unsigned int hctx_idx, unsigned int numa_node) |
2150 | { |
	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(set->driver_data);
2152 | struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq); |
2153 | int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0; |
2154 | struct nvme_fc_queue *queue = &ctrl->queues[queue_idx]; |
2155 | int res; |
2156 | |
	res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++);
2158 | if (res) |
2159 | return res; |
2160 | op->op.fcp_req.first_sgl = op->sgl; |
2161 | op->op.fcp_req.private = &op->priv[0]; |
	nvme_req(rq)->ctrl = &ctrl->ctrl;
	nvme_req(rq)->cmd = &op->op.cmd_iu.sqe;
2164 | return res; |
2165 | } |
2166 | |
2167 | static int |
2168 | nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) |
2169 | { |
2170 | struct nvme_fc_fcp_op *aen_op; |
2171 | struct nvme_fc_cmd_iu *cmdiu; |
2172 | struct nvme_command *sqe; |
2173 | void *private = NULL; |
2174 | int i, ret; |
2175 | |
2176 | aen_op = ctrl->aen_ops; |
2177 | for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) { |
2178 | if (ctrl->lport->ops->fcprqst_priv_sz) { |
			private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
2180 | GFP_KERNEL); |
2181 | if (!private) |
2182 | return -ENOMEM; |
2183 | } |
2184 | |
2185 | cmdiu = &aen_op->cmd_iu; |
2186 | sqe = &cmdiu->sqe; |
		ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0],
				aen_op, (struct request *)NULL,
				(NVME_AQ_BLK_MQ_DEPTH + i));
		if (ret) {
			kfree(private);
2192 | return ret; |
2193 | } |
2194 | |
2195 | aen_op->flags = FCOP_FLAGS_AEN; |
2196 | aen_op->fcp_req.private = private; |
2197 | |
2198 | memset(sqe, 0, sizeof(*sqe)); |
2199 | sqe->common.opcode = nvme_admin_async_event; |
2200 | /* Note: core layer may overwrite the sqe.command_id value */ |
2201 | sqe->common.command_id = NVME_AQ_BLK_MQ_DEPTH + i; |
2202 | } |
2203 | return 0; |
2204 | } |
2205 | |
2206 | static void |
2207 | nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) |
2208 | { |
2209 | struct nvme_fc_fcp_op *aen_op; |
2210 | int i; |
2211 | |
	cancel_work_sync(&ctrl->ctrl.async_event_work);
2213 | aen_op = ctrl->aen_ops; |
2214 | for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) { |
		__nvme_fc_exit_request(ctrl, aen_op);

		kfree(aen_op->fcp_req.private);
2218 | aen_op->fcp_req.private = NULL; |
2219 | } |
2220 | } |
2221 | |
2222 | static inline int |
2223 | __nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int qidx) |
2224 | { |
	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(data);
2226 | struct nvme_fc_queue *queue = &ctrl->queues[qidx]; |
2227 | |
2228 | hctx->driver_data = queue; |
2229 | queue->hctx = hctx; |
2230 | return 0; |
2231 | } |
2232 | |
2233 | static int |
2234 | nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) |
2235 | { |
	return __nvme_fc_init_hctx(hctx, data, hctx_idx + 1);
2237 | } |
2238 | |
2239 | static int |
2240 | nvme_fc_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, |
2241 | unsigned int hctx_idx) |
2242 | { |
	return __nvme_fc_init_hctx(hctx, data, hctx_idx);
2244 | } |
2245 | |
2246 | static void |
2247 | nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx) |
2248 | { |
2249 | struct nvme_fc_queue *queue; |
2250 | |
2251 | queue = &ctrl->queues[idx]; |
2252 | memset(queue, 0, sizeof(*queue)); |
2253 | queue->ctrl = ctrl; |
2254 | queue->qnum = idx; |
	atomic_set(&queue->csn, 0);
2256 | queue->dev = ctrl->dev; |
2257 | |
2258 | if (idx > 0) |
2259 | queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16; |
2260 | else |
2261 | queue->cmnd_capsule_len = sizeof(struct nvme_command); |
2262 | |
2263 | /* |
2264 | * Considered whether we should allocate buffers for all SQEs |
2265 | * and CQEs and dma map them - mapping their respective entries |
2266 | * into the request structures (kernel vm addr and dma address) |
2267 | * thus the driver could use the buffers/mappings directly. |
2268 | * It only makes sense if the LLDD would use them for its |
2269 | * messaging api. It's very unlikely most adapter api's would use |
2270 | * a native NVME sqe/cqe. More reasonable if FC-NVME IU payload |
2271 | * structures were used instead. |
2272 | */ |
2273 | } |
2274 | |
2275 | /* |
2276 | * This routine terminates a queue at the transport level. |
2277 | * The transport has already ensured that all outstanding ios on |
2278 | * the queue have been terminated. |
2279 | * The transport will send a Disconnect LS request to terminate |
2280 | * the queue's connection. Termination of the admin queue will also |
2281 | * terminate the association at the target. |
2282 | */ |
2283 | static void |
2284 | nvme_fc_free_queue(struct nvme_fc_queue *queue) |
2285 | { |
	if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags))
		return;

	clear_bit(NVME_FC_Q_LIVE, &queue->flags);
2290 | /* |
2291 | * Current implementation never disconnects a single queue. |
2292 | * It always terminates a whole association. So there is never |
2293 | * a disconnect(queue) LS sent to the target. |
2294 | */ |
2295 | |
2296 | queue->connection_id = 0; |
	atomic_set(&queue->csn, 0);
2298 | } |
2299 | |
2300 | static void |
2301 | __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl, |
2302 | struct nvme_fc_queue *queue, unsigned int qidx) |
2303 | { |
2304 | if (ctrl->lport->ops->delete_queue) |
2305 | ctrl->lport->ops->delete_queue(&ctrl->lport->localport, qidx, |
2306 | queue->lldd_handle); |
2307 | queue->lldd_handle = NULL; |
2308 | } |
2309 | |
2310 | static void |
2311 | nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl) |
2312 | { |
2313 | int i; |
2314 | |
2315 | for (i = 1; i < ctrl->ctrl.queue_count; i++) |
		nvme_fc_free_queue(&ctrl->queues[i]);
2317 | } |
2318 | |
2319 | static int |
2320 | __nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl, |
2321 | struct nvme_fc_queue *queue, unsigned int qidx, u16 qsize) |
2322 | { |
2323 | int ret = 0; |
2324 | |
2325 | queue->lldd_handle = NULL; |
2326 | if (ctrl->lport->ops->create_queue) |
2327 | ret = ctrl->lport->ops->create_queue(&ctrl->lport->localport, |
2328 | qidx, qsize, &queue->lldd_handle); |
2329 | |
2330 | return ret; |
2331 | } |
2332 | |
2333 | static void |
2334 | nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl) |
2335 | { |
2336 | struct nvme_fc_queue *queue = &ctrl->queues[ctrl->ctrl.queue_count - 1]; |
2337 | int i; |
2338 | |
2339 | for (i = ctrl->ctrl.queue_count - 1; i >= 1; i--, queue--) |
		__nvme_fc_delete_hw_queue(ctrl, queue, i);
2341 | } |
2342 | |
2343 | static int |
2344 | nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) |
2345 | { |
2346 | struct nvme_fc_queue *queue = &ctrl->queues[1]; |
2347 | int i, ret; |
2348 | |
2349 | for (i = 1; i < ctrl->ctrl.queue_count; i++, queue++) { |
		ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize);
2351 | if (ret) |
2352 | goto delete_queues; |
2353 | } |
2354 | |
2355 | return 0; |
2356 | |
2357 | delete_queues: |
2358 | for (; i > 0; i--) |
		__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i);
2360 | return ret; |
2361 | } |
2362 | |
2363 | static int |
2364 | nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) |
2365 | { |
2366 | int i, ret = 0; |
2367 | |
2368 | for (i = 1; i < ctrl->ctrl.queue_count; i++) { |
		ret = nvme_fc_connect_queue(ctrl, &ctrl->queues[i], qsize,
					(qsize / 5));
		if (ret)
			break;
		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
		if (ret)
			break;

		set_bit(NVME_FC_Q_LIVE, &ctrl->queues[i].flags);
2378 | } |
2379 | |
2380 | return ret; |
2381 | } |
2382 | |
2383 | static void |
2384 | nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl) |
2385 | { |
2386 | int i; |
2387 | |
2388 | for (i = 1; i < ctrl->ctrl.queue_count; i++) |
		nvme_fc_init_queue(ctrl, i);
2390 | } |
2391 | |
2392 | static void |
2393 | nvme_fc_ctrl_free(struct kref *ref) |
2394 | { |
2395 | struct nvme_fc_ctrl *ctrl = |
2396 | container_of(ref, struct nvme_fc_ctrl, ref); |
2397 | unsigned long flags; |
2398 | |
	if (ctrl->ctrl.tagset)
		nvme_remove_io_tag_set(&ctrl->ctrl);

	/* remove from rport list */
	spin_lock_irqsave(&ctrl->rport->lock, flags);
	list_del(&ctrl->ctrl_list);
	spin_unlock_irqrestore(&ctrl->rport->lock, flags);

	nvme_unquiesce_admin_queue(&ctrl->ctrl);
	nvme_remove_admin_tag_set(&ctrl->ctrl);

	kfree(ctrl->queues);

	put_device(ctrl->dev);
	nvme_fc_rport_put(ctrl->rport);

	ida_free(&nvme_fc_ctrl_cnt, ctrl->cnum);
	if (ctrl->ctrl.opts)
		nvmf_free_options(ctrl->ctrl.opts);
	kfree(ctrl);
2419 | } |
2420 | |
2421 | static void |
2422 | nvme_fc_ctrl_put(struct nvme_fc_ctrl *ctrl) |
2423 | { |
	kref_put(&ctrl->ref, nvme_fc_ctrl_free);
2425 | } |
2426 | |
2427 | static int |
2428 | nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl) |
2429 | { |
	return kref_get_unless_zero(&ctrl->ref);
2431 | } |
2432 | |
2433 | /* |
2434 | * All accesses from nvme core layer done - can now free the |
2435 | * controller. Called after last nvme_put_ctrl() call |
2436 | */ |
2437 | static void |
2438 | nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) |
2439 | { |
	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
2441 | |
2442 | WARN_ON(nctrl != &ctrl->ctrl); |
2443 | |
2444 | nvme_fc_ctrl_put(ctrl); |
2445 | } |
2446 | |
2447 | /* |
2448 | * This routine is used by the transport when it needs to find active |
2449 | * io on a queue that is to be terminated. The transport uses |
 * blk_mq_tagset_busy_iter() to find the busy requests, which then
 * invokes this routine to kill them on a 1 by 1 basis.
2452 | * |
2453 | * As FC allocates FC exchange for each io, the transport must contact |
2454 | * the LLDD to terminate the exchange, thus releasing the FC exchange. |
2455 | * After terminating the exchange the LLDD will call the transport's |
2456 | * normal io done path for the request, but it will have an aborted |
2457 | * status. The done path will return the io request back to the block |
2458 | * layer with an error status. |
2459 | */ |
2460 | static bool nvme_fc_terminate_exchange(struct request *req, void *data) |
2461 | { |
2462 | struct nvme_ctrl *nctrl = data; |
	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
2465 | |
2466 | op->nreq.flags |= NVME_REQ_CANCELLED; |
2467 | __nvme_fc_abort_op(ctrl, op); |
2468 | return true; |
2469 | } |
2470 | |
2471 | /* |
2472 | * This routine runs through all outstanding commands on the association |
 * and aborts them. This routine is typically called by the
2474 | * delete_association routine. It is also called due to an error during |
2475 | * reconnect. In that scenario, it is most likely a command that initializes |
2476 | * the controller, including fabric Connect commands on io queues, that |
2477 | * may have timed out or failed thus the io must be killed for the connect |
2478 | * thread to see the error. |
2479 | */ |
2480 | static void |
2481 | __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) |
2482 | { |
2483 | int q; |
2484 | |
2485 | /* |
2486 | * if aborting io, the queues are no longer good, mark them |
2487 | * all as not live. |
2488 | */ |
2489 | if (ctrl->ctrl.queue_count > 1) { |
2490 | for (q = 1; q < ctrl->ctrl.queue_count; q++) |
			clear_bit(NVME_FC_Q_LIVE, &ctrl->queues[q].flags);
	}
	clear_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags);
2494 | |
2495 | /* |
2496 | * If io queues are present, stop them and terminate all outstanding |
2497 | * ios on them. As FC allocates FC exchange for each io, the |
2498 | * transport must contact the LLDD to terminate the exchange, |
2499 | * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr() |
2500 | * to tell us what io's are busy and invoke a transport routine |
2501 | * to kill them with the LLDD. After terminating the exchange |
2502 | * the LLDD will call the transport's normal io done path, but it |
2503 | * will have an aborted status. The done path will return the |
2504 | * io requests back to the block layer as part of normal completions |
2505 | * (but with error status). |
2506 | */ |
2507 | if (ctrl->ctrl.queue_count > 1) { |
		nvme_quiesce_io_queues(&ctrl->ctrl);
		nvme_sync_io_queues(&ctrl->ctrl);
		blk_mq_tagset_busy_iter(&ctrl->tag_set,
				nvme_fc_terminate_exchange, &ctrl->ctrl);
		blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
		if (start_queues)
			nvme_unquiesce_io_queues(&ctrl->ctrl);
2515 | } |
2516 | |
2517 | /* |
2518 | * Other transports, which don't have link-level contexts bound |
2519 | * to sqe's, would try to gracefully shutdown the controller by |
2520 | * writing the registers for shutdown and polling (call |
2521 | * nvme_disable_ctrl()). Given a bunch of i/o was potentially |
2522 | * just aborted and we will wait on those contexts, and given |
	 * there was no indication of how live the controller is on the
	 * link, don't send more io to create more contexts for the
	 * shutdown. Let the controller fail via keepalive failure if
	 * it's still present.
2527 | */ |
2528 | |
2529 | /* |
2530 | * clean up the admin queue. Same thing as above. |
2531 | */ |
	nvme_quiesce_admin_queue(&ctrl->ctrl);
	blk_sync_queue(ctrl->ctrl.admin_q);
	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
			nvme_fc_terminate_exchange, &ctrl->ctrl);
	blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
	if (start_queues)
		nvme_unquiesce_admin_queue(&ctrl->ctrl);
2539 | } |
2540 | |
2541 | static void |
2542 | nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) |
2543 | { |
2544 | /* |
2545 | * if an error (io timeout, etc) while (re)connecting, the remote |
2546 | * port requested terminating of the association (disconnect_ls) |
2547 | * or an error (timeout or abort) occurred on an io while creating |
2548 | * the controller. Abort any ios on the association and let the |
2549 | * create_association error path resolve things. |
2550 | */ |
2551 | enum nvme_ctrl_state state; |
2552 | unsigned long flags; |
2553 | |
2554 | spin_lock_irqsave(&ctrl->lock, flags); |
2555 | state = ctrl->ctrl.state; |
2556 | if (state == NVME_CTRL_CONNECTING) { |
		set_bit(ASSOC_FAILED, &ctrl->flags);
		spin_unlock_irqrestore(&ctrl->lock, flags);
		__nvme_fc_abort_outstanding_ios(ctrl, true);
		dev_warn(ctrl->ctrl.device,
			"NVME-FC{%d}: transport error during (re)connect\n",
			ctrl->cnum);
		return;
	}
	spin_unlock_irqrestore(&ctrl->lock, flags);
2566 | |
2567 | /* Otherwise, only proceed if in LIVE state - e.g. on first error */ |
2568 | if (state != NVME_CTRL_LIVE) |
2569 | return; |
2570 | |
	dev_warn(ctrl->ctrl.device,
		"NVME-FC{%d}: transport association event: %s\n",
		ctrl->cnum, errmsg);
	dev_warn(ctrl->ctrl.device,
		"NVME-FC{%d}: resetting controller\n", ctrl->cnum);

	nvme_reset_ctrl(&ctrl->ctrl);
2578 | } |
2579 | |
2580 | static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq) |
2581 | { |
2582 | struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); |
2583 | struct nvme_fc_ctrl *ctrl = op->ctrl; |
2584 | struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; |
2585 | struct nvme_command *sqe = &cmdiu->sqe; |
2586 | |
2587 | /* |
2588 | * Attempt to abort the offending command. Command completion |
2589 | * will detect the aborted io and will fail the connection. |
2590 | */ |
2591 | dev_info(ctrl->ctrl.device, |
2592 | "NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: " |
2593 | "x%08x/x%08x\n" , |
2594 | ctrl->cnum, op->queue->qnum, sqe->common.opcode, |
2595 | sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11); |
2596 | if (__nvme_fc_abort_op(ctrl, op)) |
		nvme_fc_error_recovery(ctrl, "io timeout abort failed");
2598 | |
2599 | /* |
2600 | * the io abort has been initiated. Have the reset timer |
2601 | * restarted and the abort completion will complete the io |
2602 | * shortly. Avoids a synchronous wait while the abort finishes. |
2603 | */ |
2604 | return BLK_EH_RESET_TIMER; |
2605 | } |
2606 | |
2607 | static int |
2608 | nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq, |
2609 | struct nvme_fc_fcp_op *op) |
2610 | { |
2611 | struct nvmefc_fcp_req *freq = &op->fcp_req; |
2612 | int ret; |
2613 | |
2614 | freq->sg_cnt = 0; |
2615 | |
2616 | if (!blk_rq_nr_phys_segments(rq)) |
2617 | return 0; |
2618 | |
2619 | freq->sg_table.sgl = freq->first_sgl; |
	ret = sg_alloc_table_chained(&freq->sg_table,
			blk_rq_nr_phys_segments(rq), freq->sg_table.sgl,
			NVME_INLINE_SG_CNT);
	if (ret)
		return -ENOMEM;

	op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl);
	WARN_ON(op->nents > blk_rq_nr_phys_segments(rq));
	freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl,
				op->nents, rq_dma_dir(rq));
	if (unlikely(freq->sg_cnt <= 0)) {
		sg_free_table_chained(&freq->sg_table, NVME_INLINE_SG_CNT);
2632 | freq->sg_cnt = 0; |
2633 | return -EFAULT; |
2634 | } |
2635 | |
2636 | /* |
2637 | * TODO: blk_integrity_rq(rq) for DIF |
2638 | */ |
2639 | return 0; |
2640 | } |
2641 | |
2642 | static void |
2643 | nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq, |
2644 | struct nvme_fc_fcp_op *op) |
2645 | { |
2646 | struct nvmefc_fcp_req *freq = &op->fcp_req; |
2647 | |
2648 | if (!freq->sg_cnt) |
2649 | return; |
2650 | |
	fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents,
			rq_dma_dir(rq));

	sg_free_table_chained(&freq->sg_table, NVME_INLINE_SG_CNT);
2655 | |
2656 | freq->sg_cnt = 0; |
2657 | } |
2658 | |
2659 | /* |
2660 | * In FC, the queue is a logical thing. At transport connect, the target |
2661 | * creates its "queue" and returns a handle that is to be given to the |
2662 | * target whenever it posts something to the corresponding SQ. When an |
2663 | * SQE is sent on a SQ, FC effectively considers the SQE, or rather the |
2664 | * command contained within the SQE, an io, and assigns a FC exchange |
2665 | * to it. The SQE and the associated SQ handle are sent in the initial |
 * CMD IU sent on the exchange. All transfers relative to the io occur
 * as part of the exchange. The CQE is the last thing for the io,
 * which is transferred (explicitly or implicitly) with the RSP IU
 * sent on the exchange. After the CQE is received, the FC exchange is
 * terminated and the Exchange may be used on a different io.
2671 | * |
2672 | * The transport to LLDD api has the transport making a request for a |
2673 | * new fcp io request to the LLDD. The LLDD then allocates a FC exchange |
2674 | * resource and transfers the command. The LLDD will then process all |
2675 | * steps to complete the io. Upon completion, the transport done routine |
2676 | * is called. |
2677 | * |
2678 | * So - while the operation is outstanding to the LLDD, there is a link |
2679 | * level FC exchange resource that is also outstanding. This must be |
2680 | * considered in all cleanup operations. |
2681 | */ |
2682 | static blk_status_t |
2683 | nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, |
2684 | struct nvme_fc_fcp_op *op, u32 data_len, |
2685 | enum nvmefc_fcp_datadir io_dir) |
2686 | { |
2687 | struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; |
2688 | struct nvme_command *sqe = &cmdiu->sqe; |
2689 | int ret, opstate; |
2690 | |
2691 | /* |
2692 | * before attempting to send the io, check to see if we believe |
2693 | * the target device is present |
2694 | */ |
2695 | if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) |
2696 | return BLK_STS_RESOURCE; |
2697 | |
2698 | if (!nvme_fc_ctrl_get(ctrl)) |
2699 | return BLK_STS_IOERR; |
2700 | |
2701 | /* format the FC-NVME CMD IU and fcp_req */ |
2702 | cmdiu->connection_id = cpu_to_be64(queue->connection_id); |
2703 | cmdiu->data_len = cpu_to_be32(data_len); |
2704 | switch (io_dir) { |
2705 | case NVMEFC_FCP_WRITE: |
2706 | cmdiu->flags = FCNVME_CMD_FLAGS_WRITE; |
2707 | break; |
2708 | case NVMEFC_FCP_READ: |
2709 | cmdiu->flags = FCNVME_CMD_FLAGS_READ; |
2710 | break; |
2711 | case NVMEFC_FCP_NODATA: |
2712 | cmdiu->flags = 0; |
2713 | break; |
2714 | } |
2715 | op->fcp_req.payload_length = data_len; |
2716 | op->fcp_req.io_dir = io_dir; |
2717 | op->fcp_req.transferred_length = 0; |
2718 | op->fcp_req.rcv_rsplen = 0; |
2719 | op->fcp_req.status = NVME_SC_SUCCESS; |
2720 | op->fcp_req.sqid = cpu_to_le16(queue->qnum); |
2721 | |
2722 | /* |
2723 | * validate per fabric rules, set fields mandated by fabric spec |
2724 | * as well as those by FC-NVME spec. |
2725 | */ |
2726 | WARN_ON_ONCE(sqe->common.metadata); |
2727 | sqe->common.flags |= NVME_CMD_SGL_METABUF; |
2728 | |
2729 | /* |
2730 | * format SQE DPTR field per FC-NVME rules: |
2731 | * type=0x5 Transport SGL Data Block Descriptor |
2732 | * subtype=0xA Transport-specific value |
2733 | * address=0 |
2734 | * length=length of the data series |
2735 | */ |
2736 | sqe->rw.dptr.sgl.type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) | |
2737 | NVME_SGL_FMT_TRANSPORT_A; |
2738 | sqe->rw.dptr.sgl.length = cpu_to_le32(data_len); |
2739 | sqe->rw.dptr.sgl.addr = 0; |
2740 | |
2741 | if (!(op->flags & FCOP_FLAGS_AEN)) { |
		ret = nvme_fc_map_data(ctrl, op->rq, op);
		if (ret < 0) {
			nvme_cleanup_cmd(op->rq);
2745 | nvme_fc_ctrl_put(ctrl); |
2746 | if (ret == -ENOMEM || ret == -EAGAIN) |
2747 | return BLK_STS_RESOURCE; |
2748 | return BLK_STS_IOERR; |
2749 | } |
2750 | } |
2751 | |
	fc_dma_sync_single_for_device(ctrl->lport->dev, op->fcp_req.cmddma,
				sizeof(op->cmd_iu), DMA_TO_DEVICE);

	atomic_set(&op->state, FCPOP_STATE_ACTIVE);

	if (!(op->flags & FCOP_FLAGS_AEN))
		nvme_start_request(op->rq);
2759 | |
2760 | cmdiu->csn = cpu_to_be32(atomic_inc_return(&queue->csn)); |
2761 | ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport, |
2762 | &ctrl->rport->remoteport, |
2763 | queue->lldd_handle, &op->fcp_req); |
2764 | |
2765 | if (ret) { |
2766 | /* |
2767 | * If the lld fails to send the command is there an issue with |
2768 | * the csn value? If the command that fails is the Connect, |
2769 | * no - as the connection won't be live. If it is a command |
2770 | * post-connect, it's possible a gap in csn may be created. |
2771 | * Does this matter? As Linux initiators don't send fused |
2772 | * commands, no. The gap would exist, but as there's nothing |
2773 | * that depends on csn order to be delivered on the target |
2774 | * side, it shouldn't hurt. It would be difficult for a |
2775 | * target to even detect the csn gap as it has no idea when the |
2776 | * cmd with the csn was supposed to arrive. |
2777 | */ |
		opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
2779 | __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); |
2780 | |
2781 | if (!(op->flags & FCOP_FLAGS_AEN)) { |
			nvme_fc_unmap_data(ctrl, op->rq, op);
			nvme_cleanup_cmd(op->rq);
2784 | } |
2785 | |
2786 | nvme_fc_ctrl_put(ctrl); |
2787 | |
2788 | if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE && |
2789 | ret != -EBUSY) |
2790 | return BLK_STS_IOERR; |
2791 | |
2792 | return BLK_STS_RESOURCE; |
2793 | } |
2794 | |
2795 | return BLK_STS_OK; |
2796 | } |
2797 | |
2798 | static blk_status_t |
2799 | nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, |
2800 | const struct blk_mq_queue_data *bd) |
2801 | { |
2802 | struct nvme_ns *ns = hctx->queue->queuedata; |
2803 | struct nvme_fc_queue *queue = hctx->driver_data; |
2804 | struct nvme_fc_ctrl *ctrl = queue->ctrl; |
2805 | struct request *rq = bd->rq; |
2806 | struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); |
2807 | enum nvmefc_fcp_datadir io_dir; |
2808 | bool queue_ready = test_bit(NVME_FC_Q_LIVE, &queue->flags); |
2809 | u32 data_len; |
2810 | blk_status_t ret; |
2811 | |
2812 | if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE || |
	    !nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
		return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);

	ret = nvme_setup_cmd(ns, rq);
2817 | if (ret) |
2818 | return ret; |
2819 | |
2820 | /* |
2821 | * nvme core doesn't quite treat the rq opaquely. Commands such |
2822 | * as WRITE ZEROES will return a non-zero rq payload_bytes yet |
2823 | * there is no actual payload to be transferred. |
2824 | * To get it right, key data transmission on there being 1 or |
2825 | * more physical segments in the sg list. If there is no |
2826 | * physical segments, there is no payload. |
2827 | */ |
2828 | if (blk_rq_nr_phys_segments(rq)) { |
2829 | data_len = blk_rq_payload_bytes(rq); |
2830 | io_dir = ((rq_data_dir(rq) == WRITE) ? |
2831 | NVMEFC_FCP_WRITE : NVMEFC_FCP_READ); |
2832 | } else { |
2833 | data_len = 0; |
2834 | io_dir = NVMEFC_FCP_NODATA; |
2835 | } |

	return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir);
2839 | } |
2840 | |
2841 | static void |
2842 | nvme_fc_submit_async_event(struct nvme_ctrl *arg) |
2843 | { |
	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg);
2845 | struct nvme_fc_fcp_op *aen_op; |
2846 | blk_status_t ret; |
2847 | |
2848 | if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) |
2849 | return; |
2850 | |
2851 | aen_op = &ctrl->aen_ops[0]; |
2852 | |
	ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0,
					NVMEFC_FCP_NODATA);
	if (ret)
		dev_err(ctrl->ctrl.device,
			"failed async event work\n");
2858 | } |
2859 | |
2860 | static void |
2861 | nvme_fc_complete_rq(struct request *rq) |
2862 | { |
2863 | struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); |
2864 | struct nvme_fc_ctrl *ctrl = op->ctrl; |
2865 | |
	atomic_set(&op->state, FCPOP_STATE_IDLE);
2867 | op->flags &= ~FCOP_FLAGS_TERMIO; |
2868 | |
2869 | nvme_fc_unmap_data(ctrl, rq, op); |
	nvme_complete_rq(rq);
2871 | nvme_fc_ctrl_put(ctrl); |
2872 | } |
2873 | |
2874 | static void nvme_fc_map_queues(struct blk_mq_tag_set *set) |
2875 | { |
	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(set->driver_data);
2877 | int i; |
2878 | |
2879 | for (i = 0; i < set->nr_maps; i++) { |
2880 | struct blk_mq_queue_map *map = &set->map[i]; |
2881 | |
2882 | if (!map->nr_queues) { |
2883 | WARN_ON(i == HCTX_TYPE_DEFAULT); |
2884 | continue; |
2885 | } |
2886 | |
2887 | /* Call LLDD map queue functionality if defined */ |
2888 | if (ctrl->lport->ops->map_queues) |
2889 | ctrl->lport->ops->map_queues(&ctrl->lport->localport, |
2890 | map); |
2891 | else |
			blk_mq_map_queues(map);
2893 | } |
2894 | } |
2895 | |
2896 | static const struct blk_mq_ops nvme_fc_mq_ops = { |
2897 | .queue_rq = nvme_fc_queue_rq, |
2898 | .complete = nvme_fc_complete_rq, |
2899 | .init_request = nvme_fc_init_request, |
2900 | .exit_request = nvme_fc_exit_request, |
2901 | .init_hctx = nvme_fc_init_hctx, |
2902 | .timeout = nvme_fc_timeout, |
2903 | .map_queues = nvme_fc_map_queues, |
2904 | }; |
2905 | |
2906 | static int |
2907 | nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) |
2908 | { |
2909 | struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; |
2910 | unsigned int nr_io_queues; |
2911 | int ret; |
2912 | |
2913 | nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), |
2914 | ctrl->lport->ops->max_hw_queues); |
	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
	if (ret) {
		dev_info(ctrl->ctrl.device,
			"set_queue_count failed: %d\n", ret);
2919 | return ret; |
2920 | } |
2921 | |
2922 | ctrl->ctrl.queue_count = nr_io_queues + 1; |
2923 | if (!nr_io_queues) |
2924 | return 0; |
2925 | |
2926 | nvme_fc_init_io_queues(ctrl); |
2927 | |
	ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set,
			&nvme_fc_mq_ops, 1,
2930 | struct_size_t(struct nvme_fcp_op_w_sgl, priv, |
2931 | ctrl->lport->ops->fcprqst_priv_sz)); |
2932 | if (ret) |
2933 | return ret; |
2934 | |
	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
2936 | if (ret) |
2937 | goto out_cleanup_tagset; |
2938 | |
	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
2940 | if (ret) |
2941 | goto out_delete_hw_queues; |
2942 | |
2943 | ctrl->ioq_live = true; |
2944 | |
2945 | return 0; |
2946 | |
2947 | out_delete_hw_queues: |
2948 | nvme_fc_delete_hw_io_queues(ctrl); |
2949 | out_cleanup_tagset: |
	nvme_remove_io_tag_set(&ctrl->ctrl);
2951 | nvme_fc_free_io_queues(ctrl); |
2952 | |
2953 | /* force put free routine to ignore io queues */ |
2954 | ctrl->ctrl.tagset = NULL; |
2955 | |
2956 | return ret; |
2957 | } |
2958 | |
2959 | static int |
2960 | nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) |
2961 | { |
2962 | struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; |
2963 | u32 prior_ioq_cnt = ctrl->ctrl.queue_count - 1; |
2964 | unsigned int nr_io_queues; |
2965 | int ret; |
2966 | |
2967 | nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), |
2968 | ctrl->lport->ops->max_hw_queues); |
	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
	if (ret) {
		dev_info(ctrl->ctrl.device,
			"set_queue_count failed: %d\n", ret);
2973 | return ret; |
2974 | } |
2975 | |
2976 | if (!nr_io_queues && prior_ioq_cnt) { |
2977 | dev_info(ctrl->ctrl.device, |
2978 | "Fail Reconnect: At least 1 io queue " |
2979 | "required (was %d)\n" , prior_ioq_cnt); |
2980 | return -ENOSPC; |
2981 | } |
2982 | |
2983 | ctrl->ctrl.queue_count = nr_io_queues + 1; |
2984 | /* check for io queues existing */ |
2985 | if (ctrl->ctrl.queue_count == 1) |
2986 | return 0; |
2987 | |
2988 | if (prior_ioq_cnt != nr_io_queues) { |
2989 | dev_info(ctrl->ctrl.device, |
2990 | "reconnect: revising io queue count from %d to %d\n" , |
2991 | prior_ioq_cnt, nr_io_queues); |
2992 | blk_mq_update_nr_hw_queues(set: &ctrl->tag_set, nr_hw_queues: nr_io_queues); |
2993 | } |
2994 | |
	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
2996 | if (ret) |
2997 | goto out_free_io_queues; |
2998 | |
	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
3000 | if (ret) |
3001 | goto out_delete_hw_queues; |
3002 | |
3003 | return 0; |
3004 | |
3005 | out_delete_hw_queues: |
3006 | nvme_fc_delete_hw_io_queues(ctrl); |
3007 | out_free_io_queues: |
3008 | nvme_fc_free_io_queues(ctrl); |
3009 | return ret; |
3010 | } |
3011 | |
3012 | static void |
3013 | nvme_fc_rport_active_on_lport(struct nvme_fc_rport *rport) |
3014 | { |
3015 | struct nvme_fc_lport *lport = rport->lport; |
3016 | |
	atomic_inc(&lport->act_rport_cnt);
3018 | } |
3019 | |
3020 | static void |
3021 | nvme_fc_rport_inactive_on_lport(struct nvme_fc_rport *rport) |
3022 | { |
3023 | struct nvme_fc_lport *lport = rport->lport; |
3024 | u32 cnt; |
3025 | |
	cnt = atomic_dec_return(&lport->act_rport_cnt);
3027 | if (cnt == 0 && lport->localport.port_state == FC_OBJSTATE_DELETED) |
3028 | lport->ops->localport_delete(&lport->localport); |
3029 | } |
3030 | |
3031 | static int |
3032 | nvme_fc_ctlr_active_on_rport(struct nvme_fc_ctrl *ctrl) |
3033 | { |
3034 | struct nvme_fc_rport *rport = ctrl->rport; |
3035 | u32 cnt; |
3036 | |
	if (test_and_set_bit(ASSOC_ACTIVE, &ctrl->flags))
3038 | return 1; |
3039 | |
	cnt = atomic_inc_return(&rport->act_ctrl_cnt);
3041 | if (cnt == 1) |
3042 | nvme_fc_rport_active_on_lport(rport); |
3043 | |
3044 | return 0; |
3045 | } |
3046 | |
3047 | static int |
3048 | nvme_fc_ctlr_inactive_on_rport(struct nvme_fc_ctrl *ctrl) |
3049 | { |
3050 | struct nvme_fc_rport *rport = ctrl->rport; |
3051 | struct nvme_fc_lport *lport = rport->lport; |
3052 | u32 cnt; |
3053 | |
3054 | /* clearing of ctrl->flags ASSOC_ACTIVE bit is in association delete */ |
3055 | |
	cnt = atomic_dec_return(&rport->act_ctrl_cnt);
3057 | if (cnt == 0) { |
3058 | if (rport->remoteport.port_state == FC_OBJSTATE_DELETED) |
3059 | lport->ops->remoteport_delete(&rport->remoteport); |
3060 | nvme_fc_rport_inactive_on_lport(rport); |
3061 | } |
3062 | |
3063 | return 0; |
3064 | } |
3065 | |
3066 | /* |
3067 | * This routine restarts the controller on the host side, and |
3068 | * on the link side, recreates the controller association. |
3069 | */ |
3070 | static int |
3071 | nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) |
3072 | { |
3073 | struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; |
3074 | struct nvmefc_ls_rcv_op *disls = NULL; |
3075 | unsigned long flags; |
3076 | int ret; |
3077 | bool changed; |
3078 | |
3079 | ++ctrl->ctrl.nr_reconnects; |
3080 | |
3081 | if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) |
3082 | return -ENODEV; |
3083 | |
3084 | if (nvme_fc_ctlr_active_on_rport(ctrl)) |
3085 | return -ENOTUNIQ; |
3086 | |
3087 | dev_info(ctrl->ctrl.device, |
3088 | "NVME-FC{%d}: create association : host wwpn 0x%016llx " |
3089 | " rport wwpn 0x%016llx: NQN \"%s\"\n" , |
3090 | ctrl->cnum, ctrl->lport->localport.port_name, |
3091 | ctrl->rport->remoteport.port_name, ctrl->ctrl.opts->subsysnqn); |
3092 | |
	clear_bit(ASSOC_FAILED, &ctrl->flags);
3094 | |
3095 | /* |
3096 | * Create the admin queue |
3097 | */ |
3098 | |
	ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
3100 | NVME_AQ_DEPTH); |
3101 | if (ret) |
3102 | goto out_free_queue; |
3103 | |
	ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0],
				NVME_AQ_DEPTH, (NVME_AQ_DEPTH / 4));
3106 | if (ret) |
3107 | goto out_delete_hw_queue; |
3108 | |
	ret = nvmf_connect_admin_queue(&ctrl->ctrl);
3110 | if (ret) |
3111 | goto out_disconnect_admin_queue; |
3112 | |
	set_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags);
3114 | |
3115 | /* |
3116 | * Check controller capabilities |
3117 | * |
3118 | * todo:- add code to check if ctrl attributes changed from |
3119 | * prior connection values |
3120 | */ |
3121 | |
	ret = nvme_enable_ctrl(&ctrl->ctrl);
3123 | if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) |
3124 | ret = -EIO; |
3125 | if (ret) |
3126 | goto out_disconnect_admin_queue; |
3127 | |
3128 | ctrl->ctrl.max_segments = ctrl->lport->ops->max_sgl_segments; |
3129 | ctrl->ctrl.max_hw_sectors = ctrl->ctrl.max_segments << |
3130 | (ilog2(SZ_4K) - 9); |
3131 | |
	nvme_unquiesce_admin_queue(&ctrl->ctrl);

	ret = nvme_init_ctrl_finish(&ctrl->ctrl, false);
3135 | if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) |
3136 | ret = -EIO; |
3137 | if (ret) |
3138 | goto out_disconnect_admin_queue; |
3139 | |
3140 | /* sanity checks */ |
3141 | |
3142 | /* FC-NVME does not have other data in the capsule */ |
3143 | if (ctrl->ctrl.icdoff) { |
3144 | dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n" , |
3145 | ctrl->ctrl.icdoff); |
3146 | ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR; |
3147 | goto out_disconnect_admin_queue; |
3148 | } |
3149 | |
3150 | /* FC-NVME supports normal SGL Data Block Descriptors */ |
	if (!nvme_ctrl_sgl_supported(&ctrl->ctrl)) {
		dev_err(ctrl->ctrl.device,
			"Mandatory sgls are not supported!\n");
3154 | ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR; |
3155 | goto out_disconnect_admin_queue; |
3156 | } |
3157 | |
3158 | if (opts->queue_size > ctrl->ctrl.maxcmd) { |
3159 | /* warn if maxcmd is lower than queue_size */ |
3160 | dev_warn(ctrl->ctrl.device, |
3161 | "queue_size %zu > ctrl maxcmd %u, reducing " |
3162 | "to maxcmd\n" , |
3163 | opts->queue_size, ctrl->ctrl.maxcmd); |
3164 | opts->queue_size = ctrl->ctrl.maxcmd; |
3165 | ctrl->ctrl.sqsize = opts->queue_size - 1; |
3166 | } |
3167 | |
3168 | ret = nvme_fc_init_aen_ops(ctrl); |
3169 | if (ret) |
3170 | goto out_term_aen_ops; |
3171 | |
3172 | /* |
3173 | * Create the io queues |
3174 | */ |
3175 | |
3176 | if (ctrl->ctrl.queue_count > 1) { |
3177 | if (!ctrl->ioq_live) |
3178 | ret = nvme_fc_create_io_queues(ctrl); |
3179 | else |
3180 | ret = nvme_fc_recreate_io_queues(ctrl); |
3181 | } |
3182 | |
3183 | spin_lock_irqsave(&ctrl->lock, flags); |
3184 | if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) |
3185 | ret = -EIO; |
3186 | if (ret) { |
		spin_unlock_irqrestore(&ctrl->lock, flags);
		goto out_term_aen_ops;
	}
	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
	spin_unlock_irqrestore(&ctrl->lock, flags);
3192 | |
3193 | ctrl->ctrl.nr_reconnects = 0; |
3194 | |
3195 | if (changed) |
		nvme_start_ctrl(&ctrl->ctrl);
3197 | |
3198 | return 0; /* Success */ |
3199 | |
3200 | out_term_aen_ops: |
3201 | nvme_fc_term_aen_ops(ctrl); |
3202 | out_disconnect_admin_queue: |
3203 | dev_warn(ctrl->ctrl.device, |
3204 | "NVME-FC{%d}: create_assoc failed, assoc_id %llx ret %d\n" , |
3205 | ctrl->cnum, ctrl->association_id, ret); |
3206 | /* send a Disconnect(association) LS to fc-nvme target */ |
3207 | nvme_fc_xmt_disconnect_assoc(ctrl); |
3208 | spin_lock_irqsave(&ctrl->lock, flags); |
3209 | ctrl->association_id = 0; |
3210 | disls = ctrl->rcv_disconn; |
3211 | ctrl->rcv_disconn = NULL; |
	spin_unlock_irqrestore(&ctrl->lock, flags);
	if (disls)
		nvme_fc_xmt_ls_rsp(disls);
out_delete_hw_queue:
	__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
out_free_queue:
	nvme_fc_free_queue(&ctrl->queues[0]);
	clear_bit(ASSOC_ACTIVE, &ctrl->flags);
3220 | nvme_fc_ctlr_inactive_on_rport(ctrl); |
3221 | |
3222 | return ret; |
3223 | } |
3224 | |
3225 | |
3226 | /* |
3227 | * This routine stops operation of the controller on the host side. |
 * On the host OS stack side: admin and IO queues are stopped and
 * outstanding I/Os on them are terminated via FC ABTS.
3230 | * On the link side: the association is terminated. |
3231 | */ |
3232 | static void |
3233 | nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) |
3234 | { |
3235 | struct nvmefc_ls_rcv_op *disls = NULL; |
3236 | unsigned long flags; |
3237 | |
	if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags))
		return;

	spin_lock_irqsave(&ctrl->lock, flags);
	set_bit(FCCTRL_TERMIO, &ctrl->flags);
	ctrl->iocnt = 0;
	spin_unlock_irqrestore(&ctrl->lock, flags);

	__nvme_fc_abort_outstanding_ios(ctrl, false);
3247 | |
3248 | /* kill the aens as they are a separate path */ |
3249 | nvme_fc_abort_aen_ops(ctrl); |
3250 | |
3251 | /* wait for all io that had to be aborted */ |
	spin_lock_irq(&ctrl->lock);
	wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
	clear_bit(FCCTRL_TERMIO, &ctrl->flags);
	spin_unlock_irq(&ctrl->lock);
3256 | |
3257 | nvme_fc_term_aen_ops(ctrl); |
3258 | |
3259 | /* |
3260 | * send a Disconnect(association) LS to fc-nvme target |
	 * Note: it could have been sent at the top of the process, but
	 * link traffic is cleaner if it is sent after the aborts complete.
3263 | * Note: if association doesn't exist, association_id will be 0 |
3264 | */ |
3265 | if (ctrl->association_id) |
3266 | nvme_fc_xmt_disconnect_assoc(ctrl); |
3267 | |
3268 | spin_lock_irqsave(&ctrl->lock, flags); |
3269 | ctrl->association_id = 0; |
3270 | disls = ctrl->rcv_disconn; |
3271 | ctrl->rcv_disconn = NULL; |
	spin_unlock_irqrestore(&ctrl->lock, flags);
3273 | if (disls) |
3274 | /* |
		 * if a Disconnect Request was waiting for a response, send
		 * it now that all ABTS's have been issued (and are complete).
3277 | */ |
		nvme_fc_xmt_ls_rsp(disls);
3279 | |
3280 | if (ctrl->ctrl.tagset) { |
3281 | nvme_fc_delete_hw_io_queues(ctrl); |
3282 | nvme_fc_free_io_queues(ctrl); |
3283 | } |
3284 | |
	__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
	nvme_fc_free_queue(&ctrl->queues[0]);

	/* re-enable the admin_q so anything new can fast fail */
	nvme_unquiesce_admin_queue(&ctrl->ctrl);

	/* resume the io queues so that things will fast fail */
	nvme_unquiesce_io_queues(&ctrl->ctrl);
3293 | |
3294 | nvme_fc_ctlr_inactive_on_rport(ctrl); |
3295 | } |
3296 | |
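/*
 * ->delete_ctrl entry point. Cancel any pending ioerr/connect work
 * first so a reconnect attempt cannot race with the association
 * teardown below.
 */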
3297 | static void |
3298 | nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) |
3299 | { |
	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);

	cancel_work_sync(&ctrl->ioerr_work);
	cancel_delayed_work_sync(&ctrl->connect_work);
3304 | /* |
3305 | * kill the association on the link side. this will block |
3306 | * waiting for io to terminate |
3307 | */ |
3308 | nvme_fc_delete_association(ctrl); |
3309 | } |
3310 | |
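/*
 * Decide whether a failed connect should be retried. Retries continue
 * while the remote port is online and the status is not marked DNR
 * (Do Not Retry); they stop once dev_loss_tmo expires or the maximum
 * reconnect count is reached, at which point the controller is deleted.
 */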
3311 | static void |
3312 | nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) |
3313 | { |
3314 | struct nvme_fc_rport *rport = ctrl->rport; |
3315 | struct nvme_fc_remote_port *portptr = &rport->remoteport; |
3316 | unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ; |
3317 | bool recon = true; |
3318 | |
3319 | if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) |
3320 | return; |
3321 | |
3322 | if (portptr->port_state == FC_OBJSTATE_ONLINE) { |
3323 | dev_info(ctrl->ctrl.device, |
3324 | "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n" , |
3325 | ctrl->cnum, status); |
3326 | if (status > 0 && (status & NVME_SC_DNR)) |
3327 | recon = false; |
3328 | } else if (time_after_eq(jiffies, rport->dev_loss_end)) |
3329 | recon = false; |
3330 | |
	if (recon && nvmf_should_reconnect(&ctrl->ctrl)) {
		if (portptr->port_state == FC_OBJSTATE_ONLINE)
			dev_info(ctrl->ctrl.device,
				"NVME-FC{%d}: Reconnect attempt in %ld "
				"seconds\n",
				ctrl->cnum, recon_delay / HZ);
		else if (time_after(jiffies + recon_delay, rport->dev_loss_end))
			recon_delay = rport->dev_loss_end - jiffies;

		queue_delayed_work(nvme_wq, &ctrl->connect_work, recon_delay);
3341 | } else { |
3342 | if (portptr->port_state == FC_OBJSTATE_ONLINE) { |
3343 | if (status > 0 && (status & NVME_SC_DNR)) |
3344 | dev_warn(ctrl->ctrl.device, |
3345 | "NVME-FC{%d}: reconnect failure\n" , |
3346 | ctrl->cnum); |
3347 | else |
3348 | dev_warn(ctrl->ctrl.device, |
3349 | "NVME-FC{%d}: Max reconnect attempts " |
3350 | "(%d) reached.\n" , |
3351 | ctrl->cnum, ctrl->ctrl.nr_reconnects); |
3352 | } else |
3353 | dev_warn(ctrl->ctrl.device, |
3354 | "NVME-FC{%d}: dev_loss_tmo (%d) expired " |
3355 | "while waiting for remoteport connectivity.\n" , |
3356 | ctrl->cnum, min_t(int, portptr->dev_loss_tmo, |
3357 | (ctrl->ctrl.opts->max_reconnects * |
3358 | ctrl->ctrl.opts->reconnect_delay))); |
3359 | WARN_ON(nvme_delete_ctrl(&ctrl->ctrl)); |
3360 | } |
3361 | } |
3362 | |
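/*
 * Reset work: tear down the current association, move the controller
 * to CONNECTING, and, if the remote port is still online, run the
 * connect work to build a fresh association.
 */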
3363 | static void |
3364 | nvme_fc_reset_ctrl_work(struct work_struct *work) |
3365 | { |
3366 | struct nvme_fc_ctrl *ctrl = |
3367 | container_of(work, struct nvme_fc_ctrl, ctrl.reset_work); |
3368 | |
	nvme_stop_ctrl(&ctrl->ctrl);

	/* will block while waiting for io to terminate */
	nvme_fc_delete_association(ctrl);

	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
		dev_err(ctrl->ctrl.device,
			"NVME-FC{%d}: error_recovery: Couldn't change state "
			"to CONNECTING\n", ctrl->cnum);

	if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) {
		if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) {
			dev_err(ctrl->ctrl.device,
				"NVME-FC{%d}: failed to schedule connect "
				"after reset\n", ctrl->cnum);
		} else {
			flush_delayed_work(&ctrl->connect_work);
		}
	} else {
		nvme_fc_reconnect_or_delete(ctrl, -ENOTCONN);
	}
3389 | } |
3390 | } |
3391 | |
3392 | |
3393 | static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { |
3394 | .name = "fc" , |
3395 | .module = THIS_MODULE, |
3396 | .flags = NVME_F_FABRICS, |
3397 | .reg_read32 = nvmf_reg_read32, |
3398 | .reg_read64 = nvmf_reg_read64, |
3399 | .reg_write32 = nvmf_reg_write32, |
3400 | .free_ctrl = nvme_fc_nvme_ctrl_freed, |
3401 | .submit_async_event = nvme_fc_submit_async_event, |
3402 | .delete_ctrl = nvme_fc_delete_ctrl, |
3403 | .get_address = nvmf_get_address, |
3404 | }; |
3405 | |
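/*
 * Delayed work servicing both the initial connect and every reconnect
 * attempt; on failure, nvme_fc_reconnect_or_delete() decides whether
 * another attempt gets scheduled.
 */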
3406 | static void |
3407 | nvme_fc_connect_ctrl_work(struct work_struct *work) |
3408 | { |
3409 | int ret; |
3410 | |
3411 | struct nvme_fc_ctrl *ctrl = |
3412 | container_of(to_delayed_work(work), |
3413 | struct nvme_fc_ctrl, connect_work); |
3414 | |
3415 | ret = nvme_fc_create_association(ctrl); |
3416 | if (ret) |
		nvme_fc_reconnect_or_delete(ctrl, ret);
	else
		dev_info(ctrl->ctrl.device,
			"NVME-FC{%d}: controller connect complete\n",
3421 | ctrl->cnum); |
3422 | } |
3423 | |
3424 | |
3425 | static const struct blk_mq_ops nvme_fc_admin_mq_ops = { |
3426 | .queue_rq = nvme_fc_queue_rq, |
3427 | .complete = nvme_fc_complete_rq, |
3428 | .init_request = nvme_fc_init_request, |
3429 | .exit_request = nvme_fc_exit_request, |
3430 | .init_hctx = nvme_fc_init_admin_hctx, |
3431 | .timeout = nvme_fc_timeout, |
3432 | }; |
3433 | |
3434 | |
3435 | /* |
3436 | * Fails a controller request if it matches an existing controller |
3437 | * (association) with the same tuple: |
3438 | * <Host NQN, Host ID, local FC port, remote FC port, SUBSYS NQN> |
3439 | * |
3440 | * The ports don't need to be compared as they are intrinsically |
3441 | * already matched by the port pointers supplied. |
3442 | */ |
3443 | static bool |
3444 | nvme_fc_existing_controller(struct nvme_fc_rport *rport, |
3445 | struct nvmf_ctrl_options *opts) |
3446 | { |
3447 | struct nvme_fc_ctrl *ctrl; |
3448 | unsigned long flags; |
3449 | bool found = false; |
3450 | |
3451 | spin_lock_irqsave(&rport->lock, flags); |
3452 | list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) { |
		found = nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts);
3454 | if (found) |
3455 | break; |
3456 | } |
	spin_unlock_irqrestore(&rport->lock, flags);
3458 | |
3459 | return found; |
3460 | } |
3461 | |
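/*
 * Allocate and initialize a controller for the lport/rport pair and
 * kick off the initial connect. Note: once nvme_init_ctrl() succeeds,
 * any failure must tear down via the ref-counting path (fail_ctrl)
 * rather than direct frees.
 */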
3462 | static struct nvme_ctrl * |
3463 | nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, |
3464 | struct nvme_fc_lport *lport, struct nvme_fc_rport *rport) |
3465 | { |
3466 | struct nvme_fc_ctrl *ctrl; |
3467 | unsigned long flags; |
3468 | int ret, idx, ctrl_loss_tmo; |
3469 | |
3470 | if (!(rport->remoteport.port_role & |
3471 | (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { |
3472 | ret = -EBADR; |
3473 | goto out_fail; |
3474 | } |
3475 | |
3476 | if (!opts->duplicate_connect && |
3477 | nvme_fc_existing_controller(rport, opts)) { |
3478 | ret = -EALREADY; |
3479 | goto out_fail; |
3480 | } |
3481 | |
	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl) {
		ret = -ENOMEM;
		goto out_fail;
	}

	idx = ida_alloc(&nvme_fc_ctrl_cnt, GFP_KERNEL);
3489 | if (idx < 0) { |
3490 | ret = -ENOSPC; |
3491 | goto out_free_ctrl; |
3492 | } |
3493 | |
3494 | /* |
3495 | * if ctrl_loss_tmo is being enforced and the default reconnect delay |
3496 | * is being used, change to a shorter reconnect delay for FC. |
3497 | */ |
3498 | if (opts->max_reconnects != -1 && |
3499 | opts->reconnect_delay == NVMF_DEF_RECONNECT_DELAY && |
3500 | opts->reconnect_delay > NVME_FC_DEFAULT_RECONNECT_TMO) { |
3501 | ctrl_loss_tmo = opts->max_reconnects * opts->reconnect_delay; |
3502 | opts->reconnect_delay = NVME_FC_DEFAULT_RECONNECT_TMO; |
3503 | opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo, |
3504 | opts->reconnect_delay); |
3505 | } |
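	/*
	 * Worked example, assuming the fabrics defaults of
	 * reconnect_delay=10s and ctrl_loss_tmo=600s: the effective
	 * ctrl_loss_tmo is 60 reconnects * 10s = 600s, which at the FC
	 * default 2s delay becomes DIV_ROUND_UP(600, 2) = 300 attempts,
	 * preserving the overall time window.
	 */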
3506 | |
3507 | ctrl->ctrl.opts = opts; |
3508 | ctrl->ctrl.nr_reconnects = 0; |
3509 | if (lport->dev) |
		ctrl->ctrl.numa_node = dev_to_node(lport->dev);
	else
		ctrl->ctrl.numa_node = NUMA_NO_NODE;
	INIT_LIST_HEAD(&ctrl->ctrl_list);
3514 | ctrl->lport = lport; |
3515 | ctrl->rport = rport; |
3516 | ctrl->dev = lport->dev; |
3517 | ctrl->cnum = idx; |
3518 | ctrl->ioq_live = false; |
3519 | init_waitqueue_head(&ctrl->ioabort_wait); |
3520 | |
	get_device(ctrl->dev);
	kref_init(&ctrl->ref);
3523 | |
3524 | INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); |
3525 | INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); |
3526 | INIT_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work); |
3527 | spin_lock_init(&ctrl->lock); |
3528 | |
3529 | /* io queue count */ |
3530 | ctrl->ctrl.queue_count = min_t(unsigned int, |
3531 | opts->nr_io_queues, |
3532 | lport->ops->max_hw_queues); |
3533 | ctrl->ctrl.queue_count++; /* +1 for admin queue */ |
3534 | |
3535 | ctrl->ctrl.sqsize = opts->queue_size - 1; |
3536 | ctrl->ctrl.kato = opts->kato; |
3537 | ctrl->ctrl.cntlid = 0xffff; |
3538 | |
3539 | ret = -ENOMEM; |
	ctrl->queues = kcalloc(ctrl->ctrl.queue_count,
			       sizeof(struct nvme_fc_queue), GFP_KERNEL);
3542 | if (!ctrl->queues) |
3543 | goto out_free_ida; |
3544 | |
	nvme_fc_init_queue(ctrl, 0);
3546 | |
3547 | /* |
3548 | * Would have been nice to init io queues tag set as well. |
3549 | * However, we require interaction from the controller |
3550 | * for max io queue count before we can do so. |
3551 | * Defer this to the connect path. |
3552 | */ |
3553 | |
	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0);
3555 | if (ret) |
3556 | goto out_free_queues; |
3557 | |
3558 | /* at this point, teardown path changes to ref counting on nvme ctrl */ |
3559 | |
	ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set,
			&nvme_fc_admin_mq_ops,
3562 | struct_size_t(struct nvme_fcp_op_w_sgl, priv, |
3563 | ctrl->lport->ops->fcprqst_priv_sz)); |
3564 | if (ret) |
3565 | goto fail_ctrl; |
3566 | |
3567 | spin_lock_irqsave(&rport->lock, flags); |
	list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list);
	spin_unlock_irqrestore(&rport->lock, flags);

	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) ||
	    !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
		dev_err(ctrl->ctrl.device,
			"NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum);
3575 | goto fail_ctrl; |
3576 | } |
3577 | |
	if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) {
		dev_err(ctrl->ctrl.device,
			"NVME-FC{%d}: failed to schedule initial connect\n",
3581 | ctrl->cnum); |
3582 | goto fail_ctrl; |
3583 | } |
3584 | |
	flush_delayed_work(&ctrl->connect_work);

	dev_info(ctrl->ctrl.device,
		"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
3589 | ctrl->cnum, nvmf_ctrl_subsysnqn(&ctrl->ctrl)); |
3590 | |
3591 | return &ctrl->ctrl; |
3592 | |
3593 | fail_ctrl: |
	nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
	cancel_work_sync(&ctrl->ioerr_work);
	cancel_work_sync(&ctrl->ctrl.reset_work);
	cancel_delayed_work_sync(&ctrl->connect_work);
3598 | |
3599 | ctrl->ctrl.opts = NULL; |
3600 | |
3601 | /* initiate nvme ctrl ref counting teardown */ |
	nvme_uninit_ctrl(&ctrl->ctrl);

	/* Remove core ctrl ref. */
	nvme_put_ctrl(&ctrl->ctrl);
3606 | |
3607 | /* as we're past the point where we transition to the ref |
3608 | * counting teardown path, if we return a bad pointer here, |
3609 | * the calling routine, thinking it's prior to the |
3610 | * transition, will do an rport put. Since the teardown |
	 * path also does a rport put, we do an extra get here so
	 * that proper order/teardown happens.
3613 | */ |
3614 | nvme_fc_rport_get(rport); |
3615 | |
	return ERR_PTR(-EIO);
3617 | |
3618 | out_free_queues: |
	kfree(ctrl->queues);
out_free_ida:
	put_device(ctrl->dev);
	ida_free(&nvme_fc_ctrl_cnt, ctrl->cnum);
out_free_ctrl:
	kfree(ctrl);
out_fail:
	/* exit via here doesn't follow ctlr ref points */
	return ERR_PTR(ret);
3628 | } |
3629 | |
3630 | |
3631 | struct nvmet_fc_traddr { |
3632 | u64 nn; |
3633 | u64 pn; |
3634 | }; |
3635 | |
3636 | static int |
3637 | __nvme_fc_parse_u64(substring_t *sstr, u64 *val) |
3638 | { |
3639 | u64 token64; |
3640 | |
	if (match_u64(sstr, &token64))
3642 | return -EINVAL; |
3643 | *val = token64; |
3644 | |
3645 | return 0; |
3646 | } |
3647 | |
3648 | /* |
3649 | * This routine validates and extracts the WWN's from the TRADDR string. |
3650 | * As kernel parsers need the 0x to determine number base, universally |
3651 | * build string to parse with 0x prefix before parsing name strings. |
3652 | */ |
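/*
 * For example, both of these forms are accepted (16 hex digits per WWN):
 *   "nn-0x20000090fa942779:pn-0x10000090fa942779"
 *   "nn-20000090fa942779:pn-10000090fa942779"
 */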
3653 | static int |
3654 | nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen) |
3655 | { |
3656 | char name[2 + NVME_FC_TRADDR_HEXNAMELEN + 1]; |
3657 | substring_t wwn = { name, &name[sizeof(name)-1] }; |
3658 | int nnoffset, pnoffset; |
3659 | |
3660 | /* validate if string is one of the 2 allowed formats */ |
	if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH &&
			!strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) &&
			!strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET],
				"pn-0x", NVME_FC_TRADDR_OXNNLEN)) {
		nnoffset = NVME_FC_TRADDR_OXNNLEN;
		pnoffset = NVME_FC_TRADDR_MAX_PN_OFFSET +
						NVME_FC_TRADDR_OXNNLEN;
	} else if ((strnlen(buf, blen) == NVME_FC_TRADDR_MINLENGTH &&
			!strncmp(buf, "nn-", NVME_FC_TRADDR_NNLEN) &&
			!strncmp(&buf[NVME_FC_TRADDR_MIN_PN_OFFSET],
				"pn-", NVME_FC_TRADDR_NNLEN))) {
3672 | nnoffset = NVME_FC_TRADDR_NNLEN; |
3673 | pnoffset = NVME_FC_TRADDR_MIN_PN_OFFSET + NVME_FC_TRADDR_NNLEN; |
3674 | } else |
3675 | goto out_einval; |
3676 | |
3677 | name[0] = '0'; |
3678 | name[1] = 'x'; |
3679 | name[2 + NVME_FC_TRADDR_HEXNAMELEN] = 0; |
3680 | |
3681 | memcpy(&name[2], &buf[nnoffset], NVME_FC_TRADDR_HEXNAMELEN); |
	if (__nvme_fc_parse_u64(&wwn, &traddr->nn))
3683 | goto out_einval; |
3684 | |
3685 | memcpy(&name[2], &buf[pnoffset], NVME_FC_TRADDR_HEXNAMELEN); |
	if (__nvme_fc_parse_u64(&wwn, &traddr->pn))
3687 | goto out_einval; |
3688 | |
3689 | return 0; |
3690 | |
3691 | out_einval: |
3692 | pr_warn("%s: bad traddr string\n" , __func__); |
3693 | return -EINVAL; |
3694 | } |
3695 | |
3696 | static struct nvme_ctrl * |
3697 | nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) |
3698 | { |
3699 | struct nvme_fc_lport *lport; |
3700 | struct nvme_fc_rport *rport; |
3701 | struct nvme_ctrl *ctrl; |
3702 | struct nvmet_fc_traddr laddr = { 0L, 0L }; |
3703 | struct nvmet_fc_traddr raddr = { 0L, 0L }; |
3704 | unsigned long flags; |
3705 | int ret; |
3706 | |
	ret = nvme_fc_parse_traddr(&raddr, opts->traddr, NVMF_TRADDR_SIZE);
	if (ret || !raddr.nn || !raddr.pn)
		return ERR_PTR(-EINVAL);

	ret = nvme_fc_parse_traddr(&laddr, opts->host_traddr, NVMF_TRADDR_SIZE);
	if (ret || !laddr.nn || !laddr.pn)
		return ERR_PTR(-EINVAL);
3714 | |
3715 | /* find the host and remote ports to connect together */ |
3716 | spin_lock_irqsave(&nvme_fc_lock, flags); |
3717 | list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { |
3718 | if (lport->localport.node_name != laddr.nn || |
3719 | lport->localport.port_name != laddr.pn || |
3720 | lport->localport.port_state != FC_OBJSTATE_ONLINE) |
3721 | continue; |
3722 | |
3723 | list_for_each_entry(rport, &lport->endp_list, endp_list) { |
3724 | if (rport->remoteport.node_name != raddr.nn || |
3725 | rport->remoteport.port_name != raddr.pn || |
3726 | rport->remoteport.port_state != FC_OBJSTATE_ONLINE) |
3727 | continue; |
3728 | |
			/* if we fail to get a reference, fall through; will error below */
3730 | if (!nvme_fc_rport_get(rport)) |
3731 | break; |
3732 | |
			spin_unlock_irqrestore(&nvme_fc_lock, flags);

			ctrl = nvme_fc_init_ctrl(dev, opts, lport, rport);
			if (IS_ERR(ctrl))
				nvme_fc_rport_put(rport);
			return ctrl;
		}
	}
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	pr_warn("%s: %s - %s combination not found\n",
		__func__, opts->traddr, opts->host_traddr);
	return ERR_PTR(-ENOENT);
3746 | } |
3747 | |
3748 | |
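/*
 * "fc" fabrics transport: traddr and host_traddr (the remote and local
 * WWNN/WWPN pairs) are mandatory; reconnect_delay and ctrl_loss_tmo may
 * be overridden at connect time.
 */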
3749 | static struct nvmf_transport_ops nvme_fc_transport = { |
3750 | .name = "fc" , |
3751 | .module = THIS_MODULE, |
3752 | .required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR, |
3753 | .allowed_opts = NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO, |
3754 | .create_ctrl = nvme_fc_create_ctrl, |
3755 | }; |
3756 | |
/* Arbitrary cap on successive reference failures; with lots of subsystems it could be high */
3758 | #define DISCOVERY_MAX_FAIL 20 |
3759 | |
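/*
 * Sysfs hook that re-signals discovery on every known lport/rport pair,
 * useful when an admin wants to force a rescan, e.g.:
 *   echo 1 > /sys/class/fc/fc_udev_device/nvme_discovery
 * (the written value is ignored; any write triggers the scan)
 */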
3760 | static ssize_t nvme_fc_nvme_discovery_store(struct device *dev, |
3761 | struct device_attribute *attr, const char *buf, size_t count) |
3762 | { |
3763 | unsigned long flags; |
3764 | LIST_HEAD(local_disc_list); |
3765 | struct nvme_fc_lport *lport; |
3766 | struct nvme_fc_rport *rport; |
3767 | int failcnt = 0; |
3768 | |
3769 | spin_lock_irqsave(&nvme_fc_lock, flags); |
3770 | restart: |
3771 | list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { |
3772 | list_for_each_entry(rport, &lport->endp_list, endp_list) { |
3773 | if (!nvme_fc_lport_get(lport)) |
3774 | continue; |
3775 | if (!nvme_fc_rport_get(rport)) { |
3776 | /* |
3777 | * This is a temporary condition. Upon restart |
3778 | * this rport will be gone from the list. |
3779 | * |
3780 | * Revert the lport put and retry. Anything |
3781 | * added to the list already will be skipped (as |
3782 | * they are no longer list_empty). Loops should |
3783 | * resume at rports that were not yet seen. |
3784 | */ |
3785 | nvme_fc_lport_put(lport); |
3786 | |
3787 | if (failcnt++ < DISCOVERY_MAX_FAIL) |
3788 | goto restart; |
3789 | |
3790 | pr_err("nvme_discovery: too many reference " |
3791 | "failures\n" ); |
3792 | goto process_local_list; |
3793 | } |
3794 | if (list_empty(head: &rport->disc_list)) |
3795 | list_add_tail(new: &rport->disc_list, |
3796 | head: &local_disc_list); |
3797 | } |
3798 | } |
3799 | |
3800 | process_local_list: |
	while (!list_empty(&local_disc_list)) {
		rport = list_first_entry(&local_disc_list,
					 struct nvme_fc_rport, disc_list);
		list_del_init(&rport->disc_list);
		spin_unlock_irqrestore(&nvme_fc_lock, flags);
3806 | |
3807 | lport = rport->lport; |
3808 | /* signal discovery. Won't hurt if it repeats */ |
3809 | nvme_fc_signal_discovery_scan(lport, rport); |
3810 | nvme_fc_rport_put(rport); |
3811 | nvme_fc_lport_put(lport); |
3812 | |
3813 | spin_lock_irqsave(&nvme_fc_lock, flags); |
3814 | } |
	spin_unlock_irqrestore(&nvme_fc_lock, flags);
3816 | |
3817 | return count; |
3818 | } |
3819 | |
3820 | static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store); |
3821 | |
3822 | #ifdef CONFIG_BLK_CGROUP_FC_APPID |
/* Parse the cgroup id from the buffer and return the length of the cgrpid */
3824 | static int fc_parse_cgrpid(const char *buf, u64 *id) |
3825 | { |
3826 | char cgrp_id[16+1]; |
3827 | int cgrpid_len, j; |
3828 | |
3829 | memset(cgrp_id, 0x0, sizeof(cgrp_id)); |
3830 | for (cgrpid_len = 0, j = 0; cgrpid_len < 17; cgrpid_len++) { |
3831 | if (buf[cgrpid_len] != ':') |
3832 | cgrp_id[cgrpid_len] = buf[cgrpid_len]; |
3833 | else { |
3834 | j = 1; |
3835 | break; |
3836 | } |
3837 | } |
3838 | if (!j) |
3839 | return -EINVAL; |
	if (kstrtou64(cgrp_id, 16, id) < 0)
3841 | return -EINVAL; |
3842 | return cgrpid_len; |
3843 | } |
3844 | |
3845 | /* |
3846 | * Parse and update the appid in the blkcg associated with the cgroupid. |
3847 | */ |
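/*
 * Expected input is "<cgroup_id>:<app_id>" with the cgroup id in hex,
 * e.g. (the hex id below is purely illustrative):
 *   echo "75bf:App1" > /sys/class/fc/fc_udev_device/appid_store
 */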
3848 | static ssize_t fc_appid_store(struct device *dev, |
3849 | struct device_attribute *attr, const char *buf, size_t count) |
3850 | { |
3851 | size_t orig_count = count; |
3852 | u64 cgrp_id; |
3853 | int appid_len = 0; |
3854 | int cgrpid_len = 0; |
3855 | char app_id[FC_APPID_LEN]; |
3856 | int ret = 0; |
3857 | |
3858 | if (buf[count-1] == '\n') |
3859 | count--; |
3860 | |
3861 | if ((count > (16+1+FC_APPID_LEN)) || (!strchr(buf, ':'))) |
3862 | return -EINVAL; |
3863 | |
	cgrpid_len = fc_parse_cgrpid(buf, &cgrp_id);
3865 | if (cgrpid_len < 0) |
3866 | return -EINVAL; |
3867 | appid_len = count - cgrpid_len - 1; |
3868 | if (appid_len > FC_APPID_LEN) |
3869 | return -EINVAL; |
3870 | |
3871 | memset(app_id, 0x0, sizeof(app_id)); |
3872 | memcpy(app_id, &buf[cgrpid_len+1], appid_len); |
	ret = blkcg_set_fc_appid(app_id, cgrp_id, sizeof(app_id));
3874 | if (ret < 0) |
3875 | return ret; |
3876 | return orig_count; |
3877 | } |
3878 | static DEVICE_ATTR(appid_store, 0200, NULL, fc_appid_store); |
3879 | #endif /* CONFIG_BLK_CGROUP_FC_APPID */ |
3880 | |
3881 | static struct attribute *nvme_fc_attrs[] = { |
3882 | &dev_attr_nvme_discovery.attr, |
3883 | #ifdef CONFIG_BLK_CGROUP_FC_APPID |
3884 | &dev_attr_appid_store.attr, |
3885 | #endif |
3886 | NULL |
3887 | }; |
3888 | |
3889 | static const struct attribute_group nvme_fc_attr_group = { |
3890 | .attrs = nvme_fc_attrs, |
3891 | }; |
3892 | |
3893 | static const struct attribute_group *nvme_fc_attr_groups[] = { |
3894 | &nvme_fc_attr_group, |
3895 | NULL |
3896 | }; |
3897 | |
3898 | static struct class fc_class = { |
3899 | .name = "fc" , |
3900 | .dev_groups = nvme_fc_attr_groups, |
3901 | }; |
3902 | |
3903 | static int __init nvme_fc_init_module(void) |
3904 | { |
3905 | int ret; |
3906 | |
	nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0);
3908 | if (!nvme_fc_wq) |
3909 | return -ENOMEM; |
3910 | |
3911 | /* |
3912 | * NOTE: |
3913 | * It is expected that in the future the kernel will combine |
3914 | * the FC-isms that are currently under scsi and now being |
3915 | * added to by NVME into a new standalone FC class. The SCSI |
3916 | * and NVME protocols and their devices would be under this |
3917 | * new FC class. |
3918 | * |
3919 | * As we need something to post FC-specific udev events to, |
3920 | * specifically for nvme probe events, start by creating the |
3921 | * new device class. When the new standalone FC class is |
3922 | * put in place, this code will move to a more generic |
3923 | * location for the class. |
3924 | */ |
	ret = class_register(&fc_class);
	if (ret) {
		pr_err("couldn't register class fc\n");
3928 | goto out_destroy_wq; |
3929 | } |
3930 | |
3931 | /* |
3932 | * Create a device for the FC-centric udev events |
3933 | */ |
	fc_udev_device = device_create(&fc_class, NULL, MKDEV(0, 0), NULL,
				       "fc_udev_device");
	if (IS_ERR(fc_udev_device)) {
		pr_err("couldn't create fc_udev device!\n");
		ret = PTR_ERR(fc_udev_device);
3939 | goto out_destroy_class; |
3940 | } |
3941 | |
	ret = nvmf_register_transport(&nvme_fc_transport);
3943 | if (ret) |
3944 | goto out_destroy_device; |
3945 | |
3946 | return 0; |
3947 | |
3948 | out_destroy_device: |
	device_destroy(&fc_class, MKDEV(0, 0));
out_destroy_class:
	class_unregister(&fc_class);
out_destroy_wq:
	destroy_workqueue(nvme_fc_wq);
3954 | |
3955 | return ret; |
3956 | } |
3957 | |
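/*
 * Module unload helpers: request deletion of every controller on every
 * rport so that unload can wait for nvme_fc_unload_proceed. Both
 * routines run under nvme_fc_lock; rport->lock nests inside it here.
 */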
3958 | static void |
3959 | nvme_fc_delete_controllers(struct nvme_fc_rport *rport) |
3960 | { |
3961 | struct nvme_fc_ctrl *ctrl; |
3962 | |
	spin_lock(&rport->lock);
	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
		dev_warn(ctrl->ctrl.device,
			"NVME-FC{%d}: transport unloading: deleting ctrl\n",
			ctrl->cnum);
		nvme_delete_ctrl(&ctrl->ctrl);
	}
	spin_unlock(&rport->lock);
3971 | } |
3972 | |
3973 | static void |
3974 | nvme_fc_cleanup_for_unload(void) |
3975 | { |
3976 | struct nvme_fc_lport *lport; |
3977 | struct nvme_fc_rport *rport; |
3978 | |
3979 | list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { |
3980 | list_for_each_entry(rport, &lport->endp_list, endp_list) { |
3981 | nvme_fc_delete_controllers(rport); |
3982 | } |
3983 | } |
3984 | } |
3985 | |
3986 | static void __exit nvme_fc_exit_module(void) |
3987 | { |
3988 | unsigned long flags; |
3989 | bool need_cleanup = false; |
3990 | |
3991 | spin_lock_irqsave(&nvme_fc_lock, flags); |
3992 | nvme_fc_waiting_to_unload = true; |
	if (!list_empty(&nvme_fc_lport_list)) {
		need_cleanup = true;
		nvme_fc_cleanup_for_unload();
	}
	spin_unlock_irqrestore(&nvme_fc_lock, flags);
	if (need_cleanup) {
		pr_info("%s: waiting for ctlr deletes\n", __func__);
		wait_for_completion(&nvme_fc_unload_proceed);
		pr_info("%s: ctrl deletes complete\n", __func__);
	}

	nvmf_unregister_transport(&nvme_fc_transport);

	ida_destroy(&nvme_fc_local_port_cnt);
	ida_destroy(&nvme_fc_ctrl_cnt);

	device_destroy(&fc_class, MKDEV(0, 0));
	class_unregister(&fc_class);
	destroy_workqueue(nvme_fc_wq);
4012 | } |
4013 | |
4014 | module_init(nvme_fc_init_module); |
4015 | module_exit(nvme_fc_exit_module); |
4016 | |
4017 | MODULE_LICENSE("GPL v2" ); |
4018 | |