fc.c source code [linux/drivers/nvme/host/fc.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* Copyright (c) 2016 Avago Technologies. All rights reserved.
4	*/
5	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
6	#include <linux/module.h>
7	#include <linux/parser.h>
8	#include <uapi/scsi/fc/fc_fs.h>
9	#include <uapi/scsi/fc/fc_els.h>
10	#include <linux/delay.h>
11	#include <linux/overflow.h>
12	#include <linux/blk-cgroup.h>
13	#include "nvme.h"
14	#include "fabrics.h"
15	#include <linux/nvme-fc-driver.h>
16	#include <linux/nvme-fc.h>
17	#include "fc.h"
18	#include <scsi/scsi_transport_fc.h>
19	#include <linux/blk-mq-pci.h>
20
/* ************************* Data Structures/Defines **************** */
22
23
/*
 * Queue flag identifiers, numbered from 0.
 * NOTE(review): presumably used as bit numbers within
 * nvme_fc_queue.flags - confirm against the set_bit()/test_bit() users.
 */
enum nvme_fc_queue_flags {
	NVME_FC_Q_CONNECTED = 0,
	NVME_FC_Q_LIVE,
};
28
/* Default device-loss timeout, in seconds. */
#define NVME_FC_DEFAULT_DEV_LOSS_TMO	60

/*
 * Default delay between reconnects when connected and a connection
 * failure occurs.
 */
#define NVME_FC_DEFAULT_RECONNECT_TMO	2
34
35	struct nvme_fc_queue {
36	struct nvme_fc_ctrl *ctrl;
37	struct device *dev;
38	struct blk_mq_hw_ctx *hctx;
39	void *lldd_handle;
40	size_t cmnd_capsule_len;
41	u32 qnum;
42	u32 rqcnt;
43	u32 seqno;
44
45	u64 connection_id;
46	atomic_t csn;
47
48	unsigned long flags;
49	} __aligned(sizeof(u64)); / alignment for other things alloc'd with /
50
/*
 * Bit masks kept in an operation's 'flags' word.
 *
 * FCOP_FLAGS_TERMIO is set on an LS request by nvme_fc_abort_lsops()
 * once an abort has been issued for it, so it is aborted only once.
 */
enum nvme_fcop_flags {
	FCOP_FLAGS_TERMIO	= (1 << 0),
	FCOP_FLAGS_AEN		= (1 << 1),
};
55
56	struct nvmefc_ls_req_op {
57	struct nvmefc_ls_req ls_req;
58
59	struct nvme_fc_rport *rport;
60	struct nvme_fc_queue *queue;
61	struct request *rq;
62	u32 flags;
63
64	int ls_error;
65	struct completion ls_done;
66	struct list_head lsreq_list; / rport->ls_req_list /
67	bool req_queued;
68	};
69
70	struct nvmefc_ls_rcv_op {
71	struct nvme_fc_rport *rport;
72	struct nvmefc_ls_rsp *lsrsp;
73	union nvmefc_ls_requests *rqstbuf;
74	union nvmefc_ls_responses *rspbuf;
75	u16 rqstdatalen;
76	bool handled;
77	dma_addr_t rspdma;
78	struct list_head lsrcv_list; / rport->ls_rcv_list /
79	} __aligned(sizeof(u64)); / alignment for other things alloc'd with /
80
/*
 * Values for an FCP operation state (nvme_fc_fcp_op.state is an
 * atomic_t; NOTE(review): confirm it is the holder of these values).
 */
enum nvme_fcpop_state {
	FCPOP_STATE_UNINIT	= 0,
	FCPOP_STATE_IDLE	= 1,
	FCPOP_STATE_ACTIVE	= 2,
	FCPOP_STATE_ABORTED	= 3,
	FCPOP_STATE_COMPLETE	= 4,
};
88
89	struct nvme_fc_fcp_op {
90	struct nvme_request nreq; /*
91	* nvme/host/core.c
92	* requires this to be
93	* the 1st element in the
94	* private structure
95	* associated with the
96	* request.
97	*/
98	struct nvmefc_fcp_req fcp_req;
99
100	struct nvme_fc_ctrl *ctrl;
101	struct nvme_fc_queue *queue;
102	struct request *rq;
103
104	atomic_t state;
105	u32 flags;
106	u32 rqno;
107	u32 nents;
108
109	struct nvme_fc_cmd_iu cmd_iu;
110	struct nvme_fc_ersp_iu rsp_iu;
111	};
112
113	struct nvme_fcp_op_w_sgl {
114	struct nvme_fc_fcp_op op;
115	struct scatterlist sgl[NVME_INLINE_SG_CNT];
116	uint8_t priv[];
117	};
118
119	struct nvme_fc_lport {
120	struct nvme_fc_local_port localport;
121
122	struct ida endp_cnt;
123	struct list_head port_list; / nvme_fc_port_list /
124	struct list_head endp_list;
125	struct device dev; /* physical device for dma /
126	struct nvme_fc_port_template *ops;
127	struct kref ref;
128	atomic_t act_rport_cnt;
129	} __aligned(sizeof(u64)); / alignment for other things alloc'd with /
130
131	struct nvme_fc_rport {
132	struct nvme_fc_remote_port remoteport;
133
134	struct list_head endp_list; / for lport->endp_list /
135	struct list_head ctrl_list;
136	struct list_head ls_req_list;
137	struct list_head ls_rcv_list;
138	struct list_head disc_list;
139	struct device dev; /* physical device for dma /
140	struct nvme_fc_lport *lport;
141	spinlock_t lock;
142	struct kref ref;
143	atomic_t act_ctrl_cnt;
144	unsigned long dev_loss_end;
145	struct work_struct lsrcv_work;
146	} __aligned(sizeof(u64)); / alignment for other things alloc'd with /
147
/* fc_ctrl flags values - specified as bit positions */
#define ASSOC_ACTIVE		0
#define ASSOC_FAILED		1
#define FCCTRL_TERMIO		2
152
153	struct nvme_fc_ctrl {
154	spinlock_t lock;
155	struct nvme_fc_queue *queues;
156	struct device *dev;
157	struct nvme_fc_lport *lport;
158	struct nvme_fc_rport *rport;
159	u32 cnum;
160
161	bool ioq_live;
162	u64 association_id;
163	struct nvmefc_ls_rcv_op *rcv_disconn;
164
165	struct list_head ctrl_list; / rport->ctrl_list /
166
167	struct blk_mq_tag_set admin_tag_set;
168	struct blk_mq_tag_set tag_set;
169
170	struct work_struct ioerr_work;
171	struct delayed_work connect_work;
172
173	struct kref ref;
174	unsigned long flags;
175	u32 iocnt;
176	wait_queue_head_t ioabort_wait;
177
178	struct nvme_fc_fcp_op aen_ops[NVME_NR_AEN_COMMANDS];
179
180	struct nvme_ctrl ctrl;
181	};
182
183	static inline struct nvme_fc_ctrl *
184	to_fc_ctrl(struct nvme_ctrl *ctrl)
185	{
186	return container_of(ctrl, struct nvme_fc_ctrl, ctrl);
187	}
188
189	static inline struct nvme_fc_lport *
190	localport_to_lport(struct nvme_fc_local_port *portptr)
191	{
192	return container_of(portptr, struct nvme_fc_lport, localport);
193	}
194
195	static inline struct nvme_fc_rport *
196	remoteport_to_rport(struct nvme_fc_remote_port *portptr)
197	{
198	return container_of(portptr, struct nvme_fc_rport, remoteport);
199	}
200
201	static inline struct nvmefc_ls_req_op *
202	ls_req_to_lsop(struct nvmefc_ls_req *lsreq)
203	{
204	return container_of(lsreq, struct nvmefc_ls_req_op, ls_req);
205	}
206
207	static inline struct nvme_fc_fcp_op *
208	fcp_req_to_fcp_op(struct nvmefc_fcp_req *fcpreq)
209	{
210	return container_of(fcpreq, struct nvme_fc_fcp_op, fcp_req);
211	}
212
213
214
215	/ ************************* Globals ************************** /
216
217
218	static DEFINE_SPINLOCK(nvme_fc_lock);
219
220	static LIST_HEAD(nvme_fc_lport_list);
221	static DEFINE_IDA(nvme_fc_local_port_cnt);
222	static DEFINE_IDA(nvme_fc_ctrl_cnt);
223
224	static struct workqueue_struct *nvme_fc_wq;
225
226	static bool nvme_fc_waiting_to_unload;
227	static DECLARE_COMPLETION(nvme_fc_unload_proceed);
228
229	/*
230	* These items are short-term. They will eventually be moved into
231	* a generic FC class. See comments in module init.
232	*/
233	static struct device *fc_udev_device;
234
235	static void nvme_fc_complete_rq(struct request *rq);
236
237	/ ********************* FC-NVME Port Management ********************** /
238
239	static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
240	struct nvme_fc_queue , unsigned* int);
241
242	static void nvme_fc_handle_ls_rqst_work(struct work_struct *work);
243
244
245	static void
246	nvme_fc_free_lport(struct kref *ref)
247	{
248	struct nvme_fc_lport *lport =
249	container_of(ref, struct nvme_fc_lport, ref);
250	unsigned long flags;
251
252	WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED);
253	WARN_ON(!list_empty(&lport->endp_list));
254
255	/ remove from transport list /
256	spin_lock_irqsave(&nvme_fc_lock, flags);
257	list_del(entry: &lport->port_list);
258	if (nvme_fc_waiting_to_unload && list_empty(head: &nvme_fc_lport_list))
259	complete(&nvme_fc_unload_proceed);
260	spin_unlock_irqrestore(lock: &nvme_fc_lock, flags);
261
262	ida_free(&nvme_fc_local_port_cnt, id: lport->localport.port_num);
263	ida_destroy(ida: &lport->endp_cnt);
264
265	put_device(dev: lport->dev);
266
267	kfree(objp: lport);
268	}
269
270	static void
271	nvme_fc_lport_put(struct nvme_fc_lport *lport)
272	{
273	kref_put(kref: &lport->ref, release: nvme_fc_free_lport);
274	}
275
276	static int
277	nvme_fc_lport_get(struct nvme_fc_lport *lport)
278	{
279	return kref_get_unless_zero(kref: &lport->ref);
280	}
281
282
283	static struct nvme_fc_lport *
284	nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo,
285	struct nvme_fc_port_template *ops,
286	struct device *dev)
287	{
288	struct nvme_fc_lport *lport;
289	unsigned long flags;
290
291	spin_lock_irqsave(&nvme_fc_lock, flags);
292
293	list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
294	if (lport->localport.node_name != pinfo->node_name \|\|
295	lport->localport.port_name != pinfo->port_name)
296	continue;
297
298	if (lport->dev != dev) {
299	lport = ERR_PTR(error: -EXDEV);
300	goto out_done;
301	}
302
303	if (lport->localport.port_state != FC_OBJSTATE_DELETED) {
304	lport = ERR_PTR(error: -EEXIST);
305	goto out_done;
306	}
307
308	if (!nvme_fc_lport_get(lport)) {
309	/*
310	* fails if ref cnt already 0. If so,
311	* act as if lport already deleted
312	*/
313	lport = NULL;
314	goto out_done;
315	}
316
317	/ resume the lport /
318
319	lport->ops = ops;
320	lport->localport.port_role = pinfo->port_role;
321	lport->localport.port_id = pinfo->port_id;
322	lport->localport.port_state = FC_OBJSTATE_ONLINE;
323
324	spin_unlock_irqrestore(lock: &nvme_fc_lock, flags);
325
326	return lport;
327	}
328
329	lport = NULL;
330
331	out_done:
332	spin_unlock_irqrestore(lock: &nvme_fc_lock, flags);
333
334	return lport;
335	}
336
337	/**
338	* nvme_fc_register_localport - transport entry point called by an
339	* LLDD to register the existence of a NVME
340	* host FC port.
341	* @pinfo: pointer to information about the port to be registered
342	* @template: LLDD entrypoints and operational parameters for the port
343	* @dev: physical hardware device node port corresponds to. Will be
344	* used for DMA mappings
345	* @portptr: pointer to a local port pointer. Upon success, the routine
346	* will allocate a nvme_fc_local_port structure and place its
347	* address in the local port pointer. Upon failure, local port
348	* pointer will be set to 0.
349	*
350	* Returns:
351	* a completion status. Must be 0 upon success; a negative errno
352	* (ex: -ENXIO) upon failure.
353	*/
354	int
355	nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
356	struct nvme_fc_port_template *template,
357	struct device *dev,
358	struct nvme_fc_local_port **portptr)
359	{
360	struct nvme_fc_lport *newrec;
361	unsigned long flags;
362	int ret, idx;
363
364	if (!template->localport_delete \|\| !template->remoteport_delete \|\|
365	!template->ls_req \|\| !template->fcp_io \|\|
366	!template->ls_abort \|\| !template->fcp_abort \|\|
367	!template->max_hw_queues \|\| !template->max_sgl_segments \|\|
368	!template->max_dif_sgl_segments \|\| !template->dma_boundary) {
369	ret = -EINVAL;
370	goto out_reghost_failed;
371	}
372
373	/*
374	* look to see if there is already a localport that had been
375	* deregistered and in the process of waiting for all the
376	* references to fully be removed. If the references haven't
377	* expired, we can simply re-enable the localport. Remoteports
378	* and controller reconnections should resume naturally.
379	*/
380	newrec = nvme_fc_attach_to_unreg_lport(pinfo, ops: template, dev);
381
382	/ found an lport, but something about its state is bad /
383	if (IS_ERR(ptr: newrec)) {
384	ret = PTR_ERR(ptr: newrec);
385	goto out_reghost_failed;
386
387	/ found existing lport, which was resumed /
388	} else if (newrec) {
389	*portptr = &newrec->localport;
390	return `0`;
391	}
392
393	/ nothing found - allocate a new localport struct /
394
395	newrec = kmalloc(size: (sizeof(*newrec) + template->local_priv_sz),
396	GFP_KERNEL);
397	if (!newrec) {
398	ret = -ENOMEM;
399	goto out_reghost_failed;
400	}
401
402	idx = ida_alloc(ida: &nvme_fc_local_port_cnt, GFP_KERNEL);
403	if (idx < `0`) {
404	ret = -ENOSPC;
405	goto out_fail_kfree;
406	}
407
408	if (!get_device(dev) && dev) {
409	ret = -ENODEV;
410	goto out_ida_put;
411	}
412
413	INIT_LIST_HEAD(list: &newrec->port_list);
414	INIT_LIST_HEAD(list: &newrec->endp_list);
415	kref_init(kref: &newrec->ref);
416	atomic_set(v: &newrec->act_rport_cnt, i: `0`);
417	newrec->ops = template;
418	newrec->dev = dev;
419	ida_init(ida: &newrec->endp_cnt);
420	if (template->local_priv_sz)
421	newrec->localport.private = &newrec[`1`];
422	else
423	newrec->localport.private = NULL;
424	newrec->localport.node_name = pinfo->node_name;
425	newrec->localport.port_name = pinfo->port_name;
426	newrec->localport.port_role = pinfo->port_role;
427	newrec->localport.port_id = pinfo->port_id;
428	newrec->localport.port_state = FC_OBJSTATE_ONLINE;
429	newrec->localport.port_num = idx;
430
431	spin_lock_irqsave(&nvme_fc_lock, flags);
432	list_add_tail(new: &newrec->port_list, head: &nvme_fc_lport_list);
433	spin_unlock_irqrestore(lock: &nvme_fc_lock, flags);
434
435	if (dev)
436	dma_set_seg_boundary(dev, mask: template->dma_boundary);
437
438	*portptr = &newrec->localport;
439	return `0`;
440
441	out_ida_put:
442	ida_free(&nvme_fc_local_port_cnt, id: idx);
443	out_fail_kfree:
444	kfree(objp: newrec);
445	out_reghost_failed:
446	*portptr = NULL;
447
448	return ret;
449	}
450	EXPORT_SYMBOL_GPL(nvme_fc_register_localport);
451
452	/**
453	* nvme_fc_unregister_localport - transport entry point called by an
454	* LLDD to deregister/remove a previously
455	* registered a NVME host FC port.
456	* @portptr: pointer to the (registered) local port that is to be deregistered.
457	*
458	* Returns:
459	* a completion status. Must be 0 upon success; a negative errno
460	* (ex: -ENXIO) upon failure.
461	*/
462	int
463	nvme_fc_unregister_localport(struct nvme_fc_local_port *portptr)
464	{
465	struct nvme_fc_lport *lport = localport_to_lport(portptr);
466	unsigned long flags;
467
468	if (!portptr)
469	return -EINVAL;
470
471	spin_lock_irqsave(&nvme_fc_lock, flags);
472
473	if (portptr->port_state != FC_OBJSTATE_ONLINE) {
474	spin_unlock_irqrestore(lock: &nvme_fc_lock, flags);
475	return -EINVAL;
476	}
477	portptr->port_state = FC_OBJSTATE_DELETED;
478
479	spin_unlock_irqrestore(lock: &nvme_fc_lock, flags);
480
481	if (atomic_read(v: &lport->act_rport_cnt) == `0`)
482	lport->ops->localport_delete(&lport->localport);
483
484	nvme_fc_lport_put(lport);
485
486	return `0`;
487	}
488	EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport);
489
/*
 * Per FC-NVME, TRADDR strings have a fixed format:
 *   "nn-0x<16hexdigits>:pn-0x<16hexdigits>"	- 43 characters
 * A udev event differs only in the prefix naming the field:
 *   "NVMEFC_HOST_TRADDR=" or "NVMEFC_TRADDR="	- 19 max characters
 * so a buffer needs 19 + 43 + null_fudge = 64 characters.
 */
#define FCNVME_TRADDR_LENGTH		64
499
500	static void
501	nvme_fc_signal_discovery_scan(struct nvme_fc_lport *lport,
502	struct nvme_fc_rport *rport)
503	{
504	char hostaddr[FCNVME_TRADDR_LENGTH]; / NVMEFC_HOST_TRADDR=.../
505	char tgtaddr[FCNVME_TRADDR_LENGTH]; / NVMEFC_TRADDR=.../
506	char *envp[`4`] = { "FC_EVENT=nvmediscovery", hostaddr, tgtaddr, NULL };
507
508	if (!(rport->remoteport.port_role & FC_PORT_ROLE_NVME_DISCOVERY))
509	return;
510
511	snprintf(buf: hostaddr, size: sizeof(hostaddr),
512	fmt: "NVMEFC_HOST_TRADDR=nn-0x%016llx:pn-0x%016llx",
513	lport->localport.node_name, lport->localport.port_name);
514	snprintf(buf: tgtaddr, size: sizeof(tgtaddr),
515	fmt: "NVMEFC_TRADDR=nn-0x%016llx:pn-0x%016llx",
516	rport->remoteport.node_name, rport->remoteport.port_name);
517	kobject_uevent_env(kobj: &fc_udev_device->kobj, action: KOBJ_CHANGE, envp);
518	}
519
520	static void
521	nvme_fc_free_rport(struct kref *ref)
522	{
523	struct nvme_fc_rport *rport =
524	container_of(ref, struct nvme_fc_rport, ref);
525	struct nvme_fc_lport *lport =
526	localport_to_lport(portptr: rport->remoteport.localport);
527	unsigned long flags;
528
529	WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED);
530	WARN_ON(!list_empty(&rport->ctrl_list));
531
532	/ remove from lport list /
533	spin_lock_irqsave(&nvme_fc_lock, flags);
534	list_del(entry: &rport->endp_list);
535	spin_unlock_irqrestore(lock: &nvme_fc_lock, flags);
536
537	WARN_ON(!list_empty(&rport->disc_list));
538	ida_free(&lport->endp_cnt, id: rport->remoteport.port_num);
539
540	kfree(objp: rport);
541
542	nvme_fc_lport_put(lport);
543	}
544
545	static void
546	nvme_fc_rport_put(struct nvme_fc_rport *rport)
547	{
548	kref_put(kref: &rport->ref, release: nvme_fc_free_rport);
549	}
550
551	static int
552	nvme_fc_rport_get(struct nvme_fc_rport *rport)
553	{
554	return kref_get_unless_zero(kref: &rport->ref);
555	}
556
557	static void
558	nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
559	{
560	switch (ctrl->ctrl.state) {
561	case NVME_CTRL_NEW:
562	case NVME_CTRL_CONNECTING:
563	/*
564	* As all reconnects were suppressed, schedule a
565	* connect.
566	*/
567	dev_info(ctrl->ctrl.device,
568	"NVME-FC{%d}: connectivity re-established. "
569	"Attempting reconnect\n", ctrl->cnum);
570
571	queue_delayed_work(wq: nvme_wq, dwork: &ctrl->connect_work, delay: `0`);
572	break;
573
574	case NVME_CTRL_RESETTING:
575	/*
576	* Controller is already in the process of terminating the
577	* association. No need to do anything further. The reconnect
578	* step will naturally occur after the reset completes.
579	*/
580	break;
581
582	default:
583	/ no action to take - let it delete /
584	break;
585	}
586	}
587
588	static struct nvme_fc_rport *
589	nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport,
590	struct nvme_fc_port_info *pinfo)
591	{
592	struct nvme_fc_rport *rport;
593	struct nvme_fc_ctrl *ctrl;
594	unsigned long flags;
595
596	spin_lock_irqsave(&nvme_fc_lock, flags);
597
598	list_for_each_entry(rport, &lport->endp_list, endp_list) {
599	if (rport->remoteport.node_name != pinfo->node_name \|\|
600	rport->remoteport.port_name != pinfo->port_name)
601	continue;
602
603	if (!nvme_fc_rport_get(rport)) {
604	rport = ERR_PTR(error: -ENOLCK);
605	goto out_done;
606	}
607
608	spin_unlock_irqrestore(lock: &nvme_fc_lock, flags);
609
610	spin_lock_irqsave(&rport->lock, flags);
611
612	/ has it been unregistered /
613	if (rport->remoteport.port_state != FC_OBJSTATE_DELETED) {
614	/ means lldd called us twice /
615	spin_unlock_irqrestore(lock: &rport->lock, flags);
616	nvme_fc_rport_put(rport);
617	return ERR_PTR(error: -ESTALE);
618	}
619
620	rport->remoteport.port_role = pinfo->port_role;
621	rport->remoteport.port_id = pinfo->port_id;
622	rport->remoteport.port_state = FC_OBJSTATE_ONLINE;
623	rport->dev_loss_end = `0`;
624
625	/*
626	* kick off a reconnect attempt on all associations to the
627	* remote port. A successful reconnects will resume i/o.
628	*/
629	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
630	nvme_fc_resume_controller(ctrl);
631
632	spin_unlock_irqrestore(lock: &rport->lock, flags);
633
634	return rport;
635	}
636
637	rport = NULL;
638
639	out_done:
640	spin_unlock_irqrestore(lock: &nvme_fc_lock, flags);
641
642	return rport;
643	}
644
645	static inline void
646	__nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport,
647	struct nvme_fc_port_info *pinfo)
648	{
649	if (pinfo->dev_loss_tmo)
650	rport->remoteport.dev_loss_tmo = pinfo->dev_loss_tmo;
651	else
652	rport->remoteport.dev_loss_tmo = NVME_FC_DEFAULT_DEV_LOSS_TMO;
653	}
654
655	/**
656	* nvme_fc_register_remoteport - transport entry point called by an
657	* LLDD to register the existence of a NVME
658	* subsystem FC port on its fabric.
659	* @localport: pointer to the (registered) local port that the remote
660	* subsystem port is connected to.
661	* @pinfo: pointer to information about the port to be registered
662	* @portptr: pointer to a remote port pointer. Upon success, the routine
663	* will allocate a nvme_fc_remote_port structure and place its
664	* address in the remote port pointer. Upon failure, remote port
665	* pointer will be set to 0.
666	*
667	* Returns:
668	* a completion status. Must be 0 upon success; a negative errno
669	* (ex: -ENXIO) upon failure.
670	*/
671	int
672	nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
673	struct nvme_fc_port_info *pinfo,
674	struct nvme_fc_remote_port **portptr)
675	{
676	struct nvme_fc_lport *lport = localport_to_lport(portptr: localport);
677	struct nvme_fc_rport *newrec;
678	unsigned long flags;
679	int ret, idx;
680
681	if (!nvme_fc_lport_get(lport)) {
682	ret = -ESHUTDOWN;
683	goto out_reghost_failed;
684	}
685
686	/*
687	* look to see if there is already a remoteport that is waiting
688	* for a reconnect (within dev_loss_tmo) with the same WWN's.
689	* If so, transition to it and reconnect.
690	*/
691	newrec = nvme_fc_attach_to_suspended_rport(lport, pinfo);
692
693	/ found an rport, but something about its state is bad /
694	if (IS_ERR(ptr: newrec)) {
695	ret = PTR_ERR(ptr: newrec);
696	goto out_lport_put;
697
698	/ found existing rport, which was resumed /
699	} else if (newrec) {
700	nvme_fc_lport_put(lport);
701	__nvme_fc_set_dev_loss_tmo(rport: newrec, pinfo);
702	nvme_fc_signal_discovery_scan(lport, rport: newrec);
703	*portptr = &newrec->remoteport;
704	return `0`;
705	}
706
707	/ nothing found - allocate a new remoteport struct /
708
709	newrec = kmalloc(size: (sizeof(*newrec) + lport->ops->remote_priv_sz),
710	GFP_KERNEL);
711	if (!newrec) {
712	ret = -ENOMEM;
713	goto out_lport_put;
714	}
715
716	idx = ida_alloc(ida: &lport->endp_cnt, GFP_KERNEL);
717	if (idx < `0`) {
718	ret = -ENOSPC;
719	goto out_kfree_rport;
720	}
721
722	INIT_LIST_HEAD(list: &newrec->endp_list);
723	INIT_LIST_HEAD(list: &newrec->ctrl_list);
724	INIT_LIST_HEAD(list: &newrec->ls_req_list);
725	INIT_LIST_HEAD(list: &newrec->disc_list);
726	kref_init(kref: &newrec->ref);
727	atomic_set(v: &newrec->act_ctrl_cnt, i: `0`);
728	spin_lock_init(&newrec->lock);
729	newrec->remoteport.localport = &lport->localport;
730	INIT_LIST_HEAD(list: &newrec->ls_rcv_list);
731	newrec->dev = lport->dev;
732	newrec->lport = lport;
733	if (lport->ops->remote_priv_sz)
734	newrec->remoteport.private = &newrec[`1`];
735	else
736	newrec->remoteport.private = NULL;
737	newrec->remoteport.port_role = pinfo->port_role;
738	newrec->remoteport.node_name = pinfo->node_name;
739	newrec->remoteport.port_name = pinfo->port_name;
740	newrec->remoteport.port_id = pinfo->port_id;
741	newrec->remoteport.port_state = FC_OBJSTATE_ONLINE;
742	newrec->remoteport.port_num = idx;
743	__nvme_fc_set_dev_loss_tmo(rport: newrec, pinfo);
744	INIT_WORK(&newrec->lsrcv_work, nvme_fc_handle_ls_rqst_work);
745
746	spin_lock_irqsave(&nvme_fc_lock, flags);
747	list_add_tail(new: &newrec->endp_list, head: &lport->endp_list);
748	spin_unlock_irqrestore(lock: &nvme_fc_lock, flags);
749
750	nvme_fc_signal_discovery_scan(lport, rport: newrec);
751
752	*portptr = &newrec->remoteport;
753	return `0`;
754
755	out_kfree_rport:
756	kfree(objp: newrec);
757	out_lport_put:
758	nvme_fc_lport_put(lport);
759	out_reghost_failed:
760	*portptr = NULL;
761	return ret;
762	}
763	EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport);
764
765	static int
766	nvme_fc_abort_lsops(struct nvme_fc_rport *rport)
767	{
768	struct nvmefc_ls_req_op *lsop;
769	unsigned long flags;
770
771	restart:
772	spin_lock_irqsave(&rport->lock, flags);
773
774	list_for_each_entry(lsop, &rport->ls_req_list, lsreq_list) {
775	if (!(lsop->flags & FCOP_FLAGS_TERMIO)) {
776	lsop->flags \|= FCOP_FLAGS_TERMIO;
777	spin_unlock_irqrestore(lock: &rport->lock, flags);
778	rport->lport->ops->ls_abort(&rport->lport->localport,
779	&rport->remoteport,
780	&lsop->ls_req);
781	goto restart;
782	}
783	}
784	spin_unlock_irqrestore(lock: &rport->lock, flags);
785
786	return `0`;
787	}
788
789	static void
790	nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
791	{
792	dev_info(ctrl->ctrl.device,
793	"NVME-FC{%d}: controller connectivity lost. Awaiting "
794	"Reconnect", ctrl->cnum);
795
796	switch (ctrl->ctrl.state) {
797	case NVME_CTRL_NEW:
798	case NVME_CTRL_LIVE:
799	/*
800	* Schedule a controller reset. The reset will terminate the
801	* association and schedule the reconnect timer. Reconnects
802	* will be attempted until either the ctlr_loss_tmo
803	* (max_retries * connect_delay) expires or the remoteport's
804	* dev_loss_tmo expires.
805	*/
806	if (nvme_reset_ctrl(ctrl: &ctrl->ctrl)) {
807	dev_warn(ctrl->ctrl.device,
808	"NVME-FC{%d}: Couldn't schedule reset.\n",
809	ctrl->cnum);
810	nvme_delete_ctrl(ctrl: &ctrl->ctrl);
811	}
812	break;
813
814	case NVME_CTRL_CONNECTING:
815	/*
816	* The association has already been terminated and the
817	* controller is attempting reconnects. No need to do anything
818	* futher. Reconnects will be attempted until either the
819	* ctlr_loss_tmo (max_retries * connect_delay) expires or the
820	* remoteport's dev_loss_tmo expires.
821	*/
822	break;
823
824	case NVME_CTRL_RESETTING:
825	/*
826	* Controller is already in the process of terminating the
827	* association. No need to do anything further. The reconnect
828	* step will kick in naturally after the association is
829	* terminated.
830	*/
831	break;
832
833	case NVME_CTRL_DELETING:
834	case NVME_CTRL_DELETING_NOIO:
835	default:
836	/ no action to take - let it delete /
837	break;
838	}
839	}
840
841	/**
842	* nvme_fc_unregister_remoteport - transport entry point called by an
843	* LLDD to deregister/remove a previously
844	* registered a NVME subsystem FC port.
845	* @portptr: pointer to the (registered) remote port that is to be
846	* deregistered.
847	*
848	* Returns:
849	* a completion status. Must be 0 upon success; a negative errno
850	* (ex: -ENXIO) upon failure.
851	*/
852	int
853	nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr)
854	{
855	struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
856	struct nvme_fc_ctrl *ctrl;
857	unsigned long flags;
858
859	if (!portptr)
860	return -EINVAL;
861
862	spin_lock_irqsave(&rport->lock, flags);
863
864	if (portptr->port_state != FC_OBJSTATE_ONLINE) {
865	spin_unlock_irqrestore(lock: &rport->lock, flags);
866	return -EINVAL;
867	}
868	portptr->port_state = FC_OBJSTATE_DELETED;
869
870	rport->dev_loss_end = jiffies + (portptr->dev_loss_tmo * HZ);
871
872	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
873	/ if dev_loss_tmo==0, dev loss is immediate /
874	if (!portptr->dev_loss_tmo) {
875	dev_warn(ctrl->ctrl.device,
876	"NVME-FC{%d}: controller connectivity lost.\n",
877	ctrl->cnum);
878	nvme_delete_ctrl(ctrl: &ctrl->ctrl);
879	} else
880	nvme_fc_ctrl_connectivity_loss(ctrl);
881	}
882
883	spin_unlock_irqrestore(lock: &rport->lock, flags);
884
885	nvme_fc_abort_lsops(rport);
886
887	if (atomic_read(v: &rport->act_ctrl_cnt) == `0`)
888	rport->lport->ops->remoteport_delete(portptr);
889
890	/*
891	* release the reference, which will allow, if all controllers
892	* go away, which should only occur after dev_loss_tmo occurs,
893	* for the rport to be torn down.
894	*/
895	nvme_fc_rport_put(rport);
896
897	return `0`;
898	}
899	EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport);
900
901	/**
902	* nvme_fc_rescan_remoteport - transport entry point called by an
903	* LLDD to request a nvme device rescan.
904	* @remoteport: pointer to the (registered) remote port that is to be
905	* rescanned.
906	*
907	* Returns: N/A
908	*/
909	void
910	nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport)
911	{
912	struct nvme_fc_rport *rport = remoteport_to_rport(portptr: remoteport);
913
914	nvme_fc_signal_discovery_scan(lport: rport->lport, rport);
915	}
916	EXPORT_SYMBOL_GPL(nvme_fc_rescan_remoteport);
917
918	int
919	nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *portptr,
920	u32 dev_loss_tmo)
921	{
922	struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
923	unsigned long flags;
924
925	spin_lock_irqsave(&rport->lock, flags);
926
927	if (portptr->port_state != FC_OBJSTATE_ONLINE) {
928	spin_unlock_irqrestore(lock: &rport->lock, flags);
929	return -EINVAL;
930	}
931
932	/ a dev_loss_tmo of 0 (immediate) is allowed to be set /
933	rport->remoteport.dev_loss_tmo = dev_loss_tmo;
934
935	spin_unlock_irqrestore(lock: &rport->lock, flags);
936
937	return `0`;
938	}
939	EXPORT_SYMBOL_GPL(nvme_fc_set_remoteport_devloss);
940
941
942	/ ********************* FC-NVME DMA Handling ************************** /
943
944	/*
945	* The fcloop device passes in a NULL device pointer. Real LLD's will
946	* pass in a valid device pointer. If NULL is passed to the dma mapping
947	* routines, depending on the platform, it may or may not succeed, and
948	* may crash.
949	*
950	* As such:
951	* Wrapper all the dma routines and check the dev pointer.
952	*
953	* If simple mappings (return just a dma address, we'll noop them,
954	* returning a dma address of 0.
955	*
956	* On more complex mappings (dma_map_sg), a pseudo routine fills
957	* in the scatter list, setting all dma addresses to 0.
958	*/
959
960	static inline dma_addr_t
961	fc_dma_map_single(struct device dev, void* *ptr, size_t size,
962	enum dma_data_direction dir)
963	{
964	return dev ? dma_map_single(dev, ptr, size, dir) : (dma_addr_t)`0L`;
965	}
966
967	static inline int
968	fc_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
969	{
970	return dev ? dma_mapping_error(dev, dma_addr) : `0`;
971	}
972
973	static inline void
974	fc_dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
975	enum dma_data_direction dir)
976	{
977	if (dev)
978	dma_unmap_single(dev, addr, size, dir);
979	}
980
981	static inline void
982	fc_dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
983	enum dma_data_direction dir)
984	{
985	if (dev)
986	dma_sync_single_for_cpu(dev, addr, size, dir);
987	}
988
989	static inline void
990	fc_dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size,
991	enum dma_data_direction dir)
992	{
993	if (dev)
994	dma_sync_single_for_device(dev, addr, size, dir);
995	}
996
997	/ pseudo dma_map_sg call /
998	static int
999	fc_map_sg(struct scatterlist sg, int* nents)
1000	{
1001	struct scatterlist *s;
1002	int i;
1003
1004	WARN_ON(nents == `0` \|\| sg[`0`].length == `0`);
1005
1006	for_each_sg(sg, s, nents, i) {
1007	s->dma_address = `0L`;
1008	#ifdef CONFIG_NEED_SG_DMA_LENGTH
1009	s->dma_length = s->length;
1010	#endif
1011	}
1012	return nents;
1013	}
1014
1015	static inline int
1016	fc_dma_map_sg(struct device dev, struct* scatterlist sg, int* nents,
1017	enum dma_data_direction dir)
1018	{
1019	return dev ? dma_map_sg(dev, sg, nents, dir) : fc_map_sg(sg, nents);
1020	}
1021
1022	static inline void
1023	fc_dma_unmap_sg(struct device dev, struct* scatterlist sg, int* nents,
1024	enum dma_data_direction dir)
1025	{
1026	if (dev)
1027	dma_unmap_sg(dev, sg, nents, dir);
1028	}
1029
/* ********************** FC-NVME LS Handling ************************** */

/* forward declarations for routines defined later in the file */
static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *);
static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *);

static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
1036
1037	static void
1038	__nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop)
1039	{
1040	struct nvme_fc_rport *rport = lsop->rport;
1041	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
1042	unsigned long flags;
1043
1044	spin_lock_irqsave(&rport->lock, flags);
1045
1046	if (!lsop->req_queued) {
1047	spin_unlock_irqrestore(lock: &rport->lock, flags);
1048	return;
1049	}
1050
1051	list_del(entry: &lsop->lsreq_list);
1052
1053	lsop->req_queued = false;
1054
1055	spin_unlock_irqrestore(lock: &rport->lock, flags);
1056
1057	fc_dma_unmap_single(dev: rport->dev, addr: lsreq->rqstdma,
1058	size: (lsreq->rqstlen + lsreq->rsplen),
1059	dir: DMA_BIDIRECTIONAL);
1060
1061	nvme_fc_rport_put(rport);
1062	}
1063
/*
 * Map, queue and hand an LS request to the LLDD.
 *
 * @rport: remote port the request is sent to
 * @lsop:  LS operation; lsop->ls_req must already describe the
 *         request/response buffers
 * @done:  completion callback stored in the request for the LLDD
 *
 * On success the op stays on rport->ls_req_list with an rport
 * reference held; both are undone by __nvme_fc_finish_ls_req().
 *
 * Returns 0 on success, -ECONNREFUSED if the port is not online,
 * -ESHUTDOWN if no rport reference could be taken, -EFAULT on a DMA
 * mapping error, or the error returned by the LLDD's ls_req().
 */
static int
__nvme_fc_send_ls_req(struct nvme_fc_rport *rport,
		struct nvmefc_ls_req_op *lsop,
		void (*done)(struct nvmefc_ls_req *req, int status))
{
	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
	unsigned long flags;
	int ret = 0;

	if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
		return -ECONNREFUSED;

	if (!nvme_fc_rport_get(rport))
		return -ESHUTDOWN;

	lsreq->done = done;
	lsop->rport = rport;
	lsop->req_queued = false;
	INIT_LIST_HEAD(&lsop->lsreq_list);
	init_completion(&lsop->ls_done);

	/* request and response are mapped as one contiguous region */
	lsreq->rqstdma = fc_dma_map_single(rport->dev, lsreq->rqstaddr,
				  lsreq->rqstlen + lsreq->rsplen,
				  DMA_BIDIRECTIONAL);
	if (fc_dma_mapping_error(rport->dev, lsreq->rqstdma)) {
		ret = -EFAULT;
		goto out_putrport;
	}
	/* the response area starts right after the request */
	lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen;

	spin_lock_irqsave(&rport->lock, flags);

	list_add_tail(&lsop->lsreq_list, &rport->ls_req_list);

	lsop->req_queued = true;

	spin_unlock_irqrestore(&rport->lock, flags);

	ret = rport->lport->ops->ls_req(&rport->lport->localport,
					&rport->remoteport, lsreq);
	if (ret)
		goto out_unlink;

	return 0;

out_unlink:
	lsop->ls_error = ret;
	spin_lock_irqsave(&rport->lock, flags);
	lsop->req_queued = false;
	list_del(&lsop->lsreq_list);
	spin_unlock_irqrestore(&rport->lock, flags);
	fc_dma_unmap_single(rport->dev, lsreq->rqstdma,
				  (lsreq->rqstlen + lsreq->rsplen),
				  DMA_BIDIRECTIONAL);
out_putrport:
	nvme_fc_rport_put(rport);

	return ret;
}
1123
1124	static void
1125	nvme_fc_send_ls_req_done(struct nvmefc_ls_req lsreq, int* status)
1126	{
1127	struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq);
1128
1129	lsop->ls_error = status;
1130	complete(&lsop->ls_done);
1131	}
1132
1133	static int
1134	nvme_fc_send_ls_req(struct nvme_fc_rport rport, struct* nvmefc_ls_req_op *lsop)
1135	{
1136	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
1137	struct fcnvme_ls_rjt *rjt = lsreq->rspaddr;
1138	int ret;
1139
1140	ret = __nvme_fc_send_ls_req(rport, lsop, done: nvme_fc_send_ls_req_done);
1141
1142	if (!ret) {
1143	/*
1144	* No timeout/not interruptible as we need the struct
1145	* to exist until the lldd calls us back. Thus mandate
1146	* wait until driver calls back. lldd responsible for
1147	* the timeout action
1148	*/
1149	wait_for_completion(&lsop->ls_done);
1150
1151	__nvme_fc_finish_ls_req(lsop);
1152
1153	ret = lsop->ls_error;
1154	}
1155
1156	if (ret)
1157	return ret;
1158
1159	/ ACC or RJT payload ? /
1160	if (rjt->w0.ls_cmd == FCNVME_LS_RJT)
1161	return -ENXIO;
1162
1163	return `0`;
1164	}
1165
/*
 * Send an LS request without waiting for it; the caller-supplied @done
 * callback is installed as the request's completion handler.
 * Returns the result of __nvme_fc_send_ls_req().
 */
static int
nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport,
		struct nvmefc_ls_req_op *lsop,
		void (*done)(struct nvmefc_ls_req *req, int status))
{
	/* don't wait for completion */

	return __nvme_fc_send_ls_req(rport, lsop, done);
}
1175
1176	static int
1177	nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
1178	struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio)
1179	{
1180	struct nvmefc_ls_req_op *lsop;
1181	struct nvmefc_ls_req *lsreq;
1182	struct fcnvme_ls_cr_assoc_rqst *assoc_rqst;
1183	struct fcnvme_ls_cr_assoc_acc *assoc_acc;
1184	unsigned long flags;
1185	int ret, fcret = `0`;
1186
1187	lsop = kzalloc(size: (sizeof(*lsop) +
1188	sizeof(assoc_rqst) + sizeof(assoc_acc) +
1189	ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
1190	if (!lsop) {
1191	dev_info(ctrl->ctrl.device,
1192	"NVME-FC{%d}: send Create Association failed: ENOMEM\n",
1193	ctrl->cnum);
1194	ret = -ENOMEM;
1195	goto out_no_memory;
1196	}
1197
1198	assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)&lsop[`1`];
1199	assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[`1`];
1200	lsreq = &lsop->ls_req;
1201	if (ctrl->lport->ops->lsrqst_priv_sz)
1202	lsreq->private = &assoc_acc[`1`];
1203	else
1204	lsreq->private = NULL;
1205
1206	assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION;
1207	assoc_rqst->desc_list_len =
1208	cpu_to_be32(sizeof(struct fcnvme_lsdesc_cr_assoc_cmd));
1209
1210	assoc_rqst->assoc_cmd.desc_tag =
1211	cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD);
1212	assoc_rqst->assoc_cmd.desc_len =
1213	fcnvme_lsdesc_len(
1214	sz: sizeof(struct fcnvme_lsdesc_cr_assoc_cmd));
1215
1216	assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
1217	assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize - `1`);
1218	/ Linux supports only Dynamic controllers /
1219	assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(`0xffff`);
1220	uuid_copy(dst: &assoc_rqst->assoc_cmd.hostid, src: &ctrl->ctrl.opts->host->id);
1221	strncpy(p: assoc_rqst->assoc_cmd.hostnqn, q: ctrl->ctrl.opts->host->nqn,
1222	min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE));
1223	strncpy(p: assoc_rqst->assoc_cmd.subnqn, q: ctrl->ctrl.opts->subsysnqn,
1224	min(FCNVME_ASSOC_SUBNQN_LEN, NVMF_NQN_SIZE));
1225
1226	lsop->queue = queue;
1227	lsreq->rqstaddr = assoc_rqst;
1228	lsreq->rqstlen = sizeof(*assoc_rqst);
1229	lsreq->rspaddr = assoc_acc;
1230	lsreq->rsplen = sizeof(*assoc_acc);
1231	lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
1232
1233	ret = nvme_fc_send_ls_req(rport: ctrl->rport, lsop);
1234	if (ret)
1235	goto out_free_buffer;
1236
1237	/ process connect LS completion /
1238
1239	/ validate the ACC response /
1240	if (assoc_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC)
1241	fcret = VERR_LSACC;
1242	else if (assoc_acc->hdr.desc_list_len !=
1243	fcnvme_lsdesc_len(
1244	sz: sizeof(struct fcnvme_ls_cr_assoc_acc)))
1245	fcret = VERR_CR_ASSOC_ACC_LEN;
1246	else if (assoc_acc->hdr.rqst.desc_tag !=
1247	cpu_to_be32(FCNVME_LSDESC_RQST))
1248	fcret = VERR_LSDESC_RQST;
1249	else if (assoc_acc->hdr.rqst.desc_len !=
1250	fcnvme_lsdesc_len(sz: sizeof(struct fcnvme_lsdesc_rqst)))
1251	fcret = VERR_LSDESC_RQST_LEN;
1252	else if (assoc_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_ASSOCIATION)
1253	fcret = VERR_CR_ASSOC;
1254	else if (assoc_acc->associd.desc_tag !=
1255	cpu_to_be32(FCNVME_LSDESC_ASSOC_ID))
1256	fcret = VERR_ASSOC_ID;
1257	else if (assoc_acc->associd.desc_len !=
1258	fcnvme_lsdesc_len(
1259	sz: sizeof(struct fcnvme_lsdesc_assoc_id)))
1260	fcret = VERR_ASSOC_ID_LEN;
1261	else if (assoc_acc->connectid.desc_tag !=
1262	cpu_to_be32(FCNVME_LSDESC_CONN_ID))
1263	fcret = VERR_CONN_ID;
1264	else if (assoc_acc->connectid.desc_len !=
1265	fcnvme_lsdesc_len(sz: sizeof(struct fcnvme_lsdesc_conn_id)))
1266	fcret = VERR_CONN_ID_LEN;
1267
1268	if (fcret) {
1269	ret = -EBADF;
1270	dev_err(ctrl->dev,
1271	"q %d Create Association LS failed: %s\n",
1272	queue->qnum, validation_errors[fcret]);
1273	} else {
1274	spin_lock_irqsave(&ctrl->lock, flags);
1275	ctrl->association_id =
1276	be64_to_cpu(assoc_acc->associd.association_id);
1277	queue->connection_id =
1278	be64_to_cpu(assoc_acc->connectid.connection_id);
1279	set_bit(nr: NVME_FC_Q_CONNECTED, addr: &queue->flags);
1280	spin_unlock_irqrestore(lock: &ctrl->lock, flags);
1281	}
1282
1283	out_free_buffer:
1284	kfree(objp: lsop);
1285	out_no_memory:
1286	if (ret)
1287	dev_err(ctrl->dev,
1288	"queue %d connect admin queue failed (%d).\n",
1289	queue->qnum, ret);
1290	return ret;
1291	}
1292
1293	static int
1294	nvme_fc_connect_queue(struct nvme_fc_ctrl ctrl, struct* nvme_fc_queue *queue,
1295	u16 qsize, u16 ersp_ratio)
1296	{
1297	struct nvmefc_ls_req_op *lsop;
1298	struct nvmefc_ls_req *lsreq;
1299	struct fcnvme_ls_cr_conn_rqst *conn_rqst;
1300	struct fcnvme_ls_cr_conn_acc *conn_acc;
1301	int ret, fcret = `0`;
1302
1303	lsop = kzalloc(size: (sizeof(*lsop) +
1304	sizeof(conn_rqst) + sizeof(conn_acc) +
1305	ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
1306	if (!lsop) {
1307	dev_info(ctrl->ctrl.device,
1308	"NVME-FC{%d}: send Create Connection failed: ENOMEM\n",
1309	ctrl->cnum);
1310	ret = -ENOMEM;
1311	goto out_no_memory;
1312	}
1313
1314	conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)&lsop[`1`];
1315	conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[`1`];
1316	lsreq = &lsop->ls_req;
1317	if (ctrl->lport->ops->lsrqst_priv_sz)
1318	lsreq->private = (void *)&conn_acc[`1`];
1319	else
1320	lsreq->private = NULL;
1321
1322	conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION;
1323	conn_rqst->desc_list_len = cpu_to_be32(
1324	sizeof(struct fcnvme_lsdesc_assoc_id) +
1325	sizeof(struct fcnvme_lsdesc_cr_conn_cmd));
1326
1327	conn_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID);
1328	conn_rqst->associd.desc_len =
1329	fcnvme_lsdesc_len(
1330	sz: sizeof(struct fcnvme_lsdesc_assoc_id));
1331	conn_rqst->associd.association_id = cpu_to_be64(ctrl->association_id);
1332	conn_rqst->connect_cmd.desc_tag =
1333	cpu_to_be32(FCNVME_LSDESC_CREATE_CONN_CMD);
1334	conn_rqst->connect_cmd.desc_len =
1335	fcnvme_lsdesc_len(
1336	sz: sizeof(struct fcnvme_lsdesc_cr_conn_cmd));
1337	conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
1338	conn_rqst->connect_cmd.qid = cpu_to_be16(queue->qnum);
1339	conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize - `1`);
1340
1341	lsop->queue = queue;
1342	lsreq->rqstaddr = conn_rqst;
1343	lsreq->rqstlen = sizeof(*conn_rqst);
1344	lsreq->rspaddr = conn_acc;
1345	lsreq->rsplen = sizeof(*conn_acc);
1346	lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
1347
1348	ret = nvme_fc_send_ls_req(rport: ctrl->rport, lsop);
1349	if (ret)
1350	goto out_free_buffer;
1351
1352	/ process connect LS completion /
1353
1354	/ validate the ACC response /
1355	if (conn_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC)
1356	fcret = VERR_LSACC;
1357	else if (conn_acc->hdr.desc_list_len !=
1358	fcnvme_lsdesc_len(sz: sizeof(struct fcnvme_ls_cr_conn_acc)))
1359	fcret = VERR_CR_CONN_ACC_LEN;
1360	else if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST))
1361	fcret = VERR_LSDESC_RQST;
1362	else if (conn_acc->hdr.rqst.desc_len !=
1363	fcnvme_lsdesc_len(sz: sizeof(struct fcnvme_lsdesc_rqst)))
1364	fcret = VERR_LSDESC_RQST_LEN;
1365	else if (conn_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_CONNECTION)
1366	fcret = VERR_CR_CONN;
1367	else if (conn_acc->connectid.desc_tag !=
1368	cpu_to_be32(FCNVME_LSDESC_CONN_ID))
1369	fcret = VERR_CONN_ID;
1370	else if (conn_acc->connectid.desc_len !=
1371	fcnvme_lsdesc_len(sz: sizeof(struct fcnvme_lsdesc_conn_id)))
1372	fcret = VERR_CONN_ID_LEN;
1373
1374	if (fcret) {
1375	ret = -EBADF;
1376	dev_err(ctrl->dev,
1377	"q %d Create I/O Connection LS failed: %s\n",
1378	queue->qnum, validation_errors[fcret]);
1379	} else {
1380	queue->connection_id =
1381	be64_to_cpu(conn_acc->connectid.connection_id);
1382	set_bit(nr: NVME_FC_Q_CONNECTED, addr: &queue->flags);
1383	}
1384
1385	out_free_buffer:
1386	kfree(objp: lsop);
1387	out_no_memory:
1388	if (ret)
1389	dev_err(ctrl->dev,
1390	"queue %d connect I/O queue failed (%d).\n",
1391	queue->qnum, ret);
1392	return ret;
1393	}
1394
/*
 * Completion callback for the asynchronous Disconnect Association LS:
 * finish the LS bookkeeping and free the lsop. @status is ignored.
 */
static void
nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status)
{
	struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq);

	__nvme_fc_finish_ls_req(lsop);

	/* fc-nvme initiator doesn't care about success or failure of cmd */

	kfree(lsop);
}
1406
1407	/*
1408	* This routine sends a FC-NVME LS to disconnect (aka terminate)
1409	* the FC-NVME Association. Terminating the association also
1410	* terminates the FC-NVME connections (per queue, both admin and io
1411	* queues) that are part of the association. E.g. things are torn
1412	* down, and the related FC-NVME Association ID and Connection IDs
1413	* become invalid.
1414	*
1415	* The behavior of the fc-nvme initiator is such that it's
1416	* understanding of the association and connections will implicitly
1417	* be torn down. The action is implicit as it may be due to a loss of
1418	* connectivity with the fc-nvme target, so you may never get a
1419	* response even if you tried. As such, the action of this routine
1420	* is to asynchronously send the LS, ignore any results of the LS, and
1421	* continue on with terminating the association. If the fc-nvme target
1422	* is present and receives the LS, it too can tear down.
1423	*/
1424	static void
1425	nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
1426	{
1427	struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst;
1428	struct fcnvme_ls_disconnect_assoc_acc *discon_acc;
1429	struct nvmefc_ls_req_op *lsop;
1430	struct nvmefc_ls_req *lsreq;
1431	int ret;
1432
1433	lsop = kzalloc(size: (sizeof(*lsop) +
1434	sizeof(discon_rqst) + sizeof(discon_acc) +
1435	ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
1436	if (!lsop) {
1437	dev_info(ctrl->ctrl.device,
1438	"NVME-FC{%d}: send Disconnect Association "
1439	"failed: ENOMEM\n",
1440	ctrl->cnum);
1441	return;
1442	}
1443
1444	discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)&lsop[`1`];
1445	discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[`1`];
1446	lsreq = &lsop->ls_req;
1447	if (ctrl->lport->ops->lsrqst_priv_sz)
1448	lsreq->private = (void *)&discon_acc[`1`];
1449	else
1450	lsreq->private = NULL;
1451
1452	nvmefc_fmt_lsreq_discon_assoc(lsreq, discon_rqst, discon_acc,
1453	association_id: ctrl->association_id);
1454
1455	ret = nvme_fc_send_ls_req_async(rport: ctrl->rport, lsop,
1456	done: nvme_fc_disconnect_assoc_done);
1457	if (ret)
1458	kfree(objp: lsop);
1459	}
1460
1461	static void
1462	nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp)
1463	{
1464	struct nvmefc_ls_rcv_op *lsop = lsrsp->nvme_fc_private;
1465	struct nvme_fc_rport *rport = lsop->rport;
1466	struct nvme_fc_lport *lport = rport->lport;
1467	unsigned long flags;
1468
1469	spin_lock_irqsave(&rport->lock, flags);
1470	list_del(entry: &lsop->lsrcv_list);
1471	spin_unlock_irqrestore(lock: &rport->lock, flags);
1472
1473	fc_dma_sync_single_for_cpu(dev: lport->dev, addr: lsop->rspdma,
1474	size: sizeof(*lsop->rspbuf), dir: DMA_TO_DEVICE);
1475	fc_dma_unmap_single(dev: lport->dev, addr: lsop->rspdma,
1476	size: sizeof(*lsop->rspbuf), dir: DMA_TO_DEVICE);
1477
1478	kfree(objp: lsop->rspbuf);
1479	kfree(objp: lsop->rqstbuf);
1480	kfree(objp: lsop);
1481
1482	nvme_fc_rport_put(rport);
1483	}
1484
1485	static void
1486	nvme_fc_xmt_ls_rsp(struct nvmefc_ls_rcv_op *lsop)
1487	{
1488	struct nvme_fc_rport *rport = lsop->rport;
1489	struct nvme_fc_lport *lport = rport->lport;
1490	struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0;
1491	int ret;
1492
1493	fc_dma_sync_single_for_device(dev: lport->dev, addr: lsop->rspdma,
1494	size: sizeof(*lsop->rspbuf), dir: DMA_TO_DEVICE);
1495
1496	ret = lport->ops->xmt_ls_rsp(&lport->localport, &rport->remoteport,
1497	lsop->lsrsp);
1498	if (ret) {
1499	dev_warn(lport->dev,
1500	"LLDD rejected LS RSP xmt: LS %d status %d\n",
1501	w0->ls_cmd, ret);
1502	nvme_fc_xmt_ls_rsp_done(lsrsp: lsop->lsrsp);
1503	return;
1504	}
1505	}
1506
1507	static struct nvme_fc_ctrl *
1508	nvme_fc_match_disconn_ls(struct nvme_fc_rport *rport,
1509	struct nvmefc_ls_rcv_op *lsop)
1510	{
1511	struct fcnvme_ls_disconnect_assoc_rqst *rqst =
1512	&lsop->rqstbuf->rq_dis_assoc;
1513	struct nvme_fc_ctrl ctrl, ret = NULL;
1514	struct nvmefc_ls_rcv_op *oldls = NULL;
1515	u64 association_id = be64_to_cpu(rqst->associd.association_id);
1516	unsigned long flags;
1517
1518	spin_lock_irqsave(&rport->lock, flags);
1519
1520	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
1521	if (!nvme_fc_ctrl_get(ctrl))
1522	continue;
1523	spin_lock(lock: &ctrl->lock);
1524	if (association_id == ctrl->association_id) {
1525	oldls = ctrl->rcv_disconn;
1526	ctrl->rcv_disconn = lsop;
1527	ret = ctrl;
1528	}
1529	spin_unlock(lock: &ctrl->lock);
1530	if (ret)
1531	/ leave the ctrl get reference /
1532	break;
1533	nvme_fc_ctrl_put(ctrl);
1534	}
1535
1536	spin_unlock_irqrestore(lock: &rport->lock, flags);
1537
1538	/ transmit a response for anything that was pending /
1539	if (oldls) {
1540	dev_info(rport->lport->dev,
1541	"NVME-FC{%d}: Multiple Disconnect Association "
1542	"LS's received\n", ctrl->cnum);
1543	/ overwrite good response with bogus failure /
1544	oldls->lsrsp->rsplen = nvme_fc_format_rjt(buf: oldls->rspbuf,
1545	buflen: sizeof(*oldls->rspbuf),
1546	ls_cmd: rqst->w0.ls_cmd,
1547	reason: FCNVME_RJT_RC_UNAB,
1548	explanation: FCNVME_RJT_EXP_NONE, vendor: `0`);
1549	nvme_fc_xmt_ls_rsp(lsop: oldls);
1550	}
1551
1552	return ret;
1553	}
1554
1555	/*
1556	* returns true to mean LS handled and ls_rsp can be sent
1557	* returns false to defer ls_rsp xmt (will be done as part of
1558	* association termination)
1559	*/
1560	static bool
1561	nvme_fc_ls_disconnect_assoc(struct nvmefc_ls_rcv_op *lsop)
1562	{
1563	struct nvme_fc_rport *rport = lsop->rport;
1564	struct fcnvme_ls_disconnect_assoc_rqst *rqst =
1565	&lsop->rqstbuf->rq_dis_assoc;
1566	struct fcnvme_ls_disconnect_assoc_acc *acc =
1567	&lsop->rspbuf->rsp_dis_assoc;
1568	struct nvme_fc_ctrl *ctrl = NULL;
1569	int ret = `0`;
1570
1571	memset(acc, `0`, sizeof(*acc));
1572
1573	ret = nvmefc_vldt_lsreq_discon_assoc(rqstlen: lsop->rqstdatalen, rqst);
1574	if (!ret) {
1575	/ match an active association /
1576	ctrl = nvme_fc_match_disconn_ls(rport, lsop);
1577	if (!ctrl)
1578	ret = VERR_NO_ASSOC;
1579	}
1580
1581	if (ret) {
1582	dev_info(rport->lport->dev,
1583	"Disconnect LS failed: %s\n",
1584	validation_errors[ret]);
1585	lsop->lsrsp->rsplen = nvme_fc_format_rjt(buf: acc,
1586	buflen: sizeof(*acc), ls_cmd: rqst->w0.ls_cmd,
1587	reason: (ret == VERR_NO_ASSOC) ?
1588	FCNVME_RJT_RC_INV_ASSOC :
1589	FCNVME_RJT_RC_LOGIC,
1590	explanation: FCNVME_RJT_EXP_NONE, vendor: `0`);
1591	return true;
1592	}
1593
1594	/ format an ACCept response /
1595
1596	lsop->lsrsp->rsplen = sizeof(*acc);
1597
1598	nvme_fc_format_rsp_hdr(buf: acc, ls_cmd: FCNVME_LS_ACC,
1599	desc_len: fcnvme_lsdesc_len(
1600	sz: sizeof(struct fcnvme_ls_disconnect_assoc_acc)),
1601	rqst_ls_cmd: FCNVME_LS_DISCONNECT_ASSOC);
1602
1603	/*
1604	* the transmit of the response will occur after the exchanges
1605	* for the association have been ABTS'd by
1606	* nvme_fc_delete_association().
1607	*/
1608
1609	/ fail the association /
1610	nvme_fc_error_recovery(ctrl, errmsg: "Disconnect Association LS received");
1611
1612	/ release the reference taken by nvme_fc_match_disconn_ls() /
1613	nvme_fc_ctrl_put(ctrl);
1614
1615	return false;
1616	}
1617
1618	/*
1619	* Actual Processing routine for received FC-NVME LS Requests from the LLD
1620	* returns true if a response should be sent afterward, false if rsp will
1621	* be sent asynchronously.
1622	*/
1623	static bool
1624	nvme_fc_handle_ls_rqst(struct nvmefc_ls_rcv_op *lsop)
1625	{
1626	struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0;
1627	bool ret = true;
1628
1629	lsop->lsrsp->nvme_fc_private = lsop;
1630	lsop->lsrsp->rspbuf = lsop->rspbuf;
1631	lsop->lsrsp->rspdma = lsop->rspdma;
1632	lsop->lsrsp->done = nvme_fc_xmt_ls_rsp_done;
1633	/ Be preventative. handlers will later set to valid length /
1634	lsop->lsrsp->rsplen = `0`;
1635
1636	/*
1637	* handlers:
1638	* parse request input, execute the request, and format the
1639	* LS response
1640	*/
1641	switch (w0->ls_cmd) {
1642	case FCNVME_LS_DISCONNECT_ASSOC:
1643	ret = nvme_fc_ls_disconnect_assoc(lsop);
1644	break;
1645	case FCNVME_LS_DISCONNECT_CONN:
1646	lsop->lsrsp->rsplen = nvme_fc_format_rjt(buf: lsop->rspbuf,
1647	buflen: sizeof(*lsop->rspbuf), ls_cmd: w0->ls_cmd,
1648	reason: FCNVME_RJT_RC_UNSUP, explanation: FCNVME_RJT_EXP_NONE, vendor: `0`);
1649	break;
1650	case FCNVME_LS_CREATE_ASSOCIATION:
1651	case FCNVME_LS_CREATE_CONNECTION:
1652	lsop->lsrsp->rsplen = nvme_fc_format_rjt(buf: lsop->rspbuf,
1653	buflen: sizeof(*lsop->rspbuf), ls_cmd: w0->ls_cmd,
1654	reason: FCNVME_RJT_RC_LOGIC, explanation: FCNVME_RJT_EXP_NONE, vendor: `0`);
1655	break;
1656	default:
1657	lsop->lsrsp->rsplen = nvme_fc_format_rjt(buf: lsop->rspbuf,
1658	buflen: sizeof(*lsop->rspbuf), ls_cmd: w0->ls_cmd,
1659	reason: FCNVME_RJT_RC_INVAL, explanation: FCNVME_RJT_EXP_NONE, vendor: `0`);
1660	break;
1661	}
1662
1663	return(ret);
1664	}
1665
1666	static void
1667	nvme_fc_handle_ls_rqst_work(struct work_struct *work)
1668	{
1669	struct nvme_fc_rport *rport =
1670	container_of(work, struct nvme_fc_rport, lsrcv_work);
1671	struct fcnvme_ls_rqst_w0 *w0;
1672	struct nvmefc_ls_rcv_op *lsop;
1673	unsigned long flags;
1674	bool sendrsp;
1675
1676	restart:
1677	sendrsp = true;
1678	spin_lock_irqsave(&rport->lock, flags);
1679	list_for_each_entry(lsop, &rport->ls_rcv_list, lsrcv_list) {
1680	if (lsop->handled)
1681	continue;
1682
1683	lsop->handled = true;
1684	if (rport->remoteport.port_state == FC_OBJSTATE_ONLINE) {
1685	spin_unlock_irqrestore(lock: &rport->lock, flags);
1686	sendrsp = nvme_fc_handle_ls_rqst(lsop);
1687	} else {
1688	spin_unlock_irqrestore(lock: &rport->lock, flags);
1689	w0 = &lsop->rqstbuf->w0;
1690	lsop->lsrsp->rsplen = nvme_fc_format_rjt(
1691	buf: lsop->rspbuf,
1692	buflen: sizeof(*lsop->rspbuf),
1693	ls_cmd: w0->ls_cmd,
1694	reason: FCNVME_RJT_RC_UNAB,
1695	explanation: FCNVME_RJT_EXP_NONE, vendor: `0`);
1696	}
1697	if (sendrsp)
1698	nvme_fc_xmt_ls_rsp(lsop);
1699	goto restart;
1700	}
1701	spin_unlock_irqrestore(lock: &rport->lock, flags);
1702	}
1703
1704	static
1705	void nvme_fc_rcv_ls_req_err_msg(struct nvme_fc_lport *lport,
1706	struct fcnvme_ls_rqst_w0 *w0)
1707	{
1708	dev_info(lport->dev, "RCV %s LS failed: No memory\n",
1709	(w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
1710	nvmefc_ls_names[w0->ls_cmd] : "");
1711	}
1712
1713	/**
1714	* nvme_fc_rcv_ls_req - transport entry point called by an LLDD
1715	* upon the reception of a NVME LS request.
1716	*
1717	* The nvme-fc layer will copy payload to an internal structure for
1718	* processing. As such, upon completion of the routine, the LLDD may
1719	* immediately free/reuse the LS request buffer passed in the call.
1720	*
1721	* If this routine returns error, the LLDD should abort the exchange.
1722	*
1723	* @portptr: pointer to the (registered) remote port that the LS
1724	* was received from. The remoteport is associated with
1725	* a specific localport.
1726	* @lsrsp: pointer to a nvmefc_ls_rsp response structure to be
1727	* used to reference the exchange corresponding to the LS
1728	* when issuing an ls response.
1729	* @lsreqbuf: pointer to the buffer containing the LS Request
1730	* @lsreqbuf_len: length, in bytes, of the received LS request
1731	*/
1732	int
1733	nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr,
1734	struct nvmefc_ls_rsp *lsrsp,
1735	void *lsreqbuf, u32 lsreqbuf_len)
1736	{
1737	struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
1738	struct nvme_fc_lport *lport = rport->lport;
1739	struct fcnvme_ls_rqst_w0 w0 = (struct* fcnvme_ls_rqst_w0 *)lsreqbuf;
1740	struct nvmefc_ls_rcv_op *lsop;
1741	unsigned long flags;
1742	int ret;
1743
1744	nvme_fc_rport_get(rport);
1745
1746	/ validate there's a routine to transmit a response /
1747	if (!lport->ops->xmt_ls_rsp) {
1748	dev_info(lport->dev,
1749	"RCV %s LS failed: no LLDD xmt_ls_rsp\n",
1750	(w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
1751	nvmefc_ls_names[w0->ls_cmd] : "");
1752	ret = -EINVAL;
1753	goto out_put;
1754	}
1755
1756	if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) {
1757	dev_info(lport->dev,
1758	"RCV %s LS failed: payload too large\n",
1759	(w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
1760	nvmefc_ls_names[w0->ls_cmd] : "");
1761	ret = -E2BIG;
1762	goto out_put;
1763	}
1764
1765	lsop = kzalloc(size: sizeof(*lsop), GFP_KERNEL);
1766	if (!lsop) {
1767	nvme_fc_rcv_ls_req_err_msg(lport, w0);
1768	ret = -ENOMEM;
1769	goto out_put;
1770	}
1771
1772	lsop->rqstbuf = kzalloc(size: sizeof(*lsop->rqstbuf), GFP_KERNEL);
1773	lsop->rspbuf = kzalloc(size: sizeof(*lsop->rspbuf), GFP_KERNEL);
1774	if (!lsop->rqstbuf \|\| !lsop->rspbuf) {
1775	nvme_fc_rcv_ls_req_err_msg(lport, w0);
1776	ret = -ENOMEM;
1777	goto out_free;
1778	}
1779
1780	lsop->rspdma = fc_dma_map_single(dev: lport->dev, ptr: lsop->rspbuf,
1781	size: sizeof(*lsop->rspbuf),
1782	dir: DMA_TO_DEVICE);
1783	if (fc_dma_mapping_error(dev: lport->dev, dma_addr: lsop->rspdma)) {
1784	dev_info(lport->dev,
1785	"RCV %s LS failed: DMA mapping failure\n",
1786	(w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
1787	nvmefc_ls_names[w0->ls_cmd] : "");
1788	ret = -EFAULT;
1789	goto out_free;
1790	}
1791
1792	lsop->rport = rport;
1793	lsop->lsrsp = lsrsp;
1794
1795	memcpy(lsop->rqstbuf, lsreqbuf, lsreqbuf_len);
1796	lsop->rqstdatalen = lsreqbuf_len;
1797
1798	spin_lock_irqsave(&rport->lock, flags);
1799	if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) {
1800	spin_unlock_irqrestore(lock: &rport->lock, flags);
1801	ret = -ENOTCONN;
1802	goto out_unmap;
1803	}
1804	list_add_tail(new: &lsop->lsrcv_list, head: &rport->ls_rcv_list);
1805	spin_unlock_irqrestore(lock: &rport->lock, flags);
1806
1807	schedule_work(work: &rport->lsrcv_work);
1808
1809	return `0`;
1810
1811	out_unmap:
1812	fc_dma_unmap_single(dev: lport->dev, addr: lsop->rspdma,
1813	size: sizeof(*lsop->rspbuf), dir: DMA_TO_DEVICE);
1814	out_free:
1815	kfree(objp: lsop->rspbuf);
1816	kfree(objp: lsop->rqstbuf);
1817	kfree(objp: lsop);
1818	out_put:
1819	nvme_fc_rport_put(rport);
1820	return ret;
1821	}
1822	EXPORT_SYMBOL_GPL(nvme_fc_rcv_ls_req);
1823
1824
/* *********************** NVME Ctrl Routines **************************** */
1826
1827	static void
1828	__nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl,
1829	struct nvme_fc_fcp_op *op)
1830	{
1831	fc_dma_unmap_single(dev: ctrl->lport->dev, addr: op->fcp_req.rspdma,
1832	size: sizeof(op->rsp_iu), dir: DMA_FROM_DEVICE);
1833	fc_dma_unmap_single(dev: ctrl->lport->dev, addr: op->fcp_req.cmddma,
1834	size: sizeof(op->cmd_iu), dir: DMA_TO_DEVICE);
1835
1836	atomic_set(v: &op->state, i: FCPOP_STATE_UNINIT);
1837	}
1838
1839	static void
1840	nvme_fc_exit_request(struct blk_mq_tag_set set, struct* request *rq,
1841	unsigned int hctx_idx)
1842	{
1843	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
1844
1845	return __nvme_fc_exit_request(ctrl: to_fc_ctrl(ctrl: set->driver_data), op);
1846	}
1847
1848	static int
1849	__nvme_fc_abort_op(struct nvme_fc_ctrl ctrl, struct* nvme_fc_fcp_op *op)
1850	{
1851	unsigned long flags;
1852	int opstate;
1853
1854	spin_lock_irqsave(&ctrl->lock, flags);
1855	opstate = atomic_xchg(v: &op->state, new: FCPOP_STATE_ABORTED);
1856	if (opstate != FCPOP_STATE_ACTIVE)
1857	atomic_set(v: &op->state, i: opstate);
1858	else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
1859	op->flags \|= FCOP_FLAGS_TERMIO;
1860	ctrl->iocnt++;
1861	}
1862	spin_unlock_irqrestore(lock: &ctrl->lock, flags);
1863
1864	if (opstate != FCPOP_STATE_ACTIVE)
1865	return -ECANCELED;
1866
1867	ctrl->lport->ops->fcp_abort(&ctrl->lport->localport,
1868	&ctrl->rport->remoteport,
1869	op->queue->lldd_handle,
1870	&op->fcp_req);
1871
1872	return `0`;
1873	}
1874
1875	static void
1876	nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
1877	{
1878	struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops;
1879	int i;
1880
1881	/ ensure we've initialized the ops once /
1882	if (!(aen_op->flags & FCOP_FLAGS_AEN))
1883	return;
1884
1885	for (i = `0`; i < NVME_NR_AEN_COMMANDS; i++, aen_op++)
1886	__nvme_fc_abort_op(ctrl, op: aen_op);
1887	}
1888
1889	static inline void
1890	__nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
1891	struct nvme_fc_fcp_op op, int* opstate)
1892	{
1893	unsigned long flags;
1894
1895	if (opstate == FCPOP_STATE_ABORTED) {
1896	spin_lock_irqsave(&ctrl->lock, flags);
1897	if (test_bit(FCCTRL_TERMIO, &ctrl->flags) &&
1898	op->flags & FCOP_FLAGS_TERMIO) {
1899	if (!--ctrl->iocnt)
1900	wake_up(&ctrl->ioabort_wait);
1901	}
1902	spin_unlock_irqrestore(lock: &ctrl->lock, flags);
1903	}
1904	}
1905
1906	static void
1907	nvme_fc_ctrl_ioerr_work(struct work_struct *work)
1908	{
1909	struct nvme_fc_ctrl *ctrl =
1910	container_of(work, struct nvme_fc_ctrl, ioerr_work);
1911
1912	nvme_fc_error_recovery(ctrl, errmsg: "transport detected io error");
1913	}
1914
1915	/*
1916	* nvme_fc_io_getuuid - Routine called to get the appid field
1917	* associated with request by the lldd
1918	* @req:IO request from nvme fc to driver
1919	* Returns: UUID if there is an appid associated with VM or
1920	* NULL if the user/libvirt has not set the appid to VM
1921	*/
1922	char nvme_fc_io_getuuid(struct* nvmefc_fcp_req *req)
1923	{
1924	struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(fcpreq: req);
1925	struct request *rq = op->rq;
1926
1927	if (!IS_ENABLED(CONFIG_BLK_CGROUP_FC_APPID) \|\| !rq \|\| !rq->bio)
1928	return NULL;
1929	return blkcg_get_fc_appid(bio: rq->bio);
1930	}
1931	EXPORT_SYMBOL_GPL(nvme_fc_io_getuuid);
1932
1933	static void
1934	nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
1935	{
1936	struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(fcpreq: req);
1937	struct request *rq = op->rq;
1938	struct nvmefc_fcp_req *freq = &op->fcp_req;
1939	struct nvme_fc_ctrl *ctrl = op->ctrl;
1940	struct nvme_fc_queue *queue = op->queue;
1941	struct nvme_completion *cqe = &op->rsp_iu.cqe;
1942	struct nvme_command *sqe = &op->cmd_iu.sqe;
1943	__le16 status = cpu_to_le16(NVME_SC_SUCCESS << `1`);
1944	union nvme_result result;
1945	bool terminate_assoc = true;
1946	int opstate;
1947
1948	/*
1949	* WARNING:
1950	* The current linux implementation of a nvme controller
1951	* allocates a single tag set for all io queues and sizes
1952	* the io queues to fully hold all possible tags. Thus, the
1953	* implementation does not reference or care about the sqhd
1954	* value as it never needs to use the sqhd/sqtail pointers
1955	* for submission pacing.
1956	*
1957	* This affects the FC-NVME implementation in two ways:
1958	* 1) As the value doesn't matter, we don't need to waste
1959	* cycles extracting it from ERSPs and stamping it in the
1960	* cases where the transport fabricates CQEs on successful
1961	* completions.
1962	* 2) The FC-NVME implementation requires that delivery of
1963	* ERSP completions are to go back to the nvme layer in order
1964	* relative to the rsn, such that the sqhd value will always
1965	* be "in order" for the nvme layer. As the nvme layer in
1966	* linux doesn't care about sqhd, there's no need to return
1967	* them in order.
1968	*
1969	* Additionally:
1970	* As the core nvme layer in linux currently does not look at
1971	* every field in the cqe - in cases where the FC transport must
1972	* fabricate a CQE, the following fields will not be set as they
1973	* are not referenced:
1974	* cqe.sqid, cqe.sqhd, cqe.command_id
1975	*
1976	* Failure or error of an individual i/o, in a transport
1977	* detected fashion unrelated to the nvme completion status,
1978	* potentially cause the initiator and target sides to get out
1979	* of sync on SQ head/tail (aka outstanding io count allowed).
1980	* Per FC-NVME spec, failure of an individual command requires
1981	* the connection to be terminated, which in turn requires the
1982	* association to be terminated.
1983	*/
1984
1985	opstate = atomic_xchg(v: &op->state, new: FCPOP_STATE_COMPLETE);
1986
1987	fc_dma_sync_single_for_cpu(dev: ctrl->lport->dev, addr: op->fcp_req.rspdma,
1988	size: sizeof(op->rsp_iu), dir: DMA_FROM_DEVICE);
1989
1990	if (opstate == FCPOP_STATE_ABORTED)
1991	status = cpu_to_le16(NVME_SC_HOST_ABORTED_CMD << `1`);
1992	else if (freq->status) {
1993	status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << `1`);
1994	dev_info(ctrl->ctrl.device,
1995	"NVME-FC{%d}: io failed due to lldd error %d\n",
1996	ctrl->cnum, freq->status);
1997	}
1998
1999	/*
2000	* For the linux implementation, if we have an unsuccesful
2001	* status, they blk-mq layer can typically be called with the
2002	* non-zero status and the content of the cqe isn't important.
2003	*/
2004	if (status)
2005	goto done;
2006
2007	/*
2008	* command completed successfully relative to the wire
2009	* protocol. However, validate anything received and
2010	* extract the status and result from the cqe (create it
2011	* where necessary).
2012	*/
2013
2014	switch (freq->rcv_rsplen) {
2015
2016	case `0`:
2017	case NVME_FC_SIZEOF_ZEROS_RSP:
2018	/*
2019	* No response payload or 12 bytes of payload (which
2020	* should all be zeros) are considered successful and
2021	* no payload in the CQE by the transport.
2022	*/
2023	if (freq->transferred_length !=
2024	be32_to_cpu(op->cmd_iu.data_len)) {
2025	status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << `1`);
2026	dev_info(ctrl->ctrl.device,
2027	"NVME-FC{%d}: io failed due to bad transfer "
2028	"length: %d vs expected %d\n",
2029	ctrl->cnum, freq->transferred_length,
2030	be32_to_cpu(op->cmd_iu.data_len));
2031	goto done;
2032	}
2033	result.u64 = `0`;
2034	break;
2035
2036	case sizeof(struct nvme_fc_ersp_iu):
2037	/*
2038	* The ERSP IU contains a full completion with CQE.
2039	* Validate ERSP IU and look at cqe.
2040	*/
2041	if (unlikely(be16_to_cpu(op->rsp_iu.iu_len) !=
2042	(freq->rcv_rsplen / `4`) \|\|
2043	be32_to_cpu(op->rsp_iu.xfrd_len) !=
2044	freq->transferred_length \|\|
2045	op->rsp_iu.ersp_result \|\|
2046	sqe->common.command_id != cqe->command_id)) {
2047	status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << `1`);
2048	dev_info(ctrl->ctrl.device,
2049	"NVME-FC{%d}: io failed due to bad NVMe_ERSP: "
2050	"iu len %d, xfr len %d vs %d, status code "
2051	"%d, cmdid %d vs %d\n",
2052	ctrl->cnum, be16_to_cpu(op->rsp_iu.iu_len),
2053	be32_to_cpu(op->rsp_iu.xfrd_len),
2054	freq->transferred_length,
2055	op->rsp_iu.ersp_result,
2056	sqe->common.command_id,
2057	cqe->command_id);
2058	goto done;
2059	}
2060	result = cqe->result;
2061	status = cqe->status;
2062	break;
2063
2064	default:
2065	status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << `1`);
2066	dev_info(ctrl->ctrl.device,
2067	"NVME-FC{%d}: io failed due to odd NVMe_xRSP iu "
2068	"len %d\n",
2069	ctrl->cnum, freq->rcv_rsplen);
2070	goto done;
2071	}
2072
2073	terminate_assoc = false;
2074
2075	done:
2076	if (op->flags & FCOP_FLAGS_AEN) {
2077	nvme_complete_async_event(ctrl: &queue->ctrl->ctrl, status, res: &result);
2078	__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
2079	atomic_set(v: &op->state, i: FCPOP_STATE_IDLE);
2080	op->flags = FCOP_FLAGS_AEN; / clear other flags /
2081	nvme_fc_ctrl_put(ctrl);
2082	goto check_error;
2083	}
2084
2085	__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
2086	if (!nvme_try_complete_req(req: rq, status, result))
2087	nvme_fc_complete_rq(rq);
2088
2089	check_error:
2090	if (terminate_assoc && ctrl->ctrl.state != NVME_CTRL_RESETTING)
2091	queue_work(wq: nvme_reset_wq, work: &ctrl->ioerr_work);
2092	}
2093
2094	static int
2095	__nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
2096	struct nvme_fc_queue queue, struct* nvme_fc_fcp_op *op,
2097	struct request *rq, u32 rqno)
2098	{
2099	struct nvme_fcp_op_w_sgl *op_w_sgl =
2100	container_of(op, typeof(*op_w_sgl), op);
2101	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
2102	int ret = `0`;
2103
2104	memset(op, `0`, sizeof(*op));
2105	op->fcp_req.cmdaddr = &op->cmd_iu;
2106	op->fcp_req.cmdlen = sizeof(op->cmd_iu);
2107	op->fcp_req.rspaddr = &op->rsp_iu;
2108	op->fcp_req.rsplen = sizeof(op->rsp_iu);
2109	op->fcp_req.done = nvme_fc_fcpio_done;
2110	op->ctrl = ctrl;
2111	op->queue = queue;
2112	op->rq = rq;
2113	op->rqno = rqno;
2114
2115	cmdiu->format_id = NVME_CMD_FORMAT_ID;
2116	cmdiu->fc_id = NVME_CMD_FC_ID;
2117	cmdiu->iu_len = cpu_to_be16(sizeof(cmdiu) / sizeof*(u32));
2118	if (queue->qnum)
2119	cmdiu->rsv_cat = fccmnd_set_cat_css(rsv_cat: `0`,
2120	css: (NVME_CC_CSS_NVM >> NVME_CC_CSS_SHIFT));
2121	else
2122	cmdiu->rsv_cat = fccmnd_set_cat_admin(rsv_cat: `0`);
2123
2124	op->fcp_req.cmddma = fc_dma_map_single(dev: ctrl->lport->dev,
2125	ptr: &op->cmd_iu, size: sizeof(op->cmd_iu), dir: DMA_TO_DEVICE);
2126	if (fc_dma_mapping_error(dev: ctrl->lport->dev, dma_addr: op->fcp_req.cmddma)) {
2127	dev_err(ctrl->dev,
2128	"FCP Op failed - cmdiu dma mapping failed.\n");
2129	ret = -EFAULT;
2130	goto out_on_error;
2131	}
2132
2133	op->fcp_req.rspdma = fc_dma_map_single(dev: ctrl->lport->dev,
2134	ptr: &op->rsp_iu, size: sizeof(op->rsp_iu),
2135	dir: DMA_FROM_DEVICE);
2136	if (fc_dma_mapping_error(dev: ctrl->lport->dev, dma_addr: op->fcp_req.rspdma)) {
2137	dev_err(ctrl->dev,
2138	"FCP Op failed - rspiu dma mapping failed.\n");
2139	ret = -EFAULT;
2140	}
2141
2142	atomic_set(v: &op->state, i: FCPOP_STATE_IDLE);
2143	out_on_error:
2144	return ret;
2145	}
2146
2147	static int
2148	nvme_fc_init_request(struct blk_mq_tag_set set, struct* request *rq,
2149	unsigned int hctx_idx, unsigned int numa_node)
2150	{
2151	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(ctrl: set->driver_data);
2152	struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq);
2153	int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + `1` : `0`;
2154	struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];
2155	int res;
2156
2157	res = __nvme_fc_init_request(ctrl, queue, op: &op->op, rq, rqno: queue->rqcnt++);
2158	if (res)
2159	return res;
2160	op->op.fcp_req.first_sgl = op->sgl;
2161	op->op.fcp_req.private = &op->priv[`0`];
2162	nvme_req(req: rq)->ctrl = &ctrl->ctrl;
2163	nvme_req(req: rq)->cmd = &op->op.cmd_iu.sqe;
2164	return res;
2165	}
2166
2167	static int
2168	nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl)
2169	{
2170	struct nvme_fc_fcp_op *aen_op;
2171	struct nvme_fc_cmd_iu *cmdiu;
2172	struct nvme_command *sqe;
2173	void *private = NULL;
2174	int i, ret;
2175
2176	aen_op = ctrl->aen_ops;
2177	for (i = `0`; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
2178	if (ctrl->lport->ops->fcprqst_priv_sz) {
2179	private = kzalloc(size: ctrl->lport->ops->fcprqst_priv_sz,
2180	GFP_KERNEL);
2181	if (!private)
2182	return -ENOMEM;
2183	}
2184
2185	cmdiu = &aen_op->cmd_iu;
2186	sqe = &cmdiu->sqe;
2187	ret = __nvme_fc_init_request(ctrl, queue: &ctrl->queues[`0`],
2188	op: aen_op, rq: (struct request *)NULL,
2189	rqno: (NVME_AQ_BLK_MQ_DEPTH + i));
2190	if (ret) {
2191	kfree(objp: private);
2192	return ret;
2193	}
2194
2195	aen_op->flags = FCOP_FLAGS_AEN;
2196	aen_op->fcp_req.private = private;
2197
2198	memset(sqe, `0`, sizeof(*sqe));
2199	sqe->common.opcode = nvme_admin_async_event;
2200	/ Note: core layer may overwrite the sqe.command_id value /
2201	sqe->common.command_id = NVME_AQ_BLK_MQ_DEPTH + i;
2202	}
2203	return `0`;
2204	}
2205
2206	static void
2207	nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl)
2208	{
2209	struct nvme_fc_fcp_op *aen_op;
2210	int i;
2211
2212	cancel_work_sync(work: &ctrl->ctrl.async_event_work);
2213	aen_op = ctrl->aen_ops;
2214	for (i = `0`; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
2215	__nvme_fc_exit_request(ctrl, op: aen_op);
2216
2217	kfree(objp: aen_op->fcp_req.private);
2218	aen_op->fcp_req.private = NULL;
2219	}
2220	}
2221
2222	static inline int
2223	__nvme_fc_init_hctx(struct blk_mq_hw_ctx hctx, void* data, unsigned* int qidx)
2224	{
2225	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(ctrl: data);
2226	struct nvme_fc_queue *queue = &ctrl->queues[qidx];
2227
2228	hctx->driver_data = queue;
2229	queue->hctx = hctx;
2230	return `0`;
2231	}
2232
/*
 * .init_hctx for io queues: hardware context N maps to transport
 * queue N + 1 (queue 0 is used by the admin hctx).
 */
static int
nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx)
{
	unsigned int qidx = hctx_idx + 1;

	return __nvme_fc_init_hctx(hctx, data, qidx);
}
2238
/*
 * .init_hctx for the admin queue: the hardware context index is used
 * directly as the transport queue index.
 */
static int
nvme_fc_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	unsigned int qidx = hctx_idx;

	return __nvme_fc_init_hctx(hctx, data, qidx);
}
2245
2246	static void
2247	nvme_fc_init_queue(struct nvme_fc_ctrl ctrl, int* idx)
2248	{
2249	struct nvme_fc_queue *queue;
2250
2251	queue = &ctrl->queues[idx];
2252	memset(queue, `0`, sizeof(*queue));
2253	queue->ctrl = ctrl;
2254	queue->qnum = idx;
2255	atomic_set(v: &queue->csn, i: `0`);
2256	queue->dev = ctrl->dev;
2257
2258	if (idx > `0`)
2259	queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * `16`;
2260	else
2261	queue->cmnd_capsule_len = sizeof(struct nvme_command);
2262
2263	/*
2264	* Considered whether we should allocate buffers for all SQEs
2265	* and CQEs and dma map them - mapping their respective entries
2266	* into the request structures (kernel vm addr and dma address)
2267	* thus the driver could use the buffers/mappings directly.
2268	* It only makes sense if the LLDD would use them for its
2269	* messaging api. It's very unlikely most adapter api's would use
2270	* a native NVME sqe/cqe. More reasonable if FC-NVME IU payload
2271	* structures were used instead.
2272	*/
2273	}
2274
2275	/*
2276	* This routine terminates a queue at the transport level.
2277	* The transport has already ensured that all outstanding ios on
2278	* the queue have been terminated.
2279	* The transport will send a Disconnect LS request to terminate
2280	* the queue's connection. Termination of the admin queue will also
2281	* terminate the association at the target.
2282	*/
2283	static void
2284	nvme_fc_free_queue(struct nvme_fc_queue *queue)
2285	{
2286	if (!test_and_clear_bit(nr: NVME_FC_Q_CONNECTED, addr: &queue->flags))
2287	return;
2288
2289	clear_bit(nr: NVME_FC_Q_LIVE, addr: &queue->flags);
2290	/*
2291	* Current implementation never disconnects a single queue.
2292	* It always terminates a whole association. So there is never
2293	* a disconnect(queue) LS sent to the target.
2294	*/
2295
2296	queue->connection_id = `0`;
2297	atomic_set(v: &queue->csn, i: `0`);
2298	}
2299
2300	static void
2301	__nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl,
2302	struct nvme_fc_queue queue, unsigned* int qidx)
2303	{
2304	if (ctrl->lport->ops->delete_queue)
2305	ctrl->lport->ops->delete_queue(&ctrl->lport->localport, qidx,
2306	queue->lldd_handle);
2307	queue->lldd_handle = NULL;
2308	}
2309
2310	static void
2311	nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl)
2312	{
2313	int i;
2314
2315	for (i = `1`; i < ctrl->ctrl.queue_count; i++)
2316	nvme_fc_free_queue(queue: &ctrl->queues[i]);
2317	}
2318
2319	static int
2320	__nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl,
2321	struct nvme_fc_queue queue, unsigned* int qidx, u16 qsize)
2322	{
2323	int ret = `0`;
2324
2325	queue->lldd_handle = NULL;
2326	if (ctrl->lport->ops->create_queue)
2327	ret = ctrl->lport->ops->create_queue(&ctrl->lport->localport,
2328	qidx, qsize, &queue->lldd_handle);
2329
2330	return ret;
2331	}
2332
2333	static void
2334	nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl)
2335	{
2336	struct nvme_fc_queue *queue = &ctrl->queues[ctrl->ctrl.queue_count - `1`];
2337	int i;
2338
2339	for (i = ctrl->ctrl.queue_count - `1`; i >= `1`; i--, queue--)
2340	__nvme_fc_delete_hw_queue(ctrl, queue, qidx: i);
2341	}
2342
2343	static int
2344	nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
2345	{
2346	struct nvme_fc_queue *queue = &ctrl->queues[`1`];
2347	int i, ret;
2348
2349	for (i = `1`; i < ctrl->ctrl.queue_count; i++, queue++) {
2350	ret = __nvme_fc_create_hw_queue(ctrl, queue, qidx: i, qsize);
2351	if (ret)
2352	goto delete_queues;
2353	}
2354
2355	return `0`;
2356
2357	delete_queues:
2358	for (; i > `0`; i--)
2359	__nvme_fc_delete_hw_queue(ctrl, queue: &ctrl->queues[i], qidx: i);
2360	return ret;
2361	}
2362
2363	static int
2364	nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
2365	{
2366	int i, ret = `0`;
2367
2368	for (i = `1`; i < ctrl->ctrl.queue_count; i++) {
2369	ret = nvme_fc_connect_queue(ctrl, queue: &ctrl->queues[i], qsize,
2370	ersp_ratio: (qsize / `5`));
2371	if (ret)
2372	break;
2373	ret = nvmf_connect_io_queue(ctrl: &ctrl->ctrl, qid: i);
2374	if (ret)
2375	break;
2376
2377	set_bit(nr: NVME_FC_Q_LIVE, addr: &ctrl->queues[i].flags);
2378	}
2379
2380	return ret;
2381	}
2382
2383	static void
2384	nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl)
2385	{
2386	int i;
2387
2388	for (i = `1`; i < ctrl->ctrl.queue_count; i++)
2389	nvme_fc_init_queue(ctrl, idx: i);
2390	}
2391
2392	static void
2393	nvme_fc_ctrl_free(struct kref *ref)
2394	{
2395	struct nvme_fc_ctrl *ctrl =
2396	container_of(ref, struct nvme_fc_ctrl, ref);
2397	unsigned long flags;
2398
2399	if (ctrl->ctrl.tagset)
2400	nvme_remove_io_tag_set(ctrl: &ctrl->ctrl);
2401
2402	/ remove from rport list /
2403	spin_lock_irqsave(&ctrl->rport->lock, flags);
2404	list_del(entry: &ctrl->ctrl_list);
2405	spin_unlock_irqrestore(lock: &ctrl->rport->lock, flags);
2406
2407	nvme_unquiesce_admin_queue(ctrl: &ctrl->ctrl);
2408	nvme_remove_admin_tag_set(ctrl: &ctrl->ctrl);
2409
2410	kfree(objp: ctrl->queues);
2411
2412	put_device(dev: ctrl->dev);
2413	nvme_fc_rport_put(rport: ctrl->rport);
2414
2415	ida_free(&nvme_fc_ctrl_cnt, id: ctrl->cnum);
2416	if (ctrl->ctrl.opts)
2417	nvmf_free_options(opts: ctrl->ctrl.opts);
2418	kfree(objp: ctrl);
2419	}
2420
2421	static void
2422	nvme_fc_ctrl_put(struct nvme_fc_ctrl *ctrl)
2423	{
2424	kref_put(kref: &ctrl->ref, release: nvme_fc_ctrl_free);
2425	}
2426
2427	static int
2428	nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl)
2429	{
2430	return kref_get_unless_zero(kref: &ctrl->ref);
2431	}
2432
2433	/*
2434	* All accesses from nvme core layer done - can now free the
2435	* controller. Called after last nvme_put_ctrl() call
2436	*/
2437	static void
2438	nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
2439	{
2440	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(ctrl: nctrl);
2441
2442	WARN_ON(nctrl != &ctrl->ctrl);
2443
2444	nvme_fc_ctrl_put(ctrl);
2445	}
2446
2447	/*
2448	* This routine is used by the transport when it needs to find active
2449	* io on a queue that is to be terminated. The transport uses
2450	* blk_mq_tagset_busy_itr() to find the busy requests, which then invoke
2451	* this routine to kill them on a 1 by 1 basis.
2452	*
2453	* As FC allocates FC exchange for each io, the transport must contact
2454	* the LLDD to terminate the exchange, thus releasing the FC exchange.
2455	* After terminating the exchange the LLDD will call the transport's
2456	* normal io done path for the request, but it will have an aborted
2457	* status. The done path will return the io request back to the block
2458	* layer with an error status.
2459	*/
2460	static bool nvme_fc_terminate_exchange(struct request req, void* *data)
2461	{
2462	struct nvme_ctrl *nctrl = data;
2463	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(ctrl: nctrl);
2464	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq: req);
2465
2466	op->nreq.flags \|= NVME_REQ_CANCELLED;
2467	__nvme_fc_abort_op(ctrl, op);
2468	return true;
2469	}
2470
2471	/*
2472	* This routine runs through all outstanding commands on the association
2473	* and aborts them. This routine is typically be called by the
2474	* delete_association routine. It is also called due to an error during
2475	* reconnect. In that scenario, it is most likely a command that initializes
2476	* the controller, including fabric Connect commands on io queues, that
2477	* may have timed out or failed thus the io must be killed for the connect
2478	* thread to see the error.
2479	*/
2480	static void
2481	__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
2482	{
2483	int q;
2484
2485	/*
2486	* if aborting io, the queues are no longer good, mark them
2487	* all as not live.
2488	*/
2489	if (ctrl->ctrl.queue_count > `1`) {
2490	for (q = `1`; q < ctrl->ctrl.queue_count; q++)
2491	clear_bit(nr: NVME_FC_Q_LIVE, addr: &ctrl->queues[q].flags);
2492	}
2493	clear_bit(nr: NVME_FC_Q_LIVE, addr: &ctrl->queues[`0`].flags);
2494
2495	/*
2496	* If io queues are present, stop them and terminate all outstanding
2497	* ios on them. As FC allocates FC exchange for each io, the
2498	* transport must contact the LLDD to terminate the exchange,
2499	* thus releasing the FC exchange. We use blk_mq_tagset_busy_itr()
2500	* to tell us what io's are busy and invoke a transport routine
2501	* to kill them with the LLDD. After terminating the exchange
2502	* the LLDD will call the transport's normal io done path, but it
2503	* will have an aborted status. The done path will return the
2504	* io requests back to the block layer as part of normal completions
2505	* (but with error status).
2506	*/
2507	if (ctrl->ctrl.queue_count > `1`) {
2508	nvme_quiesce_io_queues(ctrl: &ctrl->ctrl);
2509	nvme_sync_io_queues(ctrl: &ctrl->ctrl);
2510	blk_mq_tagset_busy_iter(tagset: &ctrl->tag_set,
2511	fn: nvme_fc_terminate_exchange, priv: &ctrl->ctrl);
2512	blk_mq_tagset_wait_completed_request(tagset: &ctrl->tag_set);
2513	if (start_queues)
2514	nvme_unquiesce_io_queues(ctrl: &ctrl->ctrl);
2515	}
2516
2517	/*
2518	* Other transports, which don't have link-level contexts bound
2519	* to sqe's, would try to gracefully shutdown the controller by
2520	* writing the registers for shutdown and polling (call
2521	* nvme_disable_ctrl()). Given a bunch of i/o was potentially
2522	* just aborted and we will wait on those contexts, and given
2523	* there was no indication of how live the controlelr is on the
2524	* link, don't send more io to create more contexts for the
2525	* shutdown. Let the controller fail via keepalive failure if
2526	* its still present.
2527	*/
2528
2529	/*
2530	* clean up the admin queue. Same thing as above.
2531	*/
2532	nvme_quiesce_admin_queue(ctrl: &ctrl->ctrl);
2533	blk_sync_queue(q: ctrl->ctrl.admin_q);
2534	blk_mq_tagset_busy_iter(tagset: &ctrl->admin_tag_set,
2535	fn: nvme_fc_terminate_exchange, priv: &ctrl->ctrl);
2536	blk_mq_tagset_wait_completed_request(tagset: &ctrl->admin_tag_set);
2537	if (start_queues)
2538	nvme_unquiesce_admin_queue(ctrl: &ctrl->ctrl);
2539	}
2540
2541	static void
2542	nvme_fc_error_recovery(struct nvme_fc_ctrl ctrl, char* *errmsg)
2543	{
2544	/*
2545	* if an error (io timeout, etc) while (re)connecting, the remote
2546	* port requested terminating of the association (disconnect_ls)
2547	* or an error (timeout or abort) occurred on an io while creating
2548	* the controller. Abort any ios on the association and let the
2549	* create_association error path resolve things.
2550	*/
2551	enum nvme_ctrl_state state;
2552	unsigned long flags;
2553
2554	spin_lock_irqsave(&ctrl->lock, flags);
2555	state = ctrl->ctrl.state;
2556	if (state == NVME_CTRL_CONNECTING) {
2557	set_bit(ASSOC_FAILED, addr: &ctrl->flags);
2558	spin_unlock_irqrestore(lock: &ctrl->lock, flags);
2559	__nvme_fc_abort_outstanding_ios(ctrl, start_queues: true);
2560	dev_warn(ctrl->ctrl.device,
2561	"NVME-FC{%d}: transport error during (re)connect\n",
2562	ctrl->cnum);
2563	return;
2564	}
2565	spin_unlock_irqrestore(lock: &ctrl->lock, flags);
2566
2567	/ Otherwise, only proceed if in LIVE state - e.g. on first error /
2568	if (state != NVME_CTRL_LIVE)
2569	return;
2570
2571	dev_warn(ctrl->ctrl.device,
2572	"NVME-FC{%d}: transport association event: %s\n",
2573	ctrl->cnum, errmsg);
2574	dev_warn(ctrl->ctrl.device,
2575	"NVME-FC{%d}: resetting controller\n", ctrl->cnum);
2576
2577	nvme_reset_ctrl(ctrl: &ctrl->ctrl);
2578	}
2579
2580	static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq)
2581	{
2582	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
2583	struct nvme_fc_ctrl *ctrl = op->ctrl;
2584	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
2585	struct nvme_command *sqe = &cmdiu->sqe;
2586
2587	/*
2588	* Attempt to abort the offending command. Command completion
2589	* will detect the aborted io and will fail the connection.
2590	*/
2591	dev_info(ctrl->ctrl.device,
2592	"NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: "
2593	"x%08x/x%08x\n",
2594	ctrl->cnum, op->queue->qnum, sqe->common.opcode,
2595	sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11);
2596	if (__nvme_fc_abort_op(ctrl, op))
2597	nvme_fc_error_recovery(ctrl, errmsg: "io timeout abort failed");
2598
2599	/*
2600	* the io abort has been initiated. Have the reset timer
2601	* restarted and the abort completion will complete the io
2602	* shortly. Avoids a synchronous wait while the abort finishes.
2603	*/
2604	return BLK_EH_RESET_TIMER;
2605	}
2606
2607	static int
2608	nvme_fc_map_data(struct nvme_fc_ctrl ctrl, struct* request *rq,
2609	struct nvme_fc_fcp_op *op)
2610	{
2611	struct nvmefc_fcp_req *freq = &op->fcp_req;
2612	int ret;
2613
2614	freq->sg_cnt = `0`;
2615
2616	if (!blk_rq_nr_phys_segments(rq))
2617	return `0`;
2618
2619	freq->sg_table.sgl = freq->first_sgl;
2620	ret = sg_alloc_table_chained(table: &freq->sg_table,
2621	nents: blk_rq_nr_phys_segments(rq), first_chunk: freq->sg_table.sgl,
2622	NVME_INLINE_SG_CNT);
2623	if (ret)
2624	return -ENOMEM;
2625
2626	op->nents = blk_rq_map_sg(q: rq->q, rq, sglist: freq->sg_table.sgl);
2627	WARN_ON(op->nents > blk_rq_nr_phys_segments(rq));
2628	freq->sg_cnt = fc_dma_map_sg(dev: ctrl->lport->dev, sg: freq->sg_table.sgl,
2629	nents: op->nents, rq_dma_dir(rq));
2630	if (unlikely(freq->sg_cnt <= `0`)) {
2631	sg_free_table_chained(table: &freq->sg_table, NVME_INLINE_SG_CNT);
2632	freq->sg_cnt = `0`;
2633	return -EFAULT;
2634	}
2635
2636	/*
2637	* TODO: blk_integrity_rq(rq) for DIF
2638	*/
2639	return `0`;
2640	}
2641
2642	static void
2643	nvme_fc_unmap_data(struct nvme_fc_ctrl ctrl, struct* request *rq,
2644	struct nvme_fc_fcp_op *op)
2645	{
2646	struct nvmefc_fcp_req *freq = &op->fcp_req;
2647
2648	if (!freq->sg_cnt)
2649	return;
2650
2651	fc_dma_unmap_sg(dev: ctrl->lport->dev, sg: freq->sg_table.sgl, nents: op->nents,
2652	rq_dma_dir(rq));
2653
2654	sg_free_table_chained(table: &freq->sg_table, NVME_INLINE_SG_CNT);
2655
2656	freq->sg_cnt = `0`;
2657	}
2658
2659	/*
2660	* In FC, the queue is a logical thing. At transport connect, the target
2661	* creates its "queue" and returns a handle that is to be given to the
2662	* target whenever it posts something to the corresponding SQ. When an
2663	* SQE is sent on a SQ, FC effectively considers the SQE, or rather the
2664	* command contained within the SQE, an io, and assigns a FC exchange
2665	* to it. The SQE and the associated SQ handle are sent in the initial
2666	* CMD IU sents on the exchange. All transfers relative to the io occur
2667	* as part of the exchange. The CQE is the last thing for the io,
2668	* which is transferred (explicitly or implicitly) with the RSP IU
2669	* sent on the exchange. After the CQE is received, the FC exchange is
2670	* terminaed and the Exchange may be used on a different io.
2671	*
2672	* The transport to LLDD api has the transport making a request for a
2673	* new fcp io request to the LLDD. The LLDD then allocates a FC exchange
2674	* resource and transfers the command. The LLDD will then process all
2675	* steps to complete the io. Upon completion, the transport done routine
2676	* is called.
2677	*
2678	* So - while the operation is outstanding to the LLDD, there is a link
2679	* level FC exchange resource that is also outstanding. This must be
2680	* considered in all cleanup operations.
2681	*/
2682	static blk_status_t
2683	nvme_fc_start_fcp_op(struct nvme_fc_ctrl ctrl, struct* nvme_fc_queue *queue,
2684	struct nvme_fc_fcp_op *op, u32 data_len,
2685	enum nvmefc_fcp_datadir io_dir)
2686	{
2687	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
2688	struct nvme_command *sqe = &cmdiu->sqe;
2689	int ret, opstate;
2690
2691	/*
2692	* before attempting to send the io, check to see if we believe
2693	* the target device is present
2694	*/
2695	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
2696	return BLK_STS_RESOURCE;
2697
2698	if (!nvme_fc_ctrl_get(ctrl))
2699	return BLK_STS_IOERR;
2700
2701	/ format the FC-NVME CMD IU and fcp_req /
2702	cmdiu->connection_id = cpu_to_be64(queue->connection_id);
2703	cmdiu->data_len = cpu_to_be32(data_len);
2704	switch (io_dir) {
2705	case NVMEFC_FCP_WRITE:
2706	cmdiu->flags = FCNVME_CMD_FLAGS_WRITE;
2707	break;
2708	case NVMEFC_FCP_READ:
2709	cmdiu->flags = FCNVME_CMD_FLAGS_READ;
2710	break;
2711	case NVMEFC_FCP_NODATA:
2712	cmdiu->flags = `0`;
2713	break;
2714	}
2715	op->fcp_req.payload_length = data_len;
2716	op->fcp_req.io_dir = io_dir;
2717	op->fcp_req.transferred_length = `0`;
2718	op->fcp_req.rcv_rsplen = `0`;
2719	op->fcp_req.status = NVME_SC_SUCCESS;
2720	op->fcp_req.sqid = cpu_to_le16(queue->qnum);
2721
2722	/*
2723	* validate per fabric rules, set fields mandated by fabric spec
2724	* as well as those by FC-NVME spec.
2725	*/
2726	WARN_ON_ONCE(sqe->common.metadata);
2727	sqe->common.flags \|= NVME_CMD_SGL_METABUF;
2728
2729	/*
2730	* format SQE DPTR field per FC-NVME rules:
2731	* type=0x5 Transport SGL Data Block Descriptor
2732	* subtype=0xA Transport-specific value
2733	* address=0
2734	* length=length of the data series
2735	*/
2736	sqe->rw.dptr.sgl.type = (NVME_TRANSPORT_SGL_DATA_DESC << `4`) \|
2737	NVME_SGL_FMT_TRANSPORT_A;
2738	sqe->rw.dptr.sgl.length = cpu_to_le32(data_len);
2739	sqe->rw.dptr.sgl.addr = `0`;
2740
2741	if (!(op->flags & FCOP_FLAGS_AEN)) {
2742	ret = nvme_fc_map_data(ctrl, rq: op->rq, op);
2743	if (ret < `0`) {
2744	nvme_cleanup_cmd(req: op->rq);
2745	nvme_fc_ctrl_put(ctrl);
2746	if (ret == -ENOMEM \|\| ret == -EAGAIN)
2747	return BLK_STS_RESOURCE;
2748	return BLK_STS_IOERR;
2749	}
2750	}
2751
2752	fc_dma_sync_single_for_device(dev: ctrl->lport->dev, addr: op->fcp_req.cmddma,
2753	size: sizeof(op->cmd_iu), dir: DMA_TO_DEVICE);
2754
2755	atomic_set(v: &op->state, i: FCPOP_STATE_ACTIVE);
2756
2757	if (!(op->flags & FCOP_FLAGS_AEN))
2758	nvme_start_request(rq: op->rq);
2759
2760	cmdiu->csn = cpu_to_be32(atomic_inc_return(&queue->csn));
2761	ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport,
2762	&ctrl->rport->remoteport,
2763	queue->lldd_handle, &op->fcp_req);
2764
2765	if (ret) {
2766	/*
2767	* If the lld fails to send the command is there an issue with
2768	* the csn value? If the command that fails is the Connect,
2769	* no - as the connection won't be live. If it is a command
2770	* post-connect, it's possible a gap in csn may be created.
2771	* Does this matter? As Linux initiators don't send fused
2772	* commands, no. The gap would exist, but as there's nothing
2773	* that depends on csn order to be delivered on the target
2774	* side, it shouldn't hurt. It would be difficult for a
2775	* target to even detect the csn gap as it has no idea when the
2776	* cmd with the csn was supposed to arrive.
2777	*/
2778	opstate = atomic_xchg(v: &op->state, new: FCPOP_STATE_COMPLETE);
2779	__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
2780
2781	if (!(op->flags & FCOP_FLAGS_AEN)) {
2782	nvme_fc_unmap_data(ctrl, rq: op->rq, op);
2783	nvme_cleanup_cmd(req: op->rq);
2784	}
2785
2786	nvme_fc_ctrl_put(ctrl);
2787
2788	if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE &&
2789	ret != -EBUSY)
2790	return BLK_STS_IOERR;
2791
2792	return BLK_STS_RESOURCE;
2793	}
2794
2795	return BLK_STS_OK;
2796	}
2797
2798	static blk_status_t
2799	nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
2800	const struct blk_mq_queue_data *bd)
2801	{
2802	struct nvme_ns *ns = hctx->queue->queuedata;
2803	struct nvme_fc_queue *queue = hctx->driver_data;
2804	struct nvme_fc_ctrl *ctrl = queue->ctrl;
2805	struct request *rq = bd->rq;
2806	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
2807	enum nvmefc_fcp_datadir io_dir;
2808	bool queue_ready = test_bit(NVME_FC_Q_LIVE, &queue->flags);
2809	u32 data_len;
2810	blk_status_t ret;
2811
2812	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE \|\|
2813	!nvme_check_ready(ctrl: &queue->ctrl->ctrl, rq, queue_live: queue_ready))
2814	return nvme_fail_nonready_command(ctrl: &queue->ctrl->ctrl, req: rq);
2815
2816	ret = nvme_setup_cmd(ns, req: rq);
2817	if (ret)
2818	return ret;
2819
2820	/*
2821	* nvme core doesn't quite treat the rq opaquely. Commands such
2822	* as WRITE ZEROES will return a non-zero rq payload_bytes yet
2823	* there is no actual payload to be transferred.
2824	* To get it right, key data transmission on there being 1 or
2825	* more physical segments in the sg list. If there is no
2826	* physical segments, there is no payload.
2827	*/
2828	if (blk_rq_nr_phys_segments(rq)) {
2829	data_len = blk_rq_payload_bytes(rq);
2830	io_dir = ((rq_data_dir(rq) == WRITE) ?
2831	NVMEFC_FCP_WRITE : NVMEFC_FCP_READ);
2832	} else {
2833	data_len = `0`;
2834	io_dir = NVMEFC_FCP_NODATA;
2835	}
2836
2837
2838	return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir);
2839	}
2840
2841	static void
2842	nvme_fc_submit_async_event(struct nvme_ctrl *arg)
2843	{
2844	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(ctrl: arg);
2845	struct nvme_fc_fcp_op *aen_op;
2846	blk_status_t ret;
2847
2848	if (test_bit(FCCTRL_TERMIO, &ctrl->flags))
2849	return;
2850
2851	aen_op = &ctrl->aen_ops[`0`];
2852
2853	ret = nvme_fc_start_fcp_op(ctrl, queue: aen_op->queue, op: aen_op, data_len: `0`,
2854	io_dir: NVMEFC_FCP_NODATA);
2855	if (ret)
2856	dev_err(ctrl->ctrl.device,
2857	"failed async event work\n");
2858	}
2859
2860	static void
2861	nvme_fc_complete_rq(struct request *rq)
2862	{
2863	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
2864	struct nvme_fc_ctrl *ctrl = op->ctrl;
2865
2866	atomic_set(v: &op->state, i: FCPOP_STATE_IDLE);
2867	op->flags &= ~FCOP_FLAGS_TERMIO;
2868
2869	nvme_fc_unmap_data(ctrl, rq, op);
2870	nvme_complete_rq(req: rq);
2871	nvme_fc_ctrl_put(ctrl);
2872	}
2873
2874	static void nvme_fc_map_queues(struct blk_mq_tag_set *set)
2875	{
2876	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(ctrl: set->driver_data);
2877	int i;
2878
2879	for (i = `0`; i < set->nr_maps; i++) {
2880	struct blk_mq_queue_map *map = &set->map[i];
2881
2882	if (!map->nr_queues) {
2883	WARN_ON(i == HCTX_TYPE_DEFAULT);
2884	continue;
2885	}
2886
2887	/ Call LLDD map queue functionality if defined /
2888	if (ctrl->lport->ops->map_queues)
2889	ctrl->lport->ops->map_queues(&ctrl->lport->localport,
2890	map);
2891	else
2892	blk_mq_map_queues(qmap: map);
2893	}
2894	}
2895
2896	static const struct blk_mq_ops nvme_fc_mq_ops = {
2897	.queue_rq = nvme_fc_queue_rq,
2898	.complete = nvme_fc_complete_rq,
2899	.init_request = nvme_fc_init_request,
2900	.exit_request = nvme_fc_exit_request,
2901	.init_hctx = nvme_fc_init_hctx,
2902	.timeout = nvme_fc_timeout,
2903	.map_queues = nvme_fc_map_queues,
2904	};
2905
2906	static int
2907	nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
2908	{
2909	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
2910	unsigned int nr_io_queues;
2911	int ret;
2912
2913	nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
2914	ctrl->lport->ops->max_hw_queues);
2915	ret = nvme_set_queue_count(ctrl: &ctrl->ctrl, count: &nr_io_queues);
2916	if (ret) {
2917	dev_info(ctrl->ctrl.device,
2918	"set_queue_count failed: %d\n", ret);
2919	return ret;
2920	}
2921
2922	ctrl->ctrl.queue_count = nr_io_queues + `1`;
2923	if (!nr_io_queues)
2924	return `0`;
2925
2926	nvme_fc_init_io_queues(ctrl);
2927
2928	ret = nvme_alloc_io_tag_set(ctrl: &ctrl->ctrl, set: &ctrl->tag_set,
2929	ops: &nvme_fc_mq_ops, nr_maps: `1`,
2930	struct_size_t(struct nvme_fcp_op_w_sgl, priv,
2931	ctrl->lport->ops->fcprqst_priv_sz));
2932	if (ret)
2933	return ret;
2934
2935	ret = nvme_fc_create_hw_io_queues(ctrl, qsize: ctrl->ctrl.sqsize + `1`);
2936	if (ret)
2937	goto out_cleanup_tagset;
2938
2939	ret = nvme_fc_connect_io_queues(ctrl, qsize: ctrl->ctrl.sqsize + `1`);
2940	if (ret)
2941	goto out_delete_hw_queues;
2942
2943	ctrl->ioq_live = true;
2944
2945	return `0`;
2946
2947	out_delete_hw_queues:
2948	nvme_fc_delete_hw_io_queues(ctrl);
2949	out_cleanup_tagset:
2950	nvme_remove_io_tag_set(ctrl: &ctrl->ctrl);
2951	nvme_fc_free_io_queues(ctrl);
2952
2953	/ force put free routine to ignore io queues /
2954	ctrl->ctrl.tagset = NULL;
2955
2956	return ret;
2957	}
2958
2959	static int
2960	nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
2961	{
2962	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
2963	u32 prior_ioq_cnt = ctrl->ctrl.queue_count - `1`;
2964	unsigned int nr_io_queues;
2965	int ret;
2966
2967	nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
2968	ctrl->lport->ops->max_hw_queues);
2969	ret = nvme_set_queue_count(ctrl: &ctrl->ctrl, count: &nr_io_queues);
2970	if (ret) {
2971	dev_info(ctrl->ctrl.device,
2972	"set_queue_count failed: %d\n", ret);
2973	return ret;
2974	}
2975
2976	if (!nr_io_queues && prior_ioq_cnt) {
2977	dev_info(ctrl->ctrl.device,
2978	"Fail Reconnect: At least 1 io queue "
2979	"required (was %d)\n", prior_ioq_cnt);
2980	return -ENOSPC;
2981	}
2982
2983	ctrl->ctrl.queue_count = nr_io_queues + `1`;
2984	/ check for io queues existing /
2985	if (ctrl->ctrl.queue_count == `1`)
2986	return `0`;
2987
2988	if (prior_ioq_cnt != nr_io_queues) {
2989	dev_info(ctrl->ctrl.device,
2990	"reconnect: revising io queue count from %d to %d\n",
2991	prior_ioq_cnt, nr_io_queues);
2992	blk_mq_update_nr_hw_queues(set: &ctrl->tag_set, nr_hw_queues: nr_io_queues);
2993	}
2994
2995	ret = nvme_fc_create_hw_io_queues(ctrl, qsize: ctrl->ctrl.sqsize + `1`);
2996	if (ret)
2997	goto out_free_io_queues;
2998
2999	ret = nvme_fc_connect_io_queues(ctrl, qsize: ctrl->ctrl.sqsize + `1`);
3000	if (ret)
3001	goto out_delete_hw_queues;
3002
3003	return `0`;
3004
3005	out_delete_hw_queues:
3006	nvme_fc_delete_hw_io_queues(ctrl);
3007	out_free_io_queues:
3008	nvme_fc_free_io_queues(ctrl);
3009	return ret;
3010	}
3011
3012	static void
3013	nvme_fc_rport_active_on_lport(struct nvme_fc_rport *rport)
3014	{
3015	struct nvme_fc_lport *lport = rport->lport;
3016
3017	atomic_inc(v: &lport->act_rport_cnt);
3018	}
3019
3020	static void
3021	nvme_fc_rport_inactive_on_lport(struct nvme_fc_rport *rport)
3022	{
3023	struct nvme_fc_lport *lport = rport->lport;
3024	u32 cnt;
3025
3026	cnt = atomic_dec_return(v: &lport->act_rport_cnt);
3027	if (cnt == `0` && lport->localport.port_state == FC_OBJSTATE_DELETED)
3028	lport->ops->localport_delete(&lport->localport);
3029	}
3030
3031	static int
3032	nvme_fc_ctlr_active_on_rport(struct nvme_fc_ctrl *ctrl)
3033	{
3034	struct nvme_fc_rport *rport = ctrl->rport;
3035	u32 cnt;
3036
3037	if (test_and_set_bit(ASSOC_ACTIVE, addr: &ctrl->flags))
3038	return `1`;
3039
3040	cnt = atomic_inc_return(v: &rport->act_ctrl_cnt);
3041	if (cnt == `1`)
3042	nvme_fc_rport_active_on_lport(rport);
3043
3044	return `0`;
3045	}
3046
3047	static int
3048	nvme_fc_ctlr_inactive_on_rport(struct nvme_fc_ctrl *ctrl)
3049	{
3050	struct nvme_fc_rport *rport = ctrl->rport;
3051	struct nvme_fc_lport *lport = rport->lport;
3052	u32 cnt;
3053
3054	/ clearing of ctrl->flags ASSOC_ACTIVE bit is in association delete /
3055
3056	cnt = atomic_dec_return(v: &rport->act_ctrl_cnt);
3057	if (cnt == `0`) {
3058	if (rport->remoteport.port_state == FC_OBJSTATE_DELETED)
3059	lport->ops->remoteport_delete(&rport->remoteport);
3060	nvme_fc_rport_inactive_on_lport(rport);
3061	}
3062
3063	return `0`;
3064	}
3065
3066	/*
3067	* This routine restarts the controller on the host side, and
3068	* on the link side, recreates the controller association.
3069	*/
3070	static int
3071	nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
3072	{
3073	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
3074	struct nvmefc_ls_rcv_op *disls = NULL;
3075	unsigned long flags;
3076	int ret;
3077	bool changed;
3078
3079	++ctrl->ctrl.nr_reconnects;
3080
3081	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
3082	return -ENODEV;
3083
3084	if (nvme_fc_ctlr_active_on_rport(ctrl))
3085	return -ENOTUNIQ;
3086
3087	dev_info(ctrl->ctrl.device,
3088	"NVME-FC{%d}: create association : host wwpn 0x%016llx "
3089	" rport wwpn 0x%016llx: NQN \"%s\"\n",
3090	ctrl->cnum, ctrl->lport->localport.port_name,
3091	ctrl->rport->remoteport.port_name, ctrl->ctrl.opts->subsysnqn);
3092
3093	clear_bit(ASSOC_FAILED, addr: &ctrl->flags);
3094
3095	/*
3096	* Create the admin queue
3097	*/
3098
3099	ret = __nvme_fc_create_hw_queue(ctrl, queue: &ctrl->queues[`0`], qidx: `0`,
3100	NVME_AQ_DEPTH);
3101	if (ret)
3102	goto out_free_queue;
3103
3104	ret = nvme_fc_connect_admin_queue(ctrl, queue: &ctrl->queues[`0`],
3105	NVME_AQ_DEPTH, ersp_ratio: (NVME_AQ_DEPTH / `4`));
3106	if (ret)
3107	goto out_delete_hw_queue;
3108
3109	ret = nvmf_connect_admin_queue(ctrl: &ctrl->ctrl);
3110	if (ret)
3111	goto out_disconnect_admin_queue;
3112
3113	set_bit(nr: NVME_FC_Q_LIVE, addr: &ctrl->queues[`0`].flags);
3114
3115	/*
3116	* Check controller capabilities
3117	*
3118	* todo:- add code to check if ctrl attributes changed from
3119	* prior connection values
3120	*/
3121
3122	ret = nvme_enable_ctrl(ctrl: &ctrl->ctrl);
3123	if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
3124	ret = -EIO;
3125	if (ret)
3126	goto out_disconnect_admin_queue;
3127
3128	ctrl->ctrl.max_segments = ctrl->lport->ops->max_sgl_segments;
3129	ctrl->ctrl.max_hw_sectors = ctrl->ctrl.max_segments <<
3130	(ilog2(SZ_4K) - `9`);
3131
3132	nvme_unquiesce_admin_queue(ctrl: &ctrl->ctrl);
3133
3134	ret = nvme_init_ctrl_finish(ctrl: &ctrl->ctrl, was_suspended: false);
3135	if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
3136	ret = -EIO;
3137	if (ret)
3138	goto out_disconnect_admin_queue;
3139
3140	/ sanity checks /
3141
3142	/ FC-NVME does not have other data in the capsule /
3143	if (ctrl->ctrl.icdoff) {
3144	dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n",
3145	ctrl->ctrl.icdoff);
3146	ret = NVME_SC_INVALID_FIELD \| NVME_SC_DNR;
3147	goto out_disconnect_admin_queue;
3148	}
3149
3150	/ FC-NVME supports normal SGL Data Block Descriptors /
3151	if (!nvme_ctrl_sgl_supported(ctrl: &ctrl->ctrl)) {
3152	dev_err(ctrl->ctrl.device,
3153	"Mandatory sgls are not supported!\n");
3154	ret = NVME_SC_INVALID_FIELD \| NVME_SC_DNR;
3155	goto out_disconnect_admin_queue;
3156	}
3157
3158	if (opts->queue_size > ctrl->ctrl.maxcmd) {
3159	/ warn if maxcmd is lower than queue_size /
3160	dev_warn(ctrl->ctrl.device,
3161	"queue_size %zu > ctrl maxcmd %u, reducing "
3162	"to maxcmd\n",
3163	opts->queue_size, ctrl->ctrl.maxcmd);
3164	opts->queue_size = ctrl->ctrl.maxcmd;
3165	ctrl->ctrl.sqsize = opts->queue_size - `1`;
3166	}
3167
3168	ret = nvme_fc_init_aen_ops(ctrl);
3169	if (ret)
3170	goto out_term_aen_ops;
3171
3172	/*
3173	* Create the io queues
3174	*/
3175
3176	if (ctrl->ctrl.queue_count > `1`) {
3177	if (!ctrl->ioq_live)
3178	ret = nvme_fc_create_io_queues(ctrl);
3179	else
3180	ret = nvme_fc_recreate_io_queues(ctrl);
3181	}
3182
3183	spin_lock_irqsave(&ctrl->lock, flags);
3184	if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
3185	ret = -EIO;
3186	if (ret) {
3187	spin_unlock_irqrestore(lock: &ctrl->lock, flags);
3188	goto out_term_aen_ops;
3189	}
3190	changed = nvme_change_ctrl_state(ctrl: &ctrl->ctrl, new_state: NVME_CTRL_LIVE);
3191	spin_unlock_irqrestore(lock: &ctrl->lock, flags);
3192
3193	ctrl->ctrl.nr_reconnects = `0`;
3194
3195	if (changed)
3196	nvme_start_ctrl(ctrl: &ctrl->ctrl);
3197
3198	return `0`; / Success /
3199
3200	out_term_aen_ops:
3201	nvme_fc_term_aen_ops(ctrl);
3202	out_disconnect_admin_queue:
3203	dev_warn(ctrl->ctrl.device,
3204	"NVME-FC{%d}: create_assoc failed, assoc_id %llx ret %d\n",
3205	ctrl->cnum, ctrl->association_id, ret);
3206	/ send a Disconnect(association) LS to fc-nvme target /
3207	nvme_fc_xmt_disconnect_assoc(ctrl);
3208	spin_lock_irqsave(&ctrl->lock, flags);
3209	ctrl->association_id = `0`;
3210	disls = ctrl->rcv_disconn;
3211	ctrl->rcv_disconn = NULL;
3212	spin_unlock_irqrestore(lock: &ctrl->lock, flags);
3213	if (disls)
3214	nvme_fc_xmt_ls_rsp(lsop: disls);
3215	out_delete_hw_queue:
3216	__nvme_fc_delete_hw_queue(ctrl, queue: &ctrl->queues[`0`], qidx: `0`);
3217	out_free_queue:
3218	nvme_fc_free_queue(queue: &ctrl->queues[`0`]);
3219	clear_bit(ASSOC_ACTIVE, addr: &ctrl->flags);
3220	nvme_fc_ctlr_inactive_on_rport(ctrl);
3221
3222	return ret;
3223	}
3224
3225
3226	/*
3227	* This routine stops operation of the controller on the host side.
3228	* On the host os stack side: Admin and IO queues are stopped,
3229	* outstanding ios on them terminated via FC ABTS.
3230	* On the link side: the association is terminated.
3231	*/
3232	static void
3233	nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
3234	{
3235	struct nvmefc_ls_rcv_op *disls = NULL;
3236	unsigned long flags;
3237
3238	if (!test_and_clear_bit(ASSOC_ACTIVE, addr: &ctrl->flags))
3239	return;
3240
3241	spin_lock_irqsave(&ctrl->lock, flags);
3242	set_bit(FCCTRL_TERMIO, addr: &ctrl->flags);
3243	ctrl->iocnt = `0`;
3244	spin_unlock_irqrestore(lock: &ctrl->lock, flags);
3245
3246	__nvme_fc_abort_outstanding_ios(ctrl, start_queues: false);
3247
3248	/ kill the aens as they are a separate path /
3249	nvme_fc_abort_aen_ops(ctrl);
3250
3251	/ wait for all io that had to be aborted /
3252	spin_lock_irq(lock: &ctrl->lock);
3253	wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == `0`, ctrl->lock);
3254	clear_bit(FCCTRL_TERMIO, addr: &ctrl->flags);
3255	spin_unlock_irq(lock: &ctrl->lock);
3256
3257	nvme_fc_term_aen_ops(ctrl);
3258
3259	/*
3260	* send a Disconnect(association) LS to fc-nvme target
3261	* Note: could have been sent at top of process, but
3262	* cleaner on link traffic if after the aborts complete.
3263	* Note: if association doesn't exist, association_id will be 0
3264	*/
3265	if (ctrl->association_id)
3266	nvme_fc_xmt_disconnect_assoc(ctrl);
3267
3268	spin_lock_irqsave(&ctrl->lock, flags);
3269	ctrl->association_id = `0`;
3270	disls = ctrl->rcv_disconn;
3271	ctrl->rcv_disconn = NULL;
3272	spin_unlock_irqrestore(lock: &ctrl->lock, flags);
3273	if (disls)
3274	/*
3275	* if a Disconnect Request was waiting for a response, send
3276	* now that all ABTS's have been issued (and are complete).
3277	*/
3278	nvme_fc_xmt_ls_rsp(lsop: disls);
3279
3280	if (ctrl->ctrl.tagset) {
3281	nvme_fc_delete_hw_io_queues(ctrl);
3282	nvme_fc_free_io_queues(ctrl);
3283	}
3284
3285	__nvme_fc_delete_hw_queue(ctrl, queue: &ctrl->queues[`0`], qidx: `0`);
3286	nvme_fc_free_queue(queue: &ctrl->queues[`0`]);
3287
3288	/ re-enable the admin_q so anything new can fast fail /
3289	nvme_unquiesce_admin_queue(ctrl: &ctrl->ctrl);
3290
3291	/ resume the io queues so that things will fast fail /
3292	nvme_unquiesce_io_queues(ctrl: &ctrl->ctrl);
3293
3294	nvme_fc_ctlr_inactive_on_rport(ctrl);
3295	}
3296
3297	static void
3298	nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
3299	{
3300	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(ctrl: nctrl);
3301
3302	cancel_work_sync(work: &ctrl->ioerr_work);
3303	cancel_delayed_work_sync(dwork: &ctrl->connect_work);
3304	/*
3305	* kill the association on the link side. this will block
3306	* waiting for io to terminate
3307	*/
3308	nvme_fc_delete_association(ctrl);
3309	}
3310
3311	static void
3312	nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl ctrl, int* status)
3313	{
3314	struct nvme_fc_rport *rport = ctrl->rport;
3315	struct nvme_fc_remote_port *portptr = &rport->remoteport;
3316	unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
3317	bool recon = true;
3318
3319	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING)
3320	return;
3321
3322	if (portptr->port_state == FC_OBJSTATE_ONLINE) {
3323	dev_info(ctrl->ctrl.device,
3324	"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
3325	ctrl->cnum, status);
3326	if (status > `0` && (status & NVME_SC_DNR))
3327	recon = false;
3328	} else if (time_after_eq(jiffies, rport->dev_loss_end))
3329	recon = false;
3330
3331	if (recon && nvmf_should_reconnect(ctrl: &ctrl->ctrl)) {
3332	if (portptr->port_state == FC_OBJSTATE_ONLINE)
3333	dev_info(ctrl->ctrl.device,
3334	"NVME-FC{%d}: Reconnect attempt in %ld "
3335	"seconds\n",
3336	ctrl->cnum, recon_delay / HZ);
3337	else if (time_after(jiffies + recon_delay, rport->dev_loss_end))
3338	recon_delay = rport->dev_loss_end - jiffies;
3339
3340	queue_delayed_work(wq: nvme_wq, dwork: &ctrl->connect_work, delay: recon_delay);
3341	} else {
3342	if (portptr->port_state == FC_OBJSTATE_ONLINE) {
3343	if (status > `0` && (status & NVME_SC_DNR))
3344	dev_warn(ctrl->ctrl.device,
3345	"NVME-FC{%d}: reconnect failure\n",
3346	ctrl->cnum);
3347	else
3348	dev_warn(ctrl->ctrl.device,
3349	"NVME-FC{%d}: Max reconnect attempts "
3350	"(%d) reached.\n",
3351	ctrl->cnum, ctrl->ctrl.nr_reconnects);
3352	} else
3353	dev_warn(ctrl->ctrl.device,
3354	"NVME-FC{%d}: dev_loss_tmo (%d) expired "
3355	"while waiting for remoteport connectivity.\n",
3356	ctrl->cnum, min_t(int, portptr->dev_loss_tmo,
3357	(ctrl->ctrl.opts->max_reconnects *
3358	ctrl->ctrl.opts->reconnect_delay)));
3359	WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
3360	}
3361	}
3362
3363	static void
3364	nvme_fc_reset_ctrl_work(struct work_struct *work)
3365	{
3366	struct nvme_fc_ctrl *ctrl =
3367	container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
3368
3369	nvme_stop_ctrl(ctrl: &ctrl->ctrl);
3370
3371	/ will block will waiting for io to terminate /
3372	nvme_fc_delete_association(ctrl);
3373
3374	if (!nvme_change_ctrl_state(ctrl: &ctrl->ctrl, new_state: NVME_CTRL_CONNECTING))
3375	dev_err(ctrl->ctrl.device,
3376	"NVME-FC{%d}: error_recovery: Couldn't change state "
3377	"to CONNECTING\n", ctrl->cnum);
3378
3379	if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) {
3380	if (!queue_delayed_work(wq: nvme_wq, dwork: &ctrl->connect_work, delay: `0`)) {
3381	dev_err(ctrl->ctrl.device,
3382	"NVME-FC{%d}: failed to schedule connect "
3383	"after reset\n", ctrl->cnum);
3384	} else {
3385	flush_delayed_work(dwork: &ctrl->connect_work);
3386	}
3387	} else {
3388	nvme_fc_reconnect_or_delete(ctrl, status: -ENOTCONN);
3389	}
3390	}
3391
3392
3393	static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
3394	.name = "fc",
3395	.module = THIS_MODULE,
3396	.flags = NVME_F_FABRICS,
3397	.reg_read32 = nvmf_reg_read32,
3398	.reg_read64 = nvmf_reg_read64,
3399	.reg_write32 = nvmf_reg_write32,
3400	.free_ctrl = nvme_fc_nvme_ctrl_freed,
3401	.submit_async_event = nvme_fc_submit_async_event,
3402	.delete_ctrl = nvme_fc_delete_ctrl,
3403	.get_address = nvmf_get_address,
3404	};
3405
3406	static void
3407	nvme_fc_connect_ctrl_work(struct work_struct *work)
3408	{
3409	int ret;
3410
3411	struct nvme_fc_ctrl *ctrl =
3412	container_of(to_delayed_work(work),
3413	struct nvme_fc_ctrl, connect_work);
3414
3415	ret = nvme_fc_create_association(ctrl);
3416	if (ret)
3417	nvme_fc_reconnect_or_delete(ctrl, status: ret);
3418	else
3419	dev_info(ctrl->ctrl.device,
3420	"NVME-FC{%d}: controller connect complete\n",
3421	ctrl->cnum);
3422	}
3423
3424
3425	static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
3426	.queue_rq = nvme_fc_queue_rq,
3427	.complete = nvme_fc_complete_rq,
3428	.init_request = nvme_fc_init_request,
3429	.exit_request = nvme_fc_exit_request,
3430	.init_hctx = nvme_fc_init_admin_hctx,
3431	.timeout = nvme_fc_timeout,
3432	};
3433
3434
3435	/*
3436	* Fails a controller request if it matches an existing controller
3437	* (association) with the same tuple:
3438	* <Host NQN, Host ID, local FC port, remote FC port, SUBSYS NQN>
3439	*
3440	* The ports don't need to be compared as they are intrinsically
3441	* already matched by the port pointers supplied.
3442	*/
3443	static bool
3444	nvme_fc_existing_controller(struct nvme_fc_rport *rport,
3445	struct nvmf_ctrl_options *opts)
3446	{
3447	struct nvme_fc_ctrl *ctrl;
3448	unsigned long flags;
3449	bool found = false;
3450
3451	spin_lock_irqsave(&rport->lock, flags);
3452	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
3453	found = nvmf_ctlr_matches_baseopts(ctrl: &ctrl->ctrl, opts);
3454	if (found)
3455	break;
3456	}
3457	spin_unlock_irqrestore(lock: &rport->lock, flags);
3458
3459	return found;
3460	}
3461
3462	static struct nvme_ctrl *
3463	nvme_fc_init_ctrl(struct device dev, struct* nvmf_ctrl_options *opts,
3464	struct nvme_fc_lport lport, struct* nvme_fc_rport *rport)
3465	{
3466	struct nvme_fc_ctrl *ctrl;
3467	unsigned long flags;
3468	int ret, idx, ctrl_loss_tmo;
3469
3470	if (!(rport->remoteport.port_role &
3471	(FC_PORT_ROLE_NVME_DISCOVERY \| FC_PORT_ROLE_NVME_TARGET))) {
3472	ret = -EBADR;
3473	goto out_fail;
3474	}
3475
3476	if (!opts->duplicate_connect &&
3477	nvme_fc_existing_controller(rport, opts)) {
3478	ret = -EALREADY;
3479	goto out_fail;
3480	}
3481
3482	ctrl = kzalloc(size: sizeof(*ctrl), GFP_KERNEL);
3483	if (!ctrl) {
3484	ret = -ENOMEM;
3485	goto out_fail;
3486	}
3487
3488	idx = ida_alloc(ida: &nvme_fc_ctrl_cnt, GFP_KERNEL);
3489	if (idx < `0`) {
3490	ret = -ENOSPC;
3491	goto out_free_ctrl;
3492	}
3493
3494	/*
3495	* if ctrl_loss_tmo is being enforced and the default reconnect delay
3496	* is being used, change to a shorter reconnect delay for FC.
3497	*/
3498	if (opts->max_reconnects != -`1` &&
3499	opts->reconnect_delay == NVMF_DEF_RECONNECT_DELAY &&
3500	opts->reconnect_delay > NVME_FC_DEFAULT_RECONNECT_TMO) {
3501	ctrl_loss_tmo = opts->max_reconnects * opts->reconnect_delay;
3502	opts->reconnect_delay = NVME_FC_DEFAULT_RECONNECT_TMO;
3503	opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
3504	opts->reconnect_delay);
3505	}
3506
3507	ctrl->ctrl.opts = opts;
3508	ctrl->ctrl.nr_reconnects = `0`;
3509	if (lport->dev)
3510	ctrl->ctrl.numa_node = dev_to_node(dev: lport->dev);
3511	else
3512	ctrl->ctrl.numa_node = NUMA_NO_NODE;
3513	INIT_LIST_HEAD(list: &ctrl->ctrl_list);
3514	ctrl->lport = lport;
3515	ctrl->rport = rport;
3516	ctrl->dev = lport->dev;
3517	ctrl->cnum = idx;
3518	ctrl->ioq_live = false;
3519	init_waitqueue_head(&ctrl->ioabort_wait);
3520
3521	get_device(dev: ctrl->dev);
3522	kref_init(kref: &ctrl->ref);
3523
3524	INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
3525	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
3526	INIT_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work);
3527	spin_lock_init(&ctrl->lock);
3528
3529	/ io queue count /
3530	ctrl->ctrl.queue_count = min_t(unsigned int,
3531	opts->nr_io_queues,
3532	lport->ops->max_hw_queues);
3533	ctrl->ctrl.queue_count++; / +1 for admin queue /
3534
3535	ctrl->ctrl.sqsize = opts->queue_size - `1`;
3536	ctrl->ctrl.kato = opts->kato;
3537	ctrl->ctrl.cntlid = `0xffff`;
3538
3539	ret = -ENOMEM;
3540	ctrl->queues = kcalloc(n: ctrl->ctrl.queue_count,
3541	size: sizeof(struct nvme_fc_queue), GFP_KERNEL);
3542	if (!ctrl->queues)
3543	goto out_free_ida;
3544
3545	nvme_fc_init_queue(ctrl, idx: `0`);
3546
3547	/*
3548	* Would have been nice to init io queues tag set as well.
3549	* However, we require interaction from the controller
3550	* for max io queue count before we can do so.
3551	* Defer this to the connect path.
3552	*/
3553
3554	ret = nvme_init_ctrl(ctrl: &ctrl->ctrl, dev, ops: &nvme_fc_ctrl_ops, quirks: `0`);
3555	if (ret)
3556	goto out_free_queues;
3557
3558	/ at this point, teardown path changes to ref counting on nvme ctrl /
3559
3560	ret = nvme_alloc_admin_tag_set(ctrl: &ctrl->ctrl, set: &ctrl->admin_tag_set,
3561	ops: &nvme_fc_admin_mq_ops,
3562	struct_size_t(struct nvme_fcp_op_w_sgl, priv,
3563	ctrl->lport->ops->fcprqst_priv_sz));
3564	if (ret)
3565	goto fail_ctrl;
3566
3567	spin_lock_irqsave(&rport->lock, flags);
3568	list_add_tail(new: &ctrl->ctrl_list, head: &rport->ctrl_list);
3569	spin_unlock_irqrestore(lock: &rport->lock, flags);
3570
3571	if (!nvme_change_ctrl_state(ctrl: &ctrl->ctrl, new_state: NVME_CTRL_RESETTING) \|\|
3572	!nvme_change_ctrl_state(ctrl: &ctrl->ctrl, new_state: NVME_CTRL_CONNECTING)) {
3573	dev_err(ctrl->ctrl.device,
3574	"NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum);
3575	goto fail_ctrl;
3576	}
3577
3578	if (!queue_delayed_work(wq: nvme_wq, dwork: &ctrl->connect_work, delay: `0`)) {
3579	dev_err(ctrl->ctrl.device,
3580	"NVME-FC{%d}: failed to schedule initial connect\n",
3581	ctrl->cnum);
3582	goto fail_ctrl;
3583	}
3584
3585	flush_delayed_work(dwork: &ctrl->connect_work);
3586
3587	dev_info(ctrl->ctrl.device,
3588	"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
3589	ctrl->cnum, nvmf_ctrl_subsysnqn(&ctrl->ctrl));
3590
3591	return &ctrl->ctrl;
3592
3593	fail_ctrl:
3594	nvme_change_ctrl_state(ctrl: &ctrl->ctrl, new_state: NVME_CTRL_DELETING);
3595	cancel_work_sync(work: &ctrl->ioerr_work);
3596	cancel_work_sync(work: &ctrl->ctrl.reset_work);
3597	cancel_delayed_work_sync(dwork: &ctrl->connect_work);
3598
3599	ctrl->ctrl.opts = NULL;
3600
3601	/ initiate nvme ctrl ref counting teardown /
3602	nvme_uninit_ctrl(ctrl: &ctrl->ctrl);
3603
3604	/ Remove core ctrl ref. /
3605	nvme_put_ctrl(ctrl: &ctrl->ctrl);
3606
3607	/ as we're past the point where we transition to the ref*
3608	* counting teardown path, if we return a bad pointer here,
3609	* the calling routine, thinking it's prior to the
3610	* transition, will do an rport put. Since the teardown
3611	* path also does a rport put, we do an extra get here to
3612	* so proper order/teardown happens.
3613	*/
3614	nvme_fc_rport_get(rport);
3615
3616	return ERR_PTR(error: -EIO);
3617
3618	out_free_queues:
3619	kfree(objp: ctrl->queues);
3620	out_free_ida:
3621	put_device(dev: ctrl->dev);
3622	ida_free(&nvme_fc_ctrl_cnt, id: ctrl->cnum);
3623	out_free_ctrl:
3624	kfree(objp: ctrl);
3625	out_fail:
3626	/ exit via here doesn't follow ctlr ref points /
3627	return ERR_PTR(error: ret);
3628	}
3629
3630
3631	struct nvmet_fc_traddr {
3632	u64 nn;
3633	u64 pn;
3634	};
3635
3636	static int
3637	__nvme_fc_parse_u64(substring_t sstr, u64 val)
3638	{
3639	u64 token64;
3640
3641	if (match_u64(sstr, result: &token64))
3642	return -EINVAL;
3643	*val = token64;
3644
3645	return `0`;
3646	}
3647
3648	/*
3649	* This routine validates and extracts the WWN's from the TRADDR string.
3650	* As kernel parsers need the 0x to determine number base, universally
3651	* build string to parse with 0x prefix before parsing name strings.
3652	*/
3653	static int
3654	nvme_fc_parse_traddr(struct nvmet_fc_traddr traddr, char* *buf, size_t blen)
3655	{
3656	char name[`2` + NVME_FC_TRADDR_HEXNAMELEN + `1`];
3657	substring_t wwn = { name, &name[sizeof(name)-`1`] };
3658	int nnoffset, pnoffset;
3659
3660	/ validate if string is one of the 2 allowed formats /
3661	if (strnlen(p: buf, maxlen: blen) == NVME_FC_TRADDR_MAXLENGTH &&
3662	!strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) &&
3663	!strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET],
3664	"pn-0x", NVME_FC_TRADDR_OXNNLEN)) {
3665	nnoffset = NVME_FC_TRADDR_OXNNLEN;
3666	pnoffset = NVME_FC_TRADDR_MAX_PN_OFFSET +
3667	NVME_FC_TRADDR_OXNNLEN;
3668	} else if ((strnlen(p: buf, maxlen: blen) == NVME_FC_TRADDR_MINLENGTH &&
3669	!strncmp(buf, "nn-", NVME_FC_TRADDR_NNLEN) &&
3670	!strncmp(&buf[NVME_FC_TRADDR_MIN_PN_OFFSET],
3671	"pn-", NVME_FC_TRADDR_NNLEN))) {
3672	nnoffset = NVME_FC_TRADDR_NNLEN;
3673	pnoffset = NVME_FC_TRADDR_MIN_PN_OFFSET + NVME_FC_TRADDR_NNLEN;
3674	} else
3675	goto out_einval;
3676
3677	name[`0`] = `'0'`;
3678	name[`1`] = `'x'`;
3679	name[`2` + NVME_FC_TRADDR_HEXNAMELEN] = `0`;
3680
3681	memcpy(&name[`2`], &buf[nnoffset], NVME_FC_TRADDR_HEXNAMELEN);
3682	if (__nvme_fc_parse_u64(sstr: &wwn, val: &traddr->nn))
3683	goto out_einval;
3684
3685	memcpy(&name[`2`], &buf[pnoffset], NVME_FC_TRADDR_HEXNAMELEN);
3686	if (__nvme_fc_parse_u64(sstr: &wwn, val: &traddr->pn))
3687	goto out_einval;
3688
3689	return `0`;
3690
3691	out_einval:
3692	pr_warn("%s: bad traddr string\n", __func__);
3693	return -EINVAL;
3694	}
3695
3696	static struct nvme_ctrl *
3697	nvme_fc_create_ctrl(struct device dev, struct* nvmf_ctrl_options *opts)
3698	{
3699	struct nvme_fc_lport *lport;
3700	struct nvme_fc_rport *rport;
3701	struct nvme_ctrl *ctrl;
3702	struct nvmet_fc_traddr laddr = { `0L`, `0L` };
3703	struct nvmet_fc_traddr raddr = { `0L`, `0L` };
3704	unsigned long flags;
3705	int ret;
3706
3707	ret = nvme_fc_parse_traddr(traddr: &raddr, buf: opts->traddr, NVMF_TRADDR_SIZE);
3708	if (ret \|\| !raddr.nn \|\| !raddr.pn)
3709	return ERR_PTR(error: -EINVAL);
3710
3711	ret = nvme_fc_parse_traddr(traddr: &laddr, buf: opts->host_traddr, NVMF_TRADDR_SIZE);
3712	if (ret \|\| !laddr.nn \|\| !laddr.pn)
3713	return ERR_PTR(error: -EINVAL);
3714
3715	/ find the host and remote ports to connect together /
3716	spin_lock_irqsave(&nvme_fc_lock, flags);
3717	list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
3718	if (lport->localport.node_name != laddr.nn \|\|
3719	lport->localport.port_name != laddr.pn \|\|
3720	lport->localport.port_state != FC_OBJSTATE_ONLINE)
3721	continue;
3722
3723	list_for_each_entry(rport, &lport->endp_list, endp_list) {
3724	if (rport->remoteport.node_name != raddr.nn \|\|
3725	rport->remoteport.port_name != raddr.pn \|\|
3726	rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
3727	continue;
3728
3729	/ if fail to get reference fall through. Will error /
3730	if (!nvme_fc_rport_get(rport))
3731	break;
3732
3733	spin_unlock_irqrestore(lock: &nvme_fc_lock, flags);
3734
3735	ctrl = nvme_fc_init_ctrl(dev, opts, lport, rport);
3736	if (IS_ERR(ptr: ctrl))
3737	nvme_fc_rport_put(rport);
3738	return ctrl;
3739	}
3740	}
3741	spin_unlock_irqrestore(lock: &nvme_fc_lock, flags);
3742
3743	pr_warn("%s: %s - %s combination not found\n",
3744	__func__, opts->traddr, opts->host_traddr);
3745	return ERR_PTR(error: -ENOENT);
3746	}
3747
3748
3749	static struct nvmf_transport_ops nvme_fc_transport = {
3750	.name = "fc",
3751	.module = THIS_MODULE,
3752	.required_opts = NVMF_OPT_TRADDR \| NVMF_OPT_HOST_TRADDR,
3753	.allowed_opts = NVMF_OPT_RECONNECT_DELAY \| NVMF_OPT_CTRL_LOSS_TMO,
3754	.create_ctrl = nvme_fc_create_ctrl,
3755	};
3756
3757	/ Arbitrary successive failures max. With lots of subsystems could be high /
3758	#define DISCOVERY_MAX_FAIL 20
3759
3760	static ssize_t nvme_fc_nvme_discovery_store(struct device *dev,
3761	struct device_attribute attr, const* char *buf, size_t count)
3762	{
3763	unsigned long flags;
3764	LIST_HEAD(local_disc_list);
3765	struct nvme_fc_lport *lport;
3766	struct nvme_fc_rport *rport;
3767	int failcnt = `0`;
3768
3769	spin_lock_irqsave(&nvme_fc_lock, flags);
3770	restart:
3771	list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
3772	list_for_each_entry(rport, &lport->endp_list, endp_list) {
3773	if (!nvme_fc_lport_get(lport))
3774	continue;
3775	if (!nvme_fc_rport_get(rport)) {
3776	/*
3777	* This is a temporary condition. Upon restart
3778	* this rport will be gone from the list.
3779	*
3780	* Revert the lport put and retry. Anything
3781	* added to the list already will be skipped (as
3782	* they are no longer list_empty). Loops should
3783	* resume at rports that were not yet seen.
3784	*/
3785	nvme_fc_lport_put(lport);
3786
3787	if (failcnt++ < DISCOVERY_MAX_FAIL)
3788	goto restart;
3789
3790	pr_err("nvme_discovery: too many reference "
3791	"failures\n");
3792	goto process_local_list;
3793	}
3794	if (list_empty(head: &rport->disc_list))
3795	list_add_tail(new: &rport->disc_list,
3796	head: &local_disc_list);
3797	}
3798	}
3799
3800	process_local_list:
3801	while (!list_empty(head: &local_disc_list)) {
3802	rport = list_first_entry(&local_disc_list,
3803	struct nvme_fc_rport, disc_list);
3804	list_del_init(entry: &rport->disc_list);
3805	spin_unlock_irqrestore(lock: &nvme_fc_lock, flags);
3806
3807	lport = rport->lport;
3808	/ signal discovery. Won't hurt if it repeats /
3809	nvme_fc_signal_discovery_scan(lport, rport);
3810	nvme_fc_rport_put(rport);
3811	nvme_fc_lport_put(lport);
3812
3813	spin_lock_irqsave(&nvme_fc_lock, flags);
3814	}
3815	spin_unlock_irqrestore(lock: &nvme_fc_lock, flags);
3816
3817	return count;
3818	}
3819
3820	static DEVICE_ATTR(nvme_discovery, `0200`, NULL, nvme_fc_nvme_discovery_store);
3821
3822	#ifdef CONFIG_BLK_CGROUP_FC_APPID
3823	/ Parse the cgroup id from a buf and return the length of cgrpid /
3824	static int fc_parse_cgrpid(const char buf, u64 id)
3825	{
3826	char cgrp_id[`16`+`1`];
3827	int cgrpid_len, j;
3828
3829	memset(cgrp_id, `0x0`, sizeof(cgrp_id));
3830	for (cgrpid_len = `0`, j = `0`; cgrpid_len < `17`; cgrpid_len++) {
3831	if (buf[cgrpid_len] != `':'`)
3832	cgrp_id[cgrpid_len] = buf[cgrpid_len];
3833	else {
3834	j = `1`;
3835	break;
3836	}
3837	}
3838	if (!j)
3839	return -EINVAL;
3840	if (kstrtou64(s: cgrp_id, base: `16`, res: id) < `0`)
3841	return -EINVAL;
3842	return cgrpid_len;
3843	}
3844
3845	/*
3846	* Parse and update the appid in the blkcg associated with the cgroupid.
3847	*/
3848	static ssize_t fc_appid_store(struct device *dev,
3849	struct device_attribute attr, const* char *buf, size_t count)
3850	{
3851	size_t orig_count = count;
3852	u64 cgrp_id;
3853	int appid_len = `0`;
3854	int cgrpid_len = `0`;
3855	char app_id[FC_APPID_LEN];
3856	int ret = `0`;
3857
3858	if (buf[count-`1`] == `'\n'`)
3859	count--;
3860
3861	if ((count > (`16`+`1`+FC_APPID_LEN)) \|\| (!strchr(buf, `':'`)))
3862	return -EINVAL;
3863
3864	cgrpid_len = fc_parse_cgrpid(buf, id: &cgrp_id);
3865	if (cgrpid_len < `0`)
3866	return -EINVAL;
3867	appid_len = count - cgrpid_len - `1`;
3868	if (appid_len > FC_APPID_LEN)
3869	return -EINVAL;
3870
3871	memset(app_id, `0x0`, sizeof(app_id));
3872	memcpy(app_id, &buf[cgrpid_len+`1`], appid_len);
3873	ret = blkcg_set_fc_appid(app_id, cgrp_id, app_id_len: sizeof(app_id));
3874	if (ret < `0`)
3875	return ret;
3876	return orig_count;
3877	}
3878	static DEVICE_ATTR(appid_store, `0200`, NULL, fc_appid_store);
3879	#endif /* CONFIG_BLK_CGROUP_FC_APPID */
3880
3881	static struct attribute *nvme_fc_attrs[] = {
3882	&dev_attr_nvme_discovery.attr,
3883	#ifdef CONFIG_BLK_CGROUP_FC_APPID
3884	&dev_attr_appid_store.attr,
3885	#endif
3886	NULL
3887	};
3888
3889	static const struct attribute_group nvme_fc_attr_group = {
3890	.attrs = nvme_fc_attrs,
3891	};
3892
3893	static const struct attribute_group *nvme_fc_attr_groups[] = {
3894	&nvme_fc_attr_group,
3895	NULL
3896	};
3897
3898	static struct class fc_class = {
3899	.name = "fc",
3900	.dev_groups = nvme_fc_attr_groups,
3901	};
3902
3903	static int __init nvme_fc_init_module(void)
3904	{
3905	int ret;
3906
3907	nvme_fc_wq = alloc_workqueue(fmt: "nvme_fc_wq", flags: WQ_MEM_RECLAIM, max_active: `0`);
3908	if (!nvme_fc_wq)
3909	return -ENOMEM;
3910
3911	/*
3912	* NOTE:
3913	* It is expected that in the future the kernel will combine
3914	* the FC-isms that are currently under scsi and now being
3915	* added to by NVME into a new standalone FC class. The SCSI
3916	* and NVME protocols and their devices would be under this
3917	* new FC class.
3918	*
3919	* As we need something to post FC-specific udev events to,
3920	* specifically for nvme probe events, start by creating the
3921	* new device class. When the new standalone FC class is
3922	* put in place, this code will move to a more generic
3923	* location for the class.
3924	*/
3925	ret = class_register(class: &fc_class);
3926	if (ret) {
3927	pr_err("couldn't register class fc\n");
3928	goto out_destroy_wq;
3929	}
3930
3931	/*
3932	* Create a device for the FC-centric udev events
3933	*/
3934	fc_udev_device = device_create(cls: &fc_class, NULL, MKDEV(`0`, `0`), NULL,
3935	fmt: "fc_udev_device");
3936	if (IS_ERR(ptr: fc_udev_device)) {
3937	pr_err("couldn't create fc_udev device!\n");
3938	ret = PTR_ERR(ptr: fc_udev_device);
3939	goto out_destroy_class;
3940	}
3941
3942	ret = nvmf_register_transport(ops: &nvme_fc_transport);
3943	if (ret)
3944	goto out_destroy_device;
3945
3946	return `0`;
3947
3948	out_destroy_device:
3949	device_destroy(cls: &fc_class, MKDEV(`0`, `0`));
3950	out_destroy_class:
3951	class_unregister(class: &fc_class);
3952	out_destroy_wq:
3953	destroy_workqueue(wq: nvme_fc_wq);
3954
3955	return ret;
3956	}
3957
3958	static void
3959	nvme_fc_delete_controllers(struct nvme_fc_rport *rport)
3960	{
3961	struct nvme_fc_ctrl *ctrl;
3962
3963	spin_lock(lock: &rport->lock);
3964	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
3965	dev_warn(ctrl->ctrl.device,
3966	"NVME-FC{%d}: transport unloading: deleting ctrl\n",
3967	ctrl->cnum);
3968	nvme_delete_ctrl(ctrl: &ctrl->ctrl);
3969	}
3970	spin_unlock(lock: &rport->lock);
3971	}
3972
3973	static void
3974	nvme_fc_cleanup_for_unload(void)
3975	{
3976	struct nvme_fc_lport *lport;
3977	struct nvme_fc_rport *rport;
3978
3979	list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
3980	list_for_each_entry(rport, &lport->endp_list, endp_list) {
3981	nvme_fc_delete_controllers(rport);
3982	}
3983	}
3984	}
3985
3986	static void __exit nvme_fc_exit_module(void)
3987	{
3988	unsigned long flags;
3989	bool need_cleanup = false;
3990
3991	spin_lock_irqsave(&nvme_fc_lock, flags);
3992	nvme_fc_waiting_to_unload = true;
3993	if (!list_empty(head: &nvme_fc_lport_list)) {
3994	need_cleanup = true;
3995	nvme_fc_cleanup_for_unload();
3996	}
3997	spin_unlock_irqrestore(lock: &nvme_fc_lock, flags);
3998	if (need_cleanup) {
3999	pr_info("%s: waiting for ctlr deletes\n", __func__);
4000	wait_for_completion(&nvme_fc_unload_proceed);
4001	pr_info("%s: ctrl deletes complete\n", __func__);
4002	}
4003
4004	nvmf_unregister_transport(ops: &nvme_fc_transport);
4005
4006	ida_destroy(ida: &nvme_fc_local_port_cnt);
4007	ida_destroy(ida: &nvme_fc_ctrl_cnt);
4008
4009	device_destroy(cls: &fc_class, MKDEV(`0`, `0`));
4010	class_unregister(class: &fc_class);
4011	destroy_workqueue(wq: nvme_fc_wq);
4012	}
4013
4014	module_init(nvme_fc_init_module);
4015	module_exit(nvme_fc_exit_module);
4016
4017	MODULE_LICENSE("GPL v2");
4018

/* source code of linux/drivers/nvme/host/fc.c */