1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Generic SCSI-3 ALUA SCSI Device Handler |
4 | * |
5 | * Copyright (C) 2007-2010 Hannes Reinecke, SUSE Linux Products GmbH. |
6 | * All rights reserved. |
7 | */ |
8 | #include <linux/slab.h> |
9 | #include <linux/delay.h> |
10 | #include <linux/module.h> |
11 | #include <asm/unaligned.h> |
12 | #include <scsi/scsi.h> |
13 | #include <scsi/scsi_proto.h> |
14 | #include <scsi/scsi_dbg.h> |
15 | #include <scsi/scsi_eh.h> |
16 | #include <scsi/scsi_dh.h> |
17 | |
/* Handler name (used as the prefix of every log message) and version. */
#define ALUA_DH_NAME "alua"
#define ALUA_DH_VER "2.0"

/*
 * Bits of the "supported states" byte of an RTPG descriptor; the byte is
 * stored in alua_port_group::valid_states.
 */
#define TPGS_SUPPORT_NONE		0x00
#define TPGS_SUPPORT_OPTIMIZED		0x01
#define TPGS_SUPPORT_NONOPTIMIZED	0x02
#define TPGS_SUPPORT_STANDBY		0x04
#define TPGS_SUPPORT_UNAVAILABLE	0x08
#define TPGS_SUPPORT_LBA_DEPENDENT	0x10
#define TPGS_SUPPORT_OFFLINE		0x40
#define TPGS_SUPPORT_TRANSITION		0x80
/* Union of all TPGS_SUPPORT_* bits above. */
#define TPGS_SUPPORT_ALL		0xdf

/* Format field in byte 4 of the RTPG response (extended header detection). */
#define RTPG_FMT_MASK			0x70
#define RTPG_FMT_EXT_HDR		0x10

/*
 * TPGS modes as compared against scsi_device_tpgs().  The negative value
 * is parenthesized so the macro is safe inside any expression.
 */
#define TPGS_MODE_UNINITIALIZED		(-1)
#define TPGS_MODE_NONE			0x0
#define TPGS_MODE_IMPLICIT		0x1
#define TPGS_MODE_EXPLICIT		0x2

/* Initial RTPG response buffer size in bytes. */
#define ALUA_RTPG_SIZE			128
/* Command timeout / default transition timeout in seconds (scaled by HZ). */
#define ALUA_FAILOVER_TIMEOUT		60
/* Retry count handed to the SCSI midlayer for RTPG/STPG/TUR. */
#define ALUA_FAILOVER_RETRIES		5
#define ALUA_RTPG_DELAY_MSECS		5
/* Delay in seconds before the RTPG work item is re-run. */
#define ALUA_RTPG_RETRY_DELAY		2

/* device handler flags */
#define ALUA_OPTIMIZE_STPG		0x01
#define ALUA_RTPG_EXT_HDR_UNSUPP	0x02
/* State machine flags */
#define ALUA_PG_RUN_RTPG		0x10
#define ALUA_PG_RUN_STPG		0x20
#define ALUA_PG_RUNNING			0x40
52 | |
53 | static uint optimize_stpg; |
54 | module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR); |
55 | MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0." ); |
56 | |
57 | static LIST_HEAD(port_group_list); |
58 | static DEFINE_SPINLOCK(port_group_lock); |
59 | static struct workqueue_struct *kaluad_wq; |
60 | |
61 | struct alua_port_group { |
62 | struct kref kref; |
63 | struct rcu_head rcu; |
64 | struct list_head node; |
65 | struct list_head dh_list; |
66 | unsigned char device_id_str[256]; |
67 | int device_id_len; |
68 | int group_id; |
69 | int tpgs; |
70 | int state; |
71 | int pref; |
72 | int valid_states; |
73 | unsigned flags; /* used for optimizing STPG */ |
74 | unsigned char transition_tmo; |
75 | unsigned long expiry; |
76 | unsigned long interval; |
77 | struct delayed_work rtpg_work; |
78 | spinlock_t lock; |
79 | struct list_head rtpg_list; |
80 | struct scsi_device *rtpg_sdev; |
81 | }; |
82 | |
83 | struct alua_dh_data { |
84 | struct list_head node; |
85 | struct alua_port_group __rcu *pg; |
86 | int group_id; |
87 | spinlock_t pg_lock; |
88 | struct scsi_device *sdev; |
89 | int init_error; |
90 | struct mutex init_mutex; |
91 | bool disabled; |
92 | }; |
93 | |
94 | struct alua_queue_data { |
95 | struct list_head entry; |
96 | activate_complete callback_fn; |
97 | void *callback_data; |
98 | }; |
99 | |
100 | #define ALUA_POLICY_SWITCH_CURRENT 0 |
101 | #define ALUA_POLICY_SWITCH_ALL 1 |
102 | |
103 | static void alua_rtpg_work(struct work_struct *work); |
104 | static bool alua_rtpg_queue(struct alua_port_group *pg, |
105 | struct scsi_device *sdev, |
106 | struct alua_queue_data *qdata, bool force); |
107 | static void alua_check(struct scsi_device *sdev, bool force); |
108 | |
109 | static void release_port_group(struct kref *kref) |
110 | { |
111 | struct alua_port_group *pg; |
112 | |
113 | pg = container_of(kref, struct alua_port_group, kref); |
114 | if (pg->rtpg_sdev) |
115 | flush_delayed_work(dwork: &pg->rtpg_work); |
116 | spin_lock(lock: &port_group_lock); |
117 | list_del(entry: &pg->node); |
118 | spin_unlock(lock: &port_group_lock); |
119 | kfree_rcu(pg, rcu); |
120 | } |
121 | |
122 | /* |
123 | * submit_rtpg - Issue a REPORT TARGET GROUP STATES command |
124 | * @sdev: sdev the command should be sent to |
125 | */ |
126 | static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff, |
127 | int bufflen, struct scsi_sense_hdr *sshdr, int flags) |
128 | { |
129 | u8 cdb[MAX_COMMAND_SIZE]; |
130 | blk_opf_t opf = REQ_OP_DRV_IN | REQ_FAILFAST_DEV | |
131 | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER; |
132 | const struct scsi_exec_args exec_args = { |
133 | .sshdr = sshdr, |
134 | }; |
135 | |
136 | /* Prepare the command. */ |
137 | memset(cdb, 0x0, MAX_COMMAND_SIZE); |
138 | cdb[0] = MAINTENANCE_IN; |
139 | if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP)) |
140 | cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT; |
141 | else |
142 | cdb[1] = MI_REPORT_TARGET_PGS; |
143 | put_unaligned_be32(val: bufflen, p: &cdb[6]); |
144 | |
145 | return scsi_execute_cmd(sdev, cmd: cdb, opf, buffer: buff, bufflen, |
146 | ALUA_FAILOVER_TIMEOUT * HZ, |
147 | ALUA_FAILOVER_RETRIES, args: &exec_args); |
148 | } |
149 | |
150 | /* |
151 | * submit_stpg - Issue a SET TARGET PORT GROUP command |
152 | * |
153 | * Currently we're only setting the current target port group state |
154 | * to 'active/optimized' and let the array firmware figure out |
155 | * the states of the remaining groups. |
156 | */ |
157 | static int submit_stpg(struct scsi_device *sdev, int group_id, |
158 | struct scsi_sense_hdr *sshdr) |
159 | { |
160 | u8 cdb[MAX_COMMAND_SIZE]; |
161 | unsigned char stpg_data[8]; |
162 | int stpg_len = 8; |
163 | blk_opf_t opf = REQ_OP_DRV_OUT | REQ_FAILFAST_DEV | |
164 | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER; |
165 | const struct scsi_exec_args exec_args = { |
166 | .sshdr = sshdr, |
167 | }; |
168 | |
169 | /* Prepare the data buffer */ |
170 | memset(stpg_data, 0, stpg_len); |
171 | stpg_data[4] = SCSI_ACCESS_STATE_OPTIMAL; |
172 | put_unaligned_be16(val: group_id, p: &stpg_data[6]); |
173 | |
174 | /* Prepare the command. */ |
175 | memset(cdb, 0x0, MAX_COMMAND_SIZE); |
176 | cdb[0] = MAINTENANCE_OUT; |
177 | cdb[1] = MO_SET_TARGET_PGS; |
178 | put_unaligned_be32(val: stpg_len, p: &cdb[6]); |
179 | |
180 | return scsi_execute_cmd(sdev, cmd: cdb, opf, buffer: stpg_data, |
181 | bufflen: stpg_len, ALUA_FAILOVER_TIMEOUT * HZ, |
182 | ALUA_FAILOVER_RETRIES, args: &exec_args); |
183 | } |
184 | |
185 | static struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size, |
186 | int group_id) |
187 | { |
188 | struct alua_port_group *pg; |
189 | |
190 | if (!id_str || !id_size || !strlen(id_str)) |
191 | return NULL; |
192 | |
193 | list_for_each_entry(pg, &port_group_list, node) { |
194 | if (pg->group_id != group_id) |
195 | continue; |
196 | if (!pg->device_id_len || pg->device_id_len != id_size) |
197 | continue; |
198 | if (strncmp(pg->device_id_str, id_str, id_size)) |
199 | continue; |
200 | if (!kref_get_unless_zero(kref: &pg->kref)) |
201 | continue; |
202 | return pg; |
203 | } |
204 | |
205 | return NULL; |
206 | } |
207 | |
208 | /* |
209 | * alua_alloc_pg - Allocate a new port_group structure |
210 | * @sdev: scsi device |
211 | * @group_id: port group id |
212 | * @tpgs: target port group settings |
213 | * |
214 | * Allocate a new port_group structure for a given |
215 | * device. |
216 | */ |
217 | static struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev, |
218 | int group_id, int tpgs) |
219 | { |
220 | struct alua_port_group *pg, *tmp_pg; |
221 | |
222 | pg = kzalloc(size: sizeof(struct alua_port_group), GFP_KERNEL); |
223 | if (!pg) |
224 | return ERR_PTR(error: -ENOMEM); |
225 | |
226 | pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str, |
227 | sizeof(pg->device_id_str)); |
228 | if (pg->device_id_len <= 0) { |
229 | /* |
230 | * TPGS supported but no device identification found. |
231 | * Generate private device identification. |
232 | */ |
233 | sdev_printk(KERN_INFO, sdev, |
234 | "%s: No device descriptors found\n" , |
235 | ALUA_DH_NAME); |
236 | pg->device_id_str[0] = '\0'; |
237 | pg->device_id_len = 0; |
238 | } |
239 | pg->group_id = group_id; |
240 | pg->tpgs = tpgs; |
241 | pg->state = SCSI_ACCESS_STATE_OPTIMAL; |
242 | pg->valid_states = TPGS_SUPPORT_ALL; |
243 | if (optimize_stpg) |
244 | pg->flags |= ALUA_OPTIMIZE_STPG; |
245 | kref_init(kref: &pg->kref); |
246 | INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work); |
247 | INIT_LIST_HEAD(list: &pg->rtpg_list); |
248 | INIT_LIST_HEAD(list: &pg->node); |
249 | INIT_LIST_HEAD(list: &pg->dh_list); |
250 | spin_lock_init(&pg->lock); |
251 | |
252 | spin_lock(lock: &port_group_lock); |
253 | tmp_pg = alua_find_get_pg(id_str: pg->device_id_str, id_size: pg->device_id_len, |
254 | group_id); |
255 | if (tmp_pg) { |
256 | spin_unlock(lock: &port_group_lock); |
257 | kfree(objp: pg); |
258 | return tmp_pg; |
259 | } |
260 | |
261 | list_add(new: &pg->node, head: &port_group_list); |
262 | spin_unlock(lock: &port_group_lock); |
263 | |
264 | return pg; |
265 | } |
266 | |
267 | /* |
268 | * alua_check_tpgs - Evaluate TPGS setting |
269 | * @sdev: device to be checked |
270 | * |
271 | * Examine the TPGS setting of the sdev to find out if ALUA |
272 | * is supported. |
273 | */ |
274 | static int alua_check_tpgs(struct scsi_device *sdev) |
275 | { |
276 | int tpgs = TPGS_MODE_NONE; |
277 | |
278 | /* |
279 | * ALUA support for non-disk devices is fraught with |
280 | * difficulties, so disable it for now. |
281 | */ |
282 | if (sdev->type != TYPE_DISK) { |
283 | sdev_printk(KERN_INFO, sdev, |
284 | "%s: disable for non-disk devices\n" , |
285 | ALUA_DH_NAME); |
286 | return tpgs; |
287 | } |
288 | |
289 | tpgs = scsi_device_tpgs(sdev); |
290 | switch (tpgs) { |
291 | case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT: |
292 | sdev_printk(KERN_INFO, sdev, |
293 | "%s: supports implicit and explicit TPGS\n" , |
294 | ALUA_DH_NAME); |
295 | break; |
296 | case TPGS_MODE_EXPLICIT: |
297 | sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n" , |
298 | ALUA_DH_NAME); |
299 | break; |
300 | case TPGS_MODE_IMPLICIT: |
301 | sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n" , |
302 | ALUA_DH_NAME); |
303 | break; |
304 | case TPGS_MODE_NONE: |
305 | sdev_printk(KERN_INFO, sdev, "%s: not supported\n" , |
306 | ALUA_DH_NAME); |
307 | break; |
308 | default: |
309 | sdev_printk(KERN_INFO, sdev, |
310 | "%s: unsupported TPGS setting %d\n" , |
311 | ALUA_DH_NAME, tpgs); |
312 | tpgs = TPGS_MODE_NONE; |
313 | break; |
314 | } |
315 | |
316 | return tpgs; |
317 | } |
318 | |
319 | /* |
320 | * alua_check_vpd - Evaluate INQUIRY vpd page 0x83 |
321 | * @sdev: device to be checked |
322 | * |
323 | * Extract the relative target port and the target port group |
324 | * descriptor from the list of identificators. |
325 | */ |
326 | static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h, |
327 | int tpgs) |
328 | { |
329 | int rel_port = -1, group_id; |
330 | struct alua_port_group *pg, *old_pg = NULL; |
331 | bool pg_updated = false; |
332 | unsigned long flags; |
333 | |
334 | group_id = scsi_vpd_tpg_id(sdev, &rel_port); |
335 | if (group_id < 0) { |
336 | /* |
337 | * Internal error; TPGS supported but required |
338 | * VPD identification descriptors not present. |
339 | * Disable ALUA support |
340 | */ |
341 | sdev_printk(KERN_INFO, sdev, |
342 | "%s: No target port descriptors found\n" , |
343 | ALUA_DH_NAME); |
344 | return SCSI_DH_DEV_UNSUPP; |
345 | } |
346 | |
347 | pg = alua_alloc_pg(sdev, group_id, tpgs); |
348 | if (IS_ERR(ptr: pg)) { |
349 | if (PTR_ERR(ptr: pg) == -ENOMEM) |
350 | return SCSI_DH_NOMEM; |
351 | return SCSI_DH_DEV_UNSUPP; |
352 | } |
353 | if (pg->device_id_len) |
354 | sdev_printk(KERN_INFO, sdev, |
355 | "%s: device %s port group %x rel port %x\n" , |
356 | ALUA_DH_NAME, pg->device_id_str, |
357 | group_id, rel_port); |
358 | else |
359 | sdev_printk(KERN_INFO, sdev, |
360 | "%s: port group %x rel port %x\n" , |
361 | ALUA_DH_NAME, group_id, rel_port); |
362 | |
363 | kref_get(kref: &pg->kref); |
364 | |
365 | /* Check for existing port group references */ |
366 | spin_lock(lock: &h->pg_lock); |
367 | old_pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock)); |
368 | if (old_pg != pg) { |
369 | /* port group has changed. Update to new port group */ |
370 | if (h->pg) { |
371 | spin_lock_irqsave(&old_pg->lock, flags); |
372 | list_del_rcu(entry: &h->node); |
373 | spin_unlock_irqrestore(lock: &old_pg->lock, flags); |
374 | } |
375 | rcu_assign_pointer(h->pg, pg); |
376 | pg_updated = true; |
377 | } |
378 | |
379 | spin_lock_irqsave(&pg->lock, flags); |
380 | if (pg_updated) |
381 | list_add_rcu(new: &h->node, head: &pg->dh_list); |
382 | spin_unlock_irqrestore(lock: &pg->lock, flags); |
383 | |
384 | spin_unlock(lock: &h->pg_lock); |
385 | |
386 | alua_rtpg_queue(pg, sdev, NULL, force: true); |
387 | kref_put(kref: &pg->kref, release: release_port_group); |
388 | |
389 | if (old_pg) |
390 | kref_put(kref: &old_pg->kref, release: release_port_group); |
391 | |
392 | return SCSI_DH_OK; |
393 | } |
394 | |
395 | static char print_alua_state(unsigned char state) |
396 | { |
397 | switch (state) { |
398 | case SCSI_ACCESS_STATE_OPTIMAL: |
399 | return 'A'; |
400 | case SCSI_ACCESS_STATE_ACTIVE: |
401 | return 'N'; |
402 | case SCSI_ACCESS_STATE_STANDBY: |
403 | return 'S'; |
404 | case SCSI_ACCESS_STATE_UNAVAILABLE: |
405 | return 'U'; |
406 | case SCSI_ACCESS_STATE_LBA: |
407 | return 'L'; |
408 | case SCSI_ACCESS_STATE_OFFLINE: |
409 | return 'O'; |
410 | case SCSI_ACCESS_STATE_TRANSITIONING: |
411 | return 'T'; |
412 | default: |
413 | return 'X'; |
414 | } |
415 | } |
416 | |
417 | static enum scsi_disposition alua_check_sense(struct scsi_device *sdev, |
418 | struct scsi_sense_hdr *sense_hdr) |
419 | { |
420 | struct alua_dh_data *h = sdev->handler_data; |
421 | struct alua_port_group *pg; |
422 | |
423 | switch (sense_hdr->sense_key) { |
424 | case NOT_READY: |
425 | if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) { |
426 | /* |
427 | * LUN Not Accessible - ALUA state transition |
428 | */ |
429 | rcu_read_lock(); |
430 | pg = rcu_dereference(h->pg); |
431 | if (pg) |
432 | pg->state = SCSI_ACCESS_STATE_TRANSITIONING; |
433 | rcu_read_unlock(); |
434 | alua_check(sdev, force: false); |
435 | return NEEDS_RETRY; |
436 | } |
437 | break; |
438 | case UNIT_ATTENTION: |
439 | if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) { |
440 | /* |
441 | * Power On, Reset, or Bus Device Reset. |
442 | * Might have obscured a state transition, |
443 | * so schedule a recheck. |
444 | */ |
445 | alua_check(sdev, force: true); |
446 | return ADD_TO_MLQUEUE; |
447 | } |
448 | if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04) |
449 | /* |
450 | * Device internal reset |
451 | */ |
452 | return ADD_TO_MLQUEUE; |
453 | if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01) |
454 | /* |
455 | * Mode Parameters Changed |
456 | */ |
457 | return ADD_TO_MLQUEUE; |
458 | if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) { |
459 | /* |
460 | * ALUA state changed |
461 | */ |
462 | alua_check(sdev, force: true); |
463 | return ADD_TO_MLQUEUE; |
464 | } |
465 | if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) { |
466 | /* |
467 | * Implicit ALUA state transition failed |
468 | */ |
469 | alua_check(sdev, force: true); |
470 | return ADD_TO_MLQUEUE; |
471 | } |
472 | if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03) |
473 | /* |
474 | * Inquiry data has changed |
475 | */ |
476 | return ADD_TO_MLQUEUE; |
477 | if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e) |
478 | /* |
479 | * REPORTED_LUNS_DATA_HAS_CHANGED is reported |
480 | * when switching controllers on targets like |
481 | * Intel Multi-Flex. We can just retry. |
482 | */ |
483 | return ADD_TO_MLQUEUE; |
484 | break; |
485 | } |
486 | |
487 | return SCSI_RETURN_NOT_HANDLED; |
488 | } |
489 | |
490 | /* |
491 | * alua_tur - Send a TEST UNIT READY |
492 | * @sdev: device to which the TEST UNIT READY command should be send |
493 | * |
494 | * Send a TEST UNIT READY to @sdev to figure out the device state |
495 | * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING, |
496 | * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise. |
497 | */ |
498 | static int alua_tur(struct scsi_device *sdev) |
499 | { |
500 | struct scsi_sense_hdr sense_hdr; |
501 | int retval; |
502 | |
503 | retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ, |
504 | ALUA_FAILOVER_RETRIES, sshdr: &sense_hdr); |
505 | if (sense_hdr.sense_key == NOT_READY && |
506 | sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) |
507 | return SCSI_DH_RETRY; |
508 | else if (retval) |
509 | return SCSI_DH_IO; |
510 | else |
511 | return SCSI_DH_OK; |
512 | } |
513 | |
514 | /* |
515 | * alua_rtpg - Evaluate REPORT TARGET GROUP STATES |
516 | * @sdev: the device to be evaluated. |
517 | * |
518 | * Evaluate the Target Port Group State. |
519 | * Returns SCSI_DH_DEV_OFFLINED if the path is |
520 | * found to be unusable. |
521 | */ |
522 | static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) |
523 | { |
524 | struct scsi_sense_hdr sense_hdr; |
525 | struct alua_port_group *tmp_pg; |
526 | int len, k, off, bufflen = ALUA_RTPG_SIZE; |
527 | int group_id_old, state_old, pref_old, valid_states_old; |
528 | unsigned char *desc, *buff; |
529 | unsigned err; |
530 | int retval; |
531 | unsigned int tpg_desc_tbl_off; |
532 | unsigned char orig_transition_tmo; |
533 | unsigned long flags; |
534 | bool transitioning_sense = false; |
535 | |
536 | group_id_old = pg->group_id; |
537 | state_old = pg->state; |
538 | pref_old = pg->pref; |
539 | valid_states_old = pg->valid_states; |
540 | |
541 | if (!pg->expiry) { |
542 | unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ; |
543 | |
544 | if (pg->transition_tmo) |
545 | transition_tmo = pg->transition_tmo * HZ; |
546 | |
547 | pg->expiry = round_jiffies_up(j: jiffies + transition_tmo); |
548 | } |
549 | |
550 | buff = kzalloc(size: bufflen, GFP_KERNEL); |
551 | if (!buff) |
552 | return SCSI_DH_DEV_TEMP_BUSY; |
553 | |
554 | retry: |
555 | err = 0; |
556 | retval = submit_rtpg(sdev, buff, bufflen, sshdr: &sense_hdr, flags: pg->flags); |
557 | |
558 | if (retval) { |
559 | /* |
560 | * Some (broken) implementations have a habit of returning |
561 | * an error during things like firmware update etc. |
562 | * But if the target only supports active/optimized there's |
563 | * not much we can do; it's not that we can switch paths |
564 | * or anything. |
565 | * So ignore any errors to avoid spurious failures during |
566 | * path failover. |
567 | */ |
568 | if ((pg->valid_states & ~TPGS_SUPPORT_OPTIMIZED) == 0) { |
569 | sdev_printk(KERN_INFO, sdev, |
570 | "%s: ignoring rtpg result %d\n" , |
571 | ALUA_DH_NAME, retval); |
572 | kfree(objp: buff); |
573 | return SCSI_DH_OK; |
574 | } |
575 | if (retval < 0 || !scsi_sense_valid(sshdr: &sense_hdr)) { |
576 | sdev_printk(KERN_INFO, sdev, |
577 | "%s: rtpg failed, result %d\n" , |
578 | ALUA_DH_NAME, retval); |
579 | kfree(objp: buff); |
580 | if (retval < 0) |
581 | return SCSI_DH_DEV_TEMP_BUSY; |
582 | if (host_byte(retval) == DID_NO_CONNECT) |
583 | return SCSI_DH_RES_TEMP_UNAVAIL; |
584 | return SCSI_DH_IO; |
585 | } |
586 | |
587 | /* |
588 | * submit_rtpg() has failed on existing arrays |
589 | * when requesting extended header info, and |
590 | * the array doesn't support extended headers, |
591 | * even though it shouldn't according to T10. |
592 | * The retry without rtpg_ext_hdr_req set |
593 | * handles this. |
594 | * Note: some arrays return a sense key of ILLEGAL_REQUEST |
595 | * with ASC 00h if they don't support the extended header. |
596 | */ |
597 | if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) && |
598 | sense_hdr.sense_key == ILLEGAL_REQUEST) { |
599 | pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP; |
600 | goto retry; |
601 | } |
602 | /* |
603 | * If the array returns with 'ALUA state transition' |
604 | * sense code here it cannot return RTPG data during |
605 | * transition. So set the state to 'transitioning' directly. |
606 | */ |
607 | if (sense_hdr.sense_key == NOT_READY && |
608 | sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) { |
609 | transitioning_sense = true; |
610 | goto skip_rtpg; |
611 | } |
612 | /* |
613 | * Retry on any other UNIT ATTENTION occurred. |
614 | */ |
615 | if (sense_hdr.sense_key == UNIT_ATTENTION) |
616 | err = SCSI_DH_RETRY; |
617 | if (err == SCSI_DH_RETRY && |
618 | pg->expiry != 0 && time_before(jiffies, pg->expiry)) { |
619 | sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n" , |
620 | ALUA_DH_NAME); |
621 | scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); |
622 | kfree(objp: buff); |
623 | return err; |
624 | } |
625 | sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n" , |
626 | ALUA_DH_NAME); |
627 | scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); |
628 | kfree(objp: buff); |
629 | pg->expiry = 0; |
630 | return SCSI_DH_IO; |
631 | } |
632 | |
633 | len = get_unaligned_be32(p: &buff[0]) + 4; |
634 | |
635 | if (len > bufflen) { |
636 | /* Resubmit with the correct length */ |
637 | kfree(objp: buff); |
638 | bufflen = len; |
639 | buff = kmalloc(size: bufflen, GFP_KERNEL); |
640 | if (!buff) { |
641 | sdev_printk(KERN_WARNING, sdev, |
642 | "%s: kmalloc buffer failed\n" ,__func__); |
643 | /* Temporary failure, bypass */ |
644 | pg->expiry = 0; |
645 | return SCSI_DH_DEV_TEMP_BUSY; |
646 | } |
647 | goto retry; |
648 | } |
649 | |
650 | orig_transition_tmo = pg->transition_tmo; |
651 | if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0) |
652 | pg->transition_tmo = buff[5]; |
653 | else |
654 | pg->transition_tmo = ALUA_FAILOVER_TIMEOUT; |
655 | |
656 | if (orig_transition_tmo != pg->transition_tmo) { |
657 | sdev_printk(KERN_INFO, sdev, |
658 | "%s: transition timeout set to %d seconds\n" , |
659 | ALUA_DH_NAME, pg->transition_tmo); |
660 | pg->expiry = jiffies + pg->transition_tmo * HZ; |
661 | } |
662 | |
663 | if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR) |
664 | tpg_desc_tbl_off = 8; |
665 | else |
666 | tpg_desc_tbl_off = 4; |
667 | |
668 | for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off; |
669 | k < len; |
670 | k += off, desc += off) { |
671 | u16 group_id = get_unaligned_be16(p: &desc[2]); |
672 | |
673 | spin_lock_irqsave(&port_group_lock, flags); |
674 | tmp_pg = alua_find_get_pg(id_str: pg->device_id_str, id_size: pg->device_id_len, |
675 | group_id); |
676 | spin_unlock_irqrestore(lock: &port_group_lock, flags); |
677 | if (tmp_pg) { |
678 | if (spin_trylock_irqsave(&tmp_pg->lock, flags)) { |
679 | if ((tmp_pg == pg) || |
680 | !(tmp_pg->flags & ALUA_PG_RUNNING)) { |
681 | struct alua_dh_data *h; |
682 | |
683 | tmp_pg->state = desc[0] & 0x0f; |
684 | tmp_pg->pref = desc[0] >> 7; |
685 | rcu_read_lock(); |
686 | list_for_each_entry_rcu(h, |
687 | &tmp_pg->dh_list, node) { |
688 | if (!h->sdev) |
689 | continue; |
690 | h->sdev->access_state = desc[0]; |
691 | } |
692 | rcu_read_unlock(); |
693 | } |
694 | if (tmp_pg == pg) |
695 | tmp_pg->valid_states = desc[1]; |
696 | spin_unlock_irqrestore(lock: &tmp_pg->lock, flags); |
697 | } |
698 | kref_put(kref: &tmp_pg->kref, release: release_port_group); |
699 | } |
700 | off = 8 + (desc[7] * 4); |
701 | } |
702 | |
703 | skip_rtpg: |
704 | spin_lock_irqsave(&pg->lock, flags); |
705 | if (transitioning_sense) |
706 | pg->state = SCSI_ACCESS_STATE_TRANSITIONING; |
707 | |
708 | if (group_id_old != pg->group_id || state_old != pg->state || |
709 | pref_old != pg->pref || valid_states_old != pg->valid_states) |
710 | sdev_printk(KERN_INFO, sdev, |
711 | "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n" , |
712 | ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state), |
713 | pg->pref ? "preferred" : "non-preferred" , |
714 | pg->valid_states&TPGS_SUPPORT_TRANSITION?'T':'t', |
715 | pg->valid_states&TPGS_SUPPORT_OFFLINE?'O':'o', |
716 | pg->valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l', |
717 | pg->valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u', |
718 | pg->valid_states&TPGS_SUPPORT_STANDBY?'S':'s', |
719 | pg->valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n', |
720 | pg->valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a'); |
721 | |
722 | switch (pg->state) { |
723 | case SCSI_ACCESS_STATE_TRANSITIONING: |
724 | if (time_before(jiffies, pg->expiry)) { |
725 | /* State transition, retry */ |
726 | pg->interval = ALUA_RTPG_RETRY_DELAY; |
727 | err = SCSI_DH_RETRY; |
728 | } else { |
729 | struct alua_dh_data *h; |
730 | |
731 | /* Transitioning time exceeded, set port to standby */ |
732 | err = SCSI_DH_IO; |
733 | pg->state = SCSI_ACCESS_STATE_STANDBY; |
734 | pg->expiry = 0; |
735 | rcu_read_lock(); |
736 | list_for_each_entry_rcu(h, &pg->dh_list, node) { |
737 | if (!h->sdev) |
738 | continue; |
739 | h->sdev->access_state = |
740 | (pg->state & SCSI_ACCESS_STATE_MASK); |
741 | if (pg->pref) |
742 | h->sdev->access_state |= |
743 | SCSI_ACCESS_STATE_PREFERRED; |
744 | } |
745 | rcu_read_unlock(); |
746 | } |
747 | break; |
748 | case SCSI_ACCESS_STATE_OFFLINE: |
749 | /* Path unusable */ |
750 | err = SCSI_DH_DEV_OFFLINED; |
751 | pg->expiry = 0; |
752 | break; |
753 | default: |
754 | /* Useable path if active */ |
755 | err = SCSI_DH_OK; |
756 | pg->expiry = 0; |
757 | break; |
758 | } |
759 | spin_unlock_irqrestore(lock: &pg->lock, flags); |
760 | kfree(objp: buff); |
761 | return err; |
762 | } |
763 | |
764 | /* |
765 | * alua_stpg - Issue a SET TARGET PORT GROUP command |
766 | * |
767 | * Issue a SET TARGET PORT GROUP command and evaluate the |
768 | * response. Returns SCSI_DH_RETRY per default to trigger |
769 | * a re-evaluation of the target group state or SCSI_DH_OK |
770 | * if no further action needs to be taken. |
771 | */ |
772 | static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg) |
773 | { |
774 | int retval; |
775 | struct scsi_sense_hdr sense_hdr; |
776 | |
777 | if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) { |
778 | /* Only implicit ALUA supported, retry */ |
779 | return SCSI_DH_RETRY; |
780 | } |
781 | switch (pg->state) { |
782 | case SCSI_ACCESS_STATE_OPTIMAL: |
783 | return SCSI_DH_OK; |
784 | case SCSI_ACCESS_STATE_ACTIVE: |
785 | if ((pg->flags & ALUA_OPTIMIZE_STPG) && |
786 | !pg->pref && |
787 | (pg->tpgs & TPGS_MODE_IMPLICIT)) |
788 | return SCSI_DH_OK; |
789 | break; |
790 | case SCSI_ACCESS_STATE_STANDBY: |
791 | case SCSI_ACCESS_STATE_UNAVAILABLE: |
792 | break; |
793 | case SCSI_ACCESS_STATE_OFFLINE: |
794 | return SCSI_DH_IO; |
795 | case SCSI_ACCESS_STATE_TRANSITIONING: |
796 | break; |
797 | default: |
798 | sdev_printk(KERN_INFO, sdev, |
799 | "%s: stpg failed, unhandled TPGS state %d" , |
800 | ALUA_DH_NAME, pg->state); |
801 | return SCSI_DH_NOSYS; |
802 | } |
803 | retval = submit_stpg(sdev, group_id: pg->group_id, sshdr: &sense_hdr); |
804 | |
805 | if (retval) { |
806 | if (retval < 0 || !scsi_sense_valid(sshdr: &sense_hdr)) { |
807 | sdev_printk(KERN_INFO, sdev, |
808 | "%s: stpg failed, result %d" , |
809 | ALUA_DH_NAME, retval); |
810 | if (retval < 0) |
811 | return SCSI_DH_DEV_TEMP_BUSY; |
812 | } else { |
813 | sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n" , |
814 | ALUA_DH_NAME); |
815 | scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); |
816 | } |
817 | } |
818 | /* Retry RTPG */ |
819 | return SCSI_DH_RETRY; |
820 | } |
821 | |
822 | /* |
823 | * The caller must call scsi_device_put() on the returned pointer if it is not |
824 | * NULL. |
825 | */ |
826 | static struct scsi_device * __must_check |
827 | alua_rtpg_select_sdev(struct alua_port_group *pg) |
828 | { |
829 | struct alua_dh_data *h; |
830 | struct scsi_device *sdev = NULL, *prev_sdev; |
831 | |
832 | lockdep_assert_held(&pg->lock); |
833 | if (WARN_ON(!pg->rtpg_sdev)) |
834 | return NULL; |
835 | |
836 | /* |
837 | * RCU protection isn't necessary for dh_list here |
838 | * as we hold pg->lock, but for access to h->pg. |
839 | */ |
840 | rcu_read_lock(); |
841 | list_for_each_entry_rcu(h, &pg->dh_list, node) { |
842 | if (!h->sdev) |
843 | continue; |
844 | if (h->sdev == pg->rtpg_sdev) { |
845 | h->disabled = true; |
846 | continue; |
847 | } |
848 | if (rcu_dereference(h->pg) == pg && |
849 | !h->disabled && |
850 | !scsi_device_get(h->sdev)) { |
851 | sdev = h->sdev; |
852 | break; |
853 | } |
854 | } |
855 | rcu_read_unlock(); |
856 | |
857 | if (!sdev) { |
858 | pr_warn("%s: no device found for rtpg\n" , |
859 | (pg->device_id_len ? |
860 | (char *)pg->device_id_str : "(nameless PG)" )); |
861 | return NULL; |
862 | } |
863 | |
864 | sdev_printk(KERN_INFO, sdev, "rtpg retry on different device\n" ); |
865 | |
866 | prev_sdev = pg->rtpg_sdev; |
867 | pg->rtpg_sdev = sdev; |
868 | |
869 | return prev_sdev; |
870 | } |
871 | |
872 | static void alua_rtpg_work(struct work_struct *work) |
873 | { |
874 | struct alua_port_group *pg = |
875 | container_of(work, struct alua_port_group, rtpg_work.work); |
876 | struct scsi_device *sdev, *prev_sdev = NULL; |
877 | LIST_HEAD(qdata_list); |
878 | int err = SCSI_DH_OK; |
879 | struct alua_queue_data *qdata, *tmp; |
880 | struct alua_dh_data *h; |
881 | unsigned long flags; |
882 | |
883 | spin_lock_irqsave(&pg->lock, flags); |
884 | sdev = pg->rtpg_sdev; |
885 | if (!sdev) { |
886 | WARN_ON(pg->flags & ALUA_PG_RUN_RTPG); |
887 | WARN_ON(pg->flags & ALUA_PG_RUN_STPG); |
888 | spin_unlock_irqrestore(lock: &pg->lock, flags); |
889 | kref_put(kref: &pg->kref, release: release_port_group); |
890 | return; |
891 | } |
892 | pg->flags |= ALUA_PG_RUNNING; |
893 | if (pg->flags & ALUA_PG_RUN_RTPG) { |
894 | int state = pg->state; |
895 | |
896 | pg->flags &= ~ALUA_PG_RUN_RTPG; |
897 | spin_unlock_irqrestore(lock: &pg->lock, flags); |
898 | if (state == SCSI_ACCESS_STATE_TRANSITIONING) { |
899 | if (alua_tur(sdev) == SCSI_DH_RETRY) { |
900 | spin_lock_irqsave(&pg->lock, flags); |
901 | pg->flags &= ~ALUA_PG_RUNNING; |
902 | pg->flags |= ALUA_PG_RUN_RTPG; |
903 | if (!pg->interval) |
904 | pg->interval = ALUA_RTPG_RETRY_DELAY; |
905 | spin_unlock_irqrestore(lock: &pg->lock, flags); |
906 | queue_delayed_work(wq: kaluad_wq, dwork: &pg->rtpg_work, |
907 | delay: pg->interval * HZ); |
908 | return; |
909 | } |
910 | /* Send RTPG on failure or if TUR indicates SUCCESS */ |
911 | } |
912 | err = alua_rtpg(sdev, pg); |
913 | spin_lock_irqsave(&pg->lock, flags); |
914 | |
915 | /* If RTPG failed on the current device, try using another */ |
916 | if (err == SCSI_DH_RES_TEMP_UNAVAIL && |
917 | (prev_sdev = alua_rtpg_select_sdev(pg))) |
918 | err = SCSI_DH_IMM_RETRY; |
919 | |
920 | if (err == SCSI_DH_RETRY || err == SCSI_DH_IMM_RETRY || |
921 | pg->flags & ALUA_PG_RUN_RTPG) { |
922 | pg->flags &= ~ALUA_PG_RUNNING; |
923 | if (err == SCSI_DH_IMM_RETRY) |
924 | pg->interval = 0; |
925 | else if (!pg->interval && !(pg->flags & ALUA_PG_RUN_RTPG)) |
926 | pg->interval = ALUA_RTPG_RETRY_DELAY; |
927 | pg->flags |= ALUA_PG_RUN_RTPG; |
928 | spin_unlock_irqrestore(lock: &pg->lock, flags); |
929 | goto queue_rtpg; |
930 | } |
931 | if (err != SCSI_DH_OK) |
932 | pg->flags &= ~ALUA_PG_RUN_STPG; |
933 | } |
934 | if (pg->flags & ALUA_PG_RUN_STPG) { |
935 | pg->flags &= ~ALUA_PG_RUN_STPG; |
936 | spin_unlock_irqrestore(lock: &pg->lock, flags); |
937 | err = alua_stpg(sdev, pg); |
938 | spin_lock_irqsave(&pg->lock, flags); |
939 | if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) { |
940 | pg->flags |= ALUA_PG_RUN_RTPG; |
941 | pg->interval = 0; |
942 | pg->flags &= ~ALUA_PG_RUNNING; |
943 | spin_unlock_irqrestore(lock: &pg->lock, flags); |
944 | goto queue_rtpg; |
945 | } |
946 | } |
947 | |
948 | list_splice_init(list: &pg->rtpg_list, head: &qdata_list); |
949 | /* |
950 | * We went through an RTPG, for good or bad. |
951 | * Re-enable all devices for the next attempt. |
952 | */ |
953 | list_for_each_entry(h, &pg->dh_list, node) |
954 | h->disabled = false; |
955 | pg->rtpg_sdev = NULL; |
956 | spin_unlock_irqrestore(lock: &pg->lock, flags); |
957 | |
958 | if (prev_sdev) |
959 | scsi_device_put(prev_sdev); |
960 | |
961 | list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) { |
962 | list_del(entry: &qdata->entry); |
963 | if (qdata->callback_fn) |
964 | qdata->callback_fn(qdata->callback_data, err); |
965 | kfree(objp: qdata); |
966 | } |
967 | spin_lock_irqsave(&pg->lock, flags); |
968 | pg->flags &= ~ALUA_PG_RUNNING; |
969 | spin_unlock_irqrestore(lock: &pg->lock, flags); |
970 | scsi_device_put(sdev); |
971 | kref_put(kref: &pg->kref, release: release_port_group); |
972 | return; |
973 | |
974 | queue_rtpg: |
975 | if (prev_sdev) |
976 | scsi_device_put(prev_sdev); |
977 | queue_delayed_work(wq: kaluad_wq, dwork: &pg->rtpg_work, delay: pg->interval * HZ); |
978 | } |
979 | |
980 | /** |
981 | * alua_rtpg_queue() - cause RTPG to be submitted asynchronously |
982 | * @pg: ALUA port group associated with @sdev. |
983 | * @sdev: SCSI device for which to submit an RTPG. |
984 | * @qdata: Information about the callback to invoke after the RTPG. |
985 | * @force: Whether or not to submit an RTPG if a work item that will submit an |
986 | * RTPG already has been scheduled. |
987 | * |
988 | * Returns true if and only if alua_rtpg_work() will be called asynchronously. |
989 | * That function is responsible for calling @qdata->fn(). |
990 | * |
991 | * Context: may be called from atomic context (alua_check()) only if the caller |
992 | * holds an sdev reference. |
993 | */ |
994 | static bool alua_rtpg_queue(struct alua_port_group *pg, |
995 | struct scsi_device *sdev, |
996 | struct alua_queue_data *qdata, bool force) |
997 | { |
998 | int start_queue = 0; |
999 | unsigned long flags; |
1000 | |
1001 | if (WARN_ON_ONCE(!pg) || scsi_device_get(sdev)) |
1002 | return false; |
1003 | |
1004 | spin_lock_irqsave(&pg->lock, flags); |
1005 | if (qdata) { |
1006 | list_add_tail(new: &qdata->entry, head: &pg->rtpg_list); |
1007 | pg->flags |= ALUA_PG_RUN_STPG; |
1008 | force = true; |
1009 | } |
1010 | if (pg->rtpg_sdev == NULL) { |
1011 | struct alua_dh_data *h = sdev->handler_data; |
1012 | |
1013 | rcu_read_lock(); |
1014 | if (h && rcu_dereference(h->pg) == pg) { |
1015 | pg->interval = 0; |
1016 | pg->flags |= ALUA_PG_RUN_RTPG; |
1017 | kref_get(kref: &pg->kref); |
1018 | pg->rtpg_sdev = sdev; |
1019 | start_queue = 1; |
1020 | } |
1021 | rcu_read_unlock(); |
1022 | } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) { |
1023 | pg->flags |= ALUA_PG_RUN_RTPG; |
1024 | /* Do not queue if the worker is already running */ |
1025 | if (!(pg->flags & ALUA_PG_RUNNING)) { |
1026 | kref_get(kref: &pg->kref); |
1027 | start_queue = 1; |
1028 | } |
1029 | } |
1030 | |
1031 | spin_unlock_irqrestore(lock: &pg->lock, flags); |
1032 | |
1033 | if (start_queue) { |
1034 | if (queue_delayed_work(wq: kaluad_wq, dwork: &pg->rtpg_work, |
1035 | delay: msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) |
1036 | sdev = NULL; |
1037 | else |
1038 | kref_put(kref: &pg->kref, release: release_port_group); |
1039 | } |
1040 | if (sdev) |
1041 | scsi_device_put(sdev); |
1042 | |
1043 | return true; |
1044 | } |
1045 | |
1046 | /* |
1047 | * alua_initialize - Initialize ALUA state |
1048 | * @sdev: the device to be initialized |
1049 | * |
1050 | * For the prep_fn to work correctly we have |
1051 | * to initialize the ALUA state for the device. |
1052 | */ |
1053 | static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h) |
1054 | { |
1055 | int err = SCSI_DH_DEV_UNSUPP, tpgs; |
1056 | |
1057 | mutex_lock(&h->init_mutex); |
1058 | h->disabled = false; |
1059 | tpgs = alua_check_tpgs(sdev); |
1060 | if (tpgs != TPGS_MODE_NONE) |
1061 | err = alua_check_vpd(sdev, h, tpgs); |
1062 | h->init_error = err; |
1063 | mutex_unlock(lock: &h->init_mutex); |
1064 | return err; |
1065 | } |
1066 | /* |
1067 | * alua_set_params - set/unset the optimize flag |
1068 | * @sdev: device on the path to be activated |
1069 | * params - parameters in the following format |
1070 | * "no_of_params\0param1\0param2\0param3\0...\0" |
1071 | * For example, to set the flag pass the following parameters |
1072 | * from multipath.conf |
1073 | * hardware_handler "2 alua 1" |
1074 | */ |
1075 | static int alua_set_params(struct scsi_device *sdev, const char *params) |
1076 | { |
1077 | struct alua_dh_data *h = sdev->handler_data; |
1078 | struct alua_port_group *pg = NULL; |
1079 | unsigned int optimize = 0, argc; |
1080 | const char *p = params; |
1081 | int result = SCSI_DH_OK; |
1082 | unsigned long flags; |
1083 | |
1084 | if ((sscanf(params, "%u" , &argc) != 1) || (argc != 1)) |
1085 | return -EINVAL; |
1086 | |
1087 | while (*p++) |
1088 | ; |
1089 | if ((sscanf(p, "%u" , &optimize) != 1) || (optimize > 1)) |
1090 | return -EINVAL; |
1091 | |
1092 | rcu_read_lock(); |
1093 | pg = rcu_dereference(h->pg); |
1094 | if (!pg) { |
1095 | rcu_read_unlock(); |
1096 | return -ENXIO; |
1097 | } |
1098 | spin_lock_irqsave(&pg->lock, flags); |
1099 | if (optimize) |
1100 | pg->flags |= ALUA_OPTIMIZE_STPG; |
1101 | else |
1102 | pg->flags &= ~ALUA_OPTIMIZE_STPG; |
1103 | spin_unlock_irqrestore(lock: &pg->lock, flags); |
1104 | rcu_read_unlock(); |
1105 | |
1106 | return result; |
1107 | } |
1108 | |
1109 | /* |
1110 | * alua_activate - activate a path |
1111 | * @sdev: device on the path to be activated |
1112 | * |
1113 | * We're currently switching the port group to be activated only and |
1114 | * let the array figure out the rest. |
1115 | * There may be other arrays which require us to switch all port groups |
1116 | * based on a certain policy. But until we actually encounter them it |
1117 | * should be okay. |
1118 | */ |
1119 | static int alua_activate(struct scsi_device *sdev, |
1120 | activate_complete fn, void *data) |
1121 | { |
1122 | struct alua_dh_data *h = sdev->handler_data; |
1123 | int err = SCSI_DH_OK; |
1124 | struct alua_queue_data *qdata; |
1125 | struct alua_port_group *pg; |
1126 | |
1127 | qdata = kzalloc(size: sizeof(*qdata), GFP_KERNEL); |
1128 | if (!qdata) { |
1129 | err = SCSI_DH_RES_TEMP_UNAVAIL; |
1130 | goto out; |
1131 | } |
1132 | qdata->callback_fn = fn; |
1133 | qdata->callback_data = data; |
1134 | |
1135 | mutex_lock(&h->init_mutex); |
1136 | rcu_read_lock(); |
1137 | pg = rcu_dereference(h->pg); |
1138 | if (!pg || !kref_get_unless_zero(kref: &pg->kref)) { |
1139 | rcu_read_unlock(); |
1140 | kfree(objp: qdata); |
1141 | err = h->init_error; |
1142 | mutex_unlock(lock: &h->init_mutex); |
1143 | goto out; |
1144 | } |
1145 | rcu_read_unlock(); |
1146 | mutex_unlock(lock: &h->init_mutex); |
1147 | |
1148 | if (alua_rtpg_queue(pg, sdev, qdata, force: true)) { |
1149 | fn = NULL; |
1150 | } else { |
1151 | kfree(objp: qdata); |
1152 | err = SCSI_DH_DEV_OFFLINED; |
1153 | } |
1154 | kref_put(kref: &pg->kref, release: release_port_group); |
1155 | out: |
1156 | if (fn) |
1157 | fn(data, err); |
1158 | return 0; |
1159 | } |
1160 | |
1161 | /* |
1162 | * alua_check - check path status |
1163 | * @sdev: device on the path to be checked |
1164 | * |
1165 | * Check the device status |
1166 | */ |
1167 | static void alua_check(struct scsi_device *sdev, bool force) |
1168 | { |
1169 | struct alua_dh_data *h = sdev->handler_data; |
1170 | struct alua_port_group *pg; |
1171 | |
1172 | rcu_read_lock(); |
1173 | pg = rcu_dereference(h->pg); |
1174 | if (!pg || !kref_get_unless_zero(kref: &pg->kref)) { |
1175 | rcu_read_unlock(); |
1176 | return; |
1177 | } |
1178 | rcu_read_unlock(); |
1179 | alua_rtpg_queue(pg, sdev, NULL, force); |
1180 | kref_put(kref: &pg->kref, release: release_port_group); |
1181 | } |
1182 | |
1183 | /* |
1184 | * alua_prep_fn - request callback |
1185 | * |
1186 | * Fail I/O to all paths not in state |
1187 | * active/optimized or active/non-optimized. |
1188 | */ |
1189 | static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req) |
1190 | { |
1191 | struct alua_dh_data *h = sdev->handler_data; |
1192 | struct alua_port_group *pg; |
1193 | unsigned char state = SCSI_ACCESS_STATE_OPTIMAL; |
1194 | |
1195 | rcu_read_lock(); |
1196 | pg = rcu_dereference(h->pg); |
1197 | if (pg) |
1198 | state = pg->state; |
1199 | rcu_read_unlock(); |
1200 | |
1201 | switch (state) { |
1202 | case SCSI_ACCESS_STATE_OPTIMAL: |
1203 | case SCSI_ACCESS_STATE_ACTIVE: |
1204 | case SCSI_ACCESS_STATE_LBA: |
1205 | case SCSI_ACCESS_STATE_TRANSITIONING: |
1206 | return BLK_STS_OK; |
1207 | default: |
1208 | req->rq_flags |= RQF_QUIET; |
1209 | return BLK_STS_IOERR; |
1210 | } |
1211 | } |
1212 | |
1213 | static void alua_rescan(struct scsi_device *sdev) |
1214 | { |
1215 | struct alua_dh_data *h = sdev->handler_data; |
1216 | |
1217 | alua_initialize(sdev, h); |
1218 | } |
1219 | |
1220 | /* |
1221 | * alua_bus_attach - Attach device handler |
1222 | * @sdev: device to be attached to |
1223 | */ |
1224 | static int alua_bus_attach(struct scsi_device *sdev) |
1225 | { |
1226 | struct alua_dh_data *h; |
1227 | int err; |
1228 | |
1229 | h = kzalloc(size: sizeof(*h) , GFP_KERNEL); |
1230 | if (!h) |
1231 | return SCSI_DH_NOMEM; |
1232 | spin_lock_init(&h->pg_lock); |
1233 | rcu_assign_pointer(h->pg, NULL); |
1234 | h->init_error = SCSI_DH_OK; |
1235 | h->sdev = sdev; |
1236 | INIT_LIST_HEAD(list: &h->node); |
1237 | |
1238 | mutex_init(&h->init_mutex); |
1239 | err = alua_initialize(sdev, h); |
1240 | if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED) |
1241 | goto failed; |
1242 | |
1243 | sdev->handler_data = h; |
1244 | return SCSI_DH_OK; |
1245 | failed: |
1246 | kfree(objp: h); |
1247 | return err; |
1248 | } |
1249 | |
1250 | /* |
1251 | * alua_bus_detach - Detach device handler |
1252 | * @sdev: device to be detached from |
1253 | */ |
1254 | static void alua_bus_detach(struct scsi_device *sdev) |
1255 | { |
1256 | struct alua_dh_data *h = sdev->handler_data; |
1257 | struct alua_port_group *pg; |
1258 | |
1259 | spin_lock(lock: &h->pg_lock); |
1260 | pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock)); |
1261 | rcu_assign_pointer(h->pg, NULL); |
1262 | spin_unlock(lock: &h->pg_lock); |
1263 | if (pg) { |
1264 | spin_lock_irq(lock: &pg->lock); |
1265 | list_del_rcu(entry: &h->node); |
1266 | spin_unlock_irq(lock: &pg->lock); |
1267 | kref_put(kref: &pg->kref, release: release_port_group); |
1268 | } |
1269 | sdev->handler_data = NULL; |
1270 | synchronize_rcu(); |
1271 | kfree(objp: h); |
1272 | } |
1273 | |
1274 | static struct scsi_device_handler alua_dh = { |
1275 | .name = ALUA_DH_NAME, |
1276 | .module = THIS_MODULE, |
1277 | .attach = alua_bus_attach, |
1278 | .detach = alua_bus_detach, |
1279 | .prep_fn = alua_prep_fn, |
1280 | .check_sense = alua_check_sense, |
1281 | .activate = alua_activate, |
1282 | .rescan = alua_rescan, |
1283 | .set_params = alua_set_params, |
1284 | }; |
1285 | |
1286 | static int __init alua_init(void) |
1287 | { |
1288 | int r; |
1289 | |
1290 | kaluad_wq = alloc_workqueue(fmt: "kaluad" , flags: WQ_MEM_RECLAIM, max_active: 0); |
1291 | if (!kaluad_wq) |
1292 | return -ENOMEM; |
1293 | |
1294 | r = scsi_register_device_handler(scsi_dh: &alua_dh); |
1295 | if (r != 0) { |
1296 | printk(KERN_ERR "%s: Failed to register scsi device handler" , |
1297 | ALUA_DH_NAME); |
1298 | destroy_workqueue(wq: kaluad_wq); |
1299 | } |
1300 | return r; |
1301 | } |
1302 | |
1303 | static void __exit alua_exit(void) |
1304 | { |
1305 | scsi_unregister_device_handler(scsi_dh: &alua_dh); |
1306 | destroy_workqueue(wq: kaluad_wq); |
1307 | } |
1308 | |
1309 | module_init(alua_init); |
1310 | module_exit(alua_exit); |
1311 | |
1312 | MODULE_DESCRIPTION("DM Multipath ALUA support" ); |
1313 | MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>" ); |
1314 | MODULE_LICENSE("GPL" ); |
1315 | MODULE_VERSION(ALUA_DH_VER); |
1316 | |