1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * NVM Express device driver |
4 | * Copyright (c) 2011-2014, Intel Corporation. |
5 | */ |
6 | |
7 | #include <linux/blkdev.h> |
8 | #include <linux/blk-mq.h> |
9 | #include <linux/delay.h> |
10 | #include <linux/errno.h> |
11 | #include <linux/hdreg.h> |
12 | #include <linux/kernel.h> |
13 | #include <linux/module.h> |
14 | #include <linux/list_sort.h> |
15 | #include <linux/slab.h> |
16 | #include <linux/types.h> |
17 | #include <linux/pr.h> |
18 | #include <linux/ptrace.h> |
19 | #include <linux/nvme_ioctl.h> |
20 | #include <linux/t10-pi.h> |
21 | #include <linux/pm_qos.h> |
22 | #include <asm/unaligned.h> |
23 | |
24 | #define CREATE_TRACE_POINTS |
25 | #include "trace.h" |
26 | |
27 | #include "nvme.h" |
28 | #include "fabrics.h" |
29 | |
30 | #define NVME_MINORS (1U << MINORBITS) |
31 | |
32 | unsigned int admin_timeout = 60; |
33 | module_param(admin_timeout, uint, 0644); |
MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
35 | EXPORT_SYMBOL_GPL(admin_timeout); |
36 | |
37 | unsigned int nvme_io_timeout = 30; |
38 | module_param_named(io_timeout, nvme_io_timeout, uint, 0644); |
MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
40 | EXPORT_SYMBOL_GPL(nvme_io_timeout); |
41 | |
42 | static unsigned char shutdown_timeout = 5; |
43 | module_param(shutdown_timeout, byte, 0644); |
MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
45 | |
46 | static u8 nvme_max_retries = 5; |
47 | module_param_named(max_retries, nvme_max_retries, byte, 0644); |
MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
49 | |
50 | static unsigned long default_ps_max_latency_us = 100000; |
51 | module_param(default_ps_max_latency_us, ulong, 0644); |
52 | MODULE_PARM_DESC(default_ps_max_latency_us, |
	"max power saving latency for new devices; use PM QOS to change per device");
54 | |
55 | static bool force_apst; |
56 | module_param(force_apst, bool, 0644); |
MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off");
58 | |
59 | static bool streams; |
60 | module_param(streams, bool, 0644); |
MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
62 | |
/*
 * nvme_wq - hosts nvme related works that are not reset or delete
 * nvme_reset_wq - hosts nvme reset works
 * nvme_delete_wq - hosts nvme delete works
 *
 * nvme_wq will host works such as scan, aen handling, fw activation,
 * keep-alive, error recovery, periodic reconnects etc. nvme_reset_wq
 * runs reset works which also flush works hosted on nvme_wq for
 * serialization purposes. nvme_delete_wq hosts controller deletion
 * works which flush reset works for serialization.
 */
74 | struct workqueue_struct *nvme_wq; |
75 | EXPORT_SYMBOL_GPL(nvme_wq); |
76 | |
77 | struct workqueue_struct *nvme_reset_wq; |
78 | EXPORT_SYMBOL_GPL(nvme_reset_wq); |
79 | |
80 | struct workqueue_struct *nvme_delete_wq; |
81 | EXPORT_SYMBOL_GPL(nvme_delete_wq); |
82 | |
83 | static DEFINE_IDA(nvme_subsystems_ida); |
84 | static LIST_HEAD(nvme_subsystems); |
85 | static DEFINE_MUTEX(nvme_subsystems_lock); |
86 | |
87 | static DEFINE_IDA(nvme_instance_ida); |
88 | static dev_t nvme_chr_devt; |
89 | static struct class *nvme_class; |
90 | static struct class *nvme_subsys_class; |
91 | |
92 | static int nvme_revalidate_disk(struct gendisk *disk); |
93 | static void nvme_put_subsystem(struct nvme_subsystem *subsys); |
94 | static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, |
95 | unsigned nsid); |
96 | |
97 | static void nvme_set_queue_dying(struct nvme_ns *ns) |
98 | { |
/*
 * Revalidating a dead namespace sets capacity to 0. This will prevent
 * buffered writers from dirtying pages that can't be synced.
 */
103 | if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags)) |
104 | return; |
105 | revalidate_disk(ns->disk); |
106 | blk_set_queue_dying(ns->queue); |
107 | /* Forcibly unquiesce queues to avoid blocking dispatch */ |
108 | blk_mq_unquiesce_queue(ns->queue); |
109 | } |
110 | |
111 | static void nvme_queue_scan(struct nvme_ctrl *ctrl) |
112 | { |
/*
 * Only queue new scan work when admin and IO queues are both alive
 */
116 | if (ctrl->state == NVME_CTRL_LIVE) |
117 | queue_work(nvme_wq, &ctrl->scan_work); |
118 | } |
119 | |
120 | int nvme_reset_ctrl(struct nvme_ctrl *ctrl) |
121 | { |
122 | if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) |
123 | return -EBUSY; |
124 | if (!queue_work(nvme_reset_wq, &ctrl->reset_work)) |
125 | return -EBUSY; |
126 | return 0; |
127 | } |
128 | EXPORT_SYMBOL_GPL(nvme_reset_ctrl); |
129 | |
130 | int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) |
131 | { |
132 | int ret; |
133 | |
134 | ret = nvme_reset_ctrl(ctrl); |
135 | if (!ret) { |
136 | flush_work(&ctrl->reset_work); |
137 | if (ctrl->state != NVME_CTRL_LIVE && |
138 | ctrl->state != NVME_CTRL_ADMIN_ONLY) |
139 | ret = -ENETRESET; |
140 | } |
141 | |
142 | return ret; |
143 | } |
144 | EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync); |
145 | |
146 | static void nvme_do_delete_ctrl(struct nvme_ctrl *ctrl) |
147 | { |
148 | dev_info(ctrl->device, |
"Removing ctrl: NQN \"%s\"\n", ctrl->opts->subsysnqn);
150 | |
151 | flush_work(&ctrl->reset_work); |
152 | nvme_stop_ctrl(ctrl); |
153 | nvme_remove_namespaces(ctrl); |
154 | ctrl->ops->delete_ctrl(ctrl); |
155 | nvme_uninit_ctrl(ctrl); |
156 | nvme_put_ctrl(ctrl); |
157 | } |
158 | |
159 | static void nvme_delete_ctrl_work(struct work_struct *work) |
160 | { |
161 | struct nvme_ctrl *ctrl = |
162 | container_of(work, struct nvme_ctrl, delete_work); |
163 | |
164 | nvme_do_delete_ctrl(ctrl); |
165 | } |
166 | |
167 | int nvme_delete_ctrl(struct nvme_ctrl *ctrl) |
168 | { |
169 | if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING)) |
170 | return -EBUSY; |
171 | if (!queue_work(nvme_delete_wq, &ctrl->delete_work)) |
172 | return -EBUSY; |
173 | return 0; |
174 | } |
175 | EXPORT_SYMBOL_GPL(nvme_delete_ctrl); |
176 | |
177 | static int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl) |
178 | { |
179 | int ret = 0; |
180 | |
/*
 * Keep a reference until nvme_do_delete_ctrl() completes, since
 * ->delete_ctrl can free the controller.
 */
185 | nvme_get_ctrl(ctrl); |
186 | if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING)) |
187 | ret = -EBUSY; |
188 | if (!ret) |
189 | nvme_do_delete_ctrl(ctrl); |
190 | nvme_put_ctrl(ctrl); |
191 | return ret; |
192 | } |
193 | |
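/*
 * Protection information can only be used when the metadata is exactly the
 * size of an 8-byte T10 PI tuple, i.e. there is no additional per-block
 * metadata next to the protection information.
 */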
194 | static inline bool nvme_ns_has_pi(struct nvme_ns *ns) |
195 | { |
196 | return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple); |
197 | } |
198 | |
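/*
 * Translate the NVMe status code of a completed request into the generic
 * block layer error (blk_status_t) reported to upper layers.
 */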
199 | static blk_status_t nvme_error_status(struct request *req) |
200 | { |
201 | switch (nvme_req(req)->status & 0x7ff) { |
202 | case NVME_SC_SUCCESS: |
203 | return BLK_STS_OK; |
204 | case NVME_SC_CAP_EXCEEDED: |
205 | return BLK_STS_NOSPC; |
206 | case NVME_SC_LBA_RANGE: |
207 | return BLK_STS_TARGET; |
208 | case NVME_SC_BAD_ATTRIBUTES: |
209 | case NVME_SC_ONCS_NOT_SUPPORTED: |
210 | case NVME_SC_INVALID_OPCODE: |
211 | case NVME_SC_INVALID_FIELD: |
212 | case NVME_SC_INVALID_NS: |
213 | return BLK_STS_NOTSUPP; |
214 | case NVME_SC_WRITE_FAULT: |
215 | case NVME_SC_READ_ERROR: |
216 | case NVME_SC_UNWRITTEN_BLOCK: |
217 | case NVME_SC_ACCESS_DENIED: |
218 | case NVME_SC_READ_ONLY: |
219 | case NVME_SC_COMPARE_FAILED: |
220 | return BLK_STS_MEDIUM; |
221 | case NVME_SC_GUARD_CHECK: |
222 | case NVME_SC_APPTAG_CHECK: |
223 | case NVME_SC_REFTAG_CHECK: |
224 | case NVME_SC_INVALID_PI: |
225 | return BLK_STS_PROTECTION; |
226 | case NVME_SC_RESERVATION_CONFLICT: |
227 | return BLK_STS_NEXUS; |
228 | default: |
229 | return BLK_STS_IOERR; |
230 | } |
231 | } |
232 | |
233 | static inline bool nvme_req_needs_retry(struct request *req) |
234 | { |
235 | if (blk_noretry_request(req)) |
236 | return false; |
237 | if (nvme_req(req)->status & NVME_SC_DNR) |
238 | return false; |
239 | if (nvme_req(req)->retries >= nvme_max_retries) |
240 | return false; |
241 | return true; |
242 | } |
243 | |
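/*
 * Requeue a failed command, honouring the Command Retry Delay (CRD) field of
 * the status if the controller advertised CRDT values: crdt[] is in units of
 * 100 milliseconds, which matches the millisecond delay expected by
 * blk_mq_delay_kick_requeue_list().
 */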
244 | static void nvme_retry_req(struct request *req) |
245 | { |
246 | struct nvme_ns *ns = req->q->queuedata; |
247 | unsigned long delay = 0; |
248 | u16 crd; |
249 | |
250 | /* The mask and shift result must be <= 3 */ |
251 | crd = (nvme_req(req)->status & NVME_SC_CRD) >> 11; |
252 | if (ns && crd) |
253 | delay = ns->ctrl->crdt[crd - 1] * 100; |
254 | |
255 | nvme_req(req)->retries++; |
256 | blk_mq_requeue_request(req, false); |
257 | blk_mq_delay_kick_requeue_list(req->q, delay); |
258 | } |
259 | |
260 | void nvme_complete_rq(struct request *req) |
261 | { |
262 | blk_status_t status = nvme_error_status(req); |
263 | |
264 | trace_nvme_complete_rq(req); |
265 | |
266 | if (nvme_req(req)->ctrl->kas) |
267 | nvme_req(req)->ctrl->comp_seen = true; |
268 | |
269 | if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) { |
270 | if ((req->cmd_flags & REQ_NVME_MPATH) && |
271 | blk_path_error(status)) { |
272 | nvme_failover_req(req); |
273 | return; |
274 | } |
275 | |
276 | if (!blk_queue_dying(req->q)) { |
277 | nvme_retry_req(req); |
278 | return; |
279 | } |
280 | } |
281 | blk_mq_end_request(req, status); |
282 | } |
283 | EXPORT_SYMBOL_GPL(nvme_complete_rq); |
284 | |
285 | bool nvme_cancel_request(struct request *req, void *data, bool reserved) |
286 | { |
287 | dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device, |
"Cancelling I/O %d", req->tag);
289 | |
290 | nvme_req(req)->status = NVME_SC_ABORT_REQ; |
291 | blk_mq_complete_request(req); |
292 | return true; |
293 | } |
294 | EXPORT_SYMBOL_GPL(nvme_cancel_request); |
295 | |
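/*
 * Controller state machine: only the transitions whitelisted below are
 * allowed (e.g. NEW/RESETTING/CONNECTING -> LIVE, LIVE/ADMIN_ONLY ->
 * RESETTING, DELETING -> DEAD).  Returns true and updates ctrl->state under
 * ctrl->lock if the transition is valid, false otherwise.
 */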
296 | bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, |
297 | enum nvme_ctrl_state new_state) |
298 | { |
299 | enum nvme_ctrl_state old_state; |
300 | unsigned long flags; |
301 | bool changed = false; |
302 | |
303 | spin_lock_irqsave(&ctrl->lock, flags); |
304 | |
305 | old_state = ctrl->state; |
306 | switch (new_state) { |
307 | case NVME_CTRL_ADMIN_ONLY: |
308 | switch (old_state) { |
309 | case NVME_CTRL_CONNECTING: |
310 | changed = true; |
311 | /* FALLTHRU */ |
312 | default: |
313 | break; |
314 | } |
315 | break; |
316 | case NVME_CTRL_LIVE: |
317 | switch (old_state) { |
318 | case NVME_CTRL_NEW: |
319 | case NVME_CTRL_RESETTING: |
320 | case NVME_CTRL_CONNECTING: |
321 | changed = true; |
322 | /* FALLTHRU */ |
323 | default: |
324 | break; |
325 | } |
326 | break; |
327 | case NVME_CTRL_RESETTING: |
328 | switch (old_state) { |
329 | case NVME_CTRL_NEW: |
330 | case NVME_CTRL_LIVE: |
331 | case NVME_CTRL_ADMIN_ONLY: |
332 | changed = true; |
333 | /* FALLTHRU */ |
334 | default: |
335 | break; |
336 | } |
337 | break; |
338 | case NVME_CTRL_CONNECTING: |
339 | switch (old_state) { |
340 | case NVME_CTRL_NEW: |
341 | case NVME_CTRL_RESETTING: |
342 | changed = true; |
343 | /* FALLTHRU */ |
344 | default: |
345 | break; |
346 | } |
347 | break; |
348 | case NVME_CTRL_DELETING: |
349 | switch (old_state) { |
350 | case NVME_CTRL_LIVE: |
351 | case NVME_CTRL_ADMIN_ONLY: |
352 | case NVME_CTRL_RESETTING: |
353 | case NVME_CTRL_CONNECTING: |
354 | changed = true; |
355 | /* FALLTHRU */ |
356 | default: |
357 | break; |
358 | } |
359 | break; |
360 | case NVME_CTRL_DEAD: |
361 | switch (old_state) { |
362 | case NVME_CTRL_DELETING: |
363 | changed = true; |
364 | /* FALLTHRU */ |
365 | default: |
366 | break; |
367 | } |
368 | break; |
369 | default: |
370 | break; |
371 | } |
372 | |
373 | if (changed) |
374 | ctrl->state = new_state; |
375 | |
376 | spin_unlock_irqrestore(&ctrl->lock, flags); |
377 | if (changed && ctrl->state == NVME_CTRL_LIVE) |
378 | nvme_kick_requeue_lists(ctrl); |
379 | return changed; |
380 | } |
381 | EXPORT_SYMBOL_GPL(nvme_change_ctrl_state); |
382 | |
383 | static void nvme_free_ns_head(struct kref *ref) |
384 | { |
385 | struct nvme_ns_head *head = |
386 | container_of(ref, struct nvme_ns_head, ref); |
387 | |
388 | nvme_mpath_remove_disk(head); |
389 | ida_simple_remove(&head->subsys->ns_ida, head->instance); |
390 | list_del_init(&head->entry); |
391 | cleanup_srcu_struct_quiesced(&head->srcu); |
392 | nvme_put_subsystem(head->subsys); |
393 | kfree(head); |
394 | } |
395 | |
396 | static void nvme_put_ns_head(struct nvme_ns_head *head) |
397 | { |
398 | kref_put(&head->ref, nvme_free_ns_head); |
399 | } |
400 | |
401 | static void nvme_free_ns(struct kref *kref) |
402 | { |
403 | struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref); |
404 | |
405 | if (ns->ndev) |
406 | nvme_nvm_unregister(ns); |
407 | |
408 | put_disk(ns->disk); |
409 | nvme_put_ns_head(ns->head); |
410 | nvme_put_ctrl(ns->ctrl); |
411 | kfree(ns); |
412 | } |
413 | |
414 | static void nvme_put_ns(struct nvme_ns *ns) |
415 | { |
416 | kref_put(&ns->kref, nvme_free_ns); |
417 | } |
418 | |
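/*
 * Reset the driver specific request state exactly once per request;
 * RQF_DONTPREP guards against clearing it again when a request is requeued
 * and prepared a second time.
 */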
419 | static inline void nvme_clear_nvme_request(struct request *req) |
420 | { |
421 | if (!(req->rq_flags & RQF_DONTPREP)) { |
422 | nvme_req(req)->retries = 0; |
423 | nvme_req(req)->flags = 0; |
424 | req->rq_flags |= RQF_DONTPREP; |
425 | } |
426 | } |
427 | |
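/*
 * Allocate a passthrough request.  With NVME_QID_ANY the block layer may run
 * it on any hardware context; otherwise the request is bound to the hardware
 * context of the given (1-based) queue id.
 */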
428 | struct request *nvme_alloc_request(struct request_queue *q, |
429 | struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid) |
430 | { |
431 | unsigned op = nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN; |
432 | struct request *req; |
433 | |
434 | if (qid == NVME_QID_ANY) { |
435 | req = blk_mq_alloc_request(q, op, flags); |
436 | } else { |
437 | req = blk_mq_alloc_request_hctx(q, op, flags, |
438 | qid ? qid - 1 : 0); |
439 | } |
440 | if (IS_ERR(req)) |
441 | return req; |
442 | |
443 | req->cmd_flags |= REQ_FAILFAST_DRIVER; |
444 | nvme_clear_nvme_request(req); |
445 | nvme_req(req)->cmd = cmd; |
446 | |
447 | return req; |
448 | } |
449 | EXPORT_SYMBOL_GPL(nvme_alloc_request); |
450 | |
451 | static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable) |
452 | { |
453 | struct nvme_command c; |
454 | |
455 | memset(&c, 0, sizeof(c)); |
456 | |
457 | c.directive.opcode = nvme_admin_directive_send; |
458 | c.directive.nsid = cpu_to_le32(NVME_NSID_ALL); |
459 | c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE; |
460 | c.directive.dtype = NVME_DIR_IDENTIFY; |
461 | c.directive.tdtype = NVME_DIR_STREAMS; |
462 | c.directive.endir = enable ? NVME_DIR_ENDIR : 0; |
463 | |
464 | return nvme_submit_sync_cmd(ctrl->admin_q, &c, NULL, 0); |
465 | } |
466 | |
467 | static int nvme_disable_streams(struct nvme_ctrl *ctrl) |
468 | { |
469 | return nvme_toggle_streams(ctrl, false); |
470 | } |
471 | |
472 | static int nvme_enable_streams(struct nvme_ctrl *ctrl) |
473 | { |
474 | return nvme_toggle_streams(ctrl, true); |
475 | } |
476 | |
477 | static int nvme_get_stream_params(struct nvme_ctrl *ctrl, |
478 | struct streams_directive_params *s, u32 nsid) |
479 | { |
480 | struct nvme_command c; |
481 | |
482 | memset(&c, 0, sizeof(c)); |
483 | memset(s, 0, sizeof(*s)); |
484 | |
485 | c.directive.opcode = nvme_admin_directive_recv; |
486 | c.directive.nsid = cpu_to_le32(nsid); |
487 | c.directive.numd = cpu_to_le32((sizeof(*s) >> 2) - 1); |
488 | c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM; |
489 | c.directive.dtype = NVME_DIR_STREAMS; |
490 | |
491 | return nvme_submit_sync_cmd(ctrl->admin_q, &c, s, sizeof(*s)); |
492 | } |
493 | |
494 | static int nvme_configure_directives(struct nvme_ctrl *ctrl) |
495 | { |
496 | struct streams_directive_params s; |
497 | int ret; |
498 | |
499 | if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES)) |
500 | return 0; |
501 | if (!streams) |
502 | return 0; |
503 | |
504 | ret = nvme_enable_streams(ctrl); |
505 | if (ret) |
506 | return ret; |
507 | |
508 | ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL); |
509 | if (ret) |
510 | return ret; |
511 | |
512 | ctrl->nssa = le16_to_cpu(s.nssa); |
513 | if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) { |
dev_info(ctrl->device, "too few streams (%u) available\n",
	ctrl->nssa);
516 | nvme_disable_streams(ctrl); |
517 | return 0; |
518 | } |
519 | |
520 | ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1); |
dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
522 | return 0; |
523 | } |
524 | |
525 | /* |
526 | * Check if 'req' has a write hint associated with it. If it does, assign |
527 | * a valid namespace stream to the write. |
528 | */ |
529 | static void nvme_assign_write_stream(struct nvme_ctrl *ctrl, |
530 | struct request *req, u16 *control, |
531 | u32 *dsmgmt) |
532 | { |
533 | enum rw_hint streamid = req->write_hint; |
534 | |
535 | if (streamid == WRITE_LIFE_NOT_SET || streamid == WRITE_LIFE_NONE) |
536 | streamid = 0; |
537 | else { |
538 | streamid--; |
539 | if (WARN_ON_ONCE(streamid > ctrl->nr_streams)) |
540 | return; |
541 | |
542 | *control |= NVME_RW_DTYPE_STREAMS; |
543 | *dsmgmt |= streamid << 16; |
544 | } |
545 | |
546 | if (streamid < ARRAY_SIZE(req->q->write_hints)) |
547 | req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9; |
548 | } |
549 | |
550 | static inline void nvme_setup_flush(struct nvme_ns *ns, |
551 | struct nvme_command *cmnd) |
552 | { |
553 | cmnd->common.opcode = nvme_cmd_flush; |
554 | cmnd->common.nsid = cpu_to_le32(ns->head->ns_id); |
555 | } |
556 | |
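/*
 * Build a DSM (deallocate) command: one nvme_dsm_range entry is filled in for
 * every discard segment and attached to the request as a special payload,
 * which nvme_cleanup_cmd() later frees or releases back to the shared
 * controller discard page.
 */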
557 | static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, |
558 | struct nvme_command *cmnd) |
559 | { |
560 | unsigned short segments = blk_rq_nr_discard_segments(req), n = 0; |
561 | struct nvme_dsm_range *range; |
562 | struct bio *bio; |
563 | |
564 | range = kmalloc_array(segments, sizeof(*range), |
565 | GFP_ATOMIC | __GFP_NOWARN); |
566 | if (!range) { |
/*
 * If we fail to allocate our range, fall back to the controller
 * discard page. If that's also busy, it's safe to return
 * busy, as we know we can make progress once that's freed.
 */
572 | if (test_and_set_bit_lock(0, &ns->ctrl->discard_page_busy)) |
573 | return BLK_STS_RESOURCE; |
574 | |
575 | range = page_address(ns->ctrl->discard_page); |
576 | } |
577 | |
578 | __rq_for_each_bio(bio, req) { |
579 | u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector); |
580 | u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift; |
581 | |
582 | if (n < segments) { |
583 | range[n].cattr = cpu_to_le32(0); |
584 | range[n].nlb = cpu_to_le32(nlb); |
585 | range[n].slba = cpu_to_le64(slba); |
586 | } |
587 | n++; |
588 | } |
589 | |
590 | if (WARN_ON_ONCE(n != segments)) { |
591 | if (virt_to_page(range) == ns->ctrl->discard_page) |
592 | clear_bit_unlock(0, &ns->ctrl->discard_page_busy); |
593 | else |
594 | kfree(range); |
595 | return BLK_STS_IOERR; |
596 | } |
597 | |
598 | cmnd->dsm.opcode = nvme_cmd_dsm; |
599 | cmnd->dsm.nsid = cpu_to_le32(ns->head->ns_id); |
600 | cmnd->dsm.nr = cpu_to_le32(segments - 1); |
601 | cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); |
602 | |
603 | req->special_vec.bv_page = virt_to_page(range); |
604 | req->special_vec.bv_offset = offset_in_page(range); |
605 | req->special_vec.bv_len = sizeof(*range) * segments; |
606 | req->rq_flags |= RQF_SPECIAL_PAYLOAD; |
607 | |
608 | return BLK_STS_OK; |
609 | } |
610 | |
611 | static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, |
612 | struct request *req, struct nvme_command *cmnd) |
613 | { |
614 | if (ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) |
615 | return nvme_setup_discard(ns, req, cmnd); |
616 | |
617 | cmnd->write_zeroes.opcode = nvme_cmd_write_zeroes; |
618 | cmnd->write_zeroes.nsid = cpu_to_le32(ns->head->ns_id); |
619 | cmnd->write_zeroes.slba = |
620 | cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); |
621 | cmnd->write_zeroes.length = |
622 | cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); |
623 | cmnd->write_zeroes.control = 0; |
624 | return BLK_STS_OK; |
625 | } |
626 | |
627 | static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, |
628 | struct request *req, struct nvme_command *cmnd) |
629 | { |
630 | struct nvme_ctrl *ctrl = ns->ctrl; |
631 | u16 control = 0; |
632 | u32 dsmgmt = 0; |
633 | |
634 | if (req->cmd_flags & REQ_FUA) |
635 | control |= NVME_RW_FUA; |
636 | if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD)) |
637 | control |= NVME_RW_LR; |
638 | |
639 | if (req->cmd_flags & REQ_RAHEAD) |
640 | dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH; |
641 | |
642 | cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read); |
643 | cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id); |
644 | cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); |
645 | cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); |
646 | |
647 | if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams) |
648 | nvme_assign_write_stream(ctrl, req, &control, &dsmgmt); |
649 | |
650 | if (ns->ms) { |
/*
 * If formatted with metadata, the block layer always provides a
 * metadata buffer if CONFIG_BLK_DEV_INTEGRITY is enabled. Else
 * we enable the PRACT bit for protection information or set the
 * namespace capacity to zero to prevent any I/O.
 */
657 | if (!blk_integrity_rq(req)) { |
658 | if (WARN_ON_ONCE(!nvme_ns_has_pi(ns))) |
659 | return BLK_STS_NOTSUPP; |
660 | control |= NVME_RW_PRINFO_PRACT; |
661 | } else if (req_op(req) == REQ_OP_WRITE) { |
662 | t10_pi_prepare(req, ns->pi_type); |
663 | } |
664 | |
665 | switch (ns->pi_type) { |
666 | case NVME_NS_DPS_PI_TYPE3: |
667 | control |= NVME_RW_PRINFO_PRCHK_GUARD; |
668 | break; |
669 | case NVME_NS_DPS_PI_TYPE1: |
670 | case NVME_NS_DPS_PI_TYPE2: |
671 | control |= NVME_RW_PRINFO_PRCHK_GUARD | |
672 | NVME_RW_PRINFO_PRCHK_REF; |
673 | cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req)); |
674 | break; |
675 | } |
676 | } |
677 | |
678 | cmnd->rw.control = cpu_to_le16(control); |
679 | cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt); |
return BLK_STS_OK;
681 | } |
682 | |
683 | void nvme_cleanup_cmd(struct request *req) |
684 | { |
685 | if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ && |
686 | nvme_req(req)->status == 0) { |
687 | struct nvme_ns *ns = req->rq_disk->private_data; |
688 | |
689 | t10_pi_complete(req, ns->pi_type, |
690 | blk_rq_bytes(req) >> ns->lba_shift); |
691 | } |
692 | if (req->rq_flags & RQF_SPECIAL_PAYLOAD) { |
693 | struct nvme_ns *ns = req->rq_disk->private_data; |
694 | struct page *page = req->special_vec.bv_page; |
695 | |
696 | if (page == ns->ctrl->discard_page) |
697 | clear_bit_unlock(0, &ns->ctrl->discard_page_busy); |
698 | else |
699 | kfree(page_address(page) + req->special_vec.bv_offset); |
700 | } |
701 | } |
702 | EXPORT_SYMBOL_GPL(nvme_cleanup_cmd); |
703 | |
704 | blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, |
705 | struct nvme_command *cmd) |
706 | { |
707 | blk_status_t ret = BLK_STS_OK; |
708 | |
709 | nvme_clear_nvme_request(req); |
710 | |
711 | memset(cmd, 0, sizeof(*cmd)); |
712 | switch (req_op(req)) { |
713 | case REQ_OP_DRV_IN: |
714 | case REQ_OP_DRV_OUT: |
715 | memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd)); |
716 | break; |
717 | case REQ_OP_FLUSH: |
718 | nvme_setup_flush(ns, cmd); |
719 | break; |
720 | case REQ_OP_WRITE_ZEROES: |
721 | ret = nvme_setup_write_zeroes(ns, req, cmd); |
722 | break; |
723 | case REQ_OP_DISCARD: |
724 | ret = nvme_setup_discard(ns, req, cmd); |
725 | break; |
726 | case REQ_OP_READ: |
727 | case REQ_OP_WRITE: |
728 | ret = nvme_setup_rw(ns, req, cmd); |
729 | break; |
730 | default: |
731 | WARN_ON_ONCE(1); |
732 | return BLK_STS_IOERR; |
733 | } |
734 | |
735 | cmd->common.command_id = req->tag; |
736 | trace_nvme_setup_cmd(req, cmd); |
737 | return ret; |
738 | } |
739 | EXPORT_SYMBOL_GPL(nvme_setup_cmd); |
740 | |
741 | static void nvme_end_sync_rq(struct request *rq, blk_status_t error) |
742 | { |
743 | struct completion *waiting = rq->end_io_data; |
744 | |
745 | rq->end_io_data = NULL; |
746 | complete(waiting); |
747 | } |
748 | |
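/*
 * Execute a request on a polled queue: completion is signalled through the
 * on-stack completion and driven by repeatedly calling blk_poll() instead of
 * waiting for an interrupt.
 */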
749 | static void nvme_execute_rq_polled(struct request_queue *q, |
750 | struct gendisk *bd_disk, struct request *rq, int at_head) |
751 | { |
752 | DECLARE_COMPLETION_ONSTACK(wait); |
753 | |
754 | WARN_ON_ONCE(!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)); |
755 | |
756 | rq->cmd_flags |= REQ_HIPRI; |
757 | rq->end_io_data = &wait; |
758 | blk_execute_rq_nowait(q, bd_disk, rq, at_head, nvme_end_sync_rq); |
759 | |
760 | while (!completion_done(&wait)) { |
761 | blk_poll(q, request_to_qc_t(rq->mq_hctx, rq), true); |
762 | cond_resched(); |
763 | } |
764 | } |
765 | |
766 | /* |
767 | * Returns 0 on success. If the result is negative, it's a Linux error code; |
768 | * if the result is positive, it's an NVM Express status code |
769 | */ |
770 | int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, |
771 | union nvme_result *result, void *buffer, unsigned bufflen, |
772 | unsigned timeout, int qid, int at_head, |
773 | blk_mq_req_flags_t flags, bool poll) |
774 | { |
775 | struct request *req; |
776 | int ret; |
777 | |
778 | req = nvme_alloc_request(q, cmd, flags, qid); |
779 | if (IS_ERR(req)) |
780 | return PTR_ERR(req); |
781 | |
782 | req->timeout = timeout ? timeout : ADMIN_TIMEOUT; |
783 | |
784 | if (buffer && bufflen) { |
785 | ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL); |
786 | if (ret) |
787 | goto out; |
788 | } |
789 | |
790 | if (poll) |
791 | nvme_execute_rq_polled(req->q, NULL, req, at_head); |
792 | else |
793 | blk_execute_rq(req->q, NULL, req, at_head); |
794 | if (result) |
795 | *result = nvme_req(req)->result; |
796 | if (nvme_req(req)->flags & NVME_REQ_CANCELLED) |
797 | ret = -EINTR; |
798 | else |
799 | ret = nvme_req(req)->status; |
800 | out: |
801 | blk_mq_free_request(req); |
802 | return ret; |
803 | } |
804 | EXPORT_SYMBOL_GPL(__nvme_submit_sync_cmd); |
805 | |
806 | int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, |
807 | void *buffer, unsigned bufflen) |
808 | { |
809 | return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen, 0, |
810 | NVME_QID_ANY, 0, 0, false); |
811 | } |
812 | EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd); |
813 | |
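/*
 * Copy user space metadata into a kernel bounce buffer and attach it to the
 * bio as an integrity payload; the returned buffer must be copied back to
 * user space (for reads) and freed by the caller.
 */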
814 | static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf, |
815 | unsigned len, u32 seed, bool write) |
816 | { |
817 | struct bio_integrity_payload *bip; |
818 | int ret = -ENOMEM; |
819 | void *buf; |
820 | |
821 | buf = kmalloc(len, GFP_KERNEL); |
822 | if (!buf) |
823 | goto out; |
824 | |
825 | ret = -EFAULT; |
826 | if (write && copy_from_user(buf, ubuf, len)) |
827 | goto out_free_meta; |
828 | |
829 | bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); |
830 | if (IS_ERR(bip)) { |
831 | ret = PTR_ERR(bip); |
832 | goto out_free_meta; |
833 | } |
834 | |
835 | bip->bip_iter.bi_size = len; |
836 | bip->bip_iter.bi_sector = seed; |
837 | ret = bio_integrity_add_page(bio, virt_to_page(buf), len, |
838 | offset_in_page(buf)); |
839 | if (ret == len) |
840 | return buf; |
841 | ret = -ENOMEM; |
842 | out_free_meta: |
843 | kfree(buf); |
844 | out: |
845 | return ERR_PTR(ret); |
846 | } |
847 | |
848 | static int nvme_submit_user_cmd(struct request_queue *q, |
849 | struct nvme_command *cmd, void __user *ubuffer, |
850 | unsigned bufflen, void __user *meta_buffer, unsigned meta_len, |
851 | u32 meta_seed, u32 *result, unsigned timeout) |
852 | { |
853 | bool write = nvme_is_write(cmd); |
854 | struct nvme_ns *ns = q->queuedata; |
855 | struct gendisk *disk = ns ? ns->disk : NULL; |
856 | struct request *req; |
857 | struct bio *bio = NULL; |
858 | void *meta = NULL; |
859 | int ret; |
860 | |
861 | req = nvme_alloc_request(q, cmd, 0, NVME_QID_ANY); |
862 | if (IS_ERR(req)) |
863 | return PTR_ERR(req); |
864 | |
865 | req->timeout = timeout ? timeout : ADMIN_TIMEOUT; |
866 | nvme_req(req)->flags |= NVME_REQ_USERCMD; |
867 | |
868 | if (ubuffer && bufflen) { |
869 | ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, |
870 | GFP_KERNEL); |
871 | if (ret) |
872 | goto out; |
873 | bio = req->bio; |
874 | bio->bi_disk = disk; |
875 | if (disk && meta_buffer && meta_len) { |
876 | meta = nvme_add_user_metadata(bio, meta_buffer, meta_len, |
877 | meta_seed, write); |
878 | if (IS_ERR(meta)) { |
879 | ret = PTR_ERR(meta); |
880 | goto out_unmap; |
881 | } |
882 | req->cmd_flags |= REQ_INTEGRITY; |
883 | } |
884 | } |
885 | |
886 | blk_execute_rq(req->q, disk, req, 0); |
887 | if (nvme_req(req)->flags & NVME_REQ_CANCELLED) |
888 | ret = -EINTR; |
889 | else |
890 | ret = nvme_req(req)->status; |
891 | if (result) |
892 | *result = le32_to_cpu(nvme_req(req)->result.u32); |
893 | if (meta && !ret && !write) { |
894 | if (copy_to_user(meta_buffer, meta, meta_len)) |
895 | ret = -EFAULT; |
896 | } |
897 | kfree(meta); |
898 | out_unmap: |
899 | if (bio) |
900 | blk_rq_unmap_user(bio); |
901 | out: |
902 | blk_mq_free_request(req); |
903 | return ret; |
904 | } |
905 | |
906 | static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) |
907 | { |
908 | struct nvme_ctrl *ctrl = rq->end_io_data; |
909 | unsigned long flags; |
910 | bool startka = false; |
911 | |
912 | blk_mq_free_request(rq); |
913 | |
914 | if (status) { |
915 | dev_err(ctrl->device, |
"failed nvme_keep_alive_end_io error=%d\n",
status);
918 | return; |
919 | } |
920 | |
921 | ctrl->comp_seen = false; |
922 | spin_lock_irqsave(&ctrl->lock, flags); |
923 | if (ctrl->state == NVME_CTRL_LIVE || |
924 | ctrl->state == NVME_CTRL_CONNECTING) |
925 | startka = true; |
926 | spin_unlock_irqrestore(&ctrl->lock, flags); |
927 | if (startka) |
928 | schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); |
929 | } |
930 | |
931 | static int nvme_keep_alive(struct nvme_ctrl *ctrl) |
932 | { |
933 | struct request *rq; |
934 | |
935 | rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, BLK_MQ_REQ_RESERVED, |
936 | NVME_QID_ANY); |
937 | if (IS_ERR(rq)) |
938 | return PTR_ERR(rq); |
939 | |
940 | rq->timeout = ctrl->kato * HZ; |
941 | rq->end_io_data = ctrl; |
942 | |
943 | blk_execute_rq_nowait(rq->q, NULL, rq, 0, nvme_keep_alive_end_io); |
944 | |
945 | return 0; |
946 | } |
947 | |
948 | static void nvme_keep_alive_work(struct work_struct *work) |
949 | { |
950 | struct nvme_ctrl *ctrl = container_of(to_delayed_work(work), |
951 | struct nvme_ctrl, ka_work); |
952 | bool comp_seen = ctrl->comp_seen; |
953 | |
954 | if ((ctrl->ctratt & NVME_CTRL_ATTR_TBKAS) && comp_seen) { |
955 | dev_dbg(ctrl->device, |
"reschedule traffic based keep-alive timer\n");
957 | ctrl->comp_seen = false; |
958 | schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); |
959 | return; |
960 | } |
961 | |
962 | if (nvme_keep_alive(ctrl)) { |
963 | /* allocation failure, reset the controller */ |
dev_err(ctrl->device, "keep-alive failed\n");
965 | nvme_reset_ctrl(ctrl); |
966 | return; |
967 | } |
968 | } |
969 | |
970 | static void nvme_start_keep_alive(struct nvme_ctrl *ctrl) |
971 | { |
972 | if (unlikely(ctrl->kato == 0)) |
973 | return; |
974 | |
975 | schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); |
976 | } |
977 | |
978 | void nvme_stop_keep_alive(struct nvme_ctrl *ctrl) |
979 | { |
980 | if (unlikely(ctrl->kato == 0)) |
981 | return; |
982 | |
983 | cancel_delayed_work_sync(&ctrl->ka_work); |
984 | } |
985 | EXPORT_SYMBOL_GPL(nvme_stop_keep_alive); |
986 | |
987 | static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) |
988 | { |
989 | struct nvme_command c = { }; |
990 | int error; |
991 | |
992 | /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ |
993 | c.identify.opcode = nvme_admin_identify; |
994 | c.identify.cns = NVME_ID_CNS_CTRL; |
995 | |
996 | *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL); |
997 | if (!*id) |
998 | return -ENOMEM; |
999 | |
1000 | error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, |
1001 | sizeof(struct nvme_id_ctrl)); |
1002 | if (error) |
1003 | kfree(*id); |
1004 | return error; |
1005 | } |
1006 | |
1007 | static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, |
1008 | struct nvme_ns_ids *ids) |
1009 | { |
1010 | struct nvme_command c = { }; |
1011 | int status; |
1012 | void *data; |
1013 | int pos; |
1014 | int len; |
1015 | |
1016 | c.identify.opcode = nvme_admin_identify; |
1017 | c.identify.nsid = cpu_to_le32(nsid); |
1018 | c.identify.cns = NVME_ID_CNS_NS_DESC_LIST; |
1019 | |
1020 | data = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL); |
1021 | if (!data) |
1022 | return -ENOMEM; |
1023 | |
1024 | status = nvme_submit_sync_cmd(ctrl->admin_q, &c, data, |
1025 | NVME_IDENTIFY_DATA_SIZE); |
1026 | if (status) |
1027 | goto free_data; |
1028 | |
1029 | for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) { |
1030 | struct nvme_ns_id_desc *cur = data + pos; |
1031 | |
1032 | if (cur->nidl == 0) |
1033 | break; |
1034 | |
1035 | switch (cur->nidt) { |
1036 | case NVME_NIDT_EUI64: |
1037 | if (cur->nidl != NVME_NIDT_EUI64_LEN) { |
1038 | dev_warn(ctrl->device, |
"ctrl returned bogus length: %d for NVME_NIDT_EUI64\n",
1040 | cur->nidl); |
1041 | goto free_data; |
1042 | } |
1043 | len = NVME_NIDT_EUI64_LEN; |
1044 | memcpy(ids->eui64, data + pos + sizeof(*cur), len); |
1045 | break; |
1046 | case NVME_NIDT_NGUID: |
1047 | if (cur->nidl != NVME_NIDT_NGUID_LEN) { |
1048 | dev_warn(ctrl->device, |
"ctrl returned bogus length: %d for NVME_NIDT_NGUID\n",
1050 | cur->nidl); |
1051 | goto free_data; |
1052 | } |
1053 | len = NVME_NIDT_NGUID_LEN; |
1054 | memcpy(ids->nguid, data + pos + sizeof(*cur), len); |
1055 | break; |
1056 | case NVME_NIDT_UUID: |
1057 | if (cur->nidl != NVME_NIDT_UUID_LEN) { |
1058 | dev_warn(ctrl->device, |
"ctrl returned bogus length: %d for NVME_NIDT_UUID\n",
1060 | cur->nidl); |
1061 | goto free_data; |
1062 | } |
1063 | len = NVME_NIDT_UUID_LEN; |
1064 | uuid_copy(&ids->uuid, data + pos + sizeof(*cur)); |
1065 | break; |
1066 | default: |
1067 | /* Skip unknown types */ |
1068 | len = cur->nidl; |
1069 | break; |
1070 | } |
1071 | |
1072 | len += sizeof(*cur); |
1073 | } |
1074 | free_data: |
1075 | kfree(data); |
1076 | return status; |
1077 | } |
1078 | |
1079 | static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list) |
1080 | { |
1081 | struct nvme_command c = { }; |
1082 | |
1083 | c.identify.opcode = nvme_admin_identify; |
1084 | c.identify.cns = NVME_ID_CNS_NS_ACTIVE_LIST; |
1085 | c.identify.nsid = cpu_to_le32(nsid); |
1086 | return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, |
1087 | NVME_IDENTIFY_DATA_SIZE); |
1088 | } |
1089 | |
1090 | static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl, |
1091 | unsigned nsid) |
1092 | { |
1093 | struct nvme_id_ns *id; |
1094 | struct nvme_command c = { }; |
1095 | int error; |
1096 | |
1097 | /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ |
1098 | c.identify.opcode = nvme_admin_identify; |
1099 | c.identify.nsid = cpu_to_le32(nsid); |
1100 | c.identify.cns = NVME_ID_CNS_NS; |
1101 | |
1102 | id = kmalloc(sizeof(*id), GFP_KERNEL); |
1103 | if (!id) |
1104 | return NULL; |
1105 | |
1106 | error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id)); |
1107 | if (error) { |
dev_warn(ctrl->device, "Identify namespace failed\n");
1109 | kfree(id); |
1110 | return NULL; |
1111 | } |
1112 | |
1113 | return id; |
1114 | } |
1115 | |
1116 | static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, |
1117 | void *buffer, size_t buflen, u32 *result) |
1118 | { |
1119 | struct nvme_command c; |
1120 | union nvme_result res; |
1121 | int ret; |
1122 | |
1123 | memset(&c, 0, sizeof(c)); |
1124 | c.features.opcode = nvme_admin_set_features; |
1125 | c.features.fid = cpu_to_le32(fid); |
1126 | c.features.dword11 = cpu_to_le32(dword11); |
1127 | |
1128 | ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, |
1129 | buffer, buflen, 0, NVME_QID_ANY, 0, 0, false); |
1130 | if (ret >= 0 && result) |
1131 | *result = le32_to_cpu(res.u32); |
1132 | return ret; |
1133 | } |
1134 | |
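/*
 * Request the desired number of I/O queues via the Number of Queues feature:
 * cdw11 carries the zero-based submission and completion queue counts in the
 * lower and upper 16 bits, and the result is decoded the same way.
 */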
1135 | int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count) |
1136 | { |
1137 | u32 q_count = (*count - 1) | ((*count - 1) << 16); |
1138 | u32 result; |
1139 | int status, nr_io_queues; |
1140 | |
1141 | status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, NULL, 0, |
1142 | &result); |
1143 | if (status < 0) |
1144 | return status; |
1145 | |
/*
 * Degraded controllers might return an error when setting the queue
 * count.  We still want to be able to bring them online and offer
 * access to the admin queue, as that might be the only way to fix
 * them up.
 */
1151 | if (status > 0) { |
dev_err(ctrl->device, "Could not set queue count (%d)\n", status);
1153 | *count = 0; |
1154 | } else { |
1155 | nr_io_queues = min(result & 0xffff, result >> 16) + 1; |
1156 | *count = min(*count, nr_io_queues); |
1157 | } |
1158 | |
1159 | return 0; |
1160 | } |
1161 | EXPORT_SYMBOL_GPL(nvme_set_queue_count); |
1162 | |
1163 | #define NVME_AEN_SUPPORTED \ |
1164 | (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | NVME_AEN_CFG_ANA_CHANGE) |
1165 | |
1166 | static void nvme_enable_aen(struct nvme_ctrl *ctrl) |
1167 | { |
1168 | u32 result, supported_aens = ctrl->oaes & NVME_AEN_SUPPORTED; |
1169 | int status; |
1170 | |
1171 | if (!supported_aens) |
1172 | return; |
1173 | |
1174 | status = nvme_set_features(ctrl, NVME_FEAT_ASYNC_EVENT, supported_aens, |
1175 | NULL, 0, &result); |
1176 | if (status) |
dev_warn(ctrl->device, "Failed to configure AEN (cfg %x)\n",
	supported_aens);
1179 | } |
1180 | |
1181 | static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) |
1182 | { |
1183 | struct nvme_user_io io; |
1184 | struct nvme_command c; |
1185 | unsigned length, meta_len; |
1186 | void __user *metadata; |
1187 | |
1188 | if (copy_from_user(&io, uio, sizeof(io))) |
1189 | return -EFAULT; |
1190 | if (io.flags) |
1191 | return -EINVAL; |
1192 | |
1193 | switch (io.opcode) { |
1194 | case nvme_cmd_write: |
1195 | case nvme_cmd_read: |
1196 | case nvme_cmd_compare: |
1197 | break; |
1198 | default: |
1199 | return -EINVAL; |
1200 | } |
1201 | |
1202 | length = (io.nblocks + 1) << ns->lba_shift; |
1203 | meta_len = (io.nblocks + 1) * ns->ms; |
1204 | metadata = (void __user *)(uintptr_t)io.metadata; |
1205 | |
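/*
 * For extended LBA formats the metadata is transferred interleaved with the
 * data, so fold it into the data length instead of using a separate
 * metadata buffer.
 */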
1206 | if (ns->ext) { |
1207 | length += meta_len; |
1208 | meta_len = 0; |
1209 | } else if (meta_len) { |
1210 | if ((io.metadata & 3) || !io.metadata) |
1211 | return -EINVAL; |
1212 | } |
1213 | |
1214 | memset(&c, 0, sizeof(c)); |
1215 | c.rw.opcode = io.opcode; |
1216 | c.rw.flags = io.flags; |
1217 | c.rw.nsid = cpu_to_le32(ns->head->ns_id); |
1218 | c.rw.slba = cpu_to_le64(io.slba); |
1219 | c.rw.length = cpu_to_le16(io.nblocks); |
1220 | c.rw.control = cpu_to_le16(io.control); |
1221 | c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); |
1222 | c.rw.reftag = cpu_to_le32(io.reftag); |
1223 | c.rw.apptag = cpu_to_le16(io.apptag); |
1224 | c.rw.appmask = cpu_to_le16(io.appmask); |
1225 | |
1226 | return nvme_submit_user_cmd(ns->queue, &c, |
1227 | (void __user *)(uintptr_t)io.addr, length, |
1228 | metadata, meta_len, lower_32_bits(io.slba), NULL, 0); |
1229 | } |
1230 | |
1231 | static u32 nvme_known_admin_effects(u8 opcode) |
1232 | { |
1233 | switch (opcode) { |
1234 | case nvme_admin_format_nvm: |
1235 | return NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | |
1236 | NVME_CMD_EFFECTS_CSE_MASK; |
1237 | case nvme_admin_sanitize_nvm: |
1238 | return NVME_CMD_EFFECTS_CSE_MASK; |
1239 | default: |
1240 | break; |
1241 | } |
1242 | return 0; |
1243 | } |
1244 | |
1245 | static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, |
1246 | u8 opcode) |
1247 | { |
1248 | u32 effects = 0; |
1249 | |
1250 | if (ns) { |
1251 | if (ctrl->effects) |
1252 | effects = le32_to_cpu(ctrl->effects->iocs[opcode]); |
1253 | if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC)) |
1254 | dev_warn(ctrl->device, |
"IO command:%02x has unhandled effects:%08x\n",
opcode, effects);
1257 | return 0; |
1258 | } |
1259 | |
1260 | if (ctrl->effects) |
1261 | effects = le32_to_cpu(ctrl->effects->acs[opcode]); |
1262 | else |
1263 | effects = nvme_known_admin_effects(opcode); |
1264 | |
1265 | /* |
1266 | * For simplicity, IO to all namespaces is quiesced even if the command |
1267 | * effects say only one namespace is affected. |
1268 | */ |
1269 | if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { |
1270 | mutex_lock(&ctrl->scan_lock); |
1271 | nvme_start_freeze(ctrl); |
1272 | nvme_wait_freeze(ctrl); |
1273 | } |
1274 | return effects; |
1275 | } |
1276 | |
1277 | static void nvme_update_formats(struct nvme_ctrl *ctrl) |
1278 | { |
1279 | struct nvme_ns *ns; |
1280 | |
1281 | down_read(&ctrl->namespaces_rwsem); |
1282 | list_for_each_entry(ns, &ctrl->namespaces, list) |
1283 | if (ns->disk && nvme_revalidate_disk(ns->disk)) |
1284 | nvme_set_queue_dying(ns); |
1285 | up_read(&ctrl->namespaces_rwsem); |
1286 | |
1287 | nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL); |
1288 | } |
1289 | |
1290 | static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) |
1291 | { |
1292 | /* |
1293 | * Revalidate LBA changes prior to unfreezing. This is necessary to |
1294 | * prevent memory corruption if a logical block size was changed by |
1295 | * this command. |
1296 | */ |
1297 | if (effects & NVME_CMD_EFFECTS_LBCC) |
1298 | nvme_update_formats(ctrl); |
1299 | if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { |
1300 | nvme_unfreeze(ctrl); |
1301 | mutex_unlock(&ctrl->scan_lock); |
1302 | } |
1303 | if (effects & NVME_CMD_EFFECTS_CCC) |
1304 | nvme_init_identify(ctrl); |
1305 | if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) |
1306 | nvme_queue_scan(ctrl); |
1307 | } |
1308 | |
1309 | static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, |
1310 | struct nvme_passthru_cmd __user *ucmd) |
1311 | { |
1312 | struct nvme_passthru_cmd cmd; |
1313 | struct nvme_command c; |
1314 | unsigned timeout = 0; |
1315 | u32 effects; |
1316 | int status; |
1317 | |
1318 | if (!capable(CAP_SYS_ADMIN)) |
1319 | return -EACCES; |
1320 | if (copy_from_user(&cmd, ucmd, sizeof(cmd))) |
1321 | return -EFAULT; |
1322 | if (cmd.flags) |
1323 | return -EINVAL; |
1324 | |
1325 | memset(&c, 0, sizeof(c)); |
1326 | c.common.opcode = cmd.opcode; |
1327 | c.common.flags = cmd.flags; |
1328 | c.common.nsid = cpu_to_le32(cmd.nsid); |
1329 | c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); |
1330 | c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); |
1331 | c.common.cdw10 = cpu_to_le32(cmd.cdw10); |
1332 | c.common.cdw11 = cpu_to_le32(cmd.cdw11); |
1333 | c.common.cdw12 = cpu_to_le32(cmd.cdw12); |
1334 | c.common.cdw13 = cpu_to_le32(cmd.cdw13); |
1335 | c.common.cdw14 = cpu_to_le32(cmd.cdw14); |
1336 | c.common.cdw15 = cpu_to_le32(cmd.cdw15); |
1337 | |
1338 | if (cmd.timeout_ms) |
1339 | timeout = msecs_to_jiffies(cmd.timeout_ms); |
1340 | |
1341 | effects = nvme_passthru_start(ctrl, ns, cmd.opcode); |
1342 | status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, |
1343 | (void __user *)(uintptr_t)cmd.addr, cmd.data_len, |
1344 | (void __user *)(uintptr_t)cmd.metadata, cmd.metadata_len, |
1345 | 0, &cmd.result, timeout); |
1346 | nvme_passthru_end(ctrl, effects); |
1347 | |
1348 | if (status >= 0) { |
1349 | if (put_user(cmd.result, &ucmd->result)) |
1350 | return -EFAULT; |
1351 | } |
1352 | |
1353 | return status; |
1354 | } |
1355 | |
/*
 * Issue ioctl requests on the first available path.  Note that unlike normal
 * block layer requests we will not retry failed requests on another
 * controller.
 */
1360 | static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk, |
1361 | struct nvme_ns_head **head, int *srcu_idx) |
1362 | { |
1363 | #ifdef CONFIG_NVME_MULTIPATH |
1364 | if (disk->fops == &nvme_ns_head_ops) { |
1365 | *head = disk->private_data; |
1366 | *srcu_idx = srcu_read_lock(&(*head)->srcu); |
1367 | return nvme_find_path(*head); |
1368 | } |
1369 | #endif |
1370 | *head = NULL; |
1371 | *srcu_idx = -1; |
1372 | return disk->private_data; |
1373 | } |
1374 | |
1375 | static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx) |
1376 | { |
1377 | if (head) |
1378 | srcu_read_unlock(&head->srcu, idx); |
1379 | } |
1380 | |
1381 | static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned cmd, unsigned long arg) |
1382 | { |
1383 | switch (cmd) { |
1384 | case NVME_IOCTL_ID: |
1385 | force_successful_syscall_return(); |
1386 | return ns->head->ns_id; |
1387 | case NVME_IOCTL_ADMIN_CMD: |
1388 | return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg); |
1389 | case NVME_IOCTL_IO_CMD: |
1390 | return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg); |
1391 | case NVME_IOCTL_SUBMIT_IO: |
1392 | return nvme_submit_io(ns, (void __user *)arg); |
1393 | default: |
1394 | #ifdef CONFIG_NVM |
1395 | if (ns->ndev) |
1396 | return nvme_nvm_ioctl(ns, cmd, arg); |
1397 | #endif |
1398 | if (is_sed_ioctl(cmd)) |
1399 | return sed_ioctl(ns->ctrl->opal_dev, cmd, |
1400 | (void __user *) arg); |
1401 | return -ENOTTY; |
1402 | } |
1403 | } |
1404 | |
1405 | static int nvme_ioctl(struct block_device *bdev, fmode_t mode, |
1406 | unsigned int cmd, unsigned long arg) |
1407 | { |
1408 | struct nvme_ns_head *head = NULL; |
1409 | struct nvme_ns *ns; |
1410 | int srcu_idx, ret; |
1411 | |
1412 | ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); |
1413 | if (unlikely(!ns)) |
1414 | ret = -EWOULDBLOCK; |
1415 | else |
1416 | ret = nvme_ns_ioctl(ns, cmd, arg); |
1417 | nvme_put_ns_from_disk(head, srcu_idx); |
1418 | return ret; |
1419 | } |
1420 | |
1421 | static int nvme_open(struct block_device *bdev, fmode_t mode) |
1422 | { |
1423 | struct nvme_ns *ns = bdev->bd_disk->private_data; |
1424 | |
1425 | #ifdef CONFIG_NVME_MULTIPATH |
1426 | /* should never be called due to GENHD_FL_HIDDEN */ |
1427 | if (WARN_ON_ONCE(ns->head->disk)) |
1428 | goto fail; |
1429 | #endif |
1430 | if (!kref_get_unless_zero(&ns->kref)) |
1431 | goto fail; |
1432 | if (!try_module_get(ns->ctrl->ops->module)) |
1433 | goto fail_put_ns; |
1434 | |
1435 | return 0; |
1436 | |
1437 | fail_put_ns: |
1438 | nvme_put_ns(ns); |
1439 | fail: |
1440 | return -ENXIO; |
1441 | } |
1442 | |
1443 | static void nvme_release(struct gendisk *disk, fmode_t mode) |
1444 | { |
1445 | struct nvme_ns *ns = disk->private_data; |
1446 | |
1447 | module_put(ns->ctrl->ops->module); |
1448 | nvme_put_ns(ns); |
1449 | } |
1450 | |
1451 | static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) |
1452 | { |
1453 | /* some standard values */ |
1454 | geo->heads = 1 << 6; |
1455 | geo->sectors = 1 << 5; |
1456 | geo->cylinders = get_capacity(bdev->bd_disk) >> 11; |
1457 | return 0; |
1458 | } |
1459 | |
1460 | #ifdef CONFIG_BLK_DEV_INTEGRITY |
1461 | static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) |
1462 | { |
1463 | struct blk_integrity integrity; |
1464 | |
1465 | memset(&integrity, 0, sizeof(integrity)); |
1466 | switch (pi_type) { |
1467 | case NVME_NS_DPS_PI_TYPE3: |
1468 | integrity.profile = &t10_pi_type3_crc; |
1469 | integrity.tag_size = sizeof(u16) + sizeof(u32); |
1470 | integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; |
1471 | break; |
1472 | case NVME_NS_DPS_PI_TYPE1: |
1473 | case NVME_NS_DPS_PI_TYPE2: |
1474 | integrity.profile = &t10_pi_type1_crc; |
1475 | integrity.tag_size = sizeof(u16); |
1476 | integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; |
1477 | break; |
1478 | default: |
1479 | integrity.profile = NULL; |
1480 | break; |
1481 | } |
1482 | integrity.tuple_size = ms; |
1483 | blk_integrity_register(disk, &integrity); |
1484 | blk_queue_max_integrity_segments(disk->queue, 1); |
1485 | } |
1486 | #else |
1487 | static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) |
1488 | { |
1489 | } |
1490 | #endif /* CONFIG_BLK_DEV_INTEGRITY */ |
1491 | |
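/*
 * Convert NOIOB (reported in logical blocks) to 512-byte sectors and expose
 * it, rounded down to a power of two, as the queue chunk size so that I/O is
 * split at the namespace optimal I/O boundary.
 */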
1492 | static void nvme_set_chunk_size(struct nvme_ns *ns) |
1493 | { |
1494 | u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9)); |
1495 | blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size)); |
1496 | } |
1497 | |
1498 | static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) |
1499 | { |
1500 | struct nvme_ctrl *ctrl = ns->ctrl; |
1501 | struct request_queue *queue = disk->queue; |
1502 | u32 size = queue_logical_block_size(queue); |
1503 | |
1504 | if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) { |
1505 | blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue); |
1506 | return; |
1507 | } |
1508 | |
1509 | if (ctrl->nr_streams && ns->sws && ns->sgs) |
1510 | size *= ns->sws * ns->sgs; |
1511 | |
1512 | BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < |
1513 | NVME_DSM_MAX_RANGES); |
1514 | |
1515 | queue->limits.discard_alignment = 0; |
1516 | queue->limits.discard_granularity = size; |
1517 | |
1518 | /* If discard is already enabled, don't reset queue limits */ |
1519 | if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue)) |
1520 | return; |
1521 | |
1522 | blk_queue_max_discard_sectors(queue, UINT_MAX); |
1523 | blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES); |
1524 | |
1525 | if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) |
1526 | blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); |
1527 | } |
1528 | |
1529 | static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) |
1530 | { |
1531 | u32 max_sectors; |
1532 | unsigned short bs = 1 << ns->lba_shift; |
1533 | |
1534 | if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) || |
1535 | (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) |
1536 | return; |
/*
 * Even though the NVMe spec explicitly states that MDTS is not
 * applicable to Write Zeroes ("The restriction does not apply to
 * commands that do not transfer data between the host and the
 * controller (e.g., Write Uncorrectable or Write Zeroes command)."),
 * be cautious and use the controller's max_hw_sectors value, which is
 * configured from the controller's MDTS field in nvme_init_identify()
 * if available, to cap the maximum number of write-zeroes sectors.
 */
1547 | if (ns->ctrl->max_hw_sectors == UINT_MAX) |
1548 | max_sectors = ((u32)(USHRT_MAX + 1) * bs) >> 9; |
1549 | else |
1550 | max_sectors = ((u32)(ns->ctrl->max_hw_sectors + 1) * bs) >> 9; |
1551 | |
1552 | blk_queue_max_write_zeroes_sectors(disk->queue, max_sectors); |
1553 | } |
1554 | |
1555 | static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, |
1556 | struct nvme_id_ns *id, struct nvme_ns_ids *ids) |
1557 | { |
1558 | memset(ids, 0, sizeof(*ids)); |
1559 | |
1560 | if (ctrl->vs >= NVME_VS(1, 1, 0)) |
1561 | memcpy(ids->eui64, id->eui64, sizeof(id->eui64)); |
1562 | if (ctrl->vs >= NVME_VS(1, 2, 0)) |
1563 | memcpy(ids->nguid, id->nguid, sizeof(id->nguid)); |
1564 | if (ctrl->vs >= NVME_VS(1, 3, 0)) { |
/*
 * Don't treat an error as fatal, as we potentially already
 * have an NGUID or EUI-64.
 */
1568 | if (nvme_identify_ns_descs(ctrl, nsid, ids)) |
1569 | dev_warn(ctrl->device, |
"%s: Identify Descriptors failed\n", __func__);
1571 | } |
1572 | } |
1573 | |
1574 | static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids) |
1575 | { |
1576 | return !uuid_is_null(&ids->uuid) || |
1577 | memchr_inv(ids->nguid, 0, sizeof(ids->nguid)) || |
1578 | memchr_inv(ids->eui64, 0, sizeof(ids->eui64)); |
1579 | } |
1580 | |
1581 | static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) |
1582 | { |
1583 | return uuid_equal(&a->uuid, &b->uuid) && |
1584 | memcmp(&a->nguid, &b->nguid, sizeof(a->nguid)) == 0 && |
1585 | memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0; |
1586 | } |
1587 | |
1588 | static void nvme_update_disk_info(struct gendisk *disk, |
1589 | struct nvme_ns *ns, struct nvme_id_ns *id) |
1590 | { |
1591 | sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9); |
1592 | unsigned short bs = 1 << ns->lba_shift; |
1593 | |
1594 | blk_mq_freeze_queue(disk->queue); |
1595 | blk_integrity_unregister(disk); |
1596 | |
1597 | blk_queue_logical_block_size(disk->queue, bs); |
1598 | blk_queue_physical_block_size(disk->queue, bs); |
1599 | blk_queue_io_min(disk->queue, bs); |
1600 | |
1601 | if (ns->ms && !ns->ext && |
1602 | (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) |
1603 | nvme_init_integrity(disk, ns->ms, ns->pi_type); |
1604 | if (ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) |
1605 | capacity = 0; |
1606 | |
1607 | set_capacity(disk, capacity); |
1608 | |
1609 | nvme_config_discard(disk, ns); |
1610 | nvme_config_write_zeroes(disk, ns); |
1611 | |
1612 | if (id->nsattr & (1 << 0)) |
1613 | set_disk_ro(disk, true); |
1614 | else |
1615 | set_disk_ro(disk, false); |
1616 | |
1617 | blk_mq_unfreeze_queue(disk->queue); |
1618 | } |
1619 | |
1620 | static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) |
1621 | { |
1622 | struct nvme_ns *ns = disk->private_data; |
1623 | |
/*
 * If Identify Namespace failed, use the default 512-byte block size so
 * the block layer can use the disk before failing reads/writes for a
 * zero capacity.
 */
1628 | ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds; |
1629 | if (ns->lba_shift == 0) |
1630 | ns->lba_shift = 9; |
1631 | ns->noiob = le16_to_cpu(id->noiob); |
1632 | ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); |
1633 | ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); |
/* the PI implementation requires metadata equal to the t10 pi tuple size */
1635 | if (ns->ms == sizeof(struct t10_pi_tuple)) |
1636 | ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK; |
1637 | else |
1638 | ns->pi_type = 0; |
1639 | |
1640 | if (ns->noiob) |
1641 | nvme_set_chunk_size(ns); |
1642 | nvme_update_disk_info(disk, ns, id); |
1643 | #ifdef CONFIG_NVME_MULTIPATH |
1644 | if (ns->head->disk) { |
1645 | nvme_update_disk_info(ns->head->disk, ns, id); |
1646 | blk_queue_stack_limits(ns->head->disk->queue, ns->queue); |
1647 | } |
1648 | #endif |
1649 | } |
1650 | |
1651 | static int nvme_revalidate_disk(struct gendisk *disk) |
1652 | { |
1653 | struct nvme_ns *ns = disk->private_data; |
1654 | struct nvme_ctrl *ctrl = ns->ctrl; |
1655 | struct nvme_id_ns *id; |
1656 | struct nvme_ns_ids ids; |
1657 | int ret = 0; |
1658 | |
1659 | if (test_bit(NVME_NS_DEAD, &ns->flags)) { |
1660 | set_capacity(disk, 0); |
1661 | return -ENODEV; |
1662 | } |
1663 | |
1664 | id = nvme_identify_ns(ctrl, ns->head->ns_id); |
1665 | if (!id) |
1666 | return -ENODEV; |
1667 | |
1668 | if (id->ncap == 0) { |
1669 | ret = -ENODEV; |
1670 | goto out; |
1671 | } |
1672 | |
1673 | __nvme_revalidate_disk(disk, id); |
1674 | nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids); |
1675 | if (!nvme_ns_ids_equal(&ns->head->ids, &ids)) { |
1676 | dev_err(ctrl->device, |
"identifiers changed for nsid %d\n", ns->head->ns_id);
1678 | ret = -ENODEV; |
1679 | } |
1680 | |
1681 | out: |
1682 | kfree(id); |
1683 | return ret; |
1684 | } |
1685 | |
1686 | static char nvme_pr_type(enum pr_type type) |
1687 | { |
1688 | switch (type) { |
1689 | case PR_WRITE_EXCLUSIVE: |
1690 | return 1; |
1691 | case PR_EXCLUSIVE_ACCESS: |
1692 | return 2; |
1693 | case PR_WRITE_EXCLUSIVE_REG_ONLY: |
1694 | return 3; |
1695 | case PR_EXCLUSIVE_ACCESS_REG_ONLY: |
1696 | return 4; |
1697 | case PR_WRITE_EXCLUSIVE_ALL_REGS: |
1698 | return 5; |
1699 | case PR_EXCLUSIVE_ACCESS_ALL_REGS: |
1700 | return 6; |
1701 | default: |
1702 | return 0; |
1703 | } |
}
1705 | |
1706 | static int nvme_pr_command(struct block_device *bdev, u32 cdw10, |
1707 | u64 key, u64 sa_key, u8 op) |
1708 | { |
1709 | struct nvme_ns_head *head = NULL; |
1710 | struct nvme_ns *ns; |
1711 | struct nvme_command c; |
1712 | int srcu_idx, ret; |
1713 | u8 data[16] = { 0, }; |
1714 | |
1715 | ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); |
1716 | if (unlikely(!ns)) |
1717 | return -EWOULDBLOCK; |
1718 | |
1719 | put_unaligned_le64(key, &data[0]); |
1720 | put_unaligned_le64(sa_key, &data[8]); |
1721 | |
1722 | memset(&c, 0, sizeof(c)); |
1723 | c.common.opcode = op; |
1724 | c.common.nsid = cpu_to_le32(ns->head->ns_id); |
1725 | c.common.cdw10 = cpu_to_le32(cdw10); |
1726 | |
1727 | ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16); |
1728 | nvme_put_ns_from_disk(head, srcu_idx); |
1729 | return ret; |
1730 | } |
1731 | |
1732 | static int nvme_pr_register(struct block_device *bdev, u64 old, |
1733 | u64 new, unsigned flags) |
1734 | { |
1735 | u32 cdw10; |
1736 | |
1737 | if (flags & ~PR_FL_IGNORE_KEY) |
1738 | return -EOPNOTSUPP; |
1739 | |
1740 | cdw10 = old ? 2 : 0; |
1741 | cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0; |
1742 | cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */ |
1743 | return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register); |
1744 | } |
1745 | |
1746 | static int nvme_pr_reserve(struct block_device *bdev, u64 key, |
1747 | enum pr_type type, unsigned flags) |
1748 | { |
1749 | u32 cdw10; |
1750 | |
1751 | if (flags & ~PR_FL_IGNORE_KEY) |
1752 | return -EOPNOTSUPP; |
1753 | |
1754 | cdw10 = nvme_pr_type(type) << 8; |
1755 | cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0); |
1756 | return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire); |
1757 | } |
1758 | |
1759 | static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, |
1760 | enum pr_type type, bool abort) |
1761 | { |
1762 | u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1); |
1763 | return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); |
1764 | } |
1765 | |
static int nvme_pr_clear(struct block_device *bdev, u64 key)
{
	/* Clear is a Reservation Release action (RRELA = 001b), not a Register */
	u32 cdw10 = 1 | (key ? 1 << 3 : 0);
	return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
}
1771 | |
1772 | static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) |
1773 | { |
1774 | u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0); |
1775 | return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); |
1776 | } |
1777 | |
1778 | static const struct pr_ops nvme_pr_ops = { |
1779 | .pr_register = nvme_pr_register, |
1780 | .pr_reserve = nvme_pr_reserve, |
1781 | .pr_release = nvme_pr_release, |
1782 | .pr_preempt = nvme_pr_preempt, |
1783 | .pr_clear = nvme_pr_clear, |
1784 | }; |
1785 | |
1786 | #ifdef CONFIG_BLK_SED_OPAL |
1787 | int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, |
1788 | bool send) |
1789 | { |
1790 | struct nvme_ctrl *ctrl = data; |
1791 | struct nvme_command cmd; |
1792 | |
1793 | memset(&cmd, 0, sizeof(cmd)); |
1794 | if (send) |
1795 | cmd.common.opcode = nvme_admin_security_send; |
1796 | else |
1797 | cmd.common.opcode = nvme_admin_security_recv; |
1798 | cmd.common.nsid = 0; |
1799 | cmd.common.cdw10 = cpu_to_le32(((u32)secp) << 24 | ((u32)spsp) << 8); |
1800 | cmd.common.cdw11 = cpu_to_le32(len); |
1801 | |
1802 | return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len, |
1803 | ADMIN_TIMEOUT, NVME_QID_ANY, 1, 0, false); |
1804 | } |
1805 | EXPORT_SYMBOL_GPL(nvme_sec_submit); |
1806 | #endif /* CONFIG_BLK_SED_OPAL */ |
1807 | |
1808 | static const struct block_device_operations nvme_fops = { |
1809 | .owner = THIS_MODULE, |
1810 | .ioctl = nvme_ioctl, |
1811 | .compat_ioctl = nvme_ioctl, |
1812 | .open = nvme_open, |
1813 | .release = nvme_release, |
1814 | .getgeo = nvme_getgeo, |
	.revalidate_disk = nvme_revalidate_disk,
1816 | .pr_ops = &nvme_pr_ops, |
1817 | }; |
1818 | |
1819 | #ifdef CONFIG_NVME_MULTIPATH |
1820 | static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode) |
1821 | { |
1822 | struct nvme_ns_head *head = bdev->bd_disk->private_data; |
1823 | |
1824 | if (!kref_get_unless_zero(&head->ref)) |
1825 | return -ENXIO; |
1826 | return 0; |
1827 | } |
1828 | |
1829 | static void nvme_ns_head_release(struct gendisk *disk, fmode_t mode) |
1830 | { |
1831 | nvme_put_ns_head(disk->private_data); |
1832 | } |
1833 | |
1834 | const struct block_device_operations nvme_ns_head_ops = { |
1835 | .owner = THIS_MODULE, |
1836 | .open = nvme_ns_head_open, |
1837 | .release = nvme_ns_head_release, |
1838 | .ioctl = nvme_ioctl, |
1839 | .compat_ioctl = nvme_ioctl, |
1840 | .getgeo = nvme_getgeo, |
1841 | .pr_ops = &nvme_pr_ops, |
1842 | }; |
1843 | #endif /* CONFIG_NVME_MULTIPATH */ |
1844 | |
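/*
 * Poll CSTS.RDY until it matches the expected state.  CAP.TO is expressed
 * in 500ms units, hence the (TO + 1) * HZ / 2 timeout calculation.
 */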
1845 | static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) |
1846 | { |
1847 | unsigned long timeout = |
1848 | ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies; |
1849 | u32 csts, bit = enabled ? NVME_CSTS_RDY : 0; |
1850 | int ret; |
1851 | |
1852 | while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) { |
1853 | if (csts == ~0) |
1854 | return -ENODEV; |
1855 | if ((csts & NVME_CSTS_RDY) == bit) |
1856 | break; |
1857 | |
1858 | msleep(100); |
1859 | if (fatal_signal_pending(current)) |
1860 | return -EINTR; |
1861 | if (time_after(jiffies, timeout)) { |
1862 | dev_err(ctrl->device, |
1863 | "Device not ready; aborting %s\n" , enabled ? |
1864 | "initialisation" : "reset" ); |
1865 | return -ENODEV; |
1866 | } |
1867 | } |
1868 | |
1869 | return ret; |
1870 | } |
1871 | |
1872 | /* |
1873 | * If the device has been passed off to us in an enabled state, just clear |
1874 | * the enabled bit. The spec says we should set the 'shutdown notification |
1875 | * bits', but doing so may cause the device to complete commands to the |
1876 | * admin queue ... and we don't know what memory that might be pointing at! |
1877 | */ |
1878 | int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap) |
1879 | { |
1880 | int ret; |
1881 | |
1882 | ctrl->ctrl_config &= ~NVME_CC_SHN_MASK; |
1883 | ctrl->ctrl_config &= ~NVME_CC_ENABLE; |
1884 | |
1885 | ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); |
1886 | if (ret) |
1887 | return ret; |
1888 | |
1889 | if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) |
1890 | msleep(NVME_QUIRK_DELAY_AMOUNT); |
1891 | |
1892 | return nvme_wait_ready(ctrl, cap, false); |
1893 | } |
1894 | EXPORT_SYMBOL_GPL(nvme_disable_ctrl); |
1895 | |
1896 | int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap) |
1897 | { |
1898 | /* |
1899 | * Default to a 4K page size, with the intention to update this |
	 * path in the future to accommodate architectures with differing
1901 | * kernel and IO page sizes. |
1902 | */ |
1903 | unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12, page_shift = 12; |
1904 | int ret; |
1905 | |
1906 | if (page_shift < dev_page_min) { |
1907 | dev_err(ctrl->device, |
1908 | "Minimum device page size %u too large for host (%u)\n" , |
1909 | 1 << dev_page_min, 1 << page_shift); |
1910 | return -ENODEV; |
1911 | } |
1912 | |
1913 | ctrl->page_size = 1 << page_shift; |
1914 | |
1915 | ctrl->ctrl_config = NVME_CC_CSS_NVM; |
1916 | ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT; |
1917 | ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE; |
1918 | ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; |
1919 | ctrl->ctrl_config |= NVME_CC_ENABLE; |
1920 | |
1921 | ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); |
1922 | if (ret) |
1923 | return ret; |
1924 | return nvme_wait_ready(ctrl, cap, true); |
1925 | } |
1926 | EXPORT_SYMBOL_GPL(nvme_enable_ctrl); |
1927 | |
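/*
 * Request a normal shutdown via CC.SHN and poll CSTS.SHST until the
 * controller reports that shutdown processing is complete, or until
 * shutdown_timeout (capped from RTD3E at identify time) expires.
 */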
1928 | int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl) |
1929 | { |
1930 | unsigned long timeout = jiffies + (ctrl->shutdown_timeout * HZ); |
1931 | u32 csts; |
1932 | int ret; |
1933 | |
1934 | ctrl->ctrl_config &= ~NVME_CC_SHN_MASK; |
1935 | ctrl->ctrl_config |= NVME_CC_SHN_NORMAL; |
1936 | |
1937 | ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); |
1938 | if (ret) |
1939 | return ret; |
1940 | |
1941 | while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) { |
1942 | if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT) |
1943 | break; |
1944 | |
1945 | msleep(100); |
1946 | if (fatal_signal_pending(current)) |
1947 | return -EINTR; |
1948 | if (time_after(jiffies, timeout)) { |
1949 | dev_err(ctrl->device, |
1950 | "Device shutdown incomplete; abort shutdown\n" ); |
1951 | return -ENODEV; |
1952 | } |
1953 | } |
1954 | |
1955 | return ret; |
1956 | } |
1957 | EXPORT_SYMBOL_GPL(nvme_shutdown_ctrl); |
1958 | |
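/*
 * Propagate controller limits (MDTS-derived max_hw_sectors, segment count,
 * PRP virt boundary, volatile write cache) to a block request_queue.  The
 * segment estimate assumes one PRP entry per controller page plus one
 * extra for an unaligned first page.
 */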
1959 | static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, |
1960 | struct request_queue *q) |
1961 | { |
1962 | bool vwc = false; |
1963 | |
1964 | if (ctrl->max_hw_sectors) { |
1965 | u32 max_segments = |
1966 | (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1; |
1967 | |
1968 | max_segments = min_not_zero(max_segments, ctrl->max_segments); |
1969 | blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); |
1970 | blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); |
1971 | } |
1972 | if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && |
1973 | is_power_of_2(ctrl->max_hw_sectors)) |
1974 | blk_queue_chunk_sectors(q, ctrl->max_hw_sectors); |
1975 | blk_queue_virt_boundary(q, ctrl->page_size - 1); |
1976 | if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) |
1977 | vwc = true; |
1978 | blk_queue_write_cache(q, vwc, vwc); |
1979 | } |
1980 | |
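/*
 * If the controller reports the Timestamp feature in ONCS, seed it with
 * the current wall-clock time in milliseconds so device logs carry
 * meaningful timestamps; a failure is only logged once.
 */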
1981 | static int nvme_configure_timestamp(struct nvme_ctrl *ctrl) |
1982 | { |
1983 | __le64 ts; |
1984 | int ret; |
1985 | |
1986 | if (!(ctrl->oncs & NVME_CTRL_ONCS_TIMESTAMP)) |
1987 | return 0; |
1988 | |
1989 | ts = cpu_to_le64(ktime_to_ms(ktime_get_real())); |
1990 | ret = nvme_set_features(ctrl, NVME_FEAT_TIMESTAMP, 0, &ts, sizeof(ts), |
1991 | NULL); |
1992 | if (ret) |
1993 | dev_warn_once(ctrl->device, |
1994 | "could not set timestamp (%d)\n" , ret); |
1995 | return ret; |
1996 | } |
1997 | |
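/*
 * Enable Advanced Command Retry (ACRE) via the Host Behavior Support
 * feature when the controller advertises command retry delay times
 * (CRDT), so it may ask the host to back off before retrying a command.
 */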
1998 | static int nvme_configure_acre(struct nvme_ctrl *ctrl) |
1999 | { |
2000 | struct nvme_feat_host_behavior *host; |
2001 | int ret; |
2002 | |
2003 | /* Don't bother enabling the feature if retry delay is not reported */ |
2004 | if (!ctrl->crdt[0]) |
2005 | return 0; |
2006 | |
2007 | host = kzalloc(sizeof(*host), GFP_KERNEL); |
2008 | if (!host) |
2009 | return 0; |
2010 | |
2011 | host->acre = NVME_ENABLE_ACRE; |
2012 | ret = nvme_set_features(ctrl, NVME_FEAT_HOST_BEHAVIOR, 0, |
2013 | host, sizeof(*host), NULL); |
2014 | kfree(host); |
2015 | return ret; |
2016 | } |
2017 | |
2018 | static int nvme_configure_apst(struct nvme_ctrl *ctrl) |
2019 | { |
2020 | /* |
2021 | * APST (Autonomous Power State Transition) lets us program a |
2022 | * table of power state transitions that the controller will |
2023 | * perform automatically. We configure it with a simple |
2024 | * heuristic: we are willing to spend at most 2% of the time |
2025 | * transitioning between power states. Therefore, when running |
2026 | * in any given state, we will enter the next lower-power |
2027 | * non-operational state after waiting 50 * (enlat + exlat) |
2028 | * microseconds, as long as that state's exit latency is under |
2029 | * the requested maximum latency. |
2030 | * |
2031 | * We will not autonomously enter any non-operational state for |
2032 | * which the total latency exceeds ps_max_latency_us. Users |
2033 | * can set ps_max_latency_us to zero to turn off APST. |
2034 | */ |
2035 | |
2036 | unsigned apste; |
2037 | struct nvme_feat_auto_pst *table; |
2038 | u64 max_lat_us = 0; |
2039 | int max_ps = -1; |
2040 | int ret; |
2041 | |
2042 | /* |
2043 | * If APST isn't supported or if we haven't been initialized yet, |
2044 | * then don't do anything. |
2045 | */ |
2046 | if (!ctrl->apsta) |
2047 | return 0; |
2048 | |
2049 | if (ctrl->npss > 31) { |
2050 | dev_warn(ctrl->device, "NPSS is invalid; not using APST\n" ); |
2051 | return 0; |
2052 | } |
2053 | |
2054 | table = kzalloc(sizeof(*table), GFP_KERNEL); |
2055 | if (!table) |
2056 | return 0; |
2057 | |
2058 | if (!ctrl->apst_enabled || ctrl->ps_max_latency_us == 0) { |
2059 | /* Turn off APST. */ |
2060 | apste = 0; |
2061 | dev_dbg(ctrl->device, "APST disabled\n" ); |
2062 | } else { |
2063 | __le64 target = cpu_to_le64(0); |
2064 | int state; |
2065 | |
2066 | /* |
2067 | * Walk through all states from lowest- to highest-power. |
2068 | * According to the spec, lower-numbered states use more |
2069 | * power. NPSS, despite the name, is the index of the |
2070 | * lowest-power state, not the number of states. |
2071 | */ |
2072 | for (state = (int)ctrl->npss; state >= 0; state--) { |
2073 | u64 total_latency_us, exit_latency_us, transition_ms; |
2074 | |
2075 | if (target) |
2076 | table->entries[state] = target; |
2077 | |
2078 | /* |
2079 | * Don't allow transitions to the deepest state |
2080 | * if it's quirked off. |
2081 | */ |
2082 | if (state == ctrl->npss && |
2083 | (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) |
2084 | continue; |
2085 | |
2086 | /* |
2087 | * Is this state a useful non-operational state for |
2088 | * higher-power states to autonomously transition to? |
2089 | */ |
2090 | if (!(ctrl->psd[state].flags & |
2091 | NVME_PS_FLAGS_NON_OP_STATE)) |
2092 | continue; |
2093 | |
2094 | exit_latency_us = |
2095 | (u64)le32_to_cpu(ctrl->psd[state].exit_lat); |
2096 | if (exit_latency_us > ctrl->ps_max_latency_us) |
2097 | continue; |
2098 | |
2099 | total_latency_us = |
2100 | exit_latency_us + |
2101 | le32_to_cpu(ctrl->psd[state].entry_lat); |
2102 | |
2103 | /* |
2104 | * This state is good. Use it as the APST idle |
2105 | * target for higher power states. |
2106 | */ |
2107 | transition_ms = total_latency_us + 19; |
2108 | do_div(transition_ms, 20); |
2109 | if (transition_ms > (1 << 24) - 1) |
2110 | transition_ms = (1 << 24) - 1; |
2111 | |
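			/*
			 * APST table entry format (per the NVMe spec): the
			 * Idle Transition Power State sits in bits 7:3 and
			 * the Idle Time Prior to Transition, in milliseconds,
			 * in bits 31:8, hence the shifts below.
			 */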
2112 | target = cpu_to_le64((state << 3) | |
2113 | (transition_ms << 8)); |
2114 | |
2115 | if (max_ps == -1) |
2116 | max_ps = state; |
2117 | |
2118 | if (total_latency_us > max_lat_us) |
2119 | max_lat_us = total_latency_us; |
2120 | } |
2121 | |
2122 | apste = 1; |
2123 | |
2124 | if (max_ps == -1) { |
2125 | dev_dbg(ctrl->device, "APST enabled but no non-operational states are available\n" ); |
2126 | } else { |
2127 | dev_dbg(ctrl->device, "APST enabled: max PS = %d, max round-trip latency = %lluus, table = %*phN\n" , |
2128 | max_ps, max_lat_us, (int)sizeof(*table), table); |
2129 | } |
2130 | } |
2131 | |
2132 | ret = nvme_set_features(ctrl, NVME_FEAT_AUTO_PST, apste, |
2133 | table, sizeof(*table), NULL); |
2134 | if (ret) |
2135 | dev_err(ctrl->device, "failed to set APST feature (%d)\n" , ret); |
2136 | |
2137 | kfree(table); |
2138 | return ret; |
2139 | } |
2140 | |
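/*
 * PM QoS latency tolerance callback: user space (or the module parameter
 * default) expresses the acceptable wake-up latency for the device, and
 * any change is folded back into the APST table.
 */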
2141 | static void nvme_set_latency_tolerance(struct device *dev, s32 val) |
2142 | { |
2143 | struct nvme_ctrl *ctrl = dev_get_drvdata(dev); |
2144 | u64 latency; |
2145 | |
2146 | switch (val) { |
2147 | case PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT: |
2148 | case PM_QOS_LATENCY_ANY: |
2149 | latency = U64_MAX; |
2150 | break; |
2151 | |
2152 | default: |
2153 | latency = val; |
2154 | } |
2155 | |
2156 | if (ctrl->ps_max_latency_us != latency) { |
2157 | ctrl->ps_max_latency_us = latency; |
2158 | nvme_configure_apst(ctrl); |
2159 | } |
2160 | } |
2161 | |
2162 | struct nvme_core_quirk_entry { |
2163 | /* |
2164 | * NVMe model and firmware strings are padded with spaces. For |
2165 | * simplicity, strings in the quirk table are padded with NULLs |
2166 | * instead. |
2167 | */ |
2168 | u16 vid; |
2169 | const char *mn; |
2170 | const char *fr; |
2171 | unsigned long quirks; |
2172 | }; |
2173 | |
2174 | static const struct nvme_core_quirk_entry core_quirks[] = { |
2175 | { |
2176 | /* |
2177 | * This Toshiba device seems to die using any APST states. See: |
2178 | * https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1678184/comments/11 |
2179 | */ |
2180 | .vid = 0x1179, |
2181 | .mn = "THNSF5256GPUK TOSHIBA" , |
2182 | .quirks = NVME_QUIRK_NO_APST, |
2183 | } |
2184 | }; |
2185 | |
2186 | /* match is null-terminated but idstr is space-padded. */ |
2187 | static bool string_matches(const char *idstr, const char *match, size_t len) |
2188 | { |
2189 | size_t matchlen; |
2190 | |
2191 | if (!match) |
2192 | return true; |
2193 | |
2194 | matchlen = strlen(match); |
2195 | WARN_ON_ONCE(matchlen > len); |
2196 | |
2197 | if (memcmp(idstr, match, matchlen)) |
2198 | return false; |
2199 | |
2200 | for (; matchlen < len; matchlen++) |
2201 | if (idstr[matchlen] != ' ') |
2202 | return false; |
2203 | |
2204 | return true; |
2205 | } |
2206 | |
2207 | static bool quirk_matches(const struct nvme_id_ctrl *id, |
2208 | const struct nvme_core_quirk_entry *q) |
2209 | { |
2210 | return q->vid == le16_to_cpu(id->vid) && |
2211 | string_matches(id->mn, q->mn, sizeof(id->mn)) && |
2212 | string_matches(id->fr, q->fr, sizeof(id->fr)); |
2213 | } |
2214 | |
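/*
 * Derive the subsystem NQN: prefer the SUBNQN field from Identify
 * Controller when present (and not quirked off), otherwise synthesize a
 * unique placeholder NQN from the vendor IDs, serial number and model
 * string as the spec describes for older controllers.
 */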
2215 | static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ctrl, |
2216 | struct nvme_id_ctrl *id) |
2217 | { |
2218 | size_t nqnlen; |
2219 | int off; |
2220 | |
	if (!(ctrl->quirks & NVME_QUIRK_IGNORE_DEV_SUBNQN)) {
2222 | nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE); |
2223 | if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) { |
2224 | strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE); |
2225 | return; |
2226 | } |
2227 | |
2228 | if (ctrl->vs >= NVME_VS(1, 2, 1)) |
2229 | dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n" ); |
2230 | } |
2231 | |
2232 | /* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */ |
2233 | off = snprintf(subsys->subnqn, NVMF_NQN_SIZE, |
2234 | "nqn.2014.08.org.nvmexpress:%04x%04x" , |
2235 | le16_to_cpu(id->vid), le16_to_cpu(id->ssvid)); |
2236 | memcpy(subsys->subnqn + off, id->sn, sizeof(id->sn)); |
2237 | off += sizeof(id->sn); |
2238 | memcpy(subsys->subnqn + off, id->mn, sizeof(id->mn)); |
2239 | off += sizeof(id->mn); |
2240 | memset(subsys->subnqn + off, 0, sizeof(subsys->subnqn) - off); |
2241 | } |
2242 | |
2243 | static void __nvme_release_subsystem(struct nvme_subsystem *subsys) |
2244 | { |
2245 | ida_simple_remove(&nvme_subsystems_ida, subsys->instance); |
2246 | kfree(subsys); |
2247 | } |
2248 | |
2249 | static void nvme_release_subsystem(struct device *dev) |
2250 | { |
2251 | __nvme_release_subsystem(container_of(dev, struct nvme_subsystem, dev)); |
2252 | } |
2253 | |
2254 | static void nvme_destroy_subsystem(struct kref *ref) |
2255 | { |
2256 | struct nvme_subsystem *subsys = |
2257 | container_of(ref, struct nvme_subsystem, ref); |
2258 | |
2259 | mutex_lock(&nvme_subsystems_lock); |
2260 | list_del(&subsys->entry); |
2261 | mutex_unlock(&nvme_subsystems_lock); |
2262 | |
2263 | ida_destroy(&subsys->ns_ida); |
2264 | device_del(&subsys->dev); |
2265 | put_device(&subsys->dev); |
2266 | } |
2267 | |
2268 | static void nvme_put_subsystem(struct nvme_subsystem *subsys) |
2269 | { |
2270 | kref_put(&subsys->ref, nvme_destroy_subsystem); |
2271 | } |
2272 | |
2273 | static struct nvme_subsystem *__nvme_find_get_subsystem(const char *subsysnqn) |
2274 | { |
2275 | struct nvme_subsystem *subsys; |
2276 | |
2277 | lockdep_assert_held(&nvme_subsystems_lock); |
2278 | |
2279 | list_for_each_entry(subsys, &nvme_subsystems, entry) { |
2280 | if (strcmp(subsys->subnqn, subsysnqn)) |
2281 | continue; |
2282 | if (!kref_get_unless_zero(&subsys->ref)) |
2283 | continue; |
2284 | return subsys; |
2285 | } |
2286 | |
2287 | return NULL; |
2288 | } |
2289 | |
2290 | #define SUBSYS_ATTR_RO(_name, _mode, _show) \ |
2291 | struct device_attribute subsys_attr_##_name = \ |
2292 | __ATTR(_name, _mode, _show, NULL) |
2293 | |
2294 | static ssize_t nvme_subsys_show_nqn(struct device *dev, |
2295 | struct device_attribute *attr, |
2296 | char *buf) |
2297 | { |
2298 | struct nvme_subsystem *subsys = |
2299 | container_of(dev, struct nvme_subsystem, dev); |
2300 | |
2301 | return snprintf(buf, PAGE_SIZE, "%s\n" , subsys->subnqn); |
2302 | } |
2303 | static SUBSYS_ATTR_RO(subsysnqn, S_IRUGO, nvme_subsys_show_nqn); |
2304 | |
2305 | #define nvme_subsys_show_str_function(field) \ |
2306 | static ssize_t subsys_##field##_show(struct device *dev, \ |
2307 | struct device_attribute *attr, char *buf) \ |
2308 | { \ |
2309 | struct nvme_subsystem *subsys = \ |
2310 | container_of(dev, struct nvme_subsystem, dev); \ |
2311 | return sprintf(buf, "%.*s\n", \ |
2312 | (int)sizeof(subsys->field), subsys->field); \ |
2313 | } \ |
2314 | static SUBSYS_ATTR_RO(field, S_IRUGO, subsys_##field##_show); |
2315 | |
2316 | nvme_subsys_show_str_function(model); |
2317 | nvme_subsys_show_str_function(serial); |
2318 | nvme_subsys_show_str_function(firmware_rev); |
2319 | |
2320 | static struct attribute *nvme_subsys_attrs[] = { |
2321 | &subsys_attr_model.attr, |
2322 | &subsys_attr_serial.attr, |
2323 | &subsys_attr_firmware_rev.attr, |
2324 | &subsys_attr_subsysnqn.attr, |
2325 | #ifdef CONFIG_NVME_MULTIPATH |
2326 | &subsys_attr_iopolicy.attr, |
2327 | #endif |
2328 | NULL, |
2329 | }; |
2330 | |
2331 | static struct attribute_group nvme_subsys_attrs_group = { |
2332 | .attrs = nvme_subsys_attrs, |
2333 | }; |
2334 | |
2335 | static const struct attribute_group *nvme_subsys_attrs_groups[] = { |
2336 | &nvme_subsys_attrs_group, |
2337 | NULL, |
2338 | }; |
2339 | |
2340 | static int nvme_active_ctrls(struct nvme_subsystem *subsys) |
2341 | { |
2342 | int count = 0; |
2343 | struct nvme_ctrl *ctrl; |
2344 | |
2345 | mutex_lock(&subsys->lock); |
2346 | list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { |
2347 | if (ctrl->state != NVME_CTRL_DELETING && |
2348 | ctrl->state != NVME_CTRL_DEAD) |
2349 | count++; |
2350 | } |
2351 | mutex_unlock(&subsys->lock); |
2352 | |
2353 | return count; |
2354 | } |
2355 | |
2356 | static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) |
2357 | { |
2358 | struct nvme_subsystem *subsys, *found; |
2359 | int ret; |
2360 | |
2361 | subsys = kzalloc(sizeof(*subsys), GFP_KERNEL); |
2362 | if (!subsys) |
2363 | return -ENOMEM; |
2364 | ret = ida_simple_get(&nvme_subsystems_ida, 0, 0, GFP_KERNEL); |
2365 | if (ret < 0) { |
2366 | kfree(subsys); |
2367 | return ret; |
2368 | } |
2369 | subsys->instance = ret; |
2370 | mutex_init(&subsys->lock); |
2371 | kref_init(&subsys->ref); |
2372 | INIT_LIST_HEAD(&subsys->ctrls); |
2373 | INIT_LIST_HEAD(&subsys->nsheads); |
2374 | nvme_init_subnqn(subsys, ctrl, id); |
2375 | memcpy(subsys->serial, id->sn, sizeof(subsys->serial)); |
2376 | memcpy(subsys->model, id->mn, sizeof(subsys->model)); |
2377 | memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev)); |
2378 | subsys->vendor_id = le16_to_cpu(id->vid); |
2379 | subsys->cmic = id->cmic; |
2380 | #ifdef CONFIG_NVME_MULTIPATH |
2381 | subsys->iopolicy = NVME_IOPOLICY_NUMA; |
2382 | #endif |
2383 | |
2384 | subsys->dev.class = nvme_subsys_class; |
2385 | subsys->dev.release = nvme_release_subsystem; |
2386 | subsys->dev.groups = nvme_subsys_attrs_groups; |
2387 | dev_set_name(&subsys->dev, "nvme-subsys%d" , subsys->instance); |
2388 | device_initialize(&subsys->dev); |
2389 | |
2390 | mutex_lock(&nvme_subsystems_lock); |
2391 | found = __nvme_find_get_subsystem(subsys->subnqn); |
2392 | if (found) { |
2393 | /* |
2394 | * Verify that the subsystem actually supports multiple |
2395 | * controllers, else bail out. |
2396 | */ |
2397 | if (!(ctrl->opts && ctrl->opts->discovery_nqn) && |
2398 | nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) { |
2399 | dev_err(ctrl->device, |
2400 | "ignoring ctrl due to duplicate subnqn (%s).\n" , |
2401 | found->subnqn); |
2402 | nvme_put_subsystem(found); |
2403 | ret = -EINVAL; |
2404 | goto out_unlock; |
2405 | } |
2406 | |
2407 | __nvme_release_subsystem(subsys); |
2408 | subsys = found; |
2409 | } else { |
2410 | ret = device_add(&subsys->dev); |
2411 | if (ret) { |
2412 | dev_err(ctrl->device, |
2413 | "failed to register subsystem device.\n" ); |
2414 | goto out_unlock; |
2415 | } |
2416 | ida_init(&subsys->ns_ida); |
2417 | list_add_tail(&subsys->entry, &nvme_subsystems); |
2418 | } |
2419 | |
2420 | ctrl->subsys = subsys; |
2421 | mutex_unlock(&nvme_subsystems_lock); |
2422 | |
2423 | if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj, |
2424 | dev_name(ctrl->device))) { |
2425 | dev_err(ctrl->device, |
2426 | "failed to create sysfs link from subsystem.\n" ); |
2427 | /* the transport driver will eventually put the subsystem */ |
2428 | return -EINVAL; |
2429 | } |
2430 | |
2431 | mutex_lock(&subsys->lock); |
2432 | list_add_tail(&ctrl->subsys_entry, &subsys->ctrls); |
2433 | mutex_unlock(&subsys->lock); |
2434 | |
2435 | return 0; |
2436 | |
2437 | out_unlock: |
2438 | mutex_unlock(&nvme_subsystems_lock); |
2439 | put_device(&subsys->dev); |
2440 | return ret; |
2441 | } |
2442 | |
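/*
 * Build and submit a Get Log Page command.  The transfer length is a
 * zero-based dword count split across NUMDL/NUMDU, and the byte offset
 * into the log is split across LPOL/LPOU, which is what the arithmetic
 * below implements.
 */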
2443 | int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, |
2444 | void *log, size_t size, u64 offset) |
2445 | { |
2446 | struct nvme_command c = { }; |
2447 | unsigned long dwlen = size / 4 - 1; |
2448 | |
2449 | c.get_log_page.opcode = nvme_admin_get_log_page; |
2450 | c.get_log_page.nsid = cpu_to_le32(nsid); |
2451 | c.get_log_page.lid = log_page; |
2452 | c.get_log_page.lsp = lsp; |
2453 | c.get_log_page.numdl = cpu_to_le16(dwlen & ((1 << 16) - 1)); |
2454 | c.get_log_page.numdu = cpu_to_le16(dwlen >> 16); |
2455 | c.get_log_page.lpol = cpu_to_le32(lower_32_bits(offset)); |
2456 | c.get_log_page.lpou = cpu_to_le32(upper_32_bits(offset)); |
2457 | |
2458 | return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size); |
2459 | } |
2460 | |
2461 | static int nvme_get_effects_log(struct nvme_ctrl *ctrl) |
2462 | { |
2463 | int ret; |
2464 | |
2465 | if (!ctrl->effects) |
2466 | ctrl->effects = kzalloc(sizeof(*ctrl->effects), GFP_KERNEL); |
2467 | |
2468 | if (!ctrl->effects) |
2469 | return 0; |
2470 | |
2471 | ret = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CMD_EFFECTS, 0, |
2472 | ctrl->effects, sizeof(*ctrl->effects), 0); |
2473 | if (ret) { |
2474 | kfree(ctrl->effects); |
2475 | ctrl->effects = NULL; |
2476 | } |
2477 | return ret; |
2478 | } |
2479 | |
2480 | /* |
2481 | * Initialize the cached copies of the Identify data and various controller |
 * registers in our nvme_ctrl structure. This should be called as soon as
2483 | * the admin queue is fully up and running. |
2484 | */ |
2485 | int nvme_init_identify(struct nvme_ctrl *ctrl) |
2486 | { |
2487 | struct nvme_id_ctrl *id; |
2488 | u64 cap; |
2489 | int ret, page_shift; |
2490 | u32 max_hw_sectors; |
2491 | bool prev_apst_enabled; |
2492 | |
2493 | ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs); |
2494 | if (ret) { |
2495 | dev_err(ctrl->device, "Reading VS failed (%d)\n" , ret); |
2496 | return ret; |
2497 | } |
2498 | |
2499 | ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap); |
2500 | if (ret) { |
2501 | dev_err(ctrl->device, "Reading CAP failed (%d)\n" , ret); |
2502 | return ret; |
2503 | } |
2504 | page_shift = NVME_CAP_MPSMIN(cap) + 12; |
2505 | |
2506 | if (ctrl->vs >= NVME_VS(1, 1, 0)) |
2507 | ctrl->subsystem = NVME_CAP_NSSRC(cap); |
2508 | |
2509 | ret = nvme_identify_ctrl(ctrl, &id); |
2510 | if (ret) { |
2511 | dev_err(ctrl->device, "Identify Controller failed (%d)\n" , ret); |
2512 | return -EIO; |
2513 | } |
2514 | |
2515 | if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) { |
2516 | ret = nvme_get_effects_log(ctrl); |
2517 | if (ret < 0) |
2518 | goto out_free; |
2519 | } |
2520 | |
2521 | if (!ctrl->identified) { |
2522 | int i; |
2523 | |
2524 | ret = nvme_init_subsystem(ctrl, id); |
2525 | if (ret) |
2526 | goto out_free; |
2527 | |
2528 | /* |
2529 | * Check for quirks. Quirk can depend on firmware version, |
2530 | * so, in principle, the set of quirks present can change |
2531 | * across a reset. As a possible future enhancement, we |
2532 | * could re-scan for quirks every time we reinitialize |
2533 | * the device, but we'd have to make sure that the driver |
2534 | * behaves intelligently if the quirks change. |
2535 | */ |
2536 | for (i = 0; i < ARRAY_SIZE(core_quirks); i++) { |
2537 | if (quirk_matches(id, &core_quirks[i])) |
2538 | ctrl->quirks |= core_quirks[i].quirks; |
2539 | } |
2540 | } |
2541 | |
2542 | if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) { |
2543 | dev_warn(ctrl->device, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n" ); |
2544 | ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS; |
2545 | } |
2546 | |
2547 | ctrl->crdt[0] = le16_to_cpu(id->crdt1); |
2548 | ctrl->crdt[1] = le16_to_cpu(id->crdt2); |
2549 | ctrl->crdt[2] = le16_to_cpu(id->crdt3); |
2550 | |
2551 | ctrl->oacs = le16_to_cpu(id->oacs); |
2552 | ctrl->oncs = le16_to_cpup(&id->oncs); |
2553 | ctrl->oaes = le32_to_cpu(id->oaes); |
2554 | atomic_set(&ctrl->abort_limit, id->acl + 1); |
2555 | ctrl->vwc = id->vwc; |
2556 | if (id->mdts) |
2557 | max_hw_sectors = 1 << (id->mdts + page_shift - 9); |
2558 | else |
2559 | max_hw_sectors = UINT_MAX; |
2560 | ctrl->max_hw_sectors = |
2561 | min_not_zero(ctrl->max_hw_sectors, max_hw_sectors); |
2562 | |
2563 | nvme_set_queue_limits(ctrl, ctrl->admin_q); |
2564 | ctrl->sgls = le32_to_cpu(id->sgls); |
2565 | ctrl->kas = le16_to_cpu(id->kas); |
2566 | ctrl->max_namespaces = le32_to_cpu(id->mnan); |
2567 | ctrl->ctratt = le32_to_cpu(id->ctratt); |
2568 | |
2569 | if (id->rtd3e) { |
2570 | /* us -> s */ |
2571 | u32 transition_time = le32_to_cpu(id->rtd3e) / 1000000; |
2572 | |
2573 | ctrl->shutdown_timeout = clamp_t(unsigned int, transition_time, |
2574 | shutdown_timeout, 60); |
2575 | |
2576 | if (ctrl->shutdown_timeout != shutdown_timeout) |
2577 | dev_info(ctrl->device, |
2578 | "Shutdown timeout set to %u seconds\n" , |
2579 | ctrl->shutdown_timeout); |
2580 | } else |
2581 | ctrl->shutdown_timeout = shutdown_timeout; |
2582 | |
2583 | ctrl->npss = id->npss; |
2584 | ctrl->apsta = id->apsta; |
2585 | prev_apst_enabled = ctrl->apst_enabled; |
2586 | if (ctrl->quirks & NVME_QUIRK_NO_APST) { |
2587 | if (force_apst && id->apsta) { |
2588 | dev_warn(ctrl->device, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n" ); |
2589 | ctrl->apst_enabled = true; |
2590 | } else { |
2591 | ctrl->apst_enabled = false; |
2592 | } |
2593 | } else { |
2594 | ctrl->apst_enabled = id->apsta; |
2595 | } |
2596 | memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd)); |
2597 | |
2598 | if (ctrl->ops->flags & NVME_F_FABRICS) { |
2599 | ctrl->icdoff = le16_to_cpu(id->icdoff); |
2600 | ctrl->ioccsz = le32_to_cpu(id->ioccsz); |
2601 | ctrl->iorcsz = le32_to_cpu(id->iorcsz); |
2602 | ctrl->maxcmd = le16_to_cpu(id->maxcmd); |
2603 | |
2604 | /* |
2605 | * In fabrics we need to verify the cntlid matches the |
2606 | * admin connect |
2607 | */ |
2608 | if (ctrl->cntlid != le16_to_cpu(id->cntlid)) { |
2609 | ret = -EINVAL; |
2610 | goto out_free; |
2611 | } |
2612 | |
2613 | if (!ctrl->opts->discovery_nqn && !ctrl->kas) { |
2614 | dev_err(ctrl->device, |
2615 | "keep-alive support is mandatory for fabrics\n" ); |
2616 | ret = -EINVAL; |
2617 | goto out_free; |
2618 | } |
2619 | } else { |
2620 | ctrl->cntlid = le16_to_cpu(id->cntlid); |
2621 | ctrl->hmpre = le32_to_cpu(id->hmpre); |
2622 | ctrl->hmmin = le32_to_cpu(id->hmmin); |
2623 | ctrl->hmminds = le32_to_cpu(id->hmminds); |
2624 | ctrl->hmmaxd = le16_to_cpu(id->hmmaxd); |
2625 | } |
2626 | |
2627 | ret = nvme_mpath_init(ctrl, id); |
2628 | kfree(id); |
2629 | |
2630 | if (ret < 0) |
2631 | return ret; |
2632 | |
2633 | if (ctrl->apst_enabled && !prev_apst_enabled) |
2634 | dev_pm_qos_expose_latency_tolerance(ctrl->device); |
2635 | else if (!ctrl->apst_enabled && prev_apst_enabled) |
2636 | dev_pm_qos_hide_latency_tolerance(ctrl->device); |
2637 | |
2638 | ret = nvme_configure_apst(ctrl); |
2639 | if (ret < 0) |
2640 | return ret; |
2641 | |
2642 | ret = nvme_configure_timestamp(ctrl); |
2643 | if (ret < 0) |
2644 | return ret; |
2645 | |
2646 | ret = nvme_configure_directives(ctrl); |
2647 | if (ret < 0) |
2648 | return ret; |
2649 | |
2650 | ret = nvme_configure_acre(ctrl); |
2651 | if (ret < 0) |
2652 | return ret; |
2653 | |
2654 | ctrl->identified = true; |
2655 | |
2656 | return 0; |
2657 | |
2658 | out_free: |
2659 | kfree(id); |
2660 | return ret; |
2661 | } |
2662 | EXPORT_SYMBOL_GPL(nvme_init_identify); |
2663 | |
2664 | static int nvme_dev_open(struct inode *inode, struct file *file) |
2665 | { |
2666 | struct nvme_ctrl *ctrl = |
2667 | container_of(inode->i_cdev, struct nvme_ctrl, cdev); |
2668 | |
2669 | switch (ctrl->state) { |
2670 | case NVME_CTRL_LIVE: |
2671 | case NVME_CTRL_ADMIN_ONLY: |
2672 | break; |
2673 | default: |
2674 | return -EWOULDBLOCK; |
2675 | } |
2676 | |
2677 | file->private_data = ctrl; |
2678 | return 0; |
2679 | } |
2680 | |
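/*
 * NVME_IOCTL_IO_CMD on the controller character device is only honoured
 * when exactly one namespace exists, since there is no way to tell which
 * namespace the caller meant; I/O commands should normally be issued
 * through the per-namespace block device instead.
 */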
2681 | static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) |
2682 | { |
2683 | struct nvme_ns *ns; |
2684 | int ret; |
2685 | |
2686 | down_read(&ctrl->namespaces_rwsem); |
2687 | if (list_empty(&ctrl->namespaces)) { |
2688 | ret = -ENOTTY; |
2689 | goto out_unlock; |
2690 | } |
2691 | |
2692 | ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list); |
2693 | if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) { |
2694 | dev_warn(ctrl->device, |
2695 | "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n" ); |
2696 | ret = -EINVAL; |
2697 | goto out_unlock; |
2698 | } |
2699 | |
2700 | dev_warn(ctrl->device, |
2701 | "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n" ); |
2702 | kref_get(&ns->kref); |
2703 | up_read(&ctrl->namespaces_rwsem); |
2704 | |
2705 | ret = nvme_user_cmd(ctrl, ns, argp); |
2706 | nvme_put_ns(ns); |
2707 | return ret; |
2708 | |
2709 | out_unlock: |
2710 | up_read(&ctrl->namespaces_rwsem); |
2711 | return ret; |
2712 | } |
2713 | |
2714 | static long nvme_dev_ioctl(struct file *file, unsigned int cmd, |
2715 | unsigned long arg) |
2716 | { |
2717 | struct nvme_ctrl *ctrl = file->private_data; |
2718 | void __user *argp = (void __user *)arg; |
2719 | |
2720 | switch (cmd) { |
2721 | case NVME_IOCTL_ADMIN_CMD: |
2722 | return nvme_user_cmd(ctrl, NULL, argp); |
2723 | case NVME_IOCTL_IO_CMD: |
2724 | return nvme_dev_user_cmd(ctrl, argp); |
2725 | case NVME_IOCTL_RESET: |
2726 | dev_warn(ctrl->device, "resetting controller\n" ); |
2727 | return nvme_reset_ctrl_sync(ctrl); |
2728 | case NVME_IOCTL_SUBSYS_RESET: |
2729 | return nvme_reset_subsystem(ctrl); |
2730 | case NVME_IOCTL_RESCAN: |
2731 | nvme_queue_scan(ctrl); |
2732 | return 0; |
2733 | default: |
2734 | return -ENOTTY; |
2735 | } |
2736 | } |
2737 | |
2738 | static const struct file_operations nvme_dev_fops = { |
2739 | .owner = THIS_MODULE, |
2740 | .open = nvme_dev_open, |
2741 | .unlocked_ioctl = nvme_dev_ioctl, |
2742 | .compat_ioctl = nvme_dev_ioctl, |
2743 | }; |
2744 | |
2745 | static ssize_t nvme_sysfs_reset(struct device *dev, |
2746 | struct device_attribute *attr, const char *buf, |
2747 | size_t count) |
2748 | { |
2749 | struct nvme_ctrl *ctrl = dev_get_drvdata(dev); |
2750 | int ret; |
2751 | |
2752 | ret = nvme_reset_ctrl_sync(ctrl); |
2753 | if (ret < 0) |
2754 | return ret; |
2755 | return count; |
2756 | } |
2757 | static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset); |
2758 | |
2759 | static ssize_t nvme_sysfs_rescan(struct device *dev, |
2760 | struct device_attribute *attr, const char *buf, |
2761 | size_t count) |
2762 | { |
2763 | struct nvme_ctrl *ctrl = dev_get_drvdata(dev); |
2764 | |
2765 | nvme_queue_scan(ctrl); |
2766 | return count; |
2767 | } |
2768 | static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL, nvme_sysfs_rescan); |
2769 | |
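/*
 * The namespace-identification attributes below are shared between the
 * per-controller namespace devices and the multipath head device; this
 * helper finds the common nvme_ns_head regardless of which gendisk the
 * attribute hangs off.
 */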
2770 | static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev) |
2771 | { |
2772 | struct gendisk *disk = dev_to_disk(dev); |
2773 | |
2774 | if (disk->fops == &nvme_fops) |
2775 | return nvme_get_ns_from_dev(dev)->head; |
2776 | else |
2777 | return disk->private_data; |
2778 | } |
2779 | |
2780 | static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, |
2781 | char *buf) |
2782 | { |
2783 | struct nvme_ns_head *head = dev_to_ns_head(dev); |
2784 | struct nvme_ns_ids *ids = &head->ids; |
2785 | struct nvme_subsystem *subsys = head->subsys; |
2786 | int serial_len = sizeof(subsys->serial); |
2787 | int model_len = sizeof(subsys->model); |
2788 | |
2789 | if (!uuid_is_null(&ids->uuid)) |
2790 | return sprintf(buf, "uuid.%pU\n" , &ids->uuid); |
2791 | |
2792 | if (memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) |
2793 | return sprintf(buf, "eui.%16phN\n" , ids->nguid); |
2794 | |
2795 | if (memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) |
2796 | return sprintf(buf, "eui.%8phN\n" , ids->eui64); |
2797 | |
2798 | while (serial_len > 0 && (subsys->serial[serial_len - 1] == ' ' || |
2799 | subsys->serial[serial_len - 1] == '\0')) |
2800 | serial_len--; |
2801 | while (model_len > 0 && (subsys->model[model_len - 1] == ' ' || |
2802 | subsys->model[model_len - 1] == '\0')) |
2803 | model_len--; |
2804 | |
2805 | return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n" , subsys->vendor_id, |
2806 | serial_len, subsys->serial, model_len, subsys->model, |
2807 | head->ns_id); |
2808 | } |
2809 | static DEVICE_ATTR_RO(wwid); |
2810 | |
2811 | static ssize_t nguid_show(struct device *dev, struct device_attribute *attr, |
2812 | char *buf) |
2813 | { |
2814 | return sprintf(buf, "%pU\n" , dev_to_ns_head(dev)->ids.nguid); |
2815 | } |
2816 | static DEVICE_ATTR_RO(nguid); |
2817 | |
2818 | static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, |
2819 | char *buf) |
2820 | { |
2821 | struct nvme_ns_ids *ids = &dev_to_ns_head(dev)->ids; |
2822 | |
2823 | /* For backward compatibility expose the NGUID to userspace if |
2824 | * we have no UUID set |
2825 | */ |
2826 | if (uuid_is_null(&ids->uuid)) { |
2827 | printk_ratelimited(KERN_WARNING |
2828 | "No UUID available providing old NGUID\n" ); |
2829 | return sprintf(buf, "%pU\n" , ids->nguid); |
2830 | } |
2831 | return sprintf(buf, "%pU\n" , &ids->uuid); |
2832 | } |
2833 | static DEVICE_ATTR_RO(uuid); |
2834 | |
2835 | static ssize_t eui_show(struct device *dev, struct device_attribute *attr, |
2836 | char *buf) |
2837 | { |
2838 | return sprintf(buf, "%8ph\n" , dev_to_ns_head(dev)->ids.eui64); |
2839 | } |
2840 | static DEVICE_ATTR_RO(eui); |
2841 | |
2842 | static ssize_t nsid_show(struct device *dev, struct device_attribute *attr, |
2843 | char *buf) |
2844 | { |
2845 | return sprintf(buf, "%d\n" , dev_to_ns_head(dev)->ns_id); |
2846 | } |
2847 | static DEVICE_ATTR_RO(nsid); |
2848 | |
2849 | static struct attribute *nvme_ns_id_attrs[] = { |
2850 | &dev_attr_wwid.attr, |
2851 | &dev_attr_uuid.attr, |
2852 | &dev_attr_nguid.attr, |
2853 | &dev_attr_eui.attr, |
2854 | &dev_attr_nsid.attr, |
2855 | #ifdef CONFIG_NVME_MULTIPATH |
2856 | &dev_attr_ana_grpid.attr, |
2857 | &dev_attr_ana_state.attr, |
2858 | #endif |
2859 | NULL, |
2860 | }; |
2861 | |
2862 | static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj, |
2863 | struct attribute *a, int n) |
2864 | { |
2865 | struct device *dev = container_of(kobj, struct device, kobj); |
2866 | struct nvme_ns_ids *ids = &dev_to_ns_head(dev)->ids; |
2867 | |
2868 | if (a == &dev_attr_uuid.attr) { |
2869 | if (uuid_is_null(&ids->uuid) && |
2870 | !memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) |
2871 | return 0; |
2872 | } |
2873 | if (a == &dev_attr_nguid.attr) { |
2874 | if (!memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) |
2875 | return 0; |
2876 | } |
2877 | if (a == &dev_attr_eui.attr) { |
2878 | if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) |
2879 | return 0; |
2880 | } |
2881 | #ifdef CONFIG_NVME_MULTIPATH |
2882 | if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) { |
2883 | if (dev_to_disk(dev)->fops != &nvme_fops) /* per-path attr */ |
2884 | return 0; |
2885 | if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl)) |
2886 | return 0; |
2887 | } |
2888 | #endif |
2889 | return a->mode; |
2890 | } |
2891 | |
2892 | static const struct attribute_group nvme_ns_id_attr_group = { |
2893 | .attrs = nvme_ns_id_attrs, |
2894 | .is_visible = nvme_ns_id_attrs_are_visible, |
2895 | }; |
2896 | |
2897 | const struct attribute_group *nvme_ns_id_attr_groups[] = { |
2898 | &nvme_ns_id_attr_group, |
2899 | #ifdef CONFIG_NVM |
2900 | &nvme_nvm_attr_group, |
2901 | #endif |
2902 | NULL, |
2903 | }; |
2904 | |
2905 | #define nvme_show_str_function(field) \ |
2906 | static ssize_t field##_show(struct device *dev, \ |
2907 | struct device_attribute *attr, char *buf) \ |
2908 | { \ |
2909 | struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \ |
2910 | return sprintf(buf, "%.*s\n", \ |
2911 | (int)sizeof(ctrl->subsys->field), ctrl->subsys->field); \ |
2912 | } \ |
2913 | static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); |
2914 | |
2915 | nvme_show_str_function(model); |
2916 | nvme_show_str_function(serial); |
2917 | nvme_show_str_function(firmware_rev); |
2918 | |
2919 | #define nvme_show_int_function(field) \ |
2920 | static ssize_t field##_show(struct device *dev, \ |
2921 | struct device_attribute *attr, char *buf) \ |
2922 | { \ |
2923 | struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \ |
2924 | return sprintf(buf, "%d\n", ctrl->field); \ |
2925 | } \ |
2926 | static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); |
2927 | |
2928 | nvme_show_int_function(cntlid); |
2929 | nvme_show_int_function(numa_node); |
2930 | |
2931 | static ssize_t nvme_sysfs_delete(struct device *dev, |
2932 | struct device_attribute *attr, const char *buf, |
2933 | size_t count) |
2934 | { |
2935 | struct nvme_ctrl *ctrl = dev_get_drvdata(dev); |
2936 | |
2937 | if (device_remove_file_self(dev, attr)) |
2938 | nvme_delete_ctrl_sync(ctrl); |
2939 | return count; |
2940 | } |
2941 | static DEVICE_ATTR(delete_controller, S_IWUSR, NULL, nvme_sysfs_delete); |
2942 | |
2943 | static ssize_t nvme_sysfs_show_transport(struct device *dev, |
2944 | struct device_attribute *attr, |
2945 | char *buf) |
2946 | { |
2947 | struct nvme_ctrl *ctrl = dev_get_drvdata(dev); |
2948 | |
2949 | return snprintf(buf, PAGE_SIZE, "%s\n" , ctrl->ops->name); |
2950 | } |
2951 | static DEVICE_ATTR(transport, S_IRUGO, nvme_sysfs_show_transport, NULL); |
2952 | |
2953 | static ssize_t nvme_sysfs_show_state(struct device *dev, |
2954 | struct device_attribute *attr, |
2955 | char *buf) |
2956 | { |
2957 | struct nvme_ctrl *ctrl = dev_get_drvdata(dev); |
2958 | static const char *const state_name[] = { |
2959 | [NVME_CTRL_NEW] = "new" , |
2960 | [NVME_CTRL_LIVE] = "live" , |
2961 | [NVME_CTRL_ADMIN_ONLY] = "only-admin" , |
2962 | [NVME_CTRL_RESETTING] = "resetting" , |
2963 | [NVME_CTRL_CONNECTING] = "connecting" , |
2964 | [NVME_CTRL_DELETING] = "deleting" , |
2965 | [NVME_CTRL_DEAD] = "dead" , |
2966 | }; |
2967 | |
2968 | if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) && |
2969 | state_name[ctrl->state]) |
2970 | return sprintf(buf, "%s\n" , state_name[ctrl->state]); |
2971 | |
2972 | return sprintf(buf, "unknown state\n" ); |
2973 | } |
2974 | |
2975 | static |
---|