// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe I/O command implementation.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/memremap.h>
#include <linux/module.h>
#include "nvmet.h"

void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
{
	/* Logical blocks per physical block, 0's based. */
	const __le16 lpp0b = to0based(bdev_physical_block_size(bdev) /
				      bdev_logical_block_size(bdev));

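	/*
	 * Example (illustrative values): a 512-byte logical / 4096-byte
	 * physical block device has 4096 / 512 = 8 logical blocks per
	 * physical block, reported 0's based as lpp0b = 7.
	 */
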
	/*
	 * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
	 * NAWUPF, and NACWU are defined for this namespace and should be
	 * used by the host for this namespace instead of the AWUN, AWUPF,
	 * and ACWU fields in the Identify Controller data structure. If
	 * any of these fields are zero that means that the corresponding
	 * field from the identify controller data structure should be used.
	 */
	id->nsfeat |= 1 << 1;
	id->nawun = lpp0b;
	id->nawupf = lpp0b;
	id->nacwu = lpp0b;

	/*
	 * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
	 * NOWS are defined for this namespace and should be used by
	 * the host for I/O optimization.
	 */
	id->nsfeat |= 1 << 4;
	/* NPWG = Namespace Preferred Write Granularity. 0's based */
	id->npwg = lpp0b;
	/* NPWA = Namespace Preferred Write Alignment. 0's based */
	id->npwa = id->npwg;
	/* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
	id->npdg = to0based(bdev_discard_granularity(bdev) /
			    bdev_logical_block_size(bdev));
	/* NPDA = Namespace Preferred Deallocate Alignment */
	id->npda = id->npdg;
	/* NOWS = Namespace Optimal Write Size */
	id->nows = to0based(bdev_io_opt(bdev) / bdev_logical_block_size(bdev));
}

void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
{
	if (ns->bdev_handle) {
		bdev_release(ns->bdev_handle);
		ns->bdev = NULL;
		ns->bdev_handle = NULL;
	}
}

static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
{
	struct blk_integrity *bi = bdev_get_integrity(ns->bdev);

	if (bi) {
		ns->metadata_size = bi->tuple_size;
		if (bi->profile == &t10_pi_type1_crc)
			ns->pi_type = NVME_NS_DPS_PI_TYPE1;
		else if (bi->profile == &t10_pi_type3_crc)
			ns->pi_type = NVME_NS_DPS_PI_TYPE3;
		else
			/* Unsupported metadata type */
			ns->metadata_size = 0;
	}
}

int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
	int ret;

	/*
	 * When the buffered_io namespace attribute is enabled, the user wants
	 * this block device to be used as a file so that it can take
	 * advantage of the page cache. Returning -ENOTBLK lets the core fall
	 * back to the file-backed I/O path.
	 */
	if (ns->buffered_io)
		return -ENOTBLK;

	ns->bdev_handle = bdev_open_by_path(ns->device_path,
				BLK_OPEN_READ | BLK_OPEN_WRITE, NULL, NULL);
	if (IS_ERR(ns->bdev_handle)) {
		ret = PTR_ERR(ns->bdev_handle);
		if (ret != -ENOTBLK) {
			pr_err("failed to open block device %s: (%d)\n",
			       ns->device_path, ret);
		}
		ns->bdev_handle = NULL;
		return ret;
	}
	ns->bdev = ns->bdev_handle->bdev;
	ns->size = bdev_nr_bytes(ns->bdev);
	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
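
	/*
	 * Note: blksize_shift is log2 of the logical block size (e.g. 9 for
	 * 512 B, 12 for 4096 B); it is used below when converting namespace
	 * LBAs into 512-byte sectors for the discard and write-zeroes paths.
	 */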

	ns->pi_type = 0;
	ns->metadata_size = 0;
	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
		nvmet_bdev_ns_enable_integrity(ns);

	if (bdev_is_zoned(ns->bdev)) {
		if (!nvmet_bdev_zns_enable(ns)) {
			nvmet_bdev_ns_disable(ns);
			return -EINVAL;
		}
		ns->csi = NVME_CSI_ZNS;
	}

	return 0;
}

void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
{
	ns->size = bdev_nr_bytes(ns->bdev);
}

u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
{
	u16 status = NVME_SC_SUCCESS;

	if (likely(blk_sts == BLK_STS_OK))
		return status;
	/*
	 * Right now there exists an M : 1 mapping between block layer error
	 * codes and NVMe status codes (see nvme_error_status()). For
	 * consistency, when we reverse map we use the most appropriate NVMe
	 * status code from the group of NVMe status codes used in
	 * nvme_error_status().
	 */
	switch (blk_sts) {
	case BLK_STS_NOSPC:
		status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_rw_command, length);
		break;
	case BLK_STS_TARGET:
		status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_rw_command, slba);
		break;
	case BLK_STS_NOTSUPP:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		switch (req->cmd->common.opcode) {
		case nvme_cmd_dsm:
		case nvme_cmd_write_zeroes:
			status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
			break;
		default:
			status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
		}
		break;
	case BLK_STS_MEDIUM:
		status = NVME_SC_ACCESS_DENIED;
		req->error_loc = offsetof(struct nvme_rw_command, nsid);
		break;
	case BLK_STS_IOERR:
	default:
		status = NVME_SC_INTERNAL | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_common_command, opcode);
	}

	switch (req->cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
		req->error_slba = le64_to_cpu(req->cmd->rw.slba);
		break;
	case nvme_cmd_write_zeroes:
		req->error_slba =
			le64_to_cpu(req->cmd->write_zeroes.slba);
		break;
	default:
		req->error_slba = 0;
	}
	return status;
}

static void nvmet_bio_done(struct bio *bio)
{
	struct nvmet_req *req = bio->bi_private;

	nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
	nvmet_req_bio_put(req, bio);
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
				struct sg_mapping_iter *miter)
{
	struct blk_integrity *bi;
	struct bio_integrity_payload *bip;
	int rc;
	size_t resid, len;

	bi = bdev_get_integrity(req->ns->bdev);
	if (unlikely(!bi)) {
		pr_err("Unable to locate bio_integrity\n");
		return -ENODEV;
	}

	bip = bio_integrity_alloc(bio, GFP_NOIO,
				  bio_max_segs(req->metadata_sg_cnt));
	if (IS_ERR(bip)) {
		pr_err("Unable to allocate bio_integrity_payload\n");
		return PTR_ERR(bip);
	}

	/* virtual start sector must be in integrity interval units */
	bip_set_seed(bip, bio->bi_iter.bi_sector >>
		     (bi->interval_exp - SECTOR_SHIFT));
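	/*
	 * Illustrative example: with a 4096-byte protection interval,
	 * bi->interval_exp is 12, so the 512-byte start sector is shifted
	 * right by 12 - 9 = 3 to obtain the seed in interval units.
	 */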

	resid = bio_integrity_bytes(bi, bio_sectors(bio));
	while (resid > 0 && sg_miter_next(miter)) {
		len = min_t(size_t, miter->length, resid);
		rc = bio_integrity_add_page(bio, miter->page, len,
					    offset_in_page(miter->addr));
		if (unlikely(rc != len)) {
			pr_err("bio_integrity_add_page() failed; %d\n", rc);
			sg_miter_stop(miter);
			return -ENOMEM;
		}

		resid -= len;
		/*
		 * If only part of this miter segment was added, roll back
		 * miter->consumed so the next iteration resumes at the
		 * remaining metadata.
		 */
		if (len < miter->length)
			miter->consumed -= miter->length - len;
	}
	sg_miter_stop(miter);

	return 0;
}
#else
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
				struct sg_mapping_iter *miter)
{
	return -EINVAL;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */

static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
	unsigned int sg_cnt = req->sg_cnt;
	struct bio *bio;
	struct scatterlist *sg;
	struct blk_plug plug;
	sector_t sector;
	blk_opf_t opf;
	int i, rc;
	struct sg_mapping_iter prot_miter;
	unsigned int iter_flags;
	unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;

	if (!nvmet_check_transfer_len(req, total_len))
		return;

	if (!req->sg_cnt) {
		nvmet_req_complete(req, 0);
		return;
	}

	if (req->cmd->rw.opcode == nvme_cmd_write) {
		opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
			opf |= REQ_FUA;
		iter_flags = SG_MITER_TO_SG;
	} else {
		opf = REQ_OP_READ;
		iter_flags = SG_MITER_FROM_SG;
	}

	if (is_pci_p2pdma_page(sg_page(req->sg)))
		opf |= REQ_NOMERGE;

	sector = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);

	if (nvmet_use_inline_bvec(req)) {
		bio = &req->b.inline_bio;
		bio_init(bio, req->ns->bdev, req->inline_bvec,
			 ARRAY_SIZE(req->inline_bvec), opf);
	} else {
		bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), opf,
				GFP_KERNEL);
	}
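	/*
	 * Note: small transfers that fit in the request's inline bio_vec
	 * array reuse the embedded bio above and avoid an allocation;
	 * larger transfers allocate a bio sized to the SG list (capped by
	 * bio_max_segs()).
	 */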
	bio->bi_iter.bi_sector = sector;
	bio->bi_private = req;
	bio->bi_end_io = nvmet_bio_done;

	blk_start_plug(&plug);
	if (req->metadata_len)
		sg_miter_start(&prot_miter, req->metadata_sg,
			       req->metadata_sg_cnt, iter_flags);

	for_each_sg(req->sg, sg, req->sg_cnt, i) {
		while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
				!= sg->length) {
			struct bio *prev = bio;

			if (req->metadata_len) {
				rc = nvmet_bdev_alloc_bip(req, bio,
							  &prot_miter);
				if (unlikely(rc)) {
					bio_io_error(bio);
					return;
				}
			}

			bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
					opf, GFP_KERNEL);
			bio->bi_iter.bi_sector = sector;

			bio_chain(bio, prev);
			submit_bio(prev);
		}
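		/*
		 * When the current bio fills up, the loop above attaches any
		 * metadata, chains a fresh bio behind it and submits the
		 * full one; nvmet_bio_done() on the first bio only runs once
		 * the whole chain has completed.
		 */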

		sector += sg->length >> 9;
		sg_cnt--;
	}

	if (req->metadata_len) {
		rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
		if (unlikely(rc)) {
			bio_io_error(bio);
			return;
		}
	}

	submit_bio(bio);
	blk_finish_plug(&plug);
}

static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
	struct bio *bio = &req->b.inline_bio;

	if (!bdev_write_cache(req->ns->bdev)) {
		nvmet_req_complete(req, NVME_SC_SUCCESS);
		return;
	}

	if (!nvmet_check_transfer_len(req, 0))
		return;

	bio_init(bio, req->ns->bdev, req->inline_bvec,
		 ARRAY_SIZE(req->inline_bvec), REQ_OP_WRITE | REQ_PREFLUSH);
	bio->bi_private = req;
	bio->bi_end_io = nvmet_bio_done;

	submit_bio(bio);
}

u16 nvmet_bdev_flush(struct nvmet_req *req)
{
	if (!bdev_write_cache(req->ns->bdev))
		return 0;

	if (blkdev_issue_flush(req->ns->bdev))
		return NVME_SC_INTERNAL | NVME_SC_DNR;
	return 0;
}

static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
		struct nvme_dsm_range *range, struct bio **bio)
{
	struct nvmet_ns *ns = req->ns;
	int ret;

	ret = __blkdev_issue_discard(ns->bdev,
			nvmet_lba_to_sect(ns, range->slba),
			le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
			GFP_KERNEL, bio);
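	/*
	 * Note: -EOPNOTSUPP is deliberately not reported as an error below;
	 * Dataset Management deallocate is advisory, so a backing device
	 * without discard support is not treated as a failure.
	 */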
	if (ret && ret != -EOPNOTSUPP) {
		req->error_slba = le64_to_cpu(range->slba);
		return errno_to_nvme_status(req, ret);
	}
	return NVME_SC_SUCCESS;
}

static void nvmet_bdev_execute_discard(struct nvmet_req *req)
{
	struct nvme_dsm_range range;
	struct bio *bio = NULL;
	int i;
	u16 status;

	for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
		status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
				sizeof(range));
		if (status)
			break;

		status = nvmet_bdev_discard_range(req, &range, &bio);
		if (status)
			break;
	}

	if (bio) {
		bio->bi_private = req;
		bio->bi_end_io = nvmet_bio_done;
		if (status)
			bio_io_error(bio);
		else
			submit_bio(bio);
	} else {
		nvmet_req_complete(req, status);
	}
}

static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
	if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
		return;

	switch (le32_to_cpu(req->cmd->dsm.attributes)) {
	case NVME_DSMGMT_AD:
		nvmet_bdev_execute_discard(req);
		return;
	case NVME_DSMGMT_IDR:
	case NVME_DSMGMT_IDW:
	default:
		/* Not supported yet */
		nvmet_req_complete(req, 0);
		return;
	}
}

static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
{
	struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
	struct bio *bio = NULL;
	sector_t sector;
	sector_t nr_sector;
	int ret;

	if (!nvmet_check_transfer_len(req, 0))
		return;

	sector = nvmet_lba_to_sect(req->ns, write_zeroes->slba);
	nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
		(req->ns->blksize_shift - 9));
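	/*
	 * Example (illustrative values): for a 4096-byte logical block size,
	 * blksize_shift is 12, so the 0's based NLB in 'length' is converted
	 * to 512-byte sectors by adding 1 and shifting left by 12 - 9 = 3.
	 */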

	ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
			GFP_KERNEL, &bio, 0);
	if (bio) {
		bio->bi_private = req;
		bio->bi_end_io = nvmet_bio_done;
		submit_bio(bio);
	} else {
		nvmet_req_complete(req, errno_to_nvme_status(req, ret));
	}
}

u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
{
	switch (req->cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
		req->execute = nvmet_bdev_execute_rw;
		if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
			req->metadata_len = nvmet_rw_metadata_len(req);
		return 0;
	case nvme_cmd_flush:
		req->execute = nvmet_bdev_execute_flush;
		return 0;
	case nvme_cmd_dsm:
		req->execute = nvmet_bdev_execute_dsm;
		return 0;
	case nvme_cmd_write_zeroes:
		req->execute = nvmet_bdev_execute_write_zeroes;
		return 0;
	default:
		return nvmet_report_invalid_opcode(req);
	}
}