// SPDX-License-Identifier: GPL-2.0
/*
 * Block driver for s390 storage class memory.
 *
 * Copyright IBM Corp. 2012
 * Author(s): Sebastian Ott <sebott@linux.vnet.ibm.com>
 */

#define KMSG_COMPONENT "scm_block"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <asm/eadm.h>
#include "scm_blk.h"

debug_info_t *scm_debug;
static int scm_major;
static mempool_t *aidaw_pool;
static DEFINE_SPINLOCK(list_lock);
static LIST_HEAD(inactive_requests);
static unsigned int nr_requests = 64;
static unsigned int nr_requests_per_io = 8;
static atomic_t nr_devices = ATOMIC_INIT(0);
module_param(nr_requests, uint, S_IRUGO);
MODULE_PARM_DESC(nr_requests, "Number of parallel requests.");

module_param(nr_requests_per_io, uint, S_IRUGO);
MODULE_PARM_DESC(nr_requests_per_io, "Number of requests per IO.");

MODULE_DESCRIPTION("Block driver for s390 storage class memory.");
MODULE_LICENSE("GPL");
MODULE_ALIAS("scm:scmdev*");

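/*
 * Requests are preallocated at module load time and kept on the
 * inactive_requests list for reuse; aidaw_pool provides extra pages
 * for indirect data address words when a request's aidaw list does
 * not fit into the unused tail of the aob page.
 */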
static void __scm_free_rq(struct scm_request *scmrq)
{
	struct aob_rq_header *aobrq = to_aobrq(scmrq);

	free_page((unsigned long) scmrq->aob);
	kfree(scmrq->request);
	kfree(aobrq);
}

static void scm_free_rqs(void)
{
	struct list_head *iter, *safe;
	struct scm_request *scmrq;

	spin_lock_irq(&list_lock);
	list_for_each_safe(iter, safe, &inactive_requests) {
		scmrq = list_entry(iter, struct scm_request, list);
		list_del(&scmrq->list);
		__scm_free_rq(scmrq);
	}
	spin_unlock_irq(&list_lock);

	mempool_destroy(aidaw_pool);
}

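/*
 * Allocate one request: the aob_rq_header and the embedded scm_request
 * are allocated together, the aob itself comes from a separate zeroed
 * GFP_DMA page (31-bit addressable, since the adapter accesses it),
 * and request[] holds up to nr_requests_per_io block layer requests.
 */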
static int __scm_alloc_rq(void)
{
	struct aob_rq_header *aobrq;
	struct scm_request *scmrq;

	aobrq = kzalloc(sizeof(*aobrq) + sizeof(*scmrq), GFP_KERNEL);
	if (!aobrq)
		return -ENOMEM;

	scmrq = (void *) aobrq->data;
	scmrq->aob = (void *) get_zeroed_page(GFP_DMA);
	if (!scmrq->aob)
		goto free;

	scmrq->request = kcalloc(nr_requests_per_io, sizeof(scmrq->request[0]),
				 GFP_KERNEL);
	if (!scmrq->request)
		goto free;

	INIT_LIST_HEAD(&scmrq->list);
	spin_lock_irq(&list_lock);
	list_add(&scmrq->list, &inactive_requests);
	spin_unlock_irq(&list_lock);

	return 0;
free:
	__scm_free_rq(scmrq);
	return -ENOMEM;
}

static int scm_alloc_rqs(unsigned int nrqs)
{
	int ret = 0;

	aidaw_pool = mempool_create_page_pool(max(nrqs/8, 1U), 0);
	if (!aidaw_pool)
		return -ENOMEM;

	while (nrqs-- && !ret)
		ret = __scm_alloc_rq();

	return ret;
}

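/* Take a request from the inactive list, or return NULL if none is left. */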
static struct scm_request *scm_request_fetch(void)
{
	struct scm_request *scmrq = NULL;

	spin_lock_irq(&list_lock);
	if (list_empty(&inactive_requests))
		goto out;
	scmrq = list_first_entry(&inactive_requests, struct scm_request, list);
	list_del(&scmrq->list);
out:
	spin_unlock_irq(&list_lock);
	return scmrq;
}

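/*
 * Return aidaw pages taken from the mempool (only page-aligned
 * data_addrs are pool allocations; the first aidaw list may live in
 * the tail of the aob page itself) and put the request back on the
 * inactive list.
 */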
static void scm_request_done(struct scm_request *scmrq)
{
	unsigned long flags;
	struct msb *msb;
	u64 aidaw;
	int i;

	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
		msb = &scmrq->aob->msb[i];
		aidaw = msb->data_addr;

		if ((msb->flags & MSB_FLAG_IDA) && aidaw &&
		    IS_ALIGNED(aidaw, PAGE_SIZE))
			mempool_free(virt_to_page((void *)aidaw), aidaw_pool);
	}

	spin_lock_irqsave(&list_lock, flags);
	list_add(&scmrq->list, &inactive_requests);
	spin_unlock_irqrestore(&list_lock, flags);
}

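/* Writes are rejected while the device is in the SCM_WR_PROHIBIT state. */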
static bool scm_permit_request(struct scm_blk_dev *bdev, struct request *req)
{
	return rq_data_dir(req) != WRITE || bdev->state != SCM_WR_PROHIBIT;
}

static inline struct aidaw *scm_aidaw_alloc(void)
{
	struct page *page = mempool_alloc(aidaw_pool, GFP_ATOMIC);

	return page ? page_address(page) : NULL;
}

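/*
 * How many bytes of payload the aidaw entries remaining on the current
 * page can describe: each entry up to the page boundary addresses one
 * 4K block of data.
 */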
static inline unsigned long scm_aidaw_bytes(struct aidaw *aidaw)
{
	unsigned long _aidaw = (unsigned long) aidaw;
	unsigned long bytes = ALIGN(_aidaw, PAGE_SIZE) - _aidaw;

	return (bytes / sizeof(*aidaw)) * PAGE_SIZE;
}

struct aidaw *scm_aidaw_fetch(struct scm_request *scmrq, unsigned int bytes)
{
	struct aidaw *aidaw;

	if (scm_aidaw_bytes(scmrq->next_aidaw) >= bytes)
		return scmrq->next_aidaw;

	aidaw = scm_aidaw_alloc();
	if (aidaw)
		memset(aidaw, 0, PAGE_SIZE);
	return aidaw;
}

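/*
 * Fill in the next msb of the aob for one block layer request: set the
 * operation code, the start address on the device, and an aidaw list
 * with one entry per segment. Returns -ENOMEM if no aidaw space could
 * be fetched.
 */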
static int scm_request_prepare(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	struct scm_device *scmdev = bdev->gendisk->private_data;
	int pos = scmrq->aob->request.msb_count;
	struct msb *msb = &scmrq->aob->msb[pos];
	struct request *req = scmrq->request[pos];
	struct req_iterator iter;
	struct aidaw *aidaw;
	struct bio_vec bv;

	aidaw = scm_aidaw_fetch(scmrq, blk_rq_bytes(req));
	if (!aidaw)
		return -ENOMEM;

	msb->bs = MSB_BS_4K;
	scmrq->aob->request.msb_count++;
	msb->scm_addr = scmdev->address + ((u64) blk_rq_pos(req) << 9);
	msb->oc = (rq_data_dir(req) == READ) ? MSB_OC_READ : MSB_OC_WRITE;
	msb->flags |= MSB_FLAG_IDA;
	msb->data_addr = (u64) aidaw;

	rq_for_each_segment(bv, req, iter) {
		WARN_ON(bv.bv_offset);
		msb->blk_count += bv.bv_len >> 12;
		aidaw->data_addr = (u64) page_address(bv.bv_page);
		aidaw++;
	}

	scmrq->next_aidaw = aidaw;
	return 0;
}

static inline void scm_request_set(struct scm_request *scmrq,
				   struct request *req)
{
	scmrq->request[scmrq->aob->request.msb_count] = req;
}

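/* Reset a fetched request so a fresh aob can be built for bdev. */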
static inline void scm_request_init(struct scm_blk_dev *bdev,
				    struct scm_request *scmrq)
{
	struct aob_rq_header *aobrq = to_aobrq(scmrq);
	struct aob *aob = scmrq->aob;

	memset(scmrq->request, 0,
	       nr_requests_per_io * sizeof(scmrq->request[0]));
	memset(aob, 0, sizeof(*aob));
	aobrq->scmdev = bdev->scmdev;
	aob->request.cmd_code = ARQB_CMD_MOVE;
	aob->request.data = (u64) aobrq;
	scmrq->bdev = bdev;
	scmrq->retries = 4;
	scmrq->error = BLK_STS_OK;
	/* We don't use all msbs - place aidaws at the end of the aob page. */
	scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io];
}

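/*
 * Push all block layer requests of this scm request back to the blk-mq
 * requeue list and return the scm request to the inactive pool.
 */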
static void scm_request_requeue(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	int i;

	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++)
		blk_mq_requeue_request(scmrq->request[i], false);

	atomic_dec(&bdev->queued_reqs);
	scm_request_done(scmrq);
	blk_mq_kick_requeue_list(bdev->rq);
}

static void scm_request_finish(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	blk_status_t *error;
	int i;

	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
		error = blk_mq_rq_to_pdu(scmrq->request[i]);
		*error = scmrq->error;
		if (likely(!blk_should_fake_timeout(scmrq->request[i]->q)))
			blk_mq_complete_request(scmrq->request[i]);
	}

	atomic_dec(&bdev->queued_reqs);
	scm_request_done(scmrq);
}

static void scm_request_start(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;

	atomic_inc(&bdev->queued_reqs);
	if (eadm_start_aob(scmrq->aob)) {
		SCM_LOG(5, "no subchannel");
		scm_request_requeue(scmrq);
	}
}

struct scm_queue {
	struct scm_request *scmrq;
	spinlock_t lock;
};

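/*
 * queue_rq callback: requests are collected into the per-hctx scm
 * request until either the aob is full (nr_requests_per_io msbs) or
 * the block layer signals the last request of a batch, at which point
 * the aob is started via eadm_start_aob().
 */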
static blk_status_t scm_blk_request(struct blk_mq_hw_ctx *hctx,
				    const struct blk_mq_queue_data *qd)
{
	struct scm_device *scmdev = hctx->queue->queuedata;
	struct scm_blk_dev *bdev = dev_get_drvdata(&scmdev->dev);
	struct scm_queue *sq = hctx->driver_data;
	struct request *req = qd->rq;
	struct scm_request *scmrq;

	spin_lock(&sq->lock);
	if (!scm_permit_request(bdev, req)) {
		spin_unlock(&sq->lock);
		return BLK_STS_RESOURCE;
	}

	scmrq = sq->scmrq;
	if (!scmrq) {
		scmrq = scm_request_fetch();
		if (!scmrq) {
			SCM_LOG(5, "no request");
			spin_unlock(&sq->lock);
			return BLK_STS_RESOURCE;
		}
		scm_request_init(bdev, scmrq);
		sq->scmrq = scmrq;
	}
	scm_request_set(scmrq, req);

	if (scm_request_prepare(scmrq)) {
		SCM_LOG(5, "aidaw alloc failed");
		scm_request_set(scmrq, NULL);

		if (scmrq->aob->request.msb_count)
			scm_request_start(scmrq);

		sq->scmrq = NULL;
		spin_unlock(&sq->lock);
		return BLK_STS_RESOURCE;
	}
	blk_mq_start_request(req);

	if (qd->last || scmrq->aob->request.msb_count == nr_requests_per_io) {
		scm_request_start(scmrq);
		sq->scmrq = NULL;
	}
	spin_unlock(&sq->lock);
	return BLK_STS_OK;
}

static int scm_blk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
			     unsigned int idx)
{
	struct scm_queue *qd = kzalloc(sizeof(*qd), GFP_KERNEL);

	if (!qd)
		return -ENOMEM;

	spin_lock_init(&qd->lock);
	hctx->driver_data = qd;

	return 0;
}

static void scm_blk_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int idx)
{
	struct scm_queue *qd = hctx->driver_data;

	WARN_ON(qd->scmrq);
	kfree(hctx->driver_data);
	hctx->driver_data = NULL;
}

static void __scmrq_log_error(struct scm_request *scmrq)
{
	struct aob *aob = scmrq->aob;

	if (scmrq->error == BLK_STS_TIMEOUT)
		SCM_LOG(1, "Request timeout");
	else {
		SCM_LOG(1, "Request error");
		SCM_LOG_HEX(1, &aob->response, sizeof(aob->response));
	}
	if (scmrq->retries)
		SCM_LOG(1, "Retry request");
	else
		pr_err("An I/O operation to SCM failed with rc=%d\n",
		       scmrq->error);
}

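/*
 * Retry handling: for an I/O error the response block is inspected;
 * EQC_WR_PROHIBIT suspends writes and requeues the request, anything
 * else is simply restarted. If the restart fails, requeue as well.
 */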
static void scm_blk_handle_error(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	unsigned long flags;

	if (scmrq->error != BLK_STS_IOERR)
		goto restart;

	/* For -EIO the response block is valid. */
	switch (scmrq->aob->response.eqc) {
	case EQC_WR_PROHIBIT:
		spin_lock_irqsave(&bdev->lock, flags);
		if (bdev->state != SCM_WR_PROHIBIT)
			pr_info("%lx: Write access to the SCM increment is suspended\n",
				(unsigned long) bdev->scmdev->address);
		bdev->state = SCM_WR_PROHIBIT;
		spin_unlock_irqrestore(&bdev->lock, flags);
		goto requeue;
	default:
		break;
	}

restart:
	if (!eadm_start_aob(scmrq->aob))
		return;

requeue:
	scm_request_requeue(scmrq);
}

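/*
 * Interrupt callback, invoked by the eadm subchannel driver once an
 * aob completes. Retries scmrq->retries times on error before giving
 * up and completing the request(s) with the error status.
 */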
void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error)
{
	struct scm_request *scmrq = data;

	scmrq->error = error;
	if (error) {
		__scmrq_log_error(scmrq);
		if (scmrq->retries-- > 0) {
			scm_blk_handle_error(scmrq);
			return;
		}
	}

	scm_request_finish(scmrq);
}

static void scm_blk_request_done(struct request *req)
{
	blk_status_t *error = blk_mq_rq_to_pdu(req);

	blk_mq_end_request(req, *error);
}

static const struct block_device_operations scm_blk_devops = {
	.owner = THIS_MODULE,
};

static const struct blk_mq_ops scm_mq_ops = {
	.queue_rq = scm_blk_request,
	.complete = scm_blk_request_done,
	.init_hctx = scm_blk_init_hctx,
	.exit_hctx = scm_blk_exit_hctx,
};

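/*
 * Create and register the gendisk for a new scm device. Disk names
 * follow scma..scmz, then scmaa..scmzz, which limits the driver to
 * 702 devices.
 */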
int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
{
	unsigned int devindex, nr_max_blk;
	struct request_queue *rq;
	int len, ret;

	devindex = atomic_inc_return(&nr_devices) - 1;
	/* scma..scmz + scmaa..scmzz */
	if (devindex > 701) {
		ret = -ENODEV;
		goto out;
	}

	bdev->scmdev = scmdev;
	bdev->state = SCM_OPER;
	spin_lock_init(&bdev->lock);
	atomic_set(&bdev->queued_reqs, 0);

	bdev->tag_set.ops = &scm_mq_ops;
	bdev->tag_set.cmd_size = sizeof(blk_status_t);
	bdev->tag_set.nr_hw_queues = nr_requests;
	bdev->tag_set.queue_depth = nr_requests_per_io * nr_requests;
	bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	bdev->tag_set.numa_node = NUMA_NO_NODE;

	ret = blk_mq_alloc_tag_set(&bdev->tag_set);
	if (ret)
		goto out;

	bdev->gendisk = blk_mq_alloc_disk(&bdev->tag_set, scmdev);
	if (IS_ERR(bdev->gendisk)) {
		ret = PTR_ERR(bdev->gendisk);
		goto out_tag;
	}
	rq = bdev->rq = bdev->gendisk->queue;
	nr_max_blk = min(scmdev->nr_max_block,
			 (unsigned int) (PAGE_SIZE / sizeof(struct aidaw)));

	blk_queue_logical_block_size(rq, 1 << 12);
	blk_queue_max_hw_sectors(rq, nr_max_blk << 3); /* 8 * 512 = blk_size */
	blk_queue_max_segments(rq, nr_max_blk);
	blk_queue_flag_set(QUEUE_FLAG_NONROT, rq);
	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, rq);

	bdev->gendisk->private_data = scmdev;
	bdev->gendisk->fops = &scm_blk_devops;
	bdev->gendisk->major = scm_major;
	bdev->gendisk->first_minor = devindex * SCM_NR_PARTS;
	bdev->gendisk->minors = SCM_NR_PARTS;

	len = snprintf(bdev->gendisk->disk_name, DISK_NAME_LEN, "scm");
	if (devindex > 25) {
		len += snprintf(bdev->gendisk->disk_name + len,
				DISK_NAME_LEN - len, "%c",
				'a' + (devindex / 26) - 1);
		devindex = devindex % 26;
	}
	snprintf(bdev->gendisk->disk_name + len, DISK_NAME_LEN - len, "%c",
		 'a' + devindex);

	/* 512 byte sectors */
	set_capacity(bdev->gendisk, scmdev->size >> 9);
	ret = device_add_disk(&scmdev->dev, bdev->gendisk, NULL);
	if (ret)
		goto out_cleanup_disk;

	return 0;

out_cleanup_disk:
	put_disk(bdev->gendisk);
out_tag:
	blk_mq_free_tag_set(&bdev->tag_set);
out:
	atomic_dec(&nr_devices);
	return ret;
}

void scm_blk_dev_cleanup(struct scm_blk_dev *bdev)
{
	del_gendisk(bdev->gendisk);
	put_disk(bdev->gendisk);
	blk_mq_free_tag_set(&bdev->tag_set);
}

void scm_blk_set_available(struct scm_blk_dev *bdev)
{
	unsigned long flags;

	spin_lock_irqsave(&bdev->lock, flags);
	if (bdev->state == SCM_WR_PROHIBIT)
		pr_info("%lx: Write access to the SCM increment is restored\n",
			(unsigned long) bdev->scmdev->address);
	bdev->state = SCM_OPER;
	spin_unlock_irqrestore(&bdev->lock, flags);
}

static bool __init scm_blk_params_valid(void)
{
	if (!nr_requests_per_io || nr_requests_per_io > 64)
		return false;

	return true;
}

static int __init scm_blk_init(void)
{
	int ret = -EINVAL;

	if (!scm_blk_params_valid())
		goto out;

	ret = register_blkdev(0, "scm");
	if (ret < 0)
		goto out;

	scm_major = ret;
	ret = scm_alloc_rqs(nr_requests);
	if (ret)
		goto out_free;

	scm_debug = debug_register("scm_log", 16, 1, 16);
	if (!scm_debug) {
		ret = -ENOMEM;
		goto out_free;
	}

	debug_register_view(scm_debug, &debug_hex_ascii_view);
	debug_set_level(scm_debug, 2);

	ret = scm_drv_init();
	if (ret)
		goto out_dbf;

	return ret;

out_dbf:
	debug_unregister(scm_debug);
out_free:
	scm_free_rqs();
	unregister_blkdev(scm_major, "scm");
out:
	return ret;
}
module_init(scm_blk_init);

static void __exit scm_blk_cleanup(void)
{
	scm_drv_cleanup();
	debug_unregister(scm_debug);
	scm_free_rqs();
	unregister_blkdev(scm_major, "scm");
}
module_exit(scm_blk_cleanup);