// SPDX-License-Identifier: GPL-2.0

#define pr_fmt(fmt)	"papr-scm: " fmt

#include <linux/of.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/ioport.h>
#include <linux/slab.h>
#include <linux/ndctl.h>
#include <linux/sched.h>
#include <linux/libnvdimm.h>
#include <linux/platform_device.h>
#include <linux/delay.h>
#include <linux/seq_buf.h>
#include <linux/nd.h>

#include <asm/plpar_wrappers.h>
#include <asm/papr_pdsm.h>
#include <asm/mce.h>
#include <asm/unaligned.h>
#include <linux/perf_event.h>

#define BIND_ANY_ADDR (~0ul)

#define PAPR_SCM_DIMM_CMD_MASK \
	((1ul << ND_CMD_GET_CONFIG_SIZE) | \
	 (1ul << ND_CMD_GET_CONFIG_DATA) | \
	 (1ul << ND_CMD_SET_CONFIG_DATA) | \
	 (1ul << ND_CMD_CALL))

/* DIMM health bitmap indicators */
/* SCM device is unable to persist memory contents */
#define PAPR_PMEM_UNARMED		(1ULL << (63 - 0))
/* SCM device failed to persist memory contents */
#define PAPR_PMEM_SHUTDOWN_DIRTY	(1ULL << (63 - 1))
/* SCM device contents are persisted from previous IPL */
#define PAPR_PMEM_SHUTDOWN_CLEAN	(1ULL << (63 - 2))
/* SCM device contents are not persisted from previous IPL */
#define PAPR_PMEM_EMPTY			(1ULL << (63 - 3))
/* SCM device memory life remaining is critically low */
#define PAPR_PMEM_HEALTH_CRITICAL	(1ULL << (63 - 4))
/* SCM device will be garded off next IPL due to failure */
#define PAPR_PMEM_HEALTH_FATAL		(1ULL << (63 - 5))
/* SCM contents cannot persist due to current platform health status */
#define PAPR_PMEM_HEALTH_UNHEALTHY	(1ULL << (63 - 6))
/* SCM device is unable to persist memory contents in certain conditions */
#define PAPR_PMEM_HEALTH_NON_CRITICAL	(1ULL << (63 - 7))
/* SCM device is encrypted */
#define PAPR_PMEM_ENCRYPTED		(1ULL << (63 - 8))
/* SCM device has been scrubbed and locked */
#define PAPR_PMEM_SCRUBBED_AND_LOCKED	(1ULL << (63 - 9))

/* Bit status indicators for health bitmap indicating an unarmed dimm */
#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \
				PAPR_PMEM_HEALTH_UNHEALTHY)

/* Bit status indicators for health bitmap indicating an unflushed dimm */
#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY)

/* Bit status indicators for health bitmap indicating an unrestored dimm */
#define PAPR_PMEM_BAD_RESTORE_MASK  (PAPR_PMEM_EMPTY)

/* Bit status indicators for smart event notification */
#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \
				    PAPR_PMEM_HEALTH_FATAL |	\
				    PAPR_PMEM_HEALTH_UNHEALTHY)

#define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS)
#define PAPR_SCM_PERF_STATS_VERSION 0x1

/* Struct holding a single performance metric */
struct papr_scm_perf_stat {
	u8 stat_id[8];
	__be64 stat_val;
} __packed;

/* Struct exchanged between kernel and PHYP for fetching drc perf stats */
struct papr_scm_perf_stats {
	u8 eye_catcher[8];
	/* Should be PAPR_SCM_PERF_STATS_VERSION */
	__be32 stats_version;
	/* Number of stats following */
	__be32 num_statistics;
	/* zero or more performance metrics */
	struct papr_scm_perf_stat scm_statistic[];
} __packed;

/* private struct associated with each region */
struct papr_scm_priv {
	struct platform_device *pdev;
	struct device_node *dn;
	uint32_t drc_index;
	uint64_t blocks;
	uint64_t block_size;
	int metadata_size;
	bool is_volatile;
	bool hcall_flush_required;

	uint64_t bound_addr;

	struct nvdimm_bus_descriptor bus_desc;
	struct nvdimm_bus *bus;
	struct nvdimm *nvdimm;
	struct resource res;
	struct nd_region *region;
	struct nd_interleave_set nd_set;
	struct list_head region_list;

	/* Protect dimm health data from concurrent read/writes */
	struct mutex health_mutex;

	/* Last time the health information of the dimm was updated */
	unsigned long lasthealth_jiffies;

	/* Health information for the dimm */
	u64 health_bitmap;

	/* Holds the last known dirty shutdown counter value */
	u64 dirty_shutdown_counter;

	/* length of the stat buffer as expected by phyp */
	size_t stat_buffer_len;

	/* The bits which need to be overridden */
	u64 health_bitmap_inject_mask;
};

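/*
 * Issue the H_SCM_FLUSH hcall asking the hypervisor to flush the DRC's
 * contents to persistent media, retrying for as long as it reports busy.
 */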
static int papr_scm_pmem_flush(struct nd_region *nd_region,
			       struct bio *bio __maybe_unused)
{
	struct papr_scm_priv *p = nd_region_provider_data(nd_region);
	unsigned long ret_buf[PLPAR_HCALL_BUFSIZE], token = 0;
	long rc;

	dev_dbg(&p->pdev->dev, "flush drc 0x%x", p->drc_index);

	do {
		rc = plpar_hcall(H_SCM_FLUSH, ret_buf, p->drc_index, token);
		token = ret_buf[0];

		/* Check if we are stalled for some time */
		if (H_IS_LONG_BUSY(rc)) {
			msleep(get_longbusy_msecs(rc));
			rc = H_BUSY;
		} else if (rc == H_BUSY) {
			cond_resched();
		}
	} while (rc == H_BUSY);

	if (rc) {
		dev_err(&p->pdev->dev, "flush error: %ld", rc);
		rc = -EIO;
	} else {
		dev_dbg(&p->pdev->dev, "flush drc 0x%x complete", p->drc_index);
	}

	return rc;
}

static LIST_HEAD(papr_nd_regions);
static DEFINE_MUTEX(papr_ndr_lock);

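/*
 * Bind all blocks of the SCM region identified by drc_index into system
 * memory via H_SCM_BIND_MEM and record the resulting base address.
 */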
static int drc_pmem_bind(struct papr_scm_priv *p)
{
	unsigned long ret[PLPAR_HCALL_BUFSIZE];
	uint64_t saved = 0;
	uint64_t token;
	int64_t rc;

	/*
	 * When the hypervisor cannot map all the requested memory in a single
	 * hcall it returns H_BUSY and we call again with the token until
	 * we get H_SUCCESS. Aborting the retry loop before getting H_SUCCESS
	 * leaves the system in an undefined state, so we wait.
	 */
	token = 0;

	do {
		rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0,
				 p->blocks, BIND_ANY_ADDR, token);
		token = ret[0];
		if (!saved)
			saved = ret[1];
		cond_resched();
	} while (rc == H_BUSY);

	if (rc)
		return rc;

	p->bound_addr = saved;
	dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n",
		p->drc_index, (unsigned long)saved);
	return rc;
}

static void drc_pmem_unbind(struct papr_scm_priv *p)
{
	unsigned long ret[PLPAR_HCALL_BUFSIZE];
	uint64_t token = 0;
	int64_t rc;

	dev_dbg(&p->pdev->dev, "unbind drc 0x%x\n", p->drc_index);

	/* NB: unbind has the same retry requirements as drc_pmem_bind() */
	do {

		/* Unbind of all SCM resources associated with drcIndex */
		rc = plpar_hcall(H_SCM_UNBIND_ALL, ret, H_UNBIND_SCOPE_DRC,
				 p->drc_index, token);
		token = ret[0];

		/* Check if we are stalled for some time */
		if (H_IS_LONG_BUSY(rc)) {
			msleep(get_longbusy_msecs(rc));
			rc = H_BUSY;
		} else if (rc == H_BUSY) {
			cond_resched();
		}

	} while (rc == H_BUSY);

	if (rc)
		dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
	else
		dev_dbg(&p->pdev->dev, "unbind drc 0x%x complete\n",
			p->drc_index);

	return;
}

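/*
 * Query the address the DRC is currently bound to and verify that the whole
 * region is bound; otherwise fall back to a full unbind followed by a bind.
 */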
static int drc_pmem_query_n_bind(struct papr_scm_priv *p)
{
	unsigned long start_addr;
	unsigned long end_addr;
	unsigned long ret[PLPAR_HCALL_BUFSIZE];
	int64_t rc;

	rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
			 p->drc_index, 0);
	if (rc)
		goto err_out;
	start_addr = ret[0];

	/* Make sure the full region is bound. */
	rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
			 p->drc_index, p->blocks - 1);
	if (rc)
		goto err_out;
	end_addr = ret[0];

	if ((end_addr - start_addr) != ((p->blocks - 1) * p->block_size))
		goto err_out;

	p->bound_addr = start_addr;
	dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n", p->drc_index, start_addr);
	return rc;

err_out:
	dev_info(&p->pdev->dev,
		 "Failed to query, trying an unbind followed by bind");
	drc_pmem_unbind(p);
	return drc_pmem_bind(p);
}

/*
 * Query the DIMM performance stats from PHYP and copy them (if returned) to
 * the provided struct papr_scm_perf_stats instance 'buff_stats' that can hold
 * at least (num_stats + header) bytes.
 * - If buff_stats == NULL the return value is the size in bytes of the buffer
 * needed to hold all supported performance-statistics.
 * - If buff_stats != NULL and num_stats == 0 then we copy all known
 * performance-statistics to 'buff_stats', which is expected to be large
 * enough to hold them.
 * - If buff_stats != NULL and num_stats > 0 then copy the requested
 * performance-statistics to buff_stats.
 */
static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
				    struct papr_scm_perf_stats *buff_stats,
				    unsigned int num_stats)
{
	unsigned long ret[PLPAR_HCALL_BUFSIZE];
	size_t size;
	s64 rc;

	/* Setup the out buffer */
	if (buff_stats) {
		memcpy(buff_stats->eye_catcher,
		       PAPR_SCM_PERF_STATS_EYECATCHER, 8);
		buff_stats->stats_version =
			cpu_to_be32(PAPR_SCM_PERF_STATS_VERSION);
		buff_stats->num_statistics =
			cpu_to_be32(num_stats);

		/*
		 * Calculate the buffer size based on num-stats provided
		 * or use the prefetched max buffer length.
		 */
		if (num_stats)
			/* Calculate size from the num_stats */
			size = sizeof(struct papr_scm_perf_stats) +
				num_stats * sizeof(struct papr_scm_perf_stat);
		else
			size = p->stat_buffer_len;
	} else {
		/* In case of no out buffer ignore the size */
		size = 0;
	}

	/* Do the HCALL asking PHYP for info */
	rc = plpar_hcall(H_SCM_PERFORMANCE_STATS, ret, p->drc_index,
			 buff_stats ? virt_to_phys(buff_stats) : 0,
			 size);

	/* Check if the error was due to an unknown stat-id */
	if (rc == H_PARTIAL) {
		dev_err(&p->pdev->dev,
			"Unknown performance stats, Err:0x%016lX\n", ret[0]);
		return -ENOENT;
	} else if (rc == H_AUTHORITY) {
		dev_info(&p->pdev->dev,
			 "Permission denied while accessing performance stats");
		return -EPERM;
	} else if (rc == H_UNSUPPORTED) {
		dev_dbg(&p->pdev->dev, "Performance stats unsupported\n");
		return -EOPNOTSUPP;
	} else if (rc != H_SUCCESS) {
		dev_err(&p->pdev->dev,
			"Failed to query performance stats, Err:%lld\n", rc);
		return -EIO;

	} else if (!size) {
		/* Handle case where stat buffer size was requested */
		dev_dbg(&p->pdev->dev,
			"Performance stats size %ld\n", ret[0]);
		return ret[0];
	}

	/* Successfully fetched the requested stats from phyp */
	dev_dbg(&p->pdev->dev,
		"Performance stats returned %d stats\n",
		be32_to_cpu(buff_stats->num_statistics));
	return 0;
}

#ifdef CONFIG_PERF_EVENTS
#define to_nvdimm_pmu(_pmu)	container_of(_pmu, struct nvdimm_pmu, pmu)

static const char * const nvdimm_events_map[] = {
	[1] = "CtlResCt",
	[2] = "CtlResTm",
	[3] = "PonSecs ",
	[4] = "MemLife ",
	[5] = "CritRscU",
	[6] = "HostLCnt",
	[7] = "HostSCnt",
	[8] = "HostSDur",
	[9] = "HostLDur",
	[10] = "MedRCnt ",
	[11] = "MedWCnt ",
	[12] = "MedRDur ",
	[13] = "MedWDur ",
	[14] = "CchRHCnt",
	[15] = "CchWHCnt",
	[16] = "FastWCnt",
};

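/*
 * Fetch the current value of a single nvdimm perf event by issuing a
 * one-stat H_SCM_PERFORMANCE_STATS request for the mapped stat-id.
 */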
static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, u64 *count)
{
	struct papr_scm_perf_stat *stat;
	struct papr_scm_perf_stats *stats;
	struct papr_scm_priv *p = dev_get_drvdata(dev);
	int rc, size;

	/* Invalid eventcode */
	if (event->attr.config == 0 || event->attr.config >= ARRAY_SIZE(nvdimm_events_map))
		return -EINVAL;

	/* Allocate a request buffer large enough to hold a single performance stat */
	size = sizeof(struct papr_scm_perf_stats) +
		sizeof(struct papr_scm_perf_stat);

	if (!p)
		return -EINVAL;

	stats = kzalloc(size, GFP_KERNEL);
	if (!stats)
		return -ENOMEM;

	stat = &stats->scm_statistic[0];
	memcpy(&stat->stat_id,
	       nvdimm_events_map[event->attr.config],
	       sizeof(stat->stat_id));
	stat->stat_val = 0;

	rc = drc_pmem_query_stats(p, stats, 1);
	if (rc < 0) {
		kfree(stats);
		return rc;
	}

	*count = be64_to_cpu(stat->stat_val);
	kfree(stats);
	return 0;
}

static int papr_scm_pmu_event_init(struct perf_event *event)
{
	struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
	struct papr_scm_priv *p;

	if (!nd_pmu)
		return -EINVAL;

	/* test the event attr type for PMU enumeration */
	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* it does not support event sampling mode */
	if (is_sampling_event(event))
		return -EOPNOTSUPP;

	/* no branch sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	p = (struct papr_scm_priv *)nd_pmu->dev->driver_data;
	if (!p)
		return -EINVAL;

	/* Invalid eventcode */
	if (event->attr.config == 0 || event->attr.config > 16)
		return -EINVAL;

	return 0;
}

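/*
 * Add the event to the PMU: snapshot the current counter value so that
 * subsequent reads can report the delta.
 */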
static int papr_scm_pmu_add(struct perf_event *event, int flags)
{
	u64 count;
	int rc;
	struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);

	if (!nd_pmu)
		return -EINVAL;

	if (flags & PERF_EF_START) {
		rc = papr_scm_pmu_get_value(event, nd_pmu->dev, &count);
		if (rc)
			return rc;

		local64_set(&event->hw.prev_count, count);
	}

	return 0;
}

static void papr_scm_pmu_read(struct perf_event *event)
{
	u64 prev, now;
	int rc;
	struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);

	if (!nd_pmu)
		return;

	rc = papr_scm_pmu_get_value(event, nd_pmu->dev, &now);
	if (rc)
		return;

	prev = local64_xchg(&event->hw.prev_count, now);
	local64_add(now - prev, &event->count);
}

static void papr_scm_pmu_del(struct perf_event *event, int flags)
{
	papr_scm_pmu_read(event);
}

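/*
 * Allocate and register an nvdimm_pmu for the DIMM, wiring up the perf
 * callbacks above. Registration is skipped if no stat buffer is available.
 */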
static void papr_scm_pmu_register(struct papr_scm_priv *p)
{
	struct nvdimm_pmu *nd_pmu;
	int rc, nodeid;

	nd_pmu = kzalloc(sizeof(*nd_pmu), GFP_KERNEL);
	if (!nd_pmu) {
		rc = -ENOMEM;
		goto pmu_err_print;
	}

	if (!p->stat_buffer_len) {
		rc = -ENOENT;
		goto pmu_check_events_err;
	}

	nd_pmu->pmu.task_ctx_nr = perf_invalid_context;
	nd_pmu->pmu.name = nvdimm_name(p->nvdimm);
	nd_pmu->pmu.event_init = papr_scm_pmu_event_init;
	nd_pmu->pmu.read = papr_scm_pmu_read;
	nd_pmu->pmu.add = papr_scm_pmu_add;
	nd_pmu->pmu.del = papr_scm_pmu_del;

	nd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_INTERRUPT |
				   PERF_PMU_CAP_NO_EXCLUDE;

	/* updating the cpumask variable */
	nodeid = numa_map_to_online_node(dev_to_node(&p->pdev->dev));
	nd_pmu->arch_cpumask = *cpumask_of_node(nodeid);

	rc = register_nvdimm_pmu(nd_pmu, p->pdev);
	if (rc)
		goto pmu_check_events_err;

	/*
	 * Set archdata.priv value to nvdimm_pmu structure, to handle the
	 * unregistering of pmu device.
	 */
	p->pdev->archdata.priv = nd_pmu;
	return;

pmu_check_events_err:
	kfree(nd_pmu);
pmu_err_print:
	dev_info(&p->pdev->dev, "nvdimm pmu didn't register rc=%d\n", rc);
}

#else
static void papr_scm_pmu_register(struct papr_scm_priv *p) { }
#endif

/*
 * Issue hcall to retrieve dimm health info and populate papr_scm_priv with the
 * health information.
 */
static int __drc_pmem_query_health(struct papr_scm_priv *p)
{
	unsigned long ret[PLPAR_HCALL_BUFSIZE];
	u64 bitmap = 0;
	long rc;

	/* issue the hcall */
	rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index);
	if (rc == H_SUCCESS)
		bitmap = ret[0] & ret[1];
	else if (rc == H_FUNCTION)
		dev_info_once(&p->pdev->dev,
			      "Hcall H_SCM_HEALTH not implemented, assuming empty health bitmap");
	else {
		dev_err(&p->pdev->dev,
			"Failed to query health information, Err:%ld\n", rc);
		return -ENXIO;
	}

	p->lasthealth_jiffies = jiffies;
	/* Allow injecting specific health bits via inject mask. */
	if (p->health_bitmap_inject_mask)
		bitmap = (bitmap & ~p->health_bitmap_inject_mask) |
			 p->health_bitmap_inject_mask;
	WRITE_ONCE(p->health_bitmap, bitmap);
	dev_dbg(&p->pdev->dev,
		"Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n",
		ret[0], ret[1]);

	return 0;
}

/* Min interval in seconds for assuming stable dimm health */
#define MIN_HEALTH_QUERY_INTERVAL 60

/* Query cached health info and if needed call __drc_pmem_query_health */
static int drc_pmem_query_health(struct papr_scm_priv *p)
{
	unsigned long cache_timeout;
	int rc;

	/* Protect concurrent modifications to papr_scm_priv */
	rc = mutex_lock_interruptible(&p->health_mutex);
	if (rc)
		return rc;

	/* Jiffies offset for which the health data is assumed to be same */
	cache_timeout = p->lasthealth_jiffies +
		msecs_to_jiffies(MIN_HEALTH_QUERY_INTERVAL * 1000);

	/* Fetch new health info if it's older than MIN_HEALTH_QUERY_INTERVAL */
	if (time_after(jiffies, cache_timeout))
		rc = __drc_pmem_query_health(p);
	else
		/* Assume cached health data is valid */
		rc = 0;

	mutex_unlock(&p->health_mutex);
	return rc;
}

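/*
 * Read DIMM config metadata via H_SCM_READ_METADATA in 8/4/2/1-byte chunks
 * and copy it, byte-swapped as needed, into the caller's out buffer.
 */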
static int papr_scm_meta_get(struct papr_scm_priv *p,
			     struct nd_cmd_get_config_data_hdr *hdr)
{
	unsigned long data[PLPAR_HCALL_BUFSIZE];
	unsigned long offset, data_offset;
	int len, read;
	int64_t ret;

	if ((hdr->in_offset + hdr->in_length) > p->metadata_size)
		return -EINVAL;

	for (len = hdr->in_length; len; len -= read) {

		data_offset = hdr->in_length - len;
		offset = hdr->in_offset + data_offset;

		if (len >= 8)
			read = 8;
		else if (len >= 4)
			read = 4;
		else if (len >= 2)
			read = 2;
		else
			read = 1;

		ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index,
				  offset, read);

		if (ret == H_PARAMETER) /* bad DRC index */
			return -ENODEV;
		if (ret)
			return -EINVAL; /* other invalid parameter */

		switch (read) {
		case 8:
			*(uint64_t *)(hdr->out_buf + data_offset) = be64_to_cpu(data[0]);
			break;
		case 4:
			*(uint32_t *)(hdr->out_buf + data_offset) = be32_to_cpu(data[0] & 0xffffffff);
			break;

		case 2:
			*(uint16_t *)(hdr->out_buf + data_offset) = be16_to_cpu(data[0] & 0xffff);
			break;

		case 1:
			*(uint8_t *)(hdr->out_buf + data_offset) = (data[0] & 0xff);
			break;
		}
	}
	return 0;
}

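/*
 * Write DIMM config metadata via H_SCM_WRITE_METADATA, mirroring
 * papr_scm_meta_get() with the same 8/4/2/1-byte chunking.
 */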
static int papr_scm_meta_set(struct papr_scm_priv *p,
			     struct nd_cmd_set_config_hdr *hdr)
{
	unsigned long offset, data_offset;
	int len, wrote;
	unsigned long data;
	__be64 data_be;
	int64_t ret;

	if ((hdr->in_offset + hdr->in_length) > p->metadata_size)
		return -EINVAL;

	for (len = hdr->in_length; len; len -= wrote) {

		data_offset = hdr->in_length - len;
		offset = hdr->in_offset + data_offset;

		if (len >= 8) {
			data = *(uint64_t *)(hdr->in_buf + data_offset);
			data_be = cpu_to_be64(data);
			wrote = 8;
		} else if (len >= 4) {
			data = *(uint32_t *)(hdr->in_buf + data_offset);
			data &= 0xffffffff;
			data_be = cpu_to_be32(data);
			wrote = 4;
		} else if (len >= 2) {
			data = *(uint16_t *)(hdr->in_buf + data_offset);
			data &= 0xffff;
			data_be = cpu_to_be16(data);
			wrote = 2;
		} else {
			data_be = *(uint8_t *)(hdr->in_buf + data_offset);
			data_be &= 0xff;
			wrote = 1;
		}

		ret = plpar_hcall_norets(H_SCM_WRITE_METADATA, p->drc_index,
					 offset, data_be, wrote);
		if (ret == H_PARAMETER) /* bad DRC index */
			return -ENODEV;
		if (ret)
			return -EINVAL; /* other invalid parameter */
	}

	return 0;
}

/*
 * Do sanity checks on the input args to the dimm-control function and return
 * '0' if valid. Validation of PDSM payloads happens later in
 * papr_scm_service_pdsm.
 */
static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf,
			unsigned int buf_len)
{
	unsigned long cmd_mask = PAPR_SCM_DIMM_CMD_MASK;
	struct nd_cmd_pkg *nd_cmd;
	struct papr_scm_priv *p;
	enum papr_pdsm pdsm;

	/* Only dimm-specific calls are supported atm */
	if (!nvdimm)
		return -EINVAL;

	/* get the provider data from struct nvdimm */
	p = nvdimm_provider_data(nvdimm);

	if (!test_bit(cmd, &cmd_mask)) {
		dev_dbg(&p->pdev->dev, "Unsupported cmd=%u\n", cmd);
		return -EINVAL;
	}

	/* For CMD_CALL verify pdsm request */
	if (cmd == ND_CMD_CALL) {
		/* Verify the envelope and envelope size */
		if (!buf ||
		    buf_len < (sizeof(struct nd_cmd_pkg) + ND_PDSM_HDR_SIZE)) {
			dev_dbg(&p->pdev->dev, "Invalid pkg size=%u\n",
				buf_len);
			return -EINVAL;
		}

		/* Verify that the nd_cmd_pkg.nd_family is correct */
		nd_cmd = (struct nd_cmd_pkg *)buf;

		if (nd_cmd->nd_family != NVDIMM_FAMILY_PAPR) {
			dev_dbg(&p->pdev->dev, "Invalid pkg family=0x%llx\n",
				nd_cmd->nd_family);
			return -EINVAL;
		}

		pdsm = (enum papr_pdsm)nd_cmd->nd_command;

		/* Verify if the pdsm command is valid */
		if (pdsm <= PAPR_PDSM_MIN || pdsm >= PAPR_PDSM_MAX) {
			dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid PDSM\n",
				pdsm);
			return -EINVAL;
		}

		/* Have enough space to hold returned 'nd_pkg_pdsm' header */
		if (nd_cmd->nd_size_out < ND_PDSM_HDR_SIZE) {
			dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid payload\n",
				pdsm);
			return -EINVAL;
		}
	}

	/* Let the command be further processed */
	return 0;
}

static int papr_pdsm_fuel_gauge(struct papr_scm_priv *p,
				union nd_pdsm_payload *payload)
{
	int rc, size;
	u64 statval;
	struct papr_scm_perf_stat *stat;
	struct papr_scm_perf_stats *stats;

	/* Silently fail if fetching performance metrics isn't supported */
	if (!p->stat_buffer_len)
		return 0;

	/* Allocate a request buffer large enough to hold a single performance stat */
	size = sizeof(struct papr_scm_perf_stats) +
		sizeof(struct papr_scm_perf_stat);

	stats = kzalloc(size, GFP_KERNEL);
	if (!stats)
		return -ENOMEM;

	stat = &stats->scm_statistic[0];
	memcpy(&stat->stat_id, "MemLife ", sizeof(stat->stat_id));
	stat->stat_val = 0;

	/* Fetch the fuel gauge and populate it in payload */
	rc = drc_pmem_query_stats(p, stats, 1);
	if (rc < 0) {
		dev_dbg(&p->pdev->dev, "Err(%d) fetching fuel gauge\n", rc);
		goto free_stats;
	}

	statval = be64_to_cpu(stat->stat_val);
	dev_dbg(&p->pdev->dev,
		"Fetched fuel-gauge %llu", statval);
	payload->health.extension_flags |=
		PDSM_DIMM_HEALTH_RUN_GAUGE_VALID;
	payload->health.dimm_fuel_gauge = statval;

	rc = sizeof(struct nd_papr_pdsm_health);

free_stats:
	kfree(stats);
	return rc;
}

/* Add the dirty-shutdown-counter value to the pdsm */
static int papr_pdsm_dsc(struct papr_scm_priv *p,
			 union nd_pdsm_payload *payload)
{
	payload->health.extension_flags |= PDSM_DIMM_DSC_VALID;
	payload->health.dimm_dsc = p->dirty_shutdown_counter;

	return sizeof(struct nd_papr_pdsm_health);
}

/* Fetch the DIMM health info and populate it in the provided package. */
static int papr_pdsm_health(struct papr_scm_priv *p,
			    union nd_pdsm_payload *payload)
{
	int rc;

	/* Ensure dimm health mutex is taken preventing concurrent access */
	rc = mutex_lock_interruptible(&p->health_mutex);
	if (rc)
		goto out;

	/* Always fetch up-to-date dimm health data ignoring cached values */
	rc = __drc_pmem_query_health(p);
	if (rc) {
		mutex_unlock(&p->health_mutex);
		goto out;
	}

	/* update health struct with various flags derived from health bitmap */
	payload->health = (struct nd_papr_pdsm_health) {
		.extension_flags = 0,
		.dimm_unarmed = !!(p->health_bitmap & PAPR_PMEM_UNARMED_MASK),
		.dimm_bad_shutdown = !!(p->health_bitmap & PAPR_PMEM_BAD_SHUTDOWN_MASK),
		.dimm_bad_restore = !!(p->health_bitmap & PAPR_PMEM_BAD_RESTORE_MASK),
		.dimm_scrubbed = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED),
		.dimm_locked = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED),
		.dimm_encrypted = !!(p->health_bitmap & PAPR_PMEM_ENCRYPTED),
		.dimm_health = PAPR_PDSM_DIMM_HEALTHY,
	};

	/* Update field dimm_health based on health_bitmap flags */
	if (p->health_bitmap & PAPR_PMEM_HEALTH_FATAL)
		payload->health.dimm_health = PAPR_PDSM_DIMM_FATAL;
	else if (p->health_bitmap & PAPR_PMEM_HEALTH_CRITICAL)
		payload->health.dimm_health = PAPR_PDSM_DIMM_CRITICAL;
	else if (p->health_bitmap & PAPR_PMEM_HEALTH_UNHEALTHY)
		payload->health.dimm_health = PAPR_PDSM_DIMM_UNHEALTHY;

	/* struct populated hence can release the mutex now */
	mutex_unlock(&p->health_mutex);

	/* Populate the fuel gauge meter in the payload */
	papr_pdsm_fuel_gauge(p, payload);
	/* Populate the dirty-shutdown-counter field */
	papr_pdsm_dsc(p, payload);

	rc = sizeof(struct nd_papr_pdsm_health);

out:
	return rc;
}

/* Inject or clear a smart error in the DIMM health bitmap via the inject mask */
static int papr_pdsm_smart_inject(struct papr_scm_priv *p,
				  union nd_pdsm_payload *payload)
{
	int rc;
	u32 supported_flags = 0;
	u64 inject_mask = 0, clear_mask = 0;
	u64 mask;

	/* Check for individual smart error flags and update inject/clear masks */
	if (payload->smart_inject.flags & PDSM_SMART_INJECT_HEALTH_FATAL) {
		supported_flags |= PDSM_SMART_INJECT_HEALTH_FATAL;
		if (payload->smart_inject.fatal_enable)
			inject_mask |= PAPR_PMEM_HEALTH_FATAL;
		else
			clear_mask |= PAPR_PMEM_HEALTH_FATAL;
	}

	if (payload->smart_inject.flags & PDSM_SMART_INJECT_BAD_SHUTDOWN) {
		supported_flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN;
		if (payload->smart_inject.unsafe_shutdown_enable)
			inject_mask |= PAPR_PMEM_SHUTDOWN_DIRTY;
		else
			clear_mask |= PAPR_PMEM_SHUTDOWN_DIRTY;
	}

	dev_dbg(&p->pdev->dev, "[Smart-inject] inject_mask=%#llx clear_mask=%#llx\n",
		inject_mask, clear_mask);

	/* Prevent concurrent access to dimm health bitmap related members */
	rc = mutex_lock_interruptible(&p->health_mutex);
	if (rc)
		return rc;

	/* Use inject/clear masks to set health_bitmap_inject_mask */
	mask = READ_ONCE(p->health_bitmap_inject_mask);
	mask = (mask & ~clear_mask) | inject_mask;
	WRITE_ONCE(p->health_bitmap_inject_mask, mask);

	/* Invalidate cached health bitmap */
	p->lasthealth_jiffies = 0;

	mutex_unlock(&p->health_mutex);

	/* Return the supported flags back to userspace */
	payload->smart_inject.flags = supported_flags;

	return sizeof(struct nd_papr_pdsm_health);
}

/*
 * 'struct pdsm_cmd_desc'
 * Identifies supported PDSMs' expected length of in/out payloads
 * and pdsm service function.
 *
 * size_in	: Size of input payload if any in the PDSM request.
 * size_out	: Size of output payload if any in the PDSM request.
 * service	: Service function for the PDSM request. Return semantics:
 *		  rc < 0 : Error servicing PDSM and rc indicates the error.
 *		  rc >= 0: Serviced successfully and 'rc' indicates the number
 *			   of bytes written to payload.
 */
struct pdsm_cmd_desc {
	u32 size_in;
	u32 size_out;
	int (*service)(struct papr_scm_priv *dimm,
		       union nd_pdsm_payload *payload);
};

/* Holds all supported PDSMs' command descriptors */
static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = {
	[PAPR_PDSM_MIN] = {
		.size_in = 0,
		.size_out = 0,
		.service = NULL,
	},
	/* New PDSM command descriptors to be added below */

	[PAPR_PDSM_HEALTH] = {
		.size_in = 0,
		.size_out = sizeof(struct nd_papr_pdsm_health),
		.service = papr_pdsm_health,
	},

	[PAPR_PDSM_SMART_INJECT] = {
		.size_in = sizeof(struct nd_papr_pdsm_smart_inject),
		.size_out = sizeof(struct nd_papr_pdsm_smart_inject),
		.service = papr_pdsm_smart_inject,
	},
	/* Empty */
	[PAPR_PDSM_MAX] = {
		.size_in = 0,
		.size_out = 0,
		.service = NULL,
	},
};

/* Given a valid pdsm cmd return its command descriptor else return NULL */
static inline const struct pdsm_cmd_desc *pdsm_cmd_desc(enum papr_pdsm cmd)
{
	if (cmd >= 0 && cmd < ARRAY_SIZE(__pdsm_cmd_descriptors))
		return &__pdsm_cmd_descriptors[cmd];

	return NULL;
}

/*
 * For a given pdsm request call an appropriate service function.
 * Returns errors if any while handling the pdsm command package.
 */
static int papr_scm_service_pdsm(struct papr_scm_priv *p,
				 struct nd_cmd_pkg *pkg)
{
	/* Get the PDSM header and PDSM command */
	struct nd_pkg_pdsm *pdsm_pkg = (struct nd_pkg_pdsm *)pkg->nd_payload;
	enum papr_pdsm pdsm = (enum papr_pdsm)pkg->nd_command;
	const struct pdsm_cmd_desc *pdsc;
	int rc;

	/* Fetch corresponding pdsm descriptor for validation and servicing */
	pdsc = pdsm_cmd_desc(pdsm);

	/* Validate pdsm descriptor */
	/* Ensure that reserved fields are 0 */
	if (pdsm_pkg->reserved[0] || pdsm_pkg->reserved[1]) {
		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid reserved field\n",
			pdsm);
		return -EINVAL;
	}

	/* If pdsm expects some input, then ensure that the size_in matches */
	if (pdsc->size_in &&
	    pkg->nd_size_in != (pdsc->size_in + ND_PDSM_HDR_SIZE)) {
		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_in=%d\n",
			pdsm, pkg->nd_size_in);
		return -EINVAL;
	}

	/* If pdsm wants to return data, then ensure that size_out matches */
	if (pdsc->size_out &&
	    pkg->nd_size_out != (pdsc->size_out + ND_PDSM_HDR_SIZE)) {
		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_out=%d\n",
			pdsm, pkg->nd_size_out);
		return -EINVAL;
	}

	/* Service the pdsm */
	if (pdsc->service) {
		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Servicing..\n", pdsm);

		rc = pdsc->service(p, &pdsm_pkg->payload);

		if (rc < 0) {
			/* error encountered while servicing pdsm */
			pdsm_pkg->cmd_status = rc;
			pkg->nd_fw_size = ND_PDSM_HDR_SIZE;
		} else {
			/* pdsm serviced and 'rc' bytes written to payload */
			pdsm_pkg->cmd_status = 0;
			pkg->nd_fw_size = ND_PDSM_HDR_SIZE + rc;
		}
	} else {
		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Unsupported PDSM request\n",
			pdsm);
		pdsm_pkg->cmd_status = -ENOENT;
		pkg->nd_fw_size = ND_PDSM_HDR_SIZE;
	}

	return pdsm_pkg->cmd_status;
}

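/*
 * Top-level dimm-control entry point: validate the incoming command and
 * dispatch it to the config-size/get/set handlers or to the PDSM service.
 */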
static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
			  struct nvdimm *nvdimm, unsigned int cmd, void *buf,
			  unsigned int buf_len, int *cmd_rc)
{
	struct nd_cmd_get_config_size *get_size_hdr;
	struct nd_cmd_pkg *call_pkg = NULL;
	struct papr_scm_priv *p;
	int rc;

	rc = is_cmd_valid(nvdimm, cmd, buf, buf_len);
	if (rc) {
		pr_debug("Invalid cmd=0x%x. Err=%d\n", cmd, rc);
		return rc;
	}

	/* Use a local variable in case cmd_rc pointer is NULL */
	if (!cmd_rc)
		cmd_rc = &rc;

	p = nvdimm_provider_data(nvdimm);

	switch (cmd) {
	case ND_CMD_GET_CONFIG_SIZE:
		get_size_hdr = buf;

		get_size_hdr->status = 0;
		get_size_hdr->max_xfer = 8;
		get_size_hdr->config_size = p->metadata_size;
		*cmd_rc = 0;
		break;

	case ND_CMD_GET_CONFIG_DATA:
		*cmd_rc = papr_scm_meta_get(p, buf);
		break;

	case ND_CMD_SET_CONFIG_DATA:
		*cmd_rc = papr_scm_meta_set(p, buf);
		break;

	case ND_CMD_CALL:
		call_pkg = (struct nd_cmd_pkg *)buf;
		*cmd_rc = papr_scm_service_pdsm(p, call_pkg);
		break;

	default:
		dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd);
		return -EINVAL;
	}

	dev_dbg(&p->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc);

	return 0;
}

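/* Show the health bitmap bits currently being injected via smart-inject */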
static ssize_t health_bitmap_inject_show(struct device *dev,
					 struct device_attribute *attr,
					 char *buf)
{
	struct nvdimm *dimm = to_nvdimm(dev);
	struct papr_scm_priv *p = nvdimm_provider_data(dimm);

	return sprintf(buf, "%#llx\n",
		       READ_ONCE(p->health_bitmap_inject_mask));
}

static DEVICE_ATTR_ADMIN_RO(health_bitmap_inject);

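/* Dump all DIMM performance stats returned by phyp as 'stat_id = value' lines */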
static ssize_t perf_stats_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	int index;
	ssize_t rc;
	struct seq_buf s;
	struct papr_scm_perf_stat *stat;
	struct papr_scm_perf_stats *stats;
	struct nvdimm *dimm = to_nvdimm(dev);
	struct papr_scm_priv *p = nvdimm_provider_data(dimm);

	if (!p->stat_buffer_len)
		return -ENOENT;

	/* Allocate the buffer for phyp where stats are written */
	stats = kzalloc(p->stat_buffer_len, GFP_KERNEL);
	if (!stats)
		return -ENOMEM;

	/* Ask phyp to return all dimm perf stats */
	rc = drc_pmem_query_stats(p, stats, 0);
	if (rc)
		goto free_stats;
	/*
	 * Go through the returned output buffer and print stats and
	 * values. Since stat_id is essentially a char string of
	 * 8 bytes, simply use the string format specifier to print it.
	 */
	seq_buf_init(&s, buf, PAGE_SIZE);
	for (index = 0, stat = stats->scm_statistic;
	     index < be32_to_cpu(stats->num_statistics);
	     ++index, ++stat) {
		seq_buf_printf(&s, "%.8s = 0x%016llX\n",
			       stat->stat_id,
			       be64_to_cpu(stat->stat_val));
	}

free_stats:
	kfree(stats);
	return rc ? rc : (ssize_t)seq_buf_used(&s);
}
static DEVICE_ATTR_ADMIN_RO(perf_stats);

static ssize_t flags_show(struct device *dev,
			  struct device_attribute *attr, char *buf)
{
	struct nvdimm *dimm = to_nvdimm(dev);
	struct papr_scm_priv *p = nvdimm_provider_data(dimm);
	struct seq_buf s;
	u64 health;
	int rc;

	rc = drc_pmem_query_health(p);
	if (rc)
		return rc;

	/* Copy health_bitmap locally, check masks & update out buffer */
	health = READ_ONCE(p->health_bitmap);

	seq_buf_init(&s, buf, PAGE_SIZE);
	if (health & PAPR_PMEM_UNARMED_MASK)
		seq_buf_printf(&s, "not_armed ");

	if (health & PAPR_PMEM_BAD_SHUTDOWN_MASK)
		seq_buf_printf(&s, "flush_fail ");

	if (health & PAPR_PMEM_BAD_RESTORE_MASK)
		seq_buf_printf(&s, "restore_fail ");

	if (health & PAPR_PMEM_ENCRYPTED)
		seq_buf_printf(&s, "encrypted ");

	if (health & PAPR_PMEM_SMART_EVENT_MASK)
		seq_buf_printf(&s, "smart_notify ");

	if (health & PAPR_PMEM_SCRUBBED_AND_LOCKED)
		seq_buf_printf(&s, "scrubbed locked ");

	if (seq_buf_used(&s))
		seq_buf_printf(&s, "\n");

	return seq_buf_used(&s);
}
DEVICE_ATTR_RO(flags);

static ssize_t dirty_shutdown_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct nvdimm *dimm = to_nvdimm(dev);
	struct papr_scm_priv *p = nvdimm_provider_data(dimm);

	return sysfs_emit(buf, "%llu\n", p->dirty_shutdown_counter);
}
DEVICE_ATTR_RO(dirty_shutdown);

static umode_t papr_nd_attribute_visible(struct kobject *kobj,
					 struct attribute *attr, int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct nvdimm *nvdimm = to_nvdimm(dev);
	struct papr_scm_priv *p = nvdimm_provider_data(nvdimm);

	/* If perf-stats aren't available, hide the perf_stats sysfs attribute */
	if (attr == &dev_attr_perf_stats.attr && p->stat_buffer_len == 0)
		return 0;

	return attr->mode;
}

/* papr_scm specific dimm attributes */
static struct attribute *papr_nd_attributes[] = {
	&dev_attr_flags.attr,
	&dev_attr_perf_stats.attr,
	&dev_attr_dirty_shutdown.attr,
	&dev_attr_health_bitmap_inject.attr,
	NULL,
};

static const struct attribute_group papr_nd_attribute_group = {
	.name = "papr",
	.is_visible = papr_nd_attribute_visible,
	.attrs = papr_nd_attributes,
};

static const struct attribute_group *papr_nd_attr_groups[] = {
	&papr_nd_attribute_group,
	NULL,
};

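/*
 * Register the nvdimm bus, the DIMM object and the backing region with
 * libnvdimm and add the region to the global papr_nd_regions list.
 */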
static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
{
	struct device *dev = &p->pdev->dev;
	struct nd_mapping_desc mapping;
	struct nd_region_desc ndr_desc;
	unsigned long dimm_flags;
	int target_nid, online_nid;

	p->bus_desc.ndctl = papr_scm_ndctl;
	p->bus_desc.module = THIS_MODULE;
	p->bus_desc.of_node = p->pdev->dev.of_node;
	p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL);

	/* Set the dimm command family mask to accept PDSMs */
	set_bit(NVDIMM_FAMILY_PAPR, &p->bus_desc.dimm_family_mask);

	if (!p->bus_desc.provider_name)
		return -ENOMEM;

	p->bus = nvdimm_bus_register(NULL, &p->bus_desc);
	if (!p->bus) {
		dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn);
		kfree(p->bus_desc.provider_name);
		return -ENXIO;
	}

	dimm_flags = 0;
	set_bit(NDD_LABELING, &dimm_flags);

	/*
	 * Check if the nvdimm is unarmed. No locking needed as we are still
	 * initializing. Ignore error encountered if any.
	 */
	__drc_pmem_query_health(p);

	if (p->health_bitmap & PAPR_PMEM_UNARMED_MASK)
		set_bit(NDD_UNARMED, &dimm_flags);

	p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups,
				  dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
	if (!p->nvdimm) {
		dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn);
		goto err;
	}

	if (nvdimm_bus_check_dimm_count(p->bus, 1))
		goto err;

	/* now add the region */

	memset(&mapping, 0, sizeof(mapping));
	mapping.nvdimm = p->nvdimm;
	mapping.start = 0;
	mapping.size = p->blocks * p->block_size; // XXX: potential overflow?

	memset(&ndr_desc, 0, sizeof(ndr_desc));
	target_nid = dev_to_node(&p->pdev->dev);
	online_nid = numa_map_to_online_node(target_nid);
	ndr_desc.numa_node = online_nid;
	ndr_desc.target_node = target_nid;
	ndr_desc.res = &p->res;
	ndr_desc.of_node = p->dn;
	ndr_desc.provider_data = p;
	ndr_desc.mapping = &mapping;
	ndr_desc.num_mappings = 1;
	ndr_desc.nd_set = &p->nd_set;

	if (p->hcall_flush_required) {
		set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
		ndr_desc.flush = papr_scm_pmem_flush;
	}

	if (p->is_volatile)
		p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
	else {
		set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
		p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc);
	}
	if (!p->region) {
		dev_err(dev, "Error registering region %pR from %pOF\n",
			ndr_desc.res, p->dn);
		goto err;
	}
	if (target_nid != online_nid)
		dev_info(dev, "Region registered with target node %d and online node %d",
			 target_nid, online_nid);

	mutex_lock(&papr_ndr_lock);
	list_add_tail(&p->region_list, &papr_nd_regions);
	mutex_unlock(&papr_ndr_lock);

	return 0;

err:	nvdimm_bus_unregister(p->bus);
	kfree(p->bus_desc.provider_name);
	return -ENXIO;
}

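/*
 * Register an L1-cacheline-sized range around the failing physical address
 * as a bad range and notify libnvdimm to revalidate poison.
 */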
static void papr_scm_add_badblock(struct nd_region *region,
				  struct nvdimm_bus *bus, u64 phys_addr)
{
	u64 aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES);

	if (nvdimm_bus_add_badrange(bus, aligned_addr, L1_CACHE_BYTES)) {
		pr_err("Bad block registration for 0x%llx failed\n", phys_addr);
		return;
	}

	pr_debug("Add memory range (0x%llx - 0x%llx) as bad range\n",
		 aligned_addr, aligned_addr + L1_CACHE_BYTES);

	nvdimm_region_notify(region, NVDIMM_REVALIDATE_POISON);
}

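/*
 * MCE notifier: on an uncorrectable error, map the physical address back to
 * a papr_scm region and, if found, register the address as a bad block.
 */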
static int handle_mce_ue(struct notifier_block *nb, unsigned long val,
			 void *data)
{
	struct machine_check_event *evt = data;
	struct papr_scm_priv *p;
	u64 phys_addr;
	bool found = false;

	if (evt->error_type != MCE_ERROR_TYPE_UE)
		return NOTIFY_DONE;

	if (list_empty(&papr_nd_regions))
		return NOTIFY_DONE;

	/*
	 * The physical address obtained here is PAGE_SIZE aligned, so get the
	 * exact address from the effective address
	 */
	phys_addr = evt->u.ue_error.physical_address +
		(evt->u.ue_error.effective_address & ~PAGE_MASK);

	if (!evt->u.ue_error.physical_address_provided ||
	    !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT)))
		return NOTIFY_DONE;

	/* mce notifier is called from a process context, so mutex is safe */
	mutex_lock(&papr_ndr_lock);
	list_for_each_entry(p, &papr_nd_regions, region_list) {
		if (phys_addr >= p->res.start && phys_addr <= p->res.end) {
			found = true;
			break;
		}
	}

	if (found)
		papr_scm_add_badblock(p->region, p->bus, phys_addr);

	mutex_unlock(&papr_ndr_lock);

	return found ? NOTIFY_OK : NOTIFY_DONE;
}

static struct notifier_block mce_ue_nb = {
	.notifier_call = handle_mce_ue
};

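/*
 * Probe: read the required device-tree properties, bind the SCM region and
 * hand it over to papr_scm_nvdimm_init() and the PMU registration code.
 */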
static int papr_scm_probe(struct platform_device *pdev)
{
	struct device_node *dn = pdev->dev.of_node;
	u32 drc_index, metadata_size;
	u64 blocks, block_size;
	struct papr_scm_priv *p;
	u8 uuid_raw[UUID_SIZE];
	const char *uuid_str;
	ssize_t stat_size;
	uuid_t uuid;
	int rc;

	/* check we have all the required DT properties */
	if (of_property_read_u32(dn, "ibm,my-drc-index", &drc_index)) {
		dev_err(&pdev->dev, "%pOF: missing drc-index!\n", dn);
		return -ENODEV;
	}

	if (of_property_read_u64(dn, "ibm,block-size", &block_size)) {
		dev_err(&pdev->dev, "%pOF: missing block-size!\n", dn);
		return -ENODEV;
	}

	if (of_property_read_u64(dn, "ibm,number-of-blocks", &blocks)) {
		dev_err(&pdev->dev, "%pOF: missing number-of-blocks!\n", dn);
		return -ENODEV;
	}

	if (of_property_read_string(dn, "ibm,unit-guid", &uuid_str)) {
		dev_err(&pdev->dev, "%pOF: missing unit-guid!\n", dn);
		return -ENODEV;
	}

	/*
	 * Open Firmware platform device creation won't update the NUMA
	 * distance table. For PAPR SCM devices we use numa_map_to_online_node()
	 * to find the nearest online NUMA node and that requires correct
	 * distance table information.
	 */
	update_numa_distance(dn);

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	/* Initialize the dimm mutex */
	mutex_init(&p->health_mutex);

	/* optional DT properties */
	of_property_read_u32(dn, "ibm,metadata-size", &metadata_size);

	p->dn = dn;
	p->drc_index = drc_index;
	p->block_size = block_size;
	p->blocks = blocks;
	p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required");
	p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required");

	if (of_property_read_u64(dn, "ibm,persistence-failed-count",
				 &p->dirty_shutdown_counter))
		p->dirty_shutdown_counter = 0;

	/* We just need to ensure that the set cookies are unique across DIMMs */
	uuid_parse(uuid_str, &uuid);

	/*
	 * The cookie1 and cookie2 are not really little endian.
	 * We store a raw buffer representation of the
	 * uuid string so that we can compare this with the label
	 * area cookie irrespective of the endian configuration
	 * with which the kernel is built.
	 *
	 * Historically we stored the cookie in the below format.
	 * for a uuid string 72511b67-0b3b-42fd-8d1d-5be3cae8bcaa
	 *	cookie1 was 0xfd423b0b671b5172
	 *	cookie2 was 0xaabce8cae35b1d8d
	 */
	export_uuid(uuid_raw, &uuid);
	p->nd_set.cookie1 = get_unaligned_le64(&uuid_raw[0]);
	p->nd_set.cookie2 = get_unaligned_le64(&uuid_raw[8]);

	/* might be zero */
	p->metadata_size = metadata_size;
	p->pdev = pdev;

	/* request the hypervisor to bind this region to somewhere in memory */
	rc = drc_pmem_bind(p);

	/* If phyp says the drc memory is still bound, force an unbind and retry */
	if (rc == H_OVERLAP)
		rc = drc_pmem_query_n_bind(p);

	if (rc != H_SUCCESS) {
		dev_err(&p->pdev->dev, "bind err: %d\n", rc);
		rc = -ENXIO;
		goto err;
	}

	/* setup the resource for the newly bound range */
	p->res.start = p->bound_addr;
	p->res.end = p->bound_addr + p->blocks * p->block_size - 1;
	p->res.name = pdev->name;
	p->res.flags = IORESOURCE_MEM;

	/* Try retrieving the stat buffer and see if it's supported */
	stat_size = drc_pmem_query_stats(p, NULL, 0);
	if (stat_size > 0) {
		p->stat_buffer_len = stat_size;
		dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n",
			p->stat_buffer_len);
	}

	rc = papr_scm_nvdimm_init(p);
	if (rc)
		goto err2;

	platform_set_drvdata(pdev, p);
	papr_scm_pmu_register(p);

	return 0;

err2:	drc_pmem_unbind(p);
err:	kfree(p);
	return rc;
}

static void papr_scm_remove(struct platform_device *pdev)
{
	struct papr_scm_priv *p = platform_get_drvdata(pdev);

	mutex_lock(&papr_ndr_lock);
	list_del(&p->region_list);
	mutex_unlock(&papr_ndr_lock);

	nvdimm_bus_unregister(p->bus);
	drc_pmem_unbind(p);

	if (pdev->archdata.priv)
		unregister_nvdimm_pmu(pdev->archdata.priv);

	pdev->archdata.priv = NULL;
	kfree(p->bus_desc.provider_name);
	kfree(p);
}

static const struct of_device_id papr_scm_match[] = {
	{ .compatible = "ibm,pmemory" },
	{ .compatible = "ibm,pmemory-v2" },
	{ },
};

static struct platform_driver papr_scm_driver = {
	.probe = papr_scm_probe,
	.remove_new = papr_scm_remove,
	.driver = {
		.name = "papr_scm",
		.of_match_table = papr_scm_match,
	},
};

static int __init papr_scm_init(void)
{
	int ret;

	ret = platform_driver_register(&papr_scm_driver);
	if (!ret)
		mce_register_notifier(&mce_ue_nb);

	return ret;
}
module_init(papr_scm_init);

static void __exit papr_scm_exit(void)
{
	mce_unregister_notifier(&mce_ue_nb);
	platform_driver_unregister(&papr_scm_driver);
}
module_exit(papr_scm_exit);

MODULE_DEVICE_TABLE(of, papr_scm_match);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("IBM Corporation");