1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * In-Memory Collection (IMC) Performance Monitor counter support. |
4 | * |
5 | * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation. |
6 | * (C) 2017 Anju T Sudhakar, IBM Corporation. |
7 | * (C) 2017 Hemant K Shaw, IBM Corporation. |
8 | */ |
9 | #include <linux/of.h> |
10 | #include <linux/perf_event.h> |
11 | #include <linux/slab.h> |
12 | #include <asm/opal.h> |
13 | #include <asm/imc-pmu.h> |
14 | #include <asm/cputhreads.h> |
15 | #include <asm/smp.h> |
16 | #include <linux/string.h> |
17 | #include <linux/spinlock.h> |
18 | |
19 | /* Nest IMC data structures and variables */ |
20 | |
21 | /* |
22 | * Used to avoid races in counting the nest-pmu units during hotplug |
23 | * register and unregister |
24 | */ |
25 | static DEFINE_MUTEX(nest_init_lock); |
26 | static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc); |
27 | static struct imc_pmu **per_nest_pmu_arr; |
28 | static cpumask_t nest_imc_cpumask; |
29 | static struct imc_pmu_ref *nest_imc_refc; |
30 | static int nest_pmus; |
31 | |
32 | /* Core IMC data structures and variables */ |
33 | |
34 | static cpumask_t core_imc_cpumask; |
35 | static struct imc_pmu_ref *core_imc_refc; |
36 | static struct imc_pmu *core_imc_pmu; |
37 | |
38 | /* Thread IMC data structures and variables */ |
39 | |
40 | static DEFINE_PER_CPU(u64 *, thread_imc_mem); |
41 | static struct imc_pmu *thread_imc_pmu; |
42 | static int thread_imc_mem_size; |
43 | |
44 | /* Trace IMC data structures */ |
45 | static DEFINE_PER_CPU(u64 *, trace_imc_mem); |
46 | static struct imc_pmu_ref *trace_imc_refc; |
47 | static int trace_imc_mem_size; |
48 | |
49 | /* |
50 | * Global data structure used to avoid races between thread, |
51 | * core and trace-imc |
52 | */ |
53 | static struct imc_pmu_ref imc_global_refc = { |
54 | .lock = __SPIN_LOCK_UNLOCKED(imc_global_refc.lock), |
55 | .id = 0, |
56 | .refc = 0, |
57 | }; |
58 | |
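/* Map a perf_event back to the struct imc_pmu that owns it */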
59 | static struct imc_pmu *imc_event_to_pmu(struct perf_event *event) |
60 | { |
61 | return container_of(event->pmu, struct imc_pmu, pmu); |
62 | } |
63 | |
64 | PMU_FORMAT_ATTR(event, "config:0-61"); |
65 | PMU_FORMAT_ATTR(offset, "config:0-31"); |
66 | PMU_FORMAT_ATTR(rvalue, "config:32"); |
67 | PMU_FORMAT_ATTR(mode, "config:33-40"); |
68 | static struct attribute *imc_format_attrs[] = { |
69 | &format_attr_event.attr, |
70 | &format_attr_offset.attr, |
71 | &format_attr_rvalue.attr, |
72 | &format_attr_mode.attr, |
73 | NULL, |
74 | }; |
75 | |
76 | static const struct attribute_group imc_format_group = { |
77 | .name = "format", |
78 | .attrs = imc_format_attrs, |
79 | }; |
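/*
 * Note on the "event" field (config:0-61): for accumulation-mode PMUs it is
 * the byte offset of a counter within the PMU's counter memory, and
 * event_init() adds it to the per-chip/per-core base address. For example,
 * a sysfs entry exporting "event=0x100" (hypothetical offset) would read the
 * 64-bit counter located 0x100 bytes into that memory.
 */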
80 | |
81 | /* Format attribute for imc trace-mode */ |
82 | PMU_FORMAT_ATTR(cpmc_reserved, "config:0-19"); |
83 | PMU_FORMAT_ATTR(cpmc_event, "config:20-27"); |
84 | PMU_FORMAT_ATTR(cpmc_samplesel, "config:28-29"); |
85 | PMU_FORMAT_ATTR(cpmc_load, "config:30-61"); |
86 | static struct attribute *trace_imc_format_attrs[] = { |
87 | &format_attr_event.attr, |
88 | &format_attr_cpmc_reserved.attr, |
89 | &format_attr_cpmc_event.attr, |
90 | &format_attr_cpmc_samplesel.attr, |
91 | &format_attr_cpmc_load.attr, |
92 | NULL, |
93 | }; |
94 | |
95 | static const struct attribute_group trace_imc_format_group = { |
96 | .name = "format", |
97 | .attrs = trace_imc_format_attrs, |
98 | }; |
99 | |
100 | /* Get the cpumask printed to a buffer "buf" */ |
101 | static ssize_t imc_pmu_cpumask_get_attr(struct device *dev, |
102 | struct device_attribute *attr, |
103 | char *buf) |
104 | { |
105 | struct pmu *pmu = dev_get_drvdata(dev); |
106 | struct imc_pmu *imc_pmu = container_of(pmu, struct imc_pmu, pmu); |
107 | cpumask_t *active_mask; |
108 | |
109 | switch(imc_pmu->domain){ |
110 | case IMC_DOMAIN_NEST: |
111 | active_mask = &nest_imc_cpumask; |
112 | break; |
113 | case IMC_DOMAIN_CORE: |
114 | active_mask = &core_imc_cpumask; |
115 | break; |
116 | default: |
117 | return 0; |
118 | } |
119 | |
120 | return cpumap_print_to_pagebuf(true, buf, active_mask); |
121 | } |
122 | |
123 | static DEVICE_ATTR(cpumask, S_IRUGO, imc_pmu_cpumask_get_attr, NULL); |
124 | |
125 | static struct attribute *imc_pmu_cpumask_attrs[] = { |
126 | &dev_attr_cpumask.attr, |
127 | NULL, |
128 | }; |
129 | |
130 | static const struct attribute_group imc_pmu_cpumask_attr_group = { |
131 | .attrs = imc_pmu_cpumask_attrs, |
132 | }; |
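/*
 * The "cpumask" sysfs attribute tells userspace (e.g. the perf tool) which
 * CPU to open nest/core imc events on, since only one designated cpu per
 * chip (nest) or per core (core) reads the counters.
 */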
133 | |
134 | /* device_str_attr_create : Populate event "name" and string "str" in attribute */ |
135 | static struct attribute *device_str_attr_create(const char *name, const char *str) |
136 | { |
137 | struct perf_pmu_events_attr *attr; |
138 | |
139 | attr = kzalloc(sizeof(*attr), GFP_KERNEL); |
140 | if (!attr) |
141 | return NULL; |
142 | sysfs_attr_init(&attr->attr.attr); |
143 | |
144 | attr->event_str = str; |
145 | attr->attr.attr.name = name; |
146 | attr->attr.attr.mode = 0444; |
147 | attr->attr.show = perf_event_sysfs_show; |
148 | |
149 | return &attr->attr.attr; |
150 | } |
151 | |
152 | static int imc_parse_event(struct device_node *np, const char *scale, |
153 | const char *unit, const char *prefix, |
154 | u32 base, struct imc_events *event) |
155 | { |
156 | const char *s; |
157 | u32 reg; |
158 | |
159 | if (of_property_read_u32(np, "reg", &reg)) |
160 | goto error; |
161 | /* Add the base_reg value to the "reg" */ |
162 | event->value = base + reg; |
163 | |
164 | if (of_property_read_string(np, "event-name", &s)) |
165 | goto error; |
166 | |
167 | event->name = kasprintf(GFP_KERNEL, "%s%s", prefix, s); |
168 | if (!event->name) |
169 | goto error; |
170 | |
171 | if (of_property_read_string(np, "scale", &s)) |
172 | s = scale; |
173 | |
174 | if (s) { |
175 | event->scale = kstrdup(s, GFP_KERNEL); |
176 | if (!event->scale) |
177 | goto error; |
178 | } |
179 | |
180 | if (of_property_read_string(np, "unit", &s)) |
181 | s = unit; |
182 | |
183 | if (s) { |
184 | event->unit = kstrdup(s, GFP_KERNEL); |
185 | if (!event->unit) |
186 | goto error; |
187 | } |
188 | |
189 | return 0; |
190 | error: |
191 | kfree(event->unit); |
192 | kfree(event->scale); |
193 | kfree(event->name); |
194 | return -EINVAL; |
195 | } |
196 | |
197 | /* |
198 | * imc_free_events: Function to cleanup the events list, having |
199 | * "nr_entries". |
200 | */ |
201 | static void imc_free_events(struct imc_events *events, int nr_entries) |
202 | { |
203 | int i; |
204 | |
205 | /* Nothing to clean, return */ |
206 | if (!events) |
207 | return; |
208 | for (i = 0; i < nr_entries; i++) { |
209 | kfree(events[i].unit); |
210 | kfree(events[i].scale); |
211 | kfree(events[i].name); |
212 | } |
213 | |
214 | kfree(events); |
215 | } |
216 | |
217 | /* |
218 | * update_events_in_group: Update the "events" information in an attr_group |
219 | * and assign the attr_group to the pmu "pmu". |
220 | */ |
221 | static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) |
222 | { |
223 | struct attribute_group *attr_group; |
224 | struct attribute **attrs, *dev_str; |
225 | struct device_node *np, *pmu_events; |
226 | u32 handle, base_reg; |
227 | int i = 0, j = 0, ct, ret; |
228 | const char *prefix, *g_scale, *g_unit; |
229 | const char *ev_val_str, *ev_scale_str, *ev_unit_str; |
230 | |
231 | if (!of_property_read_u32(node, "events", &handle)) |
232 | pmu_events = of_find_node_by_phandle(handle); |
233 | else |
234 | return 0; |
235 | |
236 | /* Did not find any node with a given phandle */ |
237 | if (!pmu_events) |
238 | return 0; |
239 | |
240 | /* Get a count of number of child nodes */ |
241 | ct = of_get_child_count(pmu_events); |
242 | |
243 | /* Get the event prefix */ |
244 | if (of_property_read_string(node, "events-prefix", &prefix)) { |
245 | of_node_put(pmu_events); |
246 | return 0; |
247 | } |
248 | |
249 | /* Get a global unit and scale data if available */ |
250 | if (of_property_read_string(node, "scale", &g_scale)) |
251 | g_scale = NULL; |
252 | |
253 | if (of_property_read_string(node, "unit", &g_unit)) |
254 | g_unit = NULL; |
255 | |
256 | /* "reg" property gives out the base offset of the counters data */ |
257 | of_property_read_u32(node, "reg", &base_reg); |
258 | |
259 | /* Allocate memory for the events */ |
260 | pmu->events = kcalloc(ct, sizeof(struct imc_events), GFP_KERNEL); |
261 | if (!pmu->events) { |
262 | of_node_put(pmu_events); |
263 | return -ENOMEM; |
264 | } |
265 | |
266 | ct = 0; |
267 | /* Parse the events and update the struct */ |
268 | for_each_child_of_node(pmu_events, np) { |
269 | ret = imc_parse_event(np, g_scale, g_unit, prefix, base_reg, &pmu->events[ct]); |
270 | if (!ret) |
271 | ct++; |
272 | } |
273 | |
274 | of_node_put(pmu_events); |
275 | |
276 | /* Allocate memory for attribute group */ |
277 | attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL); |
278 | if (!attr_group) { |
279 | imc_free_events(pmu->events, ct); |
280 | return -ENOMEM; |
281 | } |
282 | |
283 | /* |
284 | * Allocate memory for attributes. |
285 | * Since we have count of events for this pmu, we also allocate |
286 | * memory for the scale and unit attribute for now. |
287 | * "ct" has the total event structs added from the events-parent node. |
288 | * So allocate three times the "ct" (this includes event, event_scale and |
289 | * event_unit). |
290 | */ |
291 | attrs = kcalloc(((ct * 3) + 1), sizeof(struct attribute *), GFP_KERNEL); |
292 | if (!attrs) { |
293 | kfree(attr_group); |
294 | imc_free_events(pmu->events, ct); |
295 | return -ENOMEM; |
296 | } |
297 | |
298 | attr_group->name = "events"; |
299 | attr_group->attrs = attrs; |
300 | do { |
301 | ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i].value); |
302 | if (!ev_val_str) |
303 | continue; |
304 | dev_str = device_str_attr_create(pmu->events[i].name, ev_val_str); |
305 | if (!dev_str) |
306 | continue; |
307 | |
308 | attrs[j++] = dev_str; |
309 | if (pmu->events[i].scale) { |
310 | ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale", pmu->events[i].name); |
311 | if (!ev_scale_str) |
312 | continue; |
313 | dev_str = device_str_attr_create(ev_scale_str, pmu->events[i].scale); |
314 | if (!dev_str) |
315 | continue; |
316 | |
317 | attrs[j++] = dev_str; |
318 | } |
319 | |
320 | if (pmu->events[i].unit) { |
321 | ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit", pmu->events[i].name); |
322 | if (!ev_unit_str) |
323 | continue; |
324 | dev_str = device_str_attr_create(ev_unit_str, pmu->events[i].unit); |
325 | if (!dev_str) |
326 | continue; |
327 | |
328 | attrs[j++] = dev_str; |
329 | } |
330 | } while (++i < ct); |
331 | |
332 | /* Save the event attribute */ |
333 | pmu->attr_groups[IMC_EVENT_ATTR] = attr_group; |
334 | |
335 | return 0; |
336 | } |
337 | |
338 | /* get_nest_pmu_ref: Return the imc_pmu_ref struct for the given node */ |
339 | static struct imc_pmu_ref *get_nest_pmu_ref(int cpu) |
340 | { |
341 | return per_cpu(local_nest_imc_refc, cpu); |
342 | } |
343 | |
344 | static void nest_change_cpu_context(int old_cpu, int new_cpu) |
345 | { |
346 | struct imc_pmu **pn = per_nest_pmu_arr; |
347 | |
348 | if (old_cpu < 0 || new_cpu < 0) |
349 | return; |
350 | |
351 | while (*pn) { |
352 | perf_pmu_migrate_context(&(*pn)->pmu, old_cpu, new_cpu); |
353 | pn++; |
354 | } |
355 | } |
356 | |
357 | static int ppc_nest_imc_cpu_offline(unsigned int cpu) |
358 | { |
359 | int nid, target = -1; |
360 | const struct cpumask *l_cpumask; |
361 | struct imc_pmu_ref *ref; |
362 | |
363 | /* |
364 | * Check in the designated list for this cpu. Don't bother |
365 | * if not one of them. |
366 | */ |
367 | if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask)) |
368 | return 0; |
369 | |
370 | /* |
371 | * Check whether nest_imc is registered. We could end up here if the |
372 | * cpuhotplug callback registration fails. i.e, callback invokes the |
373 | * offline path for all successfully registered nodes. At this stage, |
374 | * nest_imc pmu will not be registered and we should return here. |
375 | * |
376 | * We return with a zero since this is not an offline failure. And |
377 | * cpuhp_setup_state() returns the actual failure reason to the caller, |
378 | * which in turn will call the cleanup routine. |
379 | */ |
380 | if (!nest_pmus) |
381 | return 0; |
382 | |
383 | /* |
384 | * Now that this cpu is one of the designated, |
385 | * find another cpu that is a) online and b) in the same chip. |
386 | */ |
387 | nid = cpu_to_node(cpu); |
388 | l_cpumask = cpumask_of_node(nid); |
389 | target = cpumask_last(l_cpumask); |
390 | |
391 | /* |
392 | * If the target is the cpu being offlined itself (the last cpu in |
393 | * the cpumask for this chip), check for any other possible online cpu in the chip. |
394 | */ |
395 | if (unlikely(target == cpu)) |
396 | target = cpumask_any_but(l_cpumask, cpu); |
397 | |
398 | /* |
399 | * Update the cpumask with the target cpu and |
400 | * migrate the context if needed |
401 | */ |
402 | if (target >= 0 && target < nr_cpu_ids) { |
403 | cpumask_set_cpu(target, &nest_imc_cpumask); |
404 | nest_change_cpu_context(cpu, target); |
405 | } else { |
406 | opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, |
407 | get_hard_smp_processor_id(cpu)); |
408 | /* |
409 | * If this is the last cpu in this chip, then skip the reference |
410 | * count lock and make the reference count on this chip zero. |
411 | */ |
412 | ref = get_nest_pmu_ref(cpu); |
413 | if (!ref) |
414 | return -EINVAL; |
415 | |
416 | ref->refc = 0; |
417 | } |
418 | return 0; |
419 | } |
420 | |
421 | static int ppc_nest_imc_cpu_online(unsigned int cpu) |
422 | { |
423 | const struct cpumask *l_cpumask; |
424 | static struct cpumask tmp_mask; |
425 | int res; |
426 | |
427 | /* Get the cpumask of this node */ |
428 | l_cpumask = cpumask_of_node(cpu_to_node(cpu)); |
429 | |
430 | /* |
431 | * If this is not the first online CPU on this node, then |
432 | * just return. |
433 | */ |
434 | if (cpumask_and(&tmp_mask, l_cpumask, &nest_imc_cpumask)) |
435 | return 0; |
436 | |
437 | /* |
438 | * If this is the first online cpu on this node |
439 | * disable the nest counters by making an OPAL call. |
440 | */ |
441 | res = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, |
442 | get_hard_smp_processor_id(cpu)); |
443 | if (res) |
444 | return res; |
445 | |
446 | /* Make this CPU the designated target for counter collection */ |
447 | cpumask_set_cpu(cpu, &nest_imc_cpumask); |
448 | return 0; |
449 | } |
450 | |
451 | static int nest_pmu_cpumask_init(void) |
452 | { |
453 | return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE, |
454 | "perf/powerpc/imc:online", |
455 | ppc_nest_imc_cpu_online, |
456 | ppc_nest_imc_cpu_offline); |
457 | } |
458 | |
459 | static void nest_imc_counters_release(struct perf_event *event) |
460 | { |
461 | int rc, node_id; |
462 | struct imc_pmu_ref *ref; |
463 | |
464 | if (event->cpu < 0) |
465 | return; |
466 | |
467 | node_id = cpu_to_node(event->cpu); |
468 | |
469 | /* |
470 | * See if we need to disable the nest PMU. |
471 | * If no events are currently in use, then we have to take a |
472 | * lock to ensure that we don't race with another task doing |
473 | * enable or disable the nest counters. |
474 | */ |
475 | ref = get_nest_pmu_ref(event->cpu); |
476 | if (!ref) |
477 | return; |
478 | |
479 | /* Take the lock for this node and then decrement the reference count */ |
480 | spin_lock(&ref->lock); |
481 | if (ref->refc == 0) { |
482 | /* |
483 | * This happens when a perf session is |
484 | * started, followed by offlining of all cpus in a given node. |
485 | * |
486 | * In the cpuhotplug offline path, ppc_nest_imc_cpu_offline() |
487 | * function sets the ref->refc to zero if the cpu which is |
488 | * about to go offline is the last cpu in a given node, and makes |
489 | * an OPAL call to disable the engine in that node. |
490 | * |
491 | */ |
492 | spin_unlock(&ref->lock); |
493 | return; |
494 | } |
495 | ref->refc--; |
496 | if (ref->refc == 0) { |
497 | rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, |
498 | get_hard_smp_processor_id(event->cpu)); |
499 | if (rc) { |
500 | spin_unlock(&ref->lock); |
501 | pr_err("nest-imc: Unable to stop the counters for node %d\n", node_id); |
502 | return; |
503 | } |
504 | } else if (ref->refc < 0) { |
505 | WARN(1, "nest-imc: Invalid event reference count\n"); |
506 | ref->refc = 0; |
507 | } |
508 | spin_unlock(&ref->lock); |
509 | } |
510 | |
511 | static int nest_imc_event_init(struct perf_event *event) |
512 | { |
513 | int chip_id, rc, node_id; |
514 | u32 l_config, config = event->attr.config; |
515 | struct imc_mem_info *pcni; |
516 | struct imc_pmu *pmu; |
517 | struct imc_pmu_ref *ref; |
518 | bool flag = false; |
519 | |
520 | if (event->attr.type != event->pmu->type) |
521 | return -ENOENT; |
522 | |
523 | /* Sampling not supported */ |
524 | if (event->hw.sample_period) |
525 | return -EINVAL; |
526 | |
527 | if (event->cpu < 0) |
528 | return -EINVAL; |
529 | |
530 | pmu = imc_event_to_pmu(event); |
531 | |
532 | /* Sanity check for config (event offset) */ |
533 | if ((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size) |
534 | return -EINVAL; |
535 | |
536 | /* |
537 | * Nest HW counter memory resides in a per-chip reserve-memory (HOMER). |
538 | * Get the base memory address for this cpu. |
539 | */ |
540 | chip_id = cpu_to_chip_id(event->cpu); |
541 | |
542 | /* Return, if chip_id is not valid */ |
543 | if (chip_id < 0) |
544 | return -ENODEV; |
545 | |
546 | pcni = pmu->mem_info; |
547 | do { |
548 | if (pcni->id == chip_id) { |
549 | flag = true; |
550 | break; |
551 | } |
552 | pcni++; |
553 | } while (pcni->vbase); |
554 | |
555 | if (!flag) |
556 | return -ENODEV; |
557 | |
558 | /* |
559 | * Add the event offset to the base address. |
560 | */ |
561 | l_config = config & IMC_EVENT_OFFSET_MASK; |
562 | event->hw.event_base = (u64)pcni->vbase + l_config; |
563 | node_id = cpu_to_node(event->cpu); |
564 | |
565 | /* |
566 | * Get the imc_pmu_ref struct for this node. |
567 | * Take the lock and then increment the count of nest pmu events inited. |
568 | */ |
569 | ref = get_nest_pmu_ref(event->cpu); |
570 | if (!ref) |
571 | return -EINVAL; |
572 | |
573 | spin_lock(&ref->lock); |
574 | if (ref->refc == 0) { |
575 | rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST, |
576 | get_hard_smp_processor_id(event->cpu)); |
577 | if (rc) { |
578 | spin_unlock(&ref->lock); |
579 | pr_err("nest-imc: Unable to start the counters for node %d\n", |
580 | node_id); |
581 | return rc; |
582 | } |
583 | } |
584 | ++ref->refc; |
585 | spin_unlock(&ref->lock); |
586 | |
587 | event->destroy = nest_imc_counters_release; |
588 | return 0; |
589 | } |
590 | |
591 | /* |
592 | * core_imc_mem_init : Initializes memory for the current core. |
593 | * |
594 | * Uses alloc_pages_node() and passes the returned address as an argument to |
595 | * an OPAL call that configures the PDBAR. The address sent as an argument is |
596 | * converted to a physical address before the OPAL call is made. This is the |
597 | * base address at which the core imc counters are populated. |
598 | */ |
599 | static int core_imc_mem_init(int cpu, int size) |
600 | { |
601 | int nid, rc = 0, core_id = (cpu / threads_per_core); |
602 | struct imc_mem_info *mem_info; |
603 | struct page *page; |
604 | |
605 | /* |
606 | * alloc_pages_node() will allocate memory for core in the |
607 | * local node only. |
608 | */ |
609 | nid = cpu_to_node(cpu); |
610 | mem_info = &core_imc_pmu->mem_info[core_id]; |
611 | mem_info->id = core_id; |
612 | |
613 | /* We need only vbase for core counters */ |
614 | page = alloc_pages_node(nid, |
615 | GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE | |
616 | __GFP_NOWARN, get_order(size)); |
617 | if (!page) |
618 | return -ENOMEM; |
619 | mem_info->vbase = page_address(page); |
620 | |
621 | core_imc_refc[core_id].id = core_id; |
622 | spin_lock_init(&core_imc_refc[core_id].lock); |
623 | |
624 | rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE, |
625 | __pa((void *)mem_info->vbase), |
626 | get_hard_smp_processor_id(cpu)); |
627 | if (rc) { |
628 | free_pages((u64)mem_info->vbase, get_order(size)); |
629 | mem_info->vbase = NULL; |
630 | } |
631 | |
632 | return rc; |
633 | } |
634 | |
635 | static bool is_core_imc_mem_inited(int cpu) |
636 | { |
637 | struct imc_mem_info *mem_info; |
638 | int core_id = (cpu / threads_per_core); |
639 | |
640 | mem_info = &core_imc_pmu->mem_info[core_id]; |
641 | if (!mem_info->vbase) |
642 | return false; |
643 | |
644 | return true; |
645 | } |
646 | |
647 | static int ppc_core_imc_cpu_online(unsigned int cpu) |
648 | { |
649 | const struct cpumask *l_cpumask; |
650 | static struct cpumask tmp_mask; |
651 | int ret = 0; |
652 | |
653 | /* Get the cpumask for this core */ |
654 | l_cpumask = cpu_sibling_mask(cpu); |
655 | |
656 | /* If a cpu for this core is already set, then, don't do anything */ |
657 | if (cpumask_and(&tmp_mask, l_cpumask, &core_imc_cpumask)) |
658 | return 0; |
659 | |
660 | if (!is_core_imc_mem_inited(cpu)) { |
661 | ret = core_imc_mem_init(cpu, core_imc_pmu->counter_mem_size); |
662 | if (ret) { |
663 | pr_info("core_imc memory allocation for cpu %d failed\n", cpu); |
664 | return ret; |
665 | } |
666 | } |
667 | |
668 | /* set the cpu in the mask */ |
669 | cpumask_set_cpu(cpu, &core_imc_cpumask); |
670 | return 0; |
671 | } |
672 | |
673 | static int ppc_core_imc_cpu_offline(unsigned int cpu) |
674 | { |
675 | unsigned int core_id; |
676 | int ncpu; |
677 | struct imc_pmu_ref *ref; |
678 | |
679 | /* |
680 | * clear this cpu out of the mask, if not present in the mask, |
681 | * don't bother doing anything. |
682 | */ |
683 | if (!cpumask_test_and_clear_cpu(cpu, &core_imc_cpumask)) |
684 | return 0; |
685 | |
686 | /* |
687 | * Check whether core_imc is registered. We could end up here |
688 | * if the cpuhotplug callback registration fails. i.e, callback |
689 | * invokes the offline path for all successfully registered cpus. |
690 | * At this stage, core_imc pmu will not be registered and we |
691 | * should return here. |
692 | * |
693 | * We return with a zero since this is not an offline failure. |
694 | * And cpuhp_setup_state() returns the actual failure reason |
695 | * to the caller, which in turn will call the cleanup routine. |
696 | */ |
697 | if (!core_imc_pmu->pmu.event_init) |
698 | return 0; |
699 | |
700 | /* Find any online cpu in that core except the current "cpu" */ |
701 | ncpu = cpumask_last(cpu_sibling_mask(cpu)); |
702 | |
703 | if (unlikely(ncpu == cpu)) |
704 | ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu); |
705 | |
706 | if (ncpu >= 0 && ncpu < nr_cpu_ids) { |
707 | cpumask_set_cpu(ncpu, &core_imc_cpumask); |
708 | perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu); |
709 | } else { |
710 | /* |
711 | * If this is the last cpu in this core then skip taking reference |
712 | * count lock for this core and directly zero "refc" for this core. |
713 | */ |
714 | opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, |
715 | get_hard_smp_processor_id(cpu)); |
716 | core_id = cpu / threads_per_core; |
717 | ref = &core_imc_refc[core_id]; |
718 | if (!ref) |
719 | return -EINVAL; |
720 | |
721 | ref->refc = 0; |
722 | /* |
723 | * Reduce the global reference count, if this is the |
724 | * last cpu in this core and core-imc event running |
725 | * in this cpu. |
726 | */ |
727 | spin_lock(&imc_global_refc.lock); |
728 | if (imc_global_refc.id == IMC_DOMAIN_CORE) |
729 | imc_global_refc.refc--; |
730 | |
731 | spin_unlock(&imc_global_refc.lock); |
732 | } |
733 | return 0; |
734 | } |
735 | |
736 | static int core_imc_pmu_cpumask_init(void) |
737 | { |
738 | return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE, |
739 | "perf/powerpc/imc_core:online", |
740 | ppc_core_imc_cpu_online, |
741 | ppc_core_imc_cpu_offline); |
742 | } |
743 | |
744 | static void reset_global_refc(struct perf_event *event) |
745 | { |
746 | spin_lock(&imc_global_refc.lock); |
747 | imc_global_refc.refc--; |
748 | |
749 | /* |
750 | * If no other thread is running any |
751 | * event for this domain(thread/core/trace), |
752 | * set the global id to zero. |
753 | */ |
754 | if (imc_global_refc.refc <= 0) { |
755 | imc_global_refc.refc = 0; |
756 | imc_global_refc.id = 0; |
757 | } |
758 | spin_unlock(&imc_global_refc.lock); |
759 | } |
760 | |
761 | static void core_imc_counters_release(struct perf_event *event) |
762 | { |
763 | int rc, core_id; |
764 | struct imc_pmu_ref *ref; |
765 | |
766 | if (event->cpu < 0) |
767 | return; |
768 | /* |
769 | * See if we need to disable the IMC PMU. |
770 | * If no events are currently in use, then we have to take a |
771 | * lock to ensure that we don't race with another task doing |
772 | * enable or disable the core counters. |
773 | */ |
774 | core_id = event->cpu / threads_per_core; |
775 | |
776 | /* Take the lock and decrement the reference count for this core */ |
777 | ref = &core_imc_refc[core_id]; |
778 | if (!ref) |
779 | return; |
780 | |
781 | spin_lock(&ref->lock); |
782 | if (ref->refc == 0) { |
783 | /* |
784 | * This happens when a perf session is |
785 | * started, followed by offlining of all cpus in a given core. |
786 | * |
787 | * In the cpuhotplug offline path, ppc_core_imc_cpu_offline() |
788 | * function sets the ref->refc to zero if the cpu which is |
789 | * about to go offline is the last cpu in a given core, and makes |
790 | * an OPAL call to disable the engine in that core. |
791 | * |
792 | */ |
793 | spin_unlock(&ref->lock); |
794 | return; |
795 | } |
796 | ref->refc--; |
797 | if (ref->refc == 0) { |
798 | rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, |
799 | get_hard_smp_processor_id(event->cpu)); |
800 | if (rc) { |
801 | spin_unlock(&ref->lock); |
802 | pr_err("IMC: Unable to stop the counters for core %d\n", core_id); |
803 | return; |
804 | } |
805 | } else if (ref->refc < 0) { |
806 | WARN(1, "core-imc: Invalid event reference count\n"); |
807 | ref->refc = 0; |
808 | } |
809 | spin_unlock(&ref->lock); |
810 | |
811 | reset_global_refc(event); |
812 | } |
813 | |
814 | static int core_imc_event_init(struct perf_event *event) |
815 | { |
816 | int core_id, rc; |
817 | u64 config = event->attr.config; |
818 | struct imc_mem_info *pcmi; |
819 | struct imc_pmu *pmu; |
820 | struct imc_pmu_ref *ref; |
821 | |
822 | if (event->attr.type != event->pmu->type) |
823 | return -ENOENT; |
824 | |
825 | /* Sampling not supported */ |
826 | if (event->hw.sample_period) |
827 | return -EINVAL; |
828 | |
829 | if (event->cpu < 0) |
830 | return -EINVAL; |
831 | |
832 | event->hw.idx = -1; |
833 | pmu = imc_event_to_pmu(event); |
834 | |
835 | /* Sanity check for config (event offset) */ |
836 | if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size)) |
837 | return -EINVAL; |
838 | |
839 | if (!is_core_imc_mem_inited(event->cpu)) |
840 | return -ENODEV; |
841 | |
842 | core_id = event->cpu / threads_per_core; |
843 | pcmi = &core_imc_pmu->mem_info[core_id]; |
844 | if ((!pcmi->vbase)) |
845 | return -ENODEV; |
846 | |
847 | ref = &core_imc_refc[core_id]; |
848 | if (!ref) |
849 | return -EINVAL; |
850 | |
851 | /* |
852 | * Core pmu units are enabled only when they are used. |
853 | * See if this is triggered for the first time. |
854 | * If yes, take the lock and enable the core counters. |
855 | * If not, just increment the count in core_imc_refc struct. |
856 | */ |
857 | spin_lock(&ref->lock); |
858 | if (ref->refc == 0) { |
859 | rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE, |
860 | get_hard_smp_processor_id(event->cpu)); |
861 | if (rc) { |
862 | spin_unlock(&ref->lock); |
863 | pr_err("core-imc: Unable to start the counters for core %d\n", |
864 | core_id); |
865 | return rc; |
866 | } |
867 | } |
868 | ++ref->refc; |
869 | spin_unlock(&ref->lock); |
870 | |
871 | /* |
872 | * Since the system can run either in accumulation or trace-mode |
873 | * of IMC at a time, core-imc events are allowed only if no other |
874 | * trace/thread imc events are enabled/monitored. |
875 | * |
876 | * Take the global lock, and check the refc.id |
877 | * to know whether any other trace/thread imc |
878 | * events are running. |
879 | */ |
880 | spin_lock(&imc_global_refc.lock); |
881 | if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_CORE) { |
882 | /* |
883 | * No other trace/thread imc events are running in |
884 | * the system, so set the refc.id to core-imc. |
885 | */ |
886 | imc_global_refc.id = IMC_DOMAIN_CORE; |
887 | imc_global_refc.refc++; |
888 | } else { |
889 | spin_unlock(&imc_global_refc.lock); |
890 | return -EBUSY; |
891 | } |
892 | spin_unlock(&imc_global_refc.lock); |
893 | |
894 | event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK); |
895 | event->destroy = core_imc_counters_release; |
896 | return 0; |
897 | } |
898 | |
899 | /* |
900 | * Allocates a page of memory for each of the online cpus, and loads |
901 | * LDBAR with 0. |
902 | * The physical base address of the page allocated for a cpu will be |
903 | * written to the LDBAR for that cpu, when the thread-imc event |
904 | * is added. |
905 | * |
906 | * LDBAR Register Layout: |
907 | * |
908 | *  0          4         8         12        16        20        24        28 |
909 | * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | |
910 | *   | |       [   ]    [  Counter Address [8:50] |
911 | *   | * Mode    | |
912 | *   |           * PB Scope |
913 | *   * Enable/Disable |
914 | * |
915 | *  32        36        40        44        48        52        56        60 |
916 | * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | |
917 | *           Counter Address [8:50]        ] |
918 | * |
919 | */ |
920 | static int thread_imc_mem_alloc(int cpu_id, int size) |
921 | { |
922 | u64 *local_mem = per_cpu(thread_imc_mem, cpu_id); |
923 | int nid = cpu_to_node(cpu_id); |
924 | |
925 | if (!local_mem) { |
926 | struct page *page; |
927 | /* |
928 | * This case could happen only once at start, since we don't |
929 | * free the memory in cpu offline path. |
930 | */ |
931 | page = alloc_pages_node(nid, |
932 | GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE | |
933 | __GFP_NOWARN, get_order(size)); |
934 | if (!page) |
935 | return -ENOMEM; |
936 | local_mem = page_address(page); |
937 | |
938 | per_cpu(thread_imc_mem, cpu_id) = local_mem; |
939 | } |
940 | |
941 | mtspr(SPRN_LDBAR, 0); |
942 | return 0; |
943 | } |
944 | |
945 | static int ppc_thread_imc_cpu_online(unsigned int cpu) |
946 | { |
947 | return thread_imc_mem_alloc(cpu, thread_imc_mem_size); |
948 | } |
949 | |
950 | static int ppc_thread_imc_cpu_offline(unsigned int cpu) |
951 | { |
952 | /* |
953 | * Set the bit 0 of LDBAR to zero. |
954 | * |
955 | * If bit 0 of LDBAR is unset, it will stop posting |
956 | * the counter data to memory. |
957 | * For thread-imc, bit 0 of LDBAR will be set to 1 in the |
958 | * event_add function. So reset this bit here, to stop the updates |
959 | * to memory in the cpu_offline path. |
960 | */ |
961 | mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); |
962 | |
963 | /* Reduce the refc if thread-imc event running on this cpu */ |
964 | spin_lock(&imc_global_refc.lock); |
965 | if (imc_global_refc.id == IMC_DOMAIN_THREAD) |
966 | imc_global_refc.refc--; |
967 | spin_unlock(&imc_global_refc.lock); |
968 | |
969 | return 0; |
970 | } |
971 | |
972 | static int thread_imc_cpu_init(void) |
973 | { |
974 | return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, |
975 | "perf/powerpc/imc_thread:online", |
976 | ppc_thread_imc_cpu_online, |
977 | ppc_thread_imc_cpu_offline); |
978 | } |
979 | |
980 | static int thread_imc_event_init(struct perf_event *event) |
981 | { |
982 | u32 config = event->attr.config; |
983 | struct task_struct *target; |
984 | struct imc_pmu *pmu; |
985 | |
986 | if (event->attr.type != event->pmu->type) |
987 | return -ENOENT; |
988 | |
989 | if (!perfmon_capable()) |
990 | return -EACCES; |
991 | |
992 | /* Sampling not supported */ |
993 | if (event->hw.sample_period) |
994 | return -EINVAL; |
995 | |
996 | event->hw.idx = -1; |
997 | pmu = imc_event_to_pmu(event); |
998 | |
999 | /* Sanity check for config offset */ |
1000 | if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size)) |
1001 | return -EINVAL; |
1002 | |
1003 | target = event->hw.target; |
1004 | if (!target) |
1005 | return -EINVAL; |
1006 | |
1007 | spin_lock(&imc_global_refc.lock); |
1008 | /* |
1009 | * Check if any other trace/core imc events are running in the |
1010 | * system, if not set the global id to thread-imc. |
1011 | */ |
1012 | if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_THREAD) { |
1013 | imc_global_refc.id = IMC_DOMAIN_THREAD; |
1014 | imc_global_refc.refc++; |
1015 | } else { |
1016 | spin_unlock(&imc_global_refc.lock); |
1017 | return -EBUSY; |
1018 | } |
1019 | spin_unlock(&imc_global_refc.lock); |
1020 | |
1021 | event->pmu->task_ctx_nr = perf_sw_context; |
1022 | event->destroy = reset_global_refc; |
1023 | return 0; |
1024 | } |
1025 | |
1026 | static bool is_thread_imc_pmu(struct perf_event *event) |
1027 | { |
1028 | if (!strncmp(event->pmu->name, "thread_imc", strlen("thread_imc"))) |
1029 | return true; |
1030 | |
1031 | return false; |
1032 | } |
1033 | |
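/*
 * get_event_base_addr: Return the address to read the counter value from.
 * Thread imc reads this cpu's per-cpu buffer; nest/core imc use the
 * event_base computed at event init time.
 */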
1034 | static __be64 *get_event_base_addr(struct perf_event *event) |
1035 | { |
1036 | u64 addr; |
1037 | |
1038 | if (is_thread_imc_pmu(event)) { |
1039 | addr = (u64)per_cpu(thread_imc_mem, smp_processor_id()); |
1040 | return (__be64 *)(addr + (event->attr.config & IMC_EVENT_OFFSET_MASK)); |
1041 | } |
1042 | |
1043 | return (__be64 *)event->hw.event_base; |
1044 | } |
1045 | |
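/*
 * Transaction helpers for thread imc: the counters are free running, so
 * grouping only needs the PMU disabled across the add sequence; commit and
 * cancel simply re-enable it.
 */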
1046 | static void thread_imc_pmu_start_txn(struct pmu *pmu, |
1047 | unsigned int txn_flags) |
1048 | { |
1049 | if (txn_flags & ~PERF_PMU_TXN_ADD) |
1050 | return; |
1051 | perf_pmu_disable(pmu); |
1052 | } |
1053 | |
1054 | static void thread_imc_pmu_cancel_txn(struct pmu *pmu) |
1055 | { |
1056 | perf_pmu_enable(pmu); |
1057 | } |
1058 | |
1059 | static int thread_imc_pmu_commit_txn(struct pmu *pmu) |
1060 | { |
1061 | perf_pmu_enable(pmu); |
1062 | return 0; |
1063 | } |
1064 | |
1065 | static u64 imc_read_counter(struct perf_event *event) |
1066 | { |
1067 | __be64 *addr; |
1068 | u64 data; |
1069 | |
1070 | /* |
1071 | * In-Memory Collection (IMC) counters are free flowing counters. |
1072 | * So we take a snapshot of the counter value on enable and save it |
1073 | * to calculate the delta at later stage to present the event counter |
1074 | * value. |
1075 | */ |
1076 | addr = get_event_base_addr(event); |
1077 | data = be64_to_cpu(READ_ONCE(*addr)); |
1078 | local64_set(&event->hw.prev_count, data); |
1079 | |
1080 | return data; |
1081 | } |
1082 | |
1083 | static void imc_event_update(struct perf_event *event) |
1084 | { |
1085 | u64 counter_prev, counter_new, final_count; |
1086 | |
1087 | counter_prev = local64_read(&event->hw.prev_count); |
1088 | counter_new = imc_read_counter(event); |
1089 | final_count = counter_new - counter_prev; |
1090 | |
1091 | /* Update the delta to the event count */ |
1092 | local64_add(final_count, &event->count); |
1093 | } |
1094 | |
1095 | static void imc_event_start(struct perf_event *event, int flags) |
1096 | { |
1097 | /* |
1098 | * In Memory Counters are free flowing counters. HW or the microcode |
1099 | * keeps adding to the counter offset in memory. To get event |
1100 | * counter value, we snapshot the value here and we calculate |
1101 | * delta at later point. |
1102 | */ |
1103 | imc_read_counter(event); |
1104 | } |
1105 | |
1106 | static void imc_event_stop(struct perf_event *event, int flags) |
1107 | { |
1108 | /* |
1109 | * Take a snapshot and calculate the delta and update |
1110 | * the event counter values. |
1111 | */ |
1112 | imc_event_update(event); |
1113 | } |
1114 | |
1115 | static int imc_event_add(struct perf_event *event, int flags) |
1116 | { |
1117 | if (flags & PERF_EF_START) |
1118 | imc_event_start(event, flags); |
1119 | |
1120 | return 0; |
1121 | } |
1122 | |
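/*
 * thread_imc_event_add: Point LDBAR at this cpu's buffer, enable it, and
 * start the core engine via OPAL if this is the first active event on the
 * core (tracked through core_imc_refc).
 */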
1123 | static int thread_imc_event_add(struct perf_event *event, int flags) |
1124 | { |
1125 | int core_id; |
1126 | struct imc_pmu_ref *ref; |
1127 | u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, smp_processor_id()); |
1128 | |
1129 | if (flags & PERF_EF_START) |
1130 | imc_event_start(event, flags); |
1131 | |
1132 | if (!is_core_imc_mem_inited(smp_processor_id())) |
1133 | return -EINVAL; |
1134 | |
1135 | core_id = smp_processor_id() / threads_per_core; |
1136 | ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE; |
1137 | mtspr(SPRN_LDBAR, ldbar_value); |
1138 | |
1139 | /* |
1140 | * imc pmus are enabled only when they are used. |
1141 | * See if this is triggered for the first time. |
1142 | * If yes, take the lock and enable the counters. |
1143 | * If not, just increment the count in ref count struct. |
1144 | */ |
1145 | ref = &core_imc_refc[core_id]; |
1146 | if (!ref) |
1147 | return -EINVAL; |
1148 | |
1149 | spin_lock(&ref->lock); |
1150 | if (ref->refc == 0) { |
1151 | if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE, |
1152 | get_hard_smp_processor_id(smp_processor_id()))) { |
1153 | spin_unlock(&ref->lock); |
1154 | pr_err("thread-imc: Unable to start the counters for core %d\n", |
1155 | core_id); |
1156 | return -EINVAL; |
1157 | } |
1158 | } |
1159 | ++ref->refc; |
1160 | spin_unlock(&ref->lock); |
1161 | return 0; |
1162 | } |
1163 | |
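/*
 * thread_imc_event_del: Drop the core reference, stop the engine when the
 * count hits zero, clear the LDBAR enable bit and fold the final delta
 * into the event count.
 */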
1164 | static void thread_imc_event_del(struct perf_event *event, int flags) |
1165 | { |
1166 | |
1167 | int core_id; |
1168 | struct imc_pmu_ref *ref; |
1169 | |
1170 | core_id = smp_processor_id() / threads_per_core; |
1171 | ref = &core_imc_refc[core_id]; |
1172 | if (!ref) { |
1173 | pr_debug("imc: Failed to get event reference count\n"); |
1174 | return; |
1175 | } |
1176 | |
1177 | spin_lock(&ref->lock); |
1178 | ref->refc--; |
1179 | if (ref->refc == 0) { |
1180 | if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, |
1181 | get_hard_smp_processor_id(smp_processor_id()))) { |
1182 | spin_unlock(&ref->lock); |
1183 | pr_err("thread-imc: Unable to stop the counters for core %d\n", |
1184 | core_id); |
1185 | return; |
1186 | } |
1187 | } else if (ref->refc < 0) { |
1188 | ref->refc = 0; |
1189 | } |
1190 | spin_unlock(&ref->lock); |
1191 | |
1192 | /* Set bit 0 of LDBAR to zero, to stop posting updates to memory */ |
1193 | mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); |
1194 | |
1195 | /* |
1196 | * Take a snapshot and calculate the delta and update |
1197 | * the event counter values. |
1198 | */ |
1199 | imc_event_update(event); |
1200 | } |
1201 | |
1202 | /* |
1203 | * Allocate a page of memory for each cpu, and load LDBAR with 0. |
1204 | */ |
1205 | static int trace_imc_mem_alloc(int cpu_id, int size) |
1206 | { |
1207 | u64 *local_mem = per_cpu(trace_imc_mem, cpu_id); |
1208 | int phys_id = cpu_to_node(cpu_id), rc = 0; |
1209 | int core_id = (cpu_id / threads_per_core); |
1210 | |
1211 | if (!local_mem) { |
1212 | struct page *page; |
1213 | |
1214 | page = alloc_pages_node(phys_id, |
1215 | GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE | |
1216 | __GFP_NOWARN, get_order(size)); |
1217 | if (!page) |
1218 | return -ENOMEM; |
1219 | local_mem = page_address(page); |
1220 | per_cpu(trace_imc_mem, cpu_id) = local_mem; |
1221 | |
1222 | /* Initialise the counters for trace mode */ |
1223 | rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_TRACE, __pa((void *)local_mem), |
1224 | get_hard_smp_processor_id(cpu_id)); |
1225 | if (rc) { |
1226 | pr_info("IMC: opal init failed for trace imc\n"); |
1227 | return rc; |
1228 | } |
1229 | } |
1230 | |
1231 | trace_imc_refc[core_id].id = core_id; |
1232 | spin_lock_init(&trace_imc_refc[core_id].lock); |
1233 | |
1234 | mtspr(SPRN_LDBAR, 0); |
1235 | return 0; |
1236 | } |
1237 | |
1238 | static int ppc_trace_imc_cpu_online(unsigned int cpu) |
1239 | { |
1240 | return trace_imc_mem_alloc(cpu, trace_imc_mem_size); |
1241 | } |
1242 | |
1243 | static int ppc_trace_imc_cpu_offline(unsigned int cpu) |
1244 | { |
1245 | /* |
1246 | * No need to set bit 0 of LDBAR to zero, as |
1247 | * it is set to zero for imc trace-mode |
1248 | * |
1249 | * Reduce the refc if any trace-imc event running |
1250 | * on this cpu. |
1251 | */ |
1252 | spin_lock(&imc_global_refc.lock); |
1253 | if (imc_global_refc.id == IMC_DOMAIN_TRACE) |
1254 | imc_global_refc.refc--; |
1255 | spin_unlock(&imc_global_refc.lock); |
1256 | |
1257 | return 0; |
1258 | } |
1259 | |
1260 | static int trace_imc_cpu_init(void) |
1261 | { |
1262 | return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE, |
1263 | "perf/powerpc/imc_trace:online", |
1264 | ppc_trace_imc_cpu_online, |
1265 | ppc_trace_imc_cpu_offline); |
1266 | } |
1267 | |
1268 | static u64 get_trace_imc_event_base_addr(void) |
1269 | { |
1270 | return (u64)per_cpu(trace_imc_mem, smp_processor_id()); |
1271 | } |
1272 | |
1273 | /* |
1274 | * Function to parse trace-imc data obtained |
1275 | * and to prepare the perf sample. |
1276 | */ |
1277 | static int trace_imc_prepare_sample(struct trace_imc_data *mem, |
1278 | struct perf_sample_data *data, |
1279 | u64 *prev_tb, |
1280 | struct perf_event_header *header, |
1281 | struct perf_event *event) |
1282 | { |
1283 | /* Sanity checks for a valid record */ |
1284 | if (be64_to_cpu(READ_ONCE(mem->tb1)) > *prev_tb) |
1285 | *prev_tb = be64_to_cpu(READ_ONCE(mem->tb1)); |
1286 | else |
1287 | return -EINVAL; |
1288 | |
1289 | if ((be64_to_cpu(READ_ONCE(mem->tb1)) & IMC_TRACE_RECORD_TB1_MASK) != |
1290 | be64_to_cpu(READ_ONCE(mem->tb2))) |
1291 | return -EINVAL; |
1292 | |
1293 | /* Prepare perf sample */ |
1294 | data->ip = be64_to_cpu(READ_ONCE(mem->ip)); |
1295 | data->period = event->hw.last_period; |
1296 | |
1297 | header->type = PERF_RECORD_SAMPLE; |
1298 | header->size = sizeof(*header) + event->header_size; |
1299 | header->misc = 0; |
1300 | |
1301 | if (cpu_has_feature(CPU_FTR_ARCH_31)) { |
1302 | switch (IMC_TRACE_RECORD_VAL_HVPR(be64_to_cpu(READ_ONCE(mem->val)))) { |
1303 | case 0:/* when MSR HV and PR not set in the trace-record */ |
1304 | header->misc |= PERF_RECORD_MISC_GUEST_KERNEL; |
1305 | break; |
1306 | case 1: /* MSR HV is 0 and PR is 1 */ |
1307 | header->misc |= PERF_RECORD_MISC_GUEST_USER; |
1308 | break; |
1309 | case 2: /* MSR HV is 1 and PR is 0 */ |
1310 | header->misc |= PERF_RECORD_MISC_KERNEL; |
1311 | break; |
1312 | case 3: /* MSR HV is 1 and PR is 1 */ |
1313 | header->misc |= PERF_RECORD_MISC_USER; |
1314 | break; |
1315 | default: |
1316 | pr_info("IMC: Unable to set the flag based on MSR bits\n"); |
1317 | break; |
1318 | } |
1319 | } else { |
1320 | if (is_kernel_addr(data->ip)) |
1321 | header->misc |= PERF_RECORD_MISC_KERNEL; |
1322 | else |
1323 | header->misc |= PERF_RECORD_MISC_USER; |
1324 | } |
1325 | perf_event_header__init_id(header, data, event); |
1326 | |
1327 | return 0; |
1328 | } |
1329 | |
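/*
 * dump_trace_imc_data: Walk this cpu's trace buffer and emit one
 * PERF_RECORD_SAMPLE per valid record, stopping at the first record that
 * fails the timebase sanity checks.
 */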
1330 | static void dump_trace_imc_data(struct perf_event *event) |
1331 | { |
1332 | struct trace_imc_data *mem; |
1333 | int i, ret; |
1334 | u64 prev_tb = 0; |
1335 | |
1336 | mem = (struct trace_imc_data *)get_trace_imc_event_base_addr(); |
1337 | for (i = 0; i < (trace_imc_mem_size / sizeof(struct trace_imc_data)); |
1338 | i++, mem++) { |
1339 | struct perf_sample_data data; |
1340 | struct perf_event_header header; |
1341 | |
1342 | ret = trace_imc_prepare_sample(mem, &data, &prev_tb, &header, event); |
1343 | if (ret) /* Exit, if not a valid record */ |
1344 | break; |
1345 | else { |
1346 | /* If this is a valid record, create the sample */ |
1347 | struct perf_output_handle handle; |
1348 | |
1349 | if (perf_output_begin(&handle, &data, event, header.size)) |
1350 | return; |
1351 | |
1352 | perf_output_sample(&handle, &header, &data, event); |
1353 | perf_output_end(&handle); |
1354 | } |
1355 | } |
1356 | } |
1357 | |
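/*
 * trace_imc_event_add: Program LDBAR with this cpu's trace buffer in trace
 * mode and start the trace engine via OPAL for the first event on the core.
 */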
1358 | static int trace_imc_event_add(struct perf_event *event, int flags) |
1359 | { |
1360 | int core_id = smp_processor_id() / threads_per_core; |
1361 | struct imc_pmu_ref *ref = NULL; |
1362 | u64 local_mem, ldbar_value; |
1363 | |
1364 | /* Set trace-imc bit in ldbar and load ldbar with per-thread memory address */ |
1365 | local_mem = get_trace_imc_event_base_addr(); |
1366 | ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | TRACE_IMC_ENABLE; |
1367 | |
1368 | /* trace-imc reference count */ |
1369 | if (trace_imc_refc) |
1370 | ref = &trace_imc_refc[core_id]; |
1371 | if (!ref) { |
1372 | pr_debug("imc: Failed to get the event reference count\n"); |
1373 | return -EINVAL; |
1374 | } |
1375 | |
1376 | mtspr(SPRN_LDBAR, ldbar_value); |
1377 | spin_lock(&ref->lock); |
1378 | if (ref->refc == 0) { |
1379 | if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE, |
1380 | get_hard_smp_processor_id(smp_processor_id()))) { |
1381 | spin_unlock(&ref->lock); |
1382 | pr_err("trace-imc: Unable to start the counters for core %d\n", core_id); |
1383 | return -EINVAL; |
1384 | } |
1385 | } |
1386 | ++ref->refc; |
1387 | spin_unlock(&ref->lock); |
1388 | return 0; |
1389 | } |
1390 | |
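/*
 * Samples are generated from the trace buffer when the event is stopped or
 * deleted, so read() and start() are no-ops for trace imc.
 */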
1391 | static void trace_imc_event_read(struct perf_event *event) |
1392 | { |
1393 | return; |
1394 | } |
1395 | |
1396 | static void trace_imc_event_stop(struct perf_event *event, int flags) |
1397 | { |
1398 | u64 local_mem = get_trace_imc_event_base_addr(); |
1399 | dump_trace_imc_data(event); |
1400 | memset((void *)local_mem, 0, sizeof(u64)); |
1401 | } |
1402 | |
1403 | static void trace_imc_event_start(struct perf_event *event, int flags) |
1404 | { |
1405 | return; |
1406 | } |
1407 | |
1408 | static void trace_imc_event_del(struct perf_event *event, int flags) |
1409 | { |
1410 | int core_id = smp_processor_id() / threads_per_core; |
1411 | struct imc_pmu_ref *ref = NULL; |
1412 | |
1413 | if (trace_imc_refc) |
1414 | ref = &trace_imc_refc[core_id]; |
1415 | if (!ref) { |
1416 | pr_debug("imc: Failed to get event reference count\n"); |
1417 | return; |
1418 | } |
1419 | |
1420 | spin_lock(&ref->lock); |
1421 | ref->refc--; |
1422 | if (ref->refc == 0) { |
1423 | if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE, |
1424 | get_hard_smp_processor_id(smp_processor_id()))) { |
1425 | spin_unlock(&ref->lock); |
1426 | pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id); |
1427 | return; |
1428 | } |
1429 | } else if (ref->refc < 0) { |
1430 | ref->refc = 0; |
1431 | } |
1432 | spin_unlock(&ref->lock); |
1433 | |
1434 | trace_imc_event_stop(event, flags); |
1435 | } |
1436 | |
1437 | static int trace_imc_event_init(struct perf_event *event) |
1438 | { |
1439 | if (event->attr.type != event->pmu->type) |
1440 | return -ENOENT; |
1441 | |
1442 | if (!perfmon_capable()) |
1443 | return -EACCES; |
1444 | |
1445 | /* Return if this is a counting event */ |
1446 | if (event->attr.sample_period == 0) |
1447 | return -ENOENT; |
1448 | |
1449 | /* |
1450 | * Take the global lock, and make sure |
1451 | * no other thread is running any core/thread imc |
1452 | * events |
1453 | */ |
1454 | spin_lock(&imc_global_refc.lock); |
1455 | if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_TRACE) { |
1456 | /* |
1457 | * No core/thread imc events are running in the |
1458 | * system, so set the refc.id to trace-imc. |
1459 | */ |
1460 | imc_global_refc.id = IMC_DOMAIN_TRACE; |
1461 | imc_global_refc.refc++; |
1462 | } else { |
1463 | spin_unlock(&imc_global_refc.lock); |
1464 | return -EBUSY; |
1465 | } |
1466 | spin_unlock(&imc_global_refc.lock); |
1467 | |
1468 | event->hw.idx = -1; |
1469 | |
1470 | /* |
1471 | * There can only be a single PMU for perf_hw_context events which is assigned to |
1472 | * core PMU. Hence use "perf_sw_context" for trace_imc. |
1473 | */ |
1474 | event->pmu->task_ctx_nr = perf_sw_context; |
1475 | event->destroy = reset_global_refc; |
1476 | return 0; |
1477 | } |
1478 | |
1479 | /* update_pmu_ops : Populate the appropriate operations for "pmu" */ |
1480 | static int update_pmu_ops(struct imc_pmu *pmu) |
1481 | { |
1482 | pmu->pmu.task_ctx_nr = perf_invalid_context; |
1483 | pmu->pmu.add = imc_event_add; |
1484 | pmu->pmu.del = imc_event_stop; |
1485 | pmu->pmu.start = imc_event_start; |
1486 | pmu->pmu.stop = imc_event_stop; |
1487 | pmu->pmu.read = imc_event_update; |
1488 | pmu->pmu.attr_groups = pmu->attr_groups; |
1489 | pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; |
1490 | pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group; |
1491 | |
1492 | switch (pmu->domain) { |
1493 | case IMC_DOMAIN_NEST: |
1494 | pmu->pmu.event_init = nest_imc_event_init; |
1495 | pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group; |
1496 | break; |
1497 | case IMC_DOMAIN_CORE: |
1498 | pmu->pmu.event_init = core_imc_event_init; |
1499 | pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group; |
1500 | break; |
1501 | case IMC_DOMAIN_THREAD: |
1502 | pmu->pmu.event_init = thread_imc_event_init; |
1503 | pmu->pmu.add = thread_imc_event_add; |
1504 | pmu->pmu.del = thread_imc_event_del; |
1505 | pmu->pmu.start_txn = thread_imc_pmu_start_txn; |
1506 | pmu->pmu.cancel_txn = thread_imc_pmu_cancel_txn; |
1507 | pmu->pmu.commit_txn = thread_imc_pmu_commit_txn; |
1508 | break; |
1509 | case IMC_DOMAIN_TRACE: |
1510 | pmu->pmu.event_init = trace_imc_event_init; |
1511 | pmu->pmu.add = trace_imc_event_add; |
1512 | pmu->pmu.del = trace_imc_event_del; |
1513 | pmu->pmu.start = trace_imc_event_start; |
1514 | pmu->pmu.stop = trace_imc_event_stop; |
1515 | pmu->pmu.read = trace_imc_event_read; |
1516 | pmu->attr_groups[IMC_FORMAT_ATTR] = &trace_imc_format_group; |
1517 | break; |
1518 | default: |
1519 | break; |
1520 | } |
1521 | |
1522 | return 0; |
1523 | } |
1524 | |
1525 | /* init_nest_pmu_ref: Initialize the imc_pmu_ref struct for all the nodes */ |
1526 | static int init_nest_pmu_ref(void) |
1527 | { |
1528 | int nid, i, cpu; |
1529 | |
1530 | nest_imc_refc = kcalloc(num_possible_nodes(), sizeof(*nest_imc_refc), |
1531 | GFP_KERNEL); |
1532 | |
1533 | if (!nest_imc_refc) |
1534 | return -ENOMEM; |
1535 | |
1536 | i = 0; |
1537 | for_each_node(nid) { |
1538 | /* |
1539 | * Take the lock to avoid races while tracking the number of |
1540 | * sessions using the chip's nest pmu units. |
1541 | */ |
1542 | spin_lock_init(&nest_imc_refc[i].lock); |
1543 | |
1544 | /* |
1545 | * Loop to init the "id" with the node_id. Variable "i" initialized to |
1546 | * 0 and will be used as index to the array. "i" will not go off the |
1547 | * end of the array since the "for_each_node" loops for "N_POSSIBLE" |
1548 | * nodes only. |
1549 | */ |
1550 | nest_imc_refc[i++].id = nid; |
1551 | } |
1552 | |
1553 | /* |
1554 | * Loop to init the per_cpu "local_nest_imc_refc" with the proper |
1555 | * "nest_imc_refc" index. This makes get_nest_pmu_ref() a lot simpler. |
1556 | */ |
1557 | for_each_possible_cpu(cpu) { |
1558 | nid = cpu_to_node(cpu); |
1559 | for (i = 0; i < num_possible_nodes(); i++) { |
1560 | if (nest_imc_refc[i].id == nid) { |
1561 | per_cpu(local_nest_imc_refc, cpu) = &nest_imc_refc[i]; |
1562 | break; |
1563 | } |
1564 | } |
1565 | } |
1566 | return 0; |
1567 | } |
1568 | |
1569 | static void cleanup_all_core_imc_memory(void) |
1570 | { |
1571 | int i, nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core); |
1572 | struct imc_mem_info *ptr = core_imc_pmu->mem_info; |
1573 | int size = core_imc_pmu->counter_mem_size; |
1574 | |
1575 | /* mem_info will never be NULL */ |
1576 | for (i = 0; i < nr_cores; i++) { |
1577 | if (ptr[i].vbase) |
1578 | free_pages((u64)ptr[i].vbase, get_order(size)); |
1579 | } |
1580 | |
1581 | kfree(ptr); |
1582 | kfree(core_imc_refc); |
1583 | } |
1584 | |
1585 | static void thread_imc_ldbar_disable(void *dummy) |
1586 | { |
1587 | /* |
1588 | * By setting 0th bit of LDBAR to zero, we disable thread-imc |
1589 | * updates to memory. |
1590 | */ |
1591 | mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); |
1592 | } |
1593 | |
1594 | void thread_imc_disable(void) |
1595 | { |
1596 | on_each_cpu(thread_imc_ldbar_disable, NULL, 1); |
1597 | } |
1598 | |
1599 | static void cleanup_all_thread_imc_memory(void) |
1600 | { |
1601 | int i, order = get_order(thread_imc_mem_size); |
1602 | |
1603 | for_each_online_cpu(i) { |
1604 | if (per_cpu(thread_imc_mem, i)) |
1605 | free_pages((u64)per_cpu(thread_imc_mem, i), order); |
1606 | |
1607 | } |
1608 | } |
1609 | |
1610 | static void cleanup_all_trace_imc_memory(void) |
1611 | { |
1612 | int i, order = get_order(trace_imc_mem_size); |
1613 | |
1614 | for_each_online_cpu(i) { |
1615 | if (per_cpu(trace_imc_mem, i)) |
1616 | free_pages((u64)per_cpu(trace_imc_mem, i), order); |
1617 | |
1618 | } |
1619 | kfree(trace_imc_refc); |
1620 | } |
1621 | |
1622 | /* Function to free the attr_groups which are dynamically allocated */ |
1623 | static void imc_common_mem_free(struct imc_pmu *pmu_ptr) |
1624 | { |
1625 | if (pmu_ptr->attr_groups[IMC_EVENT_ATTR]) |
1626 | kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); |
1627 | kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); |
1628 | } |
1629 | |
1630 | /* |
1631 | * Common function to unregister cpu hotplug callback and |
1632 | * free the memory. |
1633 | * TODO: Need to handle pmu unregistering, which will be |
1634 | * done in followup series. |
1635 | */ |
1636 | static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) |
1637 | { |
1638 | if (pmu_ptr->domain == IMC_DOMAIN_NEST) { |
1639 | mutex_lock(&nest_init_lock); |
1640 | if (nest_pmus == 1) { |
1641 | cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE); |
1642 | kfree(nest_imc_refc); |
1643 | kfree(per_nest_pmu_arr); |
1644 | per_nest_pmu_arr = NULL; |
1645 | } |
1646 | |
1647 | if (nest_pmus > 0) |
1648 | nest_pmus--; |
1649 | mutex_unlock(&nest_init_lock); |
1650 | } |
1651 | |
1652 | /* Free core_imc memory */ |
1653 | if (pmu_ptr->domain == IMC_DOMAIN_CORE) { |
1654 | cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE); |
1655 | cleanup_all_core_imc_memory(); |
1656 | } |
1657 | |
1658 | /* Free thread_imc memory */ |
1659 | if (pmu_ptr->domain == IMC_DOMAIN_THREAD) { |
1660 | cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE); |
1661 | cleanup_all_thread_imc_memory(); |
1662 | } |
1663 | |
1664 | if (pmu_ptr->domain == IMC_DOMAIN_TRACE) { |
1665 | cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE); |
1666 | cleanup_all_trace_imc_memory(); |
1667 | } |
1668 | } |
1669 | |
1670 | /* |
1671 | * Function to unregister thread-imc if core-imc |
1672 | * is not registered. |
1673 | */ |
1674 | void unregister_thread_imc(void) |
1675 | { |
1676 | imc_common_cpuhp_mem_free(thread_imc_pmu); |
1677 | imc_common_mem_free(thread_imc_pmu); |
1678 | perf_pmu_unregister(&thread_imc_pmu->pmu); |
1679 | } |
1680 | |
1681 | /* |
1682 | * imc_mem_init : Function to support memory allocation for core imc. |
1683 | */ |
1684 | static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent, |
1685 | int pmu_index) |
1686 | { |
1687 | const char *s; |
1688 | int nr_cores, cpu, res = -ENOMEM; |
1689 | |
1690 | if (of_property_read_string(parent, "name", &s)) |
1691 | return -ENODEV; |
1692 | |
1693 | switch (pmu_ptr->domain) { |
1694 | case IMC_DOMAIN_NEST: |
1695 | /* Update the pmu name */ |
1696 | pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s_imc", "nest_", s); |
1697 | if (!pmu_ptr->pmu.name) |
1698 | goto err; |
1699 | |
1700 | /* Needed for hotplug/migration */ |
1701 | if (!per_nest_pmu_arr) { |
1702 | per_nest_pmu_arr = kcalloc(get_max_nest_dev() + 1, |
1703 | sizeof(struct imc_pmu *), |
1704 | GFP_KERNEL); |
1705 | if (!per_nest_pmu_arr) |
1706 | goto err; |
1707 | } |
1708 | per_nest_pmu_arr[pmu_index] = pmu_ptr; |
1709 | break; |
1710 | case IMC_DOMAIN_CORE: |
1711 | /* Update the pmu name */ |
1712 | pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc"); |
1713 | if (!pmu_ptr->pmu.name) |
1714 | goto err; |
1715 | |
1716 | nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core); |
1717 | pmu_ptr->mem_info = kcalloc(nr_cores, sizeof(struct imc_mem_info), |
1718 | GFP_KERNEL); |
1719 | |
1720 | if (!pmu_ptr->mem_info) |
1721 | goto err; |
1722 | |
1723 | core_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref), |
1724 | GFP_KERNEL); |
1725 | |
1726 | if (!core_imc_refc) { |
1727 | kfree(pmu_ptr->mem_info); |
1728 | goto err; |
1729 | } |
1730 | |
1731 | core_imc_pmu = pmu_ptr; |
1732 | break; |
1733 | case IMC_DOMAIN_THREAD: |
1734 | /* Update the pmu name */ |
1735 | pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc"); |
1736 | if (!pmu_ptr->pmu.name) |
1737 | goto err; |
1738 | |
1739 | thread_imc_mem_size = pmu_ptr->counter_mem_size; |
1740 | for_each_online_cpu(cpu) { |
1741 | res = thread_imc_mem_alloc(cpu, pmu_ptr->counter_mem_size); |
1742 | if (res) { |
1743 | cleanup_all_thread_imc_memory(); |
1744 | goto err; |
1745 | } |
1746 | } |
1747 | |
1748 | thread_imc_pmu = pmu_ptr; |
1749 | break; |
1750 | case IMC_DOMAIN_TRACE: |
1751 | /* Update the pmu name */ |
1752 | pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc"); |
1753 | if (!pmu_ptr->pmu.name) |
1754 | return -ENOMEM; |
1755 | |
1756 | nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core); |
1757 | trace_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref), |
1758 | GFP_KERNEL); |
1759 | if (!trace_imc_refc) |
1760 | return -ENOMEM; |
1761 | |
1762 | trace_imc_mem_size = pmu_ptr->counter_mem_size; |
1763 | for_each_online_cpu(cpu) { |
1764 | res = trace_imc_mem_alloc(cpu, trace_imc_mem_size); |
1765 | if (res) { |
1766 | cleanup_all_trace_imc_memory(); |
1767 | goto err; |
1768 | } |
1769 | } |
1770 | break; |
1771 | default: |
1772 | return -EINVAL; |
1773 | } |
1774 | |
1775 | return 0; |
1776 | err: |
1777 | return res; |
1778 | } |
1779 | |
1780 | /* |
1781 | * init_imc_pmu : Setup and register the IMC pmu device. |
1782 | * |
1783 | * @parent: Device tree unit node |
1784 | * @pmu_ptr: memory allocated for this pmu |
1785 | * @pmu_idx: Count of nest pmc registered |
1786 | * |
1787 | * init_imc_pmu() sets up the pmu cpumask and registers a cpu hotplug callback. |
1788 | * Handles failure cases and accordingly frees memory. |
1789 | */ |
1790 | int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_idx) |
1791 | { |
1792 | int ret; |
1793 | |
1794 | ret = imc_mem_init(pmu_ptr, parent, pmu_idx); |
1795 | if (ret) |
1796 | goto err_free_mem; |
1797 | |
1798 | switch (pmu_ptr->domain) { |
1799 | case IMC_DOMAIN_NEST: |
1800 | /* |
1801 | * Nest imc pmus need only one cpu per chip; we initialize the |
1802 | * cpumask for the first nest imc pmu and use the same for the |
1803 | * rest. To handle the cpuhotplug callback unregister, we track |
1804 | * the number of nest pmus in "nest_pmus". |
1805 | */ |
1806 | mutex_lock(&nest_init_lock); |
1807 | if (nest_pmus == 0) { |
1808 | ret = init_nest_pmu_ref(); |
1809 | if (ret) { |
1810 | mutex_unlock(&nest_init_lock); |
1811 | kfree(per_nest_pmu_arr); |
1812 | per_nest_pmu_arr = NULL; |
1813 | goto err_free_mem; |
1814 | } |
1815 | /* Register for cpu hotplug notification. */ |
1816 | ret = nest_pmu_cpumask_init(); |
1817 | if (ret) { |
1818 | mutex_unlock(&nest_init_lock); |
1819 | kfree(nest_imc_refc); |
1820 | kfree(per_nest_pmu_arr); |
1821 | per_nest_pmu_arr = NULL; |
1822 | goto err_free_mem; |
1823 | } |
1824 | } |
1825 | nest_pmus++; |
1826 | mutex_unlock(&nest_init_lock); |
1827 | break; |
1828 | case IMC_DOMAIN_CORE: |
1829 | ret = core_imc_pmu_cpumask_init(); |
1830 | if (ret) { |
1831 | cleanup_all_core_imc_memory(); |
1832 | goto err_free_mem; |
1833 | } |
1834 | |
1835 | break; |
1836 | case IMC_DOMAIN_THREAD: |
1837 | ret = thread_imc_cpu_init(); |
1838 | if (ret) { |
1839 | cleanup_all_thread_imc_memory(); |
1840 | goto err_free_mem; |
1841 | } |
1842 | |
1843 | break; |
1844 | case IMC_DOMAIN_TRACE: |
1845 | ret = trace_imc_cpu_init(); |
1846 | if (ret) { |
1847 | cleanup_all_trace_imc_memory(); |
1848 | goto err_free_mem; |
1849 | } |
1850 | |
1851 | break; |
1852 | default: |
1853 | return -EINVAL; /* Unknown domain */ |
1854 | } |
1855 | |
1856 | ret = update_events_in_group(parent, pmu_ptr); |
1857 | if (ret) |
1858 | goto err_free_cpuhp_mem; |
1859 | |
1860 | ret = update_pmu_ops(pmu_ptr); |
1861 | if (ret) |
1862 | goto err_free_cpuhp_mem; |
1863 | |
1864 | ret = perf_pmu_register(&pmu_ptr->pmu, pmu_ptr->pmu.name, -1); |
1865 | if (ret) |
1866 | goto err_free_cpuhp_mem; |
1867 | |
1868 | pr_debug("%s performance monitor hardware support registered\n", |
1869 | pmu_ptr->pmu.name); |
1870 | |
1871 | return 0; |
1872 | |
1873 | err_free_cpuhp_mem: |
1874 | imc_common_cpuhp_mem_free(pmu_ptr); |
1875 | err_free_mem: |
1876 | imc_common_mem_free(pmu_ptr); |
1877 | return ret; |
1878 | } |
1879 | |