// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Jacob Shin <jacob.shin@amd.com>
 */

#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpufeature.h>
#include <linux/smp.h>

#include <asm/perf_event.h>
#include <asm/msr.h>

#define NUM_COUNTERS_NB		4
#define NUM_COUNTERS_L2		4
#define NUM_COUNTERS_L3		6

#define RDPMC_BASE_NB		6
#define RDPMC_BASE_LLC		10

#define COUNTER_SHIFT		16
#define UNCORE_NAME_LEN		16
#define UNCORE_GROUP_MAX	256

#undef pr_fmt
#define pr_fmt(fmt)	"amd_uncore: " fmt

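/*
 * Each registered PMU is exposed to userspace through sysfs under
 * /sys/bus/event_source/devices/ (e.g. amd_df, amd_l3, amd_umc_*), where the
 * "format" and "cpumask" attribute groups defined below describe the raw
 * config layout and the CPU that carries the counts. A hypothetical
 * invocation using the raw event syntax would look like:
 *
 *	perf stat -e amd_l3/event=0x04,umask=0xff/ -a -- sleep 1
 */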

static int pmu_version;

struct amd_uncore_ctx {
	int refcnt;
	int cpu;
	struct perf_event **events;
	struct hlist_node node;
};

struct amd_uncore_pmu {
	char name[UNCORE_NAME_LEN];
	int num_counters;
	int rdpmc_base;
	u32 msr_base;
	int group;
	cpumask_t active_mask;
	struct pmu pmu;
	struct amd_uncore_ctx * __percpu *ctx;
};

enum {
	UNCORE_TYPE_DF,
	UNCORE_TYPE_L3,
	UNCORE_TYPE_UMC,

	UNCORE_TYPE_MAX
};

union amd_uncore_info {
	struct {
		u64 aux_data:32;	/* auxiliary data */
		u64 num_pmcs:8;		/* number of counters */
		u64 gid:8;		/* group id */
		u64 cid:8;		/* context id */
	} split;
	u64 full;
};

struct amd_uncore {
	union amd_uncore_info __percpu *info;
	struct amd_uncore_pmu *pmus;
	unsigned int num_pmus;
	bool init_done;
	void (*scan)(struct amd_uncore *uncore, unsigned int cpu);
	int  (*init)(struct amd_uncore *uncore, unsigned int cpu);
	void (*move)(struct amd_uncore *uncore, unsigned int cpu);
	void (*free)(struct amd_uncore *uncore, unsigned int cpu);
};

static struct amd_uncore uncores[UNCORE_TYPE_MAX];

static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event)
{
	return container_of(event->pmu, struct amd_uncore_pmu, pmu);
}

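/*
 * Accumulate the count for an active event. Counts are read either via
 * RDPMC, when an assignment exists, or directly from the PERF_CTR MSR.
 * Shifting the delta up and back down by COUNTER_SHIFT discards the top
 * 16 bits so that wraparound of the 48-bit hardware counters is handled
 * correctly.
 */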
static void amd_uncore_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;
	s64 delta;

	/*
	 * since we do not enable counter overflow interrupts,
	 * we do not have to worry about prev_count changing on us
	 */

	prev = local64_read(&hwc->prev_count);

	/*
	 * Some uncore PMUs do not have RDPMC assignments. In such cases,
	 * read counts directly from the corresponding PERF_CTR.
	 */
	if (hwc->event_base_rdpmc < 0)
		rdmsrl(hwc->event_base, new);
	else
		rdpmcl(hwc->event_base_rdpmc, new);

	local64_set(&hwc->prev_count, new);
	delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
	delta >>= COUNTER_SHIFT;
	local64_add(delta, &event->count);
}

static void amd_uncore_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (flags & PERF_EF_RELOAD)
		wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));

	hwc->state = 0;
	wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
	perf_event_update_userpage(event);
}

static void amd_uncore_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	wrmsrl(hwc->config_base, hwc->config);
	hwc->state |= PERF_HES_STOPPED;

	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		event->pmu->read(event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

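/*
 * Claim a free hardware counter for the event. The counter table is shared
 * by all CPUs attached to the same uncore context, so a free slot is claimed
 * atomically with cmpxchg().
 */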
static int amd_uncore_add(struct perf_event *event, int flags)
{
	int i;
	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
	struct hw_perf_event *hwc = &event->hw;

	/* are we already assigned? */
	if (hwc->idx != -1 && ctx->events[hwc->idx] == event)
		goto out;

	for (i = 0; i < pmu->num_counters; i++) {
		if (ctx->events[i] == event) {
			hwc->idx = i;
			goto out;
		}
	}

	/* if not, take the first available counter */
	hwc->idx = -1;
	for (i = 0; i < pmu->num_counters; i++) {
		if (cmpxchg(&ctx->events[i], NULL, event) == NULL) {
			hwc->idx = i;
			break;
		}
	}

out:
	if (hwc->idx == -1)
		return -EBUSY;

	hwc->config_base = pmu->msr_base + (2 * hwc->idx);
	hwc->event_base = pmu->msr_base + 1 + (2 * hwc->idx);
	hwc->event_base_rdpmc = pmu->rdpmc_base + hwc->idx;
	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (pmu->rdpmc_base < 0)
		hwc->event_base_rdpmc = -1;

	if (flags & PERF_EF_START)
		event->pmu->start(event, PERF_EF_RELOAD);

	return 0;
}

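/*
 * Stop the event and release its counter slot so that it can be claimed by
 * another event on any of the CPUs sharing this uncore context.
 */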
static void amd_uncore_del(struct perf_event *event, int flags)
{
	int i;
	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
	struct hw_perf_event *hwc = &event->hw;

	event->pmu->stop(event, PERF_EF_UPDATE);

	for (i = 0; i < pmu->num_counters; i++) {
		if (cmpxchg(&ctx->events[i], event, NULL) == event)
			break;
	}

	hwc->idx = -1;
}

static int amd_uncore_event_init(struct perf_event *event)
{
	struct amd_uncore_pmu *pmu;
	struct amd_uncore_ctx *ctx;
	struct hw_perf_event *hwc = &event->hw;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	if (event->cpu < 0)
		return -EINVAL;

	pmu = event_to_amd_uncore_pmu(event);
	ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
	if (!ctx)
		return -ENODEV;

	/*
	 * NB and last level cache counters (MSRs) are shared across all cores
	 * that share the same NB / last level cache. On family 16h and below,
	 * interrupts can be directed to a single target core, however, event
	 * counts generated by processes running on other cores cannot be
	 * masked out. So we do not support sampling and per-thread events via
	 * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts.
	 */
	hwc->config = event->attr.config;
	hwc->idx = -1;

	/*
	 * Since requests can come in from any of the shared cores, remap the
	 * event to a single common cpu.
	 */
	event->cpu = ctx->cpu;

	return 0;
}

static umode_t
amd_f17h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return boot_cpu_data.x86 >= 0x17 && boot_cpu_data.x86 < 0x19 ?
	       attr->mode : 0;
}

static umode_t
amd_f19h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return boot_cpu_data.x86 >= 0x19 ? attr->mode : 0;
}

static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
					    struct device_attribute *attr,
					    char *buf)
{
	struct pmu *ptr = dev_get_drvdata(dev);
	struct amd_uncore_pmu *pmu = container_of(ptr, struct amd_uncore_pmu, pmu);

	return cpumap_print_to_pagebuf(true, buf, &pmu->active_mask);
}
static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);

static struct attribute *amd_uncore_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group amd_uncore_attr_group = {
	.attrs = amd_uncore_attrs,
};

#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)			\
static ssize_t __uncore_##_var##_show(struct device *dev,		\
				      struct device_attribute *attr,	\
				      char *page)			\
{									\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
	return sprintf(page, _format "\n");				\
}									\
static struct device_attribute format_attr_##_var =			\
	__ATTR(_name, 0444, __uncore_##_var##_show, NULL)

DEFINE_UNCORE_FORMAT_ATTR(event12,	event,		"config:0-7,32-35");
DEFINE_UNCORE_FORMAT_ATTR(event14,	event,		"config:0-7,32-35,59-60"); /* F17h+ DF */
DEFINE_UNCORE_FORMAT_ATTR(event14v2,	event,		"config:0-7,32-37");	   /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(event8,	event,		"config:0-7");		   /* F17h+ L3, PerfMonV2 UMC */
DEFINE_UNCORE_FORMAT_ATTR(umask8,	umask,		"config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(umask12,	umask,		"config:8-15,24-27");	   /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(coreid,	coreid,		"config:42-44");	   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(slicemask,	slicemask,	"config:48-51");	   /* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask8,	threadmask,	"config:56-63");	   /* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask2,	threadmask,	"config:56-57");	   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(enallslices,	enallslices,	"config:46");		   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(enallcores,	enallcores,	"config:47");		   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(sliceid,	sliceid,	"config:48-50");	   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(rdwrmask,	rdwrmask,	"config:8-9");		   /* PerfMonV2 UMC */

/* Common DF and NB attributes */
static struct attribute *amd_uncore_df_format_attr[] = {
	&format_attr_event12.attr,	/* event */
	&format_attr_umask8.attr,	/* umask */
	NULL,
};

/* Common L2 and L3 attributes */
static struct attribute *amd_uncore_l3_format_attr[] = {
	&format_attr_event12.attr,	/* event */
	&format_attr_umask8.attr,	/* umask */
	NULL,				/* threadmask */
	NULL,
};

/* Common UMC attributes */
static struct attribute *amd_uncore_umc_format_attr[] = {
	&format_attr_event8.attr,	/* event */
	&format_attr_rdwrmask.attr,	/* rdwrmask */
	NULL,
};

/* F17h unique L3 attributes */
static struct attribute *amd_f17h_uncore_l3_format_attr[] = {
	&format_attr_slicemask.attr,	/* slicemask */
	NULL,
};

/* F19h unique L3 attributes */
static struct attribute *amd_f19h_uncore_l3_format_attr[] = {
	&format_attr_coreid.attr,	/* coreid */
	&format_attr_enallslices.attr,	/* enallslices */
	&format_attr_enallcores.attr,	/* enallcores */
	&format_attr_sliceid.attr,	/* sliceid */
	NULL,
};

static struct attribute_group amd_uncore_df_format_group = {
	.name = "format",
	.attrs = amd_uncore_df_format_attr,
};

static struct attribute_group amd_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_uncore_l3_format_attr,
};

static struct attribute_group amd_f17h_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_f17h_uncore_l3_format_attr,
	.is_visible = amd_f17h_uncore_is_visible,
};

static struct attribute_group amd_f19h_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_f19h_uncore_l3_format_attr,
	.is_visible = amd_f19h_uncore_is_visible,
};

static struct attribute_group amd_uncore_umc_format_group = {
	.name = "format",
	.attrs = amd_uncore_umc_format_attr,
};

static const struct attribute_group *amd_uncore_df_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_df_format_group,
	NULL,
};

static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_l3_format_group,
	NULL,
};

static const struct attribute_group *amd_uncore_l3_attr_update[] = {
	&amd_f17h_uncore_l3_format_group,
	&amd_f19h_uncore_l3_format_group,
	NULL,
};

static const struct attribute_group *amd_uncore_umc_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_umc_format_group,
	NULL,
};

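/*
 * Per-CPU topology information is packed into union amd_uncore_info by the
 * scan() callbacks. The helpers below unpack the context id (cid), group id
 * (gid) and number of programmable counters for a given CPU.
 */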
static __always_inline
int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu)
{
	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
	return info->split.cid;
}

static __always_inline
int amd_uncore_ctx_gid(struct amd_uncore *uncore, unsigned int cpu)
{
	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
	return info->split.gid;
}

static __always_inline
int amd_uncore_ctx_num_pmcs(struct amd_uncore *uncore, unsigned int cpu)
{
	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
	return info->split.num_pmcs;
}

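/*
 * Drop a CPU's reference on each of its uncore contexts and free a context
 * once the last CPU sharing it has gone away.
 */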
static void amd_uncore_ctx_free(struct amd_uncore *uncore, unsigned int cpu)
{
	struct amd_uncore_pmu *pmu;
	struct amd_uncore_ctx *ctx;
	int i;

	if (!uncore->init_done)
		return;

	for (i = 0; i < uncore->num_pmus; i++) {
		pmu = &uncore->pmus[i];
		ctx = *per_cpu_ptr(pmu->ctx, cpu);
		if (!ctx)
			continue;

		if (cpu == ctx->cpu)
			cpumask_clear_cpu(cpu, &pmu->active_mask);

		if (!--ctx->refcnt) {
			kfree(ctx->events);
			kfree(ctx);
		}

		*per_cpu_ptr(pmu->ctx, cpu) = NULL;
	}
}

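/*
 * Set up the uncore contexts for an incoming CPU. If an online sibling CPU
 * already owns a context with the same context id within the same group, it
 * is shared and its refcount bumped; otherwise a new context is allocated on
 * the local NUMA node and this CPU becomes its designated reader.
 */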
static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	struct amd_uncore_ctx *curr, *prev;
	struct amd_uncore_pmu *pmu;
	int node, cid, gid, i, j;

	if (!uncore->init_done || !uncore->num_pmus)
		return 0;

	cid = amd_uncore_ctx_cid(uncore, cpu);
	gid = amd_uncore_ctx_gid(uncore, cpu);

	for (i = 0; i < uncore->num_pmus; i++) {
		pmu = &uncore->pmus[i];
		*per_cpu_ptr(pmu->ctx, cpu) = NULL;
		curr = NULL;

		/* Check for group exclusivity */
		if (gid != pmu->group)
			continue;

		/* Find a sibling context */
		for_each_online_cpu(j) {
			if (cpu == j)
				continue;

			prev = *per_cpu_ptr(pmu->ctx, j);
			if (!prev)
				continue;

			if (cid == amd_uncore_ctx_cid(uncore, j)) {
				curr = prev;
				break;
			}
		}

		/* Allocate context if sibling does not exist */
		if (!curr) {
			node = cpu_to_node(cpu);
			curr = kzalloc_node(sizeof(*curr), GFP_KERNEL, node);
			if (!curr)
				goto fail;

			curr->cpu = cpu;
			curr->events = kzalloc_node(sizeof(*curr->events) *
						    pmu->num_counters,
						    GFP_KERNEL, node);
			if (!curr->events) {
				kfree(curr);
				goto fail;
			}

			cpumask_set_cpu(cpu, &pmu->active_mask);
		}

		curr->refcnt++;
		*per_cpu_ptr(pmu->ctx, cpu) = curr;
	}

	return 0;

fail:
	amd_uncore_ctx_free(uncore, cpu);

	return -ENOMEM;
}

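/*
 * When the designated reader CPU goes offline, hand its perf contexts and
 * its slot in active_mask over to an online sibling that shares the same
 * uncore context.
 */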
static void amd_uncore_ctx_move(struct amd_uncore *uncore, unsigned int cpu)
{
	struct amd_uncore_ctx *curr, *next;
	struct amd_uncore_pmu *pmu;
	int i, j;

	if (!uncore->init_done)
		return;

	for (i = 0; i < uncore->num_pmus; i++) {
		pmu = &uncore->pmus[i];
		curr = *per_cpu_ptr(pmu->ctx, cpu);
		if (!curr)
			continue;

		/* Migrate to a shared sibling if possible */
		for_each_online_cpu(j) {
			next = *per_cpu_ptr(pmu->ctx, j);
			if (!next || cpu == j)
				continue;

			if (curr == next) {
				perf_pmu_migrate_context(&pmu->pmu, cpu, j);
				cpumask_clear_cpu(cpu, &pmu->active_mask);
				cpumask_set_cpu(j, &pmu->active_mask);
				next->cpu = j;
				break;
			}
		}
	}
}

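/*
 * CPU hotplug callbacks: scan() runs early on the incoming CPU to record
 * topology, init() attaches contexts once the CPU is online, move() runs
 * before a CPU is taken down to migrate its contexts, and free() releases
 * them after the CPU is dead.
 */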
static int amd_uncore_cpu_starting(unsigned int cpu)
{
	struct amd_uncore *uncore;
	int i;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		uncore->scan(uncore, cpu);
	}

	return 0;
}

static int amd_uncore_cpu_online(unsigned int cpu)
{
	struct amd_uncore *uncore;
	int i;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		if (uncore->init(uncore, cpu))
			break;
	}

	return 0;
}

static int amd_uncore_cpu_down_prepare(unsigned int cpu)
{
	struct amd_uncore *uncore;
	int i;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		uncore->move(uncore, cpu);
	}

	return 0;
}

static int amd_uncore_cpu_dead(unsigned int cpu)
{
	struct amd_uncore *uncore;
	int i;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		uncore->free(uncore, cpu);
	}

	return 0;
}

static int amd_uncore_df_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int ret = amd_uncore_event_init(event);

	if (ret || pmu_version < 2)
		return ret;

	hwc->config = event->attr.config &
		      (pmu_version >= 2 ? AMD64_PERFMON_V2_RAW_EVENT_MASK_NB :
					  AMD64_RAW_EVENT_MASK_NB);

	return 0;
}

static int amd_uncore_df_add(struct perf_event *event, int flags)
{
	int ret = amd_uncore_add(event, flags & ~PERF_EF_START);
	struct hw_perf_event *hwc = &event->hw;

	if (ret)
		return ret;

	/*
	 * The first four DF counters are accessible via RDPMC index 6 to 9
	 * followed by the L3 counters from index 10 to 15. For processors
	 * with more than four DF counters, the DF RDPMC assignments become
	 * discontiguous as the additional counters are accessible starting
	 * from index 16.
	 */
	if (hwc->idx >= NUM_COUNTERS_NB)
		hwc->event_base_rdpmc += NUM_COUNTERS_L3;

	/* Delayed start after rdpmc base update */
	if (flags & PERF_EF_START)
		amd_uncore_start(event, PERF_EF_RELOAD);

	return 0;
}

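/*
 * Record DF/NB counter information for this CPU: the counter count comes
 * from CPUID leaf EXT_PERFMON_DEBUG_FEATURES on PerfMonV2 parts and defaults
 * to NUM_COUNTERS_NB otherwise, while the context id follows the die.
 */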
static
void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
	union cpuid_0x80000022_ebx ebx;
	union amd_uncore_info info;

	if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB))
		return;

	info.split.aux_data = 0;
	info.split.num_pmcs = NUM_COUNTERS_NB;
	info.split.gid = 0;
	info.split.cid = topology_die_id(cpu);

	if (pmu_version >= 2) {
		ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
		info.split.num_pmcs = ebx.split.num_df_pmc;
	}

	*per_cpu_ptr(uncore->info, cpu) = info;
}

static
int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	struct attribute **df_attr = amd_uncore_df_format_attr;
	struct amd_uncore_pmu *pmu;

	/* Run just once */
	if (uncore->init_done)
		return amd_uncore_ctx_init(uncore, cpu);

	/* No grouping, single instance for a system */
	uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
	if (!uncore->pmus) {
		uncore->num_pmus = 0;
		goto done;
	}

	/*
	 * For Family 17h and above, the Northbridge counters are repurposed
	 * as Data Fabric counters. The PMUs are exported based on family as
	 * either NB or DF.
	 */
	pmu = &uncore->pmus[0];
	strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb",
		sizeof(pmu->name));
	pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
	pmu->msr_base = MSR_F15H_NB_PERF_CTL;
	pmu->rdpmc_base = RDPMC_BASE_NB;
	pmu->group = amd_uncore_ctx_gid(uncore, cpu);

	if (pmu_version >= 2) {
		*df_attr++ = &format_attr_event14v2.attr;
		*df_attr++ = &format_attr_umask12.attr;
	} else if (boot_cpu_data.x86 >= 0x17) {
		*df_attr = &format_attr_event14.attr;
	}

	pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
	if (!pmu->ctx)
		goto done;

	pmu->pmu = (struct pmu) {
		.task_ctx_nr	= perf_invalid_context,
		.attr_groups	= amd_uncore_df_attr_groups,
		.name		= pmu->name,
		.event_init	= amd_uncore_df_event_init,
		.add		= amd_uncore_df_add,
		.del		= amd_uncore_del,
		.start		= amd_uncore_start,
		.stop		= amd_uncore_stop,
		.read		= amd_uncore_read,
		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
		.module		= THIS_MODULE,
	};

	if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
		free_percpu(pmu->ctx);
		pmu->ctx = NULL;
		goto done;
	}

	pr_info("%d %s%s counters detected\n", pmu->num_counters,
		boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON " : "",
		pmu->pmu.name);

	uncore->num_pmus = 1;

done:
	uncore->init_done = true;

	return amd_uncore_ctx_init(uncore, cpu);
}

static int amd_uncore_l3_event_init(struct perf_event *event)
{
	int ret = amd_uncore_event_init(event);
	struct hw_perf_event *hwc = &event->hw;
	u64 config = event->attr.config;
	u64 mask;

	hwc->config = config & AMD64_RAW_EVENT_MASK_NB;

	/*
	 * SliceMask and ThreadMask need to be set for certain L3 events.
	 * For other events, the two fields do not affect the count.
	 */
	if (ret || boot_cpu_data.x86 < 0x17)
		return ret;

	mask = config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK |
			 AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES |
			 AMD64_L3_COREID_MASK);

	if (boot_cpu_data.x86 <= 0x18)
		mask = ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) |
		       ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK);

	/*
	 * If the user doesn't specify a ThreadMask, they're not trying to
	 * count core 0, so we enable all cores & threads.
	 * We'll also assume that they want to count slice 0 if they specify
	 * a ThreadMask and leave SliceId and EnAllSlices unpopulated.
	 */
	else if (!(config & AMD64_L3_F19H_THREAD_MASK))
		mask = AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES |
		       AMD64_L3_EN_ALL_CORES;

	hwc->config |= mask;

	return 0;
}

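/*
 * Record L2/L3 counter information for this CPU: family 17h and later
 * provide NUM_COUNTERS_L3 counters, earlier families NUM_COUNTERS_L2, and
 * the context id follows the last level cache the CPU belongs to.
 */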
static
void amd_uncore_l3_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
	union amd_uncore_info info;

	if (!boot_cpu_has(X86_FEATURE_PERFCTR_LLC))
		return;

	info.split.aux_data = 0;
	info.split.num_pmcs = NUM_COUNTERS_L2;
	info.split.gid = 0;
	info.split.cid = per_cpu_llc_id(cpu);

	if (boot_cpu_data.x86 >= 0x17)
		info.split.num_pmcs = NUM_COUNTERS_L3;

	*per_cpu_ptr(uncore->info, cpu) = info;
}

static
int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	struct attribute **l3_attr = amd_uncore_l3_format_attr;
	struct amd_uncore_pmu *pmu;

	/* Run just once */
	if (uncore->init_done)
		return amd_uncore_ctx_init(uncore, cpu);

	/* No grouping, single instance for a system */
	uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
	if (!uncore->pmus) {
		uncore->num_pmus = 0;
		goto done;
	}

	/*
	 * For Family 17h and above, L3 cache counters are available instead
	 * of L2 cache counters. The PMUs are exported based on family as
	 * either L2 or L3.
	 */
	pmu = &uncore->pmus[0];
	strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2",
		sizeof(pmu->name));
	pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
	pmu->msr_base = MSR_F16H_L2I_PERF_CTL;
	pmu->rdpmc_base = RDPMC_BASE_LLC;
	pmu->group = amd_uncore_ctx_gid(uncore, cpu);

	if (boot_cpu_data.x86 >= 0x17) {
		*l3_attr++ = &format_attr_event8.attr;
		*l3_attr++ = &format_attr_umask8.attr;
		*l3_attr++ = boot_cpu_data.x86 >= 0x19 ?
			     &format_attr_threadmask2.attr :
			     &format_attr_threadmask8.attr;
	}

	pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
	if (!pmu->ctx)
		goto done;

	pmu->pmu = (struct pmu) {
		.task_ctx_nr	= perf_invalid_context,
		.attr_groups	= amd_uncore_l3_attr_groups,
		.attr_update	= amd_uncore_l3_attr_update,
		.name		= pmu->name,
		.event_init	= amd_uncore_l3_event_init,
		.add		= amd_uncore_add,
		.del		= amd_uncore_del,
		.start		= amd_uncore_start,
		.stop		= amd_uncore_stop,
		.read		= amd_uncore_read,
		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
		.module		= THIS_MODULE,
	};

	if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
		free_percpu(pmu->ctx);
		pmu->ctx = NULL;
		goto done;
	}

	pr_info("%d %s%s counters detected\n", pmu->num_counters,
		boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON " : "",
		pmu->pmu.name);

	uncore->num_pmus = 1;

done:
	uncore->init_done = true;

	return amd_uncore_ctx_init(uncore, cpu);
}

static int amd_uncore_umc_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int ret = amd_uncore_event_init(event);

	if (ret)
		return ret;

	hwc->config = event->attr.config & AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC;

	return 0;
}

static void amd_uncore_umc_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (flags & PERF_EF_RELOAD)
		wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));

	hwc->state = 0;
	wrmsrl(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC));
	perf_event_update_userpage(event);
}

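/*
 * Record UMC counter information for this CPU from CPUID leaf
 * EXT_PERFMON_DEBUG_FEATURES: the active UMC mask (ECX) is stashed in
 * aux_data and both the group and context ids follow the die.
 */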
static
void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
	union cpuid_0x80000022_ebx ebx;
	union amd_uncore_info info;
	unsigned int eax, ecx, edx;

	if (pmu_version < 2)
		return;

	cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx);
	info.split.aux_data = ecx;	/* stash active mask */
	info.split.num_pmcs = ebx.split.num_umc_pmc;
	info.split.gid = topology_die_id(cpu);
	info.split.cid = topology_die_id(cpu);
	*per_cpu_ptr(uncore->info, cpu) = info;
}

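/*
 * Register one PMU per active unified memory controller. UMCs are grouped
 * by die, the per-group counter budget is split evenly between the UMCs in
 * that group, and counters are accessed via MSRs only (no RDPMC base).
 */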
static
int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	DECLARE_BITMAP(gmask, UNCORE_GROUP_MAX) = { 0 };
	u8 group_num_pmus[UNCORE_GROUP_MAX] = { 0 };
	u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 };
	union amd_uncore_info info;
	struct amd_uncore_pmu *pmu;
	int index = 0, gid, i;

	if (pmu_version < 2)
		return 0;

	/* Run just once */
	if (uncore->init_done)
		return amd_uncore_ctx_init(uncore, cpu);

	/* Find unique groups */
	for_each_online_cpu(i) {
		info = *per_cpu_ptr(uncore->info, i);
		gid = info.split.gid;
		if (test_bit(gid, gmask))
			continue;

		__set_bit(gid, gmask);
		group_num_pmus[gid] = hweight32(info.split.aux_data);
		group_num_pmcs[gid] = info.split.num_pmcs;
		uncore->num_pmus += group_num_pmus[gid];
	}

	uncore->pmus = kzalloc(sizeof(*uncore->pmus) * uncore->num_pmus,
			       GFP_KERNEL);
	if (!uncore->pmus) {
		uncore->num_pmus = 0;
		goto done;
	}

	for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) {
		for (i = 0; i < group_num_pmus[gid]; i++) {
			pmu = &uncore->pmus[index];
			snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%d", index);
			pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid];
			pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2;
			pmu->rdpmc_base = -1;
			pmu->group = gid;

			pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
			if (!pmu->ctx)
				goto done;

			pmu->pmu = (struct pmu) {
				.task_ctx_nr	= perf_invalid_context,
				.attr_groups	= amd_uncore_umc_attr_groups,
				.name		= pmu->name,
				.event_init	= amd_uncore_umc_event_init,
				.add		= amd_uncore_add,
				.del		= amd_uncore_del,
				.start		= amd_uncore_umc_start,
				.stop		= amd_uncore_stop,
				.read		= amd_uncore_read,
				.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
				.module		= THIS_MODULE,
			};

			if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
				free_percpu(pmu->ctx);
				pmu->ctx = NULL;
				goto done;
			}

			pr_info("%d %s counters detected\n", pmu->num_counters,
				pmu->pmu.name);

			index++;
		}
	}

done:
	uncore->num_pmus = index;
	uncore->init_done = true;

	return amd_uncore_ctx_init(uncore, cpu);
}

static struct amd_uncore uncores[UNCORE_TYPE_MAX] = {
	/* UNCORE_TYPE_DF */
	{
		.scan = amd_uncore_df_ctx_scan,
		.init = amd_uncore_df_ctx_init,
		.move = amd_uncore_ctx_move,
		.free = amd_uncore_ctx_free,
	},
	/* UNCORE_TYPE_L3 */
	{
		.scan = amd_uncore_l3_ctx_scan,
		.init = amd_uncore_l3_ctx_init,
		.move = amd_uncore_ctx_move,
		.free = amd_uncore_ctx_free,
	},
	/* UNCORE_TYPE_UMC */
	{
		.scan = amd_uncore_umc_ctx_scan,
		.init = amd_uncore_umc_ctx_init,
		.move = amd_uncore_ctx_move,
		.free = amd_uncore_ctx_free,
	},
};

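/*
 * Module init: only AMD and Hygon processors with topology extensions are
 * supported. PerfMonV2 support is noted before the per-CPU info buffers are
 * allocated and the CPU hotplug callbacks are installed.
 */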
static int __init amd_uncore_init(void)
{
	struct amd_uncore *uncore;
	int ret = -ENODEV;
	int i;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
		return -ENODEV;

	if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
		return -ENODEV;

	if (boot_cpu_has(X86_FEATURE_PERFMON_V2))
		pmu_version = 2;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];

		BUG_ON(!uncore->scan);
		BUG_ON(!uncore->init);
		BUG_ON(!uncore->move);
		BUG_ON(!uncore->free);

		uncore->info = alloc_percpu(union amd_uncore_info);
		if (!uncore->info) {
			ret = -ENOMEM;
			goto fail;
		}
	}

	/*
	 * Install callbacks. Core will call them for each online cpu.
	 */
	ret = cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
				"perf/x86/amd/uncore:prepare",
				NULL, amd_uncore_cpu_dead);
	if (ret)
		goto fail;

	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
				"perf/x86/amd/uncore:starting",
				amd_uncore_cpu_starting, NULL);
	if (ret)
		goto fail_prep;

	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
				"perf/x86/amd/uncore:online",
				amd_uncore_cpu_online,
				amd_uncore_cpu_down_prepare);
	if (ret)
		goto fail_start;

	return 0;

fail_start:
	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
fail_prep:
	cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
fail:
	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		if (uncore->info) {
			free_percpu(uncore->info);
			uncore->info = NULL;
		}
	}

	return ret;
}

static void __exit amd_uncore_exit(void)
{
	struct amd_uncore *uncore;
	struct amd_uncore_pmu *pmu;
	int i, j;

	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE);
	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
	cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		if (!uncore->info)
			continue;

		free_percpu(uncore->info);
		uncore->info = NULL;

		for (j = 0; j < uncore->num_pmus; j++) {
			pmu = &uncore->pmus[j];
			if (!pmu->ctx)
				continue;

			perf_pmu_unregister(&pmu->pmu);
			free_percpu(pmu->ctx);
			pmu->ctx = NULL;
		}

		kfree(uncore->pmus);
		uncore->pmus = NULL;
	}
}

module_init(amd_uncore_init);
module_exit(amd_uncore_exit);

MODULE_DESCRIPTION("AMD Uncore Driver");
MODULE_LICENSE("GPL v2");