1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Performance counter support for MPC7450-family processors. |
4 | * |
5 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. |
6 | */ |
7 | #include <linux/string.h> |
8 | #include <linux/perf_event.h> |
9 | #include <asm/reg.h> |
10 | #include <asm/cputable.h> |
11 | |
/* Sizing limits of the MPC7450 performance monitor */
enum {
	N_COUNTER	= 6,	/* Number of hardware counters (PMC1 - PMC6) */
	MAX_ALT		= 3,	/* Maximum number of alternative codes per event */
};
14 | |
/*
 * Fields of an MPC7450-family event code.
 *
 * An event code holds a selector in the low bits, the number of the
 * PMC it must be counted on (0 if the event is not tied to one PMC),
 * a threshold value and a threshold scale flag.
 */
enum {
	PM_THRMULT_MSKS	= 0x40000,	/* threshold scale flag */
	PM_THRESH_SH	= 12,		/* threshold value: shift */
	PM_THRESH_MSK	= 0x3f,		/* threshold value: mask after shift */
	PM_PMC_SH	= 8,		/* PMC number: shift */
	PM_PMC_MSK	= 7,		/* PMC number: mask after shift */
	PM_PMCSEL_MSK	= 0x7f,		/* event selector mask */
};
24 | |
25 | /* |
26 | * Classify events according to how specific their PMC requirements are. |
27 | * Result is: |
28 | * 0: can go on any PMC |
29 | * 1: can go on PMCs 1-4 |
30 | * 2: can go on PMCs 1,2,4 |
31 | * 3: can go on PMCs 1 or 2 |
32 | * 4: can only go on one PMC |
33 | * -1: event code is invalid |
34 | */ |
35 | #define N_CLASSES 5 |
36 | |
37 | static int mpc7450_classify_event(u32 event) |
38 | { |
39 | int pmc; |
40 | |
41 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; |
42 | if (pmc) { |
43 | if (pmc > N_COUNTER) |
44 | return -1; |
45 | return 4; |
46 | } |
47 | event &= PM_PMCSEL_MSK; |
48 | if (event <= 1) |
49 | return 0; |
50 | if (event <= 7) |
51 | return 1; |
52 | if (event <= 13) |
53 | return 2; |
54 | if (event <= 22) |
55 | return 3; |
56 | return -1; |
57 | } |
58 | |
59 | /* |
60 | * Events using threshold and possible threshold scale: |
61 | * code scale? name |
62 | * 11e N PM_INSTQ_EXCEED_CYC |
63 | * 11f N PM_ALTV_IQ_EXCEED_CYC |
64 | * 128 Y PM_DTLB_SEARCH_EXCEED_CYC |
65 | * 12b Y PM_LD_MISS_EXCEED_L1_CYC |
66 | * 220 N PM_CQ_EXCEED_CYC |
67 | * 30c N PM_GPR_RB_EXCEED_CYC |
68 | * 30d ? PM_FPR_IQ_EXCEED_CYC ? |
69 | * 311 Y PM_ITLB_SEARCH_EXCEED |
70 | * 410 N PM_GPR_IQ_EXCEED_CYC |
71 | */ |
72 | |
73 | /* |
74 | * Return use of threshold and threshold scale bits: |
75 | * 0 = uses neither, 1 = uses threshold, 2 = uses both |
76 | */ |
77 | static int mpc7450_threshold_use(u32 event) |
78 | { |
79 | int pmc, sel; |
80 | |
81 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; |
82 | sel = event & PM_PMCSEL_MSK; |
83 | switch (pmc) { |
84 | case 1: |
85 | if (sel == 0x1e || sel == 0x1f) |
86 | return 1; |
87 | if (sel == 0x28 || sel == 0x2b) |
88 | return 2; |
89 | break; |
90 | case 2: |
91 | if (sel == 0x20) |
92 | return 1; |
93 | break; |
94 | case 3: |
95 | if (sel == 0xc || sel == 0xd) |
96 | return 1; |
97 | if (sel == 0x11) |
98 | return 2; |
99 | break; |
100 | case 4: |
101 | if (sel == 0x10) |
102 | return 1; |
103 | break; |
104 | } |
105 | return 0; |
106 | } |
107 | |
/*
 * Layout of constraint bits:
 * 33222222222211111111110000000000
 * 10987654321098765432109876543210
 *  |<    ><  > < > < ><><><><><><>
 * TS  TV   G4  G3  G2 P6P5P4P3P2P1
 *
 * P1 - P6
 *	0 - 11: Count of events needing PMC1 .. PMC6 (two bits per PMC)
 *
 * G2
 *	12 - 14: Count of events needing PMC1 or PMC2
 *
 * G3
 *	16 - 18: Count of events needing PMC1, PMC2 or PMC4
 *
 * G4
 *	20 - 23: Count of events needing PMC1, PMC2, PMC3 or PMC4
 *
 * TV
 *	24 - 29: Threshold value requested
 *
 * TS
 *	30: Threshold scale value requested
 */
133 | |
134 | static u32 pmcbits[N_COUNTER][2] = { |
135 | { 0x00844002, 0x00111001 }, /* PMC1 mask, value: P1,G2,G3,G4 */ |
136 | { 0x00844008, 0x00111004 }, /* PMC2: P2,G2,G3,G4 */ |
137 | { 0x00800020, 0x00100010 }, /* PMC3: P3,G4 */ |
138 | { 0x00840080, 0x00110040 }, /* PMC4: P4,G3,G4 */ |
139 | { 0x00000200, 0x00000100 }, /* PMC5: P5 */ |
140 | { 0x00000800, 0x00000400 } /* PMC6: P6 */ |
141 | }; |
142 | |
143 | static u32 classbits[N_CLASSES - 1][2] = { |
144 | { 0x00000000, 0x00000000 }, /* class 0: no constraint */ |
145 | { 0x00800000, 0x00100000 }, /* class 1: G4 */ |
146 | { 0x00040000, 0x00010000 }, /* class 2: G3 */ |
147 | { 0x00004000, 0x00001000 }, /* class 3: G2 */ |
148 | }; |
149 | |
150 | static int mpc7450_get_constraint(u64 event, unsigned long *maskp, |
151 | unsigned long *valp, u64 event_config1 __maybe_unused) |
152 | { |
153 | int pmc, class; |
154 | u32 mask, value; |
155 | int thresh, tuse; |
156 | |
157 | class = mpc7450_classify_event(event); |
158 | if (class < 0) |
159 | return -1; |
160 | if (class == 4) { |
161 | pmc = ((unsigned int)event >> PM_PMC_SH) & PM_PMC_MSK; |
162 | mask = pmcbits[pmc - 1][0]; |
163 | value = pmcbits[pmc - 1][1]; |
164 | } else { |
165 | mask = classbits[class][0]; |
166 | value = classbits[class][1]; |
167 | } |
168 | |
169 | tuse = mpc7450_threshold_use(event); |
170 | if (tuse) { |
171 | thresh = ((unsigned int)event >> PM_THRESH_SH) & PM_THRESH_MSK; |
172 | mask |= 0x3f << 24; |
173 | value |= thresh << 24; |
174 | if (tuse == 2) { |
175 | mask |= 0x40000000; |
176 | if ((unsigned int)event & PM_THRMULT_MSKS) |
177 | value |= 0x40000000; |
178 | } |
179 | } |
180 | |
181 | *maskp = mask; |
182 | *valp = value; |
183 | return 0; |
184 | } |
185 | |
186 | static const unsigned int event_alternatives[][MAX_ALT] = { |
187 | { 0x217, 0x317 }, /* PM_L1_DCACHE_MISS */ |
188 | { 0x418, 0x50f, 0x60f }, /* PM_SNOOP_RETRY */ |
189 | { 0x502, 0x602 }, /* PM_L2_HIT */ |
190 | { 0x503, 0x603 }, /* PM_L3_HIT */ |
191 | { 0x504, 0x604 }, /* PM_L2_ICACHE_MISS */ |
192 | { 0x505, 0x605 }, /* PM_L3_ICACHE_MISS */ |
193 | { 0x506, 0x606 }, /* PM_L2_DCACHE_MISS */ |
194 | { 0x507, 0x607 }, /* PM_L3_DCACHE_MISS */ |
195 | { 0x50a, 0x623 }, /* PM_LD_HIT_L3 */ |
196 | { 0x50b, 0x624 }, /* PM_ST_HIT_L3 */ |
197 | { 0x50d, 0x60d }, /* PM_L2_TOUCH_HIT */ |
198 | { 0x50e, 0x60e }, /* PM_L3_TOUCH_HIT */ |
199 | { 0x512, 0x612 }, /* PM_INT_LOCAL */ |
200 | { 0x513, 0x61d }, /* PM_L2_MISS */ |
201 | { 0x514, 0x61e }, /* PM_L3_MISS */ |
202 | }; |
203 | |
204 | /* |
205 | * Scan the alternatives table for a match and return the |
206 | * index into the alternatives table if found, else -1. |
207 | */ |
208 | static int find_alternative(u32 event) |
209 | { |
210 | int i, j; |
211 | |
212 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { |
213 | if (event < event_alternatives[i][0]) |
214 | break; |
215 | for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) |
216 | if (event == event_alternatives[i][j]) |
217 | return i; |
218 | } |
219 | return -1; |
220 | } |
221 | |
222 | static int mpc7450_get_alternatives(u64 event, unsigned int flags, u64 alt[]) |
223 | { |
224 | int i, j, nalt = 1; |
225 | u32 ae; |
226 | |
227 | alt[0] = event; |
228 | nalt = 1; |
229 | i = find_alternative(event: (u32)event); |
230 | if (i >= 0) { |
231 | for (j = 0; j < MAX_ALT; ++j) { |
232 | ae = event_alternatives[i][j]; |
233 | if (ae && ae != (u32)event) |
234 | alt[nalt++] = ae; |
235 | } |
236 | } |
237 | return nalt; |
238 | } |
239 | |
240 | /* |
241 | * Bitmaps of which PMCs each class can use for classes 0 - 3. |
242 | * Bit i is set if PMC i+1 is usable. |
243 | */ |
244 | static const u8 classmap[N_CLASSES] = { |
245 | 0x3f, 0x0f, 0x0b, 0x03, 0 |
246 | }; |
247 | |
248 | /* Bit position and width of each PMCSEL field */ |
249 | static const int pmcsel_shift[N_COUNTER] = { |
250 | 6, 0, 27, 22, 17, 11 |
251 | }; |
252 | static const u32 pmcsel_mask[N_COUNTER] = { |
253 | 0x7f, 0x3f, 0x1f, 0x1f, 0x1f, 0x3f |
254 | }; |
255 | |
256 | /* |
257 | * Compute MMCR0/1/2 values for a set of events. |
258 | */ |
259 | static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], |
260 | struct mmcr_regs *mmcr, |
261 | struct perf_event *pevents[], |
262 | u32 flags __maybe_unused) |
263 | { |
264 | u8 event_index[N_CLASSES][N_COUNTER]; |
265 | int n_classevent[N_CLASSES]; |
266 | int i, j, class, tuse; |
267 | u32 pmc_inuse = 0, pmc_avail; |
268 | u32 mmcr0 = 0, mmcr1 = 0, mmcr2 = 0; |
269 | u32 ev, pmc, thresh; |
270 | |
271 | if (n_ev > N_COUNTER) |
272 | return -1; |
273 | |
274 | /* First pass: count usage in each class */ |
275 | for (i = 0; i < N_CLASSES; ++i) |
276 | n_classevent[i] = 0; |
277 | for (i = 0; i < n_ev; ++i) { |
278 | class = mpc7450_classify_event(event: event[i]); |
279 | if (class < 0) |
280 | return -1; |
281 | j = n_classevent[class]++; |
282 | event_index[class][j] = i; |
283 | } |
284 | |
285 | /* Second pass: allocate PMCs from most specific event to least */ |
286 | for (class = N_CLASSES - 1; class >= 0; --class) { |
287 | for (i = 0; i < n_classevent[class]; ++i) { |
288 | ev = event[event_index[class][i]]; |
289 | if (class == 4) { |
290 | pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK; |
291 | if (pmc_inuse & (1 << (pmc - 1))) |
292 | return -1; |
293 | } else { |
294 | /* Find a suitable PMC */ |
295 | pmc_avail = classmap[class] & ~pmc_inuse; |
296 | if (!pmc_avail) |
297 | return -1; |
298 | pmc = ffs(pmc_avail); |
299 | } |
300 | pmc_inuse |= 1 << (pmc - 1); |
301 | |
302 | tuse = mpc7450_threshold_use(event: ev); |
303 | if (tuse) { |
304 | thresh = (ev >> PM_THRESH_SH) & PM_THRESH_MSK; |
305 | mmcr0 |= thresh << 16; |
306 | if (tuse == 2 && (ev & PM_THRMULT_MSKS)) |
307 | mmcr2 = 0x80000000; |
308 | } |
309 | ev &= pmcsel_mask[pmc - 1]; |
310 | ev <<= pmcsel_shift[pmc - 1]; |
311 | if (pmc <= 2) |
312 | mmcr0 |= ev; |
313 | else |
314 | mmcr1 |= ev; |
315 | hwc[event_index[class][i]] = pmc - 1; |
316 | } |
317 | } |
318 | |
319 | if (pmc_inuse & 1) |
320 | mmcr0 |= MMCR0_PMC1CE; |
321 | if (pmc_inuse & 0x3e) |
322 | mmcr0 |= MMCR0_PMCnCE; |
323 | |
324 | /* Return MMCRx values */ |
325 | mmcr->mmcr0 = mmcr0; |
326 | mmcr->mmcr1 = mmcr1; |
327 | mmcr->mmcr2 = mmcr2; |
328 | /* |
329 | * 32-bit doesn't have an MMCRA and uses SPRN_MMCR2 to define |
330 | * SPRN_MMCRA. So assign mmcra of cpu_hw_events with `mmcr2` |
331 | * value to ensure that any write to this SPRN_MMCRA will |
332 | * use mmcr2 value. |
333 | */ |
334 | mmcr->mmcra = mmcr2; |
335 | return 0; |
336 | } |
337 | |
338 | /* |
339 | * Disable counting by a PMC. |
340 | * Note that the pmc argument is 0-based here, not 1-based. |
341 | */ |
342 | static void mpc7450_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) |
343 | { |
344 | if (pmc <= 1) |
345 | mmcr->mmcr0 &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); |
346 | else |
347 | mmcr->mmcr1 &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); |
348 | } |
349 | |
350 | static int mpc7450_generic_events[] = { |
351 | [PERF_COUNT_HW_CPU_CYCLES] = 1, |
352 | [PERF_COUNT_HW_INSTRUCTIONS] = 2, |
353 | [PERF_COUNT_HW_CACHE_MISSES] = 0x217, /* PM_L1_DCACHE_MISS */ |
354 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x122, /* PM_BR_CMPL */ |
355 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x41c, /* PM_BR_MPRED */ |
356 | }; |
357 | |
358 | #define C(x) PERF_COUNT_HW_CACHE_##x |
359 | |
360 | /* |
361 | * Table of generalized cache-related events. |
362 | * 0 means not supported, -1 means nonsensical, other values |
363 | * are event codes. |
364 | */ |
365 | static u64 mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { |
366 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ |
367 | [C(OP_READ)] = { 0, 0x225 }, |
368 | [C(OP_WRITE)] = { 0, 0x227 }, |
369 | [C(OP_PREFETCH)] = { 0, 0 }, |
370 | }, |
371 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ |
372 | [C(OP_READ)] = { 0x129, 0x115 }, |
373 | [C(OP_WRITE)] = { -1, -1 }, |
374 | [C(OP_PREFETCH)] = { 0x634, 0 }, |
375 | }, |
376 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ |
377 | [C(OP_READ)] = { 0, 0 }, |
378 | [C(OP_WRITE)] = { 0, 0 }, |
379 | [C(OP_PREFETCH)] = { 0, 0 }, |
380 | }, |
381 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ |
382 | [C(OP_READ)] = { 0, 0x312 }, |
383 | [C(OP_WRITE)] = { -1, -1 }, |
384 | [C(OP_PREFETCH)] = { -1, -1 }, |
385 | }, |
386 | [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ |
387 | [C(OP_READ)] = { 0, 0x223 }, |
388 | [C(OP_WRITE)] = { -1, -1 }, |
389 | [C(OP_PREFETCH)] = { -1, -1 }, |
390 | }, |
391 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ |
392 | [C(OP_READ)] = { 0x122, 0x41c }, |
393 | [C(OP_WRITE)] = { -1, -1 }, |
394 | [C(OP_PREFETCH)] = { -1, -1 }, |
395 | }, |
396 | [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ |
397 | [C(OP_READ)] = { -1, -1 }, |
398 | [C(OP_WRITE)] = { -1, -1 }, |
399 | [C(OP_PREFETCH)] = { -1, -1 }, |
400 | }, |
401 | }; |
402 | |
403 | struct power_pmu mpc7450_pmu = { |
404 | .name = "MPC7450 family" , |
405 | .n_counter = N_COUNTER, |
406 | .max_alternatives = MAX_ALT, |
407 | .add_fields = 0x00111555ul, |
408 | .test_adder = 0x00301000ul, |
409 | .compute_mmcr = mpc7450_compute_mmcr, |
410 | .get_constraint = mpc7450_get_constraint, |
411 | .get_alternatives = mpc7450_get_alternatives, |
412 | .disable_pmc = mpc7450_disable_pmc, |
413 | .n_generic = ARRAY_SIZE(mpc7450_generic_events), |
414 | .generic_events = mpc7450_generic_events, |
415 | .cache_events = &mpc7450_cache_events, |
416 | }; |
417 | |
418 | static int __init init_mpc7450_pmu(void) |
419 | { |
420 | if (!pvr_version_is(PVR_VER_7450) && !pvr_version_is(PVR_VER_7455) && |
421 | !pvr_version_is(PVR_VER_7447) && !pvr_version_is(PVR_VER_7447A) && |
422 | !pvr_version_is(PVR_VER_7448)) |
423 | return -ENODEV; |
424 | |
425 | return register_power_pmu(&mpc7450_pmu); |
426 | } |
427 | |
428 | early_initcall(init_mpc7450_pmu); |
429 | |