1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Hypervisor supplied "gpci" ("get performance counter info") performance |
4 | * counter support |
5 | * |
6 | * Author: Cody P Schafer <cody@linux.vnet.ibm.com> |
7 | * Copyright 2014 IBM Corporation. |
8 | */ |
9 | |
10 | #define pr_fmt(fmt) "hv-gpci: " fmt |
11 | |
12 | #include <linux/init.h> |
13 | #include <linux/perf_event.h> |
14 | #include <asm/firmware.h> |
15 | #include <asm/hvcall.h> |
16 | #include <asm/io.h> |
17 | |
18 | #include "hv-gpci.h" |
19 | #include "hv-common.h" |
20 | |
21 | /* |
22 | * Example usage: |
23 | * perf stat -e 'hv_gpci/counter_info_version=3,offset=0,length=8, |
24 | * secondary_index=0,starting_index=0xffffffff,request=0x10/' ... |
25 | */ |
26 | |
27 | /* u32 */ |
28 | EVENT_DEFINE_RANGE_FORMAT(request, config, 0, 31); |
29 | /* u32 */ |
30 | /* |
31 | * Note that starting_index, phys_processor_idx, sibling_part_id, |
32 | * hw_chip_id, partition_id all refer to the same bit range. They |
33 | * are basically aliases for the starting_index. The specific alias |
34 | * used depends on the event. See REQUEST_IDX_KIND in hv-gpci-requests.h |
35 | */ |
36 | EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 32, 63); |
37 | EVENT_DEFINE_RANGE_FORMAT_LITE(phys_processor_idx, config, 32, 63); |
38 | EVENT_DEFINE_RANGE_FORMAT_LITE(sibling_part_id, config, 32, 63); |
39 | EVENT_DEFINE_RANGE_FORMAT_LITE(hw_chip_id, config, 32, 63); |
40 | EVENT_DEFINE_RANGE_FORMAT_LITE(partition_id, config, 32, 63); |
41 | |
42 | /* u16 */ |
43 | EVENT_DEFINE_RANGE_FORMAT(secondary_index, config1, 0, 15); |
44 | /* u8 */ |
45 | EVENT_DEFINE_RANGE_FORMAT(counter_info_version, config1, 16, 23); |
46 | /* u8, bytes of data (1-8) */ |
47 | EVENT_DEFINE_RANGE_FORMAT(length, config1, 24, 31); |
48 | /* u32, byte offset */ |
49 | EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63); |
50 | |
51 | static cpumask_t hv_gpci_cpumask; |
52 | |
53 | static struct attribute *format_attrs[] = { |
54 | &format_attr_request.attr, |
55 | &format_attr_starting_index.attr, |
56 | &format_attr_phys_processor_idx.attr, |
57 | &format_attr_sibling_part_id.attr, |
58 | &format_attr_hw_chip_id.attr, |
59 | &format_attr_partition_id.attr, |
60 | &format_attr_secondary_index.attr, |
61 | &format_attr_counter_info_version.attr, |
62 | |
63 | &format_attr_offset.attr, |
64 | &format_attr_length.attr, |
65 | NULL, |
66 | }; |
67 | |
68 | static const struct attribute_group format_group = { |
69 | .name = "format" , |
70 | .attrs = format_attrs, |
71 | }; |
72 | |
73 | static struct attribute_group event_group = { |
74 | .name = "events" , |
75 | /* .attrs is set in init */ |
76 | }; |
77 | |
/*
 * HV_CAPS_ATTR(_name, _format) - define a read-only device attribute named
 * hv_caps_attr_<_name> whose show routine fetches the hypervisor perf
 * capabilities with hv_perf_caps_get() and prints the member caps.<_name>
 * using @_format. The show routine returns -EIO when the capabilities
 * cannot be fetched. The caller supplies the trailing semicolon.
 */
#define HV_CAPS_ATTR(_name, _format)					\
static ssize_t _name##_show(struct device *dev,				\
			    struct device_attribute *attr,		\
			    char *page)					\
{									\
	struct hv_perf_caps caps;					\
	unsigned long hcall_rc;						\
									\
	hcall_rc = hv_perf_caps_get(&caps);				\
	if (hcall_rc != 0)						\
		return -EIO;						\
									\
	return sprintf(page, _format, caps._name);			\
}									\
static struct device_attribute hv_caps_attr_##_name = __ATTR_RO(_name)
91 | |
92 | static ssize_t kernel_version_show(struct device *dev, |
93 | struct device_attribute *attr, |
94 | char *page) |
95 | { |
96 | return sprintf(buf: page, fmt: "0x%x\n" , COUNTER_INFO_VERSION_CURRENT); |
97 | } |
98 | |
99 | static ssize_t cpumask_show(struct device *dev, |
100 | struct device_attribute *attr, char *buf) |
101 | { |
102 | return cpumap_print_to_pagebuf(list: true, buf, mask: &hv_gpci_cpumask); |
103 | } |
104 | |
/*
 * Slots of the interface attribute array that hold the system information
 * attributes. INTERFACE_NULL_ATTR is the slot after the last of them.
 */
#define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR	6
#define INTERFACE_PROCESSOR_CONFIG_ATTR		7
#define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR	8
#define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR	9
#define INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR	10
#define INTERFACE_NULL_ATTR			11

/*
 * Kinds of system information that can be requested; used as indices into
 * the table of counter request values.
 */
enum {
	PROCESSOR_BUS_TOPOLOGY,
	PROCESSOR_CONFIG,
	/* affinity domain via virtual processor */
	AFFINITY_DOMAIN_VIA_VP,
	/* affinity domain via domain */
	AFFINITY_DOMAIN_VIA_DOM,
	/* affinity domain via partition */
	AFFINITY_DOMAIN_VIA_PAR,
};
121 | |
122 | static int sysinfo_counter_request[] = { |
123 | [PROCESSOR_BUS_TOPOLOGY] = 0xD0, |
124 | [PROCESSOR_CONFIG] = 0x90, |
125 | [AFFINITY_DOMAIN_VIA_VP] = 0xA0, |
126 | [AFFINITY_DOMAIN_VIA_DOM] = 0xB0, |
127 | [AFFINITY_DOMAIN_VIA_PAR] = 0xB1, |
128 | }; |
129 | |
130 | static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); |
131 | |
132 | static unsigned long systeminfo_gpci_request(u32 req, u32 starting_index, |
133 | u16 secondary_index, char *buf, |
134 | size_t *n, struct hv_gpci_request_buffer *arg) |
135 | { |
136 | unsigned long ret; |
137 | size_t i, j; |
138 | |
139 | arg->params.counter_request = cpu_to_be32(req); |
140 | arg->params.starting_index = cpu_to_be32(starting_index); |
141 | arg->params.secondary_index = cpu_to_be16(secondary_index); |
142 | |
143 | ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, |
144 | virt_to_phys(address: arg), HGPCI_REQ_BUFFER_SIZE); |
145 | |
146 | /* |
147 | * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', |
148 | * which means that the current buffer size cannot accommodate |
149 | * all the information and a partial buffer returned. |
150 | * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER. |
151 | * |
152 | * ret value as H_AUTHORITY implies that partition is not permitted to retrieve |
153 | * performance information, and required to set |
154 | * "Enable Performance Information Collection" option. |
155 | */ |
156 | if (ret == H_AUTHORITY) |
157 | return -EPERM; |
158 | |
159 | /* |
160 | * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE |
161 | * because of invalid buffer-length/address or due to some hardware |
162 | * error. |
163 | */ |
164 | if (ret && (ret != H_PARAMETER)) |
165 | return -EIO; |
166 | |
167 | /* |
168 | * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values' |
169 | * to show the total number of counter_value array elements |
170 | * returned via hcall. |
171 | * hcall also populates 'cv_element_size' corresponds to individual |
172 | * counter_value array element size. Below loop go through all |
173 | * counter_value array elements as per their size and add it to |
174 | * the output buffer. |
175 | */ |
176 | for (i = 0; i < be16_to_cpu(arg->params.returned_values); i++) { |
177 | j = i * be16_to_cpu(arg->params.cv_element_size); |
178 | |
179 | for (; j < (i + 1) * be16_to_cpu(arg->params.cv_element_size); j++) |
180 | *n += sprintf(buf: buf + *n, fmt: "%02x" , (u8)arg->bytes[j]); |
181 | *n += sprintf(buf: buf + *n, fmt: "\n" ); |
182 | } |
183 | |
184 | if (*n >= PAGE_SIZE) { |
185 | pr_info("System information exceeds PAGE_SIZE\n" ); |
186 | return -EFBIG; |
187 | } |
188 | |
189 | return ret; |
190 | } |
191 | |
192 | static ssize_t processor_bus_topology_show(struct device *dev, struct device_attribute *attr, |
193 | char *buf) |
194 | { |
195 | struct hv_gpci_request_buffer *arg; |
196 | unsigned long ret; |
197 | size_t n = 0; |
198 | |
199 | arg = (void *)get_cpu_var(hv_gpci_reqb); |
200 | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
201 | |
202 | /* |
203 | * Pass the counter request value 0xD0 corresponds to request |
204 | * type 'Processor_bus_topology', to retrieve |
205 | * the system topology information. |
206 | * starting_index value implies the starting hardware |
207 | * chip id. |
208 | */ |
209 | ret = systeminfo_gpci_request(req: sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY], |
210 | starting_index: 0, secondary_index: 0, buf, n: &n, arg); |
211 | |
212 | if (!ret) |
213 | return n; |
214 | |
215 | if (ret != H_PARAMETER) |
216 | goto out; |
217 | |
218 | /* |
219 | * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which |
220 | * implies that buffer can't accommodate all information, and a partial buffer |
221 | * returned. To handle that, we need to make subsequent requests |
222 | * with next starting index to retrieve additional (missing) data. |
223 | * Below loop do subsequent hcalls with next starting index and add it |
224 | * to buffer util we get all the information. |
225 | */ |
226 | while (ret == H_PARAMETER) { |
227 | int returned_values = be16_to_cpu(arg->params.returned_values); |
228 | int elementsize = be16_to_cpu(arg->params.cv_element_size); |
229 | int last_element = (returned_values - 1) * elementsize; |
230 | |
231 | /* |
232 | * Since the starting index value is part of counter_value |
233 | * buffer elements, use the starting index value in the last |
234 | * element and add 1 to make subsequent hcalls. |
235 | */ |
236 | u32 starting_index = arg->bytes[last_element + 3] + |
237 | (arg->bytes[last_element + 2] << 8) + |
238 | (arg->bytes[last_element + 1] << 16) + |
239 | (arg->bytes[last_element] << 24) + 1; |
240 | |
241 | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
242 | |
243 | ret = systeminfo_gpci_request(req: sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY], |
244 | starting_index, secondary_index: 0, buf, n: &n, arg); |
245 | |
246 | if (!ret) |
247 | return n; |
248 | |
249 | if (ret != H_PARAMETER) |
250 | goto out; |
251 | } |
252 | |
253 | return n; |
254 | |
255 | out: |
256 | put_cpu_var(hv_gpci_reqb); |
257 | return ret; |
258 | } |
259 | |
260 | static ssize_t processor_config_show(struct device *dev, struct device_attribute *attr, |
261 | char *buf) |
262 | { |
263 | struct hv_gpci_request_buffer *arg; |
264 | unsigned long ret; |
265 | size_t n = 0; |
266 | |
267 | arg = (void *)get_cpu_var(hv_gpci_reqb); |
268 | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
269 | |
270 | /* |
271 | * Pass the counter request value 0x90 corresponds to request |
272 | * type 'Processor_config', to retrieve |
273 | * the system processor information. |
274 | * starting_index value implies the starting hardware |
275 | * processor index. |
276 | */ |
277 | ret = systeminfo_gpci_request(req: sysinfo_counter_request[PROCESSOR_CONFIG], |
278 | starting_index: 0, secondary_index: 0, buf, n: &n, arg); |
279 | |
280 | if (!ret) |
281 | return n; |
282 | |
283 | if (ret != H_PARAMETER) |
284 | goto out; |
285 | |
286 | /* |
287 | * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which |
288 | * implies that buffer can't accommodate all information, and a partial buffer |
289 | * returned. To handle that, we need to take subsequent requests |
290 | * with next starting index to retrieve additional (missing) data. |
291 | * Below loop do subsequent hcalls with next starting index and add it |
292 | * to buffer util we get all the information. |
293 | */ |
294 | while (ret == H_PARAMETER) { |
295 | int returned_values = be16_to_cpu(arg->params.returned_values); |
296 | int elementsize = be16_to_cpu(arg->params.cv_element_size); |
297 | int last_element = (returned_values - 1) * elementsize; |
298 | |
299 | /* |
300 | * Since the starting index is part of counter_value |
301 | * buffer elements, use the starting index value in the last |
302 | * element and add 1 to subsequent hcalls. |
303 | */ |
304 | u32 starting_index = arg->bytes[last_element + 3] + |
305 | (arg->bytes[last_element + 2] << 8) + |
306 | (arg->bytes[last_element + 1] << 16) + |
307 | (arg->bytes[last_element] << 24) + 1; |
308 | |
309 | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
310 | |
311 | ret = systeminfo_gpci_request(req: sysinfo_counter_request[PROCESSOR_CONFIG], |
312 | starting_index, secondary_index: 0, buf, n: &n, arg); |
313 | |
314 | if (!ret) |
315 | return n; |
316 | |
317 | if (ret != H_PARAMETER) |
318 | goto out; |
319 | } |
320 | |
321 | return n; |
322 | |
323 | out: |
324 | put_cpu_var(hv_gpci_reqb); |
325 | return ret; |
326 | } |
327 | |
328 | static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev, |
329 | struct device_attribute *attr, char *buf) |
330 | { |
331 | struct hv_gpci_request_buffer *arg; |
332 | unsigned long ret; |
333 | size_t n = 0; |
334 | |
335 | arg = (void *)get_cpu_var(hv_gpci_reqb); |
336 | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
337 | |
338 | /* |
339 | * Pass the counter request 0xA0 corresponds to request |
340 | * type 'Affinity_domain_information_by_virutal_processor', |
341 | * to retrieve the system affinity domain information. |
342 | * starting_index value refers to the starting hardware |
343 | * processor index. |
344 | */ |
345 | ret = systeminfo_gpci_request(req: sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP], |
346 | starting_index: 0, secondary_index: 0, buf, n: &n, arg); |
347 | |
348 | if (!ret) |
349 | return n; |
350 | |
351 | if (ret != H_PARAMETER) |
352 | goto out; |
353 | |
354 | /* |
355 | * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which |
356 | * implies that buffer can't accommodate all information, and a partial buffer |
357 | * returned. To handle that, we need to take subsequent requests |
358 | * with next secondary index to retrieve additional (missing) data. |
359 | * Below loop do subsequent hcalls with next secondary index and add it |
360 | * to buffer util we get all the information. |
361 | */ |
362 | while (ret == H_PARAMETER) { |
363 | int returned_values = be16_to_cpu(arg->params.returned_values); |
364 | int elementsize = be16_to_cpu(arg->params.cv_element_size); |
365 | int last_element = (returned_values - 1) * elementsize; |
366 | |
367 | /* |
368 | * Since the starting index and secondary index type is part of the |
369 | * counter_value buffer elements, use the starting index value in the |
370 | * last array element as subsequent starting index, and use secondary index |
371 | * value in the last array element plus 1 as subsequent secondary index. |
372 | * For counter request '0xA0', starting index points to partition id |
373 | * and secondary index points to corresponding virtual processor index. |
374 | */ |
375 | u32 starting_index = arg->bytes[last_element + 1] + (arg->bytes[last_element] << 8); |
376 | u16 secondary_index = arg->bytes[last_element + 3] + |
377 | (arg->bytes[last_element + 2] << 8) + 1; |
378 | |
379 | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
380 | |
381 | ret = systeminfo_gpci_request(req: sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP], |
382 | starting_index, secondary_index, buf, n: &n, arg); |
383 | |
384 | if (!ret) |
385 | return n; |
386 | |
387 | if (ret != H_PARAMETER) |
388 | goto out; |
389 | } |
390 | |
391 | return n; |
392 | |
393 | out: |
394 | put_cpu_var(hv_gpci_reqb); |
395 | return ret; |
396 | } |
397 | |
398 | static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device_attribute *attr, |
399 | char *buf) |
400 | { |
401 | struct hv_gpci_request_buffer *arg; |
402 | unsigned long ret; |
403 | size_t n = 0; |
404 | |
405 | arg = (void *)get_cpu_var(hv_gpci_reqb); |
406 | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
407 | |
408 | /* |
409 | * Pass the counter request 0xB0 corresponds to request |
410 | * type 'Affinity_domain_information_by_domain', |
411 | * to retrieve the system affinity domain information. |
412 | * starting_index value refers to the starting hardware |
413 | * processor index. |
414 | */ |
415 | ret = systeminfo_gpci_request(req: sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM], |
416 | starting_index: 0, secondary_index: 0, buf, n: &n, arg); |
417 | |
418 | if (!ret) |
419 | return n; |
420 | |
421 | if (ret != H_PARAMETER) |
422 | goto out; |
423 | |
424 | /* |
425 | * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which |
426 | * implies that buffer can't accommodate all information, and a partial buffer |
427 | * returned. To handle that, we need to take subsequent requests |
428 | * with next starting index to retrieve additional (missing) data. |
429 | * Below loop do subsequent hcalls with next starting index and add it |
430 | * to buffer util we get all the information. |
431 | */ |
432 | while (ret == H_PARAMETER) { |
433 | int returned_values = be16_to_cpu(arg->params.returned_values); |
434 | int elementsize = be16_to_cpu(arg->params.cv_element_size); |
435 | int last_element = (returned_values - 1) * elementsize; |
436 | |
437 | /* |
438 | * Since the starting index value is part of counter_value |
439 | * buffer elements, use the starting index value in the last |
440 | * element and add 1 to make subsequent hcalls. |
441 | */ |
442 | u32 starting_index = arg->bytes[last_element + 1] + |
443 | (arg->bytes[last_element] << 8) + 1; |
444 | |
445 | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
446 | |
447 | ret = systeminfo_gpci_request(req: sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM], |
448 | starting_index, secondary_index: 0, buf, n: &n, arg); |
449 | |
450 | if (!ret) |
451 | return n; |
452 | |
453 | if (ret != H_PARAMETER) |
454 | goto out; |
455 | } |
456 | |
457 | return n; |
458 | |
459 | out: |
460 | put_cpu_var(hv_gpci_reqb); |
461 | return ret; |
462 | } |
463 | |
464 | static void affinity_domain_via_partition_result_parse(int returned_values, |
465 | int element_size, char *buf, size_t *last_element, |
466 | size_t *n, struct hv_gpci_request_buffer *arg) |
467 | { |
468 | size_t i = 0, j = 0; |
469 | size_t k, l, m; |
470 | uint16_t total_affinity_domain_ele, size_of_each_affinity_domain_ele; |
471 | |
472 | /* |
473 | * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values' |
474 | * to show the total number of counter_value array elements |
475 | * returned via hcall. |
476 | * Unlike other request types, the data structure returned by this |
477 | * request is variable-size. For this counter request type, |
478 | * hcall populates 'cv_element_size' corresponds to minimum size of |
479 | * the structure returned i.e; the size of the structure with no domain |
480 | * information. Below loop go through all counter_value array |
481 | * to determine the number and size of each domain array element and |
482 | * add it to the output buffer. |
483 | */ |
484 | while (i < returned_values) { |
485 | k = j; |
486 | for (; k < j + element_size; k++) |
487 | *n += sprintf(buf: buf + *n, fmt: "%02x" , (u8)arg->bytes[k]); |
488 | *n += sprintf(buf: buf + *n, fmt: "\n" ); |
489 | |
490 | total_affinity_domain_ele = (u8)arg->bytes[k - 2] << 8 | (u8)arg->bytes[k - 3]; |
491 | size_of_each_affinity_domain_ele = (u8)arg->bytes[k] << 8 | (u8)arg->bytes[k - 1]; |
492 | |
493 | for (l = 0; l < total_affinity_domain_ele; l++) { |
494 | for (m = 0; m < size_of_each_affinity_domain_ele; m++) { |
495 | *n += sprintf(buf: buf + *n, fmt: "%02x" , (u8)arg->bytes[k]); |
496 | k++; |
497 | } |
498 | *n += sprintf(buf: buf + *n, fmt: "\n" ); |
499 | } |
500 | |
501 | *n += sprintf(buf: buf + *n, fmt: "\n" ); |
502 | i++; |
503 | j = k; |
504 | } |
505 | |
506 | *last_element = k; |
507 | } |
508 | |
509 | static ssize_t affinity_domain_via_partition_show(struct device *dev, struct device_attribute *attr, |
510 | char *buf) |
511 | { |
512 | struct hv_gpci_request_buffer *arg; |
513 | unsigned long ret; |
514 | size_t n = 0; |
515 | size_t last_element = 0; |
516 | u32 starting_index; |
517 | |
518 | arg = (void *)get_cpu_var(hv_gpci_reqb); |
519 | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
520 | |
521 | /* |
522 | * Pass the counter request value 0xB1 corresponds to counter request |
523 | * type 'Affinity_domain_information_by_partition', |
524 | * to retrieve the system affinity domain by partition information. |
525 | * starting_index value refers to the starting hardware |
526 | * processor index. |
527 | */ |
528 | arg->params.counter_request = cpu_to_be32(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]); |
529 | arg->params.starting_index = cpu_to_be32(0); |
530 | |
531 | ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, |
532 | virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); |
533 | |
534 | if (!ret) |
535 | goto parse_result; |
536 | |
537 | if (ret && (ret != H_PARAMETER)) |
538 | goto out; |
539 | |
540 | /* |
541 | * ret value as 'H_PARAMETER' implies that the current buffer size |
542 | * can't accommodate all the information, and a partial buffer |
543 | * returned. To handle that, we need to make subsequent requests |
544 | * with next starting index to retrieve additional (missing) data. |
545 | * Below loop do subsequent hcalls with next starting index and add it |
546 | * to buffer util we get all the information. |
547 | */ |
548 | while (ret == H_PARAMETER) { |
549 | affinity_domain_via_partition_result_parse( |
550 | be16_to_cpu(arg->params.returned_values) - 1, |
551 | be16_to_cpu(arg->params.cv_element_size), buf, |
552 | last_element: &last_element, n: &n, arg); |
553 | |
554 | if (n >= PAGE_SIZE) { |
555 | put_cpu_var(hv_gpci_reqb); |
556 | pr_debug("System information exceeds PAGE_SIZE\n" ); |
557 | return -EFBIG; |
558 | } |
559 | |
560 | /* |
561 | * Since the starting index value is part of counter_value |
562 | * buffer elements, use the starting_index value in the last |
563 | * element and add 1 to make subsequent hcalls. |
564 | */ |
565 | starting_index = (u8)arg->bytes[last_element] << 8 | |
566 | (u8)arg->bytes[last_element + 1]; |
567 | |
568 | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
569 | arg->params.counter_request = cpu_to_be32( |
570 | sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]); |
571 | arg->params.starting_index = cpu_to_be32(starting_index); |
572 | |
573 | ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, |
574 | virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); |
575 | |
576 | if (ret && (ret != H_PARAMETER)) |
577 | goto out; |
578 | } |
579 | |
580 | parse_result: |
581 | affinity_domain_via_partition_result_parse( |
582 | be16_to_cpu(arg->params.returned_values), |
583 | be16_to_cpu(arg->params.cv_element_size), |
584 | buf, last_element: &last_element, n: &n, arg); |
585 | |
586 | put_cpu_var(hv_gpci_reqb); |
587 | return n; |
588 | |
589 | out: |
590 | put_cpu_var(hv_gpci_reqb); |
591 | |
592 | /* |
593 | * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', |
594 | * which means that the current buffer size cannot accommodate |
595 | * all the information and a partial buffer returned. |
596 | * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER. |
597 | * |
598 | * ret value as H_AUTHORITY implies that partition is not permitted to retrieve |
599 | * performance information, and required to set |
600 | * "Enable Performance Information Collection" option. |
601 | */ |
602 | if (ret == H_AUTHORITY) |
603 | return -EPERM; |
604 | |
605 | /* |
606 | * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE |
607 | * because of invalid buffer-length/address or due to some hardware |
608 | * error. |
609 | */ |
610 | return -EIO; |
611 | } |
612 | |
613 | static DEVICE_ATTR_RO(kernel_version); |
614 | static DEVICE_ATTR_RO(cpumask); |
615 | |
616 | HV_CAPS_ATTR(version, "0x%x\n" ); |
617 | HV_CAPS_ATTR(ga, "%d\n" ); |
618 | HV_CAPS_ATTR(expanded, "%d\n" ); |
619 | HV_CAPS_ATTR(lab, "%d\n" ); |
620 | HV_CAPS_ATTR(collect_privileged, "%d\n" ); |
621 | |
622 | static struct attribute *interface_attrs[] = { |
623 | &dev_attr_kernel_version.attr, |
624 | &hv_caps_attr_version.attr, |
625 | &hv_caps_attr_ga.attr, |
626 | &hv_caps_attr_expanded.attr, |
627 | &hv_caps_attr_lab.attr, |
628 | &hv_caps_attr_collect_privileged.attr, |
629 | /* |
630 | * This NULL is a placeholder for the processor_bus_topology |
631 | * attribute, set in init function if applicable. |
632 | */ |
633 | NULL, |
634 | /* |
635 | * This NULL is a placeholder for the processor_config |
636 | * attribute, set in init function if applicable. |
637 | */ |
638 | NULL, |
639 | /* |
640 | * This NULL is a placeholder for the affinity_domain_via_virtual_processor |
641 | * attribute, set in init function if applicable. |
642 | */ |
643 | NULL, |
644 | /* |
645 | * This NULL is a placeholder for the affinity_domain_via_domain |
646 | * attribute, set in init function if applicable. |
647 | */ |
648 | NULL, |
649 | /* |
650 | * This NULL is a placeholder for the affinity_domain_via_partition |
651 | * attribute, set in init function if applicable. |
652 | */ |
653 | NULL, |
654 | NULL, |
655 | }; |
656 | |
657 | static struct attribute *cpumask_attrs[] = { |
658 | &dev_attr_cpumask.attr, |
659 | NULL, |
660 | }; |
661 | |
662 | static const struct attribute_group cpumask_attr_group = { |
663 | .attrs = cpumask_attrs, |
664 | }; |
665 | |
666 | static const struct attribute_group interface_group = { |
667 | .name = "interface" , |
668 | .attrs = interface_attrs, |
669 | }; |
670 | |
671 | static const struct attribute_group *attr_groups[] = { |
672 | &format_group, |
673 | &event_group, |
674 | &interface_group, |
675 | &cpumask_attr_group, |
676 | NULL, |
677 | }; |
678 | |
679 | static unsigned long single_gpci_request(u32 req, u32 starting_index, |
680 | u16 secondary_index, u8 version_in, u32 offset, u8 length, |
681 | u64 *value) |
682 | { |
683 | unsigned long ret; |
684 | size_t i; |
685 | u64 count; |
686 | struct hv_gpci_request_buffer *arg; |
687 | |
688 | arg = (void *)get_cpu_var(hv_gpci_reqb); |
689 | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
690 | |
691 | arg->params.counter_request = cpu_to_be32(req); |
692 | arg->params.starting_index = cpu_to_be32(starting_index); |
693 | arg->params.secondary_index = cpu_to_be16(secondary_index); |
694 | arg->params.counter_info_version_in = version_in; |
695 | |
696 | ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, |
697 | virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); |
698 | |
699 | /* |
700 | * ret value as 'H_PARAMETER' with detail_rc as 'GEN_BUF_TOO_SMALL', |
701 | * specifies that the current buffer size cannot accommodate |
702 | * all the information and a partial buffer returned. |
703 | * Since in this function we are only accessing data for a given starting index, |
704 | * we don't need to accommodate whole data and can get required count by |
705 | * accessing first entry data. |
706 | * Hence hcall fails only incase the ret value is other than H_SUCCESS or |
707 | * H_PARAMETER with detail_rc value as GEN_BUF_TOO_SMALL(0x1B). |
708 | */ |
709 | if (ret == H_PARAMETER && be32_to_cpu(arg->params.detail_rc) == 0x1B) |
710 | ret = 0; |
711 | |
712 | if (ret) { |
713 | pr_devel("hcall failed: 0x%lx\n" , ret); |
714 | goto out; |
715 | } |
716 | |
717 | /* |
718 | * we verify offset and length are within the zeroed buffer at event |
719 | * init. |
720 | */ |
721 | count = 0; |
722 | for (i = offset; i < offset + length; i++) |
723 | count |= (u64)(arg->bytes[i]) << ((length - 1 - (i - offset)) * 8); |
724 | |
725 | *value = count; |
726 | out: |
727 | put_cpu_var(hv_gpci_reqb); |
728 | return ret; |
729 | } |
730 | |
731 | static u64 h_gpci_get_value(struct perf_event *event) |
732 | { |
733 | u64 count; |
734 | unsigned long ret = single_gpci_request(req: event_get_request(event), |
735 | starting_index: event_get_starting_index(event), |
736 | secondary_index: event_get_secondary_index(event), |
737 | version_in: event_get_counter_info_version(event), |
738 | offset: event_get_offset(event), |
739 | length: event_get_length(event), |
740 | value: &count); |
741 | if (ret) |
742 | return 0; |
743 | return count; |
744 | } |
745 | |
746 | static void h_gpci_event_update(struct perf_event *event) |
747 | { |
748 | s64 prev; |
749 | u64 now = h_gpci_get_value(event); |
750 | prev = local64_xchg(&event->hw.prev_count, now); |
751 | local64_add(now - prev, &event->count); |
752 | } |
753 | |
754 | static void h_gpci_event_start(struct perf_event *event, int flags) |
755 | { |
756 | local64_set(&event->hw.prev_count, h_gpci_get_value(event)); |
757 | } |
758 | |
/* Stopping an event only folds the latest counter delta into its count. */
static void h_gpci_event_stop(struct perf_event *event, int flags)
{
	h_gpci_event_update(event);
}
763 | |
764 | static int h_gpci_event_add(struct perf_event *event, int flags) |
765 | { |
766 | if (flags & PERF_EF_START) |
767 | h_gpci_event_start(event, flags); |
768 | |
769 | return 0; |
770 | } |
771 | |
772 | static int h_gpci_event_init(struct perf_event *event) |
773 | { |
774 | u64 count; |
775 | u8 length; |
776 | unsigned long ret; |
777 | |
778 | /* Not our event */ |
779 | if (event->attr.type != event->pmu->type) |
780 | return -ENOENT; |
781 | |
782 | /* config2 is unused */ |
783 | if (event->attr.config2) { |
784 | pr_devel("config2 set when reserved\n" ); |
785 | return -EINVAL; |
786 | } |
787 | |
788 | /* no branch sampling */ |
789 | if (has_branch_stack(event)) |
790 | return -EOPNOTSUPP; |
791 | |
792 | length = event_get_length(event); |
793 | if (length < 1 || length > 8) { |
794 | pr_devel("length invalid\n" ); |
795 | return -EINVAL; |
796 | } |
797 | |
798 | /* last byte within the buffer? */ |
799 | if ((event_get_offset(event) + length) > HGPCI_MAX_DATA_BYTES) { |
800 | pr_devel("request outside of buffer: %zu > %zu\n" , |
801 | (size_t)event_get_offset(event) + length, |
802 | HGPCI_MAX_DATA_BYTES); |
803 | return -EINVAL; |
804 | } |
805 | |
806 | /* check if the request works... */ |
807 | ret = single_gpci_request(req: event_get_request(event), |
808 | starting_index: event_get_starting_index(event), |
809 | secondary_index: event_get_secondary_index(event), |
810 | version_in: event_get_counter_info_version(event), |
811 | offset: event_get_offset(event), |
812 | length, |
813 | value: &count); |
814 | |
815 | /* |
816 | * ret value as H_AUTHORITY implies that partition is not permitted to retrieve |
817 | * performance information, and required to set |
818 | * "Enable Performance Information Collection" option. |
819 | */ |
820 | if (ret == H_AUTHORITY) |
821 | return -EPERM; |
822 | |
823 | if (ret) { |
824 | pr_devel("gpci hcall failed\n" ); |
825 | return -EINVAL; |
826 | } |
827 | |
828 | return 0; |
829 | } |
830 | |
831 | static struct pmu h_gpci_pmu = { |
832 | .task_ctx_nr = perf_invalid_context, |
833 | |
834 | .name = "hv_gpci" , |
835 | .attr_groups = attr_groups, |
836 | .event_init = h_gpci_event_init, |
837 | .add = h_gpci_event_add, |
838 | .del = h_gpci_event_stop, |
839 | .start = h_gpci_event_start, |
840 | .stop = h_gpci_event_stop, |
841 | .read = h_gpci_event_update, |
842 | .capabilities = PERF_PMU_CAP_NO_EXCLUDE, |
843 | }; |
844 | |
845 | static int ppc_hv_gpci_cpu_online(unsigned int cpu) |
846 | { |
847 | if (cpumask_empty(srcp: &hv_gpci_cpumask)) |
848 | cpumask_set_cpu(cpu, dstp: &hv_gpci_cpumask); |
849 | |
850 | return 0; |
851 | } |
852 | |
853 | static int ppc_hv_gpci_cpu_offline(unsigned int cpu) |
854 | { |
855 | int target; |
856 | |
857 | /* Check if exiting cpu is used for collecting gpci events */ |
858 | if (!cpumask_test_and_clear_cpu(cpu, cpumask: &hv_gpci_cpumask)) |
859 | return 0; |
860 | |
861 | /* Find a new cpu to collect gpci events */ |
862 | target = cpumask_last(cpu_active_mask); |
863 | |
864 | if (target < 0 || target >= nr_cpu_ids) { |
865 | pr_err("hv_gpci: CPU hotplug init failed\n" ); |
866 | return -1; |
867 | } |
868 | |
869 | /* Migrate gpci events to the new target */ |
870 | cpumask_set_cpu(cpu: target, dstp: &hv_gpci_cpumask); |
871 | perf_pmu_migrate_context(pmu: &h_gpci_pmu, src_cpu: cpu, dst_cpu: target); |
872 | |
873 | return 0; |
874 | } |
875 | |
876 | static int hv_gpci_cpu_hotplug_init(void) |
877 | { |
878 | return cpuhp_setup_state(state: CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE, |
879 | name: "perf/powerpc/hv_gcpi:online" , |
880 | startup: ppc_hv_gpci_cpu_online, |
881 | teardown: ppc_hv_gpci_cpu_offline); |
882 | } |
883 | |
884 | static struct device_attribute *sysinfo_device_attr_create(int |
885 | sysinfo_interface_group_index, u32 req) |
886 | { |
887 | struct device_attribute *attr = NULL; |
888 | unsigned long ret; |
889 | struct hv_gpci_request_buffer *arg; |
890 | |
891 | if (sysinfo_interface_group_index < INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR || |
892 | sysinfo_interface_group_index >= INTERFACE_NULL_ATTR) { |
893 | pr_info("Wrong interface group index for system information\n" ); |
894 | return NULL; |
895 | } |
896 | |
897 | /* Check for given counter request value support */ |
898 | arg = (void *)get_cpu_var(hv_gpci_reqb); |
899 | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
900 | |
901 | arg->params.counter_request = cpu_to_be32(req); |
902 | |
903 | ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, |
904 | virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); |
905 | |
906 | put_cpu_var(hv_gpci_reqb); |
907 | |
908 | /* |
909 | * Add given counter request value attribute in the interface_attrs |
910 | * attribute array, only for valid return types. |
911 | */ |
912 | if (!ret || ret == H_AUTHORITY || ret == H_PARAMETER) { |
913 | attr = kzalloc(size: sizeof(*attr), GFP_KERNEL); |
914 | if (!attr) |
915 | return NULL; |
916 | |
917 | sysfs_attr_init(&attr->attr); |
918 | attr->attr.mode = 0444; |
919 | |
920 | switch (sysinfo_interface_group_index) { |
921 | case INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR: |
922 | attr->attr.name = "processor_bus_topology" ; |
923 | attr->show = processor_bus_topology_show; |
924 | break; |
925 | case INTERFACE_PROCESSOR_CONFIG_ATTR: |
926 | attr->attr.name = "processor_config" ; |
927 | attr->show = processor_config_show; |
928 | break; |
929 | case INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR: |
930 | attr->attr.name = "affinity_domain_via_virtual_processor" ; |
931 | attr->show = affinity_domain_via_virtual_processor_show; |
932 | break; |
933 | case INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR: |
934 | attr->attr.name = "affinity_domain_via_domain" ; |
935 | attr->show = affinity_domain_via_domain_show; |
936 | break; |
937 | case INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR: |
938 | attr->attr.name = "affinity_domain_via_partition" ; |
939 | attr->show = affinity_domain_via_partition_show; |
940 | break; |
941 | } |
942 | } else |
943 | pr_devel("hcall failed, with error: 0x%lx\n" , ret); |
944 | |
945 | return attr; |
946 | } |
947 | |
948 | static void add_sysinfo_interface_files(void) |
949 | { |
950 | int sysfs_count; |
951 | struct device_attribute *attr[INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR]; |
952 | int i; |
953 | |
954 | sysfs_count = INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR; |
955 | |
956 | /* Get device attribute for a given counter request value */ |
957 | for (i = 0; i < sysfs_count; i++) { |
958 | attr[i] = sysinfo_device_attr_create(sysinfo_interface_group_index: i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR, |
959 | req: sysinfo_counter_request[i]); |
960 | |
961 | if (!attr[i]) |
962 | goto out; |
963 | } |
964 | |
965 | /* Add sysinfo interface attributes in the interface_attrs attribute array */ |
966 | for (i = 0; i < sysfs_count; i++) |
967 | interface_attrs[i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR] = &attr[i]->attr; |
968 | |
969 | return; |
970 | |
971 | out: |
972 | /* |
973 | * The sysinfo interface attributes will be added, only if hcall passed for |
974 | * all the counter request values. Free the device attribute array incase |
975 | * of any hcall failure. |
976 | */ |
977 | if (i > 0) { |
978 | while (i >= 0) { |
979 | kfree(objp: attr[i]); |
980 | i--; |
981 | } |
982 | } |
983 | } |
984 | |
985 | static int hv_gpci_init(void) |
986 | { |
987 | int r; |
988 | unsigned long hret; |
989 | struct hv_perf_caps caps; |
990 | struct hv_gpci_request_buffer *arg; |
991 | |
992 | hv_gpci_assert_offsets_correct(); |
993 | |
994 | if (!firmware_has_feature(FW_FEATURE_LPAR)) { |
995 | pr_debug("not a virtualized system, not enabling\n" ); |
996 | return -ENODEV; |
997 | } |
998 | |
999 | hret = hv_perf_caps_get(caps: &caps); |
1000 | if (hret) { |
1001 | pr_debug("could not obtain capabilities, not enabling, rc=%ld\n" , |
1002 | hret); |
1003 | return -ENODEV; |
1004 | } |
1005 | |
1006 | /* init cpuhotplug */ |
1007 | r = hv_gpci_cpu_hotplug_init(); |
1008 | if (r) |
1009 | return r; |
1010 | |
1011 | /* sampling not supported */ |
1012 | h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; |
1013 | |
1014 | arg = (void *)get_cpu_var(hv_gpci_reqb); |
1015 | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
1016 | |
1017 | /* |
1018 | * hcall H_GET_PERF_COUNTER_INFO populates the output |
1019 | * counter_info_version value based on the system hypervisor. |
1020 | * Pass the counter request 0x10 corresponds to request type |
1021 | * 'Dispatch_timebase_by_processor', to get the supported |
1022 | * counter_info_version. |
1023 | */ |
1024 | arg->params.counter_request = cpu_to_be32(0x10); |
1025 | |
1026 | r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, |
1027 | virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); |
1028 | if (r) { |
1029 | pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n" , r); |
1030 | arg->params.counter_info_version_out = 0x8; |
1031 | } |
1032 | |
1033 | /* |
1034 | * Use counter_info_version_out value to assign |
1035 | * required hv-gpci event list. |
1036 | */ |
1037 | if (arg->params.counter_info_version_out >= 0x8) |
1038 | event_group.attrs = hv_gpci_event_attrs; |
1039 | else |
1040 | event_group.attrs = hv_gpci_event_attrs_v6; |
1041 | |
1042 | put_cpu_var(hv_gpci_reqb); |
1043 | |
1044 | r = perf_pmu_register(pmu: &h_gpci_pmu, name: h_gpci_pmu.name, type: -1); |
1045 | if (r) |
1046 | return r; |
1047 | |
1048 | /* sysinfo interface files are only available for power10 and above platforms */ |
1049 | if (PVR_VER(mfspr(SPRN_PVR)) >= PVR_POWER10) |
1050 | add_sysinfo_interface_files(); |
1051 | |
1052 | return 0; |
1053 | } |
1054 | |
1055 | device_initcall(hv_gpci_init); |
1056 | |