1 | /* |
2 | * Copyright 2013, Michael Ellerman, IBM Corp. |
3 | * Licensed under GPLv2. |
4 | */ |
5 | |
6 | #define _GNU_SOURCE |
7 | |
8 | #include <stdio.h> |
9 | #include <stdbool.h> |
10 | #include <string.h> |
11 | #include <sys/prctl.h> |
12 | |
13 | #include "event.h" |
14 | #include "utils.h" |
15 | #include "lib.h" |
16 | |
17 | extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target); |
18 | |
19 | static void setup_event(struct event *e, u64 config, int type, char *name) |
20 | { |
21 | event_init_opts(e, config, type, name); |
22 | |
23 | e->attr.disabled = 1; |
24 | e->attr.exclude_kernel = 1; |
25 | e->attr.exclude_hv = 1; |
26 | e->attr.exclude_idle = 1; |
27 | } |
28 | |
29 | static int do_count_loop(struct event *events, u64 instructions, |
30 | u64 overhead, bool report) |
31 | { |
32 | s64 difference, expected; |
33 | double percentage; |
34 | u64 dummy; |
35 | |
36 | prctl(PR_TASK_PERF_EVENTS_ENABLE); |
37 | |
38 | /* Run for 1M instructions */ |
39 | thirty_two_instruction_loop_with_ll_sc(loops: instructions >> 5, ll_sc_target: &dummy); |
40 | |
41 | prctl(PR_TASK_PERF_EVENTS_DISABLE); |
42 | |
43 | event_read(e: &events[0]); |
44 | event_read(e: &events[1]); |
45 | event_read(e: &events[2]); |
46 | |
47 | expected = instructions + overhead + (events[2].result.value * 10); |
48 | difference = events[0].result.value - expected; |
49 | percentage = (double)difference / events[0].result.value * 100; |
50 | |
51 | if (report) { |
52 | printf("-----\n" ); |
53 | event_report(e: &events[0]); |
54 | event_report(e: &events[1]); |
55 | event_report(e: &events[2]); |
56 | |
57 | printf("Looped for %llu instructions, overhead %llu\n" , instructions, overhead); |
58 | printf("Expected %llu\n" , expected); |
59 | printf("Actual %llu\n" , events[0].result.value); |
60 | printf("Delta %lld, %f%%\n" , difference, percentage); |
61 | } |
62 | |
63 | event_reset(e: &events[0]); |
64 | event_reset(e: &events[1]); |
65 | event_reset(e: &events[2]); |
66 | |
67 | if (difference < 0) |
68 | difference = -difference; |
69 | |
70 | /* Tolerate a difference below 0.0001 % */ |
71 | difference *= 10000 * 100; |
72 | if (difference / events[0].result.value) |
73 | return -1; |
74 | |
75 | return 0; |
76 | } |
77 | |
78 | /* Count how many instructions it takes to do a null loop */ |
79 | static u64 determine_overhead(struct event *events) |
80 | { |
81 | u64 current, overhead; |
82 | int i; |
83 | |
84 | do_count_loop(events, instructions: 0, overhead: 0, report: false); |
85 | overhead = events[0].result.value; |
86 | |
87 | for (i = 0; i < 100; i++) { |
88 | do_count_loop(events, instructions: 0, overhead: 0, report: false); |
89 | current = events[0].result.value; |
90 | if (current < overhead) { |
91 | printf("Replacing overhead %llu with %llu\n" , overhead, current); |
92 | overhead = current; |
93 | } |
94 | } |
95 | |
96 | return overhead; |
97 | } |
98 | |
99 | #define PM_MRK_STCX_FAIL 0x03e158 |
100 | #define PM_STCX_FAIL 0x01e058 |
101 | |
102 | static int test_body(void) |
103 | { |
104 | struct event events[3]; |
105 | u64 overhead; |
106 | |
107 | // The STCX_FAIL event we use works on Power8 or later |
108 | SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); |
109 | |
110 | setup_event(e: &events[0], config: PERF_COUNT_HW_INSTRUCTIONS, type: PERF_TYPE_HARDWARE, name: "instructions" ); |
111 | setup_event(e: &events[1], config: PERF_COUNT_HW_CPU_CYCLES, type: PERF_TYPE_HARDWARE, name: "cycles" ); |
112 | setup_event(e: &events[2], PM_STCX_FAIL, type: PERF_TYPE_RAW, name: "stcx_fail" ); |
113 | |
114 | if (event_open(e: &events[0])) { |
115 | perror("perf_event_open" ); |
116 | return -1; |
117 | } |
118 | |
119 | if (event_open_with_group(e: &events[1], group_fd: events[0].fd)) { |
120 | perror("perf_event_open" ); |
121 | return -1; |
122 | } |
123 | |
124 | if (event_open_with_group(e: &events[2], group_fd: events[0].fd)) { |
125 | perror("perf_event_open" ); |
126 | return -1; |
127 | } |
128 | |
129 | overhead = determine_overhead(events); |
130 | printf("Overhead of null loop: %llu instructions\n" , overhead); |
131 | |
132 | /* Run for 1Mi instructions */ |
133 | FAIL_IF(do_count_loop(events, instructions: 1000000, overhead, report: true)); |
134 | |
135 | /* Run for 10Mi instructions */ |
136 | FAIL_IF(do_count_loop(events, instructions: 10000000, overhead, report: true)); |
137 | |
138 | /* Run for 100Mi instructions */ |
139 | FAIL_IF(do_count_loop(events, instructions: 100000000, overhead, report: true)); |
140 | |
141 | /* Run for 1Bi instructions */ |
142 | FAIL_IF(do_count_loop(events, instructions: 1000000000, overhead, report: true)); |
143 | |
144 | /* Run for 16Bi instructions */ |
145 | FAIL_IF(do_count_loop(events, instructions: 16000000000, overhead, report: true)); |
146 | |
147 | /* Run for 64Bi instructions */ |
148 | FAIL_IF(do_count_loop(events, instructions: 64000000000, overhead, report: true)); |
149 | |
150 | event_close(e: &events[0]); |
151 | event_close(e: &events[1]); |
152 | |
153 | return 0; |
154 | } |
155 | |
156 | static int count_ll_sc(void) |
157 | { |
158 | return eat_cpu(test_function: test_body); |
159 | } |
160 | |
161 | int main(void) |
162 | { |
163 | return test_harness(count_ll_sc, "count_ll_sc" ); |
164 | } |
165 | |