1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | #define _GNU_SOURCE |
4 | #include <errno.h> |
5 | #include <stdio.h> |
6 | #include <stdlib.h> |
7 | #include <signal.h> |
8 | #include <sched.h> |
9 | #include <string.h> |
10 | #include <unistd.h> |
11 | #include <fcntl.h> |
12 | #include <locale.h> |
13 | #include <sys/types.h> |
14 | #include <sys/stat.h> |
15 | #include <sys/time.h> |
16 | #include <sys/wait.h> |
17 | |
18 | #include <bpf/bpf.h> |
19 | #include <bpf/libbpf.h> |
20 | |
/*
 * File descriptors of the two BPF maps read by this program.  They are
 * assigned in main() and kept at file scope so that every reporting path
 * can reach them.
 */
static int cstate_map_fd;
static int pstate_map_fd;
22 | |
/* Dimensions of the statistics table kept and printed by this program. */
#define MAX_CPU			8
#define MAX_PSTATE_ENTRIES	5
#define MAX_CSTATE_ENTRIES	3
#define MAX_STARS		40

/*
 * sysfs attribute and the two values written to it, low then high, to
 * provoke a frequency-change trace event.  The values are passed through
 * as text; cpufreq sysfs frequencies are presumably expressed in kHz.
 *
 * NOTE(review): read as kHz, "12000000" is 12 GHz, which no CPU reaches;
 * presumably the kernel clamps it to the hardware maximum.  Confirm this
 * is intended rather than a typo for "1200000" (1.2 GHz).
 */
#define CPUFREQ_MAX_SYSFS_PATH	"/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq"
#define CPUFREQ_LOWEST_FREQ	"208000"
#define CPUFREQ_HIGHEST_FREQ	"12000000"
31 | |
32 | struct cpu_stat_data { |
33 | unsigned long cstate[MAX_CSTATE_ENTRIES]; |
34 | unsigned long pstate[MAX_PSTATE_ENTRIES]; |
35 | }; |
36 | |
37 | static struct cpu_stat_data stat_data[MAX_CPU]; |
38 | |
39 | static void cpu_stat_print(void) |
40 | { |
41 | int i, j; |
42 | char state_str[sizeof("cstate-9" )]; |
43 | struct cpu_stat_data *data; |
44 | |
45 | /* Clear screen */ |
46 | printf(format: "\033[2J" ); |
47 | |
48 | /* Header */ |
49 | printf(format: "\nCPU states statistics:\n" ); |
50 | printf(format: "%-10s " , "state(ms)" ); |
51 | |
52 | for (i = 0; i < MAX_CSTATE_ENTRIES; i++) { |
53 | sprintf(s: state_str, format: "cstate-%d" , i); |
54 | printf(format: "%-11s " , state_str); |
55 | } |
56 | |
57 | for (i = 0; i < MAX_PSTATE_ENTRIES; i++) { |
58 | sprintf(s: state_str, format: "pstate-%d" , i); |
59 | printf(format: "%-11s " , state_str); |
60 | } |
61 | |
62 | printf(format: "\n" ); |
63 | |
64 | for (j = 0; j < MAX_CPU; j++) { |
65 | data = &stat_data[j]; |
66 | |
67 | printf(format: "CPU-%-6d " , j); |
68 | for (i = 0; i < MAX_CSTATE_ENTRIES; i++) |
69 | printf(format: "%-11ld " , data->cstate[i] / 1000000); |
70 | |
71 | for (i = 0; i < MAX_PSTATE_ENTRIES; i++) |
72 | printf(format: "%-11ld " , data->pstate[i] / 1000000); |
73 | |
74 | printf(format: "\n" ); |
75 | } |
76 | } |
77 | |
78 | static void cpu_stat_update(int cstate_fd, int pstate_fd) |
79 | { |
80 | unsigned long key, value; |
81 | int c, i; |
82 | |
83 | for (c = 0; c < MAX_CPU; c++) { |
84 | for (i = 0; i < MAX_CSTATE_ENTRIES; i++) { |
85 | key = c * MAX_CSTATE_ENTRIES + i; |
86 | bpf_map_lookup_elem(cstate_fd, &key, &value); |
87 | stat_data[c].cstate[i] = value; |
88 | } |
89 | |
90 | for (i = 0; i < MAX_PSTATE_ENTRIES; i++) { |
91 | key = c * MAX_PSTATE_ENTRIES + i; |
92 | bpf_map_lookup_elem(pstate_fd, &key, &value); |
93 | stat_data[c].pstate[i] = value; |
94 | } |
95 | } |
96 | } |
97 | |
/*
 * This function is adapted from the 'idlestat' tool, function
 * idlestat_wake_all() in idlestate.c.
 *
 * It pins the calling task to each allowed CPU in turn, so that every one
 * of them is woken up once to handle scheduling and therefore emits the
 * ftrace event 'trace_cpu_idle'.  The original affinity mask is restored
 * before returning.
 *
 * Return: 0 on success, -1 if the CPU count, the current CPU or the
 * current affinity mask cannot be obtained.
 */
static int cpu_stat_inject_cpu_idle_event(void)
{
	cpu_set_t original_cpumask;
	cpu_set_t cpumask;
	long ncpus;
	int rcpu, i;

	ncpus = sysconf(_SC_NPROCESSORS_CONF);
	if (ncpus < 0)
		return -1;

	rcpu = sched_getcpu();
	if (rcpu < 0)
		return -1;

	/*
	 * Keep track of the CPUs we will run on.  Bail out if the mask cannot
	 * be read: it would otherwise be used uninitialised, both in the loop
	 * and when the affinity is restored below.
	 */
	if (sched_getaffinity(0, sizeof(original_cpumask), &original_cpumask) < 0)
		return -1;

	for (i = 0; i < ncpus; i++) {

		/* Pointless to wake up ourself */
		if (i == rcpu)
			continue;

		/* Pointless to wake CPUs we will not run on */
		if (!CPU_ISSET(i, &original_cpumask))
			continue;

		CPU_ZERO(&cpumask);
		CPU_SET(i, &cpumask);

		/* Best effort: a CPU that cannot be selected is simply skipped */
		sched_setaffinity(0, sizeof(cpumask), &cpumask);
	}

	/* Enable all the CPUs of the original mask */
	sched_setaffinity(0, sizeof(original_cpumask), &original_cpumask);
	return 0;
}
143 | |
144 | /* |
145 | * It's possible to have no any frequency change for long time and cannot |
146 | * get ftrace event 'trace_cpu_frequency' for long period, this introduces |
147 | * big deviation for pstate statistics. |
148 | * |
149 | * To solve this issue, below code forces to set 'scaling_max_freq' to 208MHz |
150 | * for triggering ftrace event 'trace_cpu_frequency' and then recovery back to |
151 | * the maximum frequency value 1.2GHz. |
152 | */ |
153 | static int cpu_stat_inject_cpu_frequency_event(void) |
154 | { |
155 | int len, fd; |
156 | |
157 | fd = open(CPUFREQ_MAX_SYSFS_PATH, O_WRONLY); |
158 | if (fd < 0) { |
159 | printf(format: "failed to open scaling_max_freq, errno=%d\n" , errno); |
160 | return fd; |
161 | } |
162 | |
163 | len = write(fd: fd, CPUFREQ_LOWEST_FREQ, n: strlen(CPUFREQ_LOWEST_FREQ)); |
164 | if (len < 0) { |
165 | printf(format: "failed to open scaling_max_freq, errno=%d\n" , errno); |
166 | goto err; |
167 | } |
168 | |
169 | len = write(fd: fd, CPUFREQ_HIGHEST_FREQ, n: strlen(CPUFREQ_HIGHEST_FREQ)); |
170 | if (len < 0) { |
171 | printf(format: "failed to open scaling_max_freq, errno=%d\n" , errno); |
172 | goto err; |
173 | } |
174 | |
175 | err: |
176 | close(fd: fd); |
177 | return len; |
178 | } |
179 | |
180 | static void int_exit(int sig) |
181 | { |
182 | cpu_stat_inject_cpu_idle_event(); |
183 | cpu_stat_inject_cpu_frequency_event(); |
184 | cpu_stat_update(cstate_fd: cstate_map_fd, pstate_fd: pstate_map_fd); |
185 | cpu_stat_print(); |
186 | exit(status: 0); |
187 | } |
188 | |
189 | int main(int argc, char **argv) |
190 | { |
191 | struct bpf_link *link = NULL; |
192 | struct bpf_program *prog; |
193 | struct bpf_object *obj; |
194 | char filename[256]; |
195 | int ret; |
196 | |
197 | snprintf(s: filename, maxlen: sizeof(filename), format: "%s_kern.o" , argv[0]); |
198 | obj = bpf_object__open_file(filename, NULL); |
199 | if (libbpf_get_error(obj)) { |
200 | fprintf(stderr, format: "ERROR: opening BPF object file failed\n" ); |
201 | return 0; |
202 | } |
203 | |
204 | prog = bpf_object__find_program_by_name(obj, "bpf_prog1" ); |
205 | if (!prog) { |
206 | printf(format: "finding a prog in obj file failed\n" ); |
207 | goto cleanup; |
208 | } |
209 | |
210 | /* load BPF program */ |
211 | if (bpf_object__load(obj)) { |
212 | fprintf(stderr, format: "ERROR: loading BPF object file failed\n" ); |
213 | goto cleanup; |
214 | } |
215 | |
216 | cstate_map_fd = bpf_object__find_map_fd_by_name(obj, "cstate_duration" ); |
217 | pstate_map_fd = bpf_object__find_map_fd_by_name(obj, "pstate_duration" ); |
218 | if (cstate_map_fd < 0 || pstate_map_fd < 0) { |
219 | fprintf(stderr, format: "ERROR: finding a map in obj file failed\n" ); |
220 | goto cleanup; |
221 | } |
222 | |
223 | link = bpf_program__attach(prog); |
224 | if (libbpf_get_error(link)) { |
225 | fprintf(stderr, format: "ERROR: bpf_program__attach failed\n" ); |
226 | link = NULL; |
227 | goto cleanup; |
228 | } |
229 | |
230 | ret = cpu_stat_inject_cpu_idle_event(); |
231 | if (ret < 0) |
232 | return 1; |
233 | |
234 | ret = cpu_stat_inject_cpu_frequency_event(); |
235 | if (ret < 0) |
236 | return 1; |
237 | |
238 | signal(SIGINT, handler: int_exit); |
239 | signal(SIGTERM, handler: int_exit); |
240 | |
241 | while (1) { |
242 | cpu_stat_update(cstate_fd: cstate_map_fd, pstate_fd: pstate_map_fd); |
243 | cpu_stat_print(); |
244 | sleep(seconds: 5); |
245 | } |
246 | |
247 | cleanup: |
248 | bpf_link__destroy(link); |
249 | bpf_object__close(obj); |
250 | return 0; |
251 | } |
252 | |