1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #include "debug.h" |
3 | #include "evlist.h" |
4 | #include "evsel.h" |
5 | #include "evsel_config.h" |
6 | #include "parse-events.h" |
7 | #include <errno.h> |
8 | #include <limits.h> |
9 | #include <stdlib.h> |
10 | #include <api/fs/fs.h> |
11 | #include <subcmd/parse-options.h> |
12 | #include <perf/cpumap.h> |
13 | #include "cloexec.h" |
14 | #include "util/perf_api_probe.h" |
15 | #include "record.h" |
16 | #include "../perf-sys.h" |
17 | #include "topdown.h" |
18 | #include "map_symbol.h" |
19 | #include "mem-events.h" |
20 | |
21 | /* |
22 | * evsel__config_leader_sampling() uses special rules for leader sampling. |
23 | * However, if the leader is an AUX area event, then assume the event to sample |
24 | * is the next event. |
25 | */ |
26 | static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evlist) |
27 | { |
28 | struct evsel *leader = evsel__leader(evsel); |
29 | |
30 | if (evsel__is_aux_event(evsel: leader) || arch_topdown_sample_read(leader) || |
31 | is_mem_loads_aux_event(leader)) { |
32 | evlist__for_each_entry(evlist, evsel) { |
33 | if (evsel__leader(evsel) == leader && evsel != evsel__leader(evsel)) |
34 | return evsel; |
35 | } |
36 | } |
37 | |
38 | return leader; |
39 | } |
40 | |
41 | static u64 evsel__config_term_mask(struct evsel *evsel) |
42 | { |
43 | struct evsel_config_term *term; |
44 | struct list_head *config_terms = &evsel->config_terms; |
45 | u64 term_types = 0; |
46 | |
47 | list_for_each_entry(term, config_terms, list) { |
48 | term_types |= 1 << term->type; |
49 | } |
50 | return term_types; |
51 | } |
52 | |
53 | static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *evlist) |
54 | { |
55 | struct perf_event_attr *attr = &evsel->core.attr; |
56 | struct evsel *leader = evsel__leader(evsel); |
57 | struct evsel *read_sampler; |
58 | u64 term_types, freq_mask; |
59 | |
60 | if (!leader->sample_read) |
61 | return; |
62 | |
63 | read_sampler = evsel__read_sampler(evsel, evlist); |
64 | |
65 | if (evsel == read_sampler) |
66 | return; |
67 | |
68 | term_types = evsel__config_term_mask(evsel); |
69 | /* |
70 | * Disable sampling for all group members except those with explicit |
71 | * config terms or the leader. In the case of an AUX area event, the 2nd |
72 | * event in the group is the one that 'leads' the sampling. |
73 | */ |
74 | freq_mask = (1 << EVSEL__CONFIG_TERM_FREQ) | (1 << EVSEL__CONFIG_TERM_PERIOD); |
75 | if ((term_types & freq_mask) == 0) { |
76 | attr->freq = 0; |
77 | attr->sample_freq = 0; |
78 | attr->sample_period = 0; |
79 | } |
80 | if ((term_types & (1 << EVSEL__CONFIG_TERM_OVERWRITE)) == 0) |
81 | attr->write_backward = 0; |
82 | |
83 | /* |
84 | * We don't get a sample for slave events, we make them when delivering |
85 | * the group leader sample. Set the slave event to follow the master |
86 | * sample_type to ease up reporting. |
87 | * An AUX area event also has sample_type requirements, so also include |
88 | * the sample type bits from the leader's sample_type to cover that |
89 | * case. |
90 | */ |
91 | attr->sample_type = read_sampler->core.attr.sample_type | |
92 | leader->core.attr.sample_type; |
93 | } |
94 | |
95 | void evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain) |
96 | { |
97 | struct evsel *evsel; |
98 | bool use_sample_identifier = false; |
99 | bool use_comm_exec; |
100 | bool sample_id = opts->sample_id; |
101 | |
102 | if (perf_cpu_map__cpu(evlist->core.user_requested_cpus, 0).cpu < 0) |
103 | opts->no_inherit = true; |
104 | |
105 | use_comm_exec = perf_can_comm_exec(); |
106 | |
107 | evlist__for_each_entry(evlist, evsel) { |
108 | evsel__config(evsel, opts, callchain); |
109 | if (evsel->tracking && use_comm_exec) |
110 | evsel->core.attr.comm_exec = 1; |
111 | } |
112 | |
113 | /* Configure leader sampling here now that the sample type is known */ |
114 | evlist__for_each_entry(evlist, evsel) |
115 | evsel__config_leader_sampling(evsel, evlist); |
116 | |
117 | if (opts->full_auxtrace || opts->sample_identifier) { |
118 | /* |
119 | * Need to be able to synthesize and parse selected events with |
120 | * arbitrary sample types, which requires always being able to |
121 | * match the id. |
122 | */ |
123 | use_sample_identifier = perf_can_sample_identifier(); |
124 | sample_id = true; |
125 | } else if (evlist->core.nr_entries > 1) { |
126 | struct evsel *first = evlist__first(evlist); |
127 | |
128 | evlist__for_each_entry(evlist, evsel) { |
129 | if (evsel->core.attr.sample_type == first->core.attr.sample_type) |
130 | continue; |
131 | use_sample_identifier = perf_can_sample_identifier(); |
132 | break; |
133 | } |
134 | sample_id = true; |
135 | } |
136 | |
137 | if (sample_id) { |
138 | evlist__for_each_entry(evlist, evsel) |
139 | evsel__set_sample_id(evsel, use_sample_identifier); |
140 | } |
141 | |
142 | evlist__set_id_pos(evlist); |
143 | } |
144 | |
/*
 * Read the "kernel/perf_event_max_sample_rate" sysctl into @rate.
 *
 * Returns the result of sysctl__read_int(); callers in this file treat a
 * non-zero value as failure.
 */
static int get_max_rate(unsigned int *rate)
{
	int *value = (int *)rate;

	return sysctl__read_int("kernel/perf_event_max_sample_rate", value);
}
149 | |
150 | static int record_opts__config_freq(struct record_opts *opts) |
151 | { |
152 | bool user_freq = opts->user_freq != UINT_MAX; |
153 | bool user_interval = opts->user_interval != ULLONG_MAX; |
154 | unsigned int max_rate; |
155 | |
156 | if (user_interval && user_freq) { |
157 | pr_err("cannot set frequency and period at the same time\n" ); |
158 | return -1; |
159 | } |
160 | |
161 | if (user_interval) |
162 | opts->default_interval = opts->user_interval; |
163 | if (user_freq) |
164 | opts->freq = opts->user_freq; |
165 | |
166 | /* |
167 | * User specified count overrides default frequency. |
168 | */ |
169 | if (opts->default_interval) |
170 | opts->freq = 0; |
171 | else if (opts->freq) { |
172 | opts->default_interval = opts->freq; |
173 | } else { |
174 | pr_err("frequency and count are zero, aborting\n" ); |
175 | return -1; |
176 | } |
177 | |
178 | if (get_max_rate(rate: &max_rate)) |
179 | return 0; |
180 | |
181 | /* |
182 | * User specified frequency is over current maximum. |
183 | */ |
184 | if (user_freq && (max_rate < opts->freq)) { |
185 | if (opts->strict_freq) { |
186 | pr_err("error: Maximum frequency rate (%'u Hz) exceeded.\n" |
187 | " Please use -F freq option with a lower value or consider\n" |
188 | " tweaking /proc/sys/kernel/perf_event_max_sample_rate.\n" , |
189 | max_rate); |
190 | return -1; |
191 | } else { |
192 | pr_warning("warning: Maximum frequency rate (%'u Hz) exceeded, throttling from %'u Hz to %'u Hz.\n" |
193 | " The limit can be raised via /proc/sys/kernel/perf_event_max_sample_rate.\n" |
194 | " The kernel will lower it when perf's interrupts take too long.\n" |
195 | " Use --strict-freq to disable this throttling, refusing to record.\n" , |
196 | max_rate, opts->freq, max_rate); |
197 | |
198 | opts->freq = max_rate; |
199 | } |
200 | } |
201 | |
202 | /* |
203 | * Default frequency is over current maximum. |
204 | */ |
205 | if (max_rate < opts->freq) { |
206 | pr_warning("Lowering default frequency rate from %u to %u.\n" |
207 | "Please consider tweaking " |
208 | "/proc/sys/kernel/perf_event_max_sample_rate.\n" , |
209 | opts->freq, max_rate); |
210 | opts->freq = max_rate; |
211 | } |
212 | |
213 | return 0; |
214 | } |
215 | |
/*
 * Finalise the record options in @opts. Currently this only resolves the
 * frequency/period settings.
 *
 * Returns the result of record_opts__config_freq(): 0 on success, -1 on error.
 */
int record_opts__config(struct record_opts *opts)
{
	int ret = record_opts__config_freq(opts);

	return ret;
}
220 | |
221 | bool evlist__can_select_event(struct evlist *evlist, const char *str) |
222 | { |
223 | struct evlist *temp_evlist; |
224 | struct evsel *evsel; |
225 | int err, fd; |
226 | struct perf_cpu cpu = { .cpu = 0 }; |
227 | bool ret = false; |
228 | pid_t pid = -1; |
229 | |
230 | temp_evlist = evlist__new(); |
231 | if (!temp_evlist) |
232 | return false; |
233 | |
234 | err = parse_event(evlist: temp_evlist, str); |
235 | if (err) |
236 | goto out_delete; |
237 | |
238 | evsel = evlist__last(evlist: temp_evlist); |
239 | |
240 | if (!evlist || perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) { |
241 | struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus(); |
242 | |
243 | if (cpus) |
244 | cpu = perf_cpu_map__cpu(cpus, 0); |
245 | |
246 | perf_cpu_map__put(cpus); |
247 | } else { |
248 | cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, 0); |
249 | } |
250 | |
251 | while (1) { |
252 | fd = sys_perf_event_open(attr: &evsel->core.attr, pid, cpu: cpu.cpu, group_fd: -1, |
253 | flags: perf_event_open_cloexec_flag()); |
254 | if (fd < 0) { |
255 | if (pid == -1 && errno == EACCES) { |
256 | pid = 0; |
257 | continue; |
258 | } |
259 | goto out_delete; |
260 | } |
261 | break; |
262 | } |
263 | close(fd); |
264 | ret = true; |
265 | |
266 | out_delete: |
267 | evlist__delete(evlist: temp_evlist); |
268 | return ret; |
269 | } |
270 | |
271 | int record__parse_freq(const struct option *opt, const char *str, int unset __maybe_unused) |
272 | { |
273 | unsigned int freq; |
274 | struct record_opts *opts = opt->value; |
275 | |
276 | if (!str) |
277 | return -EINVAL; |
278 | |
279 | if (strcasecmp(s1: str, s2: "max" ) == 0) { |
280 | if (get_max_rate(rate: &freq)) { |
281 | pr_err("couldn't read /proc/sys/kernel/perf_event_max_sample_rate\n" ); |
282 | return -1; |
283 | } |
284 | pr_info("info: Using a maximum frequency rate of %'d Hz\n" , freq); |
285 | } else { |
286 | freq = atoi(str); |
287 | } |
288 | |
289 | opts->user_freq = freq; |
290 | return 0; |
291 | } |
292 | |