1 | // SPDX-License-Identifier: GPL-2.0 |
2 | // Copyright (c) 2019 Facebook |
3 | #include <linux/sched.h> |
4 | #include <linux/ptrace.h> |
5 | #include <stdint.h> |
6 | #include <stddef.h> |
7 | #include <stdbool.h> |
8 | #include <linux/bpf.h> |
9 | #include <bpf/bpf_helpers.h> |
10 | #include "bpf_misc.h" |
11 | #include "bpf_compiler.h" |
12 | |
/* Capacity, in bytes, of the fixed-size string buffers used by this program. */
#define FUNCTION_NAME_LEN	64	/* Symbol.name */
#define FILE_NAME_LEN		128	/* Symbol.file */
#define TASK_COMM_LEN		16	/* Event.comm  */
16 | |
/*
 * Byte offsets that are added to pointers read from the traced process in
 * order to locate individual fields of its interpreter structures.
 */
typedef struct {
	int PyThreadState_frame;	/* thread state -> current frame pointer */
	int PyThreadState_thread;	/* thread state -> creating pthread id   */
	int PyFrameObject_back;		/* frame -> previous frame               */
	int PyFrameObject_code;		/* frame -> code object                  */
	int PyFrameObject_lineno;	/* not referenced by the code shown here */
	int PyCodeObject_filename;	/* code object -> filename string object */
	int PyCodeObject_name;		/* code object -> name string object     */
	int String_data;		/* string object -> character data       */
	int String_size;		/* not referenced by the code shown here */
} OffsetConfig;
28 | |
29 | typedef struct { |
30 | uintptr_t current_state_addr; |
31 | uintptr_t tls_key_addr; |
32 | OffsetConfig offsets; |
33 | bool use_tls; |
34 | } PidData; |
35 | |
/* Counters kept in statsmap. */
typedef struct {
	/* Incremented once per event that reaches the output stage. */
	uint32_t success;
} Stats;
39 | |
40 | typedef struct { |
41 | char name[FUNCTION_NAME_LEN]; |
42 | char file[FILE_NAME_LEN]; |
43 | } Symbol; |
44 | |
45 | typedef struct { |
46 | uint32_t pid; |
47 | uint32_t tid; |
48 | char comm[TASK_COMM_LEN]; |
49 | int32_t kernel_stack_id; |
50 | int32_t user_stack_id; |
51 | bool thread_current; |
52 | bool pthread_match; |
53 | bool stack_complete; |
54 | int16_t stack_len; |
55 | int32_t stack[STACK_MAX_LEN]; |
56 | |
57 | int has_meta; |
58 | int metadata; |
59 | char dummy_safeguard; |
60 | } Event; |
61 | |
62 | |
/* Local process-id type; the kernel uapi headers included above do not provide one. */
typedef int pid_t;
64 | |
/*
 * Raw pointers collected from a single interpreter frame. All of them are
 * addresses inside the traced process and are only ever passed to
 * bpf_probe_read_user*(), never dereferenced directly.
 */
typedef struct {
	void *f_back;		/* PyFrameObject.f_back: the previous frame   */
	void *f_code;		/* PyFrameObject.f_code: the PyCodeObject     */
	void *co_filename;	/* PyCodeObject.co_filename                   */
	void *co_name;		/* PyCodeObject.co_name                       */
} FrameData;
71 | |
72 | #ifdef SUBPROGS |
73 | __noinline |
74 | #else |
75 | __always_inline |
76 | #endif |
77 | static void *get_thread_state(void *tls_base, PidData *pidData) |
78 | { |
79 | void* thread_state; |
80 | int key; |
81 | |
82 | bpf_probe_read_user(&key, sizeof(key), (void*)(long)pidData->tls_key_addr); |
83 | bpf_probe_read_user(&thread_state, sizeof(thread_state), |
84 | tls_base + 0x310 + key * 0x10 + 0x08); |
85 | return thread_state; |
86 | } |
87 | |
88 | static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData, |
89 | FrameData *frame, Symbol *symbol) |
90 | { |
91 | // read data from PyFrameObject |
92 | bpf_probe_read_user(&frame->f_back, |
93 | sizeof(frame->f_back), |
94 | frame_ptr + pidData->offsets.PyFrameObject_back); |
95 | bpf_probe_read_user(&frame->f_code, |
96 | sizeof(frame->f_code), |
97 | frame_ptr + pidData->offsets.PyFrameObject_code); |
98 | |
99 | // read data from PyCodeObject |
100 | if (!frame->f_code) |
101 | return false; |
102 | bpf_probe_read_user(&frame->co_filename, |
103 | sizeof(frame->co_filename), |
104 | frame->f_code + pidData->offsets.PyCodeObject_filename); |
105 | bpf_probe_read_user(&frame->co_name, |
106 | sizeof(frame->co_name), |
107 | frame->f_code + pidData->offsets.PyCodeObject_name); |
108 | // read actual names into symbol |
109 | if (frame->co_filename) |
110 | bpf_probe_read_user_str(&symbol->file, |
111 | sizeof(symbol->file), |
112 | frame->co_filename + |
113 | pidData->offsets.String_data); |
114 | if (frame->co_name) |
115 | bpf_probe_read_user_str(&symbol->name, |
116 | sizeof(symbol->name), |
117 | frame->co_name + |
118 | pidData->offsets.String_data); |
119 | return true; |
120 | } |
121 | |
122 | struct { |
123 | __uint(type, BPF_MAP_TYPE_HASH); |
124 | __uint(max_entries, 1); |
125 | __type(key, int); |
126 | __type(value, PidData); |
127 | } pidmap SEC(".maps" ); |
128 | |
129 | struct { |
130 | __uint(type, BPF_MAP_TYPE_HASH); |
131 | __uint(max_entries, 1); |
132 | __type(key, int); |
133 | __type(value, Event); |
134 | } eventmap SEC(".maps" ); |
135 | |
136 | struct { |
137 | __uint(type, BPF_MAP_TYPE_HASH); |
138 | __uint(max_entries, 1); |
139 | __type(key, Symbol); |
140 | __type(value, int); |
141 | } symbolmap SEC(".maps" ); |
142 | |
143 | struct { |
144 | __uint(type, BPF_MAP_TYPE_ARRAY); |
145 | __uint(max_entries, 1); |
146 | __type(key, int); |
147 | __type(value, Stats); |
148 | } statsmap SEC(".maps" ); |
149 | |
150 | struct { |
151 | __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); |
152 | __uint(max_entries, 32); |
153 | __uint(key_size, sizeof(int)); |
154 | __uint(value_size, sizeof(int)); |
155 | } perfmap SEC(".maps" ); |
156 | |
157 | struct { |
158 | __uint(type, BPF_MAP_TYPE_STACK_TRACE); |
159 | __uint(max_entries, 1000); |
160 | __uint(key_size, sizeof(int)); |
161 | __uint(value_size, sizeof(long long) * 127); |
162 | } stackmap SEC(".maps" ); |
163 | |
164 | #ifdef USE_BPF_LOOP |
165 | struct process_frame_ctx { |
166 | int cur_cpu; |
167 | int32_t *symbol_counter; |
168 | void *frame_ptr; |
169 | FrameData *frame; |
170 | PidData *pidData; |
171 | Symbol *sym; |
172 | Event *event; |
173 | bool done; |
174 | }; |
175 | |
176 | static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx) |
177 | { |
178 | int zero = 0; |
179 | void *frame_ptr = ctx->frame_ptr; |
180 | PidData *pidData = ctx->pidData; |
181 | FrameData *frame = ctx->frame; |
182 | int32_t *symbol_counter = ctx->symbol_counter; |
183 | int cur_cpu = ctx->cur_cpu; |
184 | Event *event = ctx->event; |
185 | Symbol *sym = ctx->sym; |
186 | |
187 | if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) { |
188 | int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu; |
189 | int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym); |
190 | |
191 | if (!symbol_id) { |
192 | bpf_map_update_elem(&symbolmap, sym, &zero, 0); |
193 | symbol_id = bpf_map_lookup_elem(&symbolmap, sym); |
194 | if (!symbol_id) { |
195 | ctx->done = true; |
196 | return 1; |
197 | } |
198 | } |
199 | if (*symbol_id == new_symbol_id) |
200 | (*symbol_counter)++; |
201 | |
202 | barrier_var(i); |
203 | if (i >= STACK_MAX_LEN) |
204 | return 1; |
205 | |
206 | event->stack[i] = *symbol_id; |
207 | |
208 | event->stack_len = i + 1; |
209 | frame_ptr = frame->f_back; |
210 | } |
211 | return 0; |
212 | } |
213 | #endif /* USE_BPF_LOOP */ |
214 | |
215 | #ifdef GLOBAL_FUNC |
216 | __noinline |
217 | #elif defined(SUBPROGS) |
218 | static __noinline |
219 | #else |
220 | static __always_inline |
221 | #endif |
222 | int __on_event(struct bpf_raw_tracepoint_args *ctx) |
223 | { |
224 | uint64_t pid_tgid = bpf_get_current_pid_tgid(); |
225 | pid_t pid = (pid_t)(pid_tgid >> 32); |
226 | PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid); |
227 | if (!pidData) |
228 | return 0; |
229 | |
230 | int zero = 0; |
231 | Event* event = bpf_map_lookup_elem(&eventmap, &zero); |
232 | if (!event) |
233 | return 0; |
234 | |
235 | event->pid = pid; |
236 | |
237 | event->tid = (pid_t)pid_tgid; |
238 | bpf_get_current_comm(&event->comm, sizeof(event->comm)); |
239 | |
240 | event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK); |
241 | event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0); |
242 | |
243 | void* thread_state_current = (void*)0; |
244 | bpf_probe_read_user(&thread_state_current, |
245 | sizeof(thread_state_current), |
246 | (void*)(long)pidData->current_state_addr); |
247 | |
248 | struct task_struct* task = (struct task_struct*)bpf_get_current_task(); |
249 | void* tls_base = (void*)task; |
250 | |
251 | void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData) |
252 | : thread_state_current; |
253 | event->thread_current = thread_state == thread_state_current; |
254 | |
255 | if (pidData->use_tls) { |
256 | uint64_t pthread_created; |
257 | uint64_t pthread_self; |
258 | bpf_probe_read_user(&pthread_self, sizeof(pthread_self), |
259 | tls_base + 0x10); |
260 | |
261 | bpf_probe_read_user(&pthread_created, |
262 | sizeof(pthread_created), |
263 | thread_state + |
264 | pidData->offsets.PyThreadState_thread); |
265 | event->pthread_match = pthread_created == pthread_self; |
266 | } else { |
267 | event->pthread_match = 1; |
268 | } |
269 | |
270 | if (event->pthread_match || !pidData->use_tls) { |
271 | void* frame_ptr; |
272 | FrameData frame; |
273 | Symbol sym = {}; |
274 | int cur_cpu = bpf_get_smp_processor_id(); |
275 | |
276 | bpf_probe_read_user(&frame_ptr, |
277 | sizeof(frame_ptr), |
278 | thread_state + |
279 | pidData->offsets.PyThreadState_frame); |
280 | |
281 | int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym); |
282 | if (symbol_counter == NULL) |
283 | return 0; |
284 | #ifdef USE_BPF_LOOP |
285 | struct process_frame_ctx ctx = { |
286 | .cur_cpu = cur_cpu, |
287 | .symbol_counter = symbol_counter, |
288 | .frame_ptr = frame_ptr, |
289 | .frame = &frame, |
290 | .pidData = pidData, |
291 | .sym = &sym, |
292 | .event = event, |
293 | }; |
294 | |
295 | bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0); |
296 | if (ctx.done) |
297 | return 0; |
298 | #else |
299 | #if defined(USE_ITER) |
300 | /* no for loop, no unrolling */ |
301 | #elif defined(NO_UNROLL) |
302 | __pragma_loop_no_unroll |
303 | #elif defined(UNROLL_COUNT) |
304 | __pragma_loop_unroll_count(UNROLL_COUNT) |
305 | #else |
306 | __pragma_loop_unroll_full |
307 | #endif /* NO_UNROLL */ |
308 | /* Unwind python stack */ |
309 | #ifdef USE_ITER |
310 | int i; |
311 | bpf_for(i, 0, STACK_MAX_LEN) { |
312 | #else /* !USE_ITER */ |
313 | for (int i = 0; i < STACK_MAX_LEN; ++i) { |
314 | #endif |
315 | if (frame_ptr && get_frame_data(frame_ptr, pidData, frame: &frame, symbol: &sym)) { |
316 | int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu; |
317 | int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym); |
318 | if (!symbol_id) { |
319 | bpf_map_update_elem(&symbolmap, &sym, &zero, 0); |
320 | symbol_id = bpf_map_lookup_elem(&symbolmap, &sym); |
321 | if (!symbol_id) |
322 | return 0; |
323 | } |
324 | if (*symbol_id == new_symbol_id) |
325 | (*symbol_counter)++; |
326 | event->stack[i] = *symbol_id; |
327 | event->stack_len = i + 1; |
328 | frame_ptr = frame.f_back; |
329 | } |
330 | } |
331 | #endif /* USE_BPF_LOOP */ |
332 | event->stack_complete = frame_ptr == NULL; |
333 | } else { |
334 | event->stack_complete = 1; |
335 | } |
336 | |
337 | Stats* stats = bpf_map_lookup_elem(&statsmap, &zero); |
338 | if (stats) |
339 | stats->success++; |
340 | |
341 | event->has_meta = 0; |
342 | bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata)); |
343 | return 0; |
344 | } |
345 | |
346 | SEC("raw_tracepoint/kfree_skb" ) |
347 | int on_event(struct bpf_raw_tracepoint_args* ctx) |
348 | { |
349 | int ret = 0; |
350 | ret |= __on_event(ctx); |
351 | ret |= __on_event(ctx); |
352 | ret |= __on_event(ctx); |
353 | ret |= __on_event(ctx); |
354 | ret |= __on_event(ctx); |
355 | return ret; |
356 | } |
357 | |
358 | char _license[] SEC("license" ) = "GPL" ; |
359 | |