// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2021 VMware Inc, Steven Rostedt <rostedt@goodmis.org>
 */
#include <linux/spinlock.h>
#include <linux/irq_work.h>
#include <linux/slab.h>
#include "trace.h"

/* See pid_list.h for details */

static inline union lower_chunk *get_lower_chunk(struct trace_pid_list *pid_list)
{
	union lower_chunk *chunk;

	lockdep_assert_held(&pid_list->lock);

	if (!pid_list->lower_list)
		return NULL;

	chunk = pid_list->lower_list;
	pid_list->lower_list = chunk->next;
	pid_list->free_lower_chunks--;
	WARN_ON_ONCE(pid_list->free_lower_chunks < 0);
	chunk->next = NULL;
	/*
	 * If a refill needs to happen, it can not happen here
	 * as the scheduler run queue locks are held.
	 */
	if (pid_list->free_lower_chunks <= CHUNK_REALLOC)
		irq_work_queue(&pid_list->refill_irqwork);

	return chunk;
}

static inline union upper_chunk *get_upper_chunk(struct trace_pid_list *pid_list)
{
	union upper_chunk *chunk;

	lockdep_assert_held(&pid_list->lock);

	if (!pid_list->upper_list)
		return NULL;

	chunk = pid_list->upper_list;
	pid_list->upper_list = chunk->next;
	pid_list->free_upper_chunks--;
	WARN_ON_ONCE(pid_list->free_upper_chunks < 0);
	chunk->next = NULL;
	/*
	 * If a refill needs to happen, it can not happen here
	 * as the scheduler run queue locks are held.
	 */
	if (pid_list->free_upper_chunks <= CHUNK_REALLOC)
		irq_work_queue(&pid_list->refill_irqwork);

	return chunk;
}

static inline void put_lower_chunk(struct trace_pid_list *pid_list,
				   union lower_chunk *chunk)
{
	lockdep_assert_held(&pid_list->lock);

	chunk->next = pid_list->lower_list;
	pid_list->lower_list = chunk;
	pid_list->free_lower_chunks++;
}

static inline void put_upper_chunk(struct trace_pid_list *pid_list,
				   union upper_chunk *chunk)
{
	lockdep_assert_held(&pid_list->lock);

	chunk->next = pid_list->upper_list;
	pid_list->upper_list = chunk;
	pid_list->free_upper_chunks++;
}

static inline bool upper_empty(union upper_chunk *chunk)
{
	/*
	 * If chunk->data has no lower chunks, it will be the same
	 * as a zeroed bitmask. Use find_first_bit() to test it
	 * and if it doesn't find any bits set, then the array
	 * is empty.
	 */
	int bit = find_first_bit((unsigned long *)chunk->data,
				 sizeof(chunk->data) * 8);
	return bit >= sizeof(chunk->data) * 8;
}

static inline int pid_split(unsigned int pid, unsigned int *upper1,
			    unsigned int *upper2, unsigned int *lower)
{
	/* MAX_PID should cover all pids */
	BUILD_BUG_ON(MAX_PID < PID_MAX_LIMIT);

	/* In case a bad pid is passed in, then fail */
	if (unlikely(pid >= MAX_PID))
		return -1;

	*upper1 = (pid >> UPPER1_SHIFT) & UPPER_MASK;
	*upper2 = (pid >> UPPER2_SHIFT) & UPPER_MASK;
	*lower = pid & LOWER_MASK;

	return 0;
}

static inline unsigned int pid_join(unsigned int upper1,
				    unsigned int upper2, unsigned int lower)
{
	return ((upper1 & UPPER_MASK) << UPPER1_SHIFT) |
		((upper2 & UPPER_MASK) << UPPER2_SHIFT) |
		(lower & LOWER_MASK);
}
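
/*
 * Illustration (sketch only, not compiled): pid_split() and pid_join()
 * are inverses for any valid pid, so splitting a pid into its three
 * indexes and joining them back must return the original value. The
 * shift and mask constants come from pid_list.h.
 *
 *	unsigned int upper1, upper2, lower;
 *
 *	if (!pid_split(pid, &upper1, &upper2, &lower))
 *		WARN_ON(pid_join(upper1, upper2, lower) != pid);
 */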

/**
 * trace_pid_list_is_set - test if the pid is set in the list
 * @pid_list: The pid list to test
 * @pid: The pid to see if set in the list.
 *
 * Tests if @pid is set in the @pid_list. This is usually called
 * from the scheduler when a task is scheduled, to check whether
 * the task's pid should be traced.
 *
 * Return true if the pid is in the list, false otherwise.
 */
bool trace_pid_list_is_set(struct trace_pid_list *pid_list, unsigned int pid)
{
	union upper_chunk *upper_chunk;
	union lower_chunk *lower_chunk;
	unsigned long flags;
	unsigned int upper1;
	unsigned int upper2;
	unsigned int lower;
	bool ret = false;

	if (!pid_list)
		return false;

	if (pid_split(pid, &upper1, &upper2, &lower) < 0)
		return false;

	raw_spin_lock_irqsave(&pid_list->lock, flags);
	upper_chunk = pid_list->upper[upper1];
	if (upper_chunk) {
		lower_chunk = upper_chunk->data[upper2];
		if (lower_chunk)
			ret = test_bit(lower, lower_chunk->data);
	}
	raw_spin_unlock_irqrestore(&pid_list->lock, flags);

	return ret;
}
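
/*
 * Example usage (illustrative sketch only; "filtered_pids" and
 * "trace_this_task()" are hypothetical names): a scheduler hook
 * deciding whether the task being switched in should be traced.
 *
 *	static void probe_sched_switch(..., struct task_struct *next)
 *	{
 *		if (trace_pid_list_is_set(filtered_pids, next->pid))
 *			trace_this_task(next);
 *	}
 */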

/**
 * trace_pid_list_set - add a pid to the list
 * @pid_list: The pid list to add the @pid to.
 * @pid: The pid to add.
 *
 * Adds @pid to @pid_list. This is usually done explicitly by a user
 * adding a task to be traced, or indirectly by the fork function
 * when children should be traced and a task's pid is in the list.
 *
 * Return 0 on success, negative otherwise.
 */
int trace_pid_list_set(struct trace_pid_list *pid_list, unsigned int pid)
{
	union upper_chunk *upper_chunk;
	union lower_chunk *lower_chunk;
	unsigned long flags;
	unsigned int upper1;
	unsigned int upper2;
	unsigned int lower;
	int ret;

	if (!pid_list)
		return -ENODEV;

	if (pid_split(pid, &upper1, &upper2, &lower) < 0)
		return -EINVAL;

	raw_spin_lock_irqsave(&pid_list->lock, flags);
	upper_chunk = pid_list->upper[upper1];
	if (!upper_chunk) {
		upper_chunk = get_upper_chunk(pid_list);
		if (!upper_chunk) {
			ret = -ENOMEM;
			goto out;
		}
		pid_list->upper[upper1] = upper_chunk;
	}
	lower_chunk = upper_chunk->data[upper2];
	if (!lower_chunk) {
		lower_chunk = get_lower_chunk(pid_list);
		if (!lower_chunk) {
			ret = -ENOMEM;
			goto out;
		}
		upper_chunk->data[upper2] = lower_chunk;
	}
	set_bit(lower, lower_chunk->data);
	ret = 0;
out:
	raw_spin_unlock_irqrestore(&pid_list->lock, flags);
	return ret;
}
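
/*
 * Example usage (illustrative sketch only; the fork hook and the
 * "parent"/"child" variables are hypothetical): mirroring a fork into
 * the list so children of a traced task are traced too.
 *
 *	if (trace_pid_list_is_set(pid_list, parent->pid)) {
 *		if (trace_pid_list_set(pid_list, child->pid) < 0)
 *			pr_warn("failed to trace child %d\n", child->pid);
 *	}
 */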

/**
 * trace_pid_list_clear - remove a pid from the list
 * @pid_list: The pid list to remove the @pid from.
 * @pid: The pid to remove.
 *
 * Removes @pid from @pid_list. This is usually done explicitly by a user
 * removing tasks from tracing, or indirectly by the exit function
 * when a task that is set to be traced exits.
 *
 * Return 0 on success, negative otherwise.
 */
int trace_pid_list_clear(struct trace_pid_list *pid_list, unsigned int pid)
{
	union upper_chunk *upper_chunk;
	union lower_chunk *lower_chunk;
	unsigned long flags;
	unsigned int upper1;
	unsigned int upper2;
	unsigned int lower;

	if (!pid_list)
		return -ENODEV;

	if (pid_split(pid, &upper1, &upper2, &lower) < 0)
		return -EINVAL;

	raw_spin_lock_irqsave(&pid_list->lock, flags);
	upper_chunk = pid_list->upper[upper1];
	if (!upper_chunk)
		goto out;

	lower_chunk = upper_chunk->data[upper2];
	if (!lower_chunk)
		goto out;

	clear_bit(lower, lower_chunk->data);

	/* if there are no more bits set, add it to the free list */
	if (find_first_bit(lower_chunk->data, LOWER_MAX) >= LOWER_MAX) {
		put_lower_chunk(pid_list, lower_chunk);
		upper_chunk->data[upper2] = NULL;
		if (upper_empty(upper_chunk)) {
			put_upper_chunk(pid_list, upper_chunk);
			pid_list->upper[upper1] = NULL;
		}
	}
out:
	raw_spin_unlock_irqrestore(&pid_list->lock, flags);
	return 0;
}
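
/*
 * Example usage (illustrative sketch only; the exit hook is
 * hypothetical): dropping a task from the list when it exits, so its
 * pid can be reused without being traced.
 *
 *	static void probe_sched_process_exit(struct task_struct *tsk)
 *	{
 *		trace_pid_list_clear(pid_list, tsk->pid);
 *	}
 */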

/**
 * trace_pid_list_next - return the next pid in the list
 * @pid_list: The pid list to examine.
 * @pid: The pid to start from
 * @next: The pointer to place the pid that is set starting from @pid.
 *
 * Looks for the next consecutive pid that is in @pid_list starting
 * at the pid specified by @pid. If one is set (including @pid), then
 * that pid is placed into @next.
 *
 * Return 0 when a pid is found, -1 if there are no more pids included.
 */
int trace_pid_list_next(struct trace_pid_list *pid_list, unsigned int pid,
			unsigned int *next)
{
	union upper_chunk *upper_chunk;
	union lower_chunk *lower_chunk;
	unsigned long flags;
	unsigned int upper1;
	unsigned int upper2;
	unsigned int lower;

	if (!pid_list)
		return -ENODEV;

	if (pid_split(pid, &upper1, &upper2, &lower) < 0)
		return -EINVAL;

	raw_spin_lock_irqsave(&pid_list->lock, flags);
	for (; upper1 <= UPPER_MASK; upper1++, upper2 = 0) {
		upper_chunk = pid_list->upper[upper1];

		if (!upper_chunk)
			continue;

		for (; upper2 <= UPPER_MASK; upper2++, lower = 0) {
			lower_chunk = upper_chunk->data[upper2];
			if (!lower_chunk)
				continue;

			lower = find_next_bit(lower_chunk->data, LOWER_MAX,
					      lower);
			if (lower < LOWER_MAX)
				goto found;
		}
	}

found:
	raw_spin_unlock_irqrestore(&pid_list->lock, flags);
	if (upper1 > UPPER_MASK)
		return -1;

	*next = pid_join(upper1, upper2, lower);
	return 0;
}

/**
 * trace_pid_list_first - return the first pid in the list
 * @pid_list: The pid list to examine.
 * @pid: The pointer to place the first found pid that is set.
 *
 * Looks for the first pid that is set in @pid_list, and places it
 * into @pid if found.
 *
 * Return 0 when a pid is found, -1 if there are no pids set.
 */
int trace_pid_list_first(struct trace_pid_list *pid_list, unsigned int *pid)
{
	return trace_pid_list_next(pid_list, 0, pid);
}
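
/*
 * Example usage (illustrative sketch only; the printing is
 * hypothetical): walking all pids in the list in ascending order.
 * Note the "pid + 1" to step past the pid just found, since
 * trace_pid_list_next() includes its starting pid in the search.
 *
 *	unsigned int pid;
 *	int ret;
 *
 *	for (ret = trace_pid_list_first(pid_list, &pid); !ret;
 *	     ret = trace_pid_list_next(pid_list, pid + 1, &pid))
 *		pr_info("pid %u is set\n", pid);
 */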

static void pid_list_refill_irq(struct irq_work *iwork)
{
	struct trace_pid_list *pid_list = container_of(iwork, struct trace_pid_list,
						       refill_irqwork);
	union upper_chunk *upper = NULL;
	union lower_chunk *lower = NULL;
	union upper_chunk **upper_next = &upper;
	union lower_chunk **lower_next = &lower;
	int upper_count;
	int lower_count;
	int ucnt = 0;
	int lcnt = 0;

again:
	raw_spin_lock(&pid_list->lock);
	upper_count = CHUNK_ALLOC - pid_list->free_upper_chunks;
	lower_count = CHUNK_ALLOC - pid_list->free_lower_chunks;
	raw_spin_unlock(&pid_list->lock);

	if (upper_count <= 0 && lower_count <= 0)
		return;

	while (upper_count-- > 0) {
		union upper_chunk *chunk;

		chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
		if (!chunk)
			break;
		*upper_next = chunk;
		upper_next = &chunk->next;
		ucnt++;
	}

	while (lower_count-- > 0) {
		union lower_chunk *chunk;

		chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
		if (!chunk)
			break;
		*lower_next = chunk;
		lower_next = &chunk->next;
		lcnt++;
	}

	raw_spin_lock(&pid_list->lock);
	if (upper) {
		*upper_next = pid_list->upper_list;
		pid_list->upper_list = upper;
		pid_list->free_upper_chunks += ucnt;
	}
	if (lower) {
		*lower_next = pid_list->lower_list;
		pid_list->lower_list = lower;
		pid_list->free_lower_chunks += lcnt;
	}
	raw_spin_unlock(&pid_list->lock);

	/*
	 * On success of allocating all the chunks, both counters
	 * will be less than zero. If they are not, then an allocation
	 * failed, and we should not try again.
	 */
	if (upper_count >= 0 || lower_count >= 0)
		return;
	/*
	 * When the locks were released, free chunks could have
	 * been used and allocation needs to be done again. Might as
	 * well allocate it now.
	 */
	goto again;
}

/**
 * trace_pid_list_alloc - create a new pid_list
 *
 * Allocates a new pid_list to store pids into.
 *
 * Returns the pid_list on success, NULL otherwise.
 */
struct trace_pid_list *trace_pid_list_alloc(void)
{
	struct trace_pid_list *pid_list;
	int i;

	/* According to linux/thread.h, pids can be no bigger than 30 bits */
	WARN_ON_ONCE(pid_max > (1 << 30));

	pid_list = kzalloc(sizeof(*pid_list), GFP_KERNEL);
	if (!pid_list)
		return NULL;

	init_irq_work(&pid_list->refill_irqwork, pid_list_refill_irq);

	raw_spin_lock_init(&pid_list->lock);

	for (i = 0; i < CHUNK_ALLOC; i++) {
		union upper_chunk *chunk;

		chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
		if (!chunk)
			break;
		chunk->next = pid_list->upper_list;
		pid_list->upper_list = chunk;
		pid_list->free_upper_chunks++;
	}

	for (i = 0; i < CHUNK_ALLOC; i++) {
		union lower_chunk *chunk;

		chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
		if (!chunk)
			break;
		chunk->next = pid_list->lower_list;
		pid_list->lower_list = chunk;
		pid_list->free_lower_chunks++;
	}

	return pid_list;
}
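
/*
 * Example lifecycle (illustrative sketch only; error handling is
 * abbreviated): allocate a list, add the current task's pid, query it,
 * then free everything including the cached free chunks.
 *
 *	struct trace_pid_list *pid_list = trace_pid_list_alloc();
 *
 *	if (!pid_list)
 *		return -ENOMEM;
 *	trace_pid_list_set(pid_list, current->pid);
 *	WARN_ON(!trace_pid_list_is_set(pid_list, current->pid));
 *	trace_pid_list_free(pid_list);
 */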

/**
 * trace_pid_list_free - Frees an allocated pid_list.
 * @pid_list: The pid list to free.
 *
 * Frees the memory for a pid_list that was allocated.
 */
void trace_pid_list_free(struct trace_pid_list *pid_list)
{
	union upper_chunk *upper;
	union lower_chunk *lower;
	int i, j;

	if (!pid_list)
		return;

	irq_work_sync(&pid_list->refill_irqwork);

	while (pid_list->lower_list) {
		union lower_chunk *chunk;

		chunk = pid_list->lower_list;
		pid_list->lower_list = pid_list->lower_list->next;
		kfree(chunk);
	}

	while (pid_list->upper_list) {
		union upper_chunk *chunk;

		chunk = pid_list->upper_list;
		pid_list->upper_list = pid_list->upper_list->next;
		kfree(chunk);
	}

	for (i = 0; i < UPPER1_SIZE; i++) {
		upper = pid_list->upper[i];
		if (upper) {
			for (j = 0; j < UPPER2_SIZE; j++) {
				lower = upper->data[j];
				kfree(lower);
			}
			kfree(upper);
		}
	}
	kfree(pid_list);
}