// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2007 Alan Stern
 * Copyright (C) IBM Corporation, 2009
 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
 *
 * Thanks to Ingo Molnar for his many suggestions.
 *
 * Authors: Alan Stern <stern@rowland.harvard.edu>
 *          K.Prasad <prasad@linux.vnet.ibm.com>
 *          Frederic Weisbecker <fweisbec@gmail.com>
 */

/*
 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 * using the CPU's debug registers.
 * This file contains the arch-independent routines.
 */

#include <linux/hw_breakpoint.h>

#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/cpu.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/irqflags.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/percpu-rwsem.h>
#include <linux/percpu.h>
#include <linux/rhashtable.h>
#include <linux/sched.h>
#include <linux/slab.h>

/*
 * Data structure to track the total uses of N slots across tasks or CPUs;
 * bp_slots_histogram::count[N] is the number of assigned N+1 breakpoint slots.
 */
struct bp_slots_histogram {
#ifdef hw_breakpoint_slots
	atomic_t count[hw_breakpoint_slots(0)];
#else
	atomic_t *count;
#endif
};
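
/*
 * Worked example (illustrative, assuming an arch with 4 slots per type): if
 * two tasks each hold one pinned data breakpoint and a third task holds
 * three, then count[0] == 2 and count[2] == 1, with all other buckets zero.
 * The highest non-zero bucket index plus 1 (here: 3) is the worst-case
 * number of slots any single task consumes, which is what
 * bp_slots_histogram_max() below computes.
 */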

/*
 * Per-CPU constraints data.
 */
struct bp_cpuinfo {
	/* Number of pinned CPU breakpoints in a CPU. */
	unsigned int cpu_pinned;
	/* Histogram of pinned task breakpoints in a CPU. */
	struct bp_slots_histogram tsk_pinned;
};

static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);

static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
{
	return per_cpu_ptr(bp_cpuinfo + type, cpu);
}

/* Number of pinned CPU breakpoints globally. */
static struct bp_slots_histogram cpu_pinned[TYPE_MAX];
/* Number of pinned CPU-independent task breakpoints. */
static struct bp_slots_histogram tsk_pinned_all[TYPE_MAX];

/* Keep track of the breakpoints attached to tasks */
static struct rhltable task_bps_ht;
static const struct rhashtable_params task_bps_ht_params = {
	.head_offset = offsetof(struct hw_perf_event, bp_list),
	.key_offset = offsetof(struct hw_perf_event, target),
	.key_len = sizeof_field(struct hw_perf_event, target),
	.automatic_shrinking = true,
};

static bool constraints_initialized __ro_after_init;

/*
 * Synchronizes accesses to the per-CPU constraints; the locking rules are:
 *
 *  1. Atomic updates to bp_cpuinfo::tsk_pinned only require a held read-lock
 *     (due to bp_slots_histogram::count being atomic, no updates are lost).
 *
 *  2. Holding a write-lock is required for computations that require a
 *     stable snapshot of all bp_cpuinfo::tsk_pinned.
 *
 *  3. In all other cases, non-atomic accesses require the appropriately held
 *     lock (read-lock for read-only accesses; write-lock for reads/writes).
 */
DEFINE_STATIC_PERCPU_RWSEM(bp_cpuinfo_sem);

/*
 * Return mutex to serialize accesses to per-task lists in task_bps_ht. Since
 * rhltable synchronizes concurrent insertions/deletions, independent tasks may
 * insert/delete concurrently; therefore, a mutex per task is sufficient.
 *
 * Uses task_struct::perf_event_mutex, to avoid extending task_struct with a
 * hw_breakpoint-only mutex, which may be infrequently used. The caveat here is
 * that hw_breakpoint may contend with per-task perf event list management. The
 * assumption is that perf use cases involving hw_breakpoints are very unlikely
 * to result in unnecessary contention.
 */
static inline struct mutex *get_task_bps_mutex(struct perf_event *bp)
{
	struct task_struct *tsk = bp->hw.target;

	return tsk ? &tsk->perf_event_mutex : NULL;
}

static struct mutex *bp_constraints_lock(struct perf_event *bp)
{
	struct mutex *tsk_mtx = get_task_bps_mutex(bp);

	if (tsk_mtx) {
		/*
		 * Fully analogous to the perf_try_init_event() nesting
		 * argument in the comment near perf_event_ctx_lock_nested();
		 * this child->perf_event_mutex cannot ever deadlock against
		 * the parent->perf_event_mutex usage from
		 * perf_event_task_{en,dis}able().
		 *
		 * Specifically, inherited events will never occur on
		 * ->perf_event_list.
		 */
		mutex_lock_nested(tsk_mtx, SINGLE_DEPTH_NESTING);
		percpu_down_read(&bp_cpuinfo_sem);
	} else {
		percpu_down_write(&bp_cpuinfo_sem);
	}

	return tsk_mtx;
}

static void bp_constraints_unlock(struct mutex *tsk_mtx)
{
	if (tsk_mtx) {
		percpu_up_read(&bp_cpuinfo_sem);
		mutex_unlock(tsk_mtx);
	} else {
		percpu_up_write(&bp_cpuinfo_sem);
	}
}

static bool bp_constraints_is_locked(struct perf_event *bp)
{
	struct mutex *tsk_mtx = get_task_bps_mutex(bp);

	return percpu_is_write_locked(&bp_cpuinfo_sem) ||
	       (tsk_mtx ? mutex_is_locked(tsk_mtx) :
			  percpu_is_read_locked(&bp_cpuinfo_sem));
}

static inline void assert_bp_constraints_lock_held(struct perf_event *bp)
{
	struct mutex *tsk_mtx = get_task_bps_mutex(bp);

	if (tsk_mtx)
		lockdep_assert_held(tsk_mtx);
	lockdep_assert_held(&bp_cpuinfo_sem);
}

#ifdef hw_breakpoint_slots
/*
 * Number of breakpoint slots is constant, and the same for all types.
 */
static_assert(hw_breakpoint_slots(TYPE_INST) == hw_breakpoint_slots(TYPE_DATA));
static inline int hw_breakpoint_slots_cached(int type)	{ return hw_breakpoint_slots(type); }
static inline int init_breakpoint_slots(void)		{ return 0; }
#else
/*
 * Dynamic number of breakpoint slots.
 */
static int __nr_bp_slots[TYPE_MAX] __ro_after_init;

static inline int hw_breakpoint_slots_cached(int type)
{
	return __nr_bp_slots[type];
}

static __init bool
bp_slots_histogram_alloc(struct bp_slots_histogram *hist, enum bp_type_idx type)
{
	hist->count = kcalloc(hw_breakpoint_slots_cached(type), sizeof(*hist->count), GFP_KERNEL);
	return hist->count;
}

static __init void bp_slots_histogram_free(struct bp_slots_histogram *hist)
{
	kfree(hist->count);
}

static __init int init_breakpoint_slots(void)
{
	int i, cpu, err_cpu;

	for (i = 0; i < TYPE_MAX; i++)
		__nr_bp_slots[i] = hw_breakpoint_slots(i);

	for_each_possible_cpu(cpu) {
		for (i = 0; i < TYPE_MAX; i++) {
			struct bp_cpuinfo *info = get_bp_info(cpu, i);

			if (!bp_slots_histogram_alloc(&info->tsk_pinned, i))
				goto err;
		}
	}
	for (i = 0; i < TYPE_MAX; i++) {
		if (!bp_slots_histogram_alloc(&cpu_pinned[i], i))
			goto err;
		if (!bp_slots_histogram_alloc(&tsk_pinned_all[i], i))
			goto err;
	}

	return 0;
err:
	for_each_possible_cpu(err_cpu) {
		for (i = 0; i < TYPE_MAX; i++)
			bp_slots_histogram_free(&get_bp_info(err_cpu, i)->tsk_pinned);
		if (err_cpu == cpu)
			break;
	}
	for (i = 0; i < TYPE_MAX; i++) {
		bp_slots_histogram_free(&cpu_pinned[i]);
		bp_slots_histogram_free(&tsk_pinned_all[i]);
	}

	return -ENOMEM;
}
#endif

static inline void
bp_slots_histogram_add(struct bp_slots_histogram *hist, int old, int val)
{
	const int old_idx = old - 1;
	const int new_idx = old_idx + val;

	if (old_idx >= 0)
		WARN_ON(atomic_dec_return_relaxed(&hist->count[old_idx]) < 0);
	if (new_idx >= 0)
		WARN_ON(atomic_inc_return_relaxed(&hist->count[new_idx]) < 0);
}
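
/*
 * Example (illustrative): a task currently holding 2 slots that gains a
 * breakpoint of weight 1 moves from bucket 1 to bucket 2; that is,
 * bp_slots_histogram_add(hist, 2, 1) decrements count[1] and increments
 * count[2]. With @old == 0 only the increment happens, and a negative @val
 * moves the entry down the histogram instead.
 */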

static int
bp_slots_histogram_max(struct bp_slots_histogram *hist, enum bp_type_idx type)
{
	for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
		const int count = atomic_read(&hist->count[i]);

		/* Catch unexpected writers; we want a stable snapshot. */
		ASSERT_EXCLUSIVE_WRITER(hist->count[i]);
		if (count > 0)
			return i + 1;
		WARN(count < 0, "inconsistent breakpoint slots histogram");
	}

	return 0;
}

static int
bp_slots_histogram_max_merge(struct bp_slots_histogram *hist1, struct bp_slots_histogram *hist2,
			     enum bp_type_idx type)
{
	for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
		const int count1 = atomic_read(&hist1->count[i]);
		const int count2 = atomic_read(&hist2->count[i]);

		/* Catch unexpected writers; we want a stable snapshot. */
		ASSERT_EXCLUSIVE_WRITER(hist1->count[i]);
		ASSERT_EXCLUSIVE_WRITER(hist2->count[i]);
		if (count1 + count2 > 0)
			return i + 1;
		WARN(count1 < 0, "inconsistent breakpoint slots histogram");
		WARN(count2 < 0, "inconsistent breakpoint slots histogram");
	}

	return 0;
}

#ifndef hw_breakpoint_weight
static inline int hw_breakpoint_weight(struct perf_event *bp)
{
	return 1;
}
#endif

static inline enum bp_type_idx find_slot_idx(u64 bp_type)
{
	if (bp_type & HW_BREAKPOINT_RW)
		return TYPE_DATA;

	return TYPE_INST;
}

/*
 * Return the maximum number of pinned breakpoints a task has in this CPU.
 */
static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
{
	struct bp_slots_histogram *tsk_pinned = &get_bp_info(cpu, type)->tsk_pinned;

	/*
	 * At this point we want to have acquired the bp_cpuinfo_sem as a
	 * writer to ensure that there are no concurrent writers in
	 * toggle_bp_task_slot() to tsk_pinned, and we get a stable snapshot.
	 */
	lockdep_assert_held_write(&bp_cpuinfo_sem);
	return bp_slots_histogram_max_merge(tsk_pinned, &tsk_pinned_all[type], type);
}

/*
 * Count the number of breakpoints of the same type and same task.
 * The given event must not be on the list.
 *
 * If @cpu is -1, but the result of task_bp_pinned() is not CPU-independent,
 * returns a negative value.
 */
static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
{
	struct rhlist_head *head, *pos;
	struct perf_event *iter;
	int count = 0;

	/*
	 * We need a stable snapshot of the per-task breakpoint list.
	 */
	assert_bp_constraints_lock_held(bp);

	rcu_read_lock();
	head = rhltable_lookup(&task_bps_ht, &bp->hw.target, task_bps_ht_params);
	if (!head)
		goto out;

	rhl_for_each_entry_rcu(iter, pos, head, hw.bp_list) {
		if (find_slot_idx(iter->attr.bp_type) != type)
			continue;

		if (iter->cpu >= 0) {
			if (cpu == -1) {
				count = -1;
				goto out;
			} else if (cpu != iter->cpu)
				continue;
		}

		count += hw_breakpoint_weight(iter);
	}

out:
	rcu_read_unlock();
	return count;
}

static const struct cpumask *cpumask_of_bp(struct perf_event *bp)
{
	if (bp->cpu >= 0)
		return cpumask_of(bp->cpu);
	return cpu_possible_mask;
}

/*
 * Returns the max pinned breakpoint slots in a given
 * CPU (cpu > -1) or across all of them (cpu = -1).
 */
static int
max_bp_pinned_slots(struct perf_event *bp, enum bp_type_idx type)
{
	const struct cpumask *cpumask = cpumask_of_bp(bp);
	int pinned_slots = 0;
	int cpu;

	if (bp->hw.target && bp->cpu < 0) {
		int max_pinned = task_bp_pinned(-1, bp, type);

		if (max_pinned >= 0) {
			/*
			 * Fast path: task_bp_pinned() is CPU-independent and
			 * returns the same value for any CPU.
			 */
			max_pinned += bp_slots_histogram_max(&cpu_pinned[type], type);
			return max_pinned;
		}
	}

	for_each_cpu(cpu, cpumask) {
		struct bp_cpuinfo *info = get_bp_info(cpu, type);
		int nr;

		nr = info->cpu_pinned;
		if (!bp->hw.target)
			nr += max_task_bp_pinned(cpu, type);
		else
			nr += task_bp_pinned(cpu, bp, type);

		pinned_slots = max(nr, pinned_slots);
	}

	return pinned_slots;
}

/*
 * Add/remove the given breakpoint in our constraint table
 */
static int
toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, int weight)
{
	int cpu, next_tsk_pinned;

	if (!enable)
		weight = -weight;

	if (!bp->hw.target) {
		/*
		 * Update the pinned CPU slots, in per-CPU bp_cpuinfo and in the
		 * global histogram.
		 */
		struct bp_cpuinfo *info = get_bp_info(bp->cpu, type);

		lockdep_assert_held_write(&bp_cpuinfo_sem);
		bp_slots_histogram_add(&cpu_pinned[type], info->cpu_pinned, weight);
		info->cpu_pinned += weight;
		return 0;
	}

	/*
	 * If bp->hw.target, tsk_pinned is only modified, but not used
	 * otherwise. We can permit concurrent updates as long as there are no
	 * other uses: having acquired bp_cpuinfo_sem as a reader allows
	 * concurrent updates here. Uses of tsk_pinned will require acquiring
	 * bp_cpuinfo_sem as a writer to stabilize tsk_pinned's value.
	 */
	lockdep_assert_held_read(&bp_cpuinfo_sem);

	/*
	 * Update the pinned task slots, in per-CPU bp_cpuinfo and in the global
	 * histogram. We need to take care of 4 cases:
	 *
	 *  1. This breakpoint targets all CPUs (cpu < 0), and there may only
	 *     exist other task breakpoints targeting all CPUs. In this case we
	 *     can simply update the global slots histogram.
	 *
	 *  2. This breakpoint targets a specific CPU (cpu >= 0), but there may
	 *     only exist other task breakpoints targeting all CPUs.
	 *
	 *     a. On enable: remove the existing breakpoints from the global
	 *        slots histogram and use the per-CPU histogram.
	 *
	 *     b. On disable: re-insert the existing breakpoints into the global
	 *        slots histogram and remove from per-CPU histogram.
	 *
	 *  3. Some other existing task breakpoints target specific CPUs. Only
	 *     update the per-CPU slots histogram.
	 */

	if (!enable) {
		/*
		 * Remove before updating histograms so we can determine if this
		 * was the last task breakpoint for a specific CPU.
		 */
		int ret = rhltable_remove(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params);

		if (ret)
			return ret;
	}
	/*
	 * Note: If !enable, next_tsk_pinned will not count the to-be-removed breakpoint.
	 */
	next_tsk_pinned = task_bp_pinned(-1, bp, type);

	if (next_tsk_pinned >= 0) {
		if (bp->cpu < 0) { /* Case 1: fast path */
			if (!enable)
				next_tsk_pinned += hw_breakpoint_weight(bp);
			bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned, weight);
		} else if (enable) { /* Case 2.a: slow path */
			/* Add existing to per-CPU histograms. */
			for_each_possible_cpu(cpu) {
				bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
						       0, next_tsk_pinned);
			}
			/* Add this first CPU-pinned task breakpoint. */
			bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned,
					       next_tsk_pinned, weight);
			/* Rebalance global task pinned histogram. */
			bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned,
					       -next_tsk_pinned);
		} else { /* Case 2.b: slow path */
			/* Remove this last CPU-pinned task breakpoint. */
			bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned,
					       next_tsk_pinned + hw_breakpoint_weight(bp), weight);
			/* Remove all from per-CPU histograms. */
			for_each_possible_cpu(cpu) {
				bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
						       next_tsk_pinned, -next_tsk_pinned);
			}
			/* Rebalance global task pinned histogram. */
			bp_slots_histogram_add(&tsk_pinned_all[type], 0, next_tsk_pinned);
		}
	} else { /* Case 3: slow path */
		const struct cpumask *cpumask = cpumask_of_bp(bp);

		for_each_cpu(cpu, cpumask) {
			next_tsk_pinned = task_bp_pinned(cpu, bp, type);
			if (!enable)
				next_tsk_pinned += hw_breakpoint_weight(bp);
			bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
					       next_tsk_pinned, weight);
		}
	}

	/*
	 * Readers want a stable snapshot of the per-task breakpoint list.
	 */
	assert_bp_constraints_lock_held(bp);

	if (enable)
		return rhltable_insert(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params);

	return 0;
}
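
/*
 * Worked example for case 2.a above (illustrative): a task holds 2
 * CPU-independent breakpoints (tracked in tsk_pinned_all) and now enables
 * one pinned to CPU 1 with weight 1. next_tsk_pinned is 2, so every CPU's
 * tsk_pinned histogram gains an entry in bucket 1 (2 slots), CPU 1's entry
 * then moves from bucket 1 to bucket 2 (3 slots), and tsk_pinned_all drops
 * its bucket-1 entry, since per-CPU accounting now covers this task.
 */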

/*
 * Constraints to check before allowing this new breakpoint counter.
 *
 * Note: Flexible breakpoints are currently unimplemented, but outlined in the
 * below algorithm for completeness. The implementation treats flexible as
 * pinned due to no guarantee that we currently always schedule flexible events
 * before a pinned event on the same CPU.
 *
 * == Non-pinned counter == (Considered as pinned for now)
 *
 *   - If attached to a single cpu, check:
 *
 *       (per_cpu(info->flexible, cpu) || (per_cpu(info->cpu_pinned, cpu)
 *           + max(per_cpu(info->tsk_pinned, cpu)))) < HBP_NUM
 *
 *       -> If there are already non-pinned counters in this cpu, it means
 *          there is already a free slot for them.
 *          Otherwise, we check that the maximum number of per task
 *          breakpoints (for this cpu) plus the number of per cpu breakpoints
 *          (for this cpu) doesn't cover every register.
 *
 *   - If attached to every cpu, check:
 *
 *       (per_cpu(info->flexible, *) || (max(per_cpu(info->cpu_pinned, *))
 *           + max(per_cpu(info->tsk_pinned, *)))) < HBP_NUM
 *
 *       -> This is roughly the same, except we check the number of per cpu
 *          breakpoints for every cpu and we keep the max one. Same for the
 *          per task breakpoints.
 *
 *
 * == Pinned counter ==
 *
 *   - If attached to a single cpu, check:
 *
 *       ((per_cpu(info->flexible, cpu) > 1) + per_cpu(info->cpu_pinned, cpu)
 *            + max(per_cpu(info->tsk_pinned, cpu))) < HBP_NUM
 *
 *       -> Same checks as before. But now the info->flexible, if any, must
 *          keep at least one register (or they will never be fed).
 *
 *   - If attached to every cpu, check:
 *
 *       ((per_cpu(info->flexible, *) > 1) + max(per_cpu(info->cpu_pinned, *))
 *            + max(per_cpu(info->tsk_pinned, *))) < HBP_NUM
 */
static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
{
	enum bp_type_idx type;
	int max_pinned_slots;
	int weight;

	/* We couldn't initialize breakpoint constraints on boot */
	if (!constraints_initialized)
		return -ENOMEM;

	/* Basic checks */
	if (bp_type == HW_BREAKPOINT_EMPTY ||
	    bp_type == HW_BREAKPOINT_INVALID)
		return -EINVAL;

	type = find_slot_idx(bp_type);
	weight = hw_breakpoint_weight(bp);

	/* Check if this new breakpoint can be satisfied across all CPUs. */
	max_pinned_slots = max_bp_pinned_slots(bp, type) + weight;
	if (max_pinned_slots > hw_breakpoint_slots_cached(type))
		return -ENOSPC;

	return toggle_bp_slot(bp, true, type, weight);
}
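
/*
 * Example of the reservation arithmetic (illustrative, assuming 4 slots):
 * with 2 CPU-pinned breakpoints on the target CPU and a task already
 * holding 1 slot there, max_bp_pinned_slots() returns 3; a new breakpoint
 * of weight 1 gives 3 + 1 = 4 <= 4, so the reservation succeeds. A second
 * identical request would compute 4 + 1 = 5 > 4 and fail with -ENOSPC.
 */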

int reserve_bp_slot(struct perf_event *bp)
{
	struct mutex *mtx = bp_constraints_lock(bp);
	int ret = __reserve_bp_slot(bp, bp->attr.bp_type);

	bp_constraints_unlock(mtx);
	return ret;
}

static void __release_bp_slot(struct perf_event *bp, u64 bp_type)
{
	enum bp_type_idx type;
	int weight;

	type = find_slot_idx(bp_type);
	weight = hw_breakpoint_weight(bp);
	WARN_ON(toggle_bp_slot(bp, false, type, weight));
}

void release_bp_slot(struct perf_event *bp)
{
	struct mutex *mtx = bp_constraints_lock(bp);

	__release_bp_slot(bp, bp->attr.bp_type);
	bp_constraints_unlock(mtx);
}

static int __modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
{
	int err;

	__release_bp_slot(bp, old_type);

	err = __reserve_bp_slot(bp, new_type);
	if (err) {
		/*
		 * Reserve the old_type slot back in case
		 * there's no space for the new type.
		 *
		 * This must succeed, because we just released
		 * the old_type slot in the __release_bp_slot
		 * call above. If not, something is broken.
		 */
		WARN_ON(__reserve_bp_slot(bp, old_type));
	}

	return err;
}

static int modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
{
	struct mutex *mtx = bp_constraints_lock(bp);
	int ret = __modify_bp_slot(bp, old_type, new_type);

	bp_constraints_unlock(mtx);
	return ret;
}

/*
 * Allow the kernel debugger to reserve breakpoint slots without taking a
 * lock, using the dbg_* variants of the reserve and release breakpoint slot
 * functions.
 */
int dbg_reserve_bp_slot(struct perf_event *bp)
{
	int ret;

	if (bp_constraints_is_locked(bp))
		return -1;

	/* Locks aren't held; disable lockdep assert checking. */
	lockdep_off();
	ret = __reserve_bp_slot(bp, bp->attr.bp_type);
	lockdep_on();

	return ret;
}

int dbg_release_bp_slot(struct perf_event *bp)
{
	if (bp_constraints_is_locked(bp))
		return -1;

	/* Locks aren't held; disable lockdep assert checking. */
	lockdep_off();
	__release_bp_slot(bp, bp->attr.bp_type);
	lockdep_on();

	return 0;
}

static int hw_breakpoint_parse(struct perf_event *bp,
			       const struct perf_event_attr *attr,
			       struct arch_hw_breakpoint *hw)
{
	int err;

	err = hw_breakpoint_arch_parse(bp, attr, hw);
	if (err)
		return err;

	if (arch_check_bp_in_kernelspace(hw)) {
		if (attr->exclude_kernel)
			return -EINVAL;
		/*
		 * Don't let unprivileged users set a breakpoint in the trap
		 * path to avoid trap recursion attacks.
		 */
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
	}

	return 0;
}

int register_perf_hw_breakpoint(struct perf_event *bp)
{
	struct arch_hw_breakpoint hw = { };
	int err;

	err = reserve_bp_slot(bp);
	if (err)
		return err;

	err = hw_breakpoint_parse(bp, &bp->attr, &hw);
	if (err) {
		release_bp_slot(bp);
		return err;
	}

	bp->hw.info = hw;

	return 0;
}

/**
 * register_user_hw_breakpoint - register a hardware breakpoint for user space
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 * @context: context data that can be used in the triggered callback
 * @tsk: pointer to 'task_struct' of the process to which the address belongs
 */
struct perf_event *
register_user_hw_breakpoint(struct perf_event_attr *attr,
			    perf_overflow_handler_t triggered,
			    void *context,
			    struct task_struct *tsk)
{
	return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
						context);
}
EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
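
/*
 * Example usage (illustrative sketch; error handling elided, and
 * "watch_address", "my_bp_handler" and "tsk" are hypothetical):
 *
 *	struct perf_event_attr attr;
 *	struct perf_event *bp;
 *
 *	hw_breakpoint_init(&attr);
 *	attr.bp_addr = (unsigned long)watch_address;
 *	attr.bp_len  = HW_BREAKPOINT_LEN_4;
 *	attr.bp_type = HW_BREAKPOINT_W;
 *
 *	bp = register_user_hw_breakpoint(&attr, my_bp_handler, NULL, tsk);
 *	if (IS_ERR(bp))
 *		return PTR_ERR(bp);
 */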

static void hw_breakpoint_copy_attr(struct perf_event_attr *to,
				    struct perf_event_attr *from)
{
	to->bp_addr = from->bp_addr;
	to->bp_type = from->bp_type;
	to->bp_len  = from->bp_len;
	to->disabled = from->disabled;
}

int
modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr,
				bool check)
{
	struct arch_hw_breakpoint hw = { };
	int err;

	err = hw_breakpoint_parse(bp, attr, &hw);
	if (err)
		return err;

	if (check) {
		struct perf_event_attr old_attr;

		old_attr = bp->attr;
		hw_breakpoint_copy_attr(&old_attr, attr);
		if (memcmp(&old_attr, attr, sizeof(*attr)))
			return -EINVAL;
	}

	if (bp->attr.bp_type != attr->bp_type) {
		err = modify_bp_slot(bp, bp->attr.bp_type, attr->bp_type);
		if (err)
			return err;
	}

	hw_breakpoint_copy_attr(&bp->attr, attr);
	bp->hw.info = hw;

	return 0;
}

/**
 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
 * @bp: the breakpoint structure to modify
 * @attr: new breakpoint attributes
 */
int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
{
	int err;

	/*
	 * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
	 * will not be possible to raise IPIs that invoke __perf_event_disable.
	 * So call the function directly after making sure we are targeting the
	 * current task.
	 */
	if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
		perf_event_disable_local(bp);
	else
		perf_event_disable(bp);

	err = modify_user_hw_breakpoint_check(bp, attr, false);

	if (!bp->attr.disabled)
		perf_event_enable(bp);

	return err;
}
EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);

/**
 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
 * @bp: the breakpoint structure to unregister
 */
void unregister_hw_breakpoint(struct perf_event *bp)
{
	if (!bp)
		return;
	perf_event_release_kernel(bp);
}
EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);

/**
 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 * @context: context data that can be used in the triggered callback
 *
 * Return: a set of per_cpu pointers to perf events
 */
struct perf_event * __percpu *
register_wide_hw_breakpoint(struct perf_event_attr *attr,
			    perf_overflow_handler_t triggered,
			    void *context)
{
	struct perf_event * __percpu *cpu_events, *bp;
	long err = 0;
	int cpu;

	cpu_events = alloc_percpu(typeof(*cpu_events));
	if (!cpu_events)
		return (void __percpu __force *)ERR_PTR(-ENOMEM);

	cpus_read_lock();
	for_each_online_cpu(cpu) {
		bp = perf_event_create_kernel_counter(attr, cpu, NULL,
						      triggered, context);
		if (IS_ERR(bp)) {
			err = PTR_ERR(bp);
			break;
		}

		per_cpu(*cpu_events, cpu) = bp;
	}
	cpus_read_unlock();

	if (likely(!err))
		return cpu_events;

	unregister_wide_hw_breakpoint(cpu_events);
	return (void __percpu __force *)ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
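
/*
 * Example usage (illustrative sketch, modeled on
 * samples/hw_breakpoint/data_breakpoint.c; "watched_var" and
 * "sample_hbp_handler" are hypothetical):
 *
 *	struct perf_event * __percpu *sample_hbp;
 *	struct perf_event_attr attr;
 *
 *	hw_breakpoint_init(&attr);
 *	attr.bp_addr = (unsigned long)&watched_var;
 *	attr.bp_len  = HW_BREAKPOINT_LEN_4;
 *	attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
 *
 *	sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler, NULL);
 *	if (IS_ERR((void __force *)sample_hbp))
 *		return PTR_ERR((void __force *)sample_hbp);
 */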

/**
 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
 * @cpu_events: the per cpu set of events to unregister
 */
void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
{
	int cpu;

	for_each_possible_cpu(cpu)
		unregister_hw_breakpoint(per_cpu(*cpu_events, cpu));

	free_percpu(cpu_events);
}
EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);

/**
 * hw_breakpoint_is_used - check if breakpoints are currently used
 *
 * Returns: true if breakpoints are used, false otherwise.
 */
bool hw_breakpoint_is_used(void)
{
	int cpu;

	if (!constraints_initialized)
		return false;

	for_each_possible_cpu(cpu) {
		for (int type = 0; type < TYPE_MAX; ++type) {
			struct bp_cpuinfo *info = get_bp_info(cpu, type);

			if (info->cpu_pinned)
				return true;

			for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
				if (atomic_read(&info->tsk_pinned.count[slot]))
					return true;
			}
		}
	}

	for (int type = 0; type < TYPE_MAX; ++type) {
		for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
			/*
			 * Warn, because if there are CPU pinned counters, we
			 * should never get here; bp_cpuinfo::cpu_pinned should
			 * be consistent with the global cpu_pinned histogram.
			 */
			if (WARN_ON(atomic_read(&cpu_pinned[type].count[slot])))
				return true;

			if (atomic_read(&tsk_pinned_all[type].count[slot]))
				return true;
		}
	}

	return false;
}

static struct notifier_block hw_breakpoint_exceptions_nb = {
	.notifier_call = hw_breakpoint_exceptions_notify,
	/* we need to be notified first */
	.priority = 0x7fffffff
};

static void bp_perf_event_destroy(struct perf_event *event)
{
	release_bp_slot(event);
}

static int hw_breakpoint_event_init(struct perf_event *bp)
{
	int err;

	if (bp->attr.type != PERF_TYPE_BREAKPOINT)
		return -ENOENT;

	/*
	 * no branch sampling for breakpoint events
	 */
	if (has_branch_stack(bp))
		return -EOPNOTSUPP;

	err = register_perf_hw_breakpoint(bp);
	if (err)
		return err;

	bp->destroy = bp_perf_event_destroy;

	return 0;
}

static int hw_breakpoint_add(struct perf_event *bp, int flags)
{
	if (!(flags & PERF_EF_START))
		bp->hw.state = PERF_HES_STOPPED;

	if (is_sampling_event(bp)) {
		bp->hw.last_period = bp->hw.sample_period;
		perf_swevent_set_period(bp);
	}

	return arch_install_hw_breakpoint(bp);
}

static void hw_breakpoint_del(struct perf_event *bp, int flags)
{
	arch_uninstall_hw_breakpoint(bp);
}

static void hw_breakpoint_start(struct perf_event *bp, int flags)
{
	bp->hw.state = 0;
}

static void hw_breakpoint_stop(struct perf_event *bp, int flags)
{
	bp->hw.state = PERF_HES_STOPPED;
}

static struct pmu perf_breakpoint = {
	.task_ctx_nr = perf_sw_context, /* could eventually get its own */

	.event_init = hw_breakpoint_event_init,
	.add = hw_breakpoint_add,
	.del = hw_breakpoint_del,
	.start = hw_breakpoint_start,
	.stop = hw_breakpoint_stop,
	.read = hw_breakpoint_pmu_read,
};

int __init init_hw_breakpoint(void)
{
	int ret;

	ret = rhltable_init(&task_bps_ht, &task_bps_ht_params);
	if (ret)
		return ret;

	ret = init_breakpoint_slots();
	if (ret)
		return ret;

	constraints_initialized = true;

	perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);

	return register_die_notifier(&hw_breakpoint_exceptions_nb);
}