1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Data Access Monitor |
4 | * |
5 | * Author: SeongJae Park <sj@kernel.org> |
6 | */ |
7 | |
8 | #define pr_fmt(fmt) "damon: " fmt |
9 | |
10 | #include <linux/damon.h> |
11 | #include <linux/delay.h> |
12 | #include <linux/kthread.h> |
13 | #include <linux/mm.h> |
14 | #include <linux/psi.h> |
15 | #include <linux/slab.h> |
16 | #include <linux/string.h> |
17 | |
18 | #define CREATE_TRACE_POINTS |
19 | #include <trace/events/damon.h> |
20 | |
21 | #ifdef CONFIG_DAMON_KUNIT_TEST |
22 | #undef DAMON_MIN_REGION |
23 | #define DAMON_MIN_REGION 1 |
24 | #endif |
25 | |
26 | static DEFINE_MUTEX(damon_lock); |
27 | static int nr_running_ctxs; |
28 | static bool running_exclusive_ctxs; |
29 | |
30 | static DEFINE_MUTEX(damon_ops_lock); |
31 | static struct damon_operations damon_registered_ops[NR_DAMON_OPS]; |
32 | |
33 | static struct kmem_cache *damon_region_cache __ro_after_init; |
34 | |
35 | /* Should be called under damon_ops_lock with id smaller than NR_DAMON_OPS */ |
36 | static bool __damon_is_registered_ops(enum damon_ops_id id) |
37 | { |
38 | struct damon_operations empty_ops = {}; |
39 | |
40 | if (!memcmp(p: &empty_ops, q: &damon_registered_ops[id], size: sizeof(empty_ops))) |
41 | return false; |
42 | return true; |
43 | } |
44 | |
45 | /** |
46 | * damon_is_registered_ops() - Check if a given damon_operations is registered. |
47 | * @id: Id of the damon_operations to check if registered. |
48 | * |
49 | * Return: true if the ops is set, false otherwise. |
50 | */ |
51 | bool damon_is_registered_ops(enum damon_ops_id id) |
52 | { |
53 | bool registered; |
54 | |
55 | if (id >= NR_DAMON_OPS) |
56 | return false; |
57 | mutex_lock(&damon_ops_lock); |
58 | registered = __damon_is_registered_ops(id); |
59 | mutex_unlock(lock: &damon_ops_lock); |
60 | return registered; |
61 | } |
62 | |
63 | /** |
64 | * damon_register_ops() - Register a monitoring operations set to DAMON. |
65 | * @ops: monitoring operations set to register. |
66 | * |
67 | * This function registers a monitoring operations set of valid &struct |
68 | * damon_operations->id so that others can find and use them later. |
69 | * |
70 | * Return: 0 on success, negative error code otherwise. |
71 | */ |
72 | int damon_register_ops(struct damon_operations *ops) |
73 | { |
74 | int err = 0; |
75 | |
76 | if (ops->id >= NR_DAMON_OPS) |
77 | return -EINVAL; |
78 | mutex_lock(&damon_ops_lock); |
79 | /* Fail for already registered ops */ |
80 | if (__damon_is_registered_ops(id: ops->id)) { |
81 | err = -EINVAL; |
82 | goto out; |
83 | } |
84 | damon_registered_ops[ops->id] = *ops; |
85 | out: |
86 | mutex_unlock(lock: &damon_ops_lock); |
87 | return err; |
88 | } |
89 | |
90 | /** |
91 | * damon_select_ops() - Select a monitoring operations to use with the context. |
92 | * @ctx: monitoring context to use the operations. |
93 | * @id: id of the registered monitoring operations to select. |
94 | * |
95 | * This function finds registered monitoring operations set of @id and make |
96 | * @ctx to use it. |
97 | * |
98 | * Return: 0 on success, negative error code otherwise. |
99 | */ |
100 | int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id) |
101 | { |
102 | int err = 0; |
103 | |
104 | if (id >= NR_DAMON_OPS) |
105 | return -EINVAL; |
106 | |
107 | mutex_lock(&damon_ops_lock); |
108 | if (!__damon_is_registered_ops(id)) |
109 | err = -EINVAL; |
110 | else |
111 | ctx->ops = damon_registered_ops[id]; |
112 | mutex_unlock(lock: &damon_ops_lock); |
113 | return err; |
114 | } |
115 | |
116 | /* |
117 | * Construct a damon_region struct |
118 | * |
119 | * Returns the pointer to the new struct if success, or NULL otherwise |
120 | */ |
121 | struct damon_region *damon_new_region(unsigned long start, unsigned long end) |
122 | { |
123 | struct damon_region *region; |
124 | |
125 | region = kmem_cache_alloc(cachep: damon_region_cache, GFP_KERNEL); |
126 | if (!region) |
127 | return NULL; |
128 | |
129 | region->ar.start = start; |
130 | region->ar.end = end; |
131 | region->nr_accesses = 0; |
132 | region->nr_accesses_bp = 0; |
133 | INIT_LIST_HEAD(list: ®ion->list); |
134 | |
135 | region->age = 0; |
136 | region->last_nr_accesses = 0; |
137 | |
138 | return region; |
139 | } |
140 | |
141 | void damon_add_region(struct damon_region *r, struct damon_target *t) |
142 | { |
143 | list_add_tail(new: &r->list, head: &t->regions_list); |
144 | t->nr_regions++; |
145 | } |
146 | |
147 | static void damon_del_region(struct damon_region *r, struct damon_target *t) |
148 | { |
149 | list_del(entry: &r->list); |
150 | t->nr_regions--; |
151 | } |
152 | |
153 | static void damon_free_region(struct damon_region *r) |
154 | { |
155 | kmem_cache_free(s: damon_region_cache, objp: r); |
156 | } |
157 | |
158 | void damon_destroy_region(struct damon_region *r, struct damon_target *t) |
159 | { |
160 | damon_del_region(r, t); |
161 | damon_free_region(r); |
162 | } |
163 | |
164 | /* |
165 | * Check whether a region is intersecting an address range |
166 | * |
167 | * Returns true if it is. |
168 | */ |
169 | static bool damon_intersect(struct damon_region *r, |
170 | struct damon_addr_range *re) |
171 | { |
172 | return !(r->ar.end <= re->start || re->end <= r->ar.start); |
173 | } |
174 | |
175 | /* |
176 | * Fill holes in regions with new regions. |
177 | */ |
178 | static int damon_fill_regions_holes(struct damon_region *first, |
179 | struct damon_region *last, struct damon_target *t) |
180 | { |
181 | struct damon_region *r = first; |
182 | |
183 | damon_for_each_region_from(r, t) { |
184 | struct damon_region *next, *newr; |
185 | |
186 | if (r == last) |
187 | break; |
188 | next = damon_next_region(r); |
189 | if (r->ar.end != next->ar.start) { |
190 | newr = damon_new_region(start: r->ar.end, end: next->ar.start); |
191 | if (!newr) |
192 | return -ENOMEM; |
193 | damon_insert_region(r: newr, prev: r, next, t); |
194 | } |
195 | } |
196 | return 0; |
197 | } |
198 | |
199 | /* |
200 | * damon_set_regions() - Set regions of a target for given address ranges. |
201 | * @t: the given target. |
202 | * @ranges: array of new monitoring target ranges. |
203 | * @nr_ranges: length of @ranges. |
204 | * |
205 | * This function adds new regions to, or modify existing regions of a |
206 | * monitoring target to fit in specific ranges. |
207 | * |
208 | * Return: 0 if success, or negative error code otherwise. |
209 | */ |
210 | int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, |
211 | unsigned int nr_ranges) |
212 | { |
213 | struct damon_region *r, *next; |
214 | unsigned int i; |
215 | int err; |
216 | |
217 | /* Remove regions which are not in the new ranges */ |
218 | damon_for_each_region_safe(r, next, t) { |
219 | for (i = 0; i < nr_ranges; i++) { |
220 | if (damon_intersect(r, re: &ranges[i])) |
221 | break; |
222 | } |
223 | if (i == nr_ranges) |
224 | damon_destroy_region(r, t); |
225 | } |
226 | |
227 | r = damon_first_region(t); |
228 | /* Add new regions or resize existing regions to fit in the ranges */ |
229 | for (i = 0; i < nr_ranges; i++) { |
230 | struct damon_region *first = NULL, *last, *newr; |
231 | struct damon_addr_range *range; |
232 | |
233 | range = &ranges[i]; |
234 | /* Get the first/last regions intersecting with the range */ |
235 | damon_for_each_region_from(r, t) { |
236 | if (damon_intersect(r, re: range)) { |
237 | if (!first) |
238 | first = r; |
239 | last = r; |
240 | } |
241 | if (r->ar.start >= range->end) |
242 | break; |
243 | } |
244 | if (!first) { |
245 | /* no region intersects with this range */ |
246 | newr = damon_new_region( |
247 | ALIGN_DOWN(range->start, |
248 | DAMON_MIN_REGION), |
249 | ALIGN(range->end, DAMON_MIN_REGION)); |
250 | if (!newr) |
251 | return -ENOMEM; |
252 | damon_insert_region(r: newr, prev: damon_prev_region(r), next: r, t); |
253 | } else { |
254 | /* resize intersecting regions to fit in this range */ |
255 | first->ar.start = ALIGN_DOWN(range->start, |
256 | DAMON_MIN_REGION); |
257 | last->ar.end = ALIGN(range->end, DAMON_MIN_REGION); |
258 | |
259 | /* fill possible holes in the range */ |
260 | err = damon_fill_regions_holes(first, last, t); |
261 | if (err) |
262 | return err; |
263 | } |
264 | } |
265 | return 0; |
266 | } |
267 | |
268 | struct damos_filter *damos_new_filter(enum damos_filter_type type, |
269 | bool matching) |
270 | { |
271 | struct damos_filter *filter; |
272 | |
273 | filter = kmalloc(size: sizeof(*filter), GFP_KERNEL); |
274 | if (!filter) |
275 | return NULL; |
276 | filter->type = type; |
277 | filter->matching = matching; |
278 | INIT_LIST_HEAD(list: &filter->list); |
279 | return filter; |
280 | } |
281 | |
282 | void damos_add_filter(struct damos *s, struct damos_filter *f) |
283 | { |
284 | list_add_tail(new: &f->list, head: &s->filters); |
285 | } |
286 | |
287 | static void damos_del_filter(struct damos_filter *f) |
288 | { |
289 | list_del(entry: &f->list); |
290 | } |
291 | |
292 | static void damos_free_filter(struct damos_filter *f) |
293 | { |
294 | kfree(objp: f); |
295 | } |
296 | |
297 | void damos_destroy_filter(struct damos_filter *f) |
298 | { |
299 | damos_del_filter(f); |
300 | damos_free_filter(f); |
301 | } |
302 | |
303 | struct damos_quota_goal *damos_new_quota_goal( |
304 | enum damos_quota_goal_metric metric, |
305 | unsigned long target_value) |
306 | { |
307 | struct damos_quota_goal *goal; |
308 | |
309 | goal = kmalloc(size: sizeof(*goal), GFP_KERNEL); |
310 | if (!goal) |
311 | return NULL; |
312 | goal->metric = metric; |
313 | goal->target_value = target_value; |
314 | INIT_LIST_HEAD(list: &goal->list); |
315 | return goal; |
316 | } |
317 | |
318 | void damos_add_quota_goal(struct damos_quota *q, struct damos_quota_goal *g) |
319 | { |
320 | list_add_tail(new: &g->list, head: &q->goals); |
321 | } |
322 | |
323 | static void damos_del_quota_goal(struct damos_quota_goal *g) |
324 | { |
325 | list_del(entry: &g->list); |
326 | } |
327 | |
328 | static void damos_free_quota_goal(struct damos_quota_goal *g) |
329 | { |
330 | kfree(objp: g); |
331 | } |
332 | |
333 | void damos_destroy_quota_goal(struct damos_quota_goal *g) |
334 | { |
335 | damos_del_quota_goal(g); |
336 | damos_free_quota_goal(g); |
337 | } |
338 | |
339 | /* initialize fields of @quota that normally API users wouldn't set */ |
340 | static struct damos_quota *damos_quota_init(struct damos_quota *quota) |
341 | { |
342 | quota->esz = 0; |
343 | quota->total_charged_sz = 0; |
344 | quota->total_charged_ns = 0; |
345 | quota->charged_sz = 0; |
346 | quota->charged_from = 0; |
347 | quota->charge_target_from = NULL; |
348 | quota->charge_addr_from = 0; |
349 | return quota; |
350 | } |
351 | |
352 | struct damos *damon_new_scheme(struct damos_access_pattern *pattern, |
353 | enum damos_action action, |
354 | unsigned long apply_interval_us, |
355 | struct damos_quota *quota, |
356 | struct damos_watermarks *wmarks) |
357 | { |
358 | struct damos *scheme; |
359 | |
360 | scheme = kmalloc(size: sizeof(*scheme), GFP_KERNEL); |
361 | if (!scheme) |
362 | return NULL; |
363 | scheme->pattern = *pattern; |
364 | scheme->action = action; |
365 | scheme->apply_interval_us = apply_interval_us; |
366 | /* |
367 | * next_apply_sis will be set when kdamond starts. While kdamond is |
368 | * running, it will also updated when it is added to the DAMON context, |
369 | * or damon_attrs are updated. |
370 | */ |
371 | scheme->next_apply_sis = 0; |
372 | INIT_LIST_HEAD(list: &scheme->filters); |
373 | scheme->stat = (struct damos_stat){}; |
374 | INIT_LIST_HEAD(list: &scheme->list); |
375 | |
376 | scheme->quota = *(damos_quota_init(quota)); |
377 | /* quota.goals should be separately set by caller */ |
378 | INIT_LIST_HEAD(list: &scheme->quota.goals); |
379 | |
380 | scheme->wmarks = *wmarks; |
381 | scheme->wmarks.activated = true; |
382 | |
383 | return scheme; |
384 | } |
385 | |
386 | static void damos_set_next_apply_sis(struct damos *s, struct damon_ctx *ctx) |
387 | { |
388 | unsigned long sample_interval = ctx->attrs.sample_interval ? |
389 | ctx->attrs.sample_interval : 1; |
390 | unsigned long apply_interval = s->apply_interval_us ? |
391 | s->apply_interval_us : ctx->attrs.aggr_interval; |
392 | |
393 | s->next_apply_sis = ctx->passed_sample_intervals + |
394 | apply_interval / sample_interval; |
395 | } |
396 | |
397 | void damon_add_scheme(struct damon_ctx *ctx, struct damos *s) |
398 | { |
399 | list_add_tail(new: &s->list, head: &ctx->schemes); |
400 | damos_set_next_apply_sis(s, ctx); |
401 | } |
402 | |
403 | static void damon_del_scheme(struct damos *s) |
404 | { |
405 | list_del(entry: &s->list); |
406 | } |
407 | |
408 | static void damon_free_scheme(struct damos *s) |
409 | { |
410 | kfree(objp: s); |
411 | } |
412 | |
413 | void damon_destroy_scheme(struct damos *s) |
414 | { |
415 | struct damos_quota_goal *g, *g_next; |
416 | struct damos_filter *f, *next; |
417 | |
418 | damos_for_each_quota_goal_safe(g, g_next, &s->quota) |
419 | damos_destroy_quota_goal(g); |
420 | |
421 | damos_for_each_filter_safe(f, next, s) |
422 | damos_destroy_filter(f); |
423 | damon_del_scheme(s); |
424 | damon_free_scheme(s); |
425 | } |
426 | |
427 | /* |
428 | * Construct a damon_target struct |
429 | * |
430 | * Returns the pointer to the new struct if success, or NULL otherwise |
431 | */ |
432 | struct damon_target *damon_new_target(void) |
433 | { |
434 | struct damon_target *t; |
435 | |
436 | t = kmalloc(size: sizeof(*t), GFP_KERNEL); |
437 | if (!t) |
438 | return NULL; |
439 | |
440 | t->pid = NULL; |
441 | t->nr_regions = 0; |
442 | INIT_LIST_HEAD(list: &t->regions_list); |
443 | INIT_LIST_HEAD(list: &t->list); |
444 | |
445 | return t; |
446 | } |
447 | |
448 | void damon_add_target(struct damon_ctx *ctx, struct damon_target *t) |
449 | { |
450 | list_add_tail(new: &t->list, head: &ctx->adaptive_targets); |
451 | } |
452 | |
453 | bool damon_targets_empty(struct damon_ctx *ctx) |
454 | { |
455 | return list_empty(head: &ctx->adaptive_targets); |
456 | } |
457 | |
458 | static void damon_del_target(struct damon_target *t) |
459 | { |
460 | list_del(entry: &t->list); |
461 | } |
462 | |
463 | void damon_free_target(struct damon_target *t) |
464 | { |
465 | struct damon_region *r, *next; |
466 | |
467 | damon_for_each_region_safe(r, next, t) |
468 | damon_free_region(r); |
469 | kfree(objp: t); |
470 | } |
471 | |
472 | void damon_destroy_target(struct damon_target *t) |
473 | { |
474 | damon_del_target(t); |
475 | damon_free_target(t); |
476 | } |
477 | |
478 | unsigned int damon_nr_regions(struct damon_target *t) |
479 | { |
480 | return t->nr_regions; |
481 | } |
482 | |
483 | struct damon_ctx *damon_new_ctx(void) |
484 | { |
485 | struct damon_ctx *ctx; |
486 | |
487 | ctx = kzalloc(size: sizeof(*ctx), GFP_KERNEL); |
488 | if (!ctx) |
489 | return NULL; |
490 | |
491 | init_completion(x: &ctx->kdamond_started); |
492 | |
493 | ctx->attrs.sample_interval = 5 * 1000; |
494 | ctx->attrs.aggr_interval = 100 * 1000; |
495 | ctx->attrs.ops_update_interval = 60 * 1000 * 1000; |
496 | |
497 | ctx->passed_sample_intervals = 0; |
498 | /* These will be set from kdamond_init_intervals_sis() */ |
499 | ctx->next_aggregation_sis = 0; |
500 | ctx->next_ops_update_sis = 0; |
501 | |
502 | mutex_init(&ctx->kdamond_lock); |
503 | |
504 | ctx->attrs.min_nr_regions = 10; |
505 | ctx->attrs.max_nr_regions = 1000; |
506 | |
507 | INIT_LIST_HEAD(list: &ctx->adaptive_targets); |
508 | INIT_LIST_HEAD(list: &ctx->schemes); |
509 | |
510 | return ctx; |
511 | } |
512 | |
513 | static void damon_destroy_targets(struct damon_ctx *ctx) |
514 | { |
515 | struct damon_target *t, *next_t; |
516 | |
517 | if (ctx->ops.cleanup) { |
518 | ctx->ops.cleanup(ctx); |
519 | return; |
520 | } |
521 | |
522 | damon_for_each_target_safe(t, next_t, ctx) |
523 | damon_destroy_target(t); |
524 | } |
525 | |
526 | void damon_destroy_ctx(struct damon_ctx *ctx) |
527 | { |
528 | struct damos *s, *next_s; |
529 | |
530 | damon_destroy_targets(ctx); |
531 | |
532 | damon_for_each_scheme_safe(s, next_s, ctx) |
533 | damon_destroy_scheme(s); |
534 | |
535 | kfree(objp: ctx); |
536 | } |
537 | |
538 | static unsigned int damon_age_for_new_attrs(unsigned int age, |
539 | struct damon_attrs *old_attrs, struct damon_attrs *new_attrs) |
540 | { |
541 | return age * old_attrs->aggr_interval / new_attrs->aggr_interval; |
542 | } |
543 | |
544 | /* convert access ratio in bp (per 10,000) to nr_accesses */ |
545 | static unsigned int damon_accesses_bp_to_nr_accesses( |
546 | unsigned int accesses_bp, struct damon_attrs *attrs) |
547 | { |
548 | return accesses_bp * damon_max_nr_accesses(attrs) / 10000; |
549 | } |
550 | |
551 | /* convert nr_accesses to access ratio in bp (per 10,000) */ |
552 | static unsigned int damon_nr_accesses_to_accesses_bp( |
553 | unsigned int nr_accesses, struct damon_attrs *attrs) |
554 | { |
555 | return nr_accesses * 10000 / damon_max_nr_accesses(attrs); |
556 | } |
557 | |
558 | static unsigned int damon_nr_accesses_for_new_attrs(unsigned int nr_accesses, |
559 | struct damon_attrs *old_attrs, struct damon_attrs *new_attrs) |
560 | { |
561 | return damon_accesses_bp_to_nr_accesses( |
562 | accesses_bp: damon_nr_accesses_to_accesses_bp( |
563 | nr_accesses, attrs: old_attrs), |
564 | attrs: new_attrs); |
565 | } |
566 | |
567 | static void damon_update_monitoring_result(struct damon_region *r, |
568 | struct damon_attrs *old_attrs, struct damon_attrs *new_attrs) |
569 | { |
570 | r->nr_accesses = damon_nr_accesses_for_new_attrs(nr_accesses: r->nr_accesses, |
571 | old_attrs, new_attrs); |
572 | r->nr_accesses_bp = r->nr_accesses * 10000; |
573 | r->age = damon_age_for_new_attrs(age: r->age, old_attrs, new_attrs); |
574 | } |
575 | |
576 | /* |
577 | * region->nr_accesses is the number of sampling intervals in the last |
578 | * aggregation interval that access to the region has found, and region->age is |
579 | * the number of aggregation intervals that its access pattern has maintained. |
580 | * For the reason, the real meaning of the two fields depend on current |
581 | * sampling interval and aggregation interval. This function updates |
582 | * ->nr_accesses and ->age of given damon_ctx's regions for new damon_attrs. |
583 | */ |
584 | static void damon_update_monitoring_results(struct damon_ctx *ctx, |
585 | struct damon_attrs *new_attrs) |
586 | { |
587 | struct damon_attrs *old_attrs = &ctx->attrs; |
588 | struct damon_target *t; |
589 | struct damon_region *r; |
590 | |
591 | /* if any interval is zero, simply forgive conversion */ |
592 | if (!old_attrs->sample_interval || !old_attrs->aggr_interval || |
593 | !new_attrs->sample_interval || |
594 | !new_attrs->aggr_interval) |
595 | return; |
596 | |
597 | damon_for_each_target(t, ctx) |
598 | damon_for_each_region(r, t) |
599 | damon_update_monitoring_result( |
600 | r, old_attrs, new_attrs); |
601 | } |
602 | |
603 | /** |
604 | * damon_set_attrs() - Set attributes for the monitoring. |
605 | * @ctx: monitoring context |
606 | * @attrs: monitoring attributes |
607 | * |
608 | * This function should be called while the kdamond is not running, or an |
609 | * access check results aggregation is not ongoing (e.g., from |
610 | * &struct damon_callback->after_aggregation or |
611 | * &struct damon_callback->after_wmarks_check callbacks). |
612 | * |
613 | * Every time interval is in micro-seconds. |
614 | * |
615 | * Return: 0 on success, negative error code otherwise. |
616 | */ |
617 | int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs) |
618 | { |
619 | unsigned long sample_interval = attrs->sample_interval ? |
620 | attrs->sample_interval : 1; |
621 | struct damos *s; |
622 | |
623 | if (attrs->min_nr_regions < 3) |
624 | return -EINVAL; |
625 | if (attrs->min_nr_regions > attrs->max_nr_regions) |
626 | return -EINVAL; |
627 | if (attrs->sample_interval > attrs->aggr_interval) |
628 | return -EINVAL; |
629 | |
630 | ctx->next_aggregation_sis = ctx->passed_sample_intervals + |
631 | attrs->aggr_interval / sample_interval; |
632 | ctx->next_ops_update_sis = ctx->passed_sample_intervals + |
633 | attrs->ops_update_interval / sample_interval; |
634 | |
635 | damon_update_monitoring_results(ctx, new_attrs: attrs); |
636 | ctx->attrs = *attrs; |
637 | |
638 | damon_for_each_scheme(s, ctx) |
639 | damos_set_next_apply_sis(s, ctx); |
640 | |
641 | return 0; |
642 | } |
643 | |
644 | /** |
645 | * damon_set_schemes() - Set data access monitoring based operation schemes. |
646 | * @ctx: monitoring context |
647 | * @schemes: array of the schemes |
648 | * @nr_schemes: number of entries in @schemes |
649 | * |
650 | * This function should not be called while the kdamond of the context is |
651 | * running. |
652 | */ |
653 | void damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes, |
654 | ssize_t nr_schemes) |
655 | { |
656 | struct damos *s, *next; |
657 | ssize_t i; |
658 | |
659 | damon_for_each_scheme_safe(s, next, ctx) |
660 | damon_destroy_scheme(s); |
661 | for (i = 0; i < nr_schemes; i++) |
662 | damon_add_scheme(ctx, s: schemes[i]); |
663 | } |
664 | |
665 | /** |
666 | * damon_nr_running_ctxs() - Return number of currently running contexts. |
667 | */ |
668 | int damon_nr_running_ctxs(void) |
669 | { |
670 | int nr_ctxs; |
671 | |
672 | mutex_lock(&damon_lock); |
673 | nr_ctxs = nr_running_ctxs; |
674 | mutex_unlock(lock: &damon_lock); |
675 | |
676 | return nr_ctxs; |
677 | } |
678 | |
679 | /* Returns the size upper limit for each monitoring region */ |
680 | static unsigned long damon_region_sz_limit(struct damon_ctx *ctx) |
681 | { |
682 | struct damon_target *t; |
683 | struct damon_region *r; |
684 | unsigned long sz = 0; |
685 | |
686 | damon_for_each_target(t, ctx) { |
687 | damon_for_each_region(r, t) |
688 | sz += damon_sz_region(r); |
689 | } |
690 | |
691 | if (ctx->attrs.min_nr_regions) |
692 | sz /= ctx->attrs.min_nr_regions; |
693 | if (sz < DAMON_MIN_REGION) |
694 | sz = DAMON_MIN_REGION; |
695 | |
696 | return sz; |
697 | } |
698 | |
699 | static int kdamond_fn(void *data); |
700 | |
701 | /* |
702 | * __damon_start() - Starts monitoring with given context. |
703 | * @ctx: monitoring context |
704 | * |
705 | * This function should be called while damon_lock is hold. |
706 | * |
707 | * Return: 0 on success, negative error code otherwise. |
708 | */ |
709 | static int __damon_start(struct damon_ctx *ctx) |
710 | { |
711 | int err = -EBUSY; |
712 | |
713 | mutex_lock(&ctx->kdamond_lock); |
714 | if (!ctx->kdamond) { |
715 | err = 0; |
716 | reinit_completion(x: &ctx->kdamond_started); |
717 | ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d" , |
718 | nr_running_ctxs); |
719 | if (IS_ERR(ptr: ctx->kdamond)) { |
720 | err = PTR_ERR(ptr: ctx->kdamond); |
721 | ctx->kdamond = NULL; |
722 | } else { |
723 | wait_for_completion(&ctx->kdamond_started); |
724 | } |
725 | } |
726 | mutex_unlock(lock: &ctx->kdamond_lock); |
727 | |
728 | return err; |
729 | } |
730 | |
731 | /** |
732 | * damon_start() - Starts the monitorings for a given group of contexts. |
733 | * @ctxs: an array of the pointers for contexts to start monitoring |
734 | * @nr_ctxs: size of @ctxs |
735 | * @exclusive: exclusiveness of this contexts group |
736 | * |
737 | * This function starts a group of monitoring threads for a group of monitoring |
738 | * contexts. One thread per each context is created and run in parallel. The |
739 | * caller should handle synchronization between the threads by itself. If |
740 | * @exclusive is true and a group of threads that created by other |
741 | * 'damon_start()' call is currently running, this function does nothing but |
742 | * returns -EBUSY. |
743 | * |
744 | * Return: 0 on success, negative error code otherwise. |
745 | */ |
746 | int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive) |
747 | { |
748 | int i; |
749 | int err = 0; |
750 | |
751 | mutex_lock(&damon_lock); |
752 | if ((exclusive && nr_running_ctxs) || |
753 | (!exclusive && running_exclusive_ctxs)) { |
754 | mutex_unlock(lock: &damon_lock); |
755 | return -EBUSY; |
756 | } |
757 | |
758 | for (i = 0; i < nr_ctxs; i++) { |
759 | err = __damon_start(ctx: ctxs[i]); |
760 | if (err) |
761 | break; |
762 | nr_running_ctxs++; |
763 | } |
764 | if (exclusive && nr_running_ctxs) |
765 | running_exclusive_ctxs = true; |
766 | mutex_unlock(lock: &damon_lock); |
767 | |
768 | return err; |
769 | } |
770 | |
771 | /* |
772 | * __damon_stop() - Stops monitoring of a given context. |
773 | * @ctx: monitoring context |
774 | * |
775 | * Return: 0 on success, negative error code otherwise. |
776 | */ |
777 | static int __damon_stop(struct damon_ctx *ctx) |
778 | { |
779 | struct task_struct *tsk; |
780 | |
781 | mutex_lock(&ctx->kdamond_lock); |
782 | tsk = ctx->kdamond; |
783 | if (tsk) { |
784 | get_task_struct(t: tsk); |
785 | mutex_unlock(lock: &ctx->kdamond_lock); |
786 | kthread_stop_put(k: tsk); |
787 | return 0; |
788 | } |
789 | mutex_unlock(lock: &ctx->kdamond_lock); |
790 | |
791 | return -EPERM; |
792 | } |
793 | |
794 | /** |
795 | * damon_stop() - Stops the monitorings for a given group of contexts. |
796 | * @ctxs: an array of the pointers for contexts to stop monitoring |
797 | * @nr_ctxs: size of @ctxs |
798 | * |
799 | * Return: 0 on success, negative error code otherwise. |
800 | */ |
801 | int damon_stop(struct damon_ctx **ctxs, int nr_ctxs) |
802 | { |
803 | int i, err = 0; |
804 | |
805 | for (i = 0; i < nr_ctxs; i++) { |
806 | /* nr_running_ctxs is decremented in kdamond_fn */ |
807 | err = __damon_stop(ctx: ctxs[i]); |
808 | if (err) |
809 | break; |
810 | } |
811 | return err; |
812 | } |
813 | |
814 | /* |
815 | * Reset the aggregated monitoring results ('nr_accesses' of each region). |
816 | */ |
817 | static void kdamond_reset_aggregated(struct damon_ctx *c) |
818 | { |
819 | struct damon_target *t; |
820 | unsigned int ti = 0; /* target's index */ |
821 | |
822 | damon_for_each_target(t, c) { |
823 | struct damon_region *r; |
824 | |
825 | damon_for_each_region(r, t) { |
826 | trace_damon_aggregated(target_id: ti, r, nr_regions: damon_nr_regions(t)); |
827 | r->last_nr_accesses = r->nr_accesses; |
828 | r->nr_accesses = 0; |
829 | } |
830 | ti++; |
831 | } |
832 | } |
833 | |
834 | static void damon_split_region_at(struct damon_target *t, |
835 | struct damon_region *r, unsigned long sz_r); |
836 | |
837 | static bool __damos_valid_target(struct damon_region *r, struct damos *s) |
838 | { |
839 | unsigned long sz; |
840 | unsigned int nr_accesses = r->nr_accesses_bp / 10000; |
841 | |
842 | sz = damon_sz_region(r); |
843 | return s->pattern.min_sz_region <= sz && |
844 | sz <= s->pattern.max_sz_region && |
845 | s->pattern.min_nr_accesses <= nr_accesses && |
846 | nr_accesses <= s->pattern.max_nr_accesses && |
847 | s->pattern.min_age_region <= r->age && |
848 | r->age <= s->pattern.max_age_region; |
849 | } |
850 | |
851 | static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t, |
852 | struct damon_region *r, struct damos *s) |
853 | { |
854 | bool ret = __damos_valid_target(r, s); |
855 | |
856 | if (!ret || !s->quota.esz || !c->ops.get_scheme_score) |
857 | return ret; |
858 | |
859 | return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score; |
860 | } |
861 | |
862 | /* |
863 | * damos_skip_charged_region() - Check if the given region or starting part of |
864 | * it is already charged for the DAMOS quota. |
865 | * @t: The target of the region. |
866 | * @rp: The pointer to the region. |
867 | * @s: The scheme to be applied. |
868 | * |
869 | * If a quota of a scheme has exceeded in a quota charge window, the scheme's |
870 | * action would applied to only a part of the target access pattern fulfilling |
871 | * regions. To avoid applying the scheme action to only already applied |
872 | * regions, DAMON skips applying the scheme action to the regions that charged |
873 | * in the previous charge window. |
874 | * |
875 | * This function checks if a given region should be skipped or not for the |
876 | * reason. If only the starting part of the region has previously charged, |
877 | * this function splits the region into two so that the second one covers the |
878 | * area that not charged in the previous charge widnow and saves the second |
879 | * region in *rp and returns false, so that the caller can apply DAMON action |
880 | * to the second one. |
881 | * |
882 | * Return: true if the region should be entirely skipped, false otherwise. |
883 | */ |
884 | static bool damos_skip_charged_region(struct damon_target *t, |
885 | struct damon_region **rp, struct damos *s) |
886 | { |
887 | struct damon_region *r = *rp; |
888 | struct damos_quota *quota = &s->quota; |
889 | unsigned long sz_to_skip; |
890 | |
891 | /* Skip previously charged regions */ |
892 | if (quota->charge_target_from) { |
893 | if (t != quota->charge_target_from) |
894 | return true; |
895 | if (r == damon_last_region(t)) { |
896 | quota->charge_target_from = NULL; |
897 | quota->charge_addr_from = 0; |
898 | return true; |
899 | } |
900 | if (quota->charge_addr_from && |
901 | r->ar.end <= quota->charge_addr_from) |
902 | return true; |
903 | |
904 | if (quota->charge_addr_from && r->ar.start < |
905 | quota->charge_addr_from) { |
906 | sz_to_skip = ALIGN_DOWN(quota->charge_addr_from - |
907 | r->ar.start, DAMON_MIN_REGION); |
908 | if (!sz_to_skip) { |
909 | if (damon_sz_region(r) <= DAMON_MIN_REGION) |
910 | return true; |
911 | sz_to_skip = DAMON_MIN_REGION; |
912 | } |
913 | damon_split_region_at(t, r, sz_r: sz_to_skip); |
914 | r = damon_next_region(r); |
915 | *rp = r; |
916 | } |
917 | quota->charge_target_from = NULL; |
918 | quota->charge_addr_from = 0; |
919 | } |
920 | return false; |
921 | } |
922 | |
923 | static void damos_update_stat(struct damos *s, |
924 | unsigned long sz_tried, unsigned long sz_applied) |
925 | { |
926 | s->stat.nr_tried++; |
927 | s->stat.sz_tried += sz_tried; |
928 | if (sz_applied) |
929 | s->stat.nr_applied++; |
930 | s->stat.sz_applied += sz_applied; |
931 | } |
932 | |
933 | static bool __damos_filter_out(struct damon_ctx *ctx, struct damon_target *t, |
934 | struct damon_region *r, struct damos_filter *filter) |
935 | { |
936 | bool matched = false; |
937 | struct damon_target *ti; |
938 | int target_idx = 0; |
939 | unsigned long start, end; |
940 | |
941 | switch (filter->type) { |
942 | case DAMOS_FILTER_TYPE_TARGET: |
943 | damon_for_each_target(ti, ctx) { |
944 | if (ti == t) |
945 | break; |
946 | target_idx++; |
947 | } |
948 | matched = target_idx == filter->target_idx; |
949 | break; |
950 | case DAMOS_FILTER_TYPE_ADDR: |
951 | start = ALIGN_DOWN(filter->addr_range.start, DAMON_MIN_REGION); |
952 | end = ALIGN_DOWN(filter->addr_range.end, DAMON_MIN_REGION); |
953 | |
954 | /* inside the range */ |
955 | if (start <= r->ar.start && r->ar.end <= end) { |
956 | matched = true; |
957 | break; |
958 | } |
959 | /* outside of the range */ |
960 | if (r->ar.end <= start || end <= r->ar.start) { |
961 | matched = false; |
962 | break; |
963 | } |
964 | /* start before the range and overlap */ |
965 | if (r->ar.start < start) { |
966 | damon_split_region_at(t, r, sz_r: start - r->ar.start); |
967 | matched = false; |
968 | break; |
969 | } |
970 | /* start inside the range */ |
971 | damon_split_region_at(t, r, sz_r: end - r->ar.start); |
972 | matched = true; |
973 | break; |
974 | default: |
975 | return false; |
976 | } |
977 | |
978 | return matched == filter->matching; |
979 | } |
980 | |
981 | static bool damos_filter_out(struct damon_ctx *ctx, struct damon_target *t, |
982 | struct damon_region *r, struct damos *s) |
983 | { |
984 | struct damos_filter *filter; |
985 | |
986 | damos_for_each_filter(filter, s) { |
987 | if (__damos_filter_out(ctx, t, r, filter)) |
988 | return true; |
989 | } |
990 | return false; |
991 | } |
992 | |
993 | static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t, |
994 | struct damon_region *r, struct damos *s) |
995 | { |
996 | struct damos_quota *quota = &s->quota; |
997 | unsigned long sz = damon_sz_region(r); |
998 | struct timespec64 begin, end; |
999 | unsigned long sz_applied = 0; |
1000 | int err = 0; |
1001 | /* |
1002 | * We plan to support multiple context per kdamond, as DAMON sysfs |
1003 | * implies with 'nr_contexts' file. Nevertheless, only single context |
1004 | * per kdamond is supported for now. So, we can simply use '0' context |
1005 | * index here. |
1006 | */ |
1007 | unsigned int cidx = 0; |
1008 | struct damos *siter; /* schemes iterator */ |
1009 | unsigned int sidx = 0; |
1010 | struct damon_target *titer; /* targets iterator */ |
1011 | unsigned int tidx = 0; |
1012 | bool do_trace = false; |
1013 | |
1014 | /* get indices for trace_damos_before_apply() */ |
1015 | if (trace_damos_before_apply_enabled()) { |
1016 | damon_for_each_scheme(siter, c) { |
1017 | if (siter == s) |
1018 | break; |
1019 | sidx++; |
1020 | } |
1021 | damon_for_each_target(titer, c) { |
1022 | if (titer == t) |
1023 | break; |
1024 | tidx++; |
1025 | } |
1026 | do_trace = true; |
1027 | } |
1028 | |
1029 | if (c->ops.apply_scheme) { |
1030 | if (quota->esz && quota->charged_sz + sz > quota->esz) { |
1031 | sz = ALIGN_DOWN(quota->esz - quota->charged_sz, |
1032 | DAMON_MIN_REGION); |
1033 | if (!sz) |
1034 | goto update_stat; |
1035 | damon_split_region_at(t, r, sz_r: sz); |
1036 | } |
1037 | if (damos_filter_out(ctx: c, t, r, s)) |
1038 | return; |
1039 | ktime_get_coarse_ts64(ts: &begin); |
1040 | if (c->callback.before_damos_apply) |
1041 | err = c->callback.before_damos_apply(c, t, r, s); |
1042 | if (!err) { |
1043 | trace_damos_before_apply(context_idx: cidx, scheme_idx: sidx, target_idx: tidx, r, |
1044 | nr_regions: damon_nr_regions(t), do_trace); |
1045 | sz_applied = c->ops.apply_scheme(c, t, r, s); |
1046 | } |
1047 | ktime_get_coarse_ts64(ts: &end); |
1048 | quota->total_charged_ns += timespec64_to_ns(ts: &end) - |
1049 | timespec64_to_ns(ts: &begin); |
1050 | quota->charged_sz += sz; |
1051 | if (quota->esz && quota->charged_sz >= quota->esz) { |
1052 | quota->charge_target_from = t; |
1053 | quota->charge_addr_from = r->ar.end + 1; |
1054 | } |
1055 | } |
1056 | if (s->action != DAMOS_STAT) |
1057 | r->age = 0; |
1058 | |
1059 | update_stat: |
1060 | damos_update_stat(s, sz_tried: sz, sz_applied); |
1061 | } |
1062 | |
1063 | static void damon_do_apply_schemes(struct damon_ctx *c, |
1064 | struct damon_target *t, |
1065 | struct damon_region *r) |
1066 | { |
1067 | struct damos *s; |
1068 | |
1069 | damon_for_each_scheme(s, c) { |
1070 | struct damos_quota *quota = &s->quota; |
1071 | |
1072 | if (c->passed_sample_intervals != s->next_apply_sis) |
1073 | continue; |
1074 | |
1075 | if (!s->wmarks.activated) |
1076 | continue; |
1077 | |
1078 | /* Check the quota */ |
1079 | if (quota->esz && quota->charged_sz >= quota->esz) |
1080 | continue; |
1081 | |
1082 | if (damos_skip_charged_region(t, rp: &r, s)) |
1083 | continue; |
1084 | |
1085 | if (!damos_valid_target(c, t, r, s)) |
1086 | continue; |
1087 | |
1088 | damos_apply_scheme(c, t, r, s); |
1089 | } |
1090 | } |
1091 | |
1092 | /* |
1093 | * damon_feed_loop_next_input() - get next input to achieve a target score. |
1094 | * @last_input The last input. |
1095 | * @score Current score that made with @last_input. |
1096 | * |
1097 | * Calculate next input to achieve the target score, based on the last input |
1098 | * and current score. Assuming the input and the score are positively |
1099 | * proportional, calculate how much compensation should be added to or |
1100 | * subtracted from the last input as a proportion of the last input. Avoid |
1101 | * next input always being zero by setting it non-zero always. In short form |
1102 | * (assuming support of float and signed calculations), the algorithm is as |
1103 | * below. |
1104 | * |
1105 | * next_input = max(last_input * ((goal - current) / goal + 1), 1) |
1106 | * |
1107 | * For simple implementation, we assume the target score is always 10,000. The |
1108 | * caller should adjust @score for this. |
1109 | * |
1110 | * Returns next input that assumed to achieve the target score. |
1111 | */ |
1112 | static unsigned long damon_feed_loop_next_input(unsigned long last_input, |
1113 | unsigned long score) |
1114 | { |
1115 | const unsigned long goal = 10000; |
1116 | unsigned long score_goal_diff = max(goal, score) - min(goal, score); |
1117 | unsigned long score_goal_diff_bp = score_goal_diff * 10000 / goal; |
1118 | unsigned long compensation = last_input * score_goal_diff_bp / 10000; |
1119 | /* Set minimum input as 10000 to avoid compensation be zero */ |
1120 | const unsigned long min_input = 10000; |
1121 | |
1122 | if (goal > score) |
1123 | return last_input + compensation; |
1124 | if (last_input > compensation + min_input) |
1125 | return last_input - compensation; |
1126 | return min_input; |
1127 | } |
1128 | |
1129 | #ifdef CONFIG_PSI |
1130 | |
1131 | static u64 damos_get_some_mem_psi_total(void) |
1132 | { |
1133 | if (static_branch_likely(&psi_disabled)) |
1134 | return 0; |
1135 | return div_u64(dividend: psi_system.total[PSI_AVGS][PSI_MEM * 2], |
1136 | NSEC_PER_USEC); |
1137 | } |
1138 | |
1139 | #else /* CONFIG_PSI */ |
1140 | |
1141 | static inline u64 damos_get_some_mem_psi_total(void) |
1142 | { |
1143 | return 0; |
1144 | }; |
1145 | |
1146 | #endif /* CONFIG_PSI */ |
1147 | |
1148 | static void damos_set_quota_goal_current_value(struct damos_quota_goal *goal) |
1149 | { |
1150 | u64 now_psi_total; |
1151 | |
1152 | switch (goal->metric) { |
1153 | case DAMOS_QUOTA_USER_INPUT: |
1154 | /* User should already set goal->current_value */ |
1155 | break; |
1156 | case DAMOS_QUOTA_SOME_MEM_PSI_US: |
1157 | now_psi_total = damos_get_some_mem_psi_total(); |
1158 | goal->current_value = now_psi_total - goal->last_psi_total; |
1159 | goal->last_psi_total = now_psi_total; |
1160 | break; |
1161 | default: |
1162 | break; |
1163 | } |
1164 | } |
1165 | |
1166 | /* Return the highest score since it makes schemes least aggressive */ |
1167 | static unsigned long damos_quota_score(struct damos_quota *quota) |
1168 | { |
1169 | struct damos_quota_goal *goal; |
1170 | unsigned long highest_score = 0; |
1171 | |
1172 | damos_for_each_quota_goal(goal, quota) { |
1173 | damos_set_quota_goal_current_value(goal); |
1174 | highest_score = max(highest_score, |
1175 | goal->current_value * 10000 / |
1176 | goal->target_value); |
1177 | } |
1178 | |
1179 | return highest_score; |
1180 | } |
1181 | |
1182 | /* |
1183 | * Called only if quota->ms, or quota->sz are set, or quota->goals is not empty |
1184 | */ |
1185 | static void damos_set_effective_quota(struct damos_quota *quota) |
1186 | { |
1187 | unsigned long throughput; |
1188 | unsigned long esz; |
1189 | |
1190 | if (!quota->ms && list_empty(head: "a->goals)) { |
1191 | quota->esz = quota->sz; |
1192 | return; |
1193 | } |
1194 | |
1195 | if (!list_empty(head: "a->goals)) { |
1196 | unsigned long score = damos_quota_score(quota); |
1197 | |
1198 | quota->esz_bp = damon_feed_loop_next_input( |
1199 | max(quota->esz_bp, 10000UL), |
1200 | score); |
1201 | esz = quota->esz_bp / 10000; |
1202 | } |
1203 | |
1204 | if (quota->ms) { |
1205 | if (quota->total_charged_ns) |
1206 | throughput = quota->total_charged_sz * 1000000 / |
1207 | quota->total_charged_ns; |
1208 | else |
1209 | throughput = PAGE_SIZE * 1024; |
1210 | if (!list_empty(head: "a->goals)) |
1211 | esz = min(throughput * quota->ms, esz); |
1212 | else |
1213 | esz = throughput * quota->ms; |
1214 | } |
1215 | |
1216 | if (quota->sz && quota->sz < esz) |
1217 | esz = quota->sz; |
1218 | |
1219 | quota->esz = esz; |
1220 | } |
1221 | |
1222 | static void damos_adjust_quota(struct damon_ctx *c, struct damos *s) |
1223 | { |
1224 | struct damos_quota *quota = &s->quota; |
1225 | struct damon_target *t; |
1226 | struct damon_region *r; |
1227 | unsigned long cumulated_sz; |
1228 | unsigned int score, max_score = 0; |
1229 | |
1230 | if (!quota->ms && !quota->sz && list_empty(head: "a->goals)) |
1231 | return; |
1232 | |
1233 | /* New charge window starts */ |
1234 | if (time_after_eq(jiffies, quota->charged_from + |
1235 | msecs_to_jiffies(quota->reset_interval))) { |
1236 | if (quota->esz && quota->charged_sz >= quota->esz) |
1237 | s->stat.qt_exceeds++; |
1238 | quota->total_charged_sz += quota->charged_sz; |
1239 | quota->charged_from = jiffies; |
1240 | quota->charged_sz = 0; |
1241 | damos_set_effective_quota(quota); |
1242 | } |
1243 | |
1244 | if (!c->ops.get_scheme_score) |
1245 | return; |
1246 | |
1247 | /* Fill up the score histogram */ |
1248 | memset(quota->histogram, 0, sizeof(quota->histogram)); |
1249 | damon_for_each_target(t, c) { |
1250 | damon_for_each_region(r, t) { |
1251 | if (!__damos_valid_target(r, s)) |
1252 | continue; |
1253 | score = c->ops.get_scheme_score(c, t, r, s); |
1254 | quota->histogram[score] += damon_sz_region(r); |
1255 | if (score > max_score) |
1256 | max_score = score; |
1257 | } |
1258 | } |
1259 | |
1260 | /* Set the min score limit */ |
1261 | for (cumulated_sz = 0, score = max_score; ; score--) { |
1262 | cumulated_sz += quota->histogram[score]; |
1263 | if (cumulated_sz >= quota->esz || !score) |
1264 | break; |
1265 | } |
1266 | quota->min_score = score; |
1267 | } |
1268 | |
1269 | static void kdamond_apply_schemes(struct damon_ctx *c) |
1270 | { |
1271 | struct damon_target *t; |
1272 | struct damon_region *r, *next_r; |
1273 | struct damos *s; |
1274 | unsigned long sample_interval = c->attrs.sample_interval ? |
1275 | c->attrs.sample_interval : 1; |
1276 | bool has_schemes_to_apply = false; |
1277 | |
1278 | damon_for_each_scheme(s, c) { |
1279 | if (c->passed_sample_intervals != s->next_apply_sis) |
1280 | continue; |
1281 | |
1282 | if (!s->wmarks.activated) |
1283 | continue; |
1284 | |
1285 | has_schemes_to_apply = true; |
1286 | |
1287 | damos_adjust_quota(c, s); |
1288 | } |
1289 | |
1290 | if (!has_schemes_to_apply) |
1291 | return; |
1292 | |
1293 | damon_for_each_target(t, c) { |
1294 | damon_for_each_region_safe(r, next_r, t) |
1295 | damon_do_apply_schemes(c, t, r); |
1296 | } |
1297 | |
1298 | damon_for_each_scheme(s, c) { |
1299 | if (c->passed_sample_intervals != s->next_apply_sis) |
1300 | continue; |
1301 | s->next_apply_sis += |
1302 | (s->apply_interval_us ? s->apply_interval_us : |
1303 | c->attrs.aggr_interval) / sample_interval; |
1304 | } |
1305 | } |
1306 | |
1307 | /* |
1308 | * Merge two adjacent regions into one region |
1309 | */ |
1310 | static void damon_merge_two_regions(struct damon_target *t, |
1311 | struct damon_region *l, struct damon_region *r) |
1312 | { |
1313 | unsigned long sz_l = damon_sz_region(r: l), sz_r = damon_sz_region(r); |
1314 | |
1315 | l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) / |
1316 | (sz_l + sz_r); |
1317 | l->nr_accesses_bp = l->nr_accesses * 10000; |
1318 | l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r); |
1319 | l->ar.end = r->ar.end; |
1320 | damon_destroy_region(r, t); |
1321 | } |
1322 | |
1323 | /* |
1324 | * Merge adjacent regions having similar access frequencies |
1325 | * |
1326 | * t target affected by this merge operation |
1327 | * thres '->nr_accesses' diff threshold for the merge |
1328 | * sz_limit size upper limit of each region |
1329 | */ |
1330 | static void damon_merge_regions_of(struct damon_target *t, unsigned int thres, |
1331 | unsigned long sz_limit) |
1332 | { |
1333 | struct damon_region *r, *prev = NULL, *next; |
1334 | |
1335 | damon_for_each_region_safe(r, next, t) { |
1336 | if (abs(r->nr_accesses - r->last_nr_accesses) > thres) |
1337 | r->age = 0; |
1338 | else |
1339 | r->age++; |
1340 | |
1341 | if (prev && prev->ar.end == r->ar.start && |
1342 | abs(prev->nr_accesses - r->nr_accesses) <= thres && |
1343 | damon_sz_region(r: prev) + damon_sz_region(r) <= sz_limit) |
1344 | damon_merge_two_regions(t, l: prev, r); |
1345 | else |
1346 | prev = r; |
1347 | } |
1348 | } |
1349 | |
1350 | /* |
1351 | * Merge adjacent regions having similar access frequencies |
1352 | * |
1353 | * threshold '->nr_accesses' diff threshold for the merge |
1354 | * sz_limit size upper limit of each region |
1355 | * |
1356 | * This function merges monitoring target regions which are adjacent and their |
1357 | * access frequencies are similar. This is for minimizing the monitoring |
1358 | * overhead under the dynamically changeable access pattern. If a merge was |
1359 | * unnecessarily made, later 'kdamond_split_regions()' will revert it. |
1360 | */ |
1361 | static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold, |
1362 | unsigned long sz_limit) |
1363 | { |
1364 | struct damon_target *t; |
1365 | |
1366 | damon_for_each_target(t, c) |
1367 | damon_merge_regions_of(t, thres: threshold, sz_limit); |
1368 | } |
1369 | |
1370 | /* |
1371 | * Split a region in two |
1372 | * |
1373 | * r the region to be split |
1374 | * sz_r size of the first sub-region that will be made |
1375 | */ |
1376 | static void damon_split_region_at(struct damon_target *t, |
1377 | struct damon_region *r, unsigned long sz_r) |
1378 | { |
1379 | struct damon_region *new; |
1380 | |
1381 | new = damon_new_region(start: r->ar.start + sz_r, end: r->ar.end); |
1382 | if (!new) |
1383 | return; |
1384 | |
1385 | r->ar.end = new->ar.start; |
1386 | |
1387 | new->age = r->age; |
1388 | new->last_nr_accesses = r->last_nr_accesses; |
1389 | new->nr_accesses_bp = r->nr_accesses_bp; |
1390 | new->nr_accesses = r->nr_accesses; |
1391 | |
1392 | damon_insert_region(r: new, prev: r, next: damon_next_region(r), t); |
1393 | } |
1394 | |
1395 | /* Split every region in the given target into 'nr_subs' regions */ |
1396 | static void damon_split_regions_of(struct damon_target *t, int nr_subs) |
1397 | { |
1398 | struct damon_region *r, *next; |
1399 | unsigned long sz_region, sz_sub = 0; |
1400 | int i; |
1401 | |
1402 | damon_for_each_region_safe(r, next, t) { |
1403 | sz_region = damon_sz_region(r); |
1404 | |
1405 | for (i = 0; i < nr_subs - 1 && |
1406 | sz_region > 2 * DAMON_MIN_REGION; i++) { |
1407 | /* |
1408 | * Randomly select size of left sub-region to be at |
1409 | * least 10 percent and at most 90% of original region |
1410 | */ |
1411 | sz_sub = ALIGN_DOWN(damon_rand(1, 10) * |
1412 | sz_region / 10, DAMON_MIN_REGION); |
1413 | /* Do not allow blank region */ |
1414 | if (sz_sub == 0 || sz_sub >= sz_region) |
1415 | continue; |
1416 | |
1417 | damon_split_region_at(t, r, sz_r: sz_sub); |
1418 | sz_region = sz_sub; |
1419 | } |
1420 | } |
1421 | } |
1422 | |
1423 | /* |
1424 | * Split every target region into randomly-sized small regions |
1425 | * |
1426 | * This function splits every target region into random-sized small regions if |
1427 | * current total number of the regions is equal or smaller than half of the |
1428 | * user-specified maximum number of regions. This is for maximizing the |
1429 | * monitoring accuracy under the dynamically changeable access patterns. If a |
1430 | * split was unnecessarily made, later 'kdamond_merge_regions()' will revert |
1431 | * it. |
1432 | */ |
1433 | static void kdamond_split_regions(struct damon_ctx *ctx) |
1434 | { |
1435 | struct damon_target *t; |
1436 | unsigned int nr_regions = 0; |
1437 | static unsigned int last_nr_regions; |
1438 | int nr_subregions = 2; |
1439 | |
1440 | damon_for_each_target(t, ctx) |
1441 | nr_regions += damon_nr_regions(t); |
1442 | |
1443 | if (nr_regions > ctx->attrs.max_nr_regions / 2) |
1444 | return; |
1445 | |
1446 | /* Maybe the middle of the region has different access frequency */ |
1447 | if (last_nr_regions == nr_regions && |
1448 | nr_regions < ctx->attrs.max_nr_regions / 3) |
1449 | nr_subregions = 3; |
1450 | |
1451 | damon_for_each_target(t, ctx) |
1452 | damon_split_regions_of(t, nr_subs: nr_subregions); |
1453 | |
1454 | last_nr_regions = nr_regions; |
1455 | } |
1456 | |
1457 | /* |
1458 | * Check whether current monitoring should be stopped |
1459 | * |
1460 | * The monitoring is stopped when either the user requested to stop, or all |
1461 | * monitoring targets are invalid. |
1462 | * |
1463 | * Returns true if need to stop current monitoring. |
1464 | */ |
1465 | static bool kdamond_need_stop(struct damon_ctx *ctx) |
1466 | { |
1467 | struct damon_target *t; |
1468 | |
1469 | if (kthread_should_stop()) |
1470 | return true; |
1471 | |
1472 | if (!ctx->ops.target_valid) |
1473 | return false; |
1474 | |
1475 | damon_for_each_target(t, ctx) { |
1476 | if (ctx->ops.target_valid(t)) |
1477 | return false; |
1478 | } |
1479 | |
1480 | return true; |
1481 | } |
1482 | |
1483 | static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric) |
1484 | { |
1485 | switch (metric) { |
1486 | case DAMOS_WMARK_FREE_MEM_RATE: |
1487 | return global_zone_page_state(item: NR_FREE_PAGES) * 1000 / |
1488 | totalram_pages(); |
1489 | default: |
1490 | break; |
1491 | } |
1492 | return -EINVAL; |
1493 | } |
1494 | |
1495 | /* |
1496 | * Returns zero if the scheme is active. Else, returns time to wait for next |
1497 | * watermark check in micro-seconds. |
1498 | */ |
1499 | static unsigned long damos_wmark_wait_us(struct damos *scheme) |
1500 | { |
1501 | unsigned long metric; |
1502 | |
1503 | if (scheme->wmarks.metric == DAMOS_WMARK_NONE) |
1504 | return 0; |
1505 | |
1506 | metric = damos_wmark_metric_value(metric: scheme->wmarks.metric); |
1507 | /* higher than high watermark or lower than low watermark */ |
1508 | if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) { |
1509 | if (scheme->wmarks.activated) |
1510 | pr_debug("deactivate a scheme (%d) for %s wmark\n" , |
1511 | scheme->action, |
1512 | metric > scheme->wmarks.high ? |
1513 | "high" : "low" ); |
1514 | scheme->wmarks.activated = false; |
1515 | return scheme->wmarks.interval; |
1516 | } |
1517 | |
1518 | /* inactive and higher than middle watermark */ |
1519 | if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) && |
1520 | !scheme->wmarks.activated) |
1521 | return scheme->wmarks.interval; |
1522 | |
1523 | if (!scheme->wmarks.activated) |
1524 | pr_debug("activate a scheme (%d)\n" , scheme->action); |
1525 | scheme->wmarks.activated = true; |
1526 | return 0; |
1527 | } |
1528 | |
1529 | static void kdamond_usleep(unsigned long usecs) |
1530 | { |
1531 | /* See Documentation/timers/timers-howto.rst for the thresholds */ |
1532 | if (usecs > 20 * USEC_PER_MSEC) |
1533 | schedule_timeout_idle(timeout: usecs_to_jiffies(u: usecs)); |
1534 | else |
1535 | usleep_idle_range(min: usecs, max: usecs + 1); |
1536 | } |
1537 | |
1538 | /* Returns negative error code if it's not activated but should return */ |
1539 | static int kdamond_wait_activation(struct damon_ctx *ctx) |
1540 | { |
1541 | struct damos *s; |
1542 | unsigned long wait_time; |
1543 | unsigned long min_wait_time = 0; |
1544 | bool init_wait_time = false; |
1545 | |
1546 | while (!kdamond_need_stop(ctx)) { |
1547 | damon_for_each_scheme(s, ctx) { |
1548 | wait_time = damos_wmark_wait_us(scheme: s); |
1549 | if (!init_wait_time || wait_time < min_wait_time) { |
1550 | init_wait_time = true; |
1551 | min_wait_time = wait_time; |
1552 | } |
1553 | } |
1554 | if (!min_wait_time) |
1555 | return 0; |
1556 | |
1557 | kdamond_usleep(usecs: min_wait_time); |
1558 | |
1559 | if (ctx->callback.after_wmarks_check && |
1560 | ctx->callback.after_wmarks_check(ctx)) |
1561 | break; |
1562 | } |
1563 | return -EBUSY; |
1564 | } |
1565 | |
1566 | static void kdamond_init_intervals_sis(struct damon_ctx *ctx) |
1567 | { |
1568 | unsigned long sample_interval = ctx->attrs.sample_interval ? |
1569 | ctx->attrs.sample_interval : 1; |
1570 | unsigned long apply_interval; |
1571 | struct damos *scheme; |
1572 | |
1573 | ctx->passed_sample_intervals = 0; |
1574 | ctx->next_aggregation_sis = ctx->attrs.aggr_interval / sample_interval; |
1575 | ctx->next_ops_update_sis = ctx->attrs.ops_update_interval / |
1576 | sample_interval; |
1577 | |
1578 | damon_for_each_scheme(scheme, ctx) { |
1579 | apply_interval = scheme->apply_interval_us ? |
1580 | scheme->apply_interval_us : ctx->attrs.aggr_interval; |
1581 | scheme->next_apply_sis = apply_interval / sample_interval; |
1582 | } |
1583 | } |
1584 | |
1585 | /* |
1586 | * The monitoring daemon that runs as a kernel thread |
1587 | */ |
1588 | static int kdamond_fn(void *data) |
1589 | { |
1590 | struct damon_ctx *ctx = data; |
1591 | struct damon_target *t; |
1592 | struct damon_region *r, *next; |
1593 | unsigned int max_nr_accesses = 0; |
1594 | unsigned long sz_limit = 0; |
1595 | |
1596 | pr_debug("kdamond (%d) starts\n" , current->pid); |
1597 | |
1598 | complete(&ctx->kdamond_started); |
1599 | kdamond_init_intervals_sis(ctx); |
1600 | |
1601 | if (ctx->ops.init) |
1602 | ctx->ops.init(ctx); |
1603 | if (ctx->callback.before_start && ctx->callback.before_start(ctx)) |
1604 | goto done; |
1605 | |
1606 | sz_limit = damon_region_sz_limit(ctx); |
1607 | |
1608 | while (!kdamond_need_stop(ctx)) { |
1609 | /* |
1610 | * ctx->attrs and ctx->next_{aggregation,ops_update}_sis could |
1611 | * be changed from after_wmarks_check() or after_aggregation() |
1612 | * callbacks. Read the values here, and use those for this |
1613 | * iteration. That is, damon_set_attrs() updated new values |
1614 | * are respected from next iteration. |
1615 | */ |
1616 | unsigned long next_aggregation_sis = ctx->next_aggregation_sis; |
1617 | unsigned long next_ops_update_sis = ctx->next_ops_update_sis; |
1618 | unsigned long sample_interval = ctx->attrs.sample_interval; |
1619 | |
1620 | if (kdamond_wait_activation(ctx)) |
1621 | break; |
1622 | |
1623 | if (ctx->ops.prepare_access_checks) |
1624 | ctx->ops.prepare_access_checks(ctx); |
1625 | if (ctx->callback.after_sampling && |
1626 | ctx->callback.after_sampling(ctx)) |
1627 | break; |
1628 | |
1629 | kdamond_usleep(usecs: sample_interval); |
1630 | ctx->passed_sample_intervals++; |
1631 | |
1632 | if (ctx->ops.check_accesses) |
1633 | max_nr_accesses = ctx->ops.check_accesses(ctx); |
1634 | |
1635 | if (ctx->passed_sample_intervals == next_aggregation_sis) { |
1636 | kdamond_merge_regions(c: ctx, |
1637 | threshold: max_nr_accesses / 10, |
1638 | sz_limit); |
1639 | if (ctx->callback.after_aggregation && |
1640 | ctx->callback.after_aggregation(ctx)) |
1641 | break; |
1642 | } |
1643 | |
1644 | /* |
1645 | * do kdamond_apply_schemes() after kdamond_merge_regions() if |
1646 | * possible, to reduce overhead |
1647 | */ |
1648 | if (!list_empty(head: &ctx->schemes)) |
1649 | kdamond_apply_schemes(c: ctx); |
1650 | |
1651 | sample_interval = ctx->attrs.sample_interval ? |
1652 | ctx->attrs.sample_interval : 1; |
1653 | if (ctx->passed_sample_intervals == next_aggregation_sis) { |
1654 | ctx->next_aggregation_sis = next_aggregation_sis + |
1655 | ctx->attrs.aggr_interval / sample_interval; |
1656 | |
1657 | kdamond_reset_aggregated(c: ctx); |
1658 | kdamond_split_regions(ctx); |
1659 | if (ctx->ops.reset_aggregated) |
1660 | ctx->ops.reset_aggregated(ctx); |
1661 | } |
1662 | |
1663 | if (ctx->passed_sample_intervals == next_ops_update_sis) { |
1664 | ctx->next_ops_update_sis = next_ops_update_sis + |
1665 | ctx->attrs.ops_update_interval / |
1666 | sample_interval; |
1667 | if (ctx->ops.update) |
1668 | ctx->ops.update(ctx); |
1669 | sz_limit = damon_region_sz_limit(ctx); |
1670 | } |
1671 | } |
1672 | done: |
1673 | damon_for_each_target(t, ctx) { |
1674 | damon_for_each_region_safe(r, next, t) |
1675 | damon_destroy_region(r, t); |
1676 | } |
1677 | |
1678 | if (ctx->callback.before_terminate) |
1679 | ctx->callback.before_terminate(ctx); |
1680 | if (ctx->ops.cleanup) |
1681 | ctx->ops.cleanup(ctx); |
1682 | |
1683 | pr_debug("kdamond (%d) finishes\n" , current->pid); |
1684 | mutex_lock(&ctx->kdamond_lock); |
1685 | ctx->kdamond = NULL; |
1686 | mutex_unlock(lock: &ctx->kdamond_lock); |
1687 | |
1688 | mutex_lock(&damon_lock); |
1689 | nr_running_ctxs--; |
1690 | if (!nr_running_ctxs && running_exclusive_ctxs) |
1691 | running_exclusive_ctxs = false; |
1692 | mutex_unlock(lock: &damon_lock); |
1693 | |
1694 | return 0; |
1695 | } |
1696 | |
1697 | /* |
1698 | * struct damon_system_ram_region - System RAM resource address region of |
1699 | * [@start, @end). |
1700 | * @start: Start address of the region (inclusive). |
1701 | * @end: End address of the region (exclusive). |
1702 | */ |
1703 | struct damon_system_ram_region { |
1704 | unsigned long start; |
1705 | unsigned long end; |
1706 | }; |
1707 | |
1708 | static int walk_system_ram(struct resource *res, void *arg) |
1709 | { |
1710 | struct damon_system_ram_region *a = arg; |
1711 | |
1712 | if (a->end - a->start < resource_size(res)) { |
1713 | a->start = res->start; |
1714 | a->end = res->end; |
1715 | } |
1716 | return 0; |
1717 | } |
1718 | |
1719 | /* |
1720 | * Find biggest 'System RAM' resource and store its start and end address in |
1721 | * @start and @end, respectively. If no System RAM is found, returns false. |
1722 | */ |
1723 | static bool damon_find_biggest_system_ram(unsigned long *start, |
1724 | unsigned long *end) |
1725 | |
1726 | { |
1727 | struct damon_system_ram_region arg = {}; |
1728 | |
1729 | walk_system_ram_res(start: 0, ULONG_MAX, arg: &arg, func: walk_system_ram); |
1730 | if (arg.end <= arg.start) |
1731 | return false; |
1732 | |
1733 | *start = arg.start; |
1734 | *end = arg.end; |
1735 | return true; |
1736 | } |
1737 | |
1738 | /** |
1739 | * damon_set_region_biggest_system_ram_default() - Set the region of the given |
1740 | * monitoring target as requested, or biggest 'System RAM'. |
1741 | * @t: The monitoring target to set the region. |
1742 | * @start: The pointer to the start address of the region. |
1743 | * @end: The pointer to the end address of the region. |
1744 | * |
1745 | * This function sets the region of @t as requested by @start and @end. If the |
1746 | * values of @start and @end are zero, however, this function finds the biggest |
1747 | * 'System RAM' resource and sets the region to cover the resource. In the |
1748 | * latter case, this function saves the start and end addresses of the resource |
1749 | * in @start and @end, respectively. |
1750 | * |
1751 | * Return: 0 on success, negative error code otherwise. |
1752 | */ |
1753 | int damon_set_region_biggest_system_ram_default(struct damon_target *t, |
1754 | unsigned long *start, unsigned long *end) |
1755 | { |
1756 | struct damon_addr_range addr_range; |
1757 | |
1758 | if (*start > *end) |
1759 | return -EINVAL; |
1760 | |
1761 | if (!*start && !*end && |
1762 | !damon_find_biggest_system_ram(start, end)) |
1763 | return -EINVAL; |
1764 | |
1765 | addr_range.start = *start; |
1766 | addr_range.end = *end; |
1767 | return damon_set_regions(t, ranges: &addr_range, nr_ranges: 1); |
1768 | } |
1769 | |
1770 | /* |
1771 | * damon_moving_sum() - Calculate an inferred moving sum value. |
1772 | * @mvsum: Inferred sum of the last @len_window values. |
1773 | * @nomvsum: Non-moving sum of the last discrete @len_window window values. |
1774 | * @len_window: The number of last values to take care of. |
1775 | * @new_value: New value that will be added to the pseudo moving sum. |
1776 | * |
1777 | * Moving sum (moving average * window size) is good for handling noise, but |
1778 | * the cost of keeping past values can be high for arbitrary window size. This |
1779 | * function implements a lightweight pseudo moving sum function that doesn't |
1780 | * keep the past window values. |
1781 | * |
1782 | * It simply assumes there was no noise in the past, and get the no-noise |
1783 | * assumed past value to drop from @nomvsum and @len_window. @nomvsum is a |
1784 | * non-moving sum of the last window. For example, if @len_window is 10 and we |
1785 | * have 25 values, @nomvsum is the sum of the 11th to 20th values of the 25 |
1786 | * values. Hence, this function simply drops @nomvsum / @len_window from |
1787 | * given @mvsum and add @new_value. |
1788 | * |
1789 | * For example, if @len_window is 10 and @nomvsum is 50, the last 10 values for |
1790 | * the last window could be vary, e.g., 0, 10, 0, 10, 0, 10, 0, 0, 0, 20. For |
1791 | * calculating next moving sum with a new value, we should drop 0 from 50 and |
1792 | * add the new value. However, this function assumes it got value 5 for each |
1793 | * of the last ten times. Based on the assumption, when the next value is |
1794 | * measured, it drops the assumed past value, 5 from the current sum, and add |
1795 | * the new value to get the updated pseduo-moving average. |
1796 | * |
1797 | * This means the value could have errors, but the errors will be disappeared |
1798 | * for every @len_window aligned calls. For example, if @len_window is 10, the |
1799 | * pseudo moving sum with 11th value to 19th value would have an error. But |
1800 | * the sum with 20th value will not have the error. |
1801 | * |
1802 | * Return: Pseudo-moving average after getting the @new_value. |
1803 | */ |
1804 | static unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum, |
1805 | unsigned int len_window, unsigned int new_value) |
1806 | { |
1807 | return mvsum - nomvsum / len_window + new_value; |
1808 | } |
1809 | |
1810 | /** |
1811 | * damon_update_region_access_rate() - Update the access rate of a region. |
1812 | * @r: The DAMON region to update for its access check result. |
1813 | * @accessed: Whether the region has accessed during last sampling interval. |
1814 | * @attrs: The damon_attrs of the DAMON context. |
1815 | * |
1816 | * Update the access rate of a region with the region's last sampling interval |
1817 | * access check result. |
1818 | * |
1819 | * Usually this will be called by &damon_operations->check_accesses callback. |
1820 | */ |
1821 | void damon_update_region_access_rate(struct damon_region *r, bool accessed, |
1822 | struct damon_attrs *attrs) |
1823 | { |
1824 | unsigned int len_window = 1; |
1825 | |
1826 | /* |
1827 | * sample_interval can be zero, but cannot be larger than |
1828 | * aggr_interval, owing to validation of damon_set_attrs(). |
1829 | */ |
1830 | if (attrs->sample_interval) |
1831 | len_window = damon_max_nr_accesses(attrs); |
1832 | r->nr_accesses_bp = damon_moving_sum(mvsum: r->nr_accesses_bp, |
1833 | nomvsum: r->last_nr_accesses * 10000, len_window, |
1834 | new_value: accessed ? 10000 : 0); |
1835 | |
1836 | if (accessed) |
1837 | r->nr_accesses++; |
1838 | } |
1839 | |
1840 | static int __init damon_init(void) |
1841 | { |
1842 | damon_region_cache = KMEM_CACHE(damon_region, 0); |
1843 | if (unlikely(!damon_region_cache)) { |
1844 | pr_err("creating damon_region_cache fails\n" ); |
1845 | return -ENOMEM; |
1846 | } |
1847 | |
1848 | return 0; |
1849 | } |
1850 | |
1851 | subsys_initcall(damon_init); |
1852 | |
1853 | #include "core-test.h" |
1854 | |