cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

core.c (30099B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Data Access Monitor
      4 *
      5 * Author: SeongJae Park <sjpark@amazon.de>
      6 */
      7
      8#define pr_fmt(fmt) "damon: " fmt
      9
     10#include <linux/damon.h>
     11#include <linux/delay.h>
     12#include <linux/kthread.h>
     13#include <linux/mm.h>
     14#include <linux/slab.h>
     15#include <linux/string.h>
     16
     17#define CREATE_TRACE_POINTS
     18#include <trace/events/damon.h>
     19
     20#ifdef CONFIG_DAMON_KUNIT_TEST
     21#undef DAMON_MIN_REGION
     22#define DAMON_MIN_REGION 1
     23#endif
     24
     25static DEFINE_MUTEX(damon_lock);
     26static int nr_running_ctxs;
     27static bool running_exclusive_ctxs;
     28
     29static DEFINE_MUTEX(damon_ops_lock);
     30static struct damon_operations damon_registered_ops[NR_DAMON_OPS];
     31
     32/* Should be called under damon_ops_lock with id smaller than NR_DAMON_OPS */
     33static bool __damon_is_registered_ops(enum damon_ops_id id)
     34{
     35	struct damon_operations empty_ops = {};
     36
     37	if (!memcmp(&empty_ops, &damon_registered_ops[id], sizeof(empty_ops)))
     38		return false;
     39	return true;
     40}
     41
     42/**
     43 * damon_is_registered_ops() - Check if a given damon_operations is registered.
     44 * @id:	Id of the damon_operations to check if registered.
     45 *
     46 * Return: true if the ops is registered, false otherwise.
     47 */
     48bool damon_is_registered_ops(enum damon_ops_id id)
     49{
     50	bool registered;
     51
     52	if (id >= NR_DAMON_OPS)
     53		return false;
     54	mutex_lock(&damon_ops_lock);
     55	registered = __damon_is_registered_ops(id);
     56	mutex_unlock(&damon_ops_lock);
     57	return registered;
     58}
     59
     60/**
     61 * damon_register_ops() - Register a monitoring operations set to DAMON.
     62 * @ops:	monitoring operations set to register.
     63 *
     64 * This function registers a monitoring operations set with a valid &struct
     65 * damon_operations->id so that others can find and use it later.
     66 *
     67 * Return: 0 on success, negative error code otherwise.
     68 */
     69int damon_register_ops(struct damon_operations *ops)
     70{
     71	int err = 0;
     72
     73	if (ops->id >= NR_DAMON_OPS)
     74		return -EINVAL;
     75	mutex_lock(&damon_ops_lock);
     76	/* Fail for already registered ops */
     77	if (__damon_is_registered_ops(ops->id)) {
     78		err = -EINVAL;
     79		goto out;
     80	}
     81	damon_registered_ops[ops->id] = *ops;
     82out:
     83	mutex_unlock(&damon_ops_lock);
     84	return err;
     85}
     86
     87/**
     88 * damon_select_ops() - Select a monitoring operations to use with the context.
     89 * @ctx:	monitoring context to use the operations.
     90 * @id:		id of the registered monitoring operations to select.
     91 *
     92 * This function finds the registered monitoring operations set of @id and
     93 * makes @ctx use it.
     94 *
     95 * Return: 0 on success, negative error code otherwise.
     96 */
     97int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id)
     98{
     99	int err = 0;
    100
    101	if (id >= NR_DAMON_OPS)
    102		return -EINVAL;
    103
    104	mutex_lock(&damon_ops_lock);
    105	if (!__damon_is_registered_ops(id))
    106		err = -EINVAL;
    107	else
    108		ctx->ops = damon_registered_ops[id];
    109	mutex_unlock(&damon_ops_lock);
    110	return err;
    111}
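/*
 * A minimal usage sketch for the two functions above.  The my_* names and
 * my_ops_id are hypothetical placeholders, not identifiers from this file;
 * a real caller uses a free slot of enum damon_ops_id and its own callbacks:
 *
 *	static struct damon_operations my_ops = {
 *		.id = my_ops_id,
 *		.prepare_access_checks = my_prepare_access_checks,
 *		.check_accesses = my_check_accesses,
 *	};
 *
 *	if (damon_register_ops(&my_ops))
 *		... id was out of range or already taken ...
 *	if (damon_select_ops(ctx, my_ops_id))
 *		... nothing registered under my_ops_id ...
 */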
    112
    113/*
    114 * Construct a damon_region struct
    115 *
    116 * Returns the pointer to the new struct on success, or NULL otherwise
    117 */
    118struct damon_region *damon_new_region(unsigned long start, unsigned long end)
    119{
    120	struct damon_region *region;
    121
    122	region = kmalloc(sizeof(*region), GFP_KERNEL);
    123	if (!region)
    124		return NULL;
    125
    126	region->ar.start = start;
    127	region->ar.end = end;
    128	region->nr_accesses = 0;
    129	INIT_LIST_HEAD(&region->list);
    130
    131	region->age = 0;
    132	region->last_nr_accesses = 0;
    133
    134	return region;
    135}
    136
    137void damon_add_region(struct damon_region *r, struct damon_target *t)
    138{
    139	list_add_tail(&r->list, &t->regions_list);
    140	t->nr_regions++;
    141}
    142
    143static void damon_del_region(struct damon_region *r, struct damon_target *t)
    144{
    145	list_del(&r->list);
    146	t->nr_regions--;
    147}
    148
    149static void damon_free_region(struct damon_region *r)
    150{
    151	kfree(r);
    152}
    153
    154void damon_destroy_region(struct damon_region *r, struct damon_target *t)
    155{
    156	damon_del_region(r, t);
    157	damon_free_region(r);
    158}
    159
    160/*
    161 * Check whether a region is intersecting an address range
    162 *
    163 * Returns true if it is.
    164 */
    165static bool damon_intersect(struct damon_region *r,
    166		struct damon_addr_range *re)
    167{
    168	return !(r->ar.end <= re->start || re->end <= r->ar.start);
    169}
    170
    171/*
    172 * damon_set_regions() - Set regions of a target for given address ranges.
    173 * @t:		the given target.
    174 * @ranges:	array of new monitoring target ranges.
    175 * @nr_ranges:	length of @ranges.
    176 *
    177 * This function adds new regions to, or modifies existing regions of, a
    178 * monitoring target to fit in the given ranges.
    179 *
    180 * Return: 0 on success, or negative error code otherwise.
    181 */
    182int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges,
    183		unsigned int nr_ranges)
    184{
    185	struct damon_region *r, *next;
    186	unsigned int i;
    187
    188	/* Remove regions which are not in the new ranges */
    189	damon_for_each_region_safe(r, next, t) {
    190		for (i = 0; i < nr_ranges; i++) {
    191			if (damon_intersect(r, &ranges[i]))
    192				break;
    193		}
    194		if (i == nr_ranges)
    195			damon_destroy_region(r, t);
    196	}
    197
    198	/* Add new regions or resize existing regions to fit in the ranges */
    199	for (i = 0; i < nr_ranges; i++) {
    200		struct damon_region *first = NULL, *last, *newr;
    201		struct damon_addr_range *range;
    202
    203		range = &ranges[i];
    204		/* Get the first/last regions intersecting with the range */
    205		damon_for_each_region(r, t) {
    206			if (damon_intersect(r, range)) {
    207				if (!first)
    208					first = r;
    209				last = r;
    210			}
    211			if (r->ar.start >= range->end)
    212				break;
    213		}
    214		if (!first) {
    215			/* no region intersects with this range */
    216			newr = damon_new_region(
    217					ALIGN_DOWN(range->start,
    218						DAMON_MIN_REGION),
    219					ALIGN(range->end, DAMON_MIN_REGION));
    220			if (!newr)
    221				return -ENOMEM;
    222			damon_insert_region(newr, damon_prev_region(r), r, t);
    223		} else {
    224			/* resize intersecting regions to fit in this range */
    225			first->ar.start = ALIGN_DOWN(range->start,
    226					DAMON_MIN_REGION);
    227			last->ar.end = ALIGN(range->end, DAMON_MIN_REGION);
    228		}
    229	}
    230	return 0;
    231}
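/*
 * Typical caller pattern, as a sketch only.  The addresses are arbitrary
 * examples and error handling is trimmed; real callers (the DAMON user
 * interfaces) pass user-provided ranges:
 *
 *	struct damon_addr_range range = {
 *		.start = 0x7f0000000000,
 *		.end   = 0x7f0000100000,
 *	};
 *	struct damon_target *t = damon_new_target();
 *
 *	if (!t)
 *		return -ENOMEM;
 *	damon_add_target(ctx, t);
 *	if (damon_set_regions(t, &range, 1))
 *		return -ENOMEM;
 */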
    232
    233struct damos *damon_new_scheme(
    234		unsigned long min_sz_region, unsigned long max_sz_region,
    235		unsigned int min_nr_accesses, unsigned int max_nr_accesses,
    236		unsigned int min_age_region, unsigned int max_age_region,
    237		enum damos_action action, struct damos_quota *quota,
    238		struct damos_watermarks *wmarks)
    239{
    240	struct damos *scheme;
    241
    242	scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
    243	if (!scheme)
    244		return NULL;
    245	scheme->min_sz_region = min_sz_region;
    246	scheme->max_sz_region = max_sz_region;
    247	scheme->min_nr_accesses = min_nr_accesses;
    248	scheme->max_nr_accesses = max_nr_accesses;
    249	scheme->min_age_region = min_age_region;
    250	scheme->max_age_region = max_age_region;
    251	scheme->action = action;
    252	scheme->stat = (struct damos_stat){};
    253	INIT_LIST_HEAD(&scheme->list);
    254
    255	scheme->quota.ms = quota->ms;
    256	scheme->quota.sz = quota->sz;
    257	scheme->quota.reset_interval = quota->reset_interval;
    258	scheme->quota.weight_sz = quota->weight_sz;
    259	scheme->quota.weight_nr_accesses = quota->weight_nr_accesses;
    260	scheme->quota.weight_age = quota->weight_age;
    261	scheme->quota.total_charged_sz = 0;
    262	scheme->quota.total_charged_ns = 0;
    263	scheme->quota.esz = 0;
    264	scheme->quota.charged_sz = 0;
    265	scheme->quota.charged_from = 0;
    266	scheme->quota.charge_target_from = NULL;
    267	scheme->quota.charge_addr_from = 0;
    268
    269	scheme->wmarks.metric = wmarks->metric;
    270	scheme->wmarks.interval = wmarks->interval;
    271	scheme->wmarks.high = wmarks->high;
    272	scheme->wmarks.mid = wmarks->mid;
    273	scheme->wmarks.low = wmarks->low;
    274	scheme->wmarks.activated = true;
    275
    276	return scheme;
    277}
    278
    279void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
    280{
    281	list_add_tail(&s->list, &ctx->schemes);
    282}
    283
    284static void damon_del_scheme(struct damos *s)
    285{
    286	list_del(&s->list);
    287}
    288
    289static void damon_free_scheme(struct damos *s)
    290{
    291	kfree(s);
    292}
    293
    294void damon_destroy_scheme(struct damos *s)
    295{
    296	damon_del_scheme(s);
    297	damon_free_scheme(s);
    298}
    299
    300/*
    301 * Construct a damon_target struct
    302 *
    303 * Returns the pointer to the new struct on success, or NULL otherwise
    304 */
    305struct damon_target *damon_new_target(void)
    306{
    307	struct damon_target *t;
    308
    309	t = kmalloc(sizeof(*t), GFP_KERNEL);
    310	if (!t)
    311		return NULL;
    312
    313	t->pid = NULL;
    314	t->nr_regions = 0;
    315	INIT_LIST_HEAD(&t->regions_list);
    316
    317	return t;
    318}
    319
    320void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
    321{
    322	list_add_tail(&t->list, &ctx->adaptive_targets);
    323}
    324
    325bool damon_targets_empty(struct damon_ctx *ctx)
    326{
    327	return list_empty(&ctx->adaptive_targets);
    328}
    329
    330static void damon_del_target(struct damon_target *t)
    331{
    332	list_del(&t->list);
    333}
    334
    335void damon_free_target(struct damon_target *t)
    336{
    337	struct damon_region *r, *next;
    338
    339	damon_for_each_region_safe(r, next, t)
    340		damon_free_region(r);
    341	kfree(t);
    342}
    343
    344void damon_destroy_target(struct damon_target *t)
    345{
    346	damon_del_target(t);
    347	damon_free_target(t);
    348}
    349
    350unsigned int damon_nr_regions(struct damon_target *t)
    351{
    352	return t->nr_regions;
    353}
    354
    355struct damon_ctx *damon_new_ctx(void)
    356{
    357	struct damon_ctx *ctx;
    358
    359	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
    360	if (!ctx)
    361		return NULL;
    362
    363	ctx->sample_interval = 5 * 1000;
    364	ctx->aggr_interval = 100 * 1000;
    365	ctx->ops_update_interval = 60 * 1000 * 1000;
    366
    367	ktime_get_coarse_ts64(&ctx->last_aggregation);
    368	ctx->last_ops_update = ctx->last_aggregation;
    369
    370	mutex_init(&ctx->kdamond_lock);
    371
    372	ctx->min_nr_regions = 10;
    373	ctx->max_nr_regions = 1000;
    374
    375	INIT_LIST_HEAD(&ctx->adaptive_targets);
    376	INIT_LIST_HEAD(&ctx->schemes);
    377
    378	return ctx;
    379}
    380
    381static void damon_destroy_targets(struct damon_ctx *ctx)
    382{
    383	struct damon_target *t, *next_t;
    384
    385	if (ctx->ops.cleanup) {
    386		ctx->ops.cleanup(ctx);
    387		return;
    388	}
    389
    390	damon_for_each_target_safe(t, next_t, ctx)
    391		damon_destroy_target(t);
    392}
    393
    394void damon_destroy_ctx(struct damon_ctx *ctx)
    395{
    396	struct damos *s, *next_s;
    397
    398	damon_destroy_targets(ctx);
    399
    400	damon_for_each_scheme_safe(s, next_s, ctx)
    401		damon_destroy_scheme(s);
    402
    403	kfree(ctx);
    404}
    405
    406/**
    407 * damon_set_attrs() - Set attributes for the monitoring.
    408 * @ctx:		monitoring context
    409 * @sample_int:		time interval between samplings
    410 * @aggr_int:		time interval between aggregations
    411 * @ops_upd_int:	time interval between monitoring operations updates
    412 * @min_nr_reg:		minimal number of regions
    413 * @max_nr_reg:		maximum number of regions
    414 *
    415 * This function should not be called while the kdamond is running.
    416 * Every time interval is in microseconds.
    417 *
    418 * Return: 0 on success, negative error code otherwise.
    419 */
    420int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
    421		    unsigned long aggr_int, unsigned long ops_upd_int,
    422		    unsigned long min_nr_reg, unsigned long max_nr_reg)
    423{
    424	if (min_nr_reg < 3)
    425		return -EINVAL;
    426	if (min_nr_reg > max_nr_reg)
    427		return -EINVAL;
    428
    429	ctx->sample_interval = sample_int;
    430	ctx->aggr_interval = aggr_int;
    431	ctx->ops_update_interval = ops_upd_int;
    432	ctx->min_nr_regions = min_nr_reg;
    433	ctx->max_nr_regions = max_nr_reg;
    434
    435	return 0;
    436}
    437
    438/**
    439 * damon_set_schemes() - Set data access monitoring based operation schemes.
    440 * @ctx:	monitoring context
    441 * @schemes:	array of the schemes
    442 * @nr_schemes:	number of entries in @schemes
    443 *
    444 * This function should not be called while the kdamond of the context is
    445 * running.
    446 *
    447 * Return: 0 on success, or negative error code otherwise.
    448 */
    449int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
    450			ssize_t nr_schemes)
    451{
    452	struct damos *s, *next;
    453	ssize_t i;
    454
    455	damon_for_each_scheme_safe(s, next, ctx)
    456		damon_destroy_scheme(s);
    457	for (i = 0; i < nr_schemes; i++)
    458		damon_add_scheme(ctx, schemes[i]);
    459	return 0;
    460}
    461
    462/**
    463 * damon_nr_running_ctxs() - Return number of currently running contexts.
    464 */
    465int damon_nr_running_ctxs(void)
    466{
    467	int nr_ctxs;
    468
    469	mutex_lock(&damon_lock);
    470	nr_ctxs = nr_running_ctxs;
    471	mutex_unlock(&damon_lock);
    472
    473	return nr_ctxs;
    474}
    475
    476/* Returns the size upper limit for each monitoring region */
    477static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
    478{
    479	struct damon_target *t;
    480	struct damon_region *r;
    481	unsigned long sz = 0;
    482
    483	damon_for_each_target(t, ctx) {
    484		damon_for_each_region(r, t)
    485			sz += r->ar.end - r->ar.start;
    486	}
    487
    488	if (ctx->min_nr_regions)
    489		sz /= ctx->min_nr_regions;
    490	if (sz < DAMON_MIN_REGION)
    491		sz = DAMON_MIN_REGION;
    492
    493	return sz;
    494}
    495
    496static int kdamond_fn(void *data);
    497
    498/*
    499 * __damon_start() - Starts monitoring with given context.
    500 * @ctx:	monitoring context
    501 *
    502 * This function should be called while damon_lock is held.
    503 *
    504 * Return: 0 on success, negative error code otherwise.
    505 */
    506static int __damon_start(struct damon_ctx *ctx)
    507{
    508	int err = -EBUSY;
    509
    510	mutex_lock(&ctx->kdamond_lock);
    511	if (!ctx->kdamond) {
    512		err = 0;
    513		ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
    514				nr_running_ctxs);
    515		if (IS_ERR(ctx->kdamond)) {
    516			err = PTR_ERR(ctx->kdamond);
    517			ctx->kdamond = NULL;
    518		}
    519	}
    520	mutex_unlock(&ctx->kdamond_lock);
    521
    522	return err;
    523}
    524
    525/**
    526 * damon_start() - Starts monitoring for a given group of contexts.
    527 * @ctxs:	an array of the pointers for contexts to start monitoring
    528 * @nr_ctxs:	size of @ctxs
    529 * @exclusive:	exclusiveness of this group of contexts
    530 *
    531 * This function starts a group of monitoring threads for a group of monitoring
    532 * contexts.  One thread per context is created and run in parallel.  The
    533 * caller should handle synchronization between the threads by itself.  If
    534 * @exclusive is true and a group of threads created by another
    535 * 'damon_start()' call is currently running, this function does nothing and
    536 * returns -EBUSY.
    537 *
    538 * Return: 0 on success, negative error code otherwise.
    539 */
    540int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive)
    541{
    542	int i;
    543	int err = 0;
    544
    545	mutex_lock(&damon_lock);
    546	if ((exclusive && nr_running_ctxs) ||
    547			(!exclusive && running_exclusive_ctxs)) {
    548		mutex_unlock(&damon_lock);
    549		return -EBUSY;
    550	}
    551
    552	for (i = 0; i < nr_ctxs; i++) {
    553		err = __damon_start(ctxs[i]);
    554		if (err)
    555			break;
    556		nr_running_ctxs++;
    557	}
    558	if (exclusive && nr_running_ctxs)
    559		running_exclusive_ctxs = true;
    560	mutex_unlock(&damon_lock);
    561
    562	return err;
    563}
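/*
 * Putting the pieces together, a caller would typically do something like the
 * following sketch.  The attribute values mirror the defaults set by
 * damon_new_ctx(), my_ops_id stands for whatever operations id was registered,
 * and error handling is abbreviated:
 *
 *	struct damon_ctx *ctx = damon_new_ctx();
 *
 *	if (!ctx)
 *		return -ENOMEM;
 *	if (damon_select_ops(ctx, my_ops_id) ||
 *			damon_set_attrs(ctx, 5000, 100000, 60000000, 10, 1000) ||
 *			damon_start(&ctx, 1, true))
 *		... destroy the context and bail out ...
 */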
    564
    565/*
    566 * __damon_stop() - Stops monitoring of a given context.
    567 * @ctx:	monitoring context
    568 *
    569 * Return: 0 on success, negative error code otherwise.
    570 */
    571static int __damon_stop(struct damon_ctx *ctx)
    572{
    573	struct task_struct *tsk;
    574
    575	mutex_lock(&ctx->kdamond_lock);
    576	tsk = ctx->kdamond;
    577	if (tsk) {
    578		get_task_struct(tsk);
    579		mutex_unlock(&ctx->kdamond_lock);
    580		kthread_stop(tsk);
    581		put_task_struct(tsk);
    582		return 0;
    583	}
    584	mutex_unlock(&ctx->kdamond_lock);
    585
    586	return -EPERM;
    587}
    588
    589/**
    590 * damon_stop() - Stops monitoring for a given group of contexts.
    591 * @ctxs:	an array of the pointers for contexts to stop monitoring
    592 * @nr_ctxs:	size of @ctxs
    593 *
    594 * Return: 0 on success, negative error code otherwise.
    595 */
    596int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
    597{
    598	int i, err = 0;
    599
    600	for (i = 0; i < nr_ctxs; i++) {
    601		/* nr_running_ctxs is decremented in kdamond_fn */
    602		err = __damon_stop(ctxs[i]);
    603		if (err)
    604			break;
    605	}
    606	return err;
    607}
    608
    609/*
    610 * damon_check_reset_time_interval() - Check if a time interval has elapsed.
    611 * @baseline:	the time to check whether the interval has elapsed since
    612 * @interval:	the time interval (microseconds)
    613 *
    614 * See whether the given time interval has passed since the given baseline
    615 * time.  If so, also update the baseline to the current time for the next check.
    616 *
    617 * Return:	true if the time interval has passed, or false otherwise.
    618 */
    619static bool damon_check_reset_time_interval(struct timespec64 *baseline,
    620		unsigned long interval)
    621{
    622	struct timespec64 now;
    623
    624	ktime_get_coarse_ts64(&now);
    625	if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
    626			interval * 1000)
    627		return false;
    628	*baseline = now;
    629	return true;
    630}
    631
    632/*
    633 * Check whether it is time to flush the aggregated information
    634 */
    635static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
    636{
    637	return damon_check_reset_time_interval(&ctx->last_aggregation,
    638			ctx->aggr_interval);
    639}
    640
    641/*
    642 * Reset the aggregated monitoring results ('nr_accesses' of each region).
    643 */
    644static void kdamond_reset_aggregated(struct damon_ctx *c)
    645{
    646	struct damon_target *t;
    647	unsigned int ti = 0;	/* target's index */
    648
    649	damon_for_each_target(t, c) {
    650		struct damon_region *r;
    651
    652		damon_for_each_region(r, t) {
    653			trace_damon_aggregated(t, ti, r, damon_nr_regions(t));
    654			r->last_nr_accesses = r->nr_accesses;
    655			r->nr_accesses = 0;
    656		}
    657		ti++;
    658	}
    659}
    660
    661static void damon_split_region_at(struct damon_ctx *ctx,
    662		struct damon_target *t, struct damon_region *r,
    663		unsigned long sz_r);
    664
    665static bool __damos_valid_target(struct damon_region *r, struct damos *s)
    666{
    667	unsigned long sz;
    668
    669	sz = r->ar.end - r->ar.start;
    670	return s->min_sz_region <= sz && sz <= s->max_sz_region &&
    671		s->min_nr_accesses <= r->nr_accesses &&
    672		r->nr_accesses <= s->max_nr_accesses &&
    673		s->min_age_region <= r->age && r->age <= s->max_age_region;
    674}
    675
    676static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
    677		struct damon_region *r, struct damos *s)
    678{
    679	bool ret = __damos_valid_target(r, s);
    680
    681	if (!ret || !s->quota.esz || !c->ops.get_scheme_score)
    682		return ret;
    683
    684	return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score;
    685}
    686
    687static void damon_do_apply_schemes(struct damon_ctx *c,
    688				   struct damon_target *t,
    689				   struct damon_region *r)
    690{
    691	struct damos *s;
    692
    693	damon_for_each_scheme(s, c) {
    694		struct damos_quota *quota = &s->quota;
    695		unsigned long sz = r->ar.end - r->ar.start;
    696		struct timespec64 begin, end;
    697		unsigned long sz_applied = 0;
    698
    699		if (!s->wmarks.activated)
    700			continue;
    701
    702		/* Check the quota */
    703		if (quota->esz && quota->charged_sz >= quota->esz)
    704			continue;
    705
    706		/* Skip previously charged regions */
    707		if (quota->charge_target_from) {
    708			if (t != quota->charge_target_from)
    709				continue;
    710			if (r == damon_last_region(t)) {
    711				quota->charge_target_from = NULL;
    712				quota->charge_addr_from = 0;
    713				continue;
    714			}
    715			if (quota->charge_addr_from &&
    716					r->ar.end <= quota->charge_addr_from)
    717				continue;
    718
    719			if (quota->charge_addr_from && r->ar.start <
    720					quota->charge_addr_from) {
    721				sz = ALIGN_DOWN(quota->charge_addr_from -
    722						r->ar.start, DAMON_MIN_REGION);
    723				if (!sz) {
    724					if (r->ar.end - r->ar.start <=
    725							DAMON_MIN_REGION)
    726						continue;
    727					sz = DAMON_MIN_REGION;
    728				}
    729				damon_split_region_at(c, t, r, sz);
    730				r = damon_next_region(r);
    731				sz = r->ar.end - r->ar.start;
    732			}
    733			quota->charge_target_from = NULL;
    734			quota->charge_addr_from = 0;
    735		}
    736
    737		if (!damos_valid_target(c, t, r, s))
    738			continue;
    739
    740		/* Apply the scheme */
    741		if (c->ops.apply_scheme) {
    742			if (quota->esz &&
    743					quota->charged_sz + sz > quota->esz) {
    744				sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
    745						DAMON_MIN_REGION);
    746				if (!sz)
    747					goto update_stat;
    748				damon_split_region_at(c, t, r, sz);
    749			}
    750			ktime_get_coarse_ts64(&begin);
    751			sz_applied = c->ops.apply_scheme(c, t, r, s);
    752			ktime_get_coarse_ts64(&end);
    753			quota->total_charged_ns += timespec64_to_ns(&end) -
    754				timespec64_to_ns(&begin);
    755			quota->charged_sz += sz;
    756			if (quota->esz && quota->charged_sz >= quota->esz) {
    757				quota->charge_target_from = t;
    758				quota->charge_addr_from = r->ar.end + 1;
    759			}
    760		}
    761		if (s->action != DAMOS_STAT)
    762			r->age = 0;
    763
    764update_stat:
    765		s->stat.nr_tried++;
    766		s->stat.sz_tried += sz;
    767		if (sz_applied)
    768			s->stat.nr_applied++;
    769		s->stat.sz_applied += sz_applied;
    770	}
    771}
    772
    773/* Shouldn't be called if quota->ms and quota->sz are zero */
    774static void damos_set_effective_quota(struct damos_quota *quota)
    775{
    776	unsigned long throughput;
    777	unsigned long esz;
    778
    779	if (!quota->ms) {
    780		quota->esz = quota->sz;
    781		return;
    782	}
    783
    784	if (quota->total_charged_ns)
    785		throughput = quota->total_charged_sz * 1000000 /
    786			quota->total_charged_ns;
    787	else
    788		throughput = PAGE_SIZE * 1024;
    789	esz = throughput * quota->ms;
    790
    791	if (quota->sz && quota->sz < esz)
    792		esz = quota->sz;
    793	quota->esz = esz;
    794}
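/*
 * Worked example with illustrative numbers: if previous windows charged 2 MiB
 * of scheme application over 10 ms (10,000,000 ns) of apply time, throughput
 * is 2 MiB * 1000000 / 10000000 = ~205 KiB per millisecond, so quota->ms == 5
 * yields an esz of about 1 MiB, further capped by quota->sz if that is
 * smaller.  With no history yet, the assumed throughput is PAGE_SIZE * 1024
 * bytes per millisecond.
 */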
    795
    796static void kdamond_apply_schemes(struct damon_ctx *c)
    797{
    798	struct damon_target *t;
    799	struct damon_region *r, *next_r;
    800	struct damos *s;
    801
    802	damon_for_each_scheme(s, c) {
    803		struct damos_quota *quota = &s->quota;
    804		unsigned long cumulated_sz;
    805		unsigned int score, max_score = 0;
    806
    807		if (!s->wmarks.activated)
    808			continue;
    809
    810		if (!quota->ms && !quota->sz)
    811			continue;
    812
    813		/* New charge window starts */
    814		if (time_after_eq(jiffies, quota->charged_from +
    815					msecs_to_jiffies(
    816						quota->reset_interval))) {
    817			if (quota->esz && quota->charged_sz >= quota->esz)
    818				s->stat.qt_exceeds++;
    819			quota->total_charged_sz += quota->charged_sz;
    820			quota->charged_from = jiffies;
    821			quota->charged_sz = 0;
    822			damos_set_effective_quota(quota);
    823		}
    824
    825		if (!c->ops.get_scheme_score)
    826			continue;
    827
    828		/* Fill up the score histogram */
    829		memset(quota->histogram, 0, sizeof(quota->histogram));
    830		damon_for_each_target(t, c) {
    831			damon_for_each_region(r, t) {
    832				if (!__damos_valid_target(r, s))
    833					continue;
    834				score = c->ops.get_scheme_score(
    835						c, t, r, s);
    836				quota->histogram[score] +=
    837					r->ar.end - r->ar.start;
    838				if (score > max_score)
    839					max_score = score;
    840			}
    841		}
    842
    843		/* Set the min score limit */
    844		for (cumulated_sz = 0, score = max_score; ; score--) {
    845			cumulated_sz += quota->histogram[score];
    846			if (cumulated_sz >= quota->esz || !score)
    847				break;
    848		}
    849		quota->min_score = score;
    850	}
    851
    852	damon_for_each_target(t, c) {
    853		damon_for_each_region_safe(r, next_r, t)
    854			damon_do_apply_schemes(c, t, r);
    855	}
    856}
    857
    858static inline unsigned long sz_damon_region(struct damon_region *r)
    859{
    860	return r->ar.end - r->ar.start;
    861}
    862
    863/*
    864 * Merge two adjacent regions into one region
    865 */
    866static void damon_merge_two_regions(struct damon_target *t,
    867		struct damon_region *l, struct damon_region *r)
    868{
    869	unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);
    870
    871	l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
    872			(sz_l + sz_r);
    873	l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
    874	l->ar.end = r->ar.end;
    875	damon_destroy_region(r, t);
    876}
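/*
 * The merged values are size-weighted averages.  For example (illustrative
 * numbers), merging a 12 KiB region with nr_accesses 3 and a 4 KiB region
 * with nr_accesses 7 gives a 16 KiB region with nr_accesses
 * (3 * 12K + 7 * 4K) / 16K = 4.
 */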
    877
    878/*
    879 * Merge adjacent regions having similar access frequencies
    880 *
    881 * t		target affected by this merge operation
    882 * thres	'->nr_accesses' diff threshold for the merge
    883 * sz_limit	size upper limit of each region
    884 */
    885static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
    886				   unsigned long sz_limit)
    887{
    888	struct damon_region *r, *prev = NULL, *next;
    889
    890	damon_for_each_region_safe(r, next, t) {
    891		if (abs(r->nr_accesses - r->last_nr_accesses) > thres)
    892			r->age = 0;
    893		else
    894			r->age++;
    895
    896		if (prev && prev->ar.end == r->ar.start &&
    897		    abs(prev->nr_accesses - r->nr_accesses) <= thres &&
    898		    sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
    899			damon_merge_two_regions(t, prev, r);
    900		else
    901			prev = r;
    902	}
    903}
    904
    905/*
    906 * Merge adjacent regions having similar access frequencies
    907 *
    908 * threshold	'->nr_accesses' diff threshold for the merge
    909 * sz_limit	size upper limit of each region
    910 *
    911 * This function merges monitoring target regions which are adjacent and their
    912 * access frequencies are similar.  This is for minimizing the monitoring
    913 * overhead under dynamically changing access patterns.  If a merge was
    914 * unnecessarily made, later 'kdamond_split_regions()' will revert it.
    915 */
    916static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
    917				  unsigned long sz_limit)
    918{
    919	struct damon_target *t;
    920
    921	damon_for_each_target(t, c)
    922		damon_merge_regions_of(t, threshold, sz_limit);
    923}
    924
    925/*
    926 * Split a region in two
    927 *
    928 * r		the region to be split
    929 * sz_r		size of the first sub-region that will be made
    930 */
    931static void damon_split_region_at(struct damon_ctx *ctx,
    932		struct damon_target *t, struct damon_region *r,
    933		unsigned long sz_r)
    934{
    935	struct damon_region *new;
    936
    937	new = damon_new_region(r->ar.start + sz_r, r->ar.end);
    938	if (!new)
    939		return;
    940
    941	r->ar.end = new->ar.start;
    942
    943	new->age = r->age;
    944	new->last_nr_accesses = r->last_nr_accesses;
    945
    946	damon_insert_region(new, r, damon_next_region(r), t);
    947}
    948
    949/* Split every region in the given target into 'nr_subs' regions */
    950static void damon_split_regions_of(struct damon_ctx *ctx,
    951				     struct damon_target *t, int nr_subs)
    952{
    953	struct damon_region *r, *next;
    954	unsigned long sz_region, sz_sub = 0;
    955	int i;
    956
    957	damon_for_each_region_safe(r, next, t) {
    958		sz_region = r->ar.end - r->ar.start;
    959
    960		for (i = 0; i < nr_subs - 1 &&
    961				sz_region > 2 * DAMON_MIN_REGION; i++) {
    962			/*
    963			 * Randomly select the size of the left sub-region to be
    964			 * at least 10% and at most 90% of the original region
    965			 */
    966			sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
    967					sz_region / 10, DAMON_MIN_REGION);
    968			/* Do not allow blank region */
    969			if (sz_sub == 0 || sz_sub >= sz_region)
    970				continue;
    971
    972			damon_split_region_at(ctx, t, r, sz_sub);
    973			sz_region = sz_sub;
    974		}
    975	}
    976}
    977
    978/*
    979 * Split every target region into randomly-sized small regions
    980 *
    981 * This function splits every target region into random-sized small regions if
    982 * the current total number of regions is equal to or smaller than half of the
    983 * user-specified maximum number of regions.  This is for maximizing the
    984 * monitoring accuracy under dynamically changing access patterns.  If a
    985 * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
    986 * it.
    987 */
    988static void kdamond_split_regions(struct damon_ctx *ctx)
    989{
    990	struct damon_target *t;
    991	unsigned int nr_regions = 0;
    992	static unsigned int last_nr_regions;
    993	int nr_subregions = 2;
    994
    995	damon_for_each_target(t, ctx)
    996		nr_regions += damon_nr_regions(t);
    997
    998	if (nr_regions > ctx->max_nr_regions / 2)
    999		return;
   1000
   1001	/* Maybe the middle of the region has different access frequency */
   1002	if (last_nr_regions == nr_regions &&
   1003			nr_regions < ctx->max_nr_regions / 3)
   1004		nr_subregions = 3;
   1005
   1006	damon_for_each_target(t, ctx)
   1007		damon_split_regions_of(ctx, t, nr_subregions);
   1008
   1009	last_nr_regions = nr_regions;
   1010}
   1011
   1012/*
   1013 * Check whether it is time to update the operations-related data
   1014 * structures.
   1015 *
   1016 * Returns true if it is.
   1017 */
   1018static bool kdamond_need_update_operations(struct damon_ctx *ctx)
   1019{
   1020	return damon_check_reset_time_interval(&ctx->last_ops_update,
   1021			ctx->ops_update_interval);
   1022}
   1023
   1024/*
   1025 * Check whether current monitoring should be stopped
   1026 *
   1027 * The monitoring is stopped when either the user requested to stop, or all
   1028 * monitoring targets are invalid.
   1029 *
   1030 * Returns true if the current monitoring should be stopped.
   1031 */
   1032static bool kdamond_need_stop(struct damon_ctx *ctx)
   1033{
   1034	struct damon_target *t;
   1035
   1036	if (kthread_should_stop())
   1037		return true;
   1038
   1039	if (!ctx->ops.target_valid)
   1040		return false;
   1041
   1042	damon_for_each_target(t, ctx) {
   1043		if (ctx->ops.target_valid(t))
   1044			return false;
   1045	}
   1046
   1047	return true;
   1048}
   1049
   1050static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric)
   1051{
   1052	struct sysinfo i;
   1053
   1054	switch (metric) {
   1055	case DAMOS_WMARK_FREE_MEM_RATE:
   1056		si_meminfo(&i);
   1057		return i.freeram * 1000 / i.totalram;
   1058	default:
   1059		break;
   1060	}
   1061	return -EINVAL;
   1062}
   1063
   1064/*
   1065 * Returns zero if the scheme is active.  Otherwise, returns the time to wait
   1066 * for the next watermark check, in microseconds.
   1067 */
   1068static unsigned long damos_wmark_wait_us(struct damos *scheme)
   1069{
   1070	unsigned long metric;
   1071
   1072	if (scheme->wmarks.metric == DAMOS_WMARK_NONE)
   1073		return 0;
   1074
   1075	metric = damos_wmark_metric_value(scheme->wmarks.metric);
   1076	/* higher than high watermark or lower than low watermark */
   1077	if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) {
   1078		if (scheme->wmarks.activated)
   1079			pr_debug("deactivate a scheme (%d) for %s wmark\n",
   1080					scheme->action,
   1081					metric > scheme->wmarks.high ?
   1082					"high" : "low");
   1083		scheme->wmarks.activated = false;
   1084		return scheme->wmarks.interval;
   1085	}
   1086
   1087	/* inactive and higher than middle watermark */
   1088	if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) &&
   1089			!scheme->wmarks.activated)
   1090		return scheme->wmarks.interval;
   1091
   1092	if (!scheme->wmarks.activated)
   1093		pr_debug("activate a scheme (%d)\n", scheme->action);
   1094	scheme->wmarks.activated = true;
   1095	return 0;
   1096}
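/*
 * For example (illustrative thresholds), with DAMOS_WMARK_FREE_MEM_RATE and
 * high/mid/low of 500/400/200 permille, the scheme is deactivated while the
 * free memory rate is above 50% or below 20%, and a deactivated scheme is
 * turned back on only once the rate drops below the 40% mid watermark, which
 * gives the activation some hysteresis.
 */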
   1097
   1098static void kdamond_usleep(unsigned long usecs)
   1099{
   1100	/* See Documentation/timers/timers-howto.rst for the thresholds */
   1101	if (usecs > 20 * USEC_PER_MSEC)
   1102		schedule_timeout_idle(usecs_to_jiffies(usecs));
   1103	else
   1104		usleep_idle_range(usecs, usecs + 1);
   1105}
   1106
   1107/* Returns 0 once a scheme is activated, or negative error code if monitoring should stop while waiting */
   1108static int kdamond_wait_activation(struct damon_ctx *ctx)
   1109{
   1110	struct damos *s;
   1111	unsigned long wait_time;
   1112	unsigned long min_wait_time = 0;
   1113	bool init_wait_time = false;
   1114
   1115	while (!kdamond_need_stop(ctx)) {
   1116		damon_for_each_scheme(s, ctx) {
   1117			wait_time = damos_wmark_wait_us(s);
   1118			if (!init_wait_time || wait_time < min_wait_time) {
   1119				init_wait_time = true;
   1120				min_wait_time = wait_time;
   1121			}
   1122		}
   1123		if (!min_wait_time)
   1124			return 0;
   1125
   1126		kdamond_usleep(min_wait_time);
   1127
   1128		if (ctx->callback.after_wmarks_check &&
   1129				ctx->callback.after_wmarks_check(ctx))
   1130			break;
   1131	}
   1132	return -EBUSY;
   1133}
   1134
   1135/*
   1136 * The monitoring daemon that runs as a kernel thread
   1137 */
   1138static int kdamond_fn(void *data)
   1139{
   1140	struct damon_ctx *ctx = data;
   1141	struct damon_target *t;
   1142	struct damon_region *r, *next;
   1143	unsigned int max_nr_accesses = 0;
   1144	unsigned long sz_limit = 0;
   1145	bool done = false;
   1146
   1147	pr_debug("kdamond (%d) starts\n", current->pid);
   1148
   1149	if (ctx->ops.init)
   1150		ctx->ops.init(ctx);
   1151	if (ctx->callback.before_start && ctx->callback.before_start(ctx))
   1152		done = true;
   1153
   1154	sz_limit = damon_region_sz_limit(ctx);
   1155
   1156	while (!kdamond_need_stop(ctx) && !done) {
   1157		if (kdamond_wait_activation(ctx)) {
   1158			done = true;
   1159			continue;
   1160		}
   1161
   1162		if (ctx->ops.prepare_access_checks)
   1163			ctx->ops.prepare_access_checks(ctx);
   1164		if (ctx->callback.after_sampling &&
   1165				ctx->callback.after_sampling(ctx)) {
   1166			done = true;
   1167			continue;
   1168		}
   1169
   1170		kdamond_usleep(ctx->sample_interval);
   1171
   1172		if (ctx->ops.check_accesses)
   1173			max_nr_accesses = ctx->ops.check_accesses(ctx);
   1174
   1175		if (kdamond_aggregate_interval_passed(ctx)) {
   1176			kdamond_merge_regions(ctx,
   1177					max_nr_accesses / 10,
   1178					sz_limit);
   1179			if (ctx->callback.after_aggregation &&
   1180					ctx->callback.after_aggregation(ctx)) {
   1181				done = true;
   1182				continue;
   1183			}
   1184			kdamond_apply_schemes(ctx);
   1185			kdamond_reset_aggregated(ctx);
   1186			kdamond_split_regions(ctx);
   1187			if (ctx->ops.reset_aggregated)
   1188				ctx->ops.reset_aggregated(ctx);
   1189		}
   1190
   1191		if (kdamond_need_update_operations(ctx)) {
   1192			if (ctx->ops.update)
   1193				ctx->ops.update(ctx);
   1194			sz_limit = damon_region_sz_limit(ctx);
   1195		}
   1196	}
   1197	damon_for_each_target(t, ctx) {
   1198		damon_for_each_region_safe(r, next, t)
   1199			damon_destroy_region(r, t);
   1200	}
   1201
   1202	if (ctx->callback.before_terminate)
   1203		ctx->callback.before_terminate(ctx);
   1204	if (ctx->ops.cleanup)
   1205		ctx->ops.cleanup(ctx);
   1206
   1207	pr_debug("kdamond (%d) finishes\n", current->pid);
   1208	mutex_lock(&ctx->kdamond_lock);
   1209	ctx->kdamond = NULL;
   1210	mutex_unlock(&ctx->kdamond_lock);
   1211
   1212	mutex_lock(&damon_lock);
   1213	nr_running_ctxs--;
   1214	if (!nr_running_ctxs && running_exclusive_ctxs)
   1215		running_exclusive_ctxs = false;
   1216	mutex_unlock(&damon_lock);
   1217
   1218	return 0;
   1219}
   1220
   1221#include "core-test.h"