cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

shrinker.c (9230B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * This file is part of UBIFS.
      4 *
      5 * Copyright (C) 2006-2008 Nokia Corporation.
      6 *
      7 * Authors: Artem Bityutskiy (Битюцкий Артём)
      8 *          Adrian Hunter
      9 */
     10
     11/*
     12 * This file implements UBIFS shrinker which evicts clean znodes from the TNC
     13 * tree when Linux VM needs more RAM.
     14 *
     15 * We do not implement any LRU lists to find oldest znodes to free because it
     16 * would add additional overhead to the file system fast paths. So the shrinker
     17 * just walks the TNC tree when searching for znodes to free.
     18 *
     19 * If the root of a TNC sub-tree is clean and old enough, then the children are
     20 * also clean and old enough. So the shrinker walks the TNC in level order and
     21 * dumps entire sub-trees.
     22 *
     23 * The age of znodes is just the time-stamp when they were last looked at.
     24 * The current shrinker first tries to evict old znodes, then young ones.
     25 *
     26 * Since the shrinker is global, it has to protect against races with FS
     27 * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
     28 */
     29
     30#include "ubifs.h"
     31
     32/* List of all UBIFS file-system instances */
     33LIST_HEAD(ubifs_infos);
     34
     35/*
     36 * We number each shrinker run and record the number on the ubifs_info structure
     37 * so that we can easily work out which ubifs_info structures have already been
     38 * done by the current run.
     39 */
     40static unsigned int shrinker_run_no;
     41
     42/* Protects 'ubifs_infos' list */
     43DEFINE_SPINLOCK(ubifs_infos_lock);
     44
     45/* Global clean znode counter (for all mounted UBIFS instances) */
     46atomic_long_t ubifs_clean_zn_cnt;
     47
     48/**
     49 * shrink_tnc - shrink TNC tree.
     50 * @c: UBIFS file-system description object
     51 * @nr: number of znodes to free
     52 * @age: the age of znodes to free
     53 * @contention: if any contention, this is set to %1
     54 *
     55 * This function traverses TNC tree and frees clean znodes. It does not free
     56 * clean znodes which younger then @age. Returns number of freed znodes.
     57 */
     58static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
     59{
     60	int total_freed = 0;
     61	struct ubifs_znode *znode, *zprev;
     62	time64_t time = ktime_get_seconds();
     63
     64	ubifs_assert(c, mutex_is_locked(&c->umount_mutex));
     65	ubifs_assert(c, mutex_is_locked(&c->tnc_mutex));
     66
     67	if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
     68		return 0;
     69
     70	/*
     71	 * Traverse the TNC tree in levelorder manner, so that it is possible
     72	 * to destroy large sub-trees. Indeed, if a znode is old, then all its
     73	 * children are older or of the same age.
     74	 *
     75	 * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
     76	 * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
     77	 * changed only when the 'c->tnc_mutex' is held.
     78	 */
     79	zprev = NULL;
     80	znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL);
     81	while (znode && total_freed < nr &&
     82	       atomic_long_read(&c->clean_zn_cnt) > 0) {
     83		int freed;
     84
     85		/*
     86		 * If the znode is clean, but it is in the 'c->cnext' list, this
     87		 * means that this znode has just been written to flash as a
     88		 * part of commit and was marked clean. They will be removed
     89		 * from the list at end commit. We cannot change the list,
     90		 * because it is not protected by any mutex (design decision to
     91		 * make commit really independent and parallel to main I/O). So
     92		 * we just skip these znodes.
     93		 *
     94		 * Note, the 'clean_zn_cnt' counters are not updated until
     95		 * after the commit, so the UBIFS shrinker does not report
     96		 * the znodes which are in the 'c->cnext' list as freeable.
     97		 *
     98		 * Also note, if the root of a sub-tree is not in 'c->cnext',
     99		 * then the whole sub-tree is not in 'c->cnext' as well, so it
    100		 * is safe to dump whole sub-tree.
    101		 */
    102
    103		if (znode->cnext) {
    104			/*
    105			 * Very soon these znodes will be removed from the list
    106			 * and become freeable.
    107			 */
    108			*contention = 1;
    109		} else if (!ubifs_zn_dirty(znode) &&
    110			   abs(time - znode->time) >= age) {
    111			if (znode->parent)
    112				znode->parent->zbranch[znode->iip].znode = NULL;
    113			else
    114				c->zroot.znode = NULL;
    115
    116			freed = ubifs_destroy_tnc_subtree(c, znode);
    117			atomic_long_sub(freed, &ubifs_clean_zn_cnt);
    118			atomic_long_sub(freed, &c->clean_zn_cnt);
    119			total_freed += freed;
    120			znode = zprev;
    121		}
    122
    123		if (unlikely(!c->zroot.znode))
    124			break;
    125
    126		zprev = znode;
    127		znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode);
    128		cond_resched();
    129	}
    130
    131	return total_freed;
    132}
    133
    134/**
    135 * shrink_tnc_trees - shrink UBIFS TNC trees.
    136 * @nr: number of znodes to free
    137 * @age: the age of znodes to free
    138 * @contention: if any contention, this is set to %1
    139 *
    140 * This function walks the list of mounted UBIFS file-systems and frees clean
    141 * znodes which are older than @age, until at least @nr znodes are freed.
    142 * Returns the number of freed znodes.
    143 */
    144static int shrink_tnc_trees(int nr, int age, int *contention)
    145{
    146	struct ubifs_info *c;
    147	struct list_head *p;
    148	unsigned int run_no;
    149	int freed = 0;
    150
    151	spin_lock(&ubifs_infos_lock);
    152	do {
    153		run_no = ++shrinker_run_no;
    154	} while (run_no == 0);
    155	/* Iterate over all mounted UBIFS file-systems and try to shrink them */
    156	p = ubifs_infos.next;
    157	while (p != &ubifs_infos) {
    158		c = list_entry(p, struct ubifs_info, infos_list);
    159		/*
    160		 * We move the ones we do to the end of the list, so we stop
    161		 * when we see one we have already done.
    162		 */
    163		if (c->shrinker_run_no == run_no)
    164			break;
    165		if (!mutex_trylock(&c->umount_mutex)) {
    166			/* Some un-mount is in progress, try next FS */
    167			*contention = 1;
    168			p = p->next;
    169			continue;
    170		}
    171		/*
    172		 * We're holding 'c->umount_mutex', so the file-system won't go
    173		 * away.
    174		 */
    175		if (!mutex_trylock(&c->tnc_mutex)) {
    176			mutex_unlock(&c->umount_mutex);
    177			*contention = 1;
    178			p = p->next;
    179			continue;
    180		}
    181		spin_unlock(&ubifs_infos_lock);
    182		/*
    183		 * OK, now we have TNC locked, the file-system cannot go away -
    184		 * it is safe to reap the cache.
    185		 */
    186		c->shrinker_run_no = run_no;
    187		freed += shrink_tnc(c, nr, age, contention);
    188		mutex_unlock(&c->tnc_mutex);
    189		spin_lock(&ubifs_infos_lock);
    190		/* Get the next list element before we move this one */
    191		p = p->next;
    192		/*
    193		 * Move this one to the end of the list to provide some
    194		 * fairness.
    195		 */
    196		list_move_tail(&c->infos_list, &ubifs_infos);
    197		mutex_unlock(&c->umount_mutex);
    198		if (freed >= nr)
    199			break;
    200	}
    201	spin_unlock(&ubifs_infos_lock);
    202	return freed;
    203}
    204
    205/**
    206 * kick_a_thread - kick a background thread to start commit.
    207 *
    208 * This function kicks a background thread to start background commit. Returns
    209 * %-1 if a thread was kicked or there is another reason to assume the memory
    210 * will soon be freed or become freeable. If there are no dirty znodes, returns
    211 * %0.
    212 */
    213static int kick_a_thread(void)
    214{
    215	int i;
    216	struct ubifs_info *c;
    217
    218	/*
    219	 * Iterate over all mounted UBIFS file-systems and find out if there is
    220	 * already an ongoing commit operation there. If no, then iterate for
    221	 * the second time and initiate background commit.
    222	 */
    223	spin_lock(&ubifs_infos_lock);
    224	for (i = 0; i < 2; i++) {
    225		list_for_each_entry(c, &ubifs_infos, infos_list) {
    226			long dirty_zn_cnt;
    227
    228			if (!mutex_trylock(&c->umount_mutex)) {
    229				/*
    230				 * Some un-mount is in progress, it will
    231				 * certainly free memory, so just return.
    232				 */
    233				spin_unlock(&ubifs_infos_lock);
    234				return -1;
    235			}
    236
    237			dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
    238
    239			if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
    240			    c->ro_mount || c->ro_error) {
    241				mutex_unlock(&c->umount_mutex);
    242				continue;
    243			}
    244
    245			if (c->cmt_state != COMMIT_RESTING) {
    246				spin_unlock(&ubifs_infos_lock);
    247				mutex_unlock(&c->umount_mutex);
    248				return -1;
    249			}
    250
    251			if (i == 1) {
    252				list_move_tail(&c->infos_list, &ubifs_infos);
    253				spin_unlock(&ubifs_infos_lock);
    254
    255				ubifs_request_bg_commit(c);
    256				mutex_unlock(&c->umount_mutex);
    257				return -1;
    258			}
    259			mutex_unlock(&c->umount_mutex);
    260		}
    261	}
    262	spin_unlock(&ubifs_infos_lock);
    263
    264	return 0;
    265}
    266
    267unsigned long ubifs_shrink_count(struct shrinker *shrink,
    268				 struct shrink_control *sc)
    269{
    270	long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
    271
    272	/*
    273	 * Due to the way UBIFS updates the clean znode counter it may
    274	 * temporarily be negative.
    275	 */
    276	return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
    277}
    278
    279unsigned long ubifs_shrink_scan(struct shrinker *shrink,
    280				struct shrink_control *sc)
    281{
    282	unsigned long nr = sc->nr_to_scan;
    283	int contention = 0;
    284	unsigned long freed;
    285	long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
    286
    287	if (!clean_zn_cnt) {
    288		/*
    289		 * No clean znodes, nothing to reap. All we can do in this case
    290		 * is to kick background threads to start commit, which will
    291		 * probably make clean znodes which, in turn, will be freeable.
    292		 * And we return -1 which means will make VM call us again
    293		 * later.
    294		 */
    295		dbg_tnc("no clean znodes, kick a thread");
    296		return kick_a_thread();
    297	}
    298
    299	freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
    300	if (freed >= nr)
    301		goto out;
    302
    303	dbg_tnc("not enough old znodes, try to free young ones");
    304	freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
    305	if (freed >= nr)
    306		goto out;
    307
    308	dbg_tnc("not enough young znodes, free all");
    309	freed += shrink_tnc_trees(nr - freed, 0, &contention);
    310
    311	if (!freed && contention) {
    312		dbg_tnc("freed nothing, but contention");
    313		return SHRINK_STOP;
    314	}
    315
    316out:
    317	dbg_tnc("%lu znodes were freed, requested %lu", freed, nr);
    318	return freed;
    319}