shrinker.c (9230B)
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * This file is part of UBIFS. 4 * 5 * Copyright (C) 2006-2008 Nokia Corporation. 6 * 7 * Authors: Artem Bityutskiy (Битюцкий Артём) 8 * Adrian Hunter 9 */ 10 11/* 12 * This file implements UBIFS shrinker which evicts clean znodes from the TNC 13 * tree when Linux VM needs more RAM. 14 * 15 * We do not implement any LRU lists to find oldest znodes to free because it 16 * would add additional overhead to the file system fast paths. So the shrinker 17 * just walks the TNC tree when searching for znodes to free. 18 * 19 * If the root of a TNC sub-tree is clean and old enough, then the children are 20 * also clean and old enough. So the shrinker walks the TNC in level order and 21 * dumps entire sub-trees. 22 * 23 * The age of znodes is just the time-stamp when they were last looked at. 24 * The current shrinker first tries to evict old znodes, then young ones. 25 * 26 * Since the shrinker is global, it has to protect against races with FS 27 * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'. 28 */ 29 30#include "ubifs.h" 31 32/* List of all UBIFS file-system instances */ 33LIST_HEAD(ubifs_infos); 34 35/* 36 * We number each shrinker run and record the number on the ubifs_info structure 37 * so that we can easily work out which ubifs_info structures have already been 38 * done by the current run. 39 */ 40static unsigned int shrinker_run_no; 41 42/* Protects 'ubifs_infos' list */ 43DEFINE_SPINLOCK(ubifs_infos_lock); 44 45/* Global clean znode counter (for all mounted UBIFS instances) */ 46atomic_long_t ubifs_clean_zn_cnt; 47 48/** 49 * shrink_tnc - shrink TNC tree. 50 * @c: UBIFS file-system description object 51 * @nr: number of znodes to free 52 * @age: the age of znodes to free 53 * @contention: if any contention, this is set to %1 54 * 55 * This function traverses TNC tree and frees clean znodes. It does not free 56 * clean znodes which younger then @age. Returns number of freed znodes. 57 */ 58static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention) 59{ 60 int total_freed = 0; 61 struct ubifs_znode *znode, *zprev; 62 time64_t time = ktime_get_seconds(); 63 64 ubifs_assert(c, mutex_is_locked(&c->umount_mutex)); 65 ubifs_assert(c, mutex_is_locked(&c->tnc_mutex)); 66 67 if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0) 68 return 0; 69 70 /* 71 * Traverse the TNC tree in levelorder manner, so that it is possible 72 * to destroy large sub-trees. Indeed, if a znode is old, then all its 73 * children are older or of the same age. 74 * 75 * Note, we are holding 'c->tnc_mutex', so we do not have to lock the 76 * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is 77 * changed only when the 'c->tnc_mutex' is held. 78 */ 79 zprev = NULL; 80 znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL); 81 while (znode && total_freed < nr && 82 atomic_long_read(&c->clean_zn_cnt) > 0) { 83 int freed; 84 85 /* 86 * If the znode is clean, but it is in the 'c->cnext' list, this 87 * means that this znode has just been written to flash as a 88 * part of commit and was marked clean. They will be removed 89 * from the list at end commit. We cannot change the list, 90 * because it is not protected by any mutex (design decision to 91 * make commit really independent and parallel to main I/O). So 92 * we just skip these znodes. 93 * 94 * Note, the 'clean_zn_cnt' counters are not updated until 95 * after the commit, so the UBIFS shrinker does not report 96 * the znodes which are in the 'c->cnext' list as freeable. 97 * 98 * Also note, if the root of a sub-tree is not in 'c->cnext', 99 * then the whole sub-tree is not in 'c->cnext' as well, so it 100 * is safe to dump whole sub-tree. 101 */ 102 103 if (znode->cnext) { 104 /* 105 * Very soon these znodes will be removed from the list 106 * and become freeable. 107 */ 108 *contention = 1; 109 } else if (!ubifs_zn_dirty(znode) && 110 abs(time - znode->time) >= age) { 111 if (znode->parent) 112 znode->parent->zbranch[znode->iip].znode = NULL; 113 else 114 c->zroot.znode = NULL; 115 116 freed = ubifs_destroy_tnc_subtree(c, znode); 117 atomic_long_sub(freed, &ubifs_clean_zn_cnt); 118 atomic_long_sub(freed, &c->clean_zn_cnt); 119 total_freed += freed; 120 znode = zprev; 121 } 122 123 if (unlikely(!c->zroot.znode)) 124 break; 125 126 zprev = znode; 127 znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode); 128 cond_resched(); 129 } 130 131 return total_freed; 132} 133 134/** 135 * shrink_tnc_trees - shrink UBIFS TNC trees. 136 * @nr: number of znodes to free 137 * @age: the age of znodes to free 138 * @contention: if any contention, this is set to %1 139 * 140 * This function walks the list of mounted UBIFS file-systems and frees clean 141 * znodes which are older than @age, until at least @nr znodes are freed. 142 * Returns the number of freed znodes. 143 */ 144static int shrink_tnc_trees(int nr, int age, int *contention) 145{ 146 struct ubifs_info *c; 147 struct list_head *p; 148 unsigned int run_no; 149 int freed = 0; 150 151 spin_lock(&ubifs_infos_lock); 152 do { 153 run_no = ++shrinker_run_no; 154 } while (run_no == 0); 155 /* Iterate over all mounted UBIFS file-systems and try to shrink them */ 156 p = ubifs_infos.next; 157 while (p != &ubifs_infos) { 158 c = list_entry(p, struct ubifs_info, infos_list); 159 /* 160 * We move the ones we do to the end of the list, so we stop 161 * when we see one we have already done. 162 */ 163 if (c->shrinker_run_no == run_no) 164 break; 165 if (!mutex_trylock(&c->umount_mutex)) { 166 /* Some un-mount is in progress, try next FS */ 167 *contention = 1; 168 p = p->next; 169 continue; 170 } 171 /* 172 * We're holding 'c->umount_mutex', so the file-system won't go 173 * away. 174 */ 175 if (!mutex_trylock(&c->tnc_mutex)) { 176 mutex_unlock(&c->umount_mutex); 177 *contention = 1; 178 p = p->next; 179 continue; 180 } 181 spin_unlock(&ubifs_infos_lock); 182 /* 183 * OK, now we have TNC locked, the file-system cannot go away - 184 * it is safe to reap the cache. 185 */ 186 c->shrinker_run_no = run_no; 187 freed += shrink_tnc(c, nr, age, contention); 188 mutex_unlock(&c->tnc_mutex); 189 spin_lock(&ubifs_infos_lock); 190 /* Get the next list element before we move this one */ 191 p = p->next; 192 /* 193 * Move this one to the end of the list to provide some 194 * fairness. 195 */ 196 list_move_tail(&c->infos_list, &ubifs_infos); 197 mutex_unlock(&c->umount_mutex); 198 if (freed >= nr) 199 break; 200 } 201 spin_unlock(&ubifs_infos_lock); 202 return freed; 203} 204 205/** 206 * kick_a_thread - kick a background thread to start commit. 207 * 208 * This function kicks a background thread to start background commit. Returns 209 * %-1 if a thread was kicked or there is another reason to assume the memory 210 * will soon be freed or become freeable. If there are no dirty znodes, returns 211 * %0. 212 */ 213static int kick_a_thread(void) 214{ 215 int i; 216 struct ubifs_info *c; 217 218 /* 219 * Iterate over all mounted UBIFS file-systems and find out if there is 220 * already an ongoing commit operation there. If no, then iterate for 221 * the second time and initiate background commit. 222 */ 223 spin_lock(&ubifs_infos_lock); 224 for (i = 0; i < 2; i++) { 225 list_for_each_entry(c, &ubifs_infos, infos_list) { 226 long dirty_zn_cnt; 227 228 if (!mutex_trylock(&c->umount_mutex)) { 229 /* 230 * Some un-mount is in progress, it will 231 * certainly free memory, so just return. 232 */ 233 spin_unlock(&ubifs_infos_lock); 234 return -1; 235 } 236 237 dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt); 238 239 if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN || 240 c->ro_mount || c->ro_error) { 241 mutex_unlock(&c->umount_mutex); 242 continue; 243 } 244 245 if (c->cmt_state != COMMIT_RESTING) { 246 spin_unlock(&ubifs_infos_lock); 247 mutex_unlock(&c->umount_mutex); 248 return -1; 249 } 250 251 if (i == 1) { 252 list_move_tail(&c->infos_list, &ubifs_infos); 253 spin_unlock(&ubifs_infos_lock); 254 255 ubifs_request_bg_commit(c); 256 mutex_unlock(&c->umount_mutex); 257 return -1; 258 } 259 mutex_unlock(&c->umount_mutex); 260 } 261 } 262 spin_unlock(&ubifs_infos_lock); 263 264 return 0; 265} 266 267unsigned long ubifs_shrink_count(struct shrinker *shrink, 268 struct shrink_control *sc) 269{ 270 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); 271 272 /* 273 * Due to the way UBIFS updates the clean znode counter it may 274 * temporarily be negative. 275 */ 276 return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; 277} 278 279unsigned long ubifs_shrink_scan(struct shrinker *shrink, 280 struct shrink_control *sc) 281{ 282 unsigned long nr = sc->nr_to_scan; 283 int contention = 0; 284 unsigned long freed; 285 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); 286 287 if (!clean_zn_cnt) { 288 /* 289 * No clean znodes, nothing to reap. All we can do in this case 290 * is to kick background threads to start commit, which will 291 * probably make clean znodes which, in turn, will be freeable. 292 * And we return -1 which means will make VM call us again 293 * later. 294 */ 295 dbg_tnc("no clean znodes, kick a thread"); 296 return kick_a_thread(); 297 } 298 299 freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention); 300 if (freed >= nr) 301 goto out; 302 303 dbg_tnc("not enough old znodes, try to free young ones"); 304 freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention); 305 if (freed >= nr) 306 goto out; 307 308 dbg_tnc("not enough young znodes, free all"); 309 freed += shrink_tnc_trees(nr - freed, 0, &contention); 310 311 if (!freed && contention) { 312 dbg_tnc("freed nothing, but contention"); 313 return SHRINK_STOP; 314 } 315 316out: 317 dbg_tnc("%lu znodes were freed, requested %lu", freed, nr); 318 return freed; 319}