lru_cache.c (19267B)
1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 lru_cache.c 4 5 This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 6 7 Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. 8 Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>. 9 Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 10 11 12 */ 13 14#include <linux/module.h> 15#include <linux/bitops.h> 16#include <linux/slab.h> 17#include <linux/string.h> /* for memset */ 18#include <linux/seq_file.h> /* for seq_printf */ 19#include <linux/lru_cache.h> 20 21MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " 22 "Lars Ellenberg <lars@linbit.com>"); 23MODULE_DESCRIPTION("lru_cache - Track sets of hot objects"); 24MODULE_LICENSE("GPL"); 25 26/* this is developers aid only. 27 * it catches concurrent access (lack of locking on the users part) */ 28#define PARANOIA_ENTRY() do { \ 29 BUG_ON(!lc); \ 30 BUG_ON(!lc->nr_elements); \ 31 BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)); \ 32} while (0) 33 34#define RETURN(x...) do { \ 35 clear_bit_unlock(__LC_PARANOIA, &lc->flags); \ 36 return x ; } while (0) 37 38/* BUG() if e is not one of the elements tracked by lc */ 39#define PARANOIA_LC_ELEMENT(lc, e) do { \ 40 struct lru_cache *lc_ = (lc); \ 41 struct lc_element *e_ = (e); \ 42 unsigned i = e_->lc_index; \ 43 BUG_ON(i >= lc_->nr_elements); \ 44 BUG_ON(lc_->lc_element[i] != e_); } while (0) 45 46 47/* We need to atomically 48 * - try to grab the lock (set LC_LOCKED) 49 * - only if there is no pending transaction 50 * (neither LC_DIRTY nor LC_STARVING is set) 51 * Because of PARANOIA_ENTRY() above abusing lc->flags as well, 52 * it is not sufficient to just say 53 * return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED); 54 */ 55int lc_try_lock(struct lru_cache *lc) 56{ 57 unsigned long val; 58 do { 59 val = cmpxchg(&lc->flags, 0, LC_LOCKED); 60 } while (unlikely (val == LC_PARANOIA)); 61 /* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */ 62 return 0 == val; 63#if 0 64 /* Alternative approach, spin in case someone enters or leaves a 65 * PARANOIA_ENTRY()/RETURN() section. */ 66 unsigned long old, new, val; 67 do { 68 old = lc->flags & LC_PARANOIA; 69 new = old | LC_LOCKED; 70 val = cmpxchg(&lc->flags, old, new); 71 } while (unlikely (val == (old ^ LC_PARANOIA))); 72 return old == val; 73#endif 74} 75 76/** 77 * lc_create - prepares to track objects in an active set 78 * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details 79 * @cache: cache root pointer 80 * @max_pending_changes: maximum changes to accumulate until a transaction is required 81 * @e_count: number of elements allowed to be active simultaneously 82 * @e_size: size of the tracked objects 83 * @e_off: offset to the &struct lc_element member in a tracked object 84 * 85 * Returns a pointer to a newly initialized struct lru_cache on success, 86 * or NULL on (allocation) failure. 87 */ 88struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, 89 unsigned max_pending_changes, 90 unsigned e_count, size_t e_size, size_t e_off) 91{ 92 struct hlist_head *slot = NULL; 93 struct lc_element **element = NULL; 94 struct lru_cache *lc; 95 struct lc_element *e; 96 unsigned cache_obj_size = kmem_cache_size(cache); 97 unsigned i; 98 99 WARN_ON(cache_obj_size < e_size); 100 if (cache_obj_size < e_size) 101 return NULL; 102 103 /* e_count too big; would probably fail the allocation below anyways. 104 * for typical use cases, e_count should be few thousand at most. */ 105 if (e_count > LC_MAX_ACTIVE) 106 return NULL; 107 108 slot = kcalloc(e_count, sizeof(struct hlist_head), GFP_KERNEL); 109 if (!slot) 110 goto out_fail; 111 element = kcalloc(e_count, sizeof(struct lc_element *), GFP_KERNEL); 112 if (!element) 113 goto out_fail; 114 115 lc = kzalloc(sizeof(*lc), GFP_KERNEL); 116 if (!lc) 117 goto out_fail; 118 119 INIT_LIST_HEAD(&lc->in_use); 120 INIT_LIST_HEAD(&lc->lru); 121 INIT_LIST_HEAD(&lc->free); 122 INIT_LIST_HEAD(&lc->to_be_changed); 123 124 lc->name = name; 125 lc->element_size = e_size; 126 lc->element_off = e_off; 127 lc->nr_elements = e_count; 128 lc->max_pending_changes = max_pending_changes; 129 lc->lc_cache = cache; 130 lc->lc_element = element; 131 lc->lc_slot = slot; 132 133 /* preallocate all objects */ 134 for (i = 0; i < e_count; i++) { 135 void *p = kmem_cache_alloc(cache, GFP_KERNEL); 136 if (!p) 137 break; 138 memset(p, 0, lc->element_size); 139 e = p + e_off; 140 e->lc_index = i; 141 e->lc_number = LC_FREE; 142 e->lc_new_number = LC_FREE; 143 list_add(&e->list, &lc->free); 144 element[i] = e; 145 } 146 if (i == e_count) 147 return lc; 148 149 /* else: could not allocate all elements, give up */ 150 for (i--; i; i--) { 151 void *p = element[i]; 152 kmem_cache_free(cache, p - e_off); 153 } 154 kfree(lc); 155out_fail: 156 kfree(element); 157 kfree(slot); 158 return NULL; 159} 160 161static void lc_free_by_index(struct lru_cache *lc, unsigned i) 162{ 163 void *p = lc->lc_element[i]; 164 WARN_ON(!p); 165 if (p) { 166 p -= lc->element_off; 167 kmem_cache_free(lc->lc_cache, p); 168 } 169} 170 171/** 172 * lc_destroy - frees memory allocated by lc_create() 173 * @lc: the lru cache to destroy 174 */ 175void lc_destroy(struct lru_cache *lc) 176{ 177 unsigned i; 178 if (!lc) 179 return; 180 for (i = 0; i < lc->nr_elements; i++) 181 lc_free_by_index(lc, i); 182 kfree(lc->lc_element); 183 kfree(lc->lc_slot); 184 kfree(lc); 185} 186 187/** 188 * lc_reset - does a full reset for @lc and the hash table slots. 189 * @lc: the lru cache to operate on 190 * 191 * It is roughly the equivalent of re-allocating a fresh lru_cache object, 192 * basically a short cut to lc_destroy(lc); lc = lc_create(...); 193 */ 194void lc_reset(struct lru_cache *lc) 195{ 196 unsigned i; 197 198 INIT_LIST_HEAD(&lc->in_use); 199 INIT_LIST_HEAD(&lc->lru); 200 INIT_LIST_HEAD(&lc->free); 201 INIT_LIST_HEAD(&lc->to_be_changed); 202 lc->used = 0; 203 lc->hits = 0; 204 lc->misses = 0; 205 lc->starving = 0; 206 lc->locked = 0; 207 lc->changed = 0; 208 lc->pending_changes = 0; 209 lc->flags = 0; 210 memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); 211 212 for (i = 0; i < lc->nr_elements; i++) { 213 struct lc_element *e = lc->lc_element[i]; 214 void *p = e; 215 p -= lc->element_off; 216 memset(p, 0, lc->element_size); 217 /* re-init it */ 218 e->lc_index = i; 219 e->lc_number = LC_FREE; 220 e->lc_new_number = LC_FREE; 221 list_add(&e->list, &lc->free); 222 } 223} 224 225/** 226 * lc_seq_printf_stats - print stats about @lc into @seq 227 * @seq: the seq_file to print into 228 * @lc: the lru cache to print statistics of 229 */ 230void lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) 231{ 232 /* NOTE: 233 * total calls to lc_get are 234 * (starving + hits + misses) 235 * misses include "locked" count (update from an other thread in 236 * progress) and "changed", when this in fact lead to an successful 237 * update of the cache. 238 */ 239 seq_printf(seq, "\t%s: used:%u/%u hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", 240 lc->name, lc->used, lc->nr_elements, 241 lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); 242} 243 244static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) 245{ 246 return lc->lc_slot + (enr % lc->nr_elements); 247} 248 249 250static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr, 251 bool include_changing) 252{ 253 struct lc_element *e; 254 255 BUG_ON(!lc); 256 BUG_ON(!lc->nr_elements); 257 hlist_for_each_entry(e, lc_hash_slot(lc, enr), colision) { 258 /* "about to be changed" elements, pending transaction commit, 259 * are hashed by their "new number". "Normal" elements have 260 * lc_number == lc_new_number. */ 261 if (e->lc_new_number != enr) 262 continue; 263 if (e->lc_new_number == e->lc_number || include_changing) 264 return e; 265 break; 266 } 267 return NULL; 268} 269 270/** 271 * lc_find - find element by label, if present in the hash table 272 * @lc: The lru_cache object 273 * @enr: element number 274 * 275 * Returns the pointer to an element, if the element with the requested 276 * "label" or element number is present in the hash table, 277 * or NULL if not found. Does not change the refcnt. 278 * Ignores elements that are "about to be used", i.e. not yet in the active 279 * set, but still pending transaction commit. 280 */ 281struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) 282{ 283 return __lc_find(lc, enr, 0); 284} 285 286/** 287 * lc_is_used - find element by label 288 * @lc: The lru_cache object 289 * @enr: element number 290 * 291 * Returns true, if the element with the requested "label" or element number is 292 * present in the hash table, and is used (refcnt > 0). 293 * Also finds elements that are not _currently_ used but only "about to be 294 * used", i.e. on the "to_be_changed" list, pending transaction commit. 295 */ 296bool lc_is_used(struct lru_cache *lc, unsigned int enr) 297{ 298 struct lc_element *e = __lc_find(lc, enr, 1); 299 return e && e->refcnt; 300} 301 302/** 303 * lc_del - removes an element from the cache 304 * @lc: The lru_cache object 305 * @e: The element to remove 306 * 307 * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list, 308 * sets @e->enr to %LC_FREE. 309 */ 310void lc_del(struct lru_cache *lc, struct lc_element *e) 311{ 312 PARANOIA_ENTRY(); 313 PARANOIA_LC_ELEMENT(lc, e); 314 BUG_ON(e->refcnt); 315 316 e->lc_number = e->lc_new_number = LC_FREE; 317 hlist_del_init(&e->colision); 318 list_move(&e->list, &lc->free); 319 RETURN(); 320} 321 322static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number) 323{ 324 struct list_head *n; 325 struct lc_element *e; 326 327 if (!list_empty(&lc->free)) 328 n = lc->free.next; 329 else if (!list_empty(&lc->lru)) 330 n = lc->lru.prev; 331 else 332 return NULL; 333 334 e = list_entry(n, struct lc_element, list); 335 PARANOIA_LC_ELEMENT(lc, e); 336 337 e->lc_new_number = new_number; 338 if (!hlist_unhashed(&e->colision)) 339 __hlist_del(&e->colision); 340 hlist_add_head(&e->colision, lc_hash_slot(lc, new_number)); 341 list_move(&e->list, &lc->to_be_changed); 342 343 return e; 344} 345 346static int lc_unused_element_available(struct lru_cache *lc) 347{ 348 if (!list_empty(&lc->free)) 349 return 1; /* something on the free list */ 350 if (!list_empty(&lc->lru)) 351 return 1; /* something to evict */ 352 353 return 0; 354} 355 356/* used as internal flags to __lc_get */ 357enum { 358 LC_GET_MAY_CHANGE = 1, 359 LC_GET_MAY_USE_UNCOMMITTED = 2, 360}; 361 362static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, unsigned int flags) 363{ 364 struct lc_element *e; 365 366 PARANOIA_ENTRY(); 367 if (lc->flags & LC_STARVING) { 368 ++lc->starving; 369 RETURN(NULL); 370 } 371 372 e = __lc_find(lc, enr, 1); 373 /* if lc_new_number != lc_number, 374 * this enr is currently being pulled in already, 375 * and will be available once the pending transaction 376 * has been committed. */ 377 if (e) { 378 if (e->lc_new_number != e->lc_number) { 379 /* It has been found above, but on the "to_be_changed" 380 * list, not yet committed. Don't pull it in twice, 381 * wait for the transaction, then try again... 382 */ 383 if (!(flags & LC_GET_MAY_USE_UNCOMMITTED)) 384 RETURN(NULL); 385 /* ... unless the caller is aware of the implications, 386 * probably preparing a cumulative transaction. */ 387 ++e->refcnt; 388 ++lc->hits; 389 RETURN(e); 390 } 391 /* else: lc_new_number == lc_number; a real hit. */ 392 ++lc->hits; 393 if (e->refcnt++ == 0) 394 lc->used++; 395 list_move(&e->list, &lc->in_use); /* Not evictable... */ 396 RETURN(e); 397 } 398 /* e == NULL */ 399 400 ++lc->misses; 401 if (!(flags & LC_GET_MAY_CHANGE)) 402 RETURN(NULL); 403 404 /* To avoid races with lc_try_lock(), first, mark us dirty 405 * (using test_and_set_bit, as it implies memory barriers), ... */ 406 test_and_set_bit(__LC_DIRTY, &lc->flags); 407 408 /* ... only then check if it is locked anyways. If lc_unlock clears 409 * the dirty bit again, that's not a problem, we will come here again. 410 */ 411 if (test_bit(__LC_LOCKED, &lc->flags)) { 412 ++lc->locked; 413 RETURN(NULL); 414 } 415 416 /* In case there is nothing available and we can not kick out 417 * the LRU element, we have to wait ... 418 */ 419 if (!lc_unused_element_available(lc)) { 420 __set_bit(__LC_STARVING, &lc->flags); 421 RETURN(NULL); 422 } 423 424 /* It was not present in the active set. We are going to recycle an 425 * unused (or even "free") element, but we won't accumulate more than 426 * max_pending_changes changes. */ 427 if (lc->pending_changes >= lc->max_pending_changes) 428 RETURN(NULL); 429 430 e = lc_prepare_for_change(lc, enr); 431 BUG_ON(!e); 432 433 clear_bit(__LC_STARVING, &lc->flags); 434 BUG_ON(++e->refcnt != 1); 435 lc->used++; 436 lc->pending_changes++; 437 438 RETURN(e); 439} 440 441/** 442 * lc_get - get element by label, maybe change the active set 443 * @lc: the lru cache to operate on 444 * @enr: the label to look up 445 * 446 * Finds an element in the cache, increases its usage count, 447 * "touches" and returns it. 448 * 449 * In case the requested number is not present, it needs to be added to the 450 * cache. Therefore it is possible that an other element becomes evicted from 451 * the cache. In either case, the user is notified so he is able to e.g. keep 452 * a persistent log of the cache changes, and therefore the objects in use. 453 * 454 * Return values: 455 * NULL 456 * The cache was marked %LC_STARVING, 457 * or the requested label was not in the active set 458 * and a changing transaction is still pending (@lc was marked %LC_DIRTY). 459 * Or no unused or free element could be recycled (@lc will be marked as 460 * %LC_STARVING, blocking further lc_get() operations). 461 * 462 * pointer to the element with the REQUESTED element number. 463 * In this case, it can be used right away 464 * 465 * pointer to an UNUSED element with some different element number, 466 * where that different number may also be %LC_FREE. 467 * 468 * In this case, the cache is marked %LC_DIRTY, 469 * so lc_try_lock() will no longer succeed. 470 * The returned element pointer is moved to the "to_be_changed" list, 471 * and registered with the new element number on the hash collision chains, 472 * so it is possible to pick it up from lc_is_used(). 473 * Up to "max_pending_changes" (see lc_create()) can be accumulated. 474 * The user now should do whatever housekeeping is necessary, 475 * typically serialize on lc_try_lock_for_transaction(), then call 476 * lc_committed(lc) and lc_unlock(), to finish the change. 477 * 478 * NOTE: The user needs to check the lc_number on EACH use, so he recognizes 479 * any cache set change. 480 */ 481struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) 482{ 483 return __lc_get(lc, enr, LC_GET_MAY_CHANGE); 484} 485 486/** 487 * lc_get_cumulative - like lc_get; also finds to-be-changed elements 488 * @lc: the lru cache to operate on 489 * @enr: the label to look up 490 * 491 * Unlike lc_get this also returns the element for @enr, if it is belonging to 492 * a pending transaction, so the return values are like for lc_get(), 493 * plus: 494 * 495 * pointer to an element already on the "to_be_changed" list. 496 * In this case, the cache was already marked %LC_DIRTY. 497 * 498 * Caller needs to make sure that the pending transaction is completed, 499 * before proceeding to actually use this element. 500 */ 501struct lc_element *lc_get_cumulative(struct lru_cache *lc, unsigned int enr) 502{ 503 return __lc_get(lc, enr, LC_GET_MAY_CHANGE|LC_GET_MAY_USE_UNCOMMITTED); 504} 505 506/** 507 * lc_try_get - get element by label, if present; do not change the active set 508 * @lc: the lru cache to operate on 509 * @enr: the label to look up 510 * 511 * Finds an element in the cache, increases its usage count, 512 * "touches" and returns it. 513 * 514 * Return values: 515 * NULL 516 * The cache was marked %LC_STARVING, 517 * or the requested label was not in the active set 518 * 519 * pointer to the element with the REQUESTED element number. 520 * In this case, it can be used right away 521 */ 522struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) 523{ 524 return __lc_get(lc, enr, 0); 525} 526 527/** 528 * lc_committed - tell @lc that pending changes have been recorded 529 * @lc: the lru cache to operate on 530 * 531 * User is expected to serialize on explicit lc_try_lock_for_transaction() 532 * before the transaction is started, and later needs to lc_unlock() explicitly 533 * as well. 534 */ 535void lc_committed(struct lru_cache *lc) 536{ 537 struct lc_element *e, *tmp; 538 539 PARANOIA_ENTRY(); 540 list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) { 541 /* count number of changes, not number of transactions */ 542 ++lc->changed; 543 e->lc_number = e->lc_new_number; 544 list_move(&e->list, &lc->in_use); 545 } 546 lc->pending_changes = 0; 547 RETURN(); 548} 549 550 551/** 552 * lc_put - give up refcnt of @e 553 * @lc: the lru cache to operate on 554 * @e: the element to put 555 * 556 * If refcnt reaches zero, the element is moved to the lru list, 557 * and a %LC_STARVING (if set) is cleared. 558 * Returns the new (post-decrement) refcnt. 559 */ 560unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) 561{ 562 PARANOIA_ENTRY(); 563 PARANOIA_LC_ELEMENT(lc, e); 564 BUG_ON(e->refcnt == 0); 565 BUG_ON(e->lc_number != e->lc_new_number); 566 if (--e->refcnt == 0) { 567 /* move it to the front of LRU. */ 568 list_move(&e->list, &lc->lru); 569 lc->used--; 570 clear_bit_unlock(__LC_STARVING, &lc->flags); 571 } 572 RETURN(e->refcnt); 573} 574 575/** 576 * lc_element_by_index 577 * @lc: the lru cache to operate on 578 * @i: the index of the element to return 579 */ 580struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i) 581{ 582 BUG_ON(i >= lc->nr_elements); 583 BUG_ON(lc->lc_element[i] == NULL); 584 BUG_ON(lc->lc_element[i]->lc_index != i); 585 return lc->lc_element[i]; 586} 587 588/** 589 * lc_index_of 590 * @lc: the lru cache to operate on 591 * @e: the element to query for its index position in lc->element 592 */ 593unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e) 594{ 595 PARANOIA_LC_ELEMENT(lc, e); 596 return e->lc_index; 597} 598 599/** 600 * lc_set - associate index with label 601 * @lc: the lru cache to operate on 602 * @enr: the label to set 603 * @index: the element index to associate label with. 604 * 605 * Used to initialize the active set to some previously recorded state. 606 */ 607void lc_set(struct lru_cache *lc, unsigned int enr, int index) 608{ 609 struct lc_element *e; 610 struct list_head *lh; 611 612 if (index < 0 || index >= lc->nr_elements) 613 return; 614 615 e = lc_element_by_index(lc, index); 616 BUG_ON(e->lc_number != e->lc_new_number); 617 BUG_ON(e->refcnt != 0); 618 619 e->lc_number = e->lc_new_number = enr; 620 hlist_del_init(&e->colision); 621 if (enr == LC_FREE) 622 lh = &lc->free; 623 else { 624 hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); 625 lh = &lc->lru; 626 } 627 list_move(&e->list, lh); 628} 629 630/** 631 * lc_seq_dump_details - Dump a complete LRU cache to seq in textual form. 632 * @lc: the lru cache to operate on 633 * @seq: the &struct seq_file pointer to seq_printf into 634 * @utext: user supplied additional "heading" or other info 635 * @detail: function pointer the user may provide to dump further details 636 * of the object the lc_element is embedded in. May be NULL. 637 * Note: a leading space ' ' and trailing newline '\n' is implied. 638 */ 639void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext, 640 void (*detail) (struct seq_file *, struct lc_element *)) 641{ 642 unsigned int nr_elements = lc->nr_elements; 643 struct lc_element *e; 644 int i; 645 646 seq_printf(seq, "\tnn: lc_number (new nr) refcnt %s\n ", utext); 647 for (i = 0; i < nr_elements; i++) { 648 e = lc_element_by_index(lc, i); 649 if (e->lc_number != e->lc_new_number) 650 seq_printf(seq, "\t%5d: %6d %8d %6d ", 651 i, e->lc_number, e->lc_new_number, e->refcnt); 652 else 653 seq_printf(seq, "\t%5d: %6d %-8s %6d ", 654 i, e->lc_number, "-\"-", e->refcnt); 655 if (detail) 656 detail(seq, e); 657 seq_putc(seq, '\n'); 658 } 659} 660 661EXPORT_SYMBOL(lc_create); 662EXPORT_SYMBOL(lc_reset); 663EXPORT_SYMBOL(lc_destroy); 664EXPORT_SYMBOL(lc_set); 665EXPORT_SYMBOL(lc_del); 666EXPORT_SYMBOL(lc_try_get); 667EXPORT_SYMBOL(lc_find); 668EXPORT_SYMBOL(lc_get); 669EXPORT_SYMBOL(lc_put); 670EXPORT_SYMBOL(lc_committed); 671EXPORT_SYMBOL(lc_element_by_index); 672EXPORT_SYMBOL(lc_index_of); 673EXPORT_SYMBOL(lc_seq_printf_stats); 674EXPORT_SYMBOL(lc_seq_dump_details); 675EXPORT_SYMBOL(lc_try_lock); 676EXPORT_SYMBOL(lc_is_used); 677EXPORT_SYMBOL(lc_get_cumulative);