zswap.c (40892B)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * zswap.c - zswap driver file
 *
 * zswap is a backend for frontswap that takes pages that are in the process
 * of being swapped out and attempts to compress and store them in a
 * RAM-based memory pool.  This can result in a significant I/O reduction on
 * the swap device and, in the case where decompressing from RAM is faster
 * than reading from the swap device, can also improve workload performance.
 *
 * Copyright (C) 2012  Seth Jennings <sjenning@linux.vnet.ibm.com>
*/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/frontswap.h>
#include <linux/rbtree.h>
#include <linux/swap.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <linux/mempool.h>
#include <linux/zpool.h>
#include <crypto/acompress.h>

#include <linux/mm_types.h>
#include <linux/page-flags.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/workqueue.h>

#include "swap.h"

/*********************************
* statistics
**********************************/
/* Total bytes used by the compressed storage */
u64 zswap_pool_total_size;
/* The number of compressed pages currently stored in zswap */
atomic_t zswap_stored_pages = ATOMIC_INIT(0);
/* The number of same-value filled pages currently stored in zswap */
static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0);

/*
 * The statistics below are not protected from concurrent access for
 * performance reasons so they may not be 100% accurate.  However,
 * they do provide useful information on roughly how many times a
 * certain event is occurring.
 */

/* Pool limit was hit (see zswap_max_pool_percent) */
static u64 zswap_pool_limit_hit;
/* Pages written back when pool limit was reached */
static u64 zswap_written_back_pages;
/* Store failed due to a reclaim failure after pool limit was reached */
static u64 zswap_reject_reclaim_fail;
/* Compressed page was too big for the allocator to (optimally) store */
static u64 zswap_reject_compress_poor;
/* Store failed because underlying allocator could not get memory */
static u64 zswap_reject_alloc_fail;
/* Store failed because the entry metadata could not be allocated (rare) */
static u64 zswap_reject_kmemcache_fail;
/* Duplicate store was encountered (rare) */
static u64 zswap_duplicate_entry;

/* Shrinker work queue */
static struct workqueue_struct *shrink_wq;
/* Pool limit was hit, we need to calm down */
static bool zswap_pool_reached_full;

/*********************************
* tunables
**********************************/

#define ZSWAP_PARAM_UNSET ""

/* Enable/disable zswap */
static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
static int zswap_enabled_param_set(const char *,
				   const struct kernel_param *);
static const struct kernel_param_ops zswap_enabled_param_ops = {
	.set = zswap_enabled_param_set,
	.get = param_get_bool,
};
module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);

/* Crypto compressor to use */
static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
static int zswap_compressor_param_set(const char *,
				      const struct kernel_param *);
static const struct kernel_param_ops zswap_compressor_param_ops = {
	.set = zswap_compressor_param_set,
	.get = param_get_charp,
	.free = param_free_charp,
};
module_param_cb(compressor, &zswap_compressor_param_ops,
		&zswap_compressor, 0644);

/* Compressed storage zpool to use */
static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
static int zswap_zpool_param_set(const char *, const struct kernel_param *);
static const struct kernel_param_ops zswap_zpool_param_ops = {
	.set = zswap_zpool_param_set,
	.get = param_get_charp,
	.free = param_free_charp,
};
module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);

/* The maximum percentage of memory that the compressed pool can occupy */
static unsigned int zswap_max_pool_percent = 20;
module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);

/* The threshold for accepting new pages after the max_pool_percent was hit */
static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
		   uint, 0644);

/*
 * Enable/disable handling same-value filled pages (enabled by default).
 * If disabled every page is considered non-same-value filled.
 */
static bool zswap_same_filled_pages_enabled = true;
module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
		   bool, 0644);

/* Enable/disable handling non-same-value filled pages (enabled by default) */
static bool zswap_non_same_filled_pages_enabled = true;
module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
		   bool, 0644);
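
/*
 * Example runtime configuration (illustrative only; the parameters above are
 * normally exposed under /sys/module/zswap/parameters/ when zswap is built in
 * or loaded, and the compressor/zpool names must be ones this kernel actually
 * provides):
 *
 *   echo 1      > /sys/module/zswap/parameters/enabled
 *   echo zstd   > /sys/module/zswap/parameters/compressor
 *   echo z3fold > /sys/module/zswap/parameters/zpool
 *   echo 25     > /sys/module/zswap/parameters/max_pool_percent
 *
 * Unavailable compressor or zpool names are rejected by the param_set
 * callbacks defined later in this file.
 */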

/*********************************
* data structures
**********************************/

struct crypto_acomp_ctx {
	struct crypto_acomp *acomp;
	struct acomp_req *req;
	struct crypto_wait wait;
	u8 *dstmem;
	struct mutex *mutex;
};

struct zswap_pool {
	struct zpool *zpool;
	struct crypto_acomp_ctx __percpu *acomp_ctx;
	struct kref kref;
	struct list_head list;
	struct work_struct release_work;
	struct work_struct shrink_work;
	struct hlist_node node;
	char tfm_name[CRYPTO_MAX_ALG_NAME];
};

/*
 * struct zswap_entry
 *
 * This structure contains the metadata for tracking a single compressed
 * page within zswap.
 *
 * rbnode - links the entry into red-black tree for the appropriate swap type
 * offset - the swap offset for the entry.  Index into the red-black tree.
 * refcount - the number of outstanding references to the entry. This is
 *            needed to protect against premature freeing of the entry by
 *            concurrent calls to load, invalidate, and writeback.  The lock
 *            for the zswap_tree structure that contains the entry must
 *            be held while changing the refcount.  Since the lock must
 *            be held, there is no reason to also make refcount atomic.
 * length - the length in bytes of the compressed page data.  Needed during
 *          decompression. For a same value filled page length is 0.
 * pool - the zswap_pool the entry's data is in
 * handle - zpool allocation handle that stores the compressed page data
 * value - value of the same-value filled pages which have same content
 */
struct zswap_entry {
	struct rb_node rbnode;
	pgoff_t offset;
	int refcount;
	unsigned int length;
	struct zswap_pool *pool;
	union {
		unsigned long handle;
		unsigned long value;
	};
	struct obj_cgroup *objcg;
};

struct zswap_header {
	swp_entry_t swpentry;
};

/*
 * The tree lock in the zswap_tree struct protects a few things:
 * - the rbtree
 * - the refcount field of each entry in the tree
 */
struct zswap_tree {
	struct rb_root rbroot;
	spinlock_t lock;
};

static struct zswap_tree *zswap_trees[MAX_SWAPFILES];

/* RCU-protected iteration */
static LIST_HEAD(zswap_pools);
/* protects zswap_pools list modification */
static DEFINE_SPINLOCK(zswap_pools_lock);
/* pool counter to provide unique names to zpool */
static atomic_t zswap_pools_count = ATOMIC_INIT(0);

/* used by param callback function */
static bool zswap_init_started;

/* fatal error during init */
static bool zswap_init_failed;

/* init completed, but couldn't create the initial pool */
static bool zswap_has_pool;

/*********************************
* helpers and fwd declarations
**********************************/

#define zswap_pool_debug(msg, p)				\
	pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,		\
		 zpool_get_type((p)->zpool))

static int zswap_writeback_entry(struct zpool *pool, unsigned long handle);
static int zswap_pool_get(struct zswap_pool *pool);
static void zswap_pool_put(struct zswap_pool *pool);

static const struct zpool_ops zswap_zpool_ops = {
	.evict = zswap_writeback_entry
};

static bool zswap_is_full(void)
{
	return totalram_pages() * zswap_max_pool_percent / 100 <
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}

static bool zswap_can_accept(void)
{
	return totalram_pages() * zswap_accept_thr_percent / 100 *
				zswap_max_pool_percent / 100 >
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}
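
/*
 * Illustrative arithmetic for the two checks above (the numbers are an
 * example, not derived from any particular system): with 2097152 total RAM
 * pages (8 GiB of 4 KiB pages), zswap_max_pool_percent = 20 and
 * zswap_accept_thr_percent = 90, zswap_is_full() reports full once the
 * compressed pool exceeds 2097152 * 20 / 100 = 419430 pages, and
 * zswap_can_accept() starts admitting new pages again only after the pool
 * shrinks below 2097152 * 90 / 100 * 20 / 100 = 377487 pages (90% of the
 * maximum), giving the shrinker some hysteresis.
 */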

static void zswap_update_total_size(void)
{
	struct zswap_pool *pool;
	u64 total = 0;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		total += zpool_get_total_size(pool->zpool);

	rcu_read_unlock();

	zswap_pool_total_size = total;
}

/*********************************
* zswap entry functions
**********************************/
static struct kmem_cache *zswap_entry_cache;

static int __init zswap_entry_cache_create(void)
{
	zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
	return zswap_entry_cache == NULL;
}

static void __init zswap_entry_cache_destroy(void)
{
	kmem_cache_destroy(zswap_entry_cache);
}

static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
{
	struct zswap_entry *entry;
	entry = kmem_cache_alloc(zswap_entry_cache, gfp);
	if (!entry)
		return NULL;
	entry->refcount = 1;
	RB_CLEAR_NODE(&entry->rbnode);
	return entry;
}

static void zswap_entry_cache_free(struct zswap_entry *entry)
{
	kmem_cache_free(zswap_entry_cache, entry);
}

/*********************************
* rbtree functions
**********************************/
static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
{
	struct rb_node *node = root->rb_node;
	struct zswap_entry *entry;

	while (node) {
		entry = rb_entry(node, struct zswap_entry, rbnode);
		if (entry->offset > offset)
			node = node->rb_left;
		else if (entry->offset < offset)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}

/*
 * In the case that an entry with the same offset is found, a pointer to
 * the existing entry is stored in dupentry and the function returns -EEXIST
 */
static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
			struct zswap_entry **dupentry)
{
	struct rb_node **link = &root->rb_node, *parent = NULL;
	struct zswap_entry *myentry;

	while (*link) {
		parent = *link;
		myentry = rb_entry(parent, struct zswap_entry, rbnode);
		if (myentry->offset > entry->offset)
			link = &(*link)->rb_left;
		else if (myentry->offset < entry->offset)
			link = &(*link)->rb_right;
		else {
			*dupentry = myentry;
			return -EEXIST;
		}
	}
	rb_link_node(&entry->rbnode, parent, link);
	rb_insert_color(&entry->rbnode, root);
	return 0;
}

static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
{
	if (!RB_EMPTY_NODE(&entry->rbnode)) {
		rb_erase(&entry->rbnode, root);
		RB_CLEAR_NODE(&entry->rbnode);
	}
}

/*
 * Carries out the common pattern of freeing an entry's zpool allocation,
 * freeing the entry itself, and decrementing the number of stored pages.
 */
static void zswap_free_entry(struct zswap_entry *entry)
{
	if (entry->objcg) {
		obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
		obj_cgroup_put(entry->objcg);
	}
	if (!entry->length)
		atomic_dec(&zswap_same_filled_pages);
	else {
		zpool_free(entry->pool->zpool, entry->handle);
		zswap_pool_put(entry->pool);
	}
	zswap_entry_cache_free(entry);
	atomic_dec(&zswap_stored_pages);
	zswap_update_total_size();
}

/* caller must hold the tree lock */
static void zswap_entry_get(struct zswap_entry *entry)
{
	entry->refcount++;
}

/* caller must hold the tree lock
* remove from the tree and free it, if nobody references the entry
*/
static void zswap_entry_put(struct zswap_tree *tree,
			struct zswap_entry *entry)
{
	int refcount = --entry->refcount;

	BUG_ON(refcount < 0);
	if (refcount == 0) {
		zswap_rb_erase(&tree->rbroot, entry);
		zswap_free_entry(entry);
	}
}

/* caller must hold the tree lock */
static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
				pgoff_t offset)
{
	struct zswap_entry *entry;

	entry = zswap_rb_search(root, offset);
	if (entry)
		zswap_entry_get(entry);

	return entry;
}
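
/*
 * Typical entry lookup pattern built from the helpers above (a sketch of how
 * the load, invalidate and writeback paths in this file use them, not a new
 * API):
 *
 *	spin_lock(&tree->lock);
 *	entry = zswap_entry_find_get(&tree->rbroot, offset); // takes a ref
 *	spin_unlock(&tree->lock);
 *	if (entry) {
 *		... use entry->handle / entry->value without the lock ...
 *		spin_lock(&tree->lock);
 *		zswap_entry_put(tree, entry); // may free on last reference
 *		spin_unlock(&tree->lock);
 *	}
 *
 * The initial reference taken in zswap_entry_cache_alloc() belongs to the
 * rbtree and is dropped when the entry is erased by invalidate, writeback,
 * or a duplicate store.
 */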

/*********************************
* per-cpu code
**********************************/
static DEFINE_PER_CPU(u8 *, zswap_dstmem);
/*
 * If users dynamically change the zpool type and compressor at runtime, i.e.
 * zswap is running, zswap can have more than one zpool on one cpu, but they
 * are sharing dstmem. So we need this mutex to be per-cpu.
 */
static DEFINE_PER_CPU(struct mutex *, zswap_mutex);

static int zswap_dstmem_prepare(unsigned int cpu)
{
	struct mutex *mutex;
	u8 *dst;

	dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
	if (!dst)
		return -ENOMEM;

	mutex = kmalloc_node(sizeof(*mutex), GFP_KERNEL, cpu_to_node(cpu));
	if (!mutex) {
		kfree(dst);
		return -ENOMEM;
	}

	mutex_init(mutex);
	per_cpu(zswap_dstmem, cpu) = dst;
	per_cpu(zswap_mutex, cpu) = mutex;
	return 0;
}

static int zswap_dstmem_dead(unsigned int cpu)
{
	struct mutex *mutex;
	u8 *dst;

	mutex = per_cpu(zswap_mutex, cpu);
	kfree(mutex);
	per_cpu(zswap_mutex, cpu) = NULL;

	dst = per_cpu(zswap_dstmem, cpu);
	kfree(dst);
	per_cpu(zswap_dstmem, cpu) = NULL;

	return 0;
}
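
/*
 * Note: the per-cpu destination buffer allocated above is two pages
 * (PAGE_SIZE * 2). The store path below sizes its output scatterlist to
 * match; the extra page presumably leaves headroom for compressors whose
 * output can temporarily exceed PAGE_SIZE on incompressible data (such
 * oversized results are then rejected when the zpool allocation fails and
 * counted as zswap_reject_compress_poor).
 */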

static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
{
	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
	struct crypto_acomp *acomp;
	struct acomp_req *req;

	acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
	if (IS_ERR(acomp)) {
		pr_err("could not alloc crypto acomp %s : %ld\n",
				pool->tfm_name, PTR_ERR(acomp));
		return PTR_ERR(acomp);
	}
	acomp_ctx->acomp = acomp;

	req = acomp_request_alloc(acomp_ctx->acomp);
	if (!req) {
		pr_err("could not alloc crypto acomp_request %s\n",
		       pool->tfm_name);
		crypto_free_acomp(acomp_ctx->acomp);
		return -ENOMEM;
	}
	acomp_ctx->req = req;

	crypto_init_wait(&acomp_ctx->wait);
	/*
	 * if the backend of acomp is async zip, crypto_req_done() will wake up
	 * crypto_wait_req(); if the backend of acomp is scomp, the callback
	 * won't be called, crypto_wait_req() will return without blocking.
	 */
	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &acomp_ctx->wait);

	acomp_ctx->mutex = per_cpu(zswap_mutex, cpu);
	acomp_ctx->dstmem = per_cpu(zswap_dstmem, cpu);

	return 0;
}

static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
{
	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);

	if (!IS_ERR_OR_NULL(acomp_ctx)) {
		if (!IS_ERR_OR_NULL(acomp_ctx->req))
			acomp_request_free(acomp_ctx->req);
		if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
			crypto_free_acomp(acomp_ctx->acomp);
	}

	return 0;
}

/*********************************
* pool functions
**********************************/

static struct zswap_pool *__zswap_pool_current(void)
{
	struct zswap_pool *pool;

	pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
	WARN_ONCE(!pool && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);

	return pool;
}

static struct zswap_pool *zswap_pool_current(void)
{
	assert_spin_locked(&zswap_pools_lock);

	return __zswap_pool_current();
}

static struct zswap_pool *zswap_pool_current_get(void)
{
	struct zswap_pool *pool;

	rcu_read_lock();

	pool = __zswap_pool_current();
	if (!zswap_pool_get(pool))
		pool = NULL;

	rcu_read_unlock();

	return pool;
}

static struct zswap_pool *zswap_pool_last_get(void)
{
	struct zswap_pool *pool, *last = NULL;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		last = pool;
	WARN_ONCE(!last && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);
	if (!zswap_pool_get(last))
		last = NULL;

	rcu_read_unlock();

	return last;
}

/* type and compressor must be null-terminated */
static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
{
	struct zswap_pool *pool;

	assert_spin_locked(&zswap_pools_lock);

	list_for_each_entry_rcu(pool, &zswap_pools, list) {
		if (strcmp(pool->tfm_name, compressor))
			continue;
		if (strcmp(zpool_get_type(pool->zpool), type))
			continue;
		/* if we can't get it, it's about to be destroyed */
		if (!zswap_pool_get(pool))
			continue;
		return pool;
	}

	return NULL;
}

static void shrink_worker(struct work_struct *w)
{
	struct zswap_pool *pool = container_of(w, typeof(*pool),
						shrink_work);

	if (zpool_shrink(pool->zpool, 1, NULL))
		zswap_reject_reclaim_fail++;
	zswap_pool_put(pool);
}

static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
{
	struct zswap_pool *pool;
	char name[38]; /* 'zswap' + 32 char (max) num + \0 */
	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	int ret;

	if (!zswap_has_pool) {
		/* if either are unset, pool initialization failed, and we
		 * need both params to be set correctly before trying to
		 * create a pool.
		 */
		if (!strcmp(type, ZSWAP_PARAM_UNSET))
			return NULL;
		if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
			return NULL;
	}

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return NULL;

	/* unique name for each pool specifically required by zsmalloc */
	snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));

	pool->zpool = zpool_create_pool(type, name, gfp, &zswap_zpool_ops);
	if (!pool->zpool) {
		pr_err("%s zpool not available\n", type);
		goto error;
	}
	pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));

	strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));

	pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
	if (!pool->acomp_ctx) {
		pr_err("percpu alloc failed\n");
		goto error;
	}

	ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
				       &pool->node);
	if (ret)
		goto error;
	pr_debug("using %s compressor\n", pool->tfm_name);

	/* being the current pool takes 1 ref; this func expects the
	 * caller to always add the new pool as the current pool
	 */
	kref_init(&pool->kref);
	INIT_LIST_HEAD(&pool->list);
	INIT_WORK(&pool->shrink_work, shrink_worker);

	zswap_pool_debug("created", pool);

	return pool;

error:
	if (pool->acomp_ctx)
		free_percpu(pool->acomp_ctx);
	if (pool->zpool)
		zpool_destroy_pool(pool->zpool);
	kfree(pool);
	return NULL;
}

static __init struct zswap_pool *__zswap_pool_create_fallback(void)
{
	bool has_comp, has_zpool;

	has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	if (!has_comp && strcmp(zswap_compressor,
				CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
		pr_err("compressor %s not available, using default %s\n",
		       zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
		param_free_charp(&zswap_compressor);
		zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
		has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	}
	if (!has_comp) {
		pr_err("default compressor %s not available\n",
		       zswap_compressor);
		param_free_charp(&zswap_compressor);
		zswap_compressor = ZSWAP_PARAM_UNSET;
	}

	has_zpool = zpool_has_pool(zswap_zpool_type);
	if (!has_zpool && strcmp(zswap_zpool_type,
				 CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
		pr_err("zpool %s not available, using default %s\n",
		       zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
		has_zpool = zpool_has_pool(zswap_zpool_type);
	}
	if (!has_zpool) {
		pr_err("default zpool %s not available\n",
		       zswap_zpool_type);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = ZSWAP_PARAM_UNSET;
	}

	if (!has_comp || !has_zpool)
		return NULL;

	return zswap_pool_create(zswap_zpool_type, zswap_compressor);
}

static void zswap_pool_destroy(struct zswap_pool *pool)
{
	zswap_pool_debug("destroying", pool);

	cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
	free_percpu(pool->acomp_ctx);
	zpool_destroy_pool(pool->zpool);
	kfree(pool);
}

static int __must_check zswap_pool_get(struct zswap_pool *pool)
{
	if (!pool)
		return 0;

	return kref_get_unless_zero(&pool->kref);
}

static void __zswap_pool_release(struct work_struct *work)
{
	struct zswap_pool *pool = container_of(work, typeof(*pool),
						release_work);

	synchronize_rcu();

	/* nobody should have been able to get a kref... */
	WARN_ON(kref_get_unless_zero(&pool->kref));

	/* pool is now off zswap_pools list and has no references. */
	zswap_pool_destroy(pool);
}

static void __zswap_pool_empty(struct kref *kref)
{
	struct zswap_pool *pool;

	pool = container_of(kref, typeof(*pool), kref);

	spin_lock(&zswap_pools_lock);

	WARN_ON(pool == zswap_pool_current());

	list_del_rcu(&pool->list);

	INIT_WORK(&pool->release_work, __zswap_pool_release);
	schedule_work(&pool->release_work);

	spin_unlock(&zswap_pools_lock);
}

static void zswap_pool_put(struct zswap_pool *pool)
{
	kref_put(&pool->kref, __zswap_pool_empty);
}
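
/*
 * Pool lifetime, summarized (a reading of the functions above rather than new
 * behaviour): a pool is created with one reference and installed at the head
 * of zswap_pools as the current pool. When the last reference is dropped,
 * __zswap_pool_empty() unlinks it from the RCU-protected list and defers the
 * actual teardown to a workqueue, where __zswap_pool_release() waits for an
 * RCU grace period so that concurrent readers still walking zswap_pools
 * cannot observe the pool by the time zswap_pool_destroy() frees it.
 */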

/*********************************
* param callbacks
**********************************/

/* val must be a null-terminated string */
static int __zswap_param_set(const char *val, const struct kernel_param *kp,
			     char *type, char *compressor)
{
	struct zswap_pool *pool, *put_pool = NULL;
	char *s = strstrip((char *)val);
	int ret;

	if (zswap_init_failed) {
		pr_err("can't set param, initialization failed\n");
		return -ENODEV;
	}

	/* no change required */
	if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
		return 0;

	/* if this is load-time (pre-init) param setting,
	 * don't create a pool; that's done during init.
	 */
	if (!zswap_init_started)
		return param_set_charp(s, kp);

	if (!type) {
		if (!zpool_has_pool(s)) {
			pr_err("zpool %s not available\n", s);
			return -ENOENT;
		}
		type = s;
	} else if (!compressor) {
		if (!crypto_has_acomp(s, 0, 0)) {
			pr_err("compressor %s not available\n", s);
			return -ENOENT;
		}
		compressor = s;
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	spin_lock(&zswap_pools_lock);

	pool = zswap_pool_find_get(type, compressor);
	if (pool) {
		zswap_pool_debug("using existing", pool);
		WARN_ON(pool == zswap_pool_current());
		list_del_rcu(&pool->list);
	}

	spin_unlock(&zswap_pools_lock);

	if (!pool)
		pool = zswap_pool_create(type, compressor);

	if (pool)
		ret = param_set_charp(s, kp);
	else
		ret = -EINVAL;

	spin_lock(&zswap_pools_lock);

	if (!ret) {
		put_pool = zswap_pool_current();
		list_add_rcu(&pool->list, &zswap_pools);
		zswap_has_pool = true;
	} else if (pool) {
		/* add the possibly pre-existing pool to the end of the pools
		 * list; if it's new (and empty) then it'll be removed and
		 * destroyed by the put after we drop the lock
		 */
		list_add_tail_rcu(&pool->list, &zswap_pools);
		put_pool = pool;
	}

	spin_unlock(&zswap_pools_lock);

	if (!zswap_has_pool && !pool) {
		/* if initial pool creation failed, and this pool creation also
		 * failed, maybe both compressor and zpool params were bad.
		 * Allow changing this param, so pool creation will succeed
		 * when the other param is changed. We already verified this
		 * param is ok in the zpool_has_pool() or crypto_has_acomp()
		 * checks above.
		 */
		ret = param_set_charp(s, kp);
	}

	/* drop the ref from either the old current pool,
	 * or the new pool we failed to add
	 */
	if (put_pool)
		zswap_pool_put(put_pool);

	return ret;
}

static int zswap_compressor_param_set(const char *val,
				      const struct kernel_param *kp)
{
	return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
}

static int zswap_zpool_param_set(const char *val,
				 const struct kernel_param *kp)
{
	return __zswap_param_set(val, kp, NULL, zswap_compressor);
}

static int zswap_enabled_param_set(const char *val,
				   const struct kernel_param *kp)
{
	if (zswap_init_failed) {
		pr_err("can't enable, initialization failed\n");
		return -ENODEV;
	}
	if (!zswap_has_pool && zswap_init_started) {
		pr_err("can't enable, no pool configured\n");
		return -ENODEV;
	}

	return param_set_bool(val, kp);
}

/*********************************
* writeback code
**********************************/
/* return enum for zswap_get_swap_cache_page */
enum zswap_get_swap_ret {
	ZSWAP_SWAPCACHE_NEW,
	ZSWAP_SWAPCACHE_EXIST,
	ZSWAP_SWAPCACHE_FAIL,
};

/*
 * zswap_get_swap_cache_page
 *
 * This is an adaptation of read_swap_cache_async()
 *
 * This function tries to find a page with the given swap entry
 * in the swapper_space address space (the swap cache). If the page
 * is found, it is returned in retpage. Otherwise, a page is allocated,
 * added to the swap cache, and returned in retpage.
 *
 * On success, the swap cache page is returned in retpage
 * Returns ZSWAP_SWAPCACHE_EXIST if page was already in the swap cache
 * Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated,
 * the new page is added to swapcache and locked
 * Returns ZSWAP_SWAPCACHE_FAIL on error
 */
static int zswap_get_swap_cache_page(swp_entry_t entry,
				struct page **retpage)
{
	bool page_was_allocated;

	*retpage = __read_swap_cache_async(entry, GFP_KERNEL,
			NULL, 0, &page_was_allocated);
	if (page_was_allocated)
		return ZSWAP_SWAPCACHE_NEW;
	if (!*retpage)
		return ZSWAP_SWAPCACHE_FAIL;
	return ZSWAP_SWAPCACHE_EXIST;
}

/*
 * Attempts to free an entry by adding a page to the swap cache,
 * decompressing the entry data into the page, and issuing a
 * bio write to write the page back to the swap device.
 *
 * This can be thought of as a "resumed writeback" of the page
 * to the swap device. We are basically resuming the same swap
 * writeback path that was intercepted with the frontswap_store()
 * in the first place. After the page has been decompressed into
 * the swap cache, the compressed version stored by zswap can be
 * freed.
 */
static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
{
	struct zswap_header *zhdr;
	swp_entry_t swpentry;
	struct zswap_tree *tree;
	pgoff_t offset;
	struct zswap_entry *entry;
	struct page *page;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;

	u8 *src, *tmp = NULL;
	unsigned int dlen;
	int ret;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
	};

	if (!zpool_can_sleep_mapped(pool)) {
		tmp = kmalloc(PAGE_SIZE, GFP_ATOMIC);
		if (!tmp)
			return -ENOMEM;
	}

	/* extract swpentry from data */
	zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
	swpentry = zhdr->swpentry; /* here */
	tree = zswap_trees[swp_type(swpentry)];
	offset = swp_offset(swpentry);

	/* find and ref zswap entry */
	spin_lock(&tree->lock);
	entry = zswap_entry_find_get(&tree->rbroot, offset);
	if (!entry) {
		/* entry was invalidated */
		spin_unlock(&tree->lock);
		zpool_unmap_handle(pool, handle);
		kfree(tmp);
		return 0;
	}
	spin_unlock(&tree->lock);
	BUG_ON(offset != entry->offset);

	src = (u8 *)zhdr + sizeof(struct zswap_header);
	if (!zpool_can_sleep_mapped(pool)) {
		memcpy(tmp, src, entry->length);
		src = tmp;
		zpool_unmap_handle(pool, handle);
	}

	/* try to allocate swap cache page */
	switch (zswap_get_swap_cache_page(swpentry, &page)) {
	case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
		ret = -ENOMEM;
		goto fail;

	case ZSWAP_SWAPCACHE_EXIST:
		/* page is already in the swap cache, ignore for now */
		put_page(page);
		ret = -EEXIST;
		goto fail;

	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
		/* decompress */
		acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
		dlen = PAGE_SIZE;

		mutex_lock(acomp_ctx->mutex);
		sg_init_one(&input, src, entry->length);
		sg_init_table(&output, 1);
		sg_set_page(&output, page, PAGE_SIZE, 0);
		acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
		ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
		dlen = acomp_ctx->req->dlen;
		mutex_unlock(acomp_ctx->mutex);

		BUG_ON(ret);
		BUG_ON(dlen != PAGE_SIZE);

		/* page is up to date */
		SetPageUptodate(page);
	}

	/* move it to the tail of the inactive list after end_writeback */
	SetPageReclaim(page);

	/* start writeback */
	__swap_writepage(page, &wbc, end_swap_bio_write);
	put_page(page);
	zswap_written_back_pages++;

	spin_lock(&tree->lock);
	/* drop local reference */
	zswap_entry_put(tree, entry);

	/*
	 * There are two possible situations for entry here:
	 * (1) refcount is 1 (normal case), entry is valid and on the tree
	 * (2) refcount is 0, entry is freed and not on the tree
	 *     because invalidate happened during writeback
	 * search the tree and drop the tree's reference if the entry is
	 * still there
	 */
	if (entry == zswap_rb_search(&tree->rbroot, offset))
		zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	goto end;

	/*
	 * if we get here due to ZSWAP_SWAPCACHE_EXIST
	 * a load may be happening concurrently.
	 * it is safe and okay to not free the entry.
	 * if we free the entry in the following put
	 * it is also okay to return !0
	 */
fail:
	spin_lock(&tree->lock);
	zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

end:
	if (zpool_can_sleep_mapped(pool))
		zpool_unmap_handle(pool, handle);
	else
		kfree(tmp);

	return ret;
}

static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
{
	unsigned int pos;
	unsigned long *page;

	page = (unsigned long *)ptr;
	for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
		if (page[pos] != page[0])
			return 0;
	}
	*value = page[0];
	return 1;
}

static void zswap_fill_page(void *ptr, unsigned long value)
{
	unsigned long *page;

	page = (unsigned long *)ptr;
	memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
}
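
/*
 * Example of the same-filled optimization above: a page that is entirely
 * zeroes, or that repeats any single unsigned long pattern (e.g. every word
 * is 0xdeadbeefdeadbeef on 64-bit), is never handed to the compressor. Only
 * the repeating value is kept in entry->value with entry->length == 0, and a
 * later load rebuilds the page with memset_l() instead of a decompression
 * call.
 */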

/*********************************
* frontswap hooks
**********************************/
/* attempts to compress and store a single page */
static int zswap_frontswap_store(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *dupentry;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;
	struct obj_cgroup *objcg = NULL;
	struct zswap_pool *pool;
	int ret;
	unsigned int hlen, dlen = PAGE_SIZE;
	unsigned long handle, value;
	char *buf;
	u8 *src, *dst;
	struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
	gfp_t gfp;

	/* THP isn't supported */
	if (PageTransHuge(page)) {
		ret = -EINVAL;
		goto reject;
	}

	if (!zswap_enabled || !tree) {
		ret = -ENODEV;
		goto reject;
	}

	objcg = get_obj_cgroup_from_page(page);
	if (objcg && !obj_cgroup_may_zswap(objcg))
		goto shrink;

	/* reclaim space if needed */
	if (zswap_is_full()) {
		zswap_pool_limit_hit++;
		zswap_pool_reached_full = true;
		goto shrink;
	}

	if (zswap_pool_reached_full) {
		if (!zswap_can_accept()) {
			ret = -ENOMEM;
			goto reject;
		} else
			zswap_pool_reached_full = false;
	}

	/* allocate entry */
	entry = zswap_entry_cache_alloc(GFP_KERNEL);
	if (!entry) {
		zswap_reject_kmemcache_fail++;
		ret = -ENOMEM;
		goto reject;
	}

	if (zswap_same_filled_pages_enabled) {
		src = kmap_atomic(page);
		if (zswap_is_page_same_filled(src, &value)) {
			kunmap_atomic(src);
			entry->offset = offset;
			entry->length = 0;
			entry->value = value;
			atomic_inc(&zswap_same_filled_pages);
			goto insert_entry;
		}
		kunmap_atomic(src);
	}

	if (!zswap_non_same_filled_pages_enabled) {
		ret = -EINVAL;
		goto freepage;
	}

	/* if entry is successfully added, it keeps the reference */
	entry->pool = zswap_pool_current_get();
	if (!entry->pool) {
		ret = -EINVAL;
		goto freepage;
	}

	/* compress */
	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);

	mutex_lock(acomp_ctx->mutex);

	dst = acomp_ctx->dstmem;
	sg_init_table(&input, 1);
	sg_set_page(&input, page, PAGE_SIZE, 0);

	/* zswap_dstmem is of size (PAGE_SIZE * 2). Reflect same in sg_list */
	sg_init_one(&output, dst, PAGE_SIZE * 2);
	acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
	/*
	 * It may look a little silly that we send an asynchronous request and
	 * then wait for its completion synchronously; in effect the whole
	 * operation is synchronous.
	 * In theory, acomp lets a user queue multiple requests on one acomp
	 * instance and have them completed concurrently. But frontswap stores
	 * and loads pages one at a time, so within a single thread there is no
	 * way to submit a second page before the first one is done.
	 * Different threads running on different CPUs use different acomp
	 * instances, however, so multiple threads can still (de)compress in
	 * parallel.
	 */
	ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
	dlen = acomp_ctx->req->dlen;

	if (ret) {
		ret = -EINVAL;
		goto put_dstmem;
	}

	/* store */
	hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
	gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	if (zpool_malloc_support_movable(entry->pool->zpool))
		gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
	ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle);
	if (ret == -ENOSPC) {
		zswap_reject_compress_poor++;
		goto put_dstmem;
	}
	if (ret) {
		zswap_reject_alloc_fail++;
		goto put_dstmem;
	}
	buf = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_WO);
	memcpy(buf, &zhdr, hlen);
	memcpy(buf + hlen, dst, dlen);
	zpool_unmap_handle(entry->pool->zpool, handle);
	mutex_unlock(acomp_ctx->mutex);

	/* populate entry */
	entry->offset = offset;
	entry->handle = handle;
	entry->length = dlen;

insert_entry:
	entry->objcg = objcg;
	if (objcg) {
		obj_cgroup_charge_zswap(objcg, entry->length);
		/* Account before objcg ref is moved to tree */
		count_objcg_event(objcg, ZSWPOUT);
	}

	/* map */
	spin_lock(&tree->lock);
	do {
		ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
		if (ret == -EEXIST) {
			zswap_duplicate_entry++;
			/* remove from rbtree */
			zswap_rb_erase(&tree->rbroot, dupentry);
			zswap_entry_put(tree, dupentry);
		}
	} while (ret == -EEXIST);
	spin_unlock(&tree->lock);

	/* update stats */
	atomic_inc(&zswap_stored_pages);
	zswap_update_total_size();
	count_vm_event(ZSWPOUT);

	return 0;

put_dstmem:
	mutex_unlock(acomp_ctx->mutex);
	zswap_pool_put(entry->pool);
freepage:
	zswap_entry_cache_free(entry);
reject:
	if (objcg)
		obj_cgroup_put(objcg);
	return ret;

shrink:
	pool = zswap_pool_last_get();
	if (pool)
		queue_work(shrink_wq, &pool->shrink_work);
	ret = -ENOMEM;
	goto reject;
}
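
/*
 * Note on the store path's return value: frontswap treats any non-zero
 * return from zswap_frontswap_store() as "not stored", and the page then
 * continues down the normal swap-out path to the swap device, so every
 * rejection above (pool full, compression failure, allocation failure, THP,
 * etc.) degrades to ordinary swap I/O rather than data loss.
 */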

/*
 * Returns 0 if the page was successfully decompressed.
 * Returns -1 if the entry was not found or on error.
 */
static int zswap_frontswap_load(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;
	u8 *src, *dst, *tmp;
	unsigned int dlen;
	int ret;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_entry_find_get(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return -1;
	}
	spin_unlock(&tree->lock);

	if (!entry->length) {
		dst = kmap_atomic(page);
		zswap_fill_page(dst, entry->value);
		kunmap_atomic(dst);
		ret = 0;
		goto stats;
	}

	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
		tmp = kmalloc(entry->length, GFP_ATOMIC);
		if (!tmp) {
			ret = -ENOMEM;
			goto freeentry;
		}
	}

	/* decompress */
	dlen = PAGE_SIZE;
	src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO);
	if (zpool_evictable(entry->pool->zpool))
		src += sizeof(struct zswap_header);

	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
		memcpy(tmp, src, entry->length);
		src = tmp;
		zpool_unmap_handle(entry->pool->zpool, entry->handle);
	}

	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
	mutex_lock(acomp_ctx->mutex);
	sg_init_one(&input, src, entry->length);
	sg_init_table(&output, 1);
	sg_set_page(&output, page, PAGE_SIZE, 0);
	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
	ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
	mutex_unlock(acomp_ctx->mutex);

	if (zpool_can_sleep_mapped(entry->pool->zpool))
		zpool_unmap_handle(entry->pool->zpool, entry->handle);
	else
		kfree(tmp);

	BUG_ON(ret);
stats:
	count_vm_event(ZSWPIN);
	if (entry->objcg)
		count_objcg_event(entry->objcg, ZSWPIN);
freeentry:
	spin_lock(&tree->lock);
	zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	return ret;
}

/* frees an entry in zswap */
static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_rb_search(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return;
	}

	/* remove from rbtree */
	zswap_rb_erase(&tree->rbroot, entry);

	/* drop the initial reference from entry creation */
	zswap_entry_put(tree, entry);

	spin_unlock(&tree->lock);
}

/* frees all zswap entries for the given swap type */
static void zswap_frontswap_invalidate_area(unsigned type)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *n;

	if (!tree)
		return;

	/* walk the tree and free everything */
	spin_lock(&tree->lock);
	rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
		zswap_free_entry(entry);
	tree->rbroot = RB_ROOT;
	spin_unlock(&tree->lock);
	kfree(tree);
	zswap_trees[type] = NULL;
}

static void zswap_frontswap_init(unsigned type)
{
	struct zswap_tree *tree;

	tree = kzalloc(sizeof(*tree), GFP_KERNEL);
	if (!tree) {
		pr_err("alloc failed, zswap disabled for swap type %d\n", type);
		return;
	}

	tree->rbroot = RB_ROOT;
	spin_lock_init(&tree->lock);
	zswap_trees[type] = tree;
}

static const struct frontswap_ops zswap_frontswap_ops = {
	.store = zswap_frontswap_store,
	.load = zswap_frontswap_load,
	.invalidate_page = zswap_frontswap_invalidate_page,
	.invalidate_area = zswap_frontswap_invalidate_area,
	.init = zswap_frontswap_init
};
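
/*
 * Where these hooks fire (a summary of the frontswap contract as consumed
 * here, not something defined in this file): .init runs at swapon for each
 * swap device, .store from the swap-out path before a page would be written
 * to the swap device, .load from the swap-in path before a disk read is
 * issued, .invalidate_page when a swap slot is freed, and .invalidate_area
 * at swapoff.
 */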

/*********************************
* debugfs functions
**********************************/
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>

static struct dentry *zswap_debugfs_root;

static int __init zswap_debugfs_init(void)
{
	if (!debugfs_initialized())
		return -ENODEV;

	zswap_debugfs_root = debugfs_create_dir("zswap", NULL);

	debugfs_create_u64("pool_limit_hit", 0444,
			   zswap_debugfs_root, &zswap_pool_limit_hit);
	debugfs_create_u64("reject_reclaim_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_reclaim_fail);
	debugfs_create_u64("reject_alloc_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_alloc_fail);
	debugfs_create_u64("reject_kmemcache_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_kmemcache_fail);
	debugfs_create_u64("reject_compress_poor", 0444,
			   zswap_debugfs_root, &zswap_reject_compress_poor);
	debugfs_create_u64("written_back_pages", 0444,
			   zswap_debugfs_root, &zswap_written_back_pages);
	debugfs_create_u64("duplicate_entry", 0444,
			   zswap_debugfs_root, &zswap_duplicate_entry);
	debugfs_create_u64("pool_total_size", 0444,
			   zswap_debugfs_root, &zswap_pool_total_size);
	debugfs_create_atomic_t("stored_pages", 0444,
				zswap_debugfs_root, &zswap_stored_pages);
	debugfs_create_atomic_t("same_filled_pages", 0444,
				zswap_debugfs_root, &zswap_same_filled_pages);

	return 0;
}
#else
static int __init zswap_debugfs_init(void)
{
	return 0;
}
#endif
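
/*
 * Example of inspecting the counters registered above (assuming debugfs is
 * mounted at the usual /sys/kernel/debug):
 *
 *   grep -r . /sys/kernel/debug/zswap/
 *
 * pool_total_size is in bytes; stored_pages and same_filled_pages are current
 * page counts, and the remaining files are cumulative event counts.
 */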

/*********************************
* module init and exit
**********************************/
static int __init init_zswap(void)
{
	struct zswap_pool *pool;
	int ret;

	zswap_init_started = true;

	if (zswap_entry_cache_create()) {
		pr_err("entry cache creation failed\n");
		goto cache_fail;
	}

	ret = cpuhp_setup_state(CPUHP_MM_ZSWP_MEM_PREPARE, "mm/zswap:prepare",
				zswap_dstmem_prepare, zswap_dstmem_dead);
	if (ret) {
		pr_err("dstmem alloc failed\n");
		goto dstmem_fail;
	}

	ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE,
				      "mm/zswap_pool:prepare",
				      zswap_cpu_comp_prepare,
				      zswap_cpu_comp_dead);
	if (ret)
		goto hp_fail;

	pool = __zswap_pool_create_fallback();
	if (pool) {
		pr_info("loaded using pool %s/%s\n", pool->tfm_name,
			zpool_get_type(pool->zpool));
		list_add(&pool->list, &zswap_pools);
		zswap_has_pool = true;
	} else {
		pr_err("pool creation failed\n");
		zswap_enabled = false;
	}

	shrink_wq = create_workqueue("zswap-shrink");
	if (!shrink_wq)
		goto fallback_fail;

	ret = frontswap_register_ops(&zswap_frontswap_ops);
	if (ret)
		goto destroy_wq;
	if (zswap_debugfs_init())
		pr_warn("debugfs initialization failed\n");
	return 0;

destroy_wq:
	destroy_workqueue(shrink_wq);
fallback_fail:
	if (pool)
		zswap_pool_destroy(pool);
hp_fail:
	cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
dstmem_fail:
	zswap_entry_cache_destroy();
cache_fail:
	/* if built-in, we aren't unloaded on failure; don't allow use */
	zswap_init_failed = true;
	zswap_enabled = false;
	return -ENOMEM;
}
/* must be late so crypto has time to come up */
late_initcall(init_zswap);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>");
MODULE_DESCRIPTION("Compressed cache for swap pages");