cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

zswap.c (40892B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * zswap.c - zswap driver file
      4 *
      5 * zswap is a backend for frontswap that takes pages that are in the process
      6 * of being swapped out and attempts to compress and store them in a
      7 * RAM-based memory pool.  This can result in a significant I/O reduction on
      8 * the swap device and, in the case where decompressing from RAM is faster
      9 * than reading from the swap device, can also improve workload performance.
     10 *
     11 * Copyright (C) 2012  Seth Jennings <sjenning@linux.vnet.ibm.com>
     12*/
     13
     14#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
     15
     16#include <linux/module.h>
     17#include <linux/cpu.h>
     18#include <linux/highmem.h>
     19#include <linux/slab.h>
     20#include <linux/spinlock.h>
     21#include <linux/types.h>
     22#include <linux/atomic.h>
     23#include <linux/frontswap.h>
     24#include <linux/rbtree.h>
     25#include <linux/swap.h>
     26#include <linux/crypto.h>
     27#include <linux/scatterlist.h>
     28#include <linux/mempool.h>
     29#include <linux/zpool.h>
     30#include <crypto/acompress.h>
     31
     32#include <linux/mm_types.h>
     33#include <linux/page-flags.h>
     34#include <linux/swapops.h>
     35#include <linux/writeback.h>
     36#include <linux/pagemap.h>
     37#include <linux/workqueue.h>
     38
     39#include "swap.h"
     40
     41/*********************************
     42* statistics
     43**********************************/
     44/* Total bytes used by the compressed storage */
     45u64 zswap_pool_total_size;
     46/* The number of compressed pages currently stored in zswap */
     47atomic_t zswap_stored_pages = ATOMIC_INIT(0);
     48/* The number of same-value filled pages currently stored in zswap */
     49static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0);
     50
     51/*
     52 * The statistics below are not protected from concurrent access for
     53 * performance reasons so they may not be 100% accurate.  However,
     54 * they do provide useful information on roughly how many times a
     55 * certain event is occurring.
     56*/
     57
     58/* Pool limit was hit (see zswap_max_pool_percent) */
     59static u64 zswap_pool_limit_hit;
     60/* Pages written back when pool limit was reached */
     61static u64 zswap_written_back_pages;
     62/* Store failed due to a reclaim failure after pool limit was reached */
     63static u64 zswap_reject_reclaim_fail;
     64/* Compressed page was too big for the allocator to (optimally) store */
     65static u64 zswap_reject_compress_poor;
     66/* Store failed because underlying allocator could not get memory */
     67static u64 zswap_reject_alloc_fail;
     68/* Store failed because the entry metadata could not be allocated (rare) */
     69static u64 zswap_reject_kmemcache_fail;
     70/* Duplicate store was encountered (rare) */
     71static u64 zswap_duplicate_entry;
     72
     73/* Shrinker work queue */
     74static struct workqueue_struct *shrink_wq;
     75/* Pool limit was hit, we need to calm down */
     76static bool zswap_pool_reached_full;
     77
     78/*********************************
     79* tunables
     80**********************************/
     81
     82#define ZSWAP_PARAM_UNSET ""
     83
     84/* Enable/disable zswap */
     85static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
     86static int zswap_enabled_param_set(const char *,
     87				   const struct kernel_param *);
     88static const struct kernel_param_ops zswap_enabled_param_ops = {
     89	.set =		zswap_enabled_param_set,
     90	.get =		param_get_bool,
     91};
     92module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);
     93
     94/* Crypto compressor to use */
     95static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
     96static int zswap_compressor_param_set(const char *,
     97				      const struct kernel_param *);
     98static const struct kernel_param_ops zswap_compressor_param_ops = {
     99	.set =		zswap_compressor_param_set,
    100	.get =		param_get_charp,
    101	.free =		param_free_charp,
    102};
    103module_param_cb(compressor, &zswap_compressor_param_ops,
    104		&zswap_compressor, 0644);
    105
    106/* Compressed storage zpool to use */
    107static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
    108static int zswap_zpool_param_set(const char *, const struct kernel_param *);
    109static const struct kernel_param_ops zswap_zpool_param_ops = {
    110	.set =		zswap_zpool_param_set,
    111	.get =		param_get_charp,
    112	.free =		param_free_charp,
    113};
    114module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);
    115
    116/* The maximum percentage of memory that the compressed pool can occupy */
    117static unsigned int zswap_max_pool_percent = 20;
    118module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);
    119
    120/* The threshold for accepting new pages after the max_pool_percent was hit */
    121static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
    122module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
    123		   uint, 0644);
    124
    125/*
    126 * Enable/disable handling same-value filled pages (enabled by default).
    127 * If disabled every page is considered non-same-value filled.
    128 */
    129static bool zswap_same_filled_pages_enabled = true;
    130module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
    131		   bool, 0644);
    132
    133/* Enable/disable handling non-same-value filled pages (enabled by default) */
    134static bool zswap_non_same_filled_pages_enabled = true;
    135module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
    136		   bool, 0644);
    137
    138/*********************************
    139* data structures
    140**********************************/
    141
    142struct crypto_acomp_ctx {
    143	struct crypto_acomp *acomp;
    144	struct acomp_req *req;
    145	struct crypto_wait wait;
    146	u8 *dstmem;
    147	struct mutex *mutex;
    148};
    149
    150struct zswap_pool {
    151	struct zpool *zpool;
    152	struct crypto_acomp_ctx __percpu *acomp_ctx;
    153	struct kref kref;
    154	struct list_head list;
    155	struct work_struct release_work;
    156	struct work_struct shrink_work;
    157	struct hlist_node node;
    158	char tfm_name[CRYPTO_MAX_ALG_NAME];
    159};
    160
    161/*
    162 * struct zswap_entry
    163 *
    164 * This structure contains the metadata for tracking a single compressed
    165 * page within zswap.
    166 *
    167 * rbnode - links the entry into red-black tree for the appropriate swap type
    168 * offset - the swap offset for the entry.  Index into the red-black tree.
     169 * refcount - the number of outstanding references to the entry. This is needed
     170 *            to protect against premature freeing of the entry by
     171 *            concurrent calls to load, invalidate, and writeback.  The lock
    172 *            for the zswap_tree structure that contains the entry must
    173 *            be held while changing the refcount.  Since the lock must
    174 *            be held, there is no reason to also make refcount atomic.
    175 * length - the length in bytes of the compressed page data.  Needed during
    176 *          decompression. For a same value filled page length is 0.
    177 * pool - the zswap_pool the entry's data is in
    178 * handle - zpool allocation handle that stores the compressed page data
     179 * value - value of a same-value filled page (the single word repeated through the page)
    180 */
    181struct zswap_entry {
    182	struct rb_node rbnode;
    183	pgoff_t offset;
    184	int refcount;
    185	unsigned int length;
    186	struct zswap_pool *pool;
    187	union {
    188		unsigned long handle;
    189		unsigned long value;
    190	};
    191	struct obj_cgroup *objcg;
    192};
    193
    194struct zswap_header {
    195	swp_entry_t swpentry;
    196};
    197
    198/*
    199 * The tree lock in the zswap_tree struct protects a few things:
    200 * - the rbtree
    201 * - the refcount field of each entry in the tree
    202 */
    203struct zswap_tree {
    204	struct rb_root rbroot;
    205	spinlock_t lock;
    206};
    207
    208static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
    209
    210/* RCU-protected iteration */
    211static LIST_HEAD(zswap_pools);
    212/* protects zswap_pools list modification */
    213static DEFINE_SPINLOCK(zswap_pools_lock);
    214/* pool counter to provide unique names to zpool */
    215static atomic_t zswap_pools_count = ATOMIC_INIT(0);
    216
    217/* used by param callback function */
    218static bool zswap_init_started;
    219
    220/* fatal error during init */
    221static bool zswap_init_failed;
    222
    223/* init completed, but couldn't create the initial pool */
    224static bool zswap_has_pool;
    225
    226/*********************************
    227* helpers and fwd declarations
    228**********************************/
    229
    230#define zswap_pool_debug(msg, p)				\
    231	pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,		\
    232		 zpool_get_type((p)->zpool))
    233
    234static int zswap_writeback_entry(struct zpool *pool, unsigned long handle);
    235static int zswap_pool_get(struct zswap_pool *pool);
    236static void zswap_pool_put(struct zswap_pool *pool);
    237
    238static const struct zpool_ops zswap_zpool_ops = {
    239	.evict = zswap_writeback_entry
    240};
    241
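        /*
         * Pool-limit hysteresis: zswap_is_full() reports the pool as full once
         * the compressed pool, rounded up to whole pages, exceeds
         * zswap_max_pool_percent of total RAM; zswap_can_accept() allows new
         * stores again only after usage drops below zswap_accept_thr_percent
         * of that limit.
         */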
    242static bool zswap_is_full(void)
    243{
    244	return totalram_pages() * zswap_max_pool_percent / 100 <
    245			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
    246}
    247
    248static bool zswap_can_accept(void)
    249{
    250	return totalram_pages() * zswap_accept_thr_percent / 100 *
    251				zswap_max_pool_percent / 100 >
    252			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
    253}
    254
    255static void zswap_update_total_size(void)
    256{
    257	struct zswap_pool *pool;
    258	u64 total = 0;
    259
    260	rcu_read_lock();
    261
    262	list_for_each_entry_rcu(pool, &zswap_pools, list)
    263		total += zpool_get_total_size(pool->zpool);
    264
    265	rcu_read_unlock();
    266
    267	zswap_pool_total_size = total;
    268}
    269
    270/*********************************
    271* zswap entry functions
    272**********************************/
    273static struct kmem_cache *zswap_entry_cache;
    274
    275static int __init zswap_entry_cache_create(void)
    276{
    277	zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
    278	return zswap_entry_cache == NULL;
    279}
    280
    281static void __init zswap_entry_cache_destroy(void)
    282{
    283	kmem_cache_destroy(zswap_entry_cache);
    284}
    285
    286static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
    287{
    288	struct zswap_entry *entry;
    289	entry = kmem_cache_alloc(zswap_entry_cache, gfp);
    290	if (!entry)
    291		return NULL;
    292	entry->refcount = 1;
    293	RB_CLEAR_NODE(&entry->rbnode);
    294	return entry;
    295}
    296
    297static void zswap_entry_cache_free(struct zswap_entry *entry)
    298{
    299	kmem_cache_free(zswap_entry_cache, entry);
    300}
    301
    302/*********************************
    303* rbtree functions
    304**********************************/
    305static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
    306{
    307	struct rb_node *node = root->rb_node;
    308	struct zswap_entry *entry;
    309
    310	while (node) {
    311		entry = rb_entry(node, struct zswap_entry, rbnode);
    312		if (entry->offset > offset)
    313			node = node->rb_left;
    314		else if (entry->offset < offset)
    315			node = node->rb_right;
    316		else
    317			return entry;
    318	}
    319	return NULL;
    320}
    321
    322/*
     323 * In the case that an entry with the same offset is found, a pointer to
    324 * the existing entry is stored in dupentry and the function returns -EEXIST
    325 */
    326static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
    327			struct zswap_entry **dupentry)
    328{
    329	struct rb_node **link = &root->rb_node, *parent = NULL;
    330	struct zswap_entry *myentry;
    331
    332	while (*link) {
    333		parent = *link;
    334		myentry = rb_entry(parent, struct zswap_entry, rbnode);
    335		if (myentry->offset > entry->offset)
    336			link = &(*link)->rb_left;
    337		else if (myentry->offset < entry->offset)
    338			link = &(*link)->rb_right;
    339		else {
    340			*dupentry = myentry;
    341			return -EEXIST;
    342		}
    343	}
    344	rb_link_node(&entry->rbnode, parent, link);
    345	rb_insert_color(&entry->rbnode, root);
    346	return 0;
    347}
    348
    349static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
    350{
    351	if (!RB_EMPTY_NODE(&entry->rbnode)) {
    352		rb_erase(&entry->rbnode, root);
    353		RB_CLEAR_NODE(&entry->rbnode);
    354	}
    355}
    356
    357/*
     358 * Carries out the common pattern of freeing an entry's zpool allocation,
    359 * freeing the entry itself, and decrementing the number of stored pages.
    360 */
    361static void zswap_free_entry(struct zswap_entry *entry)
    362{
    363	if (entry->objcg) {
    364		obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
    365		obj_cgroup_put(entry->objcg);
    366	}
    367	if (!entry->length)
    368		atomic_dec(&zswap_same_filled_pages);
    369	else {
    370		zpool_free(entry->pool->zpool, entry->handle);
    371		zswap_pool_put(entry->pool);
    372	}
    373	zswap_entry_cache_free(entry);
    374	atomic_dec(&zswap_stored_pages);
    375	zswap_update_total_size();
    376}
    377
    378/* caller must hold the tree lock */
    379static void zswap_entry_get(struct zswap_entry *entry)
    380{
    381	entry->refcount++;
    382}
    383
    384/* caller must hold the tree lock
     385* remove from the tree and free it, if nobody references the entry
    386*/
    387static void zswap_entry_put(struct zswap_tree *tree,
    388			struct zswap_entry *entry)
    389{
    390	int refcount = --entry->refcount;
    391
    392	BUG_ON(refcount < 0);
    393	if (refcount == 0) {
    394		zswap_rb_erase(&tree->rbroot, entry);
    395		zswap_free_entry(entry);
    396	}
    397}
    398
    399/* caller must hold the tree lock */
    400static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
    401				pgoff_t offset)
    402{
    403	struct zswap_entry *entry;
    404
    405	entry = zswap_rb_search(root, offset);
    406	if (entry)
    407		zswap_entry_get(entry);
    408
    409	return entry;
    410}
    411
    412/*********************************
    413* per-cpu code
    414**********************************/
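        /* two-page per-cpu scratch buffer used as the compression destination */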
    415static DEFINE_PER_CPU(u8 *, zswap_dstmem);
    416/*
     417 * If users dynamically change the zpool type and compressor at runtime, i.e.
     418 * while zswap is running, zswap can have more than one zpool on one cpu, but
     419 * they are sharing dstmem. So we need this mutex to be per-cpu.
    420 */
    421static DEFINE_PER_CPU(struct mutex *, zswap_mutex);
    422
    423static int zswap_dstmem_prepare(unsigned int cpu)
    424{
    425	struct mutex *mutex;
    426	u8 *dst;
    427
    428	dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
    429	if (!dst)
    430		return -ENOMEM;
    431
    432	mutex = kmalloc_node(sizeof(*mutex), GFP_KERNEL, cpu_to_node(cpu));
    433	if (!mutex) {
    434		kfree(dst);
    435		return -ENOMEM;
    436	}
    437
    438	mutex_init(mutex);
    439	per_cpu(zswap_dstmem, cpu) = dst;
    440	per_cpu(zswap_mutex, cpu) = mutex;
    441	return 0;
    442}
    443
    444static int zswap_dstmem_dead(unsigned int cpu)
    445{
    446	struct mutex *mutex;
    447	u8 *dst;
    448
    449	mutex = per_cpu(zswap_mutex, cpu);
    450	kfree(mutex);
    451	per_cpu(zswap_mutex, cpu) = NULL;
    452
    453	dst = per_cpu(zswap_dstmem, cpu);
    454	kfree(dst);
    455	per_cpu(zswap_dstmem, cpu) = NULL;
    456
    457	return 0;
    458}
    459
    460static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
    461{
    462	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
    463	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
    464	struct crypto_acomp *acomp;
    465	struct acomp_req *req;
    466
    467	acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
    468	if (IS_ERR(acomp)) {
    469		pr_err("could not alloc crypto acomp %s : %ld\n",
    470				pool->tfm_name, PTR_ERR(acomp));
    471		return PTR_ERR(acomp);
    472	}
    473	acomp_ctx->acomp = acomp;
    474
    475	req = acomp_request_alloc(acomp_ctx->acomp);
    476	if (!req) {
    477		pr_err("could not alloc crypto acomp_request %s\n",
    478		       pool->tfm_name);
    479		crypto_free_acomp(acomp_ctx->acomp);
    480		return -ENOMEM;
    481	}
    482	acomp_ctx->req = req;
    483
    484	crypto_init_wait(&acomp_ctx->wait);
    485	/*
    486	 * if the backend of acomp is async zip, crypto_req_done() will wakeup
    487	 * crypto_wait_req(); if the backend of acomp is scomp, the callback
    488	 * won't be called, crypto_wait_req() will return without blocking.
    489	 */
    490	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
    491				   crypto_req_done, &acomp_ctx->wait);
    492
    493	acomp_ctx->mutex = per_cpu(zswap_mutex, cpu);
    494	acomp_ctx->dstmem = per_cpu(zswap_dstmem, cpu);
    495
    496	return 0;
    497}
    498
    499static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
    500{
    501	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
    502	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
    503
    504	if (!IS_ERR_OR_NULL(acomp_ctx)) {
    505		if (!IS_ERR_OR_NULL(acomp_ctx->req))
    506			acomp_request_free(acomp_ctx->req);
    507		if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
    508			crypto_free_acomp(acomp_ctx->acomp);
    509	}
    510
    511	return 0;
    512}
    513
    514/*********************************
    515* pool functions
    516**********************************/
    517
    518static struct zswap_pool *__zswap_pool_current(void)
    519{
    520	struct zswap_pool *pool;
    521
    522	pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
    523	WARN_ONCE(!pool && zswap_has_pool,
    524		  "%s: no page storage pool!\n", __func__);
    525
    526	return pool;
    527}
    528
    529static struct zswap_pool *zswap_pool_current(void)
    530{
    531	assert_spin_locked(&zswap_pools_lock);
    532
    533	return __zswap_pool_current();
    534}
    535
    536static struct zswap_pool *zswap_pool_current_get(void)
    537{
    538	struct zswap_pool *pool;
    539
    540	rcu_read_lock();
    541
    542	pool = __zswap_pool_current();
    543	if (!zswap_pool_get(pool))
    544		pool = NULL;
    545
    546	rcu_read_unlock();
    547
    548	return pool;
    549}
    550
    551static struct zswap_pool *zswap_pool_last_get(void)
    552{
    553	struct zswap_pool *pool, *last = NULL;
    554
    555	rcu_read_lock();
    556
    557	list_for_each_entry_rcu(pool, &zswap_pools, list)
    558		last = pool;
    559	WARN_ONCE(!last && zswap_has_pool,
    560		  "%s: no page storage pool!\n", __func__);
    561	if (!zswap_pool_get(last))
    562		last = NULL;
    563
    564	rcu_read_unlock();
    565
    566	return last;
    567}
    568
    569/* type and compressor must be null-terminated */
    570static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
    571{
    572	struct zswap_pool *pool;
    573
    574	assert_spin_locked(&zswap_pools_lock);
    575
    576	list_for_each_entry_rcu(pool, &zswap_pools, list) {
    577		if (strcmp(pool->tfm_name, compressor))
    578			continue;
    579		if (strcmp(zpool_get_type(pool->zpool), type))
    580			continue;
    581		/* if we can't get it, it's about to be destroyed */
    582		if (!zswap_pool_get(pool))
    583			continue;
    584		return pool;
    585	}
    586
    587	return NULL;
    588}
    589
    590static void shrink_worker(struct work_struct *w)
    591{
    592	struct zswap_pool *pool = container_of(w, typeof(*pool),
    593						shrink_work);
    594
    595	if (zpool_shrink(pool->zpool, 1, NULL))
    596		zswap_reject_reclaim_fail++;
    597	zswap_pool_put(pool);
    598}
    599
    600static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
    601{
    602	struct zswap_pool *pool;
    603	char name[38]; /* 'zswap' + 32 char (max) num + \0 */
    604	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
    605	int ret;
    606
    607	if (!zswap_has_pool) {
    608		/* if either are unset, pool initialization failed, and we
    609		 * need both params to be set correctly before trying to
    610		 * create a pool.
    611		 */
    612		if (!strcmp(type, ZSWAP_PARAM_UNSET))
    613			return NULL;
    614		if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
    615			return NULL;
    616	}
    617
    618	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
    619	if (!pool)
    620		return NULL;
    621
    622	/* unique name for each pool specifically required by zsmalloc */
    623	snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));
    624
    625	pool->zpool = zpool_create_pool(type, name, gfp, &zswap_zpool_ops);
    626	if (!pool->zpool) {
    627		pr_err("%s zpool not available\n", type);
    628		goto error;
    629	}
    630	pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));
    631
    632	strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
    633
    634	pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
    635	if (!pool->acomp_ctx) {
    636		pr_err("percpu alloc failed\n");
    637		goto error;
    638	}
    639
    640	ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
    641				       &pool->node);
    642	if (ret)
    643		goto error;
    644	pr_debug("using %s compressor\n", pool->tfm_name);
    645
    646	/* being the current pool takes 1 ref; this func expects the
    647	 * caller to always add the new pool as the current pool
    648	 */
    649	kref_init(&pool->kref);
    650	INIT_LIST_HEAD(&pool->list);
    651	INIT_WORK(&pool->shrink_work, shrink_worker);
    652
    653	zswap_pool_debug("created", pool);
    654
    655	return pool;
    656
    657error:
    658	if (pool->acomp_ctx)
    659		free_percpu(pool->acomp_ctx);
    660	if (pool->zpool)
    661		zpool_destroy_pool(pool->zpool);
    662	kfree(pool);
    663	return NULL;
    664}
    665
    666static __init struct zswap_pool *__zswap_pool_create_fallback(void)
    667{
    668	bool has_comp, has_zpool;
    669
    670	has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
    671	if (!has_comp && strcmp(zswap_compressor,
    672				CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
    673		pr_err("compressor %s not available, using default %s\n",
    674		       zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
    675		param_free_charp(&zswap_compressor);
    676		zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
    677		has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
    678	}
    679	if (!has_comp) {
    680		pr_err("default compressor %s not available\n",
    681		       zswap_compressor);
    682		param_free_charp(&zswap_compressor);
    683		zswap_compressor = ZSWAP_PARAM_UNSET;
    684	}
    685
    686	has_zpool = zpool_has_pool(zswap_zpool_type);
    687	if (!has_zpool && strcmp(zswap_zpool_type,
    688				 CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
    689		pr_err("zpool %s not available, using default %s\n",
    690		       zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
    691		param_free_charp(&zswap_zpool_type);
    692		zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
    693		has_zpool = zpool_has_pool(zswap_zpool_type);
    694	}
    695	if (!has_zpool) {
    696		pr_err("default zpool %s not available\n",
    697		       zswap_zpool_type);
    698		param_free_charp(&zswap_zpool_type);
    699		zswap_zpool_type = ZSWAP_PARAM_UNSET;
    700	}
    701
    702	if (!has_comp || !has_zpool)
    703		return NULL;
    704
    705	return zswap_pool_create(zswap_zpool_type, zswap_compressor);
    706}
    707
    708static void zswap_pool_destroy(struct zswap_pool *pool)
    709{
    710	zswap_pool_debug("destroying", pool);
    711
    712	cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
    713	free_percpu(pool->acomp_ctx);
    714	zpool_destroy_pool(pool->zpool);
    715	kfree(pool);
    716}
    717
    718static int __must_check zswap_pool_get(struct zswap_pool *pool)
    719{
    720	if (!pool)
    721		return 0;
    722
    723	return kref_get_unless_zero(&pool->kref);
    724}
    725
    726static void __zswap_pool_release(struct work_struct *work)
    727{
    728	struct zswap_pool *pool = container_of(work, typeof(*pool),
    729						release_work);
    730
    731	synchronize_rcu();
    732
    733	/* nobody should have been able to get a kref... */
    734	WARN_ON(kref_get_unless_zero(&pool->kref));
    735
    736	/* pool is now off zswap_pools list and has no references. */
    737	zswap_pool_destroy(pool);
    738}
    739
    740static void __zswap_pool_empty(struct kref *kref)
    741{
    742	struct zswap_pool *pool;
    743
    744	pool = container_of(kref, typeof(*pool), kref);
    745
    746	spin_lock(&zswap_pools_lock);
    747
    748	WARN_ON(pool == zswap_pool_current());
    749
    750	list_del_rcu(&pool->list);
    751
    752	INIT_WORK(&pool->release_work, __zswap_pool_release);
    753	schedule_work(&pool->release_work);
    754
    755	spin_unlock(&zswap_pools_lock);
    756}
    757
    758static void zswap_pool_put(struct zswap_pool *pool)
    759{
    760	kref_put(&pool->kref, __zswap_pool_empty);
    761}
    762
    763/*********************************
    764* param callbacks
    765**********************************/
    766
    767/* val must be a null-terminated string */
    768static int __zswap_param_set(const char *val, const struct kernel_param *kp,
    769			     char *type, char *compressor)
    770{
    771	struct zswap_pool *pool, *put_pool = NULL;
    772	char *s = strstrip((char *)val);
    773	int ret;
    774
    775	if (zswap_init_failed) {
    776		pr_err("can't set param, initialization failed\n");
    777		return -ENODEV;
    778	}
    779
    780	/* no change required */
    781	if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
    782		return 0;
    783
    784	/* if this is load-time (pre-init) param setting,
    785	 * don't create a pool; that's done during init.
    786	 */
    787	if (!zswap_init_started)
    788		return param_set_charp(s, kp);
    789
    790	if (!type) {
    791		if (!zpool_has_pool(s)) {
    792			pr_err("zpool %s not available\n", s);
    793			return -ENOENT;
    794		}
    795		type = s;
    796	} else if (!compressor) {
    797		if (!crypto_has_acomp(s, 0, 0)) {
    798			pr_err("compressor %s not available\n", s);
    799			return -ENOENT;
    800		}
    801		compressor = s;
    802	} else {
    803		WARN_ON(1);
    804		return -EINVAL;
    805	}
    806
    807	spin_lock(&zswap_pools_lock);
    808
    809	pool = zswap_pool_find_get(type, compressor);
    810	if (pool) {
    811		zswap_pool_debug("using existing", pool);
    812		WARN_ON(pool == zswap_pool_current());
    813		list_del_rcu(&pool->list);
    814	}
    815
    816	spin_unlock(&zswap_pools_lock);
    817
    818	if (!pool)
    819		pool = zswap_pool_create(type, compressor);
    820
    821	if (pool)
    822		ret = param_set_charp(s, kp);
    823	else
    824		ret = -EINVAL;
    825
    826	spin_lock(&zswap_pools_lock);
    827
    828	if (!ret) {
    829		put_pool = zswap_pool_current();
    830		list_add_rcu(&pool->list, &zswap_pools);
    831		zswap_has_pool = true;
    832	} else if (pool) {
    833		/* add the possibly pre-existing pool to the end of the pools
    834		 * list; if it's new (and empty) then it'll be removed and
    835		 * destroyed by the put after we drop the lock
    836		 */
    837		list_add_tail_rcu(&pool->list, &zswap_pools);
    838		put_pool = pool;
    839	}
    840
    841	spin_unlock(&zswap_pools_lock);
    842
    843	if (!zswap_has_pool && !pool) {
    844		/* if initial pool creation failed, and this pool creation also
    845		 * failed, maybe both compressor and zpool params were bad.
    846		 * Allow changing this param, so pool creation will succeed
    847		 * when the other param is changed. We already verified this
    848		 * param is ok in the zpool_has_pool() or crypto_has_acomp()
    849		 * checks above.
    850		 */
    851		ret = param_set_charp(s, kp);
    852	}
    853
    854	/* drop the ref from either the old current pool,
    855	 * or the new pool we failed to add
    856	 */
    857	if (put_pool)
    858		zswap_pool_put(put_pool);
    859
    860	return ret;
    861}
    862
    863static int zswap_compressor_param_set(const char *val,
    864				      const struct kernel_param *kp)
    865{
    866	return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
    867}
    868
    869static int zswap_zpool_param_set(const char *val,
    870				 const struct kernel_param *kp)
    871{
    872	return __zswap_param_set(val, kp, NULL, zswap_compressor);
    873}
    874
    875static int zswap_enabled_param_set(const char *val,
    876				   const struct kernel_param *kp)
    877{
    878	if (zswap_init_failed) {
    879		pr_err("can't enable, initialization failed\n");
    880		return -ENODEV;
    881	}
    882	if (!zswap_has_pool && zswap_init_started) {
    883		pr_err("can't enable, no pool configured\n");
    884		return -ENODEV;
    885	}
    886
    887	return param_set_bool(val, kp);
    888}
    889
    890/*********************************
    891* writeback code
    892**********************************/
    893/* return enum for zswap_get_swap_cache_page */
    894enum zswap_get_swap_ret {
    895	ZSWAP_SWAPCACHE_NEW,
    896	ZSWAP_SWAPCACHE_EXIST,
    897	ZSWAP_SWAPCACHE_FAIL,
    898};
    899
    900/*
    901 * zswap_get_swap_cache_page
    902 *
     903 * This is an adaptation of read_swap_cache_async()
    904 *
    905 * This function tries to find a page with the given swap entry
    906 * in the swapper_space address space (the swap cache).  If the page
    907 * is found, it is returned in retpage.  Otherwise, a page is allocated,
    908 * added to the swap cache, and returned in retpage.
    909 *
     910 * On success, the swap cache page is returned in retpage
    911 * Returns ZSWAP_SWAPCACHE_EXIST if page was already in the swap cache
    912 * Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated,
    913 *     the new page is added to swapcache and locked
    914 * Returns ZSWAP_SWAPCACHE_FAIL on error
    915 */
    916static int zswap_get_swap_cache_page(swp_entry_t entry,
    917				struct page **retpage)
    918{
    919	bool page_was_allocated;
    920
    921	*retpage = __read_swap_cache_async(entry, GFP_KERNEL,
    922			NULL, 0, &page_was_allocated);
    923	if (page_was_allocated)
    924		return ZSWAP_SWAPCACHE_NEW;
    925	if (!*retpage)
    926		return ZSWAP_SWAPCACHE_FAIL;
    927	return ZSWAP_SWAPCACHE_EXIST;
    928}
    929
    930/*
    931 * Attempts to free an entry by adding a page to the swap cache,
    932 * decompressing the entry data into the page, and issuing a
    933 * bio write to write the page back to the swap device.
    934 *
    935 * This can be thought of as a "resumed writeback" of the page
    936 * to the swap device.  We are basically resuming the same swap
    937 * writeback path that was intercepted with the frontswap_store()
    938 * in the first place.  After the page has been decompressed into
    939 * the swap cache, the compressed version stored by zswap can be
    940 * freed.
    941 */
    942static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
    943{
    944	struct zswap_header *zhdr;
    945	swp_entry_t swpentry;
    946	struct zswap_tree *tree;
    947	pgoff_t offset;
    948	struct zswap_entry *entry;
    949	struct page *page;
    950	struct scatterlist input, output;
    951	struct crypto_acomp_ctx *acomp_ctx;
    952
    953	u8 *src, *tmp = NULL;
    954	unsigned int dlen;
    955	int ret;
    956	struct writeback_control wbc = {
    957		.sync_mode = WB_SYNC_NONE,
    958	};
    959
    960	if (!zpool_can_sleep_mapped(pool)) {
    961		tmp = kmalloc(PAGE_SIZE, GFP_ATOMIC);
    962		if (!tmp)
    963			return -ENOMEM;
    964	}
    965
    966	/* extract swpentry from data */
    967	zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
    968	swpentry = zhdr->swpentry; /* here */
    969	tree = zswap_trees[swp_type(swpentry)];
    970	offset = swp_offset(swpentry);
    971
    972	/* find and ref zswap entry */
    973	spin_lock(&tree->lock);
    974	entry = zswap_entry_find_get(&tree->rbroot, offset);
    975	if (!entry) {
    976		/* entry was invalidated */
    977		spin_unlock(&tree->lock);
    978		zpool_unmap_handle(pool, handle);
    979		kfree(tmp);
    980		return 0;
    981	}
    982	spin_unlock(&tree->lock);
    983	BUG_ON(offset != entry->offset);
    984
    985	src = (u8 *)zhdr + sizeof(struct zswap_header);
    986	if (!zpool_can_sleep_mapped(pool)) {
    987		memcpy(tmp, src, entry->length);
    988		src = tmp;
    989		zpool_unmap_handle(pool, handle);
    990	}
    991
    992	/* try to allocate swap cache page */
    993	switch (zswap_get_swap_cache_page(swpentry, &page)) {
    994	case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
    995		ret = -ENOMEM;
    996		goto fail;
    997
    998	case ZSWAP_SWAPCACHE_EXIST:
    999		/* page is already in the swap cache, ignore for now */
   1000		put_page(page);
   1001		ret = -EEXIST;
   1002		goto fail;
   1003
   1004	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
   1005		/* decompress */
   1006		acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
   1007		dlen = PAGE_SIZE;
   1008
   1009		mutex_lock(acomp_ctx->mutex);
   1010		sg_init_one(&input, src, entry->length);
   1011		sg_init_table(&output, 1);
   1012		sg_set_page(&output, page, PAGE_SIZE, 0);
   1013		acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
   1014		ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
   1015		dlen = acomp_ctx->req->dlen;
   1016		mutex_unlock(acomp_ctx->mutex);
   1017
   1018		BUG_ON(ret);
   1019		BUG_ON(dlen != PAGE_SIZE);
   1020
   1021		/* page is up to date */
   1022		SetPageUptodate(page);
   1023	}
   1024
   1025	/* move it to the tail of the inactive list after end_writeback */
   1026	SetPageReclaim(page);
   1027
   1028	/* start writeback */
   1029	__swap_writepage(page, &wbc, end_swap_bio_write);
   1030	put_page(page);
   1031	zswap_written_back_pages++;
   1032
   1033	spin_lock(&tree->lock);
   1034	/* drop local reference */
   1035	zswap_entry_put(tree, entry);
   1036
   1037	/*
   1038	* There are two possible situations for entry here:
    1039	* (1) refcount is 1 (normal case), entry is valid and on the tree
   1040	* (2) refcount is 0, entry is freed and not on the tree
   1041	*     because invalidate happened during writeback
    1042	*  search the tree and free the entry if it is still found
   1043	*/
   1044	if (entry == zswap_rb_search(&tree->rbroot, offset))
   1045		zswap_entry_put(tree, entry);
   1046	spin_unlock(&tree->lock);
   1047
   1048	goto end;
   1049
   1050	/*
    1051	* if we get here due to ZSWAP_SWAPCACHE_EXIST,
   1052	* a load may be happening concurrently.
   1053	* it is safe and okay to not free the entry.
   1054	* if we free the entry in the following put
   1055	* it is also okay to return !0
   1056	*/
   1057fail:
   1058	spin_lock(&tree->lock);
   1059	zswap_entry_put(tree, entry);
   1060	spin_unlock(&tree->lock);
   1061
   1062end:
   1063	if (zpool_can_sleep_mapped(pool))
   1064		zpool_unmap_handle(pool, handle);
   1065	else
   1066		kfree(tmp);
   1067
   1068	return ret;
   1069}
   1070
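        /*
         * Returns 1 and stores the repeated word in *value if every word in
         * the page equals the first one; returns 0 otherwise.
         */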
   1071static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
   1072{
   1073	unsigned int pos;
   1074	unsigned long *page;
   1075
   1076	page = (unsigned long *)ptr;
   1077	for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
   1078		if (page[pos] != page[0])
   1079			return 0;
   1080	}
   1081	*value = page[0];
   1082	return 1;
   1083}
   1084
   1085static void zswap_fill_page(void *ptr, unsigned long value)
   1086{
   1087	unsigned long *page;
   1088
   1089	page = (unsigned long *)ptr;
   1090	memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
   1091}
   1092
   1093/*********************************
   1094* frontswap hooks
   1095**********************************/
    1096/* attempts to compress and store a single page */
   1097static int zswap_frontswap_store(unsigned type, pgoff_t offset,
   1098				struct page *page)
   1099{
   1100	struct zswap_tree *tree = zswap_trees[type];
   1101	struct zswap_entry *entry, *dupentry;
   1102	struct scatterlist input, output;
   1103	struct crypto_acomp_ctx *acomp_ctx;
   1104	struct obj_cgroup *objcg = NULL;
   1105	struct zswap_pool *pool;
   1106	int ret;
   1107	unsigned int hlen, dlen = PAGE_SIZE;
   1108	unsigned long handle, value;
   1109	char *buf;
   1110	u8 *src, *dst;
   1111	struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
   1112	gfp_t gfp;
   1113
   1114	/* THP isn't supported */
   1115	if (PageTransHuge(page)) {
   1116		ret = -EINVAL;
   1117		goto reject;
   1118	}
   1119
   1120	if (!zswap_enabled || !tree) {
   1121		ret = -ENODEV;
   1122		goto reject;
   1123	}
   1124
   1125	objcg = get_obj_cgroup_from_page(page);
   1126	if (objcg && !obj_cgroup_may_zswap(objcg))
   1127		goto shrink;
   1128
   1129	/* reclaim space if needed */
   1130	if (zswap_is_full()) {
   1131		zswap_pool_limit_hit++;
   1132		zswap_pool_reached_full = true;
   1133		goto shrink;
   1134	}
   1135
   1136	if (zswap_pool_reached_full) {
   1137	       if (!zswap_can_accept()) {
   1138			ret = -ENOMEM;
   1139			goto reject;
   1140		} else
   1141			zswap_pool_reached_full = false;
   1142	}
   1143
   1144	/* allocate entry */
   1145	entry = zswap_entry_cache_alloc(GFP_KERNEL);
   1146	if (!entry) {
   1147		zswap_reject_kmemcache_fail++;
   1148		ret = -ENOMEM;
   1149		goto reject;
   1150	}
   1151
   1152	if (zswap_same_filled_pages_enabled) {
   1153		src = kmap_atomic(page);
   1154		if (zswap_is_page_same_filled(src, &value)) {
   1155			kunmap_atomic(src);
   1156			entry->offset = offset;
   1157			entry->length = 0;
   1158			entry->value = value;
   1159			atomic_inc(&zswap_same_filled_pages);
   1160			goto insert_entry;
   1161		}
   1162		kunmap_atomic(src);
   1163	}
   1164
   1165	if (!zswap_non_same_filled_pages_enabled) {
   1166		ret = -EINVAL;
   1167		goto freepage;
   1168	}
   1169
   1170	/* if entry is successfully added, it keeps the reference */
   1171	entry->pool = zswap_pool_current_get();
   1172	if (!entry->pool) {
   1173		ret = -EINVAL;
   1174		goto freepage;
   1175	}
   1176
   1177	/* compress */
   1178	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
   1179
   1180	mutex_lock(acomp_ctx->mutex);
   1181
   1182	dst = acomp_ctx->dstmem;
   1183	sg_init_table(&input, 1);
   1184	sg_set_page(&input, page, PAGE_SIZE, 0);
   1185
   1186	/* zswap_dstmem is of size (PAGE_SIZE * 2). Reflect same in sg_list */
   1187	sg_init_one(&output, dst, PAGE_SIZE * 2);
   1188	acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
    1189	/*
    1190	 * It may look a little silly that we send an asynchronous request and
    1191	 * then wait for its completion synchronously, which makes the process
    1192	 * effectively synchronous.
    1193	 * Theoretically, acomp lets users send multiple requests to one acomp
    1194	 * instance and have them completed simultaneously, but in this case
    1195	 * frontswap stores and loads page by page, and a single thread doing
    1196	 * frontswap has no way to send the second page before the first page
    1197	 * is done.
    1198	 * However, different threads running on different cpus have different
    1199	 * acomp instances, so multiple threads can do (de)compression in parallel.
    1200	 */
   1201	ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
   1202	dlen = acomp_ctx->req->dlen;
   1203
   1204	if (ret) {
   1205		ret = -EINVAL;
   1206		goto put_dstmem;
   1207	}
   1208
   1209	/* store */
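        	/*
        	 * A zswap_header holding the swap entry is prepended only when the
        	 * zpool supports eviction; writeback needs it to find the entry.
        	 */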
   1210	hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
   1211	gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
   1212	if (zpool_malloc_support_movable(entry->pool->zpool))
   1213		gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
   1214	ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle);
   1215	if (ret == -ENOSPC) {
   1216		zswap_reject_compress_poor++;
   1217		goto put_dstmem;
   1218	}
   1219	if (ret) {
   1220		zswap_reject_alloc_fail++;
   1221		goto put_dstmem;
   1222	}
   1223	buf = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_WO);
   1224	memcpy(buf, &zhdr, hlen);
   1225	memcpy(buf + hlen, dst, dlen);
   1226	zpool_unmap_handle(entry->pool->zpool, handle);
   1227	mutex_unlock(acomp_ctx->mutex);
   1228
   1229	/* populate entry */
   1230	entry->offset = offset;
   1231	entry->handle = handle;
   1232	entry->length = dlen;
   1233
   1234insert_entry:
   1235	entry->objcg = objcg;
   1236	if (objcg) {
   1237		obj_cgroup_charge_zswap(objcg, entry->length);
   1238		/* Account before objcg ref is moved to tree */
   1239		count_objcg_event(objcg, ZSWPOUT);
   1240	}
   1241
   1242	/* map */
   1243	spin_lock(&tree->lock);
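        	/*
        	 * An entry for this offset may already exist from an earlier store;
        	 * erase the duplicate and retry until the insert succeeds.
        	 */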
   1244	do {
   1245		ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
   1246		if (ret == -EEXIST) {
   1247			zswap_duplicate_entry++;
   1248			/* remove from rbtree */
   1249			zswap_rb_erase(&tree->rbroot, dupentry);
   1250			zswap_entry_put(tree, dupentry);
   1251		}
   1252	} while (ret == -EEXIST);
   1253	spin_unlock(&tree->lock);
   1254
   1255	/* update stats */
   1256	atomic_inc(&zswap_stored_pages);
   1257	zswap_update_total_size();
   1258	count_vm_event(ZSWPOUT);
   1259
   1260	return 0;
   1261
   1262put_dstmem:
   1263	mutex_unlock(acomp_ctx->mutex);
   1264	zswap_pool_put(entry->pool);
   1265freepage:
   1266	zswap_entry_cache_free(entry);
   1267reject:
   1268	if (objcg)
   1269		obj_cgroup_put(objcg);
   1270	return ret;
   1271
   1272shrink:
   1273	pool = zswap_pool_last_get();
   1274	if (pool)
   1275		queue_work(shrink_wq, &pool->shrink_work);
   1276	ret = -ENOMEM;
   1277	goto reject;
   1278}
   1279
   1280/*
   1281 * returns 0 if the page was successfully decompressed
    1282 * returns -1 on entry not found or error
    1283 */
   1284static int zswap_frontswap_load(unsigned type, pgoff_t offset,
   1285				struct page *page)
   1286{
   1287	struct zswap_tree *tree = zswap_trees[type];
   1288	struct zswap_entry *entry;
   1289	struct scatterlist input, output;
   1290	struct crypto_acomp_ctx *acomp_ctx;
   1291	u8 *src, *dst, *tmp;
   1292	unsigned int dlen;
   1293	int ret;
   1294
   1295	/* find */
   1296	spin_lock(&tree->lock);
   1297	entry = zswap_entry_find_get(&tree->rbroot, offset);
   1298	if (!entry) {
   1299		/* entry was written back */
   1300		spin_unlock(&tree->lock);
   1301		return -1;
   1302	}
   1303	spin_unlock(&tree->lock);
   1304
   1305	if (!entry->length) {
   1306		dst = kmap_atomic(page);
   1307		zswap_fill_page(dst, entry->value);
   1308		kunmap_atomic(dst);
   1309		ret = 0;
   1310		goto stats;
   1311	}
   1312
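        	/*
        	 * If the zpool mapping cannot be held across the potentially
        	 * sleeping decompression below, the compressed data is first
        	 * copied into a temporary buffer and the handle unmapped early.
        	 */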
   1313	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
   1314		tmp = kmalloc(entry->length, GFP_ATOMIC);
   1315		if (!tmp) {
   1316			ret = -ENOMEM;
   1317			goto freeentry;
   1318		}
   1319	}
   1320
   1321	/* decompress */
   1322	dlen = PAGE_SIZE;
   1323	src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO);
   1324	if (zpool_evictable(entry->pool->zpool))
   1325		src += sizeof(struct zswap_header);
   1326
   1327	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
   1328		memcpy(tmp, src, entry->length);
   1329		src = tmp;
   1330		zpool_unmap_handle(entry->pool->zpool, entry->handle);
   1331	}
   1332
   1333	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
   1334	mutex_lock(acomp_ctx->mutex);
   1335	sg_init_one(&input, src, entry->length);
   1336	sg_init_table(&output, 1);
   1337	sg_set_page(&output, page, PAGE_SIZE, 0);
   1338	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
   1339	ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
   1340	mutex_unlock(acomp_ctx->mutex);
   1341
   1342	if (zpool_can_sleep_mapped(entry->pool->zpool))
   1343		zpool_unmap_handle(entry->pool->zpool, entry->handle);
   1344	else
   1345		kfree(tmp);
   1346
   1347	BUG_ON(ret);
   1348stats:
   1349	count_vm_event(ZSWPIN);
   1350	if (entry->objcg)
   1351		count_objcg_event(entry->objcg, ZSWPIN);
   1352freeentry:
   1353	spin_lock(&tree->lock);
   1354	zswap_entry_put(tree, entry);
   1355	spin_unlock(&tree->lock);
   1356
   1357	return ret;
   1358}
   1359
   1360/* frees an entry in zswap */
   1361static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
   1362{
   1363	struct zswap_tree *tree = zswap_trees[type];
   1364	struct zswap_entry *entry;
   1365
   1366	/* find */
   1367	spin_lock(&tree->lock);
   1368	entry = zswap_rb_search(&tree->rbroot, offset);
   1369	if (!entry) {
   1370		/* entry was written back */
   1371		spin_unlock(&tree->lock);
   1372		return;
   1373	}
   1374
   1375	/* remove from rbtree */
   1376	zswap_rb_erase(&tree->rbroot, entry);
   1377
   1378	/* drop the initial reference from entry creation */
   1379	zswap_entry_put(tree, entry);
   1380
   1381	spin_unlock(&tree->lock);
   1382}
   1383
   1384/* frees all zswap entries for the given swap type */
   1385static void zswap_frontswap_invalidate_area(unsigned type)
   1386{
   1387	struct zswap_tree *tree = zswap_trees[type];
   1388	struct zswap_entry *entry, *n;
   1389
   1390	if (!tree)
   1391		return;
   1392
   1393	/* walk the tree and free everything */
   1394	spin_lock(&tree->lock);
   1395	rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
   1396		zswap_free_entry(entry);
   1397	tree->rbroot = RB_ROOT;
   1398	spin_unlock(&tree->lock);
   1399	kfree(tree);
   1400	zswap_trees[type] = NULL;
   1401}
   1402
   1403static void zswap_frontswap_init(unsigned type)
   1404{
   1405	struct zswap_tree *tree;
   1406
   1407	tree = kzalloc(sizeof(*tree), GFP_KERNEL);
   1408	if (!tree) {
   1409		pr_err("alloc failed, zswap disabled for swap type %d\n", type);
   1410		return;
   1411	}
   1412
   1413	tree->rbroot = RB_ROOT;
   1414	spin_lock_init(&tree->lock);
   1415	zswap_trees[type] = tree;
   1416}
   1417
   1418static const struct frontswap_ops zswap_frontswap_ops = {
   1419	.store = zswap_frontswap_store,
   1420	.load = zswap_frontswap_load,
   1421	.invalidate_page = zswap_frontswap_invalidate_page,
   1422	.invalidate_area = zswap_frontswap_invalidate_area,
   1423	.init = zswap_frontswap_init
   1424};
   1425
   1426/*********************************
   1427* debugfs functions
   1428**********************************/
   1429#ifdef CONFIG_DEBUG_FS
   1430#include <linux/debugfs.h>
   1431
   1432static struct dentry *zswap_debugfs_root;
   1433
   1434static int __init zswap_debugfs_init(void)
   1435{
   1436	if (!debugfs_initialized())
   1437		return -ENODEV;
   1438
   1439	zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
   1440
   1441	debugfs_create_u64("pool_limit_hit", 0444,
   1442			   zswap_debugfs_root, &zswap_pool_limit_hit);
   1443	debugfs_create_u64("reject_reclaim_fail", 0444,
   1444			   zswap_debugfs_root, &zswap_reject_reclaim_fail);
   1445	debugfs_create_u64("reject_alloc_fail", 0444,
   1446			   zswap_debugfs_root, &zswap_reject_alloc_fail);
   1447	debugfs_create_u64("reject_kmemcache_fail", 0444,
   1448			   zswap_debugfs_root, &zswap_reject_kmemcache_fail);
   1449	debugfs_create_u64("reject_compress_poor", 0444,
   1450			   zswap_debugfs_root, &zswap_reject_compress_poor);
   1451	debugfs_create_u64("written_back_pages", 0444,
   1452			   zswap_debugfs_root, &zswap_written_back_pages);
   1453	debugfs_create_u64("duplicate_entry", 0444,
   1454			   zswap_debugfs_root, &zswap_duplicate_entry);
   1455	debugfs_create_u64("pool_total_size", 0444,
   1456			   zswap_debugfs_root, &zswap_pool_total_size);
   1457	debugfs_create_atomic_t("stored_pages", 0444,
   1458				zswap_debugfs_root, &zswap_stored_pages);
   1459	debugfs_create_atomic_t("same_filled_pages", 0444,
   1460				zswap_debugfs_root, &zswap_same_filled_pages);
   1461
   1462	return 0;
   1463}
   1464#else
   1465static int __init zswap_debugfs_init(void)
   1466{
   1467	return 0;
   1468}
   1469#endif
   1470
   1471/*********************************
   1472* module init and exit
   1473**********************************/
   1474static int __init init_zswap(void)
   1475{
   1476	struct zswap_pool *pool;
   1477	int ret;
   1478
   1479	zswap_init_started = true;
   1480
   1481	if (zswap_entry_cache_create()) {
   1482		pr_err("entry cache creation failed\n");
   1483		goto cache_fail;
   1484	}
   1485
   1486	ret = cpuhp_setup_state(CPUHP_MM_ZSWP_MEM_PREPARE, "mm/zswap:prepare",
   1487				zswap_dstmem_prepare, zswap_dstmem_dead);
   1488	if (ret) {
   1489		pr_err("dstmem alloc failed\n");
   1490		goto dstmem_fail;
   1491	}
   1492
   1493	ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE,
   1494				      "mm/zswap_pool:prepare",
   1495				      zswap_cpu_comp_prepare,
   1496				      zswap_cpu_comp_dead);
   1497	if (ret)
   1498		goto hp_fail;
   1499
   1500	pool = __zswap_pool_create_fallback();
   1501	if (pool) {
   1502		pr_info("loaded using pool %s/%s\n", pool->tfm_name,
   1503			zpool_get_type(pool->zpool));
   1504		list_add(&pool->list, &zswap_pools);
   1505		zswap_has_pool = true;
   1506	} else {
   1507		pr_err("pool creation failed\n");
   1508		zswap_enabled = false;
   1509	}
   1510
   1511	shrink_wq = create_workqueue("zswap-shrink");
   1512	if (!shrink_wq)
   1513		goto fallback_fail;
   1514
   1515	ret = frontswap_register_ops(&zswap_frontswap_ops);
   1516	if (ret)
   1517		goto destroy_wq;
   1518	if (zswap_debugfs_init())
   1519		pr_warn("debugfs initialization failed\n");
   1520	return 0;
   1521
   1522destroy_wq:
   1523	destroy_workqueue(shrink_wq);
   1524fallback_fail:
   1525	if (pool)
   1526		zswap_pool_destroy(pool);
   1527hp_fail:
   1528	cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
   1529dstmem_fail:
   1530	zswap_entry_cache_destroy();
   1531cache_fail:
   1532	/* if built-in, we aren't unloaded on failure; don't allow use */
   1533	zswap_init_failed = true;
   1534	zswap_enabled = false;
   1535	return -ENOMEM;
   1536}
   1537/* must be late so crypto has time to come up */
   1538late_initcall(init_zswap);
   1539
   1540MODULE_LICENSE("GPL");
   1541MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>");
   1542MODULE_DESCRIPTION("Compressed cache for swap pages");