cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

vmw_balloon.c (55913B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * VMware Balloon driver.
      4 *
      5 * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved.
      6 *
       7 * This is the VMware physical memory management driver for Linux. The driver
      8 * acts like a "balloon" that can be inflated to reclaim physical pages by
      9 * reserving them in the guest and invalidating them in the monitor,
     10 * freeing up the underlying machine pages so they can be allocated to
     11 * other guests.  The balloon can also be deflated to allow the guest to
     12 * use more physical memory. Higher level policies can control the sizes
     13 * of balloons in VMs in order to manage physical memory resources.
     14 */
     15
     16//#define DEBUG
     17#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
     18
     19#include <linux/types.h>
     20#include <linux/io.h>
     21#include <linux/kernel.h>
     22#include <linux/mm.h>
     23#include <linux/vmalloc.h>
     24#include <linux/sched.h>
     25#include <linux/module.h>
     26#include <linux/workqueue.h>
     27#include <linux/debugfs.h>
     28#include <linux/seq_file.h>
     29#include <linux/rwsem.h>
     30#include <linux/slab.h>
     31#include <linux/spinlock.h>
     32#include <linux/mount.h>
     33#include <linux/pseudo_fs.h>
     34#include <linux/balloon_compaction.h>
     35#include <linux/vmw_vmci_defs.h>
     36#include <linux/vmw_vmci_api.h>
     37#include <asm/hypervisor.h>
     38
     39MODULE_AUTHOR("VMware, Inc.");
     40MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
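        /* The "dmi:" alias lets the module autoload on VMware platforms (DMI system-vendor match). */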
     41MODULE_ALIAS("dmi:*:svnVMware*:*");
     42MODULE_ALIAS("vmware_vmmemctl");
     43MODULE_LICENSE("GPL");
     44
     45static bool __read_mostly vmwballoon_shrinker_enable;
     46module_param(vmwballoon_shrinker_enable, bool, 0444);
     47MODULE_PARM_DESC(vmwballoon_shrinker_enable,
     48	"Enable non-cooperative out-of-memory protection. Disabled by default as it may degrade performance.");
     49
     50/* Delay in seconds after shrink before inflation. */
     51#define VMBALLOON_SHRINK_DELAY		(5)
     52
      53/* Maximum number of refused pages we accumulate during an inflation cycle */
     54#define VMW_BALLOON_MAX_REFUSED		16
     55
     56/* Magic number for the balloon mount-point */
     57#define BALLOON_VMW_MAGIC		0x0ba11007
     58
     59/*
     60 * Hypervisor communication port definitions.
     61 */
     62#define VMW_BALLOON_HV_PORT		0x5670
     63#define VMW_BALLOON_HV_MAGIC		0x456c6d6f
     64#define VMW_BALLOON_GUEST_ID		1	/* Linux */
     65
     66enum vmwballoon_capabilities {
     67	/*
      68	 * Bit 0 is reserved and not associated with any capability.
     69	 */
     70	VMW_BALLOON_BASIC_CMDS			= (1 << 1),
     71	VMW_BALLOON_BATCHED_CMDS		= (1 << 2),
     72	VMW_BALLOON_BATCHED_2M_CMDS		= (1 << 3),
     73	VMW_BALLOON_SIGNALLED_WAKEUP_CMD	= (1 << 4),
     74	VMW_BALLOON_64_BIT_TARGET		= (1 << 5)
     75};
     76
     77#define VMW_BALLOON_CAPABILITIES_COMMON	(VMW_BALLOON_BASIC_CMDS \
     78					| VMW_BALLOON_BATCHED_CMDS \
     79					| VMW_BALLOON_BATCHED_2M_CMDS \
     80					| VMW_BALLOON_SIGNALLED_WAKEUP_CMD)
     81
     82#define VMW_BALLOON_2M_ORDER		(PMD_SHIFT - PAGE_SHIFT)
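        /* On x86, PMD_SHIFT - PAGE_SHIFT == 9, so one 2MB page covers 512 4KB frames. */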
     83
     84/*
      85 * 64-bit targets are only supported in 64-bit builds.
     86 */
     87#ifdef CONFIG_64BIT
     88#define VMW_BALLOON_CAPABILITIES	(VMW_BALLOON_CAPABILITIES_COMMON \
     89					| VMW_BALLOON_64_BIT_TARGET)
     90#else
     91#define VMW_BALLOON_CAPABILITIES	VMW_BALLOON_CAPABILITIES_COMMON
     92#endif
     93
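        /*
         * Note: the inflate and deflate fallback paths step between sizes with
         * ctl.page_size-- and ctl.page_size++, so these enum values must stay
         * in ascending page-size order.
         */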
     94enum vmballoon_page_size_type {
     95	VMW_BALLOON_4K_PAGE,
     96	VMW_BALLOON_2M_PAGE,
     97	VMW_BALLOON_LAST_SIZE = VMW_BALLOON_2M_PAGE
     98};
     99
    100#define VMW_BALLOON_NUM_PAGE_SIZES	(VMW_BALLOON_LAST_SIZE + 1)
    101
    102static const char * const vmballoon_page_size_names[] = {
    103	[VMW_BALLOON_4K_PAGE]			= "4k",
    104	[VMW_BALLOON_2M_PAGE]			= "2M"
    105};
    106
    107enum vmballoon_op {
    108	VMW_BALLOON_INFLATE,
    109	VMW_BALLOON_DEFLATE
    110};
    111
    112enum vmballoon_op_stat_type {
    113	VMW_BALLOON_OP_STAT,
    114	VMW_BALLOON_OP_FAIL_STAT
    115};
    116
    117#define VMW_BALLOON_OP_STAT_TYPES	(VMW_BALLOON_OP_FAIL_STAT + 1)
    118
    119/**
    120 * enum vmballoon_cmd_type - backdoor commands.
    121 *
     122 * Availability of the commands is as follows:
    123 *
    124 * %VMW_BALLOON_CMD_START, %VMW_BALLOON_CMD_GET_TARGET and
    125 * %VMW_BALLOON_CMD_GUEST_ID are always available.
    126 *
     127 * If the host reports that %VMW_BALLOON_BASIC_CMDS is supported, then the
     128 * %VMW_BALLOON_CMD_LOCK and %VMW_BALLOON_CMD_UNLOCK commands are available.
     129 *
     130 * If the host reports that %VMW_BALLOON_BATCHED_CMDS is supported, then the
     131 * %VMW_BALLOON_CMD_BATCHED_LOCK and %VMW_BALLOON_CMD_BATCHED_UNLOCK commands
     132 * are available.
     133 *
     134 * If the host reports that %VMW_BALLOON_BATCHED_2M_CMDS is supported, then
     135 * the %VMW_BALLOON_CMD_BATCHED_2M_LOCK and %VMW_BALLOON_CMD_BATCHED_2M_UNLOCK
     136 * commands are available.
     137 *
     138 * If the host reports that %VMW_BALLOON_SIGNALLED_WAKEUP_CMD is supported,
     139 * then the %VMW_BALLOON_CMD_VMCI_DOORBELL_SET command is available.
    140 *
     141 * @VMW_BALLOON_CMD_START: Communicates the supported version to the hypervisor.
    142 * @VMW_BALLOON_CMD_GET_TARGET: Gets the balloon target size.
    143 * @VMW_BALLOON_CMD_LOCK: Informs the hypervisor about a ballooned page.
    144 * @VMW_BALLOON_CMD_UNLOCK: Informs the hypervisor about a page that is about
    145 *			    to be deflated from the balloon.
    146 * @VMW_BALLOON_CMD_GUEST_ID: Informs the hypervisor about the type of OS that
    147 *			      runs in the VM.
    148 * @VMW_BALLOON_CMD_BATCHED_LOCK: Inform the hypervisor about a batch of
    149 *				  ballooned pages (up to 512).
    150 * @VMW_BALLOON_CMD_BATCHED_UNLOCK: Inform the hypervisor about a batch of
    151 *				  pages that are about to be deflated from the
    152 *				  balloon (up to 512).
    153 * @VMW_BALLOON_CMD_BATCHED_2M_LOCK: Similar to @VMW_BALLOON_CMD_BATCHED_LOCK
    154 *				     for 2MB pages.
    155 * @VMW_BALLOON_CMD_BATCHED_2M_UNLOCK: Similar to
    156 *				       @VMW_BALLOON_CMD_BATCHED_UNLOCK for 2MB
    157 *				       pages.
    158 * @VMW_BALLOON_CMD_VMCI_DOORBELL_SET: A command to set doorbell notification
    159 *				       that would be invoked when the balloon
    160 *				       size changes.
    161 * @VMW_BALLOON_CMD_LAST: Value of the last command.
    162 */
    163enum vmballoon_cmd_type {
    164	VMW_BALLOON_CMD_START,
    165	VMW_BALLOON_CMD_GET_TARGET,
    166	VMW_BALLOON_CMD_LOCK,
    167	VMW_BALLOON_CMD_UNLOCK,
    168	VMW_BALLOON_CMD_GUEST_ID,
    169	/* No command 5 */
    170	VMW_BALLOON_CMD_BATCHED_LOCK = 6,
    171	VMW_BALLOON_CMD_BATCHED_UNLOCK,
    172	VMW_BALLOON_CMD_BATCHED_2M_LOCK,
    173	VMW_BALLOON_CMD_BATCHED_2M_UNLOCK,
    174	VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
    175	VMW_BALLOON_CMD_LAST = VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
    176};
    177
    178#define VMW_BALLOON_CMD_NUM	(VMW_BALLOON_CMD_LAST + 1)
    179
    180enum vmballoon_error_codes {
    181	VMW_BALLOON_SUCCESS,
    182	VMW_BALLOON_ERROR_CMD_INVALID,
    183	VMW_BALLOON_ERROR_PPN_INVALID,
    184	VMW_BALLOON_ERROR_PPN_LOCKED,
    185	VMW_BALLOON_ERROR_PPN_UNLOCKED,
    186	VMW_BALLOON_ERROR_PPN_PINNED,
    187	VMW_BALLOON_ERROR_PPN_NOTNEEDED,
    188	VMW_BALLOON_ERROR_RESET,
    189	VMW_BALLOON_ERROR_BUSY
    190};
    191
    192#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES	(0x03000000)
    193
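        /*
         * Commands whose successful result also carries the current balloon
         * target; __vmballoon_cmd() uses this mask to decide when to update
         * b->target.
         */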
    194#define VMW_BALLOON_CMD_WITH_TARGET_MASK			\
    195	((1UL << VMW_BALLOON_CMD_GET_TARGET)		|	\
    196	 (1UL << VMW_BALLOON_CMD_LOCK)			|	\
    197	 (1UL << VMW_BALLOON_CMD_UNLOCK)		|	\
    198	 (1UL << VMW_BALLOON_CMD_BATCHED_LOCK)		|	\
    199	 (1UL << VMW_BALLOON_CMD_BATCHED_UNLOCK)	|	\
    200	 (1UL << VMW_BALLOON_CMD_BATCHED_2M_LOCK)	|	\
    201	 (1UL << VMW_BALLOON_CMD_BATCHED_2M_UNLOCK))
    202
    203static const char * const vmballoon_cmd_names[] = {
    204	[VMW_BALLOON_CMD_START]			= "start",
    205	[VMW_BALLOON_CMD_GET_TARGET]		= "target",
    206	[VMW_BALLOON_CMD_LOCK]			= "lock",
    207	[VMW_BALLOON_CMD_UNLOCK]		= "unlock",
    208	[VMW_BALLOON_CMD_GUEST_ID]		= "guestType",
    209	[VMW_BALLOON_CMD_BATCHED_LOCK]		= "batchLock",
    210	[VMW_BALLOON_CMD_BATCHED_UNLOCK]	= "batchUnlock",
    211	[VMW_BALLOON_CMD_BATCHED_2M_LOCK]	= "2m-lock",
    212	[VMW_BALLOON_CMD_BATCHED_2M_UNLOCK]	= "2m-unlock",
    213	[VMW_BALLOON_CMD_VMCI_DOORBELL_SET]	= "doorbellSet"
    214};
    215
    216enum vmballoon_stat_page {
    217	VMW_BALLOON_PAGE_STAT_ALLOC,
    218	VMW_BALLOON_PAGE_STAT_ALLOC_FAIL,
    219	VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC,
    220	VMW_BALLOON_PAGE_STAT_REFUSED_FREE,
    221	VMW_BALLOON_PAGE_STAT_FREE,
    222	VMW_BALLOON_PAGE_STAT_LAST = VMW_BALLOON_PAGE_STAT_FREE
    223};
    224
    225#define VMW_BALLOON_PAGE_STAT_NUM	(VMW_BALLOON_PAGE_STAT_LAST + 1)
    226
    227enum vmballoon_stat_general {
    228	VMW_BALLOON_STAT_TIMER,
    229	VMW_BALLOON_STAT_DOORBELL,
    230	VMW_BALLOON_STAT_RESET,
    231	VMW_BALLOON_STAT_SHRINK,
    232	VMW_BALLOON_STAT_SHRINK_FREE,
    233	VMW_BALLOON_STAT_LAST = VMW_BALLOON_STAT_SHRINK_FREE
    234};
    235
    236#define VMW_BALLOON_STAT_NUM		(VMW_BALLOON_STAT_LAST + 1)
    237
    238static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching);
    239static DEFINE_STATIC_KEY_FALSE(balloon_stat_enabled);
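        /*
         * Static keys keep these checks cheap in the hot paths:
         * vmw_balloon_batching is flipped during reset according to the host
         * capabilities, and balloon_stat_enabled is flipped the first time the
         * debugfs statistics are read.
         */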
    240
    241struct vmballoon_ctl {
    242	struct list_head pages;
    243	struct list_head refused_pages;
    244	struct list_head prealloc_pages;
    245	unsigned int n_refused_pages;
    246	unsigned int n_pages;
    247	enum vmballoon_page_size_type page_size;
    248	enum vmballoon_op op;
    249};
    250
    251/**
    252 * struct vmballoon_batch_entry - a batch entry for lock or unlock.
    253 *
    254 * @status: the status of the operation, which is written by the hypervisor.
    255 * @reserved: reserved for future use. Must be set to zero.
    256 * @pfn: the physical frame number of the page to be locked or unlocked.
    257 */
    258struct vmballoon_batch_entry {
    259	u64 status : 5;
    260	u64 reserved : PAGE_SHIFT - 5;
    261	u64 pfn : 52;
    262} __packed;
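        /*
         * Each batch entry packs the 5-bit status and the PFN into one 64-bit
         * word, so a single 4KB communication page holds 512 entries (see
         * vmballoon_init_batching()).
         */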
    263
    264struct vmballoon {
    265	/**
    266	 * @max_page_size: maximum supported page size for ballooning.
    267	 *
    268	 * Protected by @conf_sem
    269	 */
    270	enum vmballoon_page_size_type max_page_size;
    271
    272	/**
    273	 * @size: balloon actual size in basic page size (frames).
    274	 *
     275	 * While we currently do not support a size that is bigger than 32 bits,
     276	 * use 64 bits in preparation for future support.
    277	 */
    278	atomic64_t size;
    279
    280	/**
    281	 * @target: balloon target size in basic page size (frames).
    282	 *
    283	 * We do not protect the target under the assumption that setting the
    284	 * value is always done through a single write. If this assumption ever
     285	 * breaks, we would have to use {READ,WRITE}_ONCE() for accesses and
     286	 * suffer less optimized code. Although we may read a stale target value
     287	 * if multiple accesses happen at once, the performance impact should be minor.
    288	 */
    289	unsigned long target;
    290
    291	/**
    292	 * @reset_required: reset flag
    293	 *
    294	 * Setting this flag may introduce races, but the code is expected to
    295	 * handle them gracefully. In the worst case, another operation will
    296	 * fail as reset did not take place. Clearing the flag is done while
    297	 * holding @conf_sem for write.
    298	 */
    299	bool reset_required;
    300
    301	/**
    302	 * @capabilities: hypervisor balloon capabilities.
    303	 *
    304	 * Protected by @conf_sem.
    305	 */
    306	unsigned long capabilities;
    307
    308	/**
    309	 * @batch_page: pointer to communication batch page.
    310	 *
     311	 * When batching is used, batch_page points to a page, which holds
     312	 * @batch_max_pages entries for locking or unlocking.
    313	 */
    314	struct vmballoon_batch_entry *batch_page;
    315
    316	/**
    317	 * @batch_max_pages: maximum pages that can be locked/unlocked.
    318	 *
    319	 * Indicates the number of pages that the hypervisor can lock or unlock
    320	 * at once, according to whether batching is enabled. If batching is
     321	 * disabled, only a single page can be locked/unlocked on each operation.
    322	 *
    323	 * Protected by @conf_sem.
    324	 */
    325	unsigned int batch_max_pages;
    326
    327	/**
    328	 * @page: page to be locked/unlocked by the hypervisor
    329	 *
    330	 * @page is only used when batching is disabled and a single page is
    331	 * reclaimed on each iteration.
    332	 *
    333	 * Protected by @comm_lock.
    334	 */
    335	struct page *page;
    336
    337	/**
    338	 * @shrink_timeout: timeout until the next inflation.
    339	 *
     340	 * After a shrink event, indicates the time in jiffies after which
    341	 * inflation is allowed again. Can be written concurrently with reads,
    342	 * so must use READ_ONCE/WRITE_ONCE when accessing.
    343	 */
    344	unsigned long shrink_timeout;
    345
    346	/* statistics */
    347	struct vmballoon_stats *stats;
    348
    349	/**
    350	 * @b_dev_info: balloon device information descriptor.
    351	 */
    352	struct balloon_dev_info b_dev_info;
    353
    354	struct delayed_work dwork;
    355
    356	/**
     357	 * @huge_pages: list of the inflated 2MB pages.
     358	 *
     359	 * Protected by @b_dev_info.pages_lock.
    360	 */
    361	struct list_head huge_pages;
    362
    363	/**
     364	 * @vmci_doorbell: doorbell handle used by the host for notifications.
    365	 *
    366	 * Protected by @conf_sem.
    367	 */
    368	struct vmci_handle vmci_doorbell;
    369
    370	/**
    371	 * @conf_sem: semaphore to protect the configuration and the statistics.
    372	 */
    373	struct rw_semaphore conf_sem;
    374
    375	/**
    376	 * @comm_lock: lock to protect the communication with the host.
    377	 *
     378	 * Lock ordering: @conf_sem -> @comm_lock.
    379	 */
    380	spinlock_t comm_lock;
    381
    382	/**
    383	 * @shrinker: shrinker interface that is used to avoid over-inflation.
    384	 */
    385	struct shrinker shrinker;
    386
    387	/**
    388	 * @shrinker_registered: whether the shrinker was registered.
    389	 *
     390	 * The shrinker interface does not gracefully handle the removal of a
     391	 * shrinker that was never registered. This indication allows us to
     392	 * simplify the unregistration process.
    393	 */
    394	bool shrinker_registered;
    395};
    396
    397static struct vmballoon balloon;
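        /* The single, file-scope balloon instance; each guest has one balloon. */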
    398
    399struct vmballoon_stats {
    400	/* timer / doorbell operations */
    401	atomic64_t general_stat[VMW_BALLOON_STAT_NUM];
    402
    403	/* allocation statistics for huge and small pages */
    404	atomic64_t
    405	       page_stat[VMW_BALLOON_PAGE_STAT_NUM][VMW_BALLOON_NUM_PAGE_SIZES];
    406
    407	/* Monitor operations: total operations, and failures */
    408	atomic64_t ops[VMW_BALLOON_CMD_NUM][VMW_BALLOON_OP_STAT_TYPES];
    409};
    410
    411static inline bool is_vmballoon_stats_on(void)
    412{
    413	return IS_ENABLED(CONFIG_DEBUG_FS) &&
    414		static_branch_unlikely(&balloon_stat_enabled);
    415}
    416
    417static inline void vmballoon_stats_op_inc(struct vmballoon *b, unsigned int op,
    418					  enum vmballoon_op_stat_type type)
    419{
    420	if (is_vmballoon_stats_on())
    421		atomic64_inc(&b->stats->ops[op][type]);
    422}
    423
    424static inline void vmballoon_stats_gen_inc(struct vmballoon *b,
    425					   enum vmballoon_stat_general stat)
    426{
    427	if (is_vmballoon_stats_on())
    428		atomic64_inc(&b->stats->general_stat[stat]);
    429}
    430
    431static inline void vmballoon_stats_gen_add(struct vmballoon *b,
    432					   enum vmballoon_stat_general stat,
    433					   unsigned int val)
    434{
    435	if (is_vmballoon_stats_on())
    436		atomic64_add(val, &b->stats->general_stat[stat]);
    437}
    438
    439static inline void vmballoon_stats_page_inc(struct vmballoon *b,
    440					    enum vmballoon_stat_page stat,
    441					    enum vmballoon_page_size_type size)
    442{
    443	if (is_vmballoon_stats_on())
    444		atomic64_inc(&b->stats->page_stat[stat][size]);
    445}
    446
    447static inline void vmballoon_stats_page_add(struct vmballoon *b,
    448					    enum vmballoon_stat_page stat,
    449					    enum vmballoon_page_size_type size,
    450					    unsigned int val)
    451{
    452	if (is_vmballoon_stats_on())
    453		atomic64_add(val, &b->stats->page_stat[stat][size]);
    454}
    455
    456static inline unsigned long
    457__vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
    458		unsigned long arg2, unsigned long *result)
    459{
    460	unsigned long status, dummy1, dummy2, dummy3, local_result;
    461
    462	vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_STAT);
    463
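        	/*
        	 * VMware backdoor call: "inl" on port 0x5670 with the magic value
        	 * in %eax traps to the hypervisor. Inputs: %eax = magic,
        	 * %ecx = command, %edx = port, %ebx = arg1, %esi = arg2. The
        	 * status is returned in %eax and the result in %ebx, except for
        	 * the START command, whose capabilities come back in %ecx.
        	 */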
    464	asm volatile ("inl %%dx" :
    465		"=a"(status),
    466		"=c"(dummy1),
    467		"=d"(dummy2),
    468		"=b"(local_result),
    469		"=S"(dummy3) :
    470		"0"(VMW_BALLOON_HV_MAGIC),
    471		"1"(cmd),
    472		"2"(VMW_BALLOON_HV_PORT),
    473		"3"(arg1),
    474		"4"(arg2) :
    475		"memory");
    476
    477	/* update the result if needed */
    478	if (result)
    479		*result = (cmd == VMW_BALLOON_CMD_START) ? dummy1 :
    480							   local_result;
    481
    482	/* update target when applicable */
    483	if (status == VMW_BALLOON_SUCCESS &&
    484	    ((1ul << cmd) & VMW_BALLOON_CMD_WITH_TARGET_MASK))
    485		WRITE_ONCE(b->target, local_result);
    486
    487	if (status != VMW_BALLOON_SUCCESS &&
    488	    status != VMW_BALLOON_SUCCESS_WITH_CAPABILITIES) {
    489		vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_FAIL_STAT);
    490		pr_debug("%s: %s [0x%lx,0x%lx) failed, returned %ld\n",
    491			 __func__, vmballoon_cmd_names[cmd], arg1, arg2,
    492			 status);
    493	}
    494
    495	/* mark reset required accordingly */
    496	if (status == VMW_BALLOON_ERROR_RESET)
    497		b->reset_required = true;
    498
    499	return status;
    500}
    501
    502static __always_inline unsigned long
    503vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
    504	      unsigned long arg2)
    505{
    506	unsigned long dummy;
    507
    508	return __vmballoon_cmd(b, cmd, arg1, arg2, &dummy);
    509}
    510
    511/*
     512 * Send the "start" command to the host, communicating the supported version
    513 * of the protocol.
    514 */
    515static int vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
    516{
    517	unsigned long status, capabilities;
    518
    519	status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0,
    520				 &capabilities);
    521
    522	switch (status) {
    523	case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
    524		b->capabilities = capabilities;
    525		break;
    526	case VMW_BALLOON_SUCCESS:
    527		b->capabilities = VMW_BALLOON_BASIC_CMDS;
    528		break;
    529	default:
    530		return -EIO;
    531	}
    532
    533	/*
    534	 * 2MB pages are only supported with batching. If batching is for some
    535	 * reason disabled, do not use 2MB pages, since otherwise the legacy
    536	 * mechanism is used with 2MB pages, causing a failure.
    537	 */
    538	b->max_page_size = VMW_BALLOON_4K_PAGE;
    539	if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) &&
    540	    (b->capabilities & VMW_BALLOON_BATCHED_CMDS))
    541		b->max_page_size = VMW_BALLOON_2M_PAGE;
    542
    543
    544	return 0;
    545}
    546
    547/**
    548 * vmballoon_send_guest_id - communicate guest type to the host.
    549 *
    550 * @b: pointer to the balloon.
    551 *
     552 * Communicate the guest type to the host so that it can adjust the
     553 * ballooning algorithm to the one most appropriate for the guest. This
     554 * command is normally issued after sending the "start" command and is part
     555 * of the standard reset sequence.
    556 *
    557 * Return: zero on success or appropriate error code.
    558 */
    559static int vmballoon_send_guest_id(struct vmballoon *b)
    560{
    561	unsigned long status;
    562
    563	status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID,
    564			       VMW_BALLOON_GUEST_ID, 0);
    565
    566	return status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
    567}
    568
    569/**
    570 * vmballoon_page_order() - return the order of the page
    571 * @page_size: the size of the page.
    572 *
    573 * Return: the allocation order.
    574 */
    575static inline
    576unsigned int vmballoon_page_order(enum vmballoon_page_size_type page_size)
    577{
    578	return page_size == VMW_BALLOON_2M_PAGE ? VMW_BALLOON_2M_ORDER : 0;
    579}
    580
    581/**
    582 * vmballoon_page_in_frames() - returns the number of frames in a page.
    583 * @page_size: the size of the page.
    584 *
    585 * Return: the number of 4k frames.
    586 */
    587static inline unsigned int
    588vmballoon_page_in_frames(enum vmballoon_page_size_type page_size)
    589{
    590	return 1 << vmballoon_page_order(page_size);
    591}
    592
    593/**
    594 * vmballoon_mark_page_offline() - mark a page as offline
    595 * @page: pointer for the page.
    596 * @page_size: the size of the page.
    597 */
    598static void
    599vmballoon_mark_page_offline(struct page *page,
    600			    enum vmballoon_page_size_type page_size)
    601{
    602	int i;
    603
    604	for (i = 0; i < vmballoon_page_in_frames(page_size); i++)
    605		__SetPageOffline(page + i);
    606}
    607
    608/**
    609 * vmballoon_mark_page_online() - mark a page as online
    610 * @page: pointer for the page.
    611 * @page_size: the size of the page.
    612 */
    613static void
    614vmballoon_mark_page_online(struct page *page,
    615			   enum vmballoon_page_size_type page_size)
    616{
    617	int i;
    618
    619	for (i = 0; i < vmballoon_page_in_frames(page_size); i++)
    620		__ClearPageOffline(page + i);
    621}
    622
    623/**
    624 * vmballoon_send_get_target() - Retrieve desired balloon size from the host.
    625 *
    626 * @b: pointer to the balloon.
    627 *
     628 * Return: zero on success; -EINVAL if the limit does not fit in 32 bits, as
     629 * required by the host-guest protocol; and -EIO if an error occurred in
     630 * communicating with the host.
    631 */
    632static int vmballoon_send_get_target(struct vmballoon *b)
    633{
    634	unsigned long status;
    635	unsigned long limit;
    636
    637	limit = totalram_pages();
    638
    639	/* Ensure limit fits in 32-bits if 64-bit targets are not supported */
    640	if (!(b->capabilities & VMW_BALLOON_64_BIT_TARGET) &&
    641	    limit != (u32)limit)
    642		return -EINVAL;
    643
    644	status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0);
    645
    646	return status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
    647}
    648
    649/**
    650 * vmballoon_alloc_page_list - allocates a list of pages.
    651 *
    652 * @b: pointer to the balloon.
    653 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
    654 * @req_n_pages: the number of requested pages.
    655 *
     656 * Tries to allocate @req_n_pages, adds them to the list of balloon pages in
     657 * @ctl.pages, and updates @ctl.n_pages to reflect the number of pages.
    658 *
    659 * Return: zero on success or error code otherwise.
    660 */
    661static int vmballoon_alloc_page_list(struct vmballoon *b,
    662				     struct vmballoon_ctl *ctl,
    663				     unsigned int req_n_pages)
    664{
    665	struct page *page;
    666	unsigned int i;
    667
    668	for (i = 0; i < req_n_pages; i++) {
    669		/*
    670		 * First check if we happen to have pages that were allocated
     671		 * before. This happens when a 2MB page is rejected during
     672		 * inflation by the hypervisor, and then split into 4KB pages.
    673		 */
    674		if (!list_empty(&ctl->prealloc_pages)) {
    675			page = list_first_entry(&ctl->prealloc_pages,
    676						struct page, lru);
    677			list_del(&page->lru);
    678		} else {
    679			if (ctl->page_size == VMW_BALLOON_2M_PAGE)
    680				page = alloc_pages(__GFP_HIGHMEM|__GFP_NOWARN|
    681					__GFP_NOMEMALLOC, VMW_BALLOON_2M_ORDER);
    682			else
    683				page = balloon_page_alloc();
    684
    685			vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC,
    686						 ctl->page_size);
    687		}
    688
    689		if (page) {
    690			/* Success. Add the page to the list and continue. */
    691			list_add(&page->lru, &ctl->pages);
    692			continue;
    693		}
    694
    695		/* Allocation failed. Update statistics and stop. */
    696		vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC_FAIL,
    697					 ctl->page_size);
    698		break;
    699	}
    700
    701	ctl->n_pages = i;
    702
    703	return req_n_pages == ctl->n_pages ? 0 : -ENOMEM;
    704}
    705
    706/**
    707 * vmballoon_handle_one_result - Handle lock/unlock result for a single page.
    708 *
    709 * @b: pointer for %struct vmballoon.
    710 * @page: pointer for the page whose result should be handled.
    711 * @page_size: size of the page.
    712 * @status: status of the operation as provided by the hypervisor.
    713 */
    714static int vmballoon_handle_one_result(struct vmballoon *b, struct page *page,
    715				       enum vmballoon_page_size_type page_size,
    716				       unsigned long status)
    717{
    718	/* On success do nothing. The page is already on the balloon list. */
    719	if (likely(status == VMW_BALLOON_SUCCESS))
    720		return 0;
    721
    722	pr_debug("%s: failed comm pfn %lx status %lu page_size %s\n", __func__,
    723		 page_to_pfn(page), status,
    724		 vmballoon_page_size_names[page_size]);
    725
    726	/* Error occurred */
    727	vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC,
    728				 page_size);
    729
    730	return -EIO;
    731}
    732
    733/**
    734 * vmballoon_status_page - returns the status of (un)lock operation
    735 *
    736 * @b: pointer to the balloon.
    737 * @idx: index for the page for which the operation is performed.
    738 * @p: pointer to where the page struct is returned.
    739 *
    740 * Following a lock or unlock operation, returns the status of the operation for
     741 * an individual page. Provides the page that the operation was performed on in
     742 * the @p argument.
    743 *
    744 * Returns: The status of a lock or unlock operation for an individual page.
    745 */
    746static unsigned long vmballoon_status_page(struct vmballoon *b, int idx,
    747					   struct page **p)
    748{
    749	if (static_branch_likely(&vmw_balloon_batching)) {
    750		/* batching mode */
    751		*p = pfn_to_page(b->batch_page[idx].pfn);
    752		return b->batch_page[idx].status;
    753	}
    754
    755	/* non-batching mode */
    756	*p = b->page;
    757
    758	/*
    759	 * If a failure occurs, the indication will be provided in the status
    760	 * of the entire operation, which is considered before the individual
     761	 * page status. So for non-batching mode, the indication here is
     762	 * always success.
    763	 */
    764	return VMW_BALLOON_SUCCESS;
    765}
    766
    767/**
    768 * vmballoon_lock_op - notifies the host about inflated/deflated pages.
    769 * @b: pointer to the balloon.
    770 * @num_pages: number of inflated/deflated pages.
    771 * @page_size: size of the page.
    772 * @op: the type of operation (lock or unlock).
    773 *
    774 * Notify the host about page(s) that were ballooned (or removed from the
     775 * balloon) so that the host can use them without fear that the guest will
     776 * need them (or can stop using them since the VM does). The host may reject
     777 * some pages; we need to check the return value and maybe submit a different
     778 * page. The pages that are inflated/deflated are pointed to by @b->page.
    779 *
    780 * Return: result as provided by the hypervisor.
    781 */
    782static unsigned long vmballoon_lock_op(struct vmballoon *b,
    783				       unsigned int num_pages,
    784				       enum vmballoon_page_size_type page_size,
    785				       enum vmballoon_op op)
    786{
    787	unsigned long cmd, pfn;
    788
    789	lockdep_assert_held(&b->comm_lock);
    790
    791	if (static_branch_likely(&vmw_balloon_batching)) {
    792		if (op == VMW_BALLOON_INFLATE)
    793			cmd = page_size == VMW_BALLOON_2M_PAGE ?
    794				VMW_BALLOON_CMD_BATCHED_2M_LOCK :
    795				VMW_BALLOON_CMD_BATCHED_LOCK;
    796		else
    797			cmd = page_size == VMW_BALLOON_2M_PAGE ?
    798				VMW_BALLOON_CMD_BATCHED_2M_UNLOCK :
    799				VMW_BALLOON_CMD_BATCHED_UNLOCK;
    800
    801		pfn = PHYS_PFN(virt_to_phys(b->batch_page));
    802	} else {
    803		cmd = op == VMW_BALLOON_INFLATE ? VMW_BALLOON_CMD_LOCK :
    804						  VMW_BALLOON_CMD_UNLOCK;
    805		pfn = page_to_pfn(b->page);
    806
    807		/* In non-batching mode, PFNs must fit in 32-bit */
    808		if (unlikely(pfn != (u32)pfn))
    809			return VMW_BALLOON_ERROR_PPN_INVALID;
    810	}
    811
    812	return vmballoon_cmd(b, cmd, pfn, num_pages);
    813}
    814
    815/**
    816 * vmballoon_add_page - adds a page towards lock/unlock operation.
    817 *
    818 * @b: pointer to the balloon.
    819 * @idx: index of the page to be ballooned in this batch.
    820 * @p: pointer to the page that is about to be ballooned.
    821 *
    822 * Adds the page to be ballooned. Must be called while holding @comm_lock.
    823 */
    824static void vmballoon_add_page(struct vmballoon *b, unsigned int idx,
    825			       struct page *p)
    826{
    827	lockdep_assert_held(&b->comm_lock);
    828
    829	if (static_branch_likely(&vmw_balloon_batching))
    830		b->batch_page[idx] = (struct vmballoon_batch_entry)
    831					{ .pfn = page_to_pfn(p) };
    832	else
    833		b->page = p;
    834}
    835
    836/**
    837 * vmballoon_lock - lock or unlock a batch of pages.
    838 *
    839 * @b: pointer to the balloon.
    840 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
    841 *
     842 * Notifies the host about ballooned pages (after inflation or deflation,
     843 * according to @ctl). If the host rejects a page, it is put on the
     844 * @ctl refused list. These refused pages are then released when moving to
     845 * the next page size.
    846 *
     847 * Note that we neither free any page here nor put it back on the ballooned
     848 * pages list. Instead we queue pages for later processing. We do that for
     849 * several reasons. First, we do not want to free a page under the lock.
     850 * Second, it unifies the handling of lock and unlock. In the inflate case, the
    851 * caller will check if there are too many refused pages and release them.
    852 * Although it is not identical to the past behavior, it should not affect
    853 * performance.
    854 */
    855static int vmballoon_lock(struct vmballoon *b, struct vmballoon_ctl *ctl)
    856{
    857	unsigned long batch_status;
    858	struct page *page;
    859	unsigned int i, num_pages;
    860
    861	num_pages = ctl->n_pages;
    862	if (num_pages == 0)
    863		return 0;
    864
    865	/* communication with the host is done under the communication lock */
    866	spin_lock(&b->comm_lock);
    867
    868	i = 0;
    869	list_for_each_entry(page, &ctl->pages, lru)
    870		vmballoon_add_page(b, i++, page);
    871
    872	batch_status = vmballoon_lock_op(b, ctl->n_pages, ctl->page_size,
    873					 ctl->op);
    874
    875	/*
    876	 * Iterate over the pages in the provided list. Since we are changing
    877	 * @ctl->n_pages we are saving the original value in @num_pages and
    878	 * use this value to bound the loop.
    879	 */
    880	for (i = 0; i < num_pages; i++) {
    881		unsigned long status;
    882
    883		status = vmballoon_status_page(b, i, &page);
    884
    885		/*
     886		 * Failure of the whole batch overrides the individual page
     887		 * results.
    888		 */
    889		if (batch_status != VMW_BALLOON_SUCCESS)
    890			status = batch_status;
    891
    892		/* Continue if no error happened */
    893		if (!vmballoon_handle_one_result(b, page, ctl->page_size,
    894						 status))
    895			continue;
    896
    897		/*
     898		 * An error happened. Move the page to the refused list and
     899		 * update the page counts.
    900		 */
    901		list_move(&page->lru, &ctl->refused_pages);
    902		ctl->n_pages--;
    903		ctl->n_refused_pages++;
    904	}
    905
    906	spin_unlock(&b->comm_lock);
    907
    908	return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
    909}
    910
    911/**
    912 * vmballoon_release_page_list() - Releases a page list
    913 *
    914 * @page_list: list of pages to release.
    915 * @n_pages: pointer to the number of pages.
    916 * @page_size: whether the pages in the list are 2MB (or else 4KB).
    917 *
    918 * Releases the list of pages and zeros the number of pages.
    919 */
    920static void vmballoon_release_page_list(struct list_head *page_list,
    921				       int *n_pages,
    922				       enum vmballoon_page_size_type page_size)
    923{
    924	struct page *page, *tmp;
    925
    926	list_for_each_entry_safe(page, tmp, page_list, lru) {
    927		list_del(&page->lru);
    928		__free_pages(page, vmballoon_page_order(page_size));
    929	}
    930
    931	if (n_pages)
    932		*n_pages = 0;
    933}
    934
    935
    936/*
    937 * Release pages that were allocated while attempting to inflate the
    938 * balloon but were refused by the host for one reason or another.
    939 */
    940static void vmballoon_release_refused_pages(struct vmballoon *b,
    941					    struct vmballoon_ctl *ctl)
    942{
    943	vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_FREE,
    944				 ctl->page_size);
    945
    946	vmballoon_release_page_list(&ctl->refused_pages, &ctl->n_refused_pages,
    947				    ctl->page_size);
    948}
    949
    950/**
    951 * vmballoon_change - retrieve the required balloon change
    952 *
    953 * @b: pointer for the balloon.
    954 *
    955 * Return: the required change for the balloon size. A positive number
    956 * indicates inflation, a negative number indicates a deflation.
    957 */
    958static int64_t vmballoon_change(struct vmballoon *b)
    959{
    960	int64_t size, target;
    961
    962	size = atomic64_read(&b->size);
    963	target = READ_ONCE(b->target);
    964
    965	/*
     966	 * We must cast first because of int sizes; otherwise we might get
     967	 * huge positives instead of negatives.
    968	 */
    969
    970	if (b->reset_required)
    971		return 0;
    972
    973	/* consider a 2MB slack on deflate, unless the balloon is emptied */
    974	if (target < size && target != 0 &&
    975	    size - target < vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE))
    976		return 0;
    977
     978	/* If an out-of-memory event recently occurred, inflation is disallowed. */
    979	if (target > size && time_before(jiffies, READ_ONCE(b->shrink_timeout)))
    980		return 0;
    981
    982	return target - size;
    983}
    984
    985/**
    986 * vmballoon_enqueue_page_list() - Enqueues list of pages after inflation.
    987 *
    988 * @b: pointer to balloon.
    989 * @pages: list of pages to enqueue.
    990 * @n_pages: pointer to number of pages in list. The value is zeroed.
    991 * @page_size: whether the pages are 2MB or 4KB pages.
    992 *
     993 * Enqueues the provided list of pages on the ballooned page list, clears the
    994 * list and zeroes the number of pages that was provided.
    995 */
    996static void vmballoon_enqueue_page_list(struct vmballoon *b,
    997					struct list_head *pages,
    998					unsigned int *n_pages,
    999					enum vmballoon_page_size_type page_size)
   1000{
   1001	unsigned long flags;
   1002	struct page *page;
   1003
   1004	if (page_size == VMW_BALLOON_4K_PAGE) {
   1005		balloon_page_list_enqueue(&b->b_dev_info, pages);
   1006	} else {
   1007		/*
   1008		 * Keep the huge pages in a local list which is not available
   1009		 * for the balloon compaction mechanism.
   1010		 */
   1011		spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
   1012
   1013		list_for_each_entry(page, pages, lru) {
   1014			vmballoon_mark_page_offline(page, VMW_BALLOON_2M_PAGE);
   1015		}
   1016
   1017		list_splice_init(pages, &b->huge_pages);
   1018		__count_vm_events(BALLOON_INFLATE, *n_pages *
   1019				  vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
   1020		spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
   1021	}
   1022
   1023	*n_pages = 0;
   1024}
   1025
   1026/**
   1027 * vmballoon_dequeue_page_list() - Dequeues page lists for deflation.
   1028 *
   1029 * @b: pointer to balloon.
    1030 * @pages: list onto which the dequeued pages are added.
   1031 * @n_pages: pointer to number of pages in list. The value is zeroed.
   1032 * @page_size: whether the pages are 2MB or 4KB pages.
   1033 * @n_req_pages: the number of requested pages.
   1034 *
   1035 * Dequeues the number of requested pages from the balloon for deflation. The
    1036 * number of dequeued pages may be lower if not enough pages of the requested
    1037 * size are available.
   1038 */
   1039static void vmballoon_dequeue_page_list(struct vmballoon *b,
   1040					struct list_head *pages,
   1041					unsigned int *n_pages,
   1042					enum vmballoon_page_size_type page_size,
   1043					unsigned int n_req_pages)
   1044{
   1045	struct page *page, *tmp;
   1046	unsigned int i = 0;
   1047	unsigned long flags;
   1048
   1049	/* In the case of 4k pages, use the compaction infrastructure */
   1050	if (page_size == VMW_BALLOON_4K_PAGE) {
   1051		*n_pages = balloon_page_list_dequeue(&b->b_dev_info, pages,
   1052						     n_req_pages);
   1053		return;
   1054	}
   1055
   1056	/* 2MB pages */
   1057	spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
   1058	list_for_each_entry_safe(page, tmp, &b->huge_pages, lru) {
   1059		vmballoon_mark_page_online(page, VMW_BALLOON_2M_PAGE);
   1060
   1061		list_move(&page->lru, pages);
   1062		if (++i == n_req_pages)
   1063			break;
   1064	}
   1065
   1066	__count_vm_events(BALLOON_DEFLATE,
   1067			  i * vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
   1068	spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
   1069	*n_pages = i;
   1070}
   1071
   1072/**
   1073 * vmballoon_split_refused_pages() - Split the 2MB refused pages to 4k.
   1074 *
   1075 * If inflation of 2MB pages was denied by the hypervisor, it is likely to be
    1076 * due to one or a few 4KB pages within them. The same 2MB pages may keep
    1077 * being allocated and then refused. To prevent this case, this function splits
    1078 * the refused pages into 4KB pages and adds them to the @prealloc_pages list.
   1079 *
   1080 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
   1081 */
   1082static void vmballoon_split_refused_pages(struct vmballoon_ctl *ctl)
   1083{
   1084	struct page *page, *tmp;
   1085	unsigned int i, order;
   1086
   1087	order = vmballoon_page_order(ctl->page_size);
   1088
   1089	list_for_each_entry_safe(page, tmp, &ctl->refused_pages, lru) {
   1090		list_del(&page->lru);
   1091		split_page(page, order);
   1092		for (i = 0; i < (1 << order); i++)
   1093			list_add(&page[i].lru, &ctl->prealloc_pages);
   1094	}
   1095	ctl->n_refused_pages = 0;
   1096}
   1097
   1098/**
   1099 * vmballoon_inflate() - Inflate the balloon towards its target size.
   1100 *
   1101 * @b: pointer to the balloon.
   1102 */
   1103static void vmballoon_inflate(struct vmballoon *b)
   1104{
   1105	int64_t to_inflate_frames;
   1106	struct vmballoon_ctl ctl = {
   1107		.pages = LIST_HEAD_INIT(ctl.pages),
   1108		.refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
   1109		.prealloc_pages = LIST_HEAD_INIT(ctl.prealloc_pages),
   1110		.page_size = b->max_page_size,
   1111		.op = VMW_BALLOON_INFLATE
   1112	};
   1113
   1114	while ((to_inflate_frames = vmballoon_change(b)) > 0) {
   1115		unsigned int to_inflate_pages, page_in_frames;
   1116		int alloc_error, lock_error = 0;
   1117
   1118		VM_BUG_ON(!list_empty(&ctl.pages));
   1119		VM_BUG_ON(ctl.n_pages != 0);
   1120
   1121		page_in_frames = vmballoon_page_in_frames(ctl.page_size);
   1122
   1123		to_inflate_pages = min_t(unsigned long, b->batch_max_pages,
   1124					 DIV_ROUND_UP_ULL(to_inflate_frames,
   1125							  page_in_frames));
   1126
   1127		/* Start by allocating */
   1128		alloc_error = vmballoon_alloc_page_list(b, &ctl,
   1129							to_inflate_pages);
   1130
   1131		/* Actually lock the pages by telling the hypervisor */
   1132		lock_error = vmballoon_lock(b, &ctl);
   1133
   1134		/*
   1135		 * If an error indicates that something serious went wrong,
   1136		 * stop the inflation.
   1137		 */
   1138		if (lock_error)
   1139			break;
   1140
   1141		/* Update the balloon size */
   1142		atomic64_add(ctl.n_pages * page_in_frames, &b->size);
   1143
   1144		vmballoon_enqueue_page_list(b, &ctl.pages, &ctl.n_pages,
   1145					    ctl.page_size);
   1146
   1147		/*
   1148		 * If allocation failed or the number of refused pages exceeds
   1149		 * the maximum allowed, move to the next page size.
   1150		 */
   1151		if (alloc_error ||
   1152		    ctl.n_refused_pages >= VMW_BALLOON_MAX_REFUSED) {
   1153			if (ctl.page_size == VMW_BALLOON_4K_PAGE)
   1154				break;
   1155
   1156			/*
   1157			 * Split the refused pages to 4k. This will also empty
   1158			 * the refused pages list.
   1159			 */
   1160			vmballoon_split_refused_pages(&ctl);
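        			/* Fall back from 2MB to 4KB pages (relies on the enum order). */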
   1161			ctl.page_size--;
   1162		}
   1163
   1164		cond_resched();
   1165	}
   1166
   1167	/*
   1168	 * Release pages that were allocated while attempting to inflate the
   1169	 * balloon but were refused by the host for one reason or another,
   1170	 * and update the statistics.
   1171	 */
   1172	if (ctl.n_refused_pages != 0)
   1173		vmballoon_release_refused_pages(b, &ctl);
   1174
   1175	vmballoon_release_page_list(&ctl.prealloc_pages, NULL, ctl.page_size);
   1176}
   1177
   1178/**
   1179 * vmballoon_deflate() - Decrease the size of the balloon.
   1180 *
   1181 * @b: pointer to the balloon
   1182 * @n_frames: the number of frames to deflate. If zero, automatically
   1183 * calculated according to the target size.
   1184 * @coordinated: whether to coordinate with the host
   1185 *
   1186 * Decrease the size of the balloon allowing guest to use more memory.
   1187 *
   1188 * Return: The number of deflated frames (i.e., basic page size units)
   1189 */
   1190static unsigned long vmballoon_deflate(struct vmballoon *b, uint64_t n_frames,
   1191				       bool coordinated)
   1192{
   1193	unsigned long deflated_frames = 0;
   1194	unsigned long tried_frames = 0;
   1195	struct vmballoon_ctl ctl = {
   1196		.pages = LIST_HEAD_INIT(ctl.pages),
   1197		.refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
   1198		.page_size = VMW_BALLOON_4K_PAGE,
   1199		.op = VMW_BALLOON_DEFLATE
   1200	};
   1201
   1202	/* free pages to reach target */
   1203	while (true) {
   1204		unsigned int to_deflate_pages, n_unlocked_frames;
   1205		unsigned int page_in_frames;
   1206		int64_t to_deflate_frames;
   1207		bool deflated_all;
   1208
   1209		page_in_frames = vmballoon_page_in_frames(ctl.page_size);
   1210
   1211		VM_BUG_ON(!list_empty(&ctl.pages));
   1212		VM_BUG_ON(ctl.n_pages);
   1213		VM_BUG_ON(!list_empty(&ctl.refused_pages));
   1214		VM_BUG_ON(ctl.n_refused_pages);
   1215
   1216		/*
    1217		 * If a specific number of frames was requested, we try to
    1218		 * deflate this number of frames. Otherwise, deflation is
   1219		 * performed according to the target and balloon size.
   1220		 */
   1221		to_deflate_frames = n_frames ? n_frames - tried_frames :
   1222					       -vmballoon_change(b);
   1223
   1224		/* break if no work to do */
   1225		if (to_deflate_frames <= 0)
   1226			break;
   1227
   1228		/*
   1229		 * Calculate the number of frames based on current page size,
   1230		 * but limit the deflated frames to a single chunk
   1231		 */
   1232		to_deflate_pages = min_t(unsigned long, b->batch_max_pages,
   1233					 DIV_ROUND_UP_ULL(to_deflate_frames,
   1234							  page_in_frames));
   1235
   1236		/* First take the pages from the balloon pages. */
   1237		vmballoon_dequeue_page_list(b, &ctl.pages, &ctl.n_pages,
   1238					    ctl.page_size, to_deflate_pages);
   1239
   1240		/*
    1241		 * Before pages are moved to the refused list, count their
   1242		 * frames as frames that we tried to deflate.
   1243		 */
   1244		tried_frames += ctl.n_pages * page_in_frames;
   1245
   1246		/*
   1247		 * Unlock the pages by communicating with the hypervisor if the
   1248		 * communication is coordinated (i.e., not pop). We ignore the
    1249		 * return code. Instead we check whether we managed to unlock
    1250		 * all the pages. If we failed, we will move to the next
    1251		 * page size, and will eventually try again later.
   1252		 */
   1253		if (coordinated)
   1254			vmballoon_lock(b, &ctl);
   1255
   1256		/*
   1257		 * Check if we deflated enough. We will move to the next page
   1258		 * size if we did not manage to do so. This calculation takes
   1259		 * place now, as once the pages are released, the number of
   1260		 * pages is zeroed.
   1261		 */
   1262		deflated_all = (ctl.n_pages == to_deflate_pages);
   1263
   1264		/* Update local and global counters */
   1265		n_unlocked_frames = ctl.n_pages * page_in_frames;
   1266		atomic64_sub(n_unlocked_frames, &b->size);
   1267		deflated_frames += n_unlocked_frames;
   1268
   1269		vmballoon_stats_page_add(b, VMW_BALLOON_PAGE_STAT_FREE,
   1270					 ctl.page_size, ctl.n_pages);
   1271
   1272		/* free the ballooned pages */
   1273		vmballoon_release_page_list(&ctl.pages, &ctl.n_pages,
   1274					    ctl.page_size);
   1275
   1276		/* Return the refused pages to the ballooned list. */
   1277		vmballoon_enqueue_page_list(b, &ctl.refused_pages,
   1278					    &ctl.n_refused_pages,
   1279					    ctl.page_size);
   1280
   1281		/* If we failed to unlock all the pages, move to next size. */
   1282		if (!deflated_all) {
   1283			if (ctl.page_size == b->max_page_size)
   1284				break;
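        			/* Move up to the larger page size; counterpart of the inflate fallback. */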
   1285			ctl.page_size++;
   1286		}
   1287
   1288		cond_resched();
   1289	}
   1290
   1291	return deflated_frames;
   1292}
   1293
   1294/**
   1295 * vmballoon_deinit_batching - disables batching mode.
   1296 *
   1297 * @b: pointer to &struct vmballoon.
   1298 *
    1299 * Disables batching by deallocating the page for communication with the
   1300 * hypervisor and disabling the static key to indicate that batching is off.
   1301 */
   1302static void vmballoon_deinit_batching(struct vmballoon *b)
   1303{
   1304	free_page((unsigned long)b->batch_page);
   1305	b->batch_page = NULL;
   1306	static_branch_disable(&vmw_balloon_batching);
   1307	b->batch_max_pages = 1;
   1308}
   1309
   1310/**
   1311 * vmballoon_init_batching - enable batching mode.
   1312 *
   1313 * @b: pointer to &struct vmballoon.
   1314 *
    1315 * Enables batching by allocating a page for communication with the hypervisor
   1316 * and enabling the static_key to use batching.
   1317 *
   1318 * Return: zero on success or an appropriate error-code.
   1319 */
   1320static int vmballoon_init_batching(struct vmballoon *b)
   1321{
   1322	struct page *page;
   1323
   1324	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
   1325	if (!page)
   1326		return -ENOMEM;
   1327
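        	/*
        	 * The zeroed page is the communication area; it fits
        	 * PAGE_SIZE / sizeof(struct vmballoon_batch_entry) = 512 entries,
        	 * which bounds how many pages one batched command can cover.
        	 */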
   1328	b->batch_page = page_address(page);
   1329	b->batch_max_pages = PAGE_SIZE / sizeof(struct vmballoon_batch_entry);
   1330
   1331	static_branch_enable(&vmw_balloon_batching);
   1332
   1333	return 0;
   1334}
   1335
   1336/*
   1337 * Receive notification and resize balloon
   1338 */
   1339static void vmballoon_doorbell(void *client_data)
   1340{
   1341	struct vmballoon *b = client_data;
   1342
   1343	vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_DOORBELL);
   1344
   1345	mod_delayed_work(system_freezable_wq, &b->dwork, 0);
   1346}
   1347
   1348/*
   1349 * Clean up vmci doorbell
   1350 */
   1351static void vmballoon_vmci_cleanup(struct vmballoon *b)
   1352{
   1353	vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
   1354		      VMCI_INVALID_ID, VMCI_INVALID_ID);
   1355
   1356	if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
   1357		vmci_doorbell_destroy(b->vmci_doorbell);
   1358		b->vmci_doorbell = VMCI_INVALID_HANDLE;
   1359	}
   1360}
   1361
   1362/**
   1363 * vmballoon_vmci_init - Initialize vmci doorbell.
   1364 *
   1365 * @b: pointer to the balloon.
   1366 *
    1367 * Return: zero on success or when the wakeup command is not supported; an
    1368 * error code otherwise.
   1369 *
    1370 * Initialize the vmci doorbell, to get notified as soon as the balloon changes.
   1371 */
   1372static int vmballoon_vmci_init(struct vmballoon *b)
   1373{
   1374	unsigned long error;
   1375
   1376	if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0)
   1377		return 0;
   1378
   1379	error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB,
   1380				     VMCI_PRIVILEGE_FLAG_RESTRICTED,
   1381				     vmballoon_doorbell, b);
   1382
   1383	if (error != VMCI_SUCCESS)
   1384		goto fail;
   1385
   1386	error =	__vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
   1387				b->vmci_doorbell.context,
   1388				b->vmci_doorbell.resource, NULL);
   1389
   1390	if (error != VMW_BALLOON_SUCCESS)
   1391		goto fail;
   1392
   1393	return 0;
   1394fail:
   1395	vmballoon_vmci_cleanup(b);
   1396	return -EIO;
   1397}
   1398
   1399/**
    1400 * vmballoon_pop - Quickly release all pages allocated for the balloon.
   1401 *
   1402 * @b: pointer to the balloon.
   1403 *
    1404 * This function is called when the host decides to "reset" the balloon for
    1405 * one reason or another. Unlike a normal "deflate" we do not (and shall not)
    1406 * notify the host of the pages being released.
   1407 */
   1408static void vmballoon_pop(struct vmballoon *b)
   1409{
   1410	unsigned long size;
   1411
   1412	while ((size = atomic64_read(&b->size)))
   1413		vmballoon_deflate(b, size, false);
   1414}
   1415
   1416/*
    1417 * Perform the standard reset sequence by popping the balloon (in case it
    1418 * is not empty) and then restarting the protocol. This operation normally
    1419 * happens when the host responds with VMW_BALLOON_ERROR_RESET to a command.
   1420 */
   1421static void vmballoon_reset(struct vmballoon *b)
   1422{
   1423	int error;
   1424
   1425	down_write(&b->conf_sem);
   1426
   1427	vmballoon_vmci_cleanup(b);
   1428
   1429	/* free all pages, skipping monitor unlock */
   1430	vmballoon_pop(b);
   1431
   1432	if (vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
   1433		goto unlock;
   1434
   1435	if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
   1436		if (vmballoon_init_batching(b)) {
   1437			/*
    1438			 * We failed to initialize batching; inform the monitor
    1439			 * about it by sending a null capability.
   1440			 *
   1441			 * The guest will retry in one second.
   1442			 */
   1443			vmballoon_send_start(b, 0);
   1444			goto unlock;
   1445		}
   1446	} else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
   1447		vmballoon_deinit_batching(b);
   1448	}
   1449
   1450	vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_RESET);
   1451	b->reset_required = false;
   1452
   1453	error = vmballoon_vmci_init(b);
   1454	if (error)
   1455		pr_err_once("failed to initialize vmci doorbell\n");
   1456
   1457	if (vmballoon_send_guest_id(b))
   1458		pr_err_once("failed to send guest ID to the host\n");
   1459
   1460unlock:
   1461	up_write(&b->conf_sem);
   1462}
   1463
   1464/**
   1465 * vmballoon_work - periodic balloon worker for reset, inflation and deflation.
   1466 *
   1467 * @work: pointer to the &work_struct which is provided by the workqueue.
   1468 *
    1469 * Resets the protocol if needed, gets the new size and adjusts the balloon
    1470 * as needed. Repeats every second.
   1471 */
   1472static void vmballoon_work(struct work_struct *work)
   1473{
   1474	struct delayed_work *dwork = to_delayed_work(work);
   1475	struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
   1476	int64_t change = 0;
   1477
   1478	if (b->reset_required)
   1479		vmballoon_reset(b);
   1480
   1481	down_read(&b->conf_sem);
   1482
   1483	/*
   1484	 * Update the stats while holding the semaphore to ensure that
    1485	 * @balloon_stat_enabled is consistent with whether the stats are
    1486	 * actually enabled.
   1487	 */
   1488	vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_TIMER);
   1489
   1490	if (!vmballoon_send_get_target(b))
   1491		change = vmballoon_change(b);
   1492
   1493	if (change != 0) {
   1494		pr_debug("%s - size: %llu, target %lu\n", __func__,
   1495			 atomic64_read(&b->size), READ_ONCE(b->target));
   1496
   1497		if (change > 0)
   1498			vmballoon_inflate(b);
   1499		else  /* (change < 0) */
   1500			vmballoon_deflate(b, 0, true);
   1501	}
   1502
   1503	up_read(&b->conf_sem);
   1504
   1505	/*
   1506	 * We are using a freezable workqueue so that balloon operations are
   1507	 * stopped while the system transitions to/from sleep/hibernation.
   1508	 */
   1509	queue_delayed_work(system_freezable_wq,
   1510			   dwork, round_jiffies_relative(HZ));
   1511
   1512}
   1513
   1514/**
   1515 * vmballoon_shrinker_scan() - deflate the balloon due to memory pressure.
   1516 * @shrinker: pointer to the balloon shrinker.
   1517 * @sc: page reclaim information.
   1518 *
   1519 * Returns: number of pages that were freed during deflation.
   1520 */
   1521static unsigned long vmballoon_shrinker_scan(struct shrinker *shrinker,
   1522					     struct shrink_control *sc)
   1523{
   1524	struct vmballoon *b = &balloon;
   1525	unsigned long deflated_frames;
   1526
    1527	pr_debug("%s - size: %llu\n", __func__, atomic64_read(&b->size));
   1528
   1529	vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_SHRINK);
   1530
   1531	/*
   1532	 * If the lock is also contended for read, we cannot easily reclaim and
   1533	 * we bail out.
   1534	 */
   1535	if (!down_read_trylock(&b->conf_sem))
   1536		return 0;
   1537
   1538	deflated_frames = vmballoon_deflate(b, sc->nr_to_scan, true);
   1539
   1540	vmballoon_stats_gen_add(b, VMW_BALLOON_STAT_SHRINK_FREE,
   1541				deflated_frames);
   1542
   1543	/*
   1544	 * Delay future inflation for some time to mitigate the situations in
    1545	 * which the balloon continuously grows and shrinks. Use WRITE_ONCE() since
   1546	 * the access is asynchronous.
   1547	 */
   1548	WRITE_ONCE(b->shrink_timeout, jiffies + HZ * VMBALLOON_SHRINK_DELAY);
   1549
   1550	up_read(&b->conf_sem);
   1551
   1552	return deflated_frames;
   1553}
   1554
   1555/**
   1556 * vmballoon_shrinker_count() - return the number of ballooned pages.
   1557 * @shrinker: pointer to the balloon shrinker.
   1558 * @sc: page reclaim information.
   1559 *
   1560 * Returns: number of 4k pages that are allocated for the balloon and can
   1561 *	    therefore be reclaimed under pressure.
   1562 */
   1563static unsigned long vmballoon_shrinker_count(struct shrinker *shrinker,
   1564					      struct shrink_control *sc)
   1565{
   1566	struct vmballoon *b = &balloon;
   1567
   1568	return atomic64_read(&b->size);
   1569}
   1570
   1571static void vmballoon_unregister_shrinker(struct vmballoon *b)
   1572{
   1573	if (b->shrinker_registered)
   1574		unregister_shrinker(&b->shrinker);
   1575	b->shrinker_registered = false;
   1576}
   1577
   1578static int vmballoon_register_shrinker(struct vmballoon *b)
   1579{
   1580	int r;
   1581
   1582	/* Do nothing if the shrinker is not enabled */
   1583	if (!vmwballoon_shrinker_enable)
   1584		return 0;
   1585
   1586	b->shrinker.scan_objects = vmballoon_shrinker_scan;
   1587	b->shrinker.count_objects = vmballoon_shrinker_count;
   1588	b->shrinker.seeks = DEFAULT_SEEKS;
   1589
   1590	r = register_shrinker(&b->shrinker);
   1591
   1592	if (r == 0)
   1593		b->shrinker_registered = true;
   1594
   1595	return r;
   1596}
   1597
   1598/*
   1599 * DEBUGFS Interface
   1600 */
   1601#ifdef CONFIG_DEBUG_FS
   1602
   1603static const char * const vmballoon_stat_page_names[] = {
   1604	[VMW_BALLOON_PAGE_STAT_ALLOC]		= "alloc",
   1605	[VMW_BALLOON_PAGE_STAT_ALLOC_FAIL]	= "allocFail",
   1606	[VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC]	= "errAlloc",
   1607	[VMW_BALLOON_PAGE_STAT_REFUSED_FREE]	= "errFree",
   1608	[VMW_BALLOON_PAGE_STAT_FREE]		= "free"
   1609};
   1610
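/* Names of the general statistics, as shown in the debugfs file. */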
   1611static const char * const vmballoon_stat_names[] = {
   1612	[VMW_BALLOON_STAT_TIMER]		= "timer",
   1613	[VMW_BALLOON_STAT_DOORBELL]		= "doorbell",
   1614	[VMW_BALLOON_STAT_RESET]		= "reset",
   1615	[VMW_BALLOON_STAT_SHRINK]		= "shrink",
   1616	[VMW_BALLOON_STAT_SHRINK_FREE]		= "shrinkFree"
   1617};
   1618
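/**
 * vmballoon_enable_stats() - enables statistics collection.
 * @b: pointer to the balloon.
 *
 * Allocates the statistics structure and enables the static key so that the
 * hot paths start updating the counters. Called lazily, on the first read of
 * the debugfs file.
 *
 * Return: zero on success or -ENOMEM if the allocation failed.
 */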
   1619static int vmballoon_enable_stats(struct vmballoon *b)
   1620{
   1621	int r = 0;
   1622
   1623	down_write(&b->conf_sem);
   1624
	/* Did we race with another reader that already enabled the stats? */
   1626	if (b->stats)
   1627		goto out;
   1628
   1629	b->stats = kzalloc(sizeof(*b->stats), GFP_KERNEL);
   1630
   1631	if (!b->stats) {
   1632		/* allocation failed */
   1633		r = -ENOMEM;
   1634		goto out;
   1635	}
   1636	static_key_enable(&balloon_stat_enabled.key);
   1637out:
   1638	up_write(&b->conf_sem);
   1639	return r;
   1640}
   1641
/**
 * vmballoon_debug_show() - shows statistics of balloon operations.
 * @f: pointer to the &struct seq_file.
 * @offset: ignored.
 *
 * Provides the statistics that can be accessed through the vmmemctl file in
 * debugfs. To avoid the overhead, mainly in memory, of collecting the
 * statistics, they are only collected after the first time the counters are
 * read.
 *
 * Return: zero on success or an error code.
 */
   1653static int vmballoon_debug_show(struct seq_file *f, void *offset)
   1654{
   1655	struct vmballoon *b = f->private;
   1656	int i, j;
   1657
   1658	/* enables stats if they are disabled */
   1659	if (!b->stats) {
   1660		int r = vmballoon_enable_stats(b);
   1661
   1662		if (r)
   1663			return r;
   1664	}
   1665
   1666	/* format capabilities info */
   1667	seq_printf(f, "%-22s: %#16x\n", "balloon capabilities",
   1668		   VMW_BALLOON_CAPABILITIES);
   1669	seq_printf(f, "%-22s: %#16lx\n", "used capabilities", b->capabilities);
   1670	seq_printf(f, "%-22s: %16s\n", "is resetting",
   1671		   b->reset_required ? "y" : "n");
   1672
   1673	/* format size info */
   1674	seq_printf(f, "%-22s: %16lu\n", "target", READ_ONCE(b->target));
   1675	seq_printf(f, "%-22s: %16llu\n", "current", atomic64_read(&b->size));
   1676
   1677	for (i = 0; i < VMW_BALLOON_CMD_NUM; i++) {
   1678		if (vmballoon_cmd_names[i] == NULL)
   1679			continue;
   1680
   1681		seq_printf(f, "%-22s: %16llu (%llu failed)\n",
   1682			   vmballoon_cmd_names[i],
   1683			   atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_STAT]),
   1684			   atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_FAIL_STAT]));
   1685	}
   1686
   1687	for (i = 0; i < VMW_BALLOON_STAT_NUM; i++)
   1688		seq_printf(f, "%-22s: %16llu\n",
   1689			   vmballoon_stat_names[i],
   1690			   atomic64_read(&b->stats->general_stat[i]));
   1691
   1692	for (i = 0; i < VMW_BALLOON_PAGE_STAT_NUM; i++) {
   1693		for (j = 0; j < VMW_BALLOON_NUM_PAGE_SIZES; j++)
   1694			seq_printf(f, "%-18s(%s): %16llu\n",
   1695				   vmballoon_stat_page_names[i],
   1696				   vmballoon_page_size_names[j],
   1697				   atomic64_read(&b->stats->page_stat[i][j]));
   1698	}
   1699
   1700	return 0;
   1701}
   1702
   1703DEFINE_SHOW_ATTRIBUTE(vmballoon_debug);
   1704
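/* Create the vmmemctl statistics file in the debugfs root directory. */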
   1705static void __init vmballoon_debugfs_init(struct vmballoon *b)
   1706{
   1707	debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
   1708			    &vmballoon_debug_fops);
   1709}
   1710
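/*
 * Disable statistics collection, remove the debugfs file and free the
 * statistics structure.
 */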
static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
{
	struct dentry *dentry = debugfs_lookup("vmmemctl", NULL);

	static_key_disable(&balloon_stat_enabled.key);

	/*
	 * debugfs_lookup() takes a reference on the dentry; drop it with
	 * dput() after removing the file, otherwise the dentry is leaked.
	 */
	debugfs_remove(dentry);
	dput(dentry);

	kfree(b->stats);
	b->stats = NULL;
}
   1718
   1719#else
   1720
   1721static inline void vmballoon_debugfs_init(struct vmballoon *b)
   1722{
   1723}
   1724
   1725static inline void vmballoon_debugfs_exit(struct vmballoon *b)
   1726{
   1727}
   1728
   1729#endif	/* CONFIG_DEBUG_FS */
   1730
   1731
   1732#ifdef CONFIG_BALLOON_COMPACTION
   1733
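/*
 * Balloon pages are backed by a tiny pseudo-filesystem: compaction requires
 * an inode whose address-space operations allow ballooned pages to be
 * migrated.
 */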
   1734static int vmballoon_init_fs_context(struct fs_context *fc)
   1735{
   1736	return init_pseudo(fc, BALLOON_VMW_MAGIC) ? 0 : -ENOMEM;
   1737}
   1738
static struct file_system_type vmballoon_fs = {
	.name			= "balloon-vmware",
	.init_fs_context	= vmballoon_init_fs_context,
	.kill_sb		= kill_anon_super,
};
   1744
   1745static struct vfsmount *vmballoon_mnt;
   1746
   1747/**
   1748 * vmballoon_migratepage() - migrates a balloon page.
   1749 * @b_dev_info: balloon device information descriptor.
   1750 * @newpage: the page to which @page should be migrated.
   1751 * @page: a ballooned page that should be migrated.
   1752 * @mode: migration mode, ignored.
   1753 *
 * This function is largely open-coded, as dictated by the interface that
 * balloon_compaction provides.
   1756 *
   1757 * Return: zero on success, -EAGAIN when migration cannot be performed
   1758 *	   momentarily, and -EBUSY if migration failed and should be retried
   1759 *	   with that specific page.
   1760 */
   1761static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info,
   1762				 struct page *newpage, struct page *page,
   1763				 enum migrate_mode mode)
   1764{
   1765	unsigned long status, flags;
   1766	struct vmballoon *b;
   1767	int ret;
   1768
   1769	b = container_of(b_dev_info, struct vmballoon, b_dev_info);
   1770
   1771	/*
	 * If the semaphore is taken, there is an ongoing configuration change
   1773	 * (i.e., balloon reset), so try again.
   1774	 */
   1775	if (!down_read_trylock(&b->conf_sem))
   1776		return -EAGAIN;
   1777
   1778	spin_lock(&b->comm_lock);
   1779	/*
   1780	 * We must start by deflating and not inflating, as otherwise the
   1781	 * hypervisor may tell us that it has enough memory and the new page is
   1782	 * not needed. Since the old page is isolated, we cannot use the list
   1783	 * interface to unlock it, as the LRU field is used for isolation.
   1784	 * Instead, we use the native interface directly.
   1785	 */
   1786	vmballoon_add_page(b, 0, page);
   1787	status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE,
   1788				   VMW_BALLOON_DEFLATE);
   1789
   1790	if (status == VMW_BALLOON_SUCCESS)
   1791		status = vmballoon_status_page(b, 0, &page);
   1792
   1793	/*
   1794	 * If a failure happened, let the migration mechanism know that it
   1795	 * should not retry.
   1796	 */
   1797	if (status != VMW_BALLOON_SUCCESS) {
   1798		spin_unlock(&b->comm_lock);
   1799		ret = -EBUSY;
   1800		goto out_unlock;
   1801	}
   1802
	/*
	 * The page is isolated, so it is safe to delete it without holding
	 * @pages_lock. We keep holding @comm_lock since we will need it
	 * again shortly for the inflation below.
	 */
   1808	balloon_page_delete(page);
   1809
   1810	put_page(page);
   1811
   1812	/* Inflate */
   1813	vmballoon_add_page(b, 0, newpage);
   1814	status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE,
   1815				   VMW_BALLOON_INFLATE);
   1816
   1817	if (status == VMW_BALLOON_SUCCESS)
   1818		status = vmballoon_status_page(b, 0, &newpage);
   1819
   1820	spin_unlock(&b->comm_lock);
   1821
   1822	if (status != VMW_BALLOON_SUCCESS) {
   1823		/*
   1824		 * A failure happened. While we can deflate the page we just
   1825		 * inflated, this deflation can also encounter an error. Instead
   1826		 * we will decrease the size of the balloon to reflect the
   1827		 * change and report failure.
   1828		 */
   1829		atomic64_dec(&b->size);
   1830		ret = -EBUSY;
   1831	} else {
   1832		/*
   1833		 * Success. Take a reference for the page, and we will add it to
   1834		 * the list after acquiring the lock.
   1835		 */
   1836		get_page(newpage);
   1837		ret = MIGRATEPAGE_SUCCESS;
   1838	}
   1839
   1840	/* Update the balloon list under the @pages_lock */
   1841	spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
   1842
	/*
	 * On inflation success, we have already taken a reference on
	 * @newpage. In that case, just insert it into the list and update the
	 * statistics under the lock.
	 */
   1848	if (ret == MIGRATEPAGE_SUCCESS) {
   1849		balloon_page_insert(&b->b_dev_info, newpage);
   1850		__count_vm_event(BALLOON_MIGRATE);
   1851	}
   1852
	/*
	 * We deflated successfully, so regardless of the inflation result, we
	 * need to reduce the number of isolated_pages.
	 */
   1857	b->b_dev_info.isolated_pages--;
   1858	spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
   1859
   1860out_unlock:
   1861	up_read(&b->conf_sem);
   1862	return ret;
   1863}
   1864
   1865/**
 * vmballoon_compaction_deinit() - removes compaction-related data.
   1867 *
   1868 * @b: pointer to the balloon.
   1869 */
   1870static void vmballoon_compaction_deinit(struct vmballoon *b)
   1871{
   1872	if (!IS_ERR(b->b_dev_info.inode))
   1873		iput(b->b_dev_info.inode);
   1874
   1875	b->b_dev_info.inode = NULL;
   1876	kern_unmount(vmballoon_mnt);
   1877	vmballoon_mnt = NULL;
   1878}
   1879
/**
 * vmballoon_compaction_init() - initializes compaction for the balloon.
 *
 * @b: pointer to the balloon.
 *
 * If a failure occurs during initialization, this function does not perform
 * cleanup. The caller must call vmballoon_compaction_deinit() in this case.
 *
 * Return: zero on success or an error code on failure.
 */
   1891static __init int vmballoon_compaction_init(struct vmballoon *b)
   1892{
   1893	vmballoon_mnt = kern_mount(&vmballoon_fs);
   1894	if (IS_ERR(vmballoon_mnt))
   1895		return PTR_ERR(vmballoon_mnt);
   1896
   1897	b->b_dev_info.migratepage = vmballoon_migratepage;
   1898	b->b_dev_info.inode = alloc_anon_inode(vmballoon_mnt->mnt_sb);
   1899
   1900	if (IS_ERR(b->b_dev_info.inode))
   1901		return PTR_ERR(b->b_dev_info.inode);
   1902
   1903	b->b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
   1904	return 0;
   1905}
   1906
   1907#else /* CONFIG_BALLOON_COMPACTION */
   1908
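/* No-op stubs for kernels built without balloon compaction support. */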
   1909static void vmballoon_compaction_deinit(struct vmballoon *b)
   1910{
   1911}
   1912
   1913static int vmballoon_compaction_init(struct vmballoon *b)
   1914{
   1915	return 0;
   1916}
   1917
   1918#endif /* CONFIG_BALLOON_COMPACTION */
   1919
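/**
 * vmballoon_init() - loads the balloon driver.
 *
 * Bails out if the guest is not running on VMware's hypervisor. Otherwise
 * registers the shrinker, initializes compaction support and queues the
 * first iteration of the worker, which performs the initial reset and
 * capability negotiation with the hypervisor.
 *
 * Return: zero on success or an error code on failure.
 */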
   1920static int __init vmballoon_init(void)
   1921{
   1922	int error;
   1923
   1924	/*
   1925	 * Check if we are running on VMware's hypervisor and bail out
   1926	 * if we are not.
   1927	 */
   1928	if (x86_hyper_type != X86_HYPER_VMWARE)
   1929		return -ENODEV;
   1930
   1931	INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);
   1932
   1933	error = vmballoon_register_shrinker(&balloon);
   1934	if (error)
   1935		goto fail;
   1936
	/*
	 * Initialization of compaction must be done after the call to
	 * balloon_devinfo_init().
	 */
   1941	balloon_devinfo_init(&balloon.b_dev_info);
   1942	error = vmballoon_compaction_init(&balloon);
   1943	if (error)
   1944		goto fail;
   1945
   1946	INIT_LIST_HEAD(&balloon.huge_pages);
   1947	spin_lock_init(&balloon.comm_lock);
   1948	init_rwsem(&balloon.conf_sem);
   1949	balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
   1950	balloon.batch_page = NULL;
   1951	balloon.page = NULL;
   1952	balloon.reset_required = true;
   1953
   1954	queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);
   1955
   1956	vmballoon_debugfs_init(&balloon);
   1957
   1958	return 0;
   1959fail:
   1960	vmballoon_unregister_shrinker(&balloon);
   1961	vmballoon_compaction_deinit(&balloon);
   1962	return error;
   1963}
   1964
   1965/*
   1966 * Using late_initcall() instead of module_init() allows the balloon to use the
 * VMCI doorbell even when the balloon is built into the kernel. Otherwise,
 * VMCI is probed only after the balloon is initialized. If the balloon is used
   1969 * as a module, late_initcall() is equivalent to module_init().
   1970 */
   1971late_initcall(vmballoon_init);
   1972
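/**
 * vmballoon_exit() - unloads the balloon driver.
 *
 * Unregisters the shrinker, cleans up the VMCI doorbell, stops the worker,
 * resets the connection with the hypervisor and returns all ballooned memory
 * to the guest before removing the compaction state.
 */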
   1973static void __exit vmballoon_exit(void)
   1974{
   1975	vmballoon_unregister_shrinker(&balloon);
   1976	vmballoon_vmci_cleanup(&balloon);
   1977	cancel_delayed_work_sync(&balloon.dwork);
   1978
   1979	vmballoon_debugfs_exit(&balloon);
   1980
	/*
	 * Deallocate all reserved memory and reset the connection with the
	 * monitor. The connection is reset before the memory is deallocated
	 * to avoid spurious resets caused by the guest touching pages that
	 * have already been deallocated.
	 */
   1986	vmballoon_send_start(&balloon, 0);
   1987	vmballoon_pop(&balloon);
   1988
	/* Only once the balloon has been popped can compaction be deinitialized. */
   1990	vmballoon_compaction_deinit(&balloon);
   1991}
   1992module_exit(vmballoon_exit);