cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

vas.c (28434B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * Copyright 2020-21 IBM Corp.
      4 */
      5
      6#define pr_fmt(fmt) "vas: " fmt
      7
      8#include <linux/module.h>
      9#include <linux/kernel.h>
     10#include <linux/export.h>
     11#include <linux/types.h>
     12#include <linux/delay.h>
     13#include <linux/slab.h>
     14#include <linux/interrupt.h>
     15#include <linux/irqdomain.h>
     16#include <asm/machdep.h>
     17#include <asm/hvcall.h>
     18#include <asm/plpar_wrappers.h>
     19#include <asm/vas.h>
     20#include "vas.h"
     21
/*
 * Sentinel values with all 64 bits set: the window (paste) address that
 * is treated as invalid, and the domain ID used to initialize the
 * domain arrays passed to the allocate window hcall.
 */
#define VAS_INVALID_WIN_ADDRESS	0xFFFFFFFFFFFFFFFFul
#define VAS_DEFAULT_DOMAIN_ID	0xFFFFFFFFFFFFFFFFul
/* The hypervisor allows one credit per window right now */
#define DEF_WIN_CREDS		1

/* Overall VAS capabilities, filled in by pseries_vas_init() */
static struct vas_all_caps caps_all;
/*
 * Set by get_vas_capabilities() once a feature's capabilities have been
 * read and exported; cleared again by pseries_vas_init() on failure.
 */
static bool copypaste_feat;
/* Result buffer handed to the H_QUERY_VAS_CAPABILITIES hcall */
static struct hv_vas_cop_feat_caps hv_cop_caps;

/* Per feature type capabilities and window bookkeeping */
static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
/* Held around window open / close, credit reconfig and migration */
static DEFINE_MUTEX(vas_pseries_mutex);
/*
 * Set by vas_migration_handler() for VAS_SUSPEND and cleared for any
 * other action; vas_allocate_window() returns -EBUSY while it is set.
 */
static bool migration_in_progress;
     34
     35static long hcall_return_busy_check(long rc)
     36{
     37	/* Check if we are stalled for some time */
     38	if (H_IS_LONG_BUSY(rc)) {
     39		msleep(get_longbusy_msecs(rc));
     40		rc = H_BUSY;
     41	} else if (rc == H_BUSY) {
     42		cond_resched();
     43	}
     44
     45	return rc;
     46}
     47
     48/*
     49 * Allocate VAS window hcall
     50 */
     51static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
     52				     u8 wintype, u16 credits)
     53{
     54	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
     55	long rc;
     56
     57	do {
     58		rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
     59				  credits, domain[0], domain[1], domain[2],
     60				  domain[3], domain[4], domain[5]);
     61
     62		rc = hcall_return_busy_check(rc);
     63	} while (rc == H_BUSY);
     64
     65	if (rc == H_SUCCESS) {
     66		if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
     67			pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
     68			return -ENOTSUPP;
     69		}
     70		win->vas_win.winid = retbuf[0];
     71		win->win_addr = retbuf[1];
     72		win->complete_irq = retbuf[2];
     73		win->fault_irq = retbuf[3];
     74		return 0;
     75	}
     76
     77	pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
     78		rc, wintype, credits);
     79
     80	return -EIO;
     81}
     82
     83/*
     84 * Deallocate VAS window hcall.
     85 */
     86static int h_deallocate_vas_window(u64 winid)
     87{
     88	long rc;
     89
     90	do {
     91		rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);
     92
     93		rc = hcall_return_busy_check(rc);
     94	} while (rc == H_BUSY);
     95
     96	if (rc == H_SUCCESS)
     97		return 0;
     98
     99	pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
    100		rc, winid);
    101	return -EIO;
    102}
    103
    104/*
    105 * Modify VAS window.
    106 * After the window is opened with allocate window hcall, configure it
    107 * with flags and LPAR PID before using.
    108 */
    109static int h_modify_vas_window(struct pseries_vas_window *win)
    110{
    111	long rc;
    112
    113	/*
    114	 * AMR value is not supported in Linux VAS implementation.
    115	 * The hypervisor ignores it if 0 is passed.
    116	 */
    117	do {
    118		rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
    119					win->vas_win.winid, win->pid, 0,
    120					VAS_MOD_WIN_FLAGS, 0);
    121
    122		rc = hcall_return_busy_check(rc);
    123	} while (rc == H_BUSY);
    124
    125	if (rc == H_SUCCESS)
    126		return 0;
    127
    128	pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n",
    129			rc, win->vas_win.winid, win->pid);
    130	return -EIO;
    131}
    132
    133/*
    134 * This hcall is used to determine the capabilities from the hypervisor.
    135 * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
    136 * @query_type: If 0 is passed, the hypervisor returns the overall
    137 *		capabilities which provides all feature(s) that are
    138 *		available. Then query the hypervisor to get the
    139 *		corresponding capabilities for the specific feature.
    140 *		Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
    141 *			and VAS GZIP Default capabilities.
    142 *			H_QUERY_NX_CAPABILITIES provides NX GZIP
    143 *			capabilities.
    144 * @result: Return buffer to save capabilities.
    145 */
    146int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
    147{
    148	long rc;
    149
    150	rc = plpar_hcall_norets(hcall, query_type, result);
    151
    152	if (rc == H_SUCCESS)
    153		return 0;
    154
    155	/* H_FUNCTION means HV does not support VAS so don't print an error */
    156	if (rc != H_FUNCTION) {
    157		pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n",
    158			(hcall == H_QUERY_VAS_CAPABILITIES) ?
    159				"H_QUERY_VAS_CAPABILITIES" :
    160				"H_QUERY_NX_CAPABILITIES",
    161			rc, query_type, result);
    162	}
    163
    164	return -EIO;
    165}
    166EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
    167
    168/*
    169 * hcall to get fault CRB from the hypervisor.
    170 */
    171static int h_get_nx_fault(u32 winid, u64 buffer)
    172{
    173	long rc;
    174
    175	rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
    176
    177	if (rc == H_SUCCESS)
    178		return 0;
    179
    180	pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
    181		rc, winid, buffer);
    182	return -EIO;
    183
    184}
    185
    186/*
    187 * Handle the fault interrupt.
    188 * When the fault interrupt is received for each window, query the
    189 * hypervisor to get the fault CRB on the specific fault. Then
    190 * process the CRB by updating CSB or send signal if the user space
    191 * CSB is invalid.
    192 * Note: The hypervisor forwards an interrupt for each fault request.
    193 *	So one fault CRB to process for each H_GET_NX_FAULT hcall.
    194 */
    195static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
    196{
    197	struct pseries_vas_window *txwin = data;
    198	struct coprocessor_request_block crb;
    199	struct vas_user_win_ref *tsk_ref;
    200	int rc;
    201
    202	rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
    203	if (!rc) {
    204		tsk_ref = &txwin->vas_win.task_ref;
    205		vas_dump_crb(&crb);
    206		vas_update_csb(&crb, tsk_ref);
    207	}
    208
    209	return IRQ_HANDLED;
    210}
    211
    212/*
    213 * Allocate window and setup IRQ mapping.
    214 */
    215static int allocate_setup_window(struct pseries_vas_window *txwin,
    216				 u64 *domain, u8 wintype)
    217{
    218	int rc;
    219
    220	rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
    221	if (rc)
    222		return rc;
    223	/*
    224	 * On PowerVM, the hypervisor setup and forwards the fault
    225	 * interrupt per window. So the IRQ setup and fault handling
    226	 * will be done for each open window separately.
    227	 */
    228	txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
    229	if (!txwin->fault_virq) {
    230		pr_err("Failed irq mapping %d\n", txwin->fault_irq);
    231		rc = -EINVAL;
    232		goto out_win;
    233	}
    234
    235	txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
    236				txwin->vas_win.winid);
    237	if (!txwin->name) {
    238		rc = -ENOMEM;
    239		goto out_irq;
    240	}
    241
    242	rc = request_threaded_irq(txwin->fault_virq, NULL,
    243				  pseries_vas_fault_thread_fn, IRQF_ONESHOT,
    244				  txwin->name, txwin);
    245	if (rc) {
    246		pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
    247		       txwin->vas_win.winid, txwin->fault_virq, rc);
    248		goto out_free;
    249	}
    250
    251	txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
    252
    253	return 0;
    254out_free:
    255	kfree(txwin->name);
    256out_irq:
    257	irq_dispose_mapping(txwin->fault_virq);
    258out_win:
    259	h_deallocate_vas_window(txwin->vas_win.winid);
    260	return rc;
    261}
    262
    263static inline void free_irq_setup(struct pseries_vas_window *txwin)
    264{
    265	free_irq(txwin->fault_virq, txwin);
    266	kfree(txwin->name);
    267	irq_dispose_mapping(txwin->fault_virq);
    268}
    269
    270static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
    271					      enum vas_cop_type cop_type)
    272{
    273	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
    274	struct vas_cop_feat_caps *cop_feat_caps;
    275	struct vas_caps *caps;
    276	struct pseries_vas_window *txwin;
    277	int rc;
    278
    279	txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
    280	if (!txwin)
    281		return ERR_PTR(-ENOMEM);
    282
    283	/*
    284	 * A VAS window can have many credits which means that many
    285	 * requests can be issued simultaneously. But the hypervisor
    286	 * restricts one credit per window.
    287	 * The hypervisor introduces 2 different types of credits:
    288	 * Default credit type (Uses normal priority FIFO):
    289	 *	A limited number of credits are assigned to partitions
    290	 *	based on processor entitlement. But these credits may be
    291	 *	over-committed on a system depends on whether the CPUs
    292	 *	are in shared or dedicated modes - that is, more requests
    293	 *	may be issued across the system than NX can service at
    294	 *	once which can result in paste command failure (RMA_busy).
    295	 *	Then the process has to resend requests or fall-back to
    296	 *	SW compression.
    297	 * Quality of Service (QoS) credit type (Uses high priority FIFO):
    298	 *	To avoid NX HW contention, the system admins can assign
    299	 *	QoS credits for each LPAR so that this partition is
    300	 *	guaranteed access to NX resources. These credits are
    301	 *	assigned to partitions via the HMC.
    302	 *	Refer PAPR for more information.
    303	 *
    304	 * Allocate window with QoS credits if user requested. Otherwise
    305	 * default credits are used.
    306	 */
    307	if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
    308		caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
    309	else
    310		caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];
    311
    312	cop_feat_caps = &caps->caps;
    313
    314	if (atomic_inc_return(&cop_feat_caps->nr_used_credits) >
    315			atomic_read(&cop_feat_caps->nr_total_credits)) {
    316		pr_err("Credits are not available to allocate window\n");
    317		rc = -EINVAL;
    318		goto out;
    319	}
    320
    321	if (vas_id == -1) {
    322		/*
    323		 * The user space is requesting to allocate a window on
    324		 * a VAS instance where the process is executing.
    325		 * On PowerVM, domain values are passed to the hypervisor
    326		 * to select VAS instance. Useful if the process is
    327		 * affinity to NUMA node.
    328		 * The hypervisor selects VAS instance if
    329		 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
    330		 * The h_allocate_vas_window hcall is defined to take a
    331		 * domain values as specified by h_home_node_associativity,
    332		 * So no unpacking needs to be done.
    333		 */
    334		rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
    335				  VPHN_FLAG_VCPU, smp_processor_id());
    336		if (rc != H_SUCCESS) {
    337			pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
    338			goto out;
    339		}
    340	}
    341
    342	txwin->pid = mfspr(SPRN_PID);
    343
    344	/*
    345	 * Allocate / Deallocate window hcalls and setup / free IRQs
    346	 * have to be protected with mutex.
    347	 * Open VAS window: Allocate window hcall and setup IRQ
    348	 * Close VAS window: Deallocate window hcall and free IRQ
    349	 *	The hypervisor waits until all NX requests are
    350	 *	completed before closing the window. So expects OS
    351	 *	to handle NX faults, means IRQ can be freed only
    352	 *	after the deallocate window hcall is returned.
    353	 * So once the window is closed with deallocate hcall before
    354	 * the IRQ is freed, it can be assigned to new allocate
    355	 * hcall with the same fault IRQ by the hypervisor. It can
    356	 * result in setup IRQ fail for the new window since the
    357	 * same fault IRQ is not freed by the OS before.
    358	 */
    359	mutex_lock(&vas_pseries_mutex);
    360	if (migration_in_progress)
    361		rc = -EBUSY;
    362	else
    363		rc = allocate_setup_window(txwin, (u64 *)&domain[0],
    364				   cop_feat_caps->win_type);
    365	mutex_unlock(&vas_pseries_mutex);
    366	if (rc)
    367		goto out;
    368
    369	/*
    370	 * Modify window and it is ready to use.
    371	 */
    372	rc = h_modify_vas_window(txwin);
    373	if (!rc)
    374		rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
    375	if (rc)
    376		goto out_free;
    377
    378	txwin->win_type = cop_feat_caps->win_type;
    379	mutex_lock(&vas_pseries_mutex);
    380	/*
    381	 * Possible to lose the acquired credit with DLPAR core
    382	 * removal after the window is opened. So if there are any
    383	 * closed windows (means with lost credits), do not give new
    384	 * window to user space. New windows will be opened only
    385	 * after the existing windows are reopened when credits are
    386	 * available.
    387	 */
    388	if (!caps->nr_close_wins) {
    389		list_add(&txwin->win_list, &caps->list);
    390		caps->nr_open_windows++;
    391		mutex_unlock(&vas_pseries_mutex);
    392		vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
    393		return &txwin->vas_win;
    394	}
    395	mutex_unlock(&vas_pseries_mutex);
    396
    397	put_vas_user_win_ref(&txwin->vas_win.task_ref);
    398	rc = -EBUSY;
    399	pr_err("No credit is available to allocate window\n");
    400
    401out_free:
    402	/*
    403	 * Window is not operational. Free IRQ before closing
    404	 * window so that do not have to hold mutex.
    405	 */
    406	free_irq_setup(txwin);
    407	h_deallocate_vas_window(txwin->vas_win.winid);
    408out:
    409	atomic_dec(&cop_feat_caps->nr_used_credits);
    410	kfree(txwin);
    411	return ERR_PTR(rc);
    412}
    413
    414static u64 vas_paste_address(struct vas_window *vwin)
    415{
    416	struct pseries_vas_window *win;
    417
    418	win = container_of(vwin, struct pseries_vas_window, vas_win);
    419	return win->win_addr;
    420}
    421
    422static int deallocate_free_window(struct pseries_vas_window *win)
    423{
    424	int rc = 0;
    425
    426	/*
    427	 * The hypervisor waits for all requests including faults
    428	 * are processed before closing the window - Means all
    429	 * credits have to be returned. In the case of fault
    430	 * request, a credit is returned after OS issues
    431	 * H_GET_NX_FAULT hcall.
    432	 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
    433	 * hcall.
    434	 */
    435	rc = h_deallocate_vas_window(win->vas_win.winid);
    436	if (!rc)
    437		free_irq_setup(win);
    438
    439	return rc;
    440}
    441
    442static int vas_deallocate_window(struct vas_window *vwin)
    443{
    444	struct pseries_vas_window *win;
    445	struct vas_cop_feat_caps *caps;
    446	int rc = 0;
    447
    448	if (!vwin)
    449		return -EINVAL;
    450
    451	win = container_of(vwin, struct pseries_vas_window, vas_win);
    452
    453	/* Should not happen */
    454	if (win->win_type >= VAS_MAX_FEAT_TYPE) {
    455		pr_err("Window (%u): Invalid window type %u\n",
    456				vwin->winid, win->win_type);
    457		return -EINVAL;
    458	}
    459
    460	caps = &vascaps[win->win_type].caps;
    461	mutex_lock(&vas_pseries_mutex);
    462	/*
    463	 * VAS window is already closed in the hypervisor when
    464	 * lost the credit or with migration. So just remove the entry
    465	 * from the list, remove task references and free vas_window
    466	 * struct.
    467	 */
    468	if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
    469		!(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
    470		rc = deallocate_free_window(win);
    471		if (rc) {
    472			mutex_unlock(&vas_pseries_mutex);
    473			return rc;
    474		}
    475	} else
    476		vascaps[win->win_type].nr_close_wins--;
    477
    478	list_del(&win->win_list);
    479	atomic_dec(&caps->nr_used_credits);
    480	vascaps[win->win_type].nr_open_windows--;
    481	mutex_unlock(&vas_pseries_mutex);
    482
    483	put_vas_user_win_ref(&vwin->task_ref);
    484	mm_context_remove_vas_window(vwin->task_ref.mm);
    485
    486	kfree(win);
    487	return 0;
    488}
    489
    490static const struct vas_user_win_ops vops_pseries = {
    491	.open_win	= vas_allocate_window,	/* Open and configure window */
    492	.paste_addr	= vas_paste_address,	/* To do copy/paste */
    493	.close_win	= vas_deallocate_window, /* Close window */
    494};
    495
    496/*
    497 * Supporting only nx-gzip coprocessor type now, but this API code
    498 * extended to other coprocessor types later.
    499 */
    500int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
    501			     const char *name)
    502{
    503	int rc;
    504
    505	if (!copypaste_feat)
    506		return -ENOTSUPP;
    507
    508	rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
    509
    510	return rc;
    511}
    512EXPORT_SYMBOL_GPL(vas_register_api_pseries);
    513
    514void vas_unregister_api_pseries(void)
    515{
    516	vas_unregister_coproc_api();
    517}
    518EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
    519
    520/*
    521 * Get the specific capabilities based on the feature type.
    522 * Right now supports GZIP default and GZIP QoS capabilities.
    523 */
    524static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
    525				struct hv_vas_cop_feat_caps *hv_caps)
    526{
    527	struct vas_cop_feat_caps *caps;
    528	struct vas_caps *vcaps;
    529	int rc = 0;
    530
    531	vcaps = &vascaps[type];
    532	memset(vcaps, 0, sizeof(*vcaps));
    533	INIT_LIST_HEAD(&vcaps->list);
    534
    535	vcaps->feat = feat;
    536	caps = &vcaps->caps;
    537
    538	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
    539					  (u64)virt_to_phys(hv_caps));
    540	if (rc)
    541		return rc;
    542
    543	caps->user_mode = hv_caps->user_mode;
    544	if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
    545		pr_err("User space COPY/PASTE is not supported\n");
    546		return -ENOTSUPP;
    547	}
    548
    549	caps->descriptor = be64_to_cpu(hv_caps->descriptor);
    550	caps->win_type = hv_caps->win_type;
    551	if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
    552		pr_err("Unsupported window type %u\n", caps->win_type);
    553		return -EINVAL;
    554	}
    555	caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
    556	caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
    557	atomic_set(&caps->nr_total_credits,
    558		   be16_to_cpu(hv_caps->target_lpar_creds));
    559	if (feat == VAS_GZIP_DEF_FEAT) {
    560		caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
    561
    562		if (caps->max_win_creds < DEF_WIN_CREDS) {
    563			pr_err("Window creds(%u) > max allowed window creds(%u)\n",
    564			       DEF_WIN_CREDS, caps->max_win_creds);
    565			return -EINVAL;
    566		}
    567	}
    568
    569	rc = sysfs_add_vas_caps(caps);
    570	if (rc)
    571		return rc;
    572
    573	copypaste_feat = true;
    574
    575	return 0;
    576}
    577
    578/*
    579 * VAS windows can be closed due to lost credits when the core is
    580 * removed. So reopen them if credits are available due to DLPAR
    581 * core add and set the window active status. When NX sees the page
    582 * fault on the unmapped paste address, the kernel handles the fault
    583 * by setting the remapping to new paste address if the window is
    584 * active.
    585 */
    586static int reconfig_open_windows(struct vas_caps *vcaps, int creds,
    587				 bool migrate)
    588{
    589	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
    590	struct vas_cop_feat_caps *caps = &vcaps->caps;
    591	struct pseries_vas_window *win = NULL, *tmp;
    592	int rc, mv_ents = 0;
    593	int flag;
    594
    595	/*
    596	 * Nothing to do if there are no closed windows.
    597	 */
    598	if (!vcaps->nr_close_wins)
    599		return 0;
    600
    601	/*
    602	 * For the core removal, the hypervisor reduces the credits
    603	 * assigned to the LPAR and the kernel closes VAS windows
    604	 * in the hypervisor depends on reduced credits. The kernel
    605	 * uses LIFO (the last windows that are opened will be closed
    606	 * first) and expects to open in the same order when credits
    607	 * are available.
    608	 * For example, 40 windows are closed when the LPAR lost 2 cores
    609	 * (dedicated). If 1 core is added, this LPAR can have 20 more
    610	 * credits. It means the kernel can reopen 20 windows. So move
    611	 * 20 entries in the VAS windows lost and reopen next 20 windows.
    612	 * For partition migration, reopen all windows that are closed
    613	 * during resume.
    614	 */
    615	if ((vcaps->nr_close_wins > creds) && !migrate)
    616		mv_ents = vcaps->nr_close_wins - creds;
    617
    618	list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) {
    619		if (!mv_ents)
    620			break;
    621
    622		mv_ents--;
    623	}
    624
    625	/*
    626	 * Open windows if they are closed only with migration or
    627	 * DLPAR (lost credit) before.
    628	 */
    629	if (migrate)
    630		flag = VAS_WIN_MIGRATE_CLOSE;
    631	else
    632		flag = VAS_WIN_NO_CRED_CLOSE;
    633
    634	list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) {
    635		/*
    636		 * This window is closed with DLPAR and migration events.
    637		 * So reopen the window with the last event.
    638		 * The user space is not suspended with the current
    639		 * migration notifier. So the user space can issue DLPAR
    640		 * CPU hotplug while migration in progress. In this case
    641		 * this window will be opened with the last event.
    642		 */
    643		if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
    644			(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
    645			win->vas_win.status &= ~flag;
    646			continue;
    647		}
    648
    649		/*
    650		 * Nothing to do on this window if it is not closed
    651		 * with this flag
    652		 */
    653		if (!(win->vas_win.status & flag))
    654			continue;
    655
    656		rc = allocate_setup_window(win, (u64 *)&domain[0],
    657					   caps->win_type);
    658		if (rc)
    659			return rc;
    660
    661		rc = h_modify_vas_window(win);
    662		if (rc)
    663			goto out;
    664
    665		mutex_lock(&win->vas_win.task_ref.mmap_mutex);
    666		/*
    667		 * Set window status to active
    668		 */
    669		win->vas_win.status &= ~flag;
    670		mutex_unlock(&win->vas_win.task_ref.mmap_mutex);
    671		win->win_type = caps->win_type;
    672		if (!--vcaps->nr_close_wins)
    673			break;
    674	}
    675
    676	return 0;
    677out:
    678	/*
    679	 * Window modify HCALL failed. So close the window to the
    680	 * hypervisor and return.
    681	 */
    682	free_irq_setup(win);
    683	h_deallocate_vas_window(win->vas_win.winid);
    684	return rc;
    685}
    686
    687/*
    688 * The hypervisor reduces the available credits if the LPAR lost core. It
    689 * means the excessive windows should not be active and the user space
    690 * should not be using these windows to send compression requests to NX.
    691 * So the kernel closes the excessive windows and unmap the paste address
    692 * such that the user space receives paste instruction failure. Then up to
    693 * the user space to fall back to SW compression and manage with the
    694 * existing windows.
    695 */
    696static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
    697									bool migrate)
    698{
    699	struct pseries_vas_window *win, *tmp;
    700	struct vas_user_win_ref *task_ref;
    701	struct vm_area_struct *vma;
    702	int rc = 0, flag;
    703
    704	if (migrate)
    705		flag = VAS_WIN_MIGRATE_CLOSE;
    706	else
    707		flag = VAS_WIN_NO_CRED_CLOSE;
    708
    709	list_for_each_entry_safe(win, tmp, &vcap->list, win_list) {
    710		/*
    711		 * This window is already closed due to lost credit
    712		 * or for migration before. Go for next window.
    713		 * For migration, nothing to do since this window
    714		 * closed for DLPAR and will be reopened even on
    715		 * the destination system with other DLPAR operation.
    716		 */
    717		if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) ||
    718			(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) {
    719			win->vas_win.status |= flag;
    720			continue;
    721		}
    722
    723		task_ref = &win->vas_win.task_ref;
    724		mutex_lock(&task_ref->mmap_mutex);
    725		vma = task_ref->vma;
    726		/*
    727		 * Number of available credits are reduced, So select
    728		 * and close windows.
    729		 */
    730		win->vas_win.status |= flag;
    731
    732		mmap_write_lock(task_ref->mm);
    733		/*
    734		 * vma is set in the original mapping. But this mapping
    735		 * is done with mmap() after the window is opened with ioctl.
    736		 * so we may not see the original mapping if the core remove
    737		 * is done before the original mmap() and after the ioctl.
    738		 */
    739		if (vma)
    740			zap_page_range(vma, vma->vm_start,
    741					vma->vm_end - vma->vm_start);
    742
    743		mmap_write_unlock(task_ref->mm);
    744		mutex_unlock(&task_ref->mmap_mutex);
    745		/*
    746		 * Close VAS window in the hypervisor, but do not
    747		 * free vas_window struct since it may be reused
    748		 * when the credit is available later (DLPAR with
    749		 * adding cores). This struct will be used
    750		 * later when the process issued with close(FD).
    751		 */
    752		rc = deallocate_free_window(win);
    753		/*
    754		 * This failure is from the hypervisor.
    755		 * No way to stop migration for these failures.
    756		 * So ignore error and continue closing other windows.
    757		 */
    758		if (rc && !migrate)
    759			return rc;
    760
    761		vcap->nr_close_wins++;
    762
    763		/*
    764		 * For migration, do not depend on lpar_creds in case if
    765		 * mismatch with the hypervisor value (should not happen).
    766		 * So close all active windows in the list and will be
    767		 * reopened windows based on the new lpar_creds on the
    768		 * destination system during resume.
    769		 */
    770		if (!migrate && !--excess_creds)
    771			break;
    772	}
    773
    774	return 0;
    775}
    776
    777/*
    778 * Get new VAS capabilities when the core add/removal configuration
    779 * changes. Reconfig window configurations based on the credits
    780 * availability from this new capabilities.
    781 */
    782int vas_reconfig_capabilties(u8 type, int new_nr_creds)
    783{
    784	struct vas_cop_feat_caps *caps;
    785	int old_nr_creds;
    786	struct vas_caps *vcaps;
    787	int rc = 0, nr_active_wins;
    788
    789	if (type >= VAS_MAX_FEAT_TYPE) {
    790		pr_err("Invalid credit type %d\n", type);
    791		return -EINVAL;
    792	}
    793
    794	vcaps = &vascaps[type];
    795	caps = &vcaps->caps;
    796
    797	mutex_lock(&vas_pseries_mutex);
    798
    799	old_nr_creds = atomic_read(&caps->nr_total_credits);
    800
    801	atomic_set(&caps->nr_total_credits, new_nr_creds);
    802	/*
    803	 * The total number of available credits may be decreased or
    804	 * increased with DLPAR operation. Means some windows have to be
    805	 * closed / reopened. Hold the vas_pseries_mutex so that the
    806	 * the user space can not open new windows.
    807	 */
    808	if (old_nr_creds <  new_nr_creds) {
    809		/*
    810		 * If the existing target credits is less than the new
    811		 * target, reopen windows if they are closed due to
    812		 * the previous DLPAR (core removal).
    813		 */
    814		rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds,
    815					   false);
    816	} else {
    817		/*
    818		 * # active windows is more than new LPAR available
    819		 * credits. So close the excessive windows.
    820		 * On pseries, each window will have 1 credit.
    821		 */
    822		nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins;
    823		if (nr_active_wins > new_nr_creds)
    824			rc = reconfig_close_windows(vcaps,
    825					nr_active_wins - new_nr_creds,
    826					false);
    827	}
    828
    829	mutex_unlock(&vas_pseries_mutex);
    830	return rc;
    831}
    832/*
    833 * Total number of default credits available (target_credits)
    834 * in LPAR depends on number of cores configured. It varies based on
    835 * whether processors are in shared mode or dedicated mode.
    836 * Get the notifier when CPU configuration is changed with DLPAR
    837 * operation so that get the new target_credits (vas default capabilities)
    838 * and then update the existing windows usage if needed.
    839 */
    840static int pseries_vas_notifier(struct notifier_block *nb,
    841				unsigned long action, void *data)
    842{
    843	struct of_reconfig_data *rd = data;
    844	struct device_node *dn = rd->dn;
    845	const __be32 *intserv = NULL;
    846	int new_nr_creds, len, rc = 0;
    847
    848	if ((action == OF_RECONFIG_ATTACH_NODE) ||
    849		(action == OF_RECONFIG_DETACH_NODE))
    850		intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
    851					  &len);
    852	/*
    853	 * Processor config is not changed
    854	 */
    855	if (!intserv)
    856		return NOTIFY_OK;
    857
    858	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
    859					vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
    860					(u64)virt_to_phys(&hv_cop_caps));
    861	if (!rc) {
    862		new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
    863		rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE,
    864						new_nr_creds);
    865	}
    866
    867	if (rc)
    868		pr_err("Failed reconfig VAS capabilities with DLPAR\n");
    869
    870	return rc;
    871}
    872
/*
 * Notifier block for pseries_vas_notifier(); registered with
 * of_reconfig_notifier_register() from pseries_vas_init().
 */
static struct notifier_block pseries_vas_nb = {
	.notifier_call = pseries_vas_notifier,
};
    876
    877/*
    878 * For LPM, all windows have to be closed on the source partition
    879 * before migration and reopen them on the destination partition
    880 * after migration. So closing windows during suspend and
    881 * reopen them during resume.
    882 */
    883int vas_migration_handler(int action)
    884{
    885	struct vas_cop_feat_caps *caps;
    886	int old_nr_creds, new_nr_creds = 0;
    887	struct vas_caps *vcaps;
    888	int i, rc = 0;
    889
    890	/*
    891	 * NX-GZIP is not enabled. Nothing to do for migration.
    892	 */
    893	if (!copypaste_feat)
    894		return rc;
    895
    896	mutex_lock(&vas_pseries_mutex);
    897
    898	if (action == VAS_SUSPEND)
    899		migration_in_progress = true;
    900	else
    901		migration_in_progress = false;
    902
    903	for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) {
    904		vcaps = &vascaps[i];
    905		caps = &vcaps->caps;
    906		old_nr_creds = atomic_read(&caps->nr_total_credits);
    907
    908		rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
    909					      vcaps->feat,
    910					      (u64)virt_to_phys(&hv_cop_caps));
    911		if (!rc) {
    912			new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
    913			/*
    914			 * Should not happen. But incase print messages, close
    915			 * all windows in the list during suspend and reopen
    916			 * windows based on new lpar_creds on the destination
    917			 * system.
    918			 */
    919			if (old_nr_creds != new_nr_creds) {
    920				pr_err("Target credits mismatch with the hypervisor\n");
    921				pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n",
    922					action, old_nr_creds, new_nr_creds);
    923				pr_err("Used creds: %d, Active creds: %d\n",
    924					atomic_read(&caps->nr_used_credits),
    925					vcaps->nr_open_windows - vcaps->nr_close_wins);
    926			}
    927		} else {
    928			pr_err("state(%d): Get VAS capabilities failed with %d\n",
    929				action, rc);
    930			/*
    931			 * We can not stop migration with the current lpm
    932			 * implementation. So continue closing all windows in
    933			 * the list (during suspend) and return without
    934			 * opening windows (during resume) if VAS capabilities
    935			 * HCALL failed.
    936			 */
    937			if (action == VAS_RESUME)
    938				goto out;
    939		}
    940
    941		switch (action) {
    942		case VAS_SUSPEND:
    943			rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows,
    944							true);
    945			break;
    946		case VAS_RESUME:
    947			atomic_set(&caps->nr_total_credits, new_nr_creds);
    948			rc = reconfig_open_windows(vcaps, new_nr_creds, true);
    949			break;
    950		default:
    951			/* should not happen */
    952			pr_err("Invalid migration action %d\n", action);
    953			rc = -EINVAL;
    954			goto out;
    955		}
    956
    957		/*
    958		 * Ignore errors during suspend and return for resume.
    959		 */
    960		if (rc && (action == VAS_RESUME))
    961			goto out;
    962	}
    963
    964out:
    965	mutex_unlock(&vas_pseries_mutex);
    966	return rc;
    967}
    968
    969static int __init pseries_vas_init(void)
    970{
    971	struct hv_vas_all_caps *hv_caps;
    972	int rc = 0;
    973
    974	/*
    975	 * Linux supports user space COPY/PASTE only with Radix
    976	 */
    977	if (!radix_enabled()) {
    978		pr_err("API is supported only with radix page tables\n");
    979		return -ENOTSUPP;
    980	}
    981
    982	hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
    983	if (!hv_caps)
    984		return -ENOMEM;
    985	/*
    986	 * Get VAS overall capabilities by passing 0 to feature type.
    987	 */
    988	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
    989					  (u64)virt_to_phys(hv_caps));
    990	if (rc)
    991		goto out;
    992
    993	caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
    994	caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
    995
    996	sysfs_pseries_vas_init(&caps_all);
    997
    998	/*
    999	 * QOS capabilities available
   1000	 */
   1001	if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
   1002		rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
   1003					  VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps);
   1004
   1005		if (rc)
   1006			goto out;
   1007	}
   1008	/*
   1009	 * Default capabilities available
   1010	 */
   1011	if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT)
   1012		rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
   1013					  VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps);
   1014
   1015	if (!rc && copypaste_feat) {
   1016		if (firmware_has_feature(FW_FEATURE_LPAR))
   1017			of_reconfig_notifier_register(&pseries_vas_nb);
   1018
   1019		pr_info("GZIP feature is available\n");
   1020	} else {
   1021		/*
   1022		 * Should not happen, but only when get default
   1023		 * capabilities HCALL failed. So disable copy paste
   1024		 * feature.
   1025		 */
   1026		copypaste_feat = false;
   1027	}
   1028
   1029out:
   1030	kfree(hv_caps);
   1031	return rc;
   1032}
   1033machine_device_initcall(pseries, pseries_vas_init);