cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

kvm-s390.c (146364B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * hosting IBM Z kernel virtual machines (s390x)
      4 *
      5 * Copyright IBM Corp. 2008, 2020
      6 *
      7 *    Author(s): Carsten Otte <cotte@de.ibm.com>
      8 *               Christian Borntraeger <borntraeger@de.ibm.com>
      9 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
     10 *               Jason J. Herne <jjherne@us.ibm.com>
     11 */
     12
     13#define KMSG_COMPONENT "kvm-s390"
     14#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
     15
     16#include <linux/compiler.h>
     17#include <linux/err.h>
     18#include <linux/fs.h>
     19#include <linux/hrtimer.h>
     20#include <linux/init.h>
     21#include <linux/kvm.h>
     22#include <linux/kvm_host.h>
     23#include <linux/mman.h>
     24#include <linux/module.h>
     25#include <linux/moduleparam.h>
     26#include <linux/random.h>
     27#include <linux/slab.h>
     28#include <linux/timer.h>
     29#include <linux/vmalloc.h>
     30#include <linux/bitmap.h>
     31#include <linux/sched/signal.h>
     32#include <linux/string.h>
     33#include <linux/pgtable.h>
     34
     35#include <asm/asm-offsets.h>
     36#include <asm/lowcore.h>
     37#include <asm/stp.h>
     38#include <asm/gmap.h>
     39#include <asm/nmi.h>
     40#include <asm/switch_to.h>
     41#include <asm/isc.h>
     42#include <asm/sclp.h>
     43#include <asm/cpacf.h>
     44#include <asm/timex.h>
     45#include <asm/ap.h>
     46#include <asm/uv.h>
     47#include <asm/fpu/api.h>
     48#include "kvm-s390.h"
     49#include "gaccess.h"
     50
     51#define CREATE_TRACE_POINTS
     52#include "trace.h"
     53#include "trace-s390.h"
     54
     55#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
     56#define LOCAL_IRQS 32
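       /*
        * Presumably the upper bound for the KVM_S390_{GET,SET}_IRQ_STATE
        * buffer: at most one pending emergency signal per possible source
        * VCPU plus LOCAL_IRQS other per-vcpu interrupts.
        */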
     57#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
     58			   (KVM_MAX_VCPUS + LOCAL_IRQS))
     59
     60const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
     61	KVM_GENERIC_VM_STATS(),
     62	STATS_DESC_COUNTER(VM, inject_io),
     63	STATS_DESC_COUNTER(VM, inject_float_mchk),
     64	STATS_DESC_COUNTER(VM, inject_pfault_done),
     65	STATS_DESC_COUNTER(VM, inject_service_signal),
     66	STATS_DESC_COUNTER(VM, inject_virtio)
     67};
     68
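       /*
        * These offsets describe the layout of the binary stats file exposed
        * to userspace: the header itself, then the id string
        * (KVM_STATS_NAME_SIZE bytes), then the descriptor array, then the
        * data values in descriptor order.
        */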
     69const struct kvm_stats_header kvm_vm_stats_header = {
     70	.name_size = KVM_STATS_NAME_SIZE,
     71	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
     72	.id_offset = sizeof(struct kvm_stats_header),
     73	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
     74	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
     75		       sizeof(kvm_vm_stats_desc),
     76};
     77
     78const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
     79	KVM_GENERIC_VCPU_STATS(),
     80	STATS_DESC_COUNTER(VCPU, exit_userspace),
     81	STATS_DESC_COUNTER(VCPU, exit_null),
     82	STATS_DESC_COUNTER(VCPU, exit_external_request),
     83	STATS_DESC_COUNTER(VCPU, exit_io_request),
     84	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
     85	STATS_DESC_COUNTER(VCPU, exit_stop_request),
     86	STATS_DESC_COUNTER(VCPU, exit_validity),
     87	STATS_DESC_COUNTER(VCPU, exit_instruction),
     88	STATS_DESC_COUNTER(VCPU, exit_pei),
     89	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
     90	STATS_DESC_COUNTER(VCPU, instruction_lctl),
     91	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
     92	STATS_DESC_COUNTER(VCPU, instruction_stctl),
     93	STATS_DESC_COUNTER(VCPU, instruction_stctg),
     94	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
     95	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
     96	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
     97	STATS_DESC_COUNTER(VCPU, deliver_ckc),
     98	STATS_DESC_COUNTER(VCPU, deliver_cputm),
     99	STATS_DESC_COUNTER(VCPU, deliver_external_call),
    100	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
    101	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
    102	STATS_DESC_COUNTER(VCPU, deliver_virtio),
    103	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
    104	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
    105	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
    106	STATS_DESC_COUNTER(VCPU, deliver_program),
    107	STATS_DESC_COUNTER(VCPU, deliver_io),
    108	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
    109	STATS_DESC_COUNTER(VCPU, exit_wait_state),
    110	STATS_DESC_COUNTER(VCPU, inject_ckc),
    111	STATS_DESC_COUNTER(VCPU, inject_cputm),
    112	STATS_DESC_COUNTER(VCPU, inject_external_call),
    113	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
    114	STATS_DESC_COUNTER(VCPU, inject_mchk),
    115	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
    116	STATS_DESC_COUNTER(VCPU, inject_program),
    117	STATS_DESC_COUNTER(VCPU, inject_restart),
    118	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
    119	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
    120	STATS_DESC_COUNTER(VCPU, instruction_epsw),
    121	STATS_DESC_COUNTER(VCPU, instruction_gs),
    122	STATS_DESC_COUNTER(VCPU, instruction_io_other),
    123	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
    124	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
    125	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
    126	STATS_DESC_COUNTER(VCPU, instruction_ptff),
    127	STATS_DESC_COUNTER(VCPU, instruction_sck),
    128	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
    129	STATS_DESC_COUNTER(VCPU, instruction_stidp),
    130	STATS_DESC_COUNTER(VCPU, instruction_spx),
    131	STATS_DESC_COUNTER(VCPU, instruction_stpx),
    132	STATS_DESC_COUNTER(VCPU, instruction_stap),
    133	STATS_DESC_COUNTER(VCPU, instruction_iske),
    134	STATS_DESC_COUNTER(VCPU, instruction_ri),
    135	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
    136	STATS_DESC_COUNTER(VCPU, instruction_sske),
    137	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
    138	STATS_DESC_COUNTER(VCPU, instruction_stsi),
    139	STATS_DESC_COUNTER(VCPU, instruction_stfl),
    140	STATS_DESC_COUNTER(VCPU, instruction_tb),
    141	STATS_DESC_COUNTER(VCPU, instruction_tpi),
    142	STATS_DESC_COUNTER(VCPU, instruction_tprot),
    143	STATS_DESC_COUNTER(VCPU, instruction_tsch),
    144	STATS_DESC_COUNTER(VCPU, instruction_sie),
    145	STATS_DESC_COUNTER(VCPU, instruction_essa),
    146	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
    147	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
    148	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
    149	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
    150	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
    151	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
    152	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
    153	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
    154	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
    155	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
    156	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
    157	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
    158	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
    159	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
    160	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
    161	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
    162	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
    163	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
    164	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
    165	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
    166	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
    167	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
    168	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
    169	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
    170	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
    171	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
    172	STATS_DESC_COUNTER(VCPU, pfault_sync)
    173};
    174
    175const struct kvm_stats_header kvm_vcpu_stats_header = {
    176	.name_size = KVM_STATS_NAME_SIZE,
    177	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
    178	.id_offset = sizeof(struct kvm_stats_header),
    179	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
    180	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
    181		       sizeof(kvm_vcpu_stats_desc),
    182};
    183
    184/* allow nested virtualization in KVM (if enabled by user space) */
    185static int nested;
    186module_param(nested, int, S_IRUGO);
    187MODULE_PARM_DESC(nested, "Nested virtualization support");
    188
    189/* allow 1m huge page guest backing, if !nested */
    190static int hpage;
    191module_param(hpage, int, 0444);
    192MODULE_PARM_DESC(hpage, "1m huge page backing support");
    193
    194/* maximum percentage of steal time for polling.  >100 is treated like 100 */
    195static u8 halt_poll_max_steal = 10;
    196module_param(halt_poll_max_steal, byte, 0644);
    197MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
    198
    199/* if set to true, the GISA will be initialized and used if available */
    200static bool use_gisa  = true;
    201module_param(use_gisa, bool, 0644);
    202MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
    203
    204/* maximum diag9c forwarding per second */
    205unsigned int diag9c_forwarding_hz;
    206module_param(diag9c_forwarding_hz, uint, 0644);
    207MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
    208
    209/*
    210 * For now we handle at most 16 double words as this is what the s390 base
    211 * kernel handles and stores in the prefix page. If we ever need to go beyond
    212 * this, it requires changes to the code, but the external uapi can stay.
    213 */
    214#define SIZE_INTERNAL 16
    215
    216/*
    217 * Base feature mask that defines default mask for facilities. Consists of the
    218 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
    219 */
    220static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
    221/*
    222 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
    223 * and defines the facilities that can be enabled via a cpu model.
    224 */
    225static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
    226
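       /* number of unsigned longs actually used for the facility masks/lists */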
    227static unsigned long kvm_s390_fac_size(void)
    228{
    229	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
    230	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
    231	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
    232		sizeof(stfle_fac_list));
    233
    234	return SIZE_INTERNAL;
    235}
    236
    237/* available cpu features supported by kvm */
    238static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
    239/* available subfunctions indicated via query / "test bit" */
    240static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
    241
    242static struct gmap_notifier gmap_notifier;
    243static struct gmap_notifier vsie_gmap_notifier;
    244debug_info_t *kvm_s390_dbf;
    245debug_info_t *kvm_s390_dbf_uv;
    246
    247/* Section: not file related */
    248int kvm_arch_hardware_enable(void)
    249{
    250	/* every s390 is virtualization enabled ;-) */
    251	return 0;
    252}
    253
    254int kvm_arch_check_processor_compat(void *opaque)
    255{
    256	return 0;
    257}
    258
    259/* forward declarations */
    260static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
    261			      unsigned long end);
    262static int sca_switch_to_extended(struct kvm *kvm);
    263
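       /*
        * (epdx:epoch) form a 128-bit value. Adding the sign-extended 64-bit
        * delta means: add the sign extension (0 or -1) to the high word, plus
        * a carry whenever the low-word addition wraps, which is detected by
        * epoch_new < delta (unsigned). E.g. epoch = 5, delta = -1: epoch
        * becomes 4, and 4 < 0xffff...ffff, so epdx += -1 + 1 = 0.
        */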
    264static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
    265{
    266	u8 delta_idx = 0;
    267
    268	/*
    269	 * The TOD jumps by delta, we have to compensate this by adding
    270	 * -delta to the epoch.
    271	 */
    272	delta = -delta;
    273
    274	/* sign-extension - we're adding to signed values below */
    275	if ((s64)delta < 0)
    276		delta_idx = -1;
    277
    278	scb->epoch += delta;
    279	if (scb->ecd & ECD_MEF) {
    280		scb->epdx += delta_idx;
    281		if (scb->epoch < delta)
    282			scb->epdx += 1;
    283	}
    284}
    285
    286/*
    287 * This callback is executed during stop_machine(). All CPUs are therefore
    288 * temporarily stopped. In order not to change guest behavior, we have to
    289 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
    290 * so a CPU won't be stopped while calculating with the epoch.
    291 */
    292static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
    293			  void *v)
    294{
    295	struct kvm *kvm;
    296	struct kvm_vcpu *vcpu;
    297	unsigned long i;
    298	unsigned long long *delta = v;
    299
    300	list_for_each_entry(kvm, &vm_list, vm_list) {
    301		kvm_for_each_vcpu(i, vcpu, kvm) {
    302			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
    303			if (i == 0) {
    304				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
    305				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
    306			}
    307			if (vcpu->arch.cputm_enabled)
    308				vcpu->arch.cputm_start += *delta;
    309			if (vcpu->arch.vsie_block)
    310				kvm_clock_sync_scb(vcpu->arch.vsie_block,
    311						   *delta);
    312		}
    313	}
    314	return NOTIFY_OK;
    315}
    316
    317static struct notifier_block kvm_clock_notifier = {
    318	.notifier_call = kvm_clock_sync,
    319};
    320
    321int kvm_arch_hardware_setup(void *opaque)
    322{
    323	gmap_notifier.notifier_call = kvm_gmap_notifier;
    324	gmap_register_pte_notifier(&gmap_notifier);
    325	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
    326	gmap_register_pte_notifier(&vsie_gmap_notifier);
    327	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
    328				       &kvm_clock_notifier);
    329	return 0;
    330}
    331
    332void kvm_arch_hardware_unsetup(void)
    333{
    334	gmap_unregister_pte_notifier(&gmap_notifier);
    335	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
    336	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
    337					 &kvm_clock_notifier);
    338}
    339
    340static void allow_cpu_feat(unsigned long nr)
    341{
    342	set_bit_inv(nr, kvm_s390_available_cpu_feat);
    343}
    344
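       /*
        * PERFORM LOCKED OPERATION with bit 0x100 set in the function code is
        * the "test bit" form: it only sets the condition code, where cc 0
        * means the queried function code is installed. ipm/srl move the cc
        * into the result.
        */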
    345static inline int plo_test_bit(unsigned char nr)
    346{
    347	unsigned long function = (unsigned long)nr | 0x100;
    348	int cc;
    349
    350	asm volatile(
    351		"	lgr	0,%[function]\n"
    352		/* Parameter registers are ignored for "test bit" */
    353		"	plo	0,0,0,0(0)\n"
    354		"	ipm	%0\n"
    355		"	srl	%0,28\n"
    356		: "=d" (cc)
    357		: [function] "d" (function)
    358		: "cc", "0");
    359	return cc == 0;
    360}
    361
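       /*
        * Execute the query function of an RRF-format instruction: GR0 = 0
        * selects the query, GR1 holds the address of the block that receives
        * the subfunction indication bits.
        */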
    362static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
    363{
    364	asm volatile(
    365		"	lghi	0,0\n"
    366		"	lgr	1,%[query]\n"
    367		/* Parameter registers are ignored */
    368		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
    369		:
    370		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
    371		: "cc", "memory", "0", "1");
    372}
    373
    374#define INSN_SORTL 0xb938
    375#define INSN_DFLTCC 0xb939
    376
    377static void kvm_s390_cpu_feat_init(void)
    378{
    379	int i;
    380
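       	/*
       	 * plo[] is a 256-bit, MSB-first bitmap: bit i is set iff PLO
       	 * function code i is installed.
       	 */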
    381	for (i = 0; i < 256; ++i) {
    382		if (plo_test_bit(i))
    383			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
    384	}
    385
    386	if (test_facility(28)) /* TOD-clock steering */
    387		ptff(kvm_s390_available_subfunc.ptff,
    388		     sizeof(kvm_s390_available_subfunc.ptff),
    389		     PTFF_QAF);
    390
    391	if (test_facility(17)) { /* MSA */
    392		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
    393			      kvm_s390_available_subfunc.kmac);
    394		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
    395			      kvm_s390_available_subfunc.kmc);
    396		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
    397			      kvm_s390_available_subfunc.km);
    398		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
    399			      kvm_s390_available_subfunc.kimd);
    400		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
    401			      kvm_s390_available_subfunc.klmd);
    402	}
    403	if (test_facility(76)) /* MSA3 */
    404		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
    405			      kvm_s390_available_subfunc.pckmo);
    406	if (test_facility(77)) { /* MSA4 */
    407		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
    408			      kvm_s390_available_subfunc.kmctr);
    409		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
    410			      kvm_s390_available_subfunc.kmf);
    411		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
    412			      kvm_s390_available_subfunc.kmo);
    413		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
    414			      kvm_s390_available_subfunc.pcc);
    415	}
    416	if (test_facility(57)) /* MSA5 */
    417		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
    418			      kvm_s390_available_subfunc.ppno);
    419
    420	if (test_facility(146)) /* MSA8 */
    421		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
    422			      kvm_s390_available_subfunc.kma);
    423
    424	if (test_facility(155)) /* MSA9 */
    425		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
    426			      kvm_s390_available_subfunc.kdsa);
    427
    428	if (test_facility(150)) /* SORTL */
    429		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
    430
    431	if (test_facility(151)) /* DFLTCC */
    432		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
    433
    434	if (MACHINE_HAS_ESOP)
    435		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
    436	/*
    437	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
    438	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
    439	 */
    440	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
    441	    !test_facility(3) || !nested)
    442		return;
    443	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
    444	if (sclp.has_64bscao)
    445		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
    446	if (sclp.has_siif)
    447		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
    448	if (sclp.has_gpere)
    449		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
    450	if (sclp.has_gsls)
    451		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
    452	if (sclp.has_ib)
    453		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
    454	if (sclp.has_cei)
    455		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
    456	if (sclp.has_ibs)
    457		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
    458	if (sclp.has_kss)
    459		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
    460	/*
    461	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
    462	 * all skey handling functions read/set the skey from the PGSTE
    463	 * instead of the real storage key.
    464	 *
    465	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
    466	 * resident pages to be detected as preserved.
    467	 *
    468	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
    469	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
    470	 *
    471	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
    472	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
    473	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
    474	 *
    475	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
    476	 * cannot easily shadow the SCA because of the ipte lock.
    477	 */
    478}
    479
    480int kvm_arch_init(void *opaque)
    481{
    482	int rc = -ENOMEM;
    483
    484	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
    485	if (!kvm_s390_dbf)
    486		return -ENOMEM;
    487
    488	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
    489	if (!kvm_s390_dbf_uv)
    490		goto out;
    491
    492	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
    493	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
    494		goto out;
    495
    496	kvm_s390_cpu_feat_init();
    497
    498	/* Register floating interrupt controller interface. */
    499	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
    500	if (rc) {
    501		pr_err("A FLIC registration call failed with rc=%d\n", rc);
    502		goto out;
    503	}
    504
    505	rc = kvm_s390_gib_init(GAL_ISC);
    506	if (rc)
    507		goto out;
    508
    509	return 0;
    510
    511out:
    512	kvm_arch_exit();
    513	return rc;
    514}
    515
    516void kvm_arch_exit(void)
    517{
    518	kvm_s390_gib_destroy();
    519	debug_unregister(kvm_s390_dbf);
    520	debug_unregister(kvm_s390_dbf_uv);
    521}
    522
    523/* Section: device related */
    524long kvm_arch_dev_ioctl(struct file *filp,
    525			unsigned int ioctl, unsigned long arg)
    526{
    527	if (ioctl == KVM_S390_ENABLE_SIE)
    528		return s390_enable_sie();
    529	return -EINVAL;
    530}
    531
    532int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
    533{
    534	int r;
    535
    536	switch (ext) {
    537	case KVM_CAP_S390_PSW:
    538	case KVM_CAP_S390_GMAP:
    539	case KVM_CAP_SYNC_MMU:
    540#ifdef CONFIG_KVM_S390_UCONTROL
    541	case KVM_CAP_S390_UCONTROL:
    542#endif
    543	case KVM_CAP_ASYNC_PF:
    544	case KVM_CAP_SYNC_REGS:
    545	case KVM_CAP_ONE_REG:
    546	case KVM_CAP_ENABLE_CAP:
    547	case KVM_CAP_S390_CSS_SUPPORT:
    548	case KVM_CAP_IOEVENTFD:
    549	case KVM_CAP_DEVICE_CTRL:
    550	case KVM_CAP_S390_IRQCHIP:
    551	case KVM_CAP_VM_ATTRIBUTES:
    552	case KVM_CAP_MP_STATE:
    553	case KVM_CAP_IMMEDIATE_EXIT:
    554	case KVM_CAP_S390_INJECT_IRQ:
    555	case KVM_CAP_S390_USER_SIGP:
    556	case KVM_CAP_S390_USER_STSI:
    557	case KVM_CAP_S390_SKEYS:
    558	case KVM_CAP_S390_IRQ_STATE:
    559	case KVM_CAP_S390_USER_INSTR0:
    560	case KVM_CAP_S390_CMMA_MIGRATION:
    561	case KVM_CAP_S390_AIS:
    562	case KVM_CAP_S390_AIS_MIGRATION:
    563	case KVM_CAP_S390_VCPU_RESETS:
    564	case KVM_CAP_SET_GUEST_DEBUG:
    565	case KVM_CAP_S390_DIAG318:
    566	case KVM_CAP_S390_MEM_OP_EXTENSION:
    567		r = 1;
    568		break;
    569	case KVM_CAP_SET_GUEST_DEBUG2:
    570		r = KVM_GUESTDBG_VALID_MASK;
    571		break;
    572	case KVM_CAP_S390_HPAGE_1M:
    573		r = 0;
    574		if (hpage && !kvm_is_ucontrol(kvm))
    575			r = 1;
    576		break;
    577	case KVM_CAP_S390_MEM_OP:
    578		r = MEM_OP_MAX_SIZE;
    579		break;
    580	case KVM_CAP_NR_VCPUS:
    581	case KVM_CAP_MAX_VCPUS:
    582	case KVM_CAP_MAX_VCPU_ID:
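       		/*
       		 * A basic SCA limits a VM to 64 VCPUs, an extended SCA to 248;
       		 * without SCA entries the SCA imposes no limit at all.
       		 */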
    583		r = KVM_S390_BSCA_CPU_SLOTS;
    584		if (!kvm_s390_use_sca_entries())
    585			r = KVM_MAX_VCPUS;
    586		else if (sclp.has_esca && sclp.has_64bscao)
    587			r = KVM_S390_ESCA_CPU_SLOTS;
    588		if (ext == KVM_CAP_NR_VCPUS)
    589			r = min_t(unsigned int, num_online_cpus(), r);
    590		break;
    591	case KVM_CAP_S390_COW:
    592		r = MACHINE_HAS_ESOP;
    593		break;
    594	case KVM_CAP_S390_VECTOR_REGISTERS:
    595		r = MACHINE_HAS_VX;
    596		break;
    597	case KVM_CAP_S390_RI:
    598		r = test_facility(64);
    599		break;
    600	case KVM_CAP_S390_GS:
    601		r = test_facility(133);
    602		break;
    603	case KVM_CAP_S390_BPB:
    604		r = test_facility(82);
    605		break;
    606	case KVM_CAP_S390_PROTECTED:
    607		r = is_prot_virt_host();
    608		break;
    609	default:
    610		r = 0;
    611	}
    612	return r;
    613}
    614
    615void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
    616{
    617	int i;
    618	gfn_t cur_gfn, last_gfn;
    619	unsigned long gaddr, vmaddr;
    620	struct gmap *gmap = kvm->arch.gmap;
    621	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
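       	/* one bit per 4k page of a 1M segment (_PAGE_ENTRIES == 256) */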
    622
    623	/* Loop over all guest segments */
    624	cur_gfn = memslot->base_gfn;
    625	last_gfn = memslot->base_gfn + memslot->npages;
    626	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
    627		gaddr = gfn_to_gpa(cur_gfn);
    628		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
    629		if (kvm_is_error_hva(vmaddr))
    630			continue;
    631
    632		bitmap_zero(bitmap, _PAGE_ENTRIES);
    633		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
    634		for (i = 0; i < _PAGE_ENTRIES; i++) {
    635			if (test_bit(i, bitmap))
    636				mark_page_dirty(kvm, cur_gfn + i);
    637		}
    638
    639		if (fatal_signal_pending(current))
    640			return;
    641		cond_resched();
    642	}
    643}
    644
    645/* Section: vm related */
    646static void sca_del_vcpu(struct kvm_vcpu *vcpu);
    647
    648/*
    649 * Get (and clear) the dirty memory log for a memory slot.
    650 */
    651int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
    652			       struct kvm_dirty_log *log)
    653{
    654	int r;
    655	unsigned long n;
    656	struct kvm_memory_slot *memslot;
    657	int is_dirty;
    658
    659	if (kvm_is_ucontrol(kvm))
    660		return -EINVAL;
    661
    662	mutex_lock(&kvm->slots_lock);
    663
    664	r = -EINVAL;
    665	if (log->slot >= KVM_USER_MEM_SLOTS)
    666		goto out;
    667
    668	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
    669	if (r)
    670		goto out;
    671
    672	/* Clear the dirty log */
    673	if (is_dirty) {
    674		n = kvm_dirty_bitmap_bytes(memslot);
    675		memset(memslot->dirty_bitmap, 0, n);
    676	}
    677	r = 0;
    678out:
    679	mutex_unlock(&kvm->slots_lock);
    680	return r;
    681}
    682
    683static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
    684{
    685	unsigned long i;
    686	struct kvm_vcpu *vcpu;
    687
    688	kvm_for_each_vcpu(i, vcpu, kvm) {
    689		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
    690	}
    691}
    692
    693int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
    694{
    695	int r;
    696
    697	if (cap->flags)
    698		return -EINVAL;
    699
    700	switch (cap->cap) {
    701	case KVM_CAP_S390_IRQCHIP:
    702		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
    703		kvm->arch.use_irqchip = 1;
    704		r = 0;
    705		break;
    706	case KVM_CAP_S390_USER_SIGP:
    707		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
    708		kvm->arch.user_sigp = 1;
    709		r = 0;
    710		break;
    711	case KVM_CAP_S390_VECTOR_REGISTERS:
    712		mutex_lock(&kvm->lock);
    713		if (kvm->created_vcpus) {
    714			r = -EBUSY;
    715		} else if (MACHINE_HAS_VX) {
    716			set_kvm_facility(kvm->arch.model.fac_mask, 129);
    717			set_kvm_facility(kvm->arch.model.fac_list, 129);
    718			if (test_facility(134)) {
    719				set_kvm_facility(kvm->arch.model.fac_mask, 134);
    720				set_kvm_facility(kvm->arch.model.fac_list, 134);
    721			}
    722			if (test_facility(135)) {
    723				set_kvm_facility(kvm->arch.model.fac_mask, 135);
    724				set_kvm_facility(kvm->arch.model.fac_list, 135);
    725			}
    726			if (test_facility(148)) {
    727				set_kvm_facility(kvm->arch.model.fac_mask, 148);
    728				set_kvm_facility(kvm->arch.model.fac_list, 148);
    729			}
    730			if (test_facility(152)) {
    731				set_kvm_facility(kvm->arch.model.fac_mask, 152);
    732				set_kvm_facility(kvm->arch.model.fac_list, 152);
    733			}
    734			if (test_facility(192)) {
    735				set_kvm_facility(kvm->arch.model.fac_mask, 192);
    736				set_kvm_facility(kvm->arch.model.fac_list, 192);
    737			}
    738			r = 0;
    739		} else
    740			r = -EINVAL;
    741		mutex_unlock(&kvm->lock);
    742		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
    743			 r ? "(not available)" : "(success)");
    744		break;
    745	case KVM_CAP_S390_RI:
    746		r = -EINVAL;
    747		mutex_lock(&kvm->lock);
    748		if (kvm->created_vcpus) {
    749			r = -EBUSY;
    750		} else if (test_facility(64)) {
    751			set_kvm_facility(kvm->arch.model.fac_mask, 64);
    752			set_kvm_facility(kvm->arch.model.fac_list, 64);
    753			r = 0;
    754		}
    755		mutex_unlock(&kvm->lock);
    756		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
    757			 r ? "(not available)" : "(success)");
    758		break;
    759	case KVM_CAP_S390_AIS:
    760		mutex_lock(&kvm->lock);
    761		if (kvm->created_vcpus) {
    762			r = -EBUSY;
    763		} else {
    764			set_kvm_facility(kvm->arch.model.fac_mask, 72);
    765			set_kvm_facility(kvm->arch.model.fac_list, 72);
    766			r = 0;
    767		}
    768		mutex_unlock(&kvm->lock);
    769		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
    770			 r ? "(not available)" : "(success)");
    771		break;
    772	case KVM_CAP_S390_GS:
    773		r = -EINVAL;
    774		mutex_lock(&kvm->lock);
    775		if (kvm->created_vcpus) {
    776			r = -EBUSY;
    777		} else if (test_facility(133)) {
    778			set_kvm_facility(kvm->arch.model.fac_mask, 133);
    779			set_kvm_facility(kvm->arch.model.fac_list, 133);
    780			r = 0;
    781		}
    782		mutex_unlock(&kvm->lock);
    783		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
    784			 r ? "(not available)" : "(success)");
    785		break;
    786	case KVM_CAP_S390_HPAGE_1M:
    787		mutex_lock(&kvm->lock);
    788		if (kvm->created_vcpus)
    789			r = -EBUSY;
    790		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
    791			r = -EINVAL;
    792		else {
    793			r = 0;
    794			mmap_write_lock(kvm->mm);
    795			kvm->mm->context.allow_gmap_hpage_1m = 1;
    796			mmap_write_unlock(kvm->mm);
    797			/*
    798			 * We might have to create fake 4k page
    799			 * tables. To prevent the hardware from working
    800			 * on stale PGSTEs, we emulate these instructions.
    801			 */
    802			kvm->arch.use_skf = 0;
    803			kvm->arch.use_pfmfi = 0;
    804		}
    805		mutex_unlock(&kvm->lock);
    806		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
    807			 r ? "(not available)" : "(success)");
    808		break;
    809	case KVM_CAP_S390_USER_STSI:
    810		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
    811		kvm->arch.user_stsi = 1;
    812		r = 0;
    813		break;
    814	case KVM_CAP_S390_USER_INSTR0:
    815		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
    816		kvm->arch.user_instr0 = 1;
    817		icpt_operexc_on_all_vcpus(kvm);
    818		r = 0;
    819		break;
    820	default:
    821		r = -EINVAL;
    822		break;
    823	}
    824	return r;
    825}
    826
    827static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
    828{
    829	int ret;
    830
    831	switch (attr->attr) {
    832	case KVM_S390_VM_MEM_LIMIT_SIZE:
    833		ret = 0;
    834		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
    835			 kvm->arch.mem_limit);
    836		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
    837			ret = -EFAULT;
    838		break;
    839	default:
    840		ret = -ENXIO;
    841		break;
    842	}
    843	return ret;
    844}
    845
    846static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
    847{
    848	int ret;
    849	unsigned int idx;
    850	switch (attr->attr) {
    851	case KVM_S390_VM_MEM_ENABLE_CMMA:
    852		ret = -ENXIO;
    853		if (!sclp.has_cmma)
    854			break;
    855
    856		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
    857		mutex_lock(&kvm->lock);
    858		if (kvm->created_vcpus)
    859			ret = -EBUSY;
    860		else if (kvm->mm->context.allow_gmap_hpage_1m)
    861			ret = -EINVAL;
    862		else {
    863			kvm->arch.use_cmma = 1;
    864			/* Not compatible with cmma. */
    865			kvm->arch.use_pfmfi = 0;
    866			ret = 0;
    867		}
    868		mutex_unlock(&kvm->lock);
    869		break;
    870	case KVM_S390_VM_MEM_CLR_CMMA:
    871		ret = -ENXIO;
    872		if (!sclp.has_cmma)
    873			break;
    874		ret = -EINVAL;
    875		if (!kvm->arch.use_cmma)
    876			break;
    877
    878		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
    879		mutex_lock(&kvm->lock);
    880		idx = srcu_read_lock(&kvm->srcu);
    881		s390_reset_cmma(kvm->arch.gmap->mm);
    882		srcu_read_unlock(&kvm->srcu, idx);
    883		mutex_unlock(&kvm->lock);
    884		ret = 0;
    885		break;
    886	case KVM_S390_VM_MEM_LIMIT_SIZE: {
    887		unsigned long new_limit;
    888
    889		if (kvm_is_ucontrol(kvm))
    890			return -EINVAL;
    891
    892		if (get_user(new_limit, (u64 __user *)attr->addr))
    893			return -EFAULT;
    894
    895		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
    896		    new_limit > kvm->arch.mem_limit)
    897			return -E2BIG;
    898
    899		if (!new_limit)
    900			return -EINVAL;
    901
    902		/* gmap_create takes last usable address */
    903		if (new_limit != KVM_S390_NO_MEM_LIMIT)
    904			new_limit -= 1;
    905
    906		ret = -EBUSY;
    907		mutex_lock(&kvm->lock);
    908		if (!kvm->created_vcpus) {
    909			/* gmap_create will round the limit up */
    910			struct gmap *new = gmap_create(current->mm, new_limit);
    911
    912			if (!new) {
    913				ret = -ENOMEM;
    914			} else {
    915				gmap_remove(kvm->arch.gmap);
    916				new->private = kvm;
    917				kvm->arch.gmap = new;
    918				ret = 0;
    919			}
    920		}
    921		mutex_unlock(&kvm->lock);
    922		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
    923		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
    924			 (void *) kvm->arch.gmap->asce);
    925		break;
    926	}
    927	default:
    928		ret = -ENXIO;
    929		break;
    930	}
    931	return ret;
    932}
    933
    934static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
    935
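       /*
        * Refresh the crycb settings of all vcpus. Blocking every vcpu first
        * guarantees that none of them runs in SIE while its crypto setup is
        * rewritten; the VSIE request makes nested guests pick up the new
        * shadow crycb as well.
        */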
    936void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
    937{
    938	struct kvm_vcpu *vcpu;
    939	unsigned long i;
    940
    941	kvm_s390_vcpu_block_all(kvm);
    942
    943	kvm_for_each_vcpu(i, vcpu, kvm) {
    944		kvm_s390_vcpu_crypto_setup(vcpu);
    945		/* recreate the shadow crycb by leaving the VSIE handler */
    946		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
    947	}
    948
    949	kvm_s390_vcpu_unblock_all(kvm);
    950}
    951
    952static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
    953{
    954	mutex_lock(&kvm->lock);
    955	switch (attr->attr) {
    956	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
    957		if (!test_kvm_facility(kvm, 76)) {
    958			mutex_unlock(&kvm->lock);
    959			return -EINVAL;
    960		}
    961		get_random_bytes(
    962			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
    963			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
    964		kvm->arch.crypto.aes_kw = 1;
    965		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
    966		break;
    967	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
    968		if (!test_kvm_facility(kvm, 76)) {
    969			mutex_unlock(&kvm->lock);
    970			return -EINVAL;
    971		}
    972		get_random_bytes(
    973			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
    974			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
    975		kvm->arch.crypto.dea_kw = 1;
    976		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
    977		break;
    978	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
    979		if (!test_kvm_facility(kvm, 76)) {
    980			mutex_unlock(&kvm->lock);
    981			return -EINVAL;
    982		}
    983		kvm->arch.crypto.aes_kw = 0;
    984		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
    985			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
    986		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
    987		break;
    988	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
    989		if (!test_kvm_facility(kvm, 76)) {
    990			mutex_unlock(&kvm->lock);
    991			return -EINVAL;
    992		}
    993		kvm->arch.crypto.dea_kw = 0;
    994		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
    995			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
    996		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
    997		break;
    998	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
    999		if (!ap_instructions_available()) {
   1000			mutex_unlock(&kvm->lock);
   1001			return -EOPNOTSUPP;
   1002		}
   1003		kvm->arch.crypto.apie = 1;
   1004		break;
   1005	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
   1006		if (!ap_instructions_available()) {
   1007			mutex_unlock(&kvm->lock);
   1008			return -EOPNOTSUPP;
   1009		}
   1010		kvm->arch.crypto.apie = 0;
   1011		break;
   1012	default:
   1013		mutex_unlock(&kvm->lock);
   1014		return -ENXIO;
   1015	}
   1016
   1017	kvm_s390_vcpu_crypto_reset_all(kvm);
   1018	mutex_unlock(&kvm->lock);
   1019	return 0;
   1020}
   1021
   1022static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
   1023{
   1024	unsigned long cx;
   1025	struct kvm_vcpu *vcpu;
   1026
   1027	kvm_for_each_vcpu(cx, vcpu, kvm)
   1028		kvm_s390_sync_request(req, vcpu);
   1029}
   1030
   1031/*
   1032 * Must be called with kvm->srcu held to avoid races on memslots, and with
   1033 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
   1034 */
   1035static int kvm_s390_vm_start_migration(struct kvm *kvm)
   1036{
   1037	struct kvm_memory_slot *ms;
   1038	struct kvm_memslots *slots;
   1039	unsigned long ram_pages = 0;
   1040	int bkt;
   1041
   1042	/* migration mode already enabled */
   1043	if (kvm->arch.migration_mode)
   1044		return 0;
   1045	slots = kvm_memslots(kvm);
   1046	if (!slots || kvm_memslots_empty(slots))
   1047		return -EINVAL;
   1048
   1049	if (!kvm->arch.use_cmma) {
   1050		kvm->arch.migration_mode = 1;
   1051		return 0;
   1052	}
   1053	/* mark all the pages in active slots as dirty */
   1054	kvm_for_each_memslot(ms, bkt, slots) {
   1055		if (!ms->dirty_bitmap)
   1056			return -EINVAL;
   1057		/*
   1058		 * The second half of the bitmap is only used on x86,
   1059		 * and would be wasted otherwise, so we put it to good
   1060		 * use here to keep track of the state of the storage
   1061		 * attributes.
   1062		 */
   1063		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
   1064		ram_pages += ms->npages;
   1065	}
   1066	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
   1067	kvm->arch.migration_mode = 1;
   1068	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
   1069	return 0;
   1070}
   1071
   1072/*
   1073 * Must be called with kvm->slots_lock to avoid races with ourselves and
   1074 * kvm_s390_vm_start_migration.
   1075 */
   1076static int kvm_s390_vm_stop_migration(struct kvm *kvm)
   1077{
   1078	/* migration mode already disabled */
   1079	if (!kvm->arch.migration_mode)
   1080		return 0;
   1081	kvm->arch.migration_mode = 0;
   1082	if (kvm->arch.use_cmma)
   1083		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
   1084	return 0;
   1085}
   1086
   1087static int kvm_s390_vm_set_migration(struct kvm *kvm,
   1088				     struct kvm_device_attr *attr)
   1089{
   1090	int res = -ENXIO;
   1091
   1092	mutex_lock(&kvm->slots_lock);
   1093	switch (attr->attr) {
   1094	case KVM_S390_VM_MIGRATION_START:
   1095		res = kvm_s390_vm_start_migration(kvm);
   1096		break;
   1097	case KVM_S390_VM_MIGRATION_STOP:
   1098		res = kvm_s390_vm_stop_migration(kvm);
   1099		break;
   1100	default:
   1101		break;
   1102	}
   1103	mutex_unlock(&kvm->slots_lock);
   1104
   1105	return res;
   1106}
   1107
   1108static int kvm_s390_vm_get_migration(struct kvm *kvm,
   1109				     struct kvm_device_attr *attr)
   1110{
   1111	u64 mig = kvm->arch.migration_mode;
   1112
   1113	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
   1114		return -ENXIO;
   1115
   1116	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
   1117		return -EFAULT;
   1118	return 0;
   1119}
   1120
   1121static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
   1122{
   1123	struct kvm_s390_vm_tod_clock gtod;
   1124
   1125	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
   1126		return -EFAULT;
   1127
   1128	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
   1129		return -EINVAL;
   1130	kvm_s390_set_tod_clock(kvm, &gtod);
   1131
   1132	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
   1133		gtod.epoch_idx, gtod.tod);
   1134
   1135	return 0;
   1136}
   1137
   1138static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
   1139{
   1140	u8 gtod_high;
   1141
   1142	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
   1143					   sizeof(gtod_high)))
   1144		return -EFAULT;
   1145
   1146	if (gtod_high != 0)
   1147		return -EINVAL;
   1148	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
   1149
   1150	return 0;
   1151}
   1152
   1153static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
   1154{
   1155	struct kvm_s390_vm_tod_clock gtod = { 0 };
   1156
   1157	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
   1158			   sizeof(gtod.tod)))
   1159		return -EFAULT;
   1160
   1161	kvm_s390_set_tod_clock(kvm, &gtod);
   1162	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
   1163	return 0;
   1164}
   1165
   1166static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
   1167{
   1168	int ret;
   1169
   1170	if (attr->flags)
   1171		return -EINVAL;
   1172
   1173	switch (attr->attr) {
   1174	case KVM_S390_VM_TOD_EXT:
   1175		ret = kvm_s390_set_tod_ext(kvm, attr);
   1176		break;
   1177	case KVM_S390_VM_TOD_HIGH:
   1178		ret = kvm_s390_set_tod_high(kvm, attr);
   1179		break;
   1180	case KVM_S390_VM_TOD_LOW:
   1181		ret = kvm_s390_set_tod_low(kvm, attr);
   1182		break;
   1183	default:
   1184		ret = -ENXIO;
   1185		break;
   1186	}
   1187	return ret;
   1188}
   1189
   1190static void kvm_s390_get_tod_clock(struct kvm *kvm,
   1191				   struct kvm_s390_vm_tod_clock *gtod)
   1192{
   1193	union tod_clock clk;
   1194
   1195	preempt_disable();
   1196
   1197	store_tod_clock_ext(&clk);
   1198
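       	/*
       	 * 128-bit add of the guest epoch (epdx:epoch) to the host TOD;
       	 * a wrap-around of the low 64 bits carries into epoch_idx.
       	 */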
   1199	gtod->tod = clk.tod + kvm->arch.epoch;
   1200	gtod->epoch_idx = 0;
   1201	if (test_kvm_facility(kvm, 139)) {
   1202		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
   1203		if (gtod->tod < clk.tod)
   1204			gtod->epoch_idx += 1;
   1205	}
   1206
   1207	preempt_enable();
   1208}
   1209
   1210static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
   1211{
   1212	struct kvm_s390_vm_tod_clock gtod;
   1213
   1214	memset(&gtod, 0, sizeof(gtod));
   1215	kvm_s390_get_tod_clock(kvm, &gtod);
   1216	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
   1217		return -EFAULT;
   1218
   1219	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
   1220		gtod.epoch_idx, gtod.tod);
   1221	return 0;
   1222}
   1223
   1224static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
   1225{
   1226	u8 gtod_high = 0;
   1227
   1228	if (copy_to_user((void __user *)attr->addr, &gtod_high,
   1229					 sizeof(gtod_high)))
   1230		return -EFAULT;
   1231	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
   1232
   1233	return 0;
   1234}
   1235
   1236static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
   1237{
   1238	u64 gtod;
   1239
   1240	gtod = kvm_s390_get_tod_clock_fast(kvm);
   1241	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
   1242		return -EFAULT;
   1243	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
   1244
   1245	return 0;
   1246}
   1247
   1248static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
   1249{
   1250	int ret;
   1251
   1252	if (attr->flags)
   1253		return -EINVAL;
   1254
   1255	switch (attr->attr) {
   1256	case KVM_S390_VM_TOD_EXT:
   1257		ret = kvm_s390_get_tod_ext(kvm, attr);
   1258		break;
   1259	case KVM_S390_VM_TOD_HIGH:
   1260		ret = kvm_s390_get_tod_high(kvm, attr);
   1261		break;
   1262	case KVM_S390_VM_TOD_LOW:
   1263		ret = kvm_s390_get_tod_low(kvm, attr);
   1264		break;
   1265	default:
   1266		ret = -ENXIO;
   1267		break;
   1268	}
   1269	return ret;
   1270}
   1271
   1272static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
   1273{
   1274	struct kvm_s390_vm_cpu_processor *proc;
   1275	u16 lowest_ibc, unblocked_ibc;
   1276	int ret = 0;
   1277
   1278	mutex_lock(&kvm->lock);
   1279	if (kvm->created_vcpus) {
   1280		ret = -EBUSY;
   1281		goto out;
   1282	}
   1283	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
   1284	if (!proc) {
   1285		ret = -ENOMEM;
   1286		goto out;
   1287	}
   1288	if (!copy_from_user(proc, (void __user *)attr->addr,
   1289			    sizeof(*proc))) {
   1290		kvm->arch.model.cpuid = proc->cpuid;
   1291		lowest_ibc = sclp.ibc >> 16 & 0xfff;
   1292		unblocked_ibc = sclp.ibc & 0xfff;
   1293		if (lowest_ibc && proc->ibc) {
   1294			if (proc->ibc > unblocked_ibc)
   1295				kvm->arch.model.ibc = unblocked_ibc;
   1296			else if (proc->ibc < lowest_ibc)
   1297				kvm->arch.model.ibc = lowest_ibc;
   1298			else
   1299				kvm->arch.model.ibc = proc->ibc;
   1300		}
   1301		memcpy(kvm->arch.model.fac_list, proc->fac_list,
   1302		       S390_ARCH_FAC_LIST_SIZE_BYTE);
   1303		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
   1304			 kvm->arch.model.ibc,
   1305			 kvm->arch.model.cpuid);
   1306		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
   1307			 kvm->arch.model.fac_list[0],
   1308			 kvm->arch.model.fac_list[1],
   1309			 kvm->arch.model.fac_list[2]);
   1310	} else
   1311		ret = -EFAULT;
   1312	kfree(proc);
   1313out:
   1314	mutex_unlock(&kvm->lock);
   1315	return ret;
   1316}
   1317
   1318static int kvm_s390_set_processor_feat(struct kvm *kvm,
   1319				       struct kvm_device_attr *attr)
   1320{
   1321	struct kvm_s390_vm_cpu_feat data;
   1322
   1323	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
   1324		return -EFAULT;
   1325	if (!bitmap_subset((unsigned long *) data.feat,
   1326			   kvm_s390_available_cpu_feat,
   1327			   KVM_S390_VM_CPU_FEAT_NR_BITS))
   1328		return -EINVAL;
   1329
   1330	mutex_lock(&kvm->lock);
   1331	if (kvm->created_vcpus) {
   1332		mutex_unlock(&kvm->lock);
   1333		return -EBUSY;
   1334	}
   1335	bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
   1336	mutex_unlock(&kvm->lock);
   1337	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
   1338			 data.feat[0],
   1339			 data.feat[1],
   1340			 data.feat[2]);
   1341	return 0;
   1342}
   1343
   1344static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
   1345					  struct kvm_device_attr *attr)
   1346{
   1347	mutex_lock(&kvm->lock);
   1348	if (kvm->created_vcpus) {
   1349		mutex_unlock(&kvm->lock);
   1350		return -EBUSY;
   1351	}
   1352
   1353	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
   1354			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
   1355		mutex_unlock(&kvm->lock);
   1356		return -EFAULT;
   1357	}
   1358	mutex_unlock(&kvm->lock);
   1359
   1360	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
   1361		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
   1362		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
   1363		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
   1364		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
   1365	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
   1366		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
   1367		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
   1368	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
   1369		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
   1370		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
   1371	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
   1372		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
   1373		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
   1374	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
   1375		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
   1376		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
   1377	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
   1378		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
   1379		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
   1380	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
   1381		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
   1382		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
   1383	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
   1384		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
   1385		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
   1386	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
   1387		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
   1388		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
   1389	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
   1390		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
   1391		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
   1392	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
   1393		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
   1394		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
   1395	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
   1396		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
   1397		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
   1398	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
   1399		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
   1400		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
   1401	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
   1402		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
   1403		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
   1404	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
   1405		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
   1406		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
   1407	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
   1408		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
   1409		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
   1410		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
   1411		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
   1412	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
   1413		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
   1414		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
   1415		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
   1416		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
   1417
   1418	return 0;
   1419}
   1420
   1421static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
   1422{
   1423	int ret = -ENXIO;
   1424
   1425	switch (attr->attr) {
   1426	case KVM_S390_VM_CPU_PROCESSOR:
   1427		ret = kvm_s390_set_processor(kvm, attr);
   1428		break;
   1429	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
   1430		ret = kvm_s390_set_processor_feat(kvm, attr);
   1431		break;
   1432	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
   1433		ret = kvm_s390_set_processor_subfunc(kvm, attr);
   1434		break;
   1435	}
   1436	return ret;
   1437}
   1438
   1439static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
   1440{
   1441	struct kvm_s390_vm_cpu_processor *proc;
   1442	int ret = 0;
   1443
   1444	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
   1445	if (!proc) {
   1446		ret = -ENOMEM;
   1447		goto out;
   1448	}
   1449	proc->cpuid = kvm->arch.model.cpuid;
   1450	proc->ibc = kvm->arch.model.ibc;
   1451	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
   1452	       S390_ARCH_FAC_LIST_SIZE_BYTE);
   1453	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
   1454		 kvm->arch.model.ibc,
   1455		 kvm->arch.model.cpuid);
   1456	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
   1457		 kvm->arch.model.fac_list[0],
   1458		 kvm->arch.model.fac_list[1],
   1459		 kvm->arch.model.fac_list[2]);
   1460	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
   1461		ret = -EFAULT;
   1462	kfree(proc);
   1463out:
   1464	return ret;
   1465}
   1466
   1467static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
   1468{
   1469	struct kvm_s390_vm_cpu_machine *mach;
   1470	int ret = 0;
   1471
   1472	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
   1473	if (!mach) {
   1474		ret = -ENOMEM;
   1475		goto out;
   1476	}
   1477	get_cpu_id((struct cpuid *) &mach->cpuid);
   1478	mach->ibc = sclp.ibc;
   1479	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
   1480	       S390_ARCH_FAC_LIST_SIZE_BYTE);
   1481	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
   1482	       sizeof(stfle_fac_list));
   1483	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
   1484		 kvm->arch.model.ibc,
   1485		 kvm->arch.model.cpuid);
   1486	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
   1487		 mach->fac_mask[0],
   1488		 mach->fac_mask[1],
   1489		 mach->fac_mask[2]);
   1490	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
   1491		 mach->fac_list[0],
   1492		 mach->fac_list[1],
   1493		 mach->fac_list[2]);
   1494	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
   1495		ret = -EFAULT;
   1496	kfree(mach);
   1497out:
   1498	return ret;
   1499}
   1500
   1501static int kvm_s390_get_processor_feat(struct kvm *kvm,
   1502				       struct kvm_device_attr *attr)
   1503{
   1504	struct kvm_s390_vm_cpu_feat data;
   1505
   1506	bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
   1507	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
   1508		return -EFAULT;
   1509	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
   1510			 data.feat[0],
   1511			 data.feat[1],
   1512			 data.feat[2]);
   1513	return 0;
   1514}
   1515
   1516static int kvm_s390_get_machine_feat(struct kvm *kvm,
   1517				     struct kvm_device_attr *attr)
   1518{
   1519	struct kvm_s390_vm_cpu_feat data;
   1520
   1521	bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
   1522	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
   1523		return -EFAULT;
   1524	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
   1525			 data.feat[0],
   1526			 data.feat[1],
   1527			 data.feat[2]);
   1528	return 0;
   1529}
   1530
   1531static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
   1532					  struct kvm_device_attr *attr)
   1533{
   1534	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
   1535	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
   1536		return -EFAULT;
   1537
   1538	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
   1539		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
   1540		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
   1541		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
   1542		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
   1543	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
   1544		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
   1545		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
   1546	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
   1547		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
   1548		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
   1549	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
   1550		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
   1551		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
   1552	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
   1553		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
   1554		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
   1555	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
   1556		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
   1557		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
   1558	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
   1559		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
   1560		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
   1561	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
   1562		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
   1563		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
   1564	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
   1565		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
   1566		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
   1567	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
   1568		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
   1569		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
   1570	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
   1571		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
   1572		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
   1573	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
   1574		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
   1575		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
   1576	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
   1577		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
   1578		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
   1579	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
   1580		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
   1581		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
   1582	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
   1583		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
   1584		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
   1585	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
   1586		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
   1587		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
   1588		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
   1589		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
   1590	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
   1591		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
   1592		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
   1593		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
   1594		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
   1595
   1596	return 0;
   1597}
   1598
   1599static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
   1600					struct kvm_device_attr *attr)
   1601{
   1602	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
   1603	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
   1604		return -EFAULT;
   1605
   1606	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
   1607		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
   1608		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
   1609		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
   1610		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
   1611	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
   1612		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
   1613		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
   1614	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
   1615		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
   1616		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
   1617	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
   1618		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
   1619		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
   1620	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
   1621		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
   1622		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
   1623	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
   1624		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
   1625		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
   1626	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
   1627		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
   1628		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
   1629	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
   1630		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
   1631		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
   1632	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
   1633		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
   1634		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
   1635	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
   1636		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
   1637		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
   1638	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
   1639		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
   1640		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
   1641	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
   1642		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
   1643		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
   1644	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
   1645		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
   1646		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
   1647	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
   1648		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
   1649		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
   1650	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
   1651		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
   1652		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
   1653	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
   1654		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
   1655		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
   1656		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
   1657		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
   1658	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
   1659		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
   1660		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
   1661		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
   1662		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
   1663
   1664	return 0;
   1665}
   1666
   1667static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
   1668{
   1669	int ret = -ENXIO;
   1670
   1671	switch (attr->attr) {
   1672	case KVM_S390_VM_CPU_PROCESSOR:
   1673		ret = kvm_s390_get_processor(kvm, attr);
   1674		break;
   1675	case KVM_S390_VM_CPU_MACHINE:
   1676		ret = kvm_s390_get_machine(kvm, attr);
   1677		break;
   1678	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
   1679		ret = kvm_s390_get_processor_feat(kvm, attr);
   1680		break;
   1681	case KVM_S390_VM_CPU_MACHINE_FEAT:
   1682		ret = kvm_s390_get_machine_feat(kvm, attr);
   1683		break;
   1684	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
   1685		ret = kvm_s390_get_processor_subfunc(kvm, attr);
   1686		break;
   1687	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
   1688		ret = kvm_s390_get_machine_subfunc(kvm, attr);
   1689		break;
   1690	}
   1691	return ret;
   1692}
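
/*
 * Usage sketch (hypothetical userspace, not part of this file): reading
 * the machine model through the attribute interface dispatched above.
 * "vm_fd" is an assumed, already-open VM file descriptor and error
 * handling is elided; the structs come from <linux/kvm.h>.
 *
 *	struct kvm_s390_vm_cpu_machine machine;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_MACHINE,
 *		.addr  = (__u64)(uintptr_t)&machine,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr) == 0)
 *		printf("cpuid 0x%llx ibc 0x%x\n",
 *		       (unsigned long long)machine.cpuid, machine.ibc);
 */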
   1693
   1694static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
   1695{
   1696	int ret;
   1697
   1698	switch (attr->group) {
   1699	case KVM_S390_VM_MEM_CTRL:
   1700		ret = kvm_s390_set_mem_control(kvm, attr);
   1701		break;
   1702	case KVM_S390_VM_TOD:
   1703		ret = kvm_s390_set_tod(kvm, attr);
   1704		break;
   1705	case KVM_S390_VM_CPU_MODEL:
   1706		ret = kvm_s390_set_cpu_model(kvm, attr);
   1707		break;
   1708	case KVM_S390_VM_CRYPTO:
   1709		ret = kvm_s390_vm_set_crypto(kvm, attr);
   1710		break;
   1711	case KVM_S390_VM_MIGRATION:
   1712		ret = kvm_s390_vm_set_migration(kvm, attr);
   1713		break;
   1714	default:
   1715		ret = -ENXIO;
   1716		break;
   1717	}
   1718
   1719	return ret;
   1720}
   1721
   1722static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
   1723{
   1724	int ret;
   1725
   1726	switch (attr->group) {
   1727	case KVM_S390_VM_MEM_CTRL:
   1728		ret = kvm_s390_get_mem_control(kvm, attr);
   1729		break;
   1730	case KVM_S390_VM_TOD:
   1731		ret = kvm_s390_get_tod(kvm, attr);
   1732		break;
   1733	case KVM_S390_VM_CPU_MODEL:
   1734		ret = kvm_s390_get_cpu_model(kvm, attr);
   1735		break;
   1736	case KVM_S390_VM_MIGRATION:
   1737		ret = kvm_s390_vm_get_migration(kvm, attr);
   1738		break;
   1739	default:
   1740		ret = -ENXIO;
   1741		break;
   1742	}
   1743
   1744	return ret;
   1745}
   1746
   1747static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
   1748{
   1749	int ret;
   1750
   1751	switch (attr->group) {
   1752	case KVM_S390_VM_MEM_CTRL:
   1753		switch (attr->attr) {
   1754		case KVM_S390_VM_MEM_ENABLE_CMMA:
   1755		case KVM_S390_VM_MEM_CLR_CMMA:
   1756			ret = sclp.has_cmma ? 0 : -ENXIO;
   1757			break;
   1758		case KVM_S390_VM_MEM_LIMIT_SIZE:
   1759			ret = 0;
   1760			break;
   1761		default:
   1762			ret = -ENXIO;
   1763			break;
   1764		}
   1765		break;
   1766	case KVM_S390_VM_TOD:
   1767		switch (attr->attr) {
   1768		case KVM_S390_VM_TOD_LOW:
   1769		case KVM_S390_VM_TOD_HIGH:
   1770			ret = 0;
   1771			break;
   1772		default:
   1773			ret = -ENXIO;
   1774			break;
   1775		}
   1776		break;
   1777	case KVM_S390_VM_CPU_MODEL:
   1778		switch (attr->attr) {
   1779		case KVM_S390_VM_CPU_PROCESSOR:
   1780		case KVM_S390_VM_CPU_MACHINE:
   1781		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
   1782		case KVM_S390_VM_CPU_MACHINE_FEAT:
   1783		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
   1784		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
   1785			ret = 0;
   1786			break;
   1787		default:
   1788			ret = -ENXIO;
   1789			break;
   1790		}
   1791		break;
   1792	case KVM_S390_VM_CRYPTO:
   1793		switch (attr->attr) {
   1794		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
   1795		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
   1796		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
   1797		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
   1798			ret = 0;
   1799			break;
   1800		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
   1801		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
   1802			ret = ap_instructions_available() ? 0 : -ENXIO;
   1803			break;
   1804		default:
   1805			ret = -ENXIO;
   1806			break;
   1807		}
   1808		break;
   1809	case KVM_S390_VM_MIGRATION:
   1810		ret = 0;
   1811		break;
   1812	default:
   1813		ret = -ENXIO;
   1814		break;
   1815	}
   1816
   1817	return ret;
   1818}
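
/*
 * Usage sketch (hypothetical userspace, not part of this file): probing
 * attribute support with KVM_HAS_DEVICE_ATTR, which lands in
 * kvm_s390_vm_has_attr() above. A return of 0 means the corresponding
 * GET/SET calls are expected to work.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_STATUS,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0)
 *		... migration mode attributes are usable ...
 */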
   1819
   1820static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
   1821{
   1822	uint8_t *keys;
   1823	uint64_t hva;
   1824	int srcu_idx, i, r = 0;
   1825
   1826	if (args->flags != 0)
   1827		return -EINVAL;
   1828
   1829	/* Is this guest using storage keys? */
   1830	if (!mm_uses_skeys(current->mm))
   1831		return KVM_S390_GET_SKEYS_NONE;
   1832
   1833	/* Enforce sane limit on memory allocation */
   1834	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
   1835		return -EINVAL;
   1836
   1837	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
   1838	if (!keys)
   1839		return -ENOMEM;
   1840
   1841	mmap_read_lock(current->mm);
   1842	srcu_idx = srcu_read_lock(&kvm->srcu);
   1843	for (i = 0; i < args->count; i++) {
   1844		hva = gfn_to_hva(kvm, args->start_gfn + i);
   1845		if (kvm_is_error_hva(hva)) {
   1846			r = -EFAULT;
   1847			break;
   1848		}
   1849
   1850		r = get_guest_storage_key(current->mm, hva, &keys[i]);
   1851		if (r)
   1852			break;
   1853	}
   1854	srcu_read_unlock(&kvm->srcu, srcu_idx);
   1855	mmap_read_unlock(current->mm);
   1856
   1857	if (!r) {
   1858		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
   1859				 sizeof(uint8_t) * args->count);
   1860		if (r)
   1861			r = -EFAULT;
   1862	}
   1863
   1864	kvfree(keys);
   1865	return r;
   1866}
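
/*
 * Usage sketch (hypothetical userspace, not part of this file): fetching
 * the storage keys of the first 128 guest frames. A return value of
 * KVM_S390_GET_SKEYS_NONE means the guest does not use storage keys at
 * all, so there is nothing to save.
 *
 *	uint8_t keys[128];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn     = 0,
 *		.count         = sizeof(keys),
 *		.skeydata_addr = (uint64_t)(uintptr_t)keys,
 *	};
 *
 *	ret = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */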
   1867
   1868static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
   1869{
   1870	uint8_t *keys;
   1871	uint64_t hva;
   1872	int srcu_idx, i, r = 0;
   1873	bool unlocked;
   1874
   1875	if (args->flags != 0)
   1876		return -EINVAL;
   1877
   1878	/* Enforce sane limit on memory allocation */
   1879	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
   1880		return -EINVAL;
   1881
   1882	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
   1883	if (!keys)
   1884		return -ENOMEM;
   1885
   1886	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
   1887			   sizeof(uint8_t) * args->count);
   1888	if (r) {
   1889		r = -EFAULT;
   1890		goto out;
   1891	}
   1892
   1893	/* Enable storage key handling for the guest */
   1894	r = s390_enable_skey();
   1895	if (r)
   1896		goto out;
   1897
   1898	i = 0;
   1899	mmap_read_lock(current->mm);
   1900	srcu_idx = srcu_read_lock(&kvm->srcu);
    1901	while (i < args->count) {
   1902		unlocked = false;
   1903		hva = gfn_to_hva(kvm, args->start_gfn + i);
   1904		if (kvm_is_error_hva(hva)) {
   1905			r = -EFAULT;
   1906			break;
   1907		}
   1908
   1909		/* Lowest order bit is reserved */
   1910		if (keys[i] & 0x01) {
   1911			r = -EINVAL;
   1912			break;
   1913		}
   1914
   1915		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
   1916		if (r) {
   1917			r = fixup_user_fault(current->mm, hva,
   1918					     FAULT_FLAG_WRITE, &unlocked);
   1919			if (r)
   1920				break;
   1921		}
   1922		if (!r)
   1923			i++;
   1924	}
   1925	srcu_read_unlock(&kvm->srcu, srcu_idx);
   1926	mmap_read_unlock(current->mm);
   1927out:
   1928	kvfree(keys);
   1929	return r;
   1930}
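
/*
 * Usage sketch (hypothetical userspace, not part of this file): writing
 * the keys back, e.g. on the destination of a migration. The lowest
 * order bit of each key byte is reserved and must be zero, matching the
 * -EINVAL check in kvm_s390_set_skeys() above.
 *
 *	args.start_gfn     = 0;
 *	args.count         = sizeof(keys);
 *	args.skeydata_addr = (uint64_t)(uintptr_t)keys;
 *	ret = ioctl(vm_fd, KVM_S390_SET_SKEYS, &args);
 */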
   1931
   1932/*
   1933 * Base address and length must be sent at the start of each block, therefore
    1934	 * it's cheaper to send some clean data, as long as the gap is smaller than
    1935	 * the size of two longs.
   1936 */
   1937#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
    1938	/* use the same limit as for storage keys, for consistency */
   1939#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
   1940
   1941static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
   1942			      u8 *res, unsigned long bufsize)
   1943{
   1944	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
   1945
   1946	args->count = 0;
   1947	while (args->count < bufsize) {
   1948		hva = gfn_to_hva(kvm, cur_gfn);
   1949		/*
   1950		 * We return an error if the first value was invalid, but we
   1951		 * return successfully if at least one value was copied.
   1952		 */
   1953		if (kvm_is_error_hva(hva))
   1954			return args->count ? 0 : -EFAULT;
   1955		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
   1956			pgstev = 0;
   1957		res[args->count++] = (pgstev >> 24) & 0x43;
   1958		cur_gfn++;
   1959	}
   1960
   1961	return 0;
   1962}
   1963
   1964static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
   1965						     gfn_t gfn)
   1966{
   1967	return ____gfn_to_memslot(slots, gfn, true);
   1968}
   1969
   1970static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
   1971					      unsigned long cur_gfn)
   1972{
   1973	struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
   1974	unsigned long ofs = cur_gfn - ms->base_gfn;
   1975	struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
   1976
   1977	if (ms->base_gfn + ms->npages <= cur_gfn) {
   1978		mnode = rb_next(mnode);
   1979		/* If we are above the highest slot, wrap around */
   1980		if (!mnode)
   1981			mnode = rb_first(&slots->gfn_tree);
   1982
   1983		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
   1984		ofs = 0;
   1985	}
   1986	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
   1987	while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
   1988		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
   1989		ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
   1990	}
   1991	return ms->base_gfn + ofs;
   1992}
   1993
   1994static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
   1995			     u8 *res, unsigned long bufsize)
   1996{
   1997	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
   1998	struct kvm_memslots *slots = kvm_memslots(kvm);
   1999	struct kvm_memory_slot *ms;
   2000
   2001	if (unlikely(kvm_memslots_empty(slots)))
   2002		return 0;
   2003
   2004	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
   2005	ms = gfn_to_memslot(kvm, cur_gfn);
   2006	args->count = 0;
   2007	args->start_gfn = cur_gfn;
   2008	if (!ms)
   2009		return 0;
   2010	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
   2011	mem_end = kvm_s390_get_gfn_end(slots);
   2012
   2013	while (args->count < bufsize) {
   2014		hva = gfn_to_hva(kvm, cur_gfn);
   2015		if (kvm_is_error_hva(hva))
   2016			return 0;
   2017		/* Decrement only if we actually flipped the bit to 0 */
   2018		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
   2019			atomic64_dec(&kvm->arch.cmma_dirty_pages);
   2020		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
   2021			pgstev = 0;
   2022		/* Save the value */
   2023		res[args->count++] = (pgstev >> 24) & 0x43;
   2024		/* If the next bit is too far away, stop. */
   2025		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
   2026			return 0;
   2027		/* If we reached the previous "next", find the next one */
   2028		if (cur_gfn == next_gfn)
   2029			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
   2030		/* Reached the end of memory or of the buffer, stop */
   2031		if ((next_gfn >= mem_end) ||
   2032		    (next_gfn - args->start_gfn >= bufsize))
   2033			return 0;
   2034		cur_gfn++;
   2035		/* Reached the end of the current memslot, take the next one. */
   2036		if (cur_gfn - ms->base_gfn >= ms->npages) {
   2037			ms = gfn_to_memslot(kvm, cur_gfn);
   2038			if (!ms)
   2039				return 0;
   2040		}
   2041	}
   2042	return 0;
   2043}
   2044
   2045/*
   2046 * This function searches for the next page with dirty CMMA attributes, and
   2047 * saves the attributes in the buffer up to either the end of the buffer or
   2048 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
   2049 * no trailing clean bytes are saved.
    2050	 * If no dirty bits were found, or if CMMA was not enabled or used, the
    2051	 * output buffer will indicate a length of 0.
   2052 */
   2053static int kvm_s390_get_cmma_bits(struct kvm *kvm,
   2054				  struct kvm_s390_cmma_log *args)
   2055{
   2056	unsigned long bufsize;
   2057	int srcu_idx, peek, ret;
   2058	u8 *values;
   2059
   2060	if (!kvm->arch.use_cmma)
   2061		return -ENXIO;
   2062	/* Invalid/unsupported flags were specified */
   2063	if (args->flags & ~KVM_S390_CMMA_PEEK)
   2064		return -EINVAL;
   2065	/* Migration mode query, and we are not doing a migration */
   2066	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
   2067	if (!peek && !kvm->arch.migration_mode)
   2068		return -EINVAL;
   2069	/* CMMA is disabled or was not used, or the buffer has length zero */
   2070	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
   2071	if (!bufsize || !kvm->mm->context.uses_cmm) {
   2072		memset(args, 0, sizeof(*args));
   2073		return 0;
   2074	}
   2075	/* We are not peeking, and there are no dirty pages */
   2076	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
   2077		memset(args, 0, sizeof(*args));
   2078		return 0;
   2079	}
   2080
   2081	values = vmalloc(bufsize);
   2082	if (!values)
   2083		return -ENOMEM;
   2084
   2085	mmap_read_lock(kvm->mm);
   2086	srcu_idx = srcu_read_lock(&kvm->srcu);
   2087	if (peek)
   2088		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
   2089	else
   2090		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
   2091	srcu_read_unlock(&kvm->srcu, srcu_idx);
   2092	mmap_read_unlock(kvm->mm);
   2093
   2094	if (kvm->arch.migration_mode)
   2095		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
   2096	else
   2097		args->remaining = 0;
   2098
   2099	if (copy_to_user((void __user *)args->values, values, args->count))
   2100		ret = -EFAULT;
   2101
   2102	vfree(values);
   2103	return ret;
   2104}
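
/*
 * Usage sketch (hypothetical userspace, not part of this file): the loop
 * a VMM could run on top of KVM_S390_GET_CMMA_BITS during migration
 * (migration mode enabled, so flags == 0; with KVM_S390_CMMA_PEEK the
 * values are read without clearing the dirty state). The kernel rewrites
 * args.start_gfn to the first gfn actually reported and sets args.count
 * and args.remaining.
 *
 *	uint8_t values[4096];
 *	struct kvm_s390_cmma_log args = {
 *		.start_gfn = 0,
 *		.count     = sizeof(values),
 *		.flags     = 0,
 *		.values    = (uint64_t)(uintptr_t)values,
 *	};
 *
 *	do {
 *		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &args) < 0)
 *			break;
 *		... send args.count values starting at args.start_gfn ...
 *		args.start_gfn += args.count;
 *	} while (args.remaining);
 */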
   2105
   2106/*
   2107 * This function sets the CMMA attributes for the given pages. If the input
   2108 * buffer has zero length, no action is taken, otherwise the attributes are
   2109 * set and the mm->context.uses_cmm flag is set.
   2110 */
   2111static int kvm_s390_set_cmma_bits(struct kvm *kvm,
   2112				  const struct kvm_s390_cmma_log *args)
   2113{
   2114	unsigned long hva, mask, pgstev, i;
   2115	uint8_t *bits;
   2116	int srcu_idx, r = 0;
   2117
   2118	mask = args->mask;
   2119
   2120	if (!kvm->arch.use_cmma)
   2121		return -ENXIO;
   2122	/* invalid/unsupported flags */
   2123	if (args->flags != 0)
   2124		return -EINVAL;
   2125	/* Enforce sane limit on memory allocation */
   2126	if (args->count > KVM_S390_CMMA_SIZE_MAX)
   2127		return -EINVAL;
   2128	/* Nothing to do */
   2129	if (args->count == 0)
   2130		return 0;
   2131
   2132	bits = vmalloc(array_size(sizeof(*bits), args->count));
   2133	if (!bits)
   2134		return -ENOMEM;
   2135
   2136	r = copy_from_user(bits, (void __user *)args->values, args->count);
   2137	if (r) {
   2138		r = -EFAULT;
   2139		goto out;
   2140	}
   2141
   2142	mmap_read_lock(kvm->mm);
   2143	srcu_idx = srcu_read_lock(&kvm->srcu);
   2144	for (i = 0; i < args->count; i++) {
   2145		hva = gfn_to_hva(kvm, args->start_gfn + i);
   2146		if (kvm_is_error_hva(hva)) {
   2147			r = -EFAULT;
   2148			break;
   2149		}
   2150
   2151		pgstev = bits[i];
   2152		pgstev = pgstev << 24;
   2153		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
   2154		set_pgste_bits(kvm->mm, hva, mask, pgstev);
   2155	}
   2156	srcu_read_unlock(&kvm->srcu, srcu_idx);
   2157	mmap_read_unlock(kvm->mm);
   2158
   2159	if (!kvm->mm->context.uses_cmm) {
   2160		mmap_write_lock(kvm->mm);
   2161		kvm->mm->context.uses_cmm = 1;
   2162		mmap_write_unlock(kvm->mm);
   2163	}
   2164out:
   2165	vfree(bits);
   2166	return r;
   2167}
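
/*
 * Usage sketch (hypothetical userspace, not part of this file): the
 * destination side restoring the values gathered above. The mask selects
 * which pgste bits to set; ~0ULL simply requests all of them, and the
 * kernel further restricts the mask to the usage and NODAT bits.
 *
 *	args.start_gfn = first_gfn;
 *	args.count     = n_values;
 *	args.flags     = 0;
 *	args.mask      = ~0ULL;
 *	args.values    = (uint64_t)(uintptr_t)values;
 *	ret = ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &args);
 */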
   2168
   2169static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
   2170{
   2171	struct kvm_vcpu *vcpu;
   2172	u16 rc, rrc;
   2173	int ret = 0;
   2174	unsigned long i;
   2175
   2176	/*
   2177	 * We ignore failures and try to destroy as many CPUs as possible.
   2178	 * At the same time we must not free the assigned resources when
    2179	 * this fails, as the ultravisor still has access to that memory.
    2180	 * So kvm_s390_pv_destroy_cpu can leave an intentional ("wanted")
    2181	 * memory leak behind.
   2182	 * We want to return the first failure rc and rrc, though.
   2183	 */
   2184	kvm_for_each_vcpu(i, vcpu, kvm) {
   2185		mutex_lock(&vcpu->mutex);
   2186		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
   2187			*rcp = rc;
   2188			*rrcp = rrc;
   2189			ret = -EIO;
   2190		}
   2191		mutex_unlock(&vcpu->mutex);
   2192	}
   2193	/* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
   2194	if (use_gisa)
   2195		kvm_s390_gisa_enable(kvm);
   2196	return ret;
   2197}
   2198
   2199static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
   2200{
   2201	unsigned long i;
   2202	int r = 0;
   2203	u16 dummy;
   2204
   2205	struct kvm_vcpu *vcpu;
   2206
   2207	/* Disable the GISA if the ultravisor does not support AIV. */
   2208	if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
   2209		kvm_s390_gisa_disable(kvm);
   2210
   2211	kvm_for_each_vcpu(i, vcpu, kvm) {
   2212		mutex_lock(&vcpu->mutex);
   2213		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
   2214		mutex_unlock(&vcpu->mutex);
   2215		if (r)
   2216			break;
   2217	}
   2218	if (r)
   2219		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
   2220	return r;
   2221}
   2222
   2223static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
   2224{
   2225	int r = 0;
   2226	u16 dummy;
   2227	void __user *argp = (void __user *)cmd->data;
   2228
   2229	switch (cmd->cmd) {
   2230	case KVM_PV_ENABLE: {
   2231		r = -EINVAL;
   2232		if (kvm_s390_pv_is_protected(kvm))
   2233			break;
   2234
   2235		/*
   2236		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
   2237		 *  esca, we need no cleanup in the error cases below
   2238		 */
   2239		r = sca_switch_to_extended(kvm);
   2240		if (r)
   2241			break;
   2242
   2243		mmap_write_lock(current->mm);
   2244		r = gmap_mark_unmergeable();
   2245		mmap_write_unlock(current->mm);
   2246		if (r)
   2247			break;
   2248
   2249		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
   2250		if (r)
   2251			break;
   2252
   2253		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
   2254		if (r)
   2255			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
   2256
   2257		/* we need to block service interrupts from now on */
   2258		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
   2259		break;
   2260	}
   2261	case KVM_PV_DISABLE: {
   2262		r = -EINVAL;
   2263		if (!kvm_s390_pv_is_protected(kvm))
   2264			break;
   2265
   2266		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
   2267		/*
   2268		 * If a CPU could not be destroyed, destroy VM will also fail.
   2269		 * There is no point in trying to destroy it. Instead return
   2270		 * the rc and rrc from the first CPU that failed destroying.
   2271		 */
   2272		if (r)
   2273			break;
   2274		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
   2275
   2276		/* no need to block service interrupts any more */
   2277		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
   2278		break;
   2279	}
   2280	case KVM_PV_SET_SEC_PARMS: {
   2281		struct kvm_s390_pv_sec_parm parms = {};
   2282		void *hdr;
   2283
   2284		r = -EINVAL;
   2285		if (!kvm_s390_pv_is_protected(kvm))
   2286			break;
   2287
   2288		r = -EFAULT;
   2289		if (copy_from_user(&parms, argp, sizeof(parms)))
   2290			break;
   2291
   2292		/* Currently restricted to 8KB */
   2293		r = -EINVAL;
   2294		if (parms.length > PAGE_SIZE * 2)
   2295			break;
   2296
   2297		r = -ENOMEM;
   2298		hdr = vmalloc(parms.length);
   2299		if (!hdr)
   2300			break;
   2301
   2302		r = -EFAULT;
   2303		if (!copy_from_user(hdr, (void __user *)parms.origin,
   2304				    parms.length))
   2305			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
   2306						      &cmd->rc, &cmd->rrc);
   2307
   2308		vfree(hdr);
   2309		break;
   2310	}
   2311	case KVM_PV_UNPACK: {
   2312		struct kvm_s390_pv_unp unp = {};
   2313
   2314		r = -EINVAL;
   2315		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
   2316			break;
   2317
   2318		r = -EFAULT;
   2319		if (copy_from_user(&unp, argp, sizeof(unp)))
   2320			break;
   2321
   2322		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
   2323				       &cmd->rc, &cmd->rrc);
   2324		break;
   2325	}
   2326	case KVM_PV_VERIFY: {
   2327		r = -EINVAL;
   2328		if (!kvm_s390_pv_is_protected(kvm))
   2329			break;
   2330
   2331		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
   2332				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
   2333		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
   2334			     cmd->rrc);
   2335		break;
   2336	}
   2337	case KVM_PV_PREP_RESET: {
   2338		r = -EINVAL;
   2339		if (!kvm_s390_pv_is_protected(kvm))
   2340			break;
   2341
   2342		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
   2343				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
   2344		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
   2345			     cmd->rc, cmd->rrc);
   2346		break;
   2347	}
   2348	case KVM_PV_UNSHARE_ALL: {
   2349		r = -EINVAL;
   2350		if (!kvm_s390_pv_is_protected(kvm))
   2351			break;
   2352
   2353		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
   2354				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
   2355		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
   2356			     cmd->rc, cmd->rrc);
   2357		break;
   2358	}
   2359	default:
   2360		r = -ENOTTY;
   2361	}
   2362	return r;
   2363}
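
/*
 * Usage sketch (hypothetical userspace, not part of this file): the
 * command sequence a VMM would issue through KVM_S390_PV_COMMAND to
 * start a protected guest, mirroring the cases handled above. On
 * failure, rc/rrc carry the ultravisor return codes.
 *
 *	struct kvm_pv_cmd cmd = { .cmd = KVM_PV_ENABLE };
 *
 *	ret = ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
 *	cmd.cmd  = KVM_PV_SET_SEC_PARMS;
 *	cmd.data = (uint64_t)(uintptr_t)&parms;	(a struct kvm_s390_pv_sec_parm)
 *	ret = ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
 *	... one KVM_PV_UNPACK per encrypted image chunk ...
 *	cmd.cmd = KVM_PV_VERIFY;
 *	ret = ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
 */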
   2364
   2365static bool access_key_invalid(u8 access_key)
   2366{
   2367	return access_key > 0xf;
   2368}
   2369
   2370static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
   2371{
   2372	void __user *uaddr = (void __user *)mop->buf;
   2373	u64 supported_flags;
   2374	void *tmpbuf = NULL;
   2375	int r, srcu_idx;
   2376
   2377	supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
   2378			  | KVM_S390_MEMOP_F_CHECK_ONLY;
   2379	if (mop->flags & ~supported_flags || !mop->size)
   2380		return -EINVAL;
   2381	if (mop->size > MEM_OP_MAX_SIZE)
   2382		return -E2BIG;
   2383	/*
   2384	 * This is technically a heuristic only, if the kvm->lock is not
   2385	 * taken, it is not guaranteed that the vm is/remains non-protected.
   2386	 * This is ok from a kernel perspective, wrongdoing is detected
   2387	 * on the access, -EFAULT is returned and the vm may crash the
   2388	 * next time it accesses the memory in question.
    2389	 * There is no sane use case for doing the switch and a memop on
    2390	 * two different CPUs at the same time.
   2391	 */
   2392	if (kvm_s390_pv_get_handle(kvm))
   2393		return -EINVAL;
   2394	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
   2395		if (access_key_invalid(mop->key))
   2396			return -EINVAL;
   2397	} else {
   2398		mop->key = 0;
   2399	}
   2400	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
   2401		tmpbuf = vmalloc(mop->size);
   2402		if (!tmpbuf)
   2403			return -ENOMEM;
   2404	}
   2405
   2406	srcu_idx = srcu_read_lock(&kvm->srcu);
   2407
   2408	if (kvm_is_error_gpa(kvm, mop->gaddr)) {
   2409		r = PGM_ADDRESSING;
   2410		goto out_unlock;
   2411	}
   2412
   2413	switch (mop->op) {
   2414	case KVM_S390_MEMOP_ABSOLUTE_READ: {
   2415		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
   2416			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
   2417		} else {
   2418			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
   2419						      mop->size, GACC_FETCH, mop->key);
   2420			if (r == 0) {
   2421				if (copy_to_user(uaddr, tmpbuf, mop->size))
   2422					r = -EFAULT;
   2423			}
   2424		}
   2425		break;
   2426	}
   2427	case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
   2428		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
   2429			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
   2430		} else {
   2431			if (copy_from_user(tmpbuf, uaddr, mop->size)) {
   2432				r = -EFAULT;
   2433				break;
   2434			}
   2435			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
   2436						      mop->size, GACC_STORE, mop->key);
   2437		}
   2438		break;
   2439	}
   2440	default:
   2441		r = -EINVAL;
   2442	}
   2443
   2444out_unlock:
   2445	srcu_read_unlock(&kvm->srcu, srcu_idx);
   2446
   2447	vfree(tmpbuf);
   2448	return r;
   2449}
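
/*
 * Usage sketch (hypothetical userspace, not part of this file): an
 * absolute-storage read through the VM memop interface above, checked
 * against storage keys with access key 6. With
 * KVM_S390_MEMOP_F_CHECK_ONLY set instead, the access would only be
 * validated and no data transferred.
 *
 *	uint8_t buf[512];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,
 *		.size  = sizeof(buf),
 *		.op    = KVM_S390_MEMOP_ABSOLUTE_READ,
 *		.buf   = (uint64_t)(uintptr_t)buf,
 *		.flags = KVM_S390_MEMOP_F_SKEY_PROTECTION,
 *		.key   = 6,
 *	};
 *
 *	ret = ioctl(vm_fd, KVM_S390_MEM_OP, &op);
 */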
   2450
   2451long kvm_arch_vm_ioctl(struct file *filp,
   2452		       unsigned int ioctl, unsigned long arg)
   2453{
   2454	struct kvm *kvm = filp->private_data;
   2455	void __user *argp = (void __user *)arg;
   2456	struct kvm_device_attr attr;
   2457	int r;
   2458
   2459	switch (ioctl) {
   2460	case KVM_S390_INTERRUPT: {
   2461		struct kvm_s390_interrupt s390int;
   2462
   2463		r = -EFAULT;
   2464		if (copy_from_user(&s390int, argp, sizeof(s390int)))
   2465			break;
   2466		r = kvm_s390_inject_vm(kvm, &s390int);
   2467		break;
   2468	}
   2469	case KVM_CREATE_IRQCHIP: {
   2470		struct kvm_irq_routing_entry routing;
   2471
   2472		r = -EINVAL;
   2473		if (kvm->arch.use_irqchip) {
   2474			/* Set up dummy routing. */
   2475			memset(&routing, 0, sizeof(routing));
   2476			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
   2477		}
   2478		break;
   2479	}
   2480	case KVM_SET_DEVICE_ATTR: {
   2481		r = -EFAULT;
   2482		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
   2483			break;
   2484		r = kvm_s390_vm_set_attr(kvm, &attr);
   2485		break;
   2486	}
   2487	case KVM_GET_DEVICE_ATTR: {
   2488		r = -EFAULT;
   2489		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
   2490			break;
   2491		r = kvm_s390_vm_get_attr(kvm, &attr);
   2492		break;
   2493	}
   2494	case KVM_HAS_DEVICE_ATTR: {
   2495		r = -EFAULT;
   2496		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
   2497			break;
   2498		r = kvm_s390_vm_has_attr(kvm, &attr);
   2499		break;
   2500	}
   2501	case KVM_S390_GET_SKEYS: {
   2502		struct kvm_s390_skeys args;
   2503
   2504		r = -EFAULT;
   2505		if (copy_from_user(&args, argp,
   2506				   sizeof(struct kvm_s390_skeys)))
   2507			break;
   2508		r = kvm_s390_get_skeys(kvm, &args);
   2509		break;
   2510	}
   2511	case KVM_S390_SET_SKEYS: {
   2512		struct kvm_s390_skeys args;
   2513
   2514		r = -EFAULT;
   2515		if (copy_from_user(&args, argp,
   2516				   sizeof(struct kvm_s390_skeys)))
   2517			break;
   2518		r = kvm_s390_set_skeys(kvm, &args);
   2519		break;
   2520	}
   2521	case KVM_S390_GET_CMMA_BITS: {
   2522		struct kvm_s390_cmma_log args;
   2523
   2524		r = -EFAULT;
   2525		if (copy_from_user(&args, argp, sizeof(args)))
   2526			break;
   2527		mutex_lock(&kvm->slots_lock);
   2528		r = kvm_s390_get_cmma_bits(kvm, &args);
   2529		mutex_unlock(&kvm->slots_lock);
   2530		if (!r) {
   2531			r = copy_to_user(argp, &args, sizeof(args));
   2532			if (r)
   2533				r = -EFAULT;
   2534		}
   2535		break;
   2536	}
   2537	case KVM_S390_SET_CMMA_BITS: {
   2538		struct kvm_s390_cmma_log args;
   2539
   2540		r = -EFAULT;
   2541		if (copy_from_user(&args, argp, sizeof(args)))
   2542			break;
   2543		mutex_lock(&kvm->slots_lock);
   2544		r = kvm_s390_set_cmma_bits(kvm, &args);
   2545		mutex_unlock(&kvm->slots_lock);
   2546		break;
   2547	}
   2548	case KVM_S390_PV_COMMAND: {
   2549		struct kvm_pv_cmd args;
   2550
    2551		/* protected virtualization implies user-controlled cpu state */
   2552		kvm_s390_set_user_cpu_state_ctrl(kvm);
   2553		r = 0;
   2554		if (!is_prot_virt_host()) {
   2555			r = -EINVAL;
   2556			break;
   2557		}
   2558		if (copy_from_user(&args, argp, sizeof(args))) {
   2559			r = -EFAULT;
   2560			break;
   2561		}
   2562		if (args.flags) {
   2563			r = -EINVAL;
   2564			break;
   2565		}
   2566		mutex_lock(&kvm->lock);
   2567		r = kvm_s390_handle_pv(kvm, &args);
   2568		mutex_unlock(&kvm->lock);
   2569		if (copy_to_user(argp, &args, sizeof(args))) {
   2570			r = -EFAULT;
   2571			break;
   2572		}
   2573		break;
   2574	}
   2575	case KVM_S390_MEM_OP: {
   2576		struct kvm_s390_mem_op mem_op;
   2577
   2578		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
   2579			r = kvm_s390_vm_mem_op(kvm, &mem_op);
   2580		else
   2581			r = -EFAULT;
   2582		break;
   2583	}
   2584	default:
   2585		r = -ENOTTY;
   2586	}
   2587
   2588	return r;
   2589}
   2590
   2591static int kvm_s390_apxa_installed(void)
   2592{
   2593	struct ap_config_info info;
   2594
   2595	if (ap_instructions_available()) {
   2596		if (ap_qci(&info) == 0)
   2597			return info.apxa;
   2598	}
   2599
   2600	return 0;
   2601}
   2602
   2603/*
   2604 * The format of the crypto control block (CRYCB) is specified in the 3 low
   2605 * order bits of the CRYCB designation (CRYCBD) field as follows:
   2606 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
   2607 *	     AP extended addressing (APXA) facility are installed.
   2608 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
    2609	 * Format 2: Both the APXA and MSAX3 facilities are installed.
   2610 */
   2611static void kvm_s390_set_crycb_format(struct kvm *kvm)
   2612{
   2613	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
   2614
   2615	/* Clear the CRYCB format bits - i.e., set format 0 by default */
   2616	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
   2617
   2618	/* Check whether MSAX3 is installed */
   2619	if (!test_kvm_facility(kvm, 76))
   2620		return;
   2621
   2622	if (kvm_s390_apxa_installed())
   2623		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
   2624	else
   2625		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
   2626}
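
/*
 * The resulting format selection, summarized:
 *
 *	MSAX3 (facility 76)	APXA	CRYCB format
 *	no			-	0
 *	yes			no	1
 *	yes			yes	2
 */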
   2627
   2628/*
   2629 * kvm_arch_crypto_set_masks
   2630 *
   2631 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
   2632 *	 to be set.
   2633 * @apm: the mask identifying the accessible AP adapters
   2634 * @aqm: the mask identifying the accessible AP domains
   2635 * @adm: the mask identifying the accessible AP control domains
   2636 *
   2637 * Set the masks that identify the adapters, domains and control domains to
   2638 * which the KVM guest is granted access.
   2639 *
   2640 * Note: The kvm->lock mutex must be locked by the caller before invoking this
   2641 *	 function.
   2642 */
   2643void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
   2644			       unsigned long *aqm, unsigned long *adm)
   2645{
   2646	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
   2647
   2648	kvm_s390_vcpu_block_all(kvm);
   2649
   2650	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
    2651	case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
   2652		memcpy(crycb->apcb1.apm, apm, 32);
   2653		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
   2654			 apm[0], apm[1], apm[2], apm[3]);
   2655		memcpy(crycb->apcb1.aqm, aqm, 32);
   2656		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
   2657			 aqm[0], aqm[1], aqm[2], aqm[3]);
   2658		memcpy(crycb->apcb1.adm, adm, 32);
   2659		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
   2660			 adm[0], adm[1], adm[2], adm[3]);
   2661		break;
   2662	case CRYCB_FORMAT1:
    2663	case CRYCB_FORMAT0: /* fall through - both use APCB0 */
   2664		memcpy(crycb->apcb0.apm, apm, 8);
   2665		memcpy(crycb->apcb0.aqm, aqm, 2);
   2666		memcpy(crycb->apcb0.adm, adm, 2);
   2667		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
   2668			 apm[0], *((unsigned short *)aqm),
   2669			 *((unsigned short *)adm));
   2670		break;
    2671	default:	/* Cannot happen */
   2672		break;
   2673	}
   2674
   2675	/* recreate the shadow crycb for each vcpu */
   2676	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
   2677	kvm_s390_vcpu_unblock_all(kvm);
   2678}
   2679EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
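
/*
 * Caller sketch: the typical user of this export is an AP pass-through
 * driver such as vfio_ap. A minimal, hypothetical invocation that grants
 * the guest AP adapter 3 and usage domain 7 (all bitmaps are 256 bits
 * wide, and kvm->lock must be held as documented above):
 *
 *	DECLARE_BITMAP(apm, 256) = { 0 };
 *	DECLARE_BITMAP(aqm, 256) = { 0 };
 *	DECLARE_BITMAP(adm, 256) = { 0 };
 *
 *	set_bit_inv(3, apm);
 *	set_bit_inv(7, aqm);
 *	mutex_lock(&kvm->lock);
 *	kvm_arch_crypto_set_masks(kvm, apm, aqm, adm);
 *	mutex_unlock(&kvm->lock);
 */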
   2680
   2681/*
   2682 * kvm_arch_crypto_clear_masks
   2683 *
   2684 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
   2685 *	 to be cleared.
   2686 *
   2687 * Clear the masks that identify the adapters, domains and control domains to
   2688 * which the KVM guest is granted access.
   2689 *
   2690 * Note: The kvm->lock mutex must be locked by the caller before invoking this
   2691 *	 function.
   2692 */
   2693void kvm_arch_crypto_clear_masks(struct kvm *kvm)
   2694{
   2695	kvm_s390_vcpu_block_all(kvm);
   2696
   2697	memset(&kvm->arch.crypto.crycb->apcb0, 0,
   2698	       sizeof(kvm->arch.crypto.crycb->apcb0));
   2699	memset(&kvm->arch.crypto.crycb->apcb1, 0,
   2700	       sizeof(kvm->arch.crypto.crycb->apcb1));
   2701
   2702	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
   2703	/* recreate the shadow crycb for each vcpu */
   2704	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
   2705	kvm_s390_vcpu_unblock_all(kvm);
   2706}
   2707EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
   2708
   2709static u64 kvm_s390_get_initial_cpuid(void)
   2710{
   2711	struct cpuid cpuid;
   2712
   2713	get_cpu_id(&cpuid);
   2714	cpuid.version = 0xff;
   2715	return *((u64 *) &cpuid);
   2716}
   2717
   2718static void kvm_s390_crypto_init(struct kvm *kvm)
   2719{
   2720	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
   2721	kvm_s390_set_crycb_format(kvm);
   2722	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
   2723
   2724	if (!test_kvm_facility(kvm, 76))
   2725		return;
   2726
   2727	/* Enable AES/DEA protected key functions by default */
   2728	kvm->arch.crypto.aes_kw = 1;
   2729	kvm->arch.crypto.dea_kw = 1;
   2730	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
   2731			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
   2732	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
   2733			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
   2734}
   2735
   2736static void sca_dispose(struct kvm *kvm)
   2737{
   2738	if (kvm->arch.use_esca)
   2739		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
   2740	else
   2741		free_page((unsigned long)(kvm->arch.sca));
   2742	kvm->arch.sca = NULL;
   2743}
   2744
   2745int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
   2746{
   2747	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
   2748	int i, rc;
   2749	char debug_name[16];
   2750	static unsigned long sca_offset;
   2751
   2752	rc = -EINVAL;
   2753#ifdef CONFIG_KVM_S390_UCONTROL
   2754	if (type & ~KVM_VM_S390_UCONTROL)
   2755		goto out_err;
   2756	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
   2757		goto out_err;
   2758#else
   2759	if (type)
   2760		goto out_err;
   2761#endif
   2762
   2763	rc = s390_enable_sie();
   2764	if (rc)
   2765		goto out_err;
   2766
   2767	rc = -ENOMEM;
   2768
   2769	if (!sclp.has_64bscao)
   2770		alloc_flags |= GFP_DMA;
   2771	rwlock_init(&kvm->arch.sca_lock);
   2772	/* start with basic SCA */
   2773	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
   2774	if (!kvm->arch.sca)
   2775		goto out_err;
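	/*
	 * sca_offset staggers the SCA of each new VM in 16-byte steps
	 * within the page, presumably so that the heavily accessed SCAs
	 * of different VMs end up on different cache lines. It is global
	 * state, hence the kvm_lock protection.
	 */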
   2776	mutex_lock(&kvm_lock);
   2777	sca_offset += 16;
   2778	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
   2779		sca_offset = 0;
   2780	kvm->arch.sca = (struct bsca_block *)
   2781			((char *) kvm->arch.sca + sca_offset);
   2782	mutex_unlock(&kvm_lock);
   2783
   2784	sprintf(debug_name, "kvm-%u", current->pid);
   2785
   2786	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
   2787	if (!kvm->arch.dbf)
   2788		goto out_err;
   2789
   2790	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
   2791	kvm->arch.sie_page2 =
   2792	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
   2793	if (!kvm->arch.sie_page2)
   2794		goto out_err;
   2795
   2796	kvm->arch.sie_page2->kvm = kvm;
   2797	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
   2798
   2799	for (i = 0; i < kvm_s390_fac_size(); i++) {
   2800		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
   2801					      (kvm_s390_fac_base[i] |
   2802					       kvm_s390_fac_ext[i]);
   2803		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
   2804					      kvm_s390_fac_base[i];
   2805	}
   2806	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
   2807
   2808	/* we are always in czam mode - even on pre z14 machines */
   2809	set_kvm_facility(kvm->arch.model.fac_mask, 138);
   2810	set_kvm_facility(kvm->arch.model.fac_list, 138);
   2811	/* we emulate STHYI in kvm */
   2812	set_kvm_facility(kvm->arch.model.fac_mask, 74);
   2813	set_kvm_facility(kvm->arch.model.fac_list, 74);
   2814	if (MACHINE_HAS_TLB_GUEST) {
   2815		set_kvm_facility(kvm->arch.model.fac_mask, 147);
   2816		set_kvm_facility(kvm->arch.model.fac_list, 147);
   2817	}
   2818
   2819	if (css_general_characteristics.aiv && test_facility(65))
   2820		set_kvm_facility(kvm->arch.model.fac_mask, 65);
   2821
   2822	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
   2823	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
   2824
   2825	kvm_s390_crypto_init(kvm);
   2826
   2827	mutex_init(&kvm->arch.float_int.ais_lock);
   2828	spin_lock_init(&kvm->arch.float_int.lock);
   2829	for (i = 0; i < FIRQ_LIST_COUNT; i++)
   2830		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
   2831	init_waitqueue_head(&kvm->arch.ipte_wq);
   2832	mutex_init(&kvm->arch.ipte_mutex);
   2833
   2834	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
   2835	VM_EVENT(kvm, 3, "vm created with type %lu", type);
   2836
   2837	if (type & KVM_VM_S390_UCONTROL) {
   2838		kvm->arch.gmap = NULL;
   2839		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
   2840	} else {
   2841		if (sclp.hamax == U64_MAX)
   2842			kvm->arch.mem_limit = TASK_SIZE_MAX;
   2843		else
   2844			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
   2845						    sclp.hamax + 1);
   2846		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
   2847		if (!kvm->arch.gmap)
   2848			goto out_err;
   2849		kvm->arch.gmap->private = kvm;
   2850		kvm->arch.gmap->pfault_enabled = 0;
   2851	}
   2852
   2853	kvm->arch.use_pfmfi = sclp.has_pfmfi;
   2854	kvm->arch.use_skf = sclp.has_skey;
   2855	spin_lock_init(&kvm->arch.start_stop_lock);
   2856	kvm_s390_vsie_init(kvm);
   2857	if (use_gisa)
   2858		kvm_s390_gisa_init(kvm);
   2859	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
   2860
   2861	return 0;
   2862out_err:
   2863	free_page((unsigned long)kvm->arch.sie_page2);
   2864	debug_unregister(kvm->arch.dbf);
   2865	sca_dispose(kvm);
   2866	KVM_EVENT(3, "creation of vm failed: %d", rc);
   2867	return rc;
   2868}
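
/*
 * Usage sketch (hypothetical userspace, not part of this file): the call
 * that ends up in kvm_arch_init_vm() above. On s390 the type argument is
 * either 0 for a regular VM or KVM_VM_S390_UCONTROL (CAP_SYS_ADMIN only)
 * for a user-controlled VM without a kernel-managed gmap.
 *
 *	int kvm_fd = open("/dev/kvm", O_RDWR);
 *	int vm_fd  = ioctl(kvm_fd, KVM_CREATE_VM, 0);
 */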
   2869
   2870void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
   2871{
   2872	u16 rc, rrc;
   2873
   2874	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
   2875	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
   2876	kvm_s390_clear_local_irqs(vcpu);
   2877	kvm_clear_async_pf_completion_queue(vcpu);
   2878	if (!kvm_is_ucontrol(vcpu->kvm))
   2879		sca_del_vcpu(vcpu);
   2880
   2881	if (kvm_is_ucontrol(vcpu->kvm))
   2882		gmap_remove(vcpu->arch.gmap);
   2883
   2884	if (vcpu->kvm->arch.use_cmma)
   2885		kvm_s390_vcpu_unsetup_cmma(vcpu);
    2886	/* We cannot hold the vcpu mutex here; we are already dying */
   2887	if (kvm_s390_pv_cpu_get_handle(vcpu))
   2888		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
   2889	free_page((unsigned long)(vcpu->arch.sie_block));
   2890}
   2891
   2892void kvm_arch_destroy_vm(struct kvm *kvm)
   2893{
   2894	u16 rc, rrc;
   2895
   2896	kvm_destroy_vcpus(kvm);
   2897	sca_dispose(kvm);
   2898	kvm_s390_gisa_destroy(kvm);
   2899	/*
   2900	 * We are already at the end of life and kvm->lock is not taken.
   2901	 * This is ok as the file descriptor is closed by now and nobody
   2902	 * can mess with the pv state. To avoid lockdep_assert_held from
   2903	 * complaining we do not use kvm_s390_pv_is_protected.
   2904	 */
   2905	if (kvm_s390_pv_get_handle(kvm))
   2906		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
   2907	debug_unregister(kvm->arch.dbf);
   2908	free_page((unsigned long)kvm->arch.sie_page2);
   2909	if (!kvm_is_ucontrol(kvm))
   2910		gmap_remove(kvm->arch.gmap);
   2911	kvm_s390_destroy_adapters(kvm);
   2912	kvm_s390_clear_float_irqs(kvm);
   2913	kvm_s390_vsie_destroy(kvm);
   2914	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
   2915}
   2916
   2917/* Section: vcpu related */
   2918static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
   2919{
   2920	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
   2921	if (!vcpu->arch.gmap)
   2922		return -ENOMEM;
   2923	vcpu->arch.gmap->private = vcpu->kvm;
   2924
   2925	return 0;
   2926}
   2927
   2928static void sca_del_vcpu(struct kvm_vcpu *vcpu)
   2929{
   2930	if (!kvm_s390_use_sca_entries())
   2931		return;
   2932	read_lock(&vcpu->kvm->arch.sca_lock);
   2933	if (vcpu->kvm->arch.use_esca) {
   2934		struct esca_block *sca = vcpu->kvm->arch.sca;
   2935
   2936		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
   2937		sca->cpu[vcpu->vcpu_id].sda = 0;
   2938	} else {
   2939		struct bsca_block *sca = vcpu->kvm->arch.sca;
   2940
   2941		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
   2942		sca->cpu[vcpu->vcpu_id].sda = 0;
   2943	}
   2944	read_unlock(&vcpu->kvm->arch.sca_lock);
   2945}
   2946
   2947static void sca_add_vcpu(struct kvm_vcpu *vcpu)
   2948{
   2949	if (!kvm_s390_use_sca_entries()) {
   2950		struct bsca_block *sca = vcpu->kvm->arch.sca;
   2951
   2952		/* we still need the basic sca for the ipte control */
   2953		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
   2954		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
   2955		return;
   2956	}
   2957	read_lock(&vcpu->kvm->arch.sca_lock);
   2958	if (vcpu->kvm->arch.use_esca) {
   2959		struct esca_block *sca = vcpu->kvm->arch.sca;
   2960
   2961		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
   2962		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
   2963		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
   2964		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
   2965		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
   2966	} else {
   2967		struct bsca_block *sca = vcpu->kvm->arch.sca;
   2968
   2969		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
   2970		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
   2971		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
   2972		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
   2973	}
   2974	read_unlock(&vcpu->kvm->arch.sca_lock);
   2975}
   2976
   2977/* Basic SCA to Extended SCA data copy routines */
   2978static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
   2979{
   2980	d->sda = s->sda;
   2981	d->sigp_ctrl.c = s->sigp_ctrl.c;
   2982	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
   2983}
   2984
   2985static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
   2986{
   2987	int i;
   2988
   2989	d->ipte_control = s->ipte_control;
   2990	d->mcn[0] = s->mcn;
   2991	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
   2992		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
   2993}
   2994
   2995static int sca_switch_to_extended(struct kvm *kvm)
   2996{
   2997	struct bsca_block *old_sca = kvm->arch.sca;
   2998	struct esca_block *new_sca;
   2999	struct kvm_vcpu *vcpu;
   3000	unsigned long vcpu_idx;
   3001	u32 scaol, scaoh;
   3002
   3003	if (kvm->arch.use_esca)
   3004		return 0;
   3005
   3006	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
   3007	if (!new_sca)
   3008		return -ENOMEM;
   3009
   3010	scaoh = (u32)((u64)(new_sca) >> 32);
   3011	scaol = (u32)(u64)(new_sca) & ~0x3fU;
   3012
   3013	kvm_s390_vcpu_block_all(kvm);
   3014	write_lock(&kvm->arch.sca_lock);
   3015
   3016	sca_copy_b_to_e(new_sca, old_sca);
   3017
   3018	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
   3019		vcpu->arch.sie_block->scaoh = scaoh;
   3020		vcpu->arch.sie_block->scaol = scaol;
   3021		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
   3022	}
   3023	kvm->arch.sca = new_sca;
   3024	kvm->arch.use_esca = 1;
   3025
   3026	write_unlock(&kvm->arch.sca_lock);
   3027	kvm_s390_vcpu_unblock_all(kvm);
   3028
   3029	free_page((unsigned long)old_sca);
   3030
   3031	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
   3032		 old_sca, kvm->arch.sca);
   3033	return 0;
   3034}
   3035
   3036static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
   3037{
   3038	int rc;
   3039
   3040	if (!kvm_s390_use_sca_entries()) {
   3041		if (id < KVM_MAX_VCPUS)
   3042			return true;
   3043		return false;
   3044	}
   3045	if (id < KVM_S390_BSCA_CPU_SLOTS)
   3046		return true;
   3047	if (!sclp.has_esca || !sclp.has_64bscao)
   3048		return false;
   3049
   3050	mutex_lock(&kvm->lock);
   3051	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
   3052	mutex_unlock(&kvm->lock);
   3053
   3054	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
   3055}
   3056
   3057/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
   3058static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
   3059{
   3060	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
   3061	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
   3062	vcpu->arch.cputm_start = get_tod_clock_fast();
   3063	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
   3064}
   3065
   3066/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
   3067static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
   3068{
   3069	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
   3070	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
   3071	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
   3072	vcpu->arch.cputm_start = 0;
   3073	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
   3074}
   3075
   3076/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
   3077static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
   3078{
   3079	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
   3080	vcpu->arch.cputm_enabled = true;
   3081	__start_cpu_timer_accounting(vcpu);
   3082}
   3083
   3084/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
   3085static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
   3086{
   3087	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
   3088	__stop_cpu_timer_accounting(vcpu);
   3089	vcpu->arch.cputm_enabled = false;
   3090}
   3091
   3092static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
   3093{
   3094	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
   3095	__enable_cpu_timer_accounting(vcpu);
   3096	preempt_enable();
   3097}
   3098
   3099static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
   3100{
   3101	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
   3102	__disable_cpu_timer_accounting(vcpu);
   3103	preempt_enable();
   3104}
   3105
   3106/* set the cpu timer - may only be called from the VCPU thread itself */
   3107void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
   3108{
   3109	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
   3110	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
   3111	if (vcpu->arch.cputm_enabled)
   3112		vcpu->arch.cputm_start = get_tod_clock_fast();
   3113	vcpu->arch.sie_block->cputm = cputm;
   3114	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
   3115	preempt_enable();
   3116}
   3117
   3118/* update and get the cpu timer - can also be called from other VCPU threads */
   3119__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
   3120{
   3121	unsigned int seq;
   3122	__u64 value;
   3123
   3124	if (unlikely(!vcpu->arch.cputm_enabled))
   3125		return vcpu->arch.sie_block->cputm;
   3126
   3127	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
   3128	do {
   3129		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
   3130		/*
   3131		 * If the writer would ever execute a read in the critical
   3132		 * section, e.g. in irq context, we have a deadlock.
   3133		 */
   3134		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
   3135		value = vcpu->arch.sie_block->cputm;
   3136		/* if cputm_start is 0, accounting is being started/stopped */
   3137		if (likely(vcpu->arch.cputm_start))
   3138			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
   3139	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
   3140	preempt_enable();
   3141	return value;
   3142}
   3143
   3144void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
   3145{
   3147	gmap_enable(vcpu->arch.enabled_gmap);
   3148	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
   3149	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
   3150		__start_cpu_timer_accounting(vcpu);
   3151	vcpu->cpu = cpu;
   3152}
   3153
   3154void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
   3155{
   3156	vcpu->cpu = -1;
   3157	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
   3158		__stop_cpu_timer_accounting(vcpu);
   3159	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
   3160	vcpu->arch.enabled_gmap = gmap_get_enabled();
   3161	gmap_disable(vcpu->arch.enabled_gmap);
   3163}
   3164
   3165void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
   3166{
   3167	mutex_lock(&vcpu->kvm->lock);
   3168	preempt_disable();
   3169	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
   3170	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
   3171	preempt_enable();
   3172	mutex_unlock(&vcpu->kvm->lock);
   3173	if (!kvm_is_ucontrol(vcpu->kvm)) {
   3174		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
   3175		sca_add_vcpu(vcpu);
   3176	}
   3177	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
   3178		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
   3179	/* make vcpu_load load the right gmap on the first trigger */
   3180	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
   3181}
   3182
   3183static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
   3184{
   3185	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
   3186	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
   3187		return true;
   3188	return false;
   3189}
   3190
   3191static bool kvm_has_pckmo_ecc(struct kvm *kvm)
   3192{
   3193	/* At least one ECC subfunction must be present */
   3194	return kvm_has_pckmo_subfunc(kvm, 32) ||
   3195	       kvm_has_pckmo_subfunc(kvm, 33) ||
   3196	       kvm_has_pckmo_subfunc(kvm, 34) ||
   3197	       kvm_has_pckmo_subfunc(kvm, 40) ||
   3198	       kvm_has_pckmo_subfunc(kvm, 41);
   3199
   3200}
   3201
   3202static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
   3203{
   3204	/*
   3205	 * If the AP instructions are not being interpreted and the MSAX3
   3206	 * facility is not configured for the guest, there is nothing to set up.
   3207	 */
   3208	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
   3209		return;
   3210
   3211	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
   3212	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
   3213	vcpu->arch.sie_block->eca &= ~ECA_APIE;
   3214	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
   3215
   3216	if (vcpu->kvm->arch.crypto.apie)
   3217		vcpu->arch.sie_block->eca |= ECA_APIE;
   3218
   3219	/* Set up protected key support */
   3220	if (vcpu->kvm->arch.crypto.aes_kw) {
   3221		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
   3222		/* ecc is also wrapped with AES key */
   3223		if (kvm_has_pckmo_ecc(vcpu->kvm))
   3224			vcpu->arch.sie_block->ecd |= ECD_ECC;
   3225	}
   3226
   3227	if (vcpu->kvm->arch.crypto.dea_kw)
   3228		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
   3229}
   3230
   3231void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
   3232{
   3233	free_page(vcpu->arch.sie_block->cbrlo);
   3234	vcpu->arch.sie_block->cbrlo = 0;
   3235}
   3236
   3237int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
   3238{
   3239	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
   3240	if (!vcpu->arch.sie_block->cbrlo)
   3241		return -ENOMEM;
   3242	return 0;
   3243}
   3244
   3245static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
   3246{
   3247	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
   3248
   3249	vcpu->arch.sie_block->ibc = model->ibc;
   3250	if (test_kvm_facility(vcpu->kvm, 7))
   3251		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
   3252}
   3253
   3254static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
   3255{
   3256	int rc = 0;
   3257	u16 uvrc, uvrrc;
   3258
   3259	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
   3260						    CPUSTAT_SM |
   3261						    CPUSTAT_STOPPED);
   3262
   3263	if (test_kvm_facility(vcpu->kvm, 78))
   3264		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
   3265	else if (test_kvm_facility(vcpu->kvm, 8))
   3266		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
   3267
   3268	kvm_s390_vcpu_setup_model(vcpu);
   3269
   3270	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
   3271	if (MACHINE_HAS_ESOP)
   3272		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
   3273	if (test_kvm_facility(vcpu->kvm, 9))
   3274		vcpu->arch.sie_block->ecb |= ECB_SRSI;
   3275	if (test_kvm_facility(vcpu->kvm, 73))
   3276		vcpu->arch.sie_block->ecb |= ECB_TE;
   3277	if (!kvm_is_ucontrol(vcpu->kvm))
   3278		vcpu->arch.sie_block->ecb |= ECB_SPECI;
   3279
   3280	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
   3281		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
   3282	if (test_kvm_facility(vcpu->kvm, 130))
   3283		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
   3284	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
   3285	if (sclp.has_cei)
   3286		vcpu->arch.sie_block->eca |= ECA_CEI;
   3287	if (sclp.has_ib)
   3288		vcpu->arch.sie_block->eca |= ECA_IB;
   3289	if (sclp.has_siif)
   3290		vcpu->arch.sie_block->eca |= ECA_SII;
   3291	if (sclp.has_sigpif)
   3292		vcpu->arch.sie_block->eca |= ECA_SIGPI;
   3293	if (test_kvm_facility(vcpu->kvm, 129)) {
   3294		vcpu->arch.sie_block->eca |= ECA_VX;
   3295		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
   3296	}
   3297	if (test_kvm_facility(vcpu->kvm, 139))
   3298		vcpu->arch.sie_block->ecd |= ECD_MEF;
   3299	if (test_kvm_facility(vcpu->kvm, 156))
   3300		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
   3301	if (vcpu->arch.sie_block->gd) {
   3302		vcpu->arch.sie_block->eca |= ECA_AIV;
   3303		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
   3304			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
   3305	}
   3306	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
   3307					| SDNXC;
   3308	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
   3309
   3310	if (sclp.has_kss)
   3311		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
   3312	else
   3313		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
   3314
   3315	if (vcpu->kvm->arch.use_cmma) {
   3316		rc = kvm_s390_vcpu_setup_cmma(vcpu);
   3317		if (rc)
   3318			return rc;
   3319	}
   3320	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
   3321	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
   3322
   3323	vcpu->arch.sie_block->hpid = HPID_KVM;
   3324
   3325	kvm_s390_vcpu_crypto_setup(vcpu);
   3326
   3327	mutex_lock(&vcpu->kvm->lock);
   3328	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
   3329		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
   3330		if (rc)
   3331			kvm_s390_vcpu_unsetup_cmma(vcpu);
   3332	}
   3333	mutex_unlock(&vcpu->kvm->lock);
   3334
   3335	return rc;
   3336}
   3337
   3338int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
   3339{
   3340	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
   3341		return -EINVAL;
   3342	return 0;
   3343}
   3344
   3345int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
   3346{
   3347	struct sie_page *sie_page;
   3348	int rc;
   3349
   3350	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
   3351	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
   3352	if (!sie_page)
   3353		return -ENOMEM;
   3354
   3355	vcpu->arch.sie_block = &sie_page->sie_block;
   3356	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
   3357
   3358	/* the real guest size will always be smaller than msl */
   3359	vcpu->arch.sie_block->mso = 0;
   3360	vcpu->arch.sie_block->msl = sclp.hamax;
   3361
   3362	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
   3363	spin_lock_init(&vcpu->arch.local_int.lock);
   3364	vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
   3365	seqcount_init(&vcpu->arch.cputm_seqcount);
   3366
   3367	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
   3368	kvm_clear_async_pf_completion_queue(vcpu);
   3369	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
   3370				    KVM_SYNC_GPRS |
   3371				    KVM_SYNC_ACRS |
   3372				    KVM_SYNC_CRS |
   3373				    KVM_SYNC_ARCH0 |
   3374				    KVM_SYNC_PFAULT |
   3375				    KVM_SYNC_DIAG318;
   3376	kvm_s390_set_prefix(vcpu, 0);
   3377	if (test_kvm_facility(vcpu->kvm, 64))
   3378		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
   3379	if (test_kvm_facility(vcpu->kvm, 82))
   3380		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
   3381	if (test_kvm_facility(vcpu->kvm, 133))
   3382		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
   3383	if (test_kvm_facility(vcpu->kvm, 156))
   3384		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
   3385	/* fprs can be synchronized via vrs, even if the guest has no vx. With
   3386	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
   3387	 */
   3388	if (MACHINE_HAS_VX)
   3389		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
   3390	else
   3391		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
   3392
   3393	if (kvm_is_ucontrol(vcpu->kvm)) {
   3394		rc = __kvm_ucontrol_vcpu_init(vcpu);
   3395		if (rc)
   3396			goto out_free_sie_block;
   3397	}
   3398
   3399	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
   3400		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
   3401	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
   3402
   3403	rc = kvm_s390_vcpu_setup(vcpu);
   3404	if (rc)
   3405		goto out_ucontrol_uninit;
   3406	return 0;
   3407
   3408out_ucontrol_uninit:
   3409	if (kvm_is_ucontrol(vcpu->kvm))
   3410		gmap_remove(vcpu->arch.gmap);
   3411out_free_sie_block:
   3412	free_page((unsigned long)(vcpu->arch.sie_block));
   3413	return rc;
   3414}
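
/*
 * Editor's sketch (not part of the kernel source): the userspace calls
 * that end up in kvm_arch_vcpu_create() above. kvm_fd is the /dev/kvm
 * fd and vm_fd comes from KVM_CREATE_VM; error handling is minimal and
 * the helper name is illustrative.
 */
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

static int create_vcpu(int kvm_fd, int vm_fd, unsigned int id,
		       struct kvm_run **run)
{
	int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, id);
	long sz = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);

	if (vcpu_fd < 0 || sz < 0)
		return -1;
	/* maps the kvm_run area whose kvm_valid_regs are set up above */
	*run = mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu_fd, 0);
	return *run == MAP_FAILED ? -1 : vcpu_fd;
}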
   3415
   3416int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
   3417{
   3418	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
   3419	return kvm_s390_vcpu_has_irq(vcpu, 0);
   3420}
   3421
   3422bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
   3423{
   3424	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
   3425}
   3426
   3427void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
   3428{
   3429	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
   3430	exit_sie(vcpu);
   3431}
   3432
   3433void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
   3434{
   3435	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
   3436}
   3437
   3438static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
   3439{
   3440	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
   3441	exit_sie(vcpu);
   3442}
   3443
   3444bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
   3445{
   3446	return atomic_read(&vcpu->arch.sie_block->prog20) &
   3447	       (PROG_BLOCK_SIE | PROG_REQUEST);
   3448}
   3449
   3450static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
   3451{
   3452	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
   3453}
   3454
    3455/*
    3456 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running. If
    3457 * the CPU is not running (e.g. waiting as idle) it returns immediately.
    3458 */
   3459void exit_sie(struct kvm_vcpu *vcpu)
   3460{
   3461	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
   3462	kvm_s390_vsie_kick(vcpu);
   3463	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
   3464		cpu_relax();
   3465}
   3466
   3467/* Kick a guest cpu out of SIE to process a request synchronously */
   3468void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
   3469{
   3470	__kvm_make_request(req, vcpu);
   3471	kvm_s390_vcpu_request(vcpu);
   3472}
   3473
   3474static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
   3475			      unsigned long end)
   3476{
   3477	struct kvm *kvm = gmap->private;
   3478	struct kvm_vcpu *vcpu;
   3479	unsigned long prefix;
   3480	unsigned long i;
   3481
   3482	if (gmap_is_shadow(gmap))
   3483		return;
   3484	if (start >= 1UL << 31)
   3485		/* We are only interested in prefix pages */
   3486		return;
   3487	kvm_for_each_vcpu(i, vcpu, kvm) {
   3488		/* match against both prefix pages */
   3489		prefix = kvm_s390_get_prefix(vcpu);
   3490		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
   3491			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
   3492				   start, end);
   3493			kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
   3494		}
   3495	}
   3496}
   3497
   3498bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
   3499{
   3500	/* do not poll with more than halt_poll_max_steal percent of steal time */
   3501	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
   3502	    READ_ONCE(halt_poll_max_steal)) {
   3503		vcpu->stat.halt_no_poll_steal++;
   3504		return true;
   3505	}
   3506	return false;
   3507}
   3508
   3509int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
   3510{
   3511	/* kvm common code refers to this, but never calls it */
   3512	BUG();
   3513	return 0;
   3514}
   3515
   3516static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
   3517					   struct kvm_one_reg *reg)
   3518{
   3519	int r = -EINVAL;
   3520
   3521	switch (reg->id) {
   3522	case KVM_REG_S390_TODPR:
   3523		r = put_user(vcpu->arch.sie_block->todpr,
   3524			     (u32 __user *)reg->addr);
   3525		break;
   3526	case KVM_REG_S390_EPOCHDIFF:
   3527		r = put_user(vcpu->arch.sie_block->epoch,
   3528			     (u64 __user *)reg->addr);
   3529		break;
   3530	case KVM_REG_S390_CPU_TIMER:
   3531		r = put_user(kvm_s390_get_cpu_timer(vcpu),
   3532			     (u64 __user *)reg->addr);
   3533		break;
   3534	case KVM_REG_S390_CLOCK_COMP:
   3535		r = put_user(vcpu->arch.sie_block->ckc,
   3536			     (u64 __user *)reg->addr);
   3537		break;
   3538	case KVM_REG_S390_PFTOKEN:
   3539		r = put_user(vcpu->arch.pfault_token,
   3540			     (u64 __user *)reg->addr);
   3541		break;
   3542	case KVM_REG_S390_PFCOMPARE:
   3543		r = put_user(vcpu->arch.pfault_compare,
   3544			     (u64 __user *)reg->addr);
   3545		break;
   3546	case KVM_REG_S390_PFSELECT:
   3547		r = put_user(vcpu->arch.pfault_select,
   3548			     (u64 __user *)reg->addr);
   3549		break;
   3550	case KVM_REG_S390_PP:
   3551		r = put_user(vcpu->arch.sie_block->pp,
   3552			     (u64 __user *)reg->addr);
   3553		break;
   3554	case KVM_REG_S390_GBEA:
   3555		r = put_user(vcpu->arch.sie_block->gbea,
   3556			     (u64 __user *)reg->addr);
   3557		break;
   3558	default:
   3559		break;
   3560	}
   3561
   3562	return r;
   3563}
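
/*
 * Editor's sketch (not part of the kernel source): reading the CPU
 * timer from userspace through the handler above; KVM_SET_ONE_REG is
 * symmetric and lands in kvm_arch_vcpu_ioctl_set_one_reg() below. The
 * helper name is illustrative.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int read_cpu_timer(int vcpu_fd, __u64 *cputm)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_S390_CPU_TIMER,
		.addr = (__u64)(unsigned long)cputm,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}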
   3564
   3565static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
   3566					   struct kvm_one_reg *reg)
   3567{
   3568	int r = -EINVAL;
   3569	__u64 val;
   3570
   3571	switch (reg->id) {
   3572	case KVM_REG_S390_TODPR:
   3573		r = get_user(vcpu->arch.sie_block->todpr,
   3574			     (u32 __user *)reg->addr);
   3575		break;
   3576	case KVM_REG_S390_EPOCHDIFF:
   3577		r = get_user(vcpu->arch.sie_block->epoch,
   3578			     (u64 __user *)reg->addr);
   3579		break;
   3580	case KVM_REG_S390_CPU_TIMER:
   3581		r = get_user(val, (u64 __user *)reg->addr);
   3582		if (!r)
   3583			kvm_s390_set_cpu_timer(vcpu, val);
   3584		break;
   3585	case KVM_REG_S390_CLOCK_COMP:
   3586		r = get_user(vcpu->arch.sie_block->ckc,
   3587			     (u64 __user *)reg->addr);
   3588		break;
   3589	case KVM_REG_S390_PFTOKEN:
   3590		r = get_user(vcpu->arch.pfault_token,
   3591			     (u64 __user *)reg->addr);
   3592		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
   3593			kvm_clear_async_pf_completion_queue(vcpu);
   3594		break;
   3595	case KVM_REG_S390_PFCOMPARE:
   3596		r = get_user(vcpu->arch.pfault_compare,
   3597			     (u64 __user *)reg->addr);
   3598		break;
   3599	case KVM_REG_S390_PFSELECT:
   3600		r = get_user(vcpu->arch.pfault_select,
   3601			     (u64 __user *)reg->addr);
   3602		break;
   3603	case KVM_REG_S390_PP:
   3604		r = get_user(vcpu->arch.sie_block->pp,
   3605			     (u64 __user *)reg->addr);
   3606		break;
   3607	case KVM_REG_S390_GBEA:
   3608		r = get_user(vcpu->arch.sie_block->gbea,
   3609			     (u64 __user *)reg->addr);
   3610		break;
   3611	default:
   3612		break;
   3613	}
   3614
   3615	return r;
   3616}
   3617
   3618static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
   3619{
   3620	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
   3621	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
   3622	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
   3623
   3624	kvm_clear_async_pf_completion_queue(vcpu);
   3625	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
   3626		kvm_s390_vcpu_stop(vcpu);
   3627	kvm_s390_clear_local_irqs(vcpu);
   3628}
   3629
   3630static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
   3631{
   3632	/* Initial reset is a superset of the normal reset */
   3633	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
   3634
    3635	/*
    3636	 * This equals the initial cpu reset in the POP, but we don't switch
    3637	 * to ESA. We reset not only the internal data, but also ...
    3638	 */
   3639	vcpu->arch.sie_block->gpsw.mask = 0;
   3640	vcpu->arch.sie_block->gpsw.addr = 0;
   3641	kvm_s390_set_prefix(vcpu, 0);
   3642	kvm_s390_set_cpu_timer(vcpu, 0);
   3643	vcpu->arch.sie_block->ckc = 0;
   3644	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
   3645	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
   3646	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
   3647
   3648	/* ... the data in sync regs */
   3649	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
   3650	vcpu->run->s.regs.ckc = 0;
   3651	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
   3652	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
   3653	vcpu->run->psw_addr = 0;
   3654	vcpu->run->psw_mask = 0;
   3655	vcpu->run->s.regs.todpr = 0;
   3656	vcpu->run->s.regs.cputm = 0;
   3657	vcpu->run->s.regs.ckc = 0;
   3658	vcpu->run->s.regs.pp = 0;
   3659	vcpu->run->s.regs.gbea = 1;
   3660	vcpu->run->s.regs.fpc = 0;
   3661	/*
   3662	 * Do not reset these registers in the protected case, as some of
    3663	 * them are overlaid and thus not accessible in this case
   3664	 * anyway.
   3665	 */
   3666	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
   3667		vcpu->arch.sie_block->gbea = 1;
   3668		vcpu->arch.sie_block->pp = 0;
   3669		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
   3670		vcpu->arch.sie_block->todpr = 0;
   3671	}
   3672}
   3673
   3674static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
   3675{
   3676	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
   3677
   3678	/* Clear reset is a superset of the initial reset */
   3679	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
   3680
   3681	memset(&regs->gprs, 0, sizeof(regs->gprs));
   3682	memset(&regs->vrs, 0, sizeof(regs->vrs));
   3683	memset(&regs->acrs, 0, sizeof(regs->acrs));
   3684	memset(&regs->gscb, 0, sizeof(regs->gscb));
   3685
   3686	regs->etoken = 0;
   3687	regs->etoken_extension = 0;
   3688}
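
/*
 * Editor's sketch (not part of the kernel source): the three resets
 * above form a strict superset chain (normal < initial < clear) and
 * are reachable via per-vcpu ioctls; KVM_S390_NORMAL_RESET and
 * KVM_S390_CLEAR_RESET additionally require the
 * KVM_CAP_S390_VCPU_RESETS capability.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static void reset_vcpu_demo(int vcpu_fd)
{
	ioctl(vcpu_fd, KVM_S390_NORMAL_RESET, 0);	/* ..._normal_reset() */
	ioctl(vcpu_fd, KVM_S390_INITIAL_RESET, 0);	/* ..._initial_reset() */
	ioctl(vcpu_fd, KVM_S390_CLEAR_RESET, 0);	/* ..._clear_reset() */
}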
   3689
   3690int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
   3691{
   3692	vcpu_load(vcpu);
   3693	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
   3694	vcpu_put(vcpu);
   3695	return 0;
   3696}
   3697
   3698int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
   3699{
   3700	vcpu_load(vcpu);
   3701	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
   3702	vcpu_put(vcpu);
   3703	return 0;
   3704}
   3705
   3706int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
   3707				  struct kvm_sregs *sregs)
   3708{
   3709	vcpu_load(vcpu);
   3710
   3711	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
   3712	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
   3713
   3714	vcpu_put(vcpu);
   3715	return 0;
   3716}
   3717
   3718int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
   3719				  struct kvm_sregs *sregs)
   3720{
   3721	vcpu_load(vcpu);
   3722
   3723	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
   3724	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
   3725
   3726	vcpu_put(vcpu);
   3727	return 0;
   3728}
   3729
   3730int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
   3731{
   3732	int ret = 0;
   3733
   3734	vcpu_load(vcpu);
   3735
   3736	if (test_fp_ctl(fpu->fpc)) {
   3737		ret = -EINVAL;
   3738		goto out;
   3739	}
   3740	vcpu->run->s.regs.fpc = fpu->fpc;
   3741	if (MACHINE_HAS_VX)
   3742		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
   3743				 (freg_t *) fpu->fprs);
   3744	else
   3745		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
   3746
   3747out:
   3748	vcpu_put(vcpu);
   3749	return ret;
   3750}
   3751
   3752int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
   3753{
   3754	vcpu_load(vcpu);
   3755
   3756	/* make sure we have the latest values */
   3757	save_fpu_regs();
   3758	if (MACHINE_HAS_VX)
   3759		convert_vx_to_fp((freg_t *) fpu->fprs,
   3760				 (__vector128 *) vcpu->run->s.regs.vrs);
   3761	else
   3762		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
   3763	fpu->fpc = vcpu->run->s.regs.fpc;
   3764
   3765	vcpu_put(vcpu);
   3766	return 0;
   3767}
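
/*
 * Editor's sketch (not part of the kernel source): round-tripping the
 * guest FP state through the two handlers above. A set with an invalid
 * FPC fails with -EINVAL via test_fp_ctl(); the helper name is
 * illustrative.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int copy_fpu_demo(int vcpu_fd)
{
	struct kvm_fpu fpu;

	if (ioctl(vcpu_fd, KVM_GET_FPU, &fpu) < 0)
		return -1;
	return ioctl(vcpu_fd, KVM_SET_FPU, &fpu);
}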
   3768
   3769static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
   3770{
   3771	int rc = 0;
   3772
   3773	if (!is_vcpu_stopped(vcpu))
   3774		rc = -EBUSY;
   3775	else {
   3776		vcpu->run->psw_mask = psw.mask;
   3777		vcpu->run->psw_addr = psw.addr;
   3778	}
   3779	return rc;
   3780}
   3781
   3782int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
   3783				  struct kvm_translation *tr)
   3784{
   3785	return -EINVAL; /* not implemented yet */
   3786}
   3787
   3788#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
   3789			      KVM_GUESTDBG_USE_HW_BP | \
   3790			      KVM_GUESTDBG_ENABLE)
   3791
   3792int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
   3793					struct kvm_guest_debug *dbg)
   3794{
   3795	int rc = 0;
   3796
   3797	vcpu_load(vcpu);
   3798
   3799	vcpu->guest_debug = 0;
   3800	kvm_s390_clear_bp_data(vcpu);
   3801
   3802	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
   3803		rc = -EINVAL;
   3804		goto out;
   3805	}
   3806	if (!sclp.has_gpere) {
   3807		rc = -EINVAL;
   3808		goto out;
   3809	}
   3810
   3811	if (dbg->control & KVM_GUESTDBG_ENABLE) {
   3812		vcpu->guest_debug = dbg->control;
   3813		/* enforce guest PER */
   3814		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
   3815
   3816		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
   3817			rc = kvm_s390_import_bp_data(vcpu, dbg);
   3818	} else {
   3819		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
   3820		vcpu->arch.guestdbg.last_bp = 0;
   3821	}
   3822
   3823	if (rc) {
   3824		vcpu->guest_debug = 0;
   3825		kvm_s390_clear_bp_data(vcpu);
   3826		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
   3827	}
   3828
   3829out:
   3830	vcpu_put(vcpu);
   3831	return rc;
   3832}
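
/*
 * Editor's sketch (not part of the kernel source): enabling guest
 * single-stepping through the handler above. Flags outside
 * VALID_GUESTDBG_FLAGS, or a machine without GPERE, yield -EINVAL.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_singlestep(int vcpu_fd)
{
	struct kvm_guest_debug dbg;

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}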
   3833
   3834int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
   3835				    struct kvm_mp_state *mp_state)
   3836{
   3837	int ret;
   3838
   3839	vcpu_load(vcpu);
   3840
   3841	/* CHECK_STOP and LOAD are not supported yet */
   3842	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
   3843				      KVM_MP_STATE_OPERATING;
   3844
   3845	vcpu_put(vcpu);
   3846	return ret;
   3847}
   3848
   3849int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
   3850				    struct kvm_mp_state *mp_state)
   3851{
   3852	int rc = 0;
   3853
   3854	vcpu_load(vcpu);
   3855
   3856	/* user space knows about this interface - let it control the state */
   3857	kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
   3858
   3859	switch (mp_state->mp_state) {
   3860	case KVM_MP_STATE_STOPPED:
   3861		rc = kvm_s390_vcpu_stop(vcpu);
   3862		break;
   3863	case KVM_MP_STATE_OPERATING:
   3864		rc = kvm_s390_vcpu_start(vcpu);
   3865		break;
   3866	case KVM_MP_STATE_LOAD:
   3867		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
   3868			rc = -ENXIO;
   3869			break;
   3870		}
   3871		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
   3872		break;
   3873	case KVM_MP_STATE_CHECK_STOP:
   3874		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
   3875	default:
   3876		rc = -ENXIO;
   3877	}
   3878
   3879	vcpu_put(vcpu);
   3880	return rc;
   3881}
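
/*
 * Editor's sketch (not part of the kernel source): stopping a vcpu via
 * the handler above. Note the side effect spelled out in the code: the
 * first KVM_SET_MP_STATE call switches the VM to user-controlled cpu
 * state.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int stop_vcpu_demo(int vcpu_fd)
{
	struct kvm_mp_state st = { .mp_state = KVM_MP_STATE_STOPPED };

	return ioctl(vcpu_fd, KVM_SET_MP_STATE, &st);
}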
   3882
   3883static bool ibs_enabled(struct kvm_vcpu *vcpu)
   3884{
   3885	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
   3886}
   3887
   3888static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
   3889{
   3890retry:
   3891	kvm_s390_vcpu_request_handled(vcpu);
   3892	if (!kvm_request_pending(vcpu))
   3893		return 0;
   3894	/*
   3895	 * If the guest prefix changed, re-arm the ipte notifier for the
   3896	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
   3897	 * This ensures that the ipte instruction for this request has
   3898	 * already finished. We might race against a second unmapper that
    3899	 * wants to set the blocking bit. Let's just retry the request loop.
   3900	 */
   3901	if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
   3902		int rc;
   3903		rc = gmap_mprotect_notify(vcpu->arch.gmap,
   3904					  kvm_s390_get_prefix(vcpu),
   3905					  PAGE_SIZE * 2, PROT_WRITE);
   3906		if (rc) {
   3907			kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
   3908			return rc;
   3909		}
   3910		goto retry;
   3911	}
   3912
   3913	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
   3914		vcpu->arch.sie_block->ihcpu = 0xffff;
   3915		goto retry;
   3916	}
   3917
   3918	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
   3919		if (!ibs_enabled(vcpu)) {
   3920			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
   3921			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
   3922		}
   3923		goto retry;
   3924	}
   3925
   3926	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
   3927		if (ibs_enabled(vcpu)) {
   3928			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
   3929			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
   3930		}
   3931		goto retry;
   3932	}
   3933
   3934	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
   3935		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
   3936		goto retry;
   3937	}
   3938
   3939	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
   3940		/*
   3941		 * Disable CMM virtualization; we will emulate the ESSA
   3942		 * instruction manually, in order to provide additional
   3943		 * functionalities needed for live migration.
   3944		 */
   3945		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
   3946		goto retry;
   3947	}
   3948
   3949	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
   3950		/*
   3951		 * Re-enable CMM virtualization if CMMA is available and
   3952		 * CMM has been used.
   3953		 */
   3954		if ((vcpu->kvm->arch.use_cmma) &&
   3955		    (vcpu->kvm->mm->context.uses_cmm))
   3956			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
   3957		goto retry;
   3958	}
   3959
   3960	/* nothing to do, just clear the request */
   3961	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
   3962	/* we left the vsie handler, nothing to do, just clear the request */
   3963	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
   3964
   3965	return 0;
   3966}
   3967
   3968static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
   3969{
   3970	struct kvm_vcpu *vcpu;
   3971	union tod_clock clk;
   3972	unsigned long i;
   3973
   3974	preempt_disable();
   3975
   3976	store_tod_clock_ext(&clk);
   3977
   3978	kvm->arch.epoch = gtod->tod - clk.tod;
   3979	kvm->arch.epdx = 0;
   3980	if (test_kvm_facility(kvm, 139)) {
   3981		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
   3982		if (kvm->arch.epoch > gtod->tod)
   3983			kvm->arch.epdx -= 1;
   3984	}
   3985
   3986	kvm_s390_vcpu_block_all(kvm);
   3987	kvm_for_each_vcpu(i, vcpu, kvm) {
   3988		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
   3989		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
   3990	}
   3991
   3992	kvm_s390_vcpu_unblock_all(kvm);
   3993	preempt_enable();
   3994}
   3995
   3996void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
   3997{
   3998	mutex_lock(&kvm->lock);
   3999	__kvm_s390_set_tod_clock(kvm, gtod);
   4000	mutex_unlock(&kvm->lock);
   4001}
   4002
   4003int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
   4004{
   4005	if (!mutex_trylock(&kvm->lock))
   4006		return 0;
   4007	__kvm_s390_set_tod_clock(kvm, gtod);
   4008	mutex_unlock(&kvm->lock);
   4009	return 1;
   4010}
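
/*
 * Editor's sketch (not part of the kernel source): userspace reaches
 * kvm_s390_set_tod_clock() via the KVM_S390_VM_TOD attribute group on
 * the VM fd. The constants are from the s390 KVM uapi headers; treat
 * the exact payload handling as an assumption of this sketch.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_tod_ext(int vm_fd, struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_device_attr attr = {
		.group = KVM_S390_VM_TOD,
		.attr  = KVM_S390_VM_TOD_EXT,
		.addr  = (__u64)(unsigned long)gtod,
	};

	return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
}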
   4011
   4012/**
   4013 * kvm_arch_fault_in_page - fault-in guest page if necessary
   4014 * @vcpu: The corresponding virtual cpu
   4015 * @gpa: Guest physical address
   4016 * @writable: Whether the page should be writable or not
   4017 *
   4018 * Make sure that a guest page has been faulted-in on the host.
   4019 *
   4020 * Return: Zero on success, negative error code otherwise.
   4021 */
   4022long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
   4023{
   4024	return gmap_fault(vcpu->arch.gmap, gpa,
   4025			  writable ? FAULT_FLAG_WRITE : 0);
   4026}
   4027
   4028static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
   4029				      unsigned long token)
   4030{
   4031	struct kvm_s390_interrupt inti;
   4032	struct kvm_s390_irq irq;
   4033
   4034	if (start_token) {
   4035		irq.u.ext.ext_params2 = token;
   4036		irq.type = KVM_S390_INT_PFAULT_INIT;
   4037		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
   4038	} else {
   4039		inti.type = KVM_S390_INT_PFAULT_DONE;
   4040		inti.parm64 = token;
   4041		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
   4042	}
   4043}
   4044
   4045bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
   4046				     struct kvm_async_pf *work)
   4047{
   4048	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
   4049	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
   4050
   4051	return true;
   4052}
   4053
   4054void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
   4055				 struct kvm_async_pf *work)
   4056{
   4057	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
   4058	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
   4059}
   4060
   4061void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
   4062			       struct kvm_async_pf *work)
   4063{
   4064	/* s390 will always inject the page directly */
   4065}
   4066
   4067bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
   4068{
   4069	/*
   4070	 * s390 will always inject the page directly,
    4071	 * but we still want check_async_completion to clean up
   4072	 */
   4073	return true;
   4074}
   4075
   4076static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
   4077{
   4078	hva_t hva;
   4079	struct kvm_arch_async_pf arch;
   4080
   4081	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
   4082		return false;
   4083	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
   4084	    vcpu->arch.pfault_compare)
   4085		return false;
   4086	if (psw_extint_disabled(vcpu))
   4087		return false;
   4088	if (kvm_s390_vcpu_has_irq(vcpu, 0))
   4089		return false;
   4090	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
   4091		return false;
   4092	if (!vcpu->arch.gmap->pfault_enabled)
   4093		return false;
   4094
   4095	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
   4096	hva += current->thread.gmap_addr & ~PAGE_MASK;
   4097	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
   4098		return false;
   4099
   4100	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
   4101}
   4102
   4103static int vcpu_pre_run(struct kvm_vcpu *vcpu)
   4104{
   4105	int rc, cpuflags;
   4106
   4107	/*
   4108	 * On s390 notifications for arriving pages will be delivered directly
    4109	 * to the guest, but the housekeeping for completed pfaults is
   4110	 * handled outside the worker.
   4111	 */
   4112	kvm_check_async_pf_completion(vcpu);
   4113
   4114	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
   4115	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
   4116
   4117	if (need_resched())
   4118		schedule();
   4119
   4120	if (!kvm_is_ucontrol(vcpu->kvm)) {
   4121		rc = kvm_s390_deliver_pending_interrupts(vcpu);
   4122		if (rc)
   4123			return rc;
   4124	}
   4125
   4126	rc = kvm_s390_handle_requests(vcpu);
   4127	if (rc)
   4128		return rc;
   4129
   4130	if (guestdbg_enabled(vcpu)) {
   4131		kvm_s390_backup_guest_per_regs(vcpu);
   4132		kvm_s390_patch_guest_per_regs(vcpu);
   4133	}
   4134
   4135	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
   4136
   4137	vcpu->arch.sie_block->icptcode = 0;
   4138	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
   4139	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
   4140	trace_kvm_s390_sie_enter(vcpu, cpuflags);
   4141
   4142	return 0;
   4143}
   4144
   4145static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
   4146{
   4147	struct kvm_s390_pgm_info pgm_info = {
   4148		.code = PGM_ADDRESSING,
   4149	};
   4150	u8 opcode, ilen;
   4151	int rc;
   4152
   4153	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
   4154	trace_kvm_s390_sie_fault(vcpu);
   4155
   4156	/*
   4157	 * We want to inject an addressing exception, which is defined as a
   4158	 * suppressing or terminating exception. However, since we came here
   4159	 * by a DAT access exception, the PSW still points to the faulting
   4160	 * instruction since DAT exceptions are nullifying. So we've got
   4161	 * to look up the current opcode to get the length of the instruction
   4162	 * to be able to forward the PSW.
   4163	 */
   4164	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
   4165	ilen = insn_length(opcode);
   4166	if (rc < 0) {
   4167		return rc;
   4168	} else if (rc) {
   4169		/* Instruction-Fetching Exceptions - we can't detect the ilen.
   4170		 * Forward by arbitrary ilc, injection will take care of
   4171		 * nullification if necessary.
   4172		 */
   4173		pgm_info = vcpu->arch.pgm;
   4174		ilen = 4;
   4175	}
   4176	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
   4177	kvm_s390_forward_psw(vcpu, ilen);
   4178	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
   4179}
   4180
   4181static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
   4182{
   4183	struct mcck_volatile_info *mcck_info;
   4184	struct sie_page *sie_page;
   4185
   4186	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
   4187		   vcpu->arch.sie_block->icptcode);
   4188	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
   4189
   4190	if (guestdbg_enabled(vcpu))
   4191		kvm_s390_restore_guest_per_regs(vcpu);
   4192
   4193	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
   4194	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
   4195
   4196	if (exit_reason == -EINTR) {
   4197		VCPU_EVENT(vcpu, 3, "%s", "machine check");
   4198		sie_page = container_of(vcpu->arch.sie_block,
   4199					struct sie_page, sie_block);
   4200		mcck_info = &sie_page->mcck_info;
   4201		kvm_s390_reinject_machine_check(vcpu, mcck_info);
   4202		return 0;
   4203	}
   4204
   4205	if (vcpu->arch.sie_block->icptcode > 0) {
   4206		int rc = kvm_handle_sie_intercept(vcpu);
   4207
   4208		if (rc != -EOPNOTSUPP)
   4209			return rc;
   4210		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
   4211		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
   4212		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
   4213		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
   4214		return -EREMOTE;
   4215	} else if (exit_reason != -EFAULT) {
   4216		vcpu->stat.exit_null++;
   4217		return 0;
   4218	} else if (kvm_is_ucontrol(vcpu->kvm)) {
   4219		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
   4220		vcpu->run->s390_ucontrol.trans_exc_code =
   4221						current->thread.gmap_addr;
   4222		vcpu->run->s390_ucontrol.pgm_code = 0x10;
   4223		return -EREMOTE;
   4224	} else if (current->thread.gmap_pfault) {
   4225		trace_kvm_s390_major_guest_pfault(vcpu);
   4226		current->thread.gmap_pfault = 0;
   4227		if (kvm_arch_setup_async_pf(vcpu))
   4228			return 0;
   4229		vcpu->stat.pfault_sync++;
   4230		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
   4231	}
   4232	return vcpu_post_run_fault_in_sie(vcpu);
   4233}
   4234
   4235#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
   4236static int __vcpu_run(struct kvm_vcpu *vcpu)
   4237{
   4238	int rc, exit_reason;
   4239	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
   4240
   4241	/*
    4242	 * We try to hold kvm->srcu during most of vcpu_run (except when
    4243	 * running the guest), so that memslots (and other stuff) are protected
   4244	 */
   4245	kvm_vcpu_srcu_read_lock(vcpu);
   4246
   4247	do {
   4248		rc = vcpu_pre_run(vcpu);
   4249		if (rc)
   4250			break;
   4251
   4252		kvm_vcpu_srcu_read_unlock(vcpu);
   4253		/*
    4254		 * As PF_VCPU will be used in the fault handler, there must be
    4255		 * no uaccess between guest_enter and guest_exit.
   4256		 */
   4257		local_irq_disable();
   4258		guest_enter_irqoff();
   4259		__disable_cpu_timer_accounting(vcpu);
   4260		local_irq_enable();
   4261		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
   4262			memcpy(sie_page->pv_grregs,
   4263			       vcpu->run->s.regs.gprs,
   4264			       sizeof(sie_page->pv_grregs));
   4265		}
   4266		if (test_cpu_flag(CIF_FPU))
   4267			load_fpu_regs();
   4268		exit_reason = sie64a(vcpu->arch.sie_block,
   4269				     vcpu->run->s.regs.gprs);
   4270		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
   4271			memcpy(vcpu->run->s.regs.gprs,
   4272			       sie_page->pv_grregs,
   4273			       sizeof(sie_page->pv_grregs));
   4274			/*
   4275			 * We're not allowed to inject interrupts on intercepts
   4276			 * that leave the guest state in an "in-between" state
   4277			 * where the next SIE entry will do a continuation.
   4278			 * Fence interrupts in our "internal" PSW.
   4279			 */
   4280			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
   4281			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
   4282				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
   4283			}
   4284		}
   4285		local_irq_disable();
   4286		__enable_cpu_timer_accounting(vcpu);
   4287		guest_exit_irqoff();
   4288		local_irq_enable();
   4289		kvm_vcpu_srcu_read_lock(vcpu);
   4290
   4291		rc = vcpu_post_run(vcpu, exit_reason);
   4292	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
   4293
   4294	kvm_vcpu_srcu_read_unlock(vcpu);
   4295	return rc;
   4296}
   4297
   4298static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
   4299{
   4300	struct kvm_run *kvm_run = vcpu->run;
   4301	struct runtime_instr_cb *riccb;
   4302	struct gs_cb *gscb;
   4303
   4304	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
   4305	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
   4306	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
   4307	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
   4308	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
   4309		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
   4310		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
   4311		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
   4312	}
   4313	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
   4314		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
   4315		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
   4316		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
   4317		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
   4318			kvm_clear_async_pf_completion_queue(vcpu);
   4319	}
   4320	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
   4321		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
   4322		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
   4323		VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
   4324	}
   4325	/*
   4326	 * If userspace sets the riccb (e.g. after migration) to a valid state,
   4327	 * we should enable RI here instead of doing the lazy enablement.
   4328	 */
   4329	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
   4330	    test_kvm_facility(vcpu->kvm, 64) &&
   4331	    riccb->v &&
   4332	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
   4333		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
   4334		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
   4335	}
   4336	/*
   4337	 * If userspace sets the gscb (e.g. after migration) to non-zero,
   4338	 * we should enable GS here instead of doing the lazy enablement.
   4339	 */
   4340	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
   4341	    test_kvm_facility(vcpu->kvm, 133) &&
   4342	    gscb->gssm &&
   4343	    !vcpu->arch.gs_enabled) {
   4344		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
   4345		vcpu->arch.sie_block->ecb |= ECB_GS;
   4346		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
   4347		vcpu->arch.gs_enabled = 1;
   4348	}
   4349	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
   4350	    test_kvm_facility(vcpu->kvm, 82)) {
   4351		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
   4352		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
   4353	}
   4354	if (MACHINE_HAS_GS) {
   4355		preempt_disable();
   4356		__ctl_set_bit(2, 4);
   4357		if (current->thread.gs_cb) {
   4358			vcpu->arch.host_gscb = current->thread.gs_cb;
   4359			save_gs_cb(vcpu->arch.host_gscb);
   4360		}
   4361		if (vcpu->arch.gs_enabled) {
   4362			current->thread.gs_cb = (struct gs_cb *)
   4363						&vcpu->run->s.regs.gscb;
   4364			restore_gs_cb(current->thread.gs_cb);
   4365		}
   4366		preempt_enable();
   4367	}
   4368	/* SIE will load etoken directly from SDNX and therefore kvm_run */
   4369}
   4370
   4371static void sync_regs(struct kvm_vcpu *vcpu)
   4372{
   4373	struct kvm_run *kvm_run = vcpu->run;
   4374
   4375	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
   4376		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
   4377	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
   4378		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
   4379		/* some control register changes require a tlb flush */
   4380		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
   4381	}
   4382	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
   4383		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
   4384		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
   4385	}
   4386	save_access_regs(vcpu->arch.host_acrs);
   4387	restore_access_regs(vcpu->run->s.regs.acrs);
   4388	/* save host (userspace) fprs/vrs */
   4389	save_fpu_regs();
   4390	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
   4391	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
   4392	if (MACHINE_HAS_VX)
   4393		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
   4394	else
   4395		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
   4396	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
   4397	if (test_fp_ctl(current->thread.fpu.fpc))
   4398		/* User space provided an invalid FPC, let's clear it */
   4399		current->thread.fpu.fpc = 0;
   4400
   4401	/* Sync fmt2 only data */
   4402	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
   4403		sync_regs_fmt2(vcpu);
   4404	} else {
   4405		/*
   4406		 * In several places we have to modify our internal view to
   4407		 * not do things that are disallowed by the ultravisor. For
   4408		 * example we must not inject interrupts after specific exits
   4409		 * (e.g. 112 prefix page not secure). We do this by turning
   4410		 * off the machine check, external and I/O interrupt bits
   4411		 * of our PSW copy. To avoid getting validity intercepts, we
   4412		 * do only accept the condition code from userspace.
   4413		 */
   4414		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
   4415		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
   4416						   PSW_MASK_CC;
   4417	}
   4418
   4419	kvm_run->kvm_dirty_regs = 0;
   4420}
   4421
   4422static void store_regs_fmt2(struct kvm_vcpu *vcpu)
   4423{
   4424	struct kvm_run *kvm_run = vcpu->run;
   4425
   4426	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
   4427	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
   4428	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
   4429	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
   4430	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
   4431	if (MACHINE_HAS_GS) {
   4432		preempt_disable();
   4433		__ctl_set_bit(2, 4);
   4434		if (vcpu->arch.gs_enabled)
   4435			save_gs_cb(current->thread.gs_cb);
   4436		current->thread.gs_cb = vcpu->arch.host_gscb;
   4437		restore_gs_cb(vcpu->arch.host_gscb);
   4438		if (!vcpu->arch.host_gscb)
   4439			__ctl_clear_bit(2, 4);
   4440		vcpu->arch.host_gscb = NULL;
   4441		preempt_enable();
   4442	}
   4443	/* SIE will save etoken directly into SDNX and therefore kvm_run */
   4444}
   4445
   4446static void store_regs(struct kvm_vcpu *vcpu)
   4447{
   4448	struct kvm_run *kvm_run = vcpu->run;
   4449
   4450	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
   4451	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
   4452	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
   4453	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
   4454	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
   4455	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
   4456	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
   4457	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
   4458	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
   4459	save_access_regs(vcpu->run->s.regs.acrs);
   4460	restore_access_regs(vcpu->arch.host_acrs);
   4461	/* Save guest register state */
   4462	save_fpu_regs();
   4463	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
   4464	/* Restore will be done lazily at return */
   4465	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
   4466	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
   4467	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
   4468		store_regs_fmt2(vcpu);
   4469}
   4470
   4471int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
   4472{
   4473	struct kvm_run *kvm_run = vcpu->run;
   4474	int rc;
   4475
   4476	if (kvm_run->immediate_exit)
   4477		return -EINTR;
   4478
   4479	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
   4480	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
   4481		return -EINVAL;
   4482
   4483	vcpu_load(vcpu);
   4484
   4485	if (guestdbg_exit_pending(vcpu)) {
   4486		kvm_s390_prepare_debug_exit(vcpu);
   4487		rc = 0;
   4488		goto out;
   4489	}
   4490
   4491	kvm_sigset_activate(vcpu);
   4492
   4493	/*
    4494	 * No need to check the return value of vcpu_start: it can only fail
    4495	 * for protvirt, and protvirt implies user-controlled cpu state.
   4496	 */
   4497	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
   4498		kvm_s390_vcpu_start(vcpu);
   4499	} else if (is_vcpu_stopped(vcpu)) {
   4500		pr_err_ratelimited("can't run stopped vcpu %d\n",
   4501				   vcpu->vcpu_id);
   4502		rc = -EINVAL;
   4503		goto out;
   4504	}
   4505
   4506	sync_regs(vcpu);
   4507	enable_cpu_timer_accounting(vcpu);
   4508
   4509	might_fault();
   4510	rc = __vcpu_run(vcpu);
   4511
   4512	if (signal_pending(current) && !rc) {
   4513		kvm_run->exit_reason = KVM_EXIT_INTR;
   4514		rc = -EINTR;
   4515	}
   4516
   4517	if (guestdbg_exit_pending(vcpu) && !rc)  {
   4518		kvm_s390_prepare_debug_exit(vcpu);
   4519		rc = 0;
   4520	}
   4521
   4522	if (rc == -EREMOTE) {
   4523		/* userspace support is needed, kvm_run has been prepared */
   4524		rc = 0;
   4525	}
   4526
   4527	disable_cpu_timer_accounting(vcpu);
   4528	store_regs(vcpu);
   4529
   4530	kvm_sigset_deactivate(vcpu);
   4531
   4532	vcpu->stat.exit_userspace++;
   4533out:
   4534	vcpu_put(vcpu);
   4535	return rc;
   4536}
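
/*
 * Editor's sketch (not part of the kernel source): the minimal run loop
 * driving kvm_arch_vcpu_ioctl_run() above. Note that -EREMOTE never
 * reaches userspace; it is folded into rc = 0 with kvm_run prepared, so
 * userspace sees a regular exit_reason instead.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int run_until_exit(int vcpu_fd, struct kvm_run *run)
{
	do {
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
			return -1;	/* e.g. EINTR on a pending signal */
	} while (run->exit_reason == KVM_EXIT_INTR);

	return run->exit_reason;	/* e.g. KVM_EXIT_S390_SIEIC */
}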
   4537
   4538/*
   4539 * store status at address
    4540 * we have two special cases:
   4541 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
   4542 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
   4543 */
   4544int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
   4545{
   4546	unsigned char archmode = 1;
   4547	freg_t fprs[NUM_FPRS];
   4548	unsigned int px;
   4549	u64 clkcomp, cputm;
   4550	int rc;
   4551
   4552	px = kvm_s390_get_prefix(vcpu);
   4553	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
   4554		if (write_guest_abs(vcpu, 163, &archmode, 1))
   4555			return -EFAULT;
   4556		gpa = 0;
   4557	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
   4558		if (write_guest_real(vcpu, 163, &archmode, 1))
   4559			return -EFAULT;
   4560		gpa = px;
   4561	} else
   4562		gpa -= __LC_FPREGS_SAVE_AREA;
   4563
   4564	/* manually convert vector registers if necessary */
   4565	if (MACHINE_HAS_VX) {
   4566		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
   4567		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
   4568				     fprs, 128);
   4569	} else {
   4570		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
   4571				     vcpu->run->s.regs.fprs, 128);
   4572	}
   4573	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
   4574			      vcpu->run->s.regs.gprs, 128);
   4575	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
   4576			      &vcpu->arch.sie_block->gpsw, 16);
   4577	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
   4578			      &px, 4);
   4579	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
   4580			      &vcpu->run->s.regs.fpc, 4);
   4581	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
   4582			      &vcpu->arch.sie_block->todpr, 4);
   4583	cputm = kvm_s390_get_cpu_timer(vcpu);
   4584	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
   4585			      &cputm, 8);
   4586	clkcomp = vcpu->arch.sie_block->ckc >> 8;
   4587	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
   4588			      &clkcomp, 8);
   4589	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
   4590			      &vcpu->run->s.regs.acrs, 64);
   4591	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
   4592			      &vcpu->arch.sie_block->gcr, 128);
   4593	return rc ? -EFAULT : 0;
   4594}
   4595
   4596int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
   4597{
   4598	/*
   4599	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
   4600	 * switch in the run ioctl. Let's update our copies before we save
   4601	 * it into the save area
   4602	 */
   4603	save_fpu_regs();
   4604	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
   4605	save_access_regs(vcpu->run->s.regs.acrs);
   4606
   4607	return kvm_s390_store_status_unloaded(vcpu, addr);
   4608}
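
/*
 * Editor's sketch (not part of the kernel source): triggering the store
 * above from userspace. The argument is a guest absolute address or one
 * of the two special values documented before
 * kvm_s390_store_status_unloaded().
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int store_status_demo(int vcpu_fd, unsigned long gpa)
{
	/* gpa may also be KVM_S390_STORE_STATUS_NOADDR or ..._PREFIXED */
	return ioctl(vcpu_fd, KVM_S390_STORE_STATUS, gpa);
}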
   4609
   4610static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
   4611{
   4612	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
   4613	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
   4614}
   4615
   4616static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
   4617{
   4618	unsigned long i;
   4619	struct kvm_vcpu *vcpu;
   4620
   4621	kvm_for_each_vcpu(i, vcpu, kvm) {
   4622		__disable_ibs_on_vcpu(vcpu);
   4623	}
   4624}
   4625
   4626static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
   4627{
   4628	if (!sclp.has_ibs)
   4629		return;
   4630	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
   4631	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
   4632}
   4633
   4634int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
   4635{
   4636	int i, online_vcpus, r = 0, started_vcpus = 0;
   4637
   4638	if (!is_vcpu_stopped(vcpu))
   4639		return 0;
   4640
   4641	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
   4642	/* Only one cpu at a time may enter/leave the STOPPED state. */
   4643	spin_lock(&vcpu->kvm->arch.start_stop_lock);
   4644	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
   4645
   4646	/* Let's tell the UV that we want to change into the operating state */
   4647	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
   4648		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
   4649		if (r) {
   4650			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
   4651			return r;
   4652		}
   4653	}
   4654
   4655	for (i = 0; i < online_vcpus; i++) {
   4656		if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
   4657			started_vcpus++;
   4658	}
   4659
   4660	if (started_vcpus == 0) {
   4661		/* we're the only active VCPU -> speed it up */
   4662		__enable_ibs_on_vcpu(vcpu);
   4663	} else if (started_vcpus == 1) {
   4664		/*
   4665		 * As we are starting a second VCPU, we have to disable
   4666		 * the IBS facility on all VCPUs to remove potentially
   4667		 * outstanding ENABLE requests.
   4668		 */
   4669		__disable_ibs_on_all_vcpus(vcpu->kvm);
   4670	}
   4671
   4672	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
   4673	/*
   4674	 * The real PSW might have changed due to a RESTART interpreted by the
   4675	 * ultravisor. We block all interrupts and let the next sie exit
   4676	 * refresh our view.
   4677	 */
   4678	if (kvm_s390_pv_cpu_is_protected(vcpu))
   4679		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
   4680	/*
   4681	 * Another VCPU might have used IBS while we were offline.
   4682	 * Let's play safe and flush the VCPU at startup.
   4683	 */
   4684	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
   4685	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
   4686	return 0;
   4687}
   4688
   4689int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
   4690{
   4691	int i, online_vcpus, r = 0, started_vcpus = 0;
   4692	struct kvm_vcpu *started_vcpu = NULL;
   4693
   4694	if (is_vcpu_stopped(vcpu))
   4695		return 0;
   4696
   4697	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
   4698	/* Only one cpu at a time may enter/leave the STOPPED state. */
   4699	spin_lock(&vcpu->kvm->arch.start_stop_lock);
   4700	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
   4701
   4702	/* Let's tell the UV that we want to change into the stopped state */
   4703	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
   4704		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
   4705		if (r) {
   4706			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
   4707			return r;
   4708		}
   4709	}
   4710
   4711	/*
   4712	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
   4713	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
   4714	 * have been fully processed. This will ensure that the VCPU
   4715	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
   4716	 */
   4717	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
   4718	kvm_s390_clear_stop_irq(vcpu);
   4719
   4720	__disable_ibs_on_vcpu(vcpu);
   4721
   4722	for (i = 0; i < online_vcpus; i++) {
   4723		struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
   4724
   4725		if (!is_vcpu_stopped(tmp)) {
   4726			started_vcpus++;
   4727			started_vcpu = tmp;
   4728		}
   4729	}
   4730
   4731	if (started_vcpus == 1) {
   4732		/*
   4733		 * As we only have one VCPU left, we want to enable the
   4734		 * IBS facility for that VCPU to speed it up.
   4735		 */
   4736		__enable_ibs_on_vcpu(started_vcpu);
   4737	}
   4738
   4739	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
   4740	return 0;
   4741}
   4742
   4743static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
   4744				     struct kvm_enable_cap *cap)
   4745{
   4746	int r;
   4747
   4748	if (cap->flags)
   4749		return -EINVAL;
   4750
   4751	switch (cap->cap) {
   4752	case KVM_CAP_S390_CSS_SUPPORT:
   4753		if (!vcpu->kvm->arch.css_support) {
   4754			vcpu->kvm->arch.css_support = 1;
   4755			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
   4756			trace_kvm_s390_enable_css(vcpu->kvm);
   4757		}
   4758		r = 0;
   4759		break;
   4760	default:
   4761		r = -EINVAL;
   4762		break;
   4763	}
   4764	return r;
   4765}
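
/*
 * Editor's sketch (not part of the kernel source): enabling CSS support
 * through the handler above; note that cap->flags must be zero and that
 * the flag is per-VM even though the ioctl is issued on a vcpu fd.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_css_demo(int vcpu_fd)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_S390_CSS_SUPPORT;
	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}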
   4766
   4767static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
   4768				  struct kvm_s390_mem_op *mop)
   4769{
   4770	void __user *uaddr = (void __user *)mop->buf;
   4771	int r = 0;
   4772
   4773	if (mop->flags || !mop->size)
   4774		return -EINVAL;
   4775	if (mop->size + mop->sida_offset < mop->size)
   4776		return -EINVAL;
   4777	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
   4778		return -E2BIG;
   4779	if (!kvm_s390_pv_cpu_is_protected(vcpu))
   4780		return -EINVAL;
   4781
   4782	switch (mop->op) {
   4783	case KVM_S390_MEMOP_SIDA_READ:
   4784		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
   4785				 mop->sida_offset), mop->size))
   4786			r = -EFAULT;
   4787
   4788		break;
   4789	case KVM_S390_MEMOP_SIDA_WRITE:
   4790		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
   4791				   mop->sida_offset), uaddr, mop->size))
   4792			r = -EFAULT;
   4793		break;
   4794	}
   4795	return r;
   4796}
   4797
   4798static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
   4799				 struct kvm_s390_mem_op *mop)
   4800{
   4801	void __user *uaddr = (void __user *)mop->buf;
   4802	void *tmpbuf = NULL;
   4803	int r = 0;
   4804	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
   4805				    | KVM_S390_MEMOP_F_CHECK_ONLY
   4806				    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
   4807
   4808	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
   4809		return -EINVAL;
   4810	if (mop->size > MEM_OP_MAX_SIZE)
   4811		return -E2BIG;
   4812	if (kvm_s390_pv_cpu_is_protected(vcpu))
   4813		return -EINVAL;
   4814	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
   4815		if (access_key_invalid(mop->key))
   4816			return -EINVAL;
   4817	} else {
   4818		mop->key = 0;
   4819	}
   4820	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
   4821		tmpbuf = vmalloc(mop->size);
   4822		if (!tmpbuf)
   4823			return -ENOMEM;
   4824	}
   4825
   4826	switch (mop->op) {
   4827	case KVM_S390_MEMOP_LOGICAL_READ:
   4828		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
   4829			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
   4830					    GACC_FETCH, mop->key);
   4831			break;
   4832		}
   4833		r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
   4834					mop->size, mop->key);
   4835		if (r == 0) {
   4836			if (copy_to_user(uaddr, tmpbuf, mop->size))
   4837				r = -EFAULT;
   4838		}
   4839		break;
   4840	case KVM_S390_MEMOP_LOGICAL_WRITE:
   4841		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
   4842			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
   4843					    GACC_STORE, mop->key);
   4844			break;
   4845		}
   4846		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
   4847			r = -EFAULT;
   4848			break;
   4849		}
   4850		r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
   4851					 mop->size, mop->key);
   4852		break;
   4853	}
   4854
   4855	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
   4856		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
   4857
   4858	vfree(tmpbuf);
   4859	return r;
   4860}
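
/*
 * Illustrative (untested) userspace sketch of a logical read through
 * kvm_s390_vcpu_mem_op() above; vcpu_fd and buf are assumed to exist:
 *
 *	struct kvm_s390_mem_op mop = {
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.gaddr = 0x1000,		// hypothetical guest address
 *		.size  = 512,			// must be <= MEM_OP_MAX_SIZE
 *		.buf   = (__u64)(unsigned long)buf,
 *		.ar    = 0,			// access register 0
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop) < 0)
 *		err(1, "KVM_S390_MEM_OP");
 *
 * A return value > 0 is the program interruption code of a failed guest
 * access; with KVM_S390_MEMOP_F_CHECK_ONLY set, only the access check
 * runs and no data is copied.
 */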
   4861
   4862static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
   4863				     struct kvm_s390_mem_op *mop)
   4864{
   4865	int r, srcu_idx;
   4866
   4867	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
   4868
   4869	switch (mop->op) {
   4870	case KVM_S390_MEMOP_LOGICAL_READ:
   4871	case KVM_S390_MEMOP_LOGICAL_WRITE:
   4872		r = kvm_s390_vcpu_mem_op(vcpu, mop);
   4873		break;
   4874	case KVM_S390_MEMOP_SIDA_READ:
   4875	case KVM_S390_MEMOP_SIDA_WRITE:
    4876		/* holding the vcpu->mutex locks us against the sida going away */
   4877		r = kvm_s390_vcpu_sida_op(vcpu, mop);
   4878		break;
   4879	default:
   4880		r = -EINVAL;
   4881	}
   4882
   4883	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
   4884	return r;
   4885}
   4886
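/*
 * These injection ioctls are dispatched on the "async" path: the generic
 * kvm_vcpu_ioctl() code calls this hook before taking vcpu->mutex, so
 * userspace can inject interrupts while the VCPU is running.
 */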
   4887long kvm_arch_vcpu_async_ioctl(struct file *filp,
   4888			       unsigned int ioctl, unsigned long arg)
   4889{
   4890	struct kvm_vcpu *vcpu = filp->private_data;
   4891	void __user *argp = (void __user *)arg;
   4892
   4893	switch (ioctl) {
   4894	case KVM_S390_IRQ: {
   4895		struct kvm_s390_irq s390irq;
   4896
   4897		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
   4898			return -EFAULT;
   4899		return kvm_s390_inject_vcpu(vcpu, &s390irq);
   4900	}
   4901	case KVM_S390_INTERRUPT: {
   4902		struct kvm_s390_interrupt s390int;
   4903		struct kvm_s390_irq s390irq = {};
   4904
   4905		if (copy_from_user(&s390int, argp, sizeof(s390int)))
   4906			return -EFAULT;
   4907		if (s390int_to_s390irq(&s390int, &s390irq))
   4908			return -EINVAL;
   4909		return kvm_s390_inject_vcpu(vcpu, &s390irq);
   4910	}
   4911	}
   4912	return -ENOIOCTLCMD;
   4913}
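
/*
 * Illustrative (untested) sketch of an injection through the async path
 * above; vcpu_fd is assumed and the interrupt type chosen is arbitrary:
 *
 *	struct kvm_s390_irq irq = {
 *		.type = KVM_S390_INT_EMERGENCY,
 *		.u.emerg.code = 0,		// sending CPU address
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_S390_IRQ, &irq) < 0)
 *		err(1, "KVM_S390_IRQ");
 */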
   4914
   4915long kvm_arch_vcpu_ioctl(struct file *filp,
   4916			 unsigned int ioctl, unsigned long arg)
   4917{
   4918	struct kvm_vcpu *vcpu = filp->private_data;
   4919	void __user *argp = (void __user *)arg;
   4920	int idx;
   4921	long r;
   4922	u16 rc, rrc;
   4923
   4924	vcpu_load(vcpu);
   4925
   4926	switch (ioctl) {
   4927	case KVM_S390_STORE_STATUS:
   4928		idx = srcu_read_lock(&vcpu->kvm->srcu);
   4929		r = kvm_s390_store_status_unloaded(vcpu, arg);
   4930		srcu_read_unlock(&vcpu->kvm->srcu, idx);
   4931		break;
   4932	case KVM_S390_SET_INITIAL_PSW: {
   4933		psw_t psw;
   4934
   4935		r = -EFAULT;
   4936		if (copy_from_user(&psw, argp, sizeof(psw)))
   4937			break;
   4938		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
   4939		break;
   4940	}
   4941	case KVM_S390_CLEAR_RESET:
   4942		r = 0;
   4943		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
   4944		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
   4945			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
   4946					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
   4947			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
   4948				   rc, rrc);
   4949		}
   4950		break;
   4951	case KVM_S390_INITIAL_RESET:
   4952		r = 0;
   4953		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
   4954		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
   4955			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
   4956					  UVC_CMD_CPU_RESET_INITIAL,
   4957					  &rc, &rrc);
   4958			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
   4959				   rc, rrc);
   4960		}
   4961		break;
   4962	case KVM_S390_NORMAL_RESET:
   4963		r = 0;
   4964		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
   4965		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
   4966			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
   4967					  UVC_CMD_CPU_RESET, &rc, &rrc);
   4968			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
   4969				   rc, rrc);
   4970		}
   4971		break;
   4972	case KVM_SET_ONE_REG:
   4973	case KVM_GET_ONE_REG: {
   4974		struct kvm_one_reg reg;
   4975		r = -EINVAL;
   4976		if (kvm_s390_pv_cpu_is_protected(vcpu))
   4977			break;
   4978		r = -EFAULT;
   4979		if (copy_from_user(&reg, argp, sizeof(reg)))
   4980			break;
   4981		if (ioctl == KVM_SET_ONE_REG)
   4982			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
   4983		else
   4984			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
   4985		break;
   4986	}
   4987#ifdef CONFIG_KVM_S390_UCONTROL
   4988	case KVM_S390_UCAS_MAP: {
   4989		struct kvm_s390_ucas_mapping ucasmap;
   4990
   4991		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
   4992			r = -EFAULT;
   4993			break;
   4994		}
   4995
   4996		if (!kvm_is_ucontrol(vcpu->kvm)) {
   4997			r = -EINVAL;
   4998			break;
   4999		}
   5000
   5001		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
   5002				     ucasmap.vcpu_addr, ucasmap.length);
   5003		break;
   5004	}
   5005	case KVM_S390_UCAS_UNMAP: {
   5006		struct kvm_s390_ucas_mapping ucasmap;
   5007
   5008		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
   5009			r = -EFAULT;
   5010			break;
   5011		}
   5012
   5013		if (!kvm_is_ucontrol(vcpu->kvm)) {
   5014			r = -EINVAL;
   5015			break;
   5016		}
   5017
   5018		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
   5019			ucasmap.length);
   5020		break;
   5021	}
   5022#endif
   5023	case KVM_S390_VCPU_FAULT: {
   5024		r = gmap_fault(vcpu->arch.gmap, arg, 0);
   5025		break;
   5026	}
    5027	case KVM_ENABLE_CAP: {
   5029		struct kvm_enable_cap cap;
   5030		r = -EFAULT;
   5031		if (copy_from_user(&cap, argp, sizeof(cap)))
   5032			break;
   5033		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
   5034		break;
   5035	}
   5036	case KVM_S390_MEM_OP: {
   5037		struct kvm_s390_mem_op mem_op;
   5038
   5039		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
   5040			r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
   5041		else
   5042			r = -EFAULT;
   5043		break;
   5044	}
   5045	case KVM_S390_SET_IRQ_STATE: {
   5046		struct kvm_s390_irq_state irq_state;
   5047
   5048		r = -EFAULT;
   5049		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
   5050			break;
   5051		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
   5052		    irq_state.len == 0 ||
   5053		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
   5054			r = -EINVAL;
   5055			break;
   5056		}
   5057		/* do not use irq_state.flags, it will break old QEMUs */
   5058		r = kvm_s390_set_irq_state(vcpu,
   5059					   (void __user *) irq_state.buf,
   5060					   irq_state.len);
   5061		break;
   5062	}
   5063	case KVM_S390_GET_IRQ_STATE: {
   5064		struct kvm_s390_irq_state irq_state;
   5065
   5066		r = -EFAULT;
   5067		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
   5068			break;
   5069		if (irq_state.len == 0) {
   5070			r = -EINVAL;
   5071			break;
   5072		}
   5073		/* do not use irq_state.flags, it will break old QEMUs */
   5074		r = kvm_s390_get_irq_state(vcpu,
   5075					   (__u8 __user *)  irq_state.buf,
   5076					   irq_state.len);
   5077		break;
   5078	}
   5079	default:
   5080		r = -ENOTTY;
   5081	}
   5082
   5083	vcpu_put(vcpu);
   5084	return r;
   5085}
   5086
   5087vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
   5088{
   5089#ifdef CONFIG_KVM_S390_UCONTROL
    5090	if (vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET &&
    5091	    kvm_is_ucontrol(vcpu->kvm)) {
   5092		vmf->page = virt_to_page(vcpu->arch.sie_block);
   5093		get_page(vmf->page);
   5094		return 0;
   5095	}
   5096#endif
   5097	return VM_FAULT_SIGBUS;
   5098}
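
/*
 * Illustrative (untested) sketch: for a user-controlled VM, userspace can
 * map the SIE control block of a VCPU through the fault handler above
 * (vcpu_fd and page_size assumed):
 *
 *	void *sie = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
 *			 MAP_SHARED, vcpu_fd,
 *			 KVM_S390_SIE_PAGE_OFFSET * page_size);
 *
 * Any other offset, or any fault on a non-ucontrol VM, raises SIGBUS.
 */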
   5099
   5100/* Section: memory related */
   5101int kvm_arch_prepare_memory_region(struct kvm *kvm,
   5102				   const struct kvm_memory_slot *old,
   5103				   struct kvm_memory_slot *new,
   5104				   enum kvm_mr_change change)
   5105{
   5106	gpa_t size;
   5107
   5108	/* When we are protected, we should not change the memory slots */
   5109	if (kvm_s390_pv_get_handle(kvm))
   5110		return -EINVAL;
   5111
   5112	if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
   5113		return 0;
   5114
    5115	/* A few sanity checks. Memory slots have to start and end at a
    5116	   segment boundary (1 MB). The backing memory in userland may be
    5117	   fragmented across several different vmas, and it is fine to mmap()
    5118	   and munmap() parts of this slot at any time after this call. */
   5119
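	/*
	 * Hypothetical example values: userspace_addr = 0x80000000 with
	 * npages = 0x300 (3 MB) passes the checks below, while a start of
	 * 0x80080000 or a size of 2.5 MB fails the alignment test (the
	 * mask 0xfffff covers the low 20 bits, i.e. one 1 MB segment).
	 */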
   5120	if (new->userspace_addr & 0xffffful)
   5121		return -EINVAL;
   5122
   5123	size = new->npages * PAGE_SIZE;
   5124	if (size & 0xffffful)
   5125		return -EINVAL;
   5126
   5127	if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
   5128		return -EINVAL;
   5129
   5130	return 0;
   5131}
   5132
   5133void kvm_arch_commit_memory_region(struct kvm *kvm,
   5134				struct kvm_memory_slot *old,
   5135				const struct kvm_memory_slot *new,
   5136				enum kvm_mr_change change)
   5137{
   5138	int rc = 0;
   5139
   5140	switch (change) {
   5141	case KVM_MR_DELETE:
   5142		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
   5143					old->npages * PAGE_SIZE);
   5144		break;
   5145	case KVM_MR_MOVE:
   5146		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
   5147					old->npages * PAGE_SIZE);
   5148		if (rc)
   5149			break;
   5150		fallthrough;
   5151	case KVM_MR_CREATE:
   5152		rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
   5153				      new->base_gfn * PAGE_SIZE,
   5154				      new->npages * PAGE_SIZE);
   5155		break;
   5156	case KVM_MR_FLAGS_ONLY:
   5157		break;
   5158	default:
   5159		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
   5160	}
   5161	if (rc)
   5162		pr_warn("failed to commit memory region\n");
   5164}
   5165
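/*
 * Descriptive note (derived from the code below, not from architecture
 * documentation): sclp.hmfai packs one 2-bit field per facility-list
 * doubleword, most-significant field first. For doubleword i, a field
 * value of n keeps only the low (48 - 16 * n) bits of the mask, i.e.
 * the facility bits that remain usable at the non-hypervisor level.
 */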
   5166static inline unsigned long nonhyp_mask(int i)
   5167{
   5168	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
   5169
   5170	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
   5171}
   5172
   5173static int __init kvm_s390_init(void)
   5174{
   5175	int i;
   5176
   5177	if (!sclp.has_sief2) {
   5178		pr_info("SIE is not available\n");
   5179		return -ENODEV;
   5180	}
   5181
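	/*
	 * As of this code, the gmap shadowing that backs nested (vSIE)
	 * guests only handles 4k mappings, so nesting and 1M hugepage
	 * backing exclude each other.
	 */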
   5182	if (nested && hpage) {
   5183		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
   5184		return -EINVAL;
   5185	}
   5186
   5187	for (i = 0; i < 16; i++)
   5188		kvm_s390_fac_base[i] |=
   5189			stfle_fac_list[i] & nonhyp_mask(i);
   5190
   5191	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
   5192}
   5193
   5194static void __exit kvm_s390_exit(void)
   5195{
   5196	kvm_exit();
   5197}
   5198
   5199module_init(kvm_s390_init);
   5200module_exit(kvm_s390_exit);
   5201
   5202/*
   5203 * Enable autoloading of the kvm module.
   5204 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
   5205 * since x86 takes a different approach.
   5206 */
   5207#include <linux/miscdevice.h>
   5208MODULE_ALIAS_MISCDEV(KVM_MINOR);
   5209MODULE_ALIAS("devname:kvm");