cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

book3s_pr.c (55238B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
      4 *
      5 * Authors:
      6 *    Alexander Graf <agraf@suse.de>
      7 *    Kevin Wolf <mail@kevin-wolf.de>
      8 *    Paul Mackerras <paulus@samba.org>
      9 *
     10 * Description:
     11 * Functions relating to running KVM on Book 3S processors where
     12 * we don't have access to hypervisor mode, and we run the guest
     13 * in problem state (user mode).
     14 *
     15 * This file is derived from arch/powerpc/kvm/44x.c,
     16 * by Hollis Blanchard <hollisb@us.ibm.com>.
     17 */
     18
     19#include <linux/kvm_host.h>
     20#include <linux/export.h>
     21#include <linux/err.h>
     22#include <linux/slab.h>
     23
     24#include <asm/reg.h>
     25#include <asm/cputable.h>
     26#include <asm/cacheflush.h>
     27#include <linux/uaccess.h>
     28#include <asm/interrupt.h>
     29#include <asm/io.h>
     30#include <asm/kvm_ppc.h>
     31#include <asm/kvm_book3s.h>
     32#include <asm/mmu_context.h>
     33#include <asm/switch_to.h>
     34#include <asm/firmware.h>
     35#include <asm/setup.h>
     36#include <linux/gfp.h>
     37#include <linux/sched.h>
     38#include <linux/vmalloc.h>
     39#include <linux/highmem.h>
     40#include <linux/module.h>
     41#include <linux/miscdevice.h>
     42#include <asm/asm-prototypes.h>
     43#include <asm/tm.h>
     44
     45#include "book3s.h"
     46
     47#define CREATE_TRACE_POINTS
     48#include "trace_pr.h"
     49
     50/* #define EXIT_DEBUG */
     51/* #define DEBUG_EXT */
     52
     53static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
     54			     ulong msr);
     55#ifdef CONFIG_PPC_BOOK3S_64
     56static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac);
     57#endif
     58
     59/* Some compatibility defines */
     60#ifdef CONFIG_PPC_BOOK3S_32
     61#define MSR_USER32 MSR_USER
     62#define MSR_USER64 MSR_USER
     63#define HW_PAGE_SIZE PAGE_SIZE
     64#define HPTE_R_M   _PAGE_COHERENT
     65#endif
     66
     67static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu)
     68{
     69	ulong msr = kvmppc_get_msr(vcpu);
     70	return (msr & (MSR_IR|MSR_DR)) == MSR_DR;
     71}
     72
     73static void kvmppc_fixup_split_real(struct kvm_vcpu *vcpu)
     74{
     75	ulong msr = kvmppc_get_msr(vcpu);
     76	ulong pc = kvmppc_get_pc(vcpu);
     77
     78	/* We are in DR only split real mode */
     79	if ((msr & (MSR_IR|MSR_DR)) != MSR_DR)
     80		return;
     81
     82	/* We have not fixed up the guest already */
     83	if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK)
     84		return;
     85
     86	/* The code is in fixupable address space */
     87	if (pc & SPLIT_HACK_MASK)
     88		return;
     89
     90	vcpu->arch.hflags |= BOOK3S_HFLAG_SPLIT_HACK;
     91	kvmppc_set_pc(vcpu, pc | SPLIT_HACK_OFFS);
     92}
     93
     94static void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
     95{
     96	if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
     97		ulong pc = kvmppc_get_pc(vcpu);
     98		ulong lr = kvmppc_get_lr(vcpu);
     99		if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
    100			kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
    101		if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
    102			kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK);
    103		vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
    104	}
    105}
    106
    107static void kvmppc_inject_interrupt_pr(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
    108{
    109	unsigned long msr, pc, new_msr, new_pc;
    110
    111	kvmppc_unfixup_split_real(vcpu);
    112
    113	msr = kvmppc_get_msr(vcpu);
    114	pc = kvmppc_get_pc(vcpu);
    115	new_msr = vcpu->arch.intr_msr;
    116	new_pc = to_book3s(vcpu)->hior + vec;
    117
    118#ifdef CONFIG_PPC_BOOK3S_64
    119	/* If transactional, change to suspend mode on IRQ delivery */
    120	if (MSR_TM_TRANSACTIONAL(msr))
    121		new_msr |= MSR_TS_S;
    122	else
    123		new_msr |= msr & MSR_TS_MASK;
    124#endif
    125
    126	kvmppc_set_srr0(vcpu, pc);
    127	kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags);
    128	kvmppc_set_pc(vcpu, new_pc);
    129	kvmppc_set_msr(vcpu, new_msr);
    130}
    131
    132static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
    133{
    134#ifdef CONFIG_PPC_BOOK3S_64
    135	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
    136	memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb));
    137	svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
    138	svcpu->in_use = 0;
    139	svcpu_put(svcpu);
    140
    141	/* Disable AIL if supported */
    142	if (cpu_has_feature(CPU_FTR_HVMODE)) {
    143		if (cpu_has_feature(CPU_FTR_ARCH_207S))
    144			mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL);
    145		if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV))
    146			mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) & ~FSCR_SCV);
    147	}
    148#endif
    149
    150	vcpu->cpu = smp_processor_id();
    151#ifdef CONFIG_PPC_BOOK3S_32
    152	current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu;
    153#endif
    154
    155	if (kvmppc_is_split_real(vcpu))
    156		kvmppc_fixup_split_real(vcpu);
    157
    158	kvmppc_restore_tm_pr(vcpu);
    159}
    160
    161static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
    162{
    163#ifdef CONFIG_PPC_BOOK3S_64
    164	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
    165	if (svcpu->in_use) {
    166		kvmppc_copy_from_svcpu(vcpu);
    167	}
    168	memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
    169	to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
    170	svcpu_put(svcpu);
    171
    172	/* Enable AIL if supported */
    173	if (cpu_has_feature(CPU_FTR_HVMODE)) {
    174		if (cpu_has_feature(CPU_FTR_ARCH_207S))
    175			mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3);
    176		if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV))
    177			mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) | FSCR_SCV);
    178	}
    179#endif
    180
    181	if (kvmppc_is_split_real(vcpu))
    182		kvmppc_unfixup_split_real(vcpu);
    183
    184	kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
    185	kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
    186	kvmppc_save_tm_pr(vcpu);
    187
    188	vcpu->cpu = -1;
    189}
    190
    191/* Copy data needed by real-mode code from vcpu to shadow vcpu */
    192void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu)
    193{
    194	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
    195
    196	svcpu->gpr[0] = vcpu->arch.regs.gpr[0];
    197	svcpu->gpr[1] = vcpu->arch.regs.gpr[1];
    198	svcpu->gpr[2] = vcpu->arch.regs.gpr[2];
    199	svcpu->gpr[3] = vcpu->arch.regs.gpr[3];
    200	svcpu->gpr[4] = vcpu->arch.regs.gpr[4];
    201	svcpu->gpr[5] = vcpu->arch.regs.gpr[5];
    202	svcpu->gpr[6] = vcpu->arch.regs.gpr[6];
    203	svcpu->gpr[7] = vcpu->arch.regs.gpr[7];
    204	svcpu->gpr[8] = vcpu->arch.regs.gpr[8];
    205	svcpu->gpr[9] = vcpu->arch.regs.gpr[9];
    206	svcpu->gpr[10] = vcpu->arch.regs.gpr[10];
    207	svcpu->gpr[11] = vcpu->arch.regs.gpr[11];
    208	svcpu->gpr[12] = vcpu->arch.regs.gpr[12];
    209	svcpu->gpr[13] = vcpu->arch.regs.gpr[13];
    210	svcpu->cr  = vcpu->arch.regs.ccr;
    211	svcpu->xer = vcpu->arch.regs.xer;
    212	svcpu->ctr = vcpu->arch.regs.ctr;
    213	svcpu->lr  = vcpu->arch.regs.link;
    214	svcpu->pc  = vcpu->arch.regs.nip;
    215#ifdef CONFIG_PPC_BOOK3S_64
    216	svcpu->shadow_fscr = vcpu->arch.shadow_fscr;
    217#endif
    218	/*
    219	 * Now also save the current time base value. We use this
    220	 * to find the guest purr and spurr value.
    221	 */
    222	vcpu->arch.entry_tb = get_tb();
    223	vcpu->arch.entry_vtb = get_vtb();
    224	if (cpu_has_feature(CPU_FTR_ARCH_207S))
    225		vcpu->arch.entry_ic = mfspr(SPRN_IC);
    226	svcpu->in_use = true;
    227
    228	svcpu_put(svcpu);
    229}
    230
    231static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
    232{
    233	ulong guest_msr = kvmppc_get_msr(vcpu);
    234	ulong smsr = guest_msr;
    235
    236	/* Guest MSR values */
    237#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
    238	smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE |
    239		MSR_TM | MSR_TS_MASK;
    240#else
    241	smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE;
    242#endif
    243	/* Process MSR values */
    244	smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
    245	/* External providers (FPU, Altivec, VSX) the guest reserved */
    246	smsr |= (guest_msr & vcpu->arch.guest_owned_ext);
    247	/* 64-bit Process MSR values */
    248#ifdef CONFIG_PPC_BOOK3S_64
    249	smsr |= MSR_HV;
    250#endif
    251#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
    252	/*
    253	 * In guest privileged state, we want to fail all TM transactions.
    254	 * So disable the MSR TM bit so that all tbegin. instructions will
    255	 * be trapped into the host.
    256	 */
    257	if (!(guest_msr & MSR_PR))
    258		smsr &= ~MSR_TM;
    259#endif
    260	vcpu->arch.shadow_msr = smsr;
    261}
    262
    263/* Copy data touched by real-mode code from shadow vcpu back to vcpu */
    264void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu)
    265{
    266	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
    267#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
    268	ulong old_msr;
    269#endif
    270
    271	/*
    272	 * Maybe we were already preempted and synced the svcpu from
    273	 * our preempt notifiers. Don't bother touching this svcpu then.
    274	 */
    275	if (!svcpu->in_use)
    276		goto out;
    277
    278	vcpu->arch.regs.gpr[0] = svcpu->gpr[0];
    279	vcpu->arch.regs.gpr[1] = svcpu->gpr[1];
    280	vcpu->arch.regs.gpr[2] = svcpu->gpr[2];
    281	vcpu->arch.regs.gpr[3] = svcpu->gpr[3];
    282	vcpu->arch.regs.gpr[4] = svcpu->gpr[4];
    283	vcpu->arch.regs.gpr[5] = svcpu->gpr[5];
    284	vcpu->arch.regs.gpr[6] = svcpu->gpr[6];
    285	vcpu->arch.regs.gpr[7] = svcpu->gpr[7];
    286	vcpu->arch.regs.gpr[8] = svcpu->gpr[8];
    287	vcpu->arch.regs.gpr[9] = svcpu->gpr[9];
    288	vcpu->arch.regs.gpr[10] = svcpu->gpr[10];
    289	vcpu->arch.regs.gpr[11] = svcpu->gpr[11];
    290	vcpu->arch.regs.gpr[12] = svcpu->gpr[12];
    291	vcpu->arch.regs.gpr[13] = svcpu->gpr[13];
    292	vcpu->arch.regs.ccr  = svcpu->cr;
    293	vcpu->arch.regs.xer = svcpu->xer;
    294	vcpu->arch.regs.ctr = svcpu->ctr;
    295	vcpu->arch.regs.link  = svcpu->lr;
    296	vcpu->arch.regs.nip  = svcpu->pc;
    297	vcpu->arch.shadow_srr1 = svcpu->shadow_srr1;
    298	vcpu->arch.fault_dar   = svcpu->fault_dar;
    299	vcpu->arch.fault_dsisr = svcpu->fault_dsisr;
    300	vcpu->arch.last_inst   = svcpu->last_inst;
    301#ifdef CONFIG_PPC_BOOK3S_64
    302	vcpu->arch.shadow_fscr = svcpu->shadow_fscr;
    303#endif
    304	/*
    305	 * Update purr and spurr using time base on exit.
    306	 */
    307	vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb;
    308	vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb;
    309	to_book3s(vcpu)->vtb += get_vtb() - vcpu->arch.entry_vtb;
    310	if (cpu_has_feature(CPU_FTR_ARCH_207S))
    311		vcpu->arch.ic += mfspr(SPRN_IC) - vcpu->arch.entry_ic;
    312
    313#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
    314	/*
    315	 * Unlike other MSR bits, the MSR[TS] bits can be changed by the guest
    316	 * without notifying the host: they are modified by unprivileged
    317	 * instructions like "tbegin"/"tend"/"tresume"/"tsuspend" in a PR KVM
    318	 * guest.
    319	 *
    320	 * It is necessary to sync here to calculate a correct shadow_msr.
    321	 *
    322	 * A privileged guest's tbegin will fail at present, so we only take
    323	 * care of problem state guests.
    324	 */
    325	old_msr = kvmppc_get_msr(vcpu);
    326	if (unlikely((old_msr & MSR_PR) &&
    327		(vcpu->arch.shadow_srr1 & (MSR_TS_MASK)) !=
    328				(old_msr & (MSR_TS_MASK)))) {
    329		old_msr &= ~(MSR_TS_MASK);
    330		old_msr |= (vcpu->arch.shadow_srr1 & (MSR_TS_MASK));
    331		kvmppc_set_msr_fast(vcpu, old_msr);
    332		kvmppc_recalc_shadow_msr(vcpu);
    333	}
    334#endif
    335
    336	svcpu->in_use = false;
    337
    338out:
    339	svcpu_put(svcpu);
    340}
    341
    342#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
    343void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu)
    344{
    345	tm_enable();
    346	vcpu->arch.tfhar = mfspr(SPRN_TFHAR);
    347	vcpu->arch.texasr = mfspr(SPRN_TEXASR);
    348	vcpu->arch.tfiar = mfspr(SPRN_TFIAR);
    349	tm_disable();
    350}
    351
    352void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu)
    353{
    354	tm_enable();
    355	mtspr(SPRN_TFHAR, vcpu->arch.tfhar);
    356	mtspr(SPRN_TEXASR, vcpu->arch.texasr);
    357	mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
    358	tm_disable();
    359}
    360
    361/* Load up math bits which are enabled in the guest MSR (kvmppc_get_msr())
    362 * but not enabled in hardware.
    363 */
    364static void kvmppc_handle_lost_math_exts(struct kvm_vcpu *vcpu)
    365{
    366	ulong exit_nr;
    367	ulong ext_diff = (kvmppc_get_msr(vcpu) & ~vcpu->arch.guest_owned_ext) &
    368		(MSR_FP | MSR_VEC | MSR_VSX);
    369
    370	if (!ext_diff)
    371		return;
    372
    373	if (ext_diff == MSR_FP)
    374		exit_nr = BOOK3S_INTERRUPT_FP_UNAVAIL;
    375	else if (ext_diff == MSR_VEC)
    376		exit_nr = BOOK3S_INTERRUPT_ALTIVEC;
    377	else
    378		exit_nr = BOOK3S_INTERRUPT_VSX;
    379
    380	kvmppc_handle_ext(vcpu, exit_nr, ext_diff);
    381}
    382
    383void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu)
    384{
    385	if (!(MSR_TM_ACTIVE(kvmppc_get_msr(vcpu)))) {
    386		kvmppc_save_tm_sprs(vcpu);
    387		return;
    388	}
    389
    390	kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
    391	kvmppc_giveup_ext(vcpu, MSR_VSX);
    392
    393	preempt_disable();
    394	_kvmppc_save_tm_pr(vcpu, mfmsr());
    395	preempt_enable();
    396}
    397
    398void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu)
    399{
    400	if (!MSR_TM_ACTIVE(kvmppc_get_msr(vcpu))) {
    401		kvmppc_restore_tm_sprs(vcpu);
    402		if (kvmppc_get_msr(vcpu) & MSR_TM) {
    403			kvmppc_handle_lost_math_exts(vcpu);
    404			if (vcpu->arch.fscr & FSCR_TAR)
    405				kvmppc_handle_fac(vcpu, FSCR_TAR_LG);
    406		}
    407		return;
    408	}
    409
    410	preempt_disable();
    411	_kvmppc_restore_tm_pr(vcpu, kvmppc_get_msr(vcpu));
    412	preempt_enable();
    413
    414	if (kvmppc_get_msr(vcpu) & MSR_TM) {
    415		kvmppc_handle_lost_math_exts(vcpu);
    416		if (vcpu->arch.fscr & FSCR_TAR)
    417			kvmppc_handle_fac(vcpu, FSCR_TAR_LG);
    418	}
    419}
    420#endif
    421
    422static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
    423{
    424	int r = 1; /* Indicate we want to get back into the guest */
    425
    426	/* We misuse TLB_FLUSH to indicate that we want to clear
    427	   all shadow cache entries */
    428	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
    429		kvmppc_mmu_pte_flush(vcpu, 0, 0);
    430
    431	return r;
    432}
    433
    434/************* MMU Notifiers *************/
    435static bool do_kvm_unmap_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
    436{
    437	unsigned long i;
    438	struct kvm_vcpu *vcpu;
    439
    440	kvm_for_each_vcpu(i, vcpu, kvm)
    441		kvmppc_mmu_pte_pflush(vcpu, range->start << PAGE_SHIFT,
    442				      range->end << PAGE_SHIFT);
    443
    444	return false;
    445}
    446
    447static bool kvm_unmap_gfn_range_pr(struct kvm *kvm, struct kvm_gfn_range *range)
    448{
    449	return do_kvm_unmap_gfn(kvm, range);
    450}
    451
    452static bool kvm_age_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
    453{
    454	/* XXX could be more clever ;) */
    455	return false;
    456}
    457
    458static bool kvm_test_age_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
    459{
    460	/* XXX could be more clever ;) */
    461	return false;
    462}
    463
    464static bool kvm_set_spte_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
    465{
    466	/* The page will get remapped properly on its next fault */
    467	return do_kvm_unmap_gfn(kvm, range);
    468}
    469
    470/*****************************************/
    471
    472static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
    473{
    474	ulong old_msr;
    475
    476	/* For PAPR guest, make sure MSR reflects guest mode */
    477	if (vcpu->arch.papr_enabled)
    478		msr = (msr & ~MSR_HV) | MSR_ME;
    479
    480#ifdef EXIT_DEBUG
    481	printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
    482#endif
    483
    484#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
    485	/* We should never target the guest MSR at TS=10 && PR=0,
    486	 * since we always fail transactions in guest privileged
    487	 * state.
    488	 */
    489	if (!(msr & MSR_PR) && MSR_TM_TRANSACTIONAL(msr))
    490		kvmppc_emulate_tabort(vcpu,
    491			TM_CAUSE_KVM_FAC_UNAV | TM_CAUSE_PERSISTENT);
    492#endif
    493
    494	old_msr = kvmppc_get_msr(vcpu);
    495	msr &= to_book3s(vcpu)->msr_mask;
    496	kvmppc_set_msr_fast(vcpu, msr);
    497	kvmppc_recalc_shadow_msr(vcpu);
    498
    499	if (msr & MSR_POW) {
    500		if (!vcpu->arch.pending_exceptions) {
    501			kvm_vcpu_halt(vcpu);
    502			kvm_clear_request(KVM_REQ_UNHALT, vcpu);
    503			vcpu->stat.generic.halt_wakeup++;
    504
    505			/* Unset POW bit after we woke up */
    506			msr &= ~MSR_POW;
    507			kvmppc_set_msr_fast(vcpu, msr);
    508		}
    509	}
    510
    511	if (kvmppc_is_split_real(vcpu))
    512		kvmppc_fixup_split_real(vcpu);
    513	else
    514		kvmppc_unfixup_split_real(vcpu);
    515
    516	if ((kvmppc_get_msr(vcpu) & (MSR_PR|MSR_IR|MSR_DR)) !=
    517		   (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
    518		kvmppc_mmu_flush_segments(vcpu);
    519		kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
    520
    521		/* Preload magic page segment when in kernel mode */
    522		if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) {
    523			struct kvm_vcpu_arch *a = &vcpu->arch;
    524
    525			if (msr & MSR_DR)
    526				kvmppc_mmu_map_segment(vcpu, a->magic_page_ea);
    527			else
    528				kvmppc_mmu_map_segment(vcpu, a->magic_page_pa);
    529		}
    530	}
    531
    532	/*
    533	 * When switching from 32 to 64-bit, we may have a stale 32-bit
    534	 * magic page around, we need to flush it. Typically 32-bit magic
    535	 * page will be instantiated when calling into RTAS. Note: We
    536	 * assume that such transition only happens while in kernel mode,
    537	 * ie, we never transition from user 32-bit to kernel 64-bit with
    538	 * a 32-bit magic page around.
    539	 */
    540	if (vcpu->arch.magic_page_pa &&
    541	    !(old_msr & MSR_PR) && !(old_msr & MSR_SF) && (msr & MSR_SF)) {
    542		/* going from RTAS to normal kernel code */
    543		kvmppc_mmu_pte_flush(vcpu, (uint32_t)vcpu->arch.magic_page_pa,
    544				     ~0xFFFUL);
    545	}
    546
    547	/* Preload FPU if it's enabled */
    548	if (kvmppc_get_msr(vcpu) & MSR_FP)
    549		kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
    550
    551#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
    552	if (kvmppc_get_msr(vcpu) & MSR_TM)
    553		kvmppc_handle_lost_math_exts(vcpu);
    554#endif
    555}
    556
    557static void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
    558{
    559	u32 host_pvr;
    560
    561	vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB;
    562	vcpu->arch.pvr = pvr;
    563#ifdef CONFIG_PPC_BOOK3S_64
    564	if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
    565		kvmppc_mmu_book3s_64_init(vcpu);
    566		if (!to_book3s(vcpu)->hior_explicit)
    567			to_book3s(vcpu)->hior = 0xfff00000;
    568		to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
    569		vcpu->arch.cpu_type = KVM_CPU_3S_64;
    570	} else
    571#endif
    572	{
    573		kvmppc_mmu_book3s_32_init(vcpu);
    574		if (!to_book3s(vcpu)->hior_explicit)
    575			to_book3s(vcpu)->hior = 0;
    576		to_book3s(vcpu)->msr_mask = 0xffffffffULL;
    577		vcpu->arch.cpu_type = KVM_CPU_3S_32;
    578	}
    579
    580	kvmppc_sanity_check(vcpu);
    581
    582	/* If we are at hypervisor level on a 970, we can tell the CPU to
    583	 * treat DCBZ as a 32-byte store */
    584	vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
    585	if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) &&
    586	    !strcmp(cur_cpu_spec->platform, "ppc970"))
    587		vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
    588
    589	/* Cell performs badly if MSR_FEx are set. So let's hope nobody
    590	   really needs them in a VM on Cell and force disable them. */
    591	if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
    592		to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
    593
    594	/*
    595	 * If they're asking for POWER6 or later, set the flag
    596	 * indicating that we can do multiple large page sizes
    597	 * and 1TB segments.
    598	 * Also set the flag that indicates that tlbie has the large
    599	 * page bit in the RB operand instead of the instruction.
    600	 */
    601	switch (PVR_VER(pvr)) {
    602	case PVR_POWER6:
    603	case PVR_POWER7:
    604	case PVR_POWER7p:
    605	case PVR_POWER8:
    606	case PVR_POWER8E:
    607	case PVR_POWER8NVL:
    608	case PVR_POWER9:
    609		vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE |
    610			BOOK3S_HFLAG_NEW_TLBIE;
    611		break;
    612	}
    613
    614#ifdef CONFIG_PPC_BOOK3S_32
    615	/* 32 bit Book3S always has 32 byte dcbz */
    616	vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
    617#endif
    618
    619	/* On some CPUs we can execute paired single operations natively */
    620	asm ( "mfpvr %0" : "=r"(host_pvr));
    621	switch (host_pvr) {
    622	case 0x00080200:	/* lonestar 2.0 */
    623	case 0x00088202:	/* lonestar 2.2 */
    624	case 0x70000100:	/* gekko 1.0 */
    625	case 0x00080100:	/* gekko 2.0 */
    626	case 0x00083203:	/* gekko 2.3a */
    627	case 0x00083213:	/* gekko 2.3b */
    628	case 0x00083204:	/* gekko 2.4 */
    629	case 0x00083214:	/* gekko 2.4e (8SE) - retail HW2 */
    630	case 0x00087200:	/* broadway */
    631		vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS;
    632		/* Enable HID2.PSE - in case we need it later */
    633		mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29));
    634	}
    635}
    636
    637/* Book3s_32 CPUs always have a 32-byte cache line size, which Linux assumes. To
    638 * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to
    639 * emulate a 32-byte dcbz length.
    640 *
    641 * The Book3s_64 designers also anticipated this case and implemented a special bit
    642 * in the HID5 register, which is a hypervisor resource. Thus we can't use it.
    643 *
    644 * My approach here is to patch the dcbz instruction on executing pages.
    645 */
    646static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
    647{
    648	struct page *hpage;
    649	u64 hpage_offset;
    650	u32 *page;
    651	int i;
    652
    653	hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
    654	if (is_error_page(hpage))
    655		return;
    656
    657	hpage_offset = pte->raddr & ~PAGE_MASK;
    658	hpage_offset &= ~0xFFFULL;
    659	hpage_offset /= 4;
    660
    661	get_page(hpage);
    662	page = kmap_atomic(hpage);
    663
    664	/* patch dcbz into reserved instruction, so we trap */
    665	for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
    666		if ((be32_to_cpu(page[i]) & 0xff0007ff) == INS_DCBZ)
    667			page[i] &= cpu_to_be32(0xfffffff7);
    668
    669	kunmap_atomic(page);
    670	put_page(hpage);
    671}
    672
    673static bool kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
    674{
    675	ulong mp_pa = vcpu->arch.magic_page_pa;
    676
    677	if (!(kvmppc_get_msr(vcpu) & MSR_SF))
    678		mp_pa = (uint32_t)mp_pa;
    679
    680	gpa &= ~0xFFFULL;
    681	if (unlikely(mp_pa) && unlikely((mp_pa & KVM_PAM) == (gpa & KVM_PAM))) {
    682		return true;
    683	}
    684
    685	return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT);
    686}
    687
    688static int kvmppc_handle_pagefault(struct kvm_vcpu *vcpu,
    689			    ulong eaddr, int vec)
    690{
    691	bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
    692	bool iswrite = false;
    693	int r = RESUME_GUEST;
    694	int relocated;
    695	int page_found = 0;
    696	struct kvmppc_pte pte = { 0 };
    697	bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false;
    698	bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false;
    699	u64 vsid;
    700
    701	relocated = data ? dr : ir;
    702	if (data && (vcpu->arch.fault_dsisr & DSISR_ISSTORE))
    703		iswrite = true;
    704
    705	/* Resolve real address if translation turned on */
    706	if (relocated) {
    707		page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data, iswrite);
    708	} else {
    709		pte.may_execute = true;
    710		pte.may_read = true;
    711		pte.may_write = true;
    712		pte.raddr = eaddr & KVM_PAM;
    713		pte.eaddr = eaddr;
    714		pte.vpage = eaddr >> 12;
    715		pte.page_size = MMU_PAGE_64K;
    716		pte.wimg = HPTE_R_M;
    717	}
    718
    719	switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
    720	case 0:
    721		pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
    722		break;
    723	case MSR_DR:
    724		if (!data &&
    725		    (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) &&
    726		    ((pte.raddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS))
    727			pte.raddr &= ~SPLIT_HACK_MASK;
    728		fallthrough;
    729	case MSR_IR:
    730		vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
    731
    732		if ((kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) == MSR_DR)
    733			pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
    734		else
    735			pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
    736		pte.vpage |= vsid;
    737
    738		if (vsid == -1)
    739			page_found = -EINVAL;
    740		break;
    741	}
    742
    743	if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
    744	   (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
    745		/*
    746		 * If we do the dcbz hack, we have to NX on every execution,
    747		 * so we can patch the executing code. This renders our guest
    748		 * NX-less.
    749		 */
    750		pte.may_execute = !data;
    751	}
    752
    753	if (page_found == -ENOENT || page_found == -EPERM) {
    754		/* Page not found in guest PTE entries, or protection fault */
    755		u64 flags;
    756
    757		if (page_found == -EPERM)
    758			flags = DSISR_PROTFAULT;
    759		else
    760			flags = DSISR_NOHPTE;
    761		if (data) {
    762			flags |= vcpu->arch.fault_dsisr & DSISR_ISSTORE;
    763			kvmppc_core_queue_data_storage(vcpu, eaddr, flags);
    764		} else {
    765			kvmppc_core_queue_inst_storage(vcpu, flags);
    766		}
    767	} else if (page_found == -EINVAL) {
    768		/* Page not found in guest SLB */
    769		kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
    770		kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
    771	} else if (kvmppc_visible_gpa(vcpu, pte.raddr)) {
    772		if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) {
    773			/*
    774			 * There is already a host HPTE there, presumably
    775			 * a read-only one for a page the guest thinks
    776			 * is writable, so get rid of it first.
    777			 */
    778			kvmppc_mmu_unmap_page(vcpu, &pte);
    779		}
    780		/* The guest's PTE is not mapped yet. Map on the host */
    781		if (kvmppc_mmu_map_page(vcpu, &pte, iswrite) == -EIO) {
    782			/* Exit KVM if mapping failed */
    783			vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
    784			return RESUME_HOST;
    785		}
    786		if (data)
    787			vcpu->stat.sp_storage++;
    788		else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
    789			 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
    790			kvmppc_patch_dcbz(vcpu, &pte);
    791	} else {
    792		/* MMIO */
    793		vcpu->stat.mmio_exits++;
    794		vcpu->arch.paddr_accessed = pte.raddr;
    795		vcpu->arch.vaddr_accessed = pte.eaddr;
    796		r = kvmppc_emulate_mmio(vcpu);
    797		if ( r == RESUME_HOST_NV )
    798			r = RESUME_HOST;
    799	}
    800
    801	return r;
    802}
    803
    804/* Give up external provider (FPU, Altivec, VSX) */
    805void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
    806{
    807	struct thread_struct *t = &current->thread;
    808
    809	/*
    810	 * VSX instructions can access FP and vector registers, so if
    811	 * we are giving up VSX, make sure we give up FP and VMX as well.
    812	 */
    813	if (msr & MSR_VSX)
    814		msr |= MSR_FP | MSR_VEC;
    815
    816	msr &= vcpu->arch.guest_owned_ext;
    817	if (!msr)
    818		return;
    819
    820#ifdef DEBUG_EXT
    821	printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
    822#endif
    823
    824	if (msr & MSR_FP) {
    825		/*
    826		 * Note that on CPUs with VSX, giveup_fpu stores
    827		 * both the traditional FP registers and the added VSX
    828		 * registers into thread.fp_state.fpr[].
    829		 */
    830		if (t->regs->msr & MSR_FP)
    831			giveup_fpu(current);
    832		t->fp_save_area = NULL;
    833	}
    834
    835#ifdef CONFIG_ALTIVEC
    836	if (msr & MSR_VEC) {
    837		if (current->thread.regs->msr & MSR_VEC)
    838			giveup_altivec(current);
    839		t->vr_save_area = NULL;
    840	}
    841#endif
    842
    843	vcpu->arch.guest_owned_ext &= ~(msr | MSR_VSX);
    844	kvmppc_recalc_shadow_msr(vcpu);
    845}
    846
    847/* Give up facility (TAR / EBB / DSCR) */
    848void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac)
    849{
    850#ifdef CONFIG_PPC_BOOK3S_64
    851	if (!(vcpu->arch.shadow_fscr & (1ULL << fac))) {
    852		/* Facility not available to the guest, ignore giveup request */
    853		return;
    854	}
    855
    856	switch (fac) {
    857	case FSCR_TAR_LG:
    858		vcpu->arch.tar = mfspr(SPRN_TAR);
    859		mtspr(SPRN_TAR, current->thread.tar);
    860		vcpu->arch.shadow_fscr &= ~FSCR_TAR;
    861		break;
    862	}
    863#endif
    864}
    865
    866/* Handle external providers (FPU, Altivec, VSX) */
    867static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
    868			     ulong msr)
    869{
    870	struct thread_struct *t = &current->thread;
    871
    872	/* When we have paired singles, we emulate in software */
    873	if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
    874		return RESUME_GUEST;
    875
    876	if (!(kvmppc_get_msr(vcpu) & msr)) {
    877		kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
    878		return RESUME_GUEST;
    879	}
    880
    881	if (msr == MSR_VSX) {
    882		/* No VSX?  Give an illegal instruction interrupt */
    883#ifdef CONFIG_VSX
    884		if (!cpu_has_feature(CPU_FTR_VSX))
    885#endif
    886		{
    887			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
    888			return RESUME_GUEST;
    889		}
    890
    891		/*
    892		 * We have to load up all the FP and VMX registers before
    893		 * we can let the guest use VSX instructions.
    894		 */
    895		msr = MSR_FP | MSR_VEC | MSR_VSX;
    896	}
    897
    898	/* See if we already own all the ext(s) needed */
    899	msr &= ~vcpu->arch.guest_owned_ext;
    900	if (!msr)
    901		return RESUME_GUEST;
    902
    903#ifdef DEBUG_EXT
    904	printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
    905#endif
    906
    907	if (msr & MSR_FP) {
    908		preempt_disable();
    909		enable_kernel_fp();
    910		load_fp_state(&vcpu->arch.fp);
    911		disable_kernel_fp();
    912		t->fp_save_area = &vcpu->arch.fp;
    913		preempt_enable();
    914	}
    915
    916	if (msr & MSR_VEC) {
    917#ifdef CONFIG_ALTIVEC
    918		preempt_disable();
    919		enable_kernel_altivec();
    920		load_vr_state(&vcpu->arch.vr);
    921		disable_kernel_altivec();
    922		t->vr_save_area = &vcpu->arch.vr;
    923		preempt_enable();
    924#endif
    925	}
    926
    927	t->regs->msr |= msr;
    928	vcpu->arch.guest_owned_ext |= msr;
    929	kvmppc_recalc_shadow_msr(vcpu);
    930
    931	return RESUME_GUEST;
    932}
    933
    934/*
    935 * Kernel code using FP or VMX could have flushed guest state to
    936 * the thread_struct; if so, get it back now.
    937 */
    938static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
    939{
    940	unsigned long lost_ext;
    941
    942	lost_ext = vcpu->arch.guest_owned_ext & ~current->thread.regs->msr;
    943	if (!lost_ext)
    944		return;
    945
    946	if (lost_ext & MSR_FP) {
    947		preempt_disable();
    948		enable_kernel_fp();
    949		load_fp_state(&vcpu->arch.fp);
    950		disable_kernel_fp();
    951		preempt_enable();
    952	}
    953#ifdef CONFIG_ALTIVEC
    954	if (lost_ext & MSR_VEC) {
    955		preempt_disable();
    956		enable_kernel_altivec();
    957		load_vr_state(&vcpu->arch.vr);
    958		disable_kernel_altivec();
    959		preempt_enable();
    960	}
    961#endif
    962	current->thread.regs->msr |= lost_ext;
    963}
    964
    965#ifdef CONFIG_PPC_BOOK3S_64
    966
    967void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac)
    968{
    969	/* Inject the Interrupt Cause field and trigger a guest interrupt */
    970	vcpu->arch.fscr &= ~(0xffULL << 56);
    971	vcpu->arch.fscr |= (fac << 56);
    972	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
    973}
    974
    975static void kvmppc_emulate_fac(struct kvm_vcpu *vcpu, ulong fac)
    976{
    977	enum emulation_result er = EMULATE_FAIL;
    978
    979	if (!(kvmppc_get_msr(vcpu) & MSR_PR))
    980		er = kvmppc_emulate_instruction(vcpu);
    981
    982	if ((er != EMULATE_DONE) && (er != EMULATE_AGAIN)) {
    983		/* Couldn't emulate, trigger interrupt in guest */
    984		kvmppc_trigger_fac_interrupt(vcpu, fac);
    985	}
    986}
    987
    988/* Enable facilities (TAR, EBB, DSCR) for the guest */
    989static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac)
    990{
    991	bool guest_fac_enabled;
    992	BUG_ON(!cpu_has_feature(CPU_FTR_ARCH_207S));
    993
    994	/*
    995	 * Not every facility is enabled by FSCR bits, check whether the
    996	 * guest has this facility enabled at all.
    997	 */
    998	switch (fac) {
    999	case FSCR_TAR_LG:
   1000	case FSCR_EBB_LG:
   1001		guest_fac_enabled = (vcpu->arch.fscr & (1ULL << fac));
   1002		break;
   1003	case FSCR_TM_LG:
   1004		guest_fac_enabled = kvmppc_get_msr(vcpu) & MSR_TM;
   1005		break;
   1006	default:
   1007		guest_fac_enabled = false;
   1008		break;
   1009	}
   1010
   1011	if (!guest_fac_enabled) {
   1012		/* Facility not enabled by the guest */
   1013		kvmppc_trigger_fac_interrupt(vcpu, fac);
   1014		return RESUME_GUEST;
   1015	}
   1016
   1017	switch (fac) {
   1018	case FSCR_TAR_LG:
   1019		/* TAR switching isn't lazy in Linux yet */
   1020		current->thread.tar = mfspr(SPRN_TAR);
   1021		mtspr(SPRN_TAR, vcpu->arch.tar);
   1022		vcpu->arch.shadow_fscr |= FSCR_TAR;
   1023		break;
   1024	default:
   1025		kvmppc_emulate_fac(vcpu, fac);
   1026		break;
   1027	}
   1028
   1029#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
   1030	/* Since we disabled MSR_TM in privileged state, an mfspr instruction
   1031	 * for a TM SPR can trigger a TM facility unavailable interrupt. In
   1032	 * this case, the emulation is handled by kvmppc_emulate_fac(), which
   1033	 * finally invokes kvmppc_emulate_mfspr(). But note that the mfspr's
   1034	 * RT may be a non-volatile register, so we need to restore those NV
   1035	 * registers to reflect the update.
   1036	 */
   1037	if ((fac == FSCR_TM_LG) && !(kvmppc_get_msr(vcpu) & MSR_PR))
   1038		return RESUME_GUEST_NV;
   1039#endif
   1040
   1041	return RESUME_GUEST;
   1042}
   1043
   1044void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr)
   1045{
   1046	if (fscr & FSCR_SCV)
   1047		fscr &= ~FSCR_SCV; /* SCV must not be enabled */
   1048	if ((vcpu->arch.fscr & FSCR_TAR) && !(fscr & FSCR_TAR)) {
   1049		/* TAR got dropped, drop it in shadow too */
   1050		kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
   1051	} else if (!(vcpu->arch.fscr & FSCR_TAR) && (fscr & FSCR_TAR)) {
   1052		vcpu->arch.fscr = fscr;
   1053		kvmppc_handle_fac(vcpu, FSCR_TAR_LG);
   1054		return;
   1055	}
   1056
   1057	vcpu->arch.fscr = fscr;
   1058}
   1059#endif
   1060
   1061static void kvmppc_setup_debug(struct kvm_vcpu *vcpu)
   1062{
   1063	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
   1064		u64 msr = kvmppc_get_msr(vcpu);
   1065
   1066		kvmppc_set_msr(vcpu, msr | MSR_SE);
   1067	}
   1068}
   1069
   1070static void kvmppc_clear_debug(struct kvm_vcpu *vcpu)
   1071{
   1072	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
   1073		u64 msr = kvmppc_get_msr(vcpu);
   1074
   1075		kvmppc_set_msr(vcpu, msr & ~MSR_SE);
   1076	}
   1077}
   1078
   1079static int kvmppc_exit_pr_progint(struct kvm_vcpu *vcpu, unsigned int exit_nr)
   1080{
   1081	enum emulation_result er;
   1082	ulong flags;
   1083	u32 last_inst;
   1084	int emul, r;
   1085
   1086	/*
   1087	 * shadow_srr1 only contains valid flags if we came here via a program
   1088	 * exception. The other exceptions (emulation assist, FP unavailable,
   1089	 * etc.) do not provide flags in SRR1, so use an illegal-instruction
   1090	 * exception when injecting a program interrupt into the guest.
   1091	 */
   1092	if (exit_nr == BOOK3S_INTERRUPT_PROGRAM)
   1093		flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
   1094	else
   1095		flags = SRR1_PROGILL;
   1096
   1097	emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
   1098	if (emul != EMULATE_DONE)
   1099		return RESUME_GUEST;
   1100
   1101	if (kvmppc_get_msr(vcpu) & MSR_PR) {
   1102#ifdef EXIT_DEBUG
   1103		pr_info("Userspace triggered 0x700 exception at\n 0x%lx (0x%x)\n",
   1104			kvmppc_get_pc(vcpu), last_inst);
   1105#endif
   1106		if ((last_inst & 0xff0007ff) != (INS_DCBZ & 0xfffffff7)) {
   1107			kvmppc_core_queue_program(vcpu, flags);
   1108			return RESUME_GUEST;
   1109		}
   1110	}
   1111
   1112	vcpu->stat.emulated_inst_exits++;
   1113	er = kvmppc_emulate_instruction(vcpu);
   1114	switch (er) {
   1115	case EMULATE_DONE:
   1116		r = RESUME_GUEST_NV;
   1117		break;
   1118	case EMULATE_AGAIN:
   1119		r = RESUME_GUEST;
   1120		break;
   1121	case EMULATE_FAIL:
   1122		pr_crit("%s: emulation at %lx failed (%08x)\n",
   1123			__func__, kvmppc_get_pc(vcpu), last_inst);
   1124		kvmppc_core_queue_program(vcpu, flags);
   1125		r = RESUME_GUEST;
   1126		break;
   1127	case EMULATE_DO_MMIO:
   1128		vcpu->run->exit_reason = KVM_EXIT_MMIO;
   1129		r = RESUME_HOST_NV;
   1130		break;
   1131	case EMULATE_EXIT_USER:
   1132		r = RESUME_HOST_NV;
   1133		break;
   1134	default:
   1135		BUG();
   1136	}
   1137
   1138	return r;
   1139}
   1140
   1141int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr)
   1142{
   1143	struct kvm_run *run = vcpu->run;
   1144	int r = RESUME_HOST;
   1145	int s;
   1146
   1147	vcpu->stat.sum_exits++;
   1148
   1149	run->exit_reason = KVM_EXIT_UNKNOWN;
   1150	run->ready_for_interrupt_injection = 1;
   1151
   1152	/* We get here with MSR.EE=1 */
   1153
   1154	trace_kvm_exit(exit_nr, vcpu);
   1155	guest_exit();
   1156
   1157	switch (exit_nr) {
   1158	case BOOK3S_INTERRUPT_INST_STORAGE:
   1159	{
   1160		ulong shadow_srr1 = vcpu->arch.shadow_srr1;
   1161		vcpu->stat.pf_instruc++;
   1162
   1163		if (kvmppc_is_split_real(vcpu))
   1164			kvmppc_fixup_split_real(vcpu);
   1165
   1166#ifdef CONFIG_PPC_BOOK3S_32
   1167		/* We mark segments as unused when invalidating them, so treat
   1168		 * the respective fault as a segment fault. */
   1169		{
   1170			struct kvmppc_book3s_shadow_vcpu *svcpu;
   1171			u32 sr;
   1172
   1173			svcpu = svcpu_get(vcpu);
   1174			sr = svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT];
   1175			svcpu_put(svcpu);
   1176			if (sr == SR_INVALID) {
   1177				kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
   1178				r = RESUME_GUEST;
   1179				break;
   1180			}
   1181		}
   1182#endif
   1183
   1184		/* only care about PTEG not found errors, but leave NX alone */
   1185		if (shadow_srr1 & 0x40000000) {
   1186			int idx = srcu_read_lock(&vcpu->kvm->srcu);
   1187			r = kvmppc_handle_pagefault(vcpu, kvmppc_get_pc(vcpu), exit_nr);
   1188			srcu_read_unlock(&vcpu->kvm->srcu, idx);
   1189			vcpu->stat.sp_instruc++;
   1190		} else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
   1191			  (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
   1192			/*
   1193			 * XXX If we do the dcbz hack we use the NX bit to flush&patch the page,
   1194			 *     so we can't use the NX bit inside the guest. Let's cross our fingers
   1195			 *     that no guest that needs the dcbz hack does NX.
   1196			 */
   1197			kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
   1198			r = RESUME_GUEST;
   1199		} else {
   1200			kvmppc_core_queue_inst_storage(vcpu,
   1201						shadow_srr1 & 0x58000000);
   1202			r = RESUME_GUEST;
   1203		}
   1204		break;
   1205	}
   1206	case BOOK3S_INTERRUPT_DATA_STORAGE:
   1207	{
   1208		ulong dar = kvmppc_get_fault_dar(vcpu);
   1209		u32 fault_dsisr = vcpu->arch.fault_dsisr;
   1210		vcpu->stat.pf_storage++;
   1211
   1212#ifdef CONFIG_PPC_BOOK3S_32
   1213		/* We mark segments as unused when invalidating them, so treat
   1214		 * the respective fault as a segment fault. */
   1215		{
   1216			struct kvmppc_book3s_shadow_vcpu *svcpu;
   1217			u32 sr;
   1218
   1219			svcpu = svcpu_get(vcpu);
   1220			sr = svcpu->sr[dar >> SID_SHIFT];
   1221			svcpu_put(svcpu);
   1222			if (sr == SR_INVALID) {
   1223				kvmppc_mmu_map_segment(vcpu, dar);
   1224				r = RESUME_GUEST;
   1225				break;
   1226			}
   1227		}
   1228#endif
   1229
   1230		/*
   1231		 * We need to handle missing shadow PTEs, and
   1232		 * protection faults due to us mapping a page read-only
   1233		 * when the guest thinks it is writable.
   1234		 */
   1235		if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) {
   1236			int idx = srcu_read_lock(&vcpu->kvm->srcu);
   1237			r = kvmppc_handle_pagefault(vcpu, dar, exit_nr);
   1238			srcu_read_unlock(&vcpu->kvm->srcu, idx);
   1239		} else {
   1240			kvmppc_core_queue_data_storage(vcpu, dar, fault_dsisr);
   1241			r = RESUME_GUEST;
   1242		}
   1243		break;
   1244	}
   1245	case BOOK3S_INTERRUPT_DATA_SEGMENT:
   1246		if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
   1247			kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
   1248			kvmppc_book3s_queue_irqprio(vcpu,
   1249				BOOK3S_INTERRUPT_DATA_SEGMENT);
   1250		}
   1251		r = RESUME_GUEST;
   1252		break;
   1253	case BOOK3S_INTERRUPT_INST_SEGMENT:
   1254		if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) {
   1255			kvmppc_book3s_queue_irqprio(vcpu,
   1256				BOOK3S_INTERRUPT_INST_SEGMENT);
   1257		}
   1258		r = RESUME_GUEST;
   1259		break;
   1260	/* We're good on these - the host merely wanted to get our attention */
   1261	case BOOK3S_INTERRUPT_DECREMENTER:
   1262	case BOOK3S_INTERRUPT_HV_DECREMENTER:
   1263	case BOOK3S_INTERRUPT_DOORBELL:
   1264	case BOOK3S_INTERRUPT_H_DOORBELL:
   1265		vcpu->stat.dec_exits++;
   1266		r = RESUME_GUEST;
   1267		break;
   1268	case BOOK3S_INTERRUPT_EXTERNAL:
   1269	case BOOK3S_INTERRUPT_EXTERNAL_HV:
   1270	case BOOK3S_INTERRUPT_H_VIRT:
   1271		vcpu->stat.ext_intr_exits++;
   1272		r = RESUME_GUEST;
   1273		break;
   1274	case BOOK3S_INTERRUPT_HMI:
   1275	case BOOK3S_INTERRUPT_PERFMON:
   1276	case BOOK3S_INTERRUPT_SYSTEM_RESET:
   1277		r = RESUME_GUEST;
   1278		break;
   1279	case BOOK3S_INTERRUPT_PROGRAM:
   1280	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
   1281		r = kvmppc_exit_pr_progint(vcpu, exit_nr);
   1282		break;
   1283	case BOOK3S_INTERRUPT_SYSCALL:
   1284	{
   1285		u32 last_sc;
   1286		int emul;
   1287
   1288		/* Get last sc for papr */
   1289		if (vcpu->arch.papr_enabled) {
   1290			/* The sc instruction points SRR0 to the next inst */
   1291			emul = kvmppc_get_last_inst(vcpu, INST_SC, &last_sc);
   1292			if (emul != EMULATE_DONE) {
   1293				kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) - 4);
   1294				r = RESUME_GUEST;
   1295				break;
   1296			}
   1297		}
   1298
   1299		if (vcpu->arch.papr_enabled &&
   1300		    (last_sc == 0x44000022) &&
   1301		    !(kvmppc_get_msr(vcpu) & MSR_PR)) {
   1302			/* SC 1 papr hypercalls */
   1303			ulong cmd = kvmppc_get_gpr(vcpu, 3);
   1304			int i;
   1305
   1306#ifdef CONFIG_PPC_BOOK3S_64
   1307			if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
   1308				r = RESUME_GUEST;
   1309				break;
   1310			}
   1311#endif
   1312
   1313			run->papr_hcall.nr = cmd;
   1314			for (i = 0; i < 9; ++i) {
   1315				ulong gpr = kvmppc_get_gpr(vcpu, 4 + i);
   1316				run->papr_hcall.args[i] = gpr;
   1317			}
   1318			run->exit_reason = KVM_EXIT_PAPR_HCALL;
   1319			vcpu->arch.hcall_needed = 1;
   1320			r = RESUME_HOST;
   1321		} else if (vcpu->arch.osi_enabled &&
   1322		    (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
   1323		    (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
   1324			/* MOL hypercalls */
   1325			u64 *gprs = run->osi.gprs;
   1326			int i;
   1327
   1328			run->exit_reason = KVM_EXIT_OSI;
   1329			for (i = 0; i < 32; i++)
   1330				gprs[i] = kvmppc_get_gpr(vcpu, i);
   1331			vcpu->arch.osi_needed = 1;
   1332			r = RESUME_HOST_NV;
   1333		} else if (!(kvmppc_get_msr(vcpu) & MSR_PR) &&
   1334		    (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
   1335			/* KVM PV hypercalls */
   1336			kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
   1337			r = RESUME_GUEST;
   1338		} else {
   1339			/* Guest syscalls */
   1340			vcpu->stat.syscall_exits++;
   1341			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
   1342			r = RESUME_GUEST;
   1343		}
   1344		break;
   1345	}
   1346	case BOOK3S_INTERRUPT_FP_UNAVAIL:
   1347	case BOOK3S_INTERRUPT_ALTIVEC:
   1348	case BOOK3S_INTERRUPT_VSX:
   1349	{
   1350		int ext_msr = 0;
   1351		int emul;
   1352		u32 last_inst;
   1353
   1354		if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) {
   1355			/* Do paired single instruction emulation */
   1356			emul = kvmppc_get_last_inst(vcpu, INST_GENERIC,
   1357						    &last_inst);
   1358			if (emul == EMULATE_DONE)
   1359				r = kvmppc_exit_pr_progint(vcpu, exit_nr);
   1360			else
   1361				r = RESUME_GUEST;
   1362
   1363			break;
   1364		}
   1365
   1366		/* Enable external provider */
   1367		switch (exit_nr) {
   1368		case BOOK3S_INTERRUPT_FP_UNAVAIL:
   1369			ext_msr = MSR_FP;
   1370			break;
   1371
   1372		case BOOK3S_INTERRUPT_ALTIVEC:
   1373			ext_msr = MSR_VEC;
   1374			break;
   1375
   1376		case BOOK3S_INTERRUPT_VSX:
   1377			ext_msr = MSR_VSX;
   1378			break;
   1379		}
   1380
   1381		r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr);
   1382		break;
   1383	}
   1384	case BOOK3S_INTERRUPT_ALIGNMENT:
   1385	{
   1386		u32 last_inst;
   1387		int emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
   1388
   1389		if (emul == EMULATE_DONE) {
   1390			u32 dsisr;
   1391			u64 dar;
   1392
   1393			dsisr = kvmppc_alignment_dsisr(vcpu, last_inst);
   1394			dar = kvmppc_alignment_dar(vcpu, last_inst);
   1395
   1396			kvmppc_set_dsisr(vcpu, dsisr);
   1397			kvmppc_set_dar(vcpu, dar);
   1398
   1399			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
   1400		}
   1401		r = RESUME_GUEST;
   1402		break;
   1403	}
   1404#ifdef CONFIG_PPC_BOOK3S_64
   1405	case BOOK3S_INTERRUPT_FAC_UNAVAIL:
   1406		r = kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56);
   1407		break;
   1408#endif
   1409	case BOOK3S_INTERRUPT_MACHINE_CHECK:
   1410		kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
   1411		r = RESUME_GUEST;
   1412		break;
   1413	case BOOK3S_INTERRUPT_TRACE:
   1414		if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
   1415			run->exit_reason = KVM_EXIT_DEBUG;
   1416			r = RESUME_HOST;
   1417		} else {
   1418			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
   1419			r = RESUME_GUEST;
   1420		}
   1421		break;
   1422	default:
   1423	{
   1424		ulong shadow_srr1 = vcpu->arch.shadow_srr1;
   1425		/* Ugh - bork here! What did we get? */
   1426		printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
   1427			exit_nr, kvmppc_get_pc(vcpu), shadow_srr1);
   1428		r = RESUME_HOST;
   1429		BUG();
   1430		break;
   1431	}
   1432	}
   1433
   1434	if (!(r & RESUME_HOST)) {
   1435		/* To avoid clobbering exit_reason, only check for signals if
   1436		 * we aren't already exiting to userspace for some other
   1437		 * reason. */
   1438
   1439		/*
   1440		 * Interrupts could be timers for the guest which we have to
   1441		 * inject again, so let's postpone them until we're in the guest
   1442		 * and if we really did time things so badly, then we just exit
   1443		 * again due to a host external interrupt.
   1444		 */
   1445		s = kvmppc_prepare_to_enter(vcpu);
   1446		if (s <= 0)
   1447			r = s;
   1448		else {
   1449			/* interrupts now hard-disabled */
   1450			kvmppc_fix_ee_before_entry();
   1451		}
   1452
   1453		kvmppc_handle_lost_ext(vcpu);
   1454	}
   1455
   1456	trace_kvm_book3s_reenter(r, vcpu);
   1457
   1458	return r;
   1459}
   1460
   1461static int kvm_arch_vcpu_ioctl_get_sregs_pr(struct kvm_vcpu *vcpu,
   1462					    struct kvm_sregs *sregs)
   1463{
   1464	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
   1465	int i;
   1466
   1467	sregs->pvr = vcpu->arch.pvr;
   1468
   1469	sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1;
   1470	if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
   1471		for (i = 0; i < 64; i++) {
   1472			sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige | i;
   1473			sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
   1474		}
   1475	} else {
   1476		for (i = 0; i < 16; i++)
   1477			sregs->u.s.ppc32.sr[i] = kvmppc_get_sr(vcpu, i);
   1478
   1479		for (i = 0; i < 8; i++) {
   1480			sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
   1481			sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
   1482		}
   1483	}
   1484
   1485	return 0;
   1486}
   1487
   1488static int kvm_arch_vcpu_ioctl_set_sregs_pr(struct kvm_vcpu *vcpu,
   1489					    struct kvm_sregs *sregs)
   1490{
   1491	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
   1492	int i;
   1493
   1494	kvmppc_set_pvr_pr(vcpu, sregs->pvr);
   1495
   1496	vcpu3s->sdr1 = sregs->u.s.sdr1;
   1497#ifdef CONFIG_PPC_BOOK3S_64
   1498	if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
   1499		/* Flush all SLB entries */
   1500		vcpu->arch.mmu.slbmte(vcpu, 0, 0);
   1501		vcpu->arch.mmu.slbia(vcpu);
   1502
   1503		for (i = 0; i < 64; i++) {
   1504			u64 rb = sregs->u.s.ppc64.slb[i].slbe;
   1505			u64 rs = sregs->u.s.ppc64.slb[i].slbv;
   1506
   1507			if (rb & SLB_ESID_V)
   1508				vcpu->arch.mmu.slbmte(vcpu, rs, rb);
   1509		}
   1510	} else
   1511#endif
   1512	{
   1513		for (i = 0; i < 16; i++) {
   1514			vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]);
   1515		}
   1516		for (i = 0; i < 8; i++) {
   1517			kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false,
   1518				       (u32)sregs->u.s.ppc32.ibat[i]);
   1519			kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true,
   1520				       (u32)(sregs->u.s.ppc32.ibat[i] >> 32));
   1521			kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false,
   1522				       (u32)sregs->u.s.ppc32.dbat[i]);
   1523			kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true,
   1524				       (u32)(sregs->u.s.ppc32.dbat[i] >> 32));
   1525		}
   1526	}
   1527
   1528	/* Flush the MMU after messing with the segments */
   1529	kvmppc_mmu_pte_flush(vcpu, 0, 0);
   1530
   1531	return 0;
   1532}
   1533
   1534static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
   1535				 union kvmppc_one_reg *val)
   1536{
   1537	int r = 0;
   1538
   1539	switch (id) {
   1540	case KVM_REG_PPC_DEBUG_INST:
   1541		*val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
   1542		break;
   1543	case KVM_REG_PPC_HIOR:
   1544		*val = get_reg_val(id, to_book3s(vcpu)->hior);
   1545		break;
   1546	case KVM_REG_PPC_VTB:
   1547		*val = get_reg_val(id, to_book3s(vcpu)->vtb);
   1548		break;
   1549	case KVM_REG_PPC_LPCR:
   1550	case KVM_REG_PPC_LPCR_64:
   1551		/*
   1552		 * We are only interested in the LPCR_ILE bit
   1553		 */
   1554		if (vcpu->arch.intr_msr & MSR_LE)
   1555			*val = get_reg_val(id, LPCR_ILE);
   1556		else
   1557			*val = get_reg_val(id, 0);
   1558		break;
   1559#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
   1560	case KVM_REG_PPC_TFHAR:
   1561		*val = get_reg_val(id, vcpu->arch.tfhar);
   1562		break;
   1563	case KVM_REG_PPC_TFIAR:
   1564		*val = get_reg_val(id, vcpu->arch.tfiar);
   1565		break;
   1566	case KVM_REG_PPC_TEXASR:
   1567		*val = get_reg_val(id, vcpu->arch.texasr);
   1568		break;
   1569	case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
   1570		*val = get_reg_val(id,
   1571				vcpu->arch.gpr_tm[id-KVM_REG_PPC_TM_GPR0]);
   1572		break;
   1573	case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
   1574	{
   1575		int i, j;
   1576
   1577		i = id - KVM_REG_PPC_TM_VSR0;
   1578		if (i < 32)
   1579			for (j = 0; j < TS_FPRWIDTH; j++)
   1580				val->vsxval[j] = vcpu->arch.fp_tm.fpr[i][j];
   1581		else {
   1582			if (cpu_has_feature(CPU_FTR_ALTIVEC))
   1583				val->vval = vcpu->arch.vr_tm.vr[i-32];
   1584			else
   1585				r = -ENXIO;
   1586		}
   1587		break;
   1588	}
   1589	case KVM_REG_PPC_TM_CR:
   1590		*val = get_reg_val(id, vcpu->arch.cr_tm);
   1591		break;
   1592	case KVM_REG_PPC_TM_XER:
   1593		*val = get_reg_val(id, vcpu->arch.xer_tm);
   1594		break;
   1595	case KVM_REG_PPC_TM_LR:
   1596		*val = get_reg_val(id, vcpu->arch.lr_tm);
   1597		break;
   1598	case KVM_REG_PPC_TM_CTR:
   1599		*val = get_reg_val(id, vcpu->arch.ctr_tm);
   1600		break;
   1601	case KVM_REG_PPC_TM_FPSCR:
   1602		*val = get_reg_val(id, vcpu->arch.fp_tm.fpscr);
   1603		break;
   1604	case KVM_REG_PPC_TM_AMR:
   1605		*val = get_reg_val(id, vcpu->arch.amr_tm);
   1606		break;
   1607	case KVM_REG_PPC_TM_PPR:
   1608		*val = get_reg_val(id, vcpu->arch.ppr_tm);
   1609		break;
   1610	case KVM_REG_PPC_TM_VRSAVE:
   1611		*val = get_reg_val(id, vcpu->arch.vrsave_tm);
   1612		break;
   1613	case KVM_REG_PPC_TM_VSCR:
   1614		if (cpu_has_feature(CPU_FTR_ALTIVEC))
   1615			*val = get_reg_val(id, vcpu->arch.vr_tm.vscr.u[3]);
   1616		else
   1617			r = -ENXIO;
   1618		break;
   1619	case KVM_REG_PPC_TM_DSCR:
   1620		*val = get_reg_val(id, vcpu->arch.dscr_tm);
   1621		break;
   1622	case KVM_REG_PPC_TM_TAR:
   1623		*val = get_reg_val(id, vcpu->arch.tar_tm);
   1624		break;
   1625#endif
   1626	default:
   1627		r = -EINVAL;
   1628		break;
   1629	}
   1630
   1631	return r;
   1632}
   1633
   1634static void kvmppc_set_lpcr_pr(struct kvm_vcpu *vcpu, u64 new_lpcr)
   1635{
   1636	if (new_lpcr & LPCR_ILE)
   1637		vcpu->arch.intr_msr |= MSR_LE;
   1638	else
   1639		vcpu->arch.intr_msr &= ~MSR_LE;
   1640}
   1641
   1642static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
   1643				 union kvmppc_one_reg *val)
   1644{
   1645	int r = 0;
   1646
   1647	switch (id) {
   1648	case KVM_REG_PPC_HIOR:
   1649		to_book3s(vcpu)->hior = set_reg_val(id, *val);
   1650		to_book3s(vcpu)->hior_explicit = true;
   1651		break;
   1652	case KVM_REG_PPC_VTB:
   1653		to_book3s(vcpu)->vtb = set_reg_val(id, *val);
   1654		break;
   1655	case KVM_REG_PPC_LPCR:
   1656	case KVM_REG_PPC_LPCR_64:
   1657		kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val));
   1658		break;
   1659#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
   1660	case KVM_REG_PPC_TFHAR:
   1661		vcpu->arch.tfhar = set_reg_val(id, *val);
   1662		break;
   1663	case KVM_REG_PPC_TFIAR:
   1664		vcpu->arch.tfiar = set_reg_val(id, *val);
   1665		break;
   1666	case KVM_REG_PPC_TEXASR:
   1667		vcpu->arch.texasr = set_reg_val(id, *val);
   1668		break;
   1669	case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
   1670		vcpu->arch.gpr_tm[id - KVM_REG_PPC_TM_GPR0] =
   1671			set_reg_val(id, *val);
   1672		break;
   1673	case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
   1674	{
   1675		int i, j;
   1676
   1677		i = id - KVM_REG_PPC_TM_VSR0;
   1678		if (i < 32)
   1679			for (j = 0; j < TS_FPRWIDTH; j++)
   1680				vcpu->arch.fp_tm.fpr[i][j] = val->vsxval[j];
   1681		else
   1682			if (cpu_has_feature(CPU_FTR_ALTIVEC))
   1683				vcpu->arch.vr_tm.vr[i-32] = val->vval;
   1684			else
   1685				r = -ENXIO;
   1686		break;
   1687	}
   1688	case KVM_REG_PPC_TM_CR:
   1689		vcpu->arch.cr_tm = set_reg_val(id, *val);
   1690		break;
   1691	case KVM_REG_PPC_TM_XER:
   1692		vcpu->arch.xer_tm = set_reg_val(id, *val);
   1693		break;
   1694	case KVM_REG_PPC_TM_LR:
   1695		vcpu->arch.lr_tm = set_reg_val(id, *val);
   1696		break;
   1697	case KVM_REG_PPC_TM_CTR:
   1698		vcpu->arch.ctr_tm = set_reg_val(id, *val);
   1699		break;
   1700	case KVM_REG_PPC_TM_FPSCR:
   1701		vcpu->arch.fp_tm.fpscr = set_reg_val(id, *val);
   1702		break;
   1703	case KVM_REG_PPC_TM_AMR:
   1704		vcpu->arch.amr_tm = set_reg_val(id, *val);
   1705		break;
   1706	case KVM_REG_PPC_TM_PPR:
   1707		vcpu->arch.ppr_tm = set_reg_val(id, *val);
   1708		break;
   1709	case KVM_REG_PPC_TM_VRSAVE:
   1710		vcpu->arch.vrsave_tm = set_reg_val(id, *val);
   1711		break;
   1712	case KVM_REG_PPC_TM_VSCR:
   1713		if (cpu_has_feature(CPU_FTR_ALTIVEC))
   1714			vcpu->arch.vr_tm.vscr.u[3] = set_reg_val(id, *val);
   1715		else
   1716			r = -ENXIO;
   1717		break;
   1718	case KVM_REG_PPC_TM_DSCR:
   1719		vcpu->arch.dscr_tm = set_reg_val(id, *val);
   1720		break;
   1721	case KVM_REG_PPC_TM_TAR:
   1722		vcpu->arch.tar_tm = set_reg_val(id, *val);
   1723		break;
   1724#endif
   1725	default:
   1726		r = -EINVAL;
   1727		break;
   1728	}
   1729
   1730	return r;
   1731}
   1732
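       /*
        * Allocate the PR-specific vcpu state: the book3s struct, the
        * shadow vcpu (32-bit hosts only) and the page shared with the
        * guest, then choose a default PVR and set up the shadow MMU.
        */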
   1733static int kvmppc_core_vcpu_create_pr(struct kvm_vcpu *vcpu)
   1734{
   1735	struct kvmppc_vcpu_book3s *vcpu_book3s;
   1736	unsigned long p;
   1737	int err;
   1738
   1739	err = -ENOMEM;
   1740
   1741	vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
   1742	if (!vcpu_book3s)
   1743		goto out;
   1744	vcpu->arch.book3s = vcpu_book3s;
   1745
   1746#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
   1747	vcpu->arch.shadow_vcpu =
   1748		kzalloc(sizeof(*vcpu->arch.shadow_vcpu), GFP_KERNEL);
   1749	if (!vcpu->arch.shadow_vcpu)
   1750		goto free_vcpu3s;
   1751#endif
   1752
   1753	p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
   1754	if (!p)
   1755		goto free_shadow_vcpu;
   1756	vcpu->arch.shared = (void *)p;
   1757#ifdef CONFIG_PPC_BOOK3S_64
   1758	/* Always start the shared struct in native endian mode */
   1759#ifdef __BIG_ENDIAN__
   1760	vcpu->arch.shared_big_endian = true;
   1761#else
   1762	vcpu->arch.shared_big_endian = false;
   1763#endif
   1764
   1765	/*
   1766	 * Default to the same PVR as the host if we're on a machine
   1767	 * recent enough to have 1TB segments; otherwise default to
   1768	 * PPC970FX.
   1769	 */
   1770	vcpu->arch.pvr = 0x3C0301;
   1771	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
   1772		vcpu->arch.pvr = mfspr(SPRN_PVR);
   1773	vcpu->arch.intr_msr = MSR_SF;
   1774#else
   1775	/* default to book3s_32 (750) */
   1776	vcpu->arch.pvr = 0x84202;
   1777	vcpu->arch.intr_msr = 0;
   1778#endif
   1779	kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
   1780	vcpu->arch.slb_nr = 64;
   1781
   1782	vcpu->arch.shadow_msr = MSR_USER64 & ~MSR_LE;
   1783
   1784	err = kvmppc_mmu_init_pr(vcpu);
   1785	if (err < 0)
   1786		goto free_shared_page;
   1787
   1788	return 0;
   1789
   1790free_shared_page:
   1791	free_page((unsigned long)vcpu->arch.shared);
   1792free_shadow_vcpu:
   1793#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
   1794	kfree(vcpu->arch.shadow_vcpu);
   1795free_vcpu3s:
   1796#endif
   1797	vfree(vcpu_book3s);
   1798out:
   1799	return err;
   1800}
   1801
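       /* Undo the allocations made by kvmppc_core_vcpu_create_pr(). */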
   1802static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
   1803{
   1804	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
   1805
   1806	kvmppc_mmu_destroy_pr(vcpu);
   1807	free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
   1808#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
   1809	kfree(vcpu->arch.shadow_vcpu);
   1810#endif
   1811	vfree(vcpu_book3s);
   1812}
   1813
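       /*
        * Outer run-loop entry for PR KVM: give up the host FP/Altivec/VSX
        * state, preload the guest FPU if the guest MSR has FP set, enter
        * the guest through __kvmppc_vcpu_run() and save the guest ext and
        * facility state again on the way out.
        */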
   1814static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu)
   1815{
   1816	int ret;
   1817
   1818	/* Check if we can run the vcpu at all */
   1819	if (!vcpu->arch.sane) {
   1820		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
   1821		ret = -EINVAL;
   1822		goto out;
   1823	}
   1824
   1825	kvmppc_setup_debug(vcpu);
   1826
   1827	/*
   1828	 * Interrupts could be timers for the guest which we have to inject
   1829	 * again, so let's postpone them until we're in the guest; if we
   1830	 * really did time things that badly, we will simply exit again due
   1831	 * to a host external interrupt.
   1832	 */
   1833	ret = kvmppc_prepare_to_enter(vcpu);
   1834	if (ret <= 0)
   1835		goto out;
   1836	/* interrupts now hard-disabled */
   1837
   1838	/* Save FPU, Altivec and VSX state */
   1839	giveup_all(current);
   1840
   1841	/* Preload FPU if it's enabled */
   1842	if (kvmppc_get_msr(vcpu) & MSR_FP)
   1843		kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
   1844
   1845	kvmppc_fix_ee_before_entry();
   1846
   1847	ret = __kvmppc_vcpu_run(vcpu);
   1848
   1849	kvmppc_clear_debug(vcpu);
   1850
   1851	/* No need for guest_exit. It's done in handle_exit.
   1852	   We also get here with interrupts enabled. */
   1853
   1854	/* Make sure we save the guest FPU/Altivec/VSX state */
   1855	kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
   1856
   1857	/* Make sure we save the guest TAR/EBB/DSCR state */
   1858	kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
   1859
   1860	srr_regs_clobbered();
   1861out:
   1862	vcpu->mode = OUTSIDE_GUEST_MODE;
   1863	return ret;
   1864}
   1865
   1866/*
   1867 * Get (and clear) the dirty memory log for a memory slot.
   1868 */
   1869static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm,
   1870					 struct kvm_dirty_log *log)
   1871{
   1872	struct kvm_memory_slot *memslot;
   1873	struct kvm_vcpu *vcpu;
   1874	ulong ga, ga_end;
   1875	int is_dirty = 0;
   1876	int r;
   1877	unsigned long n;
   1878
   1879	mutex_lock(&kvm->slots_lock);
   1880
   1881	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
   1882	if (r)
   1883		goto out;
   1884
   1885	/* If nothing is dirty, don't bother messing with page tables. */
   1886	if (is_dirty) {
   1887		ga = memslot->base_gfn << PAGE_SHIFT;
   1888		ga_end = ga + (memslot->npages << PAGE_SHIFT);
   1889
   1890		kvm_for_each_vcpu(n, vcpu, kvm)
   1891			kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
   1892
   1893		n = kvm_dirty_bitmap_bytes(memslot);
   1894		memset(memslot->dirty_bitmap, 0, n);
   1895	}
   1896
   1897	r = 0;
   1898out:
   1899	mutex_unlock(&kvm->slots_lock);
   1900	return r;
   1901}
   1902
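       /*
        * The memslot callbacks below are no-ops for PR KVM: guest
        * mappings are built lazily in the shadow MMU, so there is no
        * per-memslot architecture state to flush, prepare, commit or
        * free here.
        */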
   1903static void kvmppc_core_flush_memslot_pr(struct kvm *kvm,
   1904					 struct kvm_memory_slot *memslot)
   1905{
   1906	return;
   1907}
   1908
   1909static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
   1910				const struct kvm_memory_slot *old,
   1911				struct kvm_memory_slot *new,
   1912				enum kvm_mr_change change)
   1913{
   1914	return 0;
   1915}
   1916
   1917static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm,
   1918				struct kvm_memory_slot *old,
   1919				const struct kvm_memory_slot *new,
   1920				enum kvm_mr_change change)
   1921{
   1922	return;
   1923}
   1924
   1925static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *slot)
   1926{
   1927	return;
   1928}
   1929
   1930#ifdef CONFIG_PPC64
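       /*
        * Report the emulated MMU geometry (SLB size and supported
        * segment page sizes) for the KVM_PPC_GET_SMMU_INFO vm ioctl.
        */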
   1931static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
   1932					 struct kvm_ppc_smmu_info *info)
   1933{
   1934	long int i;
   1935	struct kvm_vcpu *vcpu;
   1936
   1937	info->flags = 0;
   1938
   1939	/* SLB is always 64 entries */
   1940	info->slb_size = 64;
   1941
   1942	/* Standard 4k base page size segment */
   1943	info->sps[0].page_shift = 12;
   1944	info->sps[0].slb_enc = 0;
   1945	info->sps[0].enc[0].page_shift = 12;
   1946	info->sps[0].enc[0].pte_enc = 0;
   1947
   1948	/*
   1949	 * 64k large page size.
   1950	 * We only want to put this in if the CPUs we're emulating
   1951	 * support it, but unfortunately we don't have a vcpu easily
   1952	 * to hand here to test.  Just pick the first vcpu, and if
   1953	 * that doesn't exist yet, report the minimum capability,
   1954	 * i.e., no 64k pages.
   1955	 * 1T segment support goes along with 64k pages.
   1956	 */
   1957	i = 1;
   1958	vcpu = kvm_get_vcpu(kvm, 0);
   1959	if (vcpu && (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
   1960		info->flags = KVM_PPC_1T_SEGMENTS;
   1961		info->sps[i].page_shift = 16;
   1962		info->sps[i].slb_enc = SLB_VSID_L | SLB_VSID_LP_01;
   1963		info->sps[i].enc[0].page_shift = 16;
   1964		info->sps[i].enc[0].pte_enc = 1;
   1965		++i;
   1966	}
   1967
   1968	/* Standard 16M large page size segment */
   1969	info->sps[i].page_shift = 24;
   1970	info->sps[i].slb_enc = SLB_VSID_L;
   1971	info->sps[i].enc[0].page_shift = 24;
   1972	info->sps[i].enc[0].pte_enc = 0;
   1973
   1974	return 0;
   1975}
   1976
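       /*
        * PR KVM does not implement the radix MMU, so on POWER9 the only
        * configuration accepted from the KVM_PPC_CONFIGURE_V3_MMU vm
        * ioctl is the all-zero (HPT) one.
        */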
   1977static int kvm_configure_mmu_pr(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
   1978{
   1979	if (!cpu_has_feature(CPU_FTR_ARCH_300))
   1980		return -ENODEV;
   1981	/* Require flags and process table base and size to all be zero. */
   1982	if (cfg->flags || cfg->process_table)
   1983		return -EINVAL;
   1984	return 0;
   1985}
   1986
   1987#else
   1988static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
   1989					 struct kvm_ppc_smmu_info *info)
   1990{
   1991	/* We should not get called */
   1992	BUG();
   1993	return 0;
   1994}
   1995#endif /* CONFIG_PPC64 */
   1996
   1997static unsigned int kvm_global_user_count = 0;
   1998static DEFINE_SPINLOCK(kvm_global_user_count_lock);
   1999
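       /*
        * VM-level init for PR KVM.  On pseries hosts that have the
        * SET_MODE firmware facility, relocation-on exceptions are
        * disabled while at least one PR VM exists; kvm_global_user_count
        * tracks that across all VMs.
        */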
   2000static int kvmppc_core_init_vm_pr(struct kvm *kvm)
   2001{
   2002	mutex_init(&kvm->arch.hpt_mutex);
   2003
   2004#ifdef CONFIG_PPC_BOOK3S_64
   2005	/* Start out with the default set of hcalls enabled */
   2006	kvmppc_pr_init_default_hcalls(kvm);
   2007#endif
   2008
   2009	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
   2010		spin_lock(&kvm_global_user_count_lock);
   2011		if (++kvm_global_user_count == 1)
   2012			pseries_disable_reloc_on_exc();
   2013		spin_unlock(&kvm_global_user_count_lock);
   2014	}
   2015	return 0;
   2016}
   2017
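       /*
        * VM-level teardown: once the last PR VM goes away, re-enable
        * relocation-on exceptions.
        */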
   2018static void kvmppc_core_destroy_vm_pr(struct kvm *kvm)
   2019{
   2020#ifdef CONFIG_PPC64
   2021	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
   2022#endif
   2023
   2024	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
   2025		spin_lock(&kvm_global_user_count_lock);
   2026		BUG_ON(kvm_global_user_count == 0);
   2027		if (--kvm_global_user_count == 0)
   2028			pseries_enable_reloc_on_exc();
   2029		spin_unlock(&kvm_global_user_count_lock);
   2030	}
   2031}
   2032
   2033static int kvmppc_core_check_processor_compat_pr(void)
   2034{
   2035	/*
   2036	 * PR KVM can work on POWER9 inside a guest partition
   2037	 * running in HPT mode.  It can't work if we are using
   2038	 * radix translation (because radix provides no way for
   2039	 * a process to have unique translations in quadrant 3).
   2040	 */
   2041	if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled())
   2042		return -EIO;
   2043	return 0;
   2044}
   2045
   2046static long kvm_arch_vm_ioctl_pr(struct file *filp,
   2047				 unsigned int ioctl, unsigned long arg)
   2048{
   2049	return -ENOTTY;
   2050}
   2051
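       /*
        * The kvmppc_ops table that plugs PR KVM into the common powerpc
        * KVM code.
        */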
   2052static struct kvmppc_ops kvm_ops_pr = {
   2053	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr,
   2054	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr,
   2055	.get_one_reg = kvmppc_get_one_reg_pr,
   2056	.set_one_reg = kvmppc_set_one_reg_pr,
   2057	.vcpu_load   = kvmppc_core_vcpu_load_pr,
   2058	.vcpu_put    = kvmppc_core_vcpu_put_pr,
   2059	.inject_interrupt = kvmppc_inject_interrupt_pr,
   2060	.set_msr     = kvmppc_set_msr_pr,
   2061	.vcpu_run    = kvmppc_vcpu_run_pr,
   2062	.vcpu_create = kvmppc_core_vcpu_create_pr,
   2063	.vcpu_free   = kvmppc_core_vcpu_free_pr,
   2064	.check_requests = kvmppc_core_check_requests_pr,
   2065	.get_dirty_log = kvm_vm_ioctl_get_dirty_log_pr,
   2066	.flush_memslot = kvmppc_core_flush_memslot_pr,
   2067	.prepare_memory_region = kvmppc_core_prepare_memory_region_pr,
   2068	.commit_memory_region = kvmppc_core_commit_memory_region_pr,
   2069	.unmap_gfn_range = kvm_unmap_gfn_range_pr,
   2070	.age_gfn  = kvm_age_gfn_pr,
   2071	.test_age_gfn = kvm_test_age_gfn_pr,
   2072	.set_spte_gfn = kvm_set_spte_gfn_pr,
   2073	.free_memslot = kvmppc_core_free_memslot_pr,
   2074	.init_vm = kvmppc_core_init_vm_pr,
   2075	.destroy_vm = kvmppc_core_destroy_vm_pr,
   2076	.get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr,
   2077	.emulate_op = kvmppc_core_emulate_op_pr,
   2078	.emulate_mtspr = kvmppc_core_emulate_mtspr_pr,
   2079	.emulate_mfspr = kvmppc_core_emulate_mfspr_pr,
   2080	.fast_vcpu_kick = kvm_vcpu_kick,
   2081	.arch_vm_ioctl  = kvm_arch_vm_ioctl_pr,
   2082#ifdef CONFIG_PPC_BOOK3S_64
   2083	.hcall_implemented = kvmppc_hcall_impl_pr,
   2084	.configure_mmu = kvm_configure_mmu_pr,
   2085#endif
   2086	.giveup_ext = kvmppc_giveup_ext,
   2087};
   2088
   2089
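       /*
        * Init entry point for PR KVM.  On 64-bit book3s this is the
        * module entry point (see module_init() below); on 32-bit, where
        * PR KVM cannot be built as a separate module, it is expected to
        * be called from the built-in KVM initialisation code instead.
        */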
   2090int kvmppc_book3s_init_pr(void)
   2091{
   2092	int r;
   2093
   2094	r = kvmppc_core_check_processor_compat_pr();
   2095	if (r < 0)
   2096		return r;
   2097
   2098	kvm_ops_pr.owner = THIS_MODULE;
   2099	kvmppc_pr_ops = &kvm_ops_pr;
   2100
   2101	r = kvmppc_mmu_hpte_sysinit();
   2102	return r;
   2103}
   2104
   2105void kvmppc_book3s_exit_pr(void)
   2106{
   2107	kvmppc_pr_ops = NULL;
   2108	kvmppc_mmu_hpte_sysexit();
   2109}
   2110
   2111/*
   2112 * We only support separate modules for book3s 64
   2113 */
   2114#ifdef CONFIG_PPC_BOOK3S_64
   2115
   2116module_init(kvmppc_book3s_init_pr);
   2117module_exit(kvmppc_book3s_exit_pr);
   2118
   2119MODULE_LICENSE("GPL");
   2120MODULE_ALIAS_MISCDEV(KVM_MINOR);
   2121MODULE_ALIAS("devname:kvm");
   2122#endif