cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

gaccess.c (41534B)


// SPDX-License-Identifier: GPL-2.0
/*
 * guest access functions
 *
 * Copyright IBM Corp. 2014
 *
 */

#include <linux/vmalloc.h>
#include <linux/mm_types.h>
#include <linux/err.h>
#include <linux/pgtable.h>
#include <linux/bitfield.h>

#include <asm/gmap.h>
#include "kvm-s390.h"
#include "gaccess.h"
#include <asm/switch_to.h>

union asce {
	unsigned long val;
	struct {
		unsigned long origin : 52; /* Region- or Segment-Table Origin */
		unsigned long	 : 2;
		unsigned long g  : 1; /* Subspace Group Control */
		unsigned long p  : 1; /* Private Space Control */
		unsigned long s  : 1; /* Storage-Alteration-Event Control */
		unsigned long x  : 1; /* Space-Switch-Event Control */
		unsigned long r  : 1; /* Real-Space Control */
		unsigned long	 : 1;
		unsigned long dt : 2; /* Designation-Type Control */
		unsigned long tl : 2; /* Region- or Segment-Table Length */
	};
};
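/*
 * The 2-bit dt field selects the type of the top-level table the ASCE
 * designates (region-first through segment table), and tl gives that
 * table's length in units of 512 eight-byte entries.  A full table has
 * 2048 entries, which is why the translation code below only compares
 * the two high-order bits of an 11-bit table index (e.g. vaddr.rfx01)
 * against tl.
 */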

enum {
	ASCE_TYPE_SEGMENT = 0,
	ASCE_TYPE_REGION3 = 1,
	ASCE_TYPE_REGION2 = 2,
	ASCE_TYPE_REGION1 = 3
};

union region1_table_entry {
	unsigned long val;
	struct {
		unsigned long rto: 52;/* Region-Table Origin */
		unsigned long	 : 2;
		unsigned long p  : 1; /* DAT-Protection Bit */
		unsigned long	 : 1;
		unsigned long tf : 2; /* Region-Second-Table Offset */
		unsigned long i  : 1; /* Region-Invalid Bit */
		unsigned long	 : 1;
		unsigned long tt : 2; /* Table-Type Bits */
		unsigned long tl : 2; /* Region-Second-Table Length */
	};
};

union region2_table_entry {
	unsigned long val;
	struct {
		unsigned long rto: 52;/* Region-Table Origin */
		unsigned long	 : 2;
		unsigned long p  : 1; /* DAT-Protection Bit */
		unsigned long	 : 1;
		unsigned long tf : 2; /* Region-Third-Table Offset */
		unsigned long i  : 1; /* Region-Invalid Bit */
		unsigned long	 : 1;
		unsigned long tt : 2; /* Table-Type Bits */
		unsigned long tl : 2; /* Region-Third-Table Length */
	};
};

struct region3_table_entry_fc0 {
	unsigned long sto: 52;/* Segment-Table Origin */
	unsigned long	 : 1;
	unsigned long fc : 1; /* Format-Control */
	unsigned long p  : 1; /* DAT-Protection Bit */
	unsigned long	 : 1;
	unsigned long tf : 2; /* Segment-Table Offset */
	unsigned long i  : 1; /* Region-Invalid Bit */
	unsigned long cr : 1; /* Common-Region Bit */
	unsigned long tt : 2; /* Table-Type Bits */
	unsigned long tl : 2; /* Segment-Table Length */
};

struct region3_table_entry_fc1 {
	unsigned long rfaa : 33; /* Region-Frame Absolute Address */
	unsigned long	 : 14;
	unsigned long av : 1; /* ACCF-Validity Control */
	unsigned long acc: 4; /* Access-Control Bits */
	unsigned long f  : 1; /* Fetch-Protection Bit */
	unsigned long fc : 1; /* Format-Control */
	unsigned long p  : 1; /* DAT-Protection Bit */
	unsigned long iep: 1; /* Instruction-Execution-Protection */
	unsigned long	 : 2;
	unsigned long i  : 1; /* Region-Invalid Bit */
	unsigned long cr : 1; /* Common-Region Bit */
	unsigned long tt : 2; /* Table-Type Bits */
	unsigned long	 : 2;
};

union region3_table_entry {
	unsigned long val;
	struct region3_table_entry_fc0 fc0;
	struct region3_table_entry_fc1 fc1;
	struct {
		unsigned long	 : 53;
		unsigned long fc : 1; /* Format-Control */
		unsigned long	 : 4;
		unsigned long i  : 1; /* Region-Invalid Bit */
		unsigned long cr : 1; /* Common-Region Bit */
		unsigned long tt : 2; /* Table-Type Bits */
		unsigned long	 : 2;
	};
};

struct segment_entry_fc0 {
	unsigned long pto: 53;/* Page-Table Origin */
	unsigned long fc : 1; /* Format-Control */
	unsigned long p  : 1; /* DAT-Protection Bit */
	unsigned long	 : 3;
	unsigned long i  : 1; /* Segment-Invalid Bit */
	unsigned long cs : 1; /* Common-Segment Bit */
	unsigned long tt : 2; /* Table-Type Bits */
	unsigned long	 : 2;
};

struct segment_entry_fc1 {
	unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
	unsigned long	 : 3;
	unsigned long av : 1; /* ACCF-Validity Control */
	unsigned long acc: 4; /* Access-Control Bits */
	unsigned long f  : 1; /* Fetch-Protection Bit */
	unsigned long fc : 1; /* Format-Control */
	unsigned long p  : 1; /* DAT-Protection Bit */
	unsigned long iep: 1; /* Instruction-Execution-Protection */
	unsigned long	 : 2;
	unsigned long i  : 1; /* Segment-Invalid Bit */
	unsigned long cs : 1; /* Common-Segment Bit */
	unsigned long tt : 2; /* Table-Type Bits */
	unsigned long	 : 2;
};

union segment_table_entry {
	unsigned long val;
	struct segment_entry_fc0 fc0;
	struct segment_entry_fc1 fc1;
	struct {
		unsigned long	 : 53;
		unsigned long fc : 1; /* Format-Control */
		unsigned long	 : 4;
		unsigned long i  : 1; /* Segment-Invalid Bit */
		unsigned long cs : 1; /* Common-Segment Bit */
		unsigned long tt : 2; /* Table-Type Bits */
		unsigned long	 : 2;
	};
};

enum {
	TABLE_TYPE_SEGMENT = 0,
	TABLE_TYPE_REGION3 = 1,
	TABLE_TYPE_REGION2 = 2,
	TABLE_TYPE_REGION1 = 3
};

union page_table_entry {
	unsigned long val;
	struct {
		unsigned long pfra : 52; /* Page-Frame Real Address */
		unsigned long z  : 1; /* Zero Bit */
		unsigned long i  : 1; /* Page-Invalid Bit */
		unsigned long p  : 1; /* DAT-Protection Bit */
		unsigned long iep: 1; /* Instruction-Execution-Protection */
		unsigned long	 : 8;
	};
};

/*
 * vaddress union in order to easily decode a virtual address into its
 * region first index, region second index etc. parts.
 */
union vaddress {
	unsigned long addr;
	struct {
		unsigned long rfx : 11;
		unsigned long rsx : 11;
		unsigned long rtx : 11;
		unsigned long sx  : 11;
		unsigned long px  : 8;
		unsigned long bx  : 12;
	};
	struct {
		unsigned long rfx01 : 2;
		unsigned long	    : 9;
		unsigned long rsx01 : 2;
		unsigned long	    : 9;
		unsigned long rtx01 : 2;
		unsigned long	    : 9;
		unsigned long sx01  : 2;
		unsigned long	    : 29;
	};
};
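/*
 * A 64-bit virtual address thus splits into 11 bits each of region-first,
 * region-second, region-third and segment index, an 8-bit page index and
 * a 12-bit byte offset:
 *
 *	| rfx 11 | rsx 11 | rtx 11 | sx 11 | px 8 | bx 12 |
 *
 * The rfx01/rsx01/rtx01/sx01 fields overlay the two high-order bits of
 * the corresponding index and are compared against table-length fields
 * during translation.
 */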

/*
 * raddress union which will contain the result (real or absolute address)
 * after a page table walk. The rfaa, sfaa and pfra members allow the value
 * of a region, segment or page table entry to be assigned directly.
 */
union raddress {
	unsigned long addr;
	unsigned long rfaa : 33; /* Region-Frame Absolute Address */
	unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
	unsigned long pfra : 52; /* Page-Frame Real Address */
};

union alet {
	u32 val;
	struct {
		u32 reserved : 7;
		u32 p        : 1;
		u32 alesn    : 8;
		u32 alen     : 16;
	};
};

union ald {
	u32 val;
	struct {
		u32     : 1;
		u32 alo : 24;
		u32 all : 7;
	};
};

struct ale {
	unsigned long i      : 1; /* ALEN-Invalid Bit */
	unsigned long        : 5;
	unsigned long fo     : 1; /* Fetch-Only Bit */
	unsigned long p      : 1; /* Private Bit */
	unsigned long alesn  : 8; /* Access-List-Entry Sequence Number */
	unsigned long aleax  : 16; /* Access-List-Entry Authorization Index */
	unsigned long        : 32;
	unsigned long        : 1;
	unsigned long asteo  : 25; /* ASN-Second-Table-Entry Origin */
	unsigned long        : 6;
	unsigned long astesn : 32; /* ASTE Sequence Number */
};

struct aste {
	unsigned long i      : 1; /* ASX-Invalid Bit */
	unsigned long ato    : 29; /* Authority-Table Origin */
	unsigned long        : 1;
	unsigned long b      : 1; /* Base-Space Bit */
	unsigned long ax     : 16; /* Authorization Index */
	unsigned long atl    : 12; /* Authority-Table Length */
	unsigned long        : 2;
	unsigned long ca     : 1; /* Controlled-ASN Bit */
	unsigned long ra     : 1; /* Reusable-ASN Bit */
	unsigned long asce   : 64; /* Address-Space-Control Element */
	unsigned long ald    : 32;
	unsigned long astesn : 32;
	/* ... more fields follow */
};

int ipte_lock_held(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.sie_block->eca & ECA_SII) {
		int rc;

		read_lock(&vcpu->kvm->arch.sca_lock);
		rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0;
		read_unlock(&vcpu->kvm->arch.sca_lock);
		return rc;
	}
	return vcpu->kvm->arch.ipte_lock_count != 0;
}

static void ipte_lock_simple(struct kvm_vcpu *vcpu)
{
	union ipte_control old, new, *ic;

	mutex_lock(&vcpu->kvm->arch.ipte_mutex);
	vcpu->kvm->arch.ipte_lock_count++;
	if (vcpu->kvm->arch.ipte_lock_count > 1)
		goto out;
retry:
	read_lock(&vcpu->kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(vcpu->kvm);
	do {
		old = READ_ONCE(*ic);
		if (old.k) {
			read_unlock(&vcpu->kvm->arch.sca_lock);
			cond_resched();
			goto retry;
		}
		new = old;
		new.k = 1;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	read_unlock(&vcpu->kvm->arch.sca_lock);
out:
	mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
}

static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
{
	union ipte_control old, new, *ic;

	mutex_lock(&vcpu->kvm->arch.ipte_mutex);
	vcpu->kvm->arch.ipte_lock_count--;
	if (vcpu->kvm->arch.ipte_lock_count)
		goto out;
	read_lock(&vcpu->kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(vcpu->kvm);
	do {
		old = READ_ONCE(*ic);
		new = old;
		new.k = 0;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	read_unlock(&vcpu->kvm->arch.sca_lock);
	wake_up(&vcpu->kvm->arch.ipte_wq);
out:
	mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
}

static void ipte_lock_siif(struct kvm_vcpu *vcpu)
{
	union ipte_control old, new, *ic;

retry:
	read_lock(&vcpu->kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(vcpu->kvm);
	do {
		old = READ_ONCE(*ic);
		if (old.kg) {
			read_unlock(&vcpu->kvm->arch.sca_lock);
			cond_resched();
			goto retry;
		}
		new = old;
		new.k = 1;
		new.kh++;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	read_unlock(&vcpu->kvm->arch.sca_lock);
}

static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
{
	union ipte_control old, new, *ic;

	read_lock(&vcpu->kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(vcpu->kvm);
	do {
		old = READ_ONCE(*ic);
		new = old;
		new.kh--;
		if (!new.kh)
			new.k = 0;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	read_unlock(&vcpu->kvm->arch.sca_lock);
	if (!new.kh)
		wake_up(&vcpu->kvm->arch.ipte_wq);
}

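/*
 * With ECA_SII set, the IPTE lock lives in the ipte control word of the
 * SCA (kh counts the current holders, k marks the lock as taken);
 * otherwise a plain host-side mutex plus ipte_lock_count is used.
 * ipte_lock() and ipte_unlock() pick the matching flavour.
 */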
void ipte_lock(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.sie_block->eca & ECA_SII)
		ipte_lock_siif(vcpu);
	else
		ipte_lock_simple(vcpu);
}

void ipte_unlock(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.sie_block->eca & ECA_SII)
		ipte_unlock_siif(vcpu);
	else
		ipte_unlock_simple(vcpu);
}

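/**
 * ar_translation - determine the ASCE designated by an access register
 * @vcpu: virtual cpu
 * @asce: where to store the resulting address-space-control element
 * @ar: access register number
 * @mode: access mode, used for the fetch-only check
 *
 * The ALET in access register @ar selects an access-list entry via the
 * effective access-list designation (gcr 2 or 5), which in turn points
 * to an ASN-second-table entry holding the ASCE to be used; ALETs 0 and
 * 1 are shortcuts for the primary (gcr 1) and secondary (gcr 7) ASCE.
 *
 * Returns: 0 on success, -EINVAL for an invalid @ar, a negative error
 * code if guest memory could not be read, or a program interruption
 * code (> 0) if an access exception was recognized during translation.
 */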
static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
			  enum gacc_mode mode)
{
	union alet alet;
	struct ale ale;
	struct aste aste;
	unsigned long ald_addr, authority_table_addr;
	union ald ald;
	int eax, rc;
	u8 authority_table;

	if (ar >= NUM_ACRS)
		return -EINVAL;

	save_access_regs(vcpu->run->s.regs.acrs);
	alet.val = vcpu->run->s.regs.acrs[ar];

	if (ar == 0 || alet.val == 0) {
		asce->val = vcpu->arch.sie_block->gcr[1];
		return 0;
	} else if (alet.val == 1) {
		asce->val = vcpu->arch.sie_block->gcr[7];
		return 0;
	}

	if (alet.reserved)
		return PGM_ALET_SPECIFICATION;

	if (alet.p)
		ald_addr = vcpu->arch.sie_block->gcr[5];
	else
		ald_addr = vcpu->arch.sie_block->gcr[2];
	ald_addr &= 0x7fffffc0;

	rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald));
	if (rc)
		return rc;

	if (alet.alen / 8 > ald.all)
		return PGM_ALEN_TRANSLATION;

	if (0x7fffffff - ald.alo * 128 < alet.alen * 16)
		return PGM_ADDRESSING;

	rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale,
			     sizeof(struct ale));
	if (rc)
		return rc;

	if (ale.i == 1)
		return PGM_ALEN_TRANSLATION;
	if (ale.alesn != alet.alesn)
		return PGM_ALE_SEQUENCE;

	rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste));
	if (rc)
		return rc;

	if (aste.i)
		return PGM_ASTE_VALIDITY;
	if (aste.astesn != ale.astesn)
		return PGM_ASTE_SEQUENCE;

	if (ale.p == 1) {
		eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff;
		if (ale.aleax != eax) {
			if (eax / 16 > aste.atl)
				return PGM_EXTENDED_AUTHORITY;

			authority_table_addr = aste.ato * 4 + eax / 4;

			rc = read_guest_real(vcpu, authority_table_addr,
					     &authority_table,
					     sizeof(u8));
			if (rc)
				return rc;

			if ((authority_table & (0x40 >> ((eax & 3) * 2))) == 0)
				return PGM_EXTENDED_AUTHORITY;
		}
	}

	if (ale.fo == 1 && mode == GACC_STORE)
		return PGM_PROTECTION;

	asce->val = aste.asce;
	return 0;
}

struct trans_exc_code_bits {
	unsigned long addr : 52; /* Translation-exception Address */
	unsigned long fsi  : 2;  /* Access Exception Fetch/Store Indication */
	unsigned long	   : 2;
	unsigned long b56  : 1;
	unsigned long	   : 3;
	unsigned long b60  : 1;
	unsigned long b61  : 1;
	unsigned long as   : 2;  /* ASCE Identifier */
};

enum {
	FSI_UNKNOWN = 0, /* Unknown whether fetch or store */
	FSI_STORE   = 1, /* Exception was due to store operation */
	FSI_FETCH   = 2  /* Exception was due to fetch operation */
};

enum prot_type {
	PROT_TYPE_LA   = 0,
	PROT_TYPE_KEYC = 1,
	PROT_TYPE_ALC  = 2,
	PROT_TYPE_DAT  = 3,
	PROT_TYPE_IEP  = 4,
};

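/*
 * For PGM_PROTECTION the cause is encoded in the translation-exception
 * code: low-address protection sets bit 56, key-controlled protection
 * bit 60, DAT protection bit 61, access-list-controlled protection
 * bits 60 and 61, and instruction-execution protection bits 56 and 61.
 * For a terminating (rather than suppressing) access all three bits
 * are cleared again.
 */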
static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
			    enum gacc_mode mode, enum prot_type prot, bool terminate)
{
	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
	struct trans_exc_code_bits *tec;

	memset(pgm, 0, sizeof(*pgm));
	pgm->code = code;
	tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;

	switch (code) {
	case PGM_PROTECTION:
		switch (prot) {
		case PROT_TYPE_IEP:
			tec->b61 = 1;
			fallthrough;
		case PROT_TYPE_LA:
			tec->b56 = 1;
			break;
		case PROT_TYPE_KEYC:
			tec->b60 = 1;
			break;
		case PROT_TYPE_ALC:
			tec->b60 = 1;
			fallthrough;
		case PROT_TYPE_DAT:
			tec->b61 = 1;
			break;
		}
		if (terminate) {
			tec->b56 = 0;
			tec->b60 = 0;
			tec->b61 = 0;
		}
		fallthrough;
	case PGM_ASCE_TYPE:
	case PGM_PAGE_TRANSLATION:
	case PGM_REGION_FIRST_TRANS:
	case PGM_REGION_SECOND_TRANS:
	case PGM_REGION_THIRD_TRANS:
	case PGM_SEGMENT_TRANSLATION:
		/*
		 * op_access_id only applies to MOVE_PAGE -> set bit 61
		 * exc_access_id has to be set to 0 for some instructions. Both
		 * cases have to be handled by the caller.
		 */
		tec->addr = gva >> PAGE_SHIFT;
		tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
		tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
		fallthrough;
	case PGM_ALEN_TRANSLATION:
	case PGM_ALE_SEQUENCE:
	case PGM_ASTE_VALIDITY:
	case PGM_ASTE_SEQUENCE:
	case PGM_EXTENDED_AUTHORITY:
		/*
		 * We can always store exc_access_id, as it is
		 * undefined for non-ar cases. It is undefined for
		 * most DAT protection exceptions.
		 */
		pgm->exc_access_id = ar;
		break;
	}
	return code;
}

static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
		     enum gacc_mode mode, enum prot_type prot)
{
	return trans_exc_ending(vcpu, code, gva, ar, mode, prot, false);
}

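/*
 * Determine the ASCE in effect for this access: with DAT off only the
 * real-space control is set; otherwise the primary (gcr 1), secondary
 * (gcr 7) or home (gcr 13) ASCE is selected by the PSW address space,
 * and access-register mode goes through ar_translation().  Instruction
 * fetches are translated as primary unless the home space is active.
 */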
static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
			 unsigned long ga, u8 ar, enum gacc_mode mode)
{
	int rc;
	struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);

	if (!psw.dat) {
		asce->val = 0;
		asce->r = 1;
		return 0;
	}

	if ((mode == GACC_IFETCH) && (psw.as != PSW_BITS_AS_HOME))
		psw.as = PSW_BITS_AS_PRIMARY;

	switch (psw.as) {
	case PSW_BITS_AS_PRIMARY:
		asce->val = vcpu->arch.sie_block->gcr[1];
		return 0;
	case PSW_BITS_AS_SECONDARY:
		asce->val = vcpu->arch.sie_block->gcr[7];
		return 0;
	case PSW_BITS_AS_HOME:
		asce->val = vcpu->arch.sie_block->gcr[13];
		return 0;
	case PSW_BITS_AS_ACCREG:
		rc = ar_translation(vcpu, asce, ar, mode);
		if (rc > 0)
			return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC);
		return rc;
	}
	return 0;
}

static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
{
	return kvm_read_guest(kvm, gpa, val, sizeof(*val));
}

/**
 * guest_translate - translate a guest virtual into a guest absolute address
 * @vcpu: virtual cpu
 * @gva: guest virtual address
 * @gpa: points to where guest physical (absolute) address should be stored
 * @asce: effective asce
 * @mode: indicates the access mode to be used
 * @prot: returns the type for protection exceptions
 *
 * Translate a guest virtual address into a guest absolute address by means
 * of dynamic address translation as specified by the architecture.
 * If the resulting absolute address is not available in the configuration
 * an addressing exception is indicated and @gpa will not be changed.
 *
 * Returns: - zero on success; @gpa contains the resulting absolute address
 *	    - a negative value if guest access failed due to e.g. broken
 *	      guest mapping
 *	    - a positive value if an access exception happened. In this case
 *	      the returned value is the program interruption code as defined
 *	      by the architecture
 */
static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
				     unsigned long *gpa, const union asce asce,
				     enum gacc_mode mode, enum prot_type *prot)
{
	union vaddress vaddr = {.addr = gva};
	union raddress raddr = {.addr = gva};
	union page_table_entry pte;
	int dat_protection = 0;
	int iep_protection = 0;
	union ctlreg0 ctlreg0;
	unsigned long ptr;
	int edat1, edat2, iep;

	ctlreg0.val = vcpu->arch.sie_block->gcr[0];
	edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
	edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
	iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
	if (asce.r)
		goto real_address;
	ptr = asce.origin * PAGE_SIZE;
	switch (asce.dt) {
	case ASCE_TYPE_REGION1:
		if (vaddr.rfx01 > asce.tl)
			return PGM_REGION_FIRST_TRANS;
		ptr += vaddr.rfx * 8;
		break;
	case ASCE_TYPE_REGION2:
		if (vaddr.rfx)
			return PGM_ASCE_TYPE;
		if (vaddr.rsx01 > asce.tl)
			return PGM_REGION_SECOND_TRANS;
		ptr += vaddr.rsx * 8;
		break;
	case ASCE_TYPE_REGION3:
		if (vaddr.rfx || vaddr.rsx)
			return PGM_ASCE_TYPE;
		if (vaddr.rtx01 > asce.tl)
			return PGM_REGION_THIRD_TRANS;
		ptr += vaddr.rtx * 8;
		break;
	case ASCE_TYPE_SEGMENT:
		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
			return PGM_ASCE_TYPE;
		if (vaddr.sx01 > asce.tl)
			return PGM_SEGMENT_TRANSLATION;
		ptr += vaddr.sx * 8;
		break;
	}
	switch (asce.dt) {
	case ASCE_TYPE_REGION1:	{
		union region1_table_entry rfte;

		if (kvm_is_error_gpa(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rfte.val))
			return -EFAULT;
		if (rfte.i)
			return PGM_REGION_FIRST_TRANS;
		if (rfte.tt != TABLE_TYPE_REGION1)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
			return PGM_REGION_SECOND_TRANS;
		if (edat1)
			dat_protection |= rfte.p;
		ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
	}
		fallthrough;
	case ASCE_TYPE_REGION2: {
		union region2_table_entry rste;

		if (kvm_is_error_gpa(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rste.val))
			return -EFAULT;
		if (rste.i)
			return PGM_REGION_SECOND_TRANS;
		if (rste.tt != TABLE_TYPE_REGION2)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
			return PGM_REGION_THIRD_TRANS;
		if (edat1)
			dat_protection |= rste.p;
		ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
	}
		fallthrough;
	case ASCE_TYPE_REGION3: {
		union region3_table_entry rtte;

		if (kvm_is_error_gpa(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rtte.val))
			return -EFAULT;
		if (rtte.i)
			return PGM_REGION_THIRD_TRANS;
		if (rtte.tt != TABLE_TYPE_REGION3)
			return PGM_TRANSLATION_SPEC;
		if (rtte.cr && asce.p && edat2)
			return PGM_TRANSLATION_SPEC;
		if (rtte.fc && edat2) {
			dat_protection |= rtte.fc1.p;
			iep_protection = rtte.fc1.iep;
			raddr.rfaa = rtte.fc1.rfaa;
			goto absolute_address;
		}
		if (vaddr.sx01 < rtte.fc0.tf)
			return PGM_SEGMENT_TRANSLATION;
		if (vaddr.sx01 > rtte.fc0.tl)
			return PGM_SEGMENT_TRANSLATION;
		if (edat1)
			dat_protection |= rtte.fc0.p;
		ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
	}
		fallthrough;
	case ASCE_TYPE_SEGMENT: {
		union segment_table_entry ste;

		if (kvm_is_error_gpa(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &ste.val))
			return -EFAULT;
		if (ste.i)
			return PGM_SEGMENT_TRANSLATION;
		if (ste.tt != TABLE_TYPE_SEGMENT)
			return PGM_TRANSLATION_SPEC;
		if (ste.cs && asce.p)
			return PGM_TRANSLATION_SPEC;
		if (ste.fc && edat1) {
			dat_protection |= ste.fc1.p;
			iep_protection = ste.fc1.iep;
			raddr.sfaa = ste.fc1.sfaa;
			goto absolute_address;
		}
		dat_protection |= ste.fc0.p;
		ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
	}
	}
	if (kvm_is_error_gpa(vcpu->kvm, ptr))
		return PGM_ADDRESSING;
	if (deref_table(vcpu->kvm, ptr, &pte.val))
		return -EFAULT;
	if (pte.i)
		return PGM_PAGE_TRANSLATION;
	if (pte.z)
		return PGM_TRANSLATION_SPEC;
	dat_protection |= pte.p;
	iep_protection = pte.iep;
	raddr.pfra = pte.pfra;
real_address:
	raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
absolute_address:
	if (mode == GACC_STORE && dat_protection) {
		*prot = PROT_TYPE_DAT;
		return PGM_PROTECTION;
	}
	if (mode == GACC_IFETCH && iep_protection && iep) {
		*prot = PROT_TYPE_IEP;
		return PGM_PROTECTION;
	}
	if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
		return PGM_ADDRESSING;
	*gpa = raddr.addr;
	return 0;
}

static inline int is_low_address(unsigned long ga)
{
	/* Check for address ranges 0..511 and 4096..4607 */
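	/*
	 * ~0x11fful clears bits 0-8 and bit 12, so any bit left set
	 * means ga lies outside both ranges.
	 */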
	return (ga & ~0x11fful) == 0;
}

static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
					  const union asce asce)
{
	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
	psw_t *psw = &vcpu->arch.sie_block->gpsw;

	if (!ctlreg0.lap)
		return 0;
	if (psw_bits(*psw).dat && asce.p)
		return 0;
	return 1;
}

static int vm_check_access_key(struct kvm *kvm, u8 access_key,
			       enum gacc_mode mode, gpa_t gpa)
{
	u8 storage_key, access_control;
	bool fetch_protected;
	unsigned long hva;
	int r;

	if (access_key == 0)
		return 0;

	hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
	if (kvm_is_error_hva(hva))
		return PGM_ADDRESSING;

	mmap_read_lock(current->mm);
	r = get_guest_storage_key(current->mm, hva, &storage_key);
	mmap_read_unlock(current->mm);
	if (r)
		return r;
	access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
	if (access_control == access_key)
		return 0;
	fetch_protected = storage_key & _PAGE_FP_BIT;
	if ((mode == GACC_FETCH || mode == GACC_IFETCH) && !fetch_protected)
		return 0;
	return PGM_PROTECTION;
}

static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
					   union asce asce)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	unsigned long override;

	if (mode == GACC_FETCH || mode == GACC_IFETCH) {
		/* check if fetch protection override enabled */
		override = vcpu->arch.sie_block->gcr[0];
		override &= CR0_FETCH_PROTECTION_OVERRIDE;
		/* not applicable if subject to DAT && private space */
		override = override && !(psw_bits(*psw).dat && asce.p);
		return override;
	}
	return false;
}

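/*
 * The fetch-protection override only lifts fetch protection for the
 * first 2K of logical storage, so the whole access must lie below 2048
 * for it to apply.
 */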
static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
{
	return ga < 2048 && ga + len <= 2048;
}

static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
{
	/* check if storage protection override enabled */
	return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
}

static bool storage_prot_override_applies(u8 access_control)
{
	/* matches special storage protection override key (9) -> allow */
	return access_control == PAGE_SPO_ACC;
}

static int vcpu_check_access_key(struct kvm_vcpu *vcpu, u8 access_key,
				 enum gacc_mode mode, union asce asce, gpa_t gpa,
				 unsigned long ga, unsigned int len)
{
	u8 storage_key, access_control;
	unsigned long hva;
	int r;

	/* access key 0 matches any storage key -> allow */
	if (access_key == 0)
		return 0;
	/*
	 * caller needs to ensure that gfn is accessible, so we can
	 * assume that this cannot fail
	 */
	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa));
	mmap_read_lock(current->mm);
	r = get_guest_storage_key(current->mm, hva, &storage_key);
	mmap_read_unlock(current->mm);
	if (r)
		return r;
	access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
	/* access key matches storage key -> allow */
	if (access_control == access_key)
		return 0;
	if (mode == GACC_FETCH || mode == GACC_IFETCH) {
		/* it is a fetch and fetch protection is off -> allow */
		if (!(storage_key & _PAGE_FP_BIT))
			return 0;
		if (fetch_prot_override_applicable(vcpu, mode, asce) &&
		    fetch_prot_override_applies(ga, len))
			return 0;
	}
	if (storage_prot_override_applicable(vcpu) &&
	    storage_prot_override_applies(access_control))
		return 0;
	return PGM_PROTECTION;
}

/**
 * guest_range_to_gpas() - Calculate guest physical addresses of page fragments
 * covering a logical range
 * @vcpu: virtual cpu
 * @ga: guest address, start of range
 * @ar: access register
 * @gpas: output argument, may be NULL
 * @len: length of range in bytes
 * @asce: address-space-control element to use for translation
 * @mode: access mode
 * @access_key: access key to match the range's storage keys against
 *
 * Translate a logical range to a series of guest absolute addresses,
 * such that the concatenation of page fragments starting at each gpa make up
 * the whole range.
 * The translation is performed as if done by the cpu for the given @asce, @ar,
 * @mode and state of the @vcpu.
 * If the translation causes an exception, its program interruption code is
 * returned and the &struct kvm_s390_pgm_info pgm member of @vcpu is modified
 * such that a subsequent call to kvm_s390_inject_prog_vcpu() will inject
 * a correct exception into the guest.
 * The resulting gpas are stored into @gpas, unless it is NULL.
 *
 * Note: All fragments except the first one start at the beginning of a page.
 *	 When deriving the boundaries of a fragment from a gpa, all but the last
 *	 fragment end at the end of the page.
 *
 * Return:
 * * 0		- success
 * * <0		- translation could not be performed, for example if guest
 *		  memory could not be accessed
 * * >0		- an access exception occurred. In this case the returned value
 *		  is the program interruption code and the contents of pgm may
 *		  be used to inject an exception into the guest.
 */
static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
			       unsigned long *gpas, unsigned long len,
			       const union asce asce, enum gacc_mode mode,
			       u8 access_key)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	unsigned int offset = offset_in_page(ga);
	unsigned int fragment_len;
	int lap_enabled, rc = 0;
	enum prot_type prot;
	unsigned long gpa;

	lap_enabled = low_address_protection_enabled(vcpu, asce);
	while (min(PAGE_SIZE - offset, len) > 0) {
		fragment_len = min(PAGE_SIZE - offset, len);
		ga = kvm_s390_logical_to_effective(vcpu, ga);
		if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
			return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
					 PROT_TYPE_LA);
		if (psw_bits(*psw).dat) {
			rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot);
			if (rc < 0)
				return rc;
		} else {
			gpa = kvm_s390_real_to_abs(vcpu, ga);
			if (kvm_is_error_gpa(vcpu->kvm, gpa))
				rc = PGM_ADDRESSING;
		}
		if (rc)
			return trans_exc(vcpu, rc, ga, ar, mode, prot);
		rc = vcpu_check_access_key(vcpu, access_key, mode, asce, gpa, ga,
					   fragment_len);
		if (rc)
			return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_KEYC);
		if (gpas)
			*gpas++ = gpa;
		offset = 0;
		ga += fragment_len;
		len -= fragment_len;
	}
	return 0;
}

static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
			     void *data, unsigned int len)
{
	const unsigned int offset = offset_in_page(gpa);
	const gfn_t gfn = gpa_to_gfn(gpa);
	int rc;

	if (mode == GACC_STORE)
		rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
	else
		rc = kvm_read_guest_page(kvm, gfn, data, offset, len);
	return rc;
}

static int
access_guest_page_with_key(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
			   void *data, unsigned int len, u8 access_key)
{
	struct kvm_memory_slot *slot;
	bool writable;
	gfn_t gfn;
	hva_t hva;
	int rc;

	gfn = gpa >> PAGE_SHIFT;
	slot = gfn_to_memslot(kvm, gfn);
	hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);

	if (kvm_is_error_hva(hva))
		return PGM_ADDRESSING;
	/*
	 * Check if it's a read-only memslot, even though that can't occur
	 * (they're unsupported). Don't try to actually handle that case.
	 */
	if (!writable && mode == GACC_STORE)
		return -EOPNOTSUPP;
	hva += offset_in_page(gpa);
	if (mode == GACC_STORE)
		rc = copy_to_user_key((void __user *)hva, data, len, access_key);
	else
		rc = copy_from_user_key(data, (void __user *)hva, len, access_key);
	if (rc)
		return PGM_PROTECTION;
	if (mode == GACC_STORE)
		mark_page_dirty_in_slot(kvm, slot, gfn);
	return 0;
}

int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data,
			      unsigned long len, enum gacc_mode mode, u8 access_key)
{
	int offset = offset_in_page(gpa);
	int fragment_len;
	int rc;

	while (min(PAGE_SIZE - offset, len) > 0) {
		fragment_len = min(PAGE_SIZE - offset, len);
		rc = access_guest_page_with_key(kvm, mode, gpa, data, fragment_len, access_key);
		if (rc)
			return rc;
		offset = 0;
		len -= fragment_len;
		data += fragment_len;
		gpa += fragment_len;
	}
	return 0;
}

int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
			  void *data, unsigned long len, enum gacc_mode mode,
			  u8 access_key)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	unsigned long nr_pages, idx;
	unsigned long gpa_array[2];
	unsigned int fragment_len;
	unsigned long *gpas;
	enum prot_type prot;
	int need_ipte_lock;
	union asce asce;
	bool try_storage_prot_override;
	bool try_fetch_prot_override;
	int rc;

	if (!len)
		return 0;
	ga = kvm_s390_logical_to_effective(vcpu, ga);
	rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode);
	if (rc)
		return rc;
	nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
	gpas = gpa_array;
	if (nr_pages > ARRAY_SIZE(gpa_array))
		gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
	if (!gpas)
		return -ENOMEM;
	try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce);
	try_storage_prot_override = storage_prot_override_applicable(vcpu);
	need_ipte_lock = psw_bits(*psw).dat && !asce.r;
	if (need_ipte_lock)
		ipte_lock(vcpu);
	/*
	 * Since we do the access further down ultimately via a move instruction
	 * that does key checking and returns an error in case of a protection
	 * violation, we don't need to do the check during address translation.
	 * Skip it by passing access key 0, which matches any storage key,
	 * obviating the need for any further checks. As a result the check is
	 * handled entirely in hardware on access; we only need to take care to
	 * forego key protection checking if fetch protection override applies or
	 * retry with the special key 9 in case of storage protection override.
	 */
	rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
	if (rc)
		goto out_unlock;
	for (idx = 0; idx < nr_pages; idx++) {
		fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
		if (try_fetch_prot_override && fetch_prot_override_applies(ga, fragment_len)) {
			rc = access_guest_page(vcpu->kvm, mode, gpas[idx],
					       data, fragment_len);
		} else {
			rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
							data, fragment_len, access_key);
		}
		if (rc == PGM_PROTECTION && try_storage_prot_override)
			rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
							data, fragment_len, PAGE_SPO_ACC);
		if (rc == PGM_PROTECTION)
			prot = PROT_TYPE_KEYC;
		if (rc)
			break;
		len -= fragment_len;
		data += fragment_len;
		ga = kvm_s390_logical_to_effective(vcpu, ga + fragment_len);
	}
	if (rc > 0) {
		bool terminate = (mode == GACC_STORE) && (idx > 0);

		rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
	}
out_unlock:
	if (need_ipte_lock)
		ipte_unlock(vcpu);
	if (nr_pages > ARRAY_SIZE(gpa_array))
		vfree(gpas);
	return rc;
}

int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
		      void *data, unsigned long len, enum gacc_mode mode)
{
	unsigned int fragment_len;
	unsigned long gpa;
	int rc = 0;

	while (len && !rc) {
		gpa = kvm_s390_real_to_abs(vcpu, gra);
		fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len);
		rc = access_guest_page(vcpu->kvm, mode, gpa, data, fragment_len);
		len -= fragment_len;
		gra += fragment_len;
		data += fragment_len;
	}
	return rc;
}

/**
 * guest_translate_address_with_key - translate guest logical into guest absolute address
 * @vcpu: virtual cpu
 * @gva: Guest virtual address
 * @ar: Access register
 * @gpa: Guest physical address
 * @mode: Translation access mode
 * @access_key: access key to match the storage key with
 *
 * Parameter semantics are the same as the ones from guest_translate.
 * The memory contents at the guest address are not changed.
 *
 * Note: The IPTE lock is not taken during this function, so the caller
 * has to take care of this.
 */
int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
				     unsigned long *gpa, enum gacc_mode mode,
				     u8 access_key)
{
	union asce asce;
	int rc;

	gva = kvm_s390_logical_to_effective(vcpu, gva);
	rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
	if (rc)
		return rc;
	return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode,
				   access_key);
}

/**
 * check_gva_range - test a range of guest virtual addresses for accessibility
 * @vcpu: virtual cpu
 * @gva: Guest virtual address
 * @ar: Access register
 * @length: Length of test range
 * @mode: Translation access mode
 * @access_key: access key to match the storage keys with
 */
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
		    unsigned long length, enum gacc_mode mode, u8 access_key)
{
	union asce asce;
	int rc = 0;

	rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
	if (rc)
		return rc;
	ipte_lock(vcpu);
	rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
				 access_key);
	ipte_unlock(vcpu);

	return rc;
}

/**
 * check_gpa_range - test a range of guest physical addresses for accessibility
 * @kvm: virtual machine instance
 * @gpa: guest physical address
 * @length: length of test range
 * @mode: access mode to test, relevant for storage keys
 * @access_key: access key to match the storage keys with
 */
int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length,
		    enum gacc_mode mode, u8 access_key)
{
	unsigned int fragment_len;
	int rc = 0;

	while (length && !rc) {
		fragment_len = min(PAGE_SIZE - offset_in_page(gpa), length);
		rc = vm_check_access_key(kvm, access_key, mode, gpa);
		length -= fragment_len;
		gpa += fragment_len;
	}
	return rc;
}

/**
 * kvm_s390_check_low_addr_prot_real - check for low-address protection
 * @vcpu: virtual cpu
 * @gra: Guest real address
 *
 * Checks whether an address is subject to low-address protection and sets
 * up vcpu->arch.pgm accordingly if necessary.
 *
 * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
 */
int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
{
	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};

	if (!ctlreg0.lap || !is_low_address(gra))
		return 0;
	return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA);
}

/**
 * kvm_s390_shadow_tables - walk the guest page table and create shadow tables
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pgt: pointer to the beginning of the page table for the given address if
 *	 successful (return value 0), or to the first invalid DAT entry in
 *	 case of exceptions (return value > 0)
 * @dat_protection: referenced memory is write protected
 * @fake: pgt references contiguous guest memory block, not a pgtable
 */
static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
				  unsigned long *pgt, int *dat_protection,
				  int *fake)
{
	struct gmap *parent;
	union asce asce;
	union vaddress vaddr;
	unsigned long ptr;
	int rc;

	*fake = 0;
	*dat_protection = 0;
	parent = sg->parent;
	vaddr.addr = saddr;
	asce.val = sg->orig_asce;
	ptr = asce.origin * PAGE_SIZE;
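	/*
	 * A real-space ASCE has no guest tables to walk; fake a region-1
	 * table over the linearly mapped guest address space instead, so
	 * that each "entry" read below is simply the address of the next
	 * contiguous block.
	 */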
	if (asce.r) {
		*fake = 1;
		ptr = 0;
		asce.dt = ASCE_TYPE_REGION1;
	}
	switch (asce.dt) {
	case ASCE_TYPE_REGION1:
		if (vaddr.rfx01 > asce.tl && !*fake)
			return PGM_REGION_FIRST_TRANS;
		break;
	case ASCE_TYPE_REGION2:
		if (vaddr.rfx)
			return PGM_ASCE_TYPE;
		if (vaddr.rsx01 > asce.tl)
			return PGM_REGION_SECOND_TRANS;
		break;
	case ASCE_TYPE_REGION3:
		if (vaddr.rfx || vaddr.rsx)
			return PGM_ASCE_TYPE;
		if (vaddr.rtx01 > asce.tl)
			return PGM_REGION_THIRD_TRANS;
		break;
	case ASCE_TYPE_SEGMENT:
		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
			return PGM_ASCE_TYPE;
		if (vaddr.sx01 > asce.tl)
			return PGM_SEGMENT_TRANSLATION;
		break;
	}

	switch (asce.dt) {
	case ASCE_TYPE_REGION1: {
		union region1_table_entry rfte;

		if (*fake) {
			ptr += vaddr.rfx * _REGION1_SIZE;
			rfte.val = ptr;
			goto shadow_r2t;
		}
		*pgt = ptr + vaddr.rfx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
		if (rc)
			return rc;
		if (rfte.i)
			return PGM_REGION_FIRST_TRANS;
		if (rfte.tt != TABLE_TYPE_REGION1)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
			return PGM_REGION_SECOND_TRANS;
		if (sg->edat_level >= 1)
			*dat_protection |= rfte.p;
		ptr = rfte.rto * PAGE_SIZE;
shadow_r2t:
		rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
		if (rc)
			return rc;
	}
		fallthrough;
	case ASCE_TYPE_REGION2: {
		union region2_table_entry rste;

		if (*fake) {
			ptr += vaddr.rsx * _REGION2_SIZE;
			rste.val = ptr;
			goto shadow_r3t;
		}
		*pgt = ptr + vaddr.rsx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
		if (rc)
			return rc;
		if (rste.i)
			return PGM_REGION_SECOND_TRANS;
		if (rste.tt != TABLE_TYPE_REGION2)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
			return PGM_REGION_THIRD_TRANS;
		if (sg->edat_level >= 1)
			*dat_protection |= rste.p;
		ptr = rste.rto * PAGE_SIZE;
shadow_r3t:
		rste.p |= *dat_protection;
		rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
		if (rc)
			return rc;
	}
		fallthrough;
	case ASCE_TYPE_REGION3: {
		union region3_table_entry rtte;

		if (*fake) {
			ptr += vaddr.rtx * _REGION3_SIZE;
			rtte.val = ptr;
			goto shadow_sgt;
		}
		*pgt = ptr + vaddr.rtx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
		if (rc)
			return rc;
		if (rtte.i)
			return PGM_REGION_THIRD_TRANS;
		if (rtte.tt != TABLE_TYPE_REGION3)
			return PGM_TRANSLATION_SPEC;
		if (rtte.cr && asce.p && sg->edat_level >= 2)
			return PGM_TRANSLATION_SPEC;
		if (rtte.fc && sg->edat_level >= 2) {
			*dat_protection |= rtte.fc0.p;
			*fake = 1;
			ptr = rtte.fc1.rfaa * _REGION3_SIZE;
			rtte.val = ptr;
			goto shadow_sgt;
		}
		if (vaddr.sx01 < rtte.fc0.tf || vaddr.sx01 > rtte.fc0.tl)
			return PGM_SEGMENT_TRANSLATION;
		if (sg->edat_level >= 1)
			*dat_protection |= rtte.fc0.p;
		ptr = rtte.fc0.sto * PAGE_SIZE;
shadow_sgt:
		rtte.fc0.p |= *dat_protection;
		rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
		if (rc)
			return rc;
	}
		fallthrough;
	case ASCE_TYPE_SEGMENT: {
		union segment_table_entry ste;

		if (*fake) {
			ptr += vaddr.sx * _SEGMENT_SIZE;
			ste.val = ptr;
			goto shadow_pgt;
		}
		*pgt = ptr + vaddr.sx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
		if (rc)
			return rc;
		if (ste.i)
			return PGM_SEGMENT_TRANSLATION;
		if (ste.tt != TABLE_TYPE_SEGMENT)
			return PGM_TRANSLATION_SPEC;
		if (ste.cs && asce.p)
			return PGM_TRANSLATION_SPEC;
		*dat_protection |= ste.fc0.p;
		if (ste.fc && sg->edat_level >= 1) {
			*fake = 1;
			ptr = ste.fc1.sfaa * _SEGMENT_SIZE;
			ste.val = ptr;
			goto shadow_pgt;
		}
		ptr = ste.fc0.pto * (PAGE_SIZE / 2);
shadow_pgt:
		ste.fc0.p |= *dat_protection;
		rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
		if (rc)
			return rc;
	}
	}
	/* Return the parent address of the page table */
	*pgt = ptr;
	return 0;
}

/**
 * kvm_s390_shadow_fault - handle fault on a shadow page table
 * @vcpu: virtual cpu
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @datptr: will contain the address of the faulting DAT table entry, or of
 *	    the valid leaf, plus some flags
 *
 * Returns: - 0 if the shadow fault was successfully resolved
 *	    - > 0 (pgm exception code) on exceptions while faulting
 *	    - -EAGAIN if the caller can retry immediately
 *	    - -EFAULT when accessing invalid guest addresses
 *	    - -ENOMEM if out of memory
 */
int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
			  unsigned long saddr, unsigned long *datptr)
{
	union vaddress vaddr;
	union page_table_entry pte;
	unsigned long pgt = 0;
	int dat_protection, fake;
	int rc;

	mmap_read_lock(sg->mm);
	/*
	 * We don't want any guest-2 tables to change - so the parent
	 * tables/pointers we read stay valid - unshadowing is however
	 * always possible - only guest_table_lock protects us.
	 */
	ipte_lock(vcpu);

	rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
	if (rc)
		rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
					    &fake);

	vaddr.addr = saddr;
	if (fake) {
		pte.val = pgt + vaddr.px * PAGE_SIZE;
		goto shadow_page;
	}

	switch (rc) {
	case PGM_SEGMENT_TRANSLATION:
	case PGM_REGION_THIRD_TRANS:
	case PGM_REGION_SECOND_TRANS:
	case PGM_REGION_FIRST_TRANS:
		pgt |= PEI_NOT_PTE;
		break;
	case 0:
		pgt += vaddr.px * 8;
		rc = gmap_read_table(sg->parent, pgt, &pte.val);
	}
	if (datptr)
		*datptr = pgt | dat_protection * PEI_DAT_PROT;
	if (!rc && pte.i)
		rc = PGM_PAGE_TRANSLATION;
	if (!rc && pte.z)
		rc = PGM_TRANSLATION_SPEC;
shadow_page:
	pte.p |= dat_protection;
	if (!rc)
		rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
	ipte_unlock(vcpu);
	mmap_read_unlock(sg->mm);
	return rc;
}