cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

sev-shared.c (25503B)


// SPDX-License-Identifier: GPL-2.0
/*
 * AMD Encrypted Register State Support
 *
 * Author: Joerg Roedel <jroedel@suse.de>
 *
 * This file is not compiled stand-alone. It contains code shared
 * between the pre-decompression boot code and the running Linux kernel
 * and is included directly into both code-bases.
 */

#ifndef __BOOT_COMPRESSED
#define error(v)	pr_err(v)
#define has_cpuflag(f)	boot_cpu_has(f)
#endif

/* I/O parameters for CPUID-related helpers */
struct cpuid_leaf {
	u32 fn;
	u32 subfn;
	u32 eax;
	u32 ebx;
	u32 ecx;
	u32 edx;
};

/*
 * Individual entries of the SNP CPUID table, as defined by the SNP
 * Firmware ABI, Revision 0.9, Section 7.1, Table 14.
 */
struct snp_cpuid_fn {
	u32 eax_in;
	u32 ecx_in;
	u64 xcr0_in;
	u64 xss_in;
	u32 eax;
	u32 ebx;
	u32 ecx;
	u32 edx;
	u64 __reserved;
} __packed;

/*
 * SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9,
 * Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit
 * of 64 entries per CPUID table.
 */
#define SNP_CPUID_COUNT_MAX 64

struct snp_cpuid_table {
	u32 count;
	u32 __reserved1;
	u64 __reserved2;
	struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX];
} __packed;

/*
 * Since feature negotiation related variables are set early in the boot
 * process they must reside in the .data section so as not to be zeroed
 * out when the .bss section is later cleared.
 *
 * GHCB protocol version negotiated with the hypervisor.
 */
static u16 ghcb_version __ro_after_init;

/* Copy of the SNP firmware's CPUID page. */
static struct snp_cpuid_table cpuid_table_copy __ro_after_init;

/*
 * These will be initialized based on the CPUID table so that non-present
 * all-zero leaves (for sparse tables) can be differentiated from
 * invalid/out-of-range leaves. This is needed since all-zero leaves
 * still need to be post-processed.
 */
static u32 cpuid_std_range_max __ro_after_init;
static u32 cpuid_hyp_range_max __ro_after_init;
static u32 cpuid_ext_range_max __ro_after_init;

static bool __init sev_es_check_cpu_features(void)
{
	if (!has_cpuflag(X86_FEATURE_RDRAND)) {
		error("RDRAND instruction not supported - no trusted source of randomness available\n");
		return false;
	}

	return true;
}

static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason)
{
	u64 val = GHCB_MSR_TERM_REQ;

	/* Tell the hypervisor what went wrong. */
	val |= GHCB_SEV_TERM_REASON(set, reason);

	/* Request Guest Termination from the Hypervisor */
	sev_es_wr_ghcb_msr(val);
	VMGEXIT();

	while (true)
		asm volatile("hlt\n" : : : "memory");
}

/*
 * The hypervisor features are available from GHCB version 2 onward.
 */
static u64 get_hv_features(void)
{
	u64 val;

	if (ghcb_version < 2)
		return 0;

	sev_es_wr_ghcb_msr(GHCB_MSR_HV_FT_REQ);
	VMGEXIT();

	val = sev_es_rd_ghcb_msr();
	if (GHCB_RESP_CODE(val) != GHCB_MSR_HV_FT_RESP)
		return 0;

	return GHCB_MSR_HV_FT_RESP_VAL(val);
}

static void snp_register_ghcb_early(unsigned long paddr)
{
	unsigned long pfn = paddr >> PAGE_SHIFT;
	u64 val;

	sev_es_wr_ghcb_msr(GHCB_MSR_REG_GPA_REQ_VAL(pfn));
	VMGEXIT();

	val = sev_es_rd_ghcb_msr();

	/* If the response GPA is not ours then abort the guest */
	if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) ||
	    (GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER);
}

static bool sev_es_negotiate_protocol(void)
{
	u64 val;

	/* Do the GHCB protocol version negotiation */
	sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
	VMGEXIT();
	val = sev_es_rd_ghcb_msr();

	if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
		return false;

	if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
	    GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
		return false;

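	/* Clamp to the highest protocol version supported by both sides. */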
	ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), GHCB_PROTOCOL_MAX);

	return true;
}

static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
{
	ghcb->save.sw_exit_code = 0;
	__builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}

static bool vc_decoding_needed(unsigned long exit_code)
{
	/* Exceptions don't require the instruction to be decoded */
	return !(exit_code >= SVM_EXIT_EXCP_BASE &&
		 exit_code <= SVM_EXIT_LAST_EXCP);
}

static enum es_result vc_init_em_ctxt(struct es_em_ctxt *ctxt,
				      struct pt_regs *regs,
				      unsigned long exit_code)
{
	enum es_result ret = ES_OK;

	memset(ctxt, 0, sizeof(*ctxt));
	ctxt->regs = regs;

	if (vc_decoding_needed(exit_code))
		ret = vc_decode_insn(ctxt);

	return ret;
}

static void vc_finish_insn(struct es_em_ctxt *ctxt)
{
	ctxt->regs->ip += ctxt->insn.length;
}

static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	u32 ret;

	ret = ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0);
	if (!ret)
		return ES_OK;

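	/*
	 * A result of 1 means the hypervisor reported an exception for the
	 * guest to handle; sw_exit_info_2 is then checked below for a valid
	 * #GP or #UD event before it is reflected into ctxt->fi.
	 */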
	if (ret == 1) {
		u64 info = ghcb->save.sw_exit_info_2;
		unsigned long v = info & SVM_EVTINJ_VEC_MASK;

		/* Check if exception information from hypervisor is sane. */
		if ((info & SVM_EVTINJ_VALID) &&
		    ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
		    ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
			ctxt->fi.vector = v;

			if (info & SVM_EVTINJ_VALID_ERR)
				ctxt->fi.error_code = info >> 32;

			return ES_EXCEPTION;
		}
	}

	return ES_VMM_ERROR;
}

enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr,
				   struct es_em_ctxt *ctxt, u64 exit_code,
				   u64 exit_info_1, u64 exit_info_2)
{
	/* Fill in protocol and format specifiers */
	ghcb->protocol_version = ghcb_version;
	ghcb->ghcb_usage       = GHCB_DEFAULT_USAGE;

	ghcb_set_sw_exit_code(ghcb, exit_code);
	ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
	ghcb_set_sw_exit_info_2(ghcb, exit_info_2);

	/*
	 * Hyper-V unenlightened guests use a paravisor for communicating and
	 * GHCB pages are being allocated and set up by that paravisor. Linux
	 * should not change the GHCB page's physical address.
	 */
	if (set_ghcb_msr)
		sev_es_wr_ghcb_msr(__pa(ghcb));

	VMGEXIT();

	return verify_exception_info(ghcb, ctxt);
}

static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg)
{
	u64 val;

	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, reg_idx));
	VMGEXIT();
	val = sev_es_rd_ghcb_msr();
	if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
		return -EIO;

	*reg = (val >> 32);

	return 0;
}

static int sev_cpuid_hv(struct cpuid_leaf *leaf)
{
	int ret;

	/*
	 * MSR protocol does not support fetching non-zero subfunctions, but is
	 * sufficient to handle current early-boot cases. Should that change,
	 * make sure to report an error rather than ignoring the index and
	 * grabbing random values. If this issue arises in the future, handling
	 * can be added here to use GHCB-page protocol for cases that occur late
	 * enough in boot that the GHCB page is available.
	 */
	if (cpuid_function_is_indexed(leaf->fn) && leaf->subfn)
		return -EINVAL;

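	/*
	 * One MSR-protocol request per register; the GNU ?: chains below
	 * evaluate their right-hand side only while ret is still zero, so
	 * the first failure short-circuits the remaining requests.
	 */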
	ret =         __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EAX, &leaf->eax);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EBX, &leaf->ebx);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_ECX, &leaf->ecx);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EDX, &leaf->edx);

	return ret;
}

/*
 * This may be called early while still running on the initial identity
 * mapping. Use RIP-relative addressing to obtain the correct address
 * both while running on the initial identity mapping and after the
 * switch-over to kernel virtual addresses later.
 */
static const struct snp_cpuid_table *snp_cpuid_get_table(void)
{
	void *ptr;

	asm ("lea cpuid_table_copy(%%rip), %0"
	     : "=r" (ptr)
	     : "p" (&cpuid_table_copy));

	return ptr;
}

/*
 * The SNP Firmware ABI, Revision 0.9, Section 7.1, details the use of
 * XCR0_IN and XSS_IN to encode multiple versions of 0xD subfunctions 0
 * and 1 based on the corresponding features enabled by a particular
 * combination of XCR0 and XSS registers so that a guest can look up the
 * version corresponding to the features currently enabled in its XCR0/XSS
 * registers. The only value that differs between these versions/table
 * entries is the enabled XSAVE area size advertised via EBX.
 *
 * While hypervisors may choose to make use of this support, it is more
 * robust/secure for a guest to simply find the entry corresponding to the
 * base/legacy XSAVE area size (XCR0=1 or XCR0=3), and then calculate the
 * XSAVE area size using subfunctions 2 through 64, as documented in APM
 * Volume 3, Rev 3.31, Appendix E.3.8, which is what is done here.
 *
 * Since the base/legacy XSAVE area size is documented as 0x240, use that
 * value directly rather than relying on the base size in the CPUID table.
 *
 * Return: XSAVE area size on success, 0 otherwise.
 */
static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
	u64 xfeatures_found = 0;
	u32 xsave_size = 0x240;
	int i;

	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *e = &cpuid_table->fn[i];

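		/*
		 * Only leaf 0xD subfunctions 2 and up describe individual
		 * XSAVE state components; subfunctions 0 and 1 are covered
		 * by the 0x240 base size above.
		 */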
		if (!(e->eax_in == 0xD && e->ecx_in > 1 && e->ecx_in < 64))
			continue;
		if (!(xfeatures_en & (BIT_ULL(e->ecx_in))))
			continue;
		if (xfeatures_found & (BIT_ULL(e->ecx_in)))
			continue;

		xfeatures_found |= (BIT_ULL(e->ecx_in));

		if (compacted)
			xsave_size += e->eax;
		else
			xsave_size = max(xsave_size, e->eax + e->ebx);
	}

	/*
	 * Either the guest set unsupported XCR0/XSS bits, or the corresponding
	 * entries in the CPUID table were not present. This is not a valid
	 * state to be in.
	 */
	if (xfeatures_found != (xfeatures_en & GENMASK_ULL(63, 2)))
		return 0;

	return xsave_size;
}

static bool
snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
	int i;

	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *e = &cpuid_table->fn[i];

		if (e->eax_in != leaf->fn)
			continue;

		if (cpuid_function_is_indexed(leaf->fn) && e->ecx_in != leaf->subfn)
			continue;

		/*
		 * For 0xD subfunctions 0 and 1, only use the entry corresponding
		 * to the base/legacy XSAVE area size (XCR0=1 or XCR0=3, XSS=0).
		 * See the comments above snp_cpuid_calc_xsave_size() for more
		 * details.
		 */
		if (e->eax_in == 0xD && (e->ecx_in == 0 || e->ecx_in == 1))
			if (!(e->xcr0_in == 1 || e->xcr0_in == 3) || e->xss_in)
				continue;

		leaf->eax = e->eax;
		leaf->ebx = e->ebx;
		leaf->ecx = e->ecx;
		leaf->edx = e->edx;

		return true;
	}

	return false;
}

static void snp_cpuid_hv(struct cpuid_leaf *leaf)
{
	if (sev_cpuid_hv(leaf))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
}

static int snp_cpuid_postprocess(struct cpuid_leaf *leaf)
{
	struct cpuid_leaf leaf_hv = *leaf;

	switch (leaf->fn) {
	case 0x1:
		snp_cpuid_hv(&leaf_hv);

		/* initial APIC ID */
		leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0));
		/* APIC enabled bit */
		leaf->edx = (leaf_hv.edx & BIT(9)) | (leaf->edx & ~BIT(9));

		/* OSXSAVE enabled bit */
		if (native_read_cr4() & X86_CR4_OSXSAVE)
			leaf->ecx |= BIT(27);
		break;
	case 0x7:
		/* OSPKE enabled bit */
		leaf->ecx &= ~BIT(4);
		if (native_read_cr4() & X86_CR4_PKE)
			leaf->ecx |= BIT(4);
		break;
	case 0xB:
		leaf_hv.subfn = 0;
		snp_cpuid_hv(&leaf_hv);

		/* extended APIC ID */
		leaf->edx = leaf_hv.edx;
		break;
	case 0xD: {
		bool compacted = false;
		u64 xcr0 = 1, xss = 0;
		u32 xsave_size;

		if (leaf->subfn != 0 && leaf->subfn != 1)
			return 0;

		if (native_read_cr4() & X86_CR4_OSXSAVE)
			xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
		if (leaf->subfn == 1) {
			/* Get XSS value if XSAVES is enabled. */
			if (leaf->eax & BIT(3)) {
				unsigned long lo, hi;

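				/* Raw RDMSR: the 64-bit value is returned in EDX:EAX. */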
				asm volatile("rdmsr" : "=a" (lo), "=d" (hi)
						     : "c" (MSR_IA32_XSS));
				xss = (hi << 32) | lo;
			}

			/*
			 * The PPR and APM aren't clear on what size should be
			 * encoded in 0xD:0x1:EBX when compaction is not enabled
			 * by either XSAVEC (feature bit 1) or XSAVES (feature
			 * bit 3) since SNP-capable hardware has these feature
			 * bits fixed as 1. KVM sets it to 0 in this case, but
			 * to avoid this becoming an issue it's safer to simply
			 * treat this as unsupported for SNP guests.
			 */
			if (!(leaf->eax & (BIT(1) | BIT(3))))
				return -EINVAL;

			compacted = true;
		}

		xsave_size = snp_cpuid_calc_xsave_size(xcr0 | xss, compacted);
		if (!xsave_size)
			return -EINVAL;

		leaf->ebx = xsave_size;
		}
		break;
	case 0x8000001E:
		snp_cpuid_hv(&leaf_hv);

		/* extended APIC ID */
		leaf->eax = leaf_hv.eax;
		/* compute ID */
		leaf->ebx = (leaf->ebx & GENMASK(31, 8)) | (leaf_hv.ebx & GENMASK(7, 0));
		/* node ID */
		leaf->ecx = (leaf->ecx & GENMASK(31, 8)) | (leaf_hv.ecx & GENMASK(7, 0));
		break;
	default:
		/* No fix-ups needed, use values as-is. */
		break;
	}

	return 0;
}

/*
 * Returns -EOPNOTSUPP if the feature is not enabled. Any other non-zero
 * return value should be treated as fatal by the caller.
 */
static int snp_cpuid(struct cpuid_leaf *leaf)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();

	if (!cpuid_table->count)
		return -EOPNOTSUPP;

	if (!snp_cpuid_get_validated_func(leaf)) {
		/*
		 * Some hypervisors will avoid keeping track of CPUID entries
		 * where all values are zero, since they can be handled the
		 * same as out-of-range values (all-zero). This is useful here
		 * as well, since it allows virtually all guest configurations
		 * to work using a single SNP CPUID table.
		 *
		 * To allow for this, there is a need to distinguish between
		 * out-of-range entries and in-range zero entries, since the
		 * CPUID table entries are only a template that may need to be
		 * augmented with additional values for things like
		 * CPU-specific information during post-processing. So if it's
		 * not in the table, set the values to zero. Then, if they are
		 * within a valid CPUID range, proceed with post-processing
		 * using zeros as the initial values. Otherwise, skip
		 * post-processing and just return zeros immediately.
		 */
		leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;

		/* Skip post-processing for out-of-range zero leaves. */
		if (!(leaf->fn <= cpuid_std_range_max ||
		      (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) ||
		      (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max)))
			return 0;
	}

	return snp_cpuid_postprocess(leaf);
}

/*
 * Boot VC Handler - This is the first VC handler during boot. There is no
 * GHCB page yet, so it only supports MSR-based communication with the
 * hypervisor and only the CPUID exit-code.
 */
void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
{
	unsigned int subfn = lower_bits(regs->cx, 32);
	unsigned int fn = lower_bits(regs->ax, 32);
	struct cpuid_leaf leaf;
	int ret;

	/* Only CPUID is supported via MSR protocol */
	if (exit_code != SVM_EXIT_CPUID)
		goto fail;

	leaf.fn = fn;
	leaf.subfn = subfn;

	ret = snp_cpuid(&leaf);
	if (!ret)
		goto cpuid_done;

	if (ret != -EOPNOTSUPP)
		goto fail;

	if (sev_cpuid_hv(&leaf))
		goto fail;

cpuid_done:
	regs->ax = leaf.eax;
	regs->bx = leaf.ebx;
	regs->cx = leaf.ecx;
	regs->dx = leaf.edx;

	/*
	 * This is a VC handler and the #VC is only raised when SEV-ES is
	 * active, which means SEV must be active too. Do sanity checks on the
	 * CPUID results to make sure the hypervisor does not trick the kernel
	 * into the no-sev path. This could map sensitive data unencrypted and
	 * make it accessible to the hypervisor.
	 *
	 * In particular, check for:
	 *	- Availability of CPUID leaf 0x8000001f
	 *	- SEV CPUID bit.
	 *
	 * The hypervisor might still report the wrong C-bit position, but this
	 * can't be checked here.
	 */

	if (fn == 0x80000000 && (regs->ax < 0x8000001f))
		/* SEV leaf check */
		goto fail;
	else if ((fn == 0x8000001f && !(regs->ax & BIT(1))))
		/* SEV bit */
		goto fail;

	/* Skip over the CPUID two-byte opcode */
	regs->ip += 2;

	return;

fail:
	/* Terminate the guest */
	sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
}

static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
					  void *src, char *buf,
					  unsigned int data_size,
					  unsigned int count,
					  bool backwards)
{
	int i, b = backwards ? -1 : 1;
	enum es_result ret = ES_OK;

	for (i = 0; i < count; i++) {
		void *s = src + (i * data_size * b);
		char *d = buf + (i * data_size);

		ret = vc_read_mem(ctxt, s, d, data_size);
		if (ret != ES_OK)
			break;
	}

	return ret;
}

static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
					   void *dst, char *buf,
					   unsigned int data_size,
					   unsigned int count,
					   bool backwards)
{
	int i, s = backwards ? -1 : 1;
	enum es_result ret = ES_OK;

	for (i = 0; i < count; i++) {
		void *d = dst + (i * data_size * s);
		char *b = buf + (i * data_size);

		ret = vc_write_mem(ctxt, d, b, data_size);
		if (ret != ES_OK)
			break;
	}

	return ret;
}

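/*
 * Bit layout of the IOIO exit information assembled below for
 * SVM_EXIT_IOIO: bit 0 is the direction (IN), bit 2 marks string
 * instructions, bit 3 a REP prefix, bits 4-6 the operand size,
 * bits 7-9 the address size, bits 10 and up the effective segment
 * (ES or DS here) and bits 16-31 the port number.
 */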
#define IOIO_TYPE_STR  BIT(2)
#define IOIO_TYPE_IN   1
#define IOIO_TYPE_INS  (IOIO_TYPE_IN | IOIO_TYPE_STR)
#define IOIO_TYPE_OUT  0
#define IOIO_TYPE_OUTS (IOIO_TYPE_OUT | IOIO_TYPE_STR)

#define IOIO_REP       BIT(3)

#define IOIO_ADDR_64   BIT(9)
#define IOIO_ADDR_32   BIT(8)
#define IOIO_ADDR_16   BIT(7)

#define IOIO_DATA_32   BIT(6)
#define IOIO_DATA_16   BIT(5)
#define IOIO_DATA_8    BIT(4)

#define IOIO_SEG_ES    (0 << 10)
#define IOIO_SEG_DS    (3 << 10)

static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
{
	struct insn *insn = &ctxt->insn;
	*exitinfo = 0;

	switch (insn->opcode.bytes[0]) {
	/* INS opcodes */
	case 0x6c:
	case 0x6d:
		*exitinfo |= IOIO_TYPE_INS;
		*exitinfo |= IOIO_SEG_ES;
		*exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
		break;

	/* OUTS opcodes */
	case 0x6e:
	case 0x6f:
		*exitinfo |= IOIO_TYPE_OUTS;
		*exitinfo |= IOIO_SEG_DS;
		*exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
		break;

	/* IN immediate opcodes */
	case 0xe4:
	case 0xe5:
		*exitinfo |= IOIO_TYPE_IN;
		*exitinfo |= (u8)insn->immediate.value << 16;
		break;

	/* OUT immediate opcodes */
	case 0xe6:
	case 0xe7:
		*exitinfo |= IOIO_TYPE_OUT;
		*exitinfo |= (u8)insn->immediate.value << 16;
		break;

	/* IN register opcodes */
	case 0xec:
	case 0xed:
		*exitinfo |= IOIO_TYPE_IN;
		*exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
		break;

	/* OUT register opcodes */
	case 0xee:
	case 0xef:
		*exitinfo |= IOIO_TYPE_OUT;
		*exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
		break;

	default:
		return ES_DECODE_FAILED;
	}

	switch (insn->opcode.bytes[0]) {
	case 0x6c:
	case 0x6e:
	case 0xe4:
	case 0xe6:
	case 0xec:
	case 0xee:
		/* Single byte opcodes */
		*exitinfo |= IOIO_DATA_8;
		break;
	default:
		/* Length determined by instruction parsing */
		*exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16
						     : IOIO_DATA_32;
	}
	switch (insn->addr_bytes) {
	case 2:
		*exitinfo |= IOIO_ADDR_16;
		break;
	case 4:
		*exitinfo |= IOIO_ADDR_32;
		break;
	case 8:
		*exitinfo |= IOIO_ADDR_64;
		break;
	}

	if (insn_has_rep_prefix(insn))
		*exitinfo |= IOIO_REP;

	return ES_OK;
}

static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	u64 exit_info_1, exit_info_2;
	enum es_result ret;

	ret = vc_ioio_exitinfo(ctxt, &exit_info_1);
	if (ret != ES_OK)
		return ret;

	if (exit_info_1 & IOIO_TYPE_STR) {

		/* (REP) INS/OUTS */

		bool df = ((regs->flags & X86_EFLAGS_DF) == X86_EFLAGS_DF);
		unsigned int io_bytes, exit_bytes;
		unsigned int ghcb_count, op_count;
		unsigned long es_base;
		u64 sw_scratch;

		/*
		 * For the string variants with rep prefix the number of in/out
		 * operations per #VC exception is limited so that the kernel
		 * has a chance to take interrupts and re-schedule while the
		 * instruction is emulated.
		 */
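		/*
		 * Bits 4-6 of exit_info_1 hold IOIO_DATA_8/16/32; shifting
		 * them down by four turns the set bit into the operand size
		 * in bytes (1, 2 or 4).
		 */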
		io_bytes   = (exit_info_1 >> 4) & 0x7;
		ghcb_count = sizeof(ghcb->shared_buffer) / io_bytes;

		op_count    = (exit_info_1 & IOIO_REP) ? regs->cx : 1;
		exit_info_2 = min(op_count, ghcb_count);
		exit_bytes  = exit_info_2 * io_bytes;

		es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);

		/* Read bytes of OUTS into the shared buffer */
		if (!(exit_info_1 & IOIO_TYPE_IN)) {
			ret = vc_insn_string_read(ctxt,
					       (void *)(es_base + regs->si),
					       ghcb->shared_buffer, io_bytes,
					       exit_info_2, df);
			if (ret)
				return ret;
		}

		/*
		 * Issue a VMGEXIT to the HV to consume the bytes from the
		 * shared buffer or to have it write them into the shared buffer
		 * depending on the instruction: OUTS or INS.
		 */
		sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer);
		ghcb_set_sw_scratch(ghcb, sw_scratch);
		ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_IOIO,
					  exit_info_1, exit_info_2);
		if (ret != ES_OK)
			return ret;

		/* Read bytes from shared buffer into the guest's destination. */
		if (exit_info_1 & IOIO_TYPE_IN) {
			ret = vc_insn_string_write(ctxt,
						   (void *)(es_base + regs->di),
						   ghcb->shared_buffer, io_bytes,
						   exit_info_2, df);
			if (ret)
				return ret;

			if (df)
				regs->di -= exit_bytes;
			else
				regs->di += exit_bytes;
		} else {
			if (df)
				regs->si -= exit_bytes;
			else
				regs->si += exit_bytes;
		}

		if (exit_info_1 & IOIO_REP)
			regs->cx -= exit_info_2;

		ret = regs->cx ? ES_RETRY : ES_OK;

	} else {

		/* IN/OUT into/from rAX */

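		/*
		 * The IOIO_DATA_* bits double as the operand width in bits
		 * when shifted right by one: 0x10 >> 1 == 8, 0x20 >> 1 == 16,
		 * 0x40 >> 1 == 32.
		 */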
		int bits = (exit_info_1 & 0x70) >> 1;
		u64 rax = 0;

		if (!(exit_info_1 & IOIO_TYPE_IN))
			rax = lower_bits(regs->ax, bits);

		ghcb_set_rax(ghcb, rax);

		ret = sev_es_ghcb_hv_call(ghcb, true, ctxt,
					  SVM_EXIT_IOIO, exit_info_1, 0);
		if (ret != ES_OK)
			return ret;

		if (exit_info_1 & IOIO_TYPE_IN) {
			if (!ghcb_rax_is_valid(ghcb))
				return ES_VMM_ERROR;
			regs->ax = lower_bits(ghcb->save.rax, bits);
		}
	}

	return ret;
}

static int vc_handle_cpuid_snp(struct pt_regs *regs)
{
	struct cpuid_leaf leaf;
	int ret;

	leaf.fn = regs->ax;
	leaf.subfn = regs->cx;
	ret = snp_cpuid(&leaf);
	if (!ret) {
		regs->ax = leaf.eax;
		regs->bx = leaf.ebx;
		regs->cx = leaf.ecx;
		regs->dx = leaf.edx;
	}

	return ret;
}

static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
				      struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	u32 cr4 = native_read_cr4();
	enum es_result ret;
	int snp_cpuid_ret;

	snp_cpuid_ret = vc_handle_cpuid_snp(regs);
	if (!snp_cpuid_ret)
		return ES_OK;
	if (snp_cpuid_ret != -EOPNOTSUPP)
		return ES_VMM_ERROR;

	ghcb_set_rax(ghcb, regs->ax);
	ghcb_set_rcx(ghcb, regs->cx);

	if (cr4 & X86_CR4_OSXSAVE)
		/* Safe to read xcr0 */
		ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
	else
		/* xgetbv will cause #GP - use reset value for xcr0 */
		ghcb_set_xcr0(ghcb, 1);

	ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_CPUID, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) &&
	      ghcb_rbx_is_valid(ghcb) &&
	      ghcb_rcx_is_valid(ghcb) &&
	      ghcb_rdx_is_valid(ghcb)))
		return ES_VMM_ERROR;

	regs->ax = ghcb->save.rax;
	regs->bx = ghcb->save.rbx;
	regs->cx = ghcb->save.rcx;
	regs->dx = ghcb->save.rdx;

	return ES_OK;
}

static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
				      struct es_em_ctxt *ctxt,
				      unsigned long exit_code)
{
	bool rdtscp = (exit_code == SVM_EXIT_RDTSCP);
	enum es_result ret;

	ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, exit_code, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb) &&
	     (!rdtscp || ghcb_rcx_is_valid(ghcb))))
		return ES_VMM_ERROR;

	ctxt->regs->ax = ghcb->save.rax;
	ctxt->regs->dx = ghcb->save.rdx;
	if (rdtscp)
		ctxt->regs->cx = ghcb->save.rcx;

	return ES_OK;
}

struct cc_setup_data {
	struct setup_data header;
	u32 cc_blob_address;
};

/*
 * Search for a Confidential Computing blob passed in as a setup_data entry
 * via the Linux Boot Protocol.
 */
static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
{
	struct cc_setup_data *sd = NULL;
	struct setup_data *hdr;

	hdr = (struct setup_data *)bp->hdr.setup_data;

	while (hdr) {
		if (hdr->type == SETUP_CC_BLOB) {
			sd = (struct cc_setup_data *)hdr;
			return (struct cc_blob_sev_info *)(unsigned long)sd->cc_blob_address;
		}
		hdr = (struct setup_data *)hdr->next;
	}

	return NULL;
}

/*
 * Initialize the kernel's copy of the SNP CPUID table, and set up the
 * pointer that will be used to access it.
 *
 * Maintaining a direct mapping of the SNP CPUID table used by firmware would
 * be possible as an alternative, but the approach is brittle since the
 * mapping needs to be updated in sync with all the changes to virtual memory
 * layout and related mapping facilities throughout the boot process.
 */
static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
{
	const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table;
	int i;

	if (!cc_info || !cc_info->cpuid_phys || cc_info->cpuid_len < PAGE_SIZE)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);

	cpuid_table_fw = (const struct snp_cpuid_table *)cc_info->cpuid_phys;
	if (!cpuid_table_fw->count || cpuid_table_fw->count > SNP_CPUID_COUNT_MAX)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);

	cpuid_table = snp_cpuid_get_table();
	memcpy((void *)cpuid_table, cpuid_table_fw, sizeof(*cpuid_table));

	/* Initialize CPUID ranges for range-checking. */
	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];

		if (fn->eax_in == 0x0)
			cpuid_std_range_max = fn->eax;
		else if (fn->eax_in == 0x40000000)
			cpuid_hyp_range_max = fn->eax;
		else if (fn->eax_in == 0x80000000)
			cpuid_ext_range_max = fn->eax;
	}
}