cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

core.c (23524B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 *  Copyright (C) 1994 Linus Torvalds
      4 *
      5 *  Pentium III FXSR, SSE support
      6 *  General FPU state handling cleanups
      7 *	Gareth Hughes <gareth@valinux.com>, May 2000
      8 */
      9#include <asm/fpu/api.h>
     10#include <asm/fpu/regset.h>
     11#include <asm/fpu/sched.h>
     12#include <asm/fpu/signal.h>
     13#include <asm/fpu/types.h>
     14#include <asm/traps.h>
     15#include <asm/irq_regs.h>
     16
     17#include <uapi/asm/kvm.h>
     18
     19#include <linux/hardirq.h>
     20#include <linux/pkeys.h>
     21#include <linux/vmalloc.h>
     22
     23#include "context.h"
     24#include "internal.h"
     25#include "legacy.h"
     26#include "xstate.h"
     27
     28#define CREATE_TRACE_POINTS
     29#include <asm/trace/fpu.h>
     30
     31#ifdef CONFIG_X86_64
     32DEFINE_STATIC_KEY_FALSE(__fpu_state_size_dynamic);
     33DEFINE_PER_CPU(u64, xfd_state);
     34#endif
     35
     36/* The FPU state configuration data for kernel and user space */
     37struct fpu_state_config	fpu_kernel_cfg __ro_after_init;
     38struct fpu_state_config fpu_user_cfg __ro_after_init;
     39
     40/*
     41 * Represents the initial FPU state. It's mostly (but not completely) zeroes,
     42 * depending on the FPU hardware format:
     43 */
     44struct fpstate init_fpstate __ro_after_init;
     45
     46/* Track in-kernel FPU usage */
     47static DEFINE_PER_CPU(bool, in_kernel_fpu);
     48
     49/*
     50 * Track which context is using the FPU on the CPU:
     51 */
     52DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
     53
     54/*
     55 * Can we use the FPU in kernel mode with the
     56 * whole "kernel_fpu_begin/end()" sequence?
     57 */
     58bool irq_fpu_usable(void)
     59{
     60	if (WARN_ON_ONCE(in_nmi()))
     61		return false;
     62
     63	/* In kernel FPU usage already active? */
     64	if (this_cpu_read(in_kernel_fpu))
     65		return false;
     66
     67	/*
     68	 * When not in NMI or hard interrupt context, FPU can be used in:
     69	 *
     70	 * - Task context except from within fpregs_lock()'ed critical
     71	 *   regions.
     72	 *
     73	 * - Soft interrupt processing context which cannot happen
     74	 *   while in a fpregs_lock()'ed critical region.
     75	 */
     76	if (!in_hardirq())
     77		return true;
     78
     79	/*
     80	 * In hard interrupt context it's safe when soft interrupts
     81	 * are enabled, which means the interrupt did not hit in
     82	 * a fpregs_lock()'ed critical region.
     83	 */
     84	return !softirq_count();
     85}
     86EXPORT_SYMBOL(irq_fpu_usable);
     87
     88/*
     89 * Track AVX512 state use because it is known to slow the max clock
     90 * speed of the core.
     91 */
     92static void update_avx_timestamp(struct fpu *fpu)
     93{
     94
     95#define AVX512_TRACKING_MASK	(XFEATURE_MASK_ZMM_Hi256 | XFEATURE_MASK_Hi16_ZMM)
     96
     97	if (fpu->fpstate->regs.xsave.header.xfeatures & AVX512_TRACKING_MASK)
     98		fpu->avx512_timestamp = jiffies;
     99}
    100
    101/*
    102 * Save the FPU register state in fpu->fpstate->regs. The register state is
    103 * preserved.
    104 *
    105 * Must be called with fpregs_lock() held.
    106 *
    107 * The legacy FNSAVE instruction clears all FPU state unconditionally, so
    108 * register state has to be reloaded. That might be a pointless exercise
    109 * when the FPU is going to be used by another task right after that. But
    110 * this only affects 20+ years old 32bit systems and avoids conditionals all
     111 * this only affects 20+ year old 32-bit systems and avoids conditionals all
    112 *
    113 * FXSAVE and all XSAVE variants preserve the FPU register state.
    114 */
    115void save_fpregs_to_fpstate(struct fpu *fpu)
    116{
    117	if (likely(use_xsave())) {
    118		os_xsave(fpu->fpstate);
    119		update_avx_timestamp(fpu);
    120		return;
    121	}
    122
    123	if (likely(use_fxsr())) {
    124		fxsave(&fpu->fpstate->regs.fxsave);
    125		return;
    126	}
    127
    128	/*
    129	 * Legacy FPU register saving, FNSAVE always clears FPU registers,
    130	 * so we have to reload them from the memory state.
    131	 */
    132	asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->fpstate->regs.fsave));
    133	frstor(&fpu->fpstate->regs.fsave);
    134}
    135
    136void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask)
    137{
    138	/*
    139	 * AMD K7/K8 and later CPUs up to Zen don't save/restore
    140	 * FDP/FIP/FOP unless an exception is pending. Clear the x87 state
     141	 * here by setting it to fixed values.  "m" is an arbitrary
     142	 * memory operand that should already be in L1.
    143	 */
    144	if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
    145		asm volatile(
    146			"fnclex\n\t"
    147			"emms\n\t"
    148			"fildl %P[addr]"	/* set F?P to defined value */
    149			: : [addr] "m" (fpstate));
    150	}
    151
    152	if (use_xsave()) {
    153		/*
     154		 * Dynamically enabled features are enabled in XCR0, but
     155		 * usage also requires that the corresponding bits in XFD
     156		 * are cleared.  If the bits are set, using a related
     157		 * instruction raises #NM. This allows the larger FPU
     158		 * buffer to be allocated lazily from the #NM handler, or
     159		 * the task to be killed there if it lacks permission, which
     160		 * would otherwise happen via #UD if it were disabled in XCR0.
     161		 *
     162		 * XFD state follows the same lifetime rules as XSTATE. To
     163		 * restore state correctly, XFD has to be updated before
     164		 * XRSTORS; otherwise the component would stay in or go
     165		 * into init state even if the bits are set in
     166		 * fpstate::regs::xsave::xfeatures.
    167		 */
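        		/*
        		 * Concrete case (illustrative, not from this file): with
        		 * AMX, the XTILE bits are enabled in XCR0 at boot while
        		 * XFD[18] stays armed; a task that has been granted
        		 * XTILEDATA permission takes #NM on its first tile
        		 * instruction, gets the larger fpstate allocated, and has
        		 * XFD[18] cleared from then on.
        		 */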
    168		xfd_update_state(fpstate);
    169
    170		/*
    171		 * Restoring state always needs to modify all features
    172		 * which are in @mask even if the current task cannot use
    173		 * extended features.
    174		 *
    175		 * So fpstate->xfeatures cannot be used here, because then
    176		 * a feature for which the task has no permission but was
    177		 * used by the previous task would not go into init state.
    178		 */
    179		mask = fpu_kernel_cfg.max_features & mask;
    180
    181		os_xrstor(fpstate, mask);
    182	} else {
    183		if (use_fxsr())
    184			fxrstor(&fpstate->regs.fxsave);
    185		else
    186			frstor(&fpstate->regs.fsave);
    187	}
    188}
    189
    190void fpu_reset_from_exception_fixup(void)
    191{
    192	restore_fpregs_from_fpstate(&init_fpstate, XFEATURE_MASK_FPSTATE);
    193}
    194
    195#if IS_ENABLED(CONFIG_KVM)
    196static void __fpstate_reset(struct fpstate *fpstate, u64 xfd);
    197
    198static void fpu_init_guest_permissions(struct fpu_guest *gfpu)
    199{
    200	struct fpu_state_perm *fpuperm;
    201	u64 perm;
    202
    203	if (!IS_ENABLED(CONFIG_X86_64))
    204		return;
    205
    206	spin_lock_irq(&current->sighand->siglock);
    207	fpuperm = &current->group_leader->thread.fpu.guest_perm;
    208	perm = fpuperm->__state_perm;
    209
    210	/* First fpstate allocation locks down permissions. */
    211	WRITE_ONCE(fpuperm->__state_perm, perm | FPU_GUEST_PERM_LOCKED);
    212
    213	spin_unlock_irq(&current->sighand->siglock);
    214
    215	gfpu->perm = perm & ~FPU_GUEST_PERM_LOCKED;
    216}
    217
    218bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
    219{
    220	struct fpstate *fpstate;
    221	unsigned int size;
    222
    223	size = fpu_user_cfg.default_size + ALIGN(offsetof(struct fpstate, regs), 64);
    224	fpstate = vzalloc(size);
    225	if (!fpstate)
    226		return false;
    227
     228	/* Leave xfd at 0 (the reset value defined by the spec) */
    229	__fpstate_reset(fpstate, 0);
    230	fpstate_init_user(fpstate);
    231	fpstate->is_valloc	= true;
    232	fpstate->is_guest	= true;
    233
    234	gfpu->fpstate		= fpstate;
    235	gfpu->xfeatures		= fpu_user_cfg.default_features;
    236	gfpu->perm		= fpu_user_cfg.default_features;
    237
    238	/*
    239	 * KVM sets the FP+SSE bits in the XSAVE header when copying FPU state
    240	 * to userspace, even when XSAVE is unsupported, so that restoring FPU
    241	 * state on a different CPU that does support XSAVE can cleanly load
    242	 * the incoming state using its natural XSAVE.  In other words, KVM's
    243	 * uABI size may be larger than this host's default size.  Conversely,
    244	 * the default size should never be larger than KVM's base uABI size;
    245	 * all features that can expand the uABI size must be opt-in.
    246	 */
    247	gfpu->uabi_size		= sizeof(struct kvm_xsave);
    248	if (WARN_ON_ONCE(fpu_user_cfg.default_size > gfpu->uabi_size))
    249		gfpu->uabi_size = fpu_user_cfg.default_size;
    250
    251	fpu_init_guest_permissions(gfpu);
    252
    253	return true;
    254}
    255EXPORT_SYMBOL_GPL(fpu_alloc_guest_fpstate);
    256
    257void fpu_free_guest_fpstate(struct fpu_guest *gfpu)
    258{
    259	struct fpstate *fps = gfpu->fpstate;
    260
    261	if (!fps)
    262		return;
    263
    264	if (WARN_ON_ONCE(!fps->is_valloc || !fps->is_guest || fps->in_use))
    265		return;
    266
    267	gfpu->fpstate = NULL;
    268	vfree(fps);
    269}
    270EXPORT_SYMBOL_GPL(fpu_free_guest_fpstate);
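        /*
         * Sketch of the expected pairing on the KVM side (not part of this
         * file): fpu_alloc_guest_fpstate() is called when a vCPU is created
         * and fpu_free_guest_fpstate() when it is destroyed, so a guest
         * fpstate lives exactly as long as its vCPU.
         */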
    271
    272/*
     273 * fpu_enable_guest_xfd_features - Check xfeatures against guest perm and enable
     274 * @guest_fpu:         Pointer to the guest FPU container
     275 * @xfeatures:         Features requested by guest CPUID
     276 *
     277 * Enable all dynamic xfeatures according to guest perm and requested CPUID.
     278 *
     279 * Return: 0 on success, error code otherwise
     280 */
    281int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures)
    282{
    283	lockdep_assert_preemption_enabled();
    284
    285	/* Nothing to do if all requested features are already enabled. */
    286	xfeatures &= ~guest_fpu->xfeatures;
    287	if (!xfeatures)
    288		return 0;
    289
    290	return __xfd_enable_feature(xfeatures, guest_fpu);
    291}
    292EXPORT_SYMBOL_GPL(fpu_enable_guest_xfd_features);
    293
    294#ifdef CONFIG_X86_64
    295void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd)
    296{
    297	fpregs_lock();
    298	guest_fpu->fpstate->xfd = xfd;
    299	if (guest_fpu->fpstate->in_use)
    300		xfd_update_state(guest_fpu->fpstate);
    301	fpregs_unlock();
    302}
    303EXPORT_SYMBOL_GPL(fpu_update_guest_xfd);
    304
    305/**
    306 * fpu_sync_guest_vmexit_xfd_state - Synchronize XFD MSR and software state
    307 *
    308 * Must be invoked from KVM after a VMEXIT before enabling interrupts when
    309 * XFD write emulation is disabled. This is required because the guest can
    310 * freely modify XFD and the state at VMEXIT is not guaranteed to be the
     311 * same as the state on VMENTER. So software state has to be updated before
    312 * any operation which depends on it can take place.
    313 *
    314 * Note: It can be invoked unconditionally even when write emulation is
    315 * enabled for the price of a then pointless MSR read.
    316 */
    317void fpu_sync_guest_vmexit_xfd_state(void)
    318{
    319	struct fpstate *fps = current->thread.fpu.fpstate;
    320
    321	lockdep_assert_irqs_disabled();
    322	if (fpu_state_size_dynamic()) {
    323		rdmsrl(MSR_IA32_XFD, fps->xfd);
    324		__this_cpu_write(xfd_state, fps->xfd);
    325	}
    326}
    327EXPORT_SYMBOL_GPL(fpu_sync_guest_vmexit_xfd_state);
    328#endif /* CONFIG_X86_64 */
    329
    330int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
    331{
    332	struct fpstate *guest_fps = guest_fpu->fpstate;
    333	struct fpu *fpu = &current->thread.fpu;
    334	struct fpstate *cur_fps = fpu->fpstate;
    335
    336	fpregs_lock();
    337	if (!cur_fps->is_confidential && !test_thread_flag(TIF_NEED_FPU_LOAD))
    338		save_fpregs_to_fpstate(fpu);
    339
    340	/* Swap fpstate */
    341	if (enter_guest) {
    342		fpu->__task_fpstate = cur_fps;
    343		fpu->fpstate = guest_fps;
    344		guest_fps->in_use = true;
    345	} else {
    346		guest_fps->in_use = false;
    347		fpu->fpstate = fpu->__task_fpstate;
    348		fpu->__task_fpstate = NULL;
    349	}
    350
    351	cur_fps = fpu->fpstate;
    352
    353	if (!cur_fps->is_confidential) {
    354		/* Includes XFD update */
    355		restore_fpregs_from_fpstate(cur_fps, XFEATURE_MASK_FPSTATE);
    356	} else {
    357		/*
    358		 * XSTATE is restored by firmware from encrypted
    359		 * memory. Make sure XFD state is correct while
    360		 * running with guest fpstate
    361		 */
    362		xfd_update_state(cur_fps);
    363	}
    364
    365	fpregs_mark_activate();
    366	fpregs_unlock();
    367	return 0;
    368}
    369EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate);
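        /*
         * Expected usage from KVM (illustrative sketch, the real call sites
         * live in arch/x86/kvm/):
         *
         *	fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, true);	<- switch to the guest fpstate
         *	... enter and run the guest ...
         *	fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, false);	<- back to the task fpstate
         */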
    370
    371void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
    372				    unsigned int size, u32 pkru)
    373{
    374	struct fpstate *kstate = gfpu->fpstate;
    375	union fpregs_state *ustate = buf;
    376	struct membuf mb = { .p = buf, .left = size };
    377
    378	if (cpu_feature_enabled(X86_FEATURE_XSAVE)) {
    379		__copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE);
    380	} else {
    381		memcpy(&ustate->fxsave, &kstate->regs.fxsave,
    382		       sizeof(ustate->fxsave));
     383		/* Make it restorable on an XSAVE-enabled host */
    384		ustate->xsave.header.xfeatures = XFEATURE_MASK_FPSSE;
    385	}
    386}
    387EXPORT_SYMBOL_GPL(fpu_copy_guest_fpstate_to_uabi);
    388
    389int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf,
    390				   u64 xcr0, u32 *vpkru)
    391{
    392	struct fpstate *kstate = gfpu->fpstate;
    393	const union fpregs_state *ustate = buf;
    394	struct pkru_state *xpkru;
    395	int ret;
    396
    397	if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) {
    398		if (ustate->xsave.header.xfeatures & ~XFEATURE_MASK_FPSSE)
    399			return -EINVAL;
    400		if (ustate->fxsave.mxcsr & ~mxcsr_feature_mask)
    401			return -EINVAL;
    402		memcpy(&kstate->regs.fxsave, &ustate->fxsave, sizeof(ustate->fxsave));
    403		return 0;
    404	}
    405
    406	if (ustate->xsave.header.xfeatures & ~xcr0)
    407		return -EINVAL;
    408
    409	ret = copy_uabi_from_kernel_to_xstate(kstate, ustate);
    410	if (ret)
    411		return ret;
    412
    413	/* Retrieve PKRU if not in init state */
    414	if (kstate->regs.xsave.header.xfeatures & XFEATURE_MASK_PKRU) {
    415		xpkru = get_xsave_addr(&kstate->regs.xsave, XFEATURE_PKRU);
    416		*vpkru = xpkru->pkru;
    417	}
    418	return 0;
    419}
    420EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate);
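        /*
         * Note: these two copy helpers are the backing for KVM's
         * KVM_GET_XSAVE / KVM_SET_XSAVE ioctls; the "uabi" buffer is the
         * struct kvm_xsave blob exchanged with userspace (see the
         * gfpu->uabi_size setup in fpu_alloc_guest_fpstate() above).
         */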
    421#endif /* CONFIG_KVM */
    422
    423void kernel_fpu_begin_mask(unsigned int kfpu_mask)
    424{
    425	preempt_disable();
    426
    427	WARN_ON_FPU(!irq_fpu_usable());
    428	WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
    429
    430	this_cpu_write(in_kernel_fpu, true);
    431
    432	if (!(current->flags & PF_KTHREAD) &&
    433	    !test_thread_flag(TIF_NEED_FPU_LOAD)) {
    434		set_thread_flag(TIF_NEED_FPU_LOAD);
    435		save_fpregs_to_fpstate(&current->thread.fpu);
    436	}
    437	__cpu_invalidate_fpregs_state();
    438
    439	/* Put sane initial values into the control registers. */
    440	if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM))
    441		ldmxcsr(MXCSR_DEFAULT);
    442
    443	if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU))
    444		asm volatile ("fninit");
    445}
    446EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask);
    447
    448void kernel_fpu_end(void)
    449{
    450	WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
    451
    452	this_cpu_write(in_kernel_fpu, false);
    453	preempt_enable();
    454}
    455EXPORT_SYMBOL_GPL(kernel_fpu_end);
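        /*
         * Typical in-kernel usage (illustrative sketch): SIMD code is
         * bracketed by kernel_fpu_begin(), a wrapper around
         * kernel_fpu_begin_mask() above, and kernel_fpu_end(); the region
         * must not sleep since preemption is disabled, and callers that may
         * run in interrupt context check irq_fpu_usable() first:
         *
         *	if (irq_fpu_usable()) {
         *		kernel_fpu_begin();
         *		... SSE/AVX code ...
         *		kernel_fpu_end();
         *	} else {
         *		... scalar fallback ...
         *	}
         */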
    456
    457/*
    458 * Sync the FPU register state to current's memory register state when the
    459 * current task owns the FPU. The hardware register state is preserved.
    460 */
    461void fpu_sync_fpstate(struct fpu *fpu)
    462{
    463	WARN_ON_FPU(fpu != &current->thread.fpu);
    464
    465	fpregs_lock();
    466	trace_x86_fpu_before_save(fpu);
    467
    468	if (!test_thread_flag(TIF_NEED_FPU_LOAD))
    469		save_fpregs_to_fpstate(fpu);
    470
    471	trace_x86_fpu_after_save(fpu);
    472	fpregs_unlock();
    473}
    474
    475static inline unsigned int init_fpstate_copy_size(void)
    476{
    477	if (!use_xsave())
    478		return fpu_kernel_cfg.default_size;
    479
    480	/* XSAVE(S) just needs the legacy and the xstate header part */
    481	return sizeof(init_fpstate.regs.xsave);
    482}
    483
    484static inline void fpstate_init_fxstate(struct fpstate *fpstate)
    485{
    486	fpstate->regs.fxsave.cwd = 0x37f;
    487	fpstate->regs.fxsave.mxcsr = MXCSR_DEFAULT;
    488}
    489
    490/*
    491 * Legacy x87 fpstate state init:
     492 * Legacy x87 fpstate init:
    493static inline void fpstate_init_fstate(struct fpstate *fpstate)
    494{
    495	fpstate->regs.fsave.cwd = 0xffff037fu;
    496	fpstate->regs.fsave.swd = 0xffff0000u;
    497	fpstate->regs.fsave.twd = 0xffffffffu;
    498	fpstate->regs.fsave.fos = 0xffff0000u;
    499}
    500
    501/*
    502 * Used in two places:
    503 * 1) Early boot to setup init_fpstate for non XSAVE systems
    504 * 2) fpu_init_fpstate_user() which is invoked from KVM
    505 */
    506void fpstate_init_user(struct fpstate *fpstate)
    507{
    508	if (!cpu_feature_enabled(X86_FEATURE_FPU)) {
    509		fpstate_init_soft(&fpstate->regs.soft);
    510		return;
    511	}
    512
    513	xstate_init_xcomp_bv(&fpstate->regs.xsave, fpstate->xfeatures);
    514
    515	if (cpu_feature_enabled(X86_FEATURE_FXSR))
    516		fpstate_init_fxstate(fpstate);
    517	else
    518		fpstate_init_fstate(fpstate);
    519}
    520
    521static void __fpstate_reset(struct fpstate *fpstate, u64 xfd)
    522{
    523	/* Initialize sizes and feature masks */
    524	fpstate->size		= fpu_kernel_cfg.default_size;
    525	fpstate->user_size	= fpu_user_cfg.default_size;
    526	fpstate->xfeatures	= fpu_kernel_cfg.default_features;
    527	fpstate->user_xfeatures	= fpu_user_cfg.default_features;
    528	fpstate->xfd		= xfd;
    529}
    530
    531void fpstate_reset(struct fpu *fpu)
    532{
    533	/* Set the fpstate pointer to the default fpstate */
    534	fpu->fpstate = &fpu->__fpstate;
    535	__fpstate_reset(fpu->fpstate, init_fpstate.xfd);
    536
    537	/* Initialize the permission related info in fpu */
    538	fpu->perm.__state_perm		= fpu_kernel_cfg.default_features;
    539	fpu->perm.__state_size		= fpu_kernel_cfg.default_size;
    540	fpu->perm.__user_state_size	= fpu_user_cfg.default_size;
    541	/* Same defaults for guests */
    542	fpu->guest_perm = fpu->perm;
    543}
    544
    545static inline void fpu_inherit_perms(struct fpu *dst_fpu)
    546{
    547	if (fpu_state_size_dynamic()) {
    548		struct fpu *src_fpu = &current->group_leader->thread.fpu;
    549
    550		spin_lock_irq(&current->sighand->siglock);
    551		/* Fork also inherits the permissions of the parent */
    552		dst_fpu->perm = src_fpu->perm;
    553		dst_fpu->guest_perm = src_fpu->guest_perm;
    554		spin_unlock_irq(&current->sighand->siglock);
    555	}
    556}
    557
    558/* Clone current's FPU state on fork */
    559int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal)
    560{
    561	struct fpu *src_fpu = &current->thread.fpu;
    562	struct fpu *dst_fpu = &dst->thread.fpu;
    563
    564	/* The new task's FPU state cannot be valid in the hardware. */
    565	dst_fpu->last_cpu = -1;
    566
    567	fpstate_reset(dst_fpu);
    568
    569	if (!cpu_feature_enabled(X86_FEATURE_FPU))
    570		return 0;
    571
    572	/*
    573	 * Enforce reload for user space tasks and prevent kernel threads
    574	 * from trying to save the FPU registers on context switch.
    575	 */
    576	set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);
    577
    578	/*
    579	 * No FPU state inheritance for kernel threads and IO
    580	 * worker threads.
    581	 */
    582	if (minimal) {
    583		/* Clear out the minimal state */
    584		memcpy(&dst_fpu->fpstate->regs, &init_fpstate.regs,
    585		       init_fpstate_copy_size());
    586		return 0;
    587	}
    588
    589	/*
    590	 * If a new feature is added, ensure all dynamic features are
    591	 * caller-saved from here!
    592	 */
    593	BUILD_BUG_ON(XFEATURE_MASK_USER_DYNAMIC != XFEATURE_MASK_XTILE_DATA);
    594
    595	/*
    596	 * Save the default portion of the current FPU state into the
    597	 * clone. Assume all dynamic features to be defined as caller-
    598	 * saved, which enables skipping both the expansion of fpstate
    599	 * and the copying of any dynamic state.
    600	 *
    601	 * Do not use memcpy() when TIF_NEED_FPU_LOAD is set because
    602	 * copying is not valid when current uses non-default states.
    603	 */
    604	fpregs_lock();
    605	if (test_thread_flag(TIF_NEED_FPU_LOAD))
    606		fpregs_restore_userregs();
    607	save_fpregs_to_fpstate(dst_fpu);
    608	if (!(clone_flags & CLONE_THREAD))
    609		fpu_inherit_perms(dst_fpu);
    610	fpregs_unlock();
    611
    612	/*
    613	 * Children never inherit PASID state.
    614	 * Force it to have its init value:
    615	 */
    616	if (use_xsave())
    617		dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID;
    618
    619	trace_x86_fpu_copy_src(src_fpu);
    620	trace_x86_fpu_copy_dst(dst_fpu);
    621
    622	return 0;
    623}
    624
    625/*
    626 * Whitelist the FPU register state embedded into task_struct for hardened
    627 * usercopy.
    628 */
    629void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size)
    630{
    631	*offset = offsetof(struct thread_struct, fpu.__fpstate.regs);
    632	*size = fpu_kernel_cfg.default_size;
    633}
    634
    635/*
    636 * Drops current FPU state: deactivates the fpregs and
    637 * the fpstate. NOTE: it still leaves previous contents
    638 * in the fpregs in the eager-FPU case.
    639 *
    640 * This function can be used in cases where we know that
    641 * a state-restore is coming: either an explicit one,
    642 * or a reschedule.
    643 */
    644void fpu__drop(struct fpu *fpu)
    645{
    646	preempt_disable();
    647
    648	if (fpu == &current->thread.fpu) {
    649		/* Ignore delayed exceptions from user space */
    650		asm volatile("1: fwait\n"
    651			     "2:\n"
    652			     _ASM_EXTABLE(1b, 2b));
    653		fpregs_deactivate(fpu);
    654	}
    655
    656	trace_x86_fpu_dropped(fpu);
    657
    658	preempt_enable();
    659}
    660
    661/*
    662 * Clear FPU registers by setting them up from the init fpstate.
    663 * Caller must do fpregs_[un]lock() around it.
    664 */
    665static inline void restore_fpregs_from_init_fpstate(u64 features_mask)
    666{
    667	if (use_xsave())
    668		os_xrstor(&init_fpstate, features_mask);
    669	else if (use_fxsr())
    670		fxrstor(&init_fpstate.regs.fxsave);
    671	else
    672		frstor(&init_fpstate.regs.fsave);
    673
    674	pkru_write_default();
    675}
    676
    677/*
    678 * Reset current->fpu memory state to the init values.
    679 */
    680static void fpu_reset_fpregs(void)
    681{
    682	struct fpu *fpu = &current->thread.fpu;
    683
    684	fpregs_lock();
    685	fpu__drop(fpu);
    686	/*
    687	 * This does not change the actual hardware registers. It just
    688	 * resets the memory image and sets TIF_NEED_FPU_LOAD so a
    689	 * subsequent return to usermode will reload the registers from the
    690	 * task's memory image.
    691	 *
    692	 * Do not use fpstate_init() here. Just copy init_fpstate which has
    693	 * the correct content already except for PKRU.
    694	 *
    695	 * PKRU handling does not rely on the xstate when restoring for
    696	 * user space as PKRU is eagerly written in switch_to() and
    697	 * flush_thread().
    698	 */
    699	memcpy(&fpu->fpstate->regs, &init_fpstate.regs, init_fpstate_copy_size());
    700	set_thread_flag(TIF_NEED_FPU_LOAD);
    701	fpregs_unlock();
    702}
    703
    704/*
    705 * Reset current's user FPU states to the init states.  current's
    706 * supervisor states, if any, are not modified by this function.  The
    707 * caller guarantees that the XSTATE header in memory is intact.
    708 */
    709void fpu__clear_user_states(struct fpu *fpu)
    710{
    711	WARN_ON_FPU(fpu != &current->thread.fpu);
    712
    713	fpregs_lock();
    714	if (!cpu_feature_enabled(X86_FEATURE_FPU)) {
    715		fpu_reset_fpregs();
    716		fpregs_unlock();
    717		return;
    718	}
    719
    720	/*
    721	 * Ensure that current's supervisor states are loaded into their
    722	 * corresponding registers.
    723	 */
    724	if (xfeatures_mask_supervisor() &&
    725	    !fpregs_state_valid(fpu, smp_processor_id()))
    726		os_xrstor_supervisor(fpu->fpstate);
    727
    728	/* Reset user states in registers. */
    729	restore_fpregs_from_init_fpstate(XFEATURE_MASK_USER_RESTORE);
    730
    731	/*
    732	 * Now all FPU registers have their desired values.  Inform the FPU
    733	 * state machine that current's FPU registers are in the hardware
    734	 * registers. The memory image does not need to be updated because
    735	 * any operation relying on it has to save the registers first when
    736	 * current's FPU is marked active.
    737	 */
    738	fpregs_mark_activate();
    739	fpregs_unlock();
    740}
    741
    742void fpu_flush_thread(void)
    743{
    744	fpstate_reset(&current->thread.fpu);
    745	fpu_reset_fpregs();
    746}
    747/*
    748 * Load FPU context before returning to userspace.
    749 */
    750void switch_fpu_return(void)
    751{
    752	if (!static_cpu_has(X86_FEATURE_FPU))
    753		return;
    754
    755	fpregs_restore_userregs();
    756}
    757EXPORT_SYMBOL_GPL(switch_fpu_return);
    758
    759#ifdef CONFIG_X86_DEBUG_FPU
    760/*
    761 * If current FPU state according to its tracking (loaded FPU context on this
    762 * CPU) is not valid then we must have TIF_NEED_FPU_LOAD set so the context is
    763 * loaded on return to userland.
    764 */
    765void fpregs_assert_state_consistent(void)
    766{
    767	struct fpu *fpu = &current->thread.fpu;
    768
    769	if (test_thread_flag(TIF_NEED_FPU_LOAD))
    770		return;
    771
    772	WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id()));
    773}
    774EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent);
    775#endif
    776
    777void fpregs_mark_activate(void)
    778{
    779	struct fpu *fpu = &current->thread.fpu;
    780
    781	fpregs_activate(fpu);
    782	fpu->last_cpu = smp_processor_id();
    783	clear_thread_flag(TIF_NEED_FPU_LOAD);
    784}
    785
    786/*
    787 * x87 math exception handling:
    788 */
    789
    790int fpu__exception_code(struct fpu *fpu, int trap_nr)
    791{
    792	int err;
    793
    794	if (trap_nr == X86_TRAP_MF) {
    795		unsigned short cwd, swd;
    796		/*
    797		 * (~cwd & swd) will mask out exceptions that are not set to unmasked
     798		 * status.  0x3f are the exception bits in these regs, 0x200 is the
     799		 * C1 bit you need in case of a stack fault, 0x040 is the stack
    800		 * fault bit.  We should only be taking one exception at a time,
    801		 * so if this combination doesn't produce any single exception,
    802		 * then we have a bad program that isn't synchronizing its FPU usage
    803		 * and it will suffer the consequences since we won't be able to
    804		 * fully reproduce the context of the exception.
    805		 */
    806		if (boot_cpu_has(X86_FEATURE_FXSR)) {
    807			cwd = fpu->fpstate->regs.fxsave.cwd;
    808			swd = fpu->fpstate->regs.fxsave.swd;
    809		} else {
    810			cwd = (unsigned short)fpu->fpstate->regs.fsave.cwd;
    811			swd = (unsigned short)fpu->fpstate->regs.fsave.swd;
    812		}
    813
    814		err = swd & ~cwd;
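        		/*
        		 * E.g. an unmasked invalid operation: the IE flag is bit 0
        		 * of the status word and the IM mask is bit 0 of the
        		 * control word, so with IM clear and IE set, err & 0x001
        		 * selects FPE_FLTINV below.
        		 */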
    815	} else {
    816		/*
    817		 * The SIMD FPU exceptions are handled a little differently, as there
    818		 * is only a single status/control register.  Thus, to determine which
    819		 * unmasked exception was caught we must mask the exception mask bits
    820		 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
    821		 */
    822		unsigned short mxcsr = MXCSR_DEFAULT;
    823
    824		if (boot_cpu_has(X86_FEATURE_XMM))
    825			mxcsr = fpu->fpstate->regs.fxsave.mxcsr;
    826
    827		err = ~(mxcsr >> 7) & mxcsr;
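        		/*
        		 * E.g. an unmasked divide-by-zero: the ZE flag is bit 2
        		 * (0x004) and its ZM mask is bit 9 (0x200); with ZM clear,
        		 * ~(mxcsr >> 7) keeps bit 2 set, so err & 0x004 selects
        		 * FPE_FLTDIV below.
        		 */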
    828	}
    829
    830	if (err & 0x001) {	/* Invalid op */
    831		/*
    832		 * swd & 0x240 == 0x040: Stack Underflow
    833		 * swd & 0x240 == 0x240: Stack Overflow
    834		 * User must clear the SF bit (0x40) if set
    835		 */
    836		return FPE_FLTINV;
    837	} else if (err & 0x004) { /* Divide by Zero */
    838		return FPE_FLTDIV;
    839	} else if (err & 0x008) { /* Overflow */
    840		return FPE_FLTOVF;
    841	} else if (err & 0x012) { /* Denormal, Underflow */
    842		return FPE_FLTUND;
    843	} else if (err & 0x020) { /* Precision */
    844		return FPE_FLTRES;
    845	}
    846
    847	/*
     848	 * If we're using IRQ 13, or supposedly even some
     849	 * X86_TRAP_MF trap implementations, it's possible that
     850	 * we get a spurious trap, which is not an error.
    851	 */
    852	return 0;
    853}