cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

vm86_32.c (22523B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 *  Copyright (C) 1994  Linus Torvalds
      4 *
      5 *  29 dec 2001 - Fixed oopses caused by unchecked access to the vm86
      6 *                stack - Manfred Spraul <manfred@colorfullife.com>
      7 *
      8 *  22 mar 2002 - Manfred detected the stackfaults, but didn't handle
      9 *                them correctly. Now the emulation will be in a
     10 *                consistent state after stackfaults - Kasper Dupont
     11 *                <kasperd@daimi.au.dk>
     12 *
     13 *  22 mar 2002 - Added missing clear_IF in set_vflags_* Kasper Dupont
     14 *                <kasperd@daimi.au.dk>
     15 *
     16 *  ?? ??? 2002 - Fixed premature returns from handle_vm86_fault
     17 *                caused by Kasper Dupont's changes - Stas Sergeev
     18 *
     19 *   4 apr 2002 - Fixed CHECK_IF_IN_TRAP broken by Stas' changes.
     20 *                Kasper Dupont <kasperd@daimi.au.dk>
     21 *
     22 *   9 apr 2002 - Changed syntax of macros in handle_vm86_fault.
     23 *                Kasper Dupont <kasperd@daimi.au.dk>
     24 *
     25 *   9 apr 2002 - Changed stack access macros to jump to a label
     26 *                instead of returning to userspace. This simplifies
      27 *                do_int, and is needed by handle_vm86_fault. Kasper
     28 *                Dupont <kasperd@daimi.au.dk>
     29 *
     30 */
     31
     32#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
     33
     34#include <linux/capability.h>
     35#include <linux/errno.h>
     36#include <linux/interrupt.h>
     37#include <linux/syscalls.h>
     38#include <linux/sched.h>
     39#include <linux/sched/task_stack.h>
     40#include <linux/kernel.h>
     41#include <linux/signal.h>
     42#include <linux/string.h>
     43#include <linux/mm.h>
     44#include <linux/smp.h>
     45#include <linux/highmem.h>
     46#include <linux/ptrace.h>
     47#include <linux/audit.h>
     48#include <linux/stddef.h>
     49#include <linux/slab.h>
     50#include <linux/security.h>
     51
     52#include <linux/uaccess.h>
     53#include <asm/io.h>
     54#include <asm/tlbflush.h>
     55#include <asm/irq.h>
     56#include <asm/traps.h>
     57#include <asm/vm86.h>
     58#include <asm/switch_to.h>
     59
     60/*
     61 * Known problems:
     62 *
     63 * Interrupt handling is not guaranteed:
     64 * - a real x86 will disable all interrupts for one instruction
     65 *   after a "mov ss,xx" to make stack handling atomic even without
     66 *   the 'lss' instruction. We can't guarantee this in v86 mode,
     67 *   as the next instruction might result in a page fault or similar.
     68 * - a real x86 will have interrupts disabled for one instruction
     69 *   past the 'sti' that enables them. We don't bother with all the
     70 *   details yet.
     71 *
     72 * Let's hope these problems do not actually matter for anything.
     73 */
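/*
 * For illustration, the first problem concerns a hypothetical guest
 * sequence like the classic 16-bit stack switch
 *
 *	mov	ss, ax
 *	mov	sp, bx
 *
 * where real hardware holds off interrupts between the two instructions.
 * Under v86 emulation the second instruction may fault or be interrupted
 * first, so the guest can briefly run on a mismatched ss:sp pair.
 */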
     74
     75
     76/*
     77 * 8- and 16-bit register defines..
     78 */
     79#define AL(regs)	(((unsigned char *)&((regs)->pt.ax))[0])
     80#define AH(regs)	(((unsigned char *)&((regs)->pt.ax))[1])
     81#define IP(regs)	(*(unsigned short *)&((regs)->pt.ip))
     82#define SP(regs)	(*(unsigned short *)&((regs)->pt.sp))
     83
     84/*
     85 * virtual flags (16 and 32-bit versions)
     86 */
     87#define VFLAGS	(*(unsigned short *)&(current->thread.vm86->veflags))
     88#define VEFLAGS	(current->thread.vm86->veflags)
     89
     90#define set_flags(X, new, mask) \
     91((X) = ((X) & ~(mask)) | ((new) & (mask)))
     92
     93#define SAFE_MASK	(0xDD5)
     94#define RETURN_MASK	(0xDFF)
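/*
 * For reference: SAFE_MASK (0xDD5) covers CF, PF, AF, ZF, SF, TF, DF and
 * OF -- the EFLAGS bits vm86 code is allowed to control directly.
 * RETURN_MASK (0xDFF) additionally passes through the fixed/reserved bits
 * 1, 3 and 5.  IF is deliberately absent from both masks; it is
 * virtualized through VIF in VEFLAGS instead.
 */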
     95
     96void save_v86_state(struct kernel_vm86_regs *regs, int retval)
     97{
     98	struct task_struct *tsk = current;
     99	struct vm86plus_struct __user *user;
    100	struct vm86 *vm86 = current->thread.vm86;
    101
    102	/*
    103	 * This gets called from entry.S with interrupts disabled, but
    104	 * from process context. Enable interrupts here, before trying
    105	 * to access user space.
    106	 */
    107	local_irq_enable();
    108
    109	BUG_ON(!vm86);
    110
    111	set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask);
    112	user = vm86->user_vm86;
    113
    114	if (!user_access_begin(user, vm86->vm86plus.is_vm86pus ?
    115		       sizeof(struct vm86plus_struct) :
    116		       sizeof(struct vm86_struct)))
    117		goto Efault;
    118
    119	unsafe_put_user(regs->pt.bx, &user->regs.ebx, Efault_end);
    120	unsafe_put_user(regs->pt.cx, &user->regs.ecx, Efault_end);
    121	unsafe_put_user(regs->pt.dx, &user->regs.edx, Efault_end);
    122	unsafe_put_user(regs->pt.si, &user->regs.esi, Efault_end);
    123	unsafe_put_user(regs->pt.di, &user->regs.edi, Efault_end);
    124	unsafe_put_user(regs->pt.bp, &user->regs.ebp, Efault_end);
    125	unsafe_put_user(regs->pt.ax, &user->regs.eax, Efault_end);
    126	unsafe_put_user(regs->pt.ip, &user->regs.eip, Efault_end);
    127	unsafe_put_user(regs->pt.cs, &user->regs.cs, Efault_end);
    128	unsafe_put_user(regs->pt.flags, &user->regs.eflags, Efault_end);
    129	unsafe_put_user(regs->pt.sp, &user->regs.esp, Efault_end);
    130	unsafe_put_user(regs->pt.ss, &user->regs.ss, Efault_end);
    131	unsafe_put_user(regs->es, &user->regs.es, Efault_end);
    132	unsafe_put_user(regs->ds, &user->regs.ds, Efault_end);
    133	unsafe_put_user(regs->fs, &user->regs.fs, Efault_end);
    134	unsafe_put_user(regs->gs, &user->regs.gs, Efault_end);
    135
    136	/*
    137	 * Don't write screen_bitmap in case some user had a value there
    138	 * and expected it to remain unchanged.
    139	 */
    140
    141	user_access_end();
    142
    143exit_vm86:
    144	preempt_disable();
    145	tsk->thread.sp0 = vm86->saved_sp0;
    146	tsk->thread.sysenter_cs = __KERNEL_CS;
    147	update_task_stack(tsk);
    148	refresh_sysenter_cs(&tsk->thread);
    149	vm86->saved_sp0 = 0;
    150	preempt_enable();
    151
    152	memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
    153
    154	loadsegment(gs, vm86->regs32.gs);
    155
    156	regs->pt.ax = retval;
    157	return;
    158
    159Efault_end:
    160	user_access_end();
    161Efault:
    162	pr_alert("could not access userspace vm86 info\n");
    163	force_exit_sig(SIGSEGV);
    164	goto exit_vm86;
    165}
    166
    167static int do_vm86_irq_handling(int subfunction, int irqnumber);
    168static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus);
    169
    170SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, user_vm86)
    171{
    172	return do_sys_vm86((struct vm86plus_struct __user *) user_vm86, false);
    173}
    174
    175
    176SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
    177{
    178	switch (cmd) {
    179	case VM86_REQUEST_IRQ:
    180	case VM86_FREE_IRQ:
    181	case VM86_GET_IRQ_BITS:
    182	case VM86_GET_AND_RESET_IRQ:
    183		return do_vm86_irq_handling(cmd, (int)arg);
    184	case VM86_PLUS_INSTALL_CHECK:
    185		/*
    186		 * NOTE: on old vm86 stuff this will return the error
    187		 *  from access_ok(), because the subfunction is
    188		 *  interpreted as (invalid) address to vm86_struct.
    189		 *  So the installation check works.
    190		 */
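		/*
		 * Illustrative (hypothetical) userspace probe built on the
		 * behaviour described above:
		 *
		 *	if (vm86(VM86_PLUS_INSTALL_CHECK, 0) == 0)
		 *		have_vm86plus = 1;
		 *
		 * On a kernel that only understands the old interface the
		 * argument is treated as a bad vm86_struct pointer and the
		 * call fails, so the probe distinguishes the two.
		 */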
    191		return 0;
    192	}
    193
    194	/* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */
    195	return do_sys_vm86((struct vm86plus_struct __user *) arg, true);
    196}
    197
    198
    199static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
    200{
    201	struct task_struct *tsk = current;
    202	struct vm86 *vm86 = tsk->thread.vm86;
    203	struct kernel_vm86_regs vm86regs;
    204	struct pt_regs *regs = current_pt_regs();
    205	unsigned long err = 0;
    206	struct vm86_struct v;
    207
    208	err = security_mmap_addr(0);
    209	if (err) {
    210		/*
    211		 * vm86 cannot virtualize the address space, so vm86 users
    212		 * need to manage the low 1MB themselves using mmap.  Given
    213		 * that BIOS places important data in the first page, vm86
    214		 * is essentially useless if mmap_min_addr != 0.  DOSEMU,
    215		 * for example, won't even bother trying to use vm86 if it
    216		 * can't map a page at virtual address 0.
    217		 *
    218		 * To reduce the available kernel attack surface, simply
    219		 * disallow vm86(old) for users who cannot mmap at va 0.
    220		 *
    221		 * The implementation of security_mmap_addr will allow
    222		 * suitably privileged users to map va 0 even if
    223		 * vm.mmap_min_addr is set above 0, and we want this
    224		 * behavior for vm86 as well, as it ensures that legacy
    225		 * tools like vbetool will not fail just because of
    226		 * vm.mmap_min_addr.
    227		 */
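		/*
		 * Illustrative (hypothetical) userspace sketch of what such
		 * an emulator does before entering vm86 mode:
		 *
		 *	mmap((void *)0, 0x100000,
		 *	     PROT_READ | PROT_WRITE | PROT_EXEC,
		 *	     MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
		 *
		 * i.e. it maps the low 1MB itself, which only works when
		 * mapping page 0 is permitted as described above.
		 */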
    228		pr_info_once("Denied a call to vm86(old) from %s[%d] (uid: %d).  Set the vm.mmap_min_addr sysctl to 0 and/or adjust LSM mmap_min_addr policy to enable vm86 if you are using a vm86-based DOS emulator.\n",
    229			     current->comm, task_pid_nr(current),
    230			     from_kuid_munged(&init_user_ns, current_uid()));
    231		return -EPERM;
    232	}
    233
    234	if (!vm86) {
    235		if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL)))
    236			return -ENOMEM;
    237		tsk->thread.vm86 = vm86;
    238	}
    239	if (vm86->saved_sp0)
    240		return -EPERM;
    241
    242	if (copy_from_user(&v, user_vm86,
    243			offsetof(struct vm86_struct, int_revectored)))
    244		return -EFAULT;
    245
    246
    247	/* VM86_SCREEN_BITMAP had numerous bugs and appears to have no users. */
    248	if (v.flags & VM86_SCREEN_BITMAP) {
    249		char comm[TASK_COMM_LEN];
    250
    251		pr_info_once("vm86: '%s' uses VM86_SCREEN_BITMAP, which is no longer supported\n", get_task_comm(comm, current));
    252		return -EINVAL;
    253	}
    254
    255	memset(&vm86regs, 0, sizeof(vm86regs));
    256
    257	vm86regs.pt.bx = v.regs.ebx;
    258	vm86regs.pt.cx = v.regs.ecx;
    259	vm86regs.pt.dx = v.regs.edx;
    260	vm86regs.pt.si = v.regs.esi;
    261	vm86regs.pt.di = v.regs.edi;
    262	vm86regs.pt.bp = v.regs.ebp;
    263	vm86regs.pt.ax = v.regs.eax;
    264	vm86regs.pt.ip = v.regs.eip;
    265	vm86regs.pt.cs = v.regs.cs;
    266	vm86regs.pt.flags = v.regs.eflags;
    267	vm86regs.pt.sp = v.regs.esp;
    268	vm86regs.pt.ss = v.regs.ss;
    269	vm86regs.es = v.regs.es;
    270	vm86regs.ds = v.regs.ds;
    271	vm86regs.fs = v.regs.fs;
    272	vm86regs.gs = v.regs.gs;
    273
    274	vm86->flags = v.flags;
    275	vm86->cpu_type = v.cpu_type;
    276
    277	if (copy_from_user(&vm86->int_revectored,
    278			   &user_vm86->int_revectored,
    279			   sizeof(struct revectored_struct)))
    280		return -EFAULT;
    281	if (copy_from_user(&vm86->int21_revectored,
    282			   &user_vm86->int21_revectored,
    283			   sizeof(struct revectored_struct)))
    284		return -EFAULT;
    285	if (plus) {
    286		if (copy_from_user(&vm86->vm86plus, &user_vm86->vm86plus,
    287				   sizeof(struct vm86plus_info_struct)))
    288			return -EFAULT;
    289		vm86->vm86plus.is_vm86pus = 1;
    290	} else
    291		memset(&vm86->vm86plus, 0,
    292		       sizeof(struct vm86plus_info_struct));
    293
    294	memcpy(&vm86->regs32, regs, sizeof(struct pt_regs));
    295	vm86->user_vm86 = user_vm86;
    296
    297/*
    298 * The flags register is also special: we cannot trust that the user
    299 * has set it up safely, so this makes sure interrupt etc flags are
    300 * inherited from protected mode.
    301 */
    302	VEFLAGS = vm86regs.pt.flags;
    303	vm86regs.pt.flags &= SAFE_MASK;
    304	vm86regs.pt.flags |= regs->flags & ~SAFE_MASK;
    305	vm86regs.pt.flags |= X86_VM_MASK;
    306
    307	vm86regs.pt.orig_ax = regs->orig_ax;
    308
    309	switch (vm86->cpu_type) {
    310	case CPU_286:
    311		vm86->veflags_mask = 0;
    312		break;
    313	case CPU_386:
    314		vm86->veflags_mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL;
    315		break;
    316	case CPU_486:
    317		vm86->veflags_mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
    318		break;
    319	default:
    320		vm86->veflags_mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
    321		break;
    322	}
    323
    324/*
    325 * Save old state
    326 */
    327	vm86->saved_sp0 = tsk->thread.sp0;
    328	savesegment(gs, vm86->regs32.gs);
    329
    330	/* make room for real-mode segments */
    331	preempt_disable();
    332	tsk->thread.sp0 += 16;
    333
    334	if (boot_cpu_has(X86_FEATURE_SEP)) {
    335		tsk->thread.sysenter_cs = 0;
    336		refresh_sysenter_cs(&tsk->thread);
    337	}
    338
    339	update_task_stack(tsk);
    340	preempt_enable();
    341
    342	memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs));
    343	return regs->ax;
    344}
    345
    346static inline void set_IF(struct kernel_vm86_regs *regs)
    347{
    348	VEFLAGS |= X86_EFLAGS_VIF;
    349}
    350
    351static inline void clear_IF(struct kernel_vm86_regs *regs)
    352{
    353	VEFLAGS &= ~X86_EFLAGS_VIF;
    354}
    355
    356static inline void clear_TF(struct kernel_vm86_regs *regs)
    357{
    358	regs->pt.flags &= ~X86_EFLAGS_TF;
    359}
    360
    361static inline void clear_AC(struct kernel_vm86_regs *regs)
    362{
    363	regs->pt.flags &= ~X86_EFLAGS_AC;
    364}
    365
    366/*
    367 * It is correct to call set_IF(regs) from the set_vflags_*
    368 * functions. However someone forgot to call clear_IF(regs)
    369 * in the opposite case.
    370 * After the command sequence CLI PUSHF STI POPF you should
    371 * end up with interrupts disabled, but you ended up with
    372 * interrupts enabled.
    373 *  ( I was testing my own changes, but the only bug I
    374 *    could find was in a function I had not changed. )
    375 * [KD]
    376 */
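/*
 * Worked example of the sequence above, with the virtual IF tracked in
 * VEFLAGS (VIF):
 *
 *	cli	-> clear_IF():	VIF = 0
 *	pushf	-> pushes flags with IF = 0
 *	sti	-> set_IF():	VIF = 1
 *	popf	-> set_vflags_*() sees IF = 0 and must call clear_IF()
 *
 * Without the clear_IF() calls below, the final popf would leave VIF set
 * and the guest would wrongly believe interrupts are still enabled.
 */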
    377
    378static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs *regs)
    379{
    380	set_flags(VEFLAGS, flags, current->thread.vm86->veflags_mask);
    381	set_flags(regs->pt.flags, flags, SAFE_MASK);
    382	if (flags & X86_EFLAGS_IF)
    383		set_IF(regs);
    384	else
    385		clear_IF(regs);
    386}
    387
    388static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs *regs)
    389{
    390	set_flags(VFLAGS, flags, current->thread.vm86->veflags_mask);
    391	set_flags(regs->pt.flags, flags, SAFE_MASK);
    392	if (flags & X86_EFLAGS_IF)
    393		set_IF(regs);
    394	else
    395		clear_IF(regs);
    396}
    397
    398static inline unsigned long get_vflags(struct kernel_vm86_regs *regs)
    399{
    400	unsigned long flags = regs->pt.flags & RETURN_MASK;
    401
    402	if (VEFLAGS & X86_EFLAGS_VIF)
    403		flags |= X86_EFLAGS_IF;
    404	flags |= X86_EFLAGS_IOPL;
    405	return flags | (VEFLAGS & current->thread.vm86->veflags_mask);
    406}
    407
    408static inline int is_revectored(int nr, struct revectored_struct *bitmap)
    409{
    410	return test_bit(nr, bitmap->__map);
    411}
    412
    413#define val_byte(val, n) (((__u8 *)&val)[n])
    414
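/*
 * The push/pop helpers below transfer one byte at a time: their 'ptr'
 * argument is a 16-bit value in all callers, so each increment or
 * decrement wraps at 64K exactly like a real-mode stack pointer, and no
 * single access can straddle the 64K segment boundary.
 */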
    415#define pushb(base, ptr, val, err_label) \
    416	do { \
    417		__u8 __val = val; \
    418		ptr--; \
    419		if (put_user(__val, base + ptr) < 0) \
    420			goto err_label; \
    421	} while (0)
    422
    423#define pushw(base, ptr, val, err_label) \
    424	do { \
    425		__u16 __val = val; \
    426		ptr--; \
    427		if (put_user(val_byte(__val, 1), base + ptr) < 0) \
    428			goto err_label; \
    429		ptr--; \
    430		if (put_user(val_byte(__val, 0), base + ptr) < 0) \
    431			goto err_label; \
    432	} while (0)
    433
    434#define pushl(base, ptr, val, err_label) \
    435	do { \
    436		__u32 __val = val; \
    437		ptr--; \
    438		if (put_user(val_byte(__val, 3), base + ptr) < 0) \
    439			goto err_label; \
    440		ptr--; \
    441		if (put_user(val_byte(__val, 2), base + ptr) < 0) \
    442			goto err_label; \
    443		ptr--; \
    444		if (put_user(val_byte(__val, 1), base + ptr) < 0) \
    445			goto err_label; \
    446		ptr--; \
    447		if (put_user(val_byte(__val, 0), base + ptr) < 0) \
    448			goto err_label; \
    449	} while (0)
    450
    451#define popb(base, ptr, err_label) \
    452	({ \
    453		__u8 __res; \
    454		if (get_user(__res, base + ptr) < 0) \
    455			goto err_label; \
    456		ptr++; \
    457		__res; \
    458	})
    459
    460#define popw(base, ptr, err_label) \
    461	({ \
    462		__u16 __res; \
    463		if (get_user(val_byte(__res, 0), base + ptr) < 0) \
    464			goto err_label; \
    465		ptr++; \
    466		if (get_user(val_byte(__res, 1), base + ptr) < 0) \
    467			goto err_label; \
    468		ptr++; \
    469		__res; \
    470	})
    471
    472#define popl(base, ptr, err_label) \
    473	({ \
    474		__u32 __res; \
    475		if (get_user(val_byte(__res, 0), base + ptr) < 0) \
    476			goto err_label; \
    477		ptr++; \
    478		if (get_user(val_byte(__res, 1), base + ptr) < 0) \
    479			goto err_label; \
    480		ptr++; \
    481		if (get_user(val_byte(__res, 2), base + ptr) < 0) \
    482			goto err_label; \
    483		ptr++; \
    484		if (get_user(val_byte(__res, 3), base + ptr) < 0) \
    485			goto err_label; \
    486		ptr++; \
    487		__res; \
    488	})
    489
    490/* There are so many possible reasons for this function to return
    491 * VM86_INTx, so adding another doesn't bother me. We can expect
    492 * userspace programs to be able to handle it. (Getting a problem
    493 * in userspace is always better than an Oops anyway.) [KD]
    494 */
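/*
 * do_int() mirrors a real-mode INT n: the IVT entry for vector i lives at
 * linear address i*4 (offset in the low word, segment in the high word),
 * FLAGS, CS and IP are pushed (6 bytes) on the guest stack, and execution
 * continues at segment:offset.  Calls from or into the BIOS segment, and
 * vectors userspace has revectored, are bounced back to the monitor via
 * VM86_INTx instead.
 */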
    495static void do_int(struct kernel_vm86_regs *regs, int i,
    496    unsigned char __user *ssp, unsigned short sp)
    497{
    498	unsigned long __user *intr_ptr;
    499	unsigned long segoffs;
    500	struct vm86 *vm86 = current->thread.vm86;
    501
    502	if (regs->pt.cs == BIOSSEG)
    503		goto cannot_handle;
    504	if (is_revectored(i, &vm86->int_revectored))
    505		goto cannot_handle;
    506	if (i == 0x21 && is_revectored(AH(regs), &vm86->int21_revectored))
    507		goto cannot_handle;
    508	intr_ptr = (unsigned long __user *) (i << 2);
    509	if (get_user(segoffs, intr_ptr))
    510		goto cannot_handle;
    511	if ((segoffs >> 16) == BIOSSEG)
    512		goto cannot_handle;
    513	pushw(ssp, sp, get_vflags(regs), cannot_handle);
    514	pushw(ssp, sp, regs->pt.cs, cannot_handle);
    515	pushw(ssp, sp, IP(regs), cannot_handle);
    516	regs->pt.cs = segoffs >> 16;
    517	SP(regs) -= 6;
    518	IP(regs) = segoffs & 0xffff;
    519	clear_TF(regs);
    520	clear_IF(regs);
    521	clear_AC(regs);
    522	return;
    523
    524cannot_handle:
    525	save_v86_state(regs, VM86_INTx + (i << 8));
    526}
    527
    528int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
    529{
    530	struct vm86 *vm86 = current->thread.vm86;
    531
    532	if (vm86->vm86plus.is_vm86pus) {
    533		if ((trapno == 3) || (trapno == 1)) {
    534			save_v86_state(regs, VM86_TRAP + (trapno << 8));
    535			return 0;
    536		}
    537		do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
    538		return 0;
    539	}
    540	if (trapno != 1)
     541	return 1; /* we let this be handled by the calling routine */
    542	current->thread.trap_nr = trapno;
    543	current->thread.error_code = error_code;
    544	force_sig(SIGTRAP);
    545	return 0;
    546}
    547
    548void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
    549{
    550	unsigned char opcode;
    551	unsigned char __user *csp;
    552	unsigned char __user *ssp;
    553	unsigned short ip, sp, orig_flags;
    554	int data32, pref_done;
    555	struct vm86plus_info_struct *vmpi = &current->thread.vm86->vm86plus;
    556
    557#define CHECK_IF_IN_TRAP \
    558	if (vmpi->vm86dbg_active && vmpi->vm86dbg_TFpendig) \
    559		newflags |= X86_EFLAGS_TF
    560
    561	orig_flags = *(unsigned short *)&regs->pt.flags;
    562
    563	csp = (unsigned char __user *) (regs->pt.cs << 4);
    564	ssp = (unsigned char __user *) (regs->pt.ss << 4);
    565	sp = SP(regs);
    566	ip = IP(regs);
    567
    568	data32 = 0;
    569	pref_done = 0;
    570	do {
    571		switch (opcode = popb(csp, ip, simulate_sigsegv)) {
    572		case 0x66:      /* 32-bit data */     data32 = 1; break;
    573		case 0x67:      /* 32-bit address */  break;
    574		case 0x2e:      /* CS */              break;
    575		case 0x3e:      /* DS */              break;
    576		case 0x26:      /* ES */              break;
    577		case 0x36:      /* SS */              break;
    578		case 0x65:      /* GS */              break;
    579		case 0x64:      /* FS */              break;
    580		case 0xf2:      /* repnz */       break;
    581		case 0xf3:      /* rep */             break;
    582		default: pref_done = 1;
    583		}
    584	} while (!pref_done);
    585
    586	switch (opcode) {
    587
    588	/* pushf */
    589	case 0x9c:
    590		if (data32) {
    591			pushl(ssp, sp, get_vflags(regs), simulate_sigsegv);
    592			SP(regs) -= 4;
    593		} else {
    594			pushw(ssp, sp, get_vflags(regs), simulate_sigsegv);
    595			SP(regs) -= 2;
    596		}
    597		IP(regs) = ip;
    598		goto vm86_fault_return;
    599
    600	/* popf */
    601	case 0x9d:
    602		{
    603		unsigned long newflags;
    604		if (data32) {
    605			newflags = popl(ssp, sp, simulate_sigsegv);
    606			SP(regs) += 4;
    607		} else {
    608			newflags = popw(ssp, sp, simulate_sigsegv);
    609			SP(regs) += 2;
    610		}
    611		IP(regs) = ip;
    612		CHECK_IF_IN_TRAP;
    613		if (data32)
    614			set_vflags_long(newflags, regs);
    615		else
    616			set_vflags_short(newflags, regs);
    617
    618		goto check_vip;
    619		}
    620
    621	/* int xx */
    622	case 0xcd: {
    623		int intno = popb(csp, ip, simulate_sigsegv);
    624		IP(regs) = ip;
    625		if (vmpi->vm86dbg_active) {
    626			if ((1 << (intno & 7)) & vmpi->vm86dbg_intxxtab[intno >> 3]) {
    627				save_v86_state(regs, VM86_INTx + (intno << 8));
    628				return;
    629			}
    630		}
    631		do_int(regs, intno, ssp, sp);
    632		return;
    633	}
    634
    635	/* iret */
    636	case 0xcf:
    637		{
    638		unsigned long newip;
    639		unsigned long newcs;
    640		unsigned long newflags;
    641		if (data32) {
    642			newip = popl(ssp, sp, simulate_sigsegv);
    643			newcs = popl(ssp, sp, simulate_sigsegv);
    644			newflags = popl(ssp, sp, simulate_sigsegv);
    645			SP(regs) += 12;
    646		} else {
    647			newip = popw(ssp, sp, simulate_sigsegv);
    648			newcs = popw(ssp, sp, simulate_sigsegv);
    649			newflags = popw(ssp, sp, simulate_sigsegv);
    650			SP(regs) += 6;
    651		}
    652		IP(regs) = newip;
    653		regs->pt.cs = newcs;
    654		CHECK_IF_IN_TRAP;
    655		if (data32) {
    656			set_vflags_long(newflags, regs);
    657		} else {
    658			set_vflags_short(newflags, regs);
    659		}
    660		goto check_vip;
    661		}
    662
    663	/* cli */
    664	case 0xfa:
    665		IP(regs) = ip;
    666		clear_IF(regs);
    667		goto vm86_fault_return;
    668
    669	/* sti */
    670	/*
    671	 * Damn. This is incorrect: the 'sti' instruction should actually
    672	 * enable interrupts after the /next/ instruction. Not good.
    673	 *
    674	 * Probably needs some horsing around with the TF flag. Aiee..
    675	 */
    676	case 0xfb:
    677		IP(regs) = ip;
    678		set_IF(regs);
    679		goto check_vip;
    680
    681	default:
    682		save_v86_state(regs, VM86_UNKNOWN);
    683	}
    684
    685	return;
    686
    687check_vip:
    688	if ((VEFLAGS & (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) ==
    689	    (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) {
    690		save_v86_state(regs, VM86_STI);
    691		return;
    692	}
    693
    694vm86_fault_return:
    695	if (vmpi->force_return_for_pic  && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) {
    696		save_v86_state(regs, VM86_PICRETURN);
    697		return;
    698	}
    699	if (orig_flags & X86_EFLAGS_TF)
    700		handle_vm86_trap(regs, 0, X86_TRAP_DB);
    701	return;
    702
    703simulate_sigsegv:
    704	/* FIXME: After a long discussion with Stas we finally
    705	 *        agreed, that this is wrong. Here we should
    706	 *        really send a SIGSEGV to the user program.
    707	 *        But how do we create the correct context? We
    708	 *        are inside a general protection fault handler
     709 *        and have just returned from a page fault handler.
    710	 *        The correct context for the signal handler
    711	 *        should be a mixture of the two, but how do we
    712	 *        get the information? [KD]
    713	 */
    714	save_v86_state(regs, VM86_UNKNOWN);
    715}
    716
    717/* ---------------- vm86 special IRQ passing stuff ----------------- */
    718
    719#define VM86_IRQNAME		"vm86irq"
    720
    721static struct vm86_irqs {
    722	struct task_struct *tsk;
    723	int sig;
    724} vm86_irqs[16];
    725
    726static DEFINE_SPINLOCK(irqbits_lock);
    727static int irqbits;
    728
    729#define ALLOWED_SIGS (1 /* 0 = don't send a signal */ \
    730	| (1 << SIGUSR1) | (1 << SIGUSR2) | (1 << SIGIO)  | (1 << SIGURG) \
    731	| (1 << SIGUNUSED))
    732
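/*
 * Illustrative (hypothetical) userspace flow for this IRQ forwarding:
 *
 *	vm86(VM86_REQUEST_IRQ, (SIGUSR1 << 8) | irq);	bind irq to SIGUSR1
 *	...
 *	if (vm86(VM86_GET_AND_RESET_IRQ, irq))		in the signal handler
 *		deliver the interrupt to the guest;	or a polling loop;
 *							re-enables the irq
 *	vm86(VM86_FREE_IRQ, irq);			when finished
 */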
    733static irqreturn_t irq_handler(int intno, void *dev_id)
    734{
    735	int irq_bit;
    736	unsigned long flags;
    737
    738	spin_lock_irqsave(&irqbits_lock, flags);
    739	irq_bit = 1 << intno;
    740	if ((irqbits & irq_bit) || !vm86_irqs[intno].tsk)
    741		goto out;
    742	irqbits |= irq_bit;
    743	if (vm86_irqs[intno].sig)
    744		send_sig(vm86_irqs[intno].sig, vm86_irqs[intno].tsk, 1);
    745	/*
    746	 * IRQ will be re-enabled when user asks for the irq (whether
    747	 * polling or as a result of the signal)
    748	 */
    749	disable_irq_nosync(intno);
    750	spin_unlock_irqrestore(&irqbits_lock, flags);
    751	return IRQ_HANDLED;
    752
    753out:
    754	spin_unlock_irqrestore(&irqbits_lock, flags);
    755	return IRQ_NONE;
    756}
    757
    758static inline void free_vm86_irq(int irqnumber)
    759{
    760	unsigned long flags;
    761
    762	free_irq(irqnumber, NULL);
    763	vm86_irqs[irqnumber].tsk = NULL;
    764
    765	spin_lock_irqsave(&irqbits_lock, flags);
    766	irqbits &= ~(1 << irqnumber);
    767	spin_unlock_irqrestore(&irqbits_lock, flags);
    768}
    769
    770void release_vm86_irqs(struct task_struct *task)
    771{
    772	int i;
    773	for (i = FIRST_VM86_IRQ ; i <= LAST_VM86_IRQ; i++)
    774	    if (vm86_irqs[i].tsk == task)
    775		free_vm86_irq(i);
    776}
    777
    778static inline int get_and_reset_irq(int irqnumber)
    779{
    780	int bit;
    781	unsigned long flags;
    782	int ret = 0;
    783
    784	if (invalid_vm86_irq(irqnumber)) return 0;
    785	if (vm86_irqs[irqnumber].tsk != current) return 0;
    786	spin_lock_irqsave(&irqbits_lock, flags);
    787	bit = irqbits & (1 << irqnumber);
    788	irqbits &= ~bit;
    789	if (bit) {
    790		enable_irq(irqnumber);
    791		ret = 1;
    792	}
    793
    794	spin_unlock_irqrestore(&irqbits_lock, flags);
    795	return ret;
    796}
    797
    798
    799static int do_vm86_irq_handling(int subfunction, int irqnumber)
    800{
    801	int ret;
    802	switch (subfunction) {
    803		case VM86_GET_AND_RESET_IRQ: {
    804			return get_and_reset_irq(irqnumber);
    805		}
    806		case VM86_GET_IRQ_BITS: {
    807			return irqbits;
    808		}
    809		case VM86_REQUEST_IRQ: {
    810			int sig = irqnumber >> 8;
    811			int irq = irqnumber & 255;
    812			if (!capable(CAP_SYS_ADMIN)) return -EPERM;
    813			if (!((1 << sig) & ALLOWED_SIGS)) return -EPERM;
    814			if (invalid_vm86_irq(irq)) return -EPERM;
    815			if (vm86_irqs[irq].tsk) return -EPERM;
    816			ret = request_irq(irq, &irq_handler, 0, VM86_IRQNAME, NULL);
    817			if (ret) return ret;
    818			vm86_irqs[irq].sig = sig;
    819			vm86_irqs[irq].tsk = current;
    820			return irq;
    821		}
    822		case  VM86_FREE_IRQ: {
    823			if (invalid_vm86_irq(irqnumber)) return -EPERM;
    824			if (!vm86_irqs[irqnumber].tsk) return 0;
    825			if (vm86_irqs[irqnumber].tsk != current) return -EPERM;
    826			free_vm86_irq(irqnumber);
    827			return 0;
    828		}
    829	}
    830	return -EINVAL;
    831}
    832