cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

test-i386.c (71837B)


      1/*
      2 *  x86 CPU test
      3 *
      4 *  Copyright (c) 2003 Fabrice Bellard
      5 *
      6 *  This program is free software; you can redistribute it and/or modify
      7 *  it under the terms of the GNU General Public License as published by
      8 *  the Free Software Foundation; either version 2 of the License, or
      9 *  (at your option) any later version.
     10 *
     11 *  This program is distributed in the hope that it will be useful,
     12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 *  GNU General Public License for more details.
     15 *
     16 *  You should have received a copy of the GNU General Public License
     17 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
     18 */
     19#define _GNU_SOURCE
     20#include <stdlib.h>
     21#include <stdio.h>
     22#include <string.h>
     23#include <inttypes.h>
     24#include <math.h>
     25#include <signal.h>
     26#include <setjmp.h>
     27#include <errno.h>
     28#include <sys/ucontext.h>
     29#include <sys/mman.h>
     30
/*
 * Build-time test selection.  The segment tests (and the currently
 * commented-out vm86 tests) are only considered for non-x86_64 builds.
 */
#if !defined(__x86_64__)
//#define TEST_VM86
#define TEST_SEGS
#endif
//#define LINUX_VM86_IOPL_FIX
//#define TEST_P4_FLAGS
/*
 * TEST_SSE follows the compiler's __SSE__ predefine.  TEST_CMOV and
 * TEST_FCOMI are defined to 1 in both branches.
 */
#ifdef __SSE__
#define TEST_SSE
#define TEST_CMOV  1
#define TEST_FCOMI 1
#else
#undef TEST_SSE
#define TEST_CMOV  1
#define TEST_FCOMI 1
#endif

/*
 * printf helpers: FMT64X prints a 64-bit value as 16 hex digits, FMTLX
 * prints a long (16 hex digits on x86_64, 8 otherwise).  X86_64_ONLY(x)
 * expands to x on x86_64 and to nothing elsewhere.
 */
#if defined(__x86_64__)
#define FMT64X "%016lx"
#define FMTLX "%016lx"
#define X86_64_ONLY(x) x
#else
#define FMT64X "%016" PRIx64
#define FMTLX "%08lx"
#define X86_64_ONLY(x)
#endif
     56
     57#ifdef TEST_VM86
     58#include <asm/vm86.h>
     59#endif
     60
/* Token pasting and stringification, each with one extra expansion level. */
#define xglue(x, y) x ## y
#define glue(x, y) xglue(x, y)
#define stringify(s)	tostring(s)
#define tostring(s)	#s

/* EFLAGS bit masks: carry, parity, adjust, zero, sign and overflow. */
#define CC_C   	0x0001
#define CC_P 	0x0004
#define CC_A	0x0010
#define CC_Z	0x0040
#define CC_S    0x0080
#define CC_O    0x0800

/* Marks a symbol as possibly unused and places it in the "initcall" section. */
#define __init_call	__attribute__ ((unused,__section__ ("initcall")))

/* Flags compared by the tests that follow, until CC_MASK is redefined. */
#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)
     76
     77#if defined(__x86_64__)
     78static inline long i2l(long v)
     79{
     80    return v | ((v ^ 0xabcd) << 32);
     81}
     82#else
     83static inline long i2l(long v)
     84{
     85    return v;
     86}
     87#endif
     88
/*
 * Instantiate the arithmetic/logic test template once per instruction.
 * Each inclusion of test-i386.h is parameterised by the OP macro (plus
 * OP_CC / OP1 for some instructions) defined immediately before it.
 * NOTE(review): the template headers are not visible here; presumably they
 * #undef these parameter macros after use, OP_CC selects carry-in variants
 * and OP1 selects single-operand instructions — confirm against the headers.
 */
#define OP add
#include "test-i386.h"

#define OP sub
#include "test-i386.h"

#define OP xor
#include "test-i386.h"

#define OP and
#include "test-i386.h"

#define OP or
#include "test-i386.h"

#define OP cmp
#include "test-i386.h"

#define OP adc
#define OP_CC
#include "test-i386.h"

#define OP sbb
#define OP_CC
#include "test-i386.h"

#define OP inc
#define OP_CC
#define OP1
#include "test-i386.h"

#define OP dec
#define OP_CC
#define OP1
#include "test-i386.h"

#define OP neg
#define OP_CC
#define OP1
#include "test-i386.h"

#define OP not
#define OP_CC
#define OP1
#include "test-i386.h"

/* From here on the adjust flag (CC_A) is no longer part of the mask. */
#undef CC_MASK
#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O)

/* Shift, rotate and double-shift instructions use the shift template. */
#define OP shl
#include "test-i386-shift.h"

#define OP shr
#include "test-i386-shift.h"

#define OP sar
#include "test-i386-shift.h"

#define OP rol
#include "test-i386-shift.h"

#define OP ror
#include "test-i386-shift.h"

#define OP rcr
#define OP_CC
#include "test-i386-shift.h"

#define OP rcl
#define OP_CC
#include "test-i386-shift.h"

#define OP shld
#define OP_SHIFTD
#define OP_NOBYTE
#include "test-i386-shift.h"

#define OP shrd
#define OP_SHIFTD
#define OP_NOBYTE
#include "test-i386-shift.h"

/* XXX: should be more precise ? */
/* Only the carry flag is compared for the bit-test instructions below. */
#undef CC_MASK
#define CC_MASK (CC_C)

#define OP bt
#define OP_NOBYTE
#include "test-i386-shift.h"

#define OP bts
#define OP_NOBYTE
#include "test-i386-shift.h"

#define OP btr
#define OP_NOBYTE
#include "test-i386-shift.h"

#define OP btc
#define OP_NOBYTE
#include "test-i386-shift.h"
/* lea test (modrm support) */
/*
 * TEST_LEAQ / TEST_LEA: execute "lea STR, <reg>" with the caller's locals
 * eax, ebx, ecx, edx, esi and edi bound to the corresponding registers,
 * store the result in the caller's `res` and print it with FMTLX.
 * The two macros have identical bodies.
 */
#define TEST_LEAQ(STR)\
{\
    asm("lea " STR ", %0"\
        : "=r" (res)\
        : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
    printf("lea %s = " FMTLX "\n", STR, res);\
}

#define TEST_LEA(STR)\
{\
    asm("lea " STR ", %0"\
        : "=r" (res)\
        : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
    printf("lea %s = " FMTLX "\n", STR, res);\
}

/*
 * TEST_LEA16: same register setup, but the leal is assembled in .code16
 * mode behind an explicit 0x67 prefix byte, after which the assembler is
 * switched back to .code32.  The result is printed as 8 hex digits.
 */
#define TEST_LEA16(STR)\
{\
    asm(".code16 ; .byte 0x67 ; leal " STR ", %0 ; .code32"\
        : "=r" (res)\
        : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
    printf("lea %s = %08lx\n", STR, res);\
}
    215
    216
/*
 * Print the result of lea for a range of addressing modes.
 *
 * The six general registers used as base/index are preloaded with the
 * values i2l(0x01) .. i2l(0x20).  The 32-bit register forms always run;
 * x86_64 builds add the 64-bit register forms (including a %rip-relative
 * one), other builds add a set of 16-bit addressing forms instead.
 */
void test_lea(void)
{
    long eax, ebx, ecx, edx, esi, edi, res;
    eax = i2l(0x0001);
    ebx = i2l(0x0002);
    ecx = i2l(0x0004);
    edx = i2l(0x0008);
    esi = i2l(0x0010);
    edi = i2l(0x0020);

    /* absolute displacement only */
    TEST_LEA("0x4000");

    /* base register only */
    TEST_LEA("(%%eax)");
    TEST_LEA("(%%ebx)");
    TEST_LEA("(%%ecx)");
    TEST_LEA("(%%edx)");
    TEST_LEA("(%%esi)");
    TEST_LEA("(%%edi)");

    /* base + small displacement */
    TEST_LEA("0x40(%%eax)");
    TEST_LEA("0x40(%%ebx)");
    TEST_LEA("0x40(%%ecx)");
    TEST_LEA("0x40(%%edx)");
    TEST_LEA("0x40(%%esi)");
    TEST_LEA("0x40(%%edi)");

    /* base + larger displacement */
    TEST_LEA("0x4000(%%eax)");
    TEST_LEA("0x4000(%%ebx)");
    TEST_LEA("0x4000(%%ecx)");
    TEST_LEA("0x4000(%%edx)");
    TEST_LEA("0x4000(%%esi)");
    TEST_LEA("0x4000(%%edi)");

    /* base + index */
    TEST_LEA("(%%eax, %%ecx)");
    TEST_LEA("(%%ebx, %%edx)");
    TEST_LEA("(%%ecx, %%ecx)");
    TEST_LEA("(%%edx, %%ecx)");
    TEST_LEA("(%%esi, %%ecx)");
    TEST_LEA("(%%edi, %%ecx)");

    TEST_LEA("0x40(%%eax, %%ecx)");
    TEST_LEA("0x4000(%%ebx, %%edx)");

    /* base + scaled index */
    TEST_LEA("(%%ecx, %%ecx, 2)");
    TEST_LEA("(%%edx, %%ecx, 4)");
    TEST_LEA("(%%esi, %%ecx, 8)");

    /* scaled index without base */
    TEST_LEA("(,%%eax, 2)");
    TEST_LEA("(,%%ebx, 4)");
    TEST_LEA("(,%%ecx, 8)");

    TEST_LEA("0x40(,%%eax, 2)");
    TEST_LEA("0x40(,%%ebx, 4)");
    TEST_LEA("0x40(,%%ecx, 8)");


    /* displacement + base + scaled index */
    TEST_LEA("-10(%%ecx, %%ecx, 2)");
    TEST_LEA("-10(%%edx, %%ecx, 4)");
    TEST_LEA("-10(%%esi, %%ecx, 8)");

    TEST_LEA("0x4000(%%ecx, %%ecx, 2)");
    TEST_LEA("0x4000(%%edx, %%ecx, 4)");
    TEST_LEA("0x4000(%%esi, %%ecx, 8)");

#if defined(__x86_64__)
    /* same forms with 64-bit registers, plus %rip-relative addressing */
    TEST_LEAQ("0x4000");
    TEST_LEAQ("0x4000(%%rip)");

    TEST_LEAQ("(%%rax)");
    TEST_LEAQ("(%%rbx)");
    TEST_LEAQ("(%%rcx)");
    TEST_LEAQ("(%%rdx)");
    TEST_LEAQ("(%%rsi)");
    TEST_LEAQ("(%%rdi)");

    TEST_LEAQ("0x40(%%rax)");
    TEST_LEAQ("0x40(%%rbx)");
    TEST_LEAQ("0x40(%%rcx)");
    TEST_LEAQ("0x40(%%rdx)");
    TEST_LEAQ("0x40(%%rsi)");
    TEST_LEAQ("0x40(%%rdi)");

    TEST_LEAQ("0x4000(%%rax)");
    TEST_LEAQ("0x4000(%%rbx)");
    TEST_LEAQ("0x4000(%%rcx)");
    TEST_LEAQ("0x4000(%%rdx)");
    TEST_LEAQ("0x4000(%%rsi)");
    TEST_LEAQ("0x4000(%%rdi)");

    TEST_LEAQ("(%%rax, %%rcx)");
    TEST_LEAQ("(%%rbx, %%rdx)");
    TEST_LEAQ("(%%rcx, %%rcx)");
    TEST_LEAQ("(%%rdx, %%rcx)");
    TEST_LEAQ("(%%rsi, %%rcx)");
    TEST_LEAQ("(%%rdi, %%rcx)");

    TEST_LEAQ("0x40(%%rax, %%rcx)");
    TEST_LEAQ("0x4000(%%rbx, %%rdx)");

    TEST_LEAQ("(%%rcx, %%rcx, 2)");
    TEST_LEAQ("(%%rdx, %%rcx, 4)");
    TEST_LEAQ("(%%rsi, %%rcx, 8)");

    TEST_LEAQ("(,%%rax, 2)");
    TEST_LEAQ("(,%%rbx, 4)");
    TEST_LEAQ("(,%%rcx, 8)");

    TEST_LEAQ("0x40(,%%rax, 2)");
    TEST_LEAQ("0x40(,%%rbx, 4)");
    TEST_LEAQ("0x40(,%%rcx, 8)");


    TEST_LEAQ("-10(%%rcx, %%rcx, 2)");
    TEST_LEAQ("-10(%%rdx, %%rcx, 4)");
    TEST_LEAQ("-10(%%rsi, %%rcx, 8)");

    TEST_LEAQ("0x4000(%%rcx, %%rcx, 2)");
    TEST_LEAQ("0x4000(%%rdx, %%rcx, 4)");
    TEST_LEAQ("0x4000(%%rsi, %%rcx, 8)");
#else
    /* limited 16 bit addressing test */
    TEST_LEA16("0x4000");
    TEST_LEA16("(%%bx)");
    TEST_LEA16("(%%si)");
    TEST_LEA16("(%%di)");
    TEST_LEA16("0x40(%%bx)");
    TEST_LEA16("0x40(%%si)");
    TEST_LEA16("0x40(%%di)");
    TEST_LEA16("0x4000(%%bx)");
    TEST_LEA16("0x4000(%%si)");
    TEST_LEA16("(%%bx,%%si)");
    TEST_LEA16("(%%bx,%%di)");
    TEST_LEA16("0x40(%%bx,%%si)");
    TEST_LEA16("0x40(%%bx,%%di)");
    TEST_LEA16("0x4000(%%bx,%%si)");
    TEST_LEA16("0x4000(%%bx,%%di)");
#endif
}
    355
    356#define TEST_JCC(JCC, v1, v2)\
    357{\
    358    int res;\
    359    asm("movl $1, %0\n\t"\
    360        "cmpl %2, %1\n\t"\
    361        "j" JCC " 1f\n\t"\
    362        "movl $0, %0\n\t"\
    363        "1:\n\t"\
    364        : "=r" (res)\
    365        : "r" (v1), "r" (v2));\
    366    printf("%-10s %d\n", "j" JCC, res);\
    367\
    368    asm("movl $0, %0\n\t"\
    369        "cmpl %2, %1\n\t"\
    370        "set" JCC " %b0\n\t"\
    371        : "=r" (res)\
    372        : "r" (v1), "r" (v2));\
    373    printf("%-10s %d\n", "set" JCC, res);\
    374 if (TEST_CMOV) {\
    375    long val = i2l(1);\
    376    long res = i2l(0x12345678);\
    377X86_64_ONLY(\
    378    asm("cmpl %2, %1\n\t"\
    379        "cmov" JCC "q %3, %0\n\t"\
    380        : "=r" (res)\
    381        : "r" (v1), "r" (v2), "m" (val), "0" (res));\
    382        printf("%-10s R=" FMTLX "\n", "cmov" JCC "q", res);)\
    383    asm("cmpl %2, %1\n\t"\
    384        "cmov" JCC "l %k3, %k0\n\t"\
    385        : "=r" (res)\
    386        : "r" (v1), "r" (v2), "m" (val), "0" (res));\
    387        printf("%-10s R=" FMTLX "\n", "cmov" JCC "l", res);\
    388    asm("cmpl %2, %1\n\t"\
    389        "cmov" JCC "w %w3, %w0\n\t"\
    390        : "=r" (res)\
    391        : "r" (v1), "r" (v2), "r" (1), "0" (res));\
    392        printf("%-10s R=" FMTLX "\n", "cmov" JCC "w", res);\
    393 } \
    394}
    395
/* various jump tests */
/*
 * Runs TEST_JCC for each condition code with operand pairs chosen so that
 * the condition is exercised with different compare outcomes.
 */
void test_jcc(void)
{
    /* equality */
    TEST_JCC("ne", 1, 1);
    TEST_JCC("ne", 1, 0);

    TEST_JCC("e", 1, 1);
    TEST_JCC("e", 1, 0);

    /* signed comparisons */
    TEST_JCC("l", 1, 1);
    TEST_JCC("l", 1, 0);
    TEST_JCC("l", 1, -1);

    TEST_JCC("le", 1, 1);
    TEST_JCC("le", 1, 0);
    TEST_JCC("le", 1, -1);

    TEST_JCC("ge", 1, 1);
    TEST_JCC("ge", 1, 0);
    TEST_JCC("ge", -1, 1);

    TEST_JCC("g", 1, 1);
    TEST_JCC("g", 1, 0);
    TEST_JCC("g", 1, -1);

    /* unsigned comparisons */
    TEST_JCC("b", 1, 1);
    TEST_JCC("b", 1, 0);
    TEST_JCC("b", 1, -1);

    TEST_JCC("be", 1, 1);
    TEST_JCC("be", 1, 0);
    TEST_JCC("be", 1, -1);

    TEST_JCC("ae", 1, 1);
    TEST_JCC("ae", 1, 0);
    TEST_JCC("ae", 1, -1);

    TEST_JCC("a", 1, 1);
    TEST_JCC("a", 1, 0);
    TEST_JCC("a", 1, -1);


    /* parity */
    TEST_JCC("p", 1, 1);
    TEST_JCC("p", 1, 0);

    TEST_JCC("np", 1, 1);
    TEST_JCC("np", 1, 0);

    /* overflow: 0x7fffffff - (-1) overflows a signed 32-bit subtraction */
    TEST_JCC("o", 0x7fffffff, 0);
    TEST_JCC("o", 0x7fffffff, -1);

    TEST_JCC("no", 0x7fffffff, 0);
    TEST_JCC("no", 0x7fffffff, -1);

    /* sign */
    TEST_JCC("s", 0, 1);
    TEST_JCC("s", 0, -1);
    TEST_JCC("s", 0, 0);

    TEST_JCC("ns", 0, 1);
    TEST_JCC("ns", 0, -1);
    TEST_JCC("ns", 0, 0);
}
    458
    459#define TEST_LOOP(insn) \
    460{\
    461    for(i = 0; i < sizeof(ecx_vals) / sizeof(long); i++) {\
    462        ecx = ecx_vals[i];\
    463        for(zf = 0; zf < 2; zf++) {\
    464    asm("test %2, %2\n\t"\
    465        "movl $1, %0\n\t"\
    466          insn " 1f\n\t" \
    467        "movl $0, %0\n\t"\
    468        "1:\n\t"\
    469        : "=a" (res)\
    470        : "c" (ecx), "b" (!zf)); \
    471    printf("%-10s ECX=" FMTLX " ZF=%ld r=%d\n", insn, ecx, zf, res);      \
    472        }\
    473   }\
    474}
    475
    476void test_loop(void)
    477{
    478    long ecx, zf;
    479    const long ecx_vals[] = {
    480        0,
    481        1,
    482        0x10000,
    483        0x10001,
    484#if defined(__x86_64__)
    485        0x100000000L,
    486        0x100000001L,
    487#endif
    488    };
    489    int i, res;
    490
    491#if !defined(__x86_64__)
    492    TEST_LOOP("jcxz");
    493    TEST_LOOP("loopw");
    494    TEST_LOOP("loopzw");
    495    TEST_LOOP("loopnzw");
    496#endif
    497
    498    TEST_LOOP("jecxz");
    499    TEST_LOOP("loopl");
    500    TEST_LOOP("loopzl");
    501    TEST_LOOP("loopnzl");
    502}
    503
/*
 * Flags compared by the mul/imul tests: all six arithmetic flags when
 * TEST_P4_FLAGS is defined, otherwise only overflow and carry.
 */
#undef CC_MASK
#ifdef TEST_P4_FLAGS
#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)
#else
#define CC_MASK (CC_O | CC_C)
#endif

/* Instantiate the multiply/divide template for mul and imul. */
#define OP mul
#include "test-i386-muldiv.h"

#define OP imul
#include "test-i386-muldiv.h"
    516
    517void test_imulw2(long op0, long op1)
    518{
    519    long res, s1, s0, flags;
    520    s0 = op0;
    521    s1 = op1;
    522    res = s0;
    523    flags = 0;
    524    asm volatile ("push %4\n\t"
    525         "popf\n\t"
    526         "imulw %w2, %w0\n\t"
    527         "pushf\n\t"
    528         "pop %1\n\t"
    529         : "=q" (res), "=g" (flags)
    530         : "q" (s1), "0" (res), "1" (flags));
    531    printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n",
    532           "imulw", s0, s1, res, flags & CC_MASK);
    533}
    534
    535void test_imull2(long op0, long op1)
    536{
    537    long res, s1, s0, flags;
    538    s0 = op0;
    539    s1 = op1;
    540    res = s0;
    541    flags = 0;
    542    asm volatile ("push %4\n\t"
    543         "popf\n\t"
    544         "imull %k2, %k0\n\t"
    545         "pushf\n\t"
    546         "pop %1\n\t"
    547         : "=q" (res), "=g" (flags)
    548         : "q" (s1), "0" (res), "1" (flags));
    549    printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n",
    550           "imull", s0, s1, res, flags & CC_MASK);
    551}
    552
    553#if defined(__x86_64__)
    554void test_imulq2(long op0, long op1)
    555{
    556    long res, s1, s0, flags;
    557    s0 = op0;
    558    s1 = op1;
    559    res = s0;
    560    flags = 0;
    561    asm volatile ("push %4\n\t"
    562         "popf\n\t"
    563         "imulq %2, %0\n\t"
    564         "pushf\n\t"
    565         "pop %1\n\t"
    566         : "=q" (res), "=g" (flags)
    567         : "q" (s1), "0" (res), "1" (flags));
    568    printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n",
    569           "imulq", s0, s1, res, flags & CC_MASK);
    570}
    571#endif
    572
/*
 * Three-operand imul with an immediate: "imul<size> $op0, src, dst" where
 * src holds op1 and dst starts at 0.  EFLAGS is loaded from a zero value
 * first; afterwards both operands, the result and the flags selected by
 * CC_MASK are printed.  `size` is the instruction suffix and `rsize` the
 * operand-size modifier letter applied to the two register operands.
 *
 * NOTE(review): CC_MASK is expanded where the macro is used, and every use
 * in test_mul comes after CC_MASK has been redefined to (0), so the CC
 * field prints as 0000 — confirm this is intended.
 */
#define TEST_IMUL_IM(size, rsize, op0, op1)\
{\
    long res, flags, s1;\
    flags = 0;\
    res = 0;\
    s1 = op1;\
    asm volatile ("push %3\n\t"\
         "popf\n\t"\
         "imul" size " $" #op0 ", %" rsize "2, %" rsize "0\n\t" \
         "pushf\n\t"\
         "pop %1\n\t"\
         : "=r" (res), "=g" (flags)\
         : "r" (s1), "1" (flags), "0" (res));\
    printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n",\
           "imul" size " im", (long)op0, (long)op1, res, flags & CC_MASK);\
}
    589
    590
/* No flags are compared for the div/idiv tests (and for code expanded below). */
#undef CC_MASK
#define CC_MASK (0)

/* Instantiate the multiply/divide template for div and idiv. */
#define OP div
#include "test-i386-muldiv.h"

#define OP idiv
#include "test-i386-muldiv.h"
    599
/*
 * Run the multiply and divide tests with a fixed set of operands.
 *
 * The test_<op><size> helpers are not defined in this part of the file;
 * presumably they are generated by the test-i386-muldiv.h inclusions —
 * confirm against that header.  The two-operand and immediate imul forms
 * use test_imul*2 and TEST_IMUL_IM.  64-bit variants are only built on
 * x86_64.
 */
void test_mul(void)
{
    /* imul, byte/word/long operand sizes */
    test_imulb(0x1234561d, 4);
    test_imulb(3, -4);
    test_imulb(0x80, 0x80);
    test_imulb(0x10, 0x10);

    test_imulw(0, 0x1234001d, 45);
    test_imulw(0, 23, -45);
    test_imulw(0, 0x8000, 0x8000);
    test_imulw(0, 0x100, 0x100);

    test_imull(0, 0x1234001d, 45);
    test_imull(0, 23, -45);
    test_imull(0, 0x80000000, 0x80000000);
    test_imull(0, 0x10000, 0x10000);

    /* mul, byte/word/long operand sizes */
    test_mulb(0x1234561d, 4);
    test_mulb(3, -4);
    test_mulb(0x80, 0x80);
    test_mulb(0x10, 0x10);

    test_mulw(0, 0x1234001d, 45);
    test_mulw(0, 23, -45);
    test_mulw(0, 0x8000, 0x8000);
    test_mulw(0, 0x100, 0x100);

    test_mull(0, 0x1234001d, 45);
    test_mull(0, 23, -45);
    test_mull(0, 0x80000000, 0x80000000);
    test_mull(0, 0x10000, 0x10000);

    /* two-operand imul */
    test_imulw2(0x1234001d, 45);
    test_imulw2(23, -45);
    test_imulw2(0x8000, 0x8000);
    test_imulw2(0x100, 0x100);

    test_imull2(0x1234001d, 45);
    test_imull2(23, -45);
    test_imull2(0x80000000, 0x80000000);
    test_imull2(0x10000, 0x10000);

    /* imul with immediate operand */
    TEST_IMUL_IM("w", "w", 45, 0x1234);
    TEST_IMUL_IM("w", "w", -45, 23);
    TEST_IMUL_IM("w", "w", 0x8000, 0x80000000);
    TEST_IMUL_IM("w", "w", 0x7fff, 0x1000);

    TEST_IMUL_IM("l", "k", 45, 0x1234);
    TEST_IMUL_IM("l", "k", -45, 23);
    TEST_IMUL_IM("l", "k", 0x8000, 0x80000000);
    TEST_IMUL_IM("l", "k", 0x7fff, 0x1000);

    /* idiv, byte/word/long operand sizes */
    test_idivb(0x12341678, 0x127e);
    test_idivb(0x43210123, -5);
    test_idivb(0x12340004, -1);

    test_idivw(0, 0x12345678, 12347);
    test_idivw(0, -23223, -45);
    test_idivw(0, 0x12348000, -1);
    test_idivw(0x12343, 0x12345678, 0x81238567);

    test_idivl(0, 0x12345678, 12347);
    test_idivl(0, -233223, -45);
    test_idivl(0, 0x80000000, -1);
    test_idivl(0x12343, 0x12345678, 0x81234567);

    /* div, byte/word/long operand sizes */
    test_divb(0x12341678, 0x127e);
    test_divb(0x43210123, -5);
    test_divb(0x12340004, -1);

    test_divw(0, 0x12345678, 12347);
    test_divw(0, -23223, -45);
    test_divw(0, 0x12348000, -1);
    test_divw(0x12343, 0x12345678, 0x81238567);

    test_divl(0, 0x12345678, 12347);
    test_divl(0, -233223, -45);
    test_divl(0, 0x80000000, -1);
    test_divl(0x12343, 0x12345678, 0x81234567);

#if defined(__x86_64__)
    /* 64-bit operand size variants */
    test_imulq(0, 0x1234001d1234001d, 45);
    test_imulq(0, 23, -45);
    test_imulq(0, 0x8000000000000000, 0x8000000000000000);
    test_imulq(0, 0x100000000, 0x100000000);

    test_mulq(0, 0x1234001d1234001d, 45);
    test_mulq(0, 23, -45);
    test_mulq(0, 0x8000000000000000, 0x8000000000000000);
    test_mulq(0, 0x100000000, 0x100000000);

    test_imulq2(0x1234001d1234001d, 45);
    test_imulq2(23, -45);
    test_imulq2(0x8000000000000000, 0x8000000000000000);
    test_imulq2(0x100000000, 0x100000000);

    TEST_IMUL_IM("q", "", 45, 0x12341234);
    TEST_IMUL_IM("q", "", -45, 23);
    TEST_IMUL_IM("q", "", 0x8000, 0x8000000000000000);
    TEST_IMUL_IM("q", "", 0x7fff, 0x10000000);

    test_idivq(0, 0x12345678abcdef, 12347);
    test_idivq(0, -233223, -45);
    test_idivq(0, 0x8000000000000000, -1);
    test_idivq(0x12343, 0x12345678, 0x81234567);

    test_divq(0, 0x12345678abcdef, 12347);
    test_divq(0, -233223, -45);
    test_divq(0, 0x8000000000000000, -1);
    test_divq(0x12343, 0x12345678, 0x81234567);
#endif
}
    712
/*
 * Run the bit-scan instruction `op` on op0 and print the source value,
 * the destination register and ZF (captured with setz).
 *
 * The ZF holder is cleared and the destination is preloaded with
 * 0x12345678 before the scan.  Both outputs are earlyclobber operands
 * because they are written before the source register is read.  `size`
 * is the operand-size modifier letter applied to the register operands.
 */
#define TEST_BSX(op, size, op0)\
{\
    long res, val, resz;\
    val = op0;\
    asm("xor %1, %1\n"\
        "mov $0x12345678, %0\n"\
        #op " %" size "2, %" size "0 ; setz %b1" \
        : "=&r" (res), "=&q" (resz)\
        : "r" (val));\
    printf("%-10s A=" FMTLX " R=" FMTLX " %ld\n", #op, val, res, resz);\
}

/*
 * Test bsr/bsf at 16- and 32-bit operand sizes (plus 64-bit on x86_64),
 * each with a zero source and with a non-zero source.
 */
void test_bsx(void)
{
    TEST_BSX(bsrw, "w", 0);
    TEST_BSX(bsrw, "w", 0x12340128);
    TEST_BSX(bsfw, "w", 0);
    TEST_BSX(bsfw, "w", 0x12340128);
    TEST_BSX(bsrl, "k", 0);
    TEST_BSX(bsrl, "k", 0x00340128);
    TEST_BSX(bsfl, "k", 0);
    TEST_BSX(bsfl, "k", 0x00340128);
#if defined(__x86_64__)
    TEST_BSX(bsrq, "", 0);
    TEST_BSX(bsrq, "", 0x003401281234);
    TEST_BSX(bsfq, "", 0);
    TEST_BSX(bsfq, "", 0x003401281234);
#endif
}
    742
    743/**********************************************/
    744
/* Overlay of a double and its raw 64-bit representation. */
union float64u {
    double d;
    uint64_t l;
};

/* Quiet NaN: sign set, exponent all ones, most significant mantissa bit set. */
union float64u q_nan = { .l = 0xFFF8000000000000LL };
/*
 * NOTE(review): despite the name, 0xFFF0000000000000 has an all-zero
 * mantissa, which in IEEE 754 binary64 encodes negative infinity rather
 * than a signalling NaN — confirm the intended value.
 */
union float64u s_nan = { .l = 0xFFF0000000000000LL };
    752
    753void test_fops(double a, double b)
    754{
    755    printf("a=%f b=%f a+b=%f\n", a, b, a + b);
    756    printf("a=%f b=%f a-b=%f\n", a, b, a - b);
    757    printf("a=%f b=%f a*b=%f\n", a, b, a * b);
    758    printf("a=%f b=%f a/b=%f\n", a, b, a / b);
    759    printf("a=%f b=%f fmod(a, b)=%f\n", a, b, fmod(a, b));
    760    printf("a=%f sqrt(a)=%f\n", a, sqrt(a));
    761    printf("a=%f sin(a)=%f\n", a, sin(a));
    762    printf("a=%f cos(a)=%f\n", a, cos(a));
    763    printf("a=%f tan(a)=%f\n", a, tan(a));
    764    printf("a=%f log(a)=%f\n", a, log(a));
    765    printf("a=%f exp(a)=%f\n", a, exp(a));
    766    printf("a=%f b=%f atan2(a, b)=%f\n", a, b, atan2(a, b));
    767    /* just to test some op combining */
    768    printf("a=%f asin(sin(a))=%f\n", a, asin(sin(a)));
    769    printf("a=%f acos(cos(a))=%f\n", a, acos(cos(a)));
    770    printf("a=%f atan(tan(a))=%f\n", a, atan(tan(a)));
    771
    772}
    773
    774void fpu_clear_exceptions(void)
    775{
    776    struct QEMU_PACKED {
    777        uint16_t fpuc;
    778        uint16_t dummy1;
    779        uint16_t fpus;
    780        uint16_t dummy2;
    781        uint16_t fptag;
    782        uint16_t dummy3;
    783        uint32_t ignored[4];
    784        long double fpregs[8];
    785    } float_env32;
    786
    787    asm volatile ("fnstenv %0\n" : "=m" (float_env32));
    788    float_env32.fpus &= ~0x7f;
    789    asm volatile ("fldenv %0\n" : : "m" (float_env32));
    790}
    791
/* XXX: display exception bits when supported */
/* Status-word bits printed as exception flags; currently none. */
#define FPUS_EMASK 0x0000
//#define FPUS_EMASK 0x007f

/*
 * Compare a with b using fcom and fucom (printing the status-word bits
 * selected by 0x4500) and, when TEST_FCOMI is non-zero, fcomi and fucomi
 * (printing ZF/PF/CF from EFLAGS).  Finally classify a with fxam and
 * print the status-word bits selected by 0x4700.
 *
 * fpu_clear_exceptions() is called before each instruction and once more
 * at the end.
 */
void test_fcmp(double a, double b)
{
    long eflags, fpus;

    fpu_clear_exceptions();
    /* a is in st(0) ("t"), b in st(1) ("u"); fstsw stores the status in ax */
    asm("fcom %2\n"
        "fstsw %%ax\n"
        : "=a" (fpus)
        : "t" (a), "u" (b));
    printf("fcom(%f %f)=%04lx\n",
           a, b, fpus & (0x4500 | FPUS_EMASK));
    fpu_clear_exceptions();
    asm("fucom %2\n"
        "fstsw %%ax\n"
        : "=a" (fpus)
        : "t" (a), "u" (b));
    printf("fucom(%f %f)=%04lx\n",
           a, b, fpus & (0x4500 | FPUS_EMASK));
    if (TEST_FCOMI) {
        /* test f(u)comi instruction */
        fpu_clear_exceptions();
        /* the comparison result lands in EFLAGS, read back via pushf/pop */
        asm("fcomi %3, %2\n"
            "fstsw %%ax\n"
            "pushf\n"
            "pop %0\n"
            : "=r" (eflags), "=a" (fpus)
            : "t" (a), "u" (b));
        printf("fcomi(%f %f)=%04lx %02lx\n",
               a, b, fpus & FPUS_EMASK, eflags & (CC_Z | CC_P | CC_C));
        fpu_clear_exceptions();
        asm("fucomi %3, %2\n"
            "fstsw %%ax\n"
            "pushf\n"
            "pop %0\n"
            : "=r" (eflags), "=a" (fpus)
            : "t" (a), "u" (b));
        printf("fucomi(%f %f)=%04lx %02lx\n",
               a, b, fpus & FPUS_EMASK, eflags & (CC_Z | CC_P | CC_C));
    }
    fpu_clear_exceptions();
    asm volatile("fxam\n"
                 "fstsw %%ax\n"
                 : "=a" (fpus)
                 : "t" (a));
    printf("fxam(%f)=%04lx\n", a, fpus & 0x4700);
    fpu_clear_exceptions();
}
    843
    844void test_fcvt(double a)
    845{
    846    float fa;
    847    long double la;
    848    int16_t fpuc;
    849    int i;
    850    int64_t lla;
    851    int ia;
    852    int16_t wa;
    853    double ra;
    854
    855    fa = a;
    856    la = a;
    857    printf("(float)%f = %f\n", a, fa);
    858    printf("(long double)%f = %Lf\n", a, la);
    859    printf("a=" FMT64X "\n", *(uint64_t *)&a);
    860    printf("la=" FMT64X " %04x\n", *(uint64_t *)&la,
    861           *(unsigned short *)((char *)(&la) + 8));
    862
    863    /* test all roundings */
    864    asm volatile ("fstcw %0" : "=m" (fpuc));
    865    for(i=0;i<4;i++) {
    866        uint16_t val16;
    867        val16 = (fpuc & ~0x0c00) | (i << 10);
    868        asm volatile ("fldcw %0" : : "m" (val16));
    869        asm volatile ("fist %0" : "=m" (wa) : "t" (a));
    870        asm volatile ("fistl %0" : "=m" (ia) : "t" (a));
    871        asm volatile ("fistpll %0" : "=m" (lla) : "t" (a) : "st");
    872        asm volatile ("frndint ; fstl %0" : "=m" (ra) : "t" (a));
    873        asm volatile ("fldcw %0" : : "m" (fpuc));
    874        printf("(short)a = %d\n", wa);
    875        printf("(int)a = %d\n", ia);
    876        printf("(int64_t)a = " FMT64X "\n", lla);
    877        printf("rint(a) = %f\n", ra);
    878    }
    879}
    880
/*
 * Execute the constant-load instruction "fld<N>" into the caller's double
 * `a` and print it.  Expands to two statements, so it must be used as a
 * plain statement.
 */
#define TEST(N) \
    asm("fld" #N : "=t" (a)); \
    printf("fld" #N "= %f\n", a);

/* Print the value loaded by each x87 constant-load instruction. */
void test_fconst(void)
{
    double a;
    TEST(1);
    TEST(l2t);
    TEST(l2e);
    TEST(pi);
    TEST(lg2);
    TEST(ln2);
    TEST(z);
}
    896
    897void test_fbcd(double a)
    898{
    899    unsigned short bcd[5];
    900    double b;
    901
    902    asm("fbstp %0" : "=m" (bcd[0]) : "t" (a) : "st");
    903    asm("fbld %1" : "=t" (b) : "m" (bcd[0]));
    904    printf("a=%f bcd=%04x%04x%04x%04x%04x b=%f\n",
    905           a, bcd[4], bcd[3], bcd[2], bcd[1], bcd[0], b);
    906}
    907
    908#define TEST_ENV(env, save, restore)\
    909{\
    910    memset((env), 0xaa, sizeof(*(env)));\
    911    for(i=0;i<5;i++)\
    912        asm volatile ("fldl %0" : : "m" (dtab[i]));\
    913    asm volatile (save " %0\n" : : "m" (*(env)));\
    914    asm volatile (restore " %0\n": : "m" (*(env)));\
    915    for(i=0;i<5;i++)\
    916        asm volatile ("fstpl %0" : "=m" (rtab[i]));\
    917    for(i=0;i<5;i++)\
    918        printf("res[%d]=%f\n", i, rtab[i]);\
    919    printf("fpuc=%04x fpus=%04x fptag=%04x\n",\
    920           (env)->fpuc,\
    921           (env)->fpus & 0xff00,\
    922           (env)->fptag);\
    923}
    924
    925void test_fenv(void)
    926{
    927    struct __attribute__((__packed__)) {
    928        uint16_t fpuc;
    929        uint16_t dummy1;
    930        uint16_t fpus;
    931        uint16_t dummy2;
    932        uint16_t fptag;
    933        uint16_t dummy3;
    934        uint32_t ignored[4];
    935        long double fpregs[8];
    936    } float_env32;
    937    struct __attribute__((__packed__)) {
    938        uint16_t fpuc;
    939        uint16_t fpus;
    940        uint16_t fptag;
    941        uint16_t ignored[4];
    942        long double fpregs[8];
    943    } float_env16;
    944    double dtab[8];
    945    double rtab[8];
    946    int i;
    947
    948    for(i=0;i<8;i++)
    949        dtab[i] = i + 1;
    950
    951    TEST_ENV(&float_env16, "data16 fnstenv", "data16 fldenv");
    952    TEST_ENV(&float_env16, "data16 fnsave", "data16 frstor");
    953    TEST_ENV(&float_env32, "fnstenv", "fldenv");
    954    TEST_ENV(&float_env32, "fnsave", "frstor");
    955
    956    /* test for ffree */
    957    for(i=0;i<5;i++)
    958        asm volatile ("fldl %0" : : "m" (dtab[i]));
    959    asm volatile("ffree %st(2)");
    960    asm volatile ("fnstenv %0\n" : : "m" (float_env32));
    961    asm volatile ("fninit");
    962    printf("fptag=%04x\n", float_env32.fptag);
    963}
    964
    965
/*
 * Load EFLAGS from `eflags` (push/popf), execute "fcmov<CC> st(1), st(0)"
 * with a in st(0) and b in st(1), and print the flags value used and the
 * resulting st(0).
 */
#define TEST_FCMOV(a, b, eflags, CC)\
{\
    double res;\
    asm("push %3\n"\
        "popf\n"\
        "fcmov" CC " %2, %0\n"\
        : "=t" (res)\
        : "0" (a), "u" (b), "g" (eflags));\
    printf("fcmov%s eflags=0x%04lx-> %f\n", \
           CC, (long)eflags, res);\
}

/*
 * Test fcmov for the carry/zero based conditions under all four
 * combinations of CF and ZF, then the unordered conditions (u, nu) with
 * the parity flag clear and set.
 */
void test_fcmov(void)
{
    double a, b;
    long eflags, i;

    a = 1.0;
    b = 2.0;
    for(i = 0; i < 4; i++) {
        /* bit 0 of i selects CF, bit 1 selects ZF */
        eflags = 0;
        if (i & 1)
            eflags |= CC_C;
        if (i & 2)
            eflags |= CC_Z;
        TEST_FCMOV(a, b, eflags, "b");
        TEST_FCMOV(a, b, eflags, "e");
        TEST_FCMOV(a, b, eflags, "be");
        TEST_FCMOV(a, b, eflags, "nb");
        TEST_FCMOV(a, b, eflags, "ne");
        TEST_FCMOV(a, b, eflags, "nbe");
    }
    TEST_FCMOV(a, b, 0, "u");
    TEST_FCMOV(a, b, CC_P, "u");
    TEST_FCMOV(a, b, 0, "nu");
    TEST_FCMOV(a, b, CC_P, "nu");
}
   1003
/*
 * Top-level driver for the floating point tests.
 *
 * Calls the helpers defined earlier in this file (test_fops, test_fcmp,
 * test_fcvt, test_fconst, test_fbcd, test_fenv) with a fixed list of
 * operands, including +/- infinity (written as +/-1.0/0.0) and q_nan.d
 * (presumably a quiet NaN -- defined outside this section).  The fcmov
 * tests are run last, only when TEST_CMOV is non-zero.
 */
void test_floats(void)
{
    test_fops(2, 3);
    test_fops(1.4, -5);
    test_fcmp(2, -1);
    test_fcmp(2, 2);
    test_fcmp(2, 3);
    test_fcmp(2, q_nan.d);
    test_fcmp(q_nan.d, -1);
    test_fcmp(-1.0/0.0, -1);
    test_fcmp(1.0/0.0, -1);
    test_fcvt(0.5);
    test_fcvt(-0.5);
    test_fcvt(1.0/7.0);
    test_fcvt(-1.0/9.0);
    test_fcvt(32768);
    test_fcvt(-1e20);
    test_fcvt(-1.0/0.0);
    test_fcvt(1.0/0.0);
    test_fcvt(q_nan.d);
    test_fconst();
    test_fbcd(1234567890123456.0);
    test_fbcd(-123451234567890.0);
    test_fenv();
    if (TEST_CMOV) {
        test_fcmov();
    }
}
   1032
   1033/**********************************************/
   1034#if !defined(__x86_64__)
   1035
/*
 * TEST_BCD(op, op0, cc_in, cc_mask): run one operand-less BCD adjust
 * instruction.
 *
 * EAX is loaded with 'op0' and EFLAGS with 'cc_in' (push + popf), the
 * instruction 'op' is executed, and EFLAGS is read back with
 * pushf + pop.  The input value, the resulting EAX, the input flags and
 * the resulting flags restricted to 'cc_mask' are printed.
 *
 * 'op' is stringified, so it must be a bare mnemonic.
 */
#define TEST_BCD(op, op0, cc_in, cc_mask)\
{\
    int res, flags;\
    res = op0;\
    flags = cc_in;\
    asm ("push %3\n\t"\
         "popf\n\t"\
         #op "\n\t"\
         "pushf\n\t"\
         "pop %1\n\t"\
        : "=a" (res), "=g" (flags)\
        : "0" (res), "1" (flags));\
    printf("%-10s A=%08x R=%08x CCIN=%04x CC=%04x\n",\
           #op, op0, res, cc_in, flags & cc_mask);\
}
   1051
   1052void test_bcd(void)
   1053{
   1054    TEST_BCD(daa, 0x12340503, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1055    TEST_BCD(daa, 0x12340506, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1056    TEST_BCD(daa, 0x12340507, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1057    TEST_BCD(daa, 0x12340559, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1058    TEST_BCD(daa, 0x12340560, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1059    TEST_BCD(daa, 0x1234059f, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1060    TEST_BCD(daa, 0x123405a0, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1061    TEST_BCD(daa, 0x12340503, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1062    TEST_BCD(daa, 0x12340506, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1063    TEST_BCD(daa, 0x12340503, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1064    TEST_BCD(daa, 0x12340506, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1065    TEST_BCD(daa, 0x12340503, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1066    TEST_BCD(daa, 0x12340506, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1067
   1068    TEST_BCD(das, 0x12340503, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1069    TEST_BCD(das, 0x12340506, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1070    TEST_BCD(das, 0x12340507, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1071    TEST_BCD(das, 0x12340559, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1072    TEST_BCD(das, 0x12340560, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1073    TEST_BCD(das, 0x1234059f, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1074    TEST_BCD(das, 0x123405a0, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1075    TEST_BCD(das, 0x12340503, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1076    TEST_BCD(das, 0x12340506, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1077    TEST_BCD(das, 0x12340503, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1078    TEST_BCD(das, 0x12340506, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1079    TEST_BCD(das, 0x12340503, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1080    TEST_BCD(das, 0x12340506, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
   1081
   1082    TEST_BCD(aaa, 0x12340205, CC_A, (CC_C | CC_A));
   1083    TEST_BCD(aaa, 0x12340306, CC_A, (CC_C | CC_A));
   1084    TEST_BCD(aaa, 0x1234040a, CC_A, (CC_C | CC_A));
   1085    TEST_BCD(aaa, 0x123405fa, CC_A, (CC_C | CC_A));
   1086    TEST_BCD(aaa, 0x12340205, 0, (CC_C | CC_A));
   1087    TEST_BCD(aaa, 0x12340306, 0, (CC_C | CC_A));
   1088    TEST_BCD(aaa, 0x1234040a, 0, (CC_C | CC_A));
   1089    TEST_BCD(aaa, 0x123405fa, 0, (CC_C | CC_A));
   1090
   1091    TEST_BCD(aas, 0x12340205, CC_A, (CC_C | CC_A));
   1092    TEST_BCD(aas, 0x12340306, CC_A, (CC_C | CC_A));
   1093    TEST_BCD(aas, 0x1234040a, CC_A, (CC_C | CC_A));
   1094    TEST_BCD(aas, 0x123405fa, CC_A, (CC_C | CC_A));
   1095    TEST_BCD(aas, 0x12340205, 0, (CC_C | CC_A));
   1096    TEST_BCD(aas, 0x12340306, 0, (CC_C | CC_A));
   1097    TEST_BCD(aas, 0x1234040a, 0, (CC_C | CC_A));
   1098    TEST_BCD(aas, 0x123405fa, 0, (CC_C | CC_A));
   1099
   1100    TEST_BCD(aam, 0x12340547, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));
   1101    TEST_BCD(aad, 0x12340407, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));
   1102}
   1103#endif
   1104
/*
 * TEST_XCHG(op, size, opconst): run a two-operand exchange-style
 * instruction (used below for xchg and xadd) and print both operands
 * afterwards.
 *
 *   op      - mnemonic including its size suffix (stringified)
 *   size    - operand modifier letter inserted into the asm template
 *             ("k", "w", "b", or "" for the full register)
 *   opconst - constraint string for the second operand, "+q" for a
 *             register or "+m" for memory
 *
 * The first operand always lives in a "q" class register.
 */
#define TEST_XCHG(op, size, opconst)\
{\
    long op0, op1;\
    op0 = i2l(0x12345678);\
    op1 = i2l(0xfbca7654);\
    asm(#op " %" size "0, %" size "1" \
        : "=q" (op0), opconst (op1) \
        : "0" (op0));\
    printf("%-10s A=" FMTLX " B=" FMTLX "\n",\
           #op, op0, op1);\
}
   1116
   1117#define TEST_CMPXCHG(op, size, opconst, eax)\
   1118{\
   1119    long op0, op1, op2;\
   1120    op0 = i2l(0x12345678);\
   1121    op1 = i2l(0xfbca7654);\
   1122    op2 = i2l(eax);\
   1123    asm(#op " %" size "0, %" size "1" \
   1124        : "=q" (op0), opconst (op1) \
   1125        : "0" (op0), "a" (op2));\
   1126    printf("%-10s EAX=" FMTLX " A=" FMTLX " C=" FMTLX "\n",\
   1127           #op, op2, op0, op1);\
   1128}
   1129
/*
 * Exercise xchg, xadd, cmpxchg and cmpxchg8b.
 *
 * Each of xchg/xadd/cmpxchg is run for every operand size (the 64-bit
 * forms only on x86_64), once with a register and once with a memory
 * second operand.  cmpxchg is run with two accumulator values: one equal
 * to the destination operand's initial value (0xfbca7654) and one that
 * differs (0xfffefdfc).
 */
void test_xchg(void)
{
    /* xchg, register second operand */
#if defined(__x86_64__)
    TEST_XCHG(xchgq, "", "+q");
#endif
    TEST_XCHG(xchgl, "k", "+q");
    TEST_XCHG(xchgw, "w", "+q");
    TEST_XCHG(xchgb, "b", "+q");

    /* xchg, memory second operand */
#if defined(__x86_64__)
    TEST_XCHG(xchgq, "", "+m");
#endif
    TEST_XCHG(xchgl, "k", "+m");
    TEST_XCHG(xchgw, "w", "+m");
    TEST_XCHG(xchgb, "b", "+m");

    /* xadd, register second operand */
#if defined(__x86_64__)
    TEST_XCHG(xaddq, "", "+q");
#endif
    TEST_XCHG(xaddl, "k", "+q");
    TEST_XCHG(xaddw, "w", "+q");
    TEST_XCHG(xaddb, "b", "+q");

    /* xadd where source and destination are the same register */
    {
        int res;
        res = 0x12345678;
        asm("xaddl %1, %0" : "=r" (res) : "0" (res));
        printf("xaddl same res=%08x\n", res);
    }

    /* xadd, memory second operand */
#if defined(__x86_64__)
    TEST_XCHG(xaddq, "", "+m");
#endif
    TEST_XCHG(xaddl, "k", "+m");
    TEST_XCHG(xaddw, "w", "+m");
    TEST_XCHG(xaddb, "b", "+m");

    /* cmpxchg, register destination, accumulator equal to destination */
#if defined(__x86_64__)
    TEST_CMPXCHG(cmpxchgq, "", "+q", 0xfbca7654);
#endif
    TEST_CMPXCHG(cmpxchgl, "k", "+q", 0xfbca7654);
    TEST_CMPXCHG(cmpxchgw, "w", "+q", 0xfbca7654);
    TEST_CMPXCHG(cmpxchgb, "b", "+q", 0xfbca7654);

    /* cmpxchg, register destination, accumulator differs */
#if defined(__x86_64__)
    TEST_CMPXCHG(cmpxchgq, "", "+q", 0xfffefdfc);
#endif
    TEST_CMPXCHG(cmpxchgl, "k", "+q", 0xfffefdfc);
    TEST_CMPXCHG(cmpxchgw, "w", "+q", 0xfffefdfc);
    TEST_CMPXCHG(cmpxchgb, "b", "+q", 0xfffefdfc);

    /* cmpxchg, memory destination, accumulator equal to destination */
#if defined(__x86_64__)
    TEST_CMPXCHG(cmpxchgq, "", "+m", 0xfbca7654);
#endif
    TEST_CMPXCHG(cmpxchgl, "k", "+m", 0xfbca7654);
    TEST_CMPXCHG(cmpxchgw, "w", "+m", 0xfbca7654);
    TEST_CMPXCHG(cmpxchgb, "b", "+m", 0xfbca7654);

    /* cmpxchg, memory destination, accumulator differs */
#if defined(__x86_64__)
    TEST_CMPXCHG(cmpxchgq, "", "+m", 0xfffefdfc);
#endif
    TEST_CMPXCHG(cmpxchgl, "k", "+m", 0xfffefdfc);
    TEST_CMPXCHG(cmpxchgw, "w", "+m", 0xfffefdfc);
    TEST_CMPXCHG(cmpxchgb, "b", "+m", 0xfffefdfc);

    /* cmpxchg8b: first pass with a memory operand that differs from
       EDX:EAX, second pass with one that matches */
    {
        uint64_t op0, op1, op2;
        long eax, edx;
        long i, eflags;

        for(i = 0; i < 2; i++) {
            op0 = 0x123456789abcdLL;
            eax = i2l(op0 & 0xffffffff);
            edx = i2l(op0 >> 32);
            if (i == 0)
                op1 = 0xfbca765423456LL;
            else
                op1 = op0;
            op2 = 0x6532432432434LL;
            /* ECX:EBX carries the replacement value; EFLAGS is captured
               right after the instruction so that ZF can be reported */
            asm("cmpxchg8b %2\n"
                "pushf\n"
                "pop %3\n"
                : "=a" (eax), "=d" (edx), "=m" (op1), "=g" (eflags)
                : "0" (eax), "1" (edx), "m" (op1), "b" ((int)op2), "c" ((int)(op2 >> 32)));
            printf("cmpxchg8b: eax=" FMTLX " edx=" FMTLX " op1=" FMT64X " CC=%02lx\n",
                   eax, edx, op1, eflags & CC_Z);
        }
    }
}
   1219
   1220#ifdef TEST_SEGS
   1221/**********************************************/
   1222/* segmentation tests */
   1223
   1224#include <sys/syscall.h>
   1225#include <unistd.h>
   1226#include <asm/ldt.h>
   1227#include <linux/version.h>
   1228
   1229static inline int modify_ldt(int func, void * ptr, unsigned long bytecount)
   1230{
   1231    return syscall(__NR_modify_ldt, func, ptr, bytecount);
   1232}
   1233
/* With kernel headers >= 2.5.66, map the old structure name used by this
   file onto struct user_desc */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 66)
#define modify_ldt_ldt_s user_desc
#endif

/* Build a segment selector for descriptor index n with the three low
   bits set (table indicator = LDT, requested privilege level = 3) */
#define MK_SEL(n) (((n) << 3) | 7)

/* Backing storage for the data segments installed in LDT entries 1 and 2
   by test_segs() */
uint8_t seg_data1[4096];
uint8_t seg_data2[4096];
   1242
/*
 * TEST_LR(op, size, seg, mask): run a selector-query instruction (used
 * below with lar and lsl) on the selector 'seg', which is passed as a
 * 16-bit memory operand.
 *
 * The destination register starts as 0x12345678.  After the instruction
 * res2 is set to 1 when ZF is set and to 0 otherwise.  The macro prints
 * that flag and the destination value with the bits in 'mask' cleared.
 *
 * 'op' and 'size' must be string literals.
 */
#define TEST_LR(op, size, seg, mask)\
{\
    int res, res2;\
    uint16_t mseg = seg;\
    res = 0x12345678;\
    asm (op " %" size "2, %" size "0\n" \
         "movl $0, %1\n"\
         "jnz 1f\n"\
         "movl $1, %1\n"\
         "1:\n"\
         : "=r" (res), "=r" (res2) : "m" (mseg), "0" (res));\
    printf(op ": Z=%d %08x\n", res2, res & ~(mask));\
}
   1256
/*
 * TEST_ARPL(op, size, op1, op2): run 'op' (used below with arpl) with
 * 'op1' as the destination register and 'op2' as the source register.
 *
 * After the instruction c is set to 1 when ZF is set and to 0 otherwise.
 * The macro prints both inputs, the resulting destination value and that
 * flag.
 *
 * 'op' and 'size' must be string literals.
 */
#define TEST_ARPL(op, size, op1, op2)\
{\
    long a, b, c;                               \
    a = (op1);                                  \
    b = (op2);                                  \
    asm volatile(op " %" size "3, %" size "0\n"\
                 "movl $0,%1\n"\
                 "jnz 1f\n"\
                 "movl $1,%1\n"\
                 "1:\n"\
                 : "=r" (a), "=r" (c) : "0" (a), "r" (b));    \
    printf(op size " A=" FMTLX " B=" FMTLX " R=" FMTLX " z=%ld\n",\
           (long)(op1), (long)(op2), a, c);\
}
   1271
/*
 * Segmentation tests.
 *
 * Installs two data segments in the LDT (entry 1 over seg_data1, entry 2
 * over seg_data2), then checks segment-relative loads through fs, gs, ds
 * and ss, the lfs far-pointer load, and the lar/lsl/arpl instructions.
 *
 * NOTE: we use Linux modify_ldt syscall
 */
void test_segs(void)
{
    struct modify_ldt_ldt_s ldt;
    long long ldt_table[3];
    int res, res2;
    char tmp;
    /* far pointer operand for lfs: 32-bit offset followed by selector */
    struct {
        uint32_t offset;
        uint16_t seg;
    } __attribute__((__packed__)) segoff;

    /* LDT entry 1: 32-bit data segment based at seg_data1 */
    ldt.entry_number = 1;
    ldt.base_addr = (unsigned long)&seg_data1;
    ldt.limit = (sizeof(seg_data1) + 0xfff) >> 12;
    ldt.seg_32bit = 1;
    ldt.contents = MODIFY_LDT_CONTENTS_DATA;
    ldt.read_exec_only = 0;
    ldt.limit_in_pages = 1;
    ldt.seg_not_present = 0;
    ldt.useable = 1;
    modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */

    /* LDT entry 2: 32-bit data segment based at seg_data2 */
    ldt.entry_number = 2;
    ldt.base_addr = (unsigned long)&seg_data2;
    ldt.limit = (sizeof(seg_data2) + 0xfff) >> 12;
    ldt.seg_32bit = 1;
    ldt.contents = MODIFY_LDT_CONTENTS_DATA;
    ldt.read_exec_only = 0;
    ldt.limit_in_pages = 1;
    ldt.seg_not_present = 0;
    ldt.useable = 1;
    modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */

    modify_ldt(0, &ldt_table, sizeof(ldt_table)); /* read ldt entries */
#if 0
    {
        int i;
        for(i=0;i<3;i++)
            printf("%d: %016Lx\n", i, ldt_table[i]);
    }
#endif
    /* do some tests with fs or gs */
    asm volatile ("movl %0, %%fs" : : "r" (MK_SEL(1)));

    /* marker bytes read back through the segments below */
    seg_data1[1] = 0xaa;
    seg_data2[1] = 0x55;

    asm volatile ("fs movzbl 0x1, %0" : "=r" (res));
    printf("FS[1] = %02x\n", res);

    /* gs is saved and restored around the access */
    asm volatile ("pushl %%gs\n"
                  "movl %1, %%gs\n"
                  "gs movzbl 0x1, %0\n"
                  "popl %%gs\n"
                  : "=r" (res)
                  : "r" (MK_SEL(2)));
    printf("GS[1] = %02x\n", res);

    /* tests with ds/ss (implicit segment case) */
    tmp = 0xa5;
    /* with ds switched to entry 1, the absolute access goes through ds
       while the ebp-based access uses ss and so still reaches tmp */
    asm volatile ("pushl %%ebp\n\t"
                  "pushl %%ds\n\t"
                  "movl %2, %%ds\n\t"
                  "movl %3, %%ebp\n\t"
                  "movzbl 0x1, %0\n\t"
                  "movzbl (%%ebp), %1\n\t"
                  "popl %%ds\n\t"
                  "popl %%ebp\n\t"
                  : "=r" (res), "=r" (res2)
                  : "r" (MK_SEL(1)), "r" (&tmp));
    printf("DS[1] = %02x\n", res);
    printf("SS[tmp] = %02x\n", res2);

    /* lfs loads the offset into the register and the selector into fs */
    segoff.seg = MK_SEL(2);
    segoff.offset = 0xabcdef12;
    asm volatile("lfs %2, %0\n\t"
                 "movl %%fs, %1\n\t"
                 : "=r" (res), "=g" (res2)
                 : "m" (segoff));
    printf("FS:reg = %04x:%08x\n", res2, res);

    /* lar/lsl on the selector of LDT entry 2 */
    TEST_LR("larw", "w", MK_SEL(2), 0x0100);
    TEST_LR("larl", "", MK_SEL(2), 0x0100);
    TEST_LR("lslw", "w", MK_SEL(2), 0);
    TEST_LR("lsll", "", MK_SEL(2), 0);

    /* lar/lsl on selector 0xfff8 */
    TEST_LR("larw", "w", 0xfff8, 0);
    TEST_LR("larl", "", 0xfff8, 0);
    TEST_LR("lslw", "w", 0xfff8, 0);
    TEST_LR("lsll", "", 0xfff8, 0);

    /* arpl with destination RPL above, below and equal to the source */
    TEST_ARPL("arpl", "w", 0x12345678 | 3, 0x762123c | 1);
    TEST_ARPL("arpl", "w", 0x12345678 | 1, 0x762123c | 3);
    TEST_ARPL("arpl", "w", 0x12345678 | 1, 0x762123c | 1);
}
   1368
/* 16 bit code test */
/* Symbols delimiting the 16-bit code and its three entry points; they
   are defined outside this file section (presumably in a separate
   assembly source -- confirm against the build) */
extern char code16_start, code16_end;
extern char code16_func1;
extern char code16_func2;
extern char code16_func3;

/*
 * Install a 16-bit code segment in LDT entry 1 covering
 * code16_start..code16_end, then far-call the three 16-bit functions and
 * print the values they leave in EAX (and ECX for the second one).
 */
void test_code16(void)
{
    struct modify_ldt_ldt_s ldt;
    int res, res2;

    /* build a code segment */
    ldt.entry_number = 1;
    ldt.base_addr = (unsigned long)&code16_start;
    ldt.limit = &code16_end - &code16_start;
    ldt.seg_32bit = 0;
    ldt.contents = MODIFY_LDT_CONTENTS_CODE;
    ldt.read_exec_only = 0;
    ldt.limit_in_pages = 0;
    ldt.seg_not_present = 0;
    ldt.useable = 1;
    modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */

    /* call the first function */
    asm volatile ("lcall %1, %2"
                  : "=a" (res)
                  : "i" (MK_SEL(1)), "i" (&code16_func1): "memory", "cc");
    printf("func1() = 0x%08x\n", res);
    /* the second function also reports a value in ECX */
    asm volatile ("lcall %2, %3"
                  : "=a" (res), "=c" (res2)
                  : "i" (MK_SEL(1)), "i" (&code16_func2): "memory", "cc");
    printf("func2() = 0x%08x spdec=%d\n", res, res2);
    asm volatile ("lcall %1, %2"
                  : "=a" (res)
                  : "i" (MK_SEL(1)), "i" (&code16_func3): "memory", "cc");
    printf("func3() = 0x%08x\n", res);
}
   1406#endif
   1407
/*
 * Tiny assembly helpers used by test_misc(): each loads a marker value
 * into EAX and returns with a far return (func_lret) or an interrupt
 * return (func_iret, 32-bit build only).  The caller is responsible for
 * pushing the extra words these return instructions pop.
 */
#if defined(__x86_64__)
asm(".globl func_lret\n"
    "func_lret:\n"
    "movl $0x87654641, %eax\n"
    "lretq\n");
#else
asm(".globl func_lret\n"
    "func_lret:\n"
    "movl $0x87654321, %eax\n"
    "lret\n"

    ".globl func_iret\n"
    "func_iret:\n"
    "movl $0xabcd4321, %eax\n"
    "iret\n");
#endif

/* C-visible names for the labels above; func_iret is only defined in
   the 32-bit build */
extern char func_lret;
extern char func_iret;
   1427
   1428void test_misc(void)
   1429{
   1430    char table[256];
   1431    long res, i;
   1432
   1433    for(i=0;i<256;i++) table[i] = 256 - i;
   1434    res = 0x12345678;
   1435    asm ("xlat" : "=a" (res) : "b" (table), "0" (res));
   1436    printf("xlat: EAX=" FMTLX "\n", res);
   1437
   1438#if defined(__x86_64__)
   1439#if 0
   1440    {
   1441        /* XXX: see if Intel Core2 and AMD64 behavior really
   1442           differ. Here we implemented the Intel way which is not
   1443           compatible yet with QEMU. */
   1444        static struct QEMU_PACKED {
   1445            uint64_t offset;
   1446            uint16_t seg;
   1447        } desc;
   1448        long cs_sel;
   1449
   1450        asm volatile ("mov %%cs, %0" : "=r" (cs_sel));
   1451
   1452        asm volatile ("push %1\n"
   1453                      "call func_lret\n"
   1454                      : "=a" (res)
   1455                      : "r" (cs_sel) : "memory", "cc");
   1456        printf("func_lret=" FMTLX "\n", res);
   1457
   1458        desc.offset = (long)&func_lret;
   1459        desc.seg = cs_sel;
   1460
   1461        asm volatile ("xor %%rax, %%rax\n"
   1462                      "rex64 lcall *(%%rcx)\n"
   1463                      : "=a" (res)
   1464                      : "c" (&desc)
   1465                      : "memory", "cc");
   1466        printf("func_lret2=" FMTLX "\n", res);
   1467
   1468        asm volatile ("push %2\n"
   1469                      "mov $ 1f, %%rax\n"
   1470                      "push %%rax\n"
   1471                      "rex64 ljmp *(%%rcx)\n"
   1472                      "1:\n"
   1473                      : "=a" (res)
   1474                      : "c" (&desc), "b" (cs_sel)
   1475                      : "memory", "cc");
   1476        printf("func_lret3=" FMTLX "\n", res);
   1477    }
   1478#endif
   1479#else
   1480    asm volatile ("push %%cs ; call %1"
   1481                  : "=a" (res)
   1482                  : "m" (func_lret): "memory", "cc");
   1483    printf("func_lret=" FMTLX "\n", res);
   1484
   1485    asm volatile ("pushf ; push %%cs ; call %1"
   1486                  : "=a" (res)
   1487                  : "m" (func_iret): "memory", "cc");
   1488    printf("func_iret=" FMTLX "\n", res);
   1489#endif
   1490
   1491#if defined(__x86_64__)
   1492    /* specific popl test */
   1493    asm volatile ("push $12345432 ; push $0x9abcdef ; pop (%%rsp) ; pop %0"
   1494                  : "=g" (res));
   1495    printf("popl esp=" FMTLX "\n", res);
   1496#else
   1497    /* specific popl test */
   1498    asm volatile ("pushl $12345432 ; pushl $0x9abcdef ; popl (%%esp) ; popl %0"
   1499                  : "=g" (res));
   1500    printf("popl esp=" FMTLX "\n", res);
   1501
   1502    /* specific popw test */
   1503    asm volatile ("pushl $12345432 ; pushl $0x9abcdef ; popw (%%esp) ; addl $2, %%esp ; popl %0"
   1504                  : "=g" (res));
   1505    printf("popw esp=" FMTLX "\n", res);
   1506#endif
   1507}
   1508
   1509uint8_t str_buffer[4096];
   1510
   1511#define TEST_STRING1(OP, size, DF, REP)\
   1512{\
   1513    long esi, edi, eax, ecx, eflags;\
   1514\
   1515    esi = (long)(str_buffer + sizeof(str_buffer) / 2);\
   1516    edi = (long)(str_buffer + sizeof(str_buffer) / 2) + 16;\
   1517    eax = i2l(0x12345678);\
   1518    ecx = 17;\
   1519\
   1520    asm volatile ("push $0\n\t"\
   1521                  "popf\n\t"\
   1522                  DF "\n\t"\
   1523                  REP #OP size "\n\t"\
   1524                  "cld\n\t"\
   1525                  "pushf\n\t"\
   1526                  "pop %4\n\t"\
   1527                  : "=S" (esi), "=D" (edi), "=a" (eax), "=c" (ecx), "=g" (eflags)\
   1528                  : "0" (esi), "1" (edi), "2" (eax), "3" (ecx));\
   1529    printf("%-10s ESI=" FMTLX " EDI=" FMTLX " EAX=" FMTLX " ECX=" FMTLX " EFL=%04x\n",\
   1530           REP #OP size, esi, edi, eax, ecx,\
   1531           (int)(eflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)));\
   1532}
   1533
/*
 * TEST_STRING(OP, REP): expand TEST_STRING1 for every operand size
 * (byte, word, long, plus quad on x86_64), first with the direction flag
 * clear and then with it set through "std".
 */
#define TEST_STRING(OP, REP)\
    TEST_STRING1(OP, "b", "", REP);\
    TEST_STRING1(OP, "w", "", REP);\
    TEST_STRING1(OP, "l", "", REP);\
    X86_64_ONLY(TEST_STRING1(OP, "q", "", REP));\
    TEST_STRING1(OP, "b", "std", REP);\
    TEST_STRING1(OP, "w", "std", REP);\
    TEST_STRING1(OP, "l", "std", REP);\
    X86_64_ONLY(TEST_STRING1(OP, "q", "std", REP))
   1543
   1544void test_string(void)
   1545{
   1546    int i;
   1547    for(i = 0;i < sizeof(str_buffer); i++)
   1548        str_buffer[i] = i + 0x56;
   1549   TEST_STRING(stos, "");
   1550   TEST_STRING(stos, "rep ");
   1551   TEST_STRING(lods, ""); /* to verify stos */
   1552   TEST_STRING(lods, "rep ");
   1553   TEST_STRING(movs, "");
   1554   TEST_STRING(movs, "rep ");
   1555   TEST_STRING(lods, ""); /* to verify stos */
   1556
   1557   /* XXX: better tests */
   1558   TEST_STRING(scas, "");
   1559   TEST_STRING(scas, "repz ");
   1560   TEST_STRING(scas, "repnz ");
   1561   TEST_STRING(cmps, "");
   1562   TEST_STRING(cmps, "repz ");
   1563   TEST_STRING(cmps, "repnz ");
   1564}
   1565
   1566#ifdef TEST_VM86
   1567/* VM86 test */
   1568
   1569static inline void set_bit(uint8_t *a, unsigned int bit)
   1570{
   1571    a[bit / 8] |= (1 << (bit % 8));
   1572}
   1573
   1574static inline uint8_t *seg_to_linear(unsigned int seg, unsigned int reg)
   1575{
   1576    return (uint8_t *)((seg << 4) + (reg & 0xffff));
   1577}
   1578
/* Push a 16-bit value on the vm86 stack: decrement the low 16 bits of
   esp by two (wrapping within 64K, upper bits preserved) and store 'val'
   at ss:sp. */
static inline void pushw(struct vm86_regs *r, int val)
{
    r->esp = (r->esp & ~0xffff) | ((r->esp - 2) & 0xffff);
    *(uint16_t *)seg_to_linear(r->ss, r->esp) = val;
}
   1584
   1585static inline int vm86(int func, struct vm86plus_struct *v86)
   1586{
   1587    return syscall(__NR_vm86, func, v86);
   1588}
   1589
/* Bounds of the code image copied into the vm86 address space; defined
   outside this file section */
extern char vm86_code_start;
extern char vm86_code_end;

/* Segment and instruction pointer at which the vm86 code is loaded and
   started */
#define VM86_CODE_CS 0x100
#define VM86_CODE_IP 0x100
   1595
   1596void test_vm86(void)
   1597{
   1598    struct vm86plus_struct ctx;
   1599    struct vm86_regs *r;
   1600    uint8_t *vm86_mem;
   1601    int seg, ret;
   1602
   1603    vm86_mem = mmap((void *)0x00000000, 0x110000,
   1604                    PROT_WRITE | PROT_READ | PROT_EXEC,
   1605                    MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
   1606    if (vm86_mem == MAP_FAILED) {
   1607        printf("ERROR: could not map vm86 memory");
   1608        return;
   1609    }
   1610    memset(&ctx, 0, sizeof(ctx));
   1611
   1612    /* init basic registers */
   1613    r = &ctx.regs;
   1614    r->eip = VM86_CODE_IP;
   1615    r->esp = 0xfffe;
   1616    seg = VM86_CODE_CS;
   1617    r->cs = seg;
   1618    r->ss = seg;
   1619    r->ds = seg;
   1620    r->es = seg;
   1621    r->fs = seg;
   1622    r->gs = seg;
   1623    r->eflags = VIF_MASK;
   1624
   1625    /* move code to proper address. We use the same layout as a .com
   1626       dos program. */
   1627    memcpy(vm86_mem + (VM86_CODE_CS << 4) + VM86_CODE_IP,
   1628           &vm86_code_start, &vm86_code_end - &vm86_code_start);
   1629
   1630    /* mark int 0x21 as being emulated */
   1631    set_bit((uint8_t *)&ctx.int_revectored, 0x21);
   1632
   1633    for(;;) {
   1634        ret = vm86(VM86_ENTER, &ctx);
   1635        switch(VM86_TYPE(ret)) {
   1636        case VM86_INTx:
   1637            {
   1638                int int_num, ah, v;
   1639
   1640                int_num = VM86_ARG(ret);
   1641                if (int_num != 0x21)
   1642                    goto unknown_int;
   1643                ah = (r->eax >> 8) & 0xff;
   1644                switch(ah) {
   1645                case 0x00: /* exit */
   1646                    goto the_end;
   1647                case 0x02: /* write char */
   1648                    {
   1649                        uint8_t c = r->edx;
   1650                        putchar(c);
   1651                    }
   1652                    break;
   1653                case 0x09: /* write string */
   1654                    {
   1655                        uint8_t c, *ptr;
   1656                        ptr = seg_to_linear(r->ds, r->edx);
   1657                        for(;;) {
   1658                            c = *ptr++;
   1659                            if (c == '$')
   1660                                break;
   1661                            putchar(c);
   1662                        }
   1663                        r->eax = (r->eax & ~0xff) | '$';
   1664                    }
   1665                    break;
   1666                case 0xff: /* extension: write eflags number in edx */
   1667                    v = (int)r->edx;
   1668#ifndef LINUX_VM86_IOPL_FIX
   1669                    v &= ~0x3000;
   1670#endif
   1671                    printf("%08x\n", v);
   1672                    break;
   1673                default:
   1674                unknown_int:
   1675                    printf("unsupported int 0x%02x\n", int_num);
   1676                    goto the_end;
   1677                }
   1678            }
   1679            break;
   1680        case VM86_SIGNAL:
   1681            /* a signal came, we just ignore that */
   1682            break;
   1683        case VM86_STI:
   1684            break;
   1685        default:
   1686            printf("ERROR: unhandled vm86 return code (0x%x)\n", ret);
   1687            goto the_end;
   1688        }
   1689    }
   1690 the_end:
   1691    printf("VM86 end\n");
   1692    munmap(vm86_mem, 0x110000);
   1693}
   1694#endif
   1695
/* exception tests */
/* On i386, when the REG_Exx names are not already provided, define them
   in terms of the bare register names */
#if defined(__i386__) && !defined(REG_EAX)
#define REG_EAX EAX
#define REG_EBX EBX
#define REG_ECX ECX
#define REG_EDX EDX
#define REG_ESI ESI
#define REG_EDI EDI
#define REG_EBP EBP
#define REG_ESP ESP
#define REG_EIP EIP
#define REG_EFL EFL
#define REG_TRAPNO TRAPNO
#define REG_ERR ERR
#endif

/* On x86_64 the instruction pointer slot is named REG_RIP */
#if defined(__x86_64__)
#define REG_EIP REG_RIP
#endif

/* Resume point used by sig_handler() to return into test_exceptions() */
jmp_buf jmp_env;
/* Globals used as operands by the exception tests */
int v1;
int tab[2];
   1719
   1720void sig_handler(int sig, siginfo_t *info, void *puc)
   1721{
   1722    ucontext_t *uc = puc;
   1723
   1724    printf("si_signo=%d si_errno=%d si_code=%d",
   1725           info->si_signo, info->si_errno, info->si_code);
   1726    printf(" si_addr=0x%08lx",
   1727           (unsigned long)info->si_addr);
   1728    printf("\n");
   1729
   1730    printf("trapno=" FMTLX " err=" FMTLX,
   1731           (long)uc->uc_mcontext.gregs[REG_TRAPNO],
   1732           (long)uc->uc_mcontext.gregs[REG_ERR]);
   1733    printf(" EIP=" FMTLX, (long)uc->uc_mcontext.gregs[REG_EIP]);
   1734    printf("\n");
   1735    longjmp(jmp_env, 1);
   1736}
   1737
   1738void test_exceptions(void)
   1739{
   1740    struct sigaction act;
   1741    volatile int val;
   1742
   1743    act.sa_sigaction = sig_handler;
   1744    sigemptyset(&act.sa_mask);
   1745    act.sa_flags = SA_SIGINFO | SA_NODEFER;
   1746    sigaction(SIGFPE, &act, NULL);
   1747    sigaction(SIGILL, &act, NULL);
   1748    sigaction(SIGSEGV, &act, NULL);
   1749    sigaction(SIGBUS, &act, NULL);
   1750    sigaction(SIGTRAP, &act, NULL);
   1751
   1752    /* test division by zero reporting */
   1753    printf("DIVZ exception:\n");
   1754    if (setjmp(jmp_env) == 0) {
   1755        /* now divide by zero */
   1756        v1 = 0;
   1757        v1 = 2 / v1;
   1758    }
   1759
   1760#if !defined(__x86_64__)
   1761    printf("BOUND exception:\n");
   1762    if (setjmp(jmp_env) == 0) {
   1763        /* bound exception */
   1764        tab[0] = 1;
   1765        tab[1] = 10;
   1766        asm volatile ("bound %0, %1" : : "r" (11), "m" (tab[0]));
   1767    }
   1768#endif
   1769
   1770#ifdef TEST_SEGS
   1771    printf("segment exceptions:\n");
   1772    if (setjmp(jmp_env) == 0) {
   1773        /* load an invalid segment */
   1774        asm volatile ("movl %0, %%fs" : : "r" ((0x1234 << 3) | 1));
   1775    }
   1776    if (setjmp(jmp_env) == 0) {
   1777        /* null data segment is valid */
   1778        asm volatile ("movl %0, %%fs" : : "r" (3));
   1779        /* null stack segment */
   1780        asm volatile ("movl %0, %%ss" : : "r" (3));
   1781    }
   1782
   1783    {
   1784        struct modify_ldt_ldt_s ldt;
   1785        ldt.entry_number = 1;
   1786        ldt.base_addr = (unsigned long)&seg_data1;
   1787        ldt.limit = (sizeof(seg_data1) + 0xfff) >> 12;
   1788        ldt.seg_32bit = 1;
   1789        ldt.contents = MODIFY_LDT_CONTENTS_DATA;
   1790        ldt.read_exec_only = 0;
   1791        ldt.limit_in_pages = 1;
   1792        ldt.seg_not_present = 1;
   1793        ldt.useable = 1;
   1794        modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */
   1795
   1796        if (setjmp(jmp_env) == 0) {
   1797            /* segment not present */
   1798            asm volatile ("movl %0, %%fs" : : "r" (MK_SEL(1)));
   1799        }
   1800    }
   1801#endif
   1802
   1803    /* test SEGV reporting */
   1804    printf("PF exception:\n");
   1805    if (setjmp(jmp_env) == 0) {
   1806        val = 1;
   1807        /* we add a nop to test a weird PC retrieval case */
   1808        asm volatile ("nop");
   1809        /* now store in an invalid address */
   1810        *(char *)0x1234 = 1;
   1811    }
   1812
   1813    /* test SEGV reporting */
   1814    printf("PF exception:\n");
   1815    if (setjmp(jmp_env) == 0) {
   1816        val = 1;
   1817        /* read from an invalid address */
   1818        v1 = *(char *)0x1234;
   1819    }
   1820
   1821    /* test illegal instruction reporting */
   1822    printf("UD2 exception:\n");
   1823    if (setjmp(jmp_env) == 0) {
   1824        /* now execute an invalid instruction */
   1825        asm volatile("ud2");
   1826    }
   1827    printf("lock nop exception:\n");
   1828    if (setjmp(jmp_env) == 0) {
   1829        /* now execute an invalid instruction */
   1830        asm volatile(".byte 0xf0, 0x90");
   1831    }
   1832
   1833    printf("INT exception:\n");
   1834    if (setjmp(jmp_env) == 0) {
   1835        asm volatile ("int $0xfd");
   1836    }
   1837    if (setjmp(jmp_env) == 0) {
   1838        asm volatile ("int $0x01");
   1839    }
   1840    if (setjmp(jmp_env) == 0) {
   1841        asm volatile (".byte 0xcd, 0x03");
   1842    }
   1843    if (setjmp(jmp_env) == 0) {
   1844        asm volatile ("int $0x04");
   1845    }
   1846    if (setjmp(jmp_env) == 0) {
   1847        asm volatile ("int $0x05");
   1848    }
   1849
   1850    printf("INT3 exception:\n");
   1851    if (setjmp(jmp_env) == 0) {
   1852        asm volatile ("int3");
   1853    }
   1854
   1855    printf("CLI exception:\n");
   1856    if (setjmp(jmp_env) == 0) {
   1857        asm volatile ("cli");
   1858    }
   1859
   1860    printf("STI exception:\n");
   1861    if (setjmp(jmp_env) == 0) {
   1862        asm volatile ("cli");
   1863    }
   1864
   1865#if !defined(__x86_64__)
   1866    printf("INTO exception:\n");
   1867    if (setjmp(jmp_env) == 0) {
   1868        /* overflow exception */
   1869        asm volatile ("addl $1, %0 ; into" : : "r" (0x7fffffff));
   1870    }
   1871#endif
   1872
   1873    printf("OUTB exception:\n");
   1874    if (setjmp(jmp_env) == 0) {
   1875        asm volatile ("outb %%al, %%dx" : : "d" (0x4321), "a" (0));
   1876    }
   1877
   1878    printf("INB exception:\n");
   1879    if (setjmp(jmp_env) == 0) {
   1880        asm volatile ("inb %%dx, %%al" : "=a" (val) : "d" (0x4321));
   1881    }
   1882
   1883    printf("REP OUTSB exception:\n");
   1884    if (setjmp(jmp_env) == 0) {
   1885        asm volatile ("rep outsb" : : "d" (0x4321), "S" (tab), "c" (1));
   1886    }
   1887
   1888    printf("REP INSB exception:\n");
   1889    if (setjmp(jmp_env) == 0) {
   1890        asm volatile ("rep insb" : : "d" (0x4321), "D" (tab), "c" (1));
   1891    }
   1892
   1893    printf("HLT exception:\n");
   1894    if (setjmp(jmp_env) == 0) {
   1895        asm volatile ("hlt");
   1896    }
   1897
   1898    printf("single step exception:\n");
   1899    val = 0;
   1900    if (setjmp(jmp_env) == 0) {
   1901        asm volatile ("pushf\n"
   1902                      "orl $0x00100, (%%esp)\n"
   1903                      "popf\n"
   1904                      "movl $0xabcd, %0\n"
   1905                      "movl $0x0, %0\n" : "=m" (val) : : "cc", "memory");
   1906    }
   1907    printf("val=0x%x\n", val);
   1908}
   1909
   1910#if !defined(__x86_64__)
   1911/* specific precise single step test */
   1912void sig_trap_handler(int sig, siginfo_t *info, void *puc)
   1913{
   1914    ucontext_t *uc = puc;
   1915    printf("EIP=" FMTLX "\n", (long)uc->uc_mcontext.gregs[REG_EIP]);
   1916}
   1917
   /* Source and destination buffers for the string instructions run by
      test_single_step(); its inline asm refers to both by symbol name. */
   1918const uint8_t sstep_buf1[4] = { 1, 2, 3, 4};
   1919uint8_t sstep_buf2[4];
   1920
   /*
    * Precise single step test: installs sig_trap_handler() for SIGTRAP, sets
    * bit 0x100 of EFLAGS, runs a fixed instruction sequence, clears the bit
    * again, and finally prints val and the contents of sstep_buf2.
    */
   1921void test_single_step(void)
   1922{
   1923    struct sigaction act;
   1924    volatile int val;
   1925    int i;
   1926
   1927    val = 0;
   1928    act.sa_sigaction = sig_trap_handler;
   1929    sigemptyset(&act.sa_mask);
   1930    act.sa_flags = SA_SIGINFO;
   1931    sigaction(SIGTRAP, &act, NULL);
           /* The pushf/orl/popf prologue sets bit 0x100 in EFLAGS; the matching
              pushf/andl/popf at the end of the asm clears it again. */
   1932    asm volatile ("pushf\n"
   1933                  "orl $0x00100, (%%esp)\n"
   1934                  "popf\n"
   1935                  "movl $0xabcd, %0\n"
   1936
   1937                  /* jmp test */
   1938                  "movl $3, %%ecx\n"
   1939                  "1:\n"
   1940                  "addl $1, %0\n"
   1941                  "decl %%ecx\n"
   1942                  "jnz 1b\n"
   1943
   1944                  /* movsb: the single step should stop at each movsb iteration */
   1945                  "movl $sstep_buf1, %%esi\n"
   1946                  "movl $sstep_buf2, %%edi\n"
   1947                  "movl $0, %%ecx\n"
   1948                  "rep movsb\n"
   1949                  "movl $3, %%ecx\n"
   1950                  "rep movsb\n"
   1951                  "movl $1, %%ecx\n"
   1952                  "rep movsb\n"
   1953
   1954                  /* cmpsb: the single step should stop at each cmpsb iteration */
   1955                  "movl $sstep_buf1, %%esi\n"
   1956                  "movl $sstep_buf2, %%edi\n"
   1957                  "movl $0, %%ecx\n"
   1958                  "rep cmpsb\n"
   1959                  "movl $4, %%ecx\n"
   1960                  "rep cmpsb\n"
   1961
   1962                  /* getpid() syscall: single step should skip one
   1963                     instruction */
   1964                  "movl $20, %%eax\n"
   1965                  "int $0x80\n"
   1966                  "movl $0, %%eax\n"
   1967
   1968                  /* when modifying SS, trace is not done on the next
   1969                     instruction */
   1970                  "movl %%ss, %%ecx\n"
   1971                  "movl %%ecx, %%ss\n"
   1972                  "addl $1, %0\n"
   1973                  "movl $1, %%eax\n"
   1974                  "movl %%ecx, %%ss\n"
   1975                  "jmp 1f\n"
   1976                  "addl $1, %0\n"
   1977                  "1:\n"
   1978                  "movl $1, %%eax\n"
   1979                  "pushl %%ecx\n"
   1980                  "popl %%ss\n"
   1981                  "addl $1, %0\n"
   1982                  "movl $1, %%eax\n"
   1983
   1984                  "pushf\n"
   1985                  "andl $~0x00100, (%%esp)\n"
   1986                  "popf\n"
   1987                  : "=m" (val)
   1988                  :
   1989                  : "cc", "memory", "eax", "ecx", "esi", "edi");
   1990    printf("val=%d\n", val);
   1991    for(i = 0; i < 4; i++)
   1992        printf("sstep_buf2[%d] = %d\n", i, sstep_buf2[i]);
   1993}
   1994
   1995/* self modifying code test: machine code of a small function that is
          called through a function pointer by test_self_modifying_code(),
          which rewrites code[1] (the low byte of the movl immediate) between
          calls */
   1996uint8_t code[] = {
   1997    0xb8, 0x1, 0x00, 0x00, 0x00, /* movl $1, %eax */
   1998    0xc3, /* ret */
   1999};
   2000
   /*
    * smc_code2: routine assembled into the .data section. It loads its stack
    * argument from 4(%esp) and stores it at smc_patch_addr2 + 1, i.e. over the
    * bytes that follow the first byte of the "movl $1, %eax" placed eight nops
    * further down, then runs into that instruction and returns.
    */
   2001asm(".section \".data\"\n"
   2002    "smc_code2:\n"
   2003    "movl 4(%esp), %eax\n"
   2004    "movl %eax, smc_patch_addr2 + 1\n"
   2005    "nop\n"
   2006    "nop\n"
   2007    "nop\n"
   2008    "nop\n"
   2009    "nop\n"
   2010    "nop\n"
   2011    "nop\n"
   2012    "nop\n"
   2013    "smc_patch_addr2:\n"
   2014    "movl $1, %eax\n"
   2015    "ret\n"
   2016    ".previous\n"
   2017    );
   2018
   2019typedef int FuncType(void);
   2020extern int smc_code2(int);
   2021void test_self_modifying_code(void)
   2022{
   2023    int i;
   2024    printf("self modifying code:\n");
   2025    printf("func1 = 0x%x\n", ((FuncType *)code)());
   2026    for(i = 2; i <= 4; i++) {
   2027        code[1] = i;
   2028        printf("func%d = 0x%x\n", i, ((FuncType *)code)());
   2029    }
   2030
   2031    /* more difficult test : the modified code is just after the
   2032       modifying instruction. It is forbidden in Intel specs, but it
   2033       is used by old DOS programs */
   2034    for(i = 2; i <= 4; i++) {
   2035        printf("smc_code2(%d) = %d\n", i, smc_code2(i));
   2036    }
   2037}
   2038#endif
   2039
   /* Scratch stack for TEST_ENTER: the macro points the stack and frame
      pointer registers into this array while it executes "enter". */
   2040long enter_stack[4096];
   2041
   /* Names of the stack and frame pointer registers for the current word
      size, written as inline asm template fragments (hence the doubled '%'). */
   2042#if defined(__x86_64__)
   2043#define RSP "%%rsp"
   2044#define RBP "%%rbp"
   2045#else
   2046#define RSP "%%esp"
   2047#define RBP "%%ebp"
   2048#endif
   2049
   /*
    * TEST_ENTER(size, stack_type, level)
    *
    * Executes "enter<size> $8, $<level>" with the stack and frame pointers
    * temporarily redirected into enter_stack, then prints the resulting
    * pointers (relative to the top of that stack) followed by every
    * stack_type slot between the new stack pointer and the top.
    *
    *   size        operand-size suffix appended to the mnemonic (string literal)
    *   stack_type  integer type of one stack slot for that operand size
    *   level       nesting level: used as the second immediate and printed
    *
    * Before the instruction runs, enter_stack is zeroed and its top 32 slots
    * are filled with 1..32; the frame pointer starts at the top of the stack
    * and the stack pointer just below those 32 slots.
    *
    * The asm is volatile and clobbers "memory" because "enter" stores into
    * enter_stack, which is not an asm operand: without the clobber the
    * compiler may move the fill loop or the dump loop across the asm.
    *
    * On non-x86_64 builds the macro expands to nothing (see below).
    */
   #if !defined(__x86_64__)
   /* causes an infinite loop, disable it for now.  */
   #define TEST_ENTER(size, stack_type, level)
   #else
   #define TEST_ENTER(size, stack_type, level)\
   {\
       long esp_save, esp_val, ebp_val, ebp_save, i;\
       stack_type *ptr, *stack_end, *stack_ptr;\
       memset(enter_stack, 0, sizeof(enter_stack));\
       stack_end = stack_ptr = (stack_type *)(enter_stack + 4096);\
       ebp_val = (long)stack_ptr;\
       for(i=1;i<=32;i++)\
          *--stack_ptr = i;\
       esp_val = (long)stack_ptr;\
       asm volatile("mov " RSP ", %[esp_save]\n"\
           "mov " RBP ", %[ebp_save]\n"\
           "mov %[esp_val], " RSP "\n"\
           "mov %[ebp_val], " RBP "\n"\
           "enter" size " $8, $" #level "\n"\
           "mov " RSP ", %[esp_val]\n"\
           "mov " RBP ", %[ebp_val]\n"\
           "mov %[esp_save], " RSP "\n"\
           "mov %[ebp_save], " RBP "\n"\
           : [esp_save] "=r" (esp_save),\
           [ebp_save] "=r" (ebp_save),\
           [esp_val] "=r" (esp_val),\
           [ebp_val] "=r" (ebp_val)\
           :  "[esp_val]" (esp_val),\
           "[ebp_val]" (ebp_val)\
           : "memory");\
       printf("level=%d:\n", level);\
       printf("esp_val=" FMTLX "\n", esp_val - (long)stack_end);\
       printf("ebp_val=" FMTLX "\n", ebp_val - (long)stack_end);\
       for(ptr = (stack_type *)esp_val; ptr < stack_end; ptr++)\
           printf(FMTLX "\n", (long)ptr[0]);\
   }
   #endif
   2086
   2087static void test_enter(void)
   2088{
   2089#if defined(__x86_64__)
   2090    TEST_ENTER("q", uint64_t, 0);
   2091    TEST_ENTER("q", uint64_t, 1);
   2092    TEST_ENTER("q", uint64_t, 2);
   2093    TEST_ENTER("q", uint64_t, 31);
   2094#else
   2095    TEST_ENTER("l", uint32_t, 0);
   2096    TEST_ENTER("l", uint32_t, 1);
   2097    TEST_ENTER("l", uint32_t, 2);
   2098    TEST_ENTER("l", uint32_t, 31);
   2099#endif
   2100
   2101    TEST_ENTER("w", uint16_t, 0);
   2102    TEST_ENTER("w", uint16_t, 1);
   2103    TEST_ENTER("w", uint16_t, 2);
   2104    TEST_ENTER("w", uint16_t, 31);
   2105}
   2106
   2107#ifdef TEST_SSE
   2108
   /* GCC vector types of 8 and 16 bytes; __m128 is the type of XMMReg.dq. */
   2109typedef int __m64 __attribute__ ((vector_size(8)));
   2110typedef float __m128 __attribute__ ((vector_size(16)));
   2111
   /* One 128-bit value that can be accessed as two doubles (d), four floats
      (s), four 32-bit integers (l), two 64-bit integers (q) or as a whole
      vector (dq); the test macros pass .dq to "x" asm operands and print .q. */
   2112typedef union {
   2113    double d[2];
   2114    float s[4];
   2115    uint32_t l[4];
   2116    uint64_t q[2];
   2117    __m128 dq;
   2118} XMMReg;
   2119
   /* Four 128-bit integer test patterns, 16-byte aligned. Row n is loaded by
      the test macros as q[0] = test_values[n][0], q[1] = test_values[n][1]. */
   2120static uint64_t __attribute__((aligned(16))) test_values[4][2] = {
   2121    { 0x456723c698694873, 0xdc515cff944a58ec },
   2122    { 0x1f297ccd58bad7ab, 0x41f21efba9e3e146 },
   2123    { 0x007c62c2085427f8, 0x231be9e8cde7438d },
   2124    { 0x0f76255a085427f8, 0xc233e9e8c4c9439a },
   2125};
   2126
   /* SSE_OP(op): executes the two-operand instruction "op b, r" on XMM
      registers, with r's register preloaded from a (a is tied to the output
      operand), then prints a, b and r as 128-bit hex values, q[1] first.
      Uses the XMMReg variables a, b and r of the enclosing scope. */
   2127#define SSE_OP(op)\
   2128{\
   2129    asm volatile (#op " %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\
   2130    printf("%-9s: a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X " r=" FMT64X "" FMT64X "\n",\
   2131           #op,\
   2132           a.q[1], a.q[0],\
   2133           b.q[1], b.q[0],\
   2134           r.q[1], r.q[0]);\
   2135}
   2136
   /* SSE_OP2(op): runs SSE_OP(op) twice, first with a = test_values[0],
      b = test_values[1], then with a = test_values[2], b = test_values[3]. */
   2137#define SSE_OP2(op)\
   2138{\
   2139    int i;\
   2140    for(i=0;i<2;i++) {\
   2141    a.q[0] = test_values[2*i][0];\
   2142    a.q[1] = test_values[2*i][1];\
   2143    b.q[0] = test_values[2*i+1][0];\
   2144    b.q[1] = test_values[2*i+1][1];\
   2145    SSE_OP(op);\
   2146    }\
   2147}
   2148
   /* MMX_OP2(op): tests op in its 64-bit form and then in its 128-bit form.
      The loop runs "op b, r" on "y" (MMX) register operands using only q[0]
      of the same two value pairs as SSE_OP2 and prints a, b and r; afterwards
      SSE_OP2(op) repeats the instruction on XMM registers. */
   2149#define MMX_OP2(op)\
   2150{\
   2151    int i;\
   2152    for(i=0;i<2;i++) {\
   2153    a.q[0] = test_values[2*i][0];\
   2154    b.q[0] = test_values[2*i+1][0];\
   2155    asm volatile (#op " %2, %0" : "=y" (r.q[0]) : "0" (a.q[0]), "y" (b.q[0]));\
   2156    printf("%-9s: a=" FMT64X " b=" FMT64X " r=" FMT64X "\n",\
   2157           #op,\
   2158           a.q[0],\
   2159           b.q[0],\
   2160           r.q[0]);\
   2161    }\
   2162    SSE_OP2(op);\
   2163}
   2164
   /* SHUF_OP(op, ib): executes "op $ib, b, r" with r's register preloaded
      from a, where a = test_values[0] and b = test_values[1], then prints
      a, b, the immediate ib and the result r. */
   2165#define SHUF_OP(op, ib)\
   2166{\
   2167    a.q[0] = test_values[0][0];\
   2168    a.q[1] = test_values[0][1];\
   2169    b.q[0] = test_values[1][0];\
   2170    b.q[1] = test_values[1][1];\
   2171    asm volatile (#op " $" #ib ", %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\
   2172    printf("%-9s: a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X " ib=%02x r=" FMT64X "" FMT64X "\n",\
   2173           #op,\
   2174           a.q[1], a.q[0],\
   2175           b.q[1], b.q[0],\
   2176           ib,\
   2177           r.q[1], r.q[0]);\
   2178}
   2179
   /* PSHUF_OP(op, ib): executes "op $ib, a, r" (separate source and
      destination XMM registers) for a = test_values[0] and
      a = test_values[2], printing a, the immediate ib and r each time. */
   2180#define PSHUF_OP(op, ib)\
   2181{\
   2182    int i;\
   2183    for(i=0;i<2;i++) {\
   2184    a.q[0] = test_values[2*i][0];\
   2185    a.q[1] = test_values[2*i][1];\
   2186    asm volatile (#op " $" #ib ", %1, %0" : "=x" (r.dq) : "x" (a.dq));\
   2187    printf("%-9s: a=" FMT64X "" FMT64X " ib=%02x r=" FMT64X "" FMT64X "\n",\
   2188           #op,\
   2189           a.q[1], a.q[0],\
   2190           ib,\
   2191           r.q[1], r.q[0]);\
   2192    }\
   2193}
   2194
   /* SHIFT_IM(op, ib): executes "op $ib, r" with r's register preloaded from
      a (immediate-count form) for a = test_values[0] and a = test_values[2],
      printing a, ib and r each time. */
   2195#define SHIFT_IM(op, ib)\
   2196{\
   2197    int i;\
   2198    for(i=0;i<2;i++) {\
   2199    a.q[0] = test_values[2*i][0];\
   2200    a.q[1] = test_values[2*i][1];\
   2201    asm volatile (#op " $" #ib ", %0" : "=x" (r.dq) : "0" (a.dq));\
   2202    printf("%-9s: a=" FMT64X "" FMT64X " ib=%02x r=" FMT64X "" FMT64X "\n",\
   2203           #op,\
   2204           a.q[1], a.q[0],\
   2205           ib,\
   2206           r.q[1], r.q[0]);\
   2207    }\
   2208}
   2209
   /* SHIFT_OP(op, ib): first runs SHIFT_IM(op, ib), then repeats the same
      instruction with the count supplied in an XMM register instead of an
      immediate (b.q[0] = ib, b.q[1] = 0), for a = test_values[0] and
      a = test_values[2], printing a, b and r each time. */
   2210#define SHIFT_OP(op, ib)\
   2211{\
   2212    int i;\
   2213    SHIFT_IM(op, ib);\
   2214    for(i=0;i<2;i++) {\
   2215    a.q[0] = test_values[2*i][0];\
   2216    a.q[1] = test_values[2*i][1];\
   2217    b.q[0] = ib;\
   2218    b.q[1] = 0;\
   2219    asm volatile (#op " %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\
   2220    printf("%-9s: a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X " r=" FMT64X "" FMT64X "\n",\
   2221           #op,\
   2222           a.q[1], a.q[0],\
   2223           b.q[1], b.q[0],\
   2224           r.q[1], r.q[0]);\
   2225    }\
   2226}
   2227
   /* MOVMSK(op): executes "op a, reg" (XMM source, general register
      destination) for a = test_values[0] and a = test_values[2] and prints
      a and the int result. */
   2228#define MOVMSK(op)\
   2229{\
   2230    int i, reg;\
   2231    for(i=0;i<2;i++) {\
   2232    a.q[0] = test_values[2*i][0];\
   2233    a.q[1] = test_values[2*i][1];\
   2234    asm volatile (#op " %1, %0" : "=r" (reg) : "x" (a.dq));\
   2235    printf("%-9s: a=" FMT64X "" FMT64X " r=%08x\n",\
   2236           #op,\
   2237           a.q[1], a.q[0],\
   2238           reg);\
   2239    }\
   2240}
   2241
   /* SSE_OPS(a): runs SSE_OP on the "ps" and then the "ss" variant of the
      mnemonic prefix a (e.g. add -> addps, addss). */
   2242#define SSE_OPS(a) \
   2243SSE_OP(a ## ps);\
   2244SSE_OP(a ## ss);
   2245
   /* SSE_OPD(a): runs SSE_OP on the "pd" and then the "sd" variant of the
      mnemonic prefix a (e.g. add -> addpd, addsd). */
   2246#define SSE_OPD(a) \
   2247SSE_OP(a ## pd);\
   2248SSE_OP(a ## sd);
   2249
   /* SSE_COMI(op, field): stores a1 and b1 (variables of the enclosing scope)
      into element 0 of the given XMMReg field of two local registers a and b,
      executes "op b, a", reads the flags back with pushf/pop and prints a1,
      b1 and the flags masked with CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A.
      Only element 0 of a and b is initialized here. */
   2250#define SSE_COMI(op, field)\
   2251{\
   2252    unsigned long eflags;\
   2253    XMMReg a, b;\
   2254    a.field[0] = a1;\
   2255    b.field[0] = b1;\
   2256    asm volatile (#op " %2, %1\n"\
   2257        "pushf\n"\
   2258        "pop %0\n"\
   2259        : "=rm" (eflags)\
   2260        : "x" (a.dq), "x" (b.dq));\
   2261    printf("%-9s: a=%f b=%f cc=%04lx\n",\
   2262           #op, a1, b1,\
   2263           eflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));\
   2264}
   2265
   /* Compares a1 with b1 using ucomiss, ucomisd, comiss and comisd (in that
      order) and prints the resulting flags for each.
      The parameter names must stay a1/b1: SSE_COMI refers to them by name. */
   2266void test_sse_comi(double a1, double b1)
   2267{
   2268    SSE_COMI(ucomiss, s);
   2269    SSE_COMI(ucomisd, d);
   2270    SSE_COMI(comiss, s);
   2271    SSE_COMI(comisd, d);
   2272}
   2273
   /* CVT_OP_XMM(op): executes "op a, r" between two XMM registers and prints
      a and r as 128-bit hex values. Uses a and r of the enclosing scope. */
   2274#define CVT_OP_XMM(op)\
   2275{\
   2276    asm volatile (#op " %1, %0" : "=x" (r.dq) : "x" (a.dq));\
   2277    printf("%-9s: a=" FMT64X "" FMT64X " r=" FMT64X "" FMT64X "\n",\
   2278           #op,\
   2279           a.q[1], a.q[0],\
   2280           r.q[1], r.q[0]);\
   2281}
   2282
   2283/* CVT_OP_XMM2MMX(op): executes "op a, r" from an XMM register into an MMX
          ("y") register, runs emms, then prints a and the 64-bit result.
          %xmm0 is listed as clobbered so that the source operand is not
          allocated to it: this avoids the case where both register indexes
          are 0, to test instruction decoding more extensively */
   2285#define CVT_OP_XMM2MMX(op)\
   2286{\
   2287    asm volatile (#op " %1, %0" : "=y" (r.q[0]) : "x" (a.dq) \
   2288                  : "%xmm0"); \
   2289    asm volatile("emms\n"); \
   2290    printf("%-9s: a=" FMT64X "" FMT64X " r=" FMT64X "\n",\
   2291           #op,\
   2292           a.q[1], a.q[0],\
   2293           r.q[0]);\
   2294}
   2295
   /* CVT_OP_MMX2XMM(op): executes "op a, r" from an MMX ("y") register holding
      a.q[0] into an XMM register, runs emms, then prints a.q[0] and r. */
   2296#define CVT_OP_MMX2XMM(op)\
   2297{\
   2298    asm volatile (#op " %1, %0" : "=x" (r.dq) : "y" (a.q[0]));\
   2299    asm volatile("emms\n"); \
   2300    printf("%-9s: a=" FMT64X " r=" FMT64X "" FMT64X "\n",\
   2301           #op,\
   2302           a.q[0],\
   2303           r.q[1], r.q[0]);\
   2304}
   2305
   /* CVT_OP_REG2XMM(op): executes "op a, r" from a general register holding
      the 32-bit value a.l[0] into an XMM register and prints a.l[0] and r. */
   2306#define CVT_OP_REG2XMM(op)\
   2307{\
   2308    asm volatile (#op " %1, %0" : "=x" (r.dq) : "r" (a.l[0]));\
   2309    printf("%-9s: a=%08x r=" FMT64X "" FMT64X "\n",\
   2310           #op,\
   2311           a.l[0],\
   2312           r.q[1], r.q[0]);\
   2313}
   2314
   /* CVT_OP_XMM2REG(op): executes "op a, r" from an XMM register into a
      general register stored to r.l[0] and prints a and that 32-bit result. */
   2315#define CVT_OP_XMM2REG(op)\
   2316{\
   2317    asm volatile (#op " %1, %0" : "=r" (r.l[0]) : "x" (a.dq));\
   2318    printf("%-9s: a=" FMT64X "" FMT64X " r=%08x\n",\
   2319           #op,\
   2320           a.q[1], a.q[0],\
   2321           r.l[0]);\
   2322}
   2323
   /*
    * Memory image written by FXSAVE and read by FXRSTOR: 512 bytes, which the
    * instructions require to be 16-byte aligned.
    *
    * xmm_regs has room for 16 registers: in 64-bit mode the image holds
    * XMM0-XMM15 and test_fxsave() prints all of them, so an 8-entry array
    * would be indexed past its end. Outside 64-bit mode the second half of
    * the array is simply the reserved area that follows XMM7. The trailing
    * padding is shortened accordingly, so the size of the structure and the
    * offset of every named field are unchanged.
    */
   struct fpxstate {
       uint16_t fpuc;              /* x87 control word */
       uint16_t fpus;              /* x87 status word */
       uint16_t fptag;             /* tag word */
       uint16_t fop;               /* last opcode */
       uint32_t fpuip;             /* instruction pointer offset */
       uint16_t cs_sel;
       uint16_t dummy0;
       uint32_t fpudp;             /* data pointer offset */
       uint16_t ds_sel;
       uint16_t dummy1;
       uint32_t mxcsr;
       uint32_t mxcsr_mask;
       uint8_t fpregs1[8 * 16];    /* ST0-ST7, one 16-byte slot each */
       uint8_t xmm_regs[16 * 16];  /* XMM0-XMM15, one 16-byte slot each */
       uint8_t dummy2[96];         /* pads the image to 512 bytes */
   };

   /* Save areas used by test_fxsave(). */
   static struct fpxstate fpx_state __attribute__((aligned(16)));
   static struct fpxstate fpx_state2 __attribute__((aligned(16)));
   2344
   2345void test_fxsave(void)
   2346{
   2347    struct fpxstate *fp = &fpx_state;
   2348    struct fpxstate *fp2 = &fpx_state2;
   2349    int i, nb_xmm;
   2350    XMMReg a, b;
   2351    a.q[0] = test_values[0][0];
   2352    a.q[1] = test_values[0][1];
   2353    b.q[0] = test_values[1][0];
   2354    b.q[1] = test_values[1][1];
   2355
   2356    asm("movdqa %2, %%xmm0\n"
   2357        "movdqa %3, %%xmm7\n"
   2358#if defined(__x86_64__)
   2359        "movdqa %2, %%xmm15\n"
   2360#endif
   2361        " fld1\n"
   2362        " fldpi\n"
   2363        " fldln2\n"
   2364        " fxsave %0\n"
   2365        " fxrstor %0\n"
   2366        " fxsave %1\n"
   2367        " fninit\n"
   2368        : "=m" (*(uint32_t *)fp2), "=m" (*(uint32_t *)fp)
   2369        : "m" (a), "m" (b));
   2370    printf("fpuc=%04x\n", fp->fpuc);
   2371    printf("fpus=%04x\n", fp->fpus);
   2372    printf("fptag=%04x\n", fp->fptag);
   2373    for(i = 0; i < 3; i++) {
   2374        printf("ST%d: " FMT64X " %04x\n",
   2375               i,
   2376               *(uint64_t *)&fp->fpregs1[i * 16],
   2377               *(uint16_t *)&fp->fpregs1[i * 16 + 8]);
   2378    }
   2379    printf("mxcsr=%08x\n", fp->mxcsr & 0x1f80);
   2380#if defined(__x86_64__)
   2381    nb_xmm = 16;
   2382#else
   2383    nb_xmm = 8;
   2384#endif
   2385    for(i = 0; i < nb_xmm; i++) {
   2386        printf("xmm%d: " FMT64X "" FMT64X "\n",
   2387               i,
   2388               *(uint64_t *)&fp->xmm_regs[i * 16],
   2389               *(uint64_t *)&fp->xmm_regs[i * 16 + 8]);
   2390    }
   2391}
   2392
   /*
    * MMX / SSE instruction test: runs the instruction test macros defined
    * above over integer, shuffle, shift, compare, arithmetic and conversion
    * instructions and prints the operands and result of each.
    *
    * The locals must keep the names r, a, b and i: the macros refer to
    * r, a and b of the enclosing scope by name.
    */
   2393void test_sse(void)
   2394{
   2395    XMMReg r, a, b;
   2396    int i;
   2397
   2398    MMX_OP2(punpcklbw);
   2399    MMX_OP2(punpcklwd);
   2400    MMX_OP2(punpckldq);
   2401    MMX_OP2(packsswb);
   2402    MMX_OP2(pcmpgtb);
   2403    MMX_OP2(pcmpgtw);
   2404    MMX_OP2(pcmpgtd);
   2405    MMX_OP2(packuswb);
   2406    MMX_OP2(punpckhbw);
   2407    MMX_OP2(punpckhwd);
   2408    MMX_OP2(punpckhdq);
   2409    MMX_OP2(packssdw);
   2410    MMX_OP2(pcmpeqb);
   2411    MMX_OP2(pcmpeqw);
   2412    MMX_OP2(pcmpeqd);
   2413
   2414    MMX_OP2(paddq);
   2415    MMX_OP2(pmullw);
   2416    MMX_OP2(psubusb);
   2417    MMX_OP2(psubusw);
   2418    MMX_OP2(pminub);
   2419    MMX_OP2(pand);
   2420    MMX_OP2(paddusb);
   2421    MMX_OP2(paddusw);
   2422    MMX_OP2(pmaxub);
   2423    MMX_OP2(pandn);
   2424
   2425    MMX_OP2(pmulhuw);
   2426    MMX_OP2(pmulhw);
   2427
   2428    MMX_OP2(psubsb);
   2429    MMX_OP2(psubsw);
   2430    MMX_OP2(pminsw);
   2431    MMX_OP2(por);
   2432    MMX_OP2(paddsb);
   2433    MMX_OP2(paddsw);
   2434    MMX_OP2(pmaxsw);
   2435    MMX_OP2(pxor);
   2436    MMX_OP2(pmuludq);
   2437    MMX_OP2(pmaddwd);
   2438    MMX_OP2(psadbw);
   2439    MMX_OP2(psubb);
   2440    MMX_OP2(psubw);
   2441    MMX_OP2(psubd);
   2442    MMX_OP2(psubq);
   2443    MMX_OP2(paddb);
   2444    MMX_OP2(paddw);
   2445    MMX_OP2(paddd);
   2446
   2447    MMX_OP2(pavgb);
   2448    MMX_OP2(pavgw);
   2449
           /* NOTE(review): in both pinsrw tests the destination is an
              output-only operand, so the asm itself only supplies the inserted
              word; the rest of the printed value comes from whatever the
              chosen register held. Confirm this is intended. */
   2450    asm volatile ("pinsrw $1, %1, %0" : "=y" (r.q[0]) : "r" (0x12345678));
   2451    printf("%-9s: r=" FMT64X "\n", "pinsrw", r.q[0]);
   2452
   2453    asm volatile ("pinsrw $5, %1, %0" : "=x" (r.dq) : "r" (0x12345678));
   2454    printf("%-9s: r=" FMT64X "" FMT64X "\n", "pinsrw", r.q[1], r.q[0]);
   2455
   2456    a.q[0] = test_values[0][0];
   2457    a.q[1] = test_values[0][1];
   2458    asm volatile ("pextrw $1, %1, %0" : "=r" (r.l[0]) : "y" (a.q[0]));
   2459    printf("%-9s: r=%08x\n", "pextrw", r.l[0]);
   2460
   2461    asm volatile ("pextrw $5, %1, %0" : "=r" (r.l[0]) : "x" (a.dq));
   2462    printf("%-9s: r=%08x\n", "pextrw", r.l[0]);
   2463
   2464    asm volatile ("pmovmskb %1, %0" : "=r" (r.l[0]) : "y" (a.q[0]));
   2465    printf("%-9s: r=%08x\n", "pmovmskb", r.l[0]);
   2466
   2467    asm volatile ("pmovmskb %1, %0" : "=r" (r.l[0]) : "x" (a.dq));
   2468    printf("%-9s: r=%08x\n", "pmovmskb", r.l[0]);
   2469
           /* masked stores: r is preset to all ones and its address is passed
              in the "D" register; the asm has no output operand and relies on
              the "memory" clobber instead */
   2470    {
   2471        r.q[0] = -1;
   2472        r.q[1] = -1;
   2473
   2474        a.q[0] = test_values[0][0];
   2475        a.q[1] = test_values[0][1];
   2476        b.q[0] = test_values[1][0];
   2477        b.q[1] = test_values[1][1];
   2478        asm volatile("maskmovq %1, %0" :
   2479                     : "y" (a.q[0]), "y" (b.q[0]), "D" (&r)
   2480                     : "memory");
   2481        printf("%-9s: r=" FMT64X " a=" FMT64X " b=" FMT64X "\n",
   2482               "maskmov",
   2483               r.q[0],
   2484               a.q[0],
   2485               b.q[0]);
   2486        asm volatile("maskmovdqu %1, %0" :
   2487                     : "x" (a.dq), "x" (b.dq), "D" (&r)
   2488                     : "memory");
   2489        printf("%-9s: r=" FMT64X "" FMT64X " a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X "\n",
   2490               "maskmov",
   2491               r.q[1], r.q[0],
   2492               a.q[1], a.q[0],
   2493               b.q[1], b.q[0]);
   2494    }
   2495
   2496    asm volatile ("emms");
   2497
   2498    SSE_OP2(punpcklqdq);
   2499    SSE_OP2(punpckhqdq);
   2500    SSE_OP2(andps);
   2501    SSE_OP2(andpd);
   2502    SSE_OP2(andnps);
   2503    SSE_OP2(andnpd);
   2504    SSE_OP2(orps);
   2505    SSE_OP2(orpd);
   2506    SSE_OP2(xorps);
   2507    SSE_OP2(xorpd);
   2508
   2509    SSE_OP2(unpcklps);
   2510    SSE_OP2(unpcklpd);
   2511    SSE_OP2(unpckhps);
   2512    SSE_OP2(unpckhpd);
   2513
   2514    SHUF_OP(shufps, 0x78);
   2515    SHUF_OP(shufpd, 0x02);
   2516
   2517    PSHUF_OP(pshufd, 0x78);
   2518    PSHUF_OP(pshuflw, 0x78);
   2519    PSHUF_OP(pshufhw, 0x78);
   2520
   2521    SHIFT_OP(psrlw, 7);
   2522    SHIFT_OP(psrlw, 16);
   2523    SHIFT_OP(psraw, 7);
   2524    SHIFT_OP(psraw, 16);
   2525    SHIFT_OP(psllw, 7);
   2526    SHIFT_OP(psllw, 16);
   2527
   2528    SHIFT_OP(psrld, 7);
   2529    SHIFT_OP(psrld, 32);
   2530    SHIFT_OP(psrad, 7);
   2531    SHIFT_OP(psrad, 32);
   2532    SHIFT_OP(pslld, 7);
   2533    SHIFT_OP(pslld, 32);
   2534
   2535    SHIFT_OP(psrlq, 7);
   2536    SHIFT_OP(psrlq, 32);
   2537    SHIFT_OP(psllq, 7);
   2538    SHIFT_OP(psllq, 32);
   2539
   2540    SHIFT_IM(psrldq, 16);
   2541    SHIFT_IM(psrldq, 7);
   2542    SHIFT_IM(pslldq, 16);
   2543    SHIFT_IM(pslldq, 7);
   2544
   2545    MOVMSK(movmskps);
   2546    MOVMSK(movmskpd);
   2547
   2548    /* FPU specific ops */
   2549
   2550    {
   2551        uint32_t mxcsr;
   2552        asm volatile("stmxcsr %0" : "=m" (mxcsr));
   2553        printf("mxcsr=%08x\n", mxcsr & 0x1f80);
   2554        asm volatile("ldmxcsr %0" : : "m" (mxcsr));
   2555    }
   2556
   2557    test_sse_comi(2, -1);
   2558    test_sse_comi(2, 2);
   2559    test_sse_comi(2, 3);
   2560    test_sse_comi(2, q_nan.d);
   2561    test_sse_comi(q_nan.d, -1);
   2562
           /* arithmetic and compare ops: pass 0 uses ordinary operands, pass 1
              replaces one element of a and one element of b with q_nan.d */
   2563    for(i = 0; i < 2; i++) {
   2564        a.s[0] = 2.7;
   2565        a.s[1] = 3.4;
   2566        a.s[2] = 4;
   2567        a.s[3] = -6.3;
   2568        b.s[0] = 45.7;
   2569        b.s[1] = 353.4;
   2570        b.s[2] = 4;
   2571        b.s[3] = 56.3;
   2572        if (i == 1) {
   2573            a.s[0] = q_nan.d;
   2574            b.s[3] = q_nan.d;
   2575        }
   2576
   2577        SSE_OPS(add);
   2578        SSE_OPS(mul);
   2579        SSE_OPS(sub);
   2580        SSE_OPS(min);
   2581        SSE_OPS(div);
   2582        SSE_OPS(max);
   2583        SSE_OPS(sqrt);
   2584        SSE_OPS(cmpeq);
   2585        SSE_OPS(cmplt);
   2586        SSE_OPS(cmple);
   2587        SSE_OPS(cmpunord);
   2588        SSE_OPS(cmpneq);
   2589        SSE_OPS(cmpnlt);
   2590        SSE_OPS(cmpnle);
   2591        SSE_OPS(cmpord);
   2592
   2593
   2594        a.d[0] = 2.7;
   2595        a.d[1] = -3.4;
   2596        b.d[0] = 45.7;
   2597        b.d[1] = -53.4;
   2598        if (i == 1) {
   2599            a.d[0] = q_nan.d;
   2600            b.d[1] = q_nan.d;
   2601        }
   2602        SSE_OPD(add);
   2603        SSE_OPD(mul);
   2604        SSE_OPD(sub);
   2605        SSE_OPD(min);
   2606        SSE_OPD(div);
   2607        SSE_OPD(max);
   2608        SSE_OPD(sqrt);
   2609        SSE_OPD(cmpeq);
   2610        SSE_OPD(cmplt);
   2611        SSE_OPD(cmple);
   2612        SSE_OPD(cmpunord);
   2613        SSE_OPD(cmpneq);
   2614        SSE_OPD(cmpnlt);
   2615        SSE_OPD(cmpnle);
   2616        SSE_OPD(cmpord);
   2617    }
   2618
   2619    /* float to float/int */
   2620    a.s[0] = 2.7;
   2621    a.s[1] = 3.4;
   2622    a.s[2] = 4;
   2623    a.s[3] = -6.3;
   2624    CVT_OP_XMM(cvtps2pd);
   2625    CVT_OP_XMM(cvtss2sd);
   2626    CVT_OP_XMM2MMX(cvtps2pi);
   2627    CVT_OP_XMM2MMX(cvttps2pi);
   2628    CVT_OP_XMM2REG(cvtss2si);
   2629    CVT_OP_XMM2REG(cvttss2si);
   2630    CVT_OP_XMM(cvtps2dq);
   2631    CVT_OP_XMM(cvttps2dq);
   2632
   2633    a.d[0] = 2.6;
   2634    a.d[1] = -3.4;
   2635    CVT_OP_XMM(cvtpd2ps);
   2636    CVT_OP_XMM(cvtsd2ss);
   2637    CVT_OP_XMM2MMX(cvtpd2pi);
   2638    CVT_OP_XMM2MMX(cvttpd2pi);
   2639    CVT_OP_XMM2REG(cvtsd2si);
   2640    CVT_OP_XMM2REG(cvttsd2si);
   2641    CVT_OP_XMM(cvtpd2dq);
   2642    CVT_OP_XMM(cvttpd2dq);
   2643
   2644    /* sse/mmx moves */
   2645    CVT_OP_XMM2MMX(movdq2q);
   2646    CVT_OP_MMX2XMM(movq2dq);
   2647
   2648    /* int to float */
   2649    a.l[0] = -6;
   2650    a.l[1] = 2;
   2651    a.l[2] = 100;
   2652    a.l[3] = -60000;
   2653    CVT_OP_MMX2XMM(cvtpi2ps);
   2654    CVT_OP_MMX2XMM(cvtpi2pd);
   2655    CVT_OP_REG2XMM(cvtsi2ss);
   2656    CVT_OP_REG2XMM(cvtsi2sd);
   2657    CVT_OP_XMM(cvtdq2ps);
   2658    CVT_OP_XMM(cvtdq2pd);
   2659
   2660    /* XXX: test PNI insns */
   2661#if 0
   2662    SSE_OP2(movshdup);
   2663#endif
   2664    asm volatile ("emms");
   2665}
   2666
   2667#endif
   2668
   /* TEST_CONV_RAX(op): executes the operand-less instruction op with the
      value i2l(0x8234a6f8) in the "a" register (the register is both input
      and output) and prints the original value and the result. */
   2669#define TEST_CONV_RAX(op)\
   2670{\
   2671    unsigned long a, r;\
   2672    a = i2l(0x8234a6f8);\
   2673    r = a;\
   2674    asm volatile(#op : "=a" (r) : "0" (r));\
   2675    printf("%-10s A=" FMTLX " R=" FMTLX "\n", #op, a, r);\
   2676}
   2677
   /* TEST_CONV_RAX_RDX(op): executes the operand-less instruction op with
      i2l(0x8234a6f8) in the "a" register and i2l(0x8345a1f2) in the "d"
      register (both are inputs and outputs) and prints the original "a" value
      followed by the resulting a:d pair. */
   2678#define TEST_CONV_RAX_RDX(op)\
   2679{\
   2680    unsigned long a, d, r, rh;                   \
   2681    a = i2l(0x8234a6f8);\
   2682    d = i2l(0x8345a1f2);\
   2683    r = a;\
   2684    rh = d;\
   2685    asm volatile(#op : "=a" (r), "=d" (rh) : "0" (r), "1" (rh));   \
   2686    printf("%-10s A=" FMTLX " R=" FMTLX ":" FMTLX "\n", #op, a, r, rh);  \
   2687}
   2688
   2689void test_conv(void)
   2690{
   2691    TEST_CONV_RAX(cbw);
   2692    TEST_CONV_RAX(cwde);
   2693#if defined(__x86_64__)
   2694    TEST_CONV_RAX(cdqe);
   2695#endif
   2696
   2697    TEST_CONV_RAX_RDX(cwd);
   2698    TEST_CONV_RAX_RDX(cdq);
   2699#if defined(__x86_64__)
   2700    TEST_CONV_RAX_RDX(cqo);
   2701#endif
   2702
   2703    {
   2704        unsigned long a, r;
   2705        a = i2l(0x12345678);
   2706        asm volatile("bswapl %k0" : "=r" (r) : "0" (a));
   2707        printf("%-10s: A=" FMTLX " R=" FMTLX "\n", "bswapl", a, r);
   2708    }
   2709#if defined(__x86_64__)
   2710    {
   2711        unsigned long a, r;
   2712        a = i2l(0x12345678);
   2713        asm volatile("bswapq %0" : "=r" (r) : "0" (a));
   2714        printf("%-10s: A=" FMTLX " R=" FMTLX "\n", "bswapq", a, r);
   2715    }
   2716#endif
   2717}
   2718
   /* Bounds of the table of initialization function pointers walked by main()
      (presumably emitted by the linker for an "initcall" section - confirm
      against where the entries are registered). */
   extern void *__start_initcall;
   extern void *__stop_initcall;


   /*
    * Test driver: first calls every function pointer stored between
    * __start_initcall and __stop_initcall, then runs the individual test
    * groups in a fixed order. Groups that depend on the target or on optional
    * features are selected at compile time. Always returns 0; argc and argv
    * are not used.
    */
   int main(int argc, char **argv)
   {
       void **initcall;

       for (initcall = &__start_initcall; initcall != &__stop_initcall;
            initcall++) {
           void (*init_fn)(void) = *initcall;

           init_fn();
       }

       test_bsx();
       test_mul();
       test_jcc();
       test_loop();
       test_floats();
   #if !defined(__x86_64__)
       test_bcd();
   #endif
       test_xchg();
       test_string();
       test_misc();
       test_lea();
   #ifdef TEST_SEGS
       test_segs();
       test_code16();
   #endif
   #ifdef TEST_VM86
       test_vm86();
   #endif
   #if !defined(__x86_64__)
       test_exceptions();
       test_self_modifying_code();
       test_single_step();
   #endif
       test_enter();
       test_conv();
   #ifdef TEST_SSE
       test_sse();
       test_fxsave();
   #endif
       return 0;
   }