cachepc-qemu

Fork of AMDESE/qemu with changes for the cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu

translate-all.c (73193B)


      1/*
      2 *  Host code generation
      3 *
      4 *  Copyright (c) 2003 Fabrice Bellard
      5 *
      6 * This library is free software; you can redistribute it and/or
      7 * modify it under the terms of the GNU Lesser General Public
      8 * License as published by the Free Software Foundation; either
      9 * version 2.1 of the License, or (at your option) any later version.
     10 *
     11 * This library is distributed in the hope that it will be useful,
     12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14 * Lesser General Public License for more details.
     15 *
     16 * You should have received a copy of the GNU Lesser General Public
     17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     18 */
     19
     20#include "qemu/osdep.h"
     21#include "qemu-common.h"
     22
     23#define NO_CPU_IO_DEFS
     24#include "trace.h"
     25#include "disas/disas.h"
     26#include "exec/exec-all.h"
     27#include "tcg/tcg.h"
     28#if defined(CONFIG_USER_ONLY)
     29#include "qemu.h"
     30#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
     31#include <sys/param.h>
     32#if __FreeBSD_version >= 700104
     33#define HAVE_KINFO_GETVMMAP
     34#define sigqueue sigqueue_freebsd  /* avoid redefinition */
     35#include <sys/proc.h>
     36#include <machine/profile.h>
     37#define _KERNEL
     38#include <sys/user.h>
     39#undef _KERNEL
     40#undef sigqueue
     41#include <libutil.h>
     42#endif
     43#endif
     44#else
     45#include "exec/ram_addr.h"
     46#endif
     47
     48#include "exec/cputlb.h"
     49#include "exec/translate-all.h"
     50#include "qemu/bitmap.h"
     51#include "qemu/qemu-print.h"
     52#include "qemu/timer.h"
     53#include "qemu/main-loop.h"
     54#include "exec/log.h"
     55#include "sysemu/cpus.h"
     56#include "sysemu/cpu-timers.h"
     57#include "sysemu/tcg.h"
     58#include "qapi/error.h"
     59#include "hw/core/tcg-cpu-ops.h"
     60#include "tb-hash.h"
     61#include "tb-context.h"
     62#include "internal.h"
     63
     64/* #define DEBUG_TB_INVALIDATE */
     65/* #define DEBUG_TB_FLUSH */
     66/* make various TB consistency checks */
     67/* #define DEBUG_TB_CHECK */
     68
     69#ifdef DEBUG_TB_INVALIDATE
     70#define DEBUG_TB_INVALIDATE_GATE 1
     71#else
     72#define DEBUG_TB_INVALIDATE_GATE 0
     73#endif
     74
     75#ifdef DEBUG_TB_FLUSH
     76#define DEBUG_TB_FLUSH_GATE 1
     77#else
     78#define DEBUG_TB_FLUSH_GATE 0
     79#endif
     80
     81#if !defined(CONFIG_USER_ONLY)
     82/* TB consistency checks only implemented for usermode emulation.  */
     83#undef DEBUG_TB_CHECK
     84#endif
     85
     86#ifdef DEBUG_TB_CHECK
     87#define DEBUG_TB_CHECK_GATE 1
     88#else
     89#define DEBUG_TB_CHECK_GATE 0
     90#endif
     91
      92/* Access to the various translation structures needs to be serialised via locks
      93 * for consistency.
      94 * In user-mode emulation, access to the memory-related structures is protected
      95 * by mmap_lock.
     96 * In !user-mode we use per-page locks.
     97 */
     98#ifdef CONFIG_SOFTMMU
     99#define assert_memory_lock()
    100#else
    101#define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
    102#endif
    103
    104#define SMC_BITMAP_USE_THRESHOLD 10
    105
    106typedef struct PageDesc {
    107    /* list of TBs intersecting this ram page */
    108    uintptr_t first_tb;
    109#ifdef CONFIG_SOFTMMU
     110    /* in order to optimize self-modifying code, we count the number
     111       of lookups we do to a given page before switching to a bitmap */
    112    unsigned long *code_bitmap;
    113    unsigned int code_write_count;
    114#else
    115    unsigned long flags;
    116    void *target_data;
    117#endif
    118#ifndef CONFIG_USER_ONLY
    119    QemuSpin lock;
    120#endif
    121} PageDesc;
    122
    123/**
    124 * struct page_entry - page descriptor entry
    125 * @pd:     pointer to the &struct PageDesc of the page this entry represents
    126 * @index:  page index of the page
    127 * @locked: whether the page is locked
    128 *
    129 * This struct helps us keep track of the locked state of a page, without
    130 * bloating &struct PageDesc.
    131 *
    132 * A page lock protects accesses to all fields of &struct PageDesc.
    133 *
    134 * See also: &struct page_collection.
    135 */
    136struct page_entry {
    137    PageDesc *pd;
    138    tb_page_addr_t index;
    139    bool locked;
    140};
    141
    142/**
    143 * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
    144 * @tree:   Binary search tree (BST) of the pages, with key == page index
    145 * @max:    Pointer to the page in @tree with the highest page index
    146 *
    147 * To avoid deadlock we lock pages in ascending order of page index.
    148 * When operating on a set of pages, we need to keep track of them so that
    149 * we can lock them in order and also unlock them later. For this we collect
    150 * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
    151 * @tree implementation we use does not provide an O(1) operation to obtain the
    152 * highest-ranked element, we use @max to keep track of the inserted page
    153 * with the highest index. This is valuable because if a page is not in
    154 * the tree and its index is higher than @max's, then we can lock it
    155 * without breaking the locking order rule.
    156 *
    157 * Note on naming: 'struct page_set' would be shorter, but we already have a few
    158 * page_set_*() helpers, so page_collection is used instead to avoid confusion.
    159 *
    160 * See also: page_collection_lock().
    161 */
    162struct page_collection {
    163    GTree *tree;
    164    struct page_entry *max;
    165};
    166
    167/* list iterators for lists of tagged pointers in TranslationBlock */
    168#define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
    169    for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
    170         tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
    171             tb = (TranslationBlock *)((uintptr_t)tb & ~1))
    172
    173#define PAGE_FOR_EACH_TB(pagedesc, tb, n)                       \
    174    TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
    175
    176#define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
    177    TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
    178
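/*
 * Illustrative sketch (annotation, not part of the original file): the lists
 * iterated by the macros above store a tag in the low bit of each pointer,
 * recording which of the two per-page (or per-jump) link slots the next
 * element uses.  A minimal stand-alone model of that encoding, using
 * hypothetical names, might look like this:
 */
#if 0 /* example only */
struct node {
    uintptr_t next[2];              /* tagged links: pointer | slot index */
};

static void tagged_list_push(uintptr_t *head, struct node *n, unsigned slot)
{
    n->next[slot] = *head;              /* inherit the current (tagged) head */
    *head = (uintptr_t)n | (slot & 1);  /* publish: pointer with slot in bit 0 */
}

static void tagged_list_walk(uintptr_t head)
{
    unsigned slot = head & 1;
    struct node *n = (struct node *)(head & ~(uintptr_t)1);

    while (n != NULL) {
        /* visit n here; it was linked through slot 'slot' */
        uintptr_t next = n->next[slot];
        slot = next & 1;
        n = (struct node *)(next & ~(uintptr_t)1);
    }
}
#endif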
    179/*
    180 * In system mode we want L1_MAP to be based on ram offsets,
    181 * while in user mode we want it to be based on virtual addresses.
    182 *
    183 * TODO: For user mode, see the caveat re host vs guest virtual
    184 * address spaces near GUEST_ADDR_MAX.
    185 */
    186#if !defined(CONFIG_USER_ONLY)
    187#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
    188# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
    189#else
    190# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
    191#endif
    192#else
    193# define L1_MAP_ADDR_SPACE_BITS  MIN(HOST_LONG_BITS, TARGET_ABI_BITS)
    194#endif
    195
    196/* Size of the L2 (and L3, etc) page tables.  */
    197#define V_L2_BITS 10
    198#define V_L2_SIZE (1 << V_L2_BITS)
    199
    200/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
    201QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
    202                  sizeof_field(TranslationBlock, trace_vcpu_dstate)
    203                  * BITS_PER_BYTE);
    204
    205/*
    206 * L1 Mapping properties
    207 */
    208static int v_l1_size;
    209static int v_l1_shift;
    210static int v_l2_levels;
    211
    212/* The bottom level has pointers to PageDesc, and is indexed by
    213 * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
    214 */
    215#define V_L1_MIN_BITS 4
    216#define V_L1_MAX_BITS (V_L2_BITS + 3)
    217#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)
    218
    219static void *l1_map[V_L1_MAX_SIZE];
    220
    221TBContext tb_ctx;
    222
    223static void page_table_config_init(void)
    224{
    225    uint32_t v_l1_bits;
    226
    227    assert(TARGET_PAGE_BITS);
    228    /* The bits remaining after N lower levels of page tables.  */
    229    v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
    230    if (v_l1_bits < V_L1_MIN_BITS) {
    231        v_l1_bits += V_L2_BITS;
    232    }
    233
    234    v_l1_size = 1 << v_l1_bits;
    235    v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
    236    v_l2_levels = v_l1_shift / V_L2_BITS - 1;
    237
    238    assert(v_l1_bits <= V_L1_MAX_BITS);
    239    assert(v_l1_shift % V_L2_BITS == 0);
    240    assert(v_l2_levels >= 0);
    241}
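/*
 * Worked example (annotation, not part of the original file): assume
 * L1_MAP_ADDR_SPACE_BITS == 48, TARGET_PAGE_BITS == 12 and V_L2_BITS == 10.
 * Then 48 - 12 = 36 bits must be resolved by the tree:
 *   v_l1_bits   = 36 % 10 = 6        (>= V_L1_MIN_BITS, so no adjustment)
 *   v_l1_size   = 1 << 6  = 64 entries in l1_map
 *   v_l1_shift  = 36 - 6  = 30
 *   v_l2_levels = 30 / 10 - 1 = 2 intermediate levels
 * so a lookup walks 6 L1 bits, 2 x 10 intermediate bits and 10 bottom-level
 * bits = 36 bits, i.e. the full non-page part of the address space.
 */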
    242
    243/* Encode VAL as a signed leb128 sequence at P.
    244   Return P incremented past the encoded value.  */
    245static uint8_t *encode_sleb128(uint8_t *p, target_long val)
    246{
    247    int more, byte;
    248
    249    do {
    250        byte = val & 0x7f;
    251        val >>= 7;
    252        more = !((val == 0 && (byte & 0x40) == 0)
    253                 || (val == -1 && (byte & 0x40) != 0));
    254        if (more) {
    255            byte |= 0x80;
    256        }
    257        *p++ = byte;
    258    } while (more);
    259
    260    return p;
    261}
    262
    263/* Decode a signed leb128 sequence at *PP; increment *PP past the
    264   decoded value.  Return the decoded value.  */
    265static target_long decode_sleb128(const uint8_t **pp)
    266{
    267    const uint8_t *p = *pp;
    268    target_long val = 0;
    269    int byte, shift = 0;
    270
    271    do {
    272        byte = *p++;
    273        val |= (target_ulong)(byte & 0x7f) << shift;
    274        shift += 7;
    275    } while (byte & 0x80);
    276    if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
    277        val |= -(target_ulong)1 << shift;
    278    }
    279
    280    *pp = p;
    281    return val;
    282}
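/*
 * Usage sketch (annotation, not part of the original file): the two helpers
 * above are inverses; e.g. the delta -3 encodes to the single byte 0x7d
 * (sign bit 0x40 set, continuation bit clear) and decodes back to -3.
 * A hypothetical self-test could look like this:
 */
#if 0 /* example only */
static void sleb128_selftest(void)
{
    uint8_t buf[16];
    const uint8_t *rp;
    target_long values[] = { 0, -3, 127, -128, 0x12345 };

    for (size_t i = 0; i < ARRAY_SIZE(values); i++) {
        uint8_t *wp = encode_sleb128(buf, values[i]);
        rp = buf;
        assert(decode_sleb128(&rp) == values[i]);
        assert(rp == wp);       /* decoder consumed exactly what was written */
    }
}
#endif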
    283
    284/* Encode the data collected about the instructions while compiling TB.
    285   Place the data at BLOCK, and return the number of bytes consumed.
    286
    287   The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
    288   which come from the target's insn_start data, followed by a uintptr_t
    289   which comes from the host pc of the end of the code implementing the insn.
    290
    291   Each line of the table is encoded as sleb128 deltas from the previous
    292   line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
    293   That is, the first column is seeded with the guest pc, the last column
    294   with the host pc, and the middle columns with zeros.  */
    295
    296static int encode_search(TranslationBlock *tb, uint8_t *block)
    297{
    298    uint8_t *highwater = tcg_ctx->code_gen_highwater;
    299    uint8_t *p = block;
    300    int i, j, n;
    301
    302    for (i = 0, n = tb->icount; i < n; ++i) {
    303        target_ulong prev;
    304
    305        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
    306            if (i == 0) {
    307                prev = (j == 0 ? tb->pc : 0);
    308            } else {
    309                prev = tcg_ctx->gen_insn_data[i - 1][j];
    310            }
    311            p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
    312        }
    313        prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
    314        p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
    315
    316        /* Test for (pending) buffer overflow.  The assumption is that any
    317           one row beginning below the high water mark cannot overrun
    318           the buffer completely.  Thus we can test for overflow after
    319           encoding a row without having to check during encoding.  */
    320        if (unlikely(p > highwater)) {
    321            return -1;
    322        }
    323    }
    324
    325    return p - block;
    326}
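/*
 * Worked example (annotation, not part of the original file): for a TB at
 * guest pc 0x1000 with two insns of 4 guest bytes each, whose host code for
 * insn 0 ends at offset 0x20 and for insn 1 at offset 0x38, the rows before
 * delta-encoding are (a single insn_start word is assumed here):
 *     { 0x1000, 0x20 }        insn 0
 *     { 0x1004, 0x38 }        insn 1
 * With the seed { tb->pc, 0 } the emitted sleb128 deltas are:
 *     row 0:  0x1000 - 0x1000 = 0,   0x20 - 0    = 0x20
 *     row 1:  0x1004 - 0x1000 = 4,   0x38 - 0x20 = 0x18
 * cpu_restore_state_from_tb() below re-accumulates these deltas until the
 * running host pc passes searched_pc.
 */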
    327
    328/* The cpu state corresponding to 'searched_pc' is restored.
     329 * When reset_icount is true, the current TB will be interrupted and
    330 * icount should be recalculated.
    331 */
    332static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
    333                                     uintptr_t searched_pc, bool reset_icount)
    334{
    335    target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
    336    uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
    337    CPUArchState *env = cpu->env_ptr;
    338    const uint8_t *p = tb->tc.ptr + tb->tc.size;
    339    int i, j, num_insns = tb->icount;
    340#ifdef CONFIG_PROFILER
    341    TCGProfile *prof = &tcg_ctx->prof;
    342    int64_t ti = profile_getclock();
    343#endif
    344
    345    searched_pc -= GETPC_ADJ;
    346
    347    if (searched_pc < host_pc) {
    348        return -1;
    349    }
    350
    351    /* Reconstruct the stored insn data while looking for the point at
    352       which the end of the insn exceeds the searched_pc.  */
    353    for (i = 0; i < num_insns; ++i) {
    354        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
    355            data[j] += decode_sleb128(&p);
    356        }
    357        host_pc += decode_sleb128(&p);
    358        if (host_pc > searched_pc) {
    359            goto found;
    360        }
    361    }
    362    return -1;
    363
    364 found:
    365    if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) {
    366        assert(icount_enabled());
    367        /* Reset the cycle counter to the start of the block
     368           and shift it by the number of actually executed instructions */
    369        cpu_neg(cpu)->icount_decr.u16.low += num_insns - i;
    370    }
    371    restore_state_to_opc(env, tb, data);
    372
    373#ifdef CONFIG_PROFILER
    374    qatomic_set(&prof->restore_time,
    375                prof->restore_time + profile_getclock() - ti);
    376    qatomic_set(&prof->restore_count, prof->restore_count + 1);
    377#endif
    378    return 0;
    379}
    380
    381bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
    382{
    383    /*
    384     * The host_pc has to be in the rx region of the code buffer.
    385     * If it is not we will not be able to resolve it here.
    386     * The two cases where host_pc will not be correct are:
    387     *
    388     *  - fault during translation (instruction fetch)
    389     *  - fault from helper (not using GETPC() macro)
    390     *
     391     * Either way we need to return early as we can't resolve it here.
    392     */
    393    if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
    394        TranslationBlock *tb = tcg_tb_lookup(host_pc);
    395        if (tb) {
    396            cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit);
    397            return true;
    398        }
    399    }
    400    return false;
    401}
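/*
 * Usage sketch (annotation, not part of the original file): target helpers
 * typically capture the host return address with GETPC() and hand it to
 * cpu_restore_state() (directly, or via cpu_loop_exit_restore()) before
 * raising an exception, so the guest pc and icount are rewound to the
 * faulting instruction.  'operation_fails' below is a hypothetical stand-in:
 */
#if 0 /* example only */
void helper_do_something_that_may_fault(CPUArchState *env, uint32_t arg)
{
    uintptr_t retaddr = GETPC();    /* host pc inside the generated code */

    if (operation_fails(env, arg)) {
        cpu_restore_state(env_cpu(env), retaddr, true);
        cpu_loop_exit(env_cpu(env));
    }
}
#endif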
    402
    403void page_init(void)
    404{
    405    page_size_init();
    406    page_table_config_init();
    407
    408#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
    409    {
    410#ifdef HAVE_KINFO_GETVMMAP
    411        struct kinfo_vmentry *freep;
    412        int i, cnt;
    413
    414        freep = kinfo_getvmmap(getpid(), &cnt);
    415        if (freep) {
    416            mmap_lock();
    417            for (i = 0; i < cnt; i++) {
    418                unsigned long startaddr, endaddr;
    419
    420                startaddr = freep[i].kve_start;
    421                endaddr = freep[i].kve_end;
    422                if (h2g_valid(startaddr)) {
    423                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
    424
    425                    if (h2g_valid(endaddr)) {
    426                        endaddr = h2g(endaddr);
    427                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
    428                    } else {
    429#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
    430                        endaddr = ~0ul;
    431                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
    432#endif
    433                    }
    434                }
    435            }
    436            free(freep);
    437            mmap_unlock();
    438        }
    439#else
    440        FILE *f;
    441
    442        last_brk = (unsigned long)sbrk(0);
    443
    444        f = fopen("/compat/linux/proc/self/maps", "r");
    445        if (f) {
    446            mmap_lock();
    447
    448            do {
    449                unsigned long startaddr, endaddr;
    450                int n;
    451
    452                n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
    453
    454                if (n == 2 && h2g_valid(startaddr)) {
    455                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
    456
    457                    if (h2g_valid(endaddr)) {
    458                        endaddr = h2g(endaddr);
    459                    } else {
    460                        endaddr = ~0ul;
    461                    }
    462                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
    463                }
    464            } while (!feof(f));
    465
    466            fclose(f);
    467            mmap_unlock();
    468        }
    469#endif
    470    }
    471#endif
    472}
    473
    474static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
    475{
    476    PageDesc *pd;
    477    void **lp;
    478    int i;
    479
    480    /* Level 1.  Always allocated.  */
    481    lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
    482
    483    /* Level 2..N-1.  */
    484    for (i = v_l2_levels; i > 0; i--) {
    485        void **p = qatomic_rcu_read(lp);
    486
    487        if (p == NULL) {
    488            void *existing;
    489
    490            if (!alloc) {
    491                return NULL;
    492            }
    493            p = g_new0(void *, V_L2_SIZE);
    494            existing = qatomic_cmpxchg(lp, NULL, p);
    495            if (unlikely(existing)) {
    496                g_free(p);
    497                p = existing;
    498            }
    499        }
    500
    501        lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
    502    }
    503
    504    pd = qatomic_rcu_read(lp);
    505    if (pd == NULL) {
    506        void *existing;
    507
    508        if (!alloc) {
    509            return NULL;
    510        }
    511        pd = g_new0(PageDesc, V_L2_SIZE);
    512#ifndef CONFIG_USER_ONLY
    513        {
    514            int i;
    515
    516            for (i = 0; i < V_L2_SIZE; i++) {
    517                qemu_spin_init(&pd[i].lock);
    518            }
    519        }
    520#endif
    521        existing = qatomic_cmpxchg(lp, NULL, pd);
    522        if (unlikely(existing)) {
    523#ifndef CONFIG_USER_ONLY
    524            {
    525                int i;
    526
    527                for (i = 0; i < V_L2_SIZE; i++) {
    528                    qemu_spin_destroy(&pd[i].lock);
    529                }
    530            }
    531#endif
    532            g_free(pd);
    533            pd = existing;
    534        }
    535    }
    536
    537    return pd + (index & (V_L2_SIZE - 1));
    538}
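/*
 * Design note (annotation, not part of the original file): each tree level
 * above is populated with the same lock-free "allocate, try to publish,
 * back off" pattern, so concurrent readers never see a half-built level and
 * a losing allocator simply frees its copy.  A minimal sketch of the pattern
 * in isolation, with hypothetical names:
 */
#if 0 /* example only */
static void *publish_or_reuse(void **slot, size_t size)
{
    void *cur = qatomic_rcu_read(slot);
    void *fresh, *existing;

    if (cur != NULL) {
        return cur;                     /* someone else already installed it */
    }
    fresh = g_malloc0(size);
    existing = qatomic_cmpxchg(slot, NULL, fresh);
    if (existing != NULL) {             /* lost the race: discard our copy */
        g_free(fresh);
        return existing;
    }
    return fresh;
}
#endif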
    539
    540static inline PageDesc *page_find(tb_page_addr_t index)
    541{
    542    return page_find_alloc(index, 0);
    543}
    544
    545static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
    546                           PageDesc **ret_p2, tb_page_addr_t phys2, int alloc);
    547
    548/* In user-mode page locks aren't used; mmap_lock is enough */
    549#ifdef CONFIG_USER_ONLY
    550
    551#define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())
    552
    553static inline void page_lock(PageDesc *pd)
    554{ }
    555
    556static inline void page_unlock(PageDesc *pd)
    557{ }
    558
    559static inline void page_lock_tb(const TranslationBlock *tb)
    560{ }
    561
    562static inline void page_unlock_tb(const TranslationBlock *tb)
    563{ }
    564
    565struct page_collection *
    566page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
    567{
    568    return NULL;
    569}
    570
    571void page_collection_unlock(struct page_collection *set)
    572{ }
    573#else /* !CONFIG_USER_ONLY */
    574
    575#ifdef CONFIG_DEBUG_TCG
    576
    577static __thread GHashTable *ht_pages_locked_debug;
    578
    579static void ht_pages_locked_debug_init(void)
    580{
    581    if (ht_pages_locked_debug) {
    582        return;
    583    }
    584    ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
    585}
    586
    587static bool page_is_locked(const PageDesc *pd)
    588{
    589    PageDesc *found;
    590
    591    ht_pages_locked_debug_init();
    592    found = g_hash_table_lookup(ht_pages_locked_debug, pd);
    593    return !!found;
    594}
    595
    596static void page_lock__debug(PageDesc *pd)
    597{
    598    ht_pages_locked_debug_init();
    599    g_assert(!page_is_locked(pd));
    600    g_hash_table_insert(ht_pages_locked_debug, pd, pd);
    601}
    602
    603static void page_unlock__debug(const PageDesc *pd)
    604{
    605    bool removed;
    606
    607    ht_pages_locked_debug_init();
    608    g_assert(page_is_locked(pd));
    609    removed = g_hash_table_remove(ht_pages_locked_debug, pd);
    610    g_assert(removed);
    611}
    612
    613static void
    614do_assert_page_locked(const PageDesc *pd, const char *file, int line)
    615{
    616    if (unlikely(!page_is_locked(pd))) {
    617        error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
    618                     pd, file, line);
    619        abort();
    620    }
    621}
    622
    623#define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
    624
    625void assert_no_pages_locked(void)
    626{
    627    ht_pages_locked_debug_init();
    628    g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
    629}
    630
    631#else /* !CONFIG_DEBUG_TCG */
    632
    633#define assert_page_locked(pd)
    634
    635static inline void page_lock__debug(const PageDesc *pd)
    636{
    637}
    638
    639static inline void page_unlock__debug(const PageDesc *pd)
    640{
    641}
    642
    643#endif /* CONFIG_DEBUG_TCG */
    644
    645static inline void page_lock(PageDesc *pd)
    646{
    647    page_lock__debug(pd);
    648    qemu_spin_lock(&pd->lock);
    649}
    650
    651static inline void page_unlock(PageDesc *pd)
    652{
    653    qemu_spin_unlock(&pd->lock);
    654    page_unlock__debug(pd);
    655}
    656
    657/* lock the page(s) of a TB in the correct acquisition order */
    658static inline void page_lock_tb(const TranslationBlock *tb)
    659{
    660    page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], 0);
    661}
    662
    663static inline void page_unlock_tb(const TranslationBlock *tb)
    664{
    665    PageDesc *p1 = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
    666
    667    page_unlock(p1);
    668    if (unlikely(tb->page_addr[1] != -1)) {
    669        PageDesc *p2 = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
    670
    671        if (p2 != p1) {
    672            page_unlock(p2);
    673        }
    674    }
    675}
    676
    677static inline struct page_entry *
    678page_entry_new(PageDesc *pd, tb_page_addr_t index)
    679{
    680    struct page_entry *pe = g_malloc(sizeof(*pe));
    681
    682    pe->index = index;
    683    pe->pd = pd;
    684    pe->locked = false;
    685    return pe;
    686}
    687
    688static void page_entry_destroy(gpointer p)
    689{
    690    struct page_entry *pe = p;
    691
    692    g_assert(pe->locked);
    693    page_unlock(pe->pd);
    694    g_free(pe);
    695}
    696
    697/* returns false on success */
    698static bool page_entry_trylock(struct page_entry *pe)
    699{
    700    bool busy;
    701
    702    busy = qemu_spin_trylock(&pe->pd->lock);
    703    if (!busy) {
    704        g_assert(!pe->locked);
    705        pe->locked = true;
    706        page_lock__debug(pe->pd);
    707    }
    708    return busy;
    709}
    710
    711static void do_page_entry_lock(struct page_entry *pe)
    712{
    713    page_lock(pe->pd);
    714    g_assert(!pe->locked);
    715    pe->locked = true;
    716}
    717
    718static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
    719{
    720    struct page_entry *pe = value;
    721
    722    do_page_entry_lock(pe);
    723    return FALSE;
    724}
    725
    726static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
    727{
    728    struct page_entry *pe = value;
    729
    730    if (pe->locked) {
    731        pe->locked = false;
    732        page_unlock(pe->pd);
    733    }
    734    return FALSE;
    735}
    736
    737/*
    738 * Trylock a page, and if successful, add the page to a collection.
    739 * Returns true ("busy") if the page could not be locked; false otherwise.
    740 */
    741static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
    742{
    743    tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
    744    struct page_entry *pe;
    745    PageDesc *pd;
    746
    747    pe = g_tree_lookup(set->tree, &index);
    748    if (pe) {
    749        return false;
    750    }
    751
    752    pd = page_find(index);
    753    if (pd == NULL) {
    754        return false;
    755    }
    756
    757    pe = page_entry_new(pd, index);
    758    g_tree_insert(set->tree, &pe->index, pe);
    759
    760    /*
    761     * If this is either (1) the first insertion or (2) a page whose index
    762     * is higher than any other so far, just lock the page and move on.
    763     */
    764    if (set->max == NULL || pe->index > set->max->index) {
    765        set->max = pe;
    766        do_page_entry_lock(pe);
    767        return false;
    768    }
    769    /*
    770     * Try to acquire out-of-order lock; if busy, return busy so that we acquire
    771     * locks in order.
    772     */
    773    return page_entry_trylock(pe);
    774}
    775
    776static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
    777{
    778    tb_page_addr_t a = *(const tb_page_addr_t *)ap;
    779    tb_page_addr_t b = *(const tb_page_addr_t *)bp;
    780
    781    if (a == b) {
    782        return 0;
    783    } else if (a < b) {
    784        return -1;
    785    }
    786    return 1;
    787}
    788
    789/*
    790 * Lock a range of pages ([@start,@end[) as well as the pages of all
    791 * intersecting TBs.
    792 * Locking order: acquire locks in ascending order of page index.
    793 */
    794struct page_collection *
    795page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
    796{
    797    struct page_collection *set = g_malloc(sizeof(*set));
    798    tb_page_addr_t index;
    799    PageDesc *pd;
    800
    801    start >>= TARGET_PAGE_BITS;
    802    end   >>= TARGET_PAGE_BITS;
    803    g_assert(start <= end);
    804
    805    set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
    806                                page_entry_destroy);
    807    set->max = NULL;
    808    assert_no_pages_locked();
    809
    810 retry:
    811    g_tree_foreach(set->tree, page_entry_lock, NULL);
    812
    813    for (index = start; index <= end; index++) {
    814        TranslationBlock *tb;
    815        int n;
    816
    817        pd = page_find(index);
    818        if (pd == NULL) {
    819            continue;
    820        }
    821        if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
    822            g_tree_foreach(set->tree, page_entry_unlock, NULL);
    823            goto retry;
    824        }
    825        assert_page_locked(pd);
    826        PAGE_FOR_EACH_TB(pd, tb, n) {
    827            if (page_trylock_add(set, tb->page_addr[0]) ||
    828                (tb->page_addr[1] != -1 &&
    829                 page_trylock_add(set, tb->page_addr[1]))) {
    830                /* drop all locks, and reacquire in order */
    831                g_tree_foreach(set->tree, page_entry_unlock, NULL);
    832                goto retry;
    833            }
    834        }
    835    }
    836    return set;
    837}
    838
    839void page_collection_unlock(struct page_collection *set)
    840{
    841    /* entries are unlocked and freed via page_entry_destroy */
    842    g_tree_destroy(set->tree);
    843    g_free(set);
    844}
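/*
 * Usage sketch (annotation, not part of the original file): callers that
 * invalidate a physical range bracket the work between the two functions
 * above, so every page touched (and every page of every intersecting TB) is
 * held for the whole operation and released in one go:
 */
#if 0 /* example only */
static void invalidate_range_example(tb_page_addr_t start, tb_page_addr_t end)
{
    struct page_collection *pages = page_collection_lock(start, end);

    /* ... walk the PageDescs / TBs in [start, end) and invalidate ... */

    page_collection_unlock(pages);
}
#endif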
    845
    846#endif /* !CONFIG_USER_ONLY */
    847
    848static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
    849                           PageDesc **ret_p2, tb_page_addr_t phys2, int alloc)
    850{
    851    PageDesc *p1, *p2;
    852    tb_page_addr_t page1;
    853    tb_page_addr_t page2;
    854
    855    assert_memory_lock();
    856    g_assert(phys1 != -1);
    857
    858    page1 = phys1 >> TARGET_PAGE_BITS;
    859    page2 = phys2 >> TARGET_PAGE_BITS;
    860
    861    p1 = page_find_alloc(page1, alloc);
    862    if (ret_p1) {
    863        *ret_p1 = p1;
    864    }
    865    if (likely(phys2 == -1)) {
    866        page_lock(p1);
    867        return;
    868    } else if (page1 == page2) {
    869        page_lock(p1);
    870        if (ret_p2) {
    871            *ret_p2 = p1;
    872        }
    873        return;
    874    }
    875    p2 = page_find_alloc(page2, alloc);
    876    if (ret_p2) {
    877        *ret_p2 = p2;
    878    }
    879    if (page1 < page2) {
    880        page_lock(p1);
    881        page_lock(p2);
    882    } else {
    883        page_lock(p2);
    884        page_lock(p1);
    885    }
    886}
    887
    888static bool tb_cmp(const void *ap, const void *bp)
    889{
    890    const TranslationBlock *a = ap;
    891    const TranslationBlock *b = bp;
    892
    893    return a->pc == b->pc &&
    894        a->cs_base == b->cs_base &&
    895        a->flags == b->flags &&
    896        (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
    897        a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
    898        a->page_addr[0] == b->page_addr[0] &&
    899        a->page_addr[1] == b->page_addr[1];
    900}
    901
    902void tb_htable_init(void)
    903{
    904    unsigned int mode = QHT_MODE_AUTO_RESIZE;
    905
    906    qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
    907}
    908
    909/* call with @p->lock held */
    910static inline void invalidate_page_bitmap(PageDesc *p)
    911{
    912    assert_page_locked(p);
    913#ifdef CONFIG_SOFTMMU
    914    g_free(p->code_bitmap);
    915    p->code_bitmap = NULL;
    916    p->code_write_count = 0;
    917#endif
    918}
    919
    920/* Set to NULL all the 'first_tb' fields in all PageDescs. */
    921static void page_flush_tb_1(int level, void **lp)
    922{
    923    int i;
    924
    925    if (*lp == NULL) {
    926        return;
    927    }
    928    if (level == 0) {
    929        PageDesc *pd = *lp;
    930
    931        for (i = 0; i < V_L2_SIZE; ++i) {
    932            page_lock(&pd[i]);
    933            pd[i].first_tb = (uintptr_t)NULL;
    934            invalidate_page_bitmap(pd + i);
    935            page_unlock(&pd[i]);
    936        }
    937    } else {
    938        void **pp = *lp;
    939
    940        for (i = 0; i < V_L2_SIZE; ++i) {
    941            page_flush_tb_1(level - 1, pp + i);
    942        }
    943    }
    944}
    945
    946static void page_flush_tb(void)
    947{
    948    int i, l1_sz = v_l1_size;
    949
    950    for (i = 0; i < l1_sz; i++) {
    951        page_flush_tb_1(v_l2_levels, l1_map + i);
    952    }
    953}
    954
    955static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
    956{
    957    const TranslationBlock *tb = value;
    958    size_t *size = data;
    959
    960    *size += tb->tc.size;
    961    return false;
    962}
    963
    964/* flush all the translation blocks */
    965static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
    966{
    967    bool did_flush = false;
    968
    969    mmap_lock();
     970    /* If it has already been done on request of another CPU,
    971     * just retry.
    972     */
    973    if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
    974        goto done;
    975    }
    976    did_flush = true;
    977
    978    if (DEBUG_TB_FLUSH_GATE) {
    979        size_t nb_tbs = tcg_nb_tbs();
    980        size_t host_size = 0;
    981
    982        tcg_tb_foreach(tb_host_size_iter, &host_size);
    983        printf("qemu: flush code_size=%zu nb_tbs=%zu avg_tb_size=%zu\n",
    984               tcg_code_size(), nb_tbs, nb_tbs > 0 ? host_size / nb_tbs : 0);
    985    }
    986
    987    CPU_FOREACH(cpu) {
    988        cpu_tb_jmp_cache_clear(cpu);
    989    }
    990
    991    qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
    992    page_flush_tb();
    993
    994    tcg_region_reset_all();
    995    /* XXX: flush processor icache at this point if cache flush is
    996       expensive */
    997    qatomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1);
    998
    999done:
   1000    mmap_unlock();
   1001    if (did_flush) {
   1002        qemu_plugin_flush_cb();
   1003    }
   1004}
   1005
   1006void tb_flush(CPUState *cpu)
   1007{
   1008    if (tcg_enabled()) {
   1009        unsigned tb_flush_count = qatomic_mb_read(&tb_ctx.tb_flush_count);
   1010
   1011        if (cpu_in_exclusive_context(cpu)) {
   1012            do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
   1013        } else {
   1014            async_safe_run_on_cpu(cpu, do_tb_flush,
   1015                                  RUN_ON_CPU_HOST_INT(tb_flush_count));
   1016        }
   1017    }
   1018}
   1019
   1020/*
   1021 * Formerly ifdef DEBUG_TB_CHECK. These debug functions are user-mode-only,
   1022 * so in order to prevent bit rot we compile them unconditionally in user-mode,
   1023 * and let the optimizer get rid of them by wrapping their user-only callers
   1024 * with if (DEBUG_TB_CHECK_GATE).
   1025 */
   1026#ifdef CONFIG_USER_ONLY
   1027
   1028static void do_tb_invalidate_check(void *p, uint32_t hash, void *userp)
   1029{
   1030    TranslationBlock *tb = p;
   1031    target_ulong addr = *(target_ulong *)userp;
   1032
   1033    if (!(addr + TARGET_PAGE_SIZE <= tb->pc || addr >= tb->pc + tb->size)) {
   1034        printf("ERROR invalidate: address=" TARGET_FMT_lx
   1035               " PC=%08lx size=%04x\n", addr, (long)tb->pc, tb->size);
   1036    }
   1037}
   1038
   1039/* verify that all the pages have correct rights for code
   1040 *
   1041 * Called with mmap_lock held.
   1042 */
   1043static void tb_invalidate_check(target_ulong address)
   1044{
   1045    address &= TARGET_PAGE_MASK;
   1046    qht_iter(&tb_ctx.htable, do_tb_invalidate_check, &address);
   1047}
   1048
   1049static void do_tb_page_check(void *p, uint32_t hash, void *userp)
   1050{
   1051    TranslationBlock *tb = p;
   1052    int flags1, flags2;
   1053
   1054    flags1 = page_get_flags(tb->pc);
   1055    flags2 = page_get_flags(tb->pc + tb->size - 1);
   1056    if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
   1057        printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
   1058               (long)tb->pc, tb->size, flags1, flags2);
   1059    }
   1060}
   1061
   1062/* verify that all the pages have correct rights for code */
   1063static void tb_page_check(void)
   1064{
   1065    qht_iter(&tb_ctx.htable, do_tb_page_check, NULL);
   1066}
   1067
   1068#endif /* CONFIG_USER_ONLY */
   1069
   1070/*
   1071 * user-mode: call with mmap_lock held
   1072 * !user-mode: call with @pd->lock held
   1073 */
   1074static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
   1075{
   1076    TranslationBlock *tb1;
   1077    uintptr_t *pprev;
   1078    unsigned int n1;
   1079
   1080    assert_page_locked(pd);
   1081    pprev = &pd->first_tb;
   1082    PAGE_FOR_EACH_TB(pd, tb1, n1) {
   1083        if (tb1 == tb) {
   1084            *pprev = tb1->page_next[n1];
   1085            return;
   1086        }
   1087        pprev = &tb1->page_next[n1];
   1088    }
   1089    g_assert_not_reached();
   1090}
   1091
   1092/* remove @orig from its @n_orig-th jump list */
   1093static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
   1094{
   1095    uintptr_t ptr, ptr_locked;
   1096    TranslationBlock *dest;
   1097    TranslationBlock *tb;
   1098    uintptr_t *pprev;
   1099    int n;
   1100
   1101    /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
   1102    ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
   1103    dest = (TranslationBlock *)(ptr & ~1);
   1104    if (dest == NULL) {
   1105        return;
   1106    }
   1107
   1108    qemu_spin_lock(&dest->jmp_lock);
   1109    /*
   1110     * While acquiring the lock, the jump might have been removed if the
   1111     * destination TB was invalidated; check again.
   1112     */
   1113    ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
   1114    if (ptr_locked != ptr) {
   1115        qemu_spin_unlock(&dest->jmp_lock);
   1116        /*
   1117         * The only possibility is that the jump was unlinked via
    1118     * tb_jump_unlink(dest). Seeing another destination here would be a bug,
   1119         * because we set the LSB above.
   1120         */
   1121        g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
   1122        return;
   1123    }
   1124    /*
   1125     * We first acquired the lock, and since the destination pointer matches,
   1126     * we know for sure that @orig is in the jmp list.
   1127     */
   1128    pprev = &dest->jmp_list_head;
   1129    TB_FOR_EACH_JMP(dest, tb, n) {
   1130        if (tb == orig && n == n_orig) {
   1131            *pprev = tb->jmp_list_next[n];
   1132            /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
   1133            qemu_spin_unlock(&dest->jmp_lock);
   1134            return;
   1135        }
   1136        pprev = &tb->jmp_list_next[n];
   1137    }
   1138    g_assert_not_reached();
   1139}
   1140
   1141/* reset the jump entry 'n' of a TB so that it is not chained to
   1142   another TB */
   1143static inline void tb_reset_jump(TranslationBlock *tb, int n)
   1144{
   1145    uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
   1146    tb_set_jmp_target(tb, n, addr);
   1147}
   1148
   1149/* remove any jumps to the TB */
   1150static inline void tb_jmp_unlink(TranslationBlock *dest)
   1151{
   1152    TranslationBlock *tb;
   1153    int n;
   1154
   1155    qemu_spin_lock(&dest->jmp_lock);
   1156
   1157    TB_FOR_EACH_JMP(dest, tb, n) {
   1158        tb_reset_jump(tb, n);
   1159        qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
   1160        /* No need to clear the list entry; setting the dest ptr is enough */
   1161    }
   1162    dest->jmp_list_head = (uintptr_t)NULL;
   1163
   1164    qemu_spin_unlock(&dest->jmp_lock);
   1165}
   1166
   1167/*
   1168 * In user-mode, call with mmap_lock held.
   1169 * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
   1170 * locks held.
   1171 */
   1172static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
   1173{
   1174    CPUState *cpu;
   1175    PageDesc *p;
   1176    uint32_t h;
   1177    tb_page_addr_t phys_pc;
   1178    uint32_t orig_cflags = tb_cflags(tb);
   1179
   1180    assert_memory_lock();
   1181
   1182    /* make sure no further incoming jumps will be chained to this TB */
   1183    qemu_spin_lock(&tb->jmp_lock);
   1184    qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
   1185    qemu_spin_unlock(&tb->jmp_lock);
   1186
   1187    /* remove the TB from the hash list */
   1188    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
   1189    h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
   1190                     tb->trace_vcpu_dstate);
   1191    if (!qht_remove(&tb_ctx.htable, tb, h)) {
   1192        return;
   1193    }
   1194
   1195    /* remove the TB from the page list */
   1196    if (rm_from_page_list) {
   1197        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
   1198        tb_page_remove(p, tb);
   1199        invalidate_page_bitmap(p);
   1200        if (tb->page_addr[1] != -1) {
   1201            p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
   1202            tb_page_remove(p, tb);
   1203            invalidate_page_bitmap(p);
   1204        }
   1205    }
   1206
    1207    /* remove the TB from each CPU's jump cache */
   1208    h = tb_jmp_cache_hash_func(tb->pc);
   1209    CPU_FOREACH(cpu) {
   1210        if (qatomic_read(&cpu->tb_jmp_cache[h]) == tb) {
   1211            qatomic_set(&cpu->tb_jmp_cache[h], NULL);
   1212        }
   1213    }
   1214
   1215    /* suppress this TB from the two jump lists */
   1216    tb_remove_from_jmp_list(tb, 0);
   1217    tb_remove_from_jmp_list(tb, 1);
   1218
   1219    /* suppress any remaining jumps to this TB */
   1220    tb_jmp_unlink(tb);
   1221
   1222    qatomic_set(&tb_ctx.tb_phys_invalidate_count,
   1223                tb_ctx.tb_phys_invalidate_count + 1);
   1224}
   1225
   1226static void tb_phys_invalidate__locked(TranslationBlock *tb)
   1227{
   1228    qemu_thread_jit_write();
   1229    do_tb_phys_invalidate(tb, true);
   1230    qemu_thread_jit_execute();
   1231}
   1232
   1233/* invalidate one TB
   1234 *
   1235 * Called with mmap_lock held in user-mode.
   1236 */
   1237void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
   1238{
   1239    if (page_addr == -1 && tb->page_addr[0] != -1) {
   1240        page_lock_tb(tb);
   1241        do_tb_phys_invalidate(tb, true);
   1242        page_unlock_tb(tb);
   1243    } else {
   1244        do_tb_phys_invalidate(tb, false);
   1245    }
   1246}
   1247
   1248#ifdef CONFIG_SOFTMMU
   1249/* call with @p->lock held */
   1250static void build_page_bitmap(PageDesc *p)
   1251{
   1252    int n, tb_start, tb_end;
   1253    TranslationBlock *tb;
   1254
   1255    assert_page_locked(p);
   1256    p->code_bitmap = bitmap_new(TARGET_PAGE_SIZE);
   1257
   1258    PAGE_FOR_EACH_TB(p, tb, n) {
   1259        /* NOTE: this is subtle as a TB may span two physical pages */
   1260        if (n == 0) {
   1261            /* NOTE: tb_end may be after the end of the page, but
   1262               it is not a problem */
   1263            tb_start = tb->pc & ~TARGET_PAGE_MASK;
   1264            tb_end = tb_start + tb->size;
   1265            if (tb_end > TARGET_PAGE_SIZE) {
   1266                tb_end = TARGET_PAGE_SIZE;
    1267            }
   1268        } else {
   1269            tb_start = 0;
   1270            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
   1271        }
   1272        bitmap_set(p->code_bitmap, tb_start, tb_end - tb_start);
   1273    }
   1274}
   1275#endif
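/*
 * Design note (annotation, not part of the original file): the bitmap marks
 * which bytes of the page are covered by translated code.  Once a page has
 * been written to more than SMC_BITMAP_USE_THRESHOLD times, a write fast
 * path can test the written range against this bitmap and skip the costly
 * TB invalidation when no translated code overlaps the store.  A hedged
 * sketch of such a check, with hypothetical names:
 */
#if 0 /* example only */
static bool range_has_code(PageDesc *p, unsigned int start, unsigned int len)
{
    /* 'start' is the offset of the store within the page, 'len' its size */
    if (p->code_bitmap == NULL) {
        return true;                    /* no bitmap yet: assume the worst */
    }
    return find_next_bit(p->code_bitmap, TARGET_PAGE_SIZE, start) < start + len;
}
#endif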
   1276
    1277/* add the tb to the target page and protect it if necessary
   1278 *
   1279 * Called with mmap_lock held for user-mode emulation.
   1280 * Called with @p->lock held in !user-mode.
   1281 */
   1282static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
   1283                               unsigned int n, tb_page_addr_t page_addr)
   1284{
   1285#ifndef CONFIG_USER_ONLY
   1286    bool page_already_protected;
   1287#endif
   1288
   1289    assert_page_locked(p);
   1290
   1291    tb->page_addr[n] = page_addr;
   1292    tb->page_next[n] = p->first_tb;
   1293#ifndef CONFIG_USER_ONLY
   1294    page_already_protected = p->first_tb != (uintptr_t)NULL;
   1295#endif
   1296    p->first_tb = (uintptr_t)tb | n;
   1297    invalidate_page_bitmap(p);
   1298
   1299#if defined(CONFIG_USER_ONLY)
   1300    /* translator_loop() must have made all TB pages non-writable */
   1301    assert(!(p->flags & PAGE_WRITE));
   1302#else
   1303    /* if some code is already present, then the pages are already
   1304       protected. So we handle the case where only the first TB is
   1305       allocated in a physical page */
   1306    if (!page_already_protected) {
   1307        tlb_protect_code(page_addr);
   1308    }
   1309#endif
   1310}
   1311
   1312/*
   1313 * Add a new TB and link it to the physical page tables. phys_page2 is
   1314 * (-1) to indicate that only one page contains the TB.
   1315 *
   1316 * Called with mmap_lock held for user-mode emulation.
   1317 *
    1318 * Returns @tb, or a pointer to an existing TB that matches @tb.
   1319 * Note that in !user-mode, another thread might have already added a TB
   1320 * for the same block of guest code that @tb corresponds to. In that case,
   1321 * the caller should discard the original @tb, and use instead the returned TB.
   1322 */
   1323static TranslationBlock *
   1324tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
   1325             tb_page_addr_t phys_page2)
   1326{
   1327    PageDesc *p;
   1328    PageDesc *p2 = NULL;
   1329    void *existing_tb = NULL;
   1330    uint32_t h;
   1331
   1332    assert_memory_lock();
   1333    tcg_debug_assert(!(tb->cflags & CF_INVALID));
   1334
   1335    /*
    1336     * Add the TB to the page list, acquiring the pages' locks first.
   1337     * We keep the locks held until after inserting the TB in the hash table,
   1338     * so that if the insertion fails we know for sure that the TBs are still
   1339     * in the page descriptors.
   1340     * Note that inserting into the hash table first isn't an option, since
   1341     * we can only insert TBs that are fully initialized.
   1342     */
   1343    page_lock_pair(&p, phys_pc, &p2, phys_page2, 1);
   1344    tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
   1345    if (p2) {
   1346        tb_page_add(p2, tb, 1, phys_page2);
   1347    } else {
   1348        tb->page_addr[1] = -1;
   1349    }
   1350
   1351    /* add in the hash table */
   1352    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags,
   1353                     tb->trace_vcpu_dstate);
   1354    qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
   1355
   1356    /* remove TB from the page(s) if we couldn't insert it */
   1357    if (unlikely(existing_tb)) {
   1358        tb_page_remove(p, tb);
   1359        invalidate_page_bitmap(p);
   1360        if (p2) {
   1361            tb_page_remove(p2, tb);
   1362            invalidate_page_bitmap(p2);
   1363        }
   1364        tb = existing_tb;
   1365    }
   1366
   1367    if (p2 && p2 != p) {
   1368        page_unlock(p2);
   1369    }
   1370    page_unlock(p);
   1371
   1372#ifdef CONFIG_USER_ONLY
   1373    if (DEBUG_TB_CHECK_GATE) {
   1374        tb_page_check();
   1375    }
   1376#endif
   1377    return tb;
   1378}
   1379
   1380/* Called with mmap_lock held for user mode emulation.  */
   1381TranslationBlock *tb_gen_code(CPUState *cpu,
   1382                              target_ulong pc, target_ulong cs_base,
   1383                              uint32_t flags, int cflags)
   1384{
   1385    CPUArchState *env = cpu->env_ptr;
   1386    TranslationBlock *tb, *existing_tb;
   1387    tb_page_addr_t phys_pc, phys_page2;
   1388    target_ulong virt_page2;
   1389    tcg_insn_unit *gen_code_buf;
   1390    int gen_code_size, search_size, max_insns;
   1391#ifdef CONFIG_PROFILER
   1392    TCGProfile *prof = &tcg_ctx->prof;
   1393    int64_t ti;
   1394#endif
   1395
   1396    assert_memory_lock();
   1397    qemu_thread_jit_write();
   1398
   1399    phys_pc = get_page_addr_code(env, pc);
   1400
   1401    if (phys_pc == -1) {
   1402        /* Generate a one-shot TB with 1 insn in it */
   1403        cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
   1404    }
   1405
   1406    max_insns = cflags & CF_COUNT_MASK;
   1407    if (max_insns == 0) {
   1408        max_insns = TCG_MAX_INSNS;
   1409    }
   1410    QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
   1411
   1412 buffer_overflow:
   1413    tb = tcg_tb_alloc(tcg_ctx);
   1414    if (unlikely(!tb)) {
   1415        /* flush must be done */
   1416        tb_flush(cpu);
   1417        mmap_unlock();
   1418        /* Make the execution loop process the flush as soon as possible.  */
   1419        cpu->exception_index = EXCP_INTERRUPT;
   1420        cpu_loop_exit(cpu);
   1421    }
   1422
   1423    gen_code_buf = tcg_ctx->code_gen_ptr;
   1424    tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
   1425    tb->pc = pc;
   1426    tb->cs_base = cs_base;
   1427    tb->flags = flags;
   1428    tb->cflags = cflags;
   1429    tb->trace_vcpu_dstate = *cpu->trace_dstate;
   1430    tcg_ctx->tb_cflags = cflags;
   1431 tb_overflow:
   1432
   1433#ifdef CONFIG_PROFILER
   1434    /* includes aborted translations because of exceptions */
   1435    qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
   1436    ti = profile_getclock();
   1437#endif
   1438
   1439    gen_code_size = sigsetjmp(tcg_ctx->jmp_trans, 0);
   1440    if (unlikely(gen_code_size != 0)) {
   1441        goto error_return;
   1442    }
   1443
   1444    tcg_func_start(tcg_ctx);
   1445
   1446    tcg_ctx->cpu = env_cpu(env);
   1447    gen_intermediate_code(cpu, tb, max_insns);
   1448    assert(tb->size != 0);
   1449    tcg_ctx->cpu = NULL;
   1450    max_insns = tb->icount;
   1451
   1452    trace_translate_block(tb, tb->pc, tb->tc.ptr);
   1453
   1454    /* generate machine code */
   1455    tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
   1456    tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
   1457    tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
   1458    if (TCG_TARGET_HAS_direct_jump) {
   1459        tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
   1460        tcg_ctx->tb_jmp_target_addr = NULL;
   1461    } else {
   1462        tcg_ctx->tb_jmp_insn_offset = NULL;
   1463        tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
   1464    }
   1465
   1466#ifdef CONFIG_PROFILER
   1467    qatomic_set(&prof->tb_count, prof->tb_count + 1);
   1468    qatomic_set(&prof->interm_time,
   1469                prof->interm_time + profile_getclock() - ti);
   1470    ti = profile_getclock();
   1471#endif
   1472
   1473    gen_code_size = tcg_gen_code(tcg_ctx, tb);
   1474    if (unlikely(gen_code_size < 0)) {
   1475 error_return:
   1476        switch (gen_code_size) {
   1477        case -1:
   1478            /*
   1479             * Overflow of code_gen_buffer, or the current slice of it.
   1480             *
   1481             * TODO: We don't need to re-do gen_intermediate_code, nor
   1482             * should we re-do the tcg optimization currently hidden
   1483             * inside tcg_gen_code.  All that should be required is to
   1484             * flush the TBs, allocate a new TB, re-initialize it per
   1485             * above, and re-do the actual code generation.
   1486             */
   1487            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
   1488                          "Restarting code generation for "
   1489                          "code_gen_buffer overflow\n");
   1490            goto buffer_overflow;
   1491
   1492        case -2:
   1493            /*
   1494             * The code generated for the TranslationBlock is too large.
   1495             * The maximum size allowed by the unwind info is 64k.
   1496             * There may be stricter constraints from relocations
   1497             * in the tcg backend.
   1498             *
   1499             * Try again with half as many insns as we attempted this time.
   1500             * If a single insn overflows, there's a bug somewhere...
   1501             */
   1502            assert(max_insns > 1);
   1503            max_insns /= 2;
   1504            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
   1505                          "Restarting code generation with "
   1506                          "smaller translation block (max %d insns)\n",
   1507                          max_insns);
   1508            goto tb_overflow;
   1509
   1510        default:
   1511            g_assert_not_reached();
   1512        }
   1513    }
   1514    search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
   1515    if (unlikely(search_size < 0)) {
   1516        goto buffer_overflow;
   1517    }
   1518    tb->tc.size = gen_code_size;
   1519
   1520#ifdef CONFIG_PROFILER
   1521    qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
   1522    qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
   1523    qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
   1524    qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
   1525#endif
   1526
   1527#ifdef DEBUG_DISAS
   1528    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
   1529        qemu_log_in_addr_range(tb->pc)) {
   1530        FILE *logfile = qemu_log_lock();
   1531        int code_size, data_size;
   1532        const tcg_target_ulong *rx_data_gen_ptr;
   1533        size_t chunk_start;
   1534        int insn = 0;
   1535
   1536        if (tcg_ctx->data_gen_ptr) {
   1537            rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
   1538            code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
   1539            data_size = gen_code_size - code_size;
   1540        } else {
   1541            rx_data_gen_ptr = 0;
   1542            code_size = gen_code_size;
   1543            data_size = 0;
   1544        }
   1545
   1546        /* Dump header and the first instruction */
   1547        qemu_log("OUT: [size=%d]\n", gen_code_size);
   1548        qemu_log("  -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
   1549                 tcg_ctx->gen_insn_data[insn][0]);
   1550        chunk_start = tcg_ctx->gen_insn_end_off[insn];
   1551        log_disas(tb->tc.ptr, chunk_start);
   1552
   1553        /*
   1554         * Dump each instruction chunk, wrapping up empty chunks into
   1555         * the next instruction. The whole array is offset so the
   1556         * first entry is the beginning of the 2nd instruction.
   1557         */
   1558        while (insn < tb->icount) {
   1559            size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
   1560            if (chunk_end > chunk_start) {
   1561                qemu_log("  -- guest addr 0x" TARGET_FMT_lx "\n",
   1562                         tcg_ctx->gen_insn_data[insn][0]);
   1563                log_disas(tb->tc.ptr + chunk_start, chunk_end - chunk_start);
   1564                chunk_start = chunk_end;
   1565            }
   1566            insn++;
   1567        }
   1568
   1569        if (chunk_start < code_size) {
   1570            qemu_log("  -- tb slow paths + alignment\n");
   1571            log_disas(tb->tc.ptr + chunk_start, code_size - chunk_start);
   1572        }
   1573
   1574        /* Finally dump any data we may have after the block */
   1575        if (data_size) {
   1576            int i;
   1577            qemu_log("  data: [size=%d]\n", data_size);
   1578            for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
   1579                if (sizeof(tcg_target_ulong) == 8) {
   1580                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
   1581                             (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
   1582                } else if (sizeof(tcg_target_ulong) == 4) {
   1583                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
   1584                             (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
   1585                } else {
   1586                    qemu_build_not_reached();
   1587                }
   1588            }
   1589        }
   1590        qemu_log("\n");
   1591        qemu_log_flush();
   1592        qemu_log_unlock(logfile);
   1593    }
   1594#endif
   1595
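            /*
             * Advance the shared code_gen_ptr past the code just emitted and
             * the search data appended after it, keeping CODE_GEN_ALIGN
             * alignment, so this TB's space in the buffer is not reused.
             */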
   1596    qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
   1597        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
   1598                 CODE_GEN_ALIGN));
   1599
   1600    /* init jump list */
   1601    qemu_spin_init(&tb->jmp_lock);
   1602    tb->jmp_list_head = (uintptr_t)NULL;
   1603    tb->jmp_list_next[0] = (uintptr_t)NULL;
   1604    tb->jmp_list_next[1] = (uintptr_t)NULL;
   1605    tb->jmp_dest[0] = (uintptr_t)NULL;
   1606    tb->jmp_dest[1] = (uintptr_t)NULL;
   1607
   1608    /* init original jump addresses which have been set during tcg_gen_code() */
   1609    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
   1610        tb_reset_jump(tb, 0);
   1611    }
   1612    if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
   1613        tb_reset_jump(tb, 1);
   1614    }
   1615
   1616    /*
   1617     * If the TB is not associated with a physical RAM page then
   1618     * it must be a temporary one-insn TB, and we have nothing to do
   1619     * except fill in the page_addr[] fields. Return early before
   1620     * attempting to link to other TBs or add to the lookup table.
   1621     */
   1622    if (phys_pc == -1) {
   1623        tb->page_addr[0] = tb->page_addr[1] = -1;
   1624        return tb;
   1625    }
   1626
   1627    /*
   1628     * Insert TB into the corresponding region tree before publishing it
    1629     * through QHT. Otherwise a rewind that happens inside the TB might
    1630     * fail to look the TB up by its host PC.
   1631     */
   1632    tcg_tb_insert(tb);
   1633
   1634    /* check next page if needed */
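            /*
             * A TB covers at most two guest pages (hence the two page_addr[]
             * slots), so a single extra lookup is enough.
             */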
   1635    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
   1636    phys_page2 = -1;
   1637    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
   1638        phys_page2 = get_page_addr_code(env, virt_page2);
   1639    }
   1640    /*
   1641     * No explicit memory barrier is required -- tb_link_page() makes the
   1642     * TB visible in a consistent state.
   1643     */
   1644    existing_tb = tb_link_page(tb, phys_pc, phys_page2);
   1645    /* if the TB already exists, discard what we just translated */
   1646    if (unlikely(existing_tb != tb)) {
   1647        uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
   1648
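                /*
                 * The TB struct itself was carved out of the code buffer just
                 * before gen_code_buf (padded to the icache line size), so step
                 * back over it as well to release the whole allocation made for
                 * this translation.
                 */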
   1649        orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
   1650        qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
   1651        tcg_tb_remove(tb);
   1652        return existing_tb;
   1653    }
   1654    return tb;
   1655}
   1656
   1657/*
   1658 * @p must be non-NULL.
   1659 * user-mode: call with mmap_lock held.
   1660 * !user-mode: call with all @pages locked.
   1661 */
   1662static void
   1663tb_invalidate_phys_page_range__locked(struct page_collection *pages,
   1664                                      PageDesc *p, tb_page_addr_t start,
   1665                                      tb_page_addr_t end,
   1666                                      uintptr_t retaddr)
   1667{
   1668    TranslationBlock *tb;
   1669    tb_page_addr_t tb_start, tb_end;
   1670    int n;
   1671#ifdef TARGET_HAS_PRECISE_SMC
   1672    CPUState *cpu = current_cpu;
   1673    CPUArchState *env = NULL;
   1674    bool current_tb_not_found = retaddr != 0;
   1675    bool current_tb_modified = false;
   1676    TranslationBlock *current_tb = NULL;
   1677    target_ulong current_pc = 0;
   1678    target_ulong current_cs_base = 0;
   1679    uint32_t current_flags = 0;
   1680#endif /* TARGET_HAS_PRECISE_SMC */
   1681
   1682    assert_page_locked(p);
   1683
   1684#if defined(TARGET_HAS_PRECISE_SMC)
   1685    if (cpu != NULL) {
   1686        env = cpu->env_ptr;
   1687    }
   1688#endif
   1689
   1690    /* we remove all the TBs in the range [start, end[ */
   1691    /* XXX: see if in some cases it could be faster to invalidate all
   1692       the code */
   1693    PAGE_FOR_EACH_TB(p, tb, n) {
   1694        assert_page_locked(p);
   1695        /* NOTE: this is subtle as a TB may span two physical pages */
   1696        if (n == 0) {
   1697            /* NOTE: tb_end may be after the end of the page, but
   1698               it is not a problem */
   1699            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
   1700            tb_end = tb_start + tb->size;
   1701        } else {
   1702            tb_start = tb->page_addr[1];
   1703            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
   1704        }
   1705        if (!(tb_end <= start || tb_start >= end)) {
   1706#ifdef TARGET_HAS_PRECISE_SMC
   1707            if (current_tb_not_found) {
   1708                current_tb_not_found = false;
   1709                /* now we have a real cpu fault */
   1710                current_tb = tcg_tb_lookup(retaddr);
   1711            }
   1712            if (current_tb == tb &&
   1713                (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
   1714                /*
   1715                 * If we are modifying the current TB, we must stop
   1716                 * its execution. We could be more precise by checking
   1717                 * that the modification is after the current PC, but it
   1718                 * would require a specialized function to partially
   1719                 * restore the CPU state.
   1720                 */
   1721                current_tb_modified = true;
   1722                cpu_restore_state_from_tb(cpu, current_tb, retaddr, true);
   1723                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
   1724                                     &current_flags);
   1725            }
   1726#endif /* TARGET_HAS_PRECISE_SMC */
   1727            tb_phys_invalidate__locked(tb);
   1728        }
   1729    }
   1730#if !defined(CONFIG_USER_ONLY)
   1731    /* if no code remaining, no need to continue to use slow writes */
   1732    if (!p->first_tb) {
   1733        invalidate_page_bitmap(p);
   1734        tlb_unprotect_code(start);
   1735    }
   1736#endif
   1737#ifdef TARGET_HAS_PRECISE_SMC
   1738    if (current_tb_modified) {
   1739        page_collection_unlock(pages);
   1740        /* Force execution of one insn next time.  */
   1741        cpu->cflags_next_tb = 1 | curr_cflags(cpu);
   1742        mmap_unlock();
   1743        cpu_loop_exit_noexc(cpu);
   1744    }
   1745#endif
   1746}
   1747
   1748/*
   1749 * Invalidate all TBs which intersect with the target physical address range
   1750 * [start;end[. NOTE: start and end must refer to the *same* physical page.
    1751 * Callers do not pass a retaddr, so no attempt is made to detect
    1752 * modification of the TB the CPU is currently executing; use the
    1753 * retaddr-taking __locked variant for that.
   1754 *
   1755 * Called with mmap_lock held for user-mode emulation
   1756 */
   1757void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end)
   1758{
   1759    struct page_collection *pages;
   1760    PageDesc *p;
   1761
   1762    assert_memory_lock();
   1763
   1764    p = page_find(start >> TARGET_PAGE_BITS);
   1765    if (p == NULL) {
   1766        return;
   1767    }
   1768    pages = page_collection_lock(start, end);
   1769    tb_invalidate_phys_page_range__locked(pages, p, start, end, 0);
   1770    page_collection_unlock(pages);
   1771}
   1772
   1773/*
   1774 * Invalidate all TBs which intersect with the target physical address range
   1775 * [start;end[. NOTE: start and end may refer to *different* physical pages.
    1776 * Callers do not pass a retaddr, so no attempt is made to detect
    1777 * modification of the TB the CPU is currently executing; use the
    1778 * retaddr-taking __locked variant for that.
   1779 *
   1780 * Called with mmap_lock held for user-mode emulation.
   1781 */
   1782#ifdef CONFIG_SOFTMMU
   1783void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end)
   1784#else
   1785void tb_invalidate_phys_range(target_ulong start, target_ulong end)
   1786#endif
   1787{
   1788    struct page_collection *pages;
   1789    tb_page_addr_t next;
   1790
   1791    assert_memory_lock();
   1792
   1793    pages = page_collection_lock(start, end);
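            /*
             * The __locked helper operates on a single page, so walk the range
             * one target page at a time and clamp each sub-range to the page
             * boundary.
             */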
   1794    for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
   1795         start < end;
   1796         start = next, next += TARGET_PAGE_SIZE) {
   1797        PageDesc *pd = page_find(start >> TARGET_PAGE_BITS);
   1798        tb_page_addr_t bound = MIN(next, end);
   1799
   1800        if (pd == NULL) {
   1801            continue;
   1802        }
   1803        tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
   1804    }
   1805    page_collection_unlock(pages);
   1806}
   1807
   1808#ifdef CONFIG_SOFTMMU
   1809/* len must be <= 8 and start must be a multiple of len.
   1810 * Called via softmmu_template.h when code areas are written to with
   1811 * iothread mutex not held.
   1812 *
   1813 * Call with all @pages in the range [@start, @start + len[ locked.
   1814 */
   1815void tb_invalidate_phys_page_fast(struct page_collection *pages,
   1816                                  tb_page_addr_t start, int len,
   1817                                  uintptr_t retaddr)
   1818{
   1819    PageDesc *p;
   1820
   1821    assert_memory_lock();
   1822
   1823    p = page_find(start >> TARGET_PAGE_BITS);
   1824    if (!p) {
   1825        return;
   1826    }
   1827
   1828    assert_page_locked(p);
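            /*
             * After enough code writes to this page, build a bitmap recording
             * which bytes are covered by translated code, so that writes which
             * touch no translated code can skip the invalidation below.
             */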
   1829    if (!p->code_bitmap &&
   1830        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
   1831        build_page_bitmap(p);
   1832    }
   1833    if (p->code_bitmap) {
   1834        unsigned int nr;
   1835        unsigned long b;
   1836
   1837        nr = start & ~TARGET_PAGE_MASK;
   1838        b = p->code_bitmap[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1));
   1839        if (b & ((1 << len) - 1)) {
   1840            goto do_invalidate;
   1841        }
   1842    } else {
   1843    do_invalidate:
   1844        tb_invalidate_phys_page_range__locked(pages, p, start, start + len,
   1845                                              retaddr);
   1846    }
   1847}
   1848#else
   1849/* Called with mmap_lock held. If pc is not 0 then it indicates the
   1850 * host PC of the faulting store instruction that caused this invalidate.
   1851 * Returns true if the caller needs to abort execution of the current
   1852 * TB (because it was modified by this store and the guest CPU has
   1853 * precise-SMC semantics).
   1854 */
   1855static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
   1856{
   1857    TranslationBlock *tb;
   1858    PageDesc *p;
   1859    int n;
   1860#ifdef TARGET_HAS_PRECISE_SMC
   1861    TranslationBlock *current_tb = NULL;
   1862    CPUState *cpu = current_cpu;
   1863    CPUArchState *env = NULL;
   1864    int current_tb_modified = 0;
   1865    target_ulong current_pc = 0;
   1866    target_ulong current_cs_base = 0;
   1867    uint32_t current_flags = 0;
   1868#endif
   1869
   1870    assert_memory_lock();
   1871
   1872    addr &= TARGET_PAGE_MASK;
   1873    p = page_find(addr >> TARGET_PAGE_BITS);
   1874    if (!p) {
   1875        return false;
   1876    }
   1877
   1878#ifdef TARGET_HAS_PRECISE_SMC
   1879    if (p->first_tb && pc != 0) {
   1880        current_tb = tcg_tb_lookup(pc);
   1881    }
   1882    if (cpu != NULL) {
   1883        env = cpu->env_ptr;
   1884    }
   1885#endif
   1886    assert_page_locked(p);
   1887    PAGE_FOR_EACH_TB(p, tb, n) {
   1888#ifdef TARGET_HAS_PRECISE_SMC
   1889        if (current_tb == tb &&
   1890            (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
    1891            /*
    1892             * If we are modifying the current TB, we must stop its
    1893             * execution. We could be more precise by checking that the
    1894             * modification is after the current PC, but it would require
    1895             * a specialized function to partially restore the CPU state.
    1896             */
   1897            current_tb_modified = 1;
   1898            cpu_restore_state_from_tb(cpu, current_tb, pc, true);
   1899            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
   1900                                 &current_flags);
   1901        }
   1902#endif /* TARGET_HAS_PRECISE_SMC */
   1903        tb_phys_invalidate(tb, addr);
   1904    }
   1905    p->first_tb = (uintptr_t)NULL;
   1906#ifdef TARGET_HAS_PRECISE_SMC
   1907    if (current_tb_modified) {
   1908        /* Force execution of one insn next time.  */
   1909        cpu->cflags_next_tb = 1 | curr_cflags(cpu);
   1910        return true;
   1911    }
   1912#endif
   1913
   1914    return false;
   1915}
   1916#endif
   1917
   1918/* user-mode: call with mmap_lock held */
   1919void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
   1920{
   1921    TranslationBlock *tb;
   1922
   1923    assert_memory_lock();
   1924
   1925    tb = tcg_tb_lookup(retaddr);
   1926    if (tb) {
   1927        /* We can use retranslation to find the PC.  */
   1928        cpu_restore_state_from_tb(cpu, tb, retaddr, true);
   1929        tb_phys_invalidate(tb, -1);
   1930    } else {
   1931        /* The exception probably happened in a helper.  The CPU state should
   1932           have been saved before calling it. Fetch the PC from there.  */
   1933        CPUArchState *env = cpu->env_ptr;
   1934        target_ulong pc, cs_base;
   1935        tb_page_addr_t addr;
   1936        uint32_t flags;
   1937
   1938        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
   1939        addr = get_page_addr_code(env, pc);
   1940        if (addr != -1) {
   1941            tb_invalidate_phys_range(addr, addr + 1);
   1942        }
   1943    }
   1944}
   1945
   1946#ifndef CONFIG_USER_ONLY
   1947/*
   1948 * In deterministic execution mode, instructions doing device I/Os
   1949 * must be at the end of the TB.
   1950 *
   1951 * Called by softmmu_template.h, with iothread mutex not held.
   1952 */
   1953void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
   1954{
   1955    TranslationBlock *tb;
   1956    CPUClass *cc;
   1957    uint32_t n;
   1958
   1959    tb = tcg_tb_lookup(retaddr);
   1960    if (!tb) {
   1961        cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
   1962                  (void *)retaddr);
   1963    }
   1964    cpu_restore_state_from_tb(cpu, tb, retaddr, true);
   1965
   1966    /*
   1967     * Some guests must re-execute the branch when re-executing a delay
   1968     * slot instruction.  When this is the case, adjust icount and N
   1969     * to account for the re-execution of the branch.
   1970     */
   1971    n = 1;
   1972    cc = CPU_GET_CLASS(cpu);
   1973    if (cc->tcg_ops->io_recompile_replay_branch &&
   1974        cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
   1975        cpu_neg(cpu)->icount_decr.u16.low++;
   1976        n = 2;
   1977    }
   1978
   1979    /*
    1980     * Exit the loop and potentially generate a new TB executing just
    1981     * the I/O insns. We also limit instrumentation to memory
   1982     * operations only (which execute after completion) so we don't
   1983     * double instrument the instruction.
   1984     */
   1985    cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
   1986
   1987    qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
   1988                           "cpu_io_recompile: rewound execution of TB to "
   1989                           TARGET_FMT_lx "\n", tb->pc);
   1990
   1991    cpu_loop_exit_noexc(cpu);
   1992}
   1993
   1994static void print_qht_statistics(struct qht_stats hst)
   1995{
   1996    uint32_t hgram_opts;
   1997    size_t hgram_bins;
   1998    char *hgram;
   1999
   2000    if (!hst.head_buckets) {
   2001        return;
   2002    }
   2003    qemu_printf("TB hash buckets     %zu/%zu (%0.2f%% head buckets used)\n",
   2004                hst.used_head_buckets, hst.head_buckets,
   2005                (double)hst.used_head_buckets / hst.head_buckets * 100);
   2006
   2007    hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
   2008    hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
   2009    if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
   2010        hgram_opts |= QDIST_PR_NODECIMAL;
   2011    }
   2012    hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
   2013    qemu_printf("TB hash occupancy   %0.2f%% avg chain occ. Histogram: %s\n",
   2014                qdist_avg(&hst.occupancy) * 100, hgram);
   2015    g_free(hgram);
   2016
   2017    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
   2018    hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
   2019    if (hgram_bins > 10) {
   2020        hgram_bins = 10;
   2021    } else {
   2022        hgram_bins = 0;
   2023        hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
   2024    }
   2025    hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
   2026    qemu_printf("TB hash avg chain   %0.3f buckets. Histogram: %s\n",
   2027                qdist_avg(&hst.chain), hgram);
   2028    g_free(hgram);
   2029}
   2030
   2031struct tb_tree_stats {
   2032    size_t nb_tbs;
   2033    size_t host_size;
   2034    size_t target_size;
   2035    size_t max_target_size;
   2036    size_t direct_jmp_count;
   2037    size_t direct_jmp2_count;
   2038    size_t cross_page;
   2039};
   2040
   2041static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
   2042{
   2043    const TranslationBlock *tb = value;
   2044    struct tb_tree_stats *tst = data;
   2045
   2046    tst->nb_tbs++;
   2047    tst->host_size += tb->tc.size;
   2048    tst->target_size += tb->size;
   2049    if (tb->size > tst->max_target_size) {
   2050        tst->max_target_size = tb->size;
   2051    }
   2052    if (tb->page_addr[1] != -1) {
   2053        tst->cross_page++;
   2054    }
   2055    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
   2056        tst->direct_jmp_count++;
   2057        if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
   2058            tst->direct_jmp2_count++;
   2059        }
   2060    }
   2061    return false;
   2062}
   2063
   2064void dump_exec_info(void)
   2065{
   2066    struct tb_tree_stats tst = {};
   2067    struct qht_stats hst;
   2068    size_t nb_tbs, flush_full, flush_part, flush_elide;
   2069
   2070    tcg_tb_foreach(tb_tree_stats_iter, &tst);
   2071    nb_tbs = tst.nb_tbs;
   2072    /* XXX: avoid using doubles ? */
   2073    qemu_printf("Translation buffer state:\n");
   2074    /*
   2075     * Report total code size including the padding and TB structs;
   2076     * otherwise users might think "-accel tcg,tb-size" is not honoured.
   2077     * For avg host size we use the precise numbers from tb_tree_stats though.
   2078     */
   2079    qemu_printf("gen code size       %zu/%zu\n",
   2080                tcg_code_size(), tcg_code_capacity());
   2081    qemu_printf("TB count            %zu\n", nb_tbs);
   2082    qemu_printf("TB avg target size  %zu max=%zu bytes\n",
   2083                nb_tbs ? tst.target_size / nb_tbs : 0,
   2084                tst.max_target_size);
   2085    qemu_printf("TB avg host size    %zu bytes (expansion ratio: %0.1f)\n",
   2086                nb_tbs ? tst.host_size / nb_tbs : 0,
   2087                tst.target_size ? (double)tst.host_size / tst.target_size : 0);
   2088    qemu_printf("cross page TB count %zu (%zu%%)\n", tst.cross_page,
   2089                nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
   2090    qemu_printf("direct jump count   %zu (%zu%%) (2 jumps=%zu %zu%%)\n",
   2091                tst.direct_jmp_count,
   2092                nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
   2093                tst.direct_jmp2_count,
   2094                nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
   2095
   2096    qht_statistics_init(&tb_ctx.htable, &hst);
   2097    print_qht_statistics(hst);
   2098    qht_statistics_destroy(&hst);
   2099
   2100    qemu_printf("\nStatistics:\n");
   2101    qemu_printf("TB flush count      %u\n",
   2102                qatomic_read(&tb_ctx.tb_flush_count));
   2103    qemu_printf("TB invalidate count %u\n",
   2104                qatomic_read(&tb_ctx.tb_phys_invalidate_count));
   2105
   2106    tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
   2107    qemu_printf("TLB full flushes    %zu\n", flush_full);
   2108    qemu_printf("TLB partial flushes %zu\n", flush_part);
   2109    qemu_printf("TLB elided flushes  %zu\n", flush_elide);
   2110    tcg_dump_info();
   2111}
   2112
   2113void dump_opcount_info(void)
   2114{
   2115    tcg_dump_op_count();
   2116}
   2117
   2118#else /* CONFIG_USER_ONLY */
   2119
   2120void cpu_interrupt(CPUState *cpu, int mask)
   2121{
   2122    g_assert(qemu_mutex_iothread_locked());
   2123    cpu->interrupt_request |= mask;
   2124    qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
   2125}
   2126
   2127/*
   2128 * Walks guest process memory "regions" one by one
   2129 * and calls callback function 'fn' for each region.
   2130 */
   2131struct walk_memory_regions_data {
   2132    walk_memory_regions_fn fn;
   2133    void *priv;
   2134    target_ulong start;
   2135    int prot;
   2136};
   2137
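        /*
         * Report the region accumulated so far (if any) to the callback and
         * start a new one at @end with protection @new_prot; a @new_prot of 0
         * means no region is currently open.
         */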
   2138static int walk_memory_regions_end(struct walk_memory_regions_data *data,
   2139                                   target_ulong end, int new_prot)
   2140{
   2141    if (data->start != -1u) {
   2142        int rc = data->fn(data->priv, data->start, end, data->prot);
   2143        if (rc != 0) {
   2144            return rc;
   2145        }
   2146    }
   2147
   2148    data->start = (new_prot ? end : -1u);
   2149    data->prot = new_prot;
   2150
   2151    return 0;
   2152}
   2153
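        /*
         * Recursively walk one subtree of the l1_map radix tree: level 0
         * entries are PageDesc arrays whose flags are reported, higher levels
         * are tables of pointers to the next level down.
         */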
   2154static int walk_memory_regions_1(struct walk_memory_regions_data *data,
   2155                                 target_ulong base, int level, void **lp)
   2156{
   2157    target_ulong pa;
   2158    int i, rc;
   2159
   2160    if (*lp == NULL) {
   2161        return walk_memory_regions_end(data, base, 0);
   2162    }
   2163
   2164    if (level == 0) {
   2165        PageDesc *pd = *lp;
   2166
   2167        for (i = 0; i < V_L2_SIZE; ++i) {
   2168            int prot = pd[i].flags;
   2169
   2170            pa = base | (i << TARGET_PAGE_BITS);
   2171            if (prot != data->prot) {
   2172                rc = walk_memory_regions_end(data, pa, prot);
   2173                if (rc != 0) {
   2174                    return rc;
   2175                }
   2176            }
   2177        }
   2178    } else {
   2179        void **pp = *lp;
   2180
   2181        for (i = 0; i < V_L2_SIZE; ++i) {
   2182            pa = base | ((target_ulong)i <<
   2183                (TARGET_PAGE_BITS + V_L2_BITS * level));
   2184            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
   2185            if (rc != 0) {
   2186                return rc;
   2187            }
   2188        }
   2189    }
   2190
   2191    return 0;
   2192}
   2193
   2194int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
   2195{
   2196    struct walk_memory_regions_data data;
   2197    uintptr_t i, l1_sz = v_l1_size;
   2198
   2199    data.fn = fn;
   2200    data.priv = priv;
   2201    data.start = -1u;
   2202    data.prot = 0;
   2203
   2204    for (i = 0; i < l1_sz; i++) {
   2205        target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
   2206        int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
   2207        if (rc != 0) {
   2208            return rc;
   2209        }
   2210    }
   2211
   2212    return walk_memory_regions_end(&data, 0, 0);
   2213}
   2214
   2215static int dump_region(void *priv, target_ulong start,
   2216    target_ulong end, unsigned long prot)
   2217{
   2218    FILE *f = (FILE *)priv;
   2219
   2220    (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
   2221        " "TARGET_FMT_lx" %c%c%c\n",
   2222        start, end, end - start,
   2223        ((prot & PAGE_READ) ? 'r' : '-'),
   2224        ((prot & PAGE_WRITE) ? 'w' : '-'),
   2225        ((prot & PAGE_EXEC) ? 'x' : '-'));
   2226
   2227    return 0;
   2228}
   2229
   2230/* dump memory mappings */
   2231void page_dump(FILE *f)
   2232{
   2233    const int length = sizeof(target_ulong) * 2;
   2234    (void) fprintf(f, "%-*s %-*s %-*s %s\n",
   2235            length, "start", length, "end", length, "size", "prot");
   2236    walk_memory_regions(f, dump_region);
   2237}
   2238
   2239int page_get_flags(target_ulong address)
   2240{
   2241    PageDesc *p;
   2242
   2243    p = page_find(address >> TARGET_PAGE_BITS);
   2244    if (!p) {
   2245        return 0;
   2246    }
   2247    return p->flags;
   2248}
   2249
   2250/* Modify the flags of a page and invalidate the code if necessary.
   2251   The flag PAGE_WRITE_ORG is positioned automatically depending
   2252   on PAGE_WRITE.  The mmap_lock should already be held.  */
   2253void page_set_flags(target_ulong start, target_ulong end, int flags)
   2254{
   2255    target_ulong addr, len;
   2256    bool reset_target_data;
   2257
   2258    /* This function should never be called with addresses outside the
   2259       guest address space.  If this assert fires, it probably indicates
   2260       a missing call to h2g_valid.  */
   2261    assert(end - 1 <= GUEST_ADDR_MAX);
   2262    assert(start < end);
   2263    /* Only set PAGE_ANON with new mappings. */
   2264    assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
   2265    assert_memory_lock();
   2266
   2267    start = start & TARGET_PAGE_MASK;
   2268    end = TARGET_PAGE_ALIGN(end);
   2269
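            /*
             * PAGE_WRITE_ORG records that the mapping is writable in principle,
             * even if PAGE_WRITE is later cleared to write-protect translated
             * code; page_unprotect() relies on it to restore write access.
             */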
   2270    if (flags & PAGE_WRITE) {
   2271        flags |= PAGE_WRITE_ORG;
   2272    }
   2273    reset_target_data = !(flags & PAGE_VALID) || (flags & PAGE_RESET);
   2274    flags &= ~PAGE_RESET;
   2275
   2276    for (addr = start, len = end - start;
   2277         len != 0;
   2278         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
   2279        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
   2280
   2281        /* If the write protection bit is set, then we invalidate
   2282           the code inside.  */
   2283        if (!(p->flags & PAGE_WRITE) &&
   2284            (flags & PAGE_WRITE) &&
   2285            p->first_tb) {
   2286            tb_invalidate_phys_page(addr, 0);
   2287        }
   2288        if (reset_target_data) {
   2289            g_free(p->target_data);
   2290            p->target_data = NULL;
   2291            p->flags = flags;
   2292        } else {
   2293            /* Using mprotect on a page does not change MAP_ANON. */
   2294            p->flags = (p->flags & PAGE_ANON) | flags;
   2295        }
   2296    }
   2297}
   2298
   2299void *page_get_target_data(target_ulong address)
   2300{
   2301    PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
   2302    return p ? p->target_data : NULL;
   2303}
   2304
   2305void *page_alloc_target_data(target_ulong address, size_t size)
   2306{
   2307    PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
   2308    void *ret = NULL;
   2309
   2310    if (p->flags & PAGE_VALID) {
   2311        ret = p->target_data;
   2312        if (!ret) {
   2313            p->target_data = ret = g_malloc0(size);
   2314        }
   2315    }
   2316    return ret;
   2317}
   2318
   2319int page_check_range(target_ulong start, target_ulong len, int flags)
   2320{
   2321    PageDesc *p;
   2322    target_ulong end;
   2323    target_ulong addr;
   2324
   2325    /* This function should never be called with addresses outside the
   2326       guest address space.  If this assert fires, it probably indicates
   2327       a missing call to h2g_valid.  */
   2328    if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) {
   2329        assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
   2330    }
   2331
   2332    if (len == 0) {
   2333        return 0;
   2334    }
   2335    if (start + len - 1 < start) {
   2336        /* We've wrapped around.  */
   2337        return -1;
   2338    }
   2339
    2340    /* must do this before we lose bits in the next step */
   2341    end = TARGET_PAGE_ALIGN(start + len);
   2342    start = start & TARGET_PAGE_MASK;
   2343
   2344    for (addr = start, len = end - start;
   2345         len != 0;
   2346         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
   2347        p = page_find(addr >> TARGET_PAGE_BITS);
   2348        if (!p) {
   2349            return -1;
   2350        }
   2351        if (!(p->flags & PAGE_VALID)) {
   2352            return -1;
   2353        }
   2354
   2355        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
   2356            return -1;
   2357        }
   2358        if (flags & PAGE_WRITE) {
   2359            if (!(p->flags & PAGE_WRITE_ORG)) {
   2360                return -1;
   2361            }
   2362            /* unprotect the page if it was put read-only because it
   2363               contains translated code */
   2364            if (!(p->flags & PAGE_WRITE)) {
   2365                if (!page_unprotect(addr, 0)) {
   2366                    return -1;
   2367                }
   2368            }
   2369        }
   2370    }
   2371    return 0;
   2372}
   2373
   2374void page_protect(tb_page_addr_t page_addr)
   2375{
   2376    target_ulong addr;
   2377    PageDesc *p;
   2378    int prot;
   2379
   2380    p = page_find(page_addr >> TARGET_PAGE_BITS);
   2381    if (p && (p->flags & PAGE_WRITE)) {
   2382        /*
   2383         * Force the host page as non writable (writes will have a page fault +
   2384         * mprotect overhead).
   2385         */
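                /*
                 * A host page may contain several target pages: collect the
                 * union of their flags and clear PAGE_WRITE on each before
                 * write-protecting the whole host page.
                 */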
   2386        page_addr &= qemu_host_page_mask;
   2387        prot = 0;
   2388        for (addr = page_addr; addr < page_addr + qemu_host_page_size;
   2389             addr += TARGET_PAGE_SIZE) {
   2390
   2391            p = page_find(addr >> TARGET_PAGE_BITS);
   2392            if (!p) {
   2393                continue;
   2394            }
   2395            prot |= p->flags;
   2396            p->flags &= ~PAGE_WRITE;
   2397        }
   2398        mprotect(g2h_untagged(page_addr), qemu_host_page_size,
   2399                 (prot & PAGE_BITS) & ~PAGE_WRITE);
   2400        if (DEBUG_TB_INVALIDATE_GATE) {
   2401            printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr);
   2402        }
   2403    }
   2404}
   2405
   2406/* called from signal handler: invalidate the code and unprotect the
   2407 * page. Return 0 if the fault was not handled, 1 if it was handled,
   2408 * and 2 if it was handled but the caller must cause the TB to be
   2409 * immediately exited. (We can only return 2 if the 'pc' argument is
   2410 * non-zero.)
   2411 */
   2412int page_unprotect(target_ulong address, uintptr_t pc)
   2413{
   2414    unsigned int prot;
   2415    bool current_tb_invalidated;
   2416    PageDesc *p;
   2417    target_ulong host_start, host_end, addr;
   2418
   2419    /* Technically this isn't safe inside a signal handler.  However we
   2420       know this only ever happens in a synchronous SEGV handler, so in
   2421       practice it seems to be ok.  */
   2422    mmap_lock();
   2423
   2424    p = page_find(address >> TARGET_PAGE_BITS);
   2425    if (!p) {
   2426        mmap_unlock();
   2427        return 0;
   2428    }
   2429
   2430    /* if the page was really writable, then we change its
   2431       protection back to writable */
   2432    if (p->flags & PAGE_WRITE_ORG) {
   2433        current_tb_invalidated = false;
   2434        if (p->flags & PAGE_WRITE) {
   2435            /* If the page is actually marked WRITE then assume this is because
   2436             * this thread raced with another one which got here first and
   2437             * set the page to PAGE_WRITE and did the TB invalidate for us.
   2438             */
   2439#ifdef TARGET_HAS_PRECISE_SMC
   2440            TranslationBlock *current_tb = tcg_tb_lookup(pc);
   2441            if (current_tb) {
   2442                current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
   2443            }
   2444#endif
   2445        } else {
   2446            host_start = address & qemu_host_page_mask;
   2447            host_end = host_start + qemu_host_page_size;
   2448
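                    /*
                     * Restore PAGE_WRITE on every target page within the host
                     * page and invalidate any translated code generated from
                     * them, since the faulting write may modify that code.
                     */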
   2449            prot = 0;
   2450            for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
   2451                p = page_find(addr >> TARGET_PAGE_BITS);
   2452                p->flags |= PAGE_WRITE;
   2453                prot |= p->flags;
   2454
   2455                /* and since the content will be modified, we must invalidate
   2456                   the corresponding translated code. */
   2457                current_tb_invalidated |= tb_invalidate_phys_page(addr, pc);
   2458#ifdef CONFIG_USER_ONLY
   2459                if (DEBUG_TB_CHECK_GATE) {
   2460                    tb_invalidate_check(addr);
   2461                }
   2462#endif
   2463            }
   2464            mprotect((void *)g2h_untagged(host_start), qemu_host_page_size,
   2465                     prot & PAGE_BITS);
   2466        }
   2467        mmap_unlock();
   2468        /* If current TB was invalidated return to main loop */
   2469        return current_tb_invalidated ? 2 : 1;
   2470    }
   2471    mmap_unlock();
   2472    return 0;
   2473}
   2474#endif /* CONFIG_USER_ONLY */
   2475
    2476/* This is a wrapper for common code that cannot use CONFIG_SOFTMMU */
   2477void tcg_flush_softmmu_tlb(CPUState *cs)
   2478{
   2479#ifdef CONFIG_SOFTMMU
   2480    tlb_flush(cs);
   2481#endif
   2482}