cachepc-qemu

Fork of AMDESE/qemu with changes for the cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu

cputlb.c (90945B)


      1/*
      2 *  Common CPU TLB handling
      3 *
      4 *  Copyright (c) 2003 Fabrice Bellard
      5 *
      6 * This library is free software; you can redistribute it and/or
      7 * modify it under the terms of the GNU Lesser General Public
      8 * License as published by the Free Software Foundation; either
      9 * version 2.1 of the License, or (at your option) any later version.
     10 *
     11 * This library is distributed in the hope that it will be useful,
     12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14 * Lesser General Public License for more details.
     15 *
     16 * You should have received a copy of the GNU Lesser General Public
     17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     18 */
     19
     20#include "qemu/osdep.h"
     21#include "qemu/main-loop.h"
     22#include "hw/core/tcg-cpu-ops.h"
     23#include "exec/exec-all.h"
     24#include "exec/memory.h"
     25#include "exec/cpu_ldst.h"
     26#include "exec/cputlb.h"
     27#include "exec/memory-internal.h"
     28#include "exec/ram_addr.h"
     29#include "tcg/tcg.h"
     30#include "qemu/error-report.h"
     31#include "exec/log.h"
     32#include "exec/helper-proto.h"
     33#include "qemu/atomic.h"
     34#include "qemu/atomic128.h"
     35#include "exec/translate-all.h"
     36#include "trace/trace-root.h"
     37#include "tb-hash.h"
     38#include "internal.h"
     39#ifdef CONFIG_PLUGIN
     40#include "qemu/plugin-memory.h"
     41#endif
     42
     43/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
     44/* #define DEBUG_TLB */
     45/* #define DEBUG_TLB_LOG */
     46
     47#ifdef DEBUG_TLB
     48# define DEBUG_TLB_GATE 1
     49# ifdef DEBUG_TLB_LOG
     50#  define DEBUG_TLB_LOG_GATE 1
     51# else
     52#  define DEBUG_TLB_LOG_GATE 0
     53# endif
     54#else
     55# define DEBUG_TLB_GATE 0
     56# define DEBUG_TLB_LOG_GATE 0
     57#endif
     58
     59#define tlb_debug(fmt, ...) do { \
     60    if (DEBUG_TLB_LOG_GATE) { \
     61        qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
     62                      ## __VA_ARGS__); \
     63    } else if (DEBUG_TLB_GATE) { \
     64        fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
     65    } \
     66} while (0)
     67
     68#define assert_cpu_is_self(cpu) do {                              \
     69        if (DEBUG_TLB_GATE) {                                     \
     70            g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
     71        }                                                         \
     72    } while (0)
     73
     74/* run_on_cpu_data.target_ptr should always be big enough for a
     75 * target_ulong even on 32 bit builds */
     76QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
     77
     78/* We currently can't handle more than 16 bits in the MMUIDX bitmask.
     79 */
     80QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
     81#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
     82
     83static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
     84{
     85    return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
     86}
     87
     88static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
     89{
     90    return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
     91}
     92
     93static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
     94                             size_t max_entries)
     95{
     96    desc->window_begin_ns = ns;
     97    desc->window_max_entries = max_entries;
     98}
     99
    100static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
    101{
    102    unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr);
    103
    104    for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
    105        qatomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
    106    }
    107}
    108
    109static void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
    110{
    111    /* Discard jump cache entries for any tb which might potentially
    112       overlap the flushed page.  */
    113    tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
    114    tb_jmp_cache_clear_page(cpu, addr);
    115}
    116
    117/**
    118 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
    119 * @desc: The CPUTLBDesc portion of the TLB
    120 * @fast: The CPUTLBDescFast portion of the same TLB
    121 *
    122 * Called with tlb_lock_held.
    123 *
    124 * We have two main constraints when resizing a TLB: (1) we only resize it
    125 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
    126 * the array or unnecessarily flushing it), which means we do not control how
    127 * frequently the resizing can occur; (2) we don't have access to the guest's
    128 * future scheduling decisions, and therefore have to decide the magnitude of
    129 * the resize based on past observations.
    130 *
    131 * In general, a memory-hungry process can benefit greatly from an appropriately
    132 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
    133 * we just have to make the TLB as large as possible; while an oversized TLB
    134 * results in minimal TLB miss rates, it also takes longer to be flushed
    135 * (flushes can be _very_ frequent), and the reduced locality can also hurt
    136 * performance.
    137 *
    138 * To achieve near-optimal performance for all kinds of workloads, we:
    139 *
    140 * 1. Aggressively increase the size of the TLB when the use rate of the
    141 * TLB being flushed is high, since it is likely that in the near future this
    142 * memory-hungry process will execute again, and its memory hungriness will
    143 * probably be similar.
    144 *
    145 * 2. Slowly reduce the size of the TLB as the use rate declines over a
    146 * reasonably large time window. The rationale is that if in such a time window
    147 * we have not observed a high TLB use rate, it is likely that we won't observe
    148 * it in the near future. In that case, once a time window expires we downsize
    149 * the TLB to match the maximum use rate observed in the window.
    150 *
    151 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
    152 * since in that range performance is likely near-optimal. Recall that the TLB
    153 * is direct mapped, so we want the use rate to be low (or at least not too
    154 * high), since otherwise we are likely to have a significant amount of
    155 * conflict misses.
    156 */
    157static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
    158                                  int64_t now)
    159{
    160    size_t old_size = tlb_n_entries(fast);
    161    size_t rate;
    162    size_t new_size = old_size;
    163    int64_t window_len_ms = 100;
    164    int64_t window_len_ns = window_len_ms * 1000 * 1000;
    165    bool window_expired = now > desc->window_begin_ns + window_len_ns;
    166
    167    if (desc->n_used_entries > desc->window_max_entries) {
    168        desc->window_max_entries = desc->n_used_entries;
    169    }
    170    rate = desc->window_max_entries * 100 / old_size;
    171
    172    if (rate > 70) {
    173        new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
    174    } else if (rate < 30 && window_expired) {
    175        size_t ceil = pow2ceil(desc->window_max_entries);
    176        size_t expected_rate = desc->window_max_entries * 100 / ceil;
    177
    178        /*
    179         * Avoid undersizing when the max number of entries seen is just below
    180         * a pow2. For instance, if max_entries == 1025, the expected use rate
    181         * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
    182         * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
    183         * later. Thus, make sure that the expected use rate remains below 70%.
    184         * (and since we double the size, that means the lowest rate we'd
    185         * expect to get is 35%, which is still in the 30-70% range where
    186         * we consider that the size is appropriate.)
    187         */
    188        if (expected_rate > 70) {
    189            ceil *= 2;
    190        }
    191        new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
    192    }
    193
    194    if (new_size == old_size) {
    195        if (window_expired) {
    196            tlb_window_reset(desc, now, desc->n_used_entries);
    197        }
    198        return;
    199    }
    200
    201    g_free(fast->table);
    202    g_free(desc->iotlb);
    203
    204    tlb_window_reset(desc, now, 0);
    205    /* desc->n_used_entries is cleared by the caller */
    206    fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
    207    fast->table = g_try_new(CPUTLBEntry, new_size);
    208    desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
    209
    210    /*
    211     * If the allocations fail, try smaller sizes. We just freed some
    212     * memory, so going back to half of new_size has a good chance of working.
    213     * Increased memory pressure elsewhere in the system might cause the
    214     * allocations to fail though, so we progressively reduce the allocation
    215     * size, aborting if we cannot even allocate the smallest TLB we support.
    216     */
    217    while (fast->table == NULL || desc->iotlb == NULL) {
    218        if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
    219            error_report("%s: %s", __func__, strerror(errno));
    220            abort();
    221        }
    222        new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
    223        fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
    224
    225        g_free(fast->table);
    226        g_free(desc->iotlb);
    227        fast->table = g_try_new(CPUTLBEntry, new_size);
    228        desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
    229    }
    230}
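/*
 * A worked example of the sizing heuristic above, using hypothetical
 * window statistics rather than values from a real trace:
 *
 *   old_size = 1024, window_max_entries = 800:
 *     rate = 800 * 100 / 1024 = 78 > 70, so the TLB doubles to 2048
 *     entries (bounded by 1 << CPU_TLB_DYN_MAX_BITS).
 *
 *   old_size = 1024, window_max_entries = 200, window expired:
 *     rate = 200 * 100 / 1024 = 19 < 30, so we shrink.  pow2ceil(200)
 *     gives 256, but expected_rate = 200 * 100 / 256 = 78 > 70, so
 *     ceil is doubled to 512 and the new size becomes
 *     MAX(512, 1 << CPU_TLB_DYN_MIN_BITS).
 */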
    231
    232static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
    233{
    234    desc->n_used_entries = 0;
    235    desc->large_page_addr = -1;
    236    desc->large_page_mask = -1;
    237    desc->vindex = 0;
    238    memset(fast->table, -1, sizeof_tlb(fast));
    239    memset(desc->vtable, -1, sizeof(desc->vtable));
    240}
    241
    242static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx,
    243                                        int64_t now)
    244{
    245    CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
    246    CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
    247
    248    tlb_mmu_resize_locked(desc, fast, now);
    249    tlb_mmu_flush_locked(desc, fast);
    250}
    251
    252static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
    253{
    254    size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
    255
    256    tlb_window_reset(desc, now, 0);
    257    desc->n_used_entries = 0;
    258    fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
    259    fast->table = g_new(CPUTLBEntry, n_entries);
    260    desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
    261    tlb_mmu_flush_locked(desc, fast);
    262}
    263
    264static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
    265{
    266    env_tlb(env)->d[mmu_idx].n_used_entries++;
    267}
    268
    269static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
    270{
    271    env_tlb(env)->d[mmu_idx].n_used_entries--;
    272}
    273
    274void tlb_init(CPUState *cpu)
    275{
    276    CPUArchState *env = cpu->env_ptr;
    277    int64_t now = get_clock_realtime();
    278    int i;
    279
    280    qemu_spin_init(&env_tlb(env)->c.lock);
    281
    282    /* All tlbs are initialized flushed. */
    283    env_tlb(env)->c.dirty = 0;
    284
    285    for (i = 0; i < NB_MMU_MODES; i++) {
    286        tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
    287    }
    288}
    289
    290void tlb_destroy(CPUState *cpu)
    291{
    292    CPUArchState *env = cpu->env_ptr;
    293    int i;
    294
    295    qemu_spin_destroy(&env_tlb(env)->c.lock);
    296    for (i = 0; i < NB_MMU_MODES; i++) {
    297        CPUTLBDesc *desc = &env_tlb(env)->d[i];
    298        CPUTLBDescFast *fast = &env_tlb(env)->f[i];
    299
    300        g_free(fast->table);
    301        g_free(desc->iotlb);
    302    }
    303}
    304
    305/* flush_all_helper: run fn across all cpus
    306 *
    307 * If the wait flag is set then the src cpu's helper will be queued as
    308 * "safe" work and the loop exited creating a synchronisation point
    309 * where all queued work will be finished before execution starts
    310 * again.
    311 */
    312static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
    313                             run_on_cpu_data d)
    314{
    315    CPUState *cpu;
    316
    317    CPU_FOREACH(cpu) {
    318        if (cpu != src) {
    319            async_run_on_cpu(cpu, fn, d);
    320        }
    321    }
    322}
    323
    324void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
    325{
    326    CPUState *cpu;
    327    size_t full = 0, part = 0, elide = 0;
    328
    329    CPU_FOREACH(cpu) {
    330        CPUArchState *env = cpu->env_ptr;
    331
    332        full += qatomic_read(&env_tlb(env)->c.full_flush_count);
    333        part += qatomic_read(&env_tlb(env)->c.part_flush_count);
    334        elide += qatomic_read(&env_tlb(env)->c.elide_flush_count);
    335    }
    336    *pfull = full;
    337    *ppart = part;
    338    *pelide = elide;
    339}
    340
    341static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
    342{
    343    CPUArchState *env = cpu->env_ptr;
    344    uint16_t asked = data.host_int;
    345    uint16_t all_dirty, work, to_clean;
    346    int64_t now = get_clock_realtime();
    347
    348    assert_cpu_is_self(cpu);
    349
    350    tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
    351
    352    qemu_spin_lock(&env_tlb(env)->c.lock);
    353
    354    all_dirty = env_tlb(env)->c.dirty;
    355    to_clean = asked & all_dirty;
    356    all_dirty &= ~to_clean;
    357    env_tlb(env)->c.dirty = all_dirty;
    358
    359    for (work = to_clean; work != 0; work &= work - 1) {
    360        int mmu_idx = ctz32(work);
    361        tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
    362    }
    363
    364    qemu_spin_unlock(&env_tlb(env)->c.lock);
    365
    366    cpu_tb_jmp_cache_clear(cpu);
    367
    368    if (to_clean == ALL_MMUIDX_BITS) {
    369        qatomic_set(&env_tlb(env)->c.full_flush_count,
    370                   env_tlb(env)->c.full_flush_count + 1);
    371    } else {
    372        qatomic_set(&env_tlb(env)->c.part_flush_count,
    373                   env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
    374        if (to_clean != asked) {
    375            qatomic_set(&env_tlb(env)->c.elide_flush_count,
    376                       env_tlb(env)->c.elide_flush_count +
    377                       ctpop16(asked & ~to_clean));
    378        }
    379    }
    380}
    381
    382void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
    383{
    384    tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
    385
    386    if (cpu->created && !qemu_cpu_is_self(cpu)) {
    387        async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
    388                         RUN_ON_CPU_HOST_INT(idxmap));
    389    } else {
    390        tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
    391    }
    392}
    393
    394void tlb_flush(CPUState *cpu)
    395{
    396    tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
    397}
    398
    399void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
    400{
    401    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
    402
    403    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
    404
    405    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    406    fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
    407}
    408
    409void tlb_flush_all_cpus(CPUState *src_cpu)
    410{
    411    tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
    412}
    413
    414void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
    415{
    416    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
    417
    418    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
    419
    420    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    421    async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    422}
    423
    424void tlb_flush_all_cpus_synced(CPUState *src_cpu)
    425{
    426    tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
    427}
    428
    429static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry,
    430                                      target_ulong page, target_ulong mask)
    431{
    432    page &= mask;
    433    mask &= TARGET_PAGE_MASK | TLB_INVALID_MASK;
    434
    435    return (page == (tlb_entry->addr_read & mask) ||
    436            page == (tlb_addr_write(tlb_entry) & mask) ||
    437            page == (tlb_entry->addr_code & mask));
    438}
    439
    440static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
    441                                        target_ulong page)
    442{
    443    return tlb_hit_page_mask_anyprot(tlb_entry, page, -1);
    444}
    445
    446/**
    447 * tlb_entry_is_empty - return true if the entry is not in use
    448 * @te: pointer to CPUTLBEntry
    449 */
    450static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
    451{
    452    return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
    453}
    454
    455/* Called with tlb_c.lock held */
    456static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry,
    457                                        target_ulong page,
    458                                        target_ulong mask)
    459{
    460    if (tlb_hit_page_mask_anyprot(tlb_entry, page, mask)) {
    461        memset(tlb_entry, -1, sizeof(*tlb_entry));
    462        return true;
    463    }
    464    return false;
    465}
    466
    467static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
    468                                          target_ulong page)
    469{
    470    return tlb_flush_entry_mask_locked(tlb_entry, page, -1);
    471}
    472
    473/* Called with tlb_c.lock held */
    474static void tlb_flush_vtlb_page_mask_locked(CPUArchState *env, int mmu_idx,
    475                                            target_ulong page,
    476                                            target_ulong mask)
    477{
    478    CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
    479    int k;
    480
    481    assert_cpu_is_self(env_cpu(env));
    482    for (k = 0; k < CPU_VTLB_SIZE; k++) {
    483        if (tlb_flush_entry_mask_locked(&d->vtable[k], page, mask)) {
    484            tlb_n_used_entries_dec(env, mmu_idx);
    485        }
    486    }
    487}
    488
    489static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
    490                                              target_ulong page)
    491{
    492    tlb_flush_vtlb_page_mask_locked(env, mmu_idx, page, -1);
    493}
    494
    495static void tlb_flush_page_locked(CPUArchState *env, int midx,
    496                                  target_ulong page)
    497{
    498    target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
    499    target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;
    500
    501    /* Check if we need to flush due to large pages.  */
    502    if ((page & lp_mask) == lp_addr) {
    503        tlb_debug("forcing full flush midx %d ("
    504                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
    505                  midx, lp_addr, lp_mask);
    506        tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
    507    } else {
    508        if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
    509            tlb_n_used_entries_dec(env, midx);
    510        }
    511        tlb_flush_vtlb_page_locked(env, midx, page);
    512    }
    513}
    514
    515/**
    516 * tlb_flush_page_by_mmuidx_async_0:
    517 * @cpu: cpu on which to flush
    518 * @addr: page of virtual address to flush
    519 * @idxmap: set of mmu_idx to flush
    520 *
    521 * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
    522 * at @addr from the tlbs indicated by @idxmap from @cpu.
    523 */
    524static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
    525                                             target_ulong addr,
    526                                             uint16_t idxmap)
    527{
    528    CPUArchState *env = cpu->env_ptr;
    529    int mmu_idx;
    530
    531    assert_cpu_is_self(cpu);
    532
    533    tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);
    534
    535    qemu_spin_lock(&env_tlb(env)->c.lock);
    536    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
    537        if ((idxmap >> mmu_idx) & 1) {
    538            tlb_flush_page_locked(env, mmu_idx, addr);
    539        }
    540    }
    541    qemu_spin_unlock(&env_tlb(env)->c.lock);
    542
    543    tb_flush_jmp_cache(cpu, addr);
    544}
    545
    546/**
    547 * tlb_flush_page_by_mmuidx_async_1:
    548 * @cpu: cpu on which to flush
    549 * @data: encoded addr + idxmap
    550 *
    551 * Helper for tlb_flush_page_by_mmuidx and friends, called through
    552 * async_run_on_cpu.  The idxmap parameter is encoded in the page
    553 * offset of the target_ptr field.  This limits the set of mmu_idx
    554 * that can be passed via this method.
    555 */
    556static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
    557                                             run_on_cpu_data data)
    558{
    559    target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
    560    target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
    561    uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
    562
    563    tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
    564}
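/*
 * A worked example of the encoding used here, assuming a hypothetical
 * TARGET_PAGE_BITS of 12 (4 KiB pages): addr = 0x12345000 and
 * idxmap = 0x3 are packed by the caller as 0x12345003 via
 * RUN_ON_CPU_TARGET_PTR(addr | idxmap).  The decode above recovers
 * addr = 0x12345000 (& TARGET_PAGE_MASK) and idxmap = 0x3
 * (& ~TARGET_PAGE_MASK).  This only works while idxmap fits in the
 * page offset, i.e. idxmap < TARGET_PAGE_SIZE, which the callers check.
 */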
    565
    566typedef struct {
    567    target_ulong addr;
    568    uint16_t idxmap;
    569} TLBFlushPageByMMUIdxData;
    570
    571/**
    572 * tlb_flush_page_by_mmuidx_async_2:
    573 * @cpu: cpu on which to flush
    574 * @data: allocated addr + idxmap
    575 *
    576 * Helper for tlb_flush_page_by_mmuidx and friends, called through
    577 * async_run_on_cpu.  The addr+idxmap parameters are stored in a
    578 * TLBFlushPageByMMUIdxData structure that has been allocated
    579 * specifically for this helper.  Free the structure when done.
    580 */
    581static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
    582                                             run_on_cpu_data data)
    583{
    584    TLBFlushPageByMMUIdxData *d = data.host_ptr;
    585
    586    tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
    587    g_free(d);
    588}
    589
    590void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
    591{
    592    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
    593
    594    /* This should already be page aligned */
    595    addr &= TARGET_PAGE_MASK;
    596
    597    if (qemu_cpu_is_self(cpu)) {
    598        tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
    599    } else if (idxmap < TARGET_PAGE_SIZE) {
    600        /*
    601         * Most targets have only a few mmu_idx.  In the case where
    602         * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
    603         * allocating memory for this operation.
    604         */
    605        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
    606                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    607    } else {
    608        TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);
    609
    610        /* Otherwise allocate a structure, freed by the worker.  */
    611        d->addr = addr;
    612        d->idxmap = idxmap;
    613        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
    614                         RUN_ON_CPU_HOST_PTR(d));
    615    }
    616}
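/*
 * A minimal usage sketch (hypothetical target code, not part of this
 * file): a target that keeps kernel and user translations in separate
 * MMU indexes could invalidate one guest page in both of them with
 *
 *     tlb_flush_page_by_mmuidx(cs, vaddr,
 *                              (1 << MMU_KERNEL_IDX) | (1 << MMU_USER_IDX));
 *
 * where MMU_KERNEL_IDX and MMU_USER_IDX stand in for target-defined
 * mmu_idx values.
 */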
    617
    618void tlb_flush_page(CPUState *cpu, target_ulong addr)
    619{
    620    tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
    621}
    622
    623void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
    624                                       uint16_t idxmap)
    625{
    626    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
    627
    628    /* This should already be page aligned */
    629    addr &= TARGET_PAGE_MASK;
    630
    631    /*
    632     * Allocate memory to hold addr+idxmap only when needed.
    633     * See tlb_flush_page_by_mmuidx for details.
    634     */
    635    if (idxmap < TARGET_PAGE_SIZE) {
    636        flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
    637                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    638    } else {
    639        CPUState *dst_cpu;
    640
    641        /* Allocate a separate data block for each destination cpu.  */
    642        CPU_FOREACH(dst_cpu) {
    643            if (dst_cpu != src_cpu) {
    644                TLBFlushPageByMMUIdxData *d
    645                    = g_new(TLBFlushPageByMMUIdxData, 1);
    646
    647                d->addr = addr;
    648                d->idxmap = idxmap;
    649                async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
    650                                 RUN_ON_CPU_HOST_PTR(d));
    651            }
    652        }
    653    }
    654
    655    tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
    656}
    657
    658void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
    659{
    660    tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
    661}
    662
    663void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
    664                                              target_ulong addr,
    665                                              uint16_t idxmap)
    666{
    667    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
    668
    669    /* This should already be page aligned */
    670    addr &= TARGET_PAGE_MASK;
    671
    672    /*
    673     * Allocate memory to hold addr+idxmap only when needed.
    674     * See tlb_flush_page_by_mmuidx for details.
    675     */
    676    if (idxmap < TARGET_PAGE_SIZE) {
    677        flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
    678                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    679        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
    680                              RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    681    } else {
    682        CPUState *dst_cpu;
    683        TLBFlushPageByMMUIdxData *d;
    684
    685        /* Allocate a separate data block for each destination cpu.  */
    686        CPU_FOREACH(dst_cpu) {
    687            if (dst_cpu != src_cpu) {
    688                d = g_new(TLBFlushPageByMMUIdxData, 1);
    689                d->addr = addr;
    690                d->idxmap = idxmap;
    691                async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
    692                                 RUN_ON_CPU_HOST_PTR(d));
    693            }
    694        }
    695
    696        d = g_new(TLBFlushPageByMMUIdxData, 1);
    697        d->addr = addr;
    698        d->idxmap = idxmap;
    699        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
    700                              RUN_ON_CPU_HOST_PTR(d));
    701    }
    702}
    703
    704void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
    705{
    706    tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
    707}
    708
    709static void tlb_flush_range_locked(CPUArchState *env, int midx,
    710                                   target_ulong addr, target_ulong len,
    711                                   unsigned bits)
    712{
    713    CPUTLBDesc *d = &env_tlb(env)->d[midx];
    714    CPUTLBDescFast *f = &env_tlb(env)->f[midx];
    715    target_ulong mask = MAKE_64BIT_MASK(0, bits);
    716
    717    /*
    718     * If @bits is smaller than the tlb size, there may be multiple entries
    719     * within the TLB; otherwise all addresses that match under @mask hit
    720     * the same TLB entry.
    721     * TODO: Perhaps allow bits to be a few bits less than the size.
    722     * For now, just flush the entire TLB.
    723     *
    724     * If @len is larger than the tlb size, then it will take longer to
    725     * test all of the entries in the TLB than it will to flush it all.
    726     */
    727    if (mask < f->mask || len > f->mask) {
    728        tlb_debug("forcing full flush midx %d ("
    729                  TARGET_FMT_lx "/" TARGET_FMT_lx "+" TARGET_FMT_lx ")\n",
    730                  midx, addr, mask, len);
    731        tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
    732        return;
    733    }
    734
    735    /*
    736     * Check if we need to flush due to large pages.
    737     * Because large_page_mask contains all 1's from the msb,
    738     * we only need to test the end of the range.
    739     */
    740    if (((addr + len - 1) & d->large_page_mask) == d->large_page_addr) {
    741        tlb_debug("forcing full flush midx %d ("
    742                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
    743                  midx, d->large_page_addr, d->large_page_mask);
    744        tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
    745        return;
    746    }
    747
    748    for (target_ulong i = 0; i < len; i += TARGET_PAGE_SIZE) {
    749        target_ulong page = addr + i;
    750        CPUTLBEntry *entry = tlb_entry(env, midx, page);
    751
    752        if (tlb_flush_entry_mask_locked(entry, page, mask)) {
    753            tlb_n_used_entries_dec(env, midx);
    754        }
    755        tlb_flush_vtlb_page_mask_locked(env, midx, page, mask);
    756    }
    757}
    758
    759typedef struct {
    760    target_ulong addr;
    761    target_ulong len;
    762    uint16_t idxmap;
    763    uint16_t bits;
    764} TLBFlushRangeData;
    765
    766static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
    767                                              TLBFlushRangeData d)
    768{
    769    CPUArchState *env = cpu->env_ptr;
    770    int mmu_idx;
    771
    772    assert_cpu_is_self(cpu);
    773
    774    tlb_debug("range:" TARGET_FMT_lx "/%u+" TARGET_FMT_lx " mmu_map:0x%x\n",
    775              d.addr, d.bits, d.len, d.idxmap);
    776
    777    qemu_spin_lock(&env_tlb(env)->c.lock);
    778    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
    779        if ((d.idxmap >> mmu_idx) & 1) {
    780            tlb_flush_range_locked(env, mmu_idx, d.addr, d.len, d.bits);
    781        }
    782    }
    783    qemu_spin_unlock(&env_tlb(env)->c.lock);
    784
    785    for (target_ulong i = 0; i < d.len; i += TARGET_PAGE_SIZE) {
    786        tb_flush_jmp_cache(cpu, d.addr + i);
    787    }
    788}
    789
    790static void tlb_flush_range_by_mmuidx_async_1(CPUState *cpu,
    791                                              run_on_cpu_data data)
    792{
    793    TLBFlushRangeData *d = data.host_ptr;
    794    tlb_flush_range_by_mmuidx_async_0(cpu, *d);
    795    g_free(d);
    796}
    797
    798void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr,
    799                               target_ulong len, uint16_t idxmap,
    800                               unsigned bits)
    801{
    802    TLBFlushRangeData d;
    803
    804    /*
    805     * If all bits are significant, and len is small,
    806     * this devolves to tlb_flush_page.
    807     */
    808    if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) {
    809        tlb_flush_page_by_mmuidx(cpu, addr, idxmap);
    810        return;
    811    }
    812    /* If no page bits are significant, this devolves to tlb_flush. */
    813    if (bits < TARGET_PAGE_BITS) {
    814        tlb_flush_by_mmuidx(cpu, idxmap);
    815        return;
    816    }
    817
    818    /* This should already be page aligned */
    819    d.addr = addr & TARGET_PAGE_MASK;
    820    d.len = len;
    821    d.idxmap = idxmap;
    822    d.bits = bits;
    823
    824    if (qemu_cpu_is_self(cpu)) {
    825        tlb_flush_range_by_mmuidx_async_0(cpu, d);
    826    } else {
    827        /* Otherwise allocate a structure, freed by the worker.  */
    828        TLBFlushRangeData *p = g_memdup(&d, sizeof(d));
    829        async_run_on_cpu(cpu, tlb_flush_range_by_mmuidx_async_1,
    830                         RUN_ON_CPU_HOST_PTR(p));
    831    }
    832}
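/*
 * A worked example of the devolution above, assuming hypothetical
 * values TARGET_LONG_BITS = 64 and TARGET_PAGE_BITS = 12:
 *
 *   bits = 64, len = 4096  -> all address bits are significant and the
 *     range is a single page, so this is just tlb_flush_page_by_mmuidx().
 *   bits = 8               -> fewer significant bits than a page offset,
 *     so no page can be distinguished and we fall back to
 *     tlb_flush_by_mmuidx().
 *   bits = 48, len = 16384 -> neither shortcut applies, so the per-page
 *     range flush below is used with a 48-bit mask.
 */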
    833
    834void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr,
    835                                   uint16_t idxmap, unsigned bits)
    836{
    837    tlb_flush_range_by_mmuidx(cpu, addr, TARGET_PAGE_SIZE, idxmap, bits);
    838}
    839
    840void tlb_flush_range_by_mmuidx_all_cpus(CPUState *src_cpu,
    841                                        target_ulong addr, target_ulong len,
    842                                        uint16_t idxmap, unsigned bits)
    843{
    844    TLBFlushRangeData d;
    845    CPUState *dst_cpu;
    846
    847    /*
    848     * If all bits are significant, and len is small,
    849     * this devolves to tlb_flush_page.
    850     */
    851    if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) {
    852        tlb_flush_page_by_mmuidx_all_cpus(src_cpu, addr, idxmap);
    853        return;
    854    }
    855    /* If no page bits are significant, this devolves to tlb_flush. */
    856    if (bits < TARGET_PAGE_BITS) {
    857        tlb_flush_by_mmuidx_all_cpus(src_cpu, idxmap);
    858        return;
    859    }
    860
    861    /* This should already be page aligned */
    862    d.addr = addr & TARGET_PAGE_MASK;
    863    d.len = len;
    864    d.idxmap = idxmap;
    865    d.bits = bits;
    866
    867    /* Allocate a separate data block for each destination cpu.  */
    868    CPU_FOREACH(dst_cpu) {
    869        if (dst_cpu != src_cpu) {
    870            TLBFlushRangeData *p = g_memdup(&d, sizeof(d));
    871            async_run_on_cpu(dst_cpu,
    872                             tlb_flush_range_by_mmuidx_async_1,
    873                             RUN_ON_CPU_HOST_PTR(p));
    874        }
    875    }
    876
    877    tlb_flush_range_by_mmuidx_async_0(src_cpu, d);
    878}
    879
    880void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu,
    881                                            target_ulong addr,
    882                                            uint16_t idxmap, unsigned bits)
    883{
    884    tlb_flush_range_by_mmuidx_all_cpus(src_cpu, addr, TARGET_PAGE_SIZE,
    885                                       idxmap, bits);
    886}
    887
    888void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
    889                                               target_ulong addr,
    890                                               target_ulong len,
    891                                               uint16_t idxmap,
    892                                               unsigned bits)
    893{
    894    TLBFlushRangeData d, *p;
    895    CPUState *dst_cpu;
    896
    897    /*
    898     * If all bits are significant, and len is small,
    899     * this devolves to tlb_flush_page.
    900     */
    901    if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) {
    902        tlb_flush_page_by_mmuidx_all_cpus_synced(src_cpu, addr, idxmap);
    903        return;
    904    }
    905    /* If no page bits are significant, this devolves to tlb_flush. */
    906    if (bits < TARGET_PAGE_BITS) {
    907        tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, idxmap);
    908        return;
    909    }
    910
    911    /* This should already be page aligned */
    912    d.addr = addr & TARGET_PAGE_MASK;
    913    d.len = len;
    914    d.idxmap = idxmap;
    915    d.bits = bits;
    916
    917    /* Allocate a separate data block for each destination cpu.  */
    918    CPU_FOREACH(dst_cpu) {
    919        if (dst_cpu != src_cpu) {
    920            p = g_memdup(&d, sizeof(d));
    921            async_run_on_cpu(dst_cpu, tlb_flush_range_by_mmuidx_async_1,
    922                             RUN_ON_CPU_HOST_PTR(p));
    923        }
    924    }
    925
    926    p = g_memdup(&d, sizeof(d));
    927    async_safe_run_on_cpu(src_cpu, tlb_flush_range_by_mmuidx_async_1,
    928                          RUN_ON_CPU_HOST_PTR(p));
    929}
    930
    931void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
    932                                                   target_ulong addr,
    933                                                   uint16_t idxmap,
    934                                                   unsigned bits)
    935{
    936    tlb_flush_range_by_mmuidx_all_cpus_synced(src_cpu, addr, TARGET_PAGE_SIZE,
    937                                              idxmap, bits);
    938}
    939
    940/* update the TLBs so that writes to code in the virtual page 'addr'
    941   can be detected */
    942void tlb_protect_code(ram_addr_t ram_addr)
    943{
    944    cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
    945                                             DIRTY_MEMORY_CODE);
    946}
    947
    948/* update the TLB so that writes in physical page 'phys_addr' are no longer
    949   tested for self modifying code */
    950void tlb_unprotect_code(ram_addr_t ram_addr)
    951{
    952    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
    953}
    954
    955
    956/*
    957 * Dirty write flag handling
    958 *
    959 * When the TCG code writes to a location it looks up the address in
    960 * the TLB and uses that data to compute the final address. If any of
    961 * the lower bits of the address are set then the slow path is forced.
    962 * There are a number of reasons to do this but for normal RAM the
    963 * most usual is detecting writes to code regions which may invalidate
    964 * generated code.
    965 *
    966 * Other vCPUs might be reading their TLBs during guest execution, so we update
    967 * te->addr_write with qatomic_set. We don't need to worry about this for
    968 * oversized guests as MTTCG is disabled for them.
    969 *
    970 * Called with tlb_c.lock held.
    971 */
    972static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
    973                                         uintptr_t start, uintptr_t length)
    974{
    975    uintptr_t addr = tlb_entry->addr_write;
    976
    977    if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
    978                 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
    979        addr &= TARGET_PAGE_MASK;
    980        addr += tlb_entry->addend;
    981        if ((addr - start) < length) {
    982#if TCG_OVERSIZED_GUEST
    983            tlb_entry->addr_write |= TLB_NOTDIRTY;
    984#else
    985            qatomic_set(&tlb_entry->addr_write,
    986                       tlb_entry->addr_write | TLB_NOTDIRTY);
    987#endif
    988        }
    989    }
    990}
    991
    992/*
    993 * Called with tlb_c.lock held.
    994 * Called only from the vCPU context, i.e. the TLB's owner thread.
    995 */
    996static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
    997{
    998    *d = *s;
    999}
   1000
   1001/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
   1002 * the target vCPU).
   1003 * We must take tlb_c.lock to avoid racing with another vCPU update. The only
   1004 * thing actually updated is the target TLB entry ->addr_write flags.
   1005 */
   1006void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
   1007{
   1008    CPUArchState *env;
   1009
   1010    int mmu_idx;
   1011
   1012    env = cpu->env_ptr;
   1013    qemu_spin_lock(&env_tlb(env)->c.lock);
   1014    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
   1015        unsigned int i;
   1016        unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
   1017
   1018        for (i = 0; i < n; i++) {
   1019            tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
   1020                                         start1, length);
   1021        }
   1022
   1023        for (i = 0; i < CPU_VTLB_SIZE; i++) {
   1024            tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
   1025                                         start1, length);
   1026        }
   1027    }
   1028    qemu_spin_unlock(&env_tlb(env)->c.lock);
   1029}
   1030
   1031/* Called with tlb_c.lock held */
   1032static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
   1033                                         target_ulong vaddr)
   1034{
   1035    if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
   1036        tlb_entry->addr_write = vaddr;
   1037    }
   1038}
   1039
   1040/* update the TLB corresponding to virtual page vaddr
   1041   so that it is no longer dirty */
   1042void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
   1043{
   1044    CPUArchState *env = cpu->env_ptr;
   1045    int mmu_idx;
   1046
   1047    assert_cpu_is_self(cpu);
   1048
   1049    vaddr &= TARGET_PAGE_MASK;
   1050    qemu_spin_lock(&env_tlb(env)->c.lock);
   1051    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
   1052        tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
   1053    }
   1054
   1055    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
   1056        int k;
   1057        for (k = 0; k < CPU_VTLB_SIZE; k++) {
   1058            tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
   1059        }
   1060    }
   1061    qemu_spin_unlock(&env_tlb(env)->c.lock);
   1062}
   1063
   1064/* Our TLB does not support large pages, so remember the area covered by
   1065   large pages and trigger a full TLB flush if these are invalidated.  */
   1066static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
   1067                               target_ulong vaddr, target_ulong size)
   1068{
   1069    target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
   1070    target_ulong lp_mask = ~(size - 1);
   1071
   1072    if (lp_addr == (target_ulong)-1) {
   1073        /* No previous large page.  */
   1074        lp_addr = vaddr;
   1075    } else {
   1076        /* Extend the existing region to include the new page.
   1077           This is a compromise between unnecessary flushes and
   1078           the cost of maintaining a full variable size TLB.  */
   1079        lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
   1080        while (((lp_addr ^ vaddr) & lp_mask) != 0) {
   1081            lp_mask <<= 1;
   1082        }
   1083    }
   1084    env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
   1085    env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
   1086}
   1087
   1088/* Add a new TLB entry. At most one entry for a given virtual address
   1089 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
   1090 * supplied size is only used by tlb_flush_page.
   1091 *
   1092 * Called from TCG-generated code, which is under an RCU read-side
   1093 * critical section.
   1094 */
   1095void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
   1096                             hwaddr paddr, MemTxAttrs attrs, int prot,
   1097                             int mmu_idx, target_ulong size)
   1098{
   1099    CPUArchState *env = cpu->env_ptr;
   1100    CPUTLB *tlb = env_tlb(env);
   1101    CPUTLBDesc *desc = &tlb->d[mmu_idx];
   1102    MemoryRegionSection *section;
   1103    unsigned int index;
   1104    target_ulong address;
   1105    target_ulong write_address;
   1106    uintptr_t addend;
   1107    CPUTLBEntry *te, tn;
   1108    hwaddr iotlb, xlat, sz, paddr_page;
   1109    target_ulong vaddr_page;
   1110    int asidx = cpu_asidx_from_attrs(cpu, attrs);
   1111    int wp_flags;
   1112    bool is_ram, is_romd;
   1113
   1114    assert_cpu_is_self(cpu);
   1115
   1116    if (size <= TARGET_PAGE_SIZE) {
   1117        sz = TARGET_PAGE_SIZE;
   1118    } else {
   1119        tlb_add_large_page(env, mmu_idx, vaddr, size);
   1120        sz = size;
   1121    }
   1122    vaddr_page = vaddr & TARGET_PAGE_MASK;
   1123    paddr_page = paddr & TARGET_PAGE_MASK;
   1124
   1125    section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
   1126                                                &xlat, &sz, attrs, &prot);
   1127    assert(sz >= TARGET_PAGE_SIZE);
   1128
   1129    tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
   1130              " prot=%x idx=%d\n",
   1131              vaddr, paddr, prot, mmu_idx);
   1132
   1133    address = vaddr_page;
   1134    if (size < TARGET_PAGE_SIZE) {
   1135        /* Repeat the MMU check and TLB fill on every access.  */
   1136        address |= TLB_INVALID_MASK;
   1137    }
   1138    if (attrs.byte_swap) {
   1139        address |= TLB_BSWAP;
   1140    }
   1141
   1142    is_ram = memory_region_is_ram(section->mr);
   1143    is_romd = memory_region_is_romd(section->mr);
   1144
   1145    if (is_ram || is_romd) {
   1146        /* RAM and ROMD both have associated host memory. */
   1147        addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
   1148    } else {
   1149        /* I/O does not; force the host address to NULL. */
   1150        addend = 0;
   1151    }
   1152
   1153    write_address = address;
   1154    if (is_ram) {
   1155        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
   1156        /*
   1157         * Computing is_clean is expensive; avoid all that unless
   1158         * the page is actually writable.
   1159         */
   1160        if (prot & PAGE_WRITE) {
   1161            if (section->readonly) {
   1162                write_address |= TLB_DISCARD_WRITE;
   1163            } else if (cpu_physical_memory_is_clean(iotlb)) {
   1164                write_address |= TLB_NOTDIRTY;
   1165            }
   1166        }
   1167    } else {
   1168        /* I/O or ROMD */
   1169        iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
   1170        /*
   1171         * Writes to romd devices must go through MMIO to enable write.
   1172         * Reads to romd devices go through the ram_ptr found above,
   1173         * but of course reads to I/O must go through MMIO.
   1174         */
   1175        write_address |= TLB_MMIO;
   1176        if (!is_romd) {
   1177            address = write_address;
   1178        }
   1179    }
   1180
   1181    wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
   1182                                              TARGET_PAGE_SIZE);
   1183
   1184    index = tlb_index(env, mmu_idx, vaddr_page);
   1185    te = tlb_entry(env, mmu_idx, vaddr_page);
   1186
   1187    /*
   1188     * Hold the TLB lock for the rest of the function. We could acquire/release
   1189     * the lock several times in the function, but it is faster to amortize the
   1190     * acquisition cost by acquiring it just once. Note that this leads to
   1191     * a longer critical section, but this is not a concern since the TLB lock
   1192     * is unlikely to be contended.
   1193     */
   1194    qemu_spin_lock(&tlb->c.lock);
   1195
   1196    /* Note that the tlb is no longer clean.  */
   1197    tlb->c.dirty |= 1 << mmu_idx;
   1198
   1199    /* Make sure there's no cached translation for the new page.  */
   1200    tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
   1201
   1202    /*
   1203     * Only evict the old entry to the victim tlb if it's for a
   1204     * different page; otherwise just overwrite the stale data.
   1205     */
   1206    if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
   1207        unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
   1208        CPUTLBEntry *tv = &desc->vtable[vidx];
   1209
   1210        /* Evict the old entry into the victim tlb.  */
   1211        copy_tlb_helper_locked(tv, te);
   1212        desc->viotlb[vidx] = desc->iotlb[index];
   1213        tlb_n_used_entries_dec(env, mmu_idx);
   1214    }
   1215
   1216    /* refill the tlb */
   1217    /*
   1218     * At this point iotlb contains a physical section number in the lower
   1219     * TARGET_PAGE_BITS, and either
   1220     *  + the ram_addr_t of the page base of the target RAM (RAM)
   1221     *  + the offset within section->mr of the page base (I/O, ROMD)
   1222     * We subtract the vaddr_page (which is page aligned and thus won't
   1223     * disturb the low bits) to give an offset which can be added to the
   1224     * (non-page-aligned) vaddr of the eventual memory access to get
   1225     * the MemoryRegion offset for the access. Note that the vaddr we
   1226     * subtract here is that of the page base, and not the same as the
   1227     * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
   1228     */
   1229    desc->iotlb[index].addr = iotlb - vaddr_page;
   1230    desc->iotlb[index].attrs = attrs;
   1231
   1232    /* Now calculate the new entry */
   1233    tn.addend = addend - vaddr_page;
   1234    if (prot & PAGE_READ) {
   1235        tn.addr_read = address;
   1236        if (wp_flags & BP_MEM_READ) {
   1237            tn.addr_read |= TLB_WATCHPOINT;
   1238        }
   1239    } else {
   1240        tn.addr_read = -1;
   1241    }
   1242
   1243    if (prot & PAGE_EXEC) {
   1244        tn.addr_code = address;
   1245    } else {
   1246        tn.addr_code = -1;
   1247    }
   1248
   1249    tn.addr_write = -1;
   1250    if (prot & PAGE_WRITE) {
   1251        tn.addr_write = write_address;
   1252        if (prot & PAGE_WRITE_INV) {
   1253            tn.addr_write |= TLB_INVALID_MASK;
   1254        }
   1255        if (wp_flags & BP_MEM_WRITE) {
   1256            tn.addr_write |= TLB_WATCHPOINT;
   1257        }
   1258    }
   1259
   1260    copy_tlb_helper_locked(te, &tn);
   1261    tlb_n_used_entries_inc(env, mmu_idx);
   1262    qemu_spin_unlock(&tlb->c.lock);
   1263}
   1264
   1265/* Add a new TLB entry, but without specifying the memory
   1266 * transaction attributes to be used.
   1267 */
   1268void tlb_set_page(CPUState *cpu, target_ulong vaddr,
   1269                  hwaddr paddr, int prot,
   1270                  int mmu_idx, target_ulong size)
   1271{
   1272    tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
   1273                            prot, mmu_idx, size);
   1274}
   1275
   1276static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
   1277{
   1278    ram_addr_t ram_addr;
   1279
   1280    ram_addr = qemu_ram_addr_from_host(ptr);
   1281    if (ram_addr == RAM_ADDR_INVALID) {
   1282        error_report("Bad ram pointer %p", ptr);
   1283        abort();
   1284    }
   1285    return ram_addr;
   1286}
   1287
   1288/*
   1289 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
   1290 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
   1291 * be discarded and looked up again (e.g. via tlb_entry()).
   1292 */
   1293static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
   1294                     MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
   1295{
   1296    CPUClass *cc = CPU_GET_CLASS(cpu);
   1297    bool ok;
   1298
   1299    /*
   1300     * This is not a probe, so only valid return is success; failure
   1301     * should result in exception + longjmp to the cpu loop.
   1302     */
   1303    ok = cc->tcg_ops->tlb_fill(cpu, addr, size,
   1304                               access_type, mmu_idx, false, retaddr);
   1305    assert(ok);
   1306}
   1307
   1308static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
   1309                                        MMUAccessType access_type,
   1310                                        int mmu_idx, uintptr_t retaddr)
   1311{
   1312    CPUClass *cc = CPU_GET_CLASS(cpu);
   1313
   1314    cc->tcg_ops->do_unaligned_access(cpu, addr, access_type, mmu_idx, retaddr);
   1315}
   1316
   1317static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
   1318                                          vaddr addr, unsigned size,
   1319                                          MMUAccessType access_type,
   1320                                          int mmu_idx, MemTxAttrs attrs,
   1321                                          MemTxResult response,
   1322                                          uintptr_t retaddr)
   1323{
   1324    CPUClass *cc = CPU_GET_CLASS(cpu);
   1325
   1326    if (!cpu->ignore_memory_transaction_failures &&
   1327        cc->tcg_ops->do_transaction_failed) {
   1328        cc->tcg_ops->do_transaction_failed(cpu, physaddr, addr, size,
   1329                                           access_type, mmu_idx, attrs,
   1330                                           response, retaddr);
   1331    }
   1332}
   1333
   1334static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
   1335                         int mmu_idx, target_ulong addr, uintptr_t retaddr,
   1336                         MMUAccessType access_type, MemOp op)
   1337{
   1338    CPUState *cpu = env_cpu(env);
   1339    hwaddr mr_offset;
   1340    MemoryRegionSection *section;
   1341    MemoryRegion *mr;
   1342    uint64_t val;
   1343    bool locked = false;
   1344    MemTxResult r;
   1345
   1346    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
   1347    mr = section->mr;
   1348    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
   1349    cpu->mem_io_pc = retaddr;
   1350    if (!cpu->can_do_io) {
   1351        cpu_io_recompile(cpu, retaddr);
   1352    }
   1353
   1354    if (!qemu_mutex_iothread_locked()) {
   1355        qemu_mutex_lock_iothread();
   1356        locked = true;
   1357    }
   1358    r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
   1359    if (r != MEMTX_OK) {
   1360        hwaddr physaddr = mr_offset +
   1361            section->offset_within_address_space -
   1362            section->offset_within_region;
   1363
   1364        cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
   1365                               mmu_idx, iotlbentry->attrs, r, retaddr);
   1366    }
   1367    if (locked) {
   1368        qemu_mutex_unlock_iothread();
   1369    }
   1370
   1371    return val;
   1372}
   1373
   1374/*
   1375 * Save a potentially trashed IOTLB entry for later lookup by plugin.
   1376 * This is read by tlb_plugin_lookup if the iotlb entry doesn't match
   1377 * because of the side effect of io_writex changing memory layout.
   1378 */
   1379static void save_iotlb_data(CPUState *cs, hwaddr addr,
   1380                            MemoryRegionSection *section, hwaddr mr_offset)
   1381{
   1382#ifdef CONFIG_PLUGIN
   1383    SavedIOTLB *saved = &cs->saved_iotlb;
   1384    saved->addr = addr;
   1385    saved->section = section;
   1386    saved->mr_offset = mr_offset;
   1387#endif
   1388}
   1389
   1390static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
   1391                      int mmu_idx, uint64_t val, target_ulong addr,
   1392                      uintptr_t retaddr, MemOp op)
   1393{
   1394    CPUState *cpu = env_cpu(env);
   1395    hwaddr mr_offset;
   1396    MemoryRegionSection *section;
   1397    MemoryRegion *mr;
   1398    bool locked = false;
   1399    MemTxResult r;
   1400
   1401    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
   1402    mr = section->mr;
   1403    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
   1404    if (!cpu->can_do_io) {
   1405        cpu_io_recompile(cpu, retaddr);
   1406    }
   1407    cpu->mem_io_pc = retaddr;
   1408
   1409    /*
   1410     * The memory_region_dispatch may trigger a flush/resize
   1411     * so for plugins we save the iotlb_data just in case.
   1412     */
   1413    save_iotlb_data(cpu, iotlbentry->addr, section, mr_offset);
   1414
   1415    if (!qemu_mutex_iothread_locked()) {
   1416        qemu_mutex_lock_iothread();
   1417        locked = true;
   1418    }
   1419    r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
   1420    if (r != MEMTX_OK) {
   1421        hwaddr physaddr = mr_offset +
   1422            section->offset_within_address_space -
   1423            section->offset_within_region;
   1424
   1425        cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
   1426                               MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
   1427                               retaddr);
   1428    }
   1429    if (locked) {
   1430        qemu_mutex_unlock_iothread();
   1431    }
   1432}
   1433
   1434static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
   1435{
   1436#if TCG_OVERSIZED_GUEST
   1437    return *(target_ulong *)((uintptr_t)entry + ofs);
   1438#else
   1439    /* ofs might correspond to .addr_write, so use qatomic_read */
   1440    return qatomic_read((target_ulong *)((uintptr_t)entry + ofs));
   1441#endif
   1442}
   1443
   1444/* Return true if ADDR is present in the victim tlb, and has been copied
   1445   back to the main tlb.  */
   1446static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
   1447                           size_t elt_ofs, target_ulong page)
   1448{
   1449    size_t vidx;
   1450
   1451    assert_cpu_is_self(env_cpu(env));
   1452    for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
   1453        CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
   1454        target_ulong cmp;
   1455
   1456        /* elt_ofs might correspond to .addr_write, so use qatomic_read */
   1457#if TCG_OVERSIZED_GUEST
   1458        cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
   1459#else
   1460        cmp = qatomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
   1461#endif
   1462
   1463        if (cmp == page) {
   1464            /* Found entry in victim tlb, swap tlb and iotlb.  */
   1465            CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];
   1466
   1467            qemu_spin_lock(&env_tlb(env)->c.lock);
   1468            copy_tlb_helper_locked(&tmptlb, tlb);
   1469            copy_tlb_helper_locked(tlb, vtlb);
   1470            copy_tlb_helper_locked(vtlb, &tmptlb);
   1471            qemu_spin_unlock(&env_tlb(env)->c.lock);
   1472
   1473            CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
   1474            CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
   1475            tmpio = *io; *io = *vio; *vio = tmpio;
   1476            return true;
   1477        }
   1478    }
   1479    return false;
   1480}
   1481
   1482/* Macro to call the above, with local variables from the use context.  */
   1483#define VICTIM_TLB_HIT(TY, ADDR) \
   1484  victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
   1485                 (ADDR) & TARGET_PAGE_MASK)
   1486
   1487/*
   1488 * Return a ram_addr_t for the virtual address for execution.
   1489 *
   1490 * Return -1 if we can't translate and execute from an entire page
   1491 * of RAM.  This will force us to execute by loading and translating
   1492 * one insn at a time, without caching.
   1493 *
   1494 * NOTE: This function will trigger an exception if the page is
   1495 * not executable.
   1496 */
   1497tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
   1498                                        void **hostp)
   1499{
   1500    uintptr_t mmu_idx = cpu_mmu_index(env, true);
   1501    uintptr_t index = tlb_index(env, mmu_idx, addr);
   1502    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
   1503    void *p;
   1504
   1505    if (unlikely(!tlb_hit(entry->addr_code, addr))) {
   1506        if (!VICTIM_TLB_HIT(addr_code, addr)) {
   1507            tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
   1508            index = tlb_index(env, mmu_idx, addr);
   1509            entry = tlb_entry(env, mmu_idx, addr);
   1510
   1511            if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
   1512                /*
   1513                 * The MMU protection covers a smaller range than a target
   1514                 * page, so we must redo the MMU check for every insn.
   1515                 */
   1516                return -1;
   1517            }
   1518        }
   1519        assert(tlb_hit(entry->addr_code, addr));
   1520    }
   1521
   1522    if (unlikely(entry->addr_code & TLB_MMIO)) {
   1523        /* The region is not backed by RAM.  */
   1524        if (hostp) {
   1525            *hostp = NULL;
   1526        }
   1527        return -1;
   1528    }
   1529
   1530    p = (void *)((uintptr_t)addr + entry->addend);
   1531    if (hostp) {
   1532        *hostp = p;
   1533    }
   1534    return qemu_ram_addr_from_host_nofail(p);
   1535}
   1536
   1537tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
   1538{
   1539    return get_page_addr_code_hostp(env, addr, NULL);
   1540}
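/*
 * Illustrative sketch, not part of the original file: how a caller such as
 * the translator might use get_page_addr_code() to decide whether a guest
 * page can be translated and cached as a whole.  The helper name
 * "example_code_page_is_ram" is hypothetical.
 */
static bool __attribute__((unused))
example_code_page_is_ram(CPUArchState *env, target_ulong pc)
{
    /* -1 means "cannot execute from an entire page of RAM" (see above). */
    return get_page_addr_code(env, pc) != -1;
}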
   1541
   1542static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
   1543                           CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
   1544{
   1545    ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
   1546
   1547    trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
   1548
   1549    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
   1550        struct page_collection *pages
   1551            = page_collection_lock(ram_addr, ram_addr + size);
   1552        tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
   1553        page_collection_unlock(pages);
   1554    }
   1555
   1556    /*
   1557     * Set both VGA and migration bits for simplicity and to remove
   1558     * the notdirty callback faster.
   1559     */
   1560    cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
   1561
   1562    /* We remove the notdirty callback only if the code has been flushed. */
   1563    if (!cpu_physical_memory_is_clean(ram_addr)) {
   1564        trace_memory_notdirty_set_dirty(mem_vaddr);
   1565        tlb_set_dirty(cpu, mem_vaddr);
   1566    }
   1567}
   1568
   1569static int probe_access_internal(CPUArchState *env, target_ulong addr,
   1570                                 int fault_size, MMUAccessType access_type,
   1571                                 int mmu_idx, bool nonfault,
   1572                                 void **phost, uintptr_t retaddr)
   1573{
   1574    uintptr_t index = tlb_index(env, mmu_idx, addr);
   1575    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
   1576    target_ulong tlb_addr, page_addr;
   1577    size_t elt_ofs;
   1578    int flags;
   1579
   1580    switch (access_type) {
   1581    case MMU_DATA_LOAD:
   1582        elt_ofs = offsetof(CPUTLBEntry, addr_read);
   1583        break;
   1584    case MMU_DATA_STORE:
   1585        elt_ofs = offsetof(CPUTLBEntry, addr_write);
   1586        break;
   1587    case MMU_INST_FETCH:
   1588        elt_ofs = offsetof(CPUTLBEntry, addr_code);
   1589        break;
   1590    default:
   1591        g_assert_not_reached();
   1592    }
   1593    tlb_addr = tlb_read_ofs(entry, elt_ofs);
   1594
   1595    page_addr = addr & TARGET_PAGE_MASK;
   1596    if (!tlb_hit_page(tlb_addr, page_addr)) {
   1597        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
   1598            CPUState *cs = env_cpu(env);
   1599            CPUClass *cc = CPU_GET_CLASS(cs);
   1600
   1601            if (!cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
   1602                                       mmu_idx, nonfault, retaddr)) {
   1603                /* Non-faulting page table read failed.  */
   1604                *phost = NULL;
   1605                return TLB_INVALID_MASK;
   1606            }
   1607
   1608            /* TLB resize via tlb_fill may have moved the entry.  */
   1609            entry = tlb_entry(env, mmu_idx, addr);
   1610        }
   1611        tlb_addr = tlb_read_ofs(entry, elt_ofs);
   1612    }
   1613    flags = tlb_addr & TLB_FLAGS_MASK;
   1614
   1615    /* Fold all "mmio-like" bits into TLB_MMIO.  This is not RAM.  */
   1616    if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
   1617        *phost = NULL;
   1618        return TLB_MMIO;
   1619    }
   1620
   1621    /* Everything else is RAM. */
   1622    *phost = (void *)((uintptr_t)addr + entry->addend);
   1623    return flags;
   1624}
   1625
   1626int probe_access_flags(CPUArchState *env, target_ulong addr,
   1627                       MMUAccessType access_type, int mmu_idx,
   1628                       bool nonfault, void **phost, uintptr_t retaddr)
   1629{
   1630    int flags;
   1631
   1632    flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
   1633                                  nonfault, phost, retaddr);
   1634
   1635    /* Handle clean RAM pages.  */
   1636    if (unlikely(flags & TLB_NOTDIRTY)) {
   1637        uintptr_t index = tlb_index(env, mmu_idx, addr);
   1638        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
   1639
   1640        notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
   1641        flags &= ~TLB_NOTDIRTY;
   1642    }
   1643
   1644    return flags;
   1645}
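/*
 * Illustrative sketch, not part of the original file: a non-faulting probe
 * as a target helper might issue one.  "example_probe_readable" is a
 * hypothetical name; the flag tests mirror what probe_access_flags()
 * returns above.
 */
static bool __attribute__((unused))
example_probe_readable(CPUArchState *env, target_ulong addr,
                       int mmu_idx, uintptr_t retaddr)
{
    void *host;
    int flags = probe_access_flags(env, addr, MMU_DATA_LOAD, mmu_idx,
                                   true, &host, retaddr);

    /* TLB_INVALID_MASK here means the non-faulting page walk failed. */
    if (flags & TLB_INVALID_MASK) {
        return false;
    }
    /* TLB_MMIO covers anything that is not plain RAM; host is then NULL. */
    return !(flags & TLB_MMIO);
}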
   1646
   1647void *probe_access(CPUArchState *env, target_ulong addr, int size,
   1648                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
   1649{
   1650    void *host;
   1651    int flags;
   1652
   1653    g_assert(-(addr | TARGET_PAGE_MASK) >= size);
   1654
   1655    flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
   1656                                  false, &host, retaddr);
   1657
   1658    /* Per the interface, size == 0 merely faults the access. */
   1659    if (size == 0) {
   1660        return NULL;
   1661    }
   1662
   1663    if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
   1664        uintptr_t index = tlb_index(env, mmu_idx, addr);
   1665        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
   1666
   1667        /* Handle watchpoints.  */
   1668        if (flags & TLB_WATCHPOINT) {
   1669            int wp_access = (access_type == MMU_DATA_STORE
   1670                             ? BP_MEM_WRITE : BP_MEM_READ);
   1671            cpu_check_watchpoint(env_cpu(env), addr, size,
   1672                                 iotlbentry->attrs, wp_access, retaddr);
   1673        }
   1674
   1675        /* Handle clean RAM pages.  */
   1676        if (flags & TLB_NOTDIRTY) {
   1677            notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
   1678        }
   1679    }
   1680
   1681    return host;
   1682}
   1683
   1684void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
   1685                        MMUAccessType access_type, int mmu_idx)
   1686{
   1687    void *host;
   1688    int flags;
   1689
   1690    flags = probe_access_internal(env, addr, 0, access_type,
   1691                                  mmu_idx, true, &host, 0);
   1692
    1693    /* No combination of flags is expected by the caller. */
   1694    return flags ? NULL : host;
   1695}
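/*
 * Illustrative sketch, not part of the original file: the usual pattern for
 * tlb_vaddr_to_host() is to take a direct host-memory fast path only when
 * it returns a pointer and to fall back to the access helpers otherwise.
 * "example_try_direct_read" is a hypothetical name.
 */
static uint32_t __attribute__((unused))
example_try_direct_read(CPUArchState *env, abi_ptr addr, int mmu_idx)
{
    void *host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx);

    if (host) {
        /* Plain RAM with no special handling: read the host mapping. */
        return ldl_le_p(host);
    }
    /* MMIO, watchpoint, clean page, ...: take the full slow path. */
    return cpu_ldl_le_mmuidx_ra(env, addr, mmu_idx, 0);
}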
   1696
   1697#ifdef CONFIG_PLUGIN
   1698/*
   1699 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
    1700 * This should be a hot path as we will have just looked this address up
   1701 * in the softmmu lookup code (or helper). We don't handle re-fills or
   1702 * checking the victim table. This is purely informational.
   1703 *
   1704 * This almost never fails as the memory access being instrumented
   1705 * should have just filled the TLB. The one corner case is io_writex
   1706 * which can cause TLB flushes and potential resizing of the TLBs
   1707 * losing the information we need. In those cases we need to recover
   1708 * data from a copy of the iotlbentry. As long as this always occurs
   1709 * from the same thread (which a mem callback will be) this is safe.
   1710 */
   1711
   1712bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
   1713                       bool is_store, struct qemu_plugin_hwaddr *data)
   1714{
   1715    CPUArchState *env = cpu->env_ptr;
   1716    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
   1717    uintptr_t index = tlb_index(env, mmu_idx, addr);
   1718    target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
   1719
   1720    if (likely(tlb_hit(tlb_addr, addr))) {
   1721        /* We must have an iotlb entry for MMIO */
   1722        if (tlb_addr & TLB_MMIO) {
   1723            CPUIOTLBEntry *iotlbentry;
   1724            iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
   1725            data->is_io = true;
   1726            data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
   1727            data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
   1728        } else {
   1729            data->is_io = false;
   1730            data->v.ram.hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
   1731        }
   1732        return true;
   1733    } else {
   1734        SavedIOTLB *saved = &cpu->saved_iotlb;
   1735        data->is_io = true;
   1736        data->v.io.section = saved->section;
   1737        data->v.io.offset = saved->mr_offset;
   1738        return true;
   1739    }
   1740}
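/*
 * Illustrative sketch, not part of the original file: how a memory callback
 * running on the vCPU thread might consume the structure filled in by
 * tlb_plugin_lookup() above.  "example_describe_access" is a hypothetical
 * name and the log messages are purely for demonstration.
 */
static void __attribute__((unused))
example_describe_access(CPUState *cpu, target_ulong addr,
                        int mmu_idx, bool is_store)
{
    struct qemu_plugin_hwaddr data;

    if (tlb_plugin_lookup(cpu, addr, mmu_idx, is_store, &data)) {
        if (data.is_io) {
            /* MMIO: we have a MemoryRegionSection and an offset into it. */
            qemu_log_mask(CPU_LOG_MMU, "io access, offset 0x%" HWADDR_PRIx "\n",
                          data.v.io.offset);
        } else {
            /* RAM: we have a direct host pointer for the access. */
            qemu_log_mask(CPU_LOG_MMU, "ram access via host %p\n",
                          data.v.ram.hostaddr);
        }
    }
}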
   1741
   1742#endif
   1743
   1744/*
   1745 * Probe for an atomic operation.  Do not allow unaligned operations,
   1746 * or io operations to proceed.  Return the host address.
   1747 *
   1748 * @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE.
   1749 */
   1750static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
   1751                               MemOpIdx oi, int size, int prot,
   1752                               uintptr_t retaddr)
   1753{
   1754    size_t mmu_idx = get_mmuidx(oi);
   1755    MemOp mop = get_memop(oi);
   1756    int a_bits = get_alignment_bits(mop);
   1757    uintptr_t index;
   1758    CPUTLBEntry *tlbe;
   1759    target_ulong tlb_addr;
   1760    void *hostaddr;
   1761
   1762    /* Adjust the given return address.  */
   1763    retaddr -= GETPC_ADJ;
   1764
   1765    /* Enforce guest required alignment.  */
   1766    if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
   1767        /* ??? Maybe indicate atomic op to cpu_unaligned_access */
   1768        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
   1769                             mmu_idx, retaddr);
   1770    }
   1771
   1772    /* Enforce qemu required alignment.  */
   1773    if (unlikely(addr & (size - 1))) {
   1774        /* We get here if guest alignment was not requested,
   1775           or was not enforced by cpu_unaligned_access above.
   1776           We might widen the access and emulate, but for now
   1777           mark an exception and exit the cpu loop.  */
   1778        goto stop_the_world;
   1779    }
   1780
   1781    index = tlb_index(env, mmu_idx, addr);
   1782    tlbe = tlb_entry(env, mmu_idx, addr);
   1783
   1784    /* Check TLB entry and enforce page permissions.  */
   1785    if (prot & PAGE_WRITE) {
   1786        tlb_addr = tlb_addr_write(tlbe);
   1787        if (!tlb_hit(tlb_addr, addr)) {
   1788            if (!VICTIM_TLB_HIT(addr_write, addr)) {
   1789                tlb_fill(env_cpu(env), addr, size,
   1790                         MMU_DATA_STORE, mmu_idx, retaddr);
   1791                index = tlb_index(env, mmu_idx, addr);
   1792                tlbe = tlb_entry(env, mmu_idx, addr);
   1793            }
   1794            tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
   1795        }
   1796
   1797        /* Let the guest notice RMW on a write-only page.  */
   1798        if ((prot & PAGE_READ) &&
   1799            unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
   1800            tlb_fill(env_cpu(env), addr, size,
   1801                     MMU_DATA_LOAD, mmu_idx, retaddr);
   1802            /*
   1803             * Since we don't support reads and writes to different addresses,
   1804             * and we do have the proper page loaded for write, this shouldn't
   1805             * ever return.  But just in case, handle via stop-the-world.
   1806             */
   1807            goto stop_the_world;
   1808        }
   1809    } else /* if (prot & PAGE_READ) */ {
   1810        tlb_addr = tlbe->addr_read;
   1811        if (!tlb_hit(tlb_addr, addr)) {
   1812            if (!VICTIM_TLB_HIT(addr_write, addr)) {
   1813                tlb_fill(env_cpu(env), addr, size,
   1814                         MMU_DATA_LOAD, mmu_idx, retaddr);
   1815                index = tlb_index(env, mmu_idx, addr);
   1816                tlbe = tlb_entry(env, mmu_idx, addr);
   1817            }
   1818            tlb_addr = tlbe->addr_read & ~TLB_INVALID_MASK;
   1819        }
   1820    }
   1821
   1822    /* Notice an IO access or a needs-MMU-lookup access */
   1823    if (unlikely(tlb_addr & TLB_MMIO)) {
   1824        /* There's really nothing that can be done to
   1825           support this apart from stop-the-world.  */
   1826        goto stop_the_world;
   1827    }
   1828
   1829    hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
   1830
   1831    if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
   1832        notdirty_write(env_cpu(env), addr, size,
   1833                       &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
   1834    }
   1835
   1836    return hostaddr;
   1837
   1838 stop_the_world:
   1839    cpu_loop_exit_atomic(env_cpu(env), retaddr);
   1840}
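/*
 * Illustrative sketch, not part of the original file: a stripped-down
 * 4-byte compare-and-swap along the lines of what atomic_template.h
 * instantiates below.  "example_cmpxchgl" is a hypothetical name and the
 * byte swapping, tracing and cleanup done by the real template is omitted.
 */
static uint32_t __attribute__((unused))
example_cmpxchgl(CPUArchState *env, target_ulong addr, uint32_t cmpv,
                 uint32_t newv, MemOpIdx oi, uintptr_t retaddr)
{
    /* Rejects unaligned or MMIO accesses; may exit via stop-the-world. */
    uint32_t *haddr = atomic_mmu_lookup(env, addr, oi, 4,
                                        PAGE_READ | PAGE_WRITE, retaddr);

    /* Host-endian only; the real template also handles MO_BSWAP. */
    return qatomic_cmpxchg__nocheck(haddr, cmpv, newv);
}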
   1841
   1842/*
   1843 * Load Helpers
   1844 *
   1845 * We support two different access types. SOFTMMU_CODE_ACCESS is
   1846 * specifically for reading instructions from system memory. It is
   1847 * called by the translation loop and in some helpers where the code
   1848 * is disassembled. It shouldn't be called directly by guest code.
   1849 */
   1850
   1851typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
   1852                                MemOpIdx oi, uintptr_t retaddr);
   1853
   1854static inline uint64_t QEMU_ALWAYS_INLINE
   1855load_memop(const void *haddr, MemOp op)
   1856{
   1857    switch (op) {
   1858    case MO_UB:
   1859        return ldub_p(haddr);
   1860    case MO_BEUW:
   1861        return lduw_be_p(haddr);
   1862    case MO_LEUW:
   1863        return lduw_le_p(haddr);
   1864    case MO_BEUL:
   1865        return (uint32_t)ldl_be_p(haddr);
   1866    case MO_LEUL:
   1867        return (uint32_t)ldl_le_p(haddr);
   1868    case MO_BEQ:
   1869        return ldq_be_p(haddr);
   1870    case MO_LEQ:
   1871        return ldq_le_p(haddr);
   1872    default:
   1873        qemu_build_not_reached();
   1874    }
   1875}
   1876
   1877static inline uint64_t QEMU_ALWAYS_INLINE
   1878load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
   1879            uintptr_t retaddr, MemOp op, bool code_read,
   1880            FullLoadHelper *full_load)
   1881{
   1882    uintptr_t mmu_idx = get_mmuidx(oi);
   1883    uintptr_t index = tlb_index(env, mmu_idx, addr);
   1884    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
   1885    target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
   1886    const size_t tlb_off = code_read ?
   1887        offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
   1888    const MMUAccessType access_type =
   1889        code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
   1890    unsigned a_bits = get_alignment_bits(get_memop(oi));
   1891    void *haddr;
   1892    uint64_t res;
   1893    size_t size = memop_size(op);
   1894
   1895    /* Handle CPU specific unaligned behaviour */
   1896    if (addr & ((1 << a_bits) - 1)) {
   1897        cpu_unaligned_access(env_cpu(env), addr, access_type,
   1898                             mmu_idx, retaddr);
   1899    }
   1900
   1901    /* If the TLB entry is for a different page, reload and try again.  */
   1902    if (!tlb_hit(tlb_addr, addr)) {
   1903        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
   1904                            addr & TARGET_PAGE_MASK)) {
   1905            tlb_fill(env_cpu(env), addr, size,
   1906                     access_type, mmu_idx, retaddr);
   1907            index = tlb_index(env, mmu_idx, addr);
   1908            entry = tlb_entry(env, mmu_idx, addr);
   1909        }
   1910        tlb_addr = code_read ? entry->addr_code : entry->addr_read;
   1911        tlb_addr &= ~TLB_INVALID_MASK;
   1912    }
   1913
   1914    /* Handle anything that isn't just a straight memory access.  */
   1915    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
   1916        CPUIOTLBEntry *iotlbentry;
   1917        bool need_swap;
   1918
   1919        /* For anything that is unaligned, recurse through full_load.  */
   1920        if ((addr & (size - 1)) != 0) {
   1921            goto do_unaligned_access;
   1922        }
   1923
   1924        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
   1925
   1926        /* Handle watchpoints.  */
   1927        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
   1928            /* On watchpoint hit, this will longjmp out.  */
   1929            cpu_check_watchpoint(env_cpu(env), addr, size,
   1930                                 iotlbentry->attrs, BP_MEM_READ, retaddr);
   1931        }
   1932
   1933        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
   1934
   1935        /* Handle I/O access.  */
   1936        if (likely(tlb_addr & TLB_MMIO)) {
   1937            return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
   1938                            access_type, op ^ (need_swap * MO_BSWAP));
   1939        }
   1940
   1941        haddr = (void *)((uintptr_t)addr + entry->addend);
   1942
   1943        /*
   1944         * Keep these two load_memop separate to ensure that the compiler
   1945         * is able to fold the entire function to a single instruction.
   1946         * There is a build-time assert inside to remind you of this.  ;-)
   1947         */
   1948        if (unlikely(need_swap)) {
   1949            return load_memop(haddr, op ^ MO_BSWAP);
   1950        }
   1951        return load_memop(haddr, op);
   1952    }
   1953
   1954    /* Handle slow unaligned access (it spans two pages or IO).  */
   1955    if (size > 1
   1956        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
   1957                    >= TARGET_PAGE_SIZE)) {
   1958        target_ulong addr1, addr2;
   1959        uint64_t r1, r2;
   1960        unsigned shift;
   1961    do_unaligned_access:
   1962        addr1 = addr & ~((target_ulong)size - 1);
   1963        addr2 = addr1 + size;
   1964        r1 = full_load(env, addr1, oi, retaddr);
   1965        r2 = full_load(env, addr2, oi, retaddr);
   1966        shift = (addr & (size - 1)) * 8;
   1967
   1968        if (memop_big_endian(op)) {
   1969            /* Big-endian combine.  */
   1970            res = (r1 << shift) | (r2 >> ((size * 8) - shift));
   1971        } else {
   1972            /* Little-endian combine.  */
   1973            res = (r1 >> shift) | (r2 << ((size * 8) - shift));
   1974        }
   1975        return res & MAKE_64BIT_MASK(0, size * 8);
   1976    }
   1977
   1978    haddr = (void *)((uintptr_t)addr + entry->addend);
   1979    return load_memop(haddr, op);
   1980}
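/*
 * Illustrative sketch, not part of the original file: the little-endian
 * recombination above, worked for a 4-byte load at an address with
 * addr & 3 == 1.  r1 and r2 are the two aligned loads; shift is 8, so the
 * upper three bytes of r1 become the low bytes of the result and the low
 * byte of r2 becomes its high byte.
 */
static uint32_t __attribute__((unused))
example_combine_unaligned_le32(uint64_t r1, uint64_t r2)
{
    const unsigned size = 4, shift = 1 * 8;
    uint64_t res = (r1 >> shift) | (r2 << ((size * 8) - shift));

    return res & MAKE_64BIT_MASK(0, size * 8);
}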
   1981
   1982/*
   1983 * For the benefit of TCG generated code, we want to avoid the
   1984 * complication of ABI-specific return type promotion and always
   1985 * return a value extended to the register size of the host. This is
   1986 * tcg_target_long, except in the case of a 32-bit host and 64-bit
   1987 * data, and for that we always have uint64_t.
   1988 *
   1989 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
   1990 */
   1991
   1992static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
   1993                              MemOpIdx oi, uintptr_t retaddr)
   1994{
   1995    return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
   1996}
   1997
   1998tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
   1999                                     MemOpIdx oi, uintptr_t retaddr)
   2000{
   2001    return full_ldub_mmu(env, addr, oi, retaddr);
   2002}
   2003
   2004static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
   2005                                 MemOpIdx oi, uintptr_t retaddr)
   2006{
   2007    return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
   2008                       full_le_lduw_mmu);
   2009}
   2010
   2011tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
   2012                                    MemOpIdx oi, uintptr_t retaddr)
   2013{
   2014    return full_le_lduw_mmu(env, addr, oi, retaddr);
   2015}
   2016
   2017static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
   2018                                 MemOpIdx oi, uintptr_t retaddr)
   2019{
   2020    return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
   2021                       full_be_lduw_mmu);
   2022}
   2023
   2024tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
   2025                                    MemOpIdx oi, uintptr_t retaddr)
   2026{
   2027    return full_be_lduw_mmu(env, addr, oi, retaddr);
   2028}
   2029
   2030static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
   2031                                 MemOpIdx oi, uintptr_t retaddr)
   2032{
   2033    return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
   2034                       full_le_ldul_mmu);
   2035}
   2036
   2037tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
   2038                                    MemOpIdx oi, uintptr_t retaddr)
   2039{
   2040    return full_le_ldul_mmu(env, addr, oi, retaddr);
   2041}
   2042
   2043static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
   2044                                 MemOpIdx oi, uintptr_t retaddr)
   2045{
   2046    return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
   2047                       full_be_ldul_mmu);
   2048}
   2049
   2050tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
   2051                                    MemOpIdx oi, uintptr_t retaddr)
   2052{
   2053    return full_be_ldul_mmu(env, addr, oi, retaddr);
   2054}
   2055
   2056uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
   2057                           MemOpIdx oi, uintptr_t retaddr)
   2058{
   2059    return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
   2060                       helper_le_ldq_mmu);
   2061}
   2062
   2063uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
   2064                           MemOpIdx oi, uintptr_t retaddr)
   2065{
   2066    return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
   2067                       helper_be_ldq_mmu);
   2068}
   2069
   2070/*
   2071 * Provide signed versions of the load routines as well.  We can of course
    2072 * avoid this for 64-bit data, or for 32-bit data on a 32-bit host.
   2073 */
   2074
   2075
   2076tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
   2077                                     MemOpIdx oi, uintptr_t retaddr)
   2078{
   2079    return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
   2080}
   2081
   2082tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
   2083                                    MemOpIdx oi, uintptr_t retaddr)
   2084{
   2085    return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
   2086}
   2087
   2088tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
   2089                                    MemOpIdx oi, uintptr_t retaddr)
   2090{
   2091    return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
   2092}
   2093
   2094tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
   2095                                    MemOpIdx oi, uintptr_t retaddr)
   2096{
   2097    return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
   2098}
   2099
   2100tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
   2101                                    MemOpIdx oi, uintptr_t retaddr)
   2102{
   2103    return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
   2104}
   2105
   2106/*
   2107 * Load helpers for cpu_ldst.h.
   2108 */
   2109
   2110static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
   2111                                       int mmu_idx, uintptr_t retaddr,
   2112                                       MemOp op, FullLoadHelper *full_load)
   2113{
   2114    MemOpIdx oi = make_memop_idx(op, mmu_idx);
   2115    uint64_t ret;
   2116
   2117    trace_guest_ld_before_exec(env_cpu(env), addr, oi);
   2118
   2119    ret = full_load(env, addr, oi, retaddr);
   2120
   2121    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
   2122
   2123    return ret;
   2124}
   2125
   2126uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
   2127                            int mmu_idx, uintptr_t ra)
   2128{
   2129    return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu);
   2130}
   2131
   2132int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
   2133                       int mmu_idx, uintptr_t ra)
   2134{
   2135    return (int8_t)cpu_ldub_mmuidx_ra(env, addr, mmu_idx, ra);
   2136}
   2137
   2138uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
   2139                               int mmu_idx, uintptr_t ra)
   2140{
   2141    return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUW, full_be_lduw_mmu);
   2142}
   2143
   2144int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
   2145                          int mmu_idx, uintptr_t ra)
   2146{
   2147    return (int16_t)cpu_lduw_be_mmuidx_ra(env, addr, mmu_idx, ra);
   2148}
   2149
   2150uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
   2151                              int mmu_idx, uintptr_t ra)
   2152{
   2153    return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUL, full_be_ldul_mmu);
   2154}
   2155
   2156uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
   2157                              int mmu_idx, uintptr_t ra)
   2158{
   2159    return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEQ, helper_be_ldq_mmu);
   2160}
   2161
   2162uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
   2163                               int mmu_idx, uintptr_t ra)
   2164{
   2165    return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUW, full_le_lduw_mmu);
   2166}
   2167
   2168int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
   2169                          int mmu_idx, uintptr_t ra)
   2170{
   2171    return (int16_t)cpu_lduw_le_mmuidx_ra(env, addr, mmu_idx, ra);
   2172}
   2173
   2174uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
   2175                              int mmu_idx, uintptr_t ra)
   2176{
   2177    return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUL, full_le_ldul_mmu);
   2178}
   2179
   2180uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
   2181                              int mmu_idx, uintptr_t ra)
   2182{
   2183    return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEQ, helper_le_ldq_mmu);
   2184}
   2185
   2186uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr,
   2187                          uintptr_t retaddr)
   2188{
   2189    return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
   2190}
   2191
   2192int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
   2193{
   2194    return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
   2195}
   2196
   2197uint32_t cpu_lduw_be_data_ra(CPUArchState *env, target_ulong ptr,
   2198                             uintptr_t retaddr)
   2199{
   2200    return cpu_lduw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
   2201}
   2202
   2203int cpu_ldsw_be_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
   2204{
   2205    return cpu_ldsw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
   2206}
   2207
   2208uint32_t cpu_ldl_be_data_ra(CPUArchState *env, target_ulong ptr,
   2209                            uintptr_t retaddr)
   2210{
   2211    return cpu_ldl_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
   2212}
   2213
   2214uint64_t cpu_ldq_be_data_ra(CPUArchState *env, target_ulong ptr,
   2215                            uintptr_t retaddr)
   2216{
   2217    return cpu_ldq_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
   2218}
   2219
   2220uint32_t cpu_lduw_le_data_ra(CPUArchState *env, target_ulong ptr,
   2221                             uintptr_t retaddr)
   2222{
   2223    return cpu_lduw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
   2224}
   2225
   2226int cpu_ldsw_le_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
   2227{
   2228    return cpu_ldsw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
   2229}
   2230
   2231uint32_t cpu_ldl_le_data_ra(CPUArchState *env, target_ulong ptr,
   2232                            uintptr_t retaddr)
   2233{
   2234    return cpu_ldl_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
   2235}
   2236
   2237uint64_t cpu_ldq_le_data_ra(CPUArchState *env, target_ulong ptr,
   2238                            uintptr_t retaddr)
   2239{
   2240    return cpu_ldq_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
   2241}
   2242
   2243uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr)
   2244{
   2245    return cpu_ldub_data_ra(env, ptr, 0);
   2246}
   2247
   2248int cpu_ldsb_data(CPUArchState *env, target_ulong ptr)
   2249{
   2250    return cpu_ldsb_data_ra(env, ptr, 0);
   2251}
   2252
   2253uint32_t cpu_lduw_be_data(CPUArchState *env, target_ulong ptr)
   2254{
   2255    return cpu_lduw_be_data_ra(env, ptr, 0);
   2256}
   2257
   2258int cpu_ldsw_be_data(CPUArchState *env, target_ulong ptr)
   2259{
   2260    return cpu_ldsw_be_data_ra(env, ptr, 0);
   2261}
   2262
   2263uint32_t cpu_ldl_be_data(CPUArchState *env, target_ulong ptr)
   2264{
   2265    return cpu_ldl_be_data_ra(env, ptr, 0);
   2266}
   2267
   2268uint64_t cpu_ldq_be_data(CPUArchState *env, target_ulong ptr)
   2269{
   2270    return cpu_ldq_be_data_ra(env, ptr, 0);
   2271}
   2272
   2273uint32_t cpu_lduw_le_data(CPUArchState *env, target_ulong ptr)
   2274{
   2275    return cpu_lduw_le_data_ra(env, ptr, 0);
   2276}
   2277
   2278int cpu_ldsw_le_data(CPUArchState *env, target_ulong ptr)
   2279{
   2280    return cpu_ldsw_le_data_ra(env, ptr, 0);
   2281}
   2282
   2283uint32_t cpu_ldl_le_data(CPUArchState *env, target_ulong ptr)
   2284{
   2285    return cpu_ldl_le_data_ra(env, ptr, 0);
   2286}
   2287
   2288uint64_t cpu_ldq_le_data(CPUArchState *env, target_ulong ptr)
   2289{
   2290    return cpu_ldq_le_data_ra(env, ptr, 0);
   2291}
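/*
 * Illustrative sketch, not part of the original file: a target helper would
 * pass GETPC() as the return address so that a TLB fill triggered by the
 * load can unwind to the right guest instruction.  "example_load_pair" and
 * its semantics are hypothetical.
 */
static uint64_t __attribute__((unused))
example_load_pair(CPUArchState *env, target_ulong ptr)
{
    uintptr_t ra = GETPC();
    uint64_t lo = cpu_ldl_le_data_ra(env, ptr, ra);
    uint64_t hi = cpu_ldl_le_data_ra(env, ptr + 4, ra);

    return lo | (hi << 32);
}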
   2292
   2293/*
   2294 * Store Helpers
   2295 */
   2296
   2297static inline void QEMU_ALWAYS_INLINE
   2298store_memop(void *haddr, uint64_t val, MemOp op)
   2299{
   2300    switch (op) {
   2301    case MO_UB:
   2302        stb_p(haddr, val);
   2303        break;
   2304    case MO_BEUW:
   2305        stw_be_p(haddr, val);
   2306        break;
   2307    case MO_LEUW:
   2308        stw_le_p(haddr, val);
   2309        break;
   2310    case MO_BEUL:
   2311        stl_be_p(haddr, val);
   2312        break;
   2313    case MO_LEUL:
   2314        stl_le_p(haddr, val);
   2315        break;
   2316    case MO_BEQ:
   2317        stq_be_p(haddr, val);
   2318        break;
   2319    case MO_LEQ:
   2320        stq_le_p(haddr, val);
   2321        break;
   2322    default:
   2323        qemu_build_not_reached();
   2324    }
   2325}
   2326
   2327static void __attribute__((noinline))
   2328store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
   2329                       uintptr_t retaddr, size_t size, uintptr_t mmu_idx,
   2330                       bool big_endian)
   2331{
   2332    const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
   2333    uintptr_t index, index2;
   2334    CPUTLBEntry *entry, *entry2;
   2335    target_ulong page2, tlb_addr, tlb_addr2;
   2336    MemOpIdx oi;
   2337    size_t size2;
   2338    int i;
   2339
   2340    /*
   2341     * Ensure the second page is in the TLB.  Note that the first page
   2342     * is already guaranteed to be filled, and that the second page
   2343     * cannot evict the first.
   2344     */
   2345    page2 = (addr + size) & TARGET_PAGE_MASK;
   2346    size2 = (addr + size) & ~TARGET_PAGE_MASK;
   2347    index2 = tlb_index(env, mmu_idx, page2);
   2348    entry2 = tlb_entry(env, mmu_idx, page2);
   2349
   2350    tlb_addr2 = tlb_addr_write(entry2);
   2351    if (!tlb_hit_page(tlb_addr2, page2)) {
   2352        if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
   2353            tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
   2354                     mmu_idx, retaddr);
   2355            index2 = tlb_index(env, mmu_idx, page2);
   2356            entry2 = tlb_entry(env, mmu_idx, page2);
   2357        }
   2358        tlb_addr2 = tlb_addr_write(entry2);
   2359    }
   2360
   2361    index = tlb_index(env, mmu_idx, addr);
   2362    entry = tlb_entry(env, mmu_idx, addr);
   2363    tlb_addr = tlb_addr_write(entry);
   2364
   2365    /*
   2366     * Handle watchpoints.  Since this may trap, all checks
   2367     * must happen before any store.
   2368     */
   2369    if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
   2370        cpu_check_watchpoint(env_cpu(env), addr, size - size2,
   2371                             env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
   2372                             BP_MEM_WRITE, retaddr);
   2373    }
   2374    if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
   2375        cpu_check_watchpoint(env_cpu(env), page2, size2,
   2376                             env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
   2377                             BP_MEM_WRITE, retaddr);
   2378    }
   2379
   2380    /*
   2381     * XXX: not efficient, but simple.
   2382     * This loop must go in the forward direction to avoid issues
   2383     * with self-modifying code in Windows 64-bit.
   2384     */
   2385    oi = make_memop_idx(MO_UB, mmu_idx);
   2386    if (big_endian) {
   2387        for (i = 0; i < size; ++i) {
   2388            /* Big-endian extract.  */
   2389            uint8_t val8 = val >> (((size - 1) * 8) - (i * 8));
   2390            helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
   2391        }
   2392    } else {
   2393        for (i = 0; i < size; ++i) {
   2394            /* Little-endian extract.  */
   2395            uint8_t val8 = val >> (i * 8);
   2396            helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
   2397        }
   2398    }
   2399}
   2400
   2401static inline void QEMU_ALWAYS_INLINE
   2402store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
   2403             MemOpIdx oi, uintptr_t retaddr, MemOp op)
   2404{
   2405    uintptr_t mmu_idx = get_mmuidx(oi);
   2406    uintptr_t index = tlb_index(env, mmu_idx, addr);
   2407    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
   2408    target_ulong tlb_addr = tlb_addr_write(entry);
   2409    const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
   2410    unsigned a_bits = get_alignment_bits(get_memop(oi));
   2411    void *haddr;
   2412    size_t size = memop_size(op);
   2413
   2414    /* Handle CPU specific unaligned behaviour */
   2415    if (addr & ((1 << a_bits) - 1)) {
   2416        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
   2417                             mmu_idx, retaddr);
   2418    }
   2419
   2420    /* If the TLB entry is for a different page, reload and try again.  */
   2421    if (!tlb_hit(tlb_addr, addr)) {
   2422        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
   2423            addr & TARGET_PAGE_MASK)) {
   2424            tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
   2425                     mmu_idx, retaddr);
   2426            index = tlb_index(env, mmu_idx, addr);
   2427            entry = tlb_entry(env, mmu_idx, addr);
   2428        }
   2429        tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
   2430    }
   2431
   2432    /* Handle anything that isn't just a straight memory access.  */
   2433    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
   2434        CPUIOTLBEntry *iotlbentry;
   2435        bool need_swap;
   2436
   2437        /* For anything that is unaligned, recurse through byte stores.  */
   2438        if ((addr & (size - 1)) != 0) {
   2439            goto do_unaligned_access;
   2440        }
   2441
   2442        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
   2443
   2444        /* Handle watchpoints.  */
   2445        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
   2446            /* On watchpoint hit, this will longjmp out.  */
   2447            cpu_check_watchpoint(env_cpu(env), addr, size,
   2448                                 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
   2449        }
   2450
   2451        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
   2452
   2453        /* Handle I/O access.  */
   2454        if (tlb_addr & TLB_MMIO) {
   2455            io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
   2456                      op ^ (need_swap * MO_BSWAP));
   2457            return;
   2458        }
   2459
   2460        /* Ignore writes to ROM.  */
   2461        if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
   2462            return;
   2463        }
   2464
   2465        /* Handle clean RAM pages.  */
   2466        if (tlb_addr & TLB_NOTDIRTY) {
   2467            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
   2468        }
   2469
   2470        haddr = (void *)((uintptr_t)addr + entry->addend);
   2471
   2472        /*
   2473         * Keep these two store_memop separate to ensure that the compiler
   2474         * is able to fold the entire function to a single instruction.
   2475         * There is a build-time assert inside to remind you of this.  ;-)
   2476         */
   2477        if (unlikely(need_swap)) {
   2478            store_memop(haddr, val, op ^ MO_BSWAP);
   2479        } else {
   2480            store_memop(haddr, val, op);
   2481        }
   2482        return;
   2483    }
   2484
   2485    /* Handle slow unaligned access (it spans two pages or IO).  */
   2486    if (size > 1
   2487        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
   2488                     >= TARGET_PAGE_SIZE)) {
   2489    do_unaligned_access:
   2490        store_helper_unaligned(env, addr, val, retaddr, size,
   2491                               mmu_idx, memop_big_endian(op));
   2492        return;
   2493    }
   2494
   2495    haddr = (void *)((uintptr_t)addr + entry->addend);
   2496    store_memop(haddr, val, op);
   2497}
   2498
   2499void __attribute__((noinline))
   2500helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
   2501                   MemOpIdx oi, uintptr_t retaddr)
   2502{
   2503    store_helper(env, addr, val, oi, retaddr, MO_UB);
   2504}
   2505
   2506void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
   2507                       MemOpIdx oi, uintptr_t retaddr)
   2508{
   2509    store_helper(env, addr, val, oi, retaddr, MO_LEUW);
   2510}
   2511
   2512void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
   2513                       MemOpIdx oi, uintptr_t retaddr)
   2514{
   2515    store_helper(env, addr, val, oi, retaddr, MO_BEUW);
   2516}
   2517
   2518void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
   2519                       MemOpIdx oi, uintptr_t retaddr)
   2520{
   2521    store_helper(env, addr, val, oi, retaddr, MO_LEUL);
   2522}
   2523
   2524void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
   2525                       MemOpIdx oi, uintptr_t retaddr)
   2526{
   2527    store_helper(env, addr, val, oi, retaddr, MO_BEUL);
   2528}
   2529
   2530void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
   2531                       MemOpIdx oi, uintptr_t retaddr)
   2532{
   2533    store_helper(env, addr, val, oi, retaddr, MO_LEQ);
   2534}
   2535
   2536void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
   2537                       MemOpIdx oi, uintptr_t retaddr)
   2538{
   2539    store_helper(env, addr, val, oi, retaddr, MO_BEQ);
   2540}
   2541
   2542/*
   2543 * Store Helpers for cpu_ldst.h
   2544 */
   2545
   2546static inline void QEMU_ALWAYS_INLINE
   2547cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
   2548                 int mmu_idx, uintptr_t retaddr, MemOp op)
   2549{
   2550    MemOpIdx oi = make_memop_idx(op, mmu_idx);
   2551
   2552    trace_guest_st_before_exec(env_cpu(env), addr, oi);
   2553
   2554    store_helper(env, addr, val, oi, retaddr, op);
   2555
   2556    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
   2557}
   2558
   2559void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
   2560                       int mmu_idx, uintptr_t retaddr)
   2561{
   2562    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
   2563}
   2564
   2565void cpu_stw_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
   2566                          int mmu_idx, uintptr_t retaddr)
   2567{
   2568    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUW);
   2569}
   2570
   2571void cpu_stl_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
   2572                          int mmu_idx, uintptr_t retaddr)
   2573{
   2574    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUL);
   2575}
   2576
   2577void cpu_stq_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
   2578                          int mmu_idx, uintptr_t retaddr)
   2579{
   2580    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEQ);
   2581}
   2582
   2583void cpu_stw_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
   2584                          int mmu_idx, uintptr_t retaddr)
   2585{
   2586    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUW);
   2587}
   2588
   2589void cpu_stl_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
   2590                          int mmu_idx, uintptr_t retaddr)
   2591{
   2592    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUL);
   2593}
   2594
   2595void cpu_stq_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
   2596                          int mmu_idx, uintptr_t retaddr)
   2597{
   2598    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEQ);
   2599}
   2600
   2601void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
   2602                     uint32_t val, uintptr_t retaddr)
   2603{
   2604    cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
   2605}
   2606
   2607void cpu_stw_be_data_ra(CPUArchState *env, target_ulong ptr,
   2608                        uint32_t val, uintptr_t retaddr)
   2609{
   2610    cpu_stw_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
   2611}
   2612
   2613void cpu_stl_be_data_ra(CPUArchState *env, target_ulong ptr,
   2614                        uint32_t val, uintptr_t retaddr)
   2615{
   2616    cpu_stl_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
   2617}
   2618
   2619void cpu_stq_be_data_ra(CPUArchState *env, target_ulong ptr,
   2620                        uint64_t val, uintptr_t retaddr)
   2621{
   2622    cpu_stq_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
   2623}
   2624
   2625void cpu_stw_le_data_ra(CPUArchState *env, target_ulong ptr,
   2626                        uint32_t val, uintptr_t retaddr)
   2627{
   2628    cpu_stw_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
   2629}
   2630
   2631void cpu_stl_le_data_ra(CPUArchState *env, target_ulong ptr,
   2632                        uint32_t val, uintptr_t retaddr)
   2633{
   2634    cpu_stl_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
   2635}
   2636
   2637void cpu_stq_le_data_ra(CPUArchState *env, target_ulong ptr,
   2638                        uint64_t val, uintptr_t retaddr)
   2639{
   2640    cpu_stq_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
   2641}
   2642
   2643void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
   2644{
   2645    cpu_stb_data_ra(env, ptr, val, 0);
   2646}
   2647
   2648void cpu_stw_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
   2649{
   2650    cpu_stw_be_data_ra(env, ptr, val, 0);
   2651}
   2652
   2653void cpu_stl_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
   2654{
   2655    cpu_stl_be_data_ra(env, ptr, val, 0);
   2656}
   2657
   2658void cpu_stq_be_data(CPUArchState *env, target_ulong ptr, uint64_t val)
   2659{
   2660    cpu_stq_be_data_ra(env, ptr, val, 0);
   2661}
   2662
   2663void cpu_stw_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
   2664{
   2665    cpu_stw_le_data_ra(env, ptr, val, 0);
   2666}
   2667
   2668void cpu_stl_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
   2669{
   2670    cpu_stl_le_data_ra(env, ptr, val, 0);
   2671}
   2672
   2673void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val)
   2674{
   2675    cpu_stq_le_data_ra(env, ptr, val, 0);
   2676}
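/*
 * Illustrative sketch, not part of the original file: the store wrappers
 * mirror the load wrappers above.  "example_store_pair" is a hypothetical
 * helper storing a 64-bit value as two little-endian words.
 */
static void __attribute__((unused))
example_store_pair(CPUArchState *env, target_ulong ptr, uint64_t val)
{
    uintptr_t ra = GETPC();

    cpu_stl_le_data_ra(env, ptr, (uint32_t)val, ra);
    cpu_stl_le_data_ra(env, ptr + 4, (uint32_t)(val >> 32), ra);
}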
   2677
   2678/*
   2679 * First set of functions passes in OI and RETADDR.
   2680 * This makes them callable from other helpers.
   2681 */
   2682
   2683#define ATOMIC_NAME(X) \
   2684    glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu)
   2685
   2686#define ATOMIC_MMU_CLEANUP
   2687#define ATOMIC_MMU_IDX   get_mmuidx(oi)
   2688
   2689#include "atomic_common.c.inc"
   2690
   2691#define DATA_SIZE 1
   2692#include "atomic_template.h"
   2693
   2694#define DATA_SIZE 2
   2695#include "atomic_template.h"
   2696
   2697#define DATA_SIZE 4
   2698#include "atomic_template.h"
   2699
   2700#ifdef CONFIG_ATOMIC64
   2701#define DATA_SIZE 8
   2702#include "atomic_template.h"
   2703#endif
   2704
   2705#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
   2706#define DATA_SIZE 16
   2707#include "atomic_template.h"
   2708#endif
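/*
 * Illustrative note, not part of the original file: with DATA_SIZE 4 the
 * template instantiates, among others, ATOMIC_NAME(cmpxchg) with SUFFIX
 * "l" and END "_le", i.e. cpu_atomic_cmpxchgl_le_mmu().  Callers build the
 * MemOpIdx with make_memop_idx() and pass GETPC() as the return address,
 * just like the other *_mmu helpers in this file; the prototypes are
 * declared alongside the other softmmu helpers (see cpu_ldst.h).
 */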
   2709
   2710/* Code access functions.  */
   2711
   2712static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
   2713                               MemOpIdx oi, uintptr_t retaddr)
   2714{
   2715    return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
   2716}
   2717
   2718uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
   2719{
   2720    MemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
   2721    return full_ldub_code(env, addr, oi, 0);
   2722}
   2723
   2724static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
   2725                               MemOpIdx oi, uintptr_t retaddr)
   2726{
   2727    return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
   2728}
   2729
   2730uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
   2731{
   2732    MemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
   2733    return full_lduw_code(env, addr, oi, 0);
   2734}
   2735
   2736static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
   2737                              MemOpIdx oi, uintptr_t retaddr)
   2738{
   2739    return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
   2740}
   2741
   2742uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
   2743{
   2744    MemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
   2745    return full_ldl_code(env, addr, oi, 0);
   2746}
   2747
   2748static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
   2749                              MemOpIdx oi, uintptr_t retaddr)
   2750{
   2751    return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
   2752}
   2753
   2754uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
   2755{
   2756    MemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
   2757    return full_ldq_code(env, addr, oi, 0);
   2758}
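/*
 * Illustrative sketch, not part of the original file: the code-access
 * wrappers above are what a target front end uses to fetch instruction
 * words at translation time (often indirectly, via the translator_ld*
 * helpers).  "example_fetch_insn" is a hypothetical name.
 */
static uint32_t __attribute__((unused))
example_fetch_insn(CPUArchState *env, target_ulong pc)
{
    /* The MO_TE* memops above apply the target's endianness for us. */
    return cpu_ldl_code(env, pc);
}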