cachepc-qemu

Fork of AMDESE/qemu with changes for the cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu

region.c (26332B)


/*
 * Memory region management for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qapi/error.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "tcg-internal.h"


struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start_aligned;
    void *after_prologue;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */
    size_t total_size; /* size of entire buffer, >= n * stride */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};
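
/*
 * Illustrative layout (editorial sketch, derived from the code below):
 * with one guard page per region and the prologue living in region 0,
 * the buffer looks like
 *
 *   [prologue|region 0|guard][region 1|guard]...[region n-1 (+slack)|guard]
 *
 * where stride = size + guard page, and any pages left over from
 * rounding region_size down to a page multiple are absorbed by the
 * last region.
 */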

static struct tcg_region_state region;

/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;

bool in_code_gen_buffer(const void *p)
{
    /*
     * Much like it is valid to have a pointer to the byte past the
     * end of an array (so long as you don't dereference it), allow
     * a pointer to the byte past the end of the code gen buffer.
     */
    return (size_t)(p - region.start_aligned) <= region.total_size;
}

#ifdef CONFIG_DEBUG_TCG
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff;
    }
    return rw;
}

void *tcg_splitwx_to_rw(const void *rx)
{
    /* Pass NULL pointers unchanged. */
    if (rx) {
        rx -= tcg_splitwx_diff;
        /* Assert that we end with a pointer in the rw region. */
        g_assert(in_code_gen_buffer(rx));
    }
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp, gpointer userdata)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * All lookups have one of the .size fields set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}

static void tb_destroy(gpointer value)
{
    TranslationBlock *tb = value;
    qemu_spin_destroy(&tb->jmp_lock);
}

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new_full(tb_tc_cmp, NULL, NULL, tb_destroy);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
{
    size_t region_idx;

    /*
     * Like tcg_splitwx_to_rw, with no assert.  The pc may come from
     * a signal handler over which the caller has no control.
     */
    if (!in_code_gen_buffer(p)) {
        p -= tcg_splitwx_diff;
        if (!in_code_gen_buffer(p)) {
            return NULL;
        }
    }

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}
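
/*
 * Note on the mapping above: region_idx is offset / stride, clamped so
 * that addresses in the last region's extra pages (beyond n - 1 full
 * strides) still resolve to region n - 1, and so that anything below
 * start_aligned resolves to region 0.
 */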

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    if (rt == NULL) {
        return NULL;
    }

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}
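
/*
 * Note: the lookup key above has .size == 0, so tb_tc_cmp falls back to
 * ptr_cmp_tb_tc and the tree search matches the TB whose
 * [tc.ptr, tc.ptr + tc.size) range contains tc_ptr, if any.
 */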

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.after_prologue;
    }
    /* The final region may have a few extra pages due to earlier rounding. */
    if (curr_region == region.n - 1) {
        end = region.start_aligned + region.total_size;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}
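
/*
 * The TCG_HIGHWATER slack below the region's end means that any one
 * operation that starts below code_gen_highwater cannot overrun the
 * buffer, so overflow can be tested after code generation instead of
 * during it.
 */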

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static void tcg_region_initial_alloc__locked(TCGContext *s)
{
    bool err = tcg_region_alloc__locked(s);
    g_assert(!err);
}

void tcg_region_initial_alloc(TCGContext *s)
{
    qemu_mutex_lock(&region.lock);
    tcg_region_initial_alloc__locked(s);
    qemu_mutex_unlock(&region.lock);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        tcg_region_initial_alloc__locked(s);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

static size_t tcg_n_regions(size_t tb_size, unsigned max_cpus)
{
#ifdef CONFIG_USER_ONLY
    return 1;
#else
    size_t n_regions;

    /*
     * It is likely that some vCPUs will translate more code than others,
     * so we first try to set more regions than max_cpus, with those regions
     * being of reasonable size. If that's not possible we make do by evenly
     * dividing the code_gen_buffer among the vCPUs.
     */
    /* Use a single region if all we have is one vCPU thread */
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /*
     * Try to have more regions than max_cpus, with each region being >= 2 MB.
     * If we can't, then just allocate one region per vCPU thread.
     */
    n_regions = tb_size / (2 * MiB);
    if (n_regions <= max_cpus) {
        return max_cpus;
    }
    return MIN(n_regions, max_cpus * 8);
#endif
}
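
/*
 * Worked example (illustrative numbers): an 8-vCPU MTTCG guest with a
 * 1 GiB buffer gives n_regions = 1 GiB / 2 MiB = 512 > max_cpus, so we
 * end up with MIN(512, 8 * 8) = 64 regions of 16 MiB each (before
 * guard pages are deducted).
 */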

/*
 * Minimum size of the code gen buffer.  This number is randomly chosen,
 * but not so small that we can't have a fair number of TBs live.
 *
 * Maximum size, MAX_CODE_GEN_BUFFER_SIZE, is defined in tcg-target.h.
 * Unless otherwise indicated, this is constrained by the range of
 * direct branches on the host cpu, as used by the TCG implementation
 * of goto_tb.
 */
#define MIN_CODE_GEN_BUFFER_SIZE     (1 * MiB)

#if TCG_TARGET_REG_BITS == 32
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
#ifdef CONFIG_USER_ONLY
/*
 * For user mode on smaller 32 bit systems we may run into trouble
 * allocating big chunks of data in the right place. On these systems
 * we utilise a static code generation buffer directly in the binary.
 */
#define USE_STATIC_CODE_GEN_BUFFER
#endif
#else /* TCG_TARGET_REG_BITS == 64 */
#ifdef CONFIG_USER_ONLY
/*
 * As user-mode emulation typically means running multiple instances
 * of the translator, don't go too nuts with our default code gen
 * buffer lest we make things too hard for the OS.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
#else
/*
 * We expect most system emulation to run one or two guests per host.
 * Users running large scale system emulation may want to tweak their
 * runtime setup via the tb-size control on the command line.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
#endif
#endif

#define DEFAULT_CODE_GEN_BUFFER_SIZE \
  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)

#ifdef USE_STATIC_CODE_GEN_BUFFER
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
    __attribute__((aligned(CODE_GEN_ALIGN)));

static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
{
    void *buf, *end;
    size_t size;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return -1;
    }

    /* page-align the beginning and end of the buffer */
    buf = static_code_gen_buffer;
    end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
    buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size);
    end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size);

    size = end - buf;

    /* Honor a command-line option limiting the size of the buffer.  */
    if (size > tb_size) {
        size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size);
    }

    region.start_aligned = buf;
    region.total_size = size;

    return PROT_READ | PROT_WRITE;
}
#elif defined(_WIN32)
static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    void *buf;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return -1;
    }

    buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
                             PAGE_EXECUTE_READWRITE);
    if (buf == NULL) {
        error_setg_win32(errp, GetLastError(),
                         "allocate %zu bytes for jit buffer", size);
        return -1;
    }

    region.start_aligned = buf;
    region.total_size = size;

    return PAGE_READ | PAGE_WRITE | PAGE_EXEC;
}
#else
static int alloc_code_gen_buffer_anon(size_t size, int prot,
                                      int flags, Error **errp)
{
    void *buf;

    buf = mmap(NULL, size, prot, flags, -1, 0);
    if (buf == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "allocate %zu bytes for jit buffer", size);
        return -1;
    }

    region.start_aligned = buf;
    region.total_size = size;
    return prot;
}

#ifndef CONFIG_TCG_INTERPRETER
#ifdef CONFIG_POSIX
#include "qemu/memfd.h"

static int alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
{
    void *buf_rw = NULL, *buf_rx = MAP_FAILED;
    int fd = -1;

    buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
    if (buf_rw == NULL) {
        goto fail;
    }

    buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
    if (buf_rx == MAP_FAILED) {
        goto fail_rx;
    }

    close(fd);
    region.start_aligned = buf_rw;
    region.total_size = size;
    tcg_splitwx_diff = buf_rx - buf_rw;

    return PROT_READ | PROT_WRITE;

 fail_rx:
    error_setg_errno(errp, errno, "failed to map shared memory for execute");
 fail:
    if (buf_rx != MAP_FAILED) {
        munmap(buf_rx, size);
    }
    if (buf_rw) {
        munmap(buf_rw, size);
    }
    if (fd >= 0) {
        close(fd);
    }
    return -1;
}
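
/*
 * The memfd variant maps the same file twice: buf_rw with PROT_READ |
 * PROT_WRITE and buf_rx with PROT_READ | PROT_EXEC.  tcg_splitwx_diff
 * records the constant offset between the two views, which
 * tcg_splitwx_to_rx/_to_rw apply to translate pointers between them.
 */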
#endif /* CONFIG_POSIX */

#ifdef CONFIG_DARWIN
#include <mach/mach.h>

extern kern_return_t mach_vm_remap(vm_map_t target_task,
                                   mach_vm_address_t *target_address,
                                   mach_vm_size_t size,
                                   mach_vm_offset_t mask,
                                   int flags,
                                   vm_map_t src_task,
                                   mach_vm_address_t src_address,
                                   boolean_t copy,
                                   vm_prot_t *cur_protection,
                                   vm_prot_t *max_protection,
                                   vm_inherit_t inheritance);

static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
{
    kern_return_t ret;
    mach_vm_address_t buf_rw, buf_rx;
    vm_prot_t cur_prot, max_prot;

    /* Map the read-write portion via normal anon memory. */
    if (alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
                                   MAP_PRIVATE | MAP_ANONYMOUS, errp) < 0) {
        return -1;
    }

    buf_rw = (mach_vm_address_t)region.start_aligned;
    buf_rx = 0;
    ret = mach_vm_remap(mach_task_self(),
                        &buf_rx,
                        size,
                        0,
                        VM_FLAGS_ANYWHERE,
                        mach_task_self(),
                        buf_rw,
                        false,
                        &cur_prot,
                        &max_prot,
                        VM_INHERIT_NONE);
    if (ret != KERN_SUCCESS) {
        /* TODO: Convert "ret" to a human readable error message. */
        error_setg(errp, "vm_remap for jit splitwx failed");
        munmap((void *)buf_rw, size);
        return -1;
    }

    if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
        error_setg_errno(errp, errno, "mprotect for jit splitwx");
        munmap((void *)buf_rx, size);
        munmap((void *)buf_rw, size);
        return -1;
    }

    tcg_splitwx_diff = buf_rx - buf_rw;
    return PROT_READ | PROT_WRITE;
}
#endif /* CONFIG_DARWIN */
#endif /* CONFIG_TCG_INTERPRETER */

static int alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
{
#ifndef CONFIG_TCG_INTERPRETER
# ifdef CONFIG_DARWIN
    return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
# endif
# ifdef CONFIG_POSIX
    return alloc_code_gen_buffer_splitwx_memfd(size, errp);
# endif
#endif
    error_setg(errp, "jit split-wx not supported");
    return -1;
}

static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    ERRP_GUARD();
    int prot, flags;

    if (splitwx) {
        prot = alloc_code_gen_buffer_splitwx(size, errp);
        if (prot >= 0) {
            return prot;
        }
        /*
         * If splitwx force-on (1), fail;
         * if splitwx default-on (-1), fall through to splitwx off.
         */
        if (splitwx > 0) {
            return -1;
        }
        error_free_or_abort(errp);
    }

    /*
     * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
     * rejects a permission change from RWX -> NONE when reserving the
     * guard pages later.  We can go the other way with the same number
     * of syscalls, so always begin with PROT_NONE.
     */
    prot = PROT_NONE;
    flags = MAP_PRIVATE | MAP_ANONYMOUS;
#ifdef CONFIG_DARWIN
    /* Applicable to both iOS and macOS (Apple Silicon). */
    if (!splitwx) {
        flags |= MAP_JIT;
    }
#endif

    return alloc_code_gen_buffer_anon(size, prot, flags, errp);
}
#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region.  Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    const size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    int have_prot, need_prot;

    /* Size the buffer.  */
    if (tb_size == 0) {
        size_t phys_mem = qemu_get_host_physmem();
        if (phys_mem == 0) {
            tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
        } else {
            tb_size = QEMU_ALIGN_DOWN(phys_mem / 8, page_size);
            tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, tb_size);
        }
    }
    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
    }
    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
    }
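
    /*
     * For example (illustrative numbers): a 64-bit softmmu host with
     * 16 GiB of RAM yields phys_mem / 8 = 2 GiB, which is then capped
     * at DEFAULT_CODE_GEN_BUFFER_SIZE and finally clamped to the
     * MIN/MAX bounds above.
     */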

    have_prot = alloc_code_gen_buffer(tb_size, splitwx, &error_fatal);
    assert(have_prot >= 0);

    /* Request large pages for the buffer and the splitwx.  */
    qemu_madvise(region.start_aligned, region.total_size, QEMU_MADV_HUGEPAGE);
    if (tcg_splitwx_diff) {
        qemu_madvise(region.start_aligned + tcg_splitwx_diff,
                     region.total_size, QEMU_MADV_HUGEPAGE);
    }

    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region.n = tcg_n_regions(tb_size, max_cpus);
    region_size = tb_size / region.n;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);
    region.stride = region_size;

    /* Reserve space for guard pages. */
    region.size = region_size - page_size;
    region.total_size -= page_size;

    /*
     * The first region will be smaller than the others, via the prologue,
     * which has yet to be allocated.  For now, the first region begins at
     * the page boundary.
     */
    region.after_prologue = region.start_aligned;

    /* init the region struct */
    qemu_mutex_init(&region.lock);

    /*
     * Set guard pages in the rw buffer, as that's the one into which
     * buffer overruns could occur.  Do not set guard pages in the rx
     * buffer -- let that one use hugepages throughout.
     * Work with the page protections set up with the initial mapping.
     */
    need_prot = PAGE_READ | PAGE_WRITE;
#ifndef CONFIG_TCG_INTERPRETER
    if (tcg_splitwx_diff == 0) {
        need_prot |= PAGE_EXEC;
    }
#endif
    for (size_t i = 0, n = region.n; i < n; i++) {
        void *start, *end;

        tcg_region_bounds(i, &start, &end);
        if (have_prot != need_prot) {
            int rc;

            if (need_prot == (PAGE_READ | PAGE_WRITE | PAGE_EXEC)) {
                rc = qemu_mprotect_rwx(start, end - start);
            } else if (need_prot == (PAGE_READ | PAGE_WRITE)) {
                rc = qemu_mprotect_rw(start, end - start);
            } else {
                g_assert_not_reached();
            }
            if (rc) {
                error_setg_errno(&error_fatal, errno,
                                 "mprotect of jit buffer");
            }
        }
        if (have_prot != 0) {
            /* Guard pages are nice for bug detection but are not essential. */
            (void)qemu_mprotect_none(end, page_size);
        }
    }

    tcg_region_trees_init();

    /*
     * Leave the initial context initialized to the first region.
     * This will be the context into which we generate the prologue.
     * It is also the only context for CONFIG_USER_ONLY.
     */
    tcg_region_initial_alloc__locked(&tcg_init_ctx);
}

void tcg_region_prologue_set(TCGContext *s)
{
    /* Deduct the prologue from the first region.  */
    g_assert(region.start_aligned == s->code_gen_buffer);
    region.after_prologue = s->code_ptr;

    /* Recompute boundaries of the first region. */
    tcg_region_assign(s, 0);

    /* Register the balance of the buffer with gdb. */
    tcg_register_jit(tcg_splitwx_to_rx(region.after_prologue),
                     region.start_aligned + region.total_size -
                     region.after_prologue);
}

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        size_t size;

        size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.total_size;
    capacity -= (region.n - 1) * guard_size;
    capacity -= region.n * TCG_HIGHWATER;

    return capacity;
}
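
/*
 * Note: region.total_size already excludes the last region's guard page
 * (it was deducted in tcg_region_init), which is why only n - 1 guard
 * pages are subtracted above.
 */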