cachepc-qemu

Fork of AMDESE/qemu with changes for the cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu

mmap-alloc.c (9278B)


/*
 * Support for RAM backed by mmapped host memory.
 *
 * Copyright (c) 2015 Red Hat, Inc.
 *
 * Authors:
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */

#ifdef CONFIG_LINUX
#include <linux/mman.h>
#else  /* !CONFIG_LINUX */
#define MAP_SYNC              0x0
#define MAP_SHARED_VALIDATE   0x0
#endif /* CONFIG_LINUX */

#include "qemu/osdep.h"
#include "qemu/mmap-alloc.h"
#include "qemu/host-utils.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"

#define HUGETLBFS_MAGIC       0x958458f6

#ifdef CONFIG_LINUX
#include <sys/vfs.h>
#endif

size_t qemu_fd_getpagesize(int fd)
{
#ifdef CONFIG_LINUX
    struct statfs fs;
    int ret;

    if (fd != -1) {
        do {
            ret = fstatfs(fd, &fs);
        } while (ret != 0 && errno == EINTR);

        if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
            return fs.f_bsize;
        }
    }
#ifdef __sparc__
    /* SPARC Linux needs greater alignment than the pagesize */
    return QEMU_VMALLOC_ALIGN;
#endif
#endif

    return qemu_real_host_page_size;
}
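
/*
 * Illustrative note (not part of the original file): on Linux, the probe
 * above reports the hugetlbfs page size for fds backed by hugetlbfs and the
 * host page size otherwise. A hypothetical caller:
 *
 *     int fd = open("/dev/hugepages/guest-ram", O_CREAT | O_RDWR, 0600);
 *     size_t huge = qemu_fd_getpagesize(fd);  // e.g. 2 MiB on x86-64
 *     size_t anon = qemu_fd_getpagesize(-1);  // host page size, e.g. 4 KiB
 *
 * "/dev/hugepages/guest-ram" is only an example hugetlbfs mount path.
 */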

size_t qemu_mempath_getpagesize(const char *mem_path)
{
#ifdef CONFIG_LINUX
    struct statfs fs;
    int ret;

    if (mem_path) {
        do {
            ret = statfs(mem_path, &fs);
        } while (ret != 0 && errno == EINTR);

        if (ret != 0) {
            fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                    strerror(errno));
            exit(1);
        }

        if (fs.f_type == HUGETLBFS_MAGIC) {
            /* It's a hugepage; return the huge page size */
            return fs.f_bsize;
        }
    }
#ifdef __sparc__
    /* SPARC Linux needs greater alignment than the pagesize */
    return QEMU_VMALLOC_ALIGN;
#endif
#endif

    return qemu_real_host_page_size;
}
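
/*
 * Sketch (an assumption, not from the original file): this path-based
 * variant serves setups where only a directory such as QEMU's -mem-path
 * argument is known and no fd exists yet, e.g.:
 *
 *     size_t pagesize = qemu_mempath_getpagesize("/dev/hugepages");
 *
 * Unlike qemu_fd_getpagesize(), a failing statfs() is fatal here: a
 * user-supplied memory path that cannot be inspected is a hard
 * configuration error.
 */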

#define OVERCOMMIT_MEMORY_PATH "/proc/sys/vm/overcommit_memory"
static bool map_noreserve_effective(int fd, uint32_t qemu_map_flags)
{
#if defined(__linux__)
    const bool readonly = qemu_map_flags & QEMU_MAP_READONLY;
    const bool shared = qemu_map_flags & QEMU_MAP_SHARED;
    gchar *content = NULL;
    const char *endptr;
    unsigned int tmp;

    /*
     * hugetlb accounting is different from ordinary swap reservation:
     * a) Hugetlb pages from the pool are reserved for both private and
     *    shared mappings. For shared mappings, all mappers have to specify
     *    MAP_NORESERVE.
     * b) MAP_NORESERVE is not affected by /proc/sys/vm/overcommit_memory.
     */
    if (qemu_fd_getpagesize(fd) != qemu_real_host_page_size) {
        return true;
    }

    /*
     * Accountable mappings in the kernel that can be affected by MAP_NORESERVE
     * are private writable mappings (see mm/mmap.c:accountable_mapping() in
     * Linux). For all shared or readonly mappings, MAP_NORESERVE is always
     * implicitly active -- no reservation; this includes shmem. The only
     * exception is shared anonymous memory, which is accounted like private
     * anonymous memory.
     */
    if (readonly || (shared && fd >= 0)) {
        return true;
    }

    /*
     * MAP_NORESERVE is globally ignored for applicable !hugetlb mappings when
     * memory overcommit is set to "never". Sparse memory regions aren't really
     * possible in this system configuration.
     *
     * Bail out now instead of silently committing way more memory than
     * currently desired by the user.
     */
    if (g_file_get_contents(OVERCOMMIT_MEMORY_PATH, &content, NULL, NULL) &&
        !qemu_strtoui(content, &endptr, 0, &tmp) &&
        (!endptr || *endptr == '\n')) {
        if (tmp == 2) {
            error_report("Skipping reservation of swap space is not supported:"
                         " \"" OVERCOMMIT_MEMORY_PATH "\" is \"2\"");
            return false;
        }
        return true;
    }
    /* this interface has been around since Linux 2.6 */
    error_report("Skipping reservation of swap space is not supported:"
                 " Could not read: \"" OVERCOMMIT_MEMORY_PATH "\"");
    return false;
#endif
    /*
     * E.g., FreeBSD used to define MAP_NORESERVE, never implemented it,
     * and removed it a while ago.
     */
    error_report("Skipping reservation of swap space is not supported");
    return false;
}
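
/*
 * For reference (added note, not in the original file):
 * /proc/sys/vm/overcommit_memory holds a single integer -- 0 (heuristic
 * overcommit, the default), 1 (always overcommit) or 2 (never overcommit).
 * Only mode 2 defeats MAP_NORESERVE for ordinary (non-hugetlb) private
 * writable mappings, which is why the function above rejects exactly the
 * value 2.
 */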

/*
 * Reserve a new memory region of the requested size to be used for mapping
 * from the given fd (if any).
 */
static void *mmap_reserve(size_t size, int fd)
{
    int flags = MAP_PRIVATE;

#if defined(__powerpc64__) && defined(__linux__)
    /*
     * On ppc64 mappings in the same segment (aka slice) must share the same
     * page size. Since we will be re-allocating part of this segment
     * from the supplied fd, we should make sure to use the same page size; to
     * this end we mmap the supplied fd.  In this case, set MAP_NORESERVE to
     * avoid allocating backing store memory.
     * We do this unless we are using the system page size, in which case
     * anonymous memory is OK.
     */
    if (fd == -1 || qemu_fd_getpagesize(fd) == qemu_real_host_page_size) {
        fd = -1;
        flags |= MAP_ANONYMOUS;
    } else {
        flags |= MAP_NORESERVE;
    }
#else
    fd = -1;
    flags |= MAP_ANONYMOUS;
#endif

    return mmap(0, size, PROT_NONE, flags, fd, 0);
}
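
/*
 * Sketch of the reserve/activate split (illustrative, assuming POSIX mmap
 * semantics): mapping with PROT_NONE claims virtual address space without
 * making it usable or charging swap, e.g.:
 *
 *     void *region = mmap(NULL, total, PROT_NONE,
 *                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 * A later MAP_FIXED mapping inside [region, region + total) then replaces
 * part of the reservation atomically, which is what mmap_activate() below
 * relies on.
 */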

/*
 * Activate memory in a reserved region from the given fd (if any), to make
 * it accessible.
 */
static void *mmap_activate(void *ptr, size_t size, int fd,
                           uint32_t qemu_map_flags, off_t map_offset)
{
    const bool noreserve = qemu_map_flags & QEMU_MAP_NORESERVE;
    const bool readonly = qemu_map_flags & QEMU_MAP_READONLY;
    const bool shared = qemu_map_flags & QEMU_MAP_SHARED;
    const bool sync = qemu_map_flags & QEMU_MAP_SYNC;
    const int prot = PROT_READ | (readonly ? 0 : PROT_WRITE);
    int map_sync_flags = 0;
    int flags = MAP_FIXED;
    void *activated_ptr;

    if (noreserve && !map_noreserve_effective(fd, qemu_map_flags)) {
        return MAP_FAILED;
    }

    flags |= fd == -1 ? MAP_ANONYMOUS : 0;
    flags |= shared ? MAP_SHARED : MAP_PRIVATE;
    flags |= noreserve ? MAP_NORESERVE : 0;
    if (shared && sync) {
        map_sync_flags = MAP_SYNC | MAP_SHARED_VALIDATE;
    }

    activated_ptr = mmap(ptr, size, prot, flags | map_sync_flags, fd,
                         map_offset);
    if (activated_ptr == MAP_FAILED && map_sync_flags) {
        if (errno == ENOTSUP) {
            char *proc_link = g_strdup_printf("/proc/self/fd/%d", fd);
            char *file_name = g_malloc0(PATH_MAX);
            int len = readlink(proc_link, file_name, PATH_MAX - 1);

            if (len < 0) {
                len = 0;
            }
            file_name[len] = '\0';
            fprintf(stderr, "Warning: requesting persistence across crashes "
                    "for backend file %s failed. Proceeding without "
                    "persistence; data might become corrupted in case of a "
                    "host crash.\n", file_name);
            g_free(proc_link);
            g_free(file_name);
            warn_report("Using a non-DAX backing file with the 'pmem=on'"
                        " option is deprecated");
        }
        /*
         * If mmap failed with MAP_SHARED_VALIDATE | MAP_SYNC, try again
         * without these flags, for backwards compatibility.
         */
        activated_ptr = mmap(ptr, size, prot, flags, fd, map_offset);
    }
    return activated_ptr;
}
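
/*
 * Illustrative note (not from the original file): MAP_SYNC is only honored
 * together with MAP_SHARED_VALIDATE and only on DAX-capable filesystems;
 * elsewhere the kernel fails the mmap() with ENOTSUP, triggering the
 * fallback above. A hypothetical caller, with `reserved` obtained from
 * mmap_reserve():
 *
 *     void *p = mmap_activate(reserved, size, fd,
 *                             QEMU_MAP_SHARED | QEMU_MAP_SYNC, 0);
 *     // p may silently lack MAP_SYNC if the kernel rejected the flags
 */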

static inline size_t mmap_guard_pagesize(int fd)
{
#if defined(__powerpc64__) && defined(__linux__)
    /* Mappings in the same segment must share the same page size */
    return qemu_fd_getpagesize(fd);
#else
    return qemu_real_host_page_size;
#endif
}

void *qemu_ram_mmap(int fd,
                    size_t size,
                    size_t align,
                    uint32_t qemu_map_flags,
                    off_t map_offset)
{
    const size_t guard_pagesize = mmap_guard_pagesize(fd);
    size_t offset, total;
    void *ptr, *guardptr;

    /*
     * Note: this always allocates at least one extra page of virtual address
     * space, even if size is already aligned.
     */
    total = size + align;

    guardptr = mmap_reserve(total, fd);
    if (guardptr == MAP_FAILED) {
        return MAP_FAILED;
    }

    assert(is_power_of_2(align));
    /* Always align to host page size */
    assert(align >= guard_pagesize);

    offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr;

    ptr = mmap_activate(guardptr + offset, size, fd, qemu_map_flags,
                        map_offset);
    if (ptr == MAP_FAILED) {
        munmap(guardptr, total);
        return MAP_FAILED;
    }

    if (offset > 0) {
        munmap(guardptr, offset);
    }

    /*
     * Leave a single PROT_NONE page allocated after the RAM block, to serve
     * as a guard page against potential buffer overflows.
     */
    total -= offset;
    if (total > size + guard_pagesize) {
        munmap(ptr + size + guard_pagesize, total - size - guard_pagesize);
    }

    return ptr;
}
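
/*
 * Resulting layout (illustrative), for a reservation starting at guardptr:
 *
 *     guardptr              ptr                 ptr + size
 *     |-- alignment slack --|-- RAM block ------|-- PROT_NONE guard page --|
 *        (unmapped above)      (activated)         (left reserved)
 *
 * The leading slack and everything past the single guard page are
 * munmap()ed above, so exactly size + guard_pagesize bytes of address
 * space stay in use.
 */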

void qemu_ram_munmap(int fd, void *ptr, size_t size)
{
    if (ptr) {
        /* Unmap both the RAM block and the guard page */
        munmap(ptr, size + mmap_guard_pagesize(fd));
    }
}
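
/*
 * Example lifecycle (a minimal sketch, assuming an anonymous RAM block of
 * `len` bytes with host-page alignment; error handling elided):
 *
 *     void *ram = qemu_ram_mmap(-1, len, qemu_real_host_page_size, 0, 0);
 *     if (ram == MAP_FAILED) {
 *         ... handle failure ...
 *     }
 *     ... use ram ...
 *     qemu_ram_munmap(-1, ram, len);
 *
 * The same fd must be passed to qemu_ram_munmap() as to qemu_ram_mmap(),
 * since the guard page size is derived from it.
 */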