cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu

stream.c (11363B)


/*
 * Image streaming
 *
 * Copyright IBM, Corp. 2011
 *
 * Authors:
 *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob_int.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qdict.h"
#include "qemu/ratelimit.h"
#include "sysemu/block-backend.h"
#include "block/copy-on-read.h"

enum {
    /*
     * Maximum chunk size to feed to copy-on-read.  This should be
     * large enough to process multiple clusters in a single call, so
     * that populating contiguous regions of the image is efficient.
     */
    STREAM_CHUNK = 512 * 1024, /* in bytes */
};

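/* Per-job state for a single block-stream operation. */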
typedef struct StreamBlockJob {
    BlockJob common;
    BlockDriverState *base_overlay; /* COW overlay (stream from this) */
    BlockDriverState *above_base;   /* Node directly above the base */
    BlockDriverState *cor_filter_bs;
    BlockDriverState *target_bs;
    BlockdevOnError on_error;
    char *backing_file_str;
    bool bs_read_only;
} StreamBlockJob;

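/*
 * Copy one byte range into the top image by issuing a prefetch read through
 * the copy-on-read filter: no data is returned to the caller (the qiov is
 * NULL), the read only forces the range to be populated in the top node.
 */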
static int coroutine_fn stream_populate(BlockBackend *blk,
                                        int64_t offset, uint64_t bytes)
{
    assert(bytes < SIZE_MAX);

    return blk_co_preadv(blk, offset, bytes, NULL, BDRV_REQ_PREFETCH);
}

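/*
 * Runs when the job is about to complete: drop the copy-on-read filter and,
 * if the top node still has a COW child, repoint its backing link (and the
 * backing file string recorded in the image) at the new base.
 */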
static int stream_prepare(Job *job)
{
    StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
    BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
    BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
    BlockDriverState *unfiltered_base = bdrv_skip_filters(base);
    Error *local_err = NULL;
    int ret = 0;

    /* We should drop the filter at this point, as the filter holds the backing chain */
    bdrv_cor_filter_drop(s->cor_filter_bs);
    s->cor_filter_bs = NULL;

    if (bdrv_cow_child(unfiltered_bs)) {
        const char *base_id = NULL, *base_fmt = NULL;
        if (unfiltered_base) {
            base_id = s->backing_file_str ?: unfiltered_base->filename;
            if (unfiltered_base->drv) {
                base_fmt = unfiltered_base->drv->format_name;
            }
        }
        bdrv_set_backing_hd(unfiltered_bs, base, &local_err);
        ret = bdrv_change_backing_file(unfiltered_bs, base_id, base_fmt, false);
        if (local_err) {
            error_report_err(local_err);
            return -EPERM;
        }
    }

    return ret;
}

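/*
 * Runs after the job has finished (successfully or not): drop a leftover
 * copy-on-read filter, restore read-only mode if the image had to be
 * reopened read-write, and free the backing file string.
 */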
static void stream_clean(Job *job)
{
    StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
    BlockJob *bjob = &s->common;

    if (s->cor_filter_bs) {
        bdrv_cor_filter_drop(s->cor_filter_bs);
        s->cor_filter_bs = NULL;
    }

    /* Reopen the image back in read-only mode if necessary */
    if (s->bs_read_only) {
        /* Give up write permissions before making it read-only */
        blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
        bdrv_reopen_set_read_only(s->target_bs, true, NULL);
    }

    g_free(s->backing_file_str);
}

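/*
 * Main job coroutine: walk the image in STREAM_CHUNK-sized pieces and copy
 * every range that is unallocated in the top node but allocated somewhere in
 * the intermediate backing images, honouring the configured rate limit and
 * on-error policy.
 */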
static int coroutine_fn stream_run(Job *job, Error **errp)
{
    StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
    BlockBackend *blk = s->common.blk;
    BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
    int64_t len;
    int64_t offset = 0;
    uint64_t delay_ns = 0;
    int error = 0;
    int64_t n = 0; /* bytes */

    if (unfiltered_bs == s->base_overlay) {
        /* Nothing to stream */
        return 0;
    }

    len = bdrv_getlength(s->target_bs);
    if (len < 0) {
        return len;
    }
    job_progress_set_remaining(&s->common.job, len);

    for ( ; offset < len; offset += n) {
        bool copy;
        int ret;

        /* Note that even when no rate limit is applied we need to yield
         * with no pending I/O here so that bdrv_drain_all() returns.
         */
        job_sleep_ns(&s->common.job, delay_ns);
        if (job_is_cancelled(&s->common.job)) {
            break;
        }

        copy = false;

        ret = bdrv_is_allocated(unfiltered_bs, offset, STREAM_CHUNK, &n);
        if (ret == 1) {
            /* Allocated in the top, no need to copy.  */
        } else if (ret >= 0) {
            /* Copy if allocated in the intermediate images.  Limit to the
             * known-unallocated area [offset, offset+n); n is in bytes.  */
            ret = bdrv_is_allocated_above(bdrv_cow_bs(unfiltered_bs),
                                          s->base_overlay, true,
                                          offset, n, &n);
            /* Finish early if end of backing file has been reached */
            if (ret == 0 && n == 0) {
                n = len - offset;
            }

            copy = (ret > 0);
        }
        trace_stream_one_iteration(s, offset, n, ret);
        if (copy) {
            ret = stream_populate(blk, offset, n);
        }
        if (ret < 0) {
            BlockErrorAction action =
                block_job_error_action(&s->common, s->on_error, true, -ret);
            if (action == BLOCK_ERROR_ACTION_STOP) {
                n = 0;
                continue;
            }
            if (error == 0) {
                error = ret;
            }
            if (action == BLOCK_ERROR_ACTION_REPORT) {
                break;
            }
        }

        /* Publish progress */
        job_progress_update(&s->common.job, n);
        if (copy) {
            delay_ns = block_job_ratelimit_get_delay(&s->common, n);
        } else {
            delay_ns = 0;
        }
    }

    /* Do not remove the backing file if an error was there but ignored. */
    return error;
}

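/* Job driver callbacks for block-stream; the graph changes happen in
 * .prepare and .clean rather than in .run itself. */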
static const BlockJobDriver stream_job_driver = {
    .job_driver = {
        .instance_size = sizeof(StreamBlockJob),
        .job_type      = JOB_TYPE_STREAM,
        .free          = block_job_free,
        .run           = stream_run,
        .prepare       = stream_prepare,
        .clean         = stream_clean,
        .user_resume   = block_job_user_resume,
    },
};

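/*
 * Set up and start a block-stream job on @bs: resolve base_overlay and
 * above_base from either @base or @bottom, reopen @bs read-write if needed,
 * insert the copy-on-read filter above @bs, create the job, and block writes
 * and resizes on all intermediate nodes that will disappear from the chain.
 * Errors are reported through @errp; on failure no job is started.
 */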
void stream_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *base, const char *backing_file_str,
                  BlockDriverState *bottom,
                  int creation_flags, int64_t speed,
                  BlockdevOnError on_error,
                  const char *filter_node_name,
                  Error **errp)
{
    StreamBlockJob *s = NULL;
    BlockDriverState *iter;
    bool bs_read_only;
    int basic_flags = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
    BlockDriverState *base_overlay;
    BlockDriverState *cor_filter_bs = NULL;
    BlockDriverState *above_base;
    QDict *opts;
    int ret;

    assert(!(base && bottom));
    assert(!(backing_file_str && bottom));

    if (bottom) {
        /*
         * New, simpler interface. The code below is still written in terms of
         * the old interface with its @base parameter (it does not freeze the
         * link to base, so in that respect the old code is also correct for
         * the new interface). For now, just emulate base_overlay and
         * above_base. Once the old interface is finally removed, this code
         * should be refactored to use only "bottom" and none of the "*base*"
         * variables.
         */
        assert(!bottom->drv->is_filter);
        base_overlay = above_base = bottom;
    } else {
        base_overlay = bdrv_find_overlay(bs, base);
        if (!base_overlay) {
            error_setg(errp, "'%s' is not in the backing chain of '%s'",
                       base->node_name, bs->node_name);
            return;
        }

        /*
         * Find the node directly above @base.  @base_overlay is a COW overlay,
         * so it must have a bdrv_cow_child(), but it is the immediate overlay
         * of @base, so between the two there can only be filters.
         */
        above_base = base_overlay;
        if (bdrv_cow_bs(above_base) != base) {
            above_base = bdrv_cow_bs(above_base);
            while (bdrv_filter_bs(above_base) != base) {
                above_base = bdrv_filter_bs(above_base);
            }
        }
    }

    /* Make sure that the image is opened in read-write mode */
    bs_read_only = bdrv_is_read_only(bs);
    if (bs_read_only) {
        int ret;
        /* Hold the chain during reopen */
        if (bdrv_freeze_backing_chain(bs, above_base, errp) < 0) {
            return;
        }

        ret = bdrv_reopen_set_read_only(bs, false, errp);

        /* failure, or cor-filter will hold the chain */
        bdrv_unfreeze_backing_chain(bs, above_base);

        if (ret < 0) {
            return;
        }
    }

    opts = qdict_new();

    qdict_put_str(opts, "driver", "copy-on-read");
    qdict_put_str(opts, "file", bdrv_get_node_name(bs));
    /* Pass the base_overlay node name as 'bottom' to COR driver */
    qdict_put_str(opts, "bottom", base_overlay->node_name);
    if (filter_node_name) {
        qdict_put_str(opts, "node-name", filter_node_name);
    }

    cor_filter_bs = bdrv_insert_node(bs, opts, BDRV_O_RDWR, errp);
    if (!cor_filter_bs) {
        goto fail;
    }

    if (!filter_node_name) {
        cor_filter_bs->implicit = true;
    }

    s = block_job_create(job_id, &stream_job_driver, NULL, cor_filter_bs,
                         BLK_PERM_CONSISTENT_READ,
                         basic_flags | BLK_PERM_WRITE,
                         speed, creation_flags, NULL, NULL, errp);
    if (!s) {
        goto fail;
    }

    /*
     * Prevent concurrent jobs trying to modify the graph structure here, we
     * already have our own plans. Also don't allow resize as the image size is
     * queried only at the job start and then cached.
     */
    if (block_job_add_bdrv(&s->common, "active node", bs, 0,
                           basic_flags | BLK_PERM_WRITE, errp)) {
        goto fail;
    }

    /* Block all intermediate nodes between bs and base, because they will
     * disappear from the chain after this operation. The streaming job reads
     * every block only once, assuming that it doesn't change, so forbid writes
     * and resizes. Reassign the base node pointer, because the backing BS of
     * the above_base node might change after the call to
     * bdrv_reopen_set_read_only() due to parallel block jobs running.
     */
    base = bdrv_filter_or_cow_bs(above_base);
    for (iter = bdrv_filter_or_cow_bs(bs); iter != base;
         iter = bdrv_filter_or_cow_bs(iter))
    {
        ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
                                 basic_flags, errp);
        if (ret < 0) {
            goto fail;
        }
    }

    s->base_overlay = base_overlay;
    s->above_base = above_base;
    s->backing_file_str = g_strdup(backing_file_str);
    s->cor_filter_bs = cor_filter_bs;
    s->target_bs = bs;
    s->bs_read_only = bs_read_only;

    s->on_error = on_error;
    trace_stream_start(bs, base, s);
    job_start(&s->common.job);
    return;

fail:
    if (s) {
        job_early_fail(&s->common.job);
    }
    if (cor_filter_bs) {
        bdrv_cor_filter_drop(cor_filter_bs);
    }
    if (bs_read_only) {
        bdrv_reopen_set_read_only(bs, true, NULL);
    }
}