cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

blkdebug.c (32039B)


      1/*
      2 * Block protocol for I/O error injection
      3 *
      4 * Copyright (C) 2016-2017 Red Hat, Inc.
      5 * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
      6 *
      7 * Permission is hereby granted, free of charge, to any person obtaining a copy
      8 * of this software and associated documentation files (the "Software"), to deal
      9 * in the Software without restriction, including without limitation the rights
     10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     11 * copies of the Software, and to permit persons to whom the Software is
     12 * furnished to do so, subject to the following conditions:
     13 *
     14 * The above copyright notice and this permission notice shall be included in
     15 * all copies or substantial portions of the Software.
     16 *
     17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
     20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     23 * THE SOFTWARE.
     24 */
     25
     26#include "qemu/osdep.h"
     27#include "qapi/error.h"
     28#include "qemu/cutils.h"
     29#include "qemu/config-file.h"
     30#include "block/block_int.h"
     31#include "block/qdict.h"
     32#include "qemu/module.h"
     33#include "qemu/option.h"
     34#include "qapi/qapi-visit-block-core.h"
     35#include "qapi/qmp/qdict.h"
     36#include "qapi/qmp/qlist.h"
     37#include "qapi/qmp/qstring.h"
     38#include "qapi/qobject-input-visitor.h"
     39#include "sysemu/qtest.h"
     40
     41/* All APIs are thread-safe */
     42
/*
 * Per-node state of a blkdebug driver instance.
 *
 * The first group of fields is filled in while the node is opened and only
 * read afterwards; the second group is mutable and guarded by @lock.
 */
typedef struct BDRVBlkdebugState {
    /* IN: initialized in blkdebug_open() and never changed */
    uint64_t align;             /* "align" option, 0 if not given */
    uint64_t max_transfer;      /* "max-transfer" option, 0 if not given */
    uint64_t opt_write_zero;    /* "opt-write-zero" option, 0 if not given */
    uint64_t max_write_zero;    /* "max-write-zero" option, 0 if not given */
    uint64_t opt_discard;       /* "opt-discard" option, 0 if not given */
    uint64_t max_discard;       /* "max-discard" option, 0 if not given */
    char *config_file; /* For blkdebug_refresh_filename() */
    /* initialized in blkdebug_parse_perms() */
    uint64_t take_child_perms;      /* from the "take-child-perms." options */
    uint64_t unshare_child_perms;   /* from the "unshare-child-perms." options */

    /* State. Protected by lock */
    int state;                                  /* current state number */
    QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];  /* rules, one list per event */
    QSIMPLEQ_HEAD(, BlkdebugRule) active_rules; /* inject-error rules queued by
                                                   process_rule(), consulted by
                                                   rule_check() */
    QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs; /* requests parked by
                                                          suspend_request() */
    QemuMutex lock;
} BDRVBlkdebugState;
     63
/*
 * AIO control block extended with a result code.
 * NOTE(review): no user of this type is visible in this part of the file;
 * confirm it is still needed.
 */
typedef struct BlkdebugAIOCB {
    BlockAIOCB common;
    int ret;
} BlkdebugAIOCB;
     68
/* A request whose coroutine is parked until it is resumed by tag */
typedef struct BlkdebugSuspendedReq {
    /* IN: initialized in suspend_request() */
    Coroutine *co;      /* coroutine to re-enter on resume */
    char *tag;          /* owned copy of the breakpoint tag */

    /* List entry protected by BDRVBlkdebugState's lock */
    QLIST_ENTRY(BlkdebugSuspendedReq) next;
} BlkdebugSuspendedReq;
     77
/* Kinds of rule actions; also used to index per-action counters */
enum {
    ACTION_INJECT_ERROR,    /* fail matching I/O requests */
    ACTION_SET_STATE,       /* switch the state number */
    ACTION_SUSPEND,         /* park the current coroutine (breakpoint) */
    ACTION__MAX,            /* number of actions, not an action itself */
};
     84
/*
 * One configured rule: when @event fires while the node is in @state
 * (0 matches every state, see process_rule()), perform @action using the
 * matching member of @options.
 */
typedef struct BlkdebugRule {
    /* IN: initialized in add_rule() or blkdebug_debug_breakpoint() */
    BlkdebugEvent event;
    int action;             /* one of the ACTION_* constants */
    int state;
    union {
        /* ACTION_INJECT_ERROR */
        struct {
            uint64_t iotype_mask;   /* bit (1 << BlkdebugIOType) per I/O type */
            int error;              /* positive errno; returned negated */
            int immediately;        /* non-zero: fail without yielding first */
            int once;               /* non-zero: drop the rule after one hit */
            int64_t offset;         /* byte offset to match, -1 for any */
        } inject;
        /* ACTION_SET_STATE */
        struct {
            int new_state;
        } set_state;
        /* ACTION_SUSPEND */
        struct {
            char *tag;              /* owned; freed in remove_rule() */
        } suspend;
    } options;

    /* List entries protected by BDRVBlkdebugState's lock */
    QLIST_ENTRY(BlkdebugRule) next;
    QSIMPLEQ_ENTRY(BlkdebugRule) active_next;
} BlkdebugRule;
    110
/* inject.iotype_mask holds one bit per BlkdebugIOType value */
QEMU_BUILD_BUG_MSG(BLKDEBUG_IO_TYPE__MAX > 64,
                   "BlkdebugIOType mask does not fit into an uint64_t");
    113
/*
 * Options accepted by an "inject-error" rule; consumed by add_rule() with
 * ACTION_INJECT_ERROR.
 */
static QemuOptsList inject_error_opts = {
    .name = "inject-error",
    .head = QTAILQ_HEAD_INITIALIZER(inject_error_opts.head),
    .desc = {
        {
            /* Mandatory: name of the BlkdebugEvent the rule is bound to */
            .name = "event",
            .type = QEMU_OPT_STRING,
        },
        {
            /* State the rule applies in; defaults to 0 (any state) */
            .name = "state",
            .type = QEMU_OPT_NUMBER,
        },
        {
            /* I/O type to fail; a default set of types is used if omitted */
            .name = "iotype",
            .type = QEMU_OPT_STRING,
        },
        {
            /* Error number to inject; defaults to EIO */
            .name = "errno",
            .type = QEMU_OPT_NUMBER,
        },
        {
            /* Sector to match; defaults to -1 (any offset) */
            .name = "sector",
            .type = QEMU_OPT_NUMBER,
        },
        {
            /* Remove the rule after it injected one error */
            .name = "once",
            .type = QEMU_OPT_BOOL,
        },
        {
            /* Fail the request without yielding the coroutine first */
            .name = "immediately",
            .type = QEMU_OPT_BOOL,
        },
        { /* end of list */ }
    },
};
    149
/*
 * Options accepted by a "set-state" rule; consumed by add_rule() with
 * ACTION_SET_STATE.
 */
static QemuOptsList set_state_opts = {
    .name = "set-state",
    .head = QTAILQ_HEAD_INITIALIZER(set_state_opts.head),
    .desc = {
        {
            /* Mandatory: name of the BlkdebugEvent the rule is bound to */
            .name = "event",
            .type = QEMU_OPT_STRING,
        },
        {
            /* State the rule applies in; defaults to 0 (any state) */
            .name = "state",
            .type = QEMU_OPT_NUMBER,
        },
        {
            /* State to switch to; defaults to 0 */
            .name = "new_state",
            .type = QEMU_OPT_NUMBER,
        },
        { /* end of list */ }
    },
};
    169
/* NULL-terminated list of option groups parsed by read_config() */
static QemuOptsList *config_groups[] = {
    &inject_error_opts,
    &set_state_opts,
    NULL
};
    175
/* Context handed to add_rule() through qemu_opts_foreach() */
struct add_rule_data {
    BDRVBlkdebugState *s;   /* node state that receives the new rules */
    int action;             /* ACTION_* value of the group being iterated */
};
    180
    181static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
    182{
    183    struct add_rule_data *d = opaque;
    184    BDRVBlkdebugState *s = d->s;
    185    const char *event_name;
    186    int event;
    187    struct BlkdebugRule *rule;
    188    int64_t sector;
    189    BlkdebugIOType iotype;
    190    Error *local_error = NULL;
    191
    192    /* Find the right event for the rule */
    193    event_name = qemu_opt_get(opts, "event");
    194    if (!event_name) {
    195        error_setg(errp, "Missing event name for rule");
    196        return -1;
    197    }
    198    event = qapi_enum_parse(&BlkdebugEvent_lookup, event_name, -1, errp);
    199    if (event < 0) {
    200        return -1;
    201    }
    202
    203    /* Set attributes common for all actions */
    204    rule = g_malloc0(sizeof(*rule));
    205    *rule = (struct BlkdebugRule) {
    206        .event  = event,
    207        .action = d->action,
    208        .state  = qemu_opt_get_number(opts, "state", 0),
    209    };
    210
    211    /* Parse action-specific options */
    212    switch (d->action) {
    213    case ACTION_INJECT_ERROR:
    214        rule->options.inject.error = qemu_opt_get_number(opts, "errno", EIO);
    215        rule->options.inject.once  = qemu_opt_get_bool(opts, "once", 0);
    216        rule->options.inject.immediately =
    217            qemu_opt_get_bool(opts, "immediately", 0);
    218        sector = qemu_opt_get_number(opts, "sector", -1);
    219        rule->options.inject.offset =
    220            sector == -1 ? -1 : sector * BDRV_SECTOR_SIZE;
    221
    222        iotype = qapi_enum_parse(&BlkdebugIOType_lookup,
    223                                 qemu_opt_get(opts, "iotype"),
    224                                 BLKDEBUG_IO_TYPE__MAX, &local_error);
    225        if (local_error) {
    226            error_propagate(errp, local_error);
    227            g_free(rule);
    228            return -1;
    229        }
    230        if (iotype != BLKDEBUG_IO_TYPE__MAX) {
    231            rule->options.inject.iotype_mask = (1ull << iotype);
    232        } else {
    233            /* Apply the default */
    234            rule->options.inject.iotype_mask =
    235                (1ull << BLKDEBUG_IO_TYPE_READ)
    236                | (1ull << BLKDEBUG_IO_TYPE_WRITE)
    237                | (1ull << BLKDEBUG_IO_TYPE_WRITE_ZEROES)
    238                | (1ull << BLKDEBUG_IO_TYPE_DISCARD)
    239                | (1ull << BLKDEBUG_IO_TYPE_FLUSH);
    240        }
    241
    242        break;
    243
    244    case ACTION_SET_STATE:
    245        rule->options.set_state.new_state =
    246            qemu_opt_get_number(opts, "new_state", 0);
    247        break;
    248
    249    case ACTION_SUSPEND:
    250        rule->options.suspend.tag =
    251            g_strdup(qemu_opt_get(opts, "tag"));
    252        break;
    253    };
    254
    255    /* Add the rule */
    256    qemu_mutex_lock(&s->lock);
    257    QLIST_INSERT_HEAD(&s->rules[event], rule, next);
    258    qemu_mutex_unlock(&s->lock);
    259
    260    return 0;
    261}
    262
    263/* Called with lock held or from .bdrv_close */
    264static void remove_rule(BlkdebugRule *rule)
    265{
    266    switch (rule->action) {
    267    case ACTION_INJECT_ERROR:
    268    case ACTION_SET_STATE:
    269        break;
    270    case ACTION_SUSPEND:
    271        g_free(rule->options.suspend.tag);
    272        break;
    273    }
    274
    275    QLIST_REMOVE(rule, next);
    276    g_free(rule);
    277}
    278
    279static int read_config(BDRVBlkdebugState *s, const char *filename,
    280                       QDict *options, Error **errp)
    281{
    282    FILE *f = NULL;
    283    int ret;
    284    struct add_rule_data d;
    285    Error *local_err = NULL;
    286
    287    if (filename) {
    288        f = fopen(filename, "r");
    289        if (f == NULL) {
    290            error_setg_errno(errp, errno, "Could not read blkdebug config file");
    291            return -errno;
    292        }
    293
    294        ret = qemu_config_parse(f, config_groups, filename, errp);
    295        if (ret < 0) {
    296            goto fail;
    297        }
    298    }
    299
    300    qemu_config_parse_qdict(options, config_groups, &local_err);
    301    if (local_err) {
    302        error_propagate(errp, local_err);
    303        ret = -EINVAL;
    304        goto fail;
    305    }
    306
    307    d.s = s;
    308    d.action = ACTION_INJECT_ERROR;
    309    qemu_opts_foreach(&inject_error_opts, add_rule, &d, &local_err);
    310    if (local_err) {
    311        error_propagate(errp, local_err);
    312        ret = -EINVAL;
    313        goto fail;
    314    }
    315
    316    d.action = ACTION_SET_STATE;
    317    qemu_opts_foreach(&set_state_opts, add_rule, &d, &local_err);
    318    if (local_err) {
    319        error_propagate(errp, local_err);
    320        ret = -EINVAL;
    321        goto fail;
    322    }
    323
    324    ret = 0;
    325fail:
    326    qemu_opts_reset(&inject_error_opts);
    327    qemu_opts_reset(&set_state_opts);
    328    if (f) {
    329        fclose(f);
    330    }
    331    return ret;
    332}
    333
    334/* Valid blkdebug filenames look like blkdebug:path/to/config:path/to/image */
    335static void blkdebug_parse_filename(const char *filename, QDict *options,
    336                                    Error **errp)
    337{
    338    const char *c;
    339
    340    /* Parse the blkdebug: prefix */
    341    if (!strstart(filename, "blkdebug:", &filename)) {
    342        /* There was no prefix; therefore, all options have to be already
    343           present in the QDict (except for the filename) */
    344        qdict_put_str(options, "x-image", filename);
    345        return;
    346    }
    347
    348    /* Parse config file path */
    349    c = strchr(filename, ':');
    350    if (c == NULL) {
    351        error_setg(errp, "blkdebug requires both config file and image path");
    352        return;
    353    }
    354
    355    if (c != filename) {
    356        QString *config_path;
    357        config_path = qstring_from_substr(filename, 0, c - filename);
    358        qdict_put(options, "config", config_path);
    359    }
    360
    361    /* TODO Allow multi-level nesting and set file.filename here */
    362    filename = c + 1;
    363    qdict_put_str(options, "x-image", filename);
    364}
    365
    366static int blkdebug_parse_perm_list(uint64_t *dest, QDict *options,
    367                                    const char *prefix, Error **errp)
    368{
    369    int ret = 0;
    370    QDict *subqdict = NULL;
    371    QObject *crumpled_subqdict = NULL;
    372    Visitor *v = NULL;
    373    BlockPermissionList *perm_list = NULL, *element;
    374
    375    *dest = 0;
    376
    377    qdict_extract_subqdict(options, &subqdict, prefix);
    378    if (!qdict_size(subqdict)) {
    379        goto out;
    380    }
    381
    382    crumpled_subqdict = qdict_crumple(subqdict, errp);
    383    if (!crumpled_subqdict) {
    384        ret = -EINVAL;
    385        goto out;
    386    }
    387
    388    v = qobject_input_visitor_new(crumpled_subqdict);
    389    if (!visit_type_BlockPermissionList(v, NULL, &perm_list, errp)) {
    390        ret = -EINVAL;
    391        goto out;
    392    }
    393
    394    for (element = perm_list; element; element = element->next) {
    395        *dest |= bdrv_qapi_perm_to_blk_perm(element->value);
    396    }
    397
    398out:
    399    qapi_free_BlockPermissionList(perm_list);
    400    visit_free(v);
    401    qobject_unref(subqdict);
    402    qobject_unref(crumpled_subqdict);
    403    return ret;
    404}
    405
    406static int blkdebug_parse_perms(BDRVBlkdebugState *s, QDict *options,
    407                                Error **errp)
    408{
    409    int ret;
    410
    411    ret = blkdebug_parse_perm_list(&s->take_child_perms, options,
    412                                   "take-child-perms.", errp);
    413    if (ret < 0) {
    414        return ret;
    415    }
    416
    417    ret = blkdebug_parse_perm_list(&s->unshare_child_perms, options,
    418                                   "unshare-child-perms.", errp);
    419    if (ret < 0) {
    420        return ret;
    421    }
    422
    423    return 0;
    424}
    425
/*
 * Driver options absorbed from the options QDict in blkdebug_open().
 * The size-typed entries are validated there against INT_MAX and the
 * effective request alignment.
 */
static QemuOptsList runtime_opts = {
    .name = "blkdebug",
    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
    .desc = {
        {
            .name = "config",
            .type = QEMU_OPT_STRING,
            .help = "Path to the configuration file",
        },
        {
            /* Set by blkdebug_parse_filename(); names the image to open */
            .name = "x-image",
            .type = QEMU_OPT_STRING,
            .help = "[internal use only, will be removed]",
        },
        {
            .name = "align",
            .type = QEMU_OPT_SIZE,
            .help = "Required alignment in bytes",
        },
        {
            .name = "max-transfer",
            .type = QEMU_OPT_SIZE,
            .help = "Maximum transfer size in bytes",
        },
        {
            .name = "opt-write-zero",
            .type = QEMU_OPT_SIZE,
            .help = "Optimum write zero alignment in bytes",
        },
        {
            .name = "max-write-zero",
            .type = QEMU_OPT_SIZE,
            .help = "Maximum write zero size in bytes",
        },
        {
            .name = "opt-discard",
            .type = QEMU_OPT_SIZE,
            .help = "Optimum discard alignment in bytes",
        },
        {
            .name = "max-discard",
            .type = QEMU_OPT_SIZE,
            .help = "Maximum discard size in bytes",
        },
        { /* end of list */ }
    },
};
    473
    474static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
    475                         Error **errp)
    476{
    477    BDRVBlkdebugState *s = bs->opaque;
    478    QemuOpts *opts;
    479    int ret;
    480    uint64_t align;
    481
    482    qemu_mutex_init(&s->lock);
    483    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    484    if (!qemu_opts_absorb_qdict(opts, options, errp)) {
    485        ret = -EINVAL;
    486        goto out;
    487    }
    488
    489    /* Read rules from config file or command line options */
    490    s->config_file = g_strdup(qemu_opt_get(opts, "config"));
    491    ret = read_config(s, s->config_file, options, errp);
    492    if (ret) {
    493        goto out;
    494    }
    495
    496    /* Set initial state */
    497    s->state = 1;
    498
    499    /* Parse permissions modifiers before opening the image file */
    500    ret = blkdebug_parse_perms(s, options, errp);
    501    if (ret < 0) {
    502        goto out;
    503    }
    504
    505    /* Open the image file */
    506    bs->file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, "image",
    507                               bs, &child_of_bds,
    508                               BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
    509                               false, errp);
    510    if (!bs->file) {
    511        ret = -EINVAL;
    512        goto out;
    513    }
    514
    515    bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
    516        (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
    517    bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
    518        ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
    519            bs->file->bs->supported_zero_flags);
    520    ret = -EINVAL;
    521
    522    /* Set alignment overrides */
    523    s->align = qemu_opt_get_size(opts, "align", 0);
    524    if (s->align && (s->align >= INT_MAX || !is_power_of_2(s->align))) {
    525        error_setg(errp, "Cannot meet constraints with align %" PRIu64,
    526                   s->align);
    527        goto out;
    528    }
    529    align = MAX(s->align, bs->file->bs->bl.request_alignment);
    530
    531    s->max_transfer = qemu_opt_get_size(opts, "max-transfer", 0);
    532    if (s->max_transfer &&
    533        (s->max_transfer >= INT_MAX ||
    534         !QEMU_IS_ALIGNED(s->max_transfer, align))) {
    535        error_setg(errp, "Cannot meet constraints with max-transfer %" PRIu64,
    536                   s->max_transfer);
    537        goto out;
    538    }
    539
    540    s->opt_write_zero = qemu_opt_get_size(opts, "opt-write-zero", 0);
    541    if (s->opt_write_zero &&
    542        (s->opt_write_zero >= INT_MAX ||
    543         !QEMU_IS_ALIGNED(s->opt_write_zero, align))) {
    544        error_setg(errp, "Cannot meet constraints with opt-write-zero %" PRIu64,
    545                   s->opt_write_zero);
    546        goto out;
    547    }
    548
    549    s->max_write_zero = qemu_opt_get_size(opts, "max-write-zero", 0);
    550    if (s->max_write_zero &&
    551        (s->max_write_zero >= INT_MAX ||
    552         !QEMU_IS_ALIGNED(s->max_write_zero,
    553                          MAX(s->opt_write_zero, align)))) {
    554        error_setg(errp, "Cannot meet constraints with max-write-zero %" PRIu64,
    555                   s->max_write_zero);
    556        goto out;
    557    }
    558
    559    s->opt_discard = qemu_opt_get_size(opts, "opt-discard", 0);
    560    if (s->opt_discard &&
    561        (s->opt_discard >= INT_MAX ||
    562         !QEMU_IS_ALIGNED(s->opt_discard, align))) {
    563        error_setg(errp, "Cannot meet constraints with opt-discard %" PRIu64,
    564                   s->opt_discard);
    565        goto out;
    566    }
    567
    568    s->max_discard = qemu_opt_get_size(opts, "max-discard", 0);
    569    if (s->max_discard &&
    570        (s->max_discard >= INT_MAX ||
    571         !QEMU_IS_ALIGNED(s->max_discard,
    572                          MAX(s->opt_discard, align)))) {
    573        error_setg(errp, "Cannot meet constraints with max-discard %" PRIu64,
    574                   s->max_discard);
    575        goto out;
    576    }
    577
    578    bdrv_debug_event(bs, BLKDBG_NONE);
    579
    580    ret = 0;
    581out:
    582    if (ret < 0) {
    583        qemu_mutex_destroy(&s->lock);
    584        g_free(s->config_file);
    585    }
    586    qemu_opts_del(opts);
    587    return ret;
    588}
    589
    590static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
    591                      BlkdebugIOType iotype)
    592{
    593    BDRVBlkdebugState *s = bs->opaque;
    594    BlkdebugRule *rule = NULL;
    595    int error;
    596    bool immediately;
    597
    598    qemu_mutex_lock(&s->lock);
    599    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
    600        uint64_t inject_offset = rule->options.inject.offset;
    601
    602        if ((inject_offset == -1 ||
    603             (bytes && inject_offset >= offset &&
    604              inject_offset < offset + bytes)) &&
    605            (rule->options.inject.iotype_mask & (1ull << iotype)))
    606        {
    607            break;
    608        }
    609    }
    610
    611    if (!rule || !rule->options.inject.error) {
    612        qemu_mutex_unlock(&s->lock);
    613        return 0;
    614    }
    615
    616    immediately = rule->options.inject.immediately;
    617    error = rule->options.inject.error;
    618
    619    if (rule->options.inject.once) {
    620        QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next);
    621        remove_rule(rule);
    622    }
    623
    624    qemu_mutex_unlock(&s->lock);
    625    if (!immediately) {
    626        aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
    627        qemu_coroutine_yield();
    628    }
    629
    630    return -error;
    631}
    632
    633static int coroutine_fn
    634blkdebug_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
    635                   QEMUIOVector *qiov, BdrvRequestFlags flags)
    636{
    637    int err;
    638
    639    /* Sanity check block layer guarantees */
    640    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
    641    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
    642    if (bs->bl.max_transfer) {
    643        assert(bytes <= bs->bl.max_transfer);
    644    }
    645
    646    err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_READ);
    647    if (err) {
    648        return err;
    649    }
    650
    651    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
    652}
    653
    654static int coroutine_fn
    655blkdebug_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
    656                    QEMUIOVector *qiov, BdrvRequestFlags flags)
    657{
    658    int err;
    659
    660    /* Sanity check block layer guarantees */
    661    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
    662    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
    663    if (bs->bl.max_transfer) {
    664        assert(bytes <= bs->bl.max_transfer);
    665    }
    666
    667    err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_WRITE);
    668    if (err) {
    669        return err;
    670    }
    671
    672    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
    673}
    674
    675static int blkdebug_co_flush(BlockDriverState *bs)
    676{
    677    int err = rule_check(bs, 0, 0, BLKDEBUG_IO_TYPE_FLUSH);
    678
    679    if (err) {
    680        return err;
    681    }
    682
    683    return bdrv_co_flush(bs->file->bs);
    684}
    685
    686static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
    687                                                  int64_t offset, int64_t bytes,
    688                                                  BdrvRequestFlags flags)
    689{
    690    uint32_t align = MAX(bs->bl.request_alignment,
    691                         bs->bl.pwrite_zeroes_alignment);
    692    int err;
    693
    694    /* Only pass through requests that are larger than requested
    695     * preferred alignment (so that we test the fallback to writes on
    696     * unaligned portions), and check that the block layer never hands
    697     * us anything unaligned that crosses an alignment boundary.  */
    698    if (bytes < align) {
    699        assert(QEMU_IS_ALIGNED(offset, align) ||
    700               QEMU_IS_ALIGNED(offset + bytes, align) ||
    701               DIV_ROUND_UP(offset, align) ==
    702               DIV_ROUND_UP(offset + bytes, align));
    703        return -ENOTSUP;
    704    }
    705    assert(QEMU_IS_ALIGNED(offset, align));
    706    assert(QEMU_IS_ALIGNED(bytes, align));
    707    if (bs->bl.max_pwrite_zeroes) {
    708        assert(bytes <= bs->bl.max_pwrite_zeroes);
    709    }
    710
    711    err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_WRITE_ZEROES);
    712    if (err) {
    713        return err;
    714    }
    715
    716    return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
    717}
    718
    719static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
    720                                             int64_t offset, int64_t bytes)
    721{
    722    uint32_t align = bs->bl.pdiscard_alignment;
    723    int err;
    724
    725    /* Only pass through requests that are larger than requested
    726     * minimum alignment, and ensure that unaligned requests do not
    727     * cross optimum discard boundaries. */
    728    if (bytes < bs->bl.request_alignment) {
    729        assert(QEMU_IS_ALIGNED(offset, align) ||
    730               QEMU_IS_ALIGNED(offset + bytes, align) ||
    731               DIV_ROUND_UP(offset, align) ==
    732               DIV_ROUND_UP(offset + bytes, align));
    733        return -ENOTSUP;
    734    }
    735    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
    736    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
    737    if (align && bytes >= align) {
    738        assert(QEMU_IS_ALIGNED(offset, align));
    739        assert(QEMU_IS_ALIGNED(bytes, align));
    740    }
    741    if (bs->bl.max_pdiscard) {
    742        assert(bytes <= bs->bl.max_pdiscard);
    743    }
    744
    745    err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_DISCARD);
    746    if (err) {
    747        return err;
    748    }
    749
    750    return bdrv_co_pdiscard(bs->file, offset, bytes);
    751}
    752
    753static int coroutine_fn blkdebug_co_block_status(BlockDriverState *bs,
    754                                                 bool want_zero,
    755                                                 int64_t offset,
    756                                                 int64_t bytes,
    757                                                 int64_t *pnum,
    758                                                 int64_t *map,
    759                                                 BlockDriverState **file)
    760{
    761    int err;
    762
    763    assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment));
    764
    765    err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_BLOCK_STATUS);
    766    if (err) {
    767        return err;
    768    }
    769
    770    assert(bs->file && bs->file->bs);
    771    *pnum = bytes;
    772    *map = offset;
    773    *file = bs->file->bs;
    774    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
    775}
    776
    777static void blkdebug_close(BlockDriverState *bs)
    778{
    779    BDRVBlkdebugState *s = bs->opaque;
    780    BlkdebugRule *rule, *next;
    781    int i;
    782
    783    for (i = 0; i < BLKDBG__MAX; i++) {
    784        QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
    785            remove_rule(rule);
    786        }
    787    }
    788
    789    g_free(s->config_file);
    790    qemu_mutex_destroy(&s->lock);
    791}
    792
    793/* Called with lock held.  */
    794static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
    795{
    796    BDRVBlkdebugState *s = bs->opaque;
    797    BlkdebugSuspendedReq *r;
    798
    799    r = g_new(BlkdebugSuspendedReq, 1);
    800
    801    r->co         = qemu_coroutine_self();
    802    r->tag        = g_strdup(rule->options.suspend.tag);
    803
    804    remove_rule(rule);
    805    QLIST_INSERT_HEAD(&s->suspended_reqs, r, next);
    806
    807    if (!qtest_enabled()) {
    808        printf("blkdebug: Suspended request '%s'\n", r->tag);
    809    }
    810}
    811
    812/* Called with lock held.  */
    813static void process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
    814                         int *action_count, int *new_state)
    815{
    816    BDRVBlkdebugState *s = bs->opaque;
    817
    818    /* Only process rules for the current state */
    819    if (rule->state && rule->state != s->state) {
    820        return;
    821    }
    822
    823    /* Take the action */
    824    action_count[rule->action]++;
    825    switch (rule->action) {
    826    case ACTION_INJECT_ERROR:
    827        if (action_count[ACTION_INJECT_ERROR] == 1) {
    828            QSIMPLEQ_INIT(&s->active_rules);
    829        }
    830        QSIMPLEQ_INSERT_HEAD(&s->active_rules, rule, active_next);
    831        break;
    832
    833    case ACTION_SET_STATE:
    834        *new_state = rule->options.set_state.new_state;
    835        break;
    836
    837    case ACTION_SUSPEND:
    838        suspend_request(bs, rule);
    839        break;
    840    }
    841}
    842
    843static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event)
    844{
    845    BDRVBlkdebugState *s = bs->opaque;
    846    struct BlkdebugRule *rule, *next;
    847    int new_state;
    848    int actions_count[ACTION__MAX] = { 0 };
    849
    850    assert((int)event >= 0 && event < BLKDBG__MAX);
    851
    852    WITH_QEMU_LOCK_GUARD(&s->lock) {
    853        new_state = s->state;
    854        QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) {
    855            process_rule(bs, rule, actions_count, &new_state);
    856        }
    857        s->state = new_state;
    858    }
    859
    860    while (actions_count[ACTION_SUSPEND] > 0) {
    861        qemu_coroutine_yield();
    862        actions_count[ACTION_SUSPEND]--;
    863    }
    864}
    865
    866static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
    867                                     const char *tag)
    868{
    869    BDRVBlkdebugState *s = bs->opaque;
    870    struct BlkdebugRule *rule;
    871    int blkdebug_event;
    872
    873    blkdebug_event = qapi_enum_parse(&BlkdebugEvent_lookup, event, -1, NULL);
    874    if (blkdebug_event < 0) {
    875        return -ENOENT;
    876    }
    877
    878    rule = g_malloc(sizeof(*rule));
    879    *rule = (struct BlkdebugRule) {
    880        .event  = blkdebug_event,
    881        .action = ACTION_SUSPEND,
    882        .state  = 0,
    883        .options.suspend.tag = g_strdup(tag),
    884    };
    885
    886    qemu_mutex_lock(&s->lock);
    887    QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next);
    888    qemu_mutex_unlock(&s->lock);
    889
    890    return 0;
    891}
    892
    893/* Called with lock held. May temporarily release lock. */
    894static int resume_req_by_tag(BDRVBlkdebugState *s, const char *tag, bool all)
    895{
    896    BlkdebugSuspendedReq *r;
    897
    898retry:
    899    /*
    900     * No need for _SAFE, since a different coroutine can remove another node
    901     * (not the current one) in this list, and when the current one is removed
    902     * the iteration starts back from beginning anyways.
    903     */
    904    QLIST_FOREACH(r, &s->suspended_reqs, next) {
    905        if (!strcmp(r->tag, tag)) {
    906            Coroutine *co = r->co;
    907
    908            if (!qtest_enabled()) {
    909                printf("blkdebug: Resuming request '%s'\n", r->tag);
    910            }
    911
    912            QLIST_REMOVE(r, next);
    913            g_free(r->tag);
    914            g_free(r);
    915
    916            qemu_mutex_unlock(&s->lock);
    917            qemu_coroutine_enter(co);
    918            qemu_mutex_lock(&s->lock);
    919
    920            if (all) {
    921                goto retry;
    922            }
    923            return 0;
    924        }
    925    }
    926    return -ENOENT;
    927}
    928
    929static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
    930{
    931    BDRVBlkdebugState *s = bs->opaque;
    932    QEMU_LOCK_GUARD(&s->lock);
    933    return resume_req_by_tag(s, tag, false);
    934}
    935
    936static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
    937                                            const char *tag)
    938{
    939    BDRVBlkdebugState *s = bs->opaque;
    940    BlkdebugRule *rule, *next;
    941    int i, ret = -ENOENT;
    942
    943    QEMU_LOCK_GUARD(&s->lock);
    944    for (i = 0; i < BLKDBG__MAX; i++) {
    945        QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
    946            if (rule->action == ACTION_SUSPEND &&
    947                !strcmp(rule->options.suspend.tag, tag)) {
    948                remove_rule(rule);
    949                ret = 0;
    950            }
    951        }
    952    }
    953    if (resume_req_by_tag(s, tag, true) == 0) {
    954        ret = 0;
    955    }
    956    return ret;
    957}
    958
    959static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
    960{
    961    BDRVBlkdebugState *s = bs->opaque;
    962    BlkdebugSuspendedReq *r;
    963
    964    QEMU_LOCK_GUARD(&s->lock);
    965    QLIST_FOREACH(r, &s->suspended_reqs, next) {
    966        if (!strcmp(r->tag, tag)) {
    967            return true;
    968        }
    969    }
    970    return false;
    971}
    972
    973static int64_t blkdebug_getlength(BlockDriverState *bs)
    974{
    975    return bdrv_getlength(bs->file->bs);
    976}
    977
    978static void blkdebug_refresh_filename(BlockDriverState *bs)
    979{
    980    BDRVBlkdebugState *s = bs->opaque;
    981    const QDictEntry *e;
    982    int ret;
    983
    984    if (!bs->file->bs->exact_filename[0]) {
    985        return;
    986    }
    987
    988    for (e = qdict_first(bs->full_open_options); e;
    989         e = qdict_next(bs->full_open_options, e))
    990    {
    991        /* Real child options are under "image", but "x-image" may
    992         * contain a filename */
    993        if (strcmp(qdict_entry_key(e), "config") &&
    994            strcmp(qdict_entry_key(e), "image") &&
    995            strcmp(qdict_entry_key(e), "x-image") &&
    996            strcmp(qdict_entry_key(e), "driver"))
    997        {
    998            return;
    999        }
   1000    }
   1001
   1002    ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename),
   1003                   "blkdebug:%s:%s",
   1004                   s->config_file ?: "", bs->file->bs->exact_filename);
   1005    if (ret >= sizeof(bs->exact_filename)) {
   1006        /* An overflow makes the filename unusable, so do not report any */
   1007        bs->exact_filename[0] = 0;
   1008    }
   1009}
   1010
   1011static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp)
   1012{
   1013    BDRVBlkdebugState *s = bs->opaque;
   1014
   1015    if (s->align) {
   1016        bs->bl.request_alignment = s->align;
   1017    }
   1018    if (s->max_transfer) {
   1019        bs->bl.max_transfer = s->max_transfer;
   1020    }
   1021    if (s->opt_write_zero) {
   1022        bs->bl.pwrite_zeroes_alignment = s->opt_write_zero;
   1023    }
   1024    if (s->max_write_zero) {
   1025        bs->bl.max_pwrite_zeroes = s->max_write_zero;
   1026    }
   1027    if (s->opt_discard) {
   1028        bs->bl.pdiscard_alignment = s->opt_discard;
   1029    }
   1030    if (s->max_discard) {
   1031        bs->bl.max_pdiscard = s->max_discard;
   1032    }
   1033}
   1034
/*
 * Reopen-prepare callback.  It inspects none of its arguments and always
 * returns 0.
 */
static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
                                   BlockReopenQueue *queue, Error **errp)
{
    return 0;
}
   1040
   1041static void blkdebug_child_perm(BlockDriverState *bs, BdrvChild *c,
   1042                                BdrvChildRole role,
   1043                                BlockReopenQueue *reopen_queue,
   1044                                uint64_t perm, uint64_t shared,
   1045                                uint64_t *nperm, uint64_t *nshared)
   1046{
   1047    BDRVBlkdebugState *s = bs->opaque;
   1048
   1049    bdrv_default_perms(bs, c, role, reopen_queue,
   1050                       perm, shared, nperm, nshared);
   1051
   1052    *nperm |= s->take_child_perms;
   1053    *nshared &= ~s->unshare_child_perms;
   1054}
   1055
/*
 * NULL-terminated list of option names, installed as .strong_runtime_opts
 * in the driver definition.
 * NOTE(review): the entries ending in '.' presumably act as key prefixes
 * covering all sub-options -- confirm against the code that consumes
 * strong_runtime_opts.
 */
static const char *const blkdebug_strong_runtime_opts[] = {
    "config",
    "inject-error.",
    "set-state.",
    "align",
    "max-transfer",
    "opt-write-zero",
    "max-write-zero",
    "opt-discard",
    "max-discard",

    NULL
};
   1069
/*
 * Driver definition for "blkdebug": the table of callbacks and properties
 * that is handed to bdrv_register().  The same name is used as format name
 * and as protocol name, and the driver is flagged as a filter.
 */
static BlockDriver bdrv_blkdebug = {
    .format_name            = "blkdebug",
    .protocol_name          = "blkdebug",
    .instance_size          = sizeof(BDRVBlkdebugState),
    .is_filter              = true,

    /* Open/close, reopen and permission handling */
    .bdrv_parse_filename    = blkdebug_parse_filename,
    .bdrv_file_open         = blkdebug_open,
    .bdrv_close             = blkdebug_close,
    .bdrv_reopen_prepare    = blkdebug_reopen_prepare,
    .bdrv_child_perm        = blkdebug_child_perm,

    /* Node properties: length, filename and limits */
    .bdrv_getlength         = blkdebug_getlength,
    .bdrv_refresh_filename  = blkdebug_refresh_filename,
    .bdrv_refresh_limits    = blkdebug_refresh_limits,

    /* Coroutine-based I/O entry points */
    .bdrv_co_preadv         = blkdebug_co_preadv,
    .bdrv_co_pwritev        = blkdebug_co_pwritev,
    .bdrv_co_flush_to_disk  = blkdebug_co_flush,
    .bdrv_co_pwrite_zeroes  = blkdebug_co_pwrite_zeroes,
    .bdrv_co_pdiscard       = blkdebug_co_pdiscard,
    .bdrv_co_block_status   = blkdebug_co_block_status,

    /* Debug events and breakpoint control */
    .bdrv_debug_event           = blkdebug_debug_event,
    .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
    .bdrv_debug_remove_breakpoint
                                = blkdebug_debug_remove_breakpoint,
    .bdrv_debug_resume          = blkdebug_debug_resume,
    .bdrv_debug_is_suspended    = blkdebug_debug_is_suspended,

    .strong_runtime_opts        = blkdebug_strong_runtime_opts,
};
   1102
/* Module init hook: registers the blkdebug driver definition. */
static void bdrv_blkdebug_init(void)
{
    bdrv_register(&bdrv_blkdebug);
}

/* Hook bdrv_blkdebug_init() into the block-layer init mechanism */
block_init(bdrv_blkdebug_init);